ical2csv/ical2txt.py at master · ncthanh2000/ical2csv · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/python3

import sys
import os.path
from icalendar import Calendar
import recurring_ical_events
from bs4 import BeautifulSoup
import warnings
from dateutil.parser import parse
import datetime

# We don't want bs4's warnings about URLs. We just want the URL printed, if there.
warnings.filterwarnings("ignore", category=UserWarning, module='bs4')

# Without at least one argument there is nothing to convert: show usage and stop.
if len(sys.argv) <= 1:
    print("Please call this script with an ics-file as parameter.\n")
    print("Even better, call it with start and end dates:\n")
    print(sys.argv[0] + " myexport.ics 20210101 20210201")
    print(sys.argv[0] + " myexport.ics 2021-01-01T00:00:00 2021-01-31T23:59:59\n")
    sys.exit(1)

filename = sys.argv[1]
# Extension = the characters after the last period of the file name, without
# the period itself ('' when there is none). Using splitext instead of the
# last three characters means "notes.json" yields "json" (not "son") and a
# file merely *named* "myics" is no longer mistaken for an .ics file.
file_extension = os.path.splitext(filename)[1][1:]
# Column titles for the event fields (not referenced by the text writer in this file).
headers = ('Summary', 'UID', 'Description', 'Location', 'Start Time', 'End Time', 'URL')

class CalendarEvent:
    """Plain record holding the fields of a single calendar event.

    All event fields default to the empty string at class level; an
    instance only gets its own value for a field once something assigns it.
    """
    # Textual fields of the event.
    summary = uid = description = location = ''
    # Start and end of the event; empty string until assigned.
    start = end = ''
    # Optional link attached to the event.
    url = ''

    def __init__(self, name):
        # ``name`` is a free-form label and the only attribute set at construction.
        self.name = name

# Module-level accumulator: open_cal() appends one CalendarEvent per parsed
# event, and the script sorts this list before writing the text file.
events = []

def removehtml(html):
    """Return the visible text of *html* with markup and blank lines removed.

    Adapted from https://stackoverflow.com/questions/328356/extracting-text-from-html-file-using-python

    <script> and <style> elements are dropped entirely. The remaining text is
    split into lines, each line is further split on double spaces, every piece
    is stripped of surrounding whitespace, and the non-empty pieces are joined
    with newlines.
    """
    document = BeautifulSoup(html, features="html.parser")

    # Script and style bodies are not human-readable content: detach them.
    for unwanted in document(["script", "style"]):
        unwanted.extract()

    pieces = []
    for raw_line in document.get_text().splitlines():
        # A double space is treated as a separator between "headlines" on one line.
        for fragment in raw_line.strip().split("  "):
            fragment = fragment.strip()
            if fragment:  # drop blank pieces
                pieces.append(fragment)

    return '\n'.join(pieces)

def open_cal():
    """Parse the ICS file and fill the module-level ``events`` list.

    Reads the module-level ``filename`` and ``file_extension``, expands the
    calendar's (recurring) events between the module-level ``istart`` and
    ``istop``, and appends one ``CalendarEvent`` per usable occurrence to
    ``events``.

    An occurrence is skipped when it is marked ``TRANSP:TRANSPARENT`` or when
    it has no SUMMARY or no DESCRIPTION.

    Exits the process with status 1 (after printing an explanation) when the
    file does not exist or its extension is not ``ics``.
    """
    if not os.path.isfile(filename):
        print("I can't find the file ", filename, ".")
        print("Please enter an ics file located in the same folder as this script.")
        sys.exit(1)  # non-zero: this is a failure, not a successful run

    if file_extension != 'ics':
        print("You entered ", filename, ". ")
        print(file_extension.upper(), " is not a valid file format. Looking for an ICS file.")
        sys.exit(1)

    print("Extracting events from file:", filename, "\n")
    # The context manager closes the file even if parsing raises.
    with open(filename, 'rb') as f:
        gcal = Calendar.from_ical(f.read())

    # Expand recurrences; only occurrences inside [istart, istop] are returned.
    revents = recurring_ical_events.of(gcal).between(istart, istop)

    for component in revents:
        # Only process data if the object is a valid event (i.e. offers .get()).
        if not hasattr(component, 'get'):
            continue
        if component.get('TRANSP') == 'TRANSPARENT':
            continue  # skip all day events and the like
        if component.get('SUMMARY') is None:
            continue  # skip blank items
        if component.get('DESCRIPTION') is None:
            continue  # skip blank items

        event = CalendarEvent("event")
        event.summary = component.get('SUMMARY')
        event.uid = component.get('UID')
        event.description = component.get('DESCRIPTION')
        # A missing LOCATION becomes '' (the CalendarEvent default) rather than
        # None, so later string concatenation with the location cannot fail.
        event.location = component.get('LOCATION', '')
        # Start/end are only taken when the property carries a decoded value (.dt).
        dtstart = component.get('dtstart')
        if hasattr(dtstart, 'dt'):
            event.start = dtstart.dt
        dtend = component.get('dtend')
        if hasattr(dtend, 'dt'):
            event.end = dtend.dt
        event.url = component.get('URL')
        events.append(event)


def _format_hhmm(seconds):
    """Return a duration given in seconds as ``HH:MM`` (leftover seconds are dropped)."""
    hours, remainder = divmod(int(seconds), 3600)
    return '{:02d}:{:02d}'.format(hours, remainder // 60)


def txt_write(icsfile):
    """Write the events in the module-level ``sortedevents`` to a text worklog.

    The output path is *icsfile* with its last three characters replaced by
    ``txt``. Events whose start lies strictly between the module-level
    ``istart`` and ``istop`` are written, grouped under one header per day and
    followed by a per-day "Time Total" line; all other events are counted as
    skipped. Progress is echoed to stdout ("." per written event, "S" per
    skipped one).

    Args:
        icsfile: Path of the source calendar file; only used to derive the
            name of the text file.

    Exits the process with status 1 if an I/O error occurs while writing.
    """
    txtfile = icsfile[:-3] + "txt"
    prevdate = ""
    spent = 0      # seconds accumulated for the day currently being written
    evcount = 0
    evskip = 0
    sys.stdout.write("Processing events : ")
    try:
        # Explicit UTF-8 so non-ASCII summaries/descriptions can always be written,
        # whatever the platform's default encoding is.
        with open(txtfile, 'w', encoding='utf-8') as myfile:
            for event in sortedevents:
                day = event.start.strftime("%Y-%m-%d")

                # A new day begins and the previous one has logged time: close it
                # with its total. (spent > 0 implies a day header was already written.)
                # If you don't want a summary of the time spent added, comment this section.
                if prevdate != day and spent > 0:
                    myfile.write("\nTime Total: " + _format_hhmm(spent) + "\n")
                    spent = 0

                if istart.timestamp() < event.start.timestamp() < istop.timestamp():
                    if prevdate != day:  # Make a header for each day
                        prevdate = day
                        myfile.write("\nWorklog, " + prevdate + "\n===================\n")

                    ds = (event.end - event.start).total_seconds()
                    spent += ds
                    description = removehtml(event.description.encode('utf-8').decode())
                    values = (event.start.strftime("%H:%M") + " - " + event.end.strftime("%H:%M")
                              + " (" + _format_hhmm(ds) + ") "
                              + event.summary.encode('utf-8').decode())
                    # Only include location if there is one (covers both '' and None).
                    if event.location:
                        values = values + " [" + event.location + "]"

                    # Remove Google Meet and Skype Meeting part of description
                    description = description.split('-::~')[0].split('......')[0]
                    if description != '':
                        values = values + "\n" + description + "\n"
                    myfile.write(values + "\n")
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    evcount += 1
                else:
                    sys.stdout.write("S")
                    sys.stdout.flush()
                    evskip += 1

            # Total for the last day written.
            myfile.write("\nTime Total: " + _format_hhmm(spent) + "\n")

            print("\n\nWrote " + str(evcount) + " events to ", txtfile, " and skipped ", str(evskip), " events\n")
    except IOError:
        print("Could not open file!")
        sys.exit(1)  # non-zero: writing the worklog failed


def debug_event(class_name):
    """Print every field of one event object to stdout, one per line.

    Args:
        class_name: The event object to dump (despite the parameter name, an
            instance). It must expose ``name``, ``summary``, ``uid``,
            ``description``, ``location``, ``start``, ``end`` and ``url``.
    """
    print("Contents of ", class_name.name, ":")
    # The plain fields are printed in a fixed order, each on its own line.
    for field in ("summary", "uid", "description", "location", "start", "end"):
        print(getattr(class_name, field))
    # The URL comes last, followed by an extra blank line as a separator.
    print(class_name.url, "\n")

now = datetime.datetime.now()
# Default range start: the UNIX epoch (timestamp 0), expressed as a naive local datetime.
istart = datetime.datetime.fromtimestamp(0)
# Default range end: 5 years (5 * 365 days) in the future, if no end date is
# given, to make sure recurring events don't go on forever ...
istop = now + datetime.timedelta(days=5 * 365)

# Optional date range from the command line: argv[2] is the start, argv[3] the
# end. Each is honoured on its own, so a start date given without an end date
# is no longer silently ignored. Empty strings keep the defaults.
if len(sys.argv) > 2 and sys.argv[2] != '':
    istart = parse(sys.argv[2])
if len(sys.argv) > 3 and sys.argv[3] != '':
    istop = parse(sys.argv[3])

open_cal()          # Open ics file and do initial parsing of events
sortedevents = sorted(events, key=lambda obj: obj.start)  # Make sure events are in chronological order
txt_write(filename)  # Write the matching events to the textfile. With recurring_ical_events, scoping is already done.
# debug_event(sortedevents[0])  # Uncomment to dump the first event while debugging.