-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdates.py
More file actions
373 lines (269 loc) · 9.13 KB
/
dates.py
File metadata and controls
373 lines (269 loc) · 9.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
"""Generates data for the Dates dimension table.
This module provides functions to create a comprehensive date dimension table
including attributes such as day of week, month, holidays, work days, and
calendar-based calculations.
"""
from datetime import date, datetime, timedelta
import pandas as pd
# --- Constants ---
DAY_OF_WEEK_NAMES = {
1: "Sunday",
2: "Monday",
3: "Tuesday",
4: "Wednesday",
5: "Thursday",
6: "Friday",
7: "Saturday",
}
DAY_OF_WEEK_NUMBERS = {
"Sunday": 1,
"Monday": 2,
"Tuesday": 3,
"Wednesday": 4,
"Thursday": 5,
"Friday": 6,
"Saturday": 7,
}
MONTH_NAMES = {
1: "January",
2: "February",
3: "March",
4: "April",
5: "May",
6: "June",
7: "July",
8: "August",
9: "September",
10: "October",
11: "November",
12: "December",
}
THIRTY_DAY_MONTHS = {4, 6, 9, 11}
# --- Utility functions ---
def get_cy_day(this_date: date) -> int:
"""Returns the numeric day of the calendar year for a date.
Args:
this_date: Date.
Returns:
Day of the calendar year.
"""
return this_date.timetuple().tm_yday
def get_cy_week(this_date: date) -> int:
"""Returns the numeric week of the calendar year for a date.
Args:
this_date: Date.
Returns:
Week of the calendar year.
"""
return int(this_date.strftime("%U")) + 1
def get_cy_quarter(month: int) -> int:
"""Returns the numeric quarter of the calendar year for a month.
Args:
month: Month.
Returns:
Quarter of the calendar year.
"""
assert 1 <= month <= 12
return (month - 1) // 3 + 1
def get_day_of_week_name(day_of_week: int) -> str:
"""Returns the full name of the day.
Args:
day_of_week: Day of the week (SQL-based).
Returns:
Name of the day of the week.
"""
assert 1 <= day_of_week <= 7
return DAY_OF_WEEK_NAMES[day_of_week]
def get_month_name(month: int) -> str:
"""Returns the full name of the month.
Args:
month: Month.
Returns:
Name of the month.
"""
assert 1 <= month <= 12
return MONTH_NAMES[month]
def to_date(iso_string: str) -> date:
"""Converts a string in ISO 8601 date format to a date object.
Args:
iso_string: Date in ISO 8601 format.
Returns:
Date instance.
"""
return datetime.strptime(iso_string, "%Y-%m-%d").date()
def to_sql_weekday(this_date: date) -> int:
"""Converts the weekday number. SQL counts from Sunday = 1.
Args:
this_date: Date.
Returns:
Day of the week from Sunday = 1 to Saturday = 6.
"""
return this_date.isoweekday() % 7 + 1
# --- Date logic ---
def is_last_day_of_month(this_date: date) -> bool:
"""Determines if the date is the last day of a month.
Args:
this_date: Date.
Returns:
True if it's the last day of the month, False otherwise.
"""
next_day = this_date + timedelta(days=1)
return next_day.month != this_date.month
def is_weekend(day_of_week: int) -> bool:
"""Determines if the date is a weekend.
Args:
day_of_week: Day of the week (SQL-based).
Returns:
True if it's a weekend, False otherwise.
"""
assert 1 <= day_of_week <= 7
return day_of_week in {1, 7}
def nth_weekday_of_month(
n: int,
weekday: int,
month: int,
year: int,
) -> date:
"""Returns the date of the nth weekday of the nth month, e.g., the 4th
Thursday of November.
Args:
n: The nth occurrence.
weekday: Day of the week.
month: Month.
year: Year.
Returns:
Date.
"""
first_day = date(year, month, 1)
first_weekday = to_sql_weekday(first_day)
days_offset = (weekday - first_weekday + 7) % 7
return first_day + timedelta(days=days_offset + (n - 1) * 7)
# --- Holiday logic ---
def is_fixed_holiday(this_date: date) -> bool:
"""Determines if the date is a holiday of a fixed date.
Args:
this_date: Date.
Returns:
True if it's a holiday, False otherwise.
"""
day_of_week = to_sql_weekday(this_date)
is_weekday = 1 < day_of_week < 7
is_friday = day_of_week == 6
is_monday = day_of_week == 2
# New Year's Day
is_on_weekday = this_date.month == 1 and this_date.day == 1 and is_weekday
is_on_saturday = this_date.month == 12 and this_date.day == 31 and is_friday
is_on_sunday = this_date.month == 1 and this_date.day == 2 and is_monday
if is_on_weekday or is_on_saturday or is_on_sunday:
return True
# Independence Day
is_on_weekday = this_date.month == 7 and this_date.day == 4 and is_weekday
is_on_saturday = this_date.month == 7 and this_date.day == 3 and is_friday
is_on_sunday = this_date.month == 7 and this_date.day == 5 and is_monday
if is_on_weekday or is_on_saturday or is_on_sunday:
return True
# Christmas Day
is_on_weekday = this_date.month == 12 and this_date.day == 25 and is_weekday
is_on_saturday = this_date.month == 12 and this_date.day == 24 and is_friday
is_on_sunday = this_date.month == 12 and this_date.day == 26 and is_monday
if is_on_weekday or is_on_saturday or is_on_sunday:
return True
return False
def is_floating_holiday(this_date: date) -> bool:
"""Determines if the date is a holiday of a floating date.
Args:
this_date: Date.
Returns:
True if it's a holiday, False otherwise.
"""
year = this_date.year
day_of_week = to_sql_weekday(this_date)
is_monday = day_of_week == DAY_OF_WEEK_NUMBERS["Monday"]
# Martin Luther King Jr. Day (Third Monday of January)
mlk_day = nth_weekday_of_month(3, 2, 1, year)
# Memorial Day (Last Monday in May)
memorial_day = this_date.month == 5 and is_monday and this_date.day >= 25
# Labor Day (First Monday in September)
labor_day = nth_weekday_of_month(1, 2, 9, year)
# Thanksgiving Day (Fourth Thursday in November)
thanksgiving = nth_weekday_of_month(4, 5, 11, year)
# Black Friday (Day after Thanksgiving)
black_friday = thanksgiving + timedelta(days=1)
return (
this_date in {labor_day, thanksgiving, black_friday}
or (this_date == mlk_day and year >= 1986) # MLK Day
or memorial_day
)
def is_holiday(this_date: date) -> bool:
"""Determines if the date is a holiday.
Args:
this_date: Date.
Returns:
True if it's a holiday, False otherwise.
"""
return is_fixed_holiday(this_date) or is_floating_holiday(this_date)
def is_work_day(this_date: date) -> bool:
"""Determines if the date is a work day.
Args:
this_date: Date.
Returns:
True if it's a work day, False otherwise.
"""
return not (is_holiday(this_date) or is_weekend(to_sql_weekday(this_date)))
# --- Main Table Generator ---
def create_dates(start: date, end: date) -> pd.DataFrame:
"""Creates a date table.
Args:
start: Start date.
end: End date.
Returns:
Table of date dimensions.
"""
date_range = pd.date_range(start, end)
dates = pd.DataFrame({"DateDate": date_range})
# Convert from Pandas Timestamp to Python date for easier processing
dates["DateDate"] = dates["DateDate"].dt.date
dates["DateYear"] = dates["DateDate"].apply(lambda x: x.year)
dates["DateMonth"] = dates["DateDate"].apply(lambda x: x.month)
dates["DateDay"] = dates["DateDate"].apply(lambda x: x.day)
dates["DateDayOfWeek"] = dates["DateDate"].apply(to_sql_weekday)
dates["IsLastDayOfMonth"] = dates["DateDate"].apply(is_last_day_of_month)
dates["IsWeekend"] = dates["DateDayOfWeek"].apply(is_weekend)
dates["IsHoliday"] = dates["DateDate"].apply(is_holiday)
dates["IsWorkDay"] = dates["DateDate"].apply(is_work_day)
dates["IsPayDay"] = False
dates["DayOfWeekName"] = dates["DateDayOfWeek"].apply(get_day_of_week_name)
dates["NameOfMonth"] = dates["DateMonth"].apply(get_month_name)
dates["CYQuarter"] = dates["DateMonth"].apply(get_cy_quarter)
dates["CYDay"] = dates["DateDate"].apply(get_cy_day)
dates["CYWeek"] = dates["DateDate"].apply(get_cy_week)
return dates
def fill_pay_days(dates: pd.DataFrame) -> None:
"""Fills the pay days.
Args:
dates: Table of date dimensions.
"""
assert not dates.empty
current = date(2011, 1, 7)
max_date = dates["DateDate"].max()
while current <= max_date:
pay_day = current
# Shift the pay date to Thursday if it falls on a holiday, except for
# Black Friday
black_friday = nth_weekday_of_month(4, 5, 11, current.year) + timedelta(days=1)
is_bf = current == black_friday
if is_holiday(current) and not is_bf:
pay_day -= timedelta(days=1)
dates.loc[dates["DateDate"] == pay_day, "IsPayDay"] = True
current += timedelta(days=14)
def get_dates(start: date, end: date) -> pd.DataFrame:
"""Wrapper function to create a date table.
Args:
start: Start date in ISO 8601 format.
end: End date in ISO 8601 format.
Returns:
Table of date dimensions.
"""
dates = create_dates(start, end)
fill_pay_days(dates)
return dates