-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathduckdb-python-write-csv.py
More file actions
47 lines (39 loc) · 1.53 KB
/
duckdb-python-write-csv.py
File metadata and controls
47 lines (39 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Persistent Storage
import duckdb
# Define constants for headers
HEADER_CAPTURE_DATE = "capture date"
HEADER_RESPONSE_CODE = "response code"
# Output CSV file name
OUTPUT_CSV = "reject_code_summary.csv"
# Connect to DuckDB
con = duckdb.connect()
# Execute the query and write to CSV using COPY
# con.execute(f"""
# COPY (
# SELECT
# "{HEADER_CAPTURE_DATE}",
# "{HEADER_RESPONSE_CODE}",
# SUM("count") AS total_rejections,
# 100.0 * SUM("count") / SUM(SUM("count")) OVER (PARTITION BY "{HEADER_CAPTURE_DATE}") AS percentage
# FROM read_csv_auto('reject-code.csv')
# GROUP BY "{HEADER_CAPTURE_DATE}", "{HEADER_RESPONSE_CODE}"
# ORDER BY "{HEADER_CAPTURE_DATE}" ASC, SUM("count") DESC
# ) TO '{OUTPUT_CSV}' (HEADER, DELIMITER ',');
# """)
# Execute the query and write to CSV with formatted percentages
# Add formatting in the query for percentage
con.execute(f"""
COPY (
SELECT
"{HEADER_CAPTURE_DATE}",
"{HEADER_RESPONSE_CODE}",
SUM("count") AS total_rejections,
CAST(ROUND(100.0 * SUM("count") / SUM(SUM("count")) OVER (PARTITION BY "{HEADER_CAPTURE_DATE}"), 2) AS STRING) || '%' AS percentage
FROM read_csv_auto('reject-code.csv')
GROUP BY "{HEADER_CAPTURE_DATE}", "{HEADER_RESPONSE_CODE}"
ORDER BY "{HEADER_CAPTURE_DATE}" ASC, SUM("count") DESC
) TO '{OUTPUT_CSV}' (HEADER, DELIMITER ',');
""")
# Close the connection
con.close()
print(f"Results have been written to {OUTPUT_CSV}")