From 845485d1696228cb3355054aff70a7553ec7c311 Mon Sep 17 00:00:00 2001
From: Ondrej Kokes
Date: Thu, 22 May 2025 12:22:14 +0200
Subject: [PATCH] io

---
Review notes (this section is ignored by `git am`):
- The download URL changes from the XML export (ucjed.xml) to the CSV export
  (CIS_UCJED.CSV) on a different host, and the lxml import is dropped.
- The response stream (wrapped in GzipFile when Content-Encoding is gzip) is
  now wrapped in a UTF-8 TextIOWrapper and passed to csv.DictReader with
  delimiter ";".
- The two-item `with` is rewritten in the parenthesized form; confirm the
  minimum supported Python version accepts it (documented since 3.10).
- NOTE(review): the loop kept as context below still iterates `et`, whose
  assignment this patch removes, and the new `cr` reader is not consumed in
  the visible hunk. Confirm a follow-up change rewires the loop to `cr`.

 data/iissp/main.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/data/iissp/main.py b/data/iissp/main.py
index d094f64f..534c3943 100644
--- a/data/iissp/main.py
+++ b/data/iissp/main.py
@@ -2,12 +2,12 @@
 import gzip
 import os
 from datetime import date
+from io import TextIOWrapper
 from urllib.request import Request, urlopen
 
-import lxml.etree
 from tqdm import tqdm
 
-url = "https://monitor.statnipokladna.cz/data/xml/ucjed.xml"
+url = "https://monitor.statnipokladna.gov.cz/data/csv/CIS_UCJED.CSV"
 table_name = "ucetni_jednotky"
 
 # XSD nema vsechno, dafuq
@@ -61,16 +61,18 @@ def main(outdir: str, partial: bool = False):
     target_file = os.path.join(outdir, f"{table_name}.csv")
 
     request = Request(url, headers={"Accept-Encoding": "gzip"})
-    with urlopen(request, timeout=60) as f, open(
-        target_file, "w", encoding="utf8"
-    ) as fw:
+    with (
+        urlopen(request, timeout=60) as f,
+        open(target_file, "w", encoding="utf8") as fw,
+    ):
         if f.info().get("Content-Encoding") == "gzip":
             f = gzip.GzipFile(fileobj=f)
 
         cw = csv.DictWriter(fw, fieldnames=cols, lineterminator="\n")
         cw.writeheader()
 
-        et = lxml.etree.iterparse(f)
+        tr = TextIOWrapper(f, encoding="utf-8")
+        cr = csv.DictReader(tr, delimiter=";")
 
         for num, (action, element) in tqdm(enumerate(et)):
             if partial and num > 4e5: