From 2dbb69568e764d20a3b86fd6e9ee145100c1060b Mon Sep 17 00:00:00 2001
From: Ondrej Kokes
Date: Wed, 18 May 2022 16:40:13 +0200
Subject: [PATCH 1/3] [misc] rejstrik trestu PO

---
 data/misc/README.md |  5 +++++
 data/misc/main.py   | 20 ++++++++++++++++++++
 data/misc/schema.py | 28 ++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+)
 create mode 100644 data/misc/README.md
 create mode 100644 data/misc/main.py
 create mode 100644 data/misc/schema.py

diff --git a/data/misc/README.md b/data/misc/README.md
new file mode 100644
index 00000000..c5eb8783
--- /dev/null
+++ b/data/misc/README.md
@@ -0,0 +1,5 @@
+# Misc
+
+Náhodné další datasety, které jsou a) malé, b) jednotabulkové
+
+- [Rejstřík trestů právnických osob](https://eservice-po.rejtr.justice.cz/public/odsouzeni;jsessionid=E925E6A2467577AB9730F7F9BA96D3E9.pocluster1?0)
diff --git a/data/misc/main.py b/data/misc/main.py
new file mode 100644
index 00000000..2c25ff5a
--- /dev/null
+++ b/data/misc/main.py
@@ -0,0 +1,20 @@
+import csv
+import io
+import os
+from urllib.request import urlopen
+
+URL_ODSOUZENE_PO = "https://eservice-po.rejtr.justice.cz/public/odsouzeni_csv"
+
+
+def main(outdir: str, partial: bool = False):
+    # urlretrieve(URL_ODSOUZENE_PO, os.path.join(outdir, "odsouzene_po.csv"))
+    with urlopen(URL_ODSOUZENE_PO) as r, open(
+        os.path.join(outdir, "odsouzene_po.csv"), "w", encoding="utf-8"
+    ) as fw:
+        cw = csv.writer(fw, lineterminator="\n")
+        for row in csv.reader(io.TextIOWrapper(r, encoding="utf-8")):
+            cw.writerow(row)
+
+
+if __name__ == "__main__":
+    main(".")
diff --git a/data/misc/schema.py b/data/misc/schema.py
new file mode 100644
index 00000000..e7362e26
--- /dev/null
+++ b/data/misc/schema.py
@@ -0,0 +1,28 @@
+from sqlalchemy import Column, MetaData, Table
+from sqlalchemy.sql.sqltypes import Integer, Text
+
+meta = MetaData()
+"IČO", "Obchodní jméno", "Sídlo", "Stát", "Odsouzeni"
+
+
+schema = [
+    Table(
+        "odsouzene_po",
+        meta,
+        Column("ico", Integer, nullable=True, index=True),
+        Column("obchodni_jmeno", Text, nullable=False),
+        Column("sidlo", Text, nullable=True),
+        Column("stat", Text, nullable=False),
+        Column("odsouzeni", Text, nullable=False),
+    )
+]
+
+
+if __name__ == "__main__":
+    from sqlalchemy import create_engine
+    from sqlalchemy.schema import CreateTable
+
+    engine = create_engine("sqlite:///:memory:")
+    for table in schema:
+        print(f"-- {table.name} as created in SQLite")
+        print(CreateTable(table).compile(engine))

From a92f3af502d64c58d80b3001d159460d425d60e3 Mon Sep 17 00:00:00 2001
From: Ondrej Kokes
Date: Wed, 18 May 2022 16:50:03 +0200
Subject: [PATCH 2/3] nit

---
 data/misc/main.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/data/misc/main.py b/data/misc/main.py
index 2c25ff5a..55bd92c6 100644
--- a/data/misc/main.py
+++ b/data/misc/main.py
@@ -7,7 +7,6 @@
 
 
 def main(outdir: str, partial: bool = False):
-    # urlretrieve(URL_ODSOUZENE_PO, os.path.join(outdir, "odsouzene_po.csv"))
     with urlopen(URL_ODSOUZENE_PO) as r, open(
         os.path.join(outdir, "odsouzene_po.csv"), "w", encoding="utf-8"
     ) as fw:

From a1194e2f7a3ca0893170d4039283468e088af6c3 Mon Sep 17 00:00:00 2001
From: Ondrej Kokes
Date: Wed, 18 May 2022 16:52:07 +0200
Subject: [PATCH 3/3] ...

---
 data/misc/schema.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/data/misc/schema.py b/data/misc/schema.py
index e7362e26..4947f338 100644
--- a/data/misc/schema.py
+++ b/data/misc/schema.py
@@ -2,7 +2,6 @@
 from sqlalchemy.sql.sqltypes import Integer, Text
 
 meta = MetaData()
-"IČO", "Obchodní jméno", "Sídlo", "Stát", "Odsouzeni"
 
 
 schema = [