From a549d8b6f6321115209070f4503ab50ef725345d Mon Sep 17 00:00:00 2001 From: aguilerapy <48607824+aguilerapy@users.noreply.github.com> Date: Thu, 17 Dec 2020 13:01:38 -0300 Subject: [PATCH 1/7] Update main README --- README.md | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 340d772..6122927 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,18 @@ La plataforma de Control Ciudadano nace con el objetivo de poner a disposición de la ciudadanía un portal de datos abiertos para controlar los recursos -públicos. Los datos que se encuentran disponibles en la actualidad son: las -compras realizadas durante la pandemia de la COVID-19, las declaraciones -juradas subidas a la fecha en la web de la Contraloría General de la República, -datos sobre autoridades electas, relación con proveedores del Estado que -cuenten con los mismos datos de contacto, las facturas de ANDE y ESSAP. La -plataforma pretende contar con más datos que ayuden a ejercer el derecho de +públicos. Los datos que se encuentran disponibles en la actualidad son: + +* Las compras realizadas durante la pandemia de la COVID-19 +* Las declaraciones juradas subidas a la fecha en la web de la Contraloría General de la República +* Datos sobre autoridades electas +* Relación con proveedores del Estado que cuenten con los mismos datos de contacto +* Las facturas de ANDE y ESSAP + +La plataforma pretende contar con más datos que ayuden a ejercer el derecho de “contralores ciudadanos”, convirtiéndose en una herramienta colaborativa, apoyada por la comunidad de organizaciones y personas que deseen sumar sus -esfuerzos a la sostenibilidad de un portal útil  – para promover el acceso a la +esfuerzos a la sostenibilidad de un portal útil – para promover el acceso a la información pública y contribuir al desarrollo de un país transparente, libre de corrupción e impunidad. @@ -35,14 +38,14 @@ repositorio central. 
En este repositorio se almacena la información de: * **API**: Servicios web utilizados para mostrar información dentro del portal * **ETL**: Conjunto de ETL (programas para extracción transformación y carga) que nutren la base de datos del portal -* **infra**: Conjunto de archivos que describen la infraestructura requerida +* **Infra**: Conjunto de archivos que describen la infraestructura requerida para el portal, incluyendo servidores de base de datos, de archivos, de aplicación, etc. # Apoyo La sección de análisis de compras covid con técnicas de Inteligencia Artificial -fue posible gracias al apoyo de la Open Contracting Partnership. +fue posible gracias al apoyo de la [Open Contracting Partnership](https://www.open-contracting.org/es/). # Términos y condiciones @@ -89,3 +92,19 @@ condiciones de la página externa. El portal se encuentra abierto a contribuciones de toda la comunidad. Para colaborar primero se debe crear una [incidencia](https://github.com/InstIDEA/controlciudadano/issues/new), y en la misma describir la propuesta. + + +# ¿Cómo instalar? + +Clone el repositorio y acceda al nuevo directorio: + +```sh +git clone https://github.com/InstIDEA/controlciudadano +cd controlciudadano +``` + +Para levantar localmente el portal web puede referirse al siguiente +[README](https://github.com/InstIDEA/controlciudadano/blob/master/portal/README.md). + +Para levantar localmente la API REST puede referirse al siguiente +[README](https://github.com/InstIDEA/controlciudadano/blob/master/api/README.md). 
From 3c9dc99d881335bcfaa37317eb4e468edf6f2d35 Mon Sep 17 00:00:00 2001 From: aguilerapy <48607824+aguilerapy@users.noreply.github.com> Date: Thu, 17 Dec 2020 13:02:41 -0300 Subject: [PATCH 2/7] Add api README --- api/README.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 api/README.md diff --git a/api/README.md b/api/README.md new file mode 100644 index 0000000..6ea9e06 --- /dev/null +++ b/api/README.md @@ -0,0 +1,42 @@ +# API + +Estos servicios web se utilizan para realizar búsquedas en la base de datos y mostrar información dentro del portal. + +## Instalación +### Requerimientos +1. npm 6+ o mayor + +### Instalación +Accede al directorio de la api: + +```sh +cd api +``` + +Para instalar los paquetes y dependencias, ejecute: + +```sh +npm install +``` + +### Configuración +Establezca las siguientes variables de entorno: +* PGUSER: usuario de la base de datos. +* PGHOST: host de la base de datos. +* PGPASSWORD: contraseña de acceso a la base de datos. +* PGDATABASE: la instancia de la base de datos. +* PGPORT: puerto de la base de datos (25432 por defecto). +* PORT: puerto del servicio (3001 por defecto). 
+ + +### Levantar el servicio +Para iniciar el servicio, ejecute: + +```sh +npm start +``` + +Alternativamente, para levantar el servicio y configurar las variables en un simple comando, puede ejecutar: +```sh +PGUSER=usuario PGHOST=host PGPASSWORD=pass PGDATABASE=db PGPORT=25432 PORT=3001 npm start +``` From 601a9c2babfd498706a539d4d1276973af932f64 Mon Sep 17 00:00:00 2001 From: aguilerapy <48607824+aguilerapy@users.noreply.github.com> Date: Thu, 17 Dec 2020 13:03:54 -0300 Subject: [PATCH 3/7] Update portal README --- portal/README.md | 92 ++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/portal/README.md b/portal/README.md index 9c40dcd..ecfa29b 100644 --- a/portal/README.md +++ b/portal/README.md @@ -1,68 +1,68 @@ -This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). +# Portal -## Available Scripts +Este portal se creó con [Create React App](https://github.com/facebook/create-react-app). -In the project directory, you can run: +## Instalación +### Requerimientos +1. Yarn 1.22+ o mayor -### `yarn start` +### Instalación +Accede al directorio del portal: -Runs the app in the development mode.
-Open [http://localhost:3000](http://localhost:3000) to view it in the browser. +```sh +cd portal +``` -The page will reload if you make edits.
-You will also see any lint errors in the console. +Para instalar los paquetes y dependencias, ejecute: -### `yarn test` - -Launches the test runner in the interactive watch mode.
-See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. - -### `yarn build` +```sh +yarn install +``` -Builds the app for production to the `build` folder.
-It correctly bundles React in production mode and optimizes the build for the best performance. - -The build is minified and the filenames include the hashes.
-Your app is ready to be deployed! - -See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information. - -### `yarn eject` +Con el comando `install` se instalan los componentes de la arquitectura yarn que consiste en los paquetes virtuales y el +árbol de dependencias. Más información en la documentación de [Yarn](https://yarnpkg.com/advanced/architecture). -**Note: this is a one-way operation. Once you `eject`, you can’t go back!** +### Levantar el proyecto +En el directorio del proyecto, puede ejecutar la aplicación en el modo de desarrollo: -If you aren’t satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project. +```sh +yarn start +``` -Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you’re on your own. +Abra [http://localhost:3000](http://localhost:3000) para verlo en el navegador.
+La página se recargará si realiza modificaciones.
+También verá cualquier error en la consola. -You don’t have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn’t feel obligated to use this feature. However we understand that this tool wouldn’t be useful if you couldn’t customize it when you are ready for it. +## Scripts disponibles -## Learn More - -You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started). - -To learn React, check out the [React documentation](https://reactjs.org/). - -### Code Splitting - -This section has moved here: https://facebook.github.io/create-react-app/docs/code-splitting +### `yarn test` -### Analyzing the Bundle Size +Launches the test runner in the interactive watch mode.
+Inicia el ejecutor de pruebas en el modo interactivo de observación.
+Ve la sección de [running tests](https://facebook.github.io/create-react-app/docs/running-tests) para más +información. -This section has moved here: https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size +### `yarn build` -### Making a Progressive Web App +Compila la aplicación para producción en la carpeta `build`.
+Agrupa correctamente React en el modo de producción y optimiza la compilación para obtener el mejor rendimiento. +La aplicación está lista para la implementación.
+Ve la sección de [deployment](https://facebook.github.io/create-react-app/docs/deployment) para más información. -This section has moved here: https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app +### `yarn eject` -### Advanced Configuration +**Nota: esta es una operación unidireccional. Una vez ejecutado `eject`, no podrá volver atrás!** -This section has moved here: https://facebook.github.io/create-react-app/docs/advanced-configuration +Si no está satisfecho con la herramienta de compilación y las opciones de configuración, puede ejecutar `eject` en +cualquier momento. Este comando eliminará la dependencia de compilación única de su proyecto. -### Deployment +No es necesario que ejecute nunca `eject`. El conjunto de funciones seleccionadas es adecuado para implementaciones +pequeñas y medianas, y no debe sentirse obligado a utilizar esta función. Sin embargo, entendemos que esta herramienta +no sería útil si no pudiera personalizarla cuando esté listo para usarla. -This section has moved here: https://facebook.github.io/create-react-app/docs/deployment +## Aprende más -### `yarn build` fails to minify +Puede aprender más en la documentación de +[Create React App](https://facebook.github.io/create-react-app/docs/getting-started). -This section has moved here: https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify +Para aprender React, revisa la documentación de [React](https://es.reactjs.org/). 
From bf2aae8af2a8acd547d5667d96957341fe38a221 Mon Sep 17 00:00:00 2001 From: aguilerapy <48607824+aguilerapy@users.noreply.github.com> Date: Fri, 18 Dec 2020 18:01:57 -0300 Subject: [PATCH 4/7] Fix title (#143) --- portal/src/pages/OCDSAwardItemsPage.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portal/src/pages/OCDSAwardItemsPage.tsx b/portal/src/pages/OCDSAwardItemsPage.tsx index e3c9ea5..f622722 100644 --- a/portal/src/pages/OCDSAwardItemsPage.tsx +++ b/portal/src/pages/OCDSAwardItemsPage.tsx @@ -47,7 +47,7 @@ export function OCDSAwardItemsPage() { style={{border: '1 px solid rgb(235, 237, 240)'}} onBack={() => history.push('/')} backIcon={null} - title="¿Qué se compró?"> + title="¿Se compró más caro?"> Ránking de items con posibles sobrecostos, comparados con sus precios antes de la pandemia. From b4f8d3575c4c2aaceb325cb3a6576f028d4e70e4 Mon Sep 17 00:00:00 2001 From: Luxter77 <19197331+Luxter77@users.noreply.github.com> Date: Mon, 21 Dec 2020 21:01:46 -0300 Subject: [PATCH 5/7] added .vscode to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8f643f5..d42eb69 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ sources/**/*.sql sources/**/*.json sources/**/*.png sources/**/*.jpg +.vscode/** \ No newline at end of file From f1006d28a8da314dbc8dbdd1da3d0eaae5873f1f Mon Sep 17 00:00:00 2001 From: Luxter77 <19197331+Luxter77@users.noreply.github.com> Date: Tue, 29 Dec 2020 15:16:39 -0300 Subject: [PATCH 6/7] various changes: - added tsje_elected_with_stats - added various helper scripts for uploading various tsje related things to the database - added dac to update tsje_elected material view --- migrations/R_Elected_with_document.sql | 146 ++++++++++------- migrations/R_Elected_with_stats.sql | 26 +++ .../python/airflow/dags/contralory_get_pdf.py | 2 + scripts/python/airflow/dags/tsje_elected.py | 48 ++++++ .../airflow/dags/tsje_elected/__init__.py | 109 +++++++++++++ 
scripts/python/tsje/tsje_elected_to_db.py | 101 ++++++++++++ .../python/tsje/tsje_elected_with_stats.py | 150 ++++++++++++++++++ scripts/python/tsje/tsje_local_to_db.py | 102 ++++++++++++ 8 files changed, 629 insertions(+), 55 deletions(-) create mode 100644 migrations/R_Elected_with_stats.sql create mode 100644 scripts/python/airflow/dags/tsje_elected.py create mode 100644 scripts/python/airflow/dags/tsje_elected/__init__.py create mode 100755 scripts/python/tsje/tsje_elected_to_db.py create mode 100755 scripts/python/tsje/tsje_elected_with_stats.py create mode 100755 scripts/python/tsje/tsje_local_to_db.py diff --git a/migrations/R_Elected_with_document.sql b/migrations/R_Elected_with_document.sql index 96577dc..166bd52 100644 --- a/migrations/R_Elected_with_document.sql +++ b/migrations/R_Elected_with_document.sql @@ -1,56 +1,92 @@ DROP MATERIALIZED VIEW IF EXISTS analysis.tsje_elected; -CREATE MATERIALIZED VIEW analysis.tsje_elected AS -( -WITH authorities AS ( - SELECT DISTINCT CONCAT(REPLACE(e.apellido, '�', 'Ñ'), ', ', REPLACE(e.nombre, '�', 'Ñ')) AS name, - e.apellido as apellido, - e.nombre as nombre - FROM staging.tsje_elected e -), - - by_hand (nombre, apellido, cedula) as ( - values ('JUANA', 'PAEZ DE ESCURRA', '1790762'), - ('HERIBERTO', 'SILVERA ACOSTA', '311107'), - ('GALEANO LUGO', 'ELADIO', '480243'), - ('MARIA BLANCA LILA', 'MIGNARRO DE GONZALEZ', '384004'), - ('EVARISTO', 'MOREL ROJAS', '940792') - ), - - with_by_hand AS ( - SELECT DISTINCT ON (a.name) a.name, a.apellido, a.nombre, h.cedula - FROM authorities a - LEFT JOIN by_hand h ON h.nombre = a.nombre AND h.apellido = a.apellido - ), - - with_set_simple AS ( - SELECT DISTINCT ON (a.name) a.name, a.apellido, a.nombre, COALESCE(a.cedula, set.ruc) as cedula - FROM with_by_hand a - LEFT JOIN staging.set set ON a.cedula IS NULL AND set.nombre = a.name - ), - - with_set_complex AS ( - SELECT DISTINCT ON (a.name) a.name, a.apellido, a.nombre, COALESCE(a.cedula, set.ruc) as cedula - FROM 
with_set_simple a - LEFT JOIN staging.set set - ON a.cedula IS NULL AND substr(a.name, 0, 20) = substr(set.nombre, 0, 20) - --- ON SIMILARITY(set.nombre, a.name) > 0.9 - ), - - with_sfp_simple AS ( - SELECT DISTINCT ON (a.name) a.name, a.apellido, a.nombre, COALESCE(a.cedula, sfp.documento) as cedula - FROM with_set_complex a - LEFT JOIN staging.sfp sfp - ON a.cedula IS NULL - AND sfp.nombres = a.nombre AND sfp.apellidos = a.apellido - ), - - - final AS ( - SELECT a.* - FROM with_sfp_simple a - ) -SELECT a.cedula, elected.* -FROM final a - LEFT JOIN staging.tsje_elected elected ON elected.nombre = a.nombre AND elected.apellido = a.apellido - ); +CREATE MATERIALIZED VIEW analysis.tsje_elected AS ( + WITH authorities AS ( + SELECT DISTINCT CONCAT( + REPLACE(e.apellido, '�', 'Ñ'), + ', ', + REPLACE(e.nombre, '�', 'Ñ') + ) AS name, + e.apellido as apellido, + e.nombre as nombre + FROM staging.tsje_elected e + ), + by_hand (nombre, apellido, cedula) as ( + values ('JUANA', 'PAEZ DE ESCURRA', '1790762'), + ('HERIBERTO', 'SILVERA ACOSTA', '311107'), + ('GALEANO LUGO', 'ELADIO', '480243'), + ( + 'MARIA BLANCA LILA', + 'MIGNARRO DE GONZALEZ', + '384004' + ), + ('EVARISTO', 'MOREL ROJAS', '940792') + ), + with_by_hand AS ( + SELECT DISTINCT ON (a.name) a.name, + a.apellido, + a.nombre, + h.cedula + FROM authorities a + LEFT JOIN by_hand h ON h.nombre = a.nombre + AND h.apellido = a.apellido + ), + with_set_simple AS ( + SELECT DISTINCT ON (a.name) a.name, + a.apellido, + a.nombre, + COALESCE( + a.cedula, + set.ruc + ) as cedula + FROM with_by_hand a + LEFT JOIN staging.set + set ON a.cedula IS NULL + AND + set.nombre = a.name + ), + with_set_complex AS ( + SELECT DISTINCT ON (a.name) a.name, + a.apellido, + a.nombre, + COALESCE( + a.cedula, + set.ruc + ) as cedula + FROM with_set_simple a + LEFT JOIN staging.set + set ON a.cedula IS NULL + AND substr(a.name, 0, 20) = substr( + set.nombre, + 0, + 20 + ) + ), + with_sfp_simple AS ( + SELECT DISTINCT ON (a.name) a.name, + 
a.apellido, + a.nombre, + COALESCE(a.cedula, sfp.documento) as cedula + FROM with_set_complex a + LEFT JOIN staging.sfp sfp ON a.cedula IS NULL + AND sfp.nombres = a.nombre + AND sfp.apellidos = a.apellido + ), + with_a_quien_elegimos_simple AS ( + SELECT DISTINCT ON (a.nombre) a.nombre, + a.apellido, + COALESCE(a.cedula, aqes.identifier) as cedula + FROM with_sfp_simple a + LEFT JOIN staging.a_quien_elegimos aqes ON a.cedula IS NULL + AND aqes.name = a.nombre + AND aqes.lastname = a.apellido + ), + final AS ( + SELECT a.* + FROM with_a_quien_elegimos_simple a + ) + SELECT a.cedula, + elected.* + FROM final a + LEFT JOIN staging.tsje_elected elected ON elected.nombre = a.nombre + AND elected.apellido = a.apellido +); \ No newline at end of file diff --git a/migrations/R_Elected_with_stats.sql b/migrations/R_Elected_with_stats.sql new file mode 100644 index 0000000..b723f7a --- /dev/null +++ b/migrations/R_Elected_with_stats.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS staging.tsje_elected_with_stats CASCADE; +CREATE TABLE staging.tsje_elected_with_stats +( + id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 ), + year integer NOT NULL, + nombre text, + apellido text, + nombre_completo text, + edad integer, + sexo "char", + cand_desc text, + candidatura integer, + departamento integer, + dep_desc text, + desc_tit_sup text, + dis_desc text, + distrito integer, + lista integer, + orden integer, + siglas_lista text, + tit_sup integer, + PRIMARY KEY (id) +); + +ALTER TABLE staging.tsje_elected_with_stats + OWNER to postgres; \ No newline at end of file diff --git a/scripts/python/airflow/dags/contralory_get_pdf.py b/scripts/python/airflow/dags/contralory_get_pdf.py index 34c265b..36c6a0f 100644 --- a/scripts/python/airflow/dags/contralory_get_pdf.py +++ b/scripts/python/airflow/dags/contralory_get_pdf.py @@ -33,6 +33,7 @@ "target_dir": target_dir, "error__dir": error__dir, "contraloria_py": "https://djbpublico.contraloria.gov.py/", + "max_active_runs": 1, }, } 
@@ -42,6 +43,7 @@ description="Process files from https://djbpublico.contraloria.gov.py/", start_date=days_ago(2), schedule_interval=timedelta(weeks=1), + ) with dag: diff --git a/scripts/python/airflow/dags/tsje_elected.py b/scripts/python/airflow/dags/tsje_elected.py new file mode 100644 index 0000000..89444b8 --- /dev/null +++ b/scripts/python/airflow/dags/tsje_elected.py @@ -0,0 +1,48 @@ +from datetime import timedelta + +from airflow import DAG + +from airflow.utils.dates import days_ago +from airflow.operators.dummy_operator import DummyOperator +from airflow.operators.python_operator import PythonOperator + +from tsje_elected import SQL_QUERY_CREATE_VIEW_NOT_NULL +from tsje_elected import SQL_QUERT_CREATE_VIEW_NULL + +default_args = { + 'owner': 'airflow', + 'depends_on_past': False, + 'email': ['arturovolpe@gmail.com'], + 'email_on_failure': False, + 'email_on_retry': False, + 'retries': 5, + 'retry_delay': timedelta(minutes=5), +} + +dag = DAG( + dag_id='consolidata_data_on_tsje_elected', + default_args=default_args, + description='Consolidates other data on tsje_elected tables', + start_date=days_ago(2), + schedule_interval=timedelta(weeks=1), +) + +start = DummyOperator(task_id='start', dag=dag) + +primary_view = PythonOperator( + task_id="create_view", + python_callable=SQL_QUERY_CREATE_VIEW_NOT_NULL, + dag=dag, +) + +secondary_view = PythonOperator( + task_id="create_missing_view", + python_callable=SQL_QUERT_CREATE_VIEW_NULL, + dag=dag, +) + +start >> primary_view >> secondary_view + +if __name__ == '__main__': + dag.clear(reset_dag_runs=True) + dag.run() diff --git a/scripts/python/airflow/dags/tsje_elected/__init__.py b/scripts/python/airflow/dags/tsje_elected/__init__.py new file mode 100644 index 0000000..3ce50e8 --- /dev/null +++ b/scripts/python/airflow/dags/tsje_elected/__init__.py @@ -0,0 +1,109 @@ +from airflow.hooks.postgres_hook import PostgresHook + +def execute(query) -> None: + db_hook = PostgresHook(postgres_conn_id="postgres_default") 
+ db_conn = db_hook.get_conn() + db_cursor = db_conn.cursor() + db_cursor.execute(query) + db_conn.commit() + +SQL_QUERY_CREATE_VIEW_BASE = """ +DROP MATERIALIZED VIEW IF EXISTS analysis.tsje_elected; +CREATE MATERIALIZED VIEW analysis.tsje_elected AS ( + WITH authorities AS ( + SELECT DISTINCT CONCAT( + REPLACE(e.apellido, '�', 'Ñ'), + ', ', + REPLACE(e.nombre, '�', 'Ñ') + ) AS name, + e.apellido as apellido, + e.nombre as nombre + FROM staging.tsje_elected e + ), + by_hand (nombre, apellido, cedula) as ( + values ('JUANA', 'PAEZ DE ESCURRA', '1790762'), + ('HERIBERTO', 'SILVERA ACOSTA', '311107'), + ('GALEANO LUGO', 'ELADIO', '480243'), + ( + 'MARIA BLANCA LILA', + 'MIGNARRO DE GONZALEZ', + '384004' + ), + ('EVARISTO', 'MOREL ROJAS', '940792') + ), + with_by_hand AS ( + SELECT DISTINCT ON (a.name) a.name, + a.apellido, + a.nombre, + h.cedula + FROM authorities a + LEFT JOIN by_hand h ON h.nombre = a.nombre + AND h.apellido = a.apellido + ), + with_set_simple AS ( + SELECT DISTINCT ON (a.name) a.name, + a.apellido, + a.nombre, + COALESCE( + a.cedula, + set.ruc + ) as cedula + FROM with_by_hand a + LEFT JOIN staging.set + set ON a.cedula IS NULL + AND + set.nombre = a.name + ), + with_set_complex AS ( + SELECT DISTINCT ON (a.name) a.name, + a.apellido, + a.nombre, + COALESCE( + a.cedula, + set.ruc + ) as cedula + FROM with_set_simple a + LEFT JOIN staging.set + set ON a.cedula IS NULL + AND substr(a.name, 0, 20) = substr( + set.nombre, + 0, + 20 + ) + ), + with_sfp_simple AS ( + SELECT DISTINCT ON (a.name) a.name, + a.apellido, + a.nombre, + COALESCE(a.cedula, sfp.documento) as cedula + FROM with_set_complex a + LEFT JOIN staging.sfp sfp ON a.cedula IS NULL + AND sfp.nombres = a.nombre + AND sfp.apellidos = a.apellido + ), + with_a_quien_elegimos_simple AS ( + SELECT DISTINCT ON (a.nombre) a.nombre, + a.apellido, + COALESCE(a.cedula, aqes.identifier) as cedula + FROM with_sfp_simple a + LEFT JOIN staging.a_quien_elegimos aqes ON a.cedula IS NULL + AND aqes.name = 
a.nombre + AND aqes.lastname = a.apellido + ), + final AS ( + SELECT a.* + FROM with_a_quien_elegimos_simple a + ) + SELECT a.cedula, + elected.* + FROM final a + LEFT JOIN staging.tsje_elected elected ON elected.nombre = a.nombre + AND elected.apellido = a.apellido""" + +def SQL_QUERY_CREATE_VIEW_NOT_NULL(): + query = SQL_QUERY_CREATE_VIEW_BASE + """WHERE "cedula" IS NOT NULL);""" + execute(query) + +def SQL_QUERT_CREATE_VIEW_NULL(): + query = SQL_QUERY_CREATE_VIEW_BASE + """ WHERE "cedula" IS NULL );""" + execute(query) diff --git a/scripts/python/tsje/tsje_elected_to_db.py b/scripts/python/tsje/tsje_elected_to_db.py new file mode 100755 index 0000000..628b9f9 --- /dev/null +++ b/scripts/python/tsje/tsje_elected_to_db.py @@ -0,0 +1,101 @@ +from sqlalchemy import create_engine + +import csv +import os + + +db_uri = "postgresql://USER:PASS@HOST:PORT/DB" +tsje_path = os.path.join( + "somewhere", "to", "EleccionesGeneralesElectos_1998_a_2018_140319.csv" +) + +raise (Exception("You should set tsje_path and db_uri value")) + +engine = create_engine(db_uri) + +with open(tsje_path, "r", encoding="iso-8859-1") as f: + tsje = [ + {k: (str(v) if bool(v) else None) for k, v in row.items()} + for row in csv.DictReader(f, skipinitialspace=True, delimiter=",") + ] + +create_table = """ +DROP TABLE IF EXISTS staging.tsje_elected CASCADE; +CREATE TABLE staging.tsje_elected ( + id integer NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 ), + ano bigint, + codeleccion bigint, + departamento bigint, + dep_desc text, + candidatura bigint, + cand_desc text, + lista bigint, + nombre_lista text, + siglas_lista text, + total_votos bigint, + tit_sup bigint, + desc_tit_sup text, + orden_lista bigint, + orden_dhont bigint, + nro_div bigint, + cociente bigint, + apellido text, + nombre text, + sexo text, + nacionalidad text, + edad bigint, + CONSTRAINT tsje_elected_pkey PRIMARY KEY (id) +); + +ALTER TABLE staging.tsje_elected OWNER to postgres; +""" + +query = ( + """ +INSERT INTO 
staging.tsje_elected( + ano, codeleccion, departamento, + dep_desc, candidatura, cand_desc, + lista, nombre_lista, siglas_lista, + total_votos, tit_sup, desc_tit_sup, + orden_lista, orden_dhont, nro_div, + cociente, apellido, nombre, sexo, + nacionalidad, edad + ) VALUES (""" + + "%s, " * 20 + + """%s);""" +) + +data = [] +for autority in tsje: + data.append( + [ + int(autority["año"]), + int(autority["codeleccion"]), + int(autority["departamento"]), + autority["dep_desc"], + int(autority["candidatura"]), + autority["cand_desc"], + int(autority["lista"]), + autority["nombre_lista"], + autority["siglas_lista"], + int(autority["total_votos"]), + int(autority["tit_sup"]), + autority["desc_tit_sup"], + int(autority["orden_lista"]), + int(autority["orden_dhont"]), + int(autority["nro_div"]), + int(autority["cociente"]), + autority["apellido"], + autority["nombre"], + autority["sexo"], + autority["nacionalidad"], + int(autority["edad"]), + ] + ) + +connection = engine.raw_connection() +cursor = connection.cursor() +cursor.execute(create_table) +cursor.executemany(query, data) +connection.commit() +connection.close() diff --git a/scripts/python/tsje/tsje_elected_with_stats.py b/scripts/python/tsje/tsje_elected_with_stats.py new file mode 100755 index 0000000..e5108ce --- /dev/null +++ b/scripts/python/tsje/tsje_elected_with_stats.py @@ -0,0 +1,150 @@ +from sqlalchemy import create_engine +from tqdm.auto import tqdm +from glob import glob + +import csv +import os + +# GOT FROM: http://datosabiertos.tsje.gov.py/group/elecciones-municipales + +db_uri = "postgresql://USER:PASS@HOST:PORT/DB" +tsje_path = os.path.join("somewhere", "where", "*.csv") + +raise (Exception("You should set tsje_path and db_uri value")) + +engine = create_engine(db_uri) + +tsje, data = [], [] + +for fn in tqdm(glob(tsje_path), desc="Loading Files"): + + if "2-1-" in fn: + year = 2010 + elif "2-2-" in fn: + year = 2006 + elif "2-3-" in fn: + year = 2001 + elif "2-4-" in fn: + year = 2015 + elif "2-5-" 
in fn: + year = 1996 + else: + raise (Exception("This should not happen")) + + with open(fn, "r", encoding="iso-8859-1") as f: + tsje.append( + ( + [ + {k: (str(v) if bool(v) else None) for k, v in row.items()} + for row in csv.DictReader(f, skipinitialspace=True, delimiter=",") + ], + year, + ) + ) + +create_table = """ +DROP TABLE IF EXISTS staging.tsje_elected_with_stats CASCADE; +CREATE TABLE staging.tsje_elected_with_stats +( + id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 ), + year integer NOT NULL, + nombre text, + apellido text, + nombre_completo text, + edad integer, + sexo "char", + cand_desc text, + candidatura integer, + departamento integer, + dep_desc text, + desc_tit_sup text, + dis_desc text, + distrito integer, + lista integer, + orden integer, + siglas_lista text, + tit_sup integer, + PRIMARY KEY (id) +); + +ALTER TABLE staging.tsje_elected_with_stats + OWNER to postgres; +""" + +query = ( + """ + INSERT INTO staging.tsje_elected_with_stats( + "year", "nombre", "apellido", + "nombre_completo", "edad", "sexo", + "cand_desc", "candidatura", "departamento", + "dep_desc", "desc_tit_sup", "dis_desc", + "distrito", "lista", "orden", "siglas_lista", "tit_sup" + ) VALUES (""" + + "%s, " * 16 + + """%s); +""" +) + + +def proc_name(nom, ape) -> tuple: + if bool(ape) and bool(nom): + return ( + str(nom), + str(ape), + f"{str(nom)} {str(ape)}", + ) + elif bool(nom) and not (bool(ape)): + return ( + None, + None, + str(nom), + ) + + +for n, x in tqdm(enumerate(tsje), desc="Generating Querys", leave=True): + for i, y in enumerate(x[0]): + + if None in (x[0][i]["nombre"], x[0][i]["sexo"]): + continue + + if not (x[0][i].get("orden") is None): + orden = int(x[0][i].get("orden")) + elif not (x[0][i].get("orden_dhont") is None): + orden = int(x[0][i].get("orden_dhont")) + else: + orden = None + + data.append( + ( + int(x[1]), + *proc_name(x[0][i].get("nombre"), x[0][i].get("apellido"),), + int(x[0][i].get("edad")) if x[0][i].get("edad") else None, + 
str(x[0][i].get("sexo")) if x[0][i].get("sexo") else None, + str(x[0][i].get("cand_desc")) if x[0][i].get("cand_desc") else None, + int(x[0][i].get("candidatura")) if x[0][i].get("candidatura") else None, + int(x[0][i].get("departamento")) + if x[0][i].get("departamento") + else None, + str(x[0][i].get("dep_desc")) if x[0][i].get("dep_desc") else None, + str(x[0][i].get("desc_tit_sup")) + if x[0][i].get("desc_tit_sup") + else None, + str(x[0][i].get("dis_desc")) if x[0][i].get("dis_desc") else None, + int(x[0][i].get("distrito")) if x[0][i].get("distrito") else None, + int(x[0][i].get("lista")) if x[0][i].get("lista") else None, + orden, + str(x[0][i].get("siglas_lista")) + if x[0][i].get("siglas_lista") + else None, + int(x[0][i].get("tit_sup")) if x[0][i].get("tit_sup") else None, + ) + ) + +print("Uploading to db") + +connection = engine.raw_connection() +cursor = connection.cursor() +cursor.execute(create_table) +cursor.executemany(query, data) +connection.commit() +connection.close() diff --git a/scripts/python/tsje/tsje_local_to_db.py b/scripts/python/tsje/tsje_local_to_db.py new file mode 100755 index 0000000..0ce746a --- /dev/null +++ b/scripts/python/tsje/tsje_local_to_db.py @@ -0,0 +1,102 @@ +from sqlalchemy import create_engine + +import csv +import os + + +db_uri = "postgresql://USER:PASS@HOST:PORT/DB" + +tsje_path = os.path.join( + "somwhere", "to", "resultados-1996-2018-municipales-y-generales.csv" +) +raise (Exception("You should set tsje_path and db_uri value")) + +engine = create_engine(db_uri) + +with open(tsje_path, "r", encoding="iso-8859-1") as f: + tsje = [ + {k: (str(v) if bool(v) else None) for k, v in row.items()} + for row in csv.DictReader(f, skipinitialspace=True, delimiter=";") + ] + +create_table = """ +DROP TABLE IF EXISTS staging.tsje_local_elected CASCADE; +CREATE TABLE staging.tsje_local_elected +( + id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 ), + ano text, + tipo_eleccion text, + dep integer, + depdes text, + 
dis integer, + disdes text, + zon integer, + zondes text, + loc integer, + locdes text, + candidatura integer, + cand_desc text, + lista integer, + siglas_lista text, + nombre_lista text, + votos integer, + nulos integer, + blancos integer, + total_votos integer, + PRIMARY KEY (id) +); + +ALTER TABLE staging.tsje_local_elected + OWNER to postgres; +""" + +query = ( + """ +INSERT INTO staging.tsje_local_elected( + ano, tipo_eleccion, dep, depdes, + dis, disdes, zon, zondes, loc, + locdes, candidatura, cand_desc, + lista, siglas_lista, nombre_lista, + votos, nulos, blancos, total_votos +) VALUES (""" + + "%s, " * 18 + + """%s);""" +) +data = [] + +for autority in tsje: + if autority["tipo_eleccion"] == "municipales": + data.append( + [ + autority["año"], + autority["tipo_eleccion"], + int(autority["dep"]) if autority["dep"] else None, + autority["depdes"], + int(autority["dis"]) if autority["dis"] else None, + autority["disdes"], + int(autority["zon"]) if autority["zon"] else None, + autority["zondes"], + int(autority["loc"]) if autority["loc"] else None, + autority["locdes"], + int(autority["candidatura"]) + if autority["candidatura"] + else None, + autority["cand_desc"], + int(autority["lista"]) if autority["lista"] else None, + autority["nombre_lista"], + autority["siglas_lista"], + int(autority["votos"]) if autority["votos"] else None, + int(autority["nulos"]) if autority["nulos"] else None, + int(autority["blancos"]) if autority["blancos"] else None, + int(autority["total_votos"]) + if autority["total_votos"] + else None, + ] + ) + +connection = engine.raw_connection() +cursor = connection.cursor() +cursor.execute(create_table) +cursor.executemany(query, data) +connection.commit() +connection.close() From 0d2a05d73ebab4114f7cb9716f445e9e35735e5b Mon Sep 17 00:00:00 2001 From: Luxter77 <19197331+Luxter77@users.noreply.github.com> Date: Tue, 29 Dec 2020 16:13:15 -0300 Subject: [PATCH 7/7] typo --- scripts/python/airflow/dags/tsje_elected/__init__.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/python/airflow/dags/tsje_elected/__init__.py b/scripts/python/airflow/dags/tsje_elected/__init__.py index 3ce50e8..42e0994 100644 --- a/scripts/python/airflow/dags/tsje_elected/__init__.py +++ b/scripts/python/airflow/dags/tsje_elected/__init__.py @@ -1,6 +1,6 @@ from airflow.hooks.postgres_hook import PostgresHook -def execute(query) -> None: +def execute(query: str) -> None: db_hook = PostgresHook(postgres_conn_id="postgres_default") db_conn = db_hook.get_conn() db_cursor = db_conn.cursor()