diff --git a/experimental/dm/marimo_stuff/distribution_dashboard.py b/experimental/dm/marimo_stuff/distribution_dashboard.py
new file mode 100644
index 0000000000..903e643a6d
--- /dev/null
+++ b/experimental/dm/marimo_stuff/distribution_dashboard.py
@@ -0,0 +1,149 @@
+import marimo
+
+__generated_with = "0.17.7"
+app = marimo.App(width="medium")
+
+with app.setup(hide_code=True):
+    import marimo as mo
+    import pandas as pd
+
+    from dcpy.connectors.edm.bytes import BytesConnector
+    from dcpy.connectors.edm.open_data_nyc import OpenDataConnector
+    from dcpy.lifecycle import product_metadata
+    from dcpy.lifecycle.scripts import version_compare
+
+
+@app.cell
+def _():
+    versions = version_compare.run()
+    return (versions,)
+
+
+@app.cell
+def _(versions):
+    versions
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Distribution Dashboard
+
+    This notebook is for comparing the versions of datasets on Bytes and Open Data to inform distribution of data updates.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    /// details | Environment variable details
+        type: warn
+
+    Some environment variables must be set before running this notebook:
+    - `PRODUCT_METADATA_REPO_PATH`
+    - `SOCRATA_USER`
+    - `SOCRATA_PASSWORD`
+    """)
+    return
+
+
+@app.cell
+def _():
+    # all_latest_bytes_versions = BytesConnector().fetch_all_latest_versions_df()
+    return
+
+
+@app.cell
+def _():
+    # all_latest_bytes_versions
+    return
+
+
+@app.cell
+def _():
+    data_engineering_datasets = [
+        {
+            "base_key": "zap.bbls",
+            "open_data_destination_id": "socrata",
+        },
+        {
+            "base_key": "zap.projects",
+            "open_data_destination_id": "socrata",
+        },
+        {
+            "base_key": "ztl.ztl",
+            "open_data_destination_id": "socrata",
+        },
+    ]
+    return (data_engineering_datasets,)
+
+
+@app.function
+def open_data_url(key: str) -> str:
+    """Build the NYC Open Data URL for a dataset destination.
+
+    Args:
+        key: Dotted key of the form "<product>.<dataset>.<destination_id>",
+            e.g. "colp.colp.socrata".
+
+    Returns:
+        "https://data.cityofnewyork.us/d/<four_four>", where four_four is
+        read from the destination's `custom` metadata.
+
+    Raises:
+        ValueError: If `key` does not split into exactly three parts.
+    """
+    product, dataset, destination_id = key.split(".")
+    # NOTE(review): metadata is reloaded on every call, and "dummy" looks like
+    # a placeholder version -- confirm both are intended.
+    metadata = product_metadata.load(version="dummy")
+    four_four = (
+        metadata.product(product)
+        .dataset(dataset)
+        .get_destination(destination_id)
+        .custom.get("four_four")
+    )
+    return f"https://data.cityofnewyork.us/d/{four_four}"
+
+
+@app.cell
+def _():
+    def check_versions(datasets: list[dict]) -> None:
+        """Print each dataset's base key, Bytes version, Open Data versions and URL.
+
+        Args:
+            datasets: Dicts with "base_key" and "open_data_destination_id"
+                keys identifying each dataset to check.
+        """
+        for dataset in datasets:
+            open_data_key = ".".join(
+                [dataset["base_key"], dataset["open_data_destination_id"]]
+            )
+            url = open_data_url(open_data_key)
+            print(dataset["base_key"])
+            print(BytesConnector().get_latest_version(dataset["base_key"]))
+            print(OpenDataConnector().list_versions(open_data_key))
+            print(url)
+            print("-----")
+    return (check_versions,)
+
+
+@app.cell
+def _(check_versions, data_engineering_datasets):
+    check_versions(data_engineering_datasets)
+    return
+
+
+@app.cell
+def _():
+    colp_open_data_version = OpenDataConnector().list_versions(
+        key="colp.colp.socrata"
+    )
+    colp_open_data_version
+    return
+
+
+if __name__ == "__main__":
+    app.run()
diff --git a/experimental/dm/marimo_stuff/requirements.in b/experimental/dm/marimo_stuff/requirements.in
index 582ea3b3cc..2f3b646e9e 100644
--- a/experimental/dm/marimo_stuff/requirements.in
+++ b/experimental/dm/marimo_stuff/requirements.in
@@ -1,2 +1,3 @@
 leafmap[duckdb]
 marimo[recommended]
+dcpy @ git+https://github.com/NYCPlanning/data-engineering
diff --git a/experimental/dm/marimo_stuff/requirements.txt b/experimental/dm/marimo_stuff/requirements.txt
index e9aae41ebb..835361f7cd 100644
--- a/experimental/dm/marimo_stuff/requirements.txt
+++ b/experimental/dm/marimo_stuff/requirements.txt
@@ -2,7 +2,7 @@
 #    uv pip compile requirements.in -o requirements.txt
 aiohappyeyeballs==2.6.1
     # via aiohttp
-aiohttp==3.13.1
+aiohttp==3.13.2
     # via jupyter-server-proxy
 aiosignal==1.4.0
     # via aiohttp
@@ -12,6 +12,7 @@ annotated-types==0.7.0
     # via pydantic
 anyio==4.11.0
     # via
+    #   google-genai
     #   httpx
     #   jupyter-server
     #   openai
@@ -31,16 +32,37 @@ attrs==25.4.0
     #   aiohttp
     #   jsonschema
     #   referencing
+azure-core==1.36.0
+    # via
+    #   azure-storage-blob
+
# azure-storage-file-datalake +azure-storage-blob==12.27.1 + # via + # azure-storage-file-datalake + # cloudpathlib +azure-storage-file-datalake==12.22.0 + # via cloudpathlib backoff==2.2.1 # via posthog +bcrypt==5.0.0 + # via paramiko beautifulsoup4==4.14.2 # via + # dcpy # gdown # nbconvert -bleach==6.2.0 +bleach==6.3.0 # via nbconvert blinker==1.9.0 # via flask +boto3==1.40.67 + # via + # cloudpathlib + # dcpy +botocore==1.40.67 + # via + # boto3 + # s3transfer bqplot==0.12.45 # via leafmap branca==0.8.2 @@ -48,6 +70,8 @@ branca==0.8.2 # folium # ipyleaflet # maplibre +cachetools==6.2.1 + # via google-auth certifi==2025.10.5 # via # httpcore @@ -56,21 +80,33 @@ certifi==2025.10.5 # pyproj # requests cffi==2.0.0 - # via argon2-cffi-bindings + # via + # argon2-cffi-bindings + # cryptography + # pynacl charset-normalizer==3.4.4 # via requests click==8.3.0 # via # flask # marimo + # typer # uvicorn # whitebox +cloudpathlib==0.23.0 + # via dcpy comm==0.2.3 # via ipywidgets contourpy==1.3.3 # via matplotlib +cryptography==46.0.3 + # via + # azure-storage-blob + # paramiko cycler==0.12.1 # via matplotlib +dcpy @ git+https://github.com/NYCPlanning/data-engineering@e0766e70a5c4563d6ee17c5ba2ba7024a0cdefa8 + # via -r requirements.in decorator==5.2.1 # via ipython defusedxml==0.7.1 @@ -79,19 +115,26 @@ distro==1.9.0 # via # openai # posthog -docutils==0.22.2 +docutils==0.22.3 # via marimo duckdb==1.4.1 # via + # dcpy # duckdb-engine # leafmap # marimo duckdb-engine==0.17.0 # via leafmap +et-xmlfile==2.0.0 + # via openpyxl eval-type-backport==0.2.2 # via maplibre +execnet==2.1.1 + # via pytest-xdist executing==2.2.1 # via stack-data +faker==37.12.0 + # via dcpy fastjsonschema==2.21.2 # via nbformat filelock==3.20.0 @@ -114,10 +157,38 @@ frozenlist==1.8.0 # aiosignal gdown==5.2.0 # via leafmap +geoalchemy2==0.18.0 + # via dcpy geojson==3.2.0 # via leafmap geopandas==1.1.1 - # via leafmap + # via + # dcpy + # leafmap +google-api-core==2.28.1 + # via + # google-cloud-core + # 
google-cloud-storage +google-auth==2.43.0 + # via + # google-api-core + # google-cloud-core + # google-cloud-storage + # google-genai +google-cloud-core==2.5.0 + # via google-cloud-storage +google-cloud-storage==3.5.0 + # via cloudpathlib +google-crc32c==1.7.1 + # via + # google-cloud-storage + # google-resumable-media +google-genai==1.49.0 + # via marimo +google-resumable-media==2.7.2 + # via google-cloud-storage +googleapis-common-protos==1.71.0 + # via google-api-core greenlet==3.2.4 # via sqlalchemy h11==0.16.0 @@ -127,7 +198,9 @@ h11==0.16.0 httpcore==1.0.9 # via httpx httpx==0.28.1 - # via openai + # via + # google-genai + # openai idna==3.11 # via # anyio @@ -135,6 +208,12 @@ idna==3.11 # jsonschema # requests # yarl +ijson==3.4.0.post0 + # via dcpy +iniconfig==2.3.0 + # via pytest +invoke==2.2.1 + # via paramiko ipyevents==2.0.4 # via leafmap ipyfilechooser==0.6.0 @@ -143,7 +222,7 @@ ipyfilechooser==0.6.0 # whiteboxgui ipyleaflet==0.20.0 # via leafmap -ipython==9.6.0 +ipython==9.7.0 # via ipywidgets ipython-genutils==0.2.0 # via jupysql @@ -155,7 +234,7 @@ ipyvue==1.11.3 # via ipyvuetify ipyvuetify==1.11.3 # via leafmap -ipywidgets==8.1.7 +ipywidgets==8.1.8 # via # anywidget # bqplot @@ -166,6 +245,10 @@ ipywidgets==8.1.7 # ipyvue # leafmap # whiteboxgui +isodate==0.7.2 + # via + # azure-storage-blob + # azure-storage-file-datalake isoduration==20.11.0 # via jsonschema itsdangerous==2.2.0 @@ -180,6 +263,7 @@ jinja2==3.1.6 # via # altair # branca + # dcpy # flask # folium # jupysql @@ -188,6 +272,10 @@ jinja2==3.1.6 # nbconvert jiter==0.11.1 # via openai +jmespath==1.0.1 + # via + # boto3 + # botocore jsonpointer==3.0.0 # via jsonschema jsonschema==4.25.1 @@ -225,24 +313,30 @@ jupyter-server-terminals==0.5.3 # via jupyter-server jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-widgets==3.0.15 +jupyterlab-widgets==3.0.16 # via ipywidgets kiwisolver==1.4.9 # via matplotlib -lark==1.3.0 +lark==1.3.1 # via rfc3987-syntax -leafmap==0.55.0 - # via -r 
requirements.in +leafmap==0.57.1 + # via + # -r requirements.in + # dcpy loro==1.8.2 # via marimo +lxml==6.0.2 + # via dcpy maplibre==0.3.5 # via leafmap -marimo==0.17.0 +marimo==0.17.7 # via -r requirements.in -markdown==3.9 +markdown==3.10 # via # marimo # pymdown-extensions +markdown-it-py==4.0.0 + # via rich markupsafe==3.0.3 # via # flask @@ -253,6 +347,8 @@ matplotlib==3.10.7 # via leafmap matplotlib-inline==0.2.1 # via ipython +mdurl==0.1.2 + # via markdown-it-py mistune==3.1.4 # via nbconvert msgspec-m==0.19.2 @@ -261,7 +357,7 @@ multidict==6.7.0 # via # aiohttp # yarl -narwhals==2.9.0 +narwhals==2.10.2 # via # altair # marimo @@ -287,12 +383,15 @@ numpy==2.3.4 # pandas # pyogrio # shapely -openai==2.6.0 +openai==2.7.1 # via marimo +openpyxl==3.1.5 + # via dcpy packaging==25.0 # via # altair # duckdb-engine + # geoalchemy2 # geopandas # jupyter-events # jupyter-server @@ -301,13 +400,17 @@ packaging==25.0 # nbconvert # plotly # pyogrio + # pytest pandas==2.3.3 # via # bqplot + # dcpy # geopandas # leafmap pandocfilters==1.5.1 # via nbconvert +paramiko==4.0.0 + # via dcpy parso==0.8.5 # via jedi pexpect==4.9.0 @@ -320,16 +423,20 @@ ploomber-core==0.2.27 # via # jupysql # jupysql-plugin -plotly==6.3.1 +plotly==6.4.0 # via leafmap -polars==1.34.0 +pluggy==1.6.0 + # via pytest +polars==1.35.1 # via marimo -polars-runtime-32==1.34.0 +polars-runtime-32==1.35.1 # via polars -posthog==6.7.9 +posthog==6.9.0 # via ploomber-core prettytable==3.16.0 # via jupysql +probableparsing==0.0.1 + # via usaddress prometheus-client==0.23.1 # via jupyter-server prompt-toolkit==3.0.52 @@ -338,8 +445,17 @@ propcache==0.4.1 # via # aiohttp # yarl -psutil==7.1.1 +proto-plus==1.26.1 + # via google-api-core +protobuf==6.33.0 + # via + # google-api-core + # googleapis-common-protos + # proto-plus +psutil==7.1.3 # via marimo +psycopg2-binary==2.9.11 + # via dcpy psygnal==0.15.0 # via anywidget ptyprocess==0.7.0 @@ -348,26 +464,47 @@ ptyprocess==0.7.0 # terminado pure-eval==0.2.3 # via 
stack-data -pyarrow==21.0.0 - # via polars +pyarrow==22.0.0 + # via + # dcpy + # polars +pyasn1==0.6.1 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.2 + # via google-auth pycparser==2.23 # via cffi -pydantic==2.12.3 +pydantic==2.12.4 # via + # dcpy + # google-genai # maplibre # openai -pydantic-core==2.41.4 - # via pydantic + # pydantic-xml +pydantic-core==2.41.5 + # via + # pydantic + # pydantic-xml +pydantic-xml==2.18.0 + # via dcpy pygments==2.19.2 # via # ipython # ipython-pygments-lexers # marimo # nbconvert + # pytest + # rich pymdown-extensions==10.16.1 # via marimo +pynacl==1.6.0 + # via paramiko pyogrio==0.11.1 - # via geopandas + # via + # dcpy + # geopandas pyparsing==3.2.5 # via matplotlib pyproj==3.7.2 @@ -378,23 +515,36 @@ pystac==1.14.1 # via pystac-client pystac-client==0.9.0 # via leafmap +pytest==8.4.2 + # via pytest-xdist +pytest-xdist==3.8.0 + # via dcpy python-box==7.3.2 # via leafmap +python-crfsuite==0.9.11 + # via usaddress python-dateutil==2.9.0.post0 # via # arrow + # botocore + # dcpy # jupyter-client # matplotlib # pandas # posthog # pystac # pystac-client +python-dotenv==1.2.1 + # via dcpy python-json-logger==4.0.0 # via jupyter-events pytz==2025.2 - # via pandas + # via + # dcpy + # pandas pyyaml==6.0.3 # via + # dcpy # jupyter-events # marimo # ploomber-core @@ -410,10 +560,16 @@ referencing==0.37.0 # jupyter-events requests==2.32.5 # via + # azure-core + # dcpy # folium # gdown + # google-api-core + # google-cloud-storage + # google-genai # posthog # pystac-client + # socrata-py rfc3339-validator==0.1.4 # via # jsonschema @@ -424,18 +580,30 @@ rfc3986-validator==0.1.1 # jupyter-events rfc3987-syntax==1.1.0 # via jsonschema +rich==14.2.0 + # via + # dcpy + # typer rpds-py==0.28.0 # via # jsonschema # referencing -ruff==0.14.2 +rsa==4.9.1 + # via google-auth +ruff==0.14.3 # via marimo -scooby==0.10.2 +s3transfer==0.14.0 + # via boto3 +scooby==0.11.0 # via leafmap send2trash==1.8.3 # via jupyter-server shapely==2.1.2 - # via 
geopandas + # via + # dcpy + # geopandas +shellingham==1.5.4 + # via typer simpervisor==1.0.0 # via jupyter-server-proxy six==1.17.0 @@ -447,13 +615,17 @@ sniffio==1.3.1 # via # anyio # openai +socrata-py==1.1.13 + # via dcpy soupsieve==2.8 # via beautifulsoup4 sqlalchemy==2.0.44 # via + # dcpy # duckdb-engine + # geoalchemy2 # jupysql -sqlglot==27.28.1 +sqlglot==27.29.0 # via # jupysql # marimo @@ -463,8 +635,12 @@ sqlparse==0.5.3 # via jupysql stack-data==0.6.3 # via ipython -starlette==0.48.0 +starlette==0.50.0 # via marimo +tabulate==0.9.0 + # via dcpy +tenacity==9.1.2 + # via google-genai terminado==0.18.1 # via # jupyter-server @@ -502,34 +678,47 @@ traittypes==0.2.3 # via # bqplot # ipyleaflet +typer==0.20.0 + # via dcpy typing-extensions==4.15.0 # via # altair # anywidget + # azure-core + # azure-storage-blob + # azure-storage-file-datalake # beautifulsoup4 + # google-genai # openai # posthog # pydantic # pydantic-core # sqlalchemy + # typer # typing-inspection typing-inspection==0.4.2 # via pydantic tzdata==2025.2 # via # arrow + # faker # pandas uri-template==1.3.0 # via jsonschema urllib3==2.5.0 - # via requests + # via + # botocore + # dcpy + # requests +usaddress==0.5.16 + # via dcpy uvicorn==0.38.0 # via marimo wcwidth==0.2.14 # via # prettytable # prompt-toolkit -webcolors==24.11.1 +webcolors==25.10.0 # via jsonschema webencodings==0.5.1 # via @@ -538,7 +727,9 @@ webencodings==0.5.1 websocket-client==1.9.0 # via jupyter-server websockets==15.0.1 - # via marimo + # via + # google-genai + # marimo werkzeug==3.1.3 # via # flask @@ -547,9 +738,11 @@ whitebox==2.3.6 # via whiteboxgui whiteboxgui==2.3.0 # via leafmap -widgetsnbextension==4.0.14 +widgetsnbextension==4.0.15 # via ipywidgets -xyzservices==2025.4.0 +xlrd==2.0.2 + # via dcpy +xyzservices==2025.10.0 # via # folium # ipyleaflet