diff --git a/.dockerignore b/.dockerignore index 8ee3850..20bb136 100644 --- a/.dockerignore +++ b/.dockerignore @@ -18,4 +18,5 @@ coverage.xml .git .pytest_cache .env -*.egg-info \ No newline at end of file +*.egg-info +azure diff --git a/Dockerfile b/Dockerfile index 09e1db2..9c56a29 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM node:16-bullseye-slim +FROM node:20-bullseye-slim # Setup environment variables ENV LC_ALL=C.UTF-8 diff --git a/LICENSE b/LICENSE index da7e522..40c09c1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2024 Alliance for Sustainable Energy, LLC and Skye Analytics, Inc. +Copyright (c) 2025 Alliance for Sustainable Energy, LLC and Skye Analytics, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md index 29ef781..f80dc54 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Open Data Access Tools The Open Energy Data Initiative (OEDI) provides a number of tools to enable the use of the open data published through this initiative. The source is largely written in Python, including Jupyter notebooks. -Copyright (c) 2024 Alliance for Sustainable Energy, LLC and Skye Analytics, Inc. +Copyright (c) 2025 Alliance for Sustainable Energy, LLC and Skye Analytics, Inc. Open Data Access Tools: NREL SWR-20-57. Azure Data Tools: SWR-23-92. diff --git a/oedi/AWS/data_lake/construct.py b/oedi/AWS/data_lake/construct.py index 973355b..2c3213e 100644 --- a/oedi/AWS/data_lake/construct.py +++ b/oedi/AWS/data_lake/construct.py @@ -63,10 +63,9 @@ def create_crawler_role(self): managed_policies=managed_policies, ) - def create_crawler(self, location, tags): + def create_crawler(self, location, table_prefix, tags): """Create crawler in data lake by given dataset location.""" crawler_name = generate_crawler_name(s3url=location) - table_prefix = generate_table_prefix(s3url=location) if not self.crawler_role: self.crawler_role() diff --git a/oedi/AWS/data_lake/stack.py b/oedi/AWS/data_lake/stack.py index dbb24a8..943e82f 100644 --- a/oedi/AWS/data_lake/stack.py +++ b/oedi/AWS/data_lake/stack.py @@ -27,5 +27,11 @@ def __init__(self, scope: Construct, config: OEDIConfigBase) -> None: data_lake.create_database() data_lake.create_crawler_role() #TODO: data_lake.create_workgroup() - for dataset_location in database['Locations']: - data_lake.create_crawler(location=dataset_location, tags=tags) + if 'Table Prefixes' in database.keys(): + table_prefixes = database['Table Prefixes'] # Prefix for each table + elif 'Table Prefix' in database.keys(): + table_prefixes = [database['Table Prefix']] * len(database['Locations']) # One prefix for all tables + else: + table_prefixes = ['table_'] * len(database['Locations']) # No prefix specified, use generic prefix + for dataset_location, table_prefix in zip(database['Locations'], table_prefixes): + data_lake.create_crawler(location=dataset_location, table_prefix=table_prefix, tags=tags) diff --git a/oedi/AWS/utils.py b/oedi/AWS/utils.py index 6cab413..295bafa 100644 --- a/oedi/AWS/utils.py +++ b/oedi/AWS/utils.py @@ -32,6 +32,7 @@ def generate_crawler_name(s3url): bucket, path = parse_s3url(s3url) dashed_path = path.replace("/", "-") name = f"{bucket}-{dashed_path}".replace("_", "-") +# name = name[-128:] # Crawler names have a limit of 128 characters return name.lower() @@ -93,5 +94,5 @@ def generate_table_prefix(s3url): prefix = os.path.dirname(path).replace("/", "-") + "_" table_prefix = prefix.replace("-", "_").lower() - + table_prefix = table_prefix[-128:] # A table prefix has a limit of 128 characters return table_prefix diff --git a/oedi/__init__.py b/oedi/__init__.py index 788da1f..fe404ae 100644 --- a/oedi/__init__.py +++ b/oedi/__init__.py @@ -1 +1 @@ -__version__ = "0.2.4" +__version__ = "0.2.5" diff --git a/oedi/config.yaml b/oedi/config.yaml index ca604c7..fde25a5 100644 --- a/oedi/config.yaml +++ b/oedi/config.yaml @@ -10,13 +10,46 @@ AWS: - s3://oedi-data-lake/pv-rooftop/developable-planes/ - s3://oedi-data-lake/pv-rooftop/rasd/ - s3://oedi-data-lake/pv-rooftop-pr/developable-planes/ - - Identifier: buildstock - Name: oedi_buildstock + Table Prefixes: + - pv_rooftop_ + - pv_rooftop_ + - pv_rooftop_ + - pv_rooftop_ + - pv_rooftop_pr_ + - Identifier: comstock + Name: oedi_comstock_amy2018_release_2 Locations: - - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2021/comstock_amy2018_release_1/ - - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2021/comstock_tmy3_release_1/ - - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2021/resstock_amy2018_release_1/ - - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2021/resstock_tmy3_release_1/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/metadata/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/weather/amy2018/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/metadata_and_annual_results/national/parquet/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/timeseries_individual_buildings/by_puma_midwest/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/timeseries_individual_buildings/by_puma_northeast/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/timeseries_individual_buildings/by_puma_south/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/timeseries_individual_buildings/by_puma_west/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2023/comstock_amy2018_release_2/timeseries_individual_buildings/by_state/ + Table Prefixes: + - amy_2018_ + - weather_ + - metadata_and_annual_results_national_ + - timeseries_individual_buildings_ + - timeseries_individual_buildings_ + - timeseries_individual_buildings_ + - timeseries_individual_buildings_ + - timeseries_individual_buildings_ + - Identifier: resstock + Name: oedi_resstock_2022_tmy3_1.1 + Locations: + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_tmy3_release_1.1/metadata/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_tmy3_release_1.1/metadata_and_annual_results/national/parquet/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_tmy3_release_1.1/metadata_income/parquet/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_tmy3_release_1.1/timeseries_individual_buildings/by_state/ + - s3://oedi-data-lake/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_tmy3_release_1.1/weather/ + Table Prefixes: + - resstock_tmy3_ + - metadata_and_annual_results_national_ + - metadata_income_ + - timeseries_individual_buildings_ + - resstock_tmy3_ - Identifier: tracking_the_sun Name: oedi_tracking_the_sun Locations: @@ -26,6 +59,7 @@ AWS: - s3://oedi-data-lake/tracking-the-sun/2021/ - s3://oedi-data-lake/tracking-the-sun/2022/ - s3://oedi-data-lake/tracking-the-sun/2023/ + Table Prefix: tracking_the_sun_ - Identifier: atb Name: oedi_atb Locations: @@ -38,6 +72,16 @@ AWS: - s3://oedi-data-lake/ATB/transportation/parquet/2022/fuels - s3://oedi-data-lake/ATB/transportation/parquet/2022/vehicles - s3://oedi-data-lake/ATB/transportation/parquet/2022/vehicles_fuels + Table Prefixes: + - atb_electricity_ + - atb_electricity_ + - atb_electricity_ + - atb_electricity_ + - atb_electricity_ + - atb_transportation_2022_ + - atb_transportation_2022_ + - atb_transportation_2022_ + - atb_transportation_2022_ - Identifier: pvdaq Name: oedi_pvdaq Locations: @@ -50,6 +94,7 @@ AWS: - s3://oedi-data-lake/pvdaq/parquet/mount/ - s3://oedi-data-lake/pvdaq/parquet/other-instruments/ - s3://oedi-data-lake/pvdaq/parquet/pvdata/ + Table Prefix: pvdaq_ - Identifier: nso Name: oedi_nso Locations: @@ -60,10 +105,10 @@ AWS: - s3://oedi-data-lake-rawdata/NSO-2/loads_20Hz/ - s3://oedi-data-lake-rawdata/NSO-2/wake_masts_1min/ - s3://oedi-data-lake-rawdata/NSO-2/wake_masts_20Hz/ - + Table Prefix: 'nso_' Staging Location: s3://user-owned-staging-bucket/ Tags: - Key: Project Value: OEDI - Key: Release - Value: 0.2.4 + Value: 0.2.5