From cc6e6f42e5d2e32823407f717f055870ff9dbebc Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 10 Dec 2024 16:20:10 -0500 Subject: [PATCH 1/8] Updating to support felis yaml files including changes to create_database --- astrodbkit/astrodb.py | 42 +++- astrodbkit/schema_example_felis.yaml | 337 +++++++++++++++++++++++++++ astrodbkit/tests/test_astrodb.py | 17 ++ docs/index.rst | 16 ++ pyproject.toml | 5 +- 5 files changed, 411 insertions(+), 6 deletions(-) create mode 100644 astrodbkit/schema_example_felis.yaml diff --git a/astrodbkit/astrodb.py b/astrodbkit/astrodb.py index bb6aac7..2203a9c 100644 --- a/astrodbkit/astrodb.py +++ b/astrodbkit/astrodb.py @@ -6,6 +6,7 @@ import os import sqlite3 import shutil +import yaml import numpy as np import pandas as pd @@ -189,7 +190,7 @@ def set_sqlite_pragma(dbapi_connection, connection_record): cursor.close() -def create_database(connection_string, drop_tables=False): +def create_database(connection_string, drop_tables=False, felis_schema=None): """ Create a database from a schema that utilizes the `astrodbkit2.astrodb.Base` class. Some databases, eg Postgres, must already exist but any tables should be dropped. @@ -200,12 +201,43 @@ def create_database(connection_string, drop_tables=False): Connection string to database drop_tables : bool Flag to drop existing tables. This is needed when the schema changes. 
(Default: False) + felis_schema : str + Path to schema yaml file """ - session, base, engine = load_connection(connection_string, base=Base) - if drop_tables: - base.metadata.drop_all() - base.metadata.create_all(engine) # this explicitly creates the database + if felis_schema is not None: + # Felis loader requires felis_schema + from felis.datamodel import Schema + from felis.metadata import MetaDataBuilder + + # Load and validate the felis-formatted schema + data = yaml.safe_load(open(felis_schema, "r")) + schema = Schema.model_validate(data) + schema_name = data["name"] # get schema_name from the felis schema file + + # engine = create_engine(connection_string) + session, base, engine = load_connection(connection_string) + + # Workaround for SQLite since it doesn't support schema + if connection_string.startswith("sqlite"): + db_name = connection_string.split("/")[-1] + with engine.begin() as conn: + conn.execute(text(f"ATTACH '{db_name}' AS {schema_name}")) + + # Drop tables, if requested + if drop_tables: + base.metadata.drop_all() + + # Create the database + metadata = MetaDataBuilder(schema).build() + metadata.create_all(bind=engine) + base.metadata = metadata + else: + session, base, engine = load_connection(connection_string, base=Base) + if drop_tables: + base.metadata.drop_all() + base.metadata.create_all(engine) # this explicitly creates the database + return session, base, engine diff --git a/astrodbkit/schema_example_felis.yaml b/astrodbkit/schema_example_felis.yaml new file mode 100644 index 0000000..9424a0d --- /dev/null +++ b/astrodbkit/schema_example_felis.yaml @@ -0,0 +1,337 @@ +name: felis_temp +"@id": "#felis_temp" +description: "Template database for testing use" + +tables: + + - name: Publications + "@id": "#Publications" + description: Reference information (DOI, bibcodes, etc) + primaryKey: + - "#Publications.name" + + columns: + - name: name + "@id": "#Publications.name" + datatype: string + length: 30 + description: Publication reference 
identifier + ivoa:ucd: meta.ref;meta.main + nullable: false + - name: bibcode + "@id": "#Publications.bibcode" + datatype: string + length: 100 + description: Publication bibcode + ivoa:ucd: meta.bib.bibcode + - name: doi + "@id": "#Publications.doi" + datatype: string + length: 100 + description: Publication DOI + ivoa:ucd: meta.ref.doi + - name: description + "@id": "#Publications.description" + datatype: string + length: 1000 + description: Publication description + + + - name: Telescopes + "@id": "#Telescopes" + description: Telescope, mission, and survey information + primaryKey: + - "#Telescopes.name" + + columns: + - name: name + "@id": "#Telescopes.name" + datatype: string + length: 30 + description: Telescope, mission, or survey name + ivoa:ucd: meta.id;meta.main + nullable: false + - name: reference + "@id": "#Telescopes.reference" + datatype: string + length: 30 + description: Publication reference; links to Publications table + + constraints: + - name: Telescopes_reference_Publications_name + "@type": "ForeignKey" + "@id": "#FK_Telescopes_reference_Publications_name" + description: Link Telescopes reference to Publications table + columns: + - "#Telescopes.reference" + referencedColumns: + - "#Publications.name" + + + - name: Instruments + "@id": "#Instruments" + description: Instrument information + primaryKey: + - "#Instruments.name" + + columns: + - name: name + "@id": "#Instruments.name" + datatype: string + length: 30 + description: Instrument name + ivoa:ucd: instr;meta.main + nullable: false + - name: reference + "@id": "#Instruments.reference" + datatype: string + length: 30 + description: Publication reference; links to Publications table + ivoa:ucd: meta.ref + + constraints: + - name: Instruments_reference_Publications_name + "@type": "ForeignKey" + "@id": "#FK_Instruments_reference_Publications_name" + description: Link Instruments reference to Publications table + columns: + - "#Instruments.reference" + referencedColumns: + - 
"#Publications.name" + + + - name: Regimes + "@id": "#Regimes" + description: Regime lookup table + primaryKey: + - "#Regimes.regime" + + columns: + - name: regime + "@id": "#Regimes.regime" + datatype: string + length: 30 + description: Regime identifier string + ivoa:ucd: meta.id;meta.main + nullable: false + + + - name: Sources + "@id": "#Sources" + description: "Main identifiers for objects along with coordinates." + primaryKey: + - "#Sources.source" + + columns: + - name: source + "@id": "#Sources.source" + datatype: string + length: 100 + description: Unique identifier for an object + ivoa:ucd: meta.id;src;meta.main + nullable: false + - name: ra + "@id": "#Sources.ra" + datatype: double + description: ICRS Right Ascension of object + fits:tunit: deg + ivoa:ucd: pos.eq.ra;meta.main + - name: dec + "@id": "#Sources.dec" + datatype: double + description: ICRS Declination of object + fits:tunit: deg + ivoa:ucd: pos.eq.dec;meta.main + - name: shortname + "@id": "#Sources.shortname" + datatype: string + length: 30 + description: Short identifier for an object + ivoa:ucd: meta.id + - name: reference + "@id": "#Sources.reference" + datatype: string + length: 30 + description: Publication reference; links to Publications table + ivoa:ucd: meta.ref;meta.main + nullable: false + - name: comments + "@id": "#Sources.comments" + datatype: string + length: 1000 + description: Free-form comments on this Source + + + indexes: + - name: PK_Sources_source + "@id": "#PK_Sources_source" + description: Primary key for Sources table + columns: + - "#Sources.source" + constraints: + - name: check_ra + "@type": Check + "@id": "#Sources_check_ra_bounds" + description: Validate RA range + expression: ra >= 0 AND ra <= 360 + - name: check_dec + "@type": Check + "@id": "#Sources_check_dec_bounds" + description: Validate Dec range + expression: dec >= -90 AND dec <= 90 + - name: Source_reference_Publications_name + "@type": "ForeignKey" + "@id": "#FK_Sources_reference_Publications_name" + 
description: Link Source reference to Publications table + columns: + - "#Sources.reference" + referencedColumns: + - "#Publications.name" + + + - name: Names + "@id": "#Names" + description: "Additional identifiers for objects in Sources table" + primaryKey: + - "#Names.source" + - "#Names.other_name" + + columns: + - name: source + "@id": "#Names.source" + datatype: string + length: 100 + description: Main identifier for an object; links to Sources table + ivoa:ucd: meta.id;meta.main + nullable: false + - name: other_name + "@id": "#Names.other_name" + datatype: string + length: 100 + description: Alternate identifier for an object + ivoa:ucd: meta.id + nullable: false + + indexes: + - name: PK_Names_source + "@id": "#PK_Names_source" + description: Primary key for Names table + columns: + - "#Names.source" + - "#Names.other_name" + constraints: + - name: Names_source_Source_source + "@type": "ForeignKey" + "@id": "#FK_Names_source_Source_source" + description: Link Names primary identifier to Sources table + columns: + - "#Names.source" + referencedColumns: + - "#Sources.source" + + + - name: Photometry + "@id": "#Photometry" + description: Photometry for Sources + primaryKey: + - "#Photometry.source" + - "#Photometry.band" + - "#Photometry.reference" + + columns: + - name: source + "@id": "#Photometry.source" + datatype: string + length: 100 + description: Main identifier for an object; links to Sources table + ivoa:ucd: meta.id;meta.main + nullable: false + - name: band + "@id": "#Photometry.band" + datatype: string + length: 30 + description: Photometry band for this measurement + - name: ucd + "@id": "#Photometry.ucd" + datatype: string + length: 100 + description: UCD for this measurement + - name: magnitude + "@id": "#Photometry.magnitude" + datatype: double + description: Magnitude value for this entry + fits:tunit: mag + - name: magnitude_error + "@id": "#Photometry.magnitude_error" + datatype: double + description: Uncertainty of this magnitude value + 
fits:tunit: mag + - name: telescope + "@id": "#Photometry.telescope" + datatype: string + length: 30 + description: Telescope, mission, or survey name; links to Telescopes table + - name: instrument + "@id": "#Photometry.instrument" + datatype: string + length: 30 + description: Instrument name; links to Instruments table + - name: epoch + "@id": "#Photometry.epoch" + datatype: double + description: Decimal year + fits:tunit: yr + - name: comments + "@id": "#Photometry.comments" + datatype: string + length: 1000 + description: Free-form comments for this entry + - name: reference + "@id": "#Photometry.reference" + datatype: string + length: 30 + description: Publication reference; links to Publications table + nullable: false + + indexes: + - name: PK_Photometry + "@id": "#PK_Photometry" + description: Primary key for Photometry table + columns: + - "#Photometry.source" + - "#Photometry.band" + - "#Photometry.reference" + constraints: + - name: Photometry_source_Sources_source + "@type": "ForeignKey" + "@id": "#FK_Photometry_source_Sources_source" + description: Link Photometry source to Sources table + columns: + - "#Photometry.source" + referencedColumns: + - "#Sources.source" + - name: Photometry_telescope_Telescopes_name + "@type": "ForeignKey" + "@id": "#FK_Photometry_telescope_Telescopes_name" + description: Link Photometry telescope to Telescopes table + columns: + - "#Photometry.telescope" + referencedColumns: + - "#Telescopes.name" + - name: Photometry_instrument_Instruments_name + "@type": "ForeignKey" + "@id": "#FK_Photometry_instrument_Instruments_name" + description: Link Photometry instrument to Instruments table + columns: + - "#Photometry.instrument" + referencedColumns: + - "#Instruments.name" + - name: Photometry_reference_Publications_name + "@type": "ForeignKey" + "@id": "#FK_Photometry_reference_Publications_name" + description: Link Photometry reference to Publications table + columns: + - "#Photometry.reference" + referencedColumns: + - 
"#Publications.name" + diff --git a/astrodbkit/tests/test_astrodb.py b/astrodbkit/tests/test_astrodb.py index fb072fb..a17d8f1 100644 --- a/astrodbkit/tests/test_astrodb.py +++ b/astrodbkit/tests/test_astrodb.py @@ -55,6 +55,23 @@ def db(): return db +def test_felis_db(): + # Test felis database creation logic + db_path = "felis.db" + + # Clear copy if file present + if os.path.exists(db_path): + os.remove(db_path) + + # Actually attempt to create database using the felis-formatted yaml file + connection_string = 'sqlite:///' + db_path + create_database(connection_string, felis_schema="astrodbkit/schema_example_felis.yaml") + assert os.path.exists(db_path) + + if os.path.exists(db_path): + os.remove(db_path) + + def test_add_data(db): # Load example data to the database diff --git a/docs/index.rst b/docs/index.rst index 9c63a78..cc4afc8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -60,6 +60,22 @@ With that on hand, users should import their schema and prepare the database:: connection_string = 'sqlite:///SIMPLE.db' # connection string for a SQLite database named SIMPLE.db create_database(connection_string) +Creating a Database with Felis schema +------------------------------------- + +The `LSST Felis package `_ provides an alternate way of writing a database schema file. +An example yaml file is provided in this repo (see schema_example_felis.yaml). +Users that want to use Felis will need to install that package (lsst-felis), which you can do alongside astrodbkit with `pip install astrodbkit[felis]`. +Note that Python 3.11 or higher is required to use Felis. 
+ +With a Felis schema file, the creation call can happen either through Felis's examples (see their docs) or with something like:: + + from astrodbkit.astrodb import create_database + + connection_string = 'sqlite:///SIMPLE.db' # connection string for a SQLite database named SIMPLE.db + felis_schema = "path/to/felis/schema.yaml" + create_database(connection_string, felis_schema=felis_schema) + Accessing the Database ====================== diff --git a/pyproject.toml b/pyproject.toml index e4c1d18..ff58793 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,10 @@ test = [ docs = [ "sphinx-astropy" ] -all = ["astrodbkit[test, docs]"] +felis = [ + "lsst-felis" +] +all = ["astrodbkit[test, docs, felis]"] [project.urls] Repository = "https://github.com/astrodbtoolkit/AstrodbKit" From 2adf34d39107736b328e7d0b3e0d965f3f1a6c54 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 10 Dec 2024 16:30:47 -0500 Subject: [PATCH 2/8] Updating docstring; setting mininum python requirements to be 3.11 or later --- .github/workflows/test-package.yml | 2 +- astrodbkit/astrodb.py | 2 ++ pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-package.yml b/.github/workflows/test-package.yml index cc5c846..dc8712b 100644 --- a/.github/workflows/test-package.yml +++ b/.github/workflows/test-package.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, '3.10', 3.11] + python-version: [3.11, 3.12] steps: - uses: actions/checkout@v4 diff --git a/astrodbkit/astrodb.py b/astrodbkit/astrodb.py index 2203a9c..e81c3a8 100644 --- a/astrodbkit/astrodb.py +++ b/astrodbkit/astrodb.py @@ -194,6 +194,8 @@ def create_database(connection_string, drop_tables=False, felis_schema=None): """ Create a database from a schema that utilizes the `astrodbkit2.astrodb.Base` class. Some databases, eg Postgres, must already exist but any tables should be dropped. 
+ If using Felis yaml files, the path to the schema needs to be provided via the felis_schema parameter. + Otherwise, this assumes you have imported a schema.py file with SQLAlchemy definitions. Parameters ---------- diff --git a/pyproject.toml b/pyproject.toml index ff58793..db24daa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ license = {text = "BSD 3-Clause"} authors = [ {name = "David Rodriguez", email = "drodriguez@stsci.edu"}, ] -requires-python = ">= 3.7" +requires-python = ">= 3.11" dependencies = [ "astropy", "astroquery", From 22daef86bed9097abc644a9dbcdafc87d365762d Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 10 Dec 2024 16:31:29 -0500 Subject: [PATCH 3/8] Update docs/index.rst Co-authored-by: Kelle Cruz --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index cc4afc8..94ab52a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -63,7 +63,7 @@ With that on hand, users should import their schema and prepare the database:: Creating a Database with Felis schema ------------------------------------- -The `LSST Felis package `_ provides an alternate way of writing a database schema file. +The `LSST Felis package `_ provides a way of writing a database schema file. An example yaml file is provided in this repo (see schema_example_felis.yaml). Users that want to use Felis will need to install that package (lsst-felis), which you can do alongside astrodbkit with `pip install astrodbkit[felis]`. Note that Python 3.11 or higher is required to use Felis. 
From a3a2bd1aaab6ec5699fdd3abe5f1bd2993ec18db Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 10 Dec 2024 16:54:46 -0500 Subject: [PATCH 4/8] including python 3.13 in test matrix --- .github/workflows/test-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-package.yml b/.github/workflows/test-package.yml index dc8712b..ade4cc6 100644 --- a/.github/workflows/test-package.yml +++ b/.github/workflows/test-package.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.11, 3.12] + python-version: [3.11, 3.12, 3.13] steps: - uses: actions/checkout@v4 From 204f3fcf68aed017b1680effec17a9d442066a86 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Wed, 11 Dec 2024 09:30:36 -0500 Subject: [PATCH 5/8] adding helper parameter to specify postgres schema --- astrodbkit/astrodb.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/astrodbkit/astrodb.py b/astrodbkit/astrodb.py index e81c3a8..c0d4013 100644 --- a/astrodbkit/astrodb.py +++ b/astrodbkit/astrodb.py @@ -310,6 +310,7 @@ def __init__( column_type_overrides={}, sqlite_foreign=True, connection_arguments={}, + schema=None, ): """ Wrapper for database calls and utility functions @@ -335,8 +336,15 @@ def __init__( Flag to enable/disable use of foreign keys with SQLite. Default: True connection_arguments : dict Additional connection arguments, like {'check_same_thread': False}. Default: {} + schema : str + Helper for setting default PostgreSQL schema. 
Equivalent to connection_arguments={"options": f"-csearch_path={schema}"} """ + # Helper logic to set default postgres schema, if specified + if connection_string.lower().startswith("postgres") and schema is not None: + if connection_string.get("options") is None: + connection_string["options"] = f"-csearch_path={schema}" + if connection_string == "sqlite://": self.session, self.base, self.engine = create_database(connection_string) else: From b9989473192ede232f6ecb442dfb766164856c21 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Wed, 11 Dec 2024 09:39:04 -0500 Subject: [PATCH 6/8] Bug fixes and better postgres handling --- astrodbkit/astrodb.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/astrodbkit/astrodb.py b/astrodbkit/astrodb.py index c0d4013..05ad492 100644 --- a/astrodbkit/astrodb.py +++ b/astrodbkit/astrodb.py @@ -4,13 +4,13 @@ import json import os -import sqlite3 import shutil -import yaml +import sqlite3 import numpy as np import pandas as pd import sqlalchemy.types as sqlalchemy_types +import yaml from astropy.coordinates import SkyCoord from astropy.table import Table as AstropyTable from astropy.units.quantity import Quantity @@ -18,6 +18,7 @@ from sqlalchemy.engine import Engine from sqlalchemy.orm import declarative_base, sessionmaker from sqlalchemy.orm.query import Query +from sqlalchemy.schema import CreateSchema from tqdm import tqdm from . 
import FOREIGN_KEY, PRIMARY_TABLE, PRIMARY_TABLE_KEY, REFERENCE_TABLES @@ -220,11 +221,15 @@ def create_database(connection_string, drop_tables=False, felis_schema=None): # engine = create_engine(connection_string) session, base, engine = load_connection(connection_string) - # Workaround for SQLite since it doesn't support schema + # Schema handling for various database types if connection_string.startswith("sqlite"): db_name = connection_string.split("/")[-1] with engine.begin() as conn: conn.execute(text(f"ATTACH '{db_name}' AS {schema_name}")) + elif connection_string.startswith("postgres"): + with engine.connect() as connection: + connection.execute(CreateSchema(schema_name, if_not_exists=True)) + connection.commit() # Drop tables, if requested if drop_tables: @@ -341,9 +346,9 @@ def __init__( """ # Helper logic to set default postgres schema, if specified - if connection_string.lower().startswith("postgres") and schema is not None: - if connection_string.get("options") is None: - connection_string["options"] = f"-csearch_path={schema}" + if connection_string.startswith("postgres") and schema is not None: + if connection_arguments.get("options") is None: + connection_arguments["options"] = f"-csearch_path={schema}" if connection_string == "sqlite://": self.session, self.base, self.engine = create_database(connection_string) From 6cf06ae78dd7b1a5a57883bf819c2eef273e615b Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Wed, 11 Dec 2024 10:48:12 -0500 Subject: [PATCH 7/8] minor update --- astrodbkit/astrodb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrodbkit/astrodb.py b/astrodbkit/astrodb.py index 05ad492..f3bf06d 100644 --- a/astrodbkit/astrodb.py +++ b/astrodbkit/astrodb.py @@ -168,7 +168,7 @@ def load_connection(connection_string, sqlite_foreign=True, base=None, connectio session = Session() # Enable foreign key checks in SQLite - if "sqlite" in connection_string and sqlite_foreign: + if connection_string.startswith("sqlite") 
and sqlite_foreign: set_sqlite() # elif 'postgresql' in connection_string: # # Set up schema in postgres (must be lower case?) From eb778797bb5ee8f57ea5c53b4be18ae95ae8fcbe Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Wed, 11 Dec 2024 11:15:38 -0500 Subject: [PATCH 8/8] Update astrodbkit/astrodb.py Co-authored-by: Kelle Cruz --- astrodbkit/astrodb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astrodbkit/astrodb.py b/astrodbkit/astrodb.py index f3bf06d..c79c7d8 100644 --- a/astrodbkit/astrodb.py +++ b/astrodbkit/astrodb.py @@ -195,8 +195,8 @@ def create_database(connection_string, drop_tables=False, felis_schema=None): """ Create a database from a schema that utilizes the `astrodbkit2.astrodb.Base` class. Some databases, eg Postgres, must already exist but any tables should be dropped. - If using Felis yaml files, the path to the schema needs to be provided via the felis_schema parameter. - Otherwise, this assumes you have imported a schema.py file with SQLAlchemy definitions. + The default behavior is to assume that a schema with SQLAlchemy definitions has been imported prior to calling this function. + If instead, Felis is being used to define the schema, the path to the YAML file needs to be provided to the felis_schema parameter (as a string). Parameters ----------