From 1ee909633bed7ee7f59aab2b49959a253e8f2c15 Mon Sep 17 00:00:00 2001 From: kaseyLee123 <142427883+kaseyLee123@users.noreply.github.com> Date: Wed, 26 Nov 2025 19:09:07 -0500 Subject: [PATCH 1/3] added getting Started --- .../ingesting/getting_started_ingesting.rst | 2 +- docs/pages/ingesting/ingest_scripts.rst | 66 ----------------- .../getting_started/getting_started.rst | 70 +++++++++++++++++++ .../ingest_scripts/ingest_scripts.rst | 15 ++++ 4 files changed, 86 insertions(+), 67 deletions(-) delete mode 100644 docs/pages/ingesting/ingest_scripts.rst create mode 100644 docs/pages/ingesting/ingest_scripts/getting_started/getting_started.rst create mode 100644 docs/pages/ingesting/ingest_scripts/ingest_scripts.rst diff --git a/docs/pages/ingesting/getting_started_ingesting.rst b/docs/pages/ingesting/getting_started_ingesting.rst index 2b61d48..5989e90 100644 --- a/docs/pages/ingesting/getting_started_ingesting.rst +++ b/docs/pages/ingesting/getting_started_ingesting.rst @@ -8,7 +8,7 @@ Ingesting and Modifying Data ingesting_publications spectra/* - ingest_scripts + ingest_scripts/* diff --git a/docs/pages/ingesting/ingest_scripts.rst b/docs/pages/ingesting/ingest_scripts.rst deleted file mode 100644 index 886ac7b..0000000 --- a/docs/pages/ingesting/ingest_scripts.rst +++ /dev/null @@ -1,66 +0,0 @@ -Ingest Scripts -============== -Ingest scripts can be used to add a bunch of data to the database at once. -Often ingests are performed by reading in a file (e.g., csv) that contains -a table of data and then ingesting each row of the table into the database. -Below is an example script for ingesting sources discovered by -Rojas et al. 2012 into the SIMPLE Archive from a .csv file -that has columns named `name`, `ra`, `dec`. - -.. 
code-block:: python - - from astropy.io import ascii - from simple.schema import REFERENCE_TABLES - from astrodb_utils import load_astrodb, logger, AstroDBError - from astrodb_utils.sources import ingest_source - from astrodb_utils.publications import ingest_publication - - SAVE_DB = False # Set to True to write out the JSON files at the end of the script - RECREATE_DB = True # Set to True to recreate the database from the JSON files - - # Load the database - db = load_astrodb("SIMPLE.sqlite", - recreatedb=RECREATE_DB, - reference_tables=REFERENCE_TABLES, - felis_schema="simple/schema.yaml", - ) - - - def ingest_pubs(db): - # Ingest discovery publication - ingest_publication( - db, - doi="10.1088/0004-637X/748/2/93" - ) - - def ingest_sources(db): - # read the csv data into an astropy table - data_table = ascii.read(file.csv, format="csv") - - n_added = 0 - n_skipped = 0 - - for source in data_table: - try: - ingest_source( - db, - source=data_table['name'], - ra=data_table['ra'], - dec=data_table['dec'], - reference="Roja12"], - ) - logger.info(f"Source {source['name']} ingested.") - n_added += 1 - except AstroDBError as e: - logger.warning(f"Error ingesting source {source['name']}: {e}") - n_skipped += 1 - continue - - - ingest_pubs(db) - ingest_sources(db) - - logger.info(f"Added {n_added} sources, skipped {n_skipped} sources.") - - if DB_SAVE: - db.save() diff --git a/docs/pages/ingesting/ingest_scripts/getting_started/getting_started.rst b/docs/pages/ingesting/ingest_scripts/getting_started/getting_started.rst new file mode 100644 index 0000000..20047ee --- /dev/null +++ b/docs/pages/ingesting/ingest_scripts/getting_started/getting_started.rst @@ -0,0 +1,70 @@ +Getting Started +======================= + +Ingest scripts can be used to add a bunch of data to the database at once. +Below is a snippet of code taken from a script ingesting proper motions from compilation by Zhang et al. +These lines should exist in every ingest script: + +.. 
code-block:: python + + logger = logging.getLogger("AstroDB") + logger.setLevel(logging.INFO) + SAVE_DB = False + RECREATE_DB = True + SCHEMA_PATH = "simple/schema.yaml" + + db = load_astrodb("SIMPLE.sqlite", recreatedb=RECREATE_DB, reference_tables=REFERENCE_TABLES, felis_schema=SCHEMA_PATH) + + L6T6_link = ( + "scripts/ingests/zjzhang/L6_to_T6_benchmarks08062025.csv" + ) + + L6T6_table = ascii.read( + L6T6_link, + format="csv", + data_start=1, + header_start=0, + guess=False, + fast_reader=False, + delimiter=",", + ) + +Logging Setup +-------------------- + +When working with data ingestion scripts or database-building workflows, it's important to have a reliable way to understand what the script is doing internally. +Python's built-in logging module provides a structured system for reporting events, progress updates, and errors during execution. + +.. code-block:: python + + logger = logging.getLogger("AstroDB") + logger.setLevel(logging.INFO) + +Instantiating a logger for your script gives you an easier way to track what your script is doing: database loading, ingest errors, warnings, etc. + +The line ``logger.setLevel(logging.INFO)`` configures the logger to display only log messages at level INFO or higher. +Python provides multiple logging levels, including: +- DEBUG: extremely detailed diagnostic output +- INFO: general runtime information +- WARNING: unexpected events that do not stop execution +- ERROR: serious problems that prevent part of the script from running +- CRITICAL: errors severe enough to stop execution entirely + +Database ingestion often involves multiple operations happening quickly, so setting the level to INFO prevents you from being flooded with low-level debugging messages. +This filters out unimportant information, making the output easier to read and ingestion problems or error messages easier to diagnose. + + +Loading the Database +------------------------- + +.. 
code-block:: python + + SAVE_DB = False + RECREATE_DB = True + SCHEMA_PATH = "simple/schema.yaml" + db = load_astrodb("SIMPLE.sqlite", recreatedb=RECREATE_DB, reference_tables=REFERENCE_TABLES, felis_schema=SCHEMA_PATH) + + + + + diff --git a/docs/pages/ingesting/ingest_scripts/ingest_scripts.rst b/docs/pages/ingesting/ingest_scripts/ingest_scripts.rst new file mode 100644 index 0000000..5d3abe1 --- /dev/null +++ b/docs/pages/ingesting/ingest_scripts/ingest_scripts.rst @@ -0,0 +1,15 @@ +Ingest Scripts +============== + +.. toctree:: + :glob: + :maxdepth: 2 + :titlesonly: + + getting_started/* + writing_scripts/* + error_messages/* + tips/* + + + From a95b77fba7ed2deae30aae668ffdf26b455dea3a Mon Sep 17 00:00:00 2001 From: kaseyLee123 <142427883+kaseyLee123@users.noreply.github.com> Date: Wed, 26 Nov 2025 19:34:50 -0500 Subject: [PATCH 2/3] basicallt finished getting started --- .../getting_started/getting_started.rst | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/docs/pages/ingesting/ingest_scripts/getting_started/getting_started.rst b/docs/pages/ingesting/ingest_scripts/getting_started/getting_started.rst index 20047ee..b00e2c0 100644 --- a/docs/pages/ingesting/ingest_scripts/getting_started/getting_started.rst +++ b/docs/pages/ingesting/ingest_scripts/getting_started/getting_started.rst @@ -64,6 +64,44 @@ Loading the Database SCHEMA_PATH = "simple/schema.yaml" db = load_astrodb("SIMPLE.sqlite", recreatedb=RECREATE_DB, reference_tables=REFERENCE_TABLES, felis_schema=SCHEMA_PATH) +When loading the database, it is important to know at which stage you are running your script. +For example, SAVE_DB will save the data files in addition to modifying the .db file. +This should only be set to True once you are sure your ingest script works, so you don't run into errors. +RECREATE_DB forces a full rebuild of the SIMPLE database from the data files, essentially reconstructing it from scratch. 
+Setting ``RECREATE_DB`` to True initializes a clean database to work from, which is useful while you are still in the early stages of writing your script and rerunning it often; setting it to False preserves any existing data. + +Our schema path variable simply points to the YAML schema file which defines the structure of our database, including all the tables, columns, constraints, and foreign keys. +This is important for when we actually load our database so it is built with the correct structure and information. + + +Setting Up Your Data +------------------------- + +Often ingests are performed by reading in a file (e.g., csv) that contains a table of data and then ingesting each row of the table into the database. +Therefore, it is important to convert your data into a format that is easy to read in Python. + +.. code-block:: python + + L6T6_link = ( + "scripts/ingests/zjzhang/L6_to_T6_benchmarks08062025.csv" + ) + + L6T6_table = ascii.read( + L6T6_link, + format="csv", + data_start=1, + header_start=0, + guess=False, + fast_reader=False, + delimiter=",", + ) + +First, we define a variable that points to the location of our data file, which we then use to read the file in as an Astropy Table. +Here, we specify that our file is in csv format and provide additional parameters to ensure the file is read correctly. +For example, ``data_start`` and ``header_start`` specify which rows contain the data and the header, respectively, while ``delimiter`` indicates that the file is comma-separated. +The resulting ``L6T6_table`` variable is now an Astropy Table object that contains all the data from the csv file, which we can then loop through to ingest each row into the database. 
+ + From 4966c0e9fbca65a9c48c96cca939abfc4a8a2caf Mon Sep 17 00:00:00 2001 From: kaseyLee123 <142427883+kaseyLee123@users.noreply.github.com> Date: Wed, 26 Nov 2025 20:07:42 -0500 Subject: [PATCH 3/3] Create writing_scripts --- .../writing_scripts/writing_scripts.rst | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 docs/pages/ingesting/ingest_scripts/writing_scripts/writing_scripts.rst diff --git a/docs/pages/ingesting/ingest_scripts/writing_scripts/writing_scripts.rst b/docs/pages/ingesting/ingest_scripts/writing_scripts/writing_scripts.rst new file mode 100644 index 0000000..6a977f7 --- /dev/null +++ b/docs/pages/ingesting/ingest_scripts/writing_scripts/writing_scripts.rst @@ -0,0 +1,74 @@ +Writing Scripts +================== + +When writing ingest scripts, there are two different ways to go about it: using existing ingest functions from ``astrodb_utils`` or using SQLAlchemy commands. + + +Using Existing Ingest Functions +--------------------------------- +Using existing ingest functions helps streamline the process of writing an ingest script. +However, only a few ingest functions exist, namely for sources, names, and instruments. +Therefore, if your data fits into one of these categories, it is recommended to use the existing functions. + +Below is an example of how to use the ``ingest_source`` function to ingest source data into the database: + +.. code-block:: python + + for source in bones_sheet_table: + + ingest_source( + db, + source=source["NAME"], + reference=reference[1], + ra=source["RA"], + dec=source["DEC"], + ra_col_name="ra", + dec_col_name="dec", + epoch_col_name="epoch", + raise_error=True, + search_db=True, + comment="Discovery reference from the BONES archive", + ) + +Note that the basic structure for any ingest is looping through each row of your data table and appropriately ingesting each row into the database with the relevant parameters. 
+Each ingest function will have different required and optional parameters, so be sure to check the API documentation for more details. + + +Using SQLAlchemy Commands +--------------------------------- +If there is no existing ingest function for your data type, you can use SQLAlchemy commands to directly ingest into the database. + +Below is an example of how to ingest modeled parameters data into the database using SQLAlchemy commands: + +.. code-block:: python + + for row in L6T6_table: + with db.engine.connect() as conn: + conn.execute( + db.ModeledParameters.insert().values( + { + "source": row["NAME"], + "model": row["MODEL"], + "parameter": row["PARAM"], + "value": row["VAL"], + "upper_error": row["UPP_ERR"], + "lower_error": row["LOW_ERR"], + "unit": row["UNIT"], + "comments": "Ingested from compilation by Zhang et al. (2020ApJ...891..171Z)", + "reference": row["REF"] + } + ) + ) + conn.commit() + +Here, we follow the same format of looping through each row of our data table and then using insert commands to add each row into the database. + +Since there is no existing ingest function, there are a few things to keep in mind. +For example, make sure to change the table name after ``db.`` to the appropriate table you are ingesting into. + +It is also important to reference the schema to ensure your code matches the database structure. +For example, make sure that the column names inside the ``values()`` method match exactly with the column names in the database schema. +Additionally, the schema, which is available in your code under the utils folder, will indicate which columns are required versus optional (check nullable in the column you are referencing), so be sure to include all required columns in your code to avoid any errors. +Finally, make sure to commit the changes to the database after executing the command with ``conn.commit()``. + +