diff --git a/astrodbkit/__init__.py b/astrodbkit/__init__.py index c6f5412..dcfa77f 100644 --- a/astrodbkit/__init__.py +++ b/astrodbkit/__init__.py @@ -22,7 +22,7 @@ # Global variables # These describe the various database tables and their links -REFERENCE_TABLES = [ +LOOKUP_TABLES = [ "Publications", "Telescopes", "Instruments", @@ -39,8 +39,10 @@ "CompanionList", "SourceTypeList", ] +REFERENCE_TABLES = LOOKUP_TABLES # prior name, for backwards compatibility # REFERENCE_TABLES is a list of tables that do not link to the primary table. # These are treated separately from the other data tables that are all assumed to be linked to the primary table. +# These are also known as lookup tables. PRIMARY_TABLE = "Sources" # the primary table used for storing objects PRIMARY_TABLE_KEY = "source" # the name of the primary key in the primary table; this is used for joining tables FOREIGN_KEY = "source" # the name of the foreign key in other tables that refer back to the primary diff --git a/astrodbkit/astrodb.py b/astrodbkit/astrodb.py index 085d098..50fae9d 100644 --- a/astrodbkit/astrodb.py +++ b/astrodbkit/astrodb.py @@ -21,7 +21,7 @@ from sqlalchemy.schema import CreateSchema from tqdm import tqdm -from . import FOREIGN_KEY, PRIMARY_TABLE, PRIMARY_TABLE_KEY, REFERENCE_TABLES +from . 
import FOREIGN_KEY, PRIMARY_TABLE, PRIMARY_TABLE_KEY, LOOKUP_TABLES from .spectra import load_spectrum from .utils import datetime_json_parser, deprecated_alias, get_simbad_names, json_serializer @@ -210,8 +210,8 @@ def create_database(connection_string, drop_tables=False, felis_schema=None): if felis_schema is not None: # Felis loader requires felis_schema - from felis.datamodel import Schema - from felis.metadata import MetaDataBuilder + from felis.datamodel import Schema # noqa: PLC0415 + from felis.metadata import MetaDataBuilder # noqa: PLC0415 # Load and validate the felis-formatted schema data = yaml.safe_load(open(felis_schema, "r")) @@ -306,10 +306,11 @@ def copy_database_schema( class Database: """Database handler class""" + @deprecated_alias(reference_tables="lookup_tables") def __init__( self, connection_string, - reference_tables=REFERENCE_TABLES, + lookup_tables=LOOKUP_TABLES, primary_table=PRIMARY_TABLE, primary_table_key=PRIMARY_TABLE_KEY, foreign_key=FOREIGN_KEY, @@ -325,9 +326,9 @@ def __init__( ---------- connection_string : str Connection string to establish a database connection - reference_tables : list - List of reference tables; these are treated separately from data tables. - Default: ['Publications', 'Telescopes', 'Instruments'] + lookup_tables : list + List of lookup tables; these are treated separately from data tables as they represent many-to-many relationships (eg, filter or telescope names). + See __init__.LOOKUP_TABLES for the default. primary_table : str Name of the primary source table. 
Default: Sources primary_table_key : str @@ -369,7 +370,7 @@ def __init__( with self.engine.connect() as conn: self.metadata.reflect(conn) - self._reference_tables = reference_tables + self._lookup_tables = lookup_tables self._primary_table = primary_table self._primary_table_key = primary_table_key self._foreign_key = foreign_key @@ -482,7 +483,7 @@ def inventory(self, name, pretty_print=False): # Loop over tables (not reference tables) and gather the information. Start with the primary table, though self._inventory_query(data_dict, self._primary_table, name) for table in self.metadata.tables: - if table in self._reference_tables + [self._primary_table]: + if table in self._lookup_tables + [self._primary_table]: continue self._inventory_query(data_dict, table, name) @@ -851,7 +852,7 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct # Output reference tables print(f"Storing reference tables to {os.path.join(directory, reference_directory)}...") - for table in self._reference_tables: + for table in self._lookup_tables: # Skip reference tables that are not actually in the database if table not in self.metadata.tables.keys(): continue @@ -995,7 +996,7 @@ def load_database(self, directory: str, verbose: bool=False, reference_directory conn.execute(self.metadata.tables[table.name].delete()) # Load reference tables first - for table in self._reference_tables: + for table in self._lookup_tables: if verbose: print(f"Loading {table} table") # Check if the reference table is in the sub-directory @@ -1018,7 +1019,7 @@ def load_database(self, directory: str, verbose: bool=False, reference_directory for file in tqdm(os.listdir(directory_of_sources)): # Skip reference tables core_name = file.replace(".json", "") - if core_name in self._reference_tables: + if core_name in self._lookup_tables: continue # Skip non-JSON files or hidden files diff --git a/docs/index.rst b/docs/index.rst index 632e23a..e2009df 100644 --- a/docs/index.rst +++ 
b/docs/index.rst @@ -22,15 +22,15 @@ For example, the `SIMPLE database ` - the primary Sources table, with coordinate information for each target - several object data tables, like Photometry, Spectra, etc, that contain information for each target - - reference tables, like Publications, Telescopes, etc, that list other information that is used throughout the database, but doesn't refer to a particular target + - lookup tables, like Publications, Telescopes, etc, that list other information that is used throughout the database, but doesn't refer to a particular target. These are sometimes also referred to as reference tables. The goal of **AstrodbKit** is to link together the object tables together in order -to express them as a single entity, while still retaining the information for other reference tables. +to express them as a single entity, while still retaining the information for other lookup tables. **AstrodbKit** can read and write out an entire target's data as a single JSON file for ease of transport and version -control. Reference tables are also written as JSON files, but organized differently- +control. Lookup tables are also written as JSON files, but organized differently- a single file per table with multiple records. An **AstrodbKit**-supported database can thus be exported to two types of JSON files: -individual target files and reference table files +individual target files and lookup table files If your database is constructed in a similar fashion, it will work well with **AstrodbKit**. Other databases can still benefit from some of the functionality of **AstrodbKit**, but they might not work properly if attempting to use the save/load methods. @@ -90,30 +90,32 @@ then initialize the database with the :py:class:`astrodbkit.astrodb.Database()` The database is now read to be used. If the database is empty, see below how to populate it. .. 
note:: The :py:class:`astrodbkit.astrodb.Database()` class has many parameters that can be set to - control the names of primary/reference tables. By default, these match the SIMPLE database, but users can + control the names of primary/lookup tables. By default, these match the SIMPLE database, but users can configure them for their own needs and can pass them here or modify their __init__.py file. When using PostgreSQL databases, it may be useful to pass along connection_arguments that specify the schema to use. For example:: CONNECTION_STRING = "postgresql+psycopg2://user:password@server:port/database" - REFERENCE_TABLES = [ + LOOKUP_TABLES = [ "Publications", "Surveys", ] db = Database(CONNECTION_STRING, - reference_tables=REFERENCE_TABLES, + lookup_tables=LOOKUP_TABLES, connection_arguments={'options': '-csearch_path=my_schema'} ) # This will use my_schema as the default schema for this connection +.. note:: For historical reasons, lookup tables are referred to internally as reference tables. + Loading the Database -------------------- **Astrodbkit2** contains methods to output the full contents of the database as a list of JSON files. It can likewise read in a directory of these files to populate the database. -By default, reference tables (eg, Publications, Telescopes, etc) and source tables are respectively stored in `reference/` and `source/` sub-directories of `data/`. +By default, lookup tables (eg, Publications, Telescopes, etc) and source tables are respectively stored in `reference/` and `source/` sub-directories of `data/`. This is how SIMPLE is currently version controlled. To load a database of this form, do the following:: @@ -271,7 +273,7 @@ Full String Search Similar to the Identifier Search above, one can perform a case-insensitive search for any string against every string column in the database with :py:meth:`~astrodbkit.astrodb.Database.search_string`. The output is a dictionary with keys for each table that matched results. 
-This can be useful to find all results matching a particular reference regardless of table:: +This can be useful to find all results matching a particular publication regardless of table:: db.search_string('twa') # search for any records with 'twa' anywhere in the database db.search_string('Cruz18', fuzzy_search=False) # search for strings exactly matching Cruz19 anywhere in the database @@ -438,8 +440,8 @@ Saving the Database =================== If users perform changes to a database, they will want to output this to disk to be version controlled. -**Astrodbkit** provides methods to save an individual source or reference table as well as all of the data stored in the database. -By default, reference tables are stored in a sub-directory of `data/` called "reference"; this can be overwritten by +**Astrodbkit** provides methods to save an individual source or lookup table as well as all of the data stored in the database. +By default, lookup/reference tables are stored in a sub-directory of `data/` called "reference"; this can be overwritten by supplying a `reference_directory` variable into `save_database` or `save_reference_table`. Similarly, source/object tables are stored in a sub-directory of `data/` called "source" which can be overwritten by supplying a `source_directory` variable. @@ -448,7 +450,7 @@ We recommend using `save_database` as that outputs the entire database contents # Save single object db.save_json('2MASS J13571237+1428398', 'data') - # Save single reference table + # Save single lookup/reference table db.save_reference_table('Publications', 'data') # Save entire database to directory 'data/' with 'reference/' and 'source/' subdirectories. @@ -517,7 +519,7 @@ Here we provide useful tips or guidance when working with **AstrodbKit**. 
Handling Relationships Between Object Tables -------------------------------------------- -Becuase **AstrodbKit** expects a single primary table, object tables that point back to it, and any number of reference tables, it can be difficult to handle relationships between object tables. +Because **AstrodbKit** expects a single primary table, object tables that point back to it, and any number of lookup/reference tables, it can be difficult to handle relationships between object tables. As an example, consider the scenario where you want to store companion information to your sources, such as a table to store the relationship with orbital separation and a separate one to store general parameters. You may be calling these CompanionRelationship and CompanionParameters, respectively. @@ -527,7 +529,7 @@ You might find it attempting to load CompanionParameters only to find that Compa The better approach is to define a lookup table that will store the companion identifiers which will be used as the foreign keys. For example, a CompanionList table that both CompanionParameters and CompanionRelationship can refer to. -This would be a reference table, similar to Telescopes or Publications, while CompanionParameters and CompanionRelationship would be object tables that require tying back to a specific source in the Sources table. +This would be a lookup table, similar to Telescopes or Publications, while CompanionParameters and CompanionRelationship would be object tables that require tying back to a specific source in the Sources table. Essentially, this is normalizing the database a bit further and serves to avoid some common issues with foreign keys. Reference/API