Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion astrodbkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# Global variables

# These describe the various database tables and their links
REFERENCE_TABLES = [
LOOKUP_TABLES = [
"Publications",
"Telescopes",
"Instruments",
Expand All @@ -39,8 +39,10 @@
"CompanionList",
"SourceTypeList",
]
REFERENCE_TABLES = LOOKUP_TABLES # prior name, for backwards compatibility
# LOOKUP_TABLES is a list of tables that do not link to the primary table.
# These are treated separately from the other data tables that are all assumed to be linked to the primary table.
# These are also known as reference tables.
PRIMARY_TABLE = "Sources" # the primary table used for storing objects
PRIMARY_TABLE_KEY = "source" # the name of the primary key in the primary table; this is used for joining tables
FOREIGN_KEY = "source" # the name of the foreign key in other tables that refer back to the primary
25 changes: 13 additions & 12 deletions astrodbkit/astrodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from sqlalchemy.schema import CreateSchema
from tqdm import tqdm

from . import FOREIGN_KEY, PRIMARY_TABLE, PRIMARY_TABLE_KEY, REFERENCE_TABLES
from . import FOREIGN_KEY, PRIMARY_TABLE, PRIMARY_TABLE_KEY, LOOKUP_TABLES
from .spectra import load_spectrum
from .utils import datetime_json_parser, deprecated_alias, get_simbad_names, json_serializer

Expand Down Expand Up @@ -210,8 +210,8 @@ def create_database(connection_string, drop_tables=False, felis_schema=None):

if felis_schema is not None:
# Felis loader requires felis_schema
from felis.datamodel import Schema
from felis.metadata import MetaDataBuilder
from felis.datamodel import Schema # noqa: PLC0415
from felis.metadata import MetaDataBuilder # noqa: PLC0415

# Load and validate the felis-formatted schema
data = yaml.safe_load(open(felis_schema, "r"))
Expand Down Expand Up @@ -306,10 +306,11 @@ def copy_database_schema(
class Database:
"""Database handler class"""

@deprecated_alias(reference_tables="lookup_tables")
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should make it so you can use either reference_tables or lookup_tables for the parameter, with the former having a deprecation warning.

def __init__(
self,
connection_string,
reference_tables=REFERENCE_TABLES,
lookup_tables=LOOKUP_TABLES,
primary_table=PRIMARY_TABLE,
primary_table_key=PRIMARY_TABLE_KEY,
foreign_key=FOREIGN_KEY,
Expand All @@ -325,9 +326,9 @@ def __init__(
----------
connection_string : str
Connection string to establish a database connection
reference_tables : list
List of reference tables; these are treated separately from data tables.
Default: ['Publications', 'Telescopes', 'Instruments']
lookup_tables : list
List of lookup tables; these are treated separately from data tables as they represent many-to-many relationships (eg, filter or telescope names).
See __init__.LOOKUP_TABLES for the default.
primary_table : str
Name of the primary source table. Default: Sources
primary_table_key : str
Expand Down Expand Up @@ -369,7 +370,7 @@ def __init__(
with self.engine.connect() as conn:
self.metadata.reflect(conn)

self._reference_tables = reference_tables
self._lookup_tables = lookup_tables
self._primary_table = primary_table
self._primary_table_key = primary_table_key
self._foreign_key = foreign_key
Expand Down Expand Up @@ -482,7 +483,7 @@ def inventory(self, name, pretty_print=False):
# Loop over tables (not reference tables) and gather the information. Start with the primary table, though
self._inventory_query(data_dict, self._primary_table, name)
for table in self.metadata.tables:
if table in self._reference_tables + [self._primary_table]:
if table in self._lookup_tables + [self._primary_table]:
continue
self._inventory_query(data_dict, table, name)

Expand Down Expand Up @@ -851,7 +852,7 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct

# Output reference tables
print(f"Storing reference tables to {os.path.join(directory, reference_directory)}...")
for table in self._reference_tables:
for table in self._lookup_tables:
# Skip reference tables that are not actually in the database
if table not in self.metadata.tables.keys():
continue
Expand Down Expand Up @@ -995,7 +996,7 @@ def load_database(self, directory: str, verbose: bool=False, reference_directory
conn.execute(self.metadata.tables[table.name].delete())

# Load reference tables first
for table in self._reference_tables:
for table in self._lookup_tables:
if verbose:
print(f"Loading {table} table")
# Check if the reference table is in the sub-directory
Expand All @@ -1018,7 +1019,7 @@ def load_database(self, directory: str, verbose: bool=False, reference_directory
for file in tqdm(os.listdir(directory_of_sources)):
# Skip reference tables
core_name = file.replace(".json", "")
if core_name in self._reference_tables:
if core_name in self._lookup_tables:
continue

# Skip non-JSON files or hidden files
Expand Down
30 changes: 16 additions & 14 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ For example, the `SIMPLE database <https://github.com/SIMPLE-AstroDB/SIMPLE-db>`

- the primary Sources table, with coordinate information for each target
- several object data tables, like Photometry, Spectra, etc, that contain information for each target
- reference tables, like Publications, Telescopes, etc, that list other information that is used throughout the database, but doesn't refer to a particular target
- lookup tables, like Publications, Telescopes, etc, that list other information that is used throughout the database, but doesn't refer to a particular target. These are sometimes also referred to as reference tables.

The goal of **AstrodbKit** is to link the object tables together in order
to express them as a single entity, while still retaining the information for other reference tables.
to express them as a single entity, while still retaining the information for other lookup tables.
**AstrodbKit** can read and write out an entire target's data as a single JSON file for ease of transport and version
control. Reference tables are also written as JSON files, but organized differently-
control. Lookup tables are also written as JSON files, but organized differently-
a single file per table with multiple records.
An **AstrodbKit**-supported database can thus be exported to two types of JSON files:
individual target files and reference table files
individual target files and lookup table files.
If your database is constructed in a similar fashion, it will work well with **AstrodbKit**.
Other databases can still benefit from some of the functionality of **AstrodbKit**,
but they might not work properly if attempting to use the save/load methods.
Expand Down Expand Up @@ -90,30 +90,32 @@ then initialize the database with the :py:class:`astrodbkit.astrodb.Database()`
The database is now ready to be used. If the database is empty, see below how to populate it.

.. note:: The :py:class:`astrodbkit.astrodb.Database()` class has many parameters that can be set to
control the names of primary/reference tables. By default, these match the SIMPLE database, but users can
control the names of primary/lookup tables. By default, these match the SIMPLE database, but users can
configure them for their own needs and can pass them here or modify their __init__.py file.


When using PostgreSQL databases, it may be useful to pass along connection_arguments that specify the schema to use. For example::

CONNECTION_STRING = "postgresql+psycopg2://user:password@server:port/database"
REFERENCE_TABLES = [
LOOKUP_TABLES = [
"Publications",
"Surveys",
]

db = Database(CONNECTION_STRING,
reference_tables=REFERENCE_TABLES,
lookup_tables=LOOKUP_TABLES,
connection_arguments={'options': '-csearch_path=my_schema'}
)
# This will use my_schema as the default schema for this connection

.. note:: For historical reasons, lookup tables are referred to internally as reference tables.

Loading the Database
--------------------

**AstrodbKit** contains methods to output the full contents of the database as a list of JSON files.
It can likewise read in a directory of these files to populate the database.
By default, reference tables (eg, Publications, Telescopes, etc) and source tables are respectively stored in `reference/` and `source/` sub-directories of `data/`.
By default, lookup tables (eg, Publications, Telescopes, etc) and source tables are respectively stored in `reference/` and `source/` sub-directories of `data/`.
This is how SIMPLE is currently version controlled.

To load a database of this form, do the following::
Expand Down Expand Up @@ -271,7 +273,7 @@ Full String Search
Similar to the Identifier Search above, one can perform a case-insensitive search for
any string against every string column in the database with :py:meth:`~astrodbkit.astrodb.Database.search_string`.
The output is a dictionary with keys for each table that matched results.
This can be useful to find all results matching a particular reference regardless of table::
This can be useful to find all results matching a particular publication regardless of table::

db.search_string('twa') # search for any records with 'twa' anywhere in the database
db.search_string('Cruz18', fuzzy_search=False) # search for strings exactly matching Cruz18 anywhere in the database
Expand Down Expand Up @@ -438,8 +440,8 @@ Saving the Database
===================

If users perform changes to a database, they will want to output this to disk to be version controlled.
**Astrodbkit** provides methods to save an individual source or reference table as well as all of the data stored in the database.
By default, reference tables are stored in a sub-directory of `data/` called "reference"; this can be overwritten by
**Astrodbkit** provides methods to save an individual source or lookup table as well as all of the data stored in the database.
By default, lookup/reference tables are stored in a sub-directory of `data/` called "reference"; this can be overwritten by
supplying a `reference_directory` variable into `save_database` or `save_reference_table`.
Similarly, source/object tables are stored in a sub-directory of `data/` called "source" which can be overwritten by supplying a `source_directory` variable.

Expand All @@ -448,7 +450,7 @@ We recommend using `save_database` as that outputs the entire database contents
# Save single object
db.save_json('2MASS J13571237+1428398', 'data')

# Save single reference table
# Save single lookup/reference table
db.save_reference_table('Publications', 'data')

# Save entire database to directory 'data/' with 'reference/' and 'source/' subdirectories.
Expand Down Expand Up @@ -517,7 +519,7 @@ Here we provide useful tips or guidance when working with **AstrodbKit**.
Handling Relationships Between Object Tables
--------------------------------------------

Because **AstrodbKit** expects a single primary table, object tables that point back to it, and any number of reference tables, it can be difficult to handle relationships between object tables.
Because **AstrodbKit** expects a single primary table, object tables that point back to it, and any number of lookup/reference tables, it can be difficult to handle relationships between object tables.

As an example, consider the scenario where you want to store companion information to your sources, such as a table to store the relationship with orbital separation and a separate one to store general parameters.
You may be calling these CompanionRelationship and CompanionParameters, respectively.
Expand All @@ -527,7 +529,7 @@ You might find it attempting to load CompanionParameters only to find that Compa

The better approach is to define a lookup table that will store the companion identifiers which will be used as the foreign keys.
For example, a CompanionList table that both CompanionParameters and CompanionRelationship can refer to.
This would be a reference table, similar to Telescopes or Publications, while CompanionParameters and CompanionRelationship would be object tables that require tying back to a specific source in the Sources table.
This would be a lookup table, similar to Telescopes or Publications, while CompanionParameters and CompanionRelationship would be object tables that require tying back to a specific source in the Sources table.
Essentially, this is normalizing the database a bit further and serves to avoid some common issues with foreign keys.

Reference/API
Expand Down