Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,19 @@ Below is an example AWS lambda test event that shows the format of the event tha
"is_test": true,
"rds_config": {
"records": "",
"columns": [],
"columns": "",
"where": "",
"limit": 0
"limit": -1
}
}
```
- `rds_config`: Block required to contain the query items.
- `records`: The Cumulus database table name to get records for.
- `columns`: The columns to request from the database. This will default to `*` if nothing is provided.
- `where`: A Postgresql compliant where clause.
- `limit`: The number of records to return. 0 means return all records that match the query and will default to 100 if not provided.
- `records`: The Cumulus database table name to get records for (providers, collections, rules, granules, executions, async_operations, pdrs).
- `columns`: The columns to request from the database, given as a single string, e.g. `"column_1, column_2"`. This will default to `*` if nothing is provided.
- `where`: A Postgresql compliant where clause:
- `"granule_id LIKE '%value' AND collection_id = 'value'"`
- `"collection_id='rssmif17d3d___7' and status IN('failed', 'queued', 'running') and published = true"`.
- `limit`: The number of records to return. `-1` means return all records that match the query; if not provided, the limit defaults to 100.
- `is_test`: If true, the code will not be run as a `cumulus_task` and the input event will not go through the CMA.

The `columns`, `where`, and `limit` keys are optional.
Expand Down
259 changes: 259 additions & 0 deletions task/api_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@

# Column-name tuples for each Cumulus RDS (PostgreSQL) table. These define
# the columns the task is allowed to select from each table; commented-out
# names are internal foreign-key/bookkeeping columns that are deliberately
# excluded from query results.

# Columns of the async_operations table.
async_operations_db_columns = (
    'async_operation_id',
    'cumulus_id',
    'id',
    'description',
    'operation_type',
    'output',
    'status',
    'task_arn',
    'created_at',
    'updated_at'
)

# Columns of the collections table.
collections_db_columns = (
    'collection_id',
    'name',
    'version',
    'sample_file_name',
    'granule_id_validation_regex',
    'granule_id_extraction_regex',
    'files',
    'process',
    'url_path',
    'duplicate_handling',
    'report_to_ems',
    'ignore_files_config_for_discovery',
    'meta',
    'tags',
    'created_at',
    'updated_at'
)

# Columns of the executions table.
# NOTE(review): 'execution' as a column of the executions table looks
# unusual — confirm against the Cumulus RDS schema.
executions_db_columns = (
    'execution',
    'cumulus_id',
    'arn',
    'async_operation_cumulus_id',
    'collection_cumulus_id',
    'parent_cumulus_id',
    'cumulus_version',
    'url',
    'status',
    'tasks',
    'error',
    'workflow_name',
    'duration',
    'original_payload'
)

# Columns of the files table.
# NOTE(review): 'files' as a column of the files table looks unusual —
# confirm against the Cumulus RDS schema.
files_db_columns = (
    'files',
    'cumulus_id',
    'granule_cumulus_id',
    'created_at',
    'updated_at',
    'file_size',
    'bucket',
    'checksum_type',
    'checksum_value',
    'file_name',
    'key',
    'path',
    'source',
    'type'
)


# Columns of the granules table; internal cumulus_id linkage columns are
# intentionally commented out.
granules_db_columns = (
    # 'granules.cumulus_id',
    'granule_id',
    'status',
    'files',
    # 'granules.collection_cumulus_id',
    'created_at',
    'updated_at',
    'published',
    'duration',
    'time_to_archive',
    'time_to_process',
    'product_volume',
    'error',
    'cmr_link',
    # 'granules.pdr_cumulus_id',
    # 'granules.provider_cumulus_id',
    'beginning_date_time',
    'ending_date_time',
    'last_update_date_time',
    'processing_end_date_time',
    'processing_start_date_time',
    'collection_id'
)

# Columns of the pdrs table.
pdrs_db_columns = (
    # 'cumulus_id',
    # 'collection_cumulus_id',
    # 'provider_cumulus_id',
    # 'execution_cumulus_id',
    'status',
    'name',
    'progress',
    'pan_sent',
    'pan_message',
    'stats',
    'address',
    'original_url',
    'duration',
    'timestamp',
    'created_at',
    'updated_at'
)

# Columns of the providers table.
providers_db_columns = (
    # 'cumulus_id',
    'id',
    'name',
    'protocol',
    'host',
    'port',
    'username',
    'password',
    'global_connection_limit',
    'private_key',
    'cm_key_id',
    'certificate_uri',
    'created_at',
    'updated_at',
    'allowed_redirects',
    'max_download_time'
)

# Columns of the rules table.
rules_db_columns = (
    # 'cumulus_id',
    'name',
    'workflow',
    # 'collection_cumulus_id',
    # 'provider_cumulus_id',
    'type',
    'enabled',
    'value',
    'arn',
    'log_event_arn',
    'execution_name_prefix',
    'payload',
    'meta',
    'tags',
    'queue_url'
)

# camelCase field names of the Cumulus API granule record model. Used to
# decide which names may be translated between API and DB representations.
granule_model_fields = (
    'beginningDateTime',
    'cmrLink',
    'collectionId',
    'createdAt',
    'duration',
    'endingDateTime',
    'error',
    'execution',
    'files',
    'granuleId',
    'lastUpdateDateTime',
    'pdrName',
    'processingEndDateTime',
    'processingStartDateTime',
    'productVolume',
    'productionDateTime',
    'provider',
    'published',
    'queryFields',
    'status',
    'timeToArchive',
    'timeToPreprocess',
    'timestamp',
    'updatedAt'
)

def api_field_names_to_db_column_names(field_names):
    """Convert an iterable of camelCase API field names to DB column names.

    Each name is mapped with api_field_name_to_db_column(); names that are
    not granule-model fields map to '' (see that function).

    Args:
        field_names: Iterable of API field name strings.

    Returns:
        list: Converted snake_case column names, in input order.
    """
    # Comprehension replaces the original manual append loop (same result).
    return [api_field_name_to_db_column(name) for name in field_names]

def api_field_name_to_db_column(field_name):
    """Convert a camelCase granule-model field name to a snake_case column.

    Args:
        field_name: API field name, e.g. 'beginningDateTime'.

    Returns:
        str: The snake_case column name (e.g. 'beginning_date_time'), or ''
        when field_name is not listed in granule_model_fields.
    """
    if field_name not in granule_model_fields:
        # Unknown fields map to '' so callers can detect/skip them.
        return ''
    # Each uppercase letter marks a word boundary: prefix '_', lowercase it.
    # (The original implementation also printed a debug line per call; that
    # leftover debug output has been removed.)
    return ''.join(
        f'_{character.lower()}' if character.isupper() else character
        for character in field_name
    )

def db_column_names_to_api_keys(column_names):
    """Convert DB column names back to camelCase API keys.

    Bug fix: the original implementation recursively called itself on each
    column name instead of db_column_name_to_api_field_name, which caused
    unbounded recursion (RecursionError) on any non-empty input.

    Args:
        column_names: Iterable of snake_case column name strings.

    Returns:
        list: camelCase API field names, in input order; columns that are
        not granule-model fields map to ''.
    """
    return [db_column_name_to_api_field_name(name) for name in column_names]

def db_column_name_to_api_field_name(db_column_name):
    """Convert a snake_case DB column name to its camelCase API field name.

    Args:
        db_column_name: Column name such as 'beginning_date_time'.

    Returns:
        str: The camelCase field name (e.g. 'beginningDateTime') when it is
        a known granule-model field, otherwise ''.
    """
    words = db_column_name.split('_')
    # The first word keeps its casing; each later word gets its first letter
    # uppercased (empty words from doubled underscores contribute nothing).
    camel = words[0] + ''.join(word[:1].upper() + word[1:] for word in words[1:])
    # Only names belonging to the granule model are considered valid.
    return camel if camel in granule_model_fields else ''


def test_column_api():
    """Ad-hoc smoke test: round-trip API keys -> DB columns -> API keys.

    Prints each stage for manual inspection; the non-model names
    ('fieldTwo', 'fieldThree', 'something_else') map to ''.
    NOTE(review): as written in this file, db_column_names_to_api_keys
    recurses on itself, so the final conversion will not complete — verify
    that function before relying on this round-trip.
    """
    keys = ['beginningDateTime', 'fieldTwo', 'fieldThree', 'something_else']
    print(keys)
    column_names = api_field_names_to_db_column_names(keys)
    print(column_names)
    keys = db_column_names_to_api_keys(column_names)
    print(keys)

def parse_where_clause(query_str):
    """Rewrite camelCase API field names in a where clause to DB columns.

    Scans query_str character by character, accumulating alphanumeric runs
    as terms. Each term that is a known granule-model API field name is
    replaced by its snake_case column name; all other characters (spaces,
    operators, quotes, punctuation) and unknown terms pass through
    unchanged.

    Args:
        query_str: A where-clause string, e.g.
            "granuleId LIKE '%value' AND collectionId = 'value'".

    Returns:
        str: The clause with API field names mapped to DB column names.
    """
    pieces = []
    query_term = ''
    for character in query_str:
        if character.isalnum():
            query_term = f'{query_term}{character}'
            continue
        # Term boundary: translate the accumulated term if it is a field.
        pieces.append(api_field_name_to_db_column(query_term) or query_term)
        pieces.append(character)
        query_term = ''
    # Bug fix: the original appended the trailing term untranslated, so a
    # field name at the very end of the clause stayed in camelCase.
    pieces.append(api_field_name_to_db_column(query_term) or query_term)
    return ''.join(pieces)

def test_parse_where_clause():
    """Ad-hoc smoke test for parse_where_clause; prints the parsed clause."""
    # 'granules.granuleId' splits at '.', so only 'granuleId' is translated.
    query = 'SELECT * where granules.granuleId LIKE someValue AND collectionId=anotherValue'
    parsed_query = parse_where_clause(query)
    print(parsed_query)


if __name__ == '__main__':
    # Intentionally a no-op entry point; invoke test_column_api() or
    # test_parse_where_clause() manually when debugging.
    pass
Loading