Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions isamples_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Version string of the isamples_api package.
__version__ = '0.1'
124 changes: 124 additions & 0 deletions isamples_api/controlled_vocabulary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import logging
from typing import Optional, Any

import requests

from isamples_api.metadata_constants import METADATA_LABEL, METADATA_IDENTIFIER


# Inherit from dict in order to make this class JSON serializable
class VocabularyTerm(dict):
    """A single controlled-vocabulary entry that doubles as a plain dict.

    The instance keeps ``key``, ``label`` and ``uri`` as attributes, and its
    dict contents are the mapping produced by ``metadata_dict()``, so the
    object can be passed straight to a JSON encoder.
    """

    def __init__(self, key: Optional[str], label: str, uri: Optional[str]):
        """Store the term's fields and fill the dict payload from them.

        Arguments:
            key -- lookup key of the term (may be None)
            label -- display label of the term
            uri -- identifier URI of the term (may be None)
        """
        self.key = key
        self.label = label
        self.uri = uri
        # The attributes have to exist before this call: metadata_dict() reads them.
        super().__init__(self.metadata_dict())

    def metadata_dict(self) -> dict[str, str]:
        """Return the term as a metadata mapping.

        Return value:
            A dict that always holds the label and additionally holds the
            identifier entry when the term has a uri.
        """
        if self.uri is None:
            return {METADATA_LABEL: self.label}
        return {METADATA_LABEL: self.label, METADATA_IDENTIFIER: self.uri}


class ControlledVocabulary:
    """Lookup index over one controlled vocabulary delivered as a "uijson" tree.

    The input is a nested mapping in which every node is keyed by the term's
    URI and carries a ``label`` mapping (with an ``en`` entry) and a
    ``children`` list of nodes of the same shape. Every node becomes a
    VocabularyTerm that is indexed by key, by label and by URI. The first node
    processed is remembered as the root term and is what failed lookups fall
    back to.
    """

    # Class-level caches, filled on first use by the factory methods at the
    # bottom of the class so each vocabulary is fetched at most once.
    MATERIAL_SAMPLE_OBJECT_TYPE = None
    MATERIAL_TYPE = None
    SAMPLED_FEATURE_TYPE = None

    def __init__(self, uijson_dict: dict[str, Any], key_prefix: str):
        """Build the lookup tables from a uijson document.

        Arguments:
            uijson_dict -- the vocabulary tree, keyed by term URI
            key_prefix -- namespace prefix used to build term keys, e.g. "mat"
        """
        self.vocabulary_terms_by_key: dict[str, VocabularyTerm] = {}
        self.vocabulary_terms_by_label: dict[str, VocabularyTerm] = {}
        self.vocabulary_terms_by_uri: dict[str, VocabularyTerm] = {}
        self._uijson_dict = uijson_dict
        self._key_prefix = key_prefix
        self._is_first = True
        self._process_uijson_dict(uijson_dict)

    def _term_key_for_label(self, label: str):
        """Return the namespaced key "<key_prefix>:<label>"."""
        return f"{self._key_prefix}:{label}"

    def _process_uijson_dict(self, uijson_dict: dict[str, Any]):
        """Index every node of the (sub)tree, recursing into its children.

        Each node looks like this:

            "https://w3id.org/isample/vocabulary/material/1.0/material":
            {
                "label": {"en": "Material"},
                "children": [ ...nodes of the same shape... ]
            }
        """
        for uri, value in uijson_dict.items():
            label = value.get("label").get("en")
            last_piece_of_uri = uri.rsplit("/", 1)[-1]
            term_key = self._term_key_for_label(last_piece_of_uri)
            term = VocabularyTerm(term_key, label, uri)
            # There's a mix of callers that use both namespaced and non-namespaced keys to look terms up.
            # We should support both, e.g. "biogenicnonorganicmaterial" and "spec:biogenicnonorganicmaterial"
            self.vocabulary_terms_by_key[term_key.lower()] = term
            # Keep the verbatim entry for code that reads the table directly...
            self.vocabulary_terms_by_key[last_piece_of_uri] = term
            # ...and add the lowercased one, because term_for_key() lowercases
            # its argument before looking it up.
            self.vocabulary_terms_by_key[last_piece_of_uri.lower()] = term
            self.vocabulary_terms_by_label[label.lower()] = term
            self.vocabulary_terms_by_uri[uri] = term
            if self._is_first:
                self._root_term = term
                self._is_first = False
            # A node without a "children" entry is treated as a leaf.
            for child in value.get("children") or []:
                self._process_uijson_dict(child)

    def root_term(self) -> "VocabularyTerm":
        """Return the first term that was processed.

        Raises AttributeError if the vocabulary document contained no terms,
        because the root is only recorded while processing a node.
        """
        return self._root_term

    def term_for_key(self, key: str) -> "VocabularyTerm":
        """Look a term up by key, case-insensitively.

        Arguments:
            key -- namespaced ("mat:rock") or non-namespaced ("rock") key
        Return value:
            The matching term, or the root term (after logging a warning)
            when nothing matches.
        """
        lowered = key.lower()
        term = self.vocabulary_terms_by_key.get(lowered)
        if term is None:
            # Retry with the namespace prefix added; this has to search the
            # key table, since that is where namespaced keys are stored.
            term = self.vocabulary_terms_by_key.get(self._term_key_for_label(lowered))
        if term is None:
            logging.warning(f"Unable to look up vocabulary term for key {key}, returning root term instead.")
            term = self.root_term()
        return term

    def term_for_label(self, label: str) -> "VocabularyTerm":
        """Look a term up by label, also accepting a URI or a key.

        Arguments:
            label -- the label to resolve; matched case-insensitively
        Return value:
            The matching term, or the root term (after logging a warning)
            when nothing matches.
        """
        lowered = label.lower()
        term = self.vocabulary_terms_by_label.get(lowered)
        if term is None:
            # There are cases where we may already have the uri, allow those through.
            # URIs are stored verbatim, so try the exact string before the
            # lowercased form.
            term = self.vocabulary_terms_by_uri.get(label)
        if term is None:
            term = self.vocabulary_terms_by_uri.get(lowered)
        if term is None:
            term = self.vocabulary_terms_by_key.get(lowered)
        if term is None:
            logging.warning(f"Unable to look up vocabulary term for label {label}, returning root term instead.")
            term = self.root_term()
        return term

    @staticmethod
    def _fetch_uijson_from_uri(uri: str, timeout: float = 30.0) -> dict:
        """Download and decode the JSON document at uri.

        Arguments:
            uri -- address of the uijson document
            timeout -- seconds to wait for the server before giving up
        Raises the requests exception for a timeout, a connection problem,
        an HTTP error status or a body that is not valid JSON.
        """
        response = requests.get(uri, timeout=timeout)
        # Fail on 4xx/5xx here instead of indexing an error payload as terms.
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _cached_vocabulary(cache_attribute: str, uri: str, key_prefix: str) -> "ControlledVocabulary":
        """Return the vocabulary cached in the named class attribute, fetching it on first use."""
        vocabulary = getattr(ControlledVocabulary, cache_attribute)
        if vocabulary is None:
            uijson = ControlledVocabulary._fetch_uijson_from_uri(uri)
            if uijson is None:
                raise ValueError(f"Vocabulary document at {uri} was empty")
            vocabulary = ControlledVocabulary(uijson, key_prefix)
            setattr(ControlledVocabulary, cache_attribute, vocabulary)
        return vocabulary

    @staticmethod
    def material_sample_object_type() -> "ControlledVocabulary":
        """Return the shared material sample object type vocabulary (key prefix "spec")."""
        return ControlledVocabulary._cached_vocabulary(
            "MATERIAL_SAMPLE_OBJECT_TYPE",
            "https://central.isample.xyz/isamples_central/vocabulary/material_sample_type",
            "spec",
        )

    @staticmethod
    def material_type() -> "ControlledVocabulary":
        """Return the shared material type vocabulary (key prefix "mat")."""
        return ControlledVocabulary._cached_vocabulary(
            "MATERIAL_TYPE",
            "https://central.isample.xyz/isamples_central/vocabulary/material_type",
            "mat",
        )

    @staticmethod
    def sampled_feature_type() -> "ControlledVocabulary":
        """Return the shared sampled feature type vocabulary (key prefix "sf")."""
        return ControlledVocabulary._cached_vocabulary(
            "SAMPLED_FEATURE_TYPE",
            "https://central.isample.xyz/isamples_central/vocabulary/sampled_feature_type",
            "sf",
        )
46 changes: 46 additions & 0 deletions isamples_api/metadata_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
String constants used in metadata transformation
"""

# Every constant below is the literal string used as a key in metadata
# dictionaries. Keep the values unchanged: they are what ends up in the
# serialized records.

# Top-level record keys.
METADATA_SCHEMA = "$schema"
METADATA_AT_ID = "@id"
METADATA_LABEL = "label"
METADATA_IDENTIFIER = "identifier"
METADATA_SAMPLE_IDENTIFIER = "sample_identifier"
METADATA_DESCRIPTION = "description"
# Category keys; each has a companion "<name>_confidence" key.
METADATA_HAS_CONTEXT_CATEGORY = "has_context_category"
METADATA_HAS_CONTEXT_CATEGORY_CONFIDENCE = "has_context_category_confidence"
METADATA_HAS_MATERIAL_CATEGORY = "has_material_category"
METADATA_HAS_MATERIAL_CATEGORY_CONFIDENCE = "has_material_category_confidence"
METADATA_HAS_SAMPLE_OBJECT_TYPE = "has_sample_object_type"
METADATA_HAS_SAMPLE_OBJECT_TYPE_CONFIDENCE = "has_sample_object_type_confidence"
METADATA_INFORMAL_CLASSIFICATION = "informal_classification"
# Keyword keys -- presumably METADATA_KEYWORD, METADATA_SCHEME_NAME and
# METADATA_KEYWORD_URI are the fields of one entry in the "keywords" list;
# TODO confirm against the callers that build keyword entries.
METADATA_KEYWORDS = "keywords"
METADATA_KEYWORD = "keyword"
METADATA_SCHEME_NAME = "scheme_name"
METADATA_KEYWORD_URI = "keyword_uri"
# Keys for the sampling event ("produced_by") and its site/location.
METADATA_PRODUCED_BY = "produced_by"
METADATA_RESPONSIBILITY = "responsibility"
METADATA_HAS_FEATURE_OF_INTEREST = "has_feature_of_interest"
METADATA_RESULT_TIME = "result_time"
METADATA_SAMPLING_SITE = "sampling_site"
METADATA_SAMPLE_LOCATION = "sample_location"
METADATA_LOCATION = "location"
METADATA_ELEVATION = "elevation"
METADATA_LATITUDE = "latitude"
METADATA_LONGITUDE = "longitude"
METADATA_PLACE_NAME = "place_name"
METADATA_SUBSAMPLE = "subsample"
METADATA_REGISTRANT = "registrant"
METADATA_SAMPLING_PURPOSE = "sampling_purpose"
# Curation keys.
METADATA_CURATION = "curation"
METADATA_ACCESS_CONSTRAINTS = "access_constraints"
METADATA_CURATION_LOCATION = "curation_location"
# Related-resource keys -- presumably METADATA_RELATIONSHIP and
# METADATA_TARGET describe one related resource; TODO confirm.
METADATA_RELATED_RESOURCE = "related_resource"
METADATA_RELATIONSHIP = "relationship"
METADATA_TARGET = "target"
METADATA_AUTHORIZED_BY = "authorized_by"
METADATA_COMPLIES_WITH = "complies_with"
METADATA_ROLE = "role"
METADATA_NAME = "name"
METADATA_LAST_MODIFIED_TIME = "last_modified_time"
196 changes: 196 additions & 0 deletions isamples_api/transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
from abc import ABC, abstractmethod
from typing import Optional, SupportsFloat

from isamples_api.controlled_vocabulary import VocabularyTerm
from isamples_api.metadata_constants import METADATA_AT_ID, METADATA_LABEL, METADATA_SAMPLE_IDENTIFIER, \
METADATA_DESCRIPTION, METADATA_HAS_CONTEXT_CATEGORY, METADATA_HAS_MATERIAL_CATEGORY, \
METADATA_HAS_SAMPLE_OBJECT_TYPE, METADATA_INFORMAL_CLASSIFICATION, METADATA_KEYWORDS, METADATA_PRODUCED_BY, \
METADATA_HAS_FEATURE_OF_INTEREST, METADATA_RESPONSIBILITY, METADATA_RESULT_TIME, METADATA_SAMPLING_SITE, \
METADATA_SAMPLE_LOCATION, METADATA_ELEVATION, METADATA_LATITUDE, METADATA_LONGITUDE, METADATA_PLACE_NAME, \
METADATA_REGISTRANT, METADATA_SAMPLING_PURPOSE, METADATA_CURATION, METADATA_ACCESS_CONSTRAINTS, \
METADATA_CURATION_LOCATION, METADATA_RELATED_RESOURCE, METADATA_AUTHORIZED_BY, METADATA_COMPLIES_WITH


class AbstractTransformer(ABC):
    """Template for turning one source record into an iSamples record.

    Subclasses implement the abstract hooks below; transform() calls each of
    them once and arranges the returned values into the output dictionary.
    """

    def transform(self) -> dict:
        """Do the actual work of transforming a source record into an iSamples record.

        Return value:
            The record transformed into an iSamples record, as a dict
        """
        # The three category hooks run before every other hook.
        context_terms = self.has_context_categories()
        material_terms = self.has_material_categories()
        object_type_terms = self.has_material_sample_object_type_categories()

        # The record is filled key by key so that the hooks are invoked, and
        # the keys inserted, in exactly the order they appear in the output.
        record: dict = {"$schema": "iSamplesSchemaCore1.0.json"}
        record[METADATA_AT_ID] = self.id_string()
        record[METADATA_LABEL] = self.sample_label()
        record[METADATA_SAMPLE_IDENTIFIER] = self.sample_identifier_string()
        record[METADATA_DESCRIPTION] = self.sample_description()
        # NOTE: no confidence values are emitted alongside the three category lists.
        record[METADATA_HAS_CONTEXT_CATEGORY] = context_terms
        record[METADATA_HAS_MATERIAL_CATEGORY] = material_terms
        record[METADATA_HAS_SAMPLE_OBJECT_TYPE] = object_type_terms
        record[METADATA_INFORMAL_CLASSIFICATION] = self.informal_classification()
        record[METADATA_KEYWORDS] = self.keywords()

        produced_by: dict = {}
        produced_by[METADATA_AT_ID] = self.produced_by_id_string()
        produced_by[METADATA_LABEL] = self.produced_by_label()
        produced_by[METADATA_DESCRIPTION] = self.produced_by_description()
        produced_by[METADATA_HAS_FEATURE_OF_INTEREST] = self.produced_by_feature_of_interest()
        produced_by[METADATA_RESPONSIBILITY] = self.produced_by_responsibilities()
        produced_by[METADATA_RESULT_TIME] = self.produced_by_result_time()

        sampling_site: dict = {}
        sampling_site[METADATA_DESCRIPTION] = self.sampling_site_description()
        sampling_site[METADATA_LABEL] = self.sampling_site_label()
        sampling_site[METADATA_SAMPLE_LOCATION] = {
            METADATA_ELEVATION: self.sampling_site_elevation(),
            METADATA_LATITUDE: self.sampling_site_latitude(),
            METADATA_LONGITUDE: self.sampling_site_longitude(),
        }
        sampling_site[METADATA_PLACE_NAME] = self.sampling_site_place_names()
        produced_by[METADATA_SAMPLING_SITE] = sampling_site
        record[METADATA_PRODUCED_BY] = produced_by

        record[METADATA_REGISTRANT] = self.sample_registrant()
        record[METADATA_SAMPLING_PURPOSE] = self.sample_sampling_purpose()

        curation: dict = {}
        curation[METADATA_LABEL] = self.curation_label()
        curation[METADATA_DESCRIPTION] = self.curation_description()
        curation[METADATA_ACCESS_CONSTRAINTS] = self.curation_access_constraints()
        curation[METADATA_CURATION_LOCATION] = self.curation_location()
        curation[METADATA_RESPONSIBILITY] = self.curation_responsibility()
        record[METADATA_CURATION] = curation

        record[METADATA_RELATED_RESOURCE] = self.related_resources()
        record[METADATA_AUTHORIZED_BY] = self.authorized_by()
        record[METADATA_COMPLIES_WITH] = self.complies_with()
        return record

    @abstractmethod
    def has_context_categories(self) -> list[VocabularyTerm]:
        """Return the terms stored as the record's context category."""

    @abstractmethod
    def has_material_categories(self) -> list[VocabularyTerm]:
        """Return the terms stored as the record's material category."""

    @abstractmethod
    def has_material_sample_object_type_categories(self) -> list[VocabularyTerm]:
        """Return the terms stored as the record's sample object type."""

    @abstractmethod
    def id_string(self) -> str:
        """Return the value stored as the record's @id."""

    @abstractmethod
    def sample_label(self) -> str:
        """Return the value stored as the record's label."""

    @abstractmethod
    def sample_identifier_string(self) -> str:
        """Return the value stored as the record's sample identifier."""

    @abstractmethod
    def sample_description(self) -> str:
        """Return the value stored as the record's description."""

    @abstractmethod
    def informal_classification(self) -> list[str]:
        """Return the values stored as the record's informal classification."""

    @abstractmethod
    def keywords(self) -> list[dict[str, str]]:
        """Return the entries stored as the record's keywords."""

    @abstractmethod
    def produced_by_id_string(self) -> str:
        """Return the @id of the record's produced_by section."""

    @abstractmethod
    def produced_by_label(self) -> str:
        """Return the label of the record's produced_by section."""

    @abstractmethod
    def produced_by_description(self) -> str:
        """Return the description of the record's produced_by section."""

    @abstractmethod
    def produced_by_feature_of_interest(self) -> str:
        """Return the feature of interest of the record's produced_by section."""

    @abstractmethod
    def produced_by_responsibilities(self) -> list[dict[str, str]]:
        """Return the responsibility entries of the record's produced_by section."""

    @abstractmethod
    def produced_by_result_time(self) -> str:
        """Return the result time of the record's produced_by section."""

    @abstractmethod
    def sampling_site_description(self) -> str:
        """Return the description of the sampling site."""

    @abstractmethod
    def sampling_site_label(self) -> str:
        """Return the label of the sampling site."""

    @abstractmethod
    def sampling_site_elevation(self) -> str:
        """Return the elevation stored in the sampling site's sample location."""

    @abstractmethod
    def sampling_site_latitude(self) -> Optional[SupportsFloat]:
        """Return the latitude stored in the sampling site's sample location, if any."""

    @abstractmethod
    def sampling_site_longitude(self) -> Optional[SupportsFloat]:
        """Return the longitude stored in the sampling site's sample location, if any."""

    @abstractmethod
    def sampling_site_place_names(self) -> list[str]:
        """Return the place names of the sampling site."""

    @abstractmethod
    def sample_registrant(self) -> str:
        """Return the value stored as the record's registrant."""

    @abstractmethod
    def sample_sampling_purpose(self) -> str:
        """Return the value stored as the record's sampling purpose."""

    @abstractmethod
    def curation_label(self) -> str:
        """Return the label of the record's curation section."""

    @abstractmethod
    def curation_description(self) -> str:
        """Return the description of the record's curation section."""

    @abstractmethod
    def curation_access_constraints(self) -> list[str]:
        """Return the access constraints of the record's curation section."""

    @abstractmethod
    def curation_location(self) -> str:
        """Return the curation location of the record's curation section."""

    @abstractmethod
    def curation_responsibility(self) -> list[dict[str, str]]:
        """Return the responsibility entries of the record's curation section."""

    @abstractmethod
    def related_resources(self) -> list[dict]:
        """Return the entries stored as the record's related resources."""

    @abstractmethod
    def authorized_by(self) -> list[str]:
        """Return the values stored as the record's authorized_by."""

    @abstractmethod
    def complies_with(self) -> list[str]:
        """Return the values stored as the record's complies_with."""
Loading