Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions MSMetaEnhancer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


class Application:
def __init__(self, log_level='info', log_file=None):
def __init__(self, log_level="info", log_file=None):
self.data = None
logger.setup(log_level, log_file)

Expand All @@ -25,12 +25,12 @@ def load_data(self, filename, file_format):
:param filename: path to source spectra file
:param file_format: format of spectra
"""
if file_format in ['msp', 'mgf', 'json']:
if file_format in ["msp", "mgf", "json"]:
self.data = Spectra()
elif file_format in ['csv', 'tsv', 'tabular', 'xlsx']:
elif file_format in ["csv", "tsv", "tabular", "xlsx"]:
self.data = DataFrame()
else:
raise UnknownFileFormat(f'Format {file_format} not supported.')
raise UnknownFileFormat(f"Format {file_format} not supported.")
self.data.load_data(filename, file_format)

def save_data(self, filename, file_format):
Expand All @@ -51,12 +51,14 @@ def curate_metadata(self):
curated_metadata = Curator().curate_metadata(self.data.get_metadata())
self.data.fuse_metadata(curated_metadata)

async def annotate_spectra(self,
converters,
jobs=None,
repeat: bool = False,
monitor: Monitor = Monitor(),
annotator: Annotator = Annotator()):
async def annotate_spectra(
self,
converters,
jobs=None,
repeat: bool = False,
monitor: Monitor = Monitor(),
annotator: Annotator = Annotator(),
):
"""
Annotates current Spectra data by specified jobs.

Expand All @@ -72,9 +74,11 @@ async def annotate_spectra(self,
async with aiohttp.ClientSession() as session:
builder = ConverterBuilder()
builder.validate_converters(converters)
converters, web_converters = builder.build_converters(session, converters)
compute_converters, web_converters = builder.build_converters(
session, converters
)

annotator.set_converters(converters)
annotator.set_converters(compute_converters | web_converters)
monitor.set_converters(web_converters)

# start converters status checker and wait for first status
Expand All @@ -86,16 +90,20 @@ async def annotate_spectra(self,
if not jobs:
jobs = []
converter: Converter
for converter in converters.values():
for converter in annotator.converters.values():
jobs += converter.get_conversion_functions()
jobs = convert_to_jobs(jobs)

metadata_list = self.data.get_metadata()

logger.set_target_attributes(jobs, len(metadata_list))

results = await asyncio.gather(*[annotator.annotate(metadata, jobs, repeat)
for metadata in metadata_list])
results = await asyncio.gather(
*[
annotator.annotate(metadata, jobs, repeat)
for metadata in metadata_list
]
)
finally:
monitor.join()

Expand Down
31 changes: 24 additions & 7 deletions MSMetaEnhancer/libs/Annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,21 @@

from MSMetaEnhancer.libs.Curator import Curator
from MSMetaEnhancer.libs.utils import logger
from MSMetaEnhancer.libs.utils.Errors import TargetAttributeNotRetrieved, SourceAttributeNotAvailable, \
ServiceNotAvailable, UnknownResponse, DataAlreadyPresent
from MSMetaEnhancer.libs.utils.Errors import (
TargetAttributeNotRetrieved,
SourceAttributeNotAvailable,
ServiceNotAvailable,
UnknownResponse,
DataAlreadyPresent,
)
from MSMetaEnhancer.libs.utils.Logger import LogRecord


class Annotator:
"""
Annotator is responsible for annotation process of single spectra.
"""

def __init__(self):
self.converters = dict()
self.curator = Curator()
Expand Down Expand Up @@ -41,17 +47,28 @@ async def annotate(self, metadata, jobs, repeat=False):
for job in jobs:
if job.target not in metadata:
try:
metadata, cache = await self.execute_job_with_cache(job, metadata, cache, log)
metadata, cache = await self.execute_job_with_cache(
job, metadata, cache, log
)
if repeat:
added_metadata = True
except (SourceAttributeNotAvailable, TargetAttributeNotRetrieved) as exc:
except (
SourceAttributeNotAvailable,
TargetAttributeNotRetrieved,
) as exc:
log.update(exc, job, level=3)
except (ServiceNotAvailable, UnknownResponse) as exc:
log.update(exc, job, level=2)
except Exception:
log.update(Exception(traceback.format_exc()), job, level=1)
else:
log.update(DataAlreadyPresent(f'Requested attribute {job.target} already present.'), job, level=2)
log.update(
DataAlreadyPresent(
f"Requested attribute {job.target} already present."
),
job,
level=2,
)

logger.add_logs(log)
logger.add_coverage_after(metadata.keys())
Expand Down Expand Up @@ -85,7 +102,7 @@ async def execute_job_with_cache(self, job, metadata, cache, warning):
if job.target in cache[job.converter]:
metadata[job.target] = cache[job.converter][job.target]
else:
raise TargetAttributeNotRetrieved('No data retrieved.')
raise TargetAttributeNotRetrieved("No data retrieved.")
else:
raise ServiceNotAvailable(f'Service {job.converter} not available.')
raise ServiceNotAvailable(f"Service {job.converter} not available.")
return metadata, cache
14 changes: 9 additions & 5 deletions MSMetaEnhancer/libs/Converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ class Converter(ABC):
"""
General class for conversions.
"""

def __init__(self):
self.is_available = True

Expand Down Expand Up @@ -46,13 +47,15 @@ def get_conversion_functions(self) -> list:
:return: a list of available conversion functions
"""
available_conversions = []
methods = [method_name for method_name in dir(self) if '_to_' in method_name]
methods = [method_name for method_name in dir(self) if "_to_" in method_name]
for method in methods:
available_conversions.append((*method.split('_to_'), self.converter_name))
available_conversions.append((*method.split("_to_"), self.converter_name))
return available_conversions


def create_top_level_method(obj: Converter, source: str, target: str, method: str, asynch: bool = True):
def create_top_level_method(
obj: Converter, source: str, target: str, method: str, asynch: bool = True
):
"""
Assign a new method to {obj} called {source}_to_{target} which calls {method}.

Expand All @@ -62,14 +65,15 @@ def create_top_level_method(obj: Converter, source: str, target: str, method: st
:param method: method which is called in the object with single argument
:param asynch: whether to create asynchronous methods
"""

async def async_conversion(key):
return await getattr(obj, str(method))(key)

def sync_conversion(key):
return getattr(obj, str(method))(key)

doc = f'Convert {source} to {target} using {obj.__class__.__name__} converter'
name = f'{source}_to_{target}'
doc = f"Convert {source} to {target} using {obj.__class__.__name__} converter"
name = f"{source}_to_{target}"

if asynch:
async_conversion.__doc__ = doc
Expand Down
33 changes: 21 additions & 12 deletions MSMetaEnhancer/libs/Curator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import (
is_valid_smiles, is_valid_inchi, is_valid_inchikey
is_valid_smiles,
is_valid_inchi,
is_valid_inchikey,
)
from MSMetaEnhancer.libs.utils.Errors import InvalidAttributeFormat

Expand All @@ -10,7 +12,7 @@
inchikey = "VNWKTOKETHGBQD-UHFFFAOYSA-N"

print(is_valid_smiles(smiles)) # True if valid SMILES
print(is_valid_inchi(inchi)) # True if valid InChI
print(is_valid_inchi(inchi)) # True if valid InChI
print(is_valid_inchikey(inchikey)) # True if valid InChIKey


Expand All @@ -21,6 +23,7 @@ class Curator:

Additionally, it supports metadata validation to make sure the produced data are correct.
"""

def curate_metadata(self, metadata_list):
"""
Iterates over given metadata and curates individual entries.
Expand All @@ -40,8 +43,8 @@ def curate_casno(self, metadata):
:param metadata: given metadata
:return: curated metadata
"""
if 'casno' in metadata:
metadata['casno'] = self.fix_cas_number(metadata['casno'])
if "casno" in metadata:
metadata["casno"] = self.fix_cas_number(metadata["casno"])
return metadata

@staticmethod
Expand All @@ -54,7 +57,7 @@ def fix_cas_number(cas_number):
"""
if isinstance(cas_number, str):
if "-" not in cas_number:
return f'{cas_number[:-3]}-{cas_number[-3:-1]}-{cas_number[-1]}'
return f"{cas_number[:-3]}-{cas_number[-3:-1]}-{cas_number[-1]}"
return cas_number

@staticmethod
Expand All @@ -68,20 +71,26 @@ def filter_invalid_metadata(metadata, log, job):
:return: only valid metadata
"""
filters = {
'smiles': is_valid_smiles,
'canonical_smiles': is_valid_smiles,
'isomeric_smiles': is_valid_smiles,
'inchi': is_valid_inchi,
'inchikey': is_valid_inchikey
"smiles": is_valid_smiles,
"canonical_smiles": is_valid_smiles,
"isomeric_smiles": is_valid_smiles,
"inchi": is_valid_inchi,
"inchikey": is_valid_inchikey,
}

valid_metadata = {}
for (attribute, value) in metadata.items():
for attribute, value in metadata.items():
if attribute in filters.keys():
if filters[attribute](value):
valid_metadata[attribute] = value
else:
log.update(InvalidAttributeFormat(f'Obtained {attribute} in invalid format: {value}'), job, level=2)
log.update(
InvalidAttributeFormat(
f"Obtained {attribute} in invalid format: {value}"
),
job,
level=2,
)
else:
valid_metadata[attribute] = value
return valid_metadata
3 changes: 2 additions & 1 deletion MSMetaEnhancer/libs/converters/compute/ComputeConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ class ComputeConverter(Converter):
"""
General class for computation conversion.
"""

async def convert(self, source, target, data):
return getattr(self, f'{source}_to_{target}')(data)
return getattr(self, f"{source}_to_{target}")(data)
31 changes: 19 additions & 12 deletions MSMetaEnhancer/libs/converters/compute/RDKit.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@ class RDKit(ComputeConverter):
"""
RDKit is a collection of chemo-informatics and machine-learning software.
"""

def __init__(self):
super().__init__()
# generate top level methods defining allowed conversions
conversions = [('smiles', 'mw', 'from_smiles'),
('canonical_smiles', 'mw', 'from_smiles'),
('isomeric_smiles', 'mw', 'from_smiles')]
conversions = [
("smiles", "mw", "from_smiles"),
("canonical_smiles", "mw", "from_smiles"),
("isomeric_smiles", "mw", "from_smiles"),
]
self.create_top_level_conversion_methods(conversions, asynch=False)

def from_smiles(self, smiles):
Expand All @@ -29,7 +32,7 @@ def from_smiles(self, smiles):
:return: computed molecular weight
"""
weight = ExactMolWt(MolFromSmiles(smiles))
return {'mw': weight}
return {"mw": weight}

def inchi_to_canonical_smiles(self, inchi):
"""
Expand All @@ -39,7 +42,7 @@ def inchi_to_canonical_smiles(self, inchi):
:return: computed canonical SMILES
"""
smiles = MolToSmiles(MolFromInchi(inchi), isomericSmiles=False)
return {'canonical_smiles': smiles}
return {"canonical_smiles": smiles}

def inchi_to_isomeric_smiles(self, inchi):
"""
Expand All @@ -49,7 +52,7 @@ def inchi_to_isomeric_smiles(self, inchi):
:return: computed isomeric SMILES
"""
smiles = MolToSmiles(MolFromInchi(inchi))
return {'isomeric_smiles': smiles}
return {"isomeric_smiles": smiles}

def formula_to_mw(self, formula):
"""
Expand All @@ -66,9 +69,13 @@ def formula_to_mw(self, formula):
continue

atom = Atom(parts[index])
multiplier = int(parts[index + 1]) if len(parts) > index + 1 and parts[index + 1].isnumeric() else 1
multiplier = (
int(parts[index + 1])
if len(parts) > index + 1 and parts[index + 1].isnumeric()
else 1
)
mass += atom.GetMass() * multiplier
return {'mw': mass}
return {"mw": mass}

def smiles_to_formula(self, smiles: str) -> dict:
"""
Expand All @@ -79,11 +86,11 @@ def smiles_to_formula(self, smiles: str) -> dict:
"""
mol = MolFromSmiles(smiles)
if mol is None:
return {'formula': ''}
return {"formula": ""}

formula = CalcMolFormula(mol)

return {'formula': formula}
return {"formula": formula}

def inchi_to_formula(self, inchi: str) -> dict:
"""
Expand All @@ -94,6 +101,6 @@ def inchi_to_formula(self, inchi: str) -> dict:
"""
mol = MolFromInchi(inchi)
if mol is None:
return {'formula': ''}
return {"formula": ""}
formula = CalcMolFormula(mol)
return {'formula': formula}
return {"formula": formula}
2 changes: 1 addition & 1 deletion MSMetaEnhancer/libs/converters/compute/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from MSMetaEnhancer.libs.converters.compute.RDKit import RDKit

__all__ = ['RDKit']
__all__ = ["RDKit"]
Loading