From edc05baa7f970651153170980a3f068c6bfcdf4c Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 17 Jan 2023 00:45:22 -0600 Subject: [PATCH 001/146] Minor fixes and adding some utility functionality --- modelseedpy/core/msmodelutl.py | 26 ++++++++++++++++++++++++ modelseedpy/core/mstemplate.py | 37 +++++++++++++++------------------- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index d4494938..af499773 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -4,6 +4,7 @@ import time import json import sys +import pandas as pd from cobra import Model, Reaction, Metabolite from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.biochem.modelseed_biochem import ModelSEEDBiochem @@ -306,6 +307,31 @@ def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): print(len(output)) self.model.add_reactions(output) return output + + ################################################################################# + # Functions related to utility functions + ################################################################################# + def build_model_data_hash(self): + data = { + "Model":self.id, + "Genome":self.genome.info.metadata["Name"], + "Genes":self.genome.info.metadata["Number of Protein Encoding Genes"], + + } + return data + + def compare_reactions(self, reaction_list,filename): + data = {} + for rxn in reaction_list: + for met in rxn.metabolites: + if met.id not in data: + data[met.id] = {} + for other_rxn in reaction_list: + data[met.id][other_rxn.id] = 0 + data[met.id][rxn.id] = rxn.metabolites[met] + df = pd.DataFrame(data) + df = df.transpose() + df.to_csv(filename) ################################################################################# # Functions related to managing biomass reactions diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 51dc2e38..1814e774 100644 
--- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -640,7 +640,7 @@ def get_or_create_metabolite(self, model, baseid, compartment=None, index=None): return model.metabolites.get_by_id(fullid) if tempid in self._template.compcompounds: met = self._template.compcompounds.get_by_id(tempid).to_metabolite(index) - model.metabolites.add(met) + model.add_metabolites([met]) return met logger.error( "Could not find biomass metabolite [%s] in model or template!", @@ -658,13 +658,13 @@ def get_or_create_reaction(self, model, baseid, compartment=None, index=None): return model.reactions.get_by_id(fullid) if tempid in self._template.reactions: rxn = self._template.reactions.get_by_id(tempid).to_reaction(model, index) - model.reactions.add(rxn) + model.add_reactions([rxn]) return rxn newrxn = Reaction(fullid, fullid, "biomasses", 0, 1000) - model.reactions.add(newrxn) + model.add_reactions(newrxn) return newrxn - def build_biomass(self, model, index="0", classic=False, GC=0.5): + def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=True): types = [ "cofactor", "lipid", @@ -700,7 +700,8 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): specific_reactions["dna"].subtract_metabolites( specific_reactions["dna"].metabolites ) - specific_reactions["dna"].metabolites[met] = 1 + specific_reactions["dna"].add_metabolites({met:1}) + metabolites[met] = 1 metabolites[met] = -1 * self.dna if not classic and self.protein > 0: met = self.get_or_create_metabolite(model, "cpd11463", "c", index) @@ -710,7 +711,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): specific_reactions["protein"].subtract_metabolites( specific_reactions["protein"].metabolites ) - specific_reactions["protein"].metabolites[met] = 1 + specific_reactions["protein"].add_metabolites({met:1}) metabolites[met] = -1 * self.protein if not classic and self.rna > 0: met = self.get_or_create_metabolite(model, "cpd11462", "c", index) @@ -720,7 
+721,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): specific_reactions["rna"].subtract_metabolites( specific_reactions["rna"].metabolites ) - specific_reactions["rna"].metabolites[met] = 1 + specific_reactions["rna"].add_metabolites({met:1}) metabolites[met] = -1 * self.rna bio_type_hash = {} for type in types: @@ -752,13 +753,13 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): coef = comp.coefficient elif comp.coefficient_type == "AT": coef = ( - comp.coefficient + 2 * comp.coefficient * (1 - GC) * (type_abundances[type] / bio_type_hash[type]["total_mw"]) ) elif comp.coefficient_type == "GC": coef = ( - comp.coefficient + 2 * comp.coefficient * GC * (type_abundances[type] / bio_type_hash[type]["total_mw"]) ) @@ -771,10 +772,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): metabolites[met] = coef elif not classic: coef = coef / type_abundances[type] - if met in metabolites: - specific_reactions[type].metabolites[met] += coef - else: - specific_reactions[type].metabolites[met] = coef + specific_reactions[type].add_metabolites({met:coef}) for l_met in comp.linked_metabolites: met = self.get_or_create_metabolite( model, l_met.id, None, index @@ -787,16 +785,13 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): else: metabolites[met] = coef * comp.linked_metabolites[l_met] elif not classic: - if met in metabolites: - specific_reactions[type].metabolites[met] += ( - coef * comp.linked_metabolites[l_met] - ) - else: - specific_reactions[type].metabolites[met] = ( - coef * comp.linked_metabolites[l_met] - ) + specific_reactions[type].add_metabolites({met:coef * comp.linked_metabolites[l_met]}) biorxn.annotation[SBO_ANNOTATION] = "SBO:0000629" biorxn.add_metabolites(metabolites) + if add_to_model: + if biorxn.id in model.reactions: + model.remove_reactions([biorxn.id]) + model.add_reactions([biorxn]) return biorxn def get_data(self): From e0a7f4fb882fe321901e13a62c3ce529edf1abb2 Mon Sep 17 
00:00:00 2001 From: Filipe Liu Date: Wed, 18 Jan 2023 04:06:01 -0600 Subject: [PATCH 002/146] index fix --- modelseedpy/core/msbuilder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index c8763c2d..e8b21f0b 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -896,6 +896,7 @@ def build( @param annotate_with_rast: @return: """ + self.index = index if annotate_with_rast: rast = RastClient() From a27257ca9321b8a8e5cefc576456dfcd206940e0 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 31 Jan 2023 08:57:33 -0600 Subject: [PATCH 003/146] missing import --- modelseedpy/biochem/modelseed_biochem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelseedpy/biochem/modelseed_biochem.py b/modelseedpy/biochem/modelseed_biochem.py index ccdd8d76..80594e0e 100644 --- a/modelseedpy/biochem/modelseed_biochem.py +++ b/modelseedpy/biochem/modelseed_biochem.py @@ -7,6 +7,7 @@ from modelseedpy.biochem.modelseed_compound import ModelSEEDCompound, ModelSEEDCompound2 from modelseedpy.biochem.modelseed_reaction import ModelSEEDReaction, ModelSEEDReaction2 from modelseedpy.helpers import config +from modelseedpy.core.msmodel import get_reaction_constraints_from_direction logger = logging.getLogger(__name__) From f2eb10e3fdeae8d0f2c3b62edf8e277a8031b59a Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Wed, 1 Feb 2023 10:25:06 -0600 Subject: [PATCH 004/146] fixed builder to detect biomass added compounds --- examples/Model Reconstruction/Biomass.ipynb | 162 +++++++++- .../build_metabolic_model.ipynb | 283 ++++++++++++++++++ modelseedpy/core/msbuilder.py | 29 +- modelseedpy/core/mstemplate.py | 1 + 4 files changed, 468 insertions(+), 7 deletions(-) diff --git a/examples/Model Reconstruction/Biomass.ipynb b/examples/Model Reconstruction/Biomass.ipynb index e4a2c901..3726f959 100644 --- a/examples/Model Reconstruction/Biomass.ipynb +++ b/examples/Model Reconstruction/Biomass.ipynb @@ -2,18 
+2,17 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "5434992c-fc67-40f5-ae08-82f44790666c", "metadata": {}, "outputs": [], "source": [ - "from modelseedpy.helpers import get_template\n", - "from modelseedpy.core.mstemplate import MSTemplateBuilder" + "import modelseedpy" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 2, "id": "b243e00a-4a8b-489d-a778-61844a439e63", "metadata": {}, "outputs": [ @@ -21,7 +20,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "cobrakbase 0.2.8\n" + "cobrakbase 0.3.1\n" ] } ], @@ -30,6 +29,157 @@ "kbase = cobrakbase.KBaseAPI()" ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3a177c16-ecb0-4050-bbf5-47aad10f2af9", + "metadata": {}, + "outputs": [], + "source": [ + "template = kbase.get_from_ws('GramNegModelTemplateV3', 'NewKBaseModelTemplates')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4ce52552-dce2-4c44-9884-cf00d15e76ab", + "metadata": {}, + "outputs": [], + "source": [ + "from modelseedpy import MSBuilder" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6f216f6a-5e25-4697-bf6b-9ae63475b5c7", + "metadata": {}, + "outputs": [], + "source": [ + "from cobra.core import Model\n", + "model = Model('test')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d9763d58-daba-4751-811f-23581b390025", + "metadata": {}, + "outputs": [], + "source": [ + "biomass = template.biomasses[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d3e884ac-2568-445a-ac04-1508b536c88a", + "metadata": {}, + "outputs": [], + "source": [ + "reaction = biomass.build_biomass(model, '0', True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f5140ac5-273f-4eb5-b806-ddd9178b252e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cpd00010_c0 {'modelseed_template_id': 'cpd00010_c'}\n", + "cpd11493_c0 
{'modelseed_template_id': 'cpd11493_c'}\n", + "cpd12370_c0 {'modelseed_template_id': 'cpd12370_c'}\n", + "cpd00003_c0 {'modelseed_template_id': 'cpd00003_c'}\n", + "cpd00006_c0 {'modelseed_template_id': 'cpd00006_c'}\n", + "cpd00205_c0 {'modelseed_template_id': 'cpd00205_c'}\n", + "cpd00254_c0 {'modelseed_template_id': 'cpd00254_c'}\n", + "cpd10516_c0 {'modelseed_template_id': 'cpd10516_c'}\n", + "cpd00063_c0 {'modelseed_template_id': 'cpd00063_c'}\n", + "cpd00009_c0 {'modelseed_template_id': 'cpd00009_c'}\n", + "cpd00099_c0 {'modelseed_template_id': 'cpd00099_c'}\n", + "cpd00149_c0 {'modelseed_template_id': 'cpd00149_c'}\n", + "cpd00058_c0 {'modelseed_template_id': 'cpd00058_c'}\n", + "cpd00015_c0 {'modelseed_template_id': 'cpd00015_c'}\n", + "cpd10515_c0 {'modelseed_template_id': 'cpd10515_c'}\n", + "cpd00030_c0 {'modelseed_template_id': 'cpd00030_c'}\n", + "cpd00048_c0 {'modelseed_template_id': 'cpd00048_c'}\n", + "cpd00034_c0 {'modelseed_template_id': 'cpd00034_c'}\n", + "cpd00016_c0 {'modelseed_template_id': 'cpd00016_c'}\n", + "cpd00220_c0 {'modelseed_template_id': 'cpd00220_c'}\n", + "cpd00017_c0 {'modelseed_template_id': 'cpd00017_c'}\n", + "cpd00201_c0 {'modelseed_template_id': 'cpd00201_c'}\n", + "cpd00087_c0 {'modelseed_template_id': 'cpd00087_c'}\n", + "cpd00345_c0 {'modelseed_template_id': 'cpd00345_c'}\n", + "cpd00042_c0 {'modelseed_template_id': 'cpd00042_c'}\n", + "cpd00028_c0 {'modelseed_template_id': 'cpd00028_c'}\n", + "cpd00557_c0 {'modelseed_template_id': 'cpd00557_c'}\n", + "cpd00264_c0 {'modelseed_template_id': 'cpd00264_c'}\n", + "cpd00118_c0 {'modelseed_template_id': 'cpd00118_c'}\n", + "cpd00056_c0 {'modelseed_template_id': 'cpd00056_c'}\n", + "cpd15560_c0 {'modelseed_template_id': 'cpd15560_c'}\n", + "cpd15352_c0 {'modelseed_template_id': 'cpd15352_c'}\n", + "cpd15500_c0 {'modelseed_template_id': 'cpd15500_c'}\n", + "cpd00166_c0 {'modelseed_template_id': 'cpd00166_c'}\n", + "cpd01997_c0 {'modelseed_template_id': 'cpd01997_c'}\n", + 
"cpd03422_c0 {'modelseed_template_id': 'cpd03422_c'}\n", + "cpd00104_c0 {'modelseed_template_id': 'cpd00104_c'}\n", + "cpd00037_c0 {'modelseed_template_id': 'cpd00037_c'}\n", + "cpd00050_c0 {'modelseed_template_id': 'cpd00050_c'}\n", + "cpd15793_c0 {'modelseed_template_id': 'cpd15793_c'}\n", + "cpd15540_c0 {'modelseed_template_id': 'cpd15540_c'}\n", + "cpd15533_c0 {'modelseed_template_id': 'cpd15533_c'}\n", + "cpd15432_c0 {'modelseed_template_id': 'cpd15432_c'}\n", + "cpd02229_c0 {'modelseed_template_id': 'cpd02229_c'}\n", + "cpd15665_c0 {'modelseed_template_id': 'cpd15665_c'}\n", + "cpd15666_c0 {'modelseed_template_id': 'cpd15666_c'}\n", + "cpd00023_c0 {'modelseed_template_id': 'cpd00023_c'}\n", + "cpd00001_c0 {'modelseed_template_id': 'cpd00001_c'}\n", + "cpd00033_c0 {'modelseed_template_id': 'cpd00033_c'}\n", + "cpd00035_c0 {'modelseed_template_id': 'cpd00035_c'}\n", + "cpd00039_c0 {'modelseed_template_id': 'cpd00039_c'}\n", + "cpd00041_c0 {'modelseed_template_id': 'cpd00041_c'}\n", + "cpd00051_c0 {'modelseed_template_id': 'cpd00051_c'}\n", + "cpd00053_c0 {'modelseed_template_id': 'cpd00053_c'}\n", + "cpd00054_c0 {'modelseed_template_id': 'cpd00054_c'}\n", + "cpd00060_c0 {'modelseed_template_id': 'cpd00060_c'}\n", + "cpd00065_c0 {'modelseed_template_id': 'cpd00065_c'}\n", + "cpd00066_c0 {'modelseed_template_id': 'cpd00066_c'}\n", + "cpd00069_c0 {'modelseed_template_id': 'cpd00069_c'}\n", + "cpd00084_c0 {'modelseed_template_id': 'cpd00084_c'}\n", + "cpd00107_c0 {'modelseed_template_id': 'cpd00107_c'}\n", + "cpd00119_c0 {'modelseed_template_id': 'cpd00119_c'}\n", + "cpd00129_c0 {'modelseed_template_id': 'cpd00129_c'}\n", + "cpd00132_c0 {'modelseed_template_id': 'cpd00132_c'}\n", + "cpd00156_c0 {'modelseed_template_id': 'cpd00156_c'}\n", + "cpd00161_c0 {'modelseed_template_id': 'cpd00161_c'}\n", + "cpd00322_c0 {'modelseed_template_id': 'cpd00322_c'}\n", + "cpd00115_c0 {'modelseed_template_id': 'cpd00115_c'}\n", + "cpd00012_c0 {'modelseed_template_id': 
'cpd00012_c'}\n", + "cpd00241_c0 {'modelseed_template_id': 'cpd00241_c'}\n", + "cpd00356_c0 {'modelseed_template_id': 'cpd00356_c'}\n", + "cpd00357_c0 {'modelseed_template_id': 'cpd00357_c'}\n", + "cpd00002_c0 {'modelseed_template_id': 'cpd00002_c'}\n", + "cpd00038_c0 {'modelseed_template_id': 'cpd00038_c'}\n", + "cpd00052_c0 {'modelseed_template_id': 'cpd00052_c'}\n", + "cpd00062_c0 {'modelseed_template_id': 'cpd00062_c'}\n", + "cpd00008_c0 {'modelseed_template_id': 'cpd00008_c'}\n", + "cpd00067_c0 {'modelseed_template_id': 'cpd00067_c'}\n", + "cpd11416_c0 {'modelseed_template_id': 'cpd11416_c'}\n", + "cpd17041_c0 {'modelseed_template_id': 'cpd17041_c'}\n", + "cpd17042_c0 {'modelseed_template_id': 'cpd17042_c'}\n", + "cpd17043_c0 {'modelseed_template_id': 'cpd17043_c'}\n" + ] + } + ], + "source": [ + "for m in reaction.metabolites:\n", + " print(m, m.notes)" + ] + }, { "cell_type": "code", "execution_count": 42, @@ -551,7 +701,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/examples/Model Reconstruction/build_metabolic_model.ipynb b/examples/Model Reconstruction/build_metabolic_model.ipynb index 2f1e8d3f..6a817c0f 100644 --- a/examples/Model Reconstruction/build_metabolic_model.ipynb +++ b/examples/Model Reconstruction/build_metabolic_model.ipynb @@ -19,6 +19,24 @@ "genome = MSGenome.from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = MSBuilder.build_metabolic_model('ecoli', genome, classic_biomass=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.summary()" + ] + }, { "cell_type": "code", "execution_count": 3, @@ -36,6 +54,271 @@ "print('Number of features:', len(genome.features))" ] }, + { + "cell_type": "code", + "execution_count": 4, + 
"metadata": {}, + "outputs": [], + "source": [ + "builder = MSBuilder(genome)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "IndexError", + "evalue": "The genomes or genomeSet that you have submitted wasn’t annotated using the RAST annotation pipeline. Please annotate the genomes via ‘Annotate Microbial Genome’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genome/release)or genomeSets via Annotate Multiple Microbial Genomes’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genomes/release) and resubmit the RAST annotated genome/genomeSets into the Predict Phenotype app. (", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/ml/predict_phenotype.py\u001b[0m in \u001b[0;36mcreate_indicator_matrix\u001b[0;34m(ref_to_role, master_role_list)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 94\u001b[0;31m \u001b[0mindicators\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmatching_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 95\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: arrays used as indices must be of integer (or boolean) type", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + 
"\u001b[0;32m/tmp/ipykernel_3016957/3197840996.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbuilder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_select_template\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/msbuilder.py\u001b[0m in \u001b[0;36mauto_select_template\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 664\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 665\u001b[0m \u001b[0mgenome_classifier\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_classifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"knn_ACNP_RAST_filter_01_17_2023\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 666\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenome_class\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgenome_classifier\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenome\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 667\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 668\u001b[0m \u001b[0;31m# TODO: update with enum MSGenomeClass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/msgenomeclassifier.py\u001b[0m in \u001b[0;36mclassify\u001b[0;34m(self, genome_or_roles, ontology_term)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0mgenome_or_roles\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0montology_term\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m )\n\u001b[0;32m---> 33\u001b[0;31m indicator_df, master_role_list = create_indicator_matrix(\n\u001b[0m\u001b[1;32m 34\u001b[0m \u001b[0mgenome_or_roles\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m )\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/ml/predict_phenotype.py\u001b[0m in \u001b[0;36mcreate_indicator_matrix\u001b[0;34m(ref_to_role, master_role_list)\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0mindicators\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmatching_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 96\u001b[0;31m raise IndexError(\n\u001b[0m\u001b[1;32m 97\u001b[0m \u001b[0;31m\"\u001b[0m\u001b[0mThe\u001b[0m \u001b[0mgenomes\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mgenomeSet\u001b[0m \u001b[0mthat\u001b[0m \u001b[0myou\u001b[0m \u001b[0mhave\u001b[0m \u001b[0msubmitted\u001b[0m \u001b[0mwasn\u001b[0m\u001b[0;31m’\u001b[0m\u001b[0mt\u001b[0m \u001b[0mannotated\u001b[0m \u001b[0musing\u001b[0m \u001b[0mthe\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0mRAST\u001b[0m \u001b[0mannotation\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mPlease\u001b[0m \u001b[0mannotate\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mgenomes\u001b[0m \u001b[0mvia\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m‘\u001b[0m\u001b[0mAnnotate\u001b[0m \u001b[0mMicrobial\u001b[0m \u001b[0mGenome\u001b[0m\u001b[0;31m’\u001b[0m \u001b[0mapp\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: The genomes or genomeSet that you have submitted wasn’t annotated using 
the RAST annotation pipeline. Please annotate the genomes via ‘Annotate Microbial Genome’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genome/release)or genomeSets via Annotate Multiple Microbial Genomes’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genomes/release) and resubmit the RAST annotated genome/genomeSets into the Predict Phenotype app. (" + ] + } + ], + "source": [ + "builder.auto_select_template()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from cobra.core import Reaction" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "rxn = Reaction('SK_cpd11416_c0', 'SK_cpd11416_c0', '', 0, 1000)\n", + "rxn.add_metabolites({model.metabolites.cpd11416_c0: -1})\n", + "model.add_reactions([rxn])" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:89: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.float):\n", + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:91: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. 
If you specifically wanted the numpy scalar type, use `np.bool_` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.bool):\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Nameecoli
Memory address7f3dd51e8400
Number of metabolites1458
Number of reactions1772
Number of genes1295
Number of groups1323
Objective expression1.0*bio1 - 1.0*bio1_reverse_b18f7
CompartmentsCytosol, Extracellular
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MSBuilder.gapfill_model(model, \"bio1\", builder.template, None)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Objective

1.0 bio1 = 0.0

Uptake

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux

Secretion

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cpd00010_c0 CoA [c0] 80\n", + "cpd11493_c0 ACP [c0] 39\n", + "cpd12370_c0 apo-ACP [c0] 3\n", + "cpd00003_c0 NAD [c0] 127\n", + "cpd00006_c0 NADP [c0] 89\n", + "cpd00205_c0 K+ [c0] 5\n", + "cpd00254_c0 Mg [c0] 3\n", + "cpd10516_c0 fe3 [c0] 5\n", + "cpd00063_c0 Ca2+ [c0] 2\n", + "cpd00009_c0 Phosphate [c0] 210\n", + "cpd00099_c0 Cl- [c0] 3\n", + "cpd00149_c0 Co2+ [c0] 2\n", + "cpd00058_c0 Cu2+ [c0] 3\n", + "cpd00015_c0 FAD [c0] 13\n", + "cpd10515_c0 Fe2+ [c0] 5\n", + "cpd00030_c0 Mn2+ [c0] 2\n", + "cpd00048_c0 Sulfate [c0] 4\n", + "cpd00034_c0 Zn2+ [c0] 2\n", + "cpd00016_c0 Pyridoxal phosphate [c0] 5\n", + "cpd00220_c0 Riboflavin [c0] 5\n", + "cpd00017_c0 S-Adenosyl-L-methionine [c0] 21\n", + "cpd00201_c0 10-Formyltetrahydrofolate [c0] 7\n", + "cpd00087_c0 Tetrahydrofolate [c0] 12\n", + "cpd00345_c0 5-Methyltetrahydrofolate [c0] 3\n", + "cpd00042_c0 GSH [c0] 13\n", + "cpd00028_c0 Heme [c0] 4\n", + "cpd00557_c0 Siroheme [c0] 2\n", + "cpd00264_c0 Spermidine [c0] 8\n", + "cpd00118_c0 Putrescine [c0] 9\n", + "cpd00056_c0 TPP [c0] 7\n", + "cpd15560_c0 Ubiquinone-8 [c0] 18\n", + "cpd15352_c0 2-Demethylmenaquinone 8 [c0] 7\n", + "cpd15500_c0 Menaquinone 8 [c0] 12\n", + "cpd00166_c0 Calomide [c0] 4\n", + "cpd01997_c0 Dimethylbenzimidazole [c0] 2\n", + "cpd03422_c0 Cobinamide [c0] 2\n", + "cpd00104_c0 BIOT [c0] 5\n", + "cpd00037_c0 UDP-N-acetylglucosamine [c0] 16\n", + "cpd00050_c0 FMN [c0] 11\n", + "cpd15793_c0 Stearoylcardiolipin (B. 
subtilis) [c0] 1\n", + "cpd15540_c0 Phosphatidylglycerol dioctadecanoyl [c0] 3\n", + "cpd15533_c0 phosphatidylethanolamine dioctadecanoyl [c0] 3\n", + "cpd15432_c0 core oligosaccharide lipid A [c0] 2\n", + "cpd02229_c0 Bactoprenyl diphosphate [c0] 5\n", + "cpd15665_c0 Peptidoglycan polymer (n subunits) [c0] 2\n", + "cpd15666_c0 Peptidoglycan polymer (n-1 subunits) [c0] 2\n", + "cpd00023_c0 L-Glutamate [c0] 57\n", + "cpd00001_c0 H2O [c0] 556\n", + "cpd00033_c0 Glycine [c0] 21\n", + "cpd00035_c0 L-Alanine [c0] 17\n", + "cpd00039_c0 L-Lysine [c0] 8\n", + "cpd00041_c0 L-Aspartate [c0] 19\n", + "cpd00051_c0 L-Arginine [c0] 6\n", + "cpd00053_c0 L-Glutamine [c0] 17\n", + "cpd00054_c0 L-Serine [c0] 23\n", + "cpd00060_c0 L-Methionine [c0] 19\n", + "cpd00065_c0 L-Tryptophan [c0] 5\n", + "cpd00066_c0 L-Phenylalanine [c0] 4\n", + "cpd00069_c0 L-Tyrosine [c0] 6\n", + "cpd00084_c0 L-Cysteine [c0] 14\n", + "cpd00107_c0 L-Leucine [c0] 6\n", + "cpd00119_c0 L-Histidine [c0] 4\n", + "cpd00129_c0 L-Proline [c0] 11\n", + "cpd00132_c0 L-Asparagine [c0] 6\n", + "cpd00156_c0 L-Valine [c0] 5\n", + "cpd00161_c0 L-Threonine [c0] 7\n", + "cpd00322_c0 L-Isoleucine [c0] 4\n", + "cpd00115_c0 dATP [c0] 7\n", + "cpd00012_c0 PPi [c0] 134\n", + "cpd00241_c0 dGTP [c0] 8\n", + "cpd00356_c0 dCTP [c0] 6\n", + "cpd00357_c0 TTP [c0] 7\n", + "cpd00002_c0 ATP [c0] 276\n", + "cpd00038_c0 GTP [c0] 20\n", + "cpd00052_c0 CTP [c0] 25\n", + "cpd00062_c0 UTP [c0] 13\n", + "cpd00008_c0 ADP [c0] 214\n", + "cpd00067_c0 H+ [c0] 896\n", + "cpd11416_c0 Biomass [c0] 2\n", + "cpd17041_c0 Protein biosynthesis [c0] 2\n", + "cpd17042_c0 DNA replication [c0] 2\n", + "cpd17043_c0 RNA transcription [c0] 2\n" + ] + } + ], + "source": [ + "for m in model.reactions.bio1.metabolites:\n", + " print(m, m.name, len(m.reactions))" + ] + }, { "cell_type": "code", "execution_count": 4, diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index e8b21f0b..1c456d19 100644 --- a/modelseedpy/core/msbuilder.py +++ 
b/modelseedpy/core/msbuilder.py @@ -886,6 +886,8 @@ def build( index="0", allow_all_non_grp_reactions=False, annotate_with_rast=True, + biomass_classic=False, + biomass_gc=0.5, ): """ @@ -894,6 +896,8 @@ def build( @param index: @param allow_all_non_grp_reactions: @param annotate_with_rast: + @param biomass_classic: + @param biomass_gc: @return: """ self.index = index @@ -931,6 +935,23 @@ def build( cobra_model.add_groups(list(complex_groups.values())) self.add_exchanges_to_model(cobra_model) + biomass_reactions = [] + for rxn_biomass in self.template.biomasses: + reaction = rxn_biomass.build_biomass( + cobra_model, "0", biomass_classic, biomass_gc + ) + for m in reaction.metabolites: + if "modelseed_template_id" in m.notes: + self.template_species_to_model_species[ + m.notes["modelseed_template_id"] + ] = m + biomass_reactions.append(reaction) + + if len(biomass_reactions) > 0: + cobra_model.add_reactions(biomass_reactions) + cobra_model.objective = biomass_reactions[0].id + + """ if ( self.template.name.startswith("CoreModel") or self.template.name.startswith("GramNeg") @@ -940,6 +961,7 @@ def build( self.build_static_biomasses(cobra_model, self.template) ) cobra_model.objective = "bio1" + """ reactions_sinks = self.build_drains() cobra_model.add_reactions(reactions_sinks) @@ -1027,10 +1049,15 @@ def build_metabolic_model( allow_all_non_grp_reactions=False, annotate_with_rast=True, gapfill_model=True, + classic_biomass=False, ): builder = MSBuilder(genome, template) model = builder.build( - model_id, index, allow_all_non_grp_reactions, annotate_with_rast + model_id, + index, + allow_all_non_grp_reactions, + annotate_with_rast, + classic_biomass, ) # Gapfilling model if gapfill_model: diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index b0d384eb..d33846f3 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -174,6 +174,7 @@ def to_metabolite(self, index="0"): if len(str(index)) > 0: name = f"{self.name} 
[{compartment}]" metabolite = Metabolite(cpd_id, self.formula, name, self.charge, compartment) + metabolite.notes["modelseed_template_id"] = self.id return metabolite @property From a8583236fdfdd9c02e948b9573ff89a34a226c82 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Fri, 3 Feb 2023 16:54:03 -0600 Subject: [PATCH 005/146] notebook update --- .../build_metabolic_model.ipynb | 1140 ++++++++++++++++- 1 file changed, 1135 insertions(+), 5 deletions(-) diff --git a/examples/Model Reconstruction/build_metabolic_model.ipynb b/examples/Model Reconstruction/build_metabolic_model.ipynb index 6a817c0f..8cdd7a12 100644 --- a/examples/Model Reconstruction/build_metabolic_model.ipynb +++ b/examples/Model Reconstruction/build_metabolic_model.ipynb @@ -1,12 +1,26 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Build Metabolic Model from Genome .faa file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* MSGenome: to read a faa file\n", + "* MSBuilder: to build metabolic model from the genome" + ] + }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "import modelseedpy\n", "from modelseedpy import MSBuilder, MSGenome" ] }, @@ -19,20 +33,1136 @@ "genome = MSGenome.from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MSBuilder.build_metabolic_model` default parameters runs RAST, ML prediction to select template (gram neg, gram pos, cyano [not implemented], archaea [not implemented]), builds draft model and gapfills with complete media" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:89: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. 
To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.float):\n", + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:91: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.bool):\n" + ] + } + ], "source": [ "model = MSBuilder.build_metabolic_model('ecoli', genome, classic_biomass=True)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "

Objective

1.0 bio1 = 141.02637369025626

Uptake

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00007_e0EX_cpd00007_e0244.300.00%
cpd00024_e0EX_cpd00024_e083.0752.58%
cpd00028_e0EX_cpd00028_e00.3955340.08%
cpd00030_e0EX_cpd00030_e00.395500.00%
cpd00033_e0EX_cpd00033_e079.8120.99%
cpd00034_e0EX_cpd00034_e00.395500.00%
cpd00039_e0EX_cpd00039_e031.4261.17%
cpd00051_e0EX_cpd00051_e034.7461.29%
cpd00054_e0EX_cpd00054_e034.3530.64%
cpd00058_e0EX_cpd00058_e00.395500.00%
cpd00060_e0EX_cpd00060_e031.0950.96%
cpd00063_e0EX_cpd00063_e00.395500.00%
cpd00065_e0EX_cpd00065_e06.647110.45%
cpd00066_e0EX_cpd00066_e021.7691.21%
cpd00069_e0EX_cpd00069_e016.9990.95%
cpd00079_e0EX_cpd00079_e0499.9618.61%
cpd00080_e0EX_cpd00080_e0609.4311.34%
cpd00099_e0EX_cpd00099_e00.395500.00%
cpd00106_e0EX_cpd00106_e0401.249.96%
cpd00107_e0EX_cpd00107_e052.8661.97%
cpd00118_e0EX_cpd00118_e00.395540.01%
cpd00119_e0EX_cpd00119_e011.1660.42%
cpd00129_e0EX_cpd00129_e025.9650.81%
cpd00130_e0EX_cpd00130_e0199.144.94%
cpd00132_e0EX_cpd00132_e028.2840.70%
cpd00136_e0EX_cpd00136_e00.395570.02%
cpd00149_e0EX_cpd00149_e00.395500.00%
cpd00156_e0EX_cpd00156_e049.651.54%
cpd00161_e0EX_cpd00161_e029.7240.74%
cpd00184_e0EX_cpd00184_e0221.11013.71%
cpd00205_e0EX_cpd00205_e00.395500.00%
cpd00208_e0EX_cpd00208_e03.526120.26%
cpd00209_e0EX_cpd00209_e019000.00%
cpd00249_e0EX_cpd00249_e011.5690.65%
cpd00254_e0EX_cpd00254_e00.395500.00%
cpd00264_e0EX_cpd00264_e00.395570.02%
cpd00268_e0EX_cpd00268_e00.197800.00%
cpd00277_e0EX_cpd00277_e022.59101.40%
cpd00305_e0EX_cpd00305_e00.3955120.03%
cpd00322_e0EX_cpd00322_e034.0561.27%
cpd00355_e0EX_cpd00355_e00.791110.05%
cpd00367_e0EX_cpd00367_e012.9990.73%
cpd00383_e0EX_cpd00383_e01.97870.09%
cpd00412_e0EX_cpd00412_e02.76990.15%
cpd00438_e0EX_cpd00438_e02411014.95%
cpd00644_e0EX_cpd00644_e00.79190.04%
cpd00794_e0EX_cpd00794_e014.1121.05%
cpd01080_e0EX_cpd01080_e035.09183.92%
cpd03847_e0EX_cpd03847_e03.526140.31%
cpd10515_e0EX_cpd10515_e00.79100.00%
cpd10516_e0EX_cpd10516_e00.395500.00%
cpd17041_c0rxn13782_c014100.00%
cpd17042_c0rxn13783_c014100.00%
cpd17043_c0rxn13784_c014100.00%

Secretion

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00009_e0EX_cpd00009_e0-100000.00%
cpd00011_e0EX_cpd00011_e0-796.817.50%
cpd00020_e0EX_cpd00020_e0-282.137.97%
cpd00027_e0EX_cpd00027_e0-445.8625.18%
cpd00029_e0EX_cpd00029_e0-49029.22%
cpd00035_e0EX_cpd00035_e0-185.235.23%
cpd00047_e0EX_cpd00047_e0-2.37310.02%
cpd00100_e0EX_cpd00100_e0-4.38630.12%
cpd00108_e0EX_cpd00108_e0-3.52660.20%
cpd00116_e0EX_cpd00116_e0-0.395510.00%
cpd00139_e0EX_cpd00139_e0-1.18720.02%
cpd00151_e0EX_cpd00151_e0-221.1510.40%
cpd00159_e0EX_cpd00159_e0-835.5323.60%
cpd00226_e0EX_cpd00226_e0-220.8510.39%
cpd02701_c0SK_cpd02701_c0-0.3955150.06%
cpd03091_c0SK_cpd03091_c0-0.791100.07%
cpd11416_c0SK_cpd11416_c0-14100.00%
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ignore this below ..." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from modelseedpy import RastClient\n", + "rast = RastClient()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Objective

1.0 bio1 = 141.02637369025626

Uptake

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00007_e0EX_cpd00007_e0244.300.00%
cpd00024_e0EX_cpd00024_e083.0752.58%
cpd00028_e0EX_cpd00028_e00.3955340.08%
cpd00030_e0EX_cpd00030_e00.395500.00%
cpd00033_e0EX_cpd00033_e079.8120.99%
cpd00034_e0EX_cpd00034_e00.395500.00%
cpd00039_e0EX_cpd00039_e031.4261.17%
cpd00051_e0EX_cpd00051_e034.7461.29%
cpd00054_e0EX_cpd00054_e034.3530.64%
cpd00058_e0EX_cpd00058_e00.395500.00%
cpd00060_e0EX_cpd00060_e031.0950.96%
cpd00063_e0EX_cpd00063_e00.395500.00%
cpd00065_e0EX_cpd00065_e06.647110.45%
cpd00066_e0EX_cpd00066_e021.7691.21%
cpd00069_e0EX_cpd00069_e016.9990.95%
cpd00079_e0EX_cpd00079_e0499.9618.61%
cpd00080_e0EX_cpd00080_e0609.4311.34%
cpd00099_e0EX_cpd00099_e00.395500.00%
cpd00106_e0EX_cpd00106_e0401.249.96%
cpd00107_e0EX_cpd00107_e052.8661.97%
cpd00118_e0EX_cpd00118_e00.395540.01%
cpd00119_e0EX_cpd00119_e011.1660.42%
cpd00129_e0EX_cpd00129_e025.9650.81%
cpd00130_e0EX_cpd00130_e0199.144.94%
cpd00132_e0EX_cpd00132_e028.2840.70%
cpd00136_e0EX_cpd00136_e00.395570.02%
cpd00149_e0EX_cpd00149_e00.395500.00%
cpd00156_e0EX_cpd00156_e049.651.54%
cpd00161_e0EX_cpd00161_e029.7240.74%
cpd00184_e0EX_cpd00184_e0221.11013.71%
cpd00205_e0EX_cpd00205_e00.395500.00%
cpd00208_e0EX_cpd00208_e03.526120.26%
cpd00209_e0EX_cpd00209_e019000.00%
cpd00249_e0EX_cpd00249_e011.5690.65%
cpd00254_e0EX_cpd00254_e00.395500.00%
cpd00264_e0EX_cpd00264_e00.395570.02%
cpd00268_e0EX_cpd00268_e00.197800.00%
cpd00277_e0EX_cpd00277_e022.59101.40%
cpd00305_e0EX_cpd00305_e00.3955120.03%
cpd00322_e0EX_cpd00322_e034.0561.27%
cpd00355_e0EX_cpd00355_e00.791110.05%
cpd00367_e0EX_cpd00367_e012.9990.73%
cpd00383_e0EX_cpd00383_e01.97870.09%
cpd00412_e0EX_cpd00412_e02.76990.15%
cpd00438_e0EX_cpd00438_e02411014.95%
cpd00644_e0EX_cpd00644_e00.79190.04%
cpd00794_e0EX_cpd00794_e014.1121.05%
cpd01080_e0EX_cpd01080_e035.09183.92%
cpd03847_e0EX_cpd03847_e03.526140.31%
cpd10515_e0EX_cpd10515_e00.79100.00%
cpd10516_e0EX_cpd10516_e00.395500.00%
cpd17041_c0rxn13782_c014100.00%
cpd17042_c0rxn13783_c014100.00%
cpd17043_c0rxn13784_c014100.00%

Secretion

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00009_e0EX_cpd00009_e0-100000.00%
cpd00011_e0EX_cpd00011_e0-796.817.50%
cpd00020_e0EX_cpd00020_e0-282.137.97%
cpd00027_e0EX_cpd00027_e0-445.8625.18%
cpd00029_e0EX_cpd00029_e0-49029.22%
cpd00035_e0EX_cpd00035_e0-185.235.23%
cpd00047_e0EX_cpd00047_e0-2.37310.02%
cpd00100_e0EX_cpd00100_e0-4.38630.12%
cpd00108_e0EX_cpd00108_e0-3.52660.20%
cpd00116_e0EX_cpd00116_e0-0.395510.00%
cpd00139_e0EX_cpd00139_e0-1.18720.02%
cpd00151_e0EX_cpd00151_e0-221.1510.40%
cpd00159_e0EX_cpd00159_e0-835.5323.60%
cpd00226_e0EX_cpd00226_e0-220.8510.39%
cpd02701_c0SK_cpd02701_c0-0.3955150.06%
cpd03091_c0SK_cpd03091_c0-0.791100.07%
cpd11416_c0SK_cpd11416_c0-14100.00%
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.summary()" ] From f00dbc1b429410443878f5f1f2b8b4611f83da89 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 9 Feb 2023 13:47:14 -0600 Subject: [PATCH 006/146] Fixes for new biomass code --- modelseedpy/core/fbahelper.py | 16 +++-- modelseedpy/core/msbuilder.py | 100 +++------------------------- modelseedpy/core/msgapfill.py | 2 +- modelseedpy/core/mstemplate.py | 23 +++++-- modelseedpy/fbapkg/gapfillingpkg.py | 43 ++++++++---- 5 files changed, 72 insertions(+), 112 deletions(-) diff --git a/modelseedpy/core/fbahelper.py b/modelseedpy/core/fbahelper.py index 6c44108f..8605fef3 100644 --- a/modelseedpy/core/fbahelper.py +++ b/modelseedpy/core/fbahelper.py @@ -115,18 +115,24 @@ def modelseed_id_from_cobra_reaction(reaction): @staticmethod def metabolite_mw(metabolite): + fixed_masses = {"cpd11416":1,"cpd17041":0,"cpd17042":0,"cpd17043":0} + msid = FBAHelper.modelseed_id_from_cobra_metabolite(metabolite) + if msid in fixed_masses: + return fixed_masses[msid] + if not metabolite.formula: + return 0 + formula = re.sub("R\d*", "", metabolite.formula) try: - if not metabolite.formula: - return 0 - formula = re.sub("R\d*", "", metabolite.formula) chem_mw = ChemMW(printing=False) chem_mw.mass(formula) return chem_mw.raw_mw except: - warn( + logger.warn( "The compound " + metabolite.id - + " possesses an unconventional formula {metabolite.formula}; hence, the MW cannot be computed." + + " possesses an unconventional formula " + + metabolite.formula + + "; hence, the MW cannot be computed." 
) return 0 diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 06869289..2a2415cb 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -555,85 +555,6 @@ def get_or_create_metabolite( pass return model.metabolites.get_by_id(full_id) - @staticmethod - def build_biomass_new(model, template, index): - biomasses = [] - types = ["cofactor", "lipid", "cellwall"] - for bio in template.biomasses: - # Creating biomass reaction object - metabolites = {} - biorxn = Reaction(bio.id, bio.name, "biomasses", 0, 1000) - # Adding standard compounds for DNA, RNA, protein, and biomass - if bio["type"] == "growth": - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11416", "c", index - ) - metabolites[met] = 1 - if "dna" in bio and bio["dna"] > 0: - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11461", "c", index - ) - metabolites[met] = -1 * bio["dna"] - if "protein" in bio and bio["protein"] > 0: - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11463", "c", index - ) - metabolites[met] = -1 * bio["protein"] - if "rna" in bio and bio["rna"] > 0: - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11462", "c", index - ) - metabolites[met] = -1 * bio["rna"] - bio_type_hash = {} - for type in types: - for comp in bio["templateBiomassComponents"]: - fullid = FBAHelper.id_from_ref(comp["templatecompcompound_ref"]) - (baseid, compartment, ignore_index) = FBAHelper.parse_id(fullid) - comp["met"] = MSBuilder.get_or_create_metabolite( - model, template, baseid, compartment, index - ) - if type not in bio_type_hash: - bio_type_hash[type] = {"items": [], "total_mw": 0} - if FBAHelper.metabolite_mw(comp["met"]): - types[type] += FBAHelper.metabolite_mw(comp["met"]) / 1000 - bio_type_hash[type].append(comp) - for type in bio_type_hash: - compmass = bio[type] - for comp in bio_type_hash[type]: - coef = None - if comp["coefficient_type"] == "MOLFRACTION": - coef = compmass / 
types[type] * comp["coefficient"] - elif comp["coefficient_type"] == "MOLSPLIT": - coef = compmass / types[type] * comp["coefficient"] - elif comp["coefficient_type"] == "MULTIPLIER": - coef = biorxn[type] * comp["coefficient"] - elif comp["coefficient_type"] == "EXACT": - coef = comp["coefficient"] - if coef: - met = model.metabolites.get_by_id("cpd11416_c0") - if met in metabolites: - metabolites[met] += coef - else: - metabolites[met] = coef - metabolites[met] = coef - for count, value in enumerate(comp["linked_compound_refs"]): - met = model.metabolites.get_by_id( - FBAHelper.id_from_ref(value) - ) - if met in metabolites: - metabolites[met] += ( - coef * comp["link_coefficients"][count] - ) - else: - metabolites[met] = ( - coef * comp["link_coefficients"][count] - ) - - biorxn.annotation[SBO_ANNOTATION] = "SBO:0000629" - biorxn.add_metabolites(metabolites) - biomasses.append(biorxn) - return biomasses - def build_static_biomasses(self, model, template): res = [] if template.name.startswith("CoreModel"): @@ -737,7 +658,7 @@ def build_complex_groups(self, complex_sets): group_complexes = {} for complex_set in complex_sets: for complex_id in complex_set: - if complex_id not in group_complexes: + if complex_id not in group_complexes and complex_id in self.template.complexes: cpx = self.template.complexes.get_by_id(complex_id) g = Group(complex_id) g.notes["complex_source"] = cpx.source @@ -924,9 +845,12 @@ def build( or self.template.name.startswith("GramNeg") or self.template.name.startswith("GramPos") ): - cobra_model.add_reactions( - self.build_static_biomasses(cobra_model, self.template) - ) + gc = 0.5 + if hasattr(self.genome,"info"): + gc = float(self.genome.info.metadata["GC content"]) + print("Genome custom GC:",gc) + for bio in self.template.biomasses: + bio.build_biomass(cobra_model, index, classic=False, GC=gc,add_to_model=True) cobra_model.objective = "bio1" reactions_sinks = self.build_drains() @@ -983,13 +907,9 @@ def 
build_full_template_model(template, model_id=None, index="0"): bio_rxn2 = build_biomass("bio2", model, template, core_atp, index) model.add_reactions([bio_rxn1, bio_rxn2]) model.objective = "bio1" - if template.name.startswith("GramNeg"): - bio_rxn1 = build_biomass("bio1", model, template, gramneg, index) - model.add_reactions([bio_rxn1]) - model.objective = "bio1" - if template.name.startswith("GramPos"): - bio_rxn1 = build_biomass("bio1", model, template, grampos, index) - model.add_reactions([bio_rxn1]) + else: + for bio in template.biomasses: + bio.build_biomass(self, model, index, classic=False, GC=0.5,add_to_model=True) model.objective = "bio1" reactions_sinks = [] diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 9b42e17d..c48cf94b 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -147,7 +147,7 @@ def integrate_gapfill_solution(self, solution, cumulative_solution=[]): ---------- solution : dict Specifies the reactions to be added to the model to implement the gapfilling solution - cumulation_solution : list + cumulative_solution : list Optional array to cumulatively track all reactions added to the model when integrating multiple solutions """ for rxn_id in solution["reversed"]: diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 4335ef45..ed44ce0c 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -693,15 +693,18 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru metabolites = {} biorxn = Reaction(self.id, self.name, "biomasses", 0, 1000) # Adding standard compounds for DNA, RNA, protein, and biomass - if not classic and self.type == "growth": - met = self.get_or_create_metabolite(model, "cpd11416", "c", index) - metabolites[met] = 1 specific_reactions = {"dna": None, "rna": None, "protein": None} + exclusions = {"cpd17041_c":1,"cpd17042_c":1,"cpd17043_c":1} if not classic and self.dna > 0: met = 
self.get_or_create_metabolite(model, "cpd11461", "c", index) specific_reactions["dna"] = self.get_or_create_reaction( model, "rxn05294", "c", index ) + specific_reactions["dna"].name = "DNA synthesis" + if "rxn13783_c" + index in model.reactions: + specific_reactions["dna"].gene_reaction_rule = model.reactions.get_by_id("rxn13783_c" + index).gene_reaction_rule + specific_reactions["dna"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13783_c" + index).notes['modelseed_complex'] + model.remove_reactions([model.reactions.get_by_id("rxn13783_c" + index)]) specific_reactions["dna"].subtract_metabolites( specific_reactions["dna"].metabolites ) @@ -713,6 +716,11 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru specific_reactions["protein"] = self.get_or_create_reaction( model, "rxn05296", "c", index ) + specific_reactions["protein"].name = "Protein synthesis" + if "rxn13782_c" + index in model.reactions: + specific_reactions["protein"].gene_reaction_rule = model.reactions.get_by_id("rxn13782_c" + index).gene_reaction_rule + specific_reactions["protein"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13782_c" + index).notes['modelseed_complex'] + model.remove_reactions([model.reactions.get_by_id("rxn13782_c" + index)]) specific_reactions["protein"].subtract_metabolites( specific_reactions["protein"].metabolites ) @@ -723,6 +731,11 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru specific_reactions["rna"] = self.get_or_create_reaction( model, "rxn05295", "c", index ) + specific_reactions["rna"].name = "mRNA synthesis" + if "rxn13784_c" + index in model.reactions: + specific_reactions["rna"].gene_reaction_rule = model.reactions.get_by_id("rxn13784_c" + index).gene_reaction_rule + specific_reactions["rna"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13784_c" + index).notes['modelseed_complex'] + model.remove_reactions([model.reactions.get_by_id("rxn13784_c" + index)]) 
specific_reactions["rna"].subtract_metabolites( specific_reactions["rna"].metabolites ) @@ -731,7 +744,9 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru bio_type_hash = {} for type in types: for comp in self.templateBiomassComponents: - if type == comp.comp_class: + if comp.metabolite.id in exclusions and not classic: + pass + elif type == comp.comp_class: met = self.get_or_create_metabolite( model, comp.metabolite.id, None, index ) diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 58140418..880aeabf 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -7,6 +7,7 @@ import json from optlang.symbolics import Zero, add from cobra import Model, Reaction, Metabolite +from cobra.io import load_json_model, save_json_model, load_matlab_model, save_matlab_model, read_sbml_model, write_sbml_model from modelseedpy.fbapkg.basefbapkg import BaseFBAPkg from modelseedpy.core.fbahelper import FBAHelper @@ -899,6 +900,26 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): return solution def filter_database_based_on_tests(self, test_conditions): + #Preserving the gapfilling objective function + gfobj = self.model.objective + #Setting the minimal growth constraint to zero + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 + #Setting the objective to the original default objective for the model + self.model.objective = self.parameters["origobj"] + #Testing if the minimal objective can be achieved before filtering + solution = self.model.optimize() + print( + "Objective before filtering:", + solution.objective_value, + "; min objective:", + self.parameters["minimum_obj"], + ) + with open("debuggf.lp", "w") as out: + out.write(str(self.model.solver)) + if solution.objective_value < self.parameters["minimum_obj"]: + save_json_model(self.model, "gfdebugmdl.json") + logger.critical("Model cannot achieve the minimum 
objective even before filtering!") + #Filtering the database of any reactions that violate the specified tests filetered_list = [] with self.model: rxnlist = [] @@ -908,7 +929,7 @@ def filter_database_based_on_tests(self, test_conditions): rxnlist.append([reaction, "<"]) if "forward" in self.gapfilling_penalties[reaction.id]: rxnlist.append([reaction, ">"]) - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 + filtered_list = self.modelutl.reaction_expansion_test( rxnlist, test_conditions ) @@ -920,21 +941,19 @@ def filter_database_based_on_tests(self, test_conditions): else: self.model.reactions.get_by_id(item[0].id).lower_bound = 0 # Now testing if the gapfilling minimum objective can still be achieved - gfobj = self.model.objective - self.model.objective = self.parameters["origobj"] solution = self.model.optimize() - # Restoring the minimum objective constraint - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ - "minimum_obj" - ] print( "Objective after filtering:", solution.objective_value, "; min objective:", self.parameters["minimum_obj"], ) + # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached if solution.objective_value < self.parameters["minimum_obj"]: - # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached + # Restoring the minimum objective constraint + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] new_objective = self.model.problem.Objective(Zero, direction="min") filterobjcoef = dict() for item in filtered_list: @@ -945,7 +964,6 @@ def filter_database_based_on_tests(self, test_conditions): else: filterobjcoef[rxn.reverse_variable] = item[3] rxn.lower_bound = item[2] - self.model.objective = new_objective new_objective.set_linear_coefficients(filterobjcoef) solution = self.model.optimize() @@ -979,9 +997,10 @@ def 
filter_database_based_on_tests(self, test_conditions): self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: self.model.reactions.get_by_id(item[0].id).lower_bound = 0 - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"][ - "1" - ].lb = self.parameters["minimum_obj"] + #Restoring gapfilling objective function and minimal objective constraint + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] self.model.objective = gfobj def compute_gapfilled_solution(self, flux_values=None): From c22a8d6940ca173f934e21a3d1d6e39b02239cca Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 9 Feb 2023 13:59:47 -0600 Subject: [PATCH 007/146] Formatting with black --- modelseedpy/core/fbahelper.py | 2 +- modelseedpy/core/msbuilder.py | 9 +++- modelseedpy/core/mstemplate.py | 70 +++++++++++++++++++++-------- modelseedpy/fbapkg/gapfillingpkg.py | 33 +++++++++----- 4 files changed, 81 insertions(+), 33 deletions(-) diff --git a/modelseedpy/core/fbahelper.py b/modelseedpy/core/fbahelper.py index 8605fef3..502611d9 100644 --- a/modelseedpy/core/fbahelper.py +++ b/modelseedpy/core/fbahelper.py @@ -115,7 +115,7 @@ def modelseed_id_from_cobra_reaction(reaction): @staticmethod def metabolite_mw(metabolite): - fixed_masses = {"cpd11416":1,"cpd17041":0,"cpd17042":0,"cpd17043":0} + fixed_masses = {"cpd11416": 1, "cpd17041": 0, "cpd17042": 0, "cpd17043": 0} msid = FBAHelper.modelseed_id_from_cobra_metabolite(metabolite) if msid in fixed_masses: return fixed_masses[msid] diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 3c54986b..e53a28ac 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -668,7 +668,10 @@ def build_complex_groups(self, complex_sets): group_complexes = {} for complex_set in complex_sets: for complex_id in complex_set: - if complex_id not in group_complexes and complex_id in self.template.complexes: + if ( + complex_id not in group_complexes + and 
complex_id in self.template.complexes + ): cpx = self.template.complexes.get_by_id(complex_id) g = Group(complex_id) g.notes["complex_source"] = cpx.source @@ -943,7 +946,9 @@ def build_full_template_model(template, model_id=None, index="0"): model.objective = "bio1" else: for bio in template.biomasses: - bio.build_biomass(self, model, index, classic=False, GC=0.5,add_to_model=True) + bio.build_biomass( + self, model, index, classic=False, GC=0.5, add_to_model=True + ) model.objective = "bio1" reactions_sinks = [] diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 8f2a8560..6a6d5b6f 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -670,7 +670,7 @@ def get_or_create_reaction(self, model, baseid, compartment=None, index=None): model.add_reactions(newrxn) return newrxn - def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=True): + def build_biomass(self, model, index="0", classic=False, GC=0.5, add_to_model=True): types = [ "cofactor", "lipid", @@ -695,7 +695,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru biorxn = Reaction(self.id, self.name, "biomasses", 0, 1000) # Adding standard compounds for DNA, RNA, protein, and biomass specific_reactions = {"dna": None, "rna": None, "protein": None} - exclusions = {"cpd17041_c":1,"cpd17042_c":1,"cpd17043_c":1} + exclusions = {"cpd17041_c": 1, "cpd17042_c": 1, "cpd17043_c": 1} if not classic and self.dna > 0: met = self.get_or_create_metabolite(model, "cpd11461", "c", index) specific_reactions["dna"] = self.get_or_create_reaction( @@ -703,13 +703,23 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru ) specific_reactions["dna"].name = "DNA synthesis" if "rxn13783_c" + index in model.reactions: - specific_reactions["dna"].gene_reaction_rule = model.reactions.get_by_id("rxn13783_c" + index).gene_reaction_rule - specific_reactions["dna"].notes['modelseed_complex'] = 
model.reactions.get_by_id("rxn13783_c" + index).notes['modelseed_complex'] - model.remove_reactions([model.reactions.get_by_id("rxn13783_c" + index)]) + specific_reactions[ + "dna" + ].gene_reaction_rule = model.reactions.get_by_id( + "rxn13783_c" + index + ).gene_reaction_rule + specific_reactions["dna"].notes[ + "modelseed_complex" + ] = model.reactions.get_by_id("rxn13783_c" + index).notes[ + "modelseed_complex" + ] + model.remove_reactions( + [model.reactions.get_by_id("rxn13783_c" + index)] + ) specific_reactions["dna"].subtract_metabolites( specific_reactions["dna"].metabolites ) - specific_reactions["dna"].add_metabolites({met:1}) + specific_reactions["dna"].add_metabolites({met: 1}) metabolites[met] = 1 metabolites[met] = -1 * self.dna if not classic and self.protein > 0: @@ -719,13 +729,23 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru ) specific_reactions["protein"].name = "Protein synthesis" if "rxn13782_c" + index in model.reactions: - specific_reactions["protein"].gene_reaction_rule = model.reactions.get_by_id("rxn13782_c" + index).gene_reaction_rule - specific_reactions["protein"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13782_c" + index).notes['modelseed_complex'] - model.remove_reactions([model.reactions.get_by_id("rxn13782_c" + index)]) + specific_reactions[ + "protein" + ].gene_reaction_rule = model.reactions.get_by_id( + "rxn13782_c" + index + ).gene_reaction_rule + specific_reactions["protein"].notes[ + "modelseed_complex" + ] = model.reactions.get_by_id("rxn13782_c" + index).notes[ + "modelseed_complex" + ] + model.remove_reactions( + [model.reactions.get_by_id("rxn13782_c" + index)] + ) specific_reactions["protein"].subtract_metabolites( specific_reactions["protein"].metabolites ) - specific_reactions["protein"].add_metabolites({met:1}) + specific_reactions["protein"].add_metabolites({met: 1}) metabolites[met] = -1 * self.protein if not classic and self.rna > 0: met = 
self.get_or_create_metabolite(model, "cpd11462", "c", index) @@ -734,13 +754,23 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru ) specific_reactions["rna"].name = "mRNA synthesis" if "rxn13784_c" + index in model.reactions: - specific_reactions["rna"].gene_reaction_rule = model.reactions.get_by_id("rxn13784_c" + index).gene_reaction_rule - specific_reactions["rna"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13784_c" + index).notes['modelseed_complex'] - model.remove_reactions([model.reactions.get_by_id("rxn13784_c" + index)]) + specific_reactions[ + "rna" + ].gene_reaction_rule = model.reactions.get_by_id( + "rxn13784_c" + index + ).gene_reaction_rule + specific_reactions["rna"].notes[ + "modelseed_complex" + ] = model.reactions.get_by_id("rxn13784_c" + index).notes[ + "modelseed_complex" + ] + model.remove_reactions( + [model.reactions.get_by_id("rxn13784_c" + index)] + ) specific_reactions["rna"].subtract_metabolites( specific_reactions["rna"].metabolites ) - specific_reactions["rna"].add_metabolites({met:1}) + specific_reactions["rna"].add_metabolites({met: 1}) metabolites[met] = -1 * self.rna bio_type_hash = {} for type in types: @@ -774,13 +804,15 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru coef = comp.coefficient elif comp.coefficient_type == "AT": coef = ( - 2 * comp.coefficient + 2 + * comp.coefficient * (1 - GC) * (type_abundances[type] / bio_type_hash[type]["total_mw"]) ) elif comp.coefficient_type == "GC": coef = ( - 2 * comp.coefficient + 2 + * comp.coefficient * GC * (type_abundances[type] / bio_type_hash[type]["total_mw"]) ) @@ -793,7 +825,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru metabolites[met] = coef elif not classic: coef = coef / type_abundances[type] - specific_reactions[type].add_metabolites({met:coef}) + specific_reactions[type].add_metabolites({met: coef}) for l_met in comp.linked_metabolites: met = 
self.get_or_create_metabolite( model, l_met.id, None, index @@ -806,7 +838,9 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru else: metabolites[met] = coef * comp.linked_metabolites[l_met] elif not classic: - specific_reactions[type].add_metabolites({met:coef * comp.linked_metabolites[l_met]}) + specific_reactions[type].add_metabolites( + {met: coef * comp.linked_metabolites[l_met]} + ) biorxn.annotation[SBO_ANNOTATION] = "SBO:0000629" biorxn.add_metabolites(metabolites) if add_to_model: diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 880aeabf..ebbebe72 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -7,7 +7,14 @@ import json from optlang.symbolics import Zero, add from cobra import Model, Reaction, Metabolite -from cobra.io import load_json_model, save_json_model, load_matlab_model, save_matlab_model, read_sbml_model, write_sbml_model +from cobra.io import ( + load_json_model, + save_json_model, + load_matlab_model, + save_matlab_model, + read_sbml_model, + write_sbml_model, +) from modelseedpy.fbapkg.basefbapkg import BaseFBAPkg from modelseedpy.core.fbahelper import FBAHelper @@ -900,13 +907,13 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): return solution def filter_database_based_on_tests(self, test_conditions): - #Preserving the gapfilling objective function + # Preserving the gapfilling objective function gfobj = self.model.objective - #Setting the minimal growth constraint to zero + # Setting the minimal growth constraint to zero self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 - #Setting the objective to the original default objective for the model + # Setting the objective to the original default objective for the model self.model.objective = self.parameters["origobj"] - #Testing if the minimal objective can be achieved before filtering + # Testing if the minimal objective can 
be achieved before filtering solution = self.model.optimize() print( "Objective before filtering:", @@ -918,8 +925,10 @@ def filter_database_based_on_tests(self, test_conditions): out.write(str(self.model.solver)) if solution.objective_value < self.parameters["minimum_obj"]: save_json_model(self.model, "gfdebugmdl.json") - logger.critical("Model cannot achieve the minimum objective even before filtering!") - #Filtering the database of any reactions that violate the specified tests + logger.critical( + "Model cannot achieve the minimum objective even before filtering!" + ) + # Filtering the database of any reactions that violate the specified tests filetered_list = [] with self.model: rxnlist = [] @@ -929,7 +938,7 @@ def filter_database_based_on_tests(self, test_conditions): rxnlist.append([reaction, "<"]) if "forward" in self.gapfilling_penalties[reaction.id]: rxnlist.append([reaction, ">"]) - + filtered_list = self.modelutl.reaction_expansion_test( rxnlist, test_conditions ) @@ -951,9 +960,9 @@ def filter_database_based_on_tests(self, test_conditions): # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached if solution.objective_value < self.parameters["minimum_obj"]: # Restoring the minimum objective constraint - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ - "minimum_obj" - ] + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"][ + "1" + ].lb = self.parameters["minimum_obj"] new_objective = self.model.problem.Objective(Zero, direction="min") filterobjcoef = dict() for item in filtered_list: @@ -997,7 +1006,7 @@ def filter_database_based_on_tests(self, test_conditions): self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: self.model.reactions.get_by_id(item[0].id).lower_bound = 0 - #Restoring gapfilling objective function and minimal objective constraint + # Restoring gapfilling objective function and minimal objective constraint 
self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ "minimum_obj" ] From c61394c564810914e39da3ee8698fd3c60db7ced Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 13 Feb 2023 23:26:21 -0600 Subject: [PATCH 008/146] biomass fix --- modelseedpy/core/mstemplate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index d33846f3..b9475dc4 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -823,6 +823,8 @@ def get_data(self): for comp in self.templateBiomassComponents: data["templateBiomassComponents"].append(comp.get_data()) + return data + class NewModelTemplateRole: def __init__(self, role_id, name, features=None, source="", aliases=None): From 2915d7743f8cd6f1b335e2a86d44eb55e0f84d0b Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 14 Feb 2023 14:45:37 -0600 Subject: [PATCH 009/146] bug fix --- .../build_metabolic_model.ipynb | 26 +++++++++++++++++++ modelseedpy/core/mstemplate.py | 3 ++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/examples/Model Reconstruction/build_metabolic_model.ipynb b/examples/Model Reconstruction/build_metabolic_model.ipynb index 8cdd7a12..ea2e8d41 100644 --- a/examples/Model Reconstruction/build_metabolic_model.ipynb +++ b/examples/Model Reconstruction/build_metabolic_model.ipynb @@ -40,6 +40,32 @@ "`MSBuilder.build_metabolic_model` default parameters runs RAST, ML prediction to select template (gram neg, gram pos, cyano [not implemented], archaea [not implemented]), builds draft model and gapfills with complete media" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "type object argument after ** must be a mapping, not str", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback 
(most recent call last)", + "\u001b[0;32m/tmp/ipykernel_3118582/859642788.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mmodelseedpy\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mRastClient\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mrast\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRastClient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mrast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mannotate_genome\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgenome\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/rast_client.py\u001b[0m in \u001b[0;36mannotate_genome\u001b[0;34m(self, genome)\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0mp_features\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"id\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"protein_translation\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp_features\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"features\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/rast_client.py\u001b[0m in \u001b[0;36mf\u001b[0;34m(self, p_features)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp_features\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0mparams\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"features\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mp_features\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"stages\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstages\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrpc_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"GenomeAnnotation.run_pipeline\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/rpcclient.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, method, params, token)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0merr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"error\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 75\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mServerError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"error\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 76\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mServerError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: type object argument after ** must be a mapping, not str" + ] + } + ], + "source": [ + "from modelseedpy import RastClient\n", + "rast = RastClient()\n", + "rast.annotate_genome(genome)" + ] + }, { "cell_type": "code", "execution_count": 3, diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index b9475dc4..7e992d52 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -654,13 +654,14 @@ def get_or_create_metabolite(self, model, baseid, 
compartment=None, index=None): ) def get_or_create_reaction(self, model, baseid, compartment=None, index=None): + logger.debug(f'{baseid}, {compartment}, {index}') fullid = baseid if compartment: fullid += "_" + compartment tempid = fullid if index: fullid += index - if fullid in model.metabolites: + if fullid in model.reactions: return model.reactions.get_by_id(fullid) if tempid in self._template.reactions: rxn = self._template.reactions.get_by_id(tempid).to_reaction(model, index) From b42a04f7b7dcfaa6b6cf1eea84c8e8519cbb0b23 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 14 Feb 2023 14:48:19 -0600 Subject: [PATCH 010/146] black --- modelseedpy/core/mstemplate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 7e992d52..36a49698 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -654,7 +654,7 @@ def get_or_create_metabolite(self, model, baseid, compartment=None, index=None): ) def get_or_create_reaction(self, model, baseid, compartment=None, index=None): - logger.debug(f'{baseid}, {compartment}, {index}') + logger.debug(f"{baseid}, {compartment}, {index}") fullid = baseid if compartment: fullid += "_" + compartment From d33bd9a23e720a5799853e485491d2b7f3086ded Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 15 Feb 2023 14:07:37 -0600 Subject: [PATCH 011/146] Updates to fix gapfilling and string concatenation --- modelseedpy/core/msgapfill.py | 3 +- modelseedpy/core/msmodelutl.py | 28 +- modelseedpy/core/mstemplate.py | 4 +- modelseedpy/fbapkg/gapfillingpkg.py | 777 ++++++++++++++++------------ 4 files changed, 456 insertions(+), 356 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index c48cf94b..ad430ef2 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -10,7 +10,6 @@ from modelseedpy.core import FBAHelper # !!! 
the import is never used from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.core.msmodelutl import MSModelUtil -from modelseedpy.fbapkg.gapfillingpkg import default_blacklist from modelseedpy.core.exceptions import GapfillingError @@ -57,7 +56,7 @@ def __init__( self.gapfill_templates_by_index, self.gapfill_models_by_index = {}, {} self.gapfill_all_indecies_with_default_templates = True self.gapfill_all_indecies_with_default_models = True - self.blacklist = list(set(default_blacklist + blacklist)) + self.blacklist = list(set(blacklist)) self.test_condition_iteration_limit = 10 self.test_conditions = test_conditions self.reaction_scores = reaction_scores diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index af499773..bb147f89 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -307,27 +307,26 @@ def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): print(len(output)) self.model.add_reactions(output) return output - + ################################################################################# # Functions related to utility functions ################################################################################# def build_model_data_hash(self): data = { - "Model":self.id, - "Genome":self.genome.info.metadata["Name"], - "Genes":self.genome.info.metadata["Number of Protein Encoding Genes"], - + "Model": self.id, + "Genome": self.genome.info.metadata["Name"], + "Genes": self.genome.info.metadata["Number of Protein Encoding Genes"], } return data - - def compare_reactions(self, reaction_list,filename): + + def compare_reactions(self, reaction_list, filename): data = {} for rxn in reaction_list: for met in rxn.metabolites: if met.id not in data: data[met.id] = {} for other_rxn in reaction_list: - data[met.id][other_rxn.id] = 0 + data[met.id][other_rxn.id] = 0 data[met.id][rxn.id] = rxn.metabolites[met] df = pd.DataFrame(data) df = df.transpose() @@ -508,6 
+507,7 @@ def convert_cobra_reaction_to_kbreaction( def test_solution(self, solution, keep_changes=False): unneeded = [] + removed_rxns = [] tempmodel = self.model if not keep_changes: tempmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) @@ -535,6 +535,7 @@ def test_solution(self, solution, keep_changes=False): ) rxnobj.upper_bound = original_bound else: + removed_rxns.append(rxnobj) unneeded.append([rxn_id, solution[key][rxn_id], key]) logger.debug( rxn_id @@ -557,6 +558,7 @@ def test_solution(self, solution, keep_changes=False): ) rxnobj.lower_bound = original_bound else: + removed_rxns.append(rxnobj) unneeded.append([rxn_id, solution[key][rxn_id], key]) logger.debug( rxn_id @@ -565,6 +567,7 @@ def test_solution(self, solution, keep_changes=False): + str(objective) ) if keep_changes: + tempmodel.remove_reactions(removed_rxns) for items in unneeded: del solution[items[2]][items[0]] return unneeded @@ -682,6 +685,7 @@ def test_single_condition(self, condition, apply_condition=True, model=None): if model is None: model = self.model if apply_condition: + print("applying - bad") self.apply_test_condition(condition, model) new_objective = model.slim_optimize() value = new_objective @@ -882,12 +886,10 @@ def reaction_expansion_test( Raises ------ """ - logger.debug("Expansion started!") + logger.debug(f"Expansion started! 
Binary = {binary_search}") filtered_list = [] for condition in condition_list: - logger.debug(f"testing condition {condition}") - currmodel = self.model tic = time.perf_counter() new_filtered = [] @@ -921,6 +923,10 @@ def reaction_expansion_test( + " out of " + str(len(reaction_list)) ) + filterlist = [] + for item in new_filtered: + filterlist.append(item[0].id + item[1]) + logger.debug(",".join(filterlist)) return filtered_list def add_atp_hydrolysis(self, compartment): diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 6a6d5b6f..5d206aed 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -583,7 +583,7 @@ def from_table( for index, row in filename_or_df.iterrows(): if row["biomass_id"] == bio_id: metabolite = template.compcompounds.get_by_id( - row["id"] + "_" + row["compartment"] + f'{row["id"]}_{row["compartment"]}' ) linked_mets = {} if ( @@ -594,7 +594,7 @@ def from_table( for item in array: sub_array = item.split(":") l_met = template.compcompounds.get_by_id( - sub_array[0] + "_" + row["compartment"] + f'{sub_array[0]}_{row["compartment"]}' ) linked_mets[l_met] = float(sub_array[1]) self.add_biomass_component( diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index ebbebe72..3ea2d6dd 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -20,346 +20,425 @@ logger = logging.getLogger(__name__) -default_blacklist = [ - "rxn12985", - "rxn00238", - "rxn07058", - "rxn05305", - "rxn09037", - "rxn10643", - "rxn11317", - "rxn05254", - "rxn05257", - "rxn05258", - "rxn05259", - "rxn05264", - "rxn05268", - "rxn05269", - "rxn05270", - "rxn05271", - "rxn05272", - "rxn05273", - "rxn05274", - "rxn05275", - "rxn05276", - "rxn05277", - "rxn05278", - "rxn05279", - "rxn05280", - "rxn05281", - "rxn05282", - "rxn05283", - "rxn05284", - "rxn05285", - "rxn05286", - "rxn05963", - "rxn05964", - "rxn05971", - "rxn05989", - "rxn05990", - 
"rxn06041", - "rxn06042", - "rxn06043", - "rxn06044", - "rxn06045", - "rxn06046", - "rxn06079", - "rxn06080", - "rxn06081", - "rxn06086", - "rxn06087", - "rxn06088", - "rxn06089", - "rxn06090", - "rxn06091", - "rxn06092", - "rxn06138", - "rxn06139", - "rxn06140", - "rxn06141", - "rxn06145", - "rxn06217", - "rxn06218", - "rxn06219", - "rxn06220", - "rxn06221", - "rxn06222", - "rxn06223", - "rxn06235", - "rxn06362", - "rxn06368", - "rxn06378", - "rxn06474", - "rxn06475", - "rxn06502", - "rxn06562", - "rxn06569", - "rxn06604", - "rxn06702", - "rxn06706", - "rxn06715", - "rxn06803", - "rxn06811", - "rxn06812", - "rxn06850", - "rxn06901", - "rxn06971", - "rxn06999", - "rxn07123", - "rxn07172", - "rxn07254", - "rxn07255", - "rxn07269", - "rxn07451", - "rxn09037", - "rxn10018", - "rxn10077", - "rxn10096", - "rxn10097", - "rxn10098", - "rxn10099", - "rxn10101", - "rxn10102", - "rxn10103", - "rxn10104", - "rxn10105", - "rxn10106", - "rxn10107", - "rxn10109", - "rxn10111", - "rxn10403", - "rxn10410", - "rxn10416", - "rxn11313", - "rxn11316", - "rxn11318", - "rxn11353", - "rxn05224", - "rxn05795", - "rxn05796", - "rxn05797", - "rxn05798", - "rxn05799", - "rxn05801", - "rxn05802", - "rxn05803", - "rxn05804", - "rxn05805", - "rxn05806", - "rxn05808", - "rxn05812", - "rxn05815", - "rxn05832", - "rxn05836", - "rxn05851", - "rxn05857", - "rxn05869", - "rxn05870", - "rxn05884", - "rxn05888", - "rxn05896", - "rxn05898", - "rxn05900", - "rxn05903", - "rxn05904", - "rxn05905", - "rxn05911", - "rxn05921", - "rxn05925", - "rxn05936", - "rxn05947", - "rxn05956", - "rxn05959", - "rxn05960", - "rxn05980", - "rxn05991", - "rxn05992", - "rxn05999", - "rxn06001", - "rxn06014", - "rxn06017", - "rxn06021", - "rxn06026", - "rxn06027", - "rxn06034", - "rxn06048", - "rxn06052", - "rxn06053", - "rxn06054", - "rxn06057", - "rxn06059", - "rxn06061", - "rxn06102", - "rxn06103", - "rxn06127", - "rxn06128", - "rxn06129", - "rxn06130", - "rxn06131", - "rxn06132", - "rxn06137", - "rxn06146", - "rxn06161", 
- "rxn06167", - "rxn06172", - "rxn06174", - "rxn06175", - "rxn06187", - "rxn06189", - "rxn06203", - "rxn06204", - "rxn06246", - "rxn06261", - "rxn06265", - "rxn06266", - "rxn06286", - "rxn06291", - "rxn06294", - "rxn06310", - "rxn06320", - "rxn06327", - "rxn06334", - "rxn06337", - "rxn06339", - "rxn06342", - "rxn06343", - "rxn06350", - "rxn06352", - "rxn06358", - "rxn06361", - "rxn06369", - "rxn06380", - "rxn06395", - "rxn06415", - "rxn06419", - "rxn06420", - "rxn06421", - "rxn06423", - "rxn06450", - "rxn06457", - "rxn06463", - "rxn06464", - "rxn06466", - "rxn06471", - "rxn06482", - "rxn06483", - "rxn06486", - "rxn06492", - "rxn06497", - "rxn06498", - "rxn06501", - "rxn06505", - "rxn06506", - "rxn06521", - "rxn06534", - "rxn06580", - "rxn06585", - "rxn06593", - "rxn06609", - "rxn06613", - "rxn06654", - "rxn06667", - "rxn06676", - "rxn06693", - "rxn06730", - "rxn06746", - "rxn06762", - "rxn06779", - "rxn06790", - "rxn06791", - "rxn06792", - "rxn06793", - "rxn06794", - "rxn06795", - "rxn06796", - "rxn06797", - "rxn06821", - "rxn06826", - "rxn06827", - "rxn06829", - "rxn06839", - "rxn06841", - "rxn06842", - "rxn06851", - "rxn06866", - "rxn06867", - "rxn06873", - "rxn06885", - "rxn06891", - "rxn06892", - "rxn06896", - "rxn06938", - "rxn06939", - "rxn06944", - "rxn06951", - "rxn06952", - "rxn06955", - "rxn06957", - "rxn06960", - "rxn06964", - "rxn06965", - "rxn07086", - "rxn07097", - "rxn07103", - "rxn07104", - "rxn07105", - "rxn07106", - "rxn07107", - "rxn07109", - "rxn07119", - "rxn07179", - "rxn07186", - "rxn07187", - "rxn07188", - "rxn07195", - "rxn07196", - "rxn07197", - "rxn07198", - "rxn07201", - "rxn07205", - "rxn07206", - "rxn07210", - "rxn07244", - "rxn07245", - "rxn07253", - "rxn07275", - "rxn07299", - "rxn07302", - "rxn07651", - "rxn07723", - "rxn07736", - "rxn07878", - "rxn11417", - "rxn11582", - "rxn11593", - "rxn11597", - "rxn11615", - "rxn11617", - "rxn11619", - "rxn11620", - "rxn11624", - "rxn11626", - "rxn11638", - "rxn11648", - "rxn11651", - 
"rxn11665", - "rxn11666", - "rxn11667", - "rxn11698", - "rxn11983", - "rxn11986", - "rxn11994", - "rxn12006", - "rxn12007", - "rxn12014", - "rxn12017", - "rxn12022", - "rxn12160", - "rxn12161", - "rxn01267", - "rxn05294", - "rxn04656", -] +base_blacklist = { + "rxn10157": "<", + "rxn09295": "<", + "rxn05938": "<", + "rxn08628": ">", + "rxn10155": "<", + "rxn01353": "<", + "rxn05683": "<", + "rxn09193": "<", + "rxn09003": "<", + "rxn01128": ">", + "rxn08655": "<", + "rxn09272": "<", + "rxn05313": "<", + "rxn01510": ">", + "rxn05297": ">", + "rxn00507": "<", + "rxn05596": "<", + "rxn01674": "<", + "rxn01679": "<", + "rxn00778": ">", + "rxn05206": ">", + "rxn00239": "<", + "rxn05937": "<", + "rxn00715": "<", + "rxn05638": ">", + "rxn05289": ">", + "rxn00839": "<", + "rxn08866": "<", + "rxn10901": "<", + "rxn09331": "<", + "rxn05242": "<", + "rxn12549": "<", + "rxn13143": "<", + "rxn12498": "<", + "rxn08373": "<", + "rxn05208": "<", + "rxn09372": "<", + "rxn00571": ">", + "rxn08104": "<", + "rxn08704": "<", + "rxn07191": "<", + "rxn09672": "<", + "rxn01048": ">", + "rxn11267": ">", + "rxn08290": "<", + "rxn09307": "<", + "rxn05676": ">", + "rxn09653": "<", + "rxn11277": "<", + "rxn00976": "<", + "rxn02520": "<", + "rxn08275": "<", + "rxn09121": "<", + "rxn08999": "<", + "rxn08633": "<", + "rxn08610": "<", + "rxn09218": "<", + "rxn05626": "<", + "rxn11320": "<", + "rxn10058": ">", + "rxn08544": "<", + "rxn12539": "<", + "rxn08990": "<", + "rxn09348": "<", + "rxn00378": "<", + "rxn05243": "<", + "rxn02154": "<", + "rxn12587": "<", + "rxn00125": "<", + "rxn05648": "<", + "rxn13722": "<", + "rxn10910": ">", + "rxn05308": ">", + "rxn08585": "<", + "rxn14207": "<", + "rxn08682": "<", + "rxn10895": "<", + "rxn09655": "<", + "rxn11934": "<", + "rxn01742": ">", + "rxn05222": ">", + "rxn09942": "<", + "rxn13753": ">", + "rxn10857": "<", + "rxn03468": "<", + "rxn04942": "<", + "rxn10990": ">", + "rxn08639": "<", + "rxn09248": "<", + "rxn11935": ">", + "rxn00870": ">", + 
"rxn08314": "<", + "rxn09378": "<", + "rxn09269": "<", + "rxn10057": ">", + "rxn13702": ">", + "rxn00517": "<", + "rxn09221": ">", + "rxn01505": ">", + "rxn13692": ">", + "rxn05573": "<", + "rxn10123": ">", + "rxn09005": "<", + "rxn05244": "<", + "rxn05940": "<", + "rxn10124": ">", + "rxn06202": ">", + "rxn09660": "<", + "rxn02260": ">", + "rxn08912": "<", + "rxn05760": ">", + "rxn05580": ">", + "rxn02181": ">", + "rxn09339": "<", + "rxn00767": "<", + "rxn09118": "<", + "rxn05303": "<", + "rxn06110": "<", + "rxn12800": "<", + "rxn10966": "<", + "rxn12561": "<", + "rxn04678": ">", + "rxn10818": "<", + "rxn08166": "<", + "rxn02044": ">", + "rxn12623": "<", + "rxn13392": ">", + "rxn02283": "<", + "rxn13647": ">", + "rxn08653": "<", + "rxn05218": ">", + "rxn11676": ">", + "rxn00197": "<", + "rxn00697": "<", + "rxn12575": ">", + "rxn08188": "<", + "rxn01215": "<", + "rxn08730": ">", + "rxn08519": ">", + "rxn08642": "<", + "rxn05245": "<", + "rxn04042": "<", + "rxn01443": ">", + "rxn08535": "<", + "rxn03983": "<", + "rxn08317": "<", + "rxn14173": ">", + "rxn08868": "<", + "rxn05893": ">", + "rxn00435": ">", + "rxn13724": "<", + "rxn09681": "<", + "rxn00572": ">", + "rxn05942": "<", + "rxn11158": "<", + "rxn05562": "<", + "rxn10868": "<", + "rxn10426": "<", + "rxn00941": ">", + "rxn08240": "<", + "rxn05220": ">", + "rxn01228": ">", + "rxn12540": "<", + "rxn10618": ">", + "rxn09659": "<", + "rxn08985": ">", + "rxn05523": "<", + "rxn00421": "<", + "rxn09385": "<", + "rxn08542": "<", + "rxn09658": "<", + "rxn01173": "<", + "rxn10977": "<", + "rxn05216": "<", + "rxn13748": ">", + "rxn10769": ">", + "rxn00451": "<", + "rxn01639": "<", + "rxn08661": "<", + "rxn09308": "<", + "rxn09260": "<", + "rxn00253": "<", + "rxn05207": "<", + "rxn01667": "<", + "rxn08063": "<", + "rxn01508": ">", + "rxn09657": "<", + "rxn01209": ">", + "rxn00548": ">", + "rxn12617": "<", + "rxn08747": ">", + "rxn08096": "<", + "rxn11951": "<", + "rxn09061": "<", + "rxn10978": "<", + "rxn02748": ">", + 
"rxn09663": "<", + "rxn08737": "<", + "rxn13127": "<", + "rxn09366": "<", + "rxn05634": "<", + "rxn05554": "<", + "rxn09266": ">", + "rxn04676": ">", + "rxn11078": ">", + "rxn04932": "<", + "rxn00607": ">", + "rxn08856": "<", + "rxn12624": "<", + "rxn05215": "<", + "rxn13686": "<", + "rxn12529": "<", + "rxn00234": "<", + "rxn13689": ">", + "rxn08117": "<", + "rxn05315": ">", + "rxn08865": "<", + "rxn11678": ">", + "rxn00518": "<", + "rxn00195": "<", + "rxn10054": "<", + "rxn12532": "<", + "rxn05902": ">", + "rxn12777": "<", + "rxn12822": ">", + "rxn13735": ">", + "rxn00427": "<", + "rxn13196": "<", + "rxn08284": "<", + "rxn10576": ">", + "rxn00891": "<", + "rxn08293": "<", + "rxn00374": ">", + "rxn08795": "<", + "rxn12583": "<", + "rxn00918": ">", + "rxn08525": "<", + "rxn10427": ">", + "rxn09271": "<", + "rxn10860": "<", + "rxn10600": ">", + "rxn13729": ">", + "rxn01375": "<", + "rxn13726": ">", + "rxn10587": "<", + "rxn08672": "<", + "rxn10588": ">", + "rxn08152": ">", + "rxn09306": "<", + "rxn00635": "<", + "rxn08427": "<", + "rxn05225": ">", + "rxn00680": ">", + "rxn08786": ">", + "rxn08721": "<", + "rxn11339": "<", + "rxn05749": "<", + "rxn01187": ">", + "rxn08625": "<", + "rxn06677": "<", + "rxn12302": ">", + "rxn02770": "<", + "rxn05628": "<", + "rxn13706": ">", + "rxn12739": "<", + "rxn00177": "<", + "rxn09896": ">", + "rxn12574": "<", + "rxn12533": ">", + "rxn08537": ">", + "rxn05651": ">", + "rxn08170": "<", + "rxn05240": "<", + "rxn00663": ">", + "rxn12589": "<", + "rxn09299": "<", + "rxn02059": "<", + "rxn12217": ">", + "rxn06592": "<", + "rxn05939": ">", + "rxn08581": "<", + "rxn00430": "<", + "rxn09283": ">", + "rxn08919": "<", + "rxn13660": "<", + "rxn08065": "<", + "rxn08428": ">", + "rxn10936": ">", + "rxn05238": ">", + "rxn05685": "<", + "rxn08920": ">", + "rxn07193": "<", + "rxn08265": "<", + "rxn12554": "<", + "rxn08094": "<", + "rxn13727": ">", + "rxn04158": "<", + "rxn09839": "<", + "rxn10820": "<", + "rxn00869": ">", + "rxn00331": ">", + 
"rxn09034": "<", + "rxn01136": "<", + "rxn09247": "<", + "rxn08302": "<", + "rxn10594": "<", + "rxn08670": ">", + "rxn11334": "<", + "rxn09941": "<", + "rxn02919": "<", + "rxn09670": "<", + "rxn10892": "<", + "rxn09794": "<", + "rxn02332": ">", + "rxn00244": ">", + "rxn08030": "<", + "rxn12526": "<", + "rxn13150": ">", + "rxn05486": "<", + "rxn10852": ">", + "rxn13790": ">", + "rxn06348": ">", + "rxn09172": ">", + "rxn03653": ">", + "rxn05213": "<", + "rxn01869": "<", + "rxn08142": "<", + "rxn12606": "<", + "rxn11916": ">", + "rxn05748": "<", + "rxn08543": "<", + "rxn01107": ">", + "rxn05708": "<", + "rxn08169": "<", + "rxn06641": ">", + "rxn12578": "<", + "rxn01172": "<", + "rxn02120": ">", + "rxn05669": "<", + "rxn11322": "<", + "rxn12630": "<", + "rxn00698": "<", + "rxn05507": ">", + "rxn12530": "<", + "rxn09304": "<", + "rxn05532": ">", + "rxn03644": ">", + "rxn08733": "<", + "rxn13733": "<", + "rxn10044": ">", + "rxn00176": ">", + "rxn01364": ">", + "rxn02198": ">", + "rxn06990": "<", + "rxn08424": "<", + "rxn08069": "<", + "rxn05611": "<", + "rxn11973": "<", + "rxn12665": ">", + "rxn05241": "<", + "rxn08982": ">", + "rxn00542": ">", + "rxn12588": "<", + "rxn03517": ">", + "rxn01805": "<", + "rxn13203": ">", + "rxn08614": "<", + "rxn12200": ">", + "rxn13811": "<", + "rxn08377": "<", + "rxn11342": ">", + "rxn02976": "<", + "rxn08217": "<", + "rxn07921": ">", + "rxn09944": ">", + "rxn02401": "<", + "rxn08429": ">", + "rxn00905": "<", + "rxn08196": "<", + "rxn03054": "<", + "rxn08643": "<", + "rxn01874": "<", + "rxn08028": "<", + "rxn01641": ">", + "rxn03442": "<", + "rxn02172": "<", + "rxn10692": ">", + "rxn10613": ">", + "rxn12928": ">", + "rxn12994": ">", + "rxn13843": ">", + "rxn12942": ">", + "rxn12934": ">", + "rxn16827": ">", + "rxn12941": ">", + "rxn01736": ">", + "rxn14109": ">", + "rxn15060": ">", + "rxn15064": ">", + "rxn30685": ">", + "rxn10095": ">", + "rxn16143": ">", + "rxn25271": ">", + "rxn25160": ">", + "rxn30917": ">", + "rxn16843": ">", + 
"rxn08921": ">", + "rxn09390": ">", + "rxn27362": ">", + "rxn02664": ">", + "rxn24638": ">", + "rxn24613": ">", + "rxn24611": ">", + "rxn14428": ">", + "rxn03079": ">", + "rxn03020": ">", + "rxn10471": "<", +} class GapfillingPkg(BaseFBAPkg): @@ -416,7 +495,7 @@ def build_package(self, parameters): "minimum_obj": 0.01, "set_objective": 1, "minimize_exchanges": False, - "blacklist": default_blacklist, + "blacklist": [], }, ) # Adding model reactions to original reaction list @@ -558,6 +637,11 @@ def extend_model_with_model_for_gapfilling(self, source_model, index): if re.search("(.+)_([a-z])\d+$", modelreaction.id) != None: m = re.search("(.+)_([a-z])\d+$", modelreaction.id) if m[1] not in self.parameters["blacklist"]: + if m[1] in base_blacklist: + if base_blacklist[m[1]] == ">" or base_blacklist[m[1]] == "=": + cobra_reaction.upper_bound = 0 + if base_blacklist[m[1]] == "<" or base_blacklist[m[1]] == "=": + cobra_reaction.lower_bound = 0 cobra_reaction = modelreaction.copy() cobra_reaction.id = groups[1] + "_" + groups[2] + index if ( @@ -687,6 +771,17 @@ def extend_model_with_template_for_gapfilling(self, template, index): cobra_reaction = self.convert_template_reaction( template_reaction, index, template, 1 ) # TODO: move function out + if template_reaction.reference_id in base_blacklist: + if ( + base_blacklist[template_reaction.reference_id] == ">" + or base_blacklist[template_reaction.reference_id] == "=" + ): + cobra_reaction.upper_bound = 0 + if ( + base_blacklist[template_reaction.reference_id] == "<" + or base_blacklist[template_reaction.reference_id] == "=" + ): + cobra_reaction.lower_bound = 0 new_penalties[cobra_reaction.id] = dict() if ( cobra_reaction.id not in self.model.reactions From 906bb3e3a008b81cafbc39597fbf71b21e8d40e6 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 20 Feb 2023 00:24:38 -0600 Subject: [PATCH 012/146] genome feature aliases --- modelseedpy/core/msbuilder.py | 4 +++- modelseedpy/core/msgenome.py | 4 ++-- 
modelseedpy/core/msmodelutl.py | 15 +++++++-------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index e53a28ac..54fb06c6 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -872,7 +872,9 @@ def build( biomass_reactions.append(reaction) if len(biomass_reactions) > 0: - cobra_model.add_reactions(biomass_reactions) + for rxn in biomass_reactions: + if rxn.id not in cobra_model.reactions: + cobra_model.add_reactions([rxn]) cobra_model.objective = biomass_reactions[0].id """ diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 999e464d..875699c2 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -48,7 +48,7 @@ def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None): class MSFeature: - def __init__(self, feature_id, sequence, description=None): + def __init__(self, feature_id, sequence, description=None, aliases=None): """ @param feature_id: identifier for the protein coding feature @@ -60,7 +60,7 @@ def __init__(self, feature_id, sequence, description=None): self.seq = sequence self.description = description # temporary replace with proper parsing self.ontology_terms = {} - self.aliases = [] + self.aliases = aliases def add_ontology_term(self, ontology_term, value): """ diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index af499773..7017552b 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -307,27 +307,26 @@ def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): print(len(output)) self.model.add_reactions(output) return output - + ################################################################################# # Functions related to utility functions ################################################################################# def build_model_data_hash(self): data = { - "Model":self.id, - 
"Genome":self.genome.info.metadata["Name"], - "Genes":self.genome.info.metadata["Number of Protein Encoding Genes"], - + "Model": self.id, + "Genome": self.genome.info.metadata["Name"], + "Genes": self.genome.info.metadata["Number of Protein Encoding Genes"], } return data - - def compare_reactions(self, reaction_list,filename): + + def compare_reactions(self, reaction_list, filename): data = {} for rxn in reaction_list: for met in rxn.metabolites: if met.id not in data: data[met.id] = {} for other_rxn in reaction_list: - data[met.id][other_rxn.id] = 0 + data[met.id][other_rxn.id] = 0 data[met.id][rxn.id] = rxn.metabolites[met] df = pd.DataFrame(data) df = df.transpose() From 26d7b622bf0d2a4464f7f631c87e5f1001abb575 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 20 Feb 2023 21:13:39 -0600 Subject: [PATCH 013/146] template.add_reaction update comcompound references --- modelseedpy/core/mstemplate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index f439dc91..7bf9cbea 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -1302,7 +1302,9 @@ def add_reactions(self, reaction_list: list): if cpx.id not in self.complexes: self.add_complexes([cpx]) complex_replace.add(self.complexes.get_by_id(cpx.id)) + x._metabolites = metabolites_replace + x._update_awareness() x.complexes = complex_replace self.reactions += reaction_list From 34b4d812b0d1fcaf562a99db76f91f784f9db119 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 1 Mar 2023 00:01:53 -0600 Subject: [PATCH 014/146] Improving gapfilling and ATP correction --- modelseedpy/__init__.py | 2 + modelseedpy/biochem/modelseed_biochem.py | 2 +- modelseedpy/core/msatpcorrection.py | 46 ++++- modelseedpy/core/msbuilder.py | 8 +- modelseedpy/core/msgapfill.py | 137 ++++++-------- modelseedpy/core/msmodelutl.py | 169 ++++++++++++++++- modelseedpy/core/mstemplate.py | 14 +- modelseedpy/fbapkg/flexiblebiomasspkg.py | 
229 ++++++++++++++++------- modelseedpy/fbapkg/gapfillingpkg.py | 99 +++++----- 9 files changed, 494 insertions(+), 212 deletions(-) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 7f135055..aabb2c53 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -59,6 +59,8 @@ from modelseedpy.community import MSCommunity, MSCompatibility, CommKineticPkg +from modelseedpy.biochem import ModelSEEDBiochem + from modelseedpy.fbapkg import ( BaseFBAPkg, RevBinPkg, diff --git a/modelseedpy/biochem/modelseed_biochem.py b/modelseedpy/biochem/modelseed_biochem.py index 80594e0e..287ce470 100644 --- a/modelseedpy/biochem/modelseed_biochem.py +++ b/modelseedpy/biochem/modelseed_biochem.py @@ -495,7 +495,7 @@ class ModelSEEDBiochem: @staticmethod def get(create_if_missing=True): if not ModelSEEDBiochem.default_biochemistry: - ModelSEEDBiochem.default_biochemistry = from_local( + ModelSEEDBiochem.default_biochemistry = from_local2( config.get("biochem", "path") ) return ModelSEEDBiochem.default_biochemistry diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index c5b20e3c..e72835aa 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -22,6 +22,7 @@ from modelseedpy.helpers import get_template logger = logging.getLogger(__name__) +# logger.setLevel(logging.DEBUG) _path = _dirname(_abspath(__file__)) @@ -291,7 +292,10 @@ def evaluate_growth_media(self): or solution.status != "optimal" ): self.media_gapfill_stats[media] = self.msgapfill.run_gapfilling( - media, self.atp_hydrolysis.id, minimum_obj + media, + self.atp_hydrolysis.id, + minimum_obj, + check_for_growth=False, ) # IF gapfilling fails - need to activate and penalize the noncore and try again elif solution.objective_value >= minimum_obj: @@ -312,16 +316,29 @@ def determine_growth_media(self, max_gapfilling=None): Decides which of the test media to use as growth conditions for this model :return: """ + atp_att = 
{"tests": {}, "selected_media": {}, "core_atp_gapfilling": {}} self.selected_media = [] best_score = None for media in self.media_gapfill_stats: gfscore = 0 + atp_att["core_atp_gapfilling"][media.id] = { + "score": 0, + "new": {}, + "reversed": {}, + } if self.media_gapfill_stats[media]: gfscore = len( self.media_gapfill_stats[media]["new"].keys() ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) + atp_att["core_atp_gapfilling"][media.id][ + "new" + ] = self.media_gapfill_stats[media]["new"] + atp_att["core_atp_gapfilling"][media.id][ + "reversed" + ] = self.media_gapfill_stats[media]["reversed"] if best_score is None or gfscore < best_score: best_score = gfscore + atp_att["core_atp_gapfilling"][media.id]["score"] = gfscore if self.max_gapfilling is None: self.max_gapfilling = best_score @@ -339,6 +356,9 @@ def determine_growth_media(self, max_gapfilling=None): best_score + self.gapfilling_delta ): self.selected_media.append(media) + atp_att["selected_media"][media.id] = 0 + + self.modelutl.save_attributes(atp_att, "ATP_analysis") def determine_growth_media2(self, max_gapfilling=None): """ @@ -385,8 +405,15 @@ def apply_growth_media_gapfilling(self): and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0 ): self.msgapfill.integrate_gapfill_solution( - self.media_gapfill_stats[media], self.cumulative_core_gapfilling + self.media_gapfill_stats[media], + self.cumulative_core_gapfilling, + link_gaps_to_objective=False, ) + core_gf = { + "count": len(self.cumulative_core_gapfilling), + "reactions": self.cumulative_core_gapfilling, + } + self.modelutl.save_attributes(core_gf, "core_gapfilling") def expand_model_to_genome_scale(self): """Restores noncore reactions to model while filtering out reactions that break ATP @@ -460,6 +487,11 @@ def build_tests(self, multiplier=None): Raises ------ """ + atp_att = self.modelutl.get_attributes( + "ATP_analysis", + {"tests": {}, "selected_media": {}, "core_atp_gapfilling": {}}, + ) + if multiplier is None: 
multiplier = self.multiplier tests = [] @@ -467,7 +499,7 @@ def build_tests(self, multiplier=None): for media in self.selected_media: self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) obj_value = self.model.slim_optimize() - logger.debug(f"{media.name} = {obj_value}") + logger.debug(f"{media.name} = {obj_value};{multiplier}") tests.append( { "media": media, @@ -476,6 +508,14 @@ def build_tests(self, multiplier=None): "objective": self.atp_hydrolysis.id, } ) + atp_att["selected_media"][media.id] = obj_value + atp_att["tests"][media.id] = { + "threshold": multiplier * obj_value, + "objective": self.atp_hydrolysis.id, + } + + self.modelutl.save_attributes(atp_att, "ATP_analysis") + return tests def run_atp_correction(self): diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index e53a28ac..4ea0cd3e 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -849,16 +849,22 @@ def build( complex_groups = self.build_complex_groups( self.reaction_to_complex_sets.values() ) - + if "bio1" in cobra_model.reactions: + print("1:Biomass present!!") metabolic_reactions = self.build_metabolic_reactions() cobra_model.add_reactions(metabolic_reactions) + if "bio1" in cobra_model.reactions: + print("2:Biomass present!!") non_metabolic_reactions = self.build_non_metabolite_reactions( cobra_model, allow_all_non_grp_reactions ) cobra_model.add_reactions(non_metabolic_reactions) + if "bio1" in cobra_model.reactions: + print("3:Biomass present!!") cobra_model.add_groups(list(complex_groups.values())) self.add_exchanges_to_model(cobra_model) + print("Adding biomass!!") biomass_reactions = [] for rxn_biomass in self.template.biomasses: reaction = rxn_biomass.build_biomass( diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index ad430ef2..8d023272 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -69,6 +69,7 @@ def run_gapfilling( minimum_obj=0.01, binary_check=False, 
prefilter=True, + check_for_growth=True, ): if target: self.model.objective = self.model.problem.Objective( @@ -96,15 +97,54 @@ def run_gapfilling( ) pkgmgr.getpkg("KBaseMediaPkg").build_package(media) + # Testing if gapfilling can work before filtering + if ( + check_for_growth + and not pkgmgr.getpkg("GapfillingPkg").test_gapfill_database() + ): + # save_json_model(self.model, "gfdebugmdl.json") + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if media.id not in gf_sensitivity: + gf_sensitivity[media.id] = {} + if target not in gf_sensitivity[media.id]: + gf_sensitivity[media.id][target] = {} + gf_sensitivity[media.id][target][ + "FBF" + ] = self.mdlutl.find_unproducible_biomass_compounds(target) + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") + logger.warning("No solution found before filtering for %s", media) + return None + # Filtering breaking reactions out of the database if prefilter and self.test_conditions: pkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) + # Testing if gapfilling can work after filtering + if ( + check_for_growth + and not pkgmgr.getpkg("GapfillingPkg").test_gapfill_database() + ): + # save_json_model(self.model, "gfdebugmdl.json") + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if media.id not in gf_sensitivity: + gf_sensitivity[media.id] = {} + if target not in gf_sensitivity[media.id]: + gf_sensitivity[media.id][target] = {} + gf_sensitivity[media.id][target][ + "FAF" + ] = self.mdlutl.find_unproducible_biomass_compounds(target) + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") + logger.warning("No solution found after filtering for %s", media) + return None + + # Printing the gapfilling LP file if self.lp_filename: with open(self.lp_filename, "w") as out: out.write(str(self.gfmodel.solver)) + + # Running gapfilling and checking solution sol = self.gfmodel.optimize() logger.debug( "gapfill solution objective value %f (%s) for 
media %s", @@ -112,11 +152,11 @@ def run_gapfilling( sol.status, media, ) - if sol.status != "optimal": logger.warning("No solution found for %s", media) return None + # Computing solution and ensuring all tests still pass self.last_solution = pkgmgr.getpkg("GapfillingPkg").compute_gapfilled_solution() if self.test_conditions: self.last_solution = pkgmgr.getpkg("GapfillingPkg").run_test_conditions( @@ -129,18 +169,23 @@ def run_gapfilling( "no solution could be found that satisfied all specified test conditions in specified iterations!" ) return None + + # Running binary check to reduce solution to minimal reaction soltuion if binary_check: self.last_solution = pkgmgr.getpkg( "GapfillingPkg" ).binary_check_gapfilling_solution() + # Setting last solution data self.last_solution["media"] = media self.last_solution["target"] = target self.last_solution["minobjective"] = minimum_obj self.last_solution["binary_check"] = binary_check return self.last_solution - def integrate_gapfill_solution(self, solution, cumulative_solution=[]): + def integrate_gapfill_solution( + self, solution, cumulative_solution=[], link_gaps_to_objective=True + ): """Integrating gapfilling solution into model Parameters ---------- @@ -191,84 +236,20 @@ def integrate_gapfill_solution(self, solution, cumulative_solution=[]): cumulative_solution.remove(oitem) break self.mdlutl.add_gapfilling(solution) + if link_gaps_to_objective: + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if solution["media"] not in gf_sensitivity: + gf_sensitivity[solution["media"]] = {} + if solution["target"] not in gf_sensitivity[solution["media"]]: + gf_sensitivity[solution["media"]][solution["target"]] = {} + gf_sensitivity[solution["media"]][solution["target"]][ + "success" + ] = self.mdlutl.find_unproducible_biomass_compounds( + solution["target"], cumulative_solution + ) + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") self.cumulative_gapfilling.extend(cumulative_solution) - def 
link_gapfilling_to_biomass(self, target="bio1"): - def find_dependency( - item, target_rxn, tempmodel, original_objective, min_flex_obj - ): - objective = tempmodel.slim_optimize() - logger.debug("Obj:" + str(objective)) - with open("FlexBiomass2.lp", "w") as out: - out.write(str(tempmodel.solver)) - if objective > 0: - target_rxn.lower_bound = 0.1 - tempmodel.objective = min_flex_obj - solution = tempmodel.optimize() - with open("FlexBiomass3.lp", "w") as out: - out.write(str(tempmodel.solver)) - biocpds = [] - for reaction in tempmodel.reactions: - if ( - reaction.id[0:5] == "FLEX_" - and reaction.forward_variable.primal > Zero - ): - biocpds.append(reaction.id[5:]) - item.append(biocpds) - logger.debug(item[0] + ":" + ",".join(biocpds)) - tempmodel.objective = original_objective - target_rxn.lower_bound = 0 - - # Copying model before manipulating it - tempmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.mdlutl.model)) - # Getting target reaction and making sure it exists - target_rxn = tempmodel.reactions.get_by_id(target) - # Constraining objective to be greater than 0.1 - pkgmgr = MSPackageManager.get_pkg_mgr(tempmodel) - # Adding biomass flexibility - pkgmgr.getpkg("FlexibleBiomassPkg").build_package( - { - "bio_rxn_id": target, - "flex_coefficient": [0, 1], - "use_rna_class": None, - "use_dna_class": None, - "use_protein_class": None, - "use_energy_class": [0, 1], - "add_total_biomass_constraint": False, - } - ) - # Creating min flex objective - tempmodel.objective = target_rxn - original_objective = tempmodel.objective - min_flex_obj = tempmodel.problem.Objective(Zero, direction="min") - obj_coef = dict() - for reaction in tempmodel.reactions: - if reaction.id[0:5] == "FLEX_" or reaction.id[0:6] == "energy": - obj_coef[reaction.forward_variable] = 1 - # Temporarily setting flex objective so I can set coefficients - tempmodel.objective = min_flex_obj - min_flex_obj.set_linear_coefficients(obj_coef) - # Restoring biomass object - tempmodel.objective 
= original_objective - # Knocking out gapfilled reactions one at a time - for item in self.cumulative_gapfilling: - logger.debug("KO:" + item[0] + item[1]) - rxnobj = tempmodel.reactions.get_by_id(item[0]) - if item[1] == ">": - original_bound = rxnobj.upper_bound - rxnobj.upper_bound = 0 - find_dependency( - item, target_rxn, tempmodel, original_objective, min_flex_obj - ) - rxnobj.upper_bound = original_bound - else: - original_bound = rxnobj.lower_bound - rxnobj.lower_bound = 0 - find_dependency( - item, target_rxn, tempmodel, original_objective, min_flex_obj - ) - rxnobj.lower_bound = original_bound - @staticmethod def gapfill( model, diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index bb147f89..a44c5653 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -5,10 +5,15 @@ import json import sys import pandas as pd +import cobra from cobra import Model, Reaction, Metabolite +from optlang.symbolics import Zero from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.biochem.modelseed_biochem import ModelSEEDBiochem from modelseedpy.core.fbahelper import FBAHelper +from multiprocessing import Value + +# from builtins import None logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -105,6 +110,9 @@ def __init__(self, model): self.reaction_scores = None self.score = None self.integrated_gapfillings = [] + self.attributes = {} + if hasattr(self.model, "attributes"): + self.attributes = self.model def compute_automated_reaction_scores(self): """ @@ -270,6 +278,22 @@ def reaction_scores(self): ################################################################################# # Functions related to editing the model ################################################################################# + def get_attributes(self, key=None, default=None): + if not key: + return self.attributes + if key not in self.attributes: + self.attributes[key] = default + return 
self.attributes[key] + + def save_attributes(self, value, key=None): + attributes = self.get_attributes() + if key: + attributes[key] = value + else: + self.attributes = value + if hasattr(self.model, "attributes"): + self.model.attributes = self.attributes + def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): modelseed = ModelSEEDBiochem.get() output = [] @@ -923,12 +947,151 @@ def reaction_expansion_test( + " out of " + str(len(reaction_list)) ) - filterlist = [] + # Adding filter results to attributes + gf_filter_att = self.get_attributes("gf_filter", {}) + if condition["media"].id not in gf_filter_att: + gf_filter_att[condition["media"].id] = {} + if condition["objective"] not in gf_filter_att[condition["media"].id]: + gf_filter_att[condition["media"].id][condition["objective"]] = {} + if ( + condition["threshold"] + not in gf_filter_att[condition["media"].id][condition["objective"]] + ): + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ] = {} for item in new_filtered: - filterlist.append(item[0].id + item[1]) - logger.debug(",".join(filterlist)) + if ( + item[0].id + not in gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ] + ): + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id] = {} + if ( + item[1] + not in gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id] + ): + if len(item) < 3: + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id][item[1]] = None + else: + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id][item[1]] = item[2] + gf_filter_att = self.save_attributes(gf_filter_att, "gf_filter") return filtered_list + ################################################################################# + # Functions related to biomass sensitivity analysis + 
################################################################################# + def find_unproducible_biomass_compounds(self, target_rxn="bio1", ko_list=None): + # Cloning the model because we don't want to modify the original model with this analysis + tempmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) + # Getting target reaction and making sure it exists + if target_rxn not in tempmodel.reactions: + logger.critical(target_rxn + " not in model!") + target_rxn_obj = tempmodel.reactions.get_by_id(target_rxn) + tempmodel.objective = target_rxn + original_objective = tempmodel.objective + pkgmgr = MSPackageManager.get_pkg_mgr(tempmodel) + rxn_list = [target_rxn, "rxn05294_c0", "rxn05295_c0", "rxn05296_c0"] + for rxn in rxn_list: + if rxn in tempmodel.reactions: + pkgmgr.getpkg("FlexibleBiomassPkg").build_package( + { + "bio_rxn_id": rxn, + "flex_coefficient": [0, 1], + "use_rna_class": None, + "use_dna_class": None, + "use_protein_class": None, + "use_energy_class": [0, 1], + "add_total_biomass_constraint": False, + } + ) + + # Creating min flex objective + min_flex_obj = tempmodel.problem.Objective(Zero, direction="min") + obj_coef = dict() + for reaction in tempmodel.reactions: + if reaction.id[0:5] == "FLEX_" or reaction.id[0:6] == "energy": + obj_coef[reaction.forward_variable] = 1 + obj_coef[reaction.reverse_variable] = 1 + # Temporarily setting flex objective so I can set coefficients + tempmodel.objective = min_flex_obj + min_flex_obj.set_linear_coefficients(obj_coef) + if not ko_list: + return self.run_biomass_dependency_test( + target_rxn_obj, tempmodel, original_objective, min_flex_obj, rxn_list + ) + else: + output = {} + for item in ko_list: + logger.debug("KO:" + item[0] + item[1]) + rxnobj = tempmodel.reactions.get_by_id(item[0]) + if item[1] == ">": + original_bound = rxnobj.upper_bound + rxnobj.upper_bound = 0 + if item[0] not in output: + output[item[0]] = {} + output[item[0]][item[1]] = self.run_biomass_dependency_test( + 
target_rxn_obj, + tempmodel, + original_objective, + min_flex_obj, + rxn_list, + ) + rxnobj.upper_bound = original_bound + else: + original_bound = rxnobj.lower_bound + rxnobj.lower_bound = 0 + if item[0] not in output: + output[item[0]] = {} + output[item[0]][item[1]] = self.run_biomass_dependency_test( + target_rxn_obj, + tempmodel, + original_objective, + min_flex_obj, + rxn_list, + ) + rxnobj.lower_bound = original_bound + return output + + def run_biomass_dependency_test( + self, target_rxn, tempmodel, original_objective, min_flex_obj, rxn_list + ): + tempmodel.objective = original_objective + objective = tempmodel.slim_optimize() + with open("FlexBiomass2.lp", "w") as out: + out.write(str(tempmodel.solver)) + if objective > 0: + target_rxn.lower_bound = 0.1 + tempmodel.objective = min_flex_obj + solution = tempmodel.optimize() + with open("FlexBiomass3.lp", "w") as out: + out.write(str(tempmodel.solver)) + biocpds = [] + for reaction in tempmodel.reactions: + if reaction.id[0:5] == "FLEX_" and ( + reaction.forward_variable.primal > Zero + or reaction.reverse_variable.primal > Zero + ): + logger.debug("Depends on:" + reaction.id) + label = reaction.id[5:] + for item in rxn_list: + if label[0 : len(item)] == item: + biocpds.append(label[len(item) + 1 :]) + target_rxn.lower_bound = 0 + return biocpds + else: + logger.debug("Cannot grow") + return None + def add_atp_hydrolysis(self, compartment): # Searching for ATP hydrolysis compounds coefs = { diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 5d206aed..72118f07 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -581,9 +581,13 @@ def from_table( if isinstance(filename_or_df, str): filename_or_df = pd.read_table(filename_or_df) for index, row in filename_or_df.iterrows(): + if "biomass_id" not in row: + row["biomass_id"] = "bio1" if row["biomass_id"] == bio_id: + if "compartment" not in row: + row["compartment"] = "c" metabolite = 
template.compcompounds.get_by_id( - f'{row["id"]}_{row["compartment"]}' + f'{row["id"]}_{lower(row["compartment"])}' ) linked_mets = {} if ( @@ -594,14 +598,14 @@ def from_table( for item in array: sub_array = item.split(":") l_met = template.compcompounds.get_by_id( - f'{sub_array[0]}_{row["compartment"]}' + f'{sub_array[0]}_{lower(row["compartment"])}' ) linked_mets[l_met] = float(sub_array[1]) self.add_biomass_component( metabolite, - row["class"], - row["coefficient"], - row["coefficient_type"], + lower(row["class"]), + float(row["coefficient"]), + upper(row["coefficient_type"]), linked_mets, ) return self diff --git a/modelseedpy/fbapkg/flexiblebiomasspkg.py b/modelseedpy/fbapkg/flexiblebiomasspkg.py index ae8a1cfe..223f778d 100644 --- a/modelseedpy/fbapkg/flexiblebiomasspkg.py +++ b/modelseedpy/fbapkg/flexiblebiomasspkg.py @@ -93,7 +93,13 @@ def build_package(self, parameters): for metabolite in self.parameters["bio_rxn"].metabolites: met_class[metabolite] = None msid = MSModelUtil.metabolite_msid(metabolite) - if msid != "cpd11416" and msid != None: + if ( + msid != "cpd11416" + and msid != "cpd11463" + and msid != "cpd11462" + and msid != "cpd11461" + and msid != None + ): if msid in refcpd: met_class[metabolite] = "refcpd" else: @@ -111,20 +117,24 @@ def build_package(self, parameters): self.parameters["use_" + curr_class + "_class"] = None break # Creating FLEX reactions and constraints for unclassified compounds - flexcpds = [] + flexcpds = {} for metabolite in self.parameters["bio_rxn"].metabolites: if not met_class[metabolite]: - flexcpds.append(metabolite) + flexcpds[metabolite] = self.parameters["bio_rxn"].metabolites[ + metabolite + ] elif ( met_class[metabolite] != "refcpd" and not self.parameters["use_" + met_class[metabolite] + "_class"] ): - flexcpds.append(metabolite) + flexcpds[metabolite] = self.parameters["bio_rxn"].metabolites[ + metabolite + ] self.modelutl.add_exchanges_for_metabolites( flexcpds, uptake=1000, excretion=1000, - 
prefix="FLEX_", + prefix="FLEX_" + self.parameters["bio_rxn"].id + "_", prefix_name="Biomass flex for ", ) for metabolite in flexcpds: @@ -206,24 +216,32 @@ def build_variable(self, object, type): # !!! can the function be removed? pass def build_constraint(self, cobra_obj, obj_type): - element_mass = FBAHelper.elemental_mass() # !!! element_mass is never used if obj_type == "flxbio": # Sum(MW*(vdrn,for-vdrn,ref)) + Sum(massdiff*(vrxn,for-vrxn,ref)) = 0 coef = {} for metabolite in self.parameters["bio_rxn"].metabolites: - if "FLEX_" + metabolite.id in self.model.reactions: + if ( + "FLEX_" + self.parameters["bio_rxn"].id + "_" + metabolite.id + in self.model.reactions + ): mw = FBAHelper.metabolite_mw(metabolite) sign = -1 if self.parameters["bio_rxn"].metabolites[metabolite] > 0: sign = 1 coef[ self.model.reactions.get_by_id( - "FLEX_" + metabolite.id + "FLEX_" + + self.parameters["bio_rxn"].id + + "_" + + metabolite.id ).forward_variable ] = (sign * mw) coef[ self.model.reactions.get_by_id( - "FLEX_" + metabolite.id + "FLEX_" + + self.parameters["bio_rxn"].id + + "_" + + metabolite.id ).reverse_variable ] = (-1 * sign * mw) for met_class in classes: @@ -238,8 +256,11 @@ def build_constraint(self, cobra_obj, obj_type): coef[rxn.reverse_variable] = -massdiff return BaseFBAPkg.build_constraint(self, obj_type, 0, 0, coef, cobra_obj) elif obj_type == "flxcpd" or obj_type == "flxcls": + first_entry = None + second_entry = None + product = False biovar = self.parameters["bio_rxn"].forward_variable - object = cobra_obj + object = None const = None if obj_type == "flxcpd": # 0.75 * abs(bio_coef) * vbio - vdrn,for >= 0 @@ -250,7 +271,11 @@ def build_constraint(self, cobra_obj, obj_type): second_entry = self.parameters["flex_coefficient"][1] * abs( self.parameters["bio_rxn"].metabolites[cobra_obj] ) - object = self.model.reactions.get_by_id("FLEX_" + cobra_obj.id) + if self.parameters["bio_rxn"].metabolites[cobra_obj] > 0: + product = True + object = 
self.model.reactions.get_by_id( + "FLEX_" + self.parameters["bio_rxn"].id + "_" + cobra_obj.id + ) elif ( cobra_obj.id[0:-5] == None or not self.parameters["use_" + cobra_obj.id[0:-5] + "_class"] @@ -263,87 +288,157 @@ def build_constraint(self, cobra_obj, obj_type): second_entry = self.parameters["use_" + cobra_obj.id[0:-5] + "_class"][ 1 ] + object = cobra_obj if first_entry == second_entry: # If the value is positive, lock in the forward variable and set the reverse to zero if first_entry > 0: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, - 0, - 0, - {biovar: second_entry, object.forward_variable: -1}, - cobra_obj, - ) - object.lower_bound = 0 + if product: + const = self.build_constraint( + "f" + obj_type, + 0, + 0, + {biovar: second_entry, object.forward_variable: -1}, + cobra_obj, + ) + object.lower_bound = 0 + else: + const = self.build_constraint( + "f" + obj_type, + 0, + 0, + {biovar: second_entry, object.reverse_variable: -1}, + cobra_obj, + ) + object.upper_bound = 0 # If the value is negative, lock in the reverse variable and set the forward to zero elif first_entry < 0: - const = BaseFBAPkg.build_constraint( - self, - "r" + obj_type, - 0, - 0, - {biovar: -first_entry, object.reverse_variable: -1}, - cobra_obj, - ) - object.upper_bound = 0 + if product: + const = self.build_constraint( + "r" + obj_type, + 0, + 0, + {biovar: -first_entry, object.reverse_variable: -1}, + cobra_obj, + ) + object.upper_bound = 0 + else: + const = self.build_constraint( + "r" + obj_type, + 0, + 0, + {biovar: -first_entry, object.forward_variable: -1}, + cobra_obj, + ) + object.lower_bound = 0 # If the value is zero, lock both variables to zero if first_entry == 0: object.lower_bound = 0 object.upper_bound = 0 elif second_entry >= 0: if first_entry >= 0: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, - 0, - None, - {biovar: second_entry, object.forward_variable: -1}, - cobra_obj, - ) - object.lower_bound = 0 - if first_entry > 0: - 
BaseFBAPkg.build_constraint( + if product: + const = BaseFBAPkg.build_constraint( self, - "r" + obj_type, + "f" + obj_type, 0, None, - {biovar: -first_entry, object.forward_variable: 1}, + {biovar: second_entry, object.forward_variable: -1}, cobra_obj, ) + object.lower_bound = 0 + if first_entry > 0: + BaseFBAPkg.build_constraint( + self, + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.forward_variable: 1}, + cobra_obj, + ) + else: + const = BaseFBAPkg.build_constraint( + self, + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.reverse_variable: -1}, + cobra_obj, + ) + object.upper_bound = 0 + if first_entry > 0: + BaseFBAPkg.build_constraint( + self, + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.reverse_variable: 1}, + cobra_obj, + ) else: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, - 0, - None, - {biovar: second_entry, object.forward_variable: -1}, - cobra_obj, - ) - BaseFBAPkg.build_constraint( - self, + if product: + const = self.build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.forward_variable: -1}, + cobra_obj, + ) + self.build_constraint( + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.reverse_variable: -1}, + cobra_obj, + ) + else: + const = self.build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.reverse_variable: -1}, + cobra_obj, + ) + self.build_constraint( + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.forward_variable: -1}, + cobra_obj, + ) + else: + if second_entry < 0: + if product: + const = self.build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.reverse_variable: 1}, + cobra_obj, + ) + else: + const = self.build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.forward_variable: 1}, + cobra_obj, + ) + if product: + self.build_constraint( "r" + obj_type, 0, None, {biovar: -first_entry, object.reverse_variable: -1}, cobra_obj, ) - else: - if 
second_entry < 0: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, + object.lower_bound = 0 + else: + self.build_constraint( + "r" + obj_type, 0, None, - {biovar: second_entry, object.reverse_variable: 1}, + {biovar: -first_entry, object.forward_variable: -1}, cobra_obj, ) - BaseFBAPkg.build_constraint( - self, - "r" + obj_type, - 0, - None, - {biovar: -first_entry, object.reverse_variable: -1}, - cobra_obj, - ) - object.upper_bound = 0 + object.upper_bound = 0 return const diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 3ea2d6dd..f14eb7ed 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -19,6 +19,7 @@ from modelseedpy.core.fbahelper import FBAHelper logger = logging.getLogger(__name__) +# logger.setLevel(logging.DEBUG) base_blacklist = { "rxn10157": "<", @@ -493,7 +494,6 @@ def build_package(self, parameters): "default_excretion": 100, "default_uptake": 100, "minimum_obj": 0.01, - "set_objective": 1, "minimize_exchanges": False, "blacklist": [], }, @@ -578,29 +578,27 @@ def build_package(self, parameters): ) self.model.solver.update() - if self.parameters["set_objective"] == 1: - reaction_objective = self.model.problem.Objective(Zero, direction="min") - obj_coef = dict() - for reaction in self.model.reactions: - if reaction.id in self.gapfilling_penalties: - if ( - self.parameters["minimize_exchanges"] - or reaction.id[0:3] != "EX_" - ): - # Minimizing gapfilled reactions - if "reverse" in self.gapfilling_penalties[reaction.id]: - obj_coef[reaction.reverse_variable] = abs( - self.gapfilling_penalties[reaction.id]["reverse"] - ) - if "forward" in self.gapfilling_penalties[reaction.id]: - obj_coef[reaction.forward_variable] = abs( - self.gapfilling_penalties[reaction.id]["forward"] - ) - else: - obj_coef[reaction.forward_variable] = 0 - obj_coef[reaction.reverse_variable] = 0 - self.model.objective = reaction_objective - 
reaction_objective.set_linear_coefficients(obj_coef) + + reaction_objective = self.model.problem.Objective(Zero, direction="min") + obj_coef = dict() + for reaction in self.model.reactions: + if reaction.id in self.gapfilling_penalties: + if self.parameters["minimize_exchanges"] or reaction.id[0:3] != "EX_": + # Minimizing gapfilled reactions + if "reverse" in self.gapfilling_penalties[reaction.id]: + obj_coef[reaction.reverse_variable] = abs( + self.gapfilling_penalties[reaction.id]["reverse"] + ) + if "forward" in self.gapfilling_penalties[reaction.id]: + obj_coef[reaction.forward_variable] = abs( + self.gapfilling_penalties[reaction.id]["forward"] + ) + else: + obj_coef[reaction.forward_variable] = 0 + obj_coef[reaction.reverse_variable] = 0 + self.model.objective = reaction_objective + reaction_objective.set_linear_coefficients(obj_coef) + self.parameters["gfobj"] = self.model.objective def extend_model_with_model_for_gapfilling(self, source_model, index): new_metabolites = {} @@ -1001,28 +999,27 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): return None return solution - def filter_database_based_on_tests(self, test_conditions): - # Preserving the gapfilling objective function - gfobj = self.model.objective - # Setting the minimal growth constraint to zero + def test_gapfill_database(self): self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 - # Setting the objective to the original default objective for the model self.model.objective = self.parameters["origobj"] - # Testing if the minimal objective can be achieved before filtering solution = self.model.optimize() - print( - "Objective before filtering:", - solution.objective_value, - "; min objective:", - self.parameters["minimum_obj"], + logger.info( + "Objective with gapfill database:" + + str(solution.objective_value) + + "; min objective:" + + str(self.parameters["minimum_obj"]) ) - with open("debuggf.lp", "w") as out: - out.write(str(self.model.solver)) 
+ self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] + self.model.objective = self.parameters["gfobj"] if solution.objective_value < self.parameters["minimum_obj"]: - save_json_model(self.model, "gfdebugmdl.json") - logger.critical( - "Model cannot achieve the minimum objective even before filtering!" - ) + return False + return True + + def filter_database_based_on_tests(self, test_conditions): + # Setting the minimal growth constraint to zero + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 # Filtering the database of any reactions that violate the specified tests filetered_list = [] with self.model: @@ -1039,21 +1036,14 @@ def filter_database_based_on_tests(self, test_conditions): ) # Now constraining filtered reactions to zero for item in filtered_list: - logger.debug("Filtering:", item[0].id, item[1]) + logger.info("Filtering:" + item[0].id + item[1]) if item[1] == ">": self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: self.model.reactions.get_by_id(item[0].id).lower_bound = 0 # Now testing if the gapfilling minimum objective can still be achieved - solution = self.model.optimize() - print( - "Objective after filtering:", - solution.objective_value, - "; min objective:", - self.parameters["minimum_obj"], - ) - # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached - if solution.objective_value < self.parameters["minimum_obj"]: + if not self.test_gapfill_database(): + # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached # Restoring the minimum objective constraint self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"][ "1" @@ -1089,14 +1079,14 @@ def filter_database_based_on_tests(self, test_conditions): else: count += -1 rxn.lower_bound = 0 - print("Reactions unfiltered:", count) + logger.info("Reactions unfiltered:" + str(count)) # Checking for model 
reactions that can be removed to enable all tests to pass self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 filtered_list = self.modelutl.reaction_expansion_test( self.parameters["original_reactions"], test_conditions ) for item in filtered_list: - logger.debug("Filtering:", item[0].id, item[1]) + logger.info("Filtering:" + item[0].id + item[1]) if item[1] == ">": self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: @@ -1105,7 +1095,8 @@ def filter_database_based_on_tests(self, test_conditions): self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ "minimum_obj" ] - self.model.objective = gfobj + self.model.objective = self.parameters["gfobj"] + return True def compute_gapfilled_solution(self, flux_values=None): if flux_values is None: From 64010b3096b1259afb727074c5578c7cb9565773 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Wed, 1 Mar 2023 00:46:17 -0600 Subject: [PATCH 015/146] template species name --- modelseedpy/core/msbuilder.py | 2 +- modelseedpy/core/mstemplate.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 86df362b..cd16d75e 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -955,7 +955,7 @@ def build_full_template_model(template, model_id=None, index="0"): else: for bio in template.biomasses: bio.build_biomass( - self, model, index, classic=False, GC=0.5, add_to_model=True + model, index, classic=False, GC=0.5, add_to_model=True ) model.objective = "bio1" diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 07931f86..af7b0deb 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -184,8 +184,8 @@ def compound(self): @property def name(self): if self._template_compound: - return self._template_compound.name - return "" + return f'{self._template_compound.name} [{self.compartment}]' + return f'{self.id} 
[{self.compartment}]' @name.setter def name(self, value): From 972920b35bb8ac597085a46a0fb7039ba54c6233 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Wed, 1 Mar 2023 00:46:42 -0600 Subject: [PATCH 016/146] black --- modelseedpy/core/mstemplate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index af7b0deb..f28d170f 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -184,8 +184,8 @@ def compound(self): @property def name(self): if self._template_compound: - return f'{self._template_compound.name} [{self.compartment}]' - return f'{self.id} [{self.compartment}]' + return f"{self._template_compound.name} [{self.compartment}]" + return f"{self.id} [{self.compartment}]" @name.setter def name(self, value): From 75c464ac5ca4a5f05085baacec295ad03cd45052 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 13 Mar 2023 09:55:05 -0500 Subject: [PATCH 017/146] x --- modelseedpy/biochem/modelseed_compound.py | 18 +++++++++++++++++- modelseedpy/core/mstemplate.py | 2 +- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/modelseedpy/biochem/modelseed_compound.py b/modelseedpy/biochem/modelseed_compound.py index 89c4d5f5..1d00435d 100644 --- a/modelseedpy/biochem/modelseed_compound.py +++ b/modelseedpy/biochem/modelseed_compound.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from modelseedpy.biochem.seed_object import ModelSEEDObject -from modelseedpy.core.mstemplate import MSTemplateSpecies +from modelseedpy.core.mstemplate import MSTemplateSpecies, MSTemplateMetabolite from cobra.core import Metabolite import pandas as pd @@ -58,7 +58,23 @@ def __init__( def to_template_compartment_compound(self, compartment): cpd_id = f"{self.seed_id}_{compartment}" + # build Template Compound + metabolite = MSTemplateMetabolite( + self.seed_id, + self.formula, + self.name, + self.charge, + self.mass, + self.delta_g, + self.delta_g_error, + self.is_cofactor, + 
self.abbr, + ) + # build Template Compartment Compound res = MSTemplateSpecies(cpd_id, self.charge, compartment, self.id) + + # assign Compound to Compartment Compound + res._template_compound = metabolite res.annotation.update(self.annotation) return res diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index f28d170f..3b5552f4 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -143,7 +143,7 @@ class MSTemplateSpecies(Metabolite): def __init__( self, comp_cpd_id: str, - charge: int, + charge: float, compartment: str, cpd_id, max_uptake=0, From cac909bbec0f6d1176511c817a5ba3246ee758e7 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 28 Mar 2023 01:07:23 -0500 Subject: [PATCH 018/146] template format --- modelseedpy/core/mstemplate.py | 23 ++++++++++++++++------- setup.py | 3 ++- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 3b5552f4..fc2bbb08 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -160,19 +160,26 @@ def __init__( self.cpd_id ) - def to_metabolite(self, index="0"): + def to_metabolite(self, index="0", force=False): """ Create cobra.core.Metabolite instance :param index: compartment index + :@param force: force index :return: cobra.core.Metabolite """ if index is None: index = "" + index = str(index) + + if self.compartment == 'e' and index.isnumeric(): + if force: + logger.warning(f'Forcing numeric index [{index}] to extra cellular compartment not advised') + else: + index = '0' + cpd_id = f"{self.id}{index}" compartment = f"{self.compartment}{index}" - name = f"{self.name}" - if len(str(index)) > 0: - name = f"{self.name} [{compartment}]" + name = f"{self.compound.name} [{compartment}]" metabolite = Metabolite(cpd_id, self.formula, name, self.charge, compartment) metabolite.notes["modelseed_template_id"] = self.id return metabolite @@ -294,15 +301,17 @@ def 
compartment(self): def to_reaction(self, model=None, index="0"): if index is None: index = "" + index = str(index) rxn_id = f"{self.id}{index}" compartment = f"{self.compartment}{index}" name = f"{self.name}" metabolites = {} for m, v in self.metabolites.items(): - if model and m.id in model.metabolites: - metabolites[model.metabolites.get_by_id(m.id)] = v + _metabolite = m.to_metabolite(index) + if _metabolite.id in model.metabolites: + metabolites[model.metabolites.get_by_id(_metabolite.id)] = v else: - metabolites[m.to_metabolite(index)] = v + metabolites[_metabolite] = v if len(str(index)) > 0: name = f"{self.name} [{compartment}]" diff --git a/setup.py b/setup.py index 5fba7f6c..a7555b97 100644 --- a/setup.py +++ b/setup.py @@ -27,9 +27,10 @@ "Topic :: Scientific/Engineering :: Bio-Informatics", "Intended Audience :: Science/Research", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Natural Language :: English", ], install_requires=[ From dbe8c6d7acb3f72087166200766b8436f96150e3 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 28 Mar 2023 01:11:06 -0500 Subject: [PATCH 019/146] black --- modelseedpy/core/mstemplate.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index fc2bbb08..4a628e21 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -171,11 +171,13 @@ def to_metabolite(self, index="0", force=False): index = "" index = str(index) - if self.compartment == 'e' and index.isnumeric(): + if self.compartment == "e" and index.isnumeric(): if force: - logger.warning(f'Forcing numeric index [{index}] to extra cellular compartment not advised') + logger.warning( + f"Forcing numeric index [{index}] to extra cellular compartment not advised" 
+ ) else: - index = '0' + index = "0" cpd_id = f"{self.id}{index}" compartment = f"{self.compartment}{index}" From 29e5c4d164bdee5a9fd6077cba23636b4107bef7 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 13 Apr 2023 08:44:34 -0500 Subject: [PATCH 020/146] examples --- .../Model Reconstruction/Gapfilling.ipynb | 95 +++--- examples/Model Reconstruction/Genomes.ipynb | 297 +++++++++++------- 2 files changed, 234 insertions(+), 158 deletions(-) diff --git a/examples/Model Reconstruction/Gapfilling.ipynb b/examples/Model Reconstruction/Gapfilling.ipynb index eea0c536..88eadaa6 100644 --- a/examples/Model Reconstruction/Gapfilling.ipynb +++ b/examples/Model Reconstruction/Gapfilling.ipynb @@ -2,17 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cobrakbase 0.2.8\n" - ] - } - ], + "outputs": [], "source": [ "import cobra\n", "#If you have CPLEX, uncomment this\n", @@ -20,31 +12,37 @@ "import cobrakbase\n", "#import modelseedpy.fbapkg\n", "from modelseedpy import GapfillingPkg, KBaseMediaPkg\n", - "from modelseedpy import FBAHelper, MSBuilder" + "from modelseedpy import FBAHelper, MSBuilder\n", + "kbase_api = cobrakbase.KBaseAPI()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "model = kbase_api.get_from_ws(\"test_model\",18528)" + ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, "tags": [] }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:modelseedpy.core.msmodelutl:cpd00244 not found in model!\n" + ] + }, { "data": { "text/html": [ - "

Objective

1.0 bio1 = 0.8048653841131165

Uptake

\n", + "

Objective

1.0 bio1 = 0.7997546667881398

Uptake

\n", " \n", " \n", " \n", @@ -58,14 +56,14 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -79,98 +77,98 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -189,28 +187,35 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -218,19 +223,15 @@ "
Metabolite
cpd00009_e0EX_cpd00009_e00.99980.993400.00%
cpd00013_e0EX_cpd00013_e06.0376.09400.00%
cpd00030_e0EX_cpd00030_e00.006390.0063500.00%
cpd00034_e0EX_cpd00034_e00.006390.0063500.00%
cpd00048_e0EX_cpd00048_e00.17550.174400.00%
cpd00058_e0EX_cpd00058_e00.006390.0063500.00%
cpd00063_e0EX_cpd00063_e00.006390.0063500.00%
cpd00067_e0EX_cpd00067_e061.8561.4300.00%
cpd00099_e0EX_cpd00099_e00.006390.0063500.00%
cpd00149_e0EX_cpd00149_e00.006390.0063500.00%
cpd00205_e0EX_cpd00205_e00.006390.0063500.00%
cpd00254_e0EX_cpd00254_e00.006390.0063500.00%
cpd10516_e0EX_cpd10516_e00.025560.025400.00%
cpd17041_c0rxn13782_c00.80490.799800.00%
cpd17042_c0rxn13783_c00.80490.799800.00%
cpd17043_c0rxn13784_c00.80490.799800.00%
cpd00001_e0EX_cpd00001_e0-82.26-81.9500.00%
cpd00007_e0EX_cpd00007_e0-2.928-2.86900.00%
cpd15378_e0EX_cpd15378_e0-0.00639-0.006357100.00%18.92%
cpd03091_c0SK_cpd03091_c0-0.019051081.08%
cpd11416_c0SK_cpd11416_c0-0.8049-0.799800.00%
" ], "text/plain": [ - "" + "" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "kbase_api = cobrakbase.KBaseAPI()\n", - "model = kbase_api.get_from_ws(\"test_model\",18528)\n", - "#If you have CPLEX, uncomment this\n", - "#model.solver = 'optlang-cplex'\n", "template = kbase_api.get_from_ws(\"GramNegModelTemplateV3\",\"NewKBaseModelTemplates\")\n", "media = kbase_api.get_from_ws(\"Carbon-D-Glucose\",\"KBaseMedia\")\n", "model = MSBuilder.gapfill_model(model,\"bio1\",template,media)\n", @@ -17910,7 +17911,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/examples/Model Reconstruction/Genomes.ipynb b/examples/Model Reconstruction/Genomes.ipynb index 60270468..8ea82ef4 100644 --- a/examples/Model Reconstruction/Genomes.ipynb +++ b/examples/Model Reconstruction/Genomes.ipynb @@ -1,223 +1,300 @@ { "cells": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "tags": [] + }, "source": [ - "import modelseedpy\n", - "from modelseedpy.core.msgenome import MSGenome\n", - "from modelseedpy.core.rast_client import RastClient" + "### Genomes\n", + "\n", + "ModelSEEDpy provides its own genome object type `modelseedpy.core.msgenome.MSGenome` to manipulate genomes" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "genome = MS" + "import modelseedpy\n", + "from modelseedpy.core.msgenome import MSGenome" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "1" + "#### Reading faa file\n", + "\n", + "To load a genome we can read a `.faa` file that contains protein sequences" ] }, { "cell_type": 
"code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "genome = MSGenome.from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')" + ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "rast = RastClient()" + "genome" ] }, { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "genome = MSGenome.from_fasta('GCF_000005845.2.faa', split=' ')" + "#### Manipulating genes\n", + "\n", + "Each gene is stored as a `modelseedpy.core.msgenome.MSFeature` in the `.features` of type `cobra.core.dictlist.DictList` similiar to the cobrapy `.reactions` and `.metabolites` in the `cobra.core.Model`" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of features: 3\n" - ] + "data": { + "text/plain": [ + "4285" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print('Number of features:', len(genome.features))" + "len(genome.features)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "for f in genome.features:\n", - " print(f.id, len(f.seq), f.description)" + "gene = genome.features.get_by_id('NP_414542.1')\n", + "gene" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - 
"execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'execution_time': 1622756127.36331,\n", - " 'tool_name': 'kmer_search',\n", - " 'hostname': 'pear',\n", - " 'parameters': ['-a',\n", - " '-g',\n", - " 200,\n", - " '-m',\n", - " 5,\n", - " '-d',\n", - " '/opt/patric-common/data/kmer_metadata_v2',\n", - " '-u',\n", - " 'http://pear.mcs.anl.gov:6100/query'],\n", - " 'id': '9CCA6D20-C4B3-11EB-A893-36A8BEF382BD'},\n", - " {'parameters': ['annotate_hypothetical_only=1',\n", - " 'dataset_name=Release70',\n", - " 'kmer_size=8'],\n", - " 'hostname': 'pear',\n", - " 'tool_name': 'KmerAnnotationByFigfam',\n", - " 'id': '9CE3769E-C4B3-11EB-A893-36A8BEF382BD',\n", - " 'execution_time': 1622756127.52738},\n", - " {'execute_time': 1622756127.88296,\n", - " 'hostname': 'pear',\n", - " 'parameters': [],\n", - " 'tool_name': 'annotate_proteins_similarity',\n", - " 'id': '9D19B7EA-C4B3-11EB-9714-71B3BDF382BD'}]" + "modelseedpy.core.msgenome.MSFeature" ] }, - "execution_count": 14, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "rast.annotate_genome(genome)" + "type(gene)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Equivalent call from the client it self" + "##### Gene annotation\n", + "Annotation is store as an **ontology term**. When loading from a `.faa` file no ontology term is present but we can add them later." 
] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#genome, res = rast.annotate_genome_from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')\n", - "#res" + "gene.ontology_terms" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "'thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655]'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gene.description" + ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "{'annotation': ['thr operon leader peptide [Escherichia coli str. K-12 substr. 
MG1655]']}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gene.add_ontology_term('annotation', gene.description)\n", + "gene.ontology_terms" + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "#### RAST\n", + "It is possible to annotate genomes with RAST by calling the `RastClient`" + ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from modelseedpy.core.rast_client import RastClient\n", + "rast = RastClient()" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 'C54F08A4-CDB3-11ED-A7E9-CAF09D6086F0',\n", + " 'parameters': ['-a',\n", + " '-g',\n", + " 200,\n", + " '-m',\n", + " 5,\n", + " '-d',\n", + " '/opt/patric-common/data/kmer_metadata_v2',\n", + " '-u',\n", + " 'http://pear.mcs.anl.gov:6100/query'],\n", + " 'hostname': 'pear',\n", + " 'tool_name': 'kmer_search',\n", + " 'execution_time': 1680040751.14837},\n", + " {'id': 'C5638324-CDB3-11ED-A7E9-CAF09D6086F0',\n", + " 'parameters': ['annotate_hypothetical_only=1',\n", + " 'dataset_name=Release70',\n", + " 'kmer_size=8'],\n", + " 'tool_name': 'KmerAnnotationByFigfam',\n", + " 'hostname': 'pear',\n", + " 'execution_time': 1680040751.28257},\n", + " {'parameters': [],\n", + " 'id': 'C5944E1E-CDB3-11ED-8217-51F29F6086F0',\n", + " 'execute_time': 1680040751.60236,\n", + " 'tool_name': 'annotate_proteins_similarity',\n", + " 'hostname': 'pear'}]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rast.annotate_genome(genome)" + ] }, { - "cell_type": "code", - "execution_count": 34, + "cell_type": 
"markdown", "metadata": {}, - "outputs": [], "source": [ - "feature = genome.features.get_by_id('YP_588478.1')" + "RAST annotation is stored in the ontology term **RAST** and this is used as default to build metabolic models with the ModelSEED templates" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'RAST': 'DUF1435 domain-containing protein YjjZ [Escherichia coli str. K-12 substr. MG1655]'}" + "{'annotation': ['thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655]'],\n", + " 'RAST': ['Thr operon leader peptide']}" ] }, - "execution_count": 36, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "feature.ontology_terms" + "gene.ontology_terms" ] }, { @@ -225,14 +302,12 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "feature.add_ontology_term('')" - ] + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -246,7 +321,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.10" } }, "nbformat": 4, From 24ef228fd800755c6380e917e4c513d8ff5d36ef Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 18 Apr 2023 16:09:06 -0500 Subject: [PATCH 021/146] lower/upper case fix --- modelseedpy/core/mstemplate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 4a628e21..49fd98c3 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -598,7 +598,7 @@ def from_table( if "compartment" not in row: row["compartment"] = "c" metabolite = template.compcompounds.get_by_id( - f'{row["id"]}_{lower(row["compartment"])}' + f'{row["id"]}_{row["compartment"].lower()}' ) linked_mets = {} if ( @@ -609,14 +609,14 @@ def 
from_table( for item in array: sub_array = item.split(":") l_met = template.compcompounds.get_by_id( - f'{sub_array[0]}_{lower(row["compartment"])}' + f'{sub_array[0]}_{row["compartment"].lower()}' ) linked_mets[l_met] = float(sub_array[1]) self.add_biomass_component( metabolite, - lower(row["class"]), + row["class"].lower(), float(row["coefficient"]), - upper(row["coefficient_type"]), + row["coefficient_type"].upper(), linked_mets, ) return self From b1e7ff457ad84fcdddbd8f9bfff2a575956ee1ba Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 3 May 2023 09:31:25 -0500 Subject: [PATCH 022/146] Implementing multiple gapfill --- modelseedpy/core/msatpcorrection.py | 73 +++-- modelseedpy/core/msgapfill.py | 190 ++++++++---- modelseedpy/core/msmodelutl.py | 19 +- modelseedpy/fbapkg/gapfillingpkg.py | 448 ++-------------------------- 4 files changed, 206 insertions(+), 524 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index e72835aa..46bd32ea 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import logging -import itertools import cobra +import copy import json import time import pandas as pd @@ -22,7 +22,9 @@ from modelseedpy.helpers import get_template logger = logging.getLogger(__name__) -# logger.setLevel(logging.DEBUG) +logger.setLevel( + logging.WARNING +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO _path = _dirname(_abspath(__file__)) @@ -122,7 +124,9 @@ def __init__( self.coretemplate = core_template self.msgapfill = MSGapfill( - self.modelutl, default_gapfill_templates=core_template + self.modelutl, + default_gapfill_templates=[core_template], + default_target=self.atp_hydrolysis.id, ) # These should stay as None until atp correction is actually run self.cumulative_core_gapfilling = None @@ -209,6 +213,7 @@ def disable_noncore_reactions(self): self.other_compartments = [] # 
Iterating through reactions and disabling for reaction in self.model.reactions: + gfrxn = self.msgapfill.gfmodel.reactions.get_by_id(reaction.id) if reaction.id == self.atp_hydrolysis.id: continue if FBAHelper.is_ex(reaction): @@ -233,10 +238,12 @@ def disable_noncore_reactions(self): logger.debug(reaction.id + " core but reversible") self.noncore_reactions.append([reaction, "<"]) reaction.lower_bound = 0 + gfrxn.lower_bound = 0 if reaction.upper_bound > 0 and template_reaction.upper_bound <= 0: logger.debug(reaction.id + " core but reversible") self.noncore_reactions.append([reaction, ">"]) reaction.upper_bound = 0 + gfrxn.upper_bound = 0 else: logger.debug(f"{reaction.id} non core") if FBAHelper.rxn_compartment(reaction) != self.compartment: @@ -251,6 +258,8 @@ def disable_noncore_reactions(self): self.noncore_reactions.append([reaction, ">"]) reaction.lower_bound = 0 reaction.upper_bound = 0 + gfrxn.lower_bound = 0 + gfrxn.upper_bound = 0 def evaluate_growth_media(self): """ @@ -266,24 +275,22 @@ def evaluate_growth_media(self): output = {} with self.model: self.model.objective = self.atp_hydrolysis.id - # self.model.objective = self.model.problem.Objective(Zero,direction="max") - - logger.debug( - f"ATP bounds: ({self.atp_hydrolysis.lower_bound}, {self.atp_hydrolysis.upper_bound})" - ) - # self.model.objective.set_linear_coefficients({self.atp_hydrolysis.forward_variable:1}) pkgmgr = MSPackageManager.get_pkg_mgr(self.model) + # First prescreening model for ATP production without gapfilling + media_list = [] + min_objectives = {} for media, minimum_obj in self.atp_medias: - logger.debug("evaluate media %s", media) + logger.info("evaluate media %s", media) pkgmgr.getpkg("KBaseMediaPkg").build_package(media) - logger.debug("model.medium %s", self.model.medium) + logger.info("model.medium %s", self.model.medium) solution = self.model.optimize() - logger.debug( + logger.info( "evaluate media %s - %f (%s)", media.id, solution.objective_value, solution.status, ) + 
self.media_gapfill_stats[media] = None output[media.id] = solution.objective_value @@ -291,23 +298,29 @@ def evaluate_growth_media(self): solution.objective_value < minimum_obj or solution.status != "optimal" ): - self.media_gapfill_stats[media] = self.msgapfill.run_gapfilling( - media, - self.atp_hydrolysis.id, - minimum_obj, - check_for_growth=False, - ) - # IF gapfilling fails - need to activate and penalize the noncore and try again + media_list.append(media) + min_objectives[media] = minimum_obj elif solution.objective_value >= minimum_obj: self.media_gapfill_stats[media] = {"reversed": {}, "new": {}} - logger.debug( - "gapfilling stats: %s", - json.dumps(self.media_gapfill_stats[media], indent=2, default=vars), - ) + + # Now running gapfilling on all conditions where initially there was no growth + all_solutions = self.msgapfill.run_multi_gapfill( + media_list, + self.atp_hydrolysis.id, + min_objectives, + check_for_growth=False, + ) + + # Adding the new solutions to the media gapfill stats + for media in all_solutions: + self.media_gapfill_stats[media] = all_solutions[media] if MSATPCorrection.DEBUG: + export_data = {} + for media in self.media_gapfill_stats: + export_data[media.id] = self.media_gapfill_stats[media] with open("debug.json", "w") as outfile: - json.dump(self.media_gapfill_stats[media], outfile) + json.dump(export_data, outfile) return output @@ -342,7 +355,7 @@ def determine_growth_media(self, max_gapfilling=None): if self.max_gapfilling is None: self.max_gapfilling = best_score - logger.debug(f"max_gapfilling: {self.max_gapfilling}, best_score: {best_score}") + logger.info(f"max_gapfilling: {self.max_gapfilling}, best_score: {best_score}") for media in self.media_gapfill_stats: gfscore = 0 @@ -359,6 +372,9 @@ def determine_growth_media(self, max_gapfilling=None): atp_att["selected_media"][media.id] = 0 self.modelutl.save_attributes(atp_att, "ATP_analysis") + if MSATPCorrection.DEBUG: + with open("atp_att_debug.json", "w") as outfile: + 
json.dump(atp_att, outfile) def determine_growth_media2(self, max_gapfilling=None): """ @@ -386,7 +402,7 @@ def scoring_function(media): max_gapfilling = best_score + self.gapfilling_delta for media in media_scores: score = media_scores[media] - logger.debug(score, best_score, max_gapfilling) + logger.info(score, best_score, max_gapfilling) if score <= max_gapfilling: self.selected_media.append(media) @@ -435,7 +451,7 @@ def expand_model_to_genome_scale(self): ) # Removing filtered reactions for item in self.filtered_noncore: - print("Removing " + item[0].id + " " + item[1]) + logger.debug("Removing " + item[0].id + " " + item[1]) if item[1] == ">": item[0].upper_bound = 0 else: @@ -500,6 +516,7 @@ def build_tests(self, multiplier=None): self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) obj_value = self.model.slim_optimize() logger.debug(f"{media.name} = {obj_value};{multiplier}") + logger.debug("Test:" + media.id + ";" + str(multiplier * obj_value)) tests.append( { "media": media, @@ -527,7 +544,7 @@ def run_atp_correction(self): self.evaluate_growth_media() self.determine_growth_media() self.apply_growth_media_gapfilling() - self.evaluate_growth_media() + # self.evaluate_growth_media() self.expand_model_to_genome_scale() return self.build_tests() diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 8d023272..92890c0e 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -1,9 +1,5 @@ # -*- coding: utf-8 -*- import logging -import itertools # !!! 
the import is never used - -logger = logging.getLogger(__name__) - import cobra import re from optlang.symbolics import Zero, add @@ -12,6 +8,11 @@ from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.exceptions import GapfillingError +logger = logging.getLogger(__name__) +logger.setLevel( + logging.WARNING +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO + class MSGapfill: @staticmethod @@ -32,6 +33,10 @@ def __init__( reaction_scores={}, blacklist=[], atp_gapfilling=False, + minimum_obj=0.01, + default_excretion=100, + default_uptake=100, + default_target=None, ): # Discerning input is model or mdlutl and setting internal links if isinstance(model_or_mdlutl, MSModelUtil): @@ -49,7 +54,18 @@ def __init__( "cpd15302", "cpd03091", ] # the cpd11416 compound is filtered during model extension with templates - self.gfmodel = self.lp_filename = self.last_solution = None + # Cloning model to create gapfilling model + self.gfmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) + # Getting package manager for gapfilling model + self.gfpkgmgr = MSPackageManager.get_pkg_mgr(self.gfmodel) + # Setting target from input + if default_target: + self.gfmodel.objective = self.gfmodel.problem.Objective( + self.gfmodel.reactions.get_by_id(default_target).flux_expression, + direction="max", + ) + # Setting parameters for gapfilling + self.lp_filename = self.last_solution = None self.model_penalty = 1 self.default_gapfill_models = default_gapfill_models self.default_gapfill_templates = default_gapfill_templates @@ -61,23 +77,8 @@ def __init__( self.test_conditions = test_conditions self.reaction_scores = reaction_scores self.cumulative_gapfilling = [] - - def run_gapfilling( - self, - media=None, - target=None, - minimum_obj=0.01, - binary_check=False, - prefilter=True, - check_for_growth=True, - ): - if target: - self.model.objective = self.model.problem.Objective( - 
self.model.reactions.get_by_id(target).flux_expression, direction="max" - ) - self.gfmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) - pkgmgr = MSPackageManager.get_pkg_mgr(self.gfmodel) - pkgmgr.getpkg("GapfillingPkg").build_package( + # Building gapfilling package + self.gfpkgmgr.getpkg("GapfillingPkg").build_package( { "auto_sink": self.auto_sink, "model_penalty": self.model_penalty, @@ -87,58 +88,95 @@ def run_gapfilling( "gapfill_models_by_index": self.gapfill_models_by_index, "gapfill_all_indecies_with_default_templates": self.gapfill_all_indecies_with_default_templates, "gapfill_all_indecies_with_default_models": self.gapfill_all_indecies_with_default_models, - "default_excretion": 100, - "default_uptake": 100, + "default_excretion": default_excretion, + "default_uptake": default_uptake, "minimum_obj": minimum_obj, "blacklist": self.blacklist, "reaction_scores": self.reaction_scores, "set_objective": 1, } ) - pkgmgr.getpkg("KBaseMediaPkg").build_package(media) + def test_gapfill_database(self, media, target=None, before_filtering=True): # Testing if gapfilling can work before filtering - if ( - check_for_growth - and not pkgmgr.getpkg("GapfillingPkg").test_gapfill_database() - ): - # save_json_model(self.model, "gfdebugmdl.json") - gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) - if media.id not in gf_sensitivity: - gf_sensitivity[media.id] = {} - if target not in gf_sensitivity[media.id]: - gf_sensitivity[media.id][target] = {} - gf_sensitivity[media.id][target][ - "FBF" - ] = self.mdlutl.find_unproducible_biomass_compounds(target) - self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") - logger.warning("No solution found before filtering for %s", media) - return None + if target: + self.gfmodel.objective = self.gfmodel.problem.Objective( + self.gfmodel.reactions.get_by_id(target).flux_expression, + direction="max", + ) + self.gfpkgmgr.getpkg("GapfillingPkg").reset_original_objective() + else: + target = 
str(self.gfmodel.objective) + target = target.split(" ")[0] + target = target[13:] + if self.gfpkgmgr.getpkg("GapfillingPkg").test_gapfill_database(): + return True + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if media.id not in gf_sensitivity: + gf_sensitivity[media.id] = {} + if target not in gf_sensitivity[media.id]: + gf_sensitivity[media.id][target] = {} + filter_msg = " " + note = "FAF" + if before_filtering: + filter_msg = " before filtering " + note = "FBF" + gf_sensitivity[media.id][target][ + note + ] = self.mdlutl.find_unproducible_biomass_compounds(target) + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") + logger.warning( + "No gapfilling solution found" + + filter_msg + + "for " + + media.id + + " activating " + + target + ) + return False + def prefilter(self, media, target): # Filtering breaking reactions out of the database - if prefilter and self.test_conditions: - pkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( + if self.test_conditions: + self.gfpkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) # Testing if gapfilling can work after filtering - if ( - check_for_growth - and not pkgmgr.getpkg("GapfillingPkg").test_gapfill_database() - ): - # save_json_model(self.model, "gfdebugmdl.json") - gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) - if media.id not in gf_sensitivity: - gf_sensitivity[media.id] = {} - if target not in gf_sensitivity[media.id]: - gf_sensitivity[media.id][target] = {} - gf_sensitivity[media.id][target][ - "FAF" - ] = self.mdlutl.find_unproducible_biomass_compounds(target) - self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") - logger.warning("No solution found after filtering for %s", media) + if not self.test_gapfill_database(media, target, before_filtering=False): + return False + return True + + def run_gapfilling( + self, + media=None, + target=None, + minimum_obj=None, + binary_check=False, + 
prefilter=True, + check_for_growth=True, + ): + # Setting target and media if specified + if target: + self.gfmodel.objective = self.gfmodel.problem.Objective( + self.gfmodel.reactions.get_by_id(target).flux_expression, + direction="max", + ) + self.gfpkgmgr.getpkg("GapfillingPkg").reset_original_objective() + if media: + self.gfpkgmgr.getpkg("KBaseMediaPkg").build_package(media) + if minimum_obj: + self.gfpkgmgr.getpkg("GapfillingPkg").set_min_objective(minimum_obj) + + # Testing if gapfilling can work before filtering + if not self.test_gapfill_database(media, before_filtering=True): return None + # Filtering + if prefilter: + if not self.prefilter(media, target): + return None + # Printing the gapfilling LP file if self.lp_filename: with open(self.lp_filename, "w") as out: @@ -157,9 +195,13 @@ def run_gapfilling( return None # Computing solution and ensuring all tests still pass - self.last_solution = pkgmgr.getpkg("GapfillingPkg").compute_gapfilled_solution() + self.last_solution = self.gfpkgmgr.getpkg( + "GapfillingPkg" + ).compute_gapfilled_solution() if self.test_conditions: - self.last_solution = pkgmgr.getpkg("GapfillingPkg").run_test_conditions( + self.last_solution = self.gfpkgmgr.getpkg( + "GapfillingPkg" + ).run_test_conditions( self.test_conditions, self.last_solution, self.test_condition_iteration_limit, @@ -172,7 +214,7 @@ def run_gapfilling( # Running binary check to reduce solution to minimal reaction soltuion if binary_check: - self.last_solution = pkgmgr.getpkg( + self.last_solution = self.gfpkgmgr.getpkg( "GapfillingPkg" ).binary_check_gapfilling_solution() @@ -183,6 +225,32 @@ def run_gapfilling( self.last_solution["binary_check"] = binary_check return self.last_solution + def run_multi_gapfill( + self, + media_list, + target=None, + minimum_objectives={}, + binary_check=False, + prefilter=True, + check_for_growth=True, + ): + first = True + solution_dictionary = {} + for item in media_list: + minimum_obj = None + if item in 
minimum_objectives: + minimum_obj = minimum_objectives[item] + if first: + solution_dictionary[item] = self.run_gapfilling( + item, target, minimum_obj, binary_check, True, True + ) + else: + solution_dictionary[item] = self.run_gapfilling( + item, None, minimum_obj, binary_check, False, True + ) + false = False + return solution_dictionary + def integrate_gapfill_solution( self, solution, cumulative_solution=[], link_gaps_to_objective=True ): diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index a44c5653..371abeb7 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -16,12 +16,9 @@ # from builtins import None logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -# handler = logging.StreamHandler(sys.stdout) -# handler.setLevel(logging.DEBUG) -# formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') -# handler.setFormatter(formatter) -# logger.addHandler(handler) +logger.setLevel( + logging.INFO +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO class MSModelUtil: @@ -924,13 +921,15 @@ def reaction_expansion_test( reaction_list, condition, currmodel ) for item in new_filtered: - filtered_list.append(item) + if item not in filtered_list: + filtered_list.append(item) else: new_filtered = self.linear_expansion_test( reaction_list, condition, currmodel ) for item in new_filtered: - filtered_list.append(item) + if item not in filtered_list: + filtered_list.append(item) # Restoring knockout of newly filtered reactions, which expire after exiting the "with" block above for item in new_filtered: if item[1] == ">": @@ -938,10 +937,10 @@ def reaction_expansion_test( else: item[0].lower_bound = 0 toc = time.perf_counter() - logger.debug( + logger.info( "Expansion time:" + condition["media"].id + ":" + str((toc - tic)) ) - logger.debug( + logger.info( "Filtered count:" + str(len(filtered_list)) + " out of " diff --git 
a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index f14eb7ed..465e5558 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -3,6 +3,7 @@ from __future__ import absolute_import import logging +import sys import re import json from optlang.symbolics import Zero, add @@ -19,427 +20,11 @@ from modelseedpy.core.fbahelper import FBAHelper logger = logging.getLogger(__name__) -# logger.setLevel(logging.DEBUG) +logger.setLevel( + logging.WARNING +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO -base_blacklist = { - "rxn10157": "<", - "rxn09295": "<", - "rxn05938": "<", - "rxn08628": ">", - "rxn10155": "<", - "rxn01353": "<", - "rxn05683": "<", - "rxn09193": "<", - "rxn09003": "<", - "rxn01128": ">", - "rxn08655": "<", - "rxn09272": "<", - "rxn05313": "<", - "rxn01510": ">", - "rxn05297": ">", - "rxn00507": "<", - "rxn05596": "<", - "rxn01674": "<", - "rxn01679": "<", - "rxn00778": ">", - "rxn05206": ">", - "rxn00239": "<", - "rxn05937": "<", - "rxn00715": "<", - "rxn05638": ">", - "rxn05289": ">", - "rxn00839": "<", - "rxn08866": "<", - "rxn10901": "<", - "rxn09331": "<", - "rxn05242": "<", - "rxn12549": "<", - "rxn13143": "<", - "rxn12498": "<", - "rxn08373": "<", - "rxn05208": "<", - "rxn09372": "<", - "rxn00571": ">", - "rxn08104": "<", - "rxn08704": "<", - "rxn07191": "<", - "rxn09672": "<", - "rxn01048": ">", - "rxn11267": ">", - "rxn08290": "<", - "rxn09307": "<", - "rxn05676": ">", - "rxn09653": "<", - "rxn11277": "<", - "rxn00976": "<", - "rxn02520": "<", - "rxn08275": "<", - "rxn09121": "<", - "rxn08999": "<", - "rxn08633": "<", - "rxn08610": "<", - "rxn09218": "<", - "rxn05626": "<", - "rxn11320": "<", - "rxn10058": ">", - "rxn08544": "<", - "rxn12539": "<", - "rxn08990": "<", - "rxn09348": "<", - "rxn00378": "<", - "rxn05243": "<", - "rxn02154": "<", - "rxn12587": "<", - "rxn00125": "<", - "rxn05648": "<", - "rxn13722": "<", - "rxn10910": ">", - 
"rxn05308": ">", - "rxn08585": "<", - "rxn14207": "<", - "rxn08682": "<", - "rxn10895": "<", - "rxn09655": "<", - "rxn11934": "<", - "rxn01742": ">", - "rxn05222": ">", - "rxn09942": "<", - "rxn13753": ">", - "rxn10857": "<", - "rxn03468": "<", - "rxn04942": "<", - "rxn10990": ">", - "rxn08639": "<", - "rxn09248": "<", - "rxn11935": ">", - "rxn00870": ">", - "rxn08314": "<", - "rxn09378": "<", - "rxn09269": "<", - "rxn10057": ">", - "rxn13702": ">", - "rxn00517": "<", - "rxn09221": ">", - "rxn01505": ">", - "rxn13692": ">", - "rxn05573": "<", - "rxn10123": ">", - "rxn09005": "<", - "rxn05244": "<", - "rxn05940": "<", - "rxn10124": ">", - "rxn06202": ">", - "rxn09660": "<", - "rxn02260": ">", - "rxn08912": "<", - "rxn05760": ">", - "rxn05580": ">", - "rxn02181": ">", - "rxn09339": "<", - "rxn00767": "<", - "rxn09118": "<", - "rxn05303": "<", - "rxn06110": "<", - "rxn12800": "<", - "rxn10966": "<", - "rxn12561": "<", - "rxn04678": ">", - "rxn10818": "<", - "rxn08166": "<", - "rxn02044": ">", - "rxn12623": "<", - "rxn13392": ">", - "rxn02283": "<", - "rxn13647": ">", - "rxn08653": "<", - "rxn05218": ">", - "rxn11676": ">", - "rxn00197": "<", - "rxn00697": "<", - "rxn12575": ">", - "rxn08188": "<", - "rxn01215": "<", - "rxn08730": ">", - "rxn08519": ">", - "rxn08642": "<", - "rxn05245": "<", - "rxn04042": "<", - "rxn01443": ">", - "rxn08535": "<", - "rxn03983": "<", - "rxn08317": "<", - "rxn14173": ">", - "rxn08868": "<", - "rxn05893": ">", - "rxn00435": ">", - "rxn13724": "<", - "rxn09681": "<", - "rxn00572": ">", - "rxn05942": "<", - "rxn11158": "<", - "rxn05562": "<", - "rxn10868": "<", - "rxn10426": "<", - "rxn00941": ">", - "rxn08240": "<", - "rxn05220": ">", - "rxn01228": ">", - "rxn12540": "<", - "rxn10618": ">", - "rxn09659": "<", - "rxn08985": ">", - "rxn05523": "<", - "rxn00421": "<", - "rxn09385": "<", - "rxn08542": "<", - "rxn09658": "<", - "rxn01173": "<", - "rxn10977": "<", - "rxn05216": "<", - "rxn13748": ">", - "rxn10769": ">", - "rxn00451": "<", - 
"rxn01639": "<", - "rxn08661": "<", - "rxn09308": "<", - "rxn09260": "<", - "rxn00253": "<", - "rxn05207": "<", - "rxn01667": "<", - "rxn08063": "<", - "rxn01508": ">", - "rxn09657": "<", - "rxn01209": ">", - "rxn00548": ">", - "rxn12617": "<", - "rxn08747": ">", - "rxn08096": "<", - "rxn11951": "<", - "rxn09061": "<", - "rxn10978": "<", - "rxn02748": ">", - "rxn09663": "<", - "rxn08737": "<", - "rxn13127": "<", - "rxn09366": "<", - "rxn05634": "<", - "rxn05554": "<", - "rxn09266": ">", - "rxn04676": ">", - "rxn11078": ">", - "rxn04932": "<", - "rxn00607": ">", - "rxn08856": "<", - "rxn12624": "<", - "rxn05215": "<", - "rxn13686": "<", - "rxn12529": "<", - "rxn00234": "<", - "rxn13689": ">", - "rxn08117": "<", - "rxn05315": ">", - "rxn08865": "<", - "rxn11678": ">", - "rxn00518": "<", - "rxn00195": "<", - "rxn10054": "<", - "rxn12532": "<", - "rxn05902": ">", - "rxn12777": "<", - "rxn12822": ">", - "rxn13735": ">", - "rxn00427": "<", - "rxn13196": "<", - "rxn08284": "<", - "rxn10576": ">", - "rxn00891": "<", - "rxn08293": "<", - "rxn00374": ">", - "rxn08795": "<", - "rxn12583": "<", - "rxn00918": ">", - "rxn08525": "<", - "rxn10427": ">", - "rxn09271": "<", - "rxn10860": "<", - "rxn10600": ">", - "rxn13729": ">", - "rxn01375": "<", - "rxn13726": ">", - "rxn10587": "<", - "rxn08672": "<", - "rxn10588": ">", - "rxn08152": ">", - "rxn09306": "<", - "rxn00635": "<", - "rxn08427": "<", - "rxn05225": ">", - "rxn00680": ">", - "rxn08786": ">", - "rxn08721": "<", - "rxn11339": "<", - "rxn05749": "<", - "rxn01187": ">", - "rxn08625": "<", - "rxn06677": "<", - "rxn12302": ">", - "rxn02770": "<", - "rxn05628": "<", - "rxn13706": ">", - "rxn12739": "<", - "rxn00177": "<", - "rxn09896": ">", - "rxn12574": "<", - "rxn12533": ">", - "rxn08537": ">", - "rxn05651": ">", - "rxn08170": "<", - "rxn05240": "<", - "rxn00663": ">", - "rxn12589": "<", - "rxn09299": "<", - "rxn02059": "<", - "rxn12217": ">", - "rxn06592": "<", - "rxn05939": ">", - "rxn08581": "<", - "rxn00430": "<", - 
"rxn09283": ">", - "rxn08919": "<", - "rxn13660": "<", - "rxn08065": "<", - "rxn08428": ">", - "rxn10936": ">", - "rxn05238": ">", - "rxn05685": "<", - "rxn08920": ">", - "rxn07193": "<", - "rxn08265": "<", - "rxn12554": "<", - "rxn08094": "<", - "rxn13727": ">", - "rxn04158": "<", - "rxn09839": "<", - "rxn10820": "<", - "rxn00869": ">", - "rxn00331": ">", - "rxn09034": "<", - "rxn01136": "<", - "rxn09247": "<", - "rxn08302": "<", - "rxn10594": "<", - "rxn08670": ">", - "rxn11334": "<", - "rxn09941": "<", - "rxn02919": "<", - "rxn09670": "<", - "rxn10892": "<", - "rxn09794": "<", - "rxn02332": ">", - "rxn00244": ">", - "rxn08030": "<", - "rxn12526": "<", - "rxn13150": ">", - "rxn05486": "<", - "rxn10852": ">", - "rxn13790": ">", - "rxn06348": ">", - "rxn09172": ">", - "rxn03653": ">", - "rxn05213": "<", - "rxn01869": "<", - "rxn08142": "<", - "rxn12606": "<", - "rxn11916": ">", - "rxn05748": "<", - "rxn08543": "<", - "rxn01107": ">", - "rxn05708": "<", - "rxn08169": "<", - "rxn06641": ">", - "rxn12578": "<", - "rxn01172": "<", - "rxn02120": ">", - "rxn05669": "<", - "rxn11322": "<", - "rxn12630": "<", - "rxn00698": "<", - "rxn05507": ">", - "rxn12530": "<", - "rxn09304": "<", - "rxn05532": ">", - "rxn03644": ">", - "rxn08733": "<", - "rxn13733": "<", - "rxn10044": ">", - "rxn00176": ">", - "rxn01364": ">", - "rxn02198": ">", - "rxn06990": "<", - "rxn08424": "<", - "rxn08069": "<", - "rxn05611": "<", - "rxn11973": "<", - "rxn12665": ">", - "rxn05241": "<", - "rxn08982": ">", - "rxn00542": ">", - "rxn12588": "<", - "rxn03517": ">", - "rxn01805": "<", - "rxn13203": ">", - "rxn08614": "<", - "rxn12200": ">", - "rxn13811": "<", - "rxn08377": "<", - "rxn11342": ">", - "rxn02976": "<", - "rxn08217": "<", - "rxn07921": ">", - "rxn09944": ">", - "rxn02401": "<", - "rxn08429": ">", - "rxn00905": "<", - "rxn08196": "<", - "rxn03054": "<", - "rxn08643": "<", - "rxn01874": "<", - "rxn08028": "<", - "rxn01641": ">", - "rxn03442": "<", - "rxn02172": "<", - "rxn10692": ">", - 
"rxn10613": ">", - "rxn12928": ">", - "rxn12994": ">", - "rxn13843": ">", - "rxn12942": ">", - "rxn12934": ">", - "rxn16827": ">", - "rxn12941": ">", - "rxn01736": ">", - "rxn14109": ">", - "rxn15060": ">", - "rxn15064": ">", - "rxn30685": ">", - "rxn10095": ">", - "rxn16143": ">", - "rxn25271": ">", - "rxn25160": ">", - "rxn30917": ">", - "rxn16843": ">", - "rxn08921": ">", - "rxn09390": ">", - "rxn27362": ">", - "rxn02664": ">", - "rxn24638": ">", - "rxn24613": ">", - "rxn24611": ">", - "rxn14428": ">", - "rxn03079": ">", - "rxn03020": ">", - "rxn10471": "<", -} +base_blacklist = {} class GapfillingPkg(BaseFBAPkg): @@ -600,6 +185,9 @@ def build_package(self, parameters): reaction_objective.set_linear_coefficients(obj_coef) self.parameters["gfobj"] = self.model.objective + def reset_original_objective(self): + self.parameters["origobj"] = self.model.objective + def extend_model_with_model_for_gapfilling(self, source_model, index): new_metabolites = {} new_reactions = {} @@ -980,7 +568,7 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): condition["change"] = False if len(filtered_list) > 0: if max_iterations > 0: - print("Gapfilling test failed " + str(11 - max_iterations)) + logger.warning("Gapfilling test failed " + str(11 - max_iterations)) # Forcing filtered reactions to zero for item in filtered_list: if item[1] == ">": @@ -1003,7 +591,7 @@ def test_gapfill_database(self): self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 self.model.objective = self.parameters["origobj"] solution = self.model.optimize() - logger.info( + logger.debug( "Objective with gapfill database:" + str(solution.objective_value) + "; min objective:" @@ -1017,6 +605,12 @@ def test_gapfill_database(self): return False return True + def set_min_objective(self, min_objective): + self.parameters["minimum_obj"] = min_objective + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] + def 
filter_database_based_on_tests(self, test_conditions): # Setting the minimal growth constraint to zero self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 @@ -1036,7 +630,7 @@ def filter_database_based_on_tests(self, test_conditions): ) # Now constraining filtered reactions to zero for item in filtered_list: - logger.info("Filtering:" + item[0].id + item[1]) + logger.debug("Filtering:" + item[0].id + item[1]) if item[1] == ">": self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: @@ -1079,14 +673,14 @@ def filter_database_based_on_tests(self, test_conditions): else: count += -1 rxn.lower_bound = 0 - logger.info("Reactions unfiltered:" + str(count)) + logger.debug("Reactions unfiltered:" + str(count)) # Checking for model reactions that can be removed to enable all tests to pass self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 filtered_list = self.modelutl.reaction_expansion_test( self.parameters["original_reactions"], test_conditions ) for item in filtered_list: - logger.info("Filtering:" + item[0].id + item[1]) + logger.debug("Filtering:" + item[0].id + item[1]) if item[1] == ">": self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: @@ -1109,15 +703,19 @@ def compute_gapfilled_solution(self, flux_values=None): and "forward" in self.gapfilling_penalties[reaction.id] ): if "added" in self.gapfilling_penalties[reaction.id]: + logger.debug(f"New gapfilled reaction: {reaction.id} >") output["new"][reaction.id] = ">" else: + logger.debug(f"Reversed gapfilled reaction: {reaction.id} >") output["reversed"][reaction.id] = ">" elif ( flux_values[reaction.id]["reverse"] > Zero and "reverse" in self.gapfilling_penalties[reaction.id] ): if "added" in self.gapfilling_penalties[reaction.id]: + logger.debug(f"New gapfilled reaction: {reaction.id} <") output["new"][reaction.id] = "<" else: + logger.debug(f"Reversed gapfilled reaction: {reaction.id} <") output["reversed"][reaction.id] = "<" return output From 
9a6da4521df92689f27f4a8794e32f40bccdc1dd Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 16 May 2023 01:18:17 -0500 Subject: [PATCH 023/146] Fixing test --- tests/core/test_msatpcorreption.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/core/test_msatpcorreption.py b/tests/core/test_msatpcorreption.py index 108cc3ec..3d036193 100644 --- a/tests/core/test_msatpcorreption.py +++ b/tests/core/test_msatpcorreption.py @@ -251,7 +251,7 @@ def test_ms_atp_correction_and_gap_fill1( model = get_model_with_infinite_atp_loop(["GLCpts_c0", "GLUSy_c0", "GLUDy_c0"]) model.reactions.ATPM_c0.lower_bound = 0 model.reactions.ATPM_c0.upper_bound = 1000 - + model.objective = "ATPM_c0" atp_correction = MSATPCorrection( model, template, @@ -260,7 +260,6 @@ def test_ms_atp_correction_and_gap_fill1( load_default_medias=False, ) tests = atp_correction.run_atp_correction() - # expected tests = [{'media': MSMedia object, 'is_max_threshold': True, 'threshold': 21.0, 'objective': 'ATPM_c0'}] assert tests @@ -268,13 +267,13 @@ def test_ms_atp_correction_and_gap_fill1( assert tests[0]["threshold"] > 0 assert tests[0]["objective"] == "ATPM_c0" + model.objective = "BIOMASS_Ecoli_core_w_GAM_c0" gap_fill = MSGapfill(model, [template_genome_scale], [], tests, {}, []) result = gap_fill.run_gapfilling( media_genome_scale_glucose_aerobic, "BIOMASS_Ecoli_core_w_GAM_c0", minimum_obj=0.1, ) - # either GLUSy_c0 or GLUDy_c0 should be gap filled for glutamate assert result From 3256ea0ad2e0d5b199500afe654c18b33d54ee89 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 14 Jun 2023 23:48:42 -0500 Subject: [PATCH 024/146] Renaming ATP --- modelseedpy/core/msatpcorrection.py | 36 ++++++++++++++--------------- modelseedpy/data/atp_medias.tsv | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 46bd32ea..c848fbeb 100644 --- a/modelseedpy/core/msatpcorrection.py +++ 
b/modelseedpy/core/msatpcorrection.py @@ -23,28 +23,28 @@ logger = logging.getLogger(__name__) logger.setLevel( - logging.WARNING + logging.INFO ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO _path = _dirname(_abspath(__file__)) min_gap = { - "Glc/O2": 5, - "Etho/O2": 0.01, - "Ac/O2": 1, - "Pyr/O2": 3, - "Glyc/O2": 2, - "Fum/O2": 3, - "Succ/O2": 2, - "Akg/O2": 2, - "LLac/O2": 2, - "Dlac/O2": 2, - "For/O2": 2, - "For/NO3": 1.5, - "Pyr/NO": 2.5, - "Pyr/NO2": 2.5, - "Pyr/NO3": 2.5, - "Pyr/SO4": 2.5, + "Glc.O2": 5, + "Etho.O2": 0.01, + "Ac.O2": 1, + "Pyr.O2": 3, + "Glyc.O2": 2, + "Fum.O2": 3, + "Succ.O2": 2, + "Akg.O2": 2, + "LLac.O2": 2, + "Dlac.O2": 2, + "For.O2": 2, + "For.NO3": 1.5, + "Pyr.NO": 2.5, + "Pyr.NO2": 2.5, + "Pyr.NO3": 2.5, + "Pyr.SO4": 2.5, } @@ -451,7 +451,7 @@ def expand_model_to_genome_scale(self): ) # Removing filtered reactions for item in self.filtered_noncore: - logger.debug("Removing " + item[0].id + " " + item[1]) + logger.info("Removing " + item[0].id + " " + item[1]) if item[1] == ">": item[0].upper_bound = 0 else: diff --git a/modelseedpy/data/atp_medias.tsv b/modelseedpy/data/atp_medias.tsv index 4a4b7a84..0bf5e56c 100644 --- a/modelseedpy/data/atp_medias.tsv +++ b/modelseedpy/data/atp_medias.tsv @@ -1,4 +1,4 @@ -seed Glc/O2 Ac/O2 Etho/O2 Pyr/O2 Glyc/O2 Fum/O2 Succ/O2 Akg/O2 LLac/O2 Dlac/O2 For/O2 Glc Ac Etho Pyr Glyc Fum Succ Akg Llac Dlac For mal-L For/NO2 For/NO3 For/NO Pyr/NO2 Pyr/NO3 Pyr/NO Ac/NO2 Ac/NO3 Ac/NO Glc/DMSO Glc/TMAO Pyr/DMSO Pyr/TMAO Pyr/SO4 Pyr/SO3 H2/CO2 H2/Ac For/SO4/H2 LLac/SO4/H2 For/SO4 LLac/SO4 H2/SO4 empty Light ANME Methane +seed Glc.O2 Ac.O2 Etho.O2 Pyr.O2 Glyc.O2 Fum.O2 Succ.O2 Akg.O2 LLac.O2 Dlac.O2 For.O2 Glc Ac Etho Pyr Glyc Fum Succ Akg Llac Dlac For mal-L For.NO2 For.NO3 For.NO Pyr.NO2 Pyr.NO3 Pyr.NO Ac.NO2 Ac.NO3 Ac.NO Glc.DMSO Glc.TMAO Pyr.DMSO Pyr.TMAO Pyr.SO4 Pyr.SO3 H2.CO2 H2.Ac For.SO4.H2 LLac.SO4.H2 For.SO4 LLac.SO4 H2.SO4 empty Light ANME Methane 
EX_cpd00027_e0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 EX_cpd00024_e0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 EX_cpd00106_e0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 From da33bd2b366cadab4da89d5f790ef3607fb54dba Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 26 Jun 2023 12:34:56 -0500 Subject: [PATCH 025/146] fixes --- modelseedpy/biochem/modelseed_compound.py | 2 +- modelseedpy/core/msbuilder.py | 1 + modelseedpy/core/msgenome.py | 9 +++++++-- modelseedpy/core/rast_client.py | 8 ++++++++ 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/modelseedpy/biochem/modelseed_compound.py b/modelseedpy/biochem/modelseed_compound.py index 1d00435d..a3ea75f3 100644 --- a/modelseedpy/biochem/modelseed_compound.py +++ b/modelseedpy/biochem/modelseed_compound.py @@ -71,7 +71,7 @@ def to_template_compartment_compound(self, compartment): self.abbr, ) # build Template Compartment Compound - res = MSTemplateSpecies(cpd_id, self.charge, compartment, self.id) + res = MSTemplateSpecies(cpd_id, self.charge, compartment, metabolite.id) # assign Compound to Compartment Compound res._template_compound = metabolite diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index cd16d75e..e376ae0b 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -315,6 +315,7 @@ def __init__( self.reaction_to_complex_sets = None self.compartments = None self.base_model = None + self.compartments_index = None # TODO: implement custom index by compartment self.index = index def build_drains(self): diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 875699c2..e41953d2 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -15,8 +15,13 @@ def normalize_role(s): def read_fasta(f, split=DEFAULT_SPLIT, h_func=None): - with 
open(f, "r") as fh: - return parse_fasta_str(fh.read(), split, h_func) + if f.endswith('.gz'): + import gzip + with gzip.open(f, 'rb') as fh: + return parse_fasta_str(fh.read().decode('utf-8'), split, h_func) + else: + with open(f, "r") as fh: + return parse_fasta_str(fh.read(), split, h_func) def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None): diff --git a/modelseedpy/core/rast_client.py b/modelseedpy/core/rast_client.py index 575cf0d4..ebe06cb5 100644 --- a/modelseedpy/core/rast_client.py +++ b/modelseedpy/core/rast_client.py @@ -84,6 +84,14 @@ def annotate_genome_from_fasta(self, filepath, split="|"): return genome, res + def annotate_protein_sequence(self, protein_id: str, protein_seq: str): + p_features = [{"id": protein_id, "protein_translation": protein_seq}] + return self.f(p_features) + + def annotate_protein_sequences(self, protein_seqs: dict): + p_features = [{"id": protein_id, "protein_translation": protein_seq}] + return self.f(p_features) + def f1(self, protein_id, protein_seq): p_features = [{"id": protein_id, "protein_translation": protein_seq}] return self.f(p_features) From d13f6c20b7f70413d668c4ddcbb23b27af083f1b Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 26 Jun 2023 12:35:47 -0500 Subject: [PATCH 026/146] black --- modelseedpy/core/msgenome.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index e41953d2..78f1e004 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -15,10 +15,11 @@ def normalize_role(s): def read_fasta(f, split=DEFAULT_SPLIT, h_func=None): - if f.endswith('.gz'): + if f.endswith(".gz"): import gzip - with gzip.open(f, 'rb') as fh: - return parse_fasta_str(fh.read().decode('utf-8'), split, h_func) + + with gzip.open(f, "rb") as fh: + return parse_fasta_str(fh.read().decode("utf-8"), split, h_func) else: with open(f, "r") as fh: return parse_fasta_str(fh.read(), split, h_func) From 
b7edac04b16ae63ba68b7afa344dd8995c81e946 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sun, 2 Jul 2023 14:30:36 -0500 Subject: [PATCH 027/146] Changing zero threshold on gapfilling --- modelseedpy/core/msgapfill.py | 8 +++++++- modelseedpy/core/msmodelutl.py | 2 ++ modelseedpy/fbapkg/gapfillingpkg.py | 10 +++++----- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 92890c0e..774c1ca8 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -67,6 +67,7 @@ def __init__( # Setting parameters for gapfilling self.lp_filename = self.last_solution = None self.model_penalty = 1 + self.default_minimum_objective = minimum_obj self.default_gapfill_models = default_gapfill_models self.default_gapfill_templates = default_gapfill_templates self.gapfill_templates_by_index, self.gapfill_models_by_index = {}, {} @@ -165,6 +166,8 @@ def run_gapfilling( self.gfpkgmgr.getpkg("GapfillingPkg").reset_original_objective() if media: self.gfpkgmgr.getpkg("KBaseMediaPkg").build_package(media) + if not minimum_obj: + minimum_obj = self.default_minimum_objective if minimum_obj: self.gfpkgmgr.getpkg("GapfillingPkg").set_min_objective(minimum_obj) @@ -230,14 +233,17 @@ def run_multi_gapfill( media_list, target=None, minimum_objectives={}, + default_minimum_objective = None, binary_check=False, prefilter=True, check_for_growth=True, ): + if not default_minimum_objective: + default_minimum_objective = self.default_minimum_objective first = True solution_dictionary = {} for item in media_list: - minimum_obj = None + minimum_obj = default_minimum_objective if item in minimum_objectives: minimum_obj = minimum_objectives[item] if first: diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 371abeb7..d24ac90f 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -88,6 +88,8 @@ def search_name(name): @staticmethod def get(model, 
create_if_missing=True): + if isinstance(model, MSModelUtil): + return model if model in MSModelUtil.mdlutls: return MSModelUtil.mdlutls[model] elif create_if_missing: diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 465e5558..d066c1a1 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -25,7 +25,7 @@ ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO base_blacklist = {} - +zero_threshold = 1e-8 class GapfillingPkg(BaseFBAPkg): """ """ @@ -532,12 +532,12 @@ def knockout_gf_reactions_outside_solution(self, solution=None, flux_values=None if rxnobj.id in self.gapfilling_penalties: if ( "reverse" in self.gapfilling_penalties[rxnobj.id] - and flux_values[rxnobj.id]["reverse"] <= Zero + and flux_values[rxnobj.id]["reverse"] <= zero_threshold ): rxnobj.lower_bound = 0 if ( "forward" in self.gapfilling_penalties[rxnobj.id] - and flux_values[rxnobj.id]["forward"] <= Zero + and flux_values[rxnobj.id]["forward"] <= zero_threshold ): rxnobj.upper_bound = 0 rxnobj.update_variable_bounds() @@ -699,7 +699,7 @@ def compute_gapfilled_solution(self, flux_values=None): for reaction in self.model.reactions: if reaction.id in self.gapfilling_penalties: if ( - flux_values[reaction.id]["forward"] > Zero + flux_values[reaction.id]["forward"] > zero_threshold and "forward" in self.gapfilling_penalties[reaction.id] ): if "added" in self.gapfilling_penalties[reaction.id]: @@ -709,7 +709,7 @@ def compute_gapfilled_solution(self, flux_values=None): logger.debug(f"Reversed gapfilled reaction: {reaction.id} >") output["reversed"][reaction.id] = ">" elif ( - flux_values[reaction.id]["reverse"] > Zero + flux_values[reaction.id]["reverse"] > zero_threshold and "reverse" in self.gapfilling_penalties[reaction.id] ): if "added" in self.gapfilling_penalties[reaction.id]: From 97b5d4fe436dc477fefbb451bffbf057660685eb Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 3 
Jul 2023 00:47:30 -0500 Subject: [PATCH 028/146] Adding version printing to ModelSEEDpy so I can be sure what version of the code is running --- modelseedpy/__init__.py | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index aabb2c53..24c19a8e 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -11,27 +11,11 @@ from os.path import dirname as _dirname from modelseedpy.helpers import config -logging_hash = { - "debug": logging.DEBUG, - "critical": logging.CRITICAL, - "error": logging.ERROR, - "warning": logging.WARNING, - "info": logging.INFO, -} +__author__ = "Christopher Henry" +__email__ = "chenry@anl.gov" +__version__ = "0.2.2" -# Configuing modelseedpy logger -logger = logging.getLogger(__name__) -c_handler = logging.StreamHandler() -c_handler.setLevel(logging_hash[config.get("logging", "console_level")]) -c_format = logging.Formatter("%(name)s - %(levelname)s - %(message)s") -c_handler.setFormatter(c_format) -logger.addHandler(c_handler) -if config.get("logging", "log_file") == "yes": - f_handler = logging.FileHandler(config.get("logging", "filename"), mode="a") - f_handler.setLevel(logging_hash[config.get("logging", "file_level")]) - f_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") - f_handler.setFormatter(f_format) - logger.addHandler(f_handler) +print("modelseedpy", __version__) if sys.version_info[0] == 2: logger.warning( @@ -83,5 +67,3 @@ ) from modelseedpy.multiomics import MSExpression - -__version__ = "0.2.2" From a27ba8f7d1940435a0b966943e41d85b57ca5fe8 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 01:01:40 -0500 Subject: [PATCH 029/146] Resetting gapfill threshold for zero --- modelseedpy/fbapkg/gapfillingpkg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index d066c1a1..715f7667 100644 
--- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -25,7 +25,7 @@ ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO base_blacklist = {} -zero_threshold = 1e-8 +zero_threshold = 0 class GapfillingPkg(BaseFBAPkg): """ """ From 3dc662bc8c2852739ae58ed42c39aa5868572dab Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 11:35:27 -0500 Subject: [PATCH 030/146] Restoring small gapfilling threshold --- modelseedpy/core/msmodelutl.py | 9 +++++---- modelseedpy/fbapkg/gapfillingpkg.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index d24ac90f..e1754b0d 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -101,6 +101,7 @@ def get(model, create_if_missing=True): def __init__(self, model): self.model = model self.pkgmgr = MSPackageManager.get_pkg_mgr(model) + self.wsid = None self.atputl = None self.gfutl = None self.metabolite_hash = None @@ -548,7 +549,7 @@ def test_solution(self, solution, keep_changes=False): rxnobj.upper_bound = 0 objective = tempmodel.slim_optimize() if objective < solution["minobjective"]: - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " needed:" @@ -560,7 +561,7 @@ def test_solution(self, solution, keep_changes=False): else: removed_rxns.append(rxnobj) unneeded.append([rxn_id, solution[key][rxn_id], key]) - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " not needed:" @@ -571,7 +572,7 @@ def test_solution(self, solution, keep_changes=False): rxnobj.lower_bound = 0 objective = tempmodel.slim_optimize() if objective < solution["minobjective"]: - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " needed:" @@ -583,7 +584,7 @@ def test_solution(self, solution, keep_changes=False): else: removed_rxns.append(rxnobj) unneeded.append([rxn_id, solution[key][rxn_id], key]) - logger.debug( + logger.info( 
rxn_id + solution[key][rxn_id] + " not needed:" diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 715f7667..d066c1a1 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -25,7 +25,7 @@ ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO base_blacklist = {} -zero_threshold = 0 +zero_threshold = 1e-8 class GapfillingPkg(BaseFBAPkg): """ """ From 7f694bf515b900f2a8e27e9f0bf87a5100e7099b Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 11:46:02 -0500 Subject: [PATCH 031/146] Fixing gapfilling target issue --- modelseedpy/core/msgapfill.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 774c1ca8..09ba8c5c 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -60,6 +60,7 @@ def __init__( self.gfpkgmgr = MSPackageManager.get_pkg_mgr(self.gfmodel) # Setting target from input if default_target: + self.default_target = default_target self.gfmodel.objective = self.gfmodel.problem.Objective( self.gfmodel.reactions.get_by_id(default_target).flux_expression, direction="max", @@ -164,6 +165,8 @@ def run_gapfilling( direction="max", ) self.gfpkgmgr.getpkg("GapfillingPkg").reset_original_objective() + else: + target = self.default_target if media: self.gfpkgmgr.getpkg("KBaseMediaPkg").build_package(media) if not minimum_obj: From a8563df8fd9bb31e51e93395b4f8ce420ffe4e38 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 14:53:44 -0500 Subject: [PATCH 032/146] Fixing bug in ATP correction --- modelseedpy/core/msatpcorrection.py | 39 +++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index c848fbeb..38114ace 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -99,6 
+99,7 @@ def __init__( output = self.modelutl.add_atp_hydrolysis(compartment) self.atp_hydrolysis = output["reaction"] + self.media_hash = {} self.atp_medias = [] if load_default_medias: self.load_default_medias() @@ -107,6 +108,7 @@ def __init__( self.atp_medias.append(media) else: self.atp_medias.append([media, 0.01]) + self.media_hash[media.id] = media self.forced_media = [] for media_id in forced_media: @@ -292,6 +294,7 @@ def evaluate_growth_media(self): ) self.media_gapfill_stats[media] = None + output[media.id] = solution.objective_value if ( @@ -339,16 +342,23 @@ def determine_growth_media(self, max_gapfilling=None): "new": {}, "reversed": {}, } - if self.media_gapfill_stats[media]: - gfscore = len( - self.media_gapfill_stats[media]["new"].keys() - ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) - atp_att["core_atp_gapfilling"][media.id][ - "new" - ] = self.media_gapfill_stats[media]["new"] - atp_att["core_atp_gapfilling"][media.id][ - "reversed" - ] = self.media_gapfill_stats[media]["reversed"] + if media in self.media_gapfill_stats: + if self.media_gapfill_stats[media]: + gfscore = len( + self.media_gapfill_stats[media]["new"].keys() + ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) + atp_att["core_atp_gapfilling"][media.id][ + "new" + ] = self.media_gapfill_stats[media]["new"] + atp_att["core_atp_gapfilling"][media.id][ + "reversed" + ] = self.media_gapfill_stats[media]["reversed"] + else: + gfscore = 1000 + atp_att["core_atp_gapfilling"][media.id] = { + "score": 1000, + "failed":True + } if best_score is None or gfscore < best_score: best_score = gfscore atp_att["core_atp_gapfilling"][media.id]["score"] = gfscore @@ -511,6 +521,15 @@ def build_tests(self, multiplier=None): if multiplier is None: multiplier = self.multiplier tests = [] + if "empty" in self.media_hash: + tests.append( + { + "media": self.media_hash["empty"], + "is_max_threshold": True, + "threshold": 0.00001, + "objective": self.atp_hydrolysis.id, + } + 
) self.model.objective = self.atp_hydrolysis.id for media in self.selected_media: self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) From 7cc3c550a481df80fdf3c65757c4654f60e26927 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 15:21:26 -0500 Subject: [PATCH 033/146] Fixing ATP correction --- modelseedpy/core/msatpcorrection.py | 33 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 38114ace..c6cc5707 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -342,23 +342,22 @@ def determine_growth_media(self, max_gapfilling=None): "new": {}, "reversed": {}, } - if media in self.media_gapfill_stats: - if self.media_gapfill_stats[media]: - gfscore = len( - self.media_gapfill_stats[media]["new"].keys() - ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) - atp_att["core_atp_gapfilling"][media.id][ - "new" - ] = self.media_gapfill_stats[media]["new"] - atp_att["core_atp_gapfilling"][media.id][ - "reversed" - ] = self.media_gapfill_stats[media]["reversed"] - else: - gfscore = 1000 - atp_att["core_atp_gapfilling"][media.id] = { - "score": 1000, - "failed":True - } + if self.media_gapfill_stats[media]: + gfscore = len( + self.media_gapfill_stats[media]["new"].keys() + ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) + atp_att["core_atp_gapfilling"][media.id][ + "new" + ] = self.media_gapfill_stats[media]["new"] + atp_att["core_atp_gapfilling"][media.id][ + "reversed" + ] = self.media_gapfill_stats[media]["reversed"] + else: + gfscore = 1000 + atp_att["core_atp_gapfilling"][media.id] = { + "score": 1000, + "failed":True + } if best_score is None or gfscore < best_score: best_score = gfscore atp_att["core_atp_gapfilling"][media.id]["score"] = gfscore From e76d8242245c8bfdfdd38efea8107d85e5cb9348 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: 
Tue, 4 Jul 2023 15:37:08 -0500 Subject: [PATCH 034/146] Fixing ATP correction media selection --- modelseedpy/core/msatpcorrection.py | 54 ++++------------------------- 1 file changed, 7 insertions(+), 47 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index c6cc5707..b5e59c97 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -336,14 +336,15 @@ def determine_growth_media(self, max_gapfilling=None): self.selected_media = [] best_score = None for media in self.media_gapfill_stats: - gfscore = 0 atp_att["core_atp_gapfilling"][media.id] = { "score": 0, "new": {}, "reversed": {}, } if self.media_gapfill_stats[media]: - gfscore = len( + atp_att["core_atp_gapfilling"][media.id][ + "score" + ] = len( self.media_gapfill_stats[media]["new"].keys() ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) atp_att["core_atp_gapfilling"][media.id][ @@ -353,67 +354,26 @@ def determine_growth_media(self, max_gapfilling=None): "reversed" ] = self.media_gapfill_stats[media]["reversed"] else: - gfscore = 1000 atp_att["core_atp_gapfilling"][media.id] = { "score": 1000, "failed":True } - if best_score is None or gfscore < best_score: - best_score = gfscore - atp_att["core_atp_gapfilling"][media.id]["score"] = gfscore + if best_score is None or atp_att["core_atp_gapfilling"][media.id]["score"] < best_score: + best_score = atp_att["core_atp_gapfilling"][media.id]["score"] + if self.max_gapfilling is None: self.max_gapfilling = best_score logger.info(f"max_gapfilling: {self.max_gapfilling}, best_score: {best_score}") for media in self.media_gapfill_stats: - gfscore = 0 - if self.media_gapfill_stats[media]: - gfscore = len( - self.media_gapfill_stats[media]["new"].keys() - ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) - - logger.debug(f"media gapfilling score: {media.id}: {gfscore}") - if gfscore <= self.max_gapfilling and gfscore <= ( + if 
atp_att["core_atp_gapfilling"][media.id]["score"] <= self.max_gapfilling and atp_att["core_atp_gapfilling"][media.id]["score"] <= ( best_score + self.gapfilling_delta ): self.selected_media.append(media) atp_att["selected_media"][media.id] = 0 self.modelutl.save_attributes(atp_att, "ATP_analysis") - if MSATPCorrection.DEBUG: - with open("atp_att_debug.json", "w") as outfile: - json.dump(atp_att, outfile) - - def determine_growth_media2(self, max_gapfilling=None): - """ - Decides which of the test media to use as growth conditions for this model - :return: - """ - - def scoring_function(media): - return len(self.media_gapfill_stats[media]["new"].keys()) + 0.5 * len( - self.media_gapfill_stats[media]["reversed"].keys() - ) - - if not max_gapfilling: - max_gapfilling = self.max_gapfilling - self.selected_media = [] - media_scores = dict( - (media, scoring_function(media)) - for media in self.media_gapfill_stats - if self.media_gapfill_stats[media] - ) - best_score = min(media_scores.values()) - if max_gapfilling is None or max_gapfilling > ( - best_score + self.gapfilling_delta - ): - max_gapfilling = best_score + self.gapfilling_delta - for media in media_scores: - score = media_scores[media] - logger.info(score, best_score, max_gapfilling) - if score <= max_gapfilling: - self.selected_media.append(media) def apply_growth_media_gapfilling(self): """ From c4565407d6420cd1e8073899d86f11ffc653a871 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 15:40:43 -0500 Subject: [PATCH 035/146] Running black --- modelseedpy/core/msatpcorrection.py | 21 +++++++++++++-------- modelseedpy/core/msgapfill.py | 2 +- modelseedpy/core/msmodelutl.py | 2 +- modelseedpy/fbapkg/gapfillingpkg.py | 1 + 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index b5e59c97..232d4b3f 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -294,7 +294,7 @@ 
def evaluate_growth_media(self): ) self.media_gapfill_stats[media] = None - + output[media.id] = solution.objective_value if ( @@ -342,9 +342,7 @@ def determine_growth_media(self, max_gapfilling=None): "reversed": {}, } if self.media_gapfill_stats[media]: - atp_att["core_atp_gapfilling"][media.id][ - "score" - ] = len( + atp_att["core_atp_gapfilling"][media.id]["score"] = len( self.media_gapfill_stats[media]["new"].keys() ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) atp_att["core_atp_gapfilling"][media.id][ @@ -356,18 +354,25 @@ def determine_growth_media(self, max_gapfilling=None): else: atp_att["core_atp_gapfilling"][media.id] = { "score": 1000, - "failed":True + "failed": True, } - if best_score is None or atp_att["core_atp_gapfilling"][media.id]["score"] < best_score: + if ( + best_score is None + or atp_att["core_atp_gapfilling"][media.id]["score"] < best_score + ): best_score = atp_att["core_atp_gapfilling"][media.id]["score"] - + if self.max_gapfilling is None: self.max_gapfilling = best_score logger.info(f"max_gapfilling: {self.max_gapfilling}, best_score: {best_score}") for media in self.media_gapfill_stats: - if atp_att["core_atp_gapfilling"][media.id]["score"] <= self.max_gapfilling and atp_att["core_atp_gapfilling"][media.id]["score"] <= ( + if atp_att["core_atp_gapfilling"][media.id][ + "score" + ] <= self.max_gapfilling and atp_att["core_atp_gapfilling"][media.id][ + "score" + ] <= ( best_score + self.gapfilling_delta ): self.selected_media.append(media) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 09ba8c5c..4448b1e7 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -236,7 +236,7 @@ def run_multi_gapfill( media_list, target=None, minimum_objectives={}, - default_minimum_objective = None, + default_minimum_objective=None, binary_check=False, prefilter=True, check_for_growth=True, diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 
e1754b0d..ac232de8 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -89,7 +89,7 @@ def search_name(name): @staticmethod def get(model, create_if_missing=True): if isinstance(model, MSModelUtil): - return model + return model if model in MSModelUtil.mdlutls: return MSModelUtil.mdlutls[model] elif create_if_missing: diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index d066c1a1..74a097df 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -27,6 +27,7 @@ base_blacklist = {} zero_threshold = 1e-8 + class GapfillingPkg(BaseFBAPkg): """ """ From c8b67b563862bdc36a2066740880dc6921cc3923 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 4 Jul 2023 18:01:40 -0500 Subject: [PATCH 036/146] minor --- modelseedpy/core/msatpcorrection.py | 14 +++++----- modelseedpy/core/msgapfill.py | 12 +++------ modelseedpy/core/msmodelutl.py | 42 +++++++---------------------- 3 files changed, 21 insertions(+), 47 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 46bd32ea..63b6933d 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -413,15 +413,15 @@ def apply_growth_media_gapfilling(self): """ self.cumulative_core_gapfilling = ( [] - ) # TODO: In case someone runs ATP correction twice with different parameters, before resetting this, maybe check if any of these reactions are already in the model and remove them so we're starting fresh??? + ) + # TODO: In case someone runs ATP correction twice with different parameters, + # before resetting this, maybe check if any of these reactions are already in + # the model and remove them so we're starting fresh??? 
for media in self.selected_media: - if ( - media in self.media_gapfill_stats - and self.media_gapfill_stats[media] - and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0 - ): + stats = self.media_gapfill_stats.get(media, None) + if stats is not None and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0: self.msgapfill.integrate_gapfill_solution( - self.media_gapfill_stats[media], + stats, self.cumulative_core_gapfilling, link_gaps_to_objective=False, ) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 92890c0e..68b9ba9a 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -15,6 +15,7 @@ class MSGapfill: + @staticmethod def gapfill_count(solution): total = 0 @@ -184,12 +185,7 @@ def run_gapfilling( # Running gapfilling and checking solution sol = self.gfmodel.optimize() - logger.debug( - "gapfill solution objective value %f (%s) for media %s", - sol.objective_value, - sol.status, - media, - ) + logger.debug(f"gapfill solution objective value {sol.objective_value} ({sol.status}) for media {media}") if sol.status != "optimal": logger.warning("No solution found for %s", media) return None @@ -212,7 +208,7 @@ def run_gapfilling( ) return None - # Running binary check to reduce solution to minimal reaction soltuion + # Running binary check to reduce solution to minimal reaction solution if binary_check: self.last_solution = self.gfpkgmgr.getpkg( "GapfillingPkg" @@ -221,7 +217,7 @@ def run_gapfilling( # Setting last solution data self.last_solution["media"] = media self.last_solution["target"] = target - self.last_solution["minobjective"] = minimum_obj + self.last_solution["minobjective"] = self.gfpkgmgr.getpkg("GapfillingPkg").parameters['minimum_obj'] self.last_solution["binary_check"] = binary_check return self.last_solution diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 371abeb7..031a9e58 100644 --- a/modelseedpy/core/msmodelutl.py +++ 
b/modelseedpy/core/msmodelutl.py @@ -538,55 +538,33 @@ def test_solution(self, solution, keep_changes=False): objective = tempmodel.slim_optimize() logger.debug("Starting objective:" + str(objective)) types = ["new", "reversed"] + for key in types: for rxn_id in solution[key]: rxnobj = tempmodel.reactions.get_by_id(rxn_id) - if solution[key][rxn_id] == ">": + solution_key_rxn_id = solution[key][rxn_id] # could call this direction instead but wasn't 100% sure + if solution_key_rxn_id == ">": original_bound = rxnobj.upper_bound rxnobj.upper_bound = 0 objective = tempmodel.slim_optimize() if objective < solution["minobjective"]: - logger.debug( - rxn_id - + solution[key][rxn_id] - + " needed:" - + str(objective) - + " with min obj:" - + str(solution["minobjective"]) - ) + logger.debug(f'{rxn_id}{solution_key_rxn_id} needed:{objective} with min obj:{solution["minobjective"]}') rxnobj.upper_bound = original_bound else: removed_rxns.append(rxnobj) - unneeded.append([rxn_id, solution[key][rxn_id], key]) - logger.debug( - rxn_id - + solution[key][rxn_id] - + " not needed:" - + str(objective) - ) + unneeded.append([rxn_id, solution_key_rxn_id, key]) + logger.debug(f'{rxn_id}{solution_key_rxn_id} not needed:{objective}') else: original_bound = rxnobj.lower_bound rxnobj.lower_bound = 0 objective = tempmodel.slim_optimize() if objective < solution["minobjective"]: - logger.debug( - rxn_id - + solution[key][rxn_id] - + " needed:" - + str(objective) - + " with min obj:" - + str(solution["minobjective"]) - ) + logger.debug(f'{rxn_id}{solution_key_rxn_id} needed:{objective} with min obj:{solution["minobjective"]}') rxnobj.lower_bound = original_bound else: removed_rxns.append(rxnobj) - unneeded.append([rxn_id, solution[key][rxn_id], key]) - logger.debug( - rxn_id - + solution[key][rxn_id] - + " not needed:" - + str(objective) - ) + unneeded.append([rxn_id, solution_key_rxn_id, key]) + logger.debug(f'{rxn_id}{solution_key_rxn_id} not needed:{objective}') if keep_changes: 
tempmodel.remove_reactions(removed_rxns) for items in unneeded: @@ -726,7 +704,7 @@ def test_single_condition(self, condition, apply_condition=True, model=None): if model.solver.status != "optimal": self.printlp(condition["media"].id + "-Testing-Infeasible.lp") logger.critical( - ondition["media"].id + condition["media"].id + "testing leads to infeasible problem. LP file printed to debug!" ) return False From 1851286a03871a758d67e81750dfd4392d6f6da7 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 6 Jul 2023 12:20:51 -0500 Subject: [PATCH 037/146] added e0 as extracell search for cobrapy --- .gitignore | 2 ++ modelseedpy/__init__.py | 6 +++++ tests/core/test_msgapfill.py | 50 ------------------------------------ 3 files changed, 8 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index 6390162b..5589324a 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,5 @@ dmypy.json # Pyre type checker .pyre/ + +*.lp \ No newline at end of file diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 24c19a8e..1973b2a3 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -5,6 +5,7 @@ # set the warning format to be on a single line import sys import logging +import cobra import warnings as _warnings from os import name as _name from os.path import abspath as _abspath @@ -15,6 +16,8 @@ __email__ = "chenry@anl.gov" __version__ = "0.2.2" +logger = logging.getLogger(__name__) + print("modelseedpy", __version__) if sys.version_info[0] == 2: @@ -25,6 +28,9 @@ "still work but we will no longer actively maintain Python 2 support." 
) +if 'e0' not in cobra.medium.annotations.compartment_shortlist['e']: + cobra.medium.annotations.compartment_shortlist['e'].append('e0') + import modelseedpy from modelseedpy.core import ( RastClient, diff --git a/tests/core/test_msgapfill.py b/tests/core/test_msgapfill.py index 1ee694bd..622a0924 100644 --- a/tests/core/test_msgapfill.py +++ b/tests/core/test_msgapfill.py @@ -1,54 +1,4 @@ # -*- coding: utf-8 -*- -""" -from glob import glob -os.environ["HOME"] = 'C:\\Users\\Andrew Freiburger\\Dropbox\\My PC (DESKTOP-M302P50)\\Documents\\UVic Civil Engineering\\Internships\\Agronne\\cobrakbase' -import cobrakbase -token = 'xx' -kbase = cobrakbase.KBaseAPI(token) -import re - -# define the example individual model and associated API media package -model = kbase.get_from_ws('e_coli_core.kb', 95098) -model.solver = 'optlang-cplex' - -# import the modelseedpy packages -import modelseedpy -from modelseedpy.core.msgapfill import MSGapfill -gapfill = MSGapfill(model) - -def test_init(): - assert type(gapfill.model) is cobrakbase.core.kbasefba.fbamodel.FBAModel - assert type(gapfill.blacklist) is list - assert type(gapfill.solutions) is dict - -def test_run_gapfilling_and_integrate_gapfill_solution(): - solutions = gapfill.run_gapfilling() - - # test that the objective expression is correctly set - if solutions is not None: - assert type(solutions) is dict - - # verify the integrate_gapfill_solution function - model_2 = gapfill.integrate_gapfill_solution(solutions) - assert type(model_2) is cobrakbase.core.kbasefba.fbamodel.FBAModel - - for reaction in solutions['reversed']: - if solution["reversed"][reaction] == ">": - assert reaction.upper_bound == 100 - else: - assert reaction.lower_bound == -100 - - for reaction in solutions['new']: - if solution["new"][reaction] == ">": - assert reaction.upper_bound == 100 - assert reaction.lower_bound == 0 - else: - assert reaction.upper_bound == 0 - assert reaction.lower_bound == -100 - -def test_gapfill(): - pass -""" import os 
import pytest import json From 7e0e21632d2580c7d3ab5337a06c632b232508c1 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 6 Jul 2023 12:21:16 -0500 Subject: [PATCH 038/146] black --- modelseedpy/__init__.py | 4 ++-- modelseedpy/core/msatpcorrection.py | 9 +++++---- modelseedpy/core/msgapfill.py | 9 ++++++--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 1973b2a3..665f000c 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -28,8 +28,8 @@ "still work but we will no longer actively maintain Python 2 support." ) -if 'e0' not in cobra.medium.annotations.compartment_shortlist['e']: - cobra.medium.annotations.compartment_shortlist['e'].append('e0') +if "e0" not in cobra.medium.annotations.compartment_shortlist["e"]: + cobra.medium.annotations.compartment_shortlist["e"].append("e0") import modelseedpy from modelseedpy.core import ( diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 2f17d576..727c3e33 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -385,15 +385,16 @@ def apply_growth_media_gapfilling(self): Applies the gapfilling to all selected growth media :return: """ - self.cumulative_core_gapfilling = ( - [] - ) + self.cumulative_core_gapfilling = [] # TODO: In case someone runs ATP correction twice with different parameters, # before resetting this, maybe check if any of these reactions are already in # the model and remove them so we're starting fresh??? 
for media in self.selected_media: stats = self.media_gapfill_stats.get(media, None) - if stats is not None and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0: + if ( + stats is not None + and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0 + ): self.msgapfill.integrate_gapfill_solution( stats, self.cumulative_core_gapfilling, diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index c6dd0a17..cb0824a4 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -15,7 +15,6 @@ class MSGapfill: - @staticmethod def gapfill_count(solution): total = 0 @@ -191,7 +190,9 @@ def run_gapfilling( # Running gapfilling and checking solution sol = self.gfmodel.optimize() - logger.debug(f"gapfill solution objective value {sol.objective_value} ({sol.status}) for media {media}") + logger.debug( + f"gapfill solution objective value {sol.objective_value} ({sol.status}) for media {media}" + ) if sol.status != "optimal": logger.warning("No solution found for %s", media) return None @@ -223,7 +224,9 @@ def run_gapfilling( # Setting last solution data self.last_solution["media"] = media self.last_solution["target"] = target - self.last_solution["minobjective"] = self.gfpkgmgr.getpkg("GapfillingPkg").parameters['minimum_obj'] + self.last_solution["minobjective"] = self.gfpkgmgr.getpkg( + "GapfillingPkg" + ).parameters["minimum_obj"] self.last_solution["binary_check"] = binary_check return self.last_solution From afcaa7a2fe060f58195994638381090118b32c85 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 6 Jul 2023 12:33:44 -0500 Subject: [PATCH 039/146] pre-commit --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5589324a..d5d6d7bd 100644 --- a/.gitignore +++ b/.gitignore @@ -132,4 +132,4 @@ dmypy.json # Pyre type checker .pyre/ -*.lp \ No newline at end of file +*.lp From 38c5a7a48d5cc3567b179a5b9530b6506c2f7e17 Mon Sep 17 00:00:00 2001 From: Christopher 
Henry Date: Thu, 6 Jul 2023 23:38:21 -0500 Subject: [PATCH 040/146] Fixing threshold and adding empty media and fixing thresholds --- modelseedpy/core/msatpcorrection.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 232d4b3f..d22e816e 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -39,7 +39,7 @@ "Akg.O2": 2, "LLac.O2": 2, "Dlac.O2": 2, - "For.O2": 2, + "For.O2": 1.875, "For.NO3": 1.5, "Pyr.NO": 2.5, "Pyr.NO2": 2.5, @@ -109,7 +109,12 @@ def __init__( else: self.atp_medias.append([media, 0.01]) self.media_hash[media.id] = media - + if "empty" not in self.media_hash: + media = MSMedia.from_dict({}) + media.id = "empty" + media.name = "empty" + self.media_hash[media.id] = media + self.forced_media = [] for media_id in forced_media: for media in self.atp_medias: @@ -500,11 +505,14 @@ def build_tests(self, multiplier=None): obj_value = self.model.slim_optimize() logger.debug(f"{media.name} = {obj_value};{multiplier}") logger.debug("Test:" + media.id + ";" + str(multiplier * obj_value)) + threshold = multiplier * obj_value + if threshold == 0: + threshold += 0.00001 tests.append( { "media": media, "is_max_threshold": True, - "threshold": multiplier * obj_value, + "threshold": threshold, "objective": self.atp_hydrolysis.id, } ) From 787c6bec657760a1ced0845ff8512c4922d042b1 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 6 Jul 2023 23:59:47 -0500 Subject: [PATCH 041/146] Fixing empty media --- modelseedpy/core/msatpcorrection.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index d22e816e..653de451 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -499,6 +499,11 @@ def build_tests(self, multiplier=None): "objective": self.atp_hydrolysis.id, } ) + atp_att["tests"]["empty"] = { + 
"threshold": 0.00001, + "objective": self.atp_hydrolysis.id, + } + self.model.objective = self.atp_hydrolysis.id for media in self.selected_media: self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) From 2f8aeeeaf1756842f880afde103f416404ce6698 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Fri, 7 Jul 2023 11:01:24 -0500 Subject: [PATCH 042/146] Fixing git ignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index d5d6d7bd..87619079 100644 --- a/.gitignore +++ b/.gitignore @@ -132,4 +132,7 @@ dmypy.json # Pyre type checker .pyre/ +.pydevproject +.settings/* +*data/* *.lp From 91ac4998cd78a083f98131fb49a5aa2e705c6db5 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Fri, 7 Jul 2023 23:05:57 -0500 Subject: [PATCH 043/146] Making thresholds on tests more flexible, including supporting media specific multipliers for the threshold --- modelseedpy/core/msatpcorrection.py | 34 +++++++++++++++++++---------- modelseedpy/core/msbuilder.py | 7 ------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 31cb7905..aa10acac 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -47,6 +47,11 @@ "Pyr.SO4": 2.5, } +default_threshold_multipiers = { + "Glc": 2, + "default":1.2, +} + class MSATPCorrection: @@ -287,11 +292,11 @@ def evaluate_growth_media(self): media_list = [] min_objectives = {} for media, minimum_obj in self.atp_medias: - logger.info("evaluate media %s", media) + logger.debug("evaluate media %s", media) pkgmgr.getpkg("KBaseMediaPkg").build_package(media) - logger.info("model.medium %s", self.model.medium) + logger.debug("model.medium %s", self.model.medium) solution = self.model.optimize() - logger.info( + logger.debug( "evaluate media %s - %f (%s)", media.id, solution.objective_value, @@ -467,7 +472,7 @@ def restore_noncore_reactions(self, noncore=True, 
othercompartment=True): reaction.lower_bound = self.original_bounds[reaction.id][0] reaction.upper_bound = self.original_bounds[reaction.id][1] - def build_tests(self, multiplier=None): + def build_tests(self,multiplier_hash_override={}): """Build tests based on ATP media evaluations Parameters @@ -483,13 +488,16 @@ def build_tests(self, multiplier=None): Raises ------ """ + #Applying threshold multiplier + for key in default_threshold_multipiers: + if key not in multiplier_hash_override: + multiplier_hash_override[key] = default_threshold_multipiers[key] + #Initialzing atp test attributes atp_att = self.modelutl.get_attributes( "ATP_analysis", {"tests": {}, "selected_media": {}, "core_atp_gapfilling": {}}, ) - - if multiplier is None: - multiplier = self.multiplier + #Initializing tests and adding empty media every time tests = [] if "empty" in self.media_hash: tests.append( @@ -504,13 +512,18 @@ def build_tests(self, multiplier=None): "threshold": 0.00001, "objective": self.atp_hydrolysis.id, } - + #Setting objective to ATP hydrolysis self.model.objective = self.atp_hydrolysis.id for media in self.selected_media: + #Setting multiplier for test threshold + multiplier = multiplier_hash_override["default"] + if media.id in multiplier_hash_override: + multiplier = multiplier_hash_override[media.id] + #Constraining model exchanges for media self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) + #Computing core ATP production obj_value = self.model.slim_optimize() logger.debug(f"{media.name} = {obj_value};{multiplier}") - logger.debug("Test:" + media.id + ";" + str(multiplier * obj_value)) threshold = multiplier * obj_value if threshold == 0: threshold += 0.00001 @@ -527,9 +540,8 @@ def build_tests(self, multiplier=None): "threshold": multiplier * obj_value, "objective": self.atp_hydrolysis.id, } - + #Saving test attributes to the model self.modelutl.save_attributes(atp_att, "ATP_analysis") - return tests def run_atp_correction(self): diff --git 
a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index e376ae0b..3a78188a 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -850,22 +850,15 @@ def build( complex_groups = self.build_complex_groups( self.reaction_to_complex_sets.values() ) - if "bio1" in cobra_model.reactions: - print("1:Biomass present!!") metabolic_reactions = self.build_metabolic_reactions() cobra_model.add_reactions(metabolic_reactions) - if "bio1" in cobra_model.reactions: - print("2:Biomass present!!") non_metabolic_reactions = self.build_non_metabolite_reactions( cobra_model, allow_all_non_grp_reactions ) cobra_model.add_reactions(non_metabolic_reactions) - if "bio1" in cobra_model.reactions: - print("3:Biomass present!!") cobra_model.add_groups(list(complex_groups.values())) self.add_exchanges_to_model(cobra_model) - print("Adding biomass!!") biomass_reactions = [] for rxn_biomass in self.template.biomasses: reaction = rxn_biomass.build_biomass( From 55ae63f60d3039014187177eebfd6a8048455e67 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sun, 9 Jul 2023 23:24:28 -0500 Subject: [PATCH 044/146] Improving commenting and improving multi gapfilling --- modelseedpy/core/msatpcorrection.py | 2 +- modelseedpy/core/msgapfill.py | 46 +++++++++++++++++++++++++++-- modelseedpy/core/msmodelutl.py | 11 +++---- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index aa10acac..dd381b18 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -408,7 +408,7 @@ def apply_growth_media_gapfilling(self): self.msgapfill.integrate_gapfill_solution( stats, self.cumulative_core_gapfilling, - link_gaps_to_objective=False, + link_gaps_to_objective=False ) core_gf = { "count": len(self.cumulative_core_gapfilling), diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index cb0824a4..cc17df98 100644 --- 
a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) logger.setLevel( - logging.WARNING + logging.INFO#WARNING ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO @@ -158,6 +158,22 @@ def run_gapfilling( prefilter=True, check_for_growth=True, ): + """Run gapfilling on a single media condition to force the model to achieve a nonzero specified objective + Parameters + ---------- + media : MSMedia + Media in which the model should be gapfilled + target : string + Name or expression describing the reaction or combination of reactions to the optimized + minimum_obj : double + Value to use for the minimal objective threshold that the model must be gapfilled to achieve + binary_check : bool + Indicates if the solution should be checked to ensure it is minimal in the number of reactions involved + prefilter : bool + Indicates if the gapfilling database should be prefiltered using the tests provided in the MSGapfill constructor before running gapfilling + check_for_growth : bool + Indicates if the model should be checked to ensure that the resulting gapfilling solution produces a nonzero objective + """ # Setting target and media if specified if target: self.gfmodel.objective = self.gfmodel.problem.Objective( @@ -240,6 +256,25 @@ def run_multi_gapfill( prefilter=True, check_for_growth=True, ): + """Run gapfilling across an array of media conditions ultimately using different integration policies: simultaneous gapfilling, independent gapfilling, cumulative gapfilling + Parameters + ---------- + media_list : [MSMedia] + List of the medias in which the model should be gapfilled + target : string + Name or expression describing the reaction or combination of reactions to the optimized + minimum_objectives : {string - media ID : double - minimum objective value} + Media-specific minimal objective thresholds that the model must be gapfilled to achieve + 
default_minimum_objective : double + Default value to use for the minimal objective threshold that the model must be gapfilled to achieve + binary_check : bool + Indicates if the solution should be checked to ensure it is minimal in the number of reactions involved + prefilter : bool + Indicates if the gapfilling database should be prefiltered using the tests provided in the MSGapfill constructor before running gapfilling + check_for_growth : bool + Indicates if the model should be checked to ensure that the resulting gapfilling solution produces a nonzero objective + """ + if not default_minimum_objective: default_minimum_objective = self.default_minimum_objective first = True @@ -250,11 +285,11 @@ def run_multi_gapfill( minimum_obj = minimum_objectives[item] if first: solution_dictionary[item] = self.run_gapfilling( - item, target, minimum_obj, binary_check, True, True + item, target, minimum_obj, binary_check, prefilter, check_for_growth ) else: solution_dictionary[item] = self.run_gapfilling( - item, None, minimum_obj, binary_check, False, True + item, None, minimum_obj, binary_check, False, check_for_growth ) false = False return solution_dictionary @@ -303,6 +338,8 @@ def integrate_gapfill_solution( cumulative_solution.append([rxn_id, "<"]) rxn.upper_bound = 0 rxn.lower_bound = -100 + + #Sometimes for whatever reason, the solution includes useless reactions that should be stripped out before saving the final model unneeded = self.mdlutl.test_solution( solution, keep_changes=True ) # Strips out unneeded reactions - which undoes some of what is done above @@ -311,8 +348,11 @@ def integrate_gapfill_solution( if item[0] == oitem[0] and item[1] == oitem[1]: cumulative_solution.remove(oitem) break + #Adding the gapfilling solution data to the model, which is needed for saving the model in KBase self.mdlutl.add_gapfilling(solution) + #Testing which gapfilled reactions are needed to produce each reactant in the objective function if link_gaps_to_objective: + 
logger.info("Gapfilling sensitivity analysis running on succesful run in "+solution["media"]+" for target "+solution["target"]) gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) if solution["media"] not in gf_sensitivity: gf_sensitivity[solution["media"]] = {} diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index ac232de8..3d40c60b 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -285,12 +285,13 @@ def get_attributes(self, key=None, default=None): self.attributes[key] = default return self.attributes[key] - def save_attributes(self, value, key=None): + def save_attributes(self, value=None, key=None): attributes = self.get_attributes() - if key: - attributes[key] = value - else: - self.attributes = value + if value: + if key: + attributes[key] = value + else: + self.attributes = value if hasattr(self.model, "attributes"): self.model.attributes = self.attributes From 32c590f45689a673d9acc91822a3af2418a8e70d Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 10 Jul 2023 00:46:13 -0500 Subject: [PATCH 045/146] Fixing bug in log message --- modelseedpy/core/msgapfill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index cc17df98..10bcb9a2 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -352,7 +352,7 @@ def integrate_gapfill_solution( self.mdlutl.add_gapfilling(solution) #Testing which gapfilled reactions are needed to produce each reactant in the objective function if link_gaps_to_objective: - logger.info("Gapfilling sensitivity analysis running on succesful run in "+solution["media"]+" for target "+solution["target"]) + logger.info("Gapfilling sensitivity analysis running on succesful run in "+solution["media"].id+" for target "+solution["target"]) gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) if solution["media"] not in gf_sensitivity: 
gf_sensitivity[solution["media"]] = {} From 19f9f58653ff55a0a089c604de452b8761354248 Mon Sep 17 00:00:00 2001 From: jplfaria Date: Mon, 10 Jul 2023 12:47:03 -0500 Subject: [PATCH 046/146] Update atp_medias.tsv adding methanol and methylamines medias --- modelseedpy/data/atp_medias.tsv | 64 +++++++++++++++++---------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/modelseedpy/data/atp_medias.tsv b/modelseedpy/data/atp_medias.tsv index 0bf5e56c..53d15048 100644 --- a/modelseedpy/data/atp_medias.tsv +++ b/modelseedpy/data/atp_medias.tsv @@ -1,30 +1,34 @@ -seed Glc.O2 Ac.O2 Etho.O2 Pyr.O2 Glyc.O2 Fum.O2 Succ.O2 Akg.O2 LLac.O2 Dlac.O2 For.O2 Glc Ac Etho Pyr Glyc Fum Succ Akg Llac Dlac For mal-L For.NO2 For.NO3 For.NO Pyr.NO2 Pyr.NO3 Pyr.NO Ac.NO2 Ac.NO3 Ac.NO Glc.DMSO Glc.TMAO Pyr.DMSO Pyr.TMAO Pyr.SO4 Pyr.SO3 H2.CO2 H2.Ac For.SO4.H2 LLac.SO4.H2 For.SO4 LLac.SO4 H2.SO4 empty Light ANME Methane -EX_cpd00027_e0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00024_e0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00106_e0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00036_e0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00137_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00130_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00159_e0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 -EX_cpd00221_e0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00020_e0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00100_e0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00363_e0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00029_e0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -EX_cpd00047_e0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 -EX_cpd00204_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00011_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -EX_cpd00007_e0 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd11640_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 1000 1000 1000 0 0 1000 0 0 0 0 -EX_cpd00418_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00209_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00075_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00659_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00528_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd08021_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00811_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00048_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 0 1000 1000 1000 1000 1000 0 0 0 0 -EX_cpd00081_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 
-EX_cpd11632_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 -EX_cpd08701_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 -EX_cpd01024_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 +seed Glc.O2 Ac.O2 Etho.O2 Pyr.O2 Glyc.O2 Fum.O2 Succ.O2 Akg.O2 LLac.O2 Dlac.O2 For.O2 Glc Ac Etho Pyr Glyc Fum Succ Akg Llac Dlac For mal-L For.NO2 For.NO3 For.NO Pyr.NO2 Pyr.NO3 Pyr.NO Ac.NO2 Ac.NO3 Ac.NO Glc.DMSO Glc.TMAO Pyr.DMSO Pyr.TMAO Pyr.SO4 Pyr.SO3 H2.CO2 H2.Ac For.SO4.H2 LLac.SO4.H2 For.SO4 LLac.SO4 H2.SO4 empty Light ANME Methane Methanol Methanol.H2 Methanamine.H2 Dimethylamine.H2 Trimethylamine.H2 +EX_cpd00027_e0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00024_e0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00106_e0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00036_e0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00137_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00130_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00159_e0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 +EX_cpd00221_e0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00020_e0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00100_e0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+EX_cpd00363_e0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00029_e0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00047_e0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00204_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00011_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00007_e0 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd11640_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 1000 1000 1000 0 0 1000 0 0 0 0 0 1000 1000 1000 1000 +EX_cpd00418_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00209_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00075_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00659_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00528_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd08021_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00811_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00048_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 0 1000 1000 1000 1000 1000 0 0 0 0 0 0 0 0 0 +EX_cpd00081_e0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd11632_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 +EX_cpd08701_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 0 0 0 0 +EX_cpd01024_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 +EX_cpd00116_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 +EX_cpd00187_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +EX_cpd00425_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 +EX_cpd00441_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 From 432aaed3f70d5f3239d6c04ab1d19a54c4f4cb63 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 10 Jul 2023 13:38:38 -0500 Subject: [PATCH 047/146] build biomass index fix --- modelseedpy/core/msbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index e376ae0b..f3514f7f 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -869,7 +869,7 @@ def build( biomass_reactions = [] for rxn_biomass in self.template.biomasses: reaction = rxn_biomass.build_biomass( - cobra_model, "0", biomass_classic, biomass_gc + cobra_model, index, biomass_classic, biomass_gc ) for m in reaction.metabolites: if "modelseed_template_id" in m.notes: From 239ac6eb8f1e77adf593dbd55a30cc1ca5a71c5d Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 14:04:13 -0500 Subject: [PATCH 048/146] Fixing various media and element package --- modelseedpy/core/msmedia.py | 14 +++++++++++++- 
modelseedpy/core/msmodelutl.py | 4 ---- modelseedpy/fbapkg/basefbapkg.py | 9 +++++++-- modelseedpy/fbapkg/elementuptakepkg.py | 19 ++++++++++++++----- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/modelseedpy/core/msmedia.py b/modelseedpy/core/msmedia.py index 488aad57..aeac7092 100644 --- a/modelseedpy/core/msmedia.py +++ b/modelseedpy/core/msmedia.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import logging from cobra.core.dictlist import DictList +from builtins import None logger = logging.getLogger(__name__) @@ -21,7 +22,18 @@ def maxFlux(self): def minFlux(self): # TODO: will be removed later just for old methods return -self.upper_bound - + + def get_mdl_exchange_hash(self,model_or_mdlutl): + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + mets = modelutl.find_met(self.id) + output = {} + exchange_hash = modelutl.exchange_hash() + for met in mets: + if met in exchange_hash: + output[met] = exchange_hash[met] + return output class MSMedia: def __init__(self, media_id, name=""): diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 3d40c60b..9c69a51f 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -1070,14 +1070,10 @@ def run_biomass_dependency_test( ): tempmodel.objective = original_objective objective = tempmodel.slim_optimize() - with open("FlexBiomass2.lp", "w") as out: - out.write(str(tempmodel.solver)) if objective > 0: target_rxn.lower_bound = 0.1 tempmodel.objective = min_flex_obj solution = tempmodel.optimize() - with open("FlexBiomass3.lp", "w") as out: - out.write(str(tempmodel.solver)) biocpds = [] for reaction in tempmodel.reactions: if reaction.id[0:5] == "FLEX_" and ( diff --git a/modelseedpy/fbapkg/basefbapkg.py b/modelseedpy/fbapkg/basefbapkg.py index 662696f3..77effe32 100644 --- a/modelseedpy/fbapkg/basefbapkg.py +++ b/modelseedpy/fbapkg/basefbapkg.py @@ -33,8 +33,13 @@ class BaseFBAPkg: 
def __init__( self, model, name, variable_types={}, constraint_types={}, reaction_types={} ): - self.model = model - self.modelutl = MSModelUtil.get(model) + if isinstance(model, MSModelUtil): + self.model = model.model + self.modelutl = model + else: + self.model = model + self.modelutl = MSModelUtil.get(model) + self.name = name self.pkgmgr = MSPackageManager.get_pkg_mgr(model) diff --git a/modelseedpy/fbapkg/elementuptakepkg.py b/modelseedpy/fbapkg/elementuptakepkg.py index 66e01035..4eb27e44 100644 --- a/modelseedpy/fbapkg/elementuptakepkg.py +++ b/modelseedpy/fbapkg/elementuptakepkg.py @@ -16,21 +16,30 @@ def __init__(self, model): {"elements": "string"}, ) - def build_package(self, element_limits): + def build_package(self, element_limits,exception_compounds=[],exception_reactions=[]): + #Converting exception compounds list into exception reaction list + exchange_hash = self.modelutl.exchange_hash() + for met in exception_compounds: + if met in exchange_hash: + exception_reactions.append(exchange_hash[met]) + #Now building or rebuilding constraints for element in element_limits: if element not in self.variables["elements"]: self.build_variable(element, element_limits[element]) - self.build_constraint(element) + for element in element_limits: + #This call will first remove existing constraints then build the new constraint + self.build_constraint(element,exception_reactions) def build_variable(self, element, limit): return BaseFBAPkg.build_variable( self, "elements", 0, limit, "continuous", element ) - def build_constraint(self, element): + def build_constraint(self, element,exception_reactions): coef = {self.variables["elements"][element]: -1} - for reaction in self.model.reactions: - if reaction.id[0:3] == "EX_": + rxnlist = self.modelutl.exchange_list() + for reaction in rxnlist: + if reaction not in exception_reactions: total = 0 for metabolite in reaction.metabolites: elements = metabolite.elements From 3f25882b8646b4f777591c5fb1a7e818c0f7aa36 Mon Sep 17 
00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 14:07:18 -0500 Subject: [PATCH 049/146] Fixing weird import --- modelseedpy/core/msmedia.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modelseedpy/core/msmedia.py b/modelseedpy/core/msmedia.py index aeac7092..48fa90ad 100644 --- a/modelseedpy/core/msmedia.py +++ b/modelseedpy/core/msmedia.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import logging from cobra.core.dictlist import DictList -from builtins import None logger = logging.getLogger(__name__) From b0311a317be2de2a77b008d1585deaed4e1727d0 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 23:06:14 -0500 Subject: [PATCH 050/146] Improving phenotype simulations and gapfilling --- modelseedpy/core/msgrowthphenotypes.py | 310 +++++++++++++++++++------ 1 file changed, 238 insertions(+), 72 deletions(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 6c30bb2a..13d540b1 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -9,7 +9,9 @@ from modelseedpy.core.msgapfill import MSGapfill logger = logging.getLogger(__name__) - +logger.setLevel( + logging.INFO +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO class MSGrowthPhenotype: def __init__( @@ -33,102 +35,186 @@ def __init__( self.additional_compounds = additional_compounds self.parent = parent - def build_media(self): + def build_media(self,include_base_media=True): + """Builds media object to use when simulating the phenotype + Parameters + ---------- + include_base_media : bool + Indicates whether to include the base media for the phenotype set in the formulation + """ cpd_hash = {} for cpd in self.additional_compounds: cpd_hash[cpd] = 100 full_media = MSMedia.from_dict(cpd_hash) - if self.media != None: + if self.media: full_media.merge(self.media, overwrite_overlap=False) - if self.parent != None and self.parent.base_media != None: - 
full_media.merge(parent.base_media, overwrite_overlap=False) + if full_media: + if self.parent and self.parent.base_media: + full_media.merge(parent.base_media, overwrite_overlap=False) return full_media def simulate( self, - modelutl, - growth_threshold=0.001, + model_or_modelutl, + objective, + growth_multiplier=10, add_missing_exchanges=False, save_fluxes=False, pfba=False, ): - if not isinstance(modelutl, MSModelUtil): - modelutl = MSModelUtil(modelutl) - media = self.build_media() - output = {"growth": None, "class": None, "missing_transports": []} + """Simulates a single phenotype + Parameters + ---------- + model_or_modelutl : Model | MSModelUtl + Model to use to run the simulations + add_missing_exchanges : bool + Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically + growth_multiplier : double + Indicates a multiplier to use for positive growth above the growth on baseline media + save_fluxes : bool + Indicates if the fluxes should be saved and returned with the results + pfba : bool + Runs pFBA to compute fluxes after initially solving for growth + """ + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + + #Setting objective + if objective: + modelutl.model.objective = objective + + #Building full media and adding missing exchanges + output = {"growth": None, "class": None, "missing_transports": [], "baseline_growth": None} + full_media = self.build_media() if add_missing_exchanges: - output["missing_transports"] = modelutl.add_missing_exchanges(media) - pkgmgr = MSPackageManager.get_pkg_mgr(modelutl.model) - pkgmgr.getpkg("KBaseMediaPkg").build_package( - media, self.parent.base_uptake, self.parent.base_excretion - ) - for gene in self.gene_ko: - if gene in modelutl.model.genes: - geneobj = modelutl.model.genes.get_by_id(gene) - geneobj.knock_out() - solution = modelutl.model.optimize() - output["growth"] 
= solution.objective_value - if solution.objective_value > 0 and pfba: - solution = cobra.flux_analysis.pfba(modelutl.model) - if save_fluxes: - output["fluxes"] = solution.fluxes - if output["growth"] >= growth_threshold: + output["missing_transports"] = modelutl.add_missing_exchanges(full_media) + + #Getting basline growth + output["baseline_growth"] = 0.001 + if self.parent: + output["baseline_growth"] = self.parent.baseline_growth(modelutl,True) + + #Building specific media and setting compound exception list + if self.parent and self.parent.atom_limits and len(self.parent.atom_limits) > 0: + reaction_exceptions = [] + specific_media = self.build_media(False) + for mediacpd in specific_media.mediacompounds: + output = mediacpd.get_mdl_exchange_hash(self,modelutl) + for mdlcpd in output: + reaction_exceptions.append(output[mdlcpd]) + modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.parent.atom_limits,exception_reactions=reaction_exceptions) + + #Applying media + if self.parent: + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( + full_media, self.parent.base_uptake, self.parent.base_excretion + ) + else: + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( + full_media,0,1000 + ) + + with modelutl.model: + #Applying gene knockouts + for gene in self.gene_ko: + if gene in modelutl.model.genes: + geneobj = modelutl.model.genes.get_by_id(gene) + geneobj.knock_out() + + #Optimizing model + solution = modelutl.model.optimize() + output["growth"] = solution.objective_value + if solution.objective_value > 0 and pfba: + solution = cobra.flux_analysis.pfba(modelutl.model) + if save_fluxes: + output["fluxes"] = solution.fluxes + + #Determining phenotype class + if output["growth"] >= output["baseline_growth"]*growth_multiplier: if self.growth > 0: output["class"] = "CP" - else: + elif self.growth == 0: output["class"] = "FP" + else: + output["class"] = "GROWTH" else: if self.growth > 0: output["class"] = "FN" - else: + elif self.growth == 0: 
output["class"] = "CN" + else: + output["class"] = "NOGROWTH" return output def gapfill_model_for_phenotype( self, - modelutl, - default_gapfill_templates, + msgapfill, + objective, test_conditions, - default_gapfill_models=[], - blacklist=[], - growth_threshold=0.001, + growth_multiplier=10, add_missing_exchanges=False, ): - if not isinstance(modelutl, MSModelUtil): - modelutl = MSModelUtil(modelutl) - self.gapfilling = MSGapfill( - modelutl.model, - default_gapfill_templates, - default_gapfill_models, - test_conditions, - modelutl.reaction_scores(), - blacklist, - ) - media = self.build_media() - if add_missing_exchanges: - modelutl.add_missing_exchanges(media) - for gene in self.gene_ko: - if gene in modelutl.model.genes: - geneobj = modelutl.model.genes.get_by_id(gene) - geneobj.knock_out() - gfresults = self.gapfilling.run_gapfilling(media, None) - if gfresults is None: + """Gapfills the model to permit this single phenotype to be positive + Parameters + ---------- + msgapfill : MSGapfill + Fully configured gapfilling object + add_missing_exchanges : bool + Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically + growth_multiplier : double + Indicates a multiplier to use for positive growth above the growth on baseline media + objective : string + Expression for objective to be activated by gapfilling + """ + #First simulate model without gapfilling to assess ungapfilled growth + output = self.simulate(msgapfill.mdlutl,objective,growth_multiplier,add_missing_exchanges) + if output["growth"] >= output["baseline_growth"]*growth_multiplier: + #No gapfilling needed - original model grows without gapfilling + return {"reversed": {}, "new": {},"media": self.build_media(), "target":objective, "minobjective": output["baseline_growth"]*growth_multiplier, "binary_check":False} + + #Now pulling the gapfilling configured model from MSGapfill + gfmodelutl = MSModelUtil.get(msgapfill.gfmodel) + 
#Saving the gapfill objective because this will be replaced when the simulation runs + gfobj = gfmodelutl.model.objective + #Running simulate on gapfill model to add missing exchanges and set proper media and uptake limit constraints + output = self.simulate(modelutl,objective,growth_multiplier,add_missing_exchanges) + #If the gapfilling model fails to achieve the minimum growth, then no solution exists + if output["growth"] < output["baseline_growth"]*growth_multiplier: logger.warning( "Gapfilling failed with the specified model, media, and target reaction." ) - return self.gapfilling.integrate_gapfill_solution(gfresults) - + return None + + #Running the gapfilling itself + full_media = self.build_media() + with modelutl.model: + #Applying gene knockouts + for gene in self.gene_ko: + if gene in modelutl.model.genes: + geneobj = modelutl.model.genes.get_by_id(gene) + geneobj.knock_out() + + gfresults = self.gapfilling.run_gapfilling(media,None,minimum_obj=output["baseline_growth"]*growth_multiplier) + if gfresults is None: + logger.warning( + "Gapfilling failed with the specified model, media, and target reaction." 
+ ) + + return gfresults class MSGrowthPhenotypes: - def __init__(self, base_media=None, base_uptake=0, base_excretion=1000): + def __init__(self, base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): self.base_media = base_media self.phenotypes = DictList() self.base_uptake = base_uptake self.base_excretion = base_excretion + self.atom_limits = global_atom_limits + self.baseline_growth_data = {} @staticmethod - def from_compound_hash(compounds, base_media, base_uptake=0, base_excretion=1000): - growthpheno = MSGrowthPhenotypes(base_media, base_uptake, base_excretion) + def from_compound_hash(compounds,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): + growthpheno = MSGrowthPhenotypes(base_media, base_uptake, base_excretion,global_atom_limits) new_phenos = [] for cpd in compounds: newpheno = MSGrowthPhenotype(cpd, None, compounds[cpd], [], [cpd]) @@ -137,8 +223,8 @@ def from_compound_hash(compounds, base_media, base_uptake=0, base_excretion=1000 return growthpheno @staticmethod - def from_kbase_object(data, kbase_api): - growthpheno = MSGrowthPhenotypes(None, 0, 1000) + def from_kbase_object(data, kbase_api,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): + growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) new_phenos = [] for pheno in data["phenotypes"]: media = kbase_api.get_from_ws(pheno["media_ref"], None) @@ -156,9 +242,9 @@ def from_kbase_object(data, kbase_api): return growthpheno @staticmethod - def from_kbase_file(filename, kbase_api): + def from_kbase_file(filename, kbase_api,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): # TSV file with the following headers:media mediaws growth geneko addtlCpd - growthpheno = MSGrowthPhenotypes(base_media, 0, 1000) + growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) headings = [] new_phenos = [] with open(filename) as f: @@ -190,8 
+276,8 @@ def from_kbase_file(filename, kbase_api): return growthpheno @staticmethod - def from_ms_file(filename, basemedia, base_uptake=0, base_excretion=100): - growthpheno = MSGrowthPhenotypes(base_media, base_uptake, base_excretion) + def from_ms_file(filename,base_media=None, base_uptake=0, base_excretion=100,global_atom_limits={}): + growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) df = pd.read_csv(filename) required_headers = ["Compounds", "Growth"] for item in required_headers: @@ -222,19 +308,40 @@ def add_phenotypes(self, new_phenotypes): def simulate_phenotypes( self, - model, - biomass, + model_or_modelutl, + objective, add_missing_exchanges=False, correct_false_negatives=False, template=None, - growth_threshold=0.001, - save_fluxes=False, + growth_threshold=0.01, + save_fluxes=False ): - model.objective = biomass - modelutl = MSModelUtil(model) + """Simulates all the specified phenotype conditions and saves results + Parameters + ---------- + model_or_modelutl : Model | MSModelUtl + Model to use to run the simulations + objective : string + Expression for objective to maximize in simulations + add_missing_exchanges : bool + Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically + growth_multiplier : double + Indicates a multiplier to use for positive growth above the growth on baseline media + save_fluxes : bool + Indicates if the fluxes should be saved and returned with the results + """ + # Discerning input is model or mdlutl and setting internal links + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + #Setting objective + modelutl.objective = objective + #Getting basline growth + if self.parent + summary = { - "Label": ["Accuracy", "CP", "CN", "FP", "FN"], - "Count": [0, 0, 0, 0, 0], + "Label": ["Accuracy", "CP", "CN", "FP", "FN","Growth","No growth"], + 
"Count": [0, 0, 0, 0, 0,0,0], } data = { "Phenotype": [], @@ -293,3 +400,62 @@ def simulate_phenotypes( df = pd.DataFrame(data) logger.info(df) return {"details": df, "summary": sdf} + + def fit_model_to_phenotypes( + self, + model_or_mdlutl, + correct_false_negatives, + correct_false_positives, + minimize_new_false_positives, + core_template, + template, + integrate_results + ): + + """Simulates all the specified phenotype conditions and saves results + Parameters + ---------- + model_or_mdlutl : Model | MSModelUtl + Model to use to run the simulations + correct_false_negatives : bool + Indicates if false negatives should be corrected + correct_false_positives : bool + Indicates if false positives should be corrected + minimize_new_false_positives : bool + Indicates if new false positivies should be avoided + core_template : MSTemplate + Core template to use for ATP safe gapfilling if tests aren't already computed (defaults to model core template if it has one) + template : MSTemplate + The template that should be used for gapfilling (will default to model template if it has one) + integrate_results : bool + Indicates if the resulting modifications to the model should be integrated + """ + pass + + def gapfill_all_phenotypes( + self, + model_or_mdlutl, + msgapfill=None, # Needed if the gapfilling object in model utl is not initialized + growth_threshold=None, + add_missing_exchanges=False, + ): + mdlutl = MSModelUtil.get(model_or_mdlutl) + # if msgapfill: + # mdlutl.gfutl = msgapfill + # if not mdlutl.gfutl: + # logger.critical( + # "Must either provide a gapfilling object or provide a model utl with an existing gapfilling object" + # ) + # media_list = [] + # for pheno in self.phenotypes: + # + # + # output = mdlutl.gfutl.run_multi_gapfill( + # media_list, + # default_minimum_objective=growth_threshold + # target=mdlutl.primary_biomass(), + # + # binary_check=False, + # prefilter=True, + # check_for_growth=True, + # ) From ea81a98b40fe797c7e02b8b3a96843d4923cd585 
Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 23:34:33 -0500 Subject: [PATCH 051/146] Fixing bug --- modelseedpy/core/msgrowthphenotypes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 13d540b1..fffb4619 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -337,7 +337,6 @@ def simulate_phenotypes( #Setting objective modelutl.objective = objective #Getting basline growth - if self.parent summary = { "Label": ["Accuracy", "CP", "CN", "FP", "FN","Growth","No growth"], From abd998c395350695bd3516f7aac38d3a52f68cb5 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 23:36:20 -0500 Subject: [PATCH 052/146] Adding MSGrowthPhenotype object --- modelseedpy/core/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/__init__.py b/modelseedpy/core/__init__.py index 204564ab..0b7c7b5c 100644 --- a/modelseedpy/core/__init__.py +++ b/modelseedpy/core/__init__.py @@ -9,7 +9,7 @@ from modelseedpy.core.mseditorapi import MSEditorAPI, MSEquation from modelseedpy.core.msgapfill import MSGapfill from modelseedpy.core.msatpcorrection import MSATPCorrection -from modelseedpy.core.msgrowthphenotypes import MSGrowthPhenotypes +from modelseedpy.core.msgrowthphenotypes import MSGrowthPhenotypes, MSGrowthPhenotype from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.mstemplate import MSTemplateBuilder from modelseedpy.core.exceptions import * From 963f00a0ef8e1e3a812b2820cc1218628e5fec45 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 23:39:02 -0500 Subject: [PATCH 053/146] Adding MSGrowthPhenotype --- modelseedpy/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 665f000c..dbb7c090 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -38,6 +38,7 
@@ MSBuilder, MSMedia, MSGrowthPhenotypes, + MSGrowthPhenotype, MSModelUtil, FBAHelper, MSEditorAPI, From 0d6e7af0727907b64ba6f737f48679f9235f4f94 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 23:47:33 -0500 Subject: [PATCH 054/146] Fixing error in simulate arguments --- modelseedpy/core/msgrowthphenotypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index fffb4619..553b0523 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -55,7 +55,7 @@ def build_media(self,include_base_media=True): def simulate( self, - model_or_modelutl, + model_or_mdlutl, objective, growth_multiplier=10, add_missing_exchanges=False, From c0abcd166a8fcbfde702cf375fef9b604440f37a Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sun, 16 Jul 2023 22:42:19 -0500 Subject: [PATCH 055/146] Fixing attributes, improving phenotypes, improving gapfilling --- modelseedpy/core/msgapfill.py | 11 ++--- modelseedpy/core/msgrowthphenotypes.py | 57 ++++++++++++++++++-------- modelseedpy/core/msmodelutl.py | 17 ++++++-- 3 files changed, 60 insertions(+), 25 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 10bcb9a2..dde1514e 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -2,6 +2,7 @@ import logging import cobra import re +import json from optlang.symbolics import Zero, add from modelseedpy.core import FBAHelper # !!! 
the import is never used from modelseedpy.fbapkg.mspackagemanager import MSPackageManager @@ -354,11 +355,11 @@ def integrate_gapfill_solution( if link_gaps_to_objective: logger.info("Gapfilling sensitivity analysis running on succesful run in "+solution["media"].id+" for target "+solution["target"]) gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) - if solution["media"] not in gf_sensitivity: - gf_sensitivity[solution["media"]] = {} - if solution["target"] not in gf_sensitivity[solution["media"]]: - gf_sensitivity[solution["media"]][solution["target"]] = {} - gf_sensitivity[solution["media"]][solution["target"]][ + if solution["media"].id not in gf_sensitivity: + gf_sensitivity[solution["media"].id] = {} + if solution["target"] not in gf_sensitivity[solution["media"].id]: + gf_sensitivity[solution["media"].id][solution["target"]] = {} + gf_sensitivity[solution["media"].id][solution["target"]][ "success" ] = self.mdlutl.find_unproducible_biomass_compounds( solution["target"], cumulative_solution diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 553b0523..bc2f4f05 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -297,6 +297,15 @@ def from_ms_file(filename,base_media=None, base_uptake=0, base_excretion=100,glo growthpheno.add_phenotypes(new_phenos) return growthpheno + def build_super_media(self): + super_media = None + for pheno in self.phenotypes: + if not super_media: + super_media = pheno.build_media() + else: + super_media.merge(pheno.build_media(), overwrite_overlap=False) + return super_media + def add_phenotypes(self, new_phenotypes): keep_phenos = [] for pheno in new_phenotypes: @@ -402,34 +411,50 @@ def simulate_phenotypes( def fit_model_to_phenotypes( self, - model_or_mdlutl, - correct_false_negatives, - correct_false_positives, - minimize_new_false_positives, - core_template, - template, - integrate_results + msgapfill, + objective, + 
grow_multiplier, + correct_false_positives=False, + minimize_new_false_positives=True, + atp_safe=True, + integrate_results=True, + global_gapfilling=True ): """Simulates all the specified phenotype conditions and saves results Parameters ---------- - model_or_mdlutl : Model | MSModelUtl - Model to use to run the simulations - correct_false_negatives : bool - Indicates if false negatives should be corrected + msgapfill : MSGapfill + Gapfilling object used for the gapfilling process correct_false_positives : bool Indicates if false positives should be corrected minimize_new_false_positives : bool Indicates if new false positivies should be avoided - core_template : MSTemplate - Core template to use for ATP safe gapfilling if tests aren't already computed (defaults to model core template if it has one) - template : MSTemplate - The template that should be used for gapfilling (will default to model template if it has one) integrate_results : bool Indicates if the resulting modifications to the model should be integrated """ - pass + #Create super media for all + super_media = self.build_super_media() + #Adding missing exchanges + msgapfill.gfmodel.add_missing_exchanges(super_media) + #Adding elemental constraints + self.add_elemental_constraints() + #Getting ATP tests + + #Filtering database for ATP tests + + #Penalizing database to avoid creating false positives + + #Building additional tests from current correct negatives + + #Computing base-line growth + + #Computing growth threshold + + #Running global gapfill + + #Integrating solution + def gapfill_all_phenotypes( self, diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 9c69a51f..fb4c45e4 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -113,6 +113,12 @@ def __init__(self, model): self.attributes = {} if hasattr(self.model, "attributes"): self.attributes = self.model + if "pathways" not in self.attributes: + self.attributes["pathways"] = {} + if 
"auxotrophy" not in self.attributes: + self.attributes["auxotrophy"] = {} + if "fbas" not in self.attributes: + self.attributes["fbas"] = {} def compute_automated_reaction_scores(self): """ @@ -286,14 +292,17 @@ def get_attributes(self, key=None, default=None): return self.attributes[key] def save_attributes(self, value=None, key=None): - attributes = self.get_attributes() if value: if key: - attributes[key] = value + self.attributes[key] = value else: self.attributes = value - if hasattr(self.model, "attributes"): - self.model.attributes = self.attributes + if hasattr(self.model, "computed_attributes"): + logger.info( + "Setting FBAModel computed_attributes to mdlutl attributes" + ) + self.attributes["gene_count"] = len(self.model.genes) + self.model.computed_attributes = self.attributes def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): modelseed = ModelSEEDBiochem.get() From de260aec2b0897fa20dbc6b0ca0981d050df0151 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sun, 16 Jul 2023 23:15:22 -0500 Subject: [PATCH 056/146] Fixing attritbute problem and fixing phenotypes --- modelseedpy/core/msatpcorrection.py | 2 +- modelseedpy/core/msgrowthphenotypes.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index dd381b18..c07cc34f 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -414,7 +414,7 @@ def apply_growth_media_gapfilling(self): "count": len(self.cumulative_core_gapfilling), "reactions": self.cumulative_core_gapfilling, } - self.modelutl.save_attributes(core_gf, "core_gapfilling") + self.modelutl.save_attributes(core_gf, "core_gapfilling_details") def expand_model_to_genome_scale(self): """Restores noncore reactions to model while filtering out reactions that break ATP diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index bc2f4f05..885e4f78 100644 
--- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -132,19 +132,19 @@ def simulate( #Determining phenotype class if output["growth"] >= output["baseline_growth"]*growth_multiplier: - if self.growth > 0: + if not self.growth: + output["class"] = "GROWTH" + elif self.growth > 0: output["class"] = "CP" elif self.growth == 0: output["class"] = "FP" - else: - output["class"] = "GROWTH" else: - if self.growth > 0: + if not self.growth: + output["class"] = "NOGROWTH" + elif self.growth > 0: output["class"] = "FN" elif self.growth == 0: output["class"] = "CN" - else: - output["class"] = "NOGROWTH" return output def gapfill_model_for_phenotype( From 328c57878423d89c2d48281d4b3adca606fc1bec Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 17 Jul 2023 23:23:34 -0500 Subject: [PATCH 057/146] Improving phenotype simulations and enabling use of complete media --- modelseedpy/core/msgrowthphenotypes.py | 49 ++++++++++++++++++++++---- modelseedpy/core/msmedia.py | 1 + modelseedpy/core/msmodelutl.py | 6 ++-- modelseedpy/fbapkg/kbasemediapkg.py | 2 +- 4 files changed, 47 insertions(+), 11 deletions(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 885e4f78..ebdba851 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -48,9 +48,9 @@ def build_media(self,include_base_media=True): full_media = MSMedia.from_dict(cpd_hash) if self.media: full_media.merge(self.media, overwrite_overlap=False) - if full_media: + if include_base_media: if self.parent and self.parent.base_media: - full_media.merge(parent.base_media, overwrite_overlap=False) + full_media.merge(self.parent.base_media, overwrite_overlap=False) return full_media def simulate( @@ -91,18 +91,18 @@ def simulate( output["missing_transports"] = modelutl.add_missing_exchanges(full_media) #Getting basline growth - output["baseline_growth"] = 0.001 + output["baseline_growth"] = 0.01 if 
self.parent: - output["baseline_growth"] = self.parent.baseline_growth(modelutl,True) + output["baseline_growth"] = self.parent.baseline_growth(modelutl,objective) #Building specific media and setting compound exception list if self.parent and self.parent.atom_limits and len(self.parent.atom_limits) > 0: reaction_exceptions = [] specific_media = self.build_media(False) for mediacpd in specific_media.mediacompounds: - output = mediacpd.get_mdl_exchange_hash(self,modelutl) - for mdlcpd in output: - reaction_exceptions.append(output[mdlcpd]) + ex_hash = mediacpd.get_mdl_exchange_hash(modelutl) + for mdlcpd in ex_hash: + reaction_exceptions.append(ex_hash[mdlcpd]) modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.parent.atom_limits,exception_reactions=reaction_exceptions) #Applying media @@ -211,6 +211,7 @@ def __init__(self, base_media=None, base_uptake=0, base_excretion=1000,global_at self.base_excretion = base_excretion self.atom_limits = global_atom_limits self.baseline_growth_data = {} + self.cached_based_growth = {} @staticmethod def from_compound_hash(compounds,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): @@ -315,6 +316,40 @@ def add_phenotypes(self, new_phenotypes): additions = DictList(keep_phenos) self.phenotypes += additions + def baseline_growth( + self, + model_or_mdlutl, + objective + ): + """Simulates all the specified phenotype conditions and saves results + Parameters + ---------- + model_or_modelutl : Model | MSModelUtl + Model to use to run the simulations + """ + # Discerning input is model or mdlutl and setting internal links + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + #Checking if base growth already computed + if modelutl in self.cached_based_growth: + if objective in self.cached_based_growth[modelutl]: + return self.cached_based_growth[modelutl][objective] + else: + self.cached_based_growth[modelutl] = {} + #Setting 
objective + modelutl.objective = objective + #Setting media + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( + self.base_media, self.base_uptake, self.base_excretion + ) + #Adding uptake limits + if len(self.atom_limits) > 0: + modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.atom_limits) + #Simulating + self.cached_based_growth[modelutl][objective] = modelutl.model.slim_optimize() + return self.cached_based_growth[modelutl][objective] + def simulate_phenotypes( self, model_or_modelutl, diff --git a/modelseedpy/core/msmedia.py b/modelseedpy/core/msmedia.py index 48fa90ad..fadc435d 100644 --- a/modelseedpy/core/msmedia.py +++ b/modelseedpy/core/msmedia.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import logging from cobra.core.dictlist import DictList +from modelseedpy.core.msmodelutl import MSModelUtil logger = logging.getLogger(__name__) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index fb4c45e4..097dc9cc 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -111,14 +111,14 @@ def __init__(self, model): self.score = None self.integrated_gapfillings = [] self.attributes = {} - if hasattr(self.model, "attributes"): - self.attributes = self.model + if hasattr(self.model, "computed_attributes"): + self.attributes = self.model.computed_attributes if "pathways" not in self.attributes: self.attributes["pathways"] = {} if "auxotrophy" not in self.attributes: self.attributes["auxotrophy"] = {} if "fbas" not in self.attributes: - self.attributes["fbas"] = {} + self.attributes["fbas"] = {} def compute_automated_reaction_scores(self): """ diff --git a/modelseedpy/fbapkg/kbasemediapkg.py b/modelseedpy/fbapkg/kbasemediapkg.py index 4dbf0779..a3c19243 100644 --- a/modelseedpy/fbapkg/kbasemediapkg.py +++ b/modelseedpy/fbapkg/kbasemediapkg.py @@ -40,7 +40,7 @@ def build_package( self.parameters["default_uptake"] = 0 if self.parameters["default_excretion"] is None: self.parameters["default_excretion"] 
= 100 - if self.parameters["media"] is None and self.parameters["default_uptake"] == 0: + if (self.parameters["media"] is None or self.parameters["media"].name == "Complete") and self.parameters["default_uptake"] == 0: self.parameters["default_uptake"] = 100 # First initializing all exchanges to default uptake and excretion From 084054dbb831231c324c09db35a4bff636a8bc3b Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 17 Jul 2023 23:26:23 -0500 Subject: [PATCH 058/146] Fixing issue where attributes will be none --- modelseedpy/core/msmodelutl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 097dc9cc..ec6bc903 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -112,7 +112,8 @@ def __init__(self, model): self.integrated_gapfillings = [] self.attributes = {} if hasattr(self.model, "computed_attributes"): - self.attributes = self.model.computed_attributes + if self.model.computed_attributes: + self.attributes = self.model.computed_attributes if "pathways" not in self.attributes: self.attributes["pathways"] = {} if "auxotrophy" not in self.attributes: From 0d6149799c0e30393f9cdf0d8fc654f0bdb173f1 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 18 Jul 2023 01:48:51 -0500 Subject: [PATCH 059/146] atpcorrection --- modelseedpy/core/msatpcorrection.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 727c3e33..2f5a2774 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -161,9 +161,10 @@ def load_default_medias(self): media.id = media_id media.name = media_id min_obj = 0.01 - if media_id in min_gap: - min_obj = min_gap[media_id] - self.atp_medias.append([media, min_obj]) + self.atp_medias.append([ + media, + min_gap.get(media_d, min_obj) + ]) @staticmethod def 
find_reaction_in_template(model_reaction, template, compartment): From b08f8f38362c3a9140571f5ade10f8d00ab10ae6 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 18 Jul 2023 13:29:46 -0500 Subject: [PATCH 060/146] Improving phenotype simulation and making sure parameters are documented in element uptake --- modelseedpy/core/msgrowthphenotypes.py | 2 +- modelseedpy/fbapkg/elementuptakepkg.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index ebdba851..a98fd64d 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -57,7 +57,7 @@ def simulate( self, model_or_mdlutl, objective, - growth_multiplier=10, + growth_multiplier=3, add_missing_exchanges=False, save_fluxes=False, pfba=False, diff --git a/modelseedpy/fbapkg/elementuptakepkg.py b/modelseedpy/fbapkg/elementuptakepkg.py index 4eb27e44..8348e602 100644 --- a/modelseedpy/fbapkg/elementuptakepkg.py +++ b/modelseedpy/fbapkg/elementuptakepkg.py @@ -18,6 +18,11 @@ def __init__(self, model): def build_package(self, element_limits,exception_compounds=[],exception_reactions=[]): #Converting exception compounds list into exception reaction list + self.parameters = { + "element_limits" : element_limits, + "exception_compounds" : exception_compounds, + "exception_reactions" : exception_reactions + } exchange_hash = self.modelutl.exchange_hash() for met in exception_compounds: if met in exchange_hash: From c45f798ec14d8f140a9e0b0af101a49d932532ef Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Fri, 21 Jul 2023 23:11:59 -0500 Subject: [PATCH 061/146] Improved phenotype functions --- modelseedpy/core/msgrowthphenotypes.py | 122 +++++++++++++++---------- 1 file changed, 72 insertions(+), 50 deletions(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index a98fd64d..696a6b9d 100644 --- 
a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -352,25 +352,26 @@ def baseline_growth( def simulate_phenotypes( self, - model_or_modelutl, + model_or_mdlutl, objective, + growth_multiplier=3, add_missing_exchanges=False, - correct_false_negatives=False, - template=None, - growth_threshold=0.01, - save_fluxes=False + save_fluxes=False, + gapfill_negatives=False, + msgapfill=None, + test_conditions=None ): """Simulates all the specified phenotype conditions and saves results Parameters ---------- - model_or_modelutl : Model | MSModelUtl + model_or_mdlutl : Model | MSModelUtl Model to use to run the simulations objective : string Expression for objective to maximize in simulations - add_missing_exchanges : bool - Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically growth_multiplier : double Indicates a multiplier to use for positive growth above the growth on baseline media + add_missing_exchanges : bool + Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically save_fluxes : bool Indicates if the fluxes should be saved and returned with the results """ @@ -381,7 +382,8 @@ def simulate_phenotypes( #Setting objective modelutl.objective = objective #Getting basline growth - + baseline_growth = self.baseline_growth(modelutl,objective) + #Establishing output of the simulation method summary = { "Label": ["Accuracy", "CP", "CN", "FP", "FN","Growth","No growth"], "Count": [0, 0, 0, 0, 0,0,0], @@ -393,51 +395,53 @@ def simulate_phenotypes( "Class": [], "Transports missing": [], "Gapfilled reactions": [], + "Gapfilling score":None } + #Running simulations + gapfilling_solutions = {} for pheno in self.phenotypes: - with model: - result = pheno.simulate( - modelutl, growth_threshold, add_missing_exchanges, save_fluxes - ) # Result should have "growth" and "class" - if result["class"] == 
"FN" and correct_false_negatives: - pheno.gapfill_model_for_phenotype(modelutl, [template], None) - if pheno.gapfilling.last_solution != None: - list = [] - for rxn_id in pheno.gapfilling.last_solution["reversed"]: - list.append( - pheno.gapfilling.last_solution["reversed"][rxn_id] - + rxn_id - ) - for rxn_id in pheno.gapfilling.last_solution["new"]: - list.append( - pheno.gapfilling.last_solution["new"][rxn_id] + rxn_id - ) - data["Gapfilled reactions"].append(";".join(list)) - else: - data["Gapfilled reactions"].append(None) + result = pheno.simulate( + modelutl,objective,growth_multiplier,add_missing_exchanges,save_fluxes + ) + data["Class"].append(result["class"]) + data["Phenotype"].append(pheno.id) + data["Observed growth"].append(pheno.growth) + data["Simulated growth"].append(result["growth"]) + data["Transports missing"].append( + ";".join(result["missing_transports"]) + ) + if result["class"] == "CP": + summary["Count"][1] += 1 + summary["Count"][0] += 1 + if result["class"] == "CN": + summary["Count"][2] += 1 + summary["Count"][0] += 1 + if result["class"] == "FP": + summary["Count"][3] += 1 + if result["class"] == "FN": + summary["Count"][4] += 1 + #Gapfilling negative growth conditions + if gapfill_negatives and output["class"] in ["NOGROWTH","FN","CN"]: + gapfilling_solutions[pheno] = pheno.gapfill_model_for_phenotype(msgapfill,objective,test_conditions,growth_multiplier,add_missing_exchanges) + if gapfilling_solutions[pheno] != None: + data["Gapfilling score"] = 0 + list = [] + for rxn_id in gapfilling_solutions[pheno]["reversed"]: + list.append( + gapfilling_solutions[pheno]["reversed"][rxn_id] + + rxn_id + ) + data["Gapfilling score"] += 0.5 + for rxn_id in gapfilling_solutions[pheno]["new"]: + list.append( + gapfilling_solutions[pheno]["new"][rxn_id] + rxn_id + ) + data["Gapfilling score"] += 1 + data["Gapfilled reactions"].append(";".join(list)) else: data["Gapfilled reactions"].append(None) - result = pheno.simulate( - modelutl, 
growth_threshold, add_missing_exchanges, save_fluxes - ) # Result should have "growth" and "class" - data["Class"].append(result["class"]) - data["Phenotype"].append(pheno.id) - data["Observed growth"].append(pheno.growth) - data["Simulated growth"].append(result["growth"]) - data["Transports missing"].append( - ";".join(result["missing_transports"]) - ) - if result["class"] == "CP": - summary["Count"][1] += 1 - summary["Count"][0] += 1 - if result["class"] == "CN": - summary["Count"][2] += 1 - summary["Count"][0] += 1 - if result["class"] == "FP": - summary["Count"][3] += 1 - if result["class"] == "FN": - summary["Count"][4] += 1 - + else: + data["Gapfilled reactions"].append(None) summary["Count"][0] = summary["Count"][0] / len(self.phenotypes) sdf = pd.DataFrame(summary) df = pd.DataFrame(data) @@ -468,6 +472,24 @@ def fit_model_to_phenotypes( integrate_results : bool Indicates if the resulting modifications to the model should be integrated """ + + + + #Running simulations + positive_growth = [] + negative_growth = [] + for pheno in self.phenotypes: + with model: + result = pheno.simulate( + modelutl,objective,growth_multiplier,add_missing_exchanges,save_fluxes + ) + #Gapfilling negative growth conditions + if gapfill_negatives and output["class"] in ["NOGROWTH","FN","CN"]: + negative_growth.append(pheno.build_media()) + elif gapfill_negatives and output["class"] in ["GROWTH","FP","CP"]: + positive_growth.append(pheno.build_media()) + + #Create super media for all super_media = self.build_super_media() #Adding missing exchanges From a48f3987d43630142102e15c391984a7b7fbe7f8 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 24 Jul 2023 00:19:56 -0500 Subject: [PATCH 062/146] Removing failed ATP gapfillings from gapfilling sensitivity --- modelseedpy/core/msgapfill.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index dde1514e..14371e2a 100644 --- 
a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -114,7 +114,9 @@ def test_gapfill_database(self, media, target=None, before_filtering=True): target = target[13:] if self.gfpkgmgr.getpkg("GapfillingPkg").test_gapfill_database(): return True - gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + gf_sensitivity = {} + if target != "rxn00062_c0": + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) if media.id not in gf_sensitivity: gf_sensitivity[media.id] = {} if target not in gf_sensitivity[media.id]: @@ -127,7 +129,8 @@ def test_gapfill_database(self, media, target=None, before_filtering=True): gf_sensitivity[media.id][target][ note ] = self.mdlutl.find_unproducible_biomass_compounds(target) - self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") + if target != "rxn00062_c0": + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") logger.warning( "No gapfilling solution found" + filter_msg From 6dca6d957466dc31785faf9ae0b235fb2e8f5a64 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 24 Jul 2023 05:27:46 +0000 Subject: [PATCH 063/146] Adding ATP gapfilled reactions to gapfilling sensitivity --- modelseedpy/core/msatpcorrection.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index c07cc34f..08540daa 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -410,11 +410,23 @@ def apply_growth_media_gapfilling(self): self.cumulative_core_gapfilling, link_gaps_to_objective=False ) - core_gf = { - "count": len(self.cumulative_core_gapfilling), - "reactions": self.cumulative_core_gapfilling, - } - self.modelutl.save_attributes(core_gf, "core_gapfilling_details") + #Adding reactions to gapfilling sensitivity structure so we can track all gapfilled reactions + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if media.id not 
in gf_sensitivity: + gf_sensitivity[media.id] = {} + if self.atp_hydrolysis.id not in gf_sensitivity[media.id]: + gf_sensitivity[media.id][self.atp_hydrolysis.id] = {} + gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"] = {} + for item in stats["new"]: + gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][item] = { + stats["new"][item] : [] + } + for item in stats["reversed"]: + gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][item] = { + stats["reversed"][item] : [] + } + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") + self.modelutl.save_attributes(len(self.cumulative_core_gapfilling), "total_core_gapfilling") def expand_model_to_genome_scale(self): """Restores noncore reactions to model while filtering out reactions that break ATP From 1be467481a38f3782a804e9def5ad0b9a07774b0 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 25 Jul 2023 12:23:06 -0500 Subject: [PATCH 064/146] Fixing bug in gapfill --- modelseedpy/core/msmodelutl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index ec6bc903..47064f05 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -1009,6 +1009,7 @@ def find_unproducible_biomass_compounds(self, target_rxn="bio1", ko_list=None): # Getting target reaction and making sure it exists if target_rxn not in tempmodel.reactions: logger.critical(target_rxn + " not in model!") + return None target_rxn_obj = tempmodel.reactions.get_by_id(target_rxn) tempmodel.objective = target_rxn original_objective = tempmodel.objective From 2db54c6d5e41965f5cd2980046d2ae2a79983d80 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 25 Jul 2023 23:20:05 -0500 Subject: [PATCH 065/146] Fixing bug in ATP media --- modelseedpy/core/msatpcorrection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 
08540daa..18c3d40f 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -122,9 +122,9 @@ def __init__( self.forced_media = [] for media_id in forced_media: - for media in self.atp_medias: - if media.id == media_id: - self.forced_media.append(media) + for item in self.atp_medias: + if item[0].id == media_id: + self.forced_media.append(item[0]) break self.max_gapfilling = max_gapfilling From ca7cc5054f4f9acb76d4163fab090e7391420b62 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 26 Jul 2023 04:51:17 +0000 Subject: [PATCH 066/146] Fixing ATP correction --- modelseedpy/core/msatpcorrection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 08540daa..8b173161 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -411,7 +411,7 @@ def apply_growth_media_gapfilling(self): link_gaps_to_objective=False ) #Adding reactions to gapfilling sensitivity structure so we can track all gapfilled reactions - gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + gf_sensitivity = self.modelutl.get_attributes("gf_sensitivity", {}) if media.id not in gf_sensitivity: gf_sensitivity[media.id] = {} if self.atp_hydrolysis.id not in gf_sensitivity[media.id]: @@ -425,7 +425,7 @@ def apply_growth_media_gapfilling(self): gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][item] = { stats["reversed"][item] : [] } - self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") + self.modelutl.save_attributes(gf_sensitivity, "gf_sensitivity") self.modelutl.save_attributes(len(self.cumulative_core_gapfilling), "total_core_gapfilling") def expand_model_to_genome_scale(self): From 03ad8cc36b04c4d2fc8960131f5a862df4581e7d Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 26 Jul 2023 13:18:19 -0500 Subject: [PATCH 067/146] Adding MSModelReport object with Jose's report code --- 
modelseedpy/__init__.py | 1 + modelseedpy/core/__init__.py | 1 + modelseedpy/core/msmodelreport.py | 272 ++++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+) create mode 100644 modelseedpy/core/msmodelreport.py diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index dbb7c090..551f617d 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -45,6 +45,7 @@ MSATPCorrection, MSGapfill, MSEquation, + MSModelReport ) from modelseedpy.core.exceptions import * diff --git a/modelseedpy/core/__init__.py b/modelseedpy/core/__init__.py index 0b7c7b5c..eb4d02a2 100644 --- a/modelseedpy/core/__init__.py +++ b/modelseedpy/core/__init__.py @@ -12,4 +12,5 @@ from modelseedpy.core.msgrowthphenotypes import MSGrowthPhenotypes, MSGrowthPhenotype from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.mstemplate import MSTemplateBuilder +from modelseedpy.core.msmodelreport import MSModelReport from modelseedpy.core.exceptions import * diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py new file mode 100644 index 00000000..dccc658b --- /dev/null +++ b/modelseedpy/core/msmodelreport.py @@ -0,0 +1,272 @@ +# -*- coding: utf-8 -*- +import pandas as pd +import logging +import matplotlib.cm as cm +from modelseedpy.core.msmodelutl import MSModelUtil + +logger = logging.getLogger(__name__) +logger.setLevel( + logging.INFO +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO + +class MSModelReport: + def __init__( + self + ): + pass + + def build_report( + self, + model_or_mdlutl + ): + """Builds model HTML report + Parameters + ---------- + model_or_modelutl : Model | MSModelUtl + Model to use to run the simulations + """ + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + + # Process the data + attributes = modelutl.get_attributes() + + selected_media_data = 
attributes['ATP_analysis']['selected_media'] + core_atp_gapfilling_data = attributes['ATP_analysis']['core_atp_gapfilling'] + gf_filter_data = attributes['gf_filter'] + gf_sensitivity_data = attributes.get('gf_sensitivity') # Get 'gf_sensitivity_data' if available, otherwise it will be None + + # Get the names of 'Core Gapfilling Media' and 'Gapfilling Media' + core_gapfilling_media = [media for media, media_data in (gf_sensitivity_data or {}).items() if 'rxn00062_c0' in media_data] + gapfilling_media = [media for media, media_data in (gf_sensitivity_data or {}).items() if 'bio1' in media_data] + core_gapfilling_media_text = ', '.join(core_gapfilling_media) + gapfilling_media_text = ', '.join(gapfilling_media) + + bio_count = 0 + for rxn in modelutl.model.reactions: + if rxn.id[0:3] == "bio": + bio_count += 1 + + # Create the Model Summary table data + model_summary_data = [ + ('Model ID', modelutl.wsid), + ('Genome Scale Template', modelutl.model.template_ref), + #('Core Template', modelutl.model.core_template_ref), + ('Core Gapfilling Media', core_gapfilling_media_text), + ('Gapfilling Media', gapfilling_media_text), + ('Source Genome',modelutl.model.name), + ('Total Number of reactions', str(len(modelutl.model.reactions))), + # ('Number of reactions in Core', 'TBD - attributes require changes things to support this'), + # ('Number of reactions in Genome Scale', 'TBD - attributes require changes things to support this'), + ('Number compounds', str(len(modelutl.model.metabolites))), + ('Number compartments', str(len(modelutl.model.compartments))), + ('Number biomass', str(bio_count)), + ('Number gapfills', str(len(gf_sensitivity_data))), + ] + + # Create the DataFrame for Model Summary + model_summary_df = pd.DataFrame(model_summary_data, columns=['', '']) + + # Process core_atp_gapfilling_data and gf_filter_data into a list of dictionaries + gapfilling_list = [] + for media in core_atp_gapfilling_data: + core_atp_gapfilling_media = core_atp_gapfilling_data[media] 
+ row = { + 'no of gapfilled reactions': int(core_atp_gapfilling_media['score']), + 'media': media, + 'ATP Production': f"{round(selected_media_data.get(media, 0), 2):.2f}" if media in selected_media_data else '', + 'gapfilled reactions': '', + 'reversed reaction by gapfilling': '', + 'Filtered Reactions': '', + } + if 'new' in core_atp_gapfilling_media: + gapfilled_reactions = core_atp_gapfilling_media['new'] + if gapfilled_reactions: + reactions = [f'{rxn} : {direction}' if not rxn.startswith("EX") else f'EX_{rxn} : {direction}' for rxn, direction in gapfilled_reactions.items()] + row['gapfilled reactions'] = ' | '.join(reactions) + if 'failed' in core_atp_gapfilling_media and core_atp_gapfilling_media['failed']: + row['gapfilled reactions'] = 'Failed' + if 'reversed' in core_atp_gapfilling_media: + reversed_reactions = core_atp_gapfilling_media['reversed'] + if reversed_reactions: + reactions = [f'{rxn} : {direction}' if not rxn.startswith("EX") else f'EX_{rxn} : {direction}' for rxn, direction in reversed_reactions.items()] + row['reversed reaction by gapfilling'] = ' | '.join(reactions) + if media in gf_filter_data: + gf_filter_media_data = gf_filter_data[media] + atp_production_values = list(gf_filter_media_data.values()) + if atp_production_values: + atp_prod_reaction_pairs = list(atp_production_values[0].items()) + if atp_prod_reaction_pairs: + _, reactions = atp_prod_reaction_pairs[0] + if reactions: + filtered_reactions = ' | '.join([f'{rxn} : {list(value.keys())[0]}' if not rxn.startswith("EX") else f'EX_{rxn} : {list(value.keys())[0]}' for rxn, value in reactions.items()]) + row['Filtered Reactions'] = filtered_reactions if filtered_reactions else '' + if not row['reversed reaction by gapfilling']: + row['reversed reaction by gapfilling'] = '' + gapfilling_list.append(row) + + + + gapfilling_df = pd.DataFrame(gapfilling_list, columns=['no of gapfilled reactions', 'media', 'ATP Production', 'gapfilled reactions', 'reversed reaction by gapfilling', 
'Filtered Reactions']) + gapfilling_df['no of gapfilled reactions'] = pd.to_numeric(gapfilling_df['no of gapfilled reactions']) + gapfilling_df = gapfilling_df.sort_values('no of gapfilled reactions') + + + reaction_names = {} + for rxn in modelutl.model.reactions: + reaction_id = rxn.id + reaction_name = rxn.name + reaction_names[reaction_id] = reaction_name + + # Gapfillings Analysis DataFrame + gapfillings_list = [] + if gf_sensitivity_data: + for media, media_data in gf_sensitivity_data.items(): + for target, target_data in media_data.items(): # Iterate through each target for the current media + for status, status_data in target_data.items(): + if isinstance(status_data, dict): + for reaction_id, reaction_directions in status_data.items(): + for direction, gapfilling_sensitivity in reaction_directions.items(): + if status == 'success': + if isinstance(gapfilling_sensitivity, list): + gapfilling_sensitivity = ', '.join(gapfilling_sensitivity) + gapfillings_list.append({ + 'Reaction ID': reaction_id, + 'Reaction Name': reaction_names.get(reaction_id, ''), # Get reaction name from the dictionary + 'Media': media, + 'Direction': direction, + 'Target': target, + 'Gapfilling Sensitivity': gapfilling_sensitivity + }) + else: + # Handle cases where status_data is null + gapfillings_list.append({ + 'Reaction ID': '', # No data available for Reaction ID + 'Reaction Name': '', # No data available for Reaction Name + 'Media': media, + 'Direction': '', # No data available for Direction + 'Target': target, + 'Gapfilling Sensitivity': 'Failed Before Filtering' if status == 'FBF' else 'Failed After Filtering' if status == 'FAF' else status # Status is the 'FBF' or other labels in this case + }) + + gapfillings_analysis_df = pd.DataFrame(gapfillings_list, columns=['Reaction ID', 'Reaction Name', 'Media', 'Direction', 'Target', 'Gapfilling Sensitivity']) + + + # Define the custom color mapping function + def color_gradient(val): + if val == 0: + return 'background-color: green' 
+ else: + color_map = cm.get_cmap('YlOrRd') # Choose the color map + norm_val = val / gapfilling_df['no of gapfilled reactions'].max() # Normalize the value between 0 and 1 + color = color_map(norm_val) + r, g, b, _ = color + return f'background-color: rgb({int(r * 255)}, {int(g * 255)}, {int(b * 255)})' + + # Apply the default style to the Model Summary DataFrame + model_summary_df_styled = ( + model_summary_df.style + .hide_index() + .set_table_styles([ + {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, + {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, + {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, + {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, + ]) + ) + + + # Apply the default style to the Gapfillings Analysis DataFrame + # Apply the default style to the Gapfillings Analysis DataFrame + gapfillings_analysis_df_styled = ( + gapfillings_analysis_df.style + .hide_index() + .format({ + 'Reaction ID': lambda x: f'{x}' if not x.startswith("EX") else f'{x}', # Add hyperlink to Reaction ID + 'Gapfilling Sensitivity': lambda x: ', '.join([f'{i}' for i in x.split(', ')]) if x and not x.startswith('Failed') else x # Add hyperlinks to Gapfilling Sensitivity + }) + .set_table_styles([ + {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, + {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, + {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, + {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, + ]) + ) + + + # Apply the default style with alternating row colors, Oxygen font, adjusted font size 
and line height, + # and switched order of light grey and white backgrounds in the header column for Core ATP Gapfilling Analysis + gapfilling_df_styled = ( + gapfilling_df.style + .applymap(color_gradient, subset=['no of gapfilled reactions']) + .hide_index() + .format({'Filtered Reactions': lambda x: f'{x}'}) + .format({'gapfilled reactions': lambda x: f'{x}'}) + .set_table_styles([ + {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, + {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, + {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, + {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, + ]) + ) + + + # Legend text for Table 1 + annotations_text_1 = """ +
    +
  • Reaction ID: The identifier of the reaction.
  • +
  • Reaction Name: The name of the reaction.
  • +
  • Media: The media used by gap filling.
  • +
  • Direction: The direction of the reaction. Can be ">" for forward, "<" for reverse, or "=" for both directions.
  • +
  • Target: The reaction selected as the objective function target for the gapfilling optimization problem. Targets here can be the model’s biomass reaction, commonly named “bio1” for models created by this app. + Alternatively, “rxn00062” (ATP Production) reaction is shown for cases where gapfilling was applied to guarantee ATP production in a given media. + When reactions are gapfilled for ATP production, we recommend checking the full Core ATP Analysis in Table 2 below.
  • +
  • Gapfilling Sensitivity: Gapfilling is necessary when compounds in the biomass objective function can not be produced by the model. + For each reaction we list the biomass compound(s) that can not be synthesized by the model without gapfilling. + In cases where gap filling fails there are two possible scenarios: + 1) FBF (failed before filtering) : the gapfilling immediately failed, even before we filtered out the ATP breaking reactions. This means this objective CANNOT be satisfied with the entire current database. + 2) FAF (failed after filtering): the gapfilling succeeded before filtering, but failed after filtering out reactions that break ATP. This tells you definitively if the ATP filtering caused the gapfilling to fail
  • +
+ """ + #table 2 intro text + introductory_text = """ +

During model reconstruction, we analyze the genome’s core metabolism draft model (model without gapfilling) to assess energy biosynthesis capabilities. + The goal of this analysis is to ensure the core metabolism model is able to produce ATP before we expand the model to the genome-scale. + This step is designed to prevent gapfilling from introducing reactions that create energy-generating loops. + The tests are conducted on a large collection of minimal conditions, with the goal of simulating the model’s capability to produce energy with different electron donor, electron acceptor, and carbon source combinations.

+

When the draft model of the core metabolism is capable of producing ATP in at least one of the test media, no gapfilling reactions part of this analysis will be added to the model. While we still report the gapfilling requirements for the test media formulations that fail to produce ATP with that draft core model, we only integrate these solutions in the model when no test media succeeds in producing ATP. + In this case, the integrated gap-filling solution(s) will be displayed in “Table 1 - Gapfilling Analysis” above, with the “Target” “rxn00062” (ATP Production) objective function.

+

The goal is to display the test results for all media to provide clues for the metabolic capabilities of the genome(s). When many reactions are required for growth on the SO4 testing media conditions, this could be a good indicator that the organism is not capable of performing sulfate reduction. + On the other hand, when only one gapfill reaction is required for ATP production in a given media, multiple scenarios can be considered. + 1) Organism(s) can’t grow on test condition, and we correctly did not add the reaction to the model. 2) Possible issue with the source genome annotation missing a specific gene function 3) Possible issue with the model reconstruction database. We hope this data helps make more informed decisions on reactions that may need to be manually curated in the model. + In cases where it is known from the literature or unpublished experimental results that an organism is capable of producing ATP in a given media condition that requires gapfilling in this analysis, you can use the parameter “Force ATP media” in the reconstruction app to ensure those reactions are integrated into the model. + .

+ """ + # Legend text for Table 2 + annotations_text_2 = """ +
    +
  • No. of gapfilled reactions: The number of reactions filled by the gapfilling process.
  • +
  • Media: The media in which the reaction takes place.
  • +
  • ATP Production: ATP production by the core metabolism model.
  • +
  • Gapfilled Reactions: Reactions added during the gapfilling process.
  • +
  • Reversed Reaction by Gapfilling: Reactions that have been reversed during the gapfilling process.
  • +
  • Filtered Reactions: Reactions that have been filtered out during the analysis. When a reaction addition would lead to a large increase in ATP production or an infinite energy loop, we filter that reaction out of the gapfilling database and prevent it from being added to the model.
  • +
+ """ + # Save the data to HTML with the styled DataFrames and the legends + with open('testt.html', 'w') as f: + f.write('

Model Summary

') + f.write(model_summary_df_styled.render(escape=False)) + f.write('

') + if gf_sensitivity_data: + f.write('

Table 1 - Gapfillings Analysis

') + f.write(gapfillings_analysis_df_styled.render(escape=False)) + f.write(f'

Legend:

{annotations_text_1}') + else: + f.write('Gapfilling was not selected as a parameter during reconstruction of the model. As a result your model may not grow on your media object when running Flux Balance Analysis. You can gapfill your model after reconstruction by using the new Gapfilling Metabolic Model app currently in beta') + f.write('

') + f.write('

Table 2 - Core ATP Analysis

') + f.write(gapfilling_df_styled.render(escape=False)) + f.write(f'

Legend:

{annotations_text_2}') + f.write(introductory_text) From 81dd30c00ba00589d78b6c862cbaef7856c3ef4c Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 26 Jul 2023 13:33:09 -0500 Subject: [PATCH 068/146] Adding filename for report output --- modelseedpy/core/msmodelreport.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index dccc658b..892f3169 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -2,6 +2,8 @@ import pandas as pd import logging import matplotlib.cm as cm +import os +from os.path import dirname, exists from modelseedpy.core.msmodelutl import MSModelUtil logger = logging.getLogger(__name__) @@ -17,7 +19,8 @@ def __init__( def build_report( self, - model_or_mdlutl + model_or_mdlutl, + output_path ): """Builds model HTML report Parameters @@ -255,7 +258,9 @@ def color_gradient(val): """ # Save the data to HTML with the styled DataFrames and the legends - with open('testt.html', 'w') as f: + directory = dirname(output_path) + os.makedirs(directory, exist_ok=True) + with open(output_path, 'w') as f: f.write('

Model Summary

') f.write(model_summary_df_styled.render(escape=False)) f.write('

') From 650766662847976bd3acd17296ee0c8b07bf8e06 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 26 Jul 2023 14:06:26 -0500 Subject: [PATCH 069/146] Fixing report --- modelseedpy/core/msmodelreport.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index 892f3169..e7c8b591 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -39,6 +39,9 @@ def build_report( core_atp_gapfilling_data = attributes['ATP_analysis']['core_atp_gapfilling'] gf_filter_data = attributes['gf_filter'] gf_sensitivity_data = attributes.get('gf_sensitivity') # Get 'gf_sensitivity_data' if available, otherwise it will be None + number_gapfills = 0 + if gf_sensitivity_data: + number_gapfills = len(gf_sensitivity_data) # Get the names of 'Core Gapfilling Media' and 'Gapfilling Media' core_gapfilling_media = [media for media, media_data in (gf_sensitivity_data or {}).items() if 'rxn00062_c0' in media_data] @@ -65,7 +68,7 @@ def build_report( ('Number compounds', str(len(modelutl.model.metabolites))), ('Number compartments', str(len(modelutl.model.compartments))), ('Number biomass', str(bio_count)), - ('Number gapfills', str(len(gf_sensitivity_data))), + ('Number gapfills', str(number_gapfills)), ] # Create the DataFrame for Model Summary From 28c7028a275ea50ac3085cb14255ddc2a32286f7 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 2 Aug 2023 08:57:41 -0500 Subject: [PATCH 070/146] Fixing accuracy and output and baseline growth --- modelseedpy/core/msgrowthphenotypes.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 696a6b9d..c5149923 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -94,7 +94,9 @@ def simulate( output["baseline_growth"] = 0.01 if self.parent: 
output["baseline_growth"] = self.parent.baseline_growth(modelutl,objective) - + if output["baseline_growth"] < 1e-5: + output["baseline_growth"] = 0.01 + #Building specific media and setting compound exception list if self.parent and self.parent.atom_limits and len(self.parent.atom_limits) > 0: reaction_exceptions = [] @@ -131,7 +133,9 @@ def simulate( output["fluxes"] = solution.fluxes #Determining phenotype class + if output["growth"] >= output["baseline_growth"]*growth_multiplier: + output["GROWING"] = True if not self.growth: output["class"] = "GROWTH" elif self.growth > 0: @@ -139,6 +143,7 @@ def simulate( elif self.growth == 0: output["class"] = "FP" else: + output["GROWING"] = False if not self.growth: output["class"] = "NOGROWTH" elif self.growth > 0: @@ -399,6 +404,7 @@ def simulate_phenotypes( } #Running simulations gapfilling_solutions = {} + totalcount = 0 for pheno in self.phenotypes: result = pheno.simulate( modelutl,objective,growth_multiplier,add_missing_exchanges,save_fluxes @@ -413,13 +419,21 @@ def simulate_phenotypes( if result["class"] == "CP": summary["Count"][1] += 1 summary["Count"][0] += 1 - if result["class"] == "CN": + totalcount += 1 + elif result["class"] == "CN": summary["Count"][2] += 1 summary["Count"][0] += 1 - if result["class"] == "FP": + totalcount += 1 + elif result["class"] == "FP": summary["Count"][3] += 1 - if result["class"] == "FN": + totalcount += 1 + elif result["class"] == "FN": summary["Count"][4] += 1 + totalcount += 1 + elif result["class"] == "GROWTH": + summary["Count"][5] += 1 + elif result["class"] == "NOGROWTH": + summary["Count"][6] += 1 #Gapfilling negative growth conditions if gapfill_negatives and output["class"] in ["NOGROWTH","FN","CN"]: gapfilling_solutions[pheno] = pheno.gapfill_model_for_phenotype(msgapfill,objective,test_conditions,growth_multiplier,add_missing_exchanges) @@ -442,10 +456,9 @@ def simulate_phenotypes( data["Gapfilled reactions"].append(None) else: data["Gapfilled reactions"].append(None) 
- summary["Count"][0] = summary["Count"][0] / len(self.phenotypes) + summary["Count"][0] = summary["Count"][0] / totalcount sdf = pd.DataFrame(summary) df = pd.DataFrame(data) - logger.info(df) return {"details": df, "summary": sdf} def fit_model_to_phenotypes( From 21efcbc0a7d68b6a8b21f28397f9912ca8b784a1 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 2 Aug 2023 09:20:11 -0500 Subject: [PATCH 071/146] Fixing gapfilling filter saving and complete media --- modelseedpy/core/msgapfill.py | 6 +++++- modelseedpy/fbapkg/kbasemediapkg.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 14371e2a..f983750a 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -57,8 +57,9 @@ def __init__( ] # the cpd11416 compound is filtered during model extension with templates # Cloning model to create gapfilling model self.gfmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) + self.gfmodelutl = MSModelUtil.get(self.gfmodel) # Getting package manager for gapfilling model - self.gfpkgmgr = MSPackageManager.get_pkg_mgr(self.gfmodel) + self.gfpkgmgr = MSPackageManager.get_pkg_mgr(self.gfmodelutl) # Setting target from input if default_target: self.default_target = default_target @@ -147,6 +148,9 @@ def prefilter(self, media, target): self.gfpkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) + base_filter = self.mdlutl.get_attributes("gf_filter") + gf_filter = self.gfmodelutl.get_attributes("gf_filter") + base_filter[media.id] = gf_filter[media.id] # Testing if gapfilling can work after filtering if not self.test_gapfill_database(media, target, before_filtering=False): diff --git a/modelseedpy/fbapkg/kbasemediapkg.py b/modelseedpy/fbapkg/kbasemediapkg.py index a3c19243..b377547e 100644 --- a/modelseedpy/fbapkg/kbasemediapkg.py +++ b/modelseedpy/fbapkg/kbasemediapkg.py @@ -40,7 +40,7 @@ def build_package( 
self.parameters["default_uptake"] = 0 if self.parameters["default_excretion"] is None: self.parameters["default_excretion"] = 100 - if (self.parameters["media"] is None or self.parameters["media"].name == "Complete") and self.parameters["default_uptake"] == 0: + if (self.parameters["media"] and self.parameters["media"].name == "Complete") and self.parameters["default_uptake"] == 0: self.parameters["default_uptake"] = 100 # First initializing all exchanges to default uptake and excretion From ed045508254722f2170c62b6c2cac7a93c16e605 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 3 Aug 2023 00:17:49 -0400 Subject: [PATCH 072/146] Fixing bug in phenotype accuracy computation --- modelseedpy/core/msgrowthphenotypes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index c5149923..b0d3b2b6 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -456,7 +456,10 @@ def simulate_phenotypes( data["Gapfilled reactions"].append(None) else: data["Gapfilled reactions"].append(None) - summary["Count"][0] = summary["Count"][0] / totalcount + if totalcount == 0: + summary["Count"][0] = None + else: + summary["Count"][0] = summary["Count"][0] / totalcount sdf = pd.DataFrame(summary) df = pd.DataFrame(data) return {"details": df, "summary": sdf} From 26495773c9e27c1047b6d35f3ff9b25edaf15b35 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 3 Aug 2023 00:28:19 -0400 Subject: [PATCH 073/146] Fixing gapfilling filter attribute --- modelseedpy/core/msatpcorrection.py | 2 +- modelseedpy/core/msgapfill.py | 8 +++++--- modelseedpy/core/msmodelutl.py | 11 +++++++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 35ee409e..59055ca6 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -444,7 
+444,7 @@ def expand_model_to_genome_scale(self): self.restore_noncore_reactions(noncore=True, othercompartment=False) # Extending model with non core reactions while retaining ATP accuracy self.filtered_noncore = self.modelutl.reaction_expansion_test( - self.noncore_reactions, tests + self.noncore_reactions, tests,atp_expansion=True ) # Removing filtered reactions for item in self.filtered_noncore: diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index f983750a..575fb5b5 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -148,9 +148,11 @@ def prefilter(self, media, target): self.gfpkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) - base_filter = self.mdlutl.get_attributes("gf_filter") - gf_filter = self.gfmodelutl.get_attributes("gf_filter") - base_filter[media.id] = gf_filter[media.id] + gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes("gf_filter", {}) + base_filter = self.mdlutl.get_attributes("gf_filter", {}) + for media_id in gf_filter: + base_filter[media_id] = gf_filter[media_id] + base_filter = self.save_attributes(base_filter, "gf_filter") # Testing if gapfilling can work after filtering if not self.test_gapfill_database(media, target, before_filtering=False): diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 47064f05..607095c2 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -902,7 +902,7 @@ def binary_expansion_test(self, reaction_list, condition, currmodel, depth=0): return filtered_list def reaction_expansion_test( - self, reaction_list, condition_list, binary_search=True + self, reaction_list, condition_list, binary_search=True,atp_expansion=False ): """Adds reactions in reaction list one by one and appplies tests, filtering reactions that fail @@ -962,6 +962,10 @@ def reaction_expansion_test( ) # Adding filter results to attributes gf_filter_att = 
self.get_attributes("gf_filter", {}) + if atp_expansion: + atp_analysis = self.get_attributes("ATP_analysis", {}) + atp_analysis["atp_expansion_filter"] = {} + gf_filter_att = atp_analysis["atp_expansion_filter"] if condition["media"].id not in gf_filter_att: gf_filter_att[condition["media"].id] = {} if condition["objective"] not in gf_filter_att[condition["media"].id]: @@ -997,7 +1001,10 @@ def reaction_expansion_test( gf_filter_att[condition["media"].id][condition["objective"]][ condition["threshold"] ][item[0].id][item[1]] = item[2] - gf_filter_att = self.save_attributes(gf_filter_att, "gf_filter") + if atp_expansion: + atp_analysis = self.save_attributes(atp_analysis, "ATP_analysis") + else: + gf_filter_att = self.save_attributes(gf_filter_att, "gf_filter") return filtered_list ################################################################################# From 8c704488c184b6e1d31ddd43e5398eddd1644dac Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 3 Aug 2023 09:22:49 -0400 Subject: [PATCH 074/146] Fixing bug in saving of gapfilling filtering --- modelseedpy/core/msgapfill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 575fb5b5..a3d5c5f3 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -152,7 +152,7 @@ def prefilter(self, media, target): base_filter = self.mdlutl.get_attributes("gf_filter", {}) for media_id in gf_filter: base_filter[media_id] = gf_filter[media_id] - base_filter = self.save_attributes(base_filter, "gf_filter") + base_filter = self.mdlutl.save_attributes(base_filter, "gf_filter") # Testing if gapfilling can work after filtering if not self.test_gapfill_database(media, target, before_filtering=False): From f02a4a775b85b722188400e808080f9a15084d8b Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 3 Aug 2023 23:21:23 -0400 Subject: [PATCH 075/146] fixing filter saving in attributes --- 
modelseedpy/core/msatpcorrection.py | 2 +- modelseedpy/core/msgapfill.py | 7 ++++++- modelseedpy/core/msmodelutl.py | 12 ++---------- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 59055ca6..6a0fcec2 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -444,7 +444,7 @@ def expand_model_to_genome_scale(self): self.restore_noncore_reactions(noncore=True, othercompartment=False) # Extending model with non core reactions while retaining ATP accuracy self.filtered_noncore = self.modelutl.reaction_expansion_test( - self.noncore_reactions, tests,atp_expansion=True + self.noncore_reactions, tests,attribute_label="atp_expansion_filter" ) # Removing filtered reactions for item in self.filtered_noncore: diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index a3d5c5f3..f716c1fd 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -148,11 +148,16 @@ def prefilter(self, media, target): self.gfpkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) + with open("OriginalAttributes.json", 'w') as f: + json.dump(self.mdlutl.get_attributes(), f,indent=4,skipkeys=True) + with open("GapfillingAttributes.json", 'w') as f: + json.dump(self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes(), f,indent=4,skipkeys=True) gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes("gf_filter", {}) base_filter = self.mdlutl.get_attributes("gf_filter", {}) for media_id in gf_filter: base_filter[media_id] = gf_filter[media_id] - base_filter = self.mdlutl.save_attributes(base_filter, "gf_filter") + with open("FinalAttributes.json", 'w') as f: + json.dump(self.mdlutl.get_attributes(), f,indent=4,skipkeys=True) # Testing if gapfilling can work after filtering if not self.test_gapfill_database(media, target, before_filtering=False): diff --git 
a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 607095c2..785c4f9c 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -902,7 +902,7 @@ def binary_expansion_test(self, reaction_list, condition, currmodel, depth=0): return filtered_list def reaction_expansion_test( - self, reaction_list, condition_list, binary_search=True,atp_expansion=False + self, reaction_list, condition_list, binary_search=True,attribute_label="gf_filter" ): """Adds reactions in reaction list one by one and appplies tests, filtering reactions that fail @@ -961,11 +961,7 @@ def reaction_expansion_test( + str(len(reaction_list)) ) # Adding filter results to attributes - gf_filter_att = self.get_attributes("gf_filter", {}) - if atp_expansion: - atp_analysis = self.get_attributes("ATP_analysis", {}) - atp_analysis["atp_expansion_filter"] = {} - gf_filter_att = atp_analysis["atp_expansion_filter"] + gf_filter_att = self.get_attributes(attribute_label, {}) if condition["media"].id not in gf_filter_att: gf_filter_att[condition["media"].id] = {} if condition["objective"] not in gf_filter_att[condition["media"].id]: @@ -1001,10 +997,6 @@ def reaction_expansion_test( gf_filter_att[condition["media"].id][condition["objective"]][ condition["threshold"] ][item[0].id][item[1]] = item[2] - if atp_expansion: - atp_analysis = self.save_attributes(atp_analysis, "ATP_analysis") - else: - gf_filter_att = self.save_attributes(gf_filter_att, "gf_filter") return filtered_list ################################################################################# From ca7dee2488a83d848929af8c4c3e7df6a1222d14 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Fri, 4 Aug 2023 08:44:40 -0400 Subject: [PATCH 076/146] Removing debugging from filtering code --- modelseedpy/core/msgapfill.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index f716c1fd..16634707 100644 --- 
a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -148,17 +148,11 @@ def prefilter(self, media, target): self.gfpkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) - with open("OriginalAttributes.json", 'w') as f: - json.dump(self.mdlutl.get_attributes(), f,indent=4,skipkeys=True) - with open("GapfillingAttributes.json", 'w') as f: - json.dump(self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes(), f,indent=4,skipkeys=True) gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes("gf_filter", {}) base_filter = self.mdlutl.get_attributes("gf_filter", {}) for media_id in gf_filter: base_filter[media_id] = gf_filter[media_id] - with open("FinalAttributes.json", 'w') as f: - json.dump(self.mdlutl.get_attributes(), f,indent=4,skipkeys=True) - + # Testing if gapfilling can work after filtering if not self.test_gapfill_database(media, target, before_filtering=False): return False From 0cdba7c2742d63abf6c4dc30271ec0a02293188c Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 8 Aug 2023 12:22:11 -0500 Subject: [PATCH 077/146] Adding support for building models from multiple annotations --- modelseedpy/biochem/modelseed_biochem.py | 2 +- modelseedpy/core/annotationontology.py | 275 +++++++++++++++++++++++ modelseedpy/core/msbuilder.py | 79 ++++++- 3 files changed, 353 insertions(+), 3 deletions(-) create mode 100644 modelseedpy/core/annotationontology.py diff --git a/modelseedpy/biochem/modelseed_biochem.py b/modelseedpy/biochem/modelseed_biochem.py index 287ce470..80594e0e 100644 --- a/modelseedpy/biochem/modelseed_biochem.py +++ b/modelseedpy/biochem/modelseed_biochem.py @@ -495,7 +495,7 @@ class ModelSEEDBiochem: @staticmethod def get(create_if_missing=True): if not ModelSEEDBiochem.default_biochemistry: - ModelSEEDBiochem.default_biochemistry = from_local2( + ModelSEEDBiochem.default_biochemistry = from_local( config.get("biochem", "path") ) return 
ModelSEEDBiochem.default_biochemistry diff --git a/modelseedpy/core/annotationontology.py b/modelseedpy/core/annotationontology.py new file mode 100644 index 00000000..6685bb0a --- /dev/null +++ b/modelseedpy/core/annotationontology.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +import logging +import re +import time +import json +import sys +import pandas as pd +import cobra +from cobra import DictList + +# from builtins import None + +logger = logging.getLogger(__name__) +logger.setLevel( + logging.INFO +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO + +#Class structure +#AnnotationOntology -> Features/Events/Terms/Ontologies +# AnnotationOntologyOntology -> Events/Terms +# AnnotationOntologyEvent -> Features/Ontology +# AnnotationOntologyFeature -> Term+Event->Evidence +# AnnotationOntologyTerm -> Ontology/Events/Featurs +# AnnotationOntologyEvidence -> -- + +allowable_score_types = ["probability","evalue","bitscore","identity","qalignstart","qalignstop","salignstart","salignstop","kmerhits","tmscore","rmsd","hmmscore"] + +class AnnotationOntologyEvidence: + def __init__(self,scores={},ref_entity=None,entity_type=None): + self.ref_entity=ref_entity + self.entity_type=entity_type + self.scores=scores + for item in self.scores: + if item not in allowable_score_types: + logger.warning(item+" not an allowable score type!") + + def to_data(self): + return { + "ref_entity":self.ref_entity, + "entity_type":self.entity_type, + "scores":self.scores + } + +class AnnotationOntologyTerm: + def __init__(self,parent,term_id,ontology): + self.id = term_id + self.parent = parent + self.ontology = ontology + self.ontology.add_term(self) + self.parent.add_term(self) + self.msrxns = set() + self.events = {} + self.features = {} + + def add_msrxns(self,rxn_ids): + for rxn_id in rxn_ids: + if rxn_id[0:6] == "MSRXN:": + rxn_id = rxn_id[6:] + self.msrxns.update([rxn_id]) + + def add_event(self,event): + self.events[event.id] = event + + def 
add_feature(self,feature): + self.features[feature.id] = feature + +class AnnotationOntologyOntology: + def __init__(self,parent,ontology_id): + self.id = ontology_id + self.parent = parent + self.events = {} + self.terms = {} + + def add_event(self,event): + self.events[event.id] = event + + def add_term(self,term): + self.terms[term.id] = term + +class AnnotationOntologyFeature: + def __init__(self,parent,feature_id,type=None): + self.id = feature_id + self.parent = parent + parent.add_feature(self) + self.type = type + self.event_terms = {} + self.term_events = {} + + def add_event_term(self,event,term,scores={},ref_entity=None,entity_type=None): + if event.id not in self.event_terms: + self.event_terms[event.id] = {} + self.event_terms[event.id][term.id] = AnnotationOntologyEvidence(scores,ref_entity,entity_type) + if term.id not in self.term_events: + self.term_events[term.id] = {} + self.term_events[term.id][event.id] = self.event_terms[event.id][term.id] + + def get_associated_terms(self,event_list=None,ontologies=None): + output = {} + for term in self.term_events: + if not ontologies or term.ontology.id in ontologies: + for event in self.term_events[term]: + if not event_list or event.id in event_list: + if term.id not in output: + output[term.id] = [] + output[term.id].append(self.term_events[term][event].to_data()) + return output + + def get_associated_reactions(self,prioritized_event_list=None,ontologies=None,merge_all=False): + output = {} + for term_id in self.term_events: + if not ontologies or self.parent.terms[term_id].ontology.id in ontologies: + if merge_all or not prioritized_event_list: + for event_id in self.term_events[term_id]: + if not prioritized_event_list or event_id in prioritized_event_list: + rxns = self.parent.terms[term_id].msrxns; + for rxn_id in rxns: + if rxn_id not in output: + output[rxn_id] = [] + output[rxn_id].append(self.term_events[term_id][event_id].to_data()) + else: + for event_id in prioritized_event_list: + if 
event_id in self.term_events[term_id]: + rxns = self.parent.terms[term_id].msrxns; + for rxn_id in rxns: + if rxn_id not in output: + output[rxn_id] = [] + output[rxn_id].append(self.term_events[term_id][event_id].to_data()) + if len(rxns) > 0: + break + return output + +class AnnotationOntologyEvent: + def __init__(self,parent,event_id,ontology_id,method,method_version=None,description=None,timestamp=None): + self.id = event_id + self.parent = parent + #Linking ontology + self.ontology = self.parent.add_ontology(ontology_id) + self.ontology.add_event(self) + if not description: + self.description = ""#TODO + else: + self.description = description + self.method = method + self.method_version = method_version + self.timestamp = timestamp + self.features = {} + + @staticmethod + def from_data(data,parent): + if "method_version" not in data: + data["method_version"] = None + if "description" not in data: + data["description"] = None + if "timestamp" not in data: + data["timestamp"] = None + self = AnnotationOntologyEvent(parent,data["event_id"],data["ontology_id"],data["method"],data["method_version"],data["description"],data["timestamp"]) + if "ontology_terms" in data: + for feature_id in data["ontology_terms"]: + feature = self.parent.add_feature(feature_id) + self.add_feature(feature) + for item in data["ontology_terms"][feature_id]: + term = self.parent.add_term(item["term"],self.ontology) + scores = {} + ref_entity = None + entity_type = None + if "evidence" in item: + if "scores" in item["evidence"]: + scores = item["evidence"]["scores"] + if "reference" in item["evidence"]: + ref_entity = item["evidence"]["reference"][1] + entity_type = item["evidence"]["reference"][0] + feature.add_event_term(self,term,scores,ref_entity,entity_type) + if "modelseed_ids" in item: + term.add_msrxns(item["modelseed_ids"]) + return self + + def add_feature(self,feature): + self.features[feature.id] = feature + + def to_data(self): + data = { + "event_id" : self.event_id, + 
"description" : self.event_id, + "ontology_id" : self.ontology_id, + "method" : self.method, + "method_version" : self.method_version, + "timestamp" : self.timestamp, + "ontology_terms" : {} + } + for feature in self.features: + data["ontology_terms"][feature] = { + "term":None#TODO + } + +class AnnotationOntology: + mdlutls = {} + + @staticmethod + def from_kbase_data(data,genome_ref=None): + self = AnnotationOntology(genome_ref) + if "feature_types" in data: + self.feature_types = data["feature_types"] + if "events" in data: + for event in data["events"]: + self.events += [AnnotationOntologyEvent.from_data(event,self)] + return self + + def __init__(self,genome_ref): + self.genome_ref = genome_ref + self.events = DictList() + self.terms = {} + self.ontologies = {} + self.genes = {} + self.cdss = {} + self.noncodings = {} + self.feature_types = {} + + def get_reaction_gene_hash(self,prioritized_event_list=None,ontologies=None,merge_all=False,type="genes"): + output = {} + if type == "genes" and len(self.genes) > 0: + for feature_id in self.genes: + output[feature_id] = self.genes[feature_id].get_associated_reactions(prioritized_event_list,ontologies,merge_all) + elif len(self.cdss) > 0: + for feature_id in self.cdss: + output[feature_id] = self.cdss[feature_id].get_associated_reactions(prioritized_event_list,ontologies,merge_all) + return output + + def add_term(self,term_or_id,ontology=None): + if not isinstance(term_or_id, AnnotationOntologyTerm): + if term_or_id in self.terms: + return self.terms[term_or_id] + else: + return AnnotationOntologyTerm(self,term_or_id,ontology) + if term_or_id.id in self.terms: + logger.critical("Term with id "+term_or_id.id+" already in annotation!") + return self.terms[term_or_id.id] + else: + self.terms[term_or_id.id] = term_or_id + + def add_ontology(self,ontology_or_id): + if not isinstance(ontology_or_id, AnnotationOntologyOntology): + if ontology_or_id in self.ontologies: + return self.ontologies[ontology_or_id] + else: + 
return AnnotationOntologyOntology(self,ontology_or_id) + if ontology_or_id.id in self.ontologies: + logger.critical("Ontology with id "+ontology_or_id.id+" already in annotation!") + return self.ontologies[ontology_or_id.id] + else: + self.ontologies[ontology_or_id.id] = ontology_or_id + + def get_feature_hash(self,feature_id): + feature_hash = self.genes + if feature_id in self.feature_types: + if self.feature_types[feature_id] == "cds": + feature_hash = self.cdss + elif self.feature_types[feature_id] == "noncoding": + feature_hash = self.noncodings + return feature_hash + + def add_feature(self,feature_or_id): + feature_hash = None + if not isinstance(feature_or_id, AnnotationOntologyFeature): + feature_hash = self.get_feature_hash(feature_or_id) + if feature_or_id in feature_hash: + return feature_hash[feature_or_id] + else: + feature_or_id = AnnotationOntologyFeature(self,feature_or_id) + if not feature_hash: + feature_hash = self.get_feature_hash(feature_or_id.id) + if feature_or_id.id in feature_hash: + logger.critical("Feature with id "+feature_or_id.id+" already in annotation!") + else: + feature_hash[feature_or_id.id] = feature_or_id + return feature_hash[feature_or_id.id] diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 3a78188a..2825fdea 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -13,6 +13,8 @@ from cobra.core import Gene, Metabolite, Model, Reaction, Group from modelseedpy.core import FBAHelper from modelseedpy.fbapkg.mspackagemanager import MSPackageManager +from modelseedpy.biochem.modelseed_biochem import ModelSEEDBiochem +from modelseedpy.biochem.modelseed_to_cobra import modelseed_to_cobra_reaction SBO_ANNOTATION = "sbo" @@ -728,6 +730,75 @@ def build_metabolic_reactions(self): reactions.append(reaction) return reactions + + def build_from_annotaton_ontology( + self, + model_or_id, + anno_ont, + index="0", + allow_all_non_grp_reactions=False, + annotate_with_rast=True, + 
biomass_classic=False, + biomass_gc=0.5, + add_non_template_reactions=True, + prioritized_event_list=None, + ontologies=None, + merge_all=False + ): + #Build base model without annotation + model_or_id = self.build(model_or_id,index,allow_all_non_grp_reactions,annotate_with_rast,biomass_classic,biomass_gc) + + gene_associated_reactions = self.build_reactions_from_annotaton_ontology(anno_ont,add_non_template_reactions,prioritized_event_list,ontologies,merge_all) + cobra_model.add_reactions(gene_associated_reactions) + return cobra_model + + def build_reactions_from_annotaton_ontology( + self, + anno_ont, + add_non_template_reactions=True, + prioritized_event_list=None, + ontologies=None, + merge_all=False + ): + if self.base_model is None: + raise ModelSEEDError( + "unable to generate metabolic reactions without base model" + ) + + reactions = [] + rxn_gene_hash = anno_ont.get_reaction_gene_hash(prioritized_event_list,ontologies,merge_all) + modelseeddb = ModelSEEDBiochem.get() + for rxn_id in rxn_gene_hash: + reaction = None + template_reaction = None + if rxn_id+"_c" in self.template.reactions: + template_reaction = self.template.reactions.get_by_id(rxn_id+"_c") + elif rxn_id in modelseeddb.reactions: + msrxn = modelseeddb.reactions.get_by_id(rxn_id) + template_reaction = msrxn.to_template_reaction({0:"c",1:"e"}) + if template_reaction: + for m in template_reaction.metabolites: + if m.compartment not in self.compartments: + self.compartments[ + m.compartment + ] = self.template.compartments.get_by_id(m.compartment) + if m.id not in self.template_species_to_model_species: + model_metabolite = m.to_metabolite(self.index) + self.template_species_to_model_species[m.id] = model_metabolite + self.base_model.add_metabolites([model_metabolite]) + reaction = template_reaction.to_reaction(self.base_model, self.index) + gpr = "" + for gene_id in rxn_gene_hash[rxn_id]: + if len(gpr) > 0: + gpr += " or " + gpr += gene_id + reaction.gpr(gpr) + 
reaction.annotation[SBO_ANNOTATION] = "SBO:0000176" + reactions.append(reaction) + else: + print("Reaction ",rxn_id," not found in template or database!") + + return reactions def build_non_metabolite_reactions( self, cobra_model, allow_all_non_grp_reactions=False @@ -813,6 +884,7 @@ def build( annotate_with_rast=True, biomass_classic=False, biomass_gc=0.5, + add_reaction_from_rast_annotation=True ): """ @@ -850,8 +922,11 @@ def build( complex_groups = self.build_complex_groups( self.reaction_to_complex_sets.values() ) - metabolic_reactions = self.build_metabolic_reactions() - cobra_model.add_reactions(metabolic_reactions) + + if add_reaction_from_rast_annotation: + metabolic_reactions = self.build_metabolic_reactions() + cobra_model.add_reactions(metabolic_reactions) + non_metabolic_reactions = self.build_non_metabolite_reactions( cobra_model, allow_all_non_grp_reactions ) From cdea5d6fa20302c0cdd87c249f5b523b0c63ebbf Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 8 Aug 2023 12:27:08 -0500 Subject: [PATCH 078/146] Fixing gene hash function --- modelseedpy/core/annotationontology.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/modelseedpy/core/annotationontology.py b/modelseedpy/core/annotationontology.py index 6685bb0a..7bb56793 100644 --- a/modelseedpy/core/annotationontology.py +++ b/modelseedpy/core/annotationontology.py @@ -215,14 +215,19 @@ def __init__(self,genome_ref): self.noncodings = {} self.feature_types = {} - def get_reaction_gene_hash(self,prioritized_event_list=None,ontologies=None,merge_all=False,type="genes"): + def get_reaction_gene_hash(self,prioritized_event_list=None,ontologies=None,merge_all=False,cds_features=False): output = {} - if type == "genes" and len(self.genes) > 0: - for feature_id in self.genes: - output[feature_id] = self.genes[feature_id].get_associated_reactions(prioritized_event_list,ontologies,merge_all) - elif len(self.cdss) > 0: - for feature_id in self.cdss: - 
output[feature_id] = self.cdss[feature_id].get_associated_reactions(prioritized_event_list,ontologies,merge_all) + feature_hash = self.genes + if len(self.genes) == 0 or (cds_features and len(self.cdss) == 0): + feature_hash = self.cdss + for feature_id in feature_hash: + reactions = feature_hash[feature_id].get_associated_reactions(prioritized_event_list,ontologies,merge_all) + for rxn_id in reactions: + if rxn_id not in output: + output[rxn_id] = {} + if feature_id not in output[rxn_id]: + output[rxn_id][feature_id] = [] + output[rxn_id][feature_id].append(reactions[rxn_id]) return output def add_term(self,term_or_id,ontology=None): From 894cee0946b6b77e2c643bb9cc709340adb250c4 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 8 Aug 2023 12:33:19 -0500 Subject: [PATCH 079/146] Fixing add feature function --- modelseedpy/core/annotationontology.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modelseedpy/core/annotationontology.py b/modelseedpy/core/annotationontology.py index 7bb56793..e75a14ca 100644 --- a/modelseedpy/core/annotationontology.py +++ b/modelseedpy/core/annotationontology.py @@ -273,8 +273,6 @@ def add_feature(self,feature_or_id): feature_or_id = AnnotationOntologyFeature(self,feature_or_id) if not feature_hash: feature_hash = self.get_feature_hash(feature_or_id.id) - if feature_or_id.id in feature_hash: - logger.critical("Feature with id "+feature_or_id.id+" already in annotation!") - else: + if feature_or_id.id not in feature_hash: feature_hash[feature_or_id.id] = feature_or_id return feature_hash[feature_or_id.id] From 47de2c8719b676d5c69fb861d00aaa1cb3bf1e08 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 8 Aug 2023 12:45:44 -0500 Subject: [PATCH 080/146] Fixing problem with ModelSEEDDatabase reactions --- modelseedpy/core/msbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 2825fdea..5e80774a 100644 --- 
a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -774,7 +774,7 @@ def build_reactions_from_annotaton_ontology( if rxn_id+"_c" in self.template.reactions: template_reaction = self.template.reactions.get_by_id(rxn_id+"_c") elif rxn_id in modelseeddb.reactions: - msrxn = modelseeddb.reactions.get_by_id(rxn_id) + msrxn = modelseeddb.reactions[rxn_id] template_reaction = msrxn.to_template_reaction({0:"c",1:"e"}) if template_reaction: for m in template_reaction.metabolites: From 951bce1cb092cf8cb98906bdfed63849c0ef0935 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 10 Aug 2023 07:06:24 -0700 Subject: [PATCH 081/146] Fixing annotation ontology and build from annotation ontology --- modelseedpy/core/annotationontology.py | 59 +++++++++++--- modelseedpy/core/msbuilder.py | 102 +++++++++++++------------ 2 files changed, 102 insertions(+), 59 deletions(-) diff --git a/modelseedpy/core/annotationontology.py b/modelseedpy/core/annotationontology.py index e75a14ca..4750ed13 100644 --- a/modelseedpy/core/annotationontology.py +++ b/modelseedpy/core/annotationontology.py @@ -94,17 +94,28 @@ def add_event_term(self,event,term,scores={},ref_entity=None,entity_type=None): self.term_events[term.id] = {} self.term_events[term.id][event.id] = self.event_terms[event.id][term.id] - def get_associated_terms(self,event_list=None,ontologies=None): + def get_associated_terms(self,prioritized_event_list=None,ontologies=None,merge_all=False,translate_to_rast=False): output = {} - for term in self.term_events: + for term_id in self.term_events: + term = self.parent.terms[term_id] if not ontologies or term.ontology.id in ontologies: - for event in self.term_events[term]: - if not event_list or event.id in event_list: - if term.id not in output: - output[term.id] = [] - output[term.id].append(self.term_events[term][event].to_data()) + if merge_all or not prioritized_event_list: + for event_id in self.term_events[term_id]: + if not prioritized_event_list or 
event_id in prioritized_event_list: + if term not in output: + output[term] = [] + output[term].append(self.term_events[term_id][event_id].to_data()) + else: + for event_id in prioritized_event_list: + if event_id in self.term_events[term_id]: + rxns = self.parent.terms[term_id].msrxns; + if len(rxns) > 0: + if term not in output: + output[term] = [] + output[term].append(self.term_events[term_id][event_id].to_data()) + break return output - + def get_associated_reactions(self,prioritized_event_list=None,ontologies=None,merge_all=False): output = {} for term_id in self.term_events: @@ -196,8 +207,8 @@ class AnnotationOntology: mdlutls = {} @staticmethod - def from_kbase_data(data,genome_ref=None): - self = AnnotationOntology(genome_ref) + def from_kbase_data(data,genome_ref=None,data_dir=None): + self = AnnotationOntology(genome_ref,data_dir) if "feature_types" in data: self.feature_types = data["feature_types"] if "events" in data: @@ -205,15 +216,41 @@ def from_kbase_data(data,genome_ref=None): self.events += [AnnotationOntologyEvent.from_data(event,self)] return self - def __init__(self,genome_ref): + def __init__(self,genome_ref,data_dir): self.genome_ref = genome_ref self.events = DictList() self.terms = {} self.ontologies = {} self.genes = {} self.cdss = {} + self.data_dir = data_dir self.noncodings = {} self.feature_types = {} + self.term_names = {} + + def get_term_name(self,term): + if term.ontology.id not in self.term_names: + self.term_names[term.ontology.id] = {} + if term.ontology.id in ["SSO","AntiSmash","EC","TC","META","RO","KO","GO"]: + with open(self.data_dir + "/"+term.ontology.id+"_dictionary.json") as json_file: + ontology = json.load(json_file) + for item in ontology["term_hash"]: + self.term_names[term.ontology.id][item] = ontology["term_hash"][item]["name"] + if term.id not in self.term_names[term.ontology.id]: + return "Unknown" + return self.term_names[term.ontology.id][term.id] + + def 
get_gene_term_hash(self,prioritized_event_list=None,ontologies=None,merge_all=False,cds_features=False,translate_to_rast=True): + output = {} + feature_hash = self.genes + if len(self.genes) == 0 or (cds_features and len(self.cdss) == 0): + feature_hash = self.cdss + for feature_id in feature_hash: + feature = feature_hash[feature_id] + if feature not in output: + output[feature] = {} + output[feature] = feature.get_associated_terms(prioritized_event_list,ontologies,merge_all,translate_to_rast) + return output def get_reaction_gene_hash(self,prioritized_event_list=None,ontologies=None,merge_all=False,cds_features=False): output = {} diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 5e80774a..005dc9ee 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -737,68 +737,74 @@ def build_from_annotaton_ontology( anno_ont, index="0", allow_all_non_grp_reactions=False, - annotate_with_rast=True, + annotate_with_rast=False, biomass_classic=False, biomass_gc=0.5, add_non_template_reactions=True, prioritized_event_list=None, ontologies=None, - merge_all=False + merge_all=True, + convert_to_sso=True ): #Build base model without annotation - model_or_id = self.build(model_or_id,index,allow_all_non_grp_reactions,annotate_with_rast,biomass_classic,biomass_gc) - - gene_associated_reactions = self.build_reactions_from_annotaton_ontology(anno_ont,add_non_template_reactions,prioritized_event_list,ontologies,merge_all) - cobra_model.add_reactions(gene_associated_reactions) - return cobra_model - - def build_reactions_from_annotaton_ontology( - self, - anno_ont, - add_non_template_reactions=True, - prioritized_event_list=None, - ontologies=None, - merge_all=False - ): - if self.base_model is None: - raise ModelSEEDError( - "unable to generate metabolic reactions without base model" - ) + self.search_name_to_orginal = {} + self.search_name_to_genes = {} + gene_term_hash = 
anno_ont.get_gene_term_hash(prioritized_event_list,ontologies,merge_all,convert_to_sso) + residual_reaction_gene_hash = {} + for gene in gene_term_hash: + for term in gene_term_hash[gene]: + if term.ontology.id == "SSO": + name = anno_ont.get_term_name(term) + f_norm = normalize_role(name) + if f_norm not in self.search_name_to_genes: + self.search_name_to_genes[f_norm] = set() + self.search_name_to_orginal[f_norm] = set() + self.search_name_to_orginal[f_norm].add(name) + self.search_name_to_genes[f_norm].add(gene.id) + else: + for rxn_id in term.msrxns: + if rxn_id not in residual_reaction_gene_hash: + residual_reaction_gene_hash[rxn_id] = {} + if gene not in residual_reaction_gene_hash[rxn_id]: + residual_reaction_gene_hash[rxn_id][gene] = [] + residual_reaction_gene_hash[rxn_id][gene] = gene_term_hash[gene][term] + model_or_id = self.build(model_or_id,index,allow_all_non_grp_reactions,annotate_with_rast,biomass_classic,biomass_gc) reactions = [] - rxn_gene_hash = anno_ont.get_reaction_gene_hash(prioritized_event_list,ontologies,merge_all) modelseeddb = ModelSEEDBiochem.get() - for rxn_id in rxn_gene_hash: - reaction = None - template_reaction = None - if rxn_id+"_c" in self.template.reactions: - template_reaction = self.template.reactions.get_by_id(rxn_id+"_c") - elif rxn_id in modelseeddb.reactions: - msrxn = modelseeddb.reactions[rxn_id] - template_reaction = msrxn.to_template_reaction({0:"c",1:"e"}) - if template_reaction: - for m in template_reaction.metabolites: - if m.compartment not in self.compartments: - self.compartments[ - m.compartment - ] = self.template.compartments.get_by_id(m.compartment) - if m.id not in self.template_species_to_model_species: - model_metabolite = m.to_metabolite(self.index) - self.template_species_to_model_species[m.id] = model_metabolite - self.base_model.add_metabolites([model_metabolite]) + for rxn_id in residual_reaction_gene_hash: + if rxn_id+"_c0" not in model_or_id.reactions: + reaction = None + template_reaction = None 
+ if rxn_id+"_c" in self.template.reactions: + template_reaction = self.template.reactions.get_by_id(rxn_id+"_c") + elif rxn_id in modelseeddb.reactions: + msrxn = modelseeddb.reactions.get_by_id(rxn_id) + template_reaction = msrxn.to_template_reaction({0:"c",1:"e"}) + if template_reaction: + for m in template_reaction.metabolites: + if m.compartment not in self.compartments: + self.compartments[ + m.compartment + ] = self.template.compartments.get_by_id(m.compartment) + if m.id not in self.template_species_to_model_species: + model_metabolite = m.to_metabolite(self.index) + self.template_species_to_model_species[m.id] = model_metabolite + self.base_model.add_metabolites([model_metabolite]) reaction = template_reaction.to_reaction(self.base_model, self.index) - gpr = "" - for gene_id in rxn_gene_hash[rxn_id]: - if len(gpr) > 0: - gpr += " or " - gpr += gene_id - reaction.gpr(gpr) - reaction.annotation[SBO_ANNOTATION] = "SBO:0000176" - reactions.append(reaction) + gpr = "" + for gene in residual_reaction_gene_hash[rxn_id]: + if len(gpr) > 0: + gpr += " or " + gpr += gene.id + reaction.gene_reaction_rule = gpr + reaction.annotation[SBO_ANNOTATION] = "SBO:0000176" + reactions.append(reaction) else: print("Reaction ",rxn_id," not found in template or database!") - return reactions + model_or_id.add_reactions(reactions) + return model_or_id def build_non_metabolite_reactions( self, cobra_model, allow_all_non_grp_reactions=False From be8dcd1778c2bead9b509fd735b41a89d427ec88 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 10 Aug 2023 08:23:00 -0700 Subject: [PATCH 082/146] Correcting message output in ontology builder --- modelseedpy/core/msbuilder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 005dc9ee..2c2ffc9d 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -800,8 +800,8 @@ def build_from_annotaton_ontology( 
reaction.gene_reaction_rule = gpr reaction.annotation[SBO_ANNOTATION] = "SBO:0000176" reactions.append(reaction) - else: - print("Reaction ",rxn_id," not found in template or database!") + if not reaction: + print("Reaction ",rxn_id," not found in template or database!") model_or_id.add_reactions(reactions) return model_or_id From 0af0299b5708e06005e574ae626260d8e3aee07a Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 17 Aug 2023 14:32:43 -0500 Subject: [PATCH 083/146] Checking in first draft of tempalte --- modelseedpy/data/ModelReportTemplate.html | 138 ++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 modelseedpy/data/ModelReportTemplate.html diff --git a/modelseedpy/data/ModelReportTemplate.html b/modelseedpy/data/ModelReportTemplate.html new file mode 100644 index 00000000..ca506882 --- /dev/null +++ b/modelseedpy/data/ModelReportTemplate.html @@ -0,0 +1,138 @@ + +
+ModelSEED Reconstruction + + + +
+ +
+ + + + + + + + \ No newline at end of file From 58cc3c2f6ecc0cbfc0aa6d3eac83e49c4b71ae56 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 17 Aug 2023 14:35:36 -0500 Subject: [PATCH 084/146] Fixing template --- modelseedpy/data/ModelReportTemplate.html | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modelseedpy/data/ModelReportTemplate.html b/modelseedpy/data/ModelReportTemplate.html index ca506882..738eb5e6 100644 --- a/modelseedpy/data/ModelReportTemplate.html +++ b/modelseedpy/data/ModelReportTemplate.html @@ -89,8 +89,7 @@ From de30b92e605b4f5ac6eaf4f03f854b5e318b252a Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 23 Aug 2023 23:30:16 -0500 Subject: [PATCH 086/146] Adding probabilities for ensemble models --- modelseedpy/core/msbuilder.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 2c2ffc9d..8879cf5e 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -770,6 +770,20 @@ def build_from_annotaton_ontology( residual_reaction_gene_hash[rxn_id][gene] = gene_term_hash[gene][term] model_or_id = self.build(model_or_id,index,allow_all_non_grp_reactions,annotate_with_rast,biomass_classic,biomass_gc) + for rxn in model_or_id.reactions: + probability = None + for gene in rxn.genes(): + annoont_gene = anno_ont.get_feature(gene.id) + if annoont_gene and annoont_gene in gene_term_hash: + for term in gene_term_hash[annoont_gene]: + if rxn.id[0:-3] in term.msrxns: + for item in gene_term_hash[gene][term]: + if "probability" in item.scores: + if not probability or item.scores["probability"] > probability: + probability = item.scores["probability"] + if hasattr(rxn, "probability"): + rxn.probability = probability + reactions = [] modelseeddb = ModelSEEDBiochem.get() for rxn_id in residual_reaction_gene_hash: @@ -793,10 +807,17 @@ def build_from_annotaton_ontology( 
self.base_model.add_metabolites([model_metabolite]) reaction = template_reaction.to_reaction(self.base_model, self.index) gpr = "" + probability = None for gene in residual_reaction_gene_hash[rxn_id]: + for item in residual_reaction_gene_hash[rxn_id][gene]: + if "probability" in item["scores"]: + if not probability or item["scores"]["probability"] > probability + probability = item["scores"]["probability"] if len(gpr) > 0: gpr += " or " gpr += gene.id + if hasattr(rxn, "probability"): + reaction.probability = probability reaction.gene_reaction_rule = gpr reaction.annotation[SBO_ANNOTATION] = "SBO:0000176" reactions.append(reaction) From 6ee54a2c7b6381b7e9c89c7d27ef95fa4179e1ee Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 23 Aug 2023 23:49:56 -0500 Subject: [PATCH 087/146] Fixing bug --- modelseedpy/core/msbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 8879cf5e..9953f2a9 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -811,7 +811,7 @@ def build_from_annotaton_ontology( for gene in residual_reaction_gene_hash[rxn_id]: for item in residual_reaction_gene_hash[rxn_id][gene]: if "probability" in item["scores"]: - if not probability or item["scores"]["probability"] > probability + if not probability or item["scores"]["probability"] > probability: probability = item["scores"]["probability"] if len(gpr) > 0: gpr += " or " From 9c2cff13cba1ae2ffdd3b09da843f3aa49f4fb94 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 24 Aug 2023 17:21:50 -0500 Subject: [PATCH 088/146] Fixing bug in report when gapfilling isn't run --- modelseedpy/core/msmodelreport.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index e7c8b591..2a635dc0 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -34,14 
+34,22 @@ def build_report( # Process the data attributes = modelutl.get_attributes() + gf_filter_data = {} + selected_media_data = {} + core_atp_gapfilling_data = {} - selected_media_data = attributes['ATP_analysis']['selected_media'] - core_atp_gapfilling_data = attributes['ATP_analysis']['core_atp_gapfilling'] - gf_filter_data = attributes['gf_filter'] gf_sensitivity_data = attributes.get('gf_sensitivity') # Get 'gf_sensitivity_data' if available, otherwise it will be None number_gapfills = 0 if gf_sensitivity_data: number_gapfills = len(gf_sensitivity_data) + if 'ATP_analysis' in attributes: + if 'selected_media' in attributes['ATP_analysis']: + selected_media_data = attributes['ATP_analysis']['selected_media'] + if 'core_atp_gapfilling' in attributes['ATP_analysis']: + core_atp_gapfilling_data = attributes['ATP_analysis']['core_atp_gapfilling'] + gf_filter_data = attributes['gf_filter'] + if 'gf_filter' in attributes: + gf_filter_data = attributes['gf_filter'] # Get the names of 'Core Gapfilling Media' and 'Gapfilling Media' core_gapfilling_media = [media for media, media_data in (gf_sensitivity_data or {}).items() if 'rxn00062_c0' in media_data] From 645be000d1e8e3792cd2e2ea4b34b59064e6b212 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 24 Aug 2023 22:27:13 +0000 Subject: [PATCH 089/146] Updates to report code --- modelseedpy/core/msmodelreport.py | 139 ++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index e7c8b591..10510a96 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -3,8 +3,10 @@ import logging import matplotlib.cm as cm import os +import jinja2 from os.path import dirname, exists from modelseedpy.core.msmodelutl import MSModelUtil +module_path = dirname(os.path.abspath(__file__)) logger = logging.getLogger(__name__) logger.setLevel( @@ -17,6 +19,143 @@ def __init__( ): pass + def 
build_multitab_report(self, model_or_mdlutl, output_path): + + # Helper function for extracting gapfilling data + def extract_gapfilling_data(gf_sensitivity, model): + gapfilling_entries = [] + + if not gf_sensitivity: + return [] + + for media, media_data in gf_sensitivity.items(): + for target, target_data in media_data.items(): + for reaction_id, reaction_data in target_data.get('success', {}).items(): + for direction, metabolites in reaction_data.items(): + entry = { + "reaction_id": reaction_id, + "reaction_name": model.reactions.get_by_id(reaction_id).name if reaction_id in model.reactions else reaction_id, + "media": media, + "direction": direction, + "target": target, + "gapfilling_sensitivity": "; ".join(metabolites) if isinstance(metabolites, (list, tuple)) else str(metabolites) + } + gapfilling_entries.append(entry) + + return gapfilling_entries + + context = { + "overview": [{"Model": "Model 3", "Genome": "Genome C"}, {"Model": "Model 4", "Genome": "Genome D"}], + "reactions": [], + "compounds": [], + "genes": [], + "biomass": [], + "gapfilling": [], + "atpanalysis": [{'no_of_gapfilled_reactions': 5, 'media': 'LB', 'atp_production': 'High', 'gapfilled_reactions': 'R005; R006', 'reversed_reaction_by_gapfilling': 'R007', 'filtered_reactions': 'R008; R009'}], + } + + print("Module Path:", module_path + "/../data/") + + exchanges = {r.id for r in model_or_mdlutl.exchanges} + + # Identify biomass reactions using SBO annotation + biomass_reactions_ids = {rxn.id for rxn in model_or_mdlutl.reactions if rxn.annotation.get('sbo') == 'SBO:0000629'} + + # Reactions Tab + for rxn in model_or_mdlutl.reactions: + if rxn.id not in exchanges and rxn.id not in biomass_reactions_ids: + equation = rxn.build_reaction_string(use_metabolite_names=True) + rxn_data = { + "id": rxn.id, + "name": rxn.name, + "equation": equation, + "genes": rxn.gene_reaction_rule, + "gapfilling": "TBD" + } + context["reactions"].append(rxn_data) + + # Compounds Tab + for cpd in 
model_or_mdlutl.metabolites: + cpd_data = { + "id": cpd.id, + "name": cpd.name, + "formula": cpd.formula, + "charge": cpd.charge, + "compartment": cpd.compartment + } + context["compounds"].append(cpd_data) + + # Genes Tab + for gene in model_or_mdlutl.genes: + gene_data = { + "gene": gene.id, + "reactions": "; ".join([rxn.id for rxn in gene.reactions]) + } + context["genes"].append(gene_data) + + # Biomass Tab + if biomass_reactions_ids: + for biomass_rxn_id in biomass_reactions_ids: + biomass_rxn = model_or_mdlutl.reactions.get_by_id(biomass_rxn_id) + for metabolite, coefficient in biomass_rxn.metabolites.items(): + compound_id = metabolite.id + compound_name = metabolite.name.split('_')[0] + compartment = compound_id.split('_')[-1] + + biomass_data = { + "biomass_reaction_id": biomass_rxn.id, + "biomass_compound_id": compound_id, + "name": compound_name, + "coefficient": coefficient, + "compartment": compartment + } + context["biomass"].append(biomass_data) + else: + print("No biomass reactions found in the model.") + + # Gapfilling Tab + gf_sensitivity = model_or_mdlutl.attributes.get('gf_sensitivity', None) + gapfilling_data = extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) + context["gapfilling"] = gapfilling_data + + # Diagnostics + unique_biomass_rxns = biomass_reactions_ids + print(f"Unique biomass reactions identified: {len(unique_biomass_rxns)}") + print(f"Biomass Reaction IDs: {', '.join(unique_biomass_rxns)}") + + print("\nFirst 2 reactions:") + for rxn in context["reactions"][:2]: + print(rxn) + + print("\nFirst 2 compounds:") + for cpd in context["compounds"][:2]: + print(cpd) + + print("\nFirst 2 genes:") + for gene in context["genes"][:2]: + print(gene) + + print("\nFirst 2 biomass compounds:") + for bm in context["biomass"][:2]: + print(bm) + + print("\nFirst 2 gapfilling entries:") + for gf in context["gapfilling"][:2]: + print(gf) + + # Render with template + env = jinja2.Environment( + loader=jinja2.FileSystemLoader(module_path + 
"/../data/"), + autoescape=jinja2.select_autoescape(['html', 'xml']) + ) + html = env.get_template("ModelReportTemplate.html").render(context) + directory = dirname(output_path) + os.makedirs(directory, exist_ok=True) + with open(output_path, 'w') as f: + f.write(html) + + + def build_report( self, model_or_mdlutl, From ec198d8d8fb606ee06e8b8fef20cb9ade6a08c6d Mon Sep 17 00:00:00 2001 From: jplfaria Date: Fri, 25 Aug 2023 16:19:34 +0000 Subject: [PATCH 090/146] adding multi tab model report --- modelseedpy/core/msmodelreport.py | 137 ++++++- modelseedpy/data/ModelReportTemplate.html | 476 ++++++++++++++++------ 2 files changed, 469 insertions(+), 144 deletions(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index b4a73815..e7521dcc 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -23,35 +23,135 @@ def build_multitab_report(self, model_or_mdlutl, output_path): # Helper function for extracting gapfilling data def extract_gapfilling_data(gf_sensitivity, model): - gapfilling_entries = [] + if gf_sensitivity is None: + return [], {} - if not gf_sensitivity: - return [] + gapfilling_dict = {} + gapfilling_summary = {} for media, media_data in gf_sensitivity.items(): for target, target_data in media_data.items(): for reaction_id, reaction_data in target_data.get('success', {}).items(): for direction, metabolites in reaction_data.items(): + # If metabolites is None, set to empty string + if metabolites is None: + metabolites = "" + + # Extract both IDs and Names for Gapfilling Sensitivity + sensitivity_ids = [] + sensitivity_names = [] + if isinstance(metabolites, (list, tuple)): + for met_id in metabolites: + sensitivity_ids.append(met_id) + met_name = model.metabolites.get_by_id(met_id).name if met_id in model.metabolites else met_id + sensitivity_names.append(met_name) + else: + metabolites = str(metabolites) entry = { "reaction_id": reaction_id, "reaction_name": 
model.reactions.get_by_id(reaction_id).name if reaction_id in model.reactions else reaction_id, "media": media, "direction": direction, "target": target, - "gapfilling_sensitivity": "; ".join(metabolites) if isinstance(metabolites, (list, tuple)) else str(metabolites) + "gapfilling_sensitivity_id": "; ".join(sensitivity_ids) if sensitivity_ids else metabolites, + "gapfilling_sensitivity_name": "; ".join(sensitivity_names) if sensitivity_names else metabolites } - gapfilling_entries.append(entry) + + # Update the summary dictionary + if reaction_id not in gapfilling_summary: + gapfilling_summary[reaction_id] = [] + gapfilling_summary[reaction_id].append(f"{media}: {direction}") + + # Check if reaction_id is already in dictionary + if reaction_id in gapfilling_dict: + # Update the media + existing_entry = gapfilling_dict[reaction_id] + existing_media = existing_entry["media"].split("; ") + if media not in existing_media: + existing_media.append(media) + existing_entry["media"] = "; ".join(existing_media) + else: + gapfilling_dict[reaction_id] = entry + + return list(gapfilling_dict.values()), gapfilling_summary + + # Extract ATP analysis data + def extract_atp_analysis_data(atp_analysis, atp_expansion_filter): + entries = [] + if atp_analysis and 'core_atp_gapfilling' in atp_analysis: + for media, data in atp_analysis['core_atp_gapfilling'].items(): + score = data.get('score', None) + new_reactions = ["{}: {}".format(k, v) for k, v in data.get('new', {}).items()] + reversed_reactions = ["{}: {}".format(k, v) for k, v in data.get('reversed', {}).items()] + + # Extracting the "Filtered Reactions" in the required format + filtered_reactions = [] + for k, v in atp_expansion_filter.get(media, {}).items(): + if isinstance(v, dict): + for sub_k, sub_v in v.items(): + if isinstance(sub_v, dict): + for reaction, direction_dict in sub_v.items(): + direction = list(direction_dict.keys())[0] + filtered_reactions.append(f"{reaction}: {direction}") + filtered_reactions_str = "; 
".join(filtered_reactions) + + if score is not None: + entries.append({ + 'media': media, + 'no_of_gapfilled_reactions': score, + 'gapfilled_reactions': "; ".join(new_reactions), + 'reversed_reaction_by_gapfilling': "; ".join(reversed_reactions), + 'filtered_reactions': filtered_reactions_str + }) + # Sorting the entries based on the 'no_of_gapfilled_reactions' column + entries.sort(key=lambda x: x['no_of_gapfilled_reactions']) + return entries - return gapfilling_entries + # Extract ATP production data for the ATP Analysis tab + def extract_atp_production_data(atp_analysis): + atp_production_dict = {} + if atp_analysis: + selected_media = atp_analysis.get('selected_media', {}) + core_atp_gapfilling = atp_analysis.get('core_atp_gapfilling', {}) + # First, process selected_media + for media, value in selected_media.items(): + atp_production_dict[media] = round(value, 2) + + # Next, process core_atp_gapfilling for media not in selected_media + for media, data in core_atp_gapfilling.items(): + if media not in atp_production_dict: + if data.get('failed'): + atp_production_dict[media] = 'failed' + else: + # If the media was not processed in selected_media and it's not failed, set as 'Not Integrated' + atp_production_dict[media] = 'Not Integrated' + + return atp_production_dict + + # Get gf_sensitivity attribute from the model + gf_sensitivity = model_or_mdlutl.attributes.get('gf_sensitivity', None) + + # Extract gapfilling data + gapfilling_entries, gapfilling_reaction_summary = extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) + + # Check if ATP_analysis attribute is present in the model + atp_analysis = model_or_mdlutl.attributes.get('ATP_analysis', None) + if atp_analysis: + atp_expansion_filter = model_or_mdlutl.attributes.get('atp_expansion_filter', {}) + atp_analysis_entries = extract_atp_analysis_data(atp_analysis, atp_expansion_filter) + else: + atp_analysis_entries = [] + + # Initialize context dictionary context = { - "overview": [{"Model": "Model 3", 
"Genome": "Genome C"}, {"Model": "Model 4", "Genome": "Genome D"}], + "overview": [{"Model": "Model 5", "Genome": "Genome C"}, {"Model": "Model 5", "Genome": "Genome D"}], "reactions": [], "compounds": [], "genes": [], "biomass": [], - "gapfilling": [], - "atpanalysis": [{'no_of_gapfilled_reactions': 5, 'media': 'LB', 'atp_production': 'High', 'gapfilled_reactions': 'R005; R006', 'reversed_reaction_by_gapfilling': 'R007', 'filtered_reactions': 'R008; R009'}], + "gapfilling": gapfilling_entries, # Populated with gapfilling data + "atpanalysis": atp_analysis_entries # Populated with ATP analysis data } print("Module Path:", module_path + "/../data/") @@ -70,10 +170,11 @@ def extract_gapfilling_data(gf_sensitivity, model): "name": rxn.name, "equation": equation, "genes": rxn.gene_reaction_rule, - "gapfilling": "TBD" + "gapfilling": "; ".join(gapfilling_reaction_summary.get(rxn.id, [])) # Empty list results in an empty string } context["reactions"].append(rxn_data) + # Compounds Tab for cpd in model_or_mdlutl.metabolites: cpd_data = { @@ -116,7 +217,15 @@ def extract_gapfilling_data(gf_sensitivity, model): # Gapfilling Tab gf_sensitivity = model_or_mdlutl.attributes.get('gf_sensitivity', None) gapfilling_data = extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) - context["gapfilling"] = gapfilling_data + context["gapfilling"] = gapfilling_entries + + # Extract ATP Production Data + atp_production_data = extract_atp_production_data(atp_analysis) + + # Populate the 'atpanalysis' context with ATP production data + for entry in context["atpanalysis"]: + media = entry['media'] + entry['atp_production'] = atp_production_data.get(media, None) # Diagnostics unique_biomass_rxns = biomass_reactions_ids @@ -142,6 +251,10 @@ def extract_gapfilling_data(gf_sensitivity, model): print("\nFirst 2 gapfilling entries:") for gf in context["gapfilling"][:2]: print(gf) + + print("\nFirst 2 ATP Analysis entries:") + for entry in context["atpanalysis"][:2]: + print(entry) # Render with 
template env = jinja2.Environment( @@ -153,8 +266,6 @@ def extract_gapfilling_data(gf_sensitivity, model): os.makedirs(directory, exist_ok=True) with open(output_path, 'w') as f: f.write(html) - - def build_report( self, diff --git a/modelseedpy/data/ModelReportTemplate.html b/modelseedpy/data/ModelReportTemplate.html index 6ae79ea0..bd0a1c1c 100644 --- a/modelseedpy/data/ModelReportTemplate.html +++ b/modelseedpy/data/ModelReportTemplate.html @@ -1,135 +1,349 @@ + -
-ModelSEED Reconstruction - - - -
+ + + ModelSEED Reconstruction + + + + + -
- - +
+ + - - - - + \ No newline at end of file From 61ecda1c30f5e204125c8f62b2c7825e300e1321 Mon Sep 17 00:00:00 2001 From: jplfaria Date: Mon, 28 Aug 2023 19:46:33 +0000 Subject: [PATCH 091/146] changes to multitab report and redoing gapfilling and atp analysis report --- modelseedpy/core/msmodelreport.py | 611 ++++++++++------------ modelseedpy/data/ModelReportTemplate.html | 2 +- 2 files changed, 278 insertions(+), 335 deletions(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index e7521dcc..c5274fcd 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -1,11 +1,13 @@ # -*- coding: utf-8 -*- import pandas as pd import logging -import matplotlib.cm as cm import os +import re import jinja2 -from os.path import dirname, exists +from os.path import dirname +from pandas.io.formats.style import Styler from modelseedpy.core.msmodelutl import MSModelUtil + module_path = dirname(os.path.abspath(__file__)) logger = logging.getLogger(__name__) @@ -19,133 +21,202 @@ def __init__( ): pass - def build_multitab_report(self, model_or_mdlutl, output_path): - - # Helper function for extracting gapfilling data - def extract_gapfilling_data(gf_sensitivity, model): - if gf_sensitivity is None: - return [], {} - - gapfilling_dict = {} - gapfilling_summary = {} - - for media, media_data in gf_sensitivity.items(): - for target, target_data in media_data.items(): - for reaction_id, reaction_data in target_data.get('success', {}).items(): - for direction, metabolites in reaction_data.items(): - # If metabolites is None, set to empty string - if metabolites is None: - metabolites = "" - - # Extract both IDs and Names for Gapfilling Sensitivity - sensitivity_ids = [] - sensitivity_names = [] - if isinstance(metabolites, (list, tuple)): - for met_id in metabolites: - sensitivity_ids.append(met_id) - met_name = model.metabolites.get_by_id(met_id).name if met_id in model.metabolites else met_id - 
sensitivity_names.append(met_name) - else: - metabolites = str(metabolites) - entry = { - "reaction_id": reaction_id, - "reaction_name": model.reactions.get_by_id(reaction_id).name if reaction_id in model.reactions else reaction_id, - "media": media, - "direction": direction, - "target": target, - "gapfilling_sensitivity_id": "; ".join(sensitivity_ids) if sensitivity_ids else metabolites, - "gapfilling_sensitivity_name": "; ".join(sensitivity_names) if sensitivity_names else metabolites - } - - # Update the summary dictionary - if reaction_id not in gapfilling_summary: - gapfilling_summary[reaction_id] = [] - gapfilling_summary[reaction_id].append(f"{media}: {direction}") - - # Check if reaction_id is already in dictionary - if reaction_id in gapfilling_dict: - # Update the media - existing_entry = gapfilling_dict[reaction_id] - existing_media = existing_entry["media"].split("; ") - if media not in existing_media: - existing_media.append(media) - existing_entry["media"] = "; ".join(existing_media) - else: - gapfilling_dict[reaction_id] = entry - - return list(gapfilling_dict.values()), gapfilling_summary - - # Extract ATP analysis data - def extract_atp_analysis_data(atp_analysis, atp_expansion_filter): - entries = [] - if atp_analysis and 'core_atp_gapfilling' in atp_analysis: - for media, data in atp_analysis['core_atp_gapfilling'].items(): - score = data.get('score', None) - new_reactions = ["{}: {}".format(k, v) for k, v in data.get('new', {}).items()] - reversed_reactions = ["{}: {}".format(k, v) for k, v in data.get('reversed', {}).items()] - - # Extracting the "Filtered Reactions" in the required format - filtered_reactions = [] - for k, v in atp_expansion_filter.get(media, {}).items(): - if isinstance(v, dict): - for sub_k, sub_v in v.items(): - if isinstance(sub_v, dict): - for reaction, direction_dict in sub_v.items(): - direction = list(direction_dict.keys())[0] - filtered_reactions.append(f"{reaction}: {direction}") - filtered_reactions_str = "; 
".join(filtered_reactions) - - if score is not None: - entries.append({ - 'media': media, - 'no_of_gapfilled_reactions': score, - 'gapfilled_reactions': "; ".join(new_reactions), - 'reversed_reaction_by_gapfilling': "; ".join(reversed_reactions), - 'filtered_reactions': filtered_reactions_str - }) - # Sorting the entries based on the 'no_of_gapfilled_reactions' column - entries.sort(key=lambda x: x['no_of_gapfilled_reactions']) - return entries - - # Extract ATP production data for the ATP Analysis tab - def extract_atp_production_data(atp_analysis): - atp_production_dict = {} - if atp_analysis: - selected_media = atp_analysis.get('selected_media', {}) - core_atp_gapfilling = atp_analysis.get('core_atp_gapfilling', {}) - - # First, process selected_media - for media, value in selected_media.items(): - atp_production_dict[media] = round(value, 2) - - # Next, process core_atp_gapfilling for media not in selected_media - for media, data in core_atp_gapfilling.items(): - if media not in atp_production_dict: - if data.get('failed'): - atp_production_dict[media] = 'failed' + def generate_reports(self, model, report_path, multi_tab_report_path): + self.build_report(model, report_path) + self.build_multitab_report(model, multi_tab_report_path) + + # Helper function to build overview data + def build_overview_data(self, model): + # Get the number of compartments + number_compartments = len(set([metabolite.compartment for metabolite in model.metabolites])) + + # Extract gapfilling information + gapfillings_str = model.notes.get('kbase_gapfillings', '[]') + pattern = r"\{.*?\}" + gapfilling_matches = re.findall(pattern, gapfillings_str) + gapfillings = [eval(gapfilling.replace('false', 'False').replace('true', 'True').replace('null', 'None')) for gapfilling in gapfilling_matches] + + core_gapfilling_media = [] + gapfilling_media = [] + + for gapfilling in gapfillings: + media_name = gapfilling.get('id', '').replace('ATP-', '') + target = gapfilling.get('target', '') + + if 
target == "rxn00062_c0": + core_gapfilling_media.append(media_name) + elif target.startswith('bio'): + gapfilling_media.append(media_name) + + # Count the number of gapfills + number_gapfills = gapfillings_str.count('"media_ref"') + + # Convert the lists to strings + core_gapfilling_str = "; ".join(core_gapfilling_media) if core_gapfilling_media else "No core gapfilling data found!" + gapfilling_media_str = "; ".join(gapfilling_media) if gapfilling_media else "No genome-scale gapfilling data found!" + + overview = { + 'Model ID': model.id, + 'Full Gapfilling and ATP Analysis Report': 'TBD', # You may replace 'TBD' with actual data when available + 'Genome Scale Template': model.notes.get('kbase_template_refs', 'Data Not Available'), + 'Core Gapfilling Media': core_gapfilling_str, + 'Gapfilling Media': gapfilling_media_str, + 'Source Genome': model.notes.get('kbase_genome_ref', 'Data Not Available'), + 'Total Number of reactions': len(model.reactions), + 'Number compounds': len(model.metabolites), + 'Number compartments': number_compartments, + 'Number biomass': len([rxn for rxn in model.reactions if rxn.annotation.get('sbo') == 'SBO:0000629']), + 'Number gapfills': number_gapfills + } + return overview + + # Helper function for extracting gapfilling data + def extract_gapfilling_data(self, gf_sensitivity, model): + if gf_sensitivity is None: + return [], {} + + gapfilling_dict = {} + gapfilling_summary = {} + + for media, media_data in gf_sensitivity.items(): + for target, target_data in media_data.items(): + for reaction_id, reaction_data in target_data.get('success', {}).items(): + for direction, metabolites in reaction_data.items(): + # If metabolites is None, set to empty string + if metabolites is None: + metabolites = "" + + # Extract both IDs and Names for Gapfilling Sensitivity + sensitivity_ids = [] + sensitivity_names = [] + if isinstance(metabolites, (list, tuple)): + for met_id in metabolites: + sensitivity_ids.append(met_id) + met_name = 
model.metabolites.get_by_id(met_id).name if met_id in model.metabolites else met_id + sensitivity_names.append(met_name) else: - # If the media was not processed in selected_media and it's not failed, set as 'Not Integrated' - atp_production_dict[media] = 'Not Integrated' - - return atp_production_dict + metabolites = str(metabolites) + entry = { + "reaction_id": reaction_id, + "reaction_name": model.reactions.get_by_id(reaction_id).name if reaction_id in model.reactions else reaction_id, + "media": media, + "direction": direction, + "target": target, + "gapfilling_sensitivity_id": "; ".join(sensitivity_ids) if sensitivity_ids else metabolites, + "gapfilling_sensitivity_name": "; ".join(sensitivity_names) if sensitivity_names else metabolites + } + + # Update the summary dictionary + if reaction_id not in gapfilling_summary: + gapfilling_summary[reaction_id] = [] + gapfilling_summary[reaction_id].append(f"{media}: {direction}") + + # Check if reaction_id is already in dictionary + if reaction_id in gapfilling_dict: + # Update the media + existing_entry = gapfilling_dict[reaction_id] + existing_media = existing_entry["media"].split("; ") + if media not in existing_media: + existing_media.append(media) + existing_entry["media"] = "; ".join(existing_media) + else: + gapfilling_dict[reaction_id] = entry + + return list(gapfilling_dict.values()), gapfilling_summary + + #transform data to be used in tabular format to use in build_model_report + def transform_gapfilling_data(self, gapfilling_data): + transformed_data = [] + for entry in gapfilling_data: + row = [ + entry["reaction_id"], + entry["reaction_name"], + entry["media"], + entry["direction"], + entry["target"], + entry["gapfilling_sensitivity_id"], + entry["gapfilling_sensitivity_name"] + ] + transformed_data.append(row) + return transformed_data + + + # Extract ATP analysis data + def extract_atp_analysis_data(self, atp_analysis, atp_expansion_filter): + entries = [] + if atp_analysis and 'core_atp_gapfilling' 
in atp_analysis: + for media, data in atp_analysis['core_atp_gapfilling'].items(): + score = data.get('score', None) + new_reactions = ["{}: {}".format(k, v) for k, v in data.get('new', {}).items()] + reversed_reactions = ["{}: {}".format(k, v) for k, v in data.get('reversed', {}).items()] + + # Extracting the "Filtered Reactions" in the required format + filtered_reactions = [] + for k, v in atp_expansion_filter.get(media, {}).items(): + if isinstance(v, dict): + for sub_k, sub_v in v.items(): + if isinstance(sub_v, dict): + for reaction, direction_dict in sub_v.items(): + direction = list(direction_dict.keys())[0] + filtered_reactions.append(f"{reaction}: {direction}") + filtered_reactions_str = "; ".join(filtered_reactions) + + if score is not None: + entries.append({ + 'media': media, + 'no_of_gapfilled_reactions': score, + 'gapfilled_reactions': "; ".join(new_reactions), + 'reversed_reaction_by_gapfilling': "; ".join(reversed_reactions), + 'filtered_reactions': filtered_reactions_str + }) + # Sorting the entries based on the 'no_of_gapfilled_reactions' column + entries.sort(key=lambda x: x['no_of_gapfilled_reactions']) + return entries + + # Extract ATP production data for the ATP Analysis tab + def extract_atp_production_data(self, atp_analysis): + atp_production_dict = {} + if atp_analysis: + selected_media = atp_analysis.get('selected_media', {}) + core_atp_gapfilling = atp_analysis.get('core_atp_gapfilling', {}) + + # First, process selected_media + for media, value in selected_media.items(): + atp_production_dict[media] = round(value, 2) + + # Next, process core_atp_gapfilling for media not in selected_media + for media, data in core_atp_gapfilling.items(): + if media not in atp_production_dict: + if data.get('failed'): + atp_production_dict[media] = 'failed' + else: + # If the media was not processed in selected_media and it's not failed, set as 'Not Integrated' + atp_production_dict[media] = 'Not Integrated' + + return atp_production_dict + + def 
build_multitab_report(self, model_or_mdlutl, output_path): + + # Build overview data + overview_data = self.build_overview_data(model_or_mdlutl) # Get gf_sensitivity attribute from the model gf_sensitivity = model_or_mdlutl.attributes.get('gf_sensitivity', None) # Extract gapfilling data - gapfilling_entries, gapfilling_reaction_summary = extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) + gapfilling_entries, gapfilling_reaction_summary = self.extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) # Check if ATP_analysis attribute is present in the model atp_analysis = model_or_mdlutl.attributes.get('ATP_analysis', None) if atp_analysis: atp_expansion_filter = model_or_mdlutl.attributes.get('atp_expansion_filter', {}) - atp_analysis_entries = extract_atp_analysis_data(atp_analysis, atp_expansion_filter) + atp_analysis_entries = self.extract_atp_analysis_data(atp_analysis, atp_expansion_filter) else: atp_analysis_entries = [] # Initialize context dictionary context = { - "overview": [{"Model": "Model 5", "Genome": "Genome C"}, {"Model": "Model 5", "Genome": "Genome D"}], + "overview": overview_data, "reactions": [], "compounds": [], "genes": [], @@ -216,11 +287,11 @@ def extract_atp_production_data(atp_analysis): # Gapfilling Tab gf_sensitivity = model_or_mdlutl.attributes.get('gf_sensitivity', None) - gapfilling_data = extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) + gapfilling_data = self.extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) context["gapfilling"] = gapfilling_entries # Extract ATP Production Data - atp_production_data = extract_atp_production_data(atp_analysis) + atp_production_data = self.extract_atp_production_data(atp_analysis) # Populate the 'atpanalysis' context with ATP production data for entry in context["atpanalysis"]: @@ -267,171 +338,27 @@ def extract_atp_production_data(atp_analysis): with open(output_path, 'w') as f: f.write(html) - def build_report( - self, - model_or_mdlutl, - output_path - ): - """Builds model 
HTML report + + def build_report(self, model, output_path): + """Builds model HTML report for the Model Summary table Parameters ---------- - model_or_modelutl : Model | MSModelUtl - Model to use to run the simulations + model : cobra.Model + Model to use to build the report """ - modelutl = model_or_mdlutl - if not isinstance(model_or_mdlutl, MSModelUtil): - modelutl = MSModelUtil.get(model_or_mdlutl) - - # Process the data - attributes = modelutl.get_attributes() - gf_filter_data = {} - selected_media_data = {} - core_atp_gapfilling_data = {} - - gf_sensitivity_data = attributes.get('gf_sensitivity') # Get 'gf_sensitivity_data' if available, otherwise it will be None - number_gapfills = 0 - if gf_sensitivity_data: - number_gapfills = len(gf_sensitivity_data) - if 'ATP_analysis' in attributes: - if 'selected_media' in attributes['ATP_analysis']: - selected_media_data = attributes['ATP_analysis']['selected_media'] - if 'core_atp_gapfilling' in attributes['ATP_analysis']: - core_atp_gapfilling_data = attributes['ATP_analysis']['core_atp_gapfilling'] - gf_filter_data = attributes['gf_filter'] - if 'gf_filter' in attributes: - gf_filter_data = attributes['gf_filter'] - - # Get the names of 'Core Gapfilling Media' and 'Gapfilling Media' - core_gapfilling_media = [media for media, media_data in (gf_sensitivity_data or {}).items() if 'rxn00062_c0' in media_data] - gapfilling_media = [media for media, media_data in (gf_sensitivity_data or {}).items() if 'bio1' in media_data] - core_gapfilling_media_text = ', '.join(core_gapfilling_media) - gapfilling_media_text = ', '.join(gapfilling_media) - - bio_count = 0 - for rxn in modelutl.model.reactions: - if rxn.id[0:3] == "bio": - bio_count += 1 - - # Create the Model Summary table data - model_summary_data = [ - ('Model ID', modelutl.wsid), - ('Genome Scale Template', modelutl.model.template_ref), - #('Core Template', modelutl.model.core_template_ref), - ('Core Gapfilling Media', core_gapfilling_media_text), - ('Gapfilling 
Media', gapfilling_media_text), - ('Source Genome',modelutl.model.name), - ('Total Number of reactions', str(len(modelutl.model.reactions))), - # ('Number of reactions in Core', 'TBD - attributes require changes things to support this'), - # ('Number of reactions in Genome Scale', 'TBD - attributes require changes things to support this'), - ('Number compounds', str(len(modelutl.model.metabolites))), - ('Number compartments', str(len(modelutl.model.compartments))), - ('Number biomass', str(bio_count)), - ('Number gapfills', str(number_gapfills)), - ] - - # Create the DataFrame for Model Summary - model_summary_df = pd.DataFrame(model_summary_data, columns=['', '']) - - # Process core_atp_gapfilling_data and gf_filter_data into a list of dictionaries - gapfilling_list = [] - for media in core_atp_gapfilling_data: - core_atp_gapfilling_media = core_atp_gapfilling_data[media] - row = { - 'no of gapfilled reactions': int(core_atp_gapfilling_media['score']), - 'media': media, - 'ATP Production': f"{round(selected_media_data.get(media, 0), 2):.2f}" if media in selected_media_data else '', - 'gapfilled reactions': '', - 'reversed reaction by gapfilling': '', - 'Filtered Reactions': '', - } - if 'new' in core_atp_gapfilling_media: - gapfilled_reactions = core_atp_gapfilling_media['new'] - if gapfilled_reactions: - reactions = [f'{rxn} : {direction}' if not rxn.startswith("EX") else f'EX_{rxn} : {direction}' for rxn, direction in gapfilled_reactions.items()] - row['gapfilled reactions'] = ' | '.join(reactions) - if 'failed' in core_atp_gapfilling_media and core_atp_gapfilling_media['failed']: - row['gapfilled reactions'] = 'Failed' - if 'reversed' in core_atp_gapfilling_media: - reversed_reactions = core_atp_gapfilling_media['reversed'] - if reversed_reactions: - reactions = [f'{rxn} : {direction}' if not rxn.startswith("EX") else f'EX_{rxn} : {direction}' for rxn, direction in reversed_reactions.items()] - row['reversed reaction by gapfilling'] = ' | '.join(reactions) - if 
media in gf_filter_data: - gf_filter_media_data = gf_filter_data[media] - atp_production_values = list(gf_filter_media_data.values()) - if atp_production_values: - atp_prod_reaction_pairs = list(atp_production_values[0].items()) - if atp_prod_reaction_pairs: - _, reactions = atp_prod_reaction_pairs[0] - if reactions: - filtered_reactions = ' | '.join([f'{rxn} : {list(value.keys())[0]}' if not rxn.startswith("EX") else f'EX_{rxn} : {list(value.keys())[0]}' for rxn, value in reactions.items()]) - row['Filtered Reactions'] = filtered_reactions if filtered_reactions else '' - if not row['reversed reaction by gapfilling']: - row['reversed reaction by gapfilling'] = '' - gapfilling_list.append(row) - - - - gapfilling_df = pd.DataFrame(gapfilling_list, columns=['no of gapfilled reactions', 'media', 'ATP Production', 'gapfilled reactions', 'reversed reaction by gapfilling', 'Filtered Reactions']) - gapfilling_df['no of gapfilled reactions'] = pd.to_numeric(gapfilling_df['no of gapfilled reactions']) - gapfilling_df = gapfilling_df.sort_values('no of gapfilled reactions') - - - reaction_names = {} - for rxn in modelutl.model.reactions: - reaction_id = rxn.id - reaction_name = rxn.name - reaction_names[reaction_id] = reaction_name - - # Gapfillings Analysis DataFrame - gapfillings_list = [] - if gf_sensitivity_data: - for media, media_data in gf_sensitivity_data.items(): - for target, target_data in media_data.items(): # Iterate through each target for the current media - for status, status_data in target_data.items(): - if isinstance(status_data, dict): - for reaction_id, reaction_directions in status_data.items(): - for direction, gapfilling_sensitivity in reaction_directions.items(): - if status == 'success': - if isinstance(gapfilling_sensitivity, list): - gapfilling_sensitivity = ', '.join(gapfilling_sensitivity) - gapfillings_list.append({ - 'Reaction ID': reaction_id, - 'Reaction Name': reaction_names.get(reaction_id, ''), # Get reaction name from the dictionary - 
'Media': media, - 'Direction': direction, - 'Target': target, - 'Gapfilling Sensitivity': gapfilling_sensitivity - }) - else: - # Handle cases where status_data is null - gapfillings_list.append({ - 'Reaction ID': '', # No data available for Reaction ID - 'Reaction Name': '', # No data available for Reaction Name - 'Media': media, - 'Direction': '', # No data available for Direction - 'Target': target, - 'Gapfilling Sensitivity': 'Failed Before Filtering' if status == 'FBF' else 'Failed After Filtering' if status == 'FAF' else status # Status is the 'FBF' or other labels in this case - }) - - gapfillings_analysis_df = pd.DataFrame(gapfillings_list, columns=['Reaction ID', 'Reaction Name', 'Media', 'Direction', 'Target', 'Gapfilling Sensitivity']) - - - # Define the custom color mapping function - def color_gradient(val): - if val == 0: - return 'background-color: green' - else: - color_map = cm.get_cmap('YlOrRd') # Choose the color map - norm_val = val / gapfilling_df['no of gapfilled reactions'].max() # Normalize the value between 0 and 1 - color = color_map(norm_val) - r, g, b, _ = color - return f'background-color: rgb({int(r * 255)}, {int(g * 255)}, {int(b * 255)})' - - # Apply the default style to the Model Summary DataFrame + + # 1. Utilize the build_overview_data method + model_summary_data = self.build_overview_data(model) + # Remove the unwanted entry + model_summary_data.pop("Full Gapfilling and ATP Analysis Report", None) + # 2. Transform the dictionary into a list of tuples + model_summary_list = [(key, value) for key, value in model_summary_data.items()] + # 3. 
Convert to DataFrame + model_summary_df = pd.DataFrame(model_summary_list, columns=['', '']) + + # Style the DataFrame (as was done previously) model_summary_df_styled = ( - model_summary_df.style - .hide_index() + model_summary_df.style.hide(axis="index") .set_table_styles([ {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, @@ -439,34 +366,22 @@ def color_gradient(val): {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, ]) ) - - - # Apply the default style to the Gapfillings Analysis DataFrame - # Apply the default style to the Gapfillings Analysis DataFrame - gapfillings_analysis_df_styled = ( - gapfillings_analysis_df.style - .hide_index() - .format({ - 'Reaction ID': lambda x: f'{x}' if not x.startswith("EX") else f'{x}', # Add hyperlink to Reaction ID - 'Gapfilling Sensitivity': lambda x: ', '.join([f'{i}' for i in x.split(', ')]) if x and not x.startswith('Failed') else x # Add hyperlinks to Gapfilling Sensitivity - }) - .set_table_styles([ - {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, - {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, - ]) + + # Fetching the gapfilling sensitivity data + gf_sensitivity = model.attributes.get('gf_sensitivity', None) + gapfilling_data = self.extract_gapfilling_data(gf_sensitivity, model) + gapfilling_list = self.transform_gapfilling_data(gapfilling_data[0]) + + # Convert the gapfilling_list to a DataFrame + 
gapfillings_analysis_df = pd.DataFrame( + gapfilling_list, + columns=[ + "Reaction ID", "Reaction Name", "Media", "Direction", "Target", "Gapfilling Sensitivity ID", "Gapfilling Sensitivity Name"] ) - - - # Apply the default style with alternating row colors, Oxygen font, adjusted font size and line height, - # and switched order of light grey and white backgrounds in the header column for Core ATP Gapfilling Analysis - gapfilling_df_styled = ( - gapfilling_df.style - .applymap(color_gradient, subset=['no of gapfilled reactions']) - .hide_index() - .format({'Filtered Reactions': lambda x: f'{x}'}) - .format({'gapfilled reactions': lambda x: f'{x}'}) + + # Apply style to Gapfillings Analysis DataFrame + gapfillings_analysis_df_styled = ( + gapfillings_analysis_df.style.hide(axis="index") .set_table_styles([ {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, @@ -474,10 +389,9 @@ def color_gradient(val): {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, ]) ) - - - # Legend text for Table 1 - annotations_text_1 = """ + + # Legend for Gapfillings Analysis + annotations_text_gapfillings = """
  • Reaction ID: The identifier of the reaction.
  • Reaction Name: The name of the reaction.
  • @@ -485,54 +399,83 @@ def color_gradient(val):
  • Direction: The direction of the reaction. Can be ">" for forward, "<" for reverse, or "=" for both directions.
  • Target: The reaction selected as the objective function target for the gapfilling optimization problem. Targets here can be the model’s biomass reaction, commonly named “bio1” for models created by this app. Alternatively, “rxn00062” (ATP Production) reaction is shown for cases where gapfilling was applied to guarantee ATP production in a given media. - When reactions are gapfilled for ATP production, we recommend checking the full Core ATP Analysis in Table 2 below.
  • -
  • Gapfilling Sensitivity: Gapfilling is necessary when compounds in the biomass objective function can not be produced by the model. + When reactions are gapfilled for ATP production, we recommend checking the full Core ATP Analysis in the table below.
  • +
  • Gapfilling Sensitivity ID and Name: Gapfilling is necessary when compounds in the biomass objective function can not be produced by the model. For each reaction we list the biomass compound(s) that can not be synthesized by the model without gapfilling. In cases where gap filling fails there are two possible scenarios: 1) FBF (failed before filtering) : the gapfilling immediately failed, even before we filtered out the ATP breaking reactions. This means this objective CANNOT be satisfied with the entire current database. 2) FAF (failed after filtering): the gapfilling succeeded before filtering, but failed after filtering out reactions that break ATP. This tells you definitively if the ATP filtering caused the gapfilling to fail
""" - #table 2 intro text - introductory_text = """ + + # Extract ATP analysis data + atp_analysis = model.attributes.get('ATP_analysis', None) + atp_expansion_filter = model.attributes.get('atp_expansion_filter', {}) + atp_analysis_entries = self.extract_atp_analysis_data(atp_analysis, atp_expansion_filter) + + # Convert the atp_analysis_entries list to a DataFrame + atp_analysis_df = pd.DataFrame(atp_analysis_entries) + + # Apply style to ATP Analysis DataFrame + atp_analysis_df_styled = ( + atp_analysis_df.style.hide(axis="index") + .set_table_styles([ + {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, + {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, + {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, + {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, + ]) + ) + + # Legend for ATP Analysis + annotations_text_atp_analysis = """ +
    +
  • No. of gapfilled reactions: The number of reactions filled by the gapfilling process.
  • +
  • Media: The media in which the reaction takes place.
  • +
  • ATP Production: ATP production by the core metabolism model.
  • +
  • Gapfilled Reactions: Reactions added during the gapfilling process.
  • +
  • Reversed Reaction by Gapfilling: Reactions that have been reversed during the gapfilling process.
  • +
  • Filtered Reactions: Reactions that have been filtered out during the analysis. When a reaction addition would lead to a large increase in ATP production or an infinite energy loop, we filter that reaction out of the gapfilling database and prevent it from being added to the model.
  • +
+ """ + + #ATP analysis explanation text + explanation_text_atp_analysis = """

During model reconstruction, we analyze the genome’s core metabolism draft model (model without gapfilling) to assess energy biosynthesis capabilities. The goal of this analysis is to ensure the core metabolism model is able to produce ATP before we expand the model to the genome-scale. This step is designed to prevent gapfilling from introducing reactions that create energy-generating loops. The tests are conducted on a large collection of minimal conditions, with the goal of simulating the model’s capability to produce energy with different electron donor, electron acceptor, and carbon source combinations.

When the draft model of the core metabolism is capable of producing ATP in at least one of the test media, no gapfilling reactions part of this analysis will be added to the model. While we still report the gapfilling requirements for the test media formulations that fail to produce ATP with that draft core model, we only integrate these solutions in the model when no test media succeeds in producing ATP. - In this case, the integrated gap-filling solution(s) will be displayed in “Table 1 - Gapfilling Analysis” above, with the “Target” “rxn00062” (ATP Production) objective function.

+ In this case, the integrated gap-filling solution(s) will be displayed in the “Gapfilling Analysis” table above, with the “Target” “rxn00062” (ATP Production) objective function.

The goal is to display the test results for all media to provide clues for the metabolic capabilities of the genome(s). When many reactions are required for growth on the SO4 testing media conditions, this could be a good indicator that the organism is not capable of performing sulfate reduction. On the other hand, when only one gapfill reaction is required for ATP production in a given media, multiple scenarios can be considered. 1) Organism(s) can’t grow on test condition, and we correctly did not add the reaction to the model. 2) Possible issue with the source genome annotation missing a specific gene function 3) Possible issue with the model reconstruction database. We hope this data helps make more informed decisions on reactions that may need to be manually curated in the model. In cases where is known from the literature or unpublished experimental results that an organism is capable of producing ATP in a given media condition that requires gapfilling in this analysis, you can use the parameter “Force ATP media” in the reconstruction app to ensure those reactions are integrated into the model. .

""" - # Legend text for Table 2 - annotations_text_2 = """ -
    -
  • No. of gapfilled reactions: The number of reactions filled by the gapfilling process.
  • -
  • Media: The media in which the reaction takes place.
  • -
  • ATP Production: ATP production by the core metabolism model.
  • -
  • Gapfilled Reactions: Reactions added during the gapfilling process.
  • -
  • Reversed Reaction by Gapfilling: Reactions that have been reversed during the gapfilling process.
  • -
  • Filtered Reactions: Reactions that have been filtered out during the analysis. When a reaction addition would lead to a large increase in ATP production or an infinite energy loop, we filter that reaction out of the gapfilling database and prevent it from being added to the model.
  • -
- """ + # Save the data to HTML with the styled DataFrames and the legends - directory = dirname(output_path) + directory = os.path.dirname(output_path) os.makedirs(directory, exist_ok=True) - with open(output_path, 'w') as f: + with open(output_path, 'w', encoding='utf-8') as f: f.write('

Model Summary

') f.write(model_summary_df_styled.render(escape=False)) f.write('

') - if gf_sensitivity_data: - f.write('

Table 1 - Gapfillings Analysis

') + f.write('

Gapfillings Analysis

') + + # Check for Gapfillings Analysis data + if not gapfillings_analysis_df.empty: f.write(gapfillings_analysis_df_styled.render(escape=False)) - f.write(f'

Legend:

{annotations_text_1}') + f.write(f'

Legend:

{annotations_text_gapfillings}') else: - f.write('Gapfilling was not selected as a parameter during reconstruction of the model. As a result your model may not grow on your media object when running Flux Balance Analysis. You can gapfill your model after reconstruction by using the bew Gapiflling Metabolic Model app curently in beta') - f.write('

') - f.write('

Table 2 - Core ATP Analysis

') - f.write(gapfilling_df_styled.render(escape=False)) - f.write(f'

Legend:

{annotations_text_2}') - f.write(introductory_text) + f.write('

Warning: No Gapfillings Analysis data available for this model.

') + + f.write('

Core ATP Analysis

') + + # Check for ATP Analysis data + if not atp_analysis_df.empty: + f.write(atp_analysis_df_styled.render(escape=False)) + f.write(f'

Legend:

{annotations_text_atp_analysis}') + f.write(explanation_text_atp_analysis) + else: + f.write('

Warning: No Core ATP Analysis data available for this model.

') \ No newline at end of file diff --git a/modelseedpy/data/ModelReportTemplate.html b/modelseedpy/data/ModelReportTemplate.html index bd0a1c1c..c382c8fc 100644 --- a/modelseedpy/data/ModelReportTemplate.html +++ b/modelseedpy/data/ModelReportTemplate.html @@ -80,7 +80,7 @@ Full Gapfilling and ATP Analysis Report - {{ overview['Full Gapfilling and ATP Analysis Report'] }} + VIEW REPORT IN SEPARATE WINDOW Genome Scale Template From 1f0ec94ea21a75a1d8227fdf53403fc8c4f96608 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 29 Aug 2023 23:52:19 -0500 Subject: [PATCH 092/146] Fixing bug in report --- modelseedpy/core/msmodelreport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index c5274fcd..bdce7695 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -348,7 +348,7 @@ def build_report(self, model, output_path): """ # 1. Utilize the build_overview_data method - model_summary_data = self.build_overview_data(model) + model_summary_data = self.build_overview_data(model.model) # Remove the unwanted entry model_summary_data.pop("Full Gapfilling and ATP Analysis Report", None) # 2. 
Transform the dictionary into a list of tuples From 6496f651f8ec23977fbfce0d07e7d9478f775b5e Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 29 Aug 2023 23:58:24 -0500 Subject: [PATCH 093/146] Fixing report bug --- modelseedpy/core/msmodelreport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index bdce7695..d86065bf 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -369,7 +369,7 @@ def build_report(self, model, output_path): # Fetching the gapfilling sensitivity data gf_sensitivity = model.attributes.get('gf_sensitivity', None) - gapfilling_data = self.extract_gapfilling_data(gf_sensitivity, model) + gapfilling_data = self.extract_gapfilling_data(gf_sensitivity, model.model) gapfilling_list = self.transform_gapfilling_data(gapfilling_data[0]) # Convert the gapfilling_list to a DataFrame From c5b1f4f23f090374c31379479a21fe78cd59382f Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 6 Sep 2023 23:34:31 -0500 Subject: [PATCH 094/146] Debugging media --- modelseedpy/core/msatpcorrection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 6a0fcec2..a0ecba66 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -173,6 +173,7 @@ def load_default_medias(self): min_obj = 0.01 if media_id in min_gap: min_obj = min_gap[media_id] + print(media.id) self.atp_medias.append([media, min_obj]) @staticmethod From ea4117a14178004684b40af088fd1b63622b400a Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 6 Sep 2023 23:54:51 -0500 Subject: [PATCH 095/146] Removing debugging --- modelseedpy/core/msatpcorrection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index a0ecba66..6a0fcec2 100644 --- a/modelseedpy/core/msatpcorrection.py 
+++ b/modelseedpy/core/msatpcorrection.py @@ -173,7 +173,6 @@ def load_default_medias(self): min_obj = 0.01 if media_id in min_gap: min_obj = min_gap[media_id] - print(media.id) self.atp_medias.append([media, min_obj]) @staticmethod From cfc3bec92860de50b34709db351b9915b27953ba Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sat, 9 Sep 2023 00:47:10 -0500 Subject: [PATCH 096/146] Fixing report --- modelseedpy/core/msmodelreport.py | 100 +++++++++++++++--------------- 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index d86065bf..af594528 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -17,61 +17,59 @@ class MSModelReport: def __init__( - self + self, + model_or_mdlutl ): - pass + if isinstance(model_or_mdlutl, MSModelUtil): + self.model = model_or_mdlutl.model + self.modelutl = model_or_mdlutl + else: + self.model = model_or_mdlutl + self.modelutl = MSModelUtil.get(model_or_mdlutl) - def generate_reports(self, model, report_path, multi_tab_report_path): - self.build_report(model, report_path) - self.build_multitab_report(model, multi_tab_report_path) + def generate_reports(self,report_path, multi_tab_report_path): + self.build_report(report_path) + self.build_multitab_report(multi_tab_report_path) # Helper function to build overview data - def build_overview_data(self, model): + def build_overview_data(self): # Get the number of compartments - number_compartments = len(set([metabolite.compartment for metabolite in model.metabolites])) + number_compartments = len(set([metabolite.compartment for metabolite in self.model.metabolites])) # Extract gapfilling information - gapfillings_str = model.notes.get('kbase_gapfillings', '[]') - pattern = r"\{.*?\}" - gapfilling_matches = re.findall(pattern, gapfillings_str) - gapfillings = [eval(gapfilling.replace('false', 'False').replace('true', 'True').replace('null', 'None')) for gapfilling in 
gapfilling_matches] - core_gapfilling_media = [] gapfilling_media = [] - - for gapfilling in gapfillings: - media_name = gapfilling.get('id', '').replace('ATP-', '') - target = gapfilling.get('target', '') - - if target == "rxn00062_c0": - core_gapfilling_media.append(media_name) - elif target.startswith('bio'): + gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) + for media in gf_sensitivity: + if "bio1" in attributes["gf_sensitivity"][media] and "success" in attributes["gf_sensitivity"][media]["bio1"]: gapfilling_media.append(media_name) - + if "rxn00062_c0" in attributes["gf_sensitivity"][media] and "success" in attributes["gf_sensitivity"][media]["rxn00062_c0"]: + core_gapfilling_media.append(media) + # Count the number of gapfills - number_gapfills = gapfillings_str.count('"media_ref"') + number_gapfills = len(gapfilling_media) # Convert the lists to strings core_gapfilling_str = "; ".join(core_gapfilling_media) if core_gapfilling_media else "No core gapfilling data found!" gapfilling_media_str = "; ".join(gapfilling_media) if gapfilling_media else "No genome-scale gapfilling data found!" 
overview = { - 'Model ID': model.id, + 'Model ID': self.model.id, 'Full Gapfilling and ATP Analysis Report': 'TBD', # You may replace 'TBD' with actual data when available - 'Genome Scale Template': model.notes.get('kbase_template_refs', 'Data Not Available'), + 'Genome Scale Template': self.model.notes.get('kbase_template_refs', 'Data Not Available'), 'Core Gapfilling Media': core_gapfilling_str, 'Gapfilling Media': gapfilling_media_str, - 'Source Genome': model.notes.get('kbase_genome_ref', 'Data Not Available'), - 'Total Number of reactions': len(model.reactions), - 'Number compounds': len(model.metabolites), + 'Source Genome': self.model.notes.get('kbase_genome_ref', 'Data Not Available'), + 'Total Number of reactions': len(self.model.reactions), + 'Number compounds': len(self.model.metabolites), 'Number compartments': number_compartments, - 'Number biomass': len([rxn for rxn in model.reactions if rxn.annotation.get('sbo') == 'SBO:0000629']), + 'Number biomass': len([rxn for rxn in self.model.reactions if rxn.annotation.get('sbo') == 'SBO:0000629']), 'Number gapfills': number_gapfills } return overview # Helper function for extracting gapfilling data - def extract_gapfilling_data(self, gf_sensitivity, model): + def extract_gapfilling_data(self, gf_sensitivity): if gf_sensitivity is None: return [], {} @@ -92,13 +90,13 @@ def extract_gapfilling_data(self, gf_sensitivity, model): if isinstance(metabolites, (list, tuple)): for met_id in metabolites: sensitivity_ids.append(met_id) - met_name = model.metabolites.get_by_id(met_id).name if met_id in model.metabolites else met_id + met_name = self.model.metabolites.get_by_id(met_id).name if met_id in self.model.metabolites else met_id sensitivity_names.append(met_name) else: metabolites = str(metabolites) entry = { "reaction_id": reaction_id, - "reaction_name": model.reactions.get_by_id(reaction_id).name if reaction_id in model.reactions else reaction_id, + "reaction_name": 
self.model.reactions.get_by_id(reaction_id).name if reaction_id in self.model.reactions else reaction_id, "media": media, "direction": direction, "target": target, @@ -195,21 +193,21 @@ def extract_atp_production_data(self, atp_analysis): return atp_production_dict - def build_multitab_report(self, model_or_mdlutl, output_path): + def build_multitab_report(self, output_path): # Build overview data - overview_data = self.build_overview_data(model_or_mdlutl) + overview_data = self.build_overview_data() # Get gf_sensitivity attribute from the model - gf_sensitivity = model_or_mdlutl.attributes.get('gf_sensitivity', None) + gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) # Extract gapfilling data - gapfilling_entries, gapfilling_reaction_summary = self.extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) + gapfilling_entries, gapfilling_reaction_summary = self.extract_gapfilling_data(gf_sensitivity) # Check if ATP_analysis attribute is present in the model - atp_analysis = model_or_mdlutl.attributes.get('ATP_analysis', None) + atp_analysis = self.modelutl.attributes.get('ATP_analysis', None) if atp_analysis: - atp_expansion_filter = model_or_mdlutl.attributes.get('atp_expansion_filter', {}) + atp_expansion_filter = self.modelutl.attributes.get('atp_expansion_filter', {}) atp_analysis_entries = self.extract_atp_analysis_data(atp_analysis, atp_expansion_filter) else: atp_analysis_entries = [] @@ -227,13 +225,13 @@ def build_multitab_report(self, model_or_mdlutl, output_path): print("Module Path:", module_path + "/../data/") - exchanges = {r.id for r in model_or_mdlutl.exchanges} + exchanges = {r.id for r in self.modelutl.exchanges} # Identify biomass reactions using SBO annotation - biomass_reactions_ids = {rxn.id for rxn in model_or_mdlutl.reactions if rxn.annotation.get('sbo') == 'SBO:0000629'} + biomass_reactions_ids = {rxn.id for rxn in self.model.reactions if rxn.annotation.get('sbo') == 'SBO:0000629'} # Reactions Tab - for rxn in 
model_or_mdlutl.reactions: + for rxn in self.model.reactions: if rxn.id not in exchanges and rxn.id not in biomass_reactions_ids: equation = rxn.build_reaction_string(use_metabolite_names=True) rxn_data = { @@ -247,7 +245,7 @@ def build_multitab_report(self, model_or_mdlutl, output_path): # Compounds Tab - for cpd in model_or_mdlutl.metabolites: + for cpd in self.model.metabolites: cpd_data = { "id": cpd.id, "name": cpd.name, @@ -258,7 +256,7 @@ def build_multitab_report(self, model_or_mdlutl, output_path): context["compounds"].append(cpd_data) # Genes Tab - for gene in model_or_mdlutl.genes: + for gene in self.model.genes: gene_data = { "gene": gene.id, "reactions": "; ".join([rxn.id for rxn in gene.reactions]) @@ -268,7 +266,7 @@ def build_multitab_report(self, model_or_mdlutl, output_path): # Biomass Tab if biomass_reactions_ids: for biomass_rxn_id in biomass_reactions_ids: - biomass_rxn = model_or_mdlutl.reactions.get_by_id(biomass_rxn_id) + biomass_rxn = self.model.reactions.get_by_id(biomass_rxn_id) for metabolite, coefficient in biomass_rxn.metabolites.items(): compound_id = metabolite.id compound_name = metabolite.name.split('_')[0] @@ -286,8 +284,8 @@ def build_multitab_report(self, model_or_mdlutl, output_path): print("No biomass reactions found in the model.") # Gapfilling Tab - gf_sensitivity = model_or_mdlutl.attributes.get('gf_sensitivity', None) - gapfilling_data = self.extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) + gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) + gapfilling_data = self.extract_gapfilling_data(gf_sensitivity) context["gapfilling"] = gapfilling_entries # Extract ATP Production Data @@ -339,7 +337,7 @@ def build_multitab_report(self, model_or_mdlutl, output_path): f.write(html) - def build_report(self, model, output_path): + def build_report(self, output_path): """Builds model HTML report for the Model Summary table Parameters ---------- @@ -348,7 +346,7 @@ def build_report(self, model, output_path): 
""" # 1. Utilize the build_overview_data method - model_summary_data = self.build_overview_data(model.model) + model_summary_data = self.build_overview_data() # Remove the unwanted entry model_summary_data.pop("Full Gapfilling and ATP Analysis Report", None) # 2. Transform the dictionary into a list of tuples @@ -368,8 +366,8 @@ def build_report(self, model, output_path): ) # Fetching the gapfilling sensitivity data - gf_sensitivity = model.attributes.get('gf_sensitivity', None) - gapfilling_data = self.extract_gapfilling_data(gf_sensitivity, model.model) + gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) + gapfilling_data = self.extract_gapfilling_data(gf_sensitivity) gapfilling_list = self.transform_gapfilling_data(gapfilling_data[0]) # Convert the gapfilling_list to a DataFrame @@ -409,8 +407,8 @@ def build_report(self, model, output_path): """ # Extract ATP analysis data - atp_analysis = model.attributes.get('ATP_analysis', None) - atp_expansion_filter = model.attributes.get('atp_expansion_filter', {}) + atp_analysis = self.modelutl.attributes.get('ATP_analysis', None) + atp_expansion_filter = self.modelutl.attributes.get('atp_expansion_filter', {}) atp_analysis_entries = self.extract_atp_analysis_data(atp_analysis, atp_expansion_filter) # Convert the atp_analysis_entries list to a DataFrame From 1b038fd7c35e4376e8dcd43798e1d4cd26940f78 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sat, 9 Sep 2023 01:19:52 -0500 Subject: [PATCH 097/146] Fixing report --- modelseedpy/core/msmodelreport.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index af594528..45ac5710 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -41,9 +41,9 @@ def build_overview_data(self): gapfilling_media = [] gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) for media in gf_sensitivity: - if "bio1" in 
attributes["gf_sensitivity"][media] and "success" in attributes["gf_sensitivity"][media]["bio1"]: + if "bio1" in self.modelutl.attributes["gf_sensitivity"][media] and "success" in self.modelutl.attributes["gf_sensitivity"][media]["bio1"]: gapfilling_media.append(media_name) - if "rxn00062_c0" in attributes["gf_sensitivity"][media] and "success" in attributes["gf_sensitivity"][media]["rxn00062_c0"]: + if "rxn00062_c0" in self.modelutl.attributes["gf_sensitivity"][media] and "success" in self.modelutl.attributes["gf_sensitivity"][media]["rxn00062_c0"]: core_gapfilling_media.append(media) # Count the number of gapfills From 46323032ccb6a7d8e078fcaefeb62df169e3e092 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sat, 9 Sep 2023 10:55:36 -0500 Subject: [PATCH 098/146] Fixing report --- modelseedpy/core/msmodelreport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index 45ac5710..1007a245 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -42,7 +42,7 @@ def build_overview_data(self): gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) for media in gf_sensitivity: if "bio1" in self.modelutl.attributes["gf_sensitivity"][media] and "success" in self.modelutl.attributes["gf_sensitivity"][media]["bio1"]: - gapfilling_media.append(media_name) + gapfilling_media.append(media) if "rxn00062_c0" in self.modelutl.attributes["gf_sensitivity"][media] and "success" in self.modelutl.attributes["gf_sensitivity"][media]["rxn00062_c0"]: core_gapfilling_media.append(media) From 4b5f573cccb763369c77599f6b4bf8e197cf2131 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sat, 9 Sep 2023 11:33:52 -0500 Subject: [PATCH 099/146] Fixing report --- modelseedpy/core/msmodelreport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index 1007a245..675e1b68 
100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -225,7 +225,7 @@ def build_multitab_report(self, output_path): print("Module Path:", module_path + "/../data/") - exchanges = {r.id for r in self.modelutl.exchanges} + exchanges = {r.id for r in self.model.exchanges} # Identify biomass reactions using SBO annotation biomass_reactions_ids = {rxn.id for rxn in self.model.reactions if rxn.annotation.get('sbo') == 'SBO:0000629'} From c9ce4709c12f37bf220c9b3951afe4c8473fb13f Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sat, 9 Sep 2023 12:31:18 -0500 Subject: [PATCH 100/146] Slight report correction --- modelseedpy/core/msmodelreport.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index 675e1b68..1e682c3c 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -50,8 +50,8 @@ def build_overview_data(self): number_gapfills = len(gapfilling_media) # Convert the lists to strings - core_gapfilling_str = "; ".join(core_gapfilling_media) if core_gapfilling_media else "No core gapfilling data found!" - gapfilling_media_str = "; ".join(gapfilling_media) if gapfilling_media else "No genome-scale gapfilling data found!" + core_gapfilling_str = "; ".join(core_gapfilling_media) if core_gapfilling_media else "No core gapfilling needed." + gapfilling_media_str = "; ".join(gapfilling_media) if gapfilling_media else "No genome-scale gapfilling." 
overview = { 'Model ID': self.model.id, From de064ab7536f141626550ce230579c24a4ad3317 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sun, 10 Sep 2023 23:38:18 -0500 Subject: [PATCH 101/146] Fixing report bugs --- modelseedpy/core/msmodelreport.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index 1e682c3c..b0058a36 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -60,7 +60,7 @@ def build_overview_data(self): 'Core Gapfilling Media': core_gapfilling_str, 'Gapfilling Media': gapfilling_media_str, 'Source Genome': self.model.notes.get('kbase_genome_ref', 'Data Not Available'), - 'Total Number of reactions': len(self.model.reactions), + 'Total Number of reactions': self.modelutl.nonexchange_reaction_count(), 'Number compounds': len(self.model.metabolites), 'Number compartments': number_compartments, 'Number biomass': len([rxn for rxn in self.model.reactions if rxn.annotation.get('sbo') == 'SBO:0000629']), @@ -147,6 +147,9 @@ def extract_atp_analysis_data(self, atp_analysis, atp_expansion_filter): score = data.get('score', None) new_reactions = ["{}: {}".format(k, v) for k, v in data.get('new', {}).items()] reversed_reactions = ["{}: {}".format(k, v) for k, v in data.get('reversed', {}).items()] + atp_production = "Not integrated" + if "selected_media" in atp_analysis and media in atp_analysis["selected_media"]: + atp_production = atp_analysis["selected_media"][media] # Extracting the "Filtered Reactions" in the required format filtered_reactions = [] @@ -163,6 +166,7 @@ def extract_atp_analysis_data(self, atp_analysis, atp_expansion_filter): entries.append({ 'media': media, 'no_of_gapfilled_reactions': score, + 'atp_production': atp_production, 'gapfilled_reactions': "; ".join(new_reactions), 'reversed_reaction_by_gapfilling': "; ".join(reversed_reactions), 'filtered_reactions': filtered_reactions_str From 
481d914c8618456bfa0f49c488def57dd42b590c Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 12 Sep 2023 23:30:36 -0500 Subject: [PATCH 102/146] Fixing model report encoding --- modelseedpy/core/msmodelreport.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index b0058a36..f08bfa04 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -460,6 +460,7 @@ def build_report(self, output_path): directory = os.path.dirname(output_path) os.makedirs(directory, exist_ok=True) with open(output_path, 'w', encoding='utf-8') as f: + f.write('') f.write('

Model Summary

') f.write(model_summary_df_styled.render(escape=False)) f.write('

') From df44d598137c889f87f2216e443be0b7be73c3d3 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 13 Sep 2023 00:06:22 -0500 Subject: [PATCH 103/146] Fixing report for empty gapfillling data --- modelseedpy/core/msmodelreport.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index f08bfa04..0e6f580e 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -40,11 +40,12 @@ def build_overview_data(self): core_gapfilling_media = [] gapfilling_media = [] gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) - for media in gf_sensitivity: - if "bio1" in self.modelutl.attributes["gf_sensitivity"][media] and "success" in self.modelutl.attributes["gf_sensitivity"][media]["bio1"]: - gapfilling_media.append(media) - if "rxn00062_c0" in self.modelutl.attributes["gf_sensitivity"][media] and "success" in self.modelutl.attributes["gf_sensitivity"][media]["rxn00062_c0"]: - core_gapfilling_media.append(media) + if gf_sensitivity: + for media in gf_sensitivity: + if "bio1" in self.modelutl.attributes["gf_sensitivity"][media] and "success" in self.modelutl.attributes["gf_sensitivity"][media]["bio1"]: + gapfilling_media.append(media) + if "rxn00062_c0" in self.modelutl.attributes["gf_sensitivity"][media] and "success" in self.modelutl.attributes["gf_sensitivity"][media]["rxn00062_c0"]: + core_gapfilling_media.append(media) # Count the number of gapfills number_gapfills = len(gapfilling_media) From b4a62f4213ffa657c4b2d21944a2eb3f1a4f5fbf Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 13 Sep 2023 01:24:05 -0500 Subject: [PATCH 104/146] Adding annotationontology class to module --- modelseedpy/__init__.py | 3 ++- modelseedpy/core/__init__.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 551f617d..efd16995 100644 --- 
a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -45,7 +45,8 @@ MSATPCorrection, MSGapfill, MSEquation, - MSModelReport + MSModelReport, + AnnotationOntology ) from modelseedpy.core.exceptions import * diff --git a/modelseedpy/core/__init__.py b/modelseedpy/core/__init__.py index eb4d02a2..bd374a03 100644 --- a/modelseedpy/core/__init__.py +++ b/modelseedpy/core/__init__.py @@ -13,4 +13,5 @@ from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.mstemplate import MSTemplateBuilder from modelseedpy.core.msmodelreport import MSModelReport +from modelseedpy.core.annotationontology import AnnotationOntology from modelseedpy.core.exceptions import * From 2818366204ba871eea96f0356e2039acbdab497a Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 13 Sep 2023 01:37:58 -0500 Subject: [PATCH 105/146] Fixing report for models where gapfilling has an empty solution --- modelseedpy/core/msmodelreport.py | 84 ++++++++++++++++--------------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index 0e6f580e..78595f1c 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -79,47 +79,49 @@ def extract_gapfilling_data(self, gf_sensitivity): for media, media_data in gf_sensitivity.items(): for target, target_data in media_data.items(): - for reaction_id, reaction_data in target_data.get('success', {}).items(): - for direction, metabolites in reaction_data.items(): - # If metabolites is None, set to empty string - if metabolites is None: - metabolites = "" - - # Extract both IDs and Names for Gapfilling Sensitivity - sensitivity_ids = [] - sensitivity_names = [] - if isinstance(metabolites, (list, tuple)): - for met_id in metabolites: - sensitivity_ids.append(met_id) - met_name = self.model.metabolites.get_by_id(met_id).name if met_id in self.model.metabolites else met_id - sensitivity_names.append(met_name) - else: - metabolites = 
str(metabolites) - entry = { - "reaction_id": reaction_id, - "reaction_name": self.model.reactions.get_by_id(reaction_id).name if reaction_id in self.model.reactions else reaction_id, - "media": media, - "direction": direction, - "target": target, - "gapfilling_sensitivity_id": "; ".join(sensitivity_ids) if sensitivity_ids else metabolites, - "gapfilling_sensitivity_name": "; ".join(sensitivity_names) if sensitivity_names else metabolites - } - - # Update the summary dictionary - if reaction_id not in gapfilling_summary: - gapfilling_summary[reaction_id] = [] - gapfilling_summary[reaction_id].append(f"{media}: {direction}") - - # Check if reaction_id is already in dictionary - if reaction_id in gapfilling_dict: - # Update the media - existing_entry = gapfilling_dict[reaction_id] - existing_media = existing_entry["media"].split("; ") - if media not in existing_media: - existing_media.append(media) - existing_entry["media"] = "; ".join(existing_media) - else: - gapfilling_dict[reaction_id] = entry + gf_data = target_data.get('success', {}) + if isinstance(gf_data, dict): + for reaction_id, reaction_data in gf_data.items(): + for direction, metabolites in reaction_data.items(): + # If metabolites is None, set to empty string + if metabolites is None: + metabolites = "" + + # Extract both IDs and Names for Gapfilling Sensitivity + sensitivity_ids = [] + sensitivity_names = [] + if isinstance(metabolites, (list, tuple)): + for met_id in metabolites: + sensitivity_ids.append(met_id) + met_name = self.model.metabolites.get_by_id(met_id).name if met_id in self.model.metabolites else met_id + sensitivity_names.append(met_name) + else: + metabolites = str(metabolites) + entry = { + "reaction_id": reaction_id, + "reaction_name": self.model.reactions.get_by_id(reaction_id).name if reaction_id in self.model.reactions else reaction_id, + "media": media, + "direction": direction, + "target": target, + "gapfilling_sensitivity_id": "; ".join(sensitivity_ids) if sensitivity_ids else 
metabolites, + "gapfilling_sensitivity_name": "; ".join(sensitivity_names) if sensitivity_names else metabolites + } + + # Update the summary dictionary + if reaction_id not in gapfilling_summary: + gapfilling_summary[reaction_id] = [] + gapfilling_summary[reaction_id].append(f"{media}: {direction}") + + # Check if reaction_id is already in dictionary + if reaction_id in gapfilling_dict: + # Update the media + existing_entry = gapfilling_dict[reaction_id] + existing_media = existing_entry["media"].split("; ") + if media not in existing_media: + existing_media.append(media) + existing_entry["media"] = "; ".join(existing_media) + else: + gapfilling_dict[reaction_id] = entry return list(gapfilling_dict.values()), gapfilling_summary From 1dd84b3c2d9de98653e30100a4e9c9ddbd5e4c0b Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 14 Sep 2023 14:05:37 -0500 Subject: [PATCH 106/146] utl --- modelseedpy/core/msmodelutl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index ac232de8..d828b569 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -729,7 +729,7 @@ def test_single_condition(self, condition, apply_condition=True, model=None): if model.solver.status != "optimal": self.printlp(condition["media"].id + "-Testing-Infeasible.lp") logger.critical( - ondition["media"].id + condition["media"].id + "testing leads to infeasible problem. LP file printed to debug!" 
) return False From 7112e17246407d31c85d47a4c3a343643047a064 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 14 Sep 2023 14:08:22 -0500 Subject: [PATCH 107/146] balck --- modelseedpy/__init__.py | 2 +- modelseedpy/core/annotationontology.py | 288 +++++++++++------- modelseedpy/core/msatpcorrection.py | 55 ++-- modelseedpy/core/msbuilder.py | 89 +++--- modelseedpy/core/msgapfill.py | 29 +- modelseedpy/core/msgrowthphenotypes.py | 287 ++++++++++-------- modelseedpy/core/msmedia.py | 7 +- modelseedpy/core/msmodelreport.py | 386 +++++++++++++++++-------- modelseedpy/core/msmodelutl.py | 10 +- modelseedpy/fbapkg/elementuptakepkg.py | 20 +- modelseedpy/fbapkg/kbasemediapkg.py | 4 +- 11 files changed, 748 insertions(+), 429 deletions(-) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 551f617d..75c94ad8 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -45,7 +45,7 @@ MSATPCorrection, MSGapfill, MSEquation, - MSModelReport + MSModelReport, ) from modelseedpy.core.exceptions import * diff --git a/modelseedpy/core/annotationontology.py b/modelseedpy/core/annotationontology.py index 4750ed13..db64a981 100644 --- a/modelseedpy/core/annotationontology.py +++ b/modelseedpy/core/annotationontology.py @@ -15,34 +15,49 @@ logging.INFO ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO -#Class structure -#AnnotationOntology -> Features/Events/Terms/Ontologies +# Class structure +# AnnotationOntology -> Features/Events/Terms/Ontologies # AnnotationOntologyOntology -> Events/Terms # AnnotationOntologyEvent -> Features/Ontology # AnnotationOntologyFeature -> Term+Event->Evidence # AnnotationOntologyTerm -> Ontology/Events/Featurs # AnnotationOntologyEvidence -> -- -allowable_score_types = ["probability","evalue","bitscore","identity","qalignstart","qalignstop","salignstart","salignstop","kmerhits","tmscore","rmsd","hmmscore"] +allowable_score_types = [ + "probability", + "evalue", + "bitscore", + 
"identity", + "qalignstart", + "qalignstop", + "salignstart", + "salignstop", + "kmerhits", + "tmscore", + "rmsd", + "hmmscore", +] + class AnnotationOntologyEvidence: - def __init__(self,scores={},ref_entity=None,entity_type=None): - self.ref_entity=ref_entity - self.entity_type=entity_type - self.scores=scores + def __init__(self, scores={}, ref_entity=None, entity_type=None): + self.ref_entity = ref_entity + self.entity_type = entity_type + self.scores = scores for item in self.scores: if item not in allowable_score_types: - logger.warning(item+" not an allowable score type!") - + logger.warning(item + " not an allowable score type!") + def to_data(self): return { - "ref_entity":self.ref_entity, - "entity_type":self.entity_type, - "scores":self.scores + "ref_entity": self.ref_entity, + "entity_type": self.entity_type, + "scores": self.scores, } - + + class AnnotationOntologyTerm: - def __init__(self,parent,term_id,ontology): + def __init__(self, parent, term_id, ontology): self.id = term_id self.parent = parent self.ontology = ontology @@ -51,126 +66,170 @@ def __init__(self,parent,term_id,ontology): self.msrxns = set() self.events = {} self.features = {} - - def add_msrxns(self,rxn_ids): + + def add_msrxns(self, rxn_ids): for rxn_id in rxn_ids: if rxn_id[0:6] == "MSRXN:": - rxn_id = rxn_id[6:] - self.msrxns.update([rxn_id]) - - def add_event(self,event): + rxn_id = rxn_id[6:] + self.msrxns.update([rxn_id]) + + def add_event(self, event): self.events[event.id] = event - - def add_feature(self,feature): + + def add_feature(self, feature): self.features[feature.id] = feature - + + class AnnotationOntologyOntology: - def __init__(self,parent,ontology_id): + def __init__(self, parent, ontology_id): self.id = ontology_id self.parent = parent self.events = {} self.terms = {} - - def add_event(self,event): + + def add_event(self, event): self.events[event.id] = event - - def add_term(self,term): + + def add_term(self, term): self.terms[term.id] = term + class 
AnnotationOntologyFeature: - def __init__(self,parent,feature_id,type=None): + def __init__(self, parent, feature_id, type=None): self.id = feature_id self.parent = parent parent.add_feature(self) self.type = type self.event_terms = {} self.term_events = {} - - def add_event_term(self,event,term,scores={},ref_entity=None,entity_type=None): + + def add_event_term(self, event, term, scores={}, ref_entity=None, entity_type=None): if event.id not in self.event_terms: self.event_terms[event.id] = {} - self.event_terms[event.id][term.id] = AnnotationOntologyEvidence(scores,ref_entity,entity_type) + self.event_terms[event.id][term.id] = AnnotationOntologyEvidence( + scores, ref_entity, entity_type + ) if term.id not in self.term_events: self.term_events[term.id] = {} self.term_events[term.id][event.id] = self.event_terms[event.id][term.id] - - def get_associated_terms(self,prioritized_event_list=None,ontologies=None,merge_all=False,translate_to_rast=False): + + def get_associated_terms( + self, + prioritized_event_list=None, + ontologies=None, + merge_all=False, + translate_to_rast=False, + ): output = {} for term_id in self.term_events: term = self.parent.terms[term_id] if not ontologies or term.ontology.id in ontologies: if merge_all or not prioritized_event_list: for event_id in self.term_events[term_id]: - if not prioritized_event_list or event_id in prioritized_event_list: + if ( + not prioritized_event_list + or event_id in prioritized_event_list + ): if term not in output: output[term] = [] - output[term].append(self.term_events[term_id][event_id].to_data()) + output[term].append( + self.term_events[term_id][event_id].to_data() + ) else: for event_id in prioritized_event_list: if event_id in self.term_events[term_id]: - rxns = self.parent.terms[term_id].msrxns; + rxns = self.parent.terms[term_id].msrxns if len(rxns) > 0: if term not in output: output[term] = [] - output[term].append(self.term_events[term_id][event_id].to_data()) + output[term].append( + 
self.term_events[term_id][event_id].to_data() + ) break return output - - def get_associated_reactions(self,prioritized_event_list=None,ontologies=None,merge_all=False): + + def get_associated_reactions( + self, prioritized_event_list=None, ontologies=None, merge_all=False + ): output = {} for term_id in self.term_events: if not ontologies or self.parent.terms[term_id].ontology.id in ontologies: if merge_all or not prioritized_event_list: for event_id in self.term_events[term_id]: - if not prioritized_event_list or event_id in prioritized_event_list: - rxns = self.parent.terms[term_id].msrxns; + if ( + not prioritized_event_list + or event_id in prioritized_event_list + ): + rxns = self.parent.terms[term_id].msrxns for rxn_id in rxns: if rxn_id not in output: output[rxn_id] = [] - output[rxn_id].append(self.term_events[term_id][event_id].to_data()) + output[rxn_id].append( + self.term_events[term_id][event_id].to_data() + ) else: for event_id in prioritized_event_list: if event_id in self.term_events[term_id]: - rxns = self.parent.terms[term_id].msrxns; + rxns = self.parent.terms[term_id].msrxns for rxn_id in rxns: if rxn_id not in output: output[rxn_id] = [] - output[rxn_id].append(self.term_events[term_id][event_id].to_data()) + output[rxn_id].append( + self.term_events[term_id][event_id].to_data() + ) if len(rxns) > 0: break return output - + + class AnnotationOntologyEvent: - def __init__(self,parent,event_id,ontology_id,method,method_version=None,description=None,timestamp=None): + def __init__( + self, + parent, + event_id, + ontology_id, + method, + method_version=None, + description=None, + timestamp=None, + ): self.id = event_id self.parent = parent - #Linking ontology + # Linking ontology self.ontology = self.parent.add_ontology(ontology_id) self.ontology.add_event(self) if not description: - self.description = ""#TODO + self.description = "" # TODO else: self.description = description self.method = method self.method_version = method_version 
self.timestamp = timestamp self.features = {} - + @staticmethod - def from_data(data,parent): + def from_data(data, parent): if "method_version" not in data: data["method_version"] = None if "description" not in data: data["description"] = None if "timestamp" not in data: - data["timestamp"] = None - self = AnnotationOntologyEvent(parent,data["event_id"],data["ontology_id"],data["method"],data["method_version"],data["description"],data["timestamp"]) + data["timestamp"] = None + self = AnnotationOntologyEvent( + parent, + data["event_id"], + data["ontology_id"], + data["method"], + data["method_version"], + data["description"], + data["timestamp"], + ) if "ontology_terms" in data: for feature_id in data["ontology_terms"]: feature = self.parent.add_feature(feature_id) self.add_feature(feature) for item in data["ontology_terms"][feature_id]: - term = self.parent.add_term(item["term"],self.ontology) + term = self.parent.add_term(item["term"], self.ontology) scores = {} ref_entity = None entity_type = None @@ -180,43 +239,42 @@ def from_data(data,parent): if "reference" in item["evidence"]: ref_entity = item["evidence"]["reference"][1] entity_type = item["evidence"]["reference"][0] - feature.add_event_term(self,term,scores,ref_entity,entity_type) + feature.add_event_term(self, term, scores, ref_entity, entity_type) if "modelseed_ids" in item: term.add_msrxns(item["modelseed_ids"]) return self - - def add_feature(self,feature): + + def add_feature(self, feature): self.features[feature.id] = feature - + def to_data(self): data = { - "event_id" : self.event_id, - "description" : self.event_id, - "ontology_id" : self.ontology_id, - "method" : self.method, - "method_version" : self.method_version, - "timestamp" : self.timestamp, - "ontology_terms" : {} + "event_id": self.event_id, + "description": self.event_id, + "ontology_id": self.ontology_id, + "method": self.method, + "method_version": self.method_version, + "timestamp": self.timestamp, + "ontology_terms": {}, } for 
feature in self.features: - data["ontology_terms"][feature] = { - "term":None#TODO - } - + data["ontology_terms"][feature] = {"term": None} # TODO + + class AnnotationOntology: mdlutls = {} @staticmethod - def from_kbase_data(data,genome_ref=None,data_dir=None): - self = AnnotationOntology(genome_ref,data_dir) + def from_kbase_data(data, genome_ref=None, data_dir=None): + self = AnnotationOntology(genome_ref, data_dir) if "feature_types" in data: self.feature_types = data["feature_types"] if "events" in data: for event in data["events"]: - self.events += [AnnotationOntologyEvent.from_data(event,self)] + self.events += [AnnotationOntologyEvent.from_data(event, self)] return self - - def __init__(self,genome_ref,data_dir): + + def __init__(self, genome_ref, data_dir): self.genome_ref = genome_ref self.events = DictList() self.terms = {} @@ -227,20 +285,40 @@ def __init__(self,genome_ref,data_dir): self.noncodings = {} self.feature_types = {} self.term_names = {} - - def get_term_name(self,term): + + def get_term_name(self, term): if term.ontology.id not in self.term_names: self.term_names[term.ontology.id] = {} - if term.ontology.id in ["SSO","AntiSmash","EC","TC","META","RO","KO","GO"]: - with open(self.data_dir + "/"+term.ontology.id+"_dictionary.json") as json_file: + if term.ontology.id in [ + "SSO", + "AntiSmash", + "EC", + "TC", + "META", + "RO", + "KO", + "GO", + ]: + with open( + self.data_dir + "/" + term.ontology.id + "_dictionary.json" + ) as json_file: ontology = json.load(json_file) for item in ontology["term_hash"]: - self.term_names[term.ontology.id][item] = ontology["term_hash"][item]["name"] + self.term_names[term.ontology.id][item] = ontology["term_hash"][ + item + ]["name"] if term.id not in self.term_names[term.ontology.id]: return "Unknown" return self.term_names[term.ontology.id][term.id] - - def get_gene_term_hash(self,prioritized_event_list=None,ontologies=None,merge_all=False,cds_features=False,translate_to_rast=True): + + def 
get_gene_term_hash( + self, + prioritized_event_list=None, + ontologies=None, + merge_all=False, + cds_features=False, + translate_to_rast=True, + ): output = {} feature_hash = self.genes if len(self.genes) == 0 or (cds_features and len(self.cdss) == 0): @@ -249,16 +327,26 @@ def get_gene_term_hash(self,prioritized_event_list=None,ontologies=None,merge_al feature = feature_hash[feature_id] if feature not in output: output[feature] = {} - output[feature] = feature.get_associated_terms(prioritized_event_list,ontologies,merge_all,translate_to_rast) + output[feature] = feature.get_associated_terms( + prioritized_event_list, ontologies, merge_all, translate_to_rast + ) return output - - def get_reaction_gene_hash(self,prioritized_event_list=None,ontologies=None,merge_all=False,cds_features=False): + + def get_reaction_gene_hash( + self, + prioritized_event_list=None, + ontologies=None, + merge_all=False, + cds_features=False, + ): output = {} feature_hash = self.genes if len(self.genes) == 0 or (cds_features and len(self.cdss) == 0): feature_hash = self.cdss for feature_id in feature_hash: - reactions = feature_hash[feature_id].get_associated_reactions(prioritized_event_list,ontologies,merge_all) + reactions = feature_hash[feature_id].get_associated_reactions( + prioritized_event_list, ontologies, merge_all + ) for rxn_id in reactions: if rxn_id not in output: output[rxn_id] = {} @@ -266,32 +354,34 @@ def get_reaction_gene_hash(self,prioritized_event_list=None,ontologies=None,merg output[rxn_id][feature_id] = [] output[rxn_id][feature_id].append(reactions[rxn_id]) return output - - def add_term(self,term_or_id,ontology=None): + + def add_term(self, term_or_id, ontology=None): if not isinstance(term_or_id, AnnotationOntologyTerm): if term_or_id in self.terms: return self.terms[term_or_id] else: - return AnnotationOntologyTerm(self,term_or_id,ontology) + return AnnotationOntologyTerm(self, term_or_id, ontology) if term_or_id.id in self.terms: - logger.critical("Term with 
id "+term_or_id.id+" already in annotation!") + logger.critical("Term with id " + term_or_id.id + " already in annotation!") return self.terms[term_or_id.id] else: - self.terms[term_or_id.id] = term_or_id - - def add_ontology(self,ontology_or_id): + self.terms[term_or_id.id] = term_or_id + + def add_ontology(self, ontology_or_id): if not isinstance(ontology_or_id, AnnotationOntologyOntology): if ontology_or_id in self.ontologies: return self.ontologies[ontology_or_id] else: - return AnnotationOntologyOntology(self,ontology_or_id) + return AnnotationOntologyOntology(self, ontology_or_id) if ontology_or_id.id in self.ontologies: - logger.critical("Ontology with id "+ontology_or_id.id+" already in annotation!") + logger.critical( + "Ontology with id " + ontology_or_id.id + " already in annotation!" + ) return self.ontologies[ontology_or_id.id] else: self.ontologies[ontology_or_id.id] = ontology_or_id - - def get_feature_hash(self,feature_id): + + def get_feature_hash(self, feature_id): feature_hash = self.genes if feature_id in self.feature_types: if self.feature_types[feature_id] == "cds": @@ -299,15 +389,15 @@ def get_feature_hash(self,feature_id): elif self.feature_types[feature_id] == "noncoding": feature_hash = self.noncodings return feature_hash - - def add_feature(self,feature_or_id): + + def add_feature(self, feature_or_id): feature_hash = None if not isinstance(feature_or_id, AnnotationOntologyFeature): feature_hash = self.get_feature_hash(feature_or_id) if feature_or_id in feature_hash: return feature_hash[feature_or_id] else: - feature_or_id = AnnotationOntologyFeature(self,feature_or_id) + feature_or_id = AnnotationOntologyFeature(self, feature_or_id) if not feature_hash: feature_hash = self.get_feature_hash(feature_or_id.id) if feature_or_id.id not in feature_hash: diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 93319808..d3321d6a 100644 --- a/modelseedpy/core/msatpcorrection.py +++ 
b/modelseedpy/core/msatpcorrection.py @@ -49,7 +49,7 @@ default_threshold_multipiers = { "Glc": 2, - "default":1.2, + "default": 1.2, } @@ -119,7 +119,7 @@ def __init__( media.id = "empty" media.name = "empty" self.media_hash[media.id] = media - + self.forced_media = [] for media_id in forced_media: for item in self.atp_medias: @@ -171,10 +171,7 @@ def load_default_medias(self): media.id = media_id media.name = media_id min_obj = 0.01 - self.atp_medias.append([ - media, - min_gap.get(media_d, min_obj) - ]) + self.atp_medias.append([media, min_gap.get(media_d, min_obj)]) @staticmethod def find_reaction_in_template(model_reaction, template, compartment): @@ -407,11 +404,9 @@ def apply_growth_media_gapfilling(self): and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0 ): self.msgapfill.integrate_gapfill_solution( - stats, - self.cumulative_core_gapfilling, - link_gaps_to_objective=False + stats, self.cumulative_core_gapfilling, link_gaps_to_objective=False ) - #Adding reactions to gapfilling sensitivity structure so we can track all gapfilled reactions + # Adding reactions to gapfilling sensitivity structure so we can track all gapfilled reactions gf_sensitivity = self.modelutl.get_attributes("gf_sensitivity", {}) if media.id not in gf_sensitivity: gf_sensitivity[media.id] = {} @@ -419,15 +414,17 @@ def apply_growth_media_gapfilling(self): gf_sensitivity[media.id][self.atp_hydrolysis.id] = {} gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"] = {} for item in stats["new"]: - gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][item] = { - stats["new"][item] : [] - } + gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][ + item + ] = {stats["new"][item]: []} for item in stats["reversed"]: - gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][item] = { - stats["reversed"][item] : [] - } - self.modelutl.save_attributes(gf_sensitivity, "gf_sensitivity") - self.modelutl.save_attributes(len(self.cumulative_core_gapfilling), 
"total_core_gapfilling") + gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][ + item + ] = {stats["reversed"][item]: []} + self.modelutl.save_attributes(gf_sensitivity, "gf_sensitivity") + self.modelutl.save_attributes( + len(self.cumulative_core_gapfilling), "total_core_gapfilling" + ) def expand_model_to_genome_scale(self): """Restores noncore reactions to model while filtering out reactions that break ATP @@ -445,7 +442,7 @@ def expand_model_to_genome_scale(self): self.restore_noncore_reactions(noncore=True, othercompartment=False) # Extending model with non core reactions while retaining ATP accuracy self.filtered_noncore = self.modelutl.reaction_expansion_test( - self.noncore_reactions, tests,attribute_label="atp_expansion_filter" + self.noncore_reactions, tests, attribute_label="atp_expansion_filter" ) # Removing filtered reactions for item in self.filtered_noncore: @@ -485,7 +482,7 @@ def restore_noncore_reactions(self, noncore=True, othercompartment=True): reaction.lower_bound = self.original_bounds[reaction.id][0] reaction.upper_bound = self.original_bounds[reaction.id][1] - def build_tests(self,multiplier_hash_override={}): + def build_tests(self, multiplier_hash_override={}): """Build tests based on ATP media evaluations Parameters @@ -501,16 +498,16 @@ def build_tests(self,multiplier_hash_override={}): Raises ------ """ - #Applying threshold multiplier + # Applying threshold multiplier for key in default_threshold_multipiers: if key not in multiplier_hash_override: multiplier_hash_override[key] = default_threshold_multipiers[key] - #Initialzing atp test attributes + # Initialzing atp test attributes atp_att = self.modelutl.get_attributes( "ATP_analysis", {"tests": {}, "selected_media": {}, "core_atp_gapfilling": {}}, ) - #Initializing tests and adding empty media every time + # Initializing tests and adding empty media every time tests = [] if "empty" in self.media_hash: tests.append( @@ -525,16 +522,16 @@ def 
build_tests(self,multiplier_hash_override={}): "threshold": 0.00001, "objective": self.atp_hydrolysis.id, } - #Setting objective to ATP hydrolysis + # Setting objective to ATP hydrolysis self.model.objective = self.atp_hydrolysis.id for media in self.selected_media: - #Setting multiplier for test threshold + # Setting multiplier for test threshold multiplier = multiplier_hash_override["default"] if media.id in multiplier_hash_override: - multiplier = multiplier_hash_override[media.id] - #Constraining model exchanges for media + multiplier = multiplier_hash_override[media.id] + # Constraining model exchanges for media self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) - #Computing core ATP production + # Computing core ATP production obj_value = self.model.slim_optimize() logger.debug(f"{media.name} = {obj_value};{multiplier}") threshold = multiplier * obj_value @@ -553,7 +550,7 @@ def build_tests(self,multiplier_hash_override={}): "threshold": multiplier * obj_value, "objective": self.atp_hydrolysis.id, } - #Saving test attributes to the model + # Saving test attributes to the model self.modelutl.save_attributes(atp_att, "ATP_analysis") return tests diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 7f079234..bca4a0f8 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -730,26 +730,28 @@ def build_metabolic_reactions(self): reactions.append(reaction) return reactions - + def build_from_annotaton_ontology( - self, - model_or_id, - anno_ont, - index="0", - allow_all_non_grp_reactions=False, - annotate_with_rast=False, - biomass_classic=False, - biomass_gc=0.5, - add_non_template_reactions=True, - prioritized_event_list=None, - ontologies=None, - merge_all=True, - convert_to_sso=True - ): - #Build base model without annotation + self, + model_or_id, + anno_ont, + index="0", + allow_all_non_grp_reactions=False, + annotate_with_rast=False, + biomass_classic=False, + biomass_gc=0.5, + 
add_non_template_reactions=True, + prioritized_event_list=None, + ontologies=None, + merge_all=True, + convert_to_sso=True, + ): + # Build base model without annotation self.search_name_to_orginal = {} self.search_name_to_genes = {} - gene_term_hash = anno_ont.get_gene_term_hash(prioritized_event_list,ontologies,merge_all,convert_to_sso) + gene_term_hash = anno_ont.get_gene_term_hash( + prioritized_event_list, ontologies, merge_all, convert_to_sso + ) residual_reaction_gene_hash = {} for gene in gene_term_hash: for term in gene_term_hash[gene]: @@ -767,9 +769,18 @@ def build_from_annotaton_ontology( residual_reaction_gene_hash[rxn_id] = {} if gene not in residual_reaction_gene_hash[rxn_id]: residual_reaction_gene_hash[rxn_id][gene] = [] - residual_reaction_gene_hash[rxn_id][gene] = gene_term_hash[gene][term] - - model_or_id = self.build(model_or_id,index,allow_all_non_grp_reactions,annotate_with_rast,biomass_classic,biomass_gc) + residual_reaction_gene_hash[rxn_id][gene] = gene_term_hash[ + gene + ][term] + + model_or_id = self.build( + model_or_id, + index, + allow_all_non_grp_reactions, + annotate_with_rast, + biomass_classic, + biomass_gc, + ) for rxn in model_or_id.reactions: probability = None for gene in rxn.genes(): @@ -779,22 +790,25 @@ def build_from_annotaton_ontology( if rxn.id[0:-3] in term.msrxns: for item in gene_term_hash[gene][term]: if "probability" in item.scores: - if not probability or item.scores["probability"] > probability: + if ( + not probability + or item.scores["probability"] > probability + ): probability = item.scores["probability"] if hasattr(rxn, "probability"): - rxn.probability = probability - + rxn.probability = probability + reactions = [] modelseeddb = ModelSEEDBiochem.get() for rxn_id in residual_reaction_gene_hash: - if rxn_id+"_c0" not in model_or_id.reactions: + if rxn_id + "_c0" not in model_or_id.reactions: reaction = None template_reaction = None - if rxn_id+"_c" in self.template.reactions: - template_reaction = 
self.template.reactions.get_by_id(rxn_id+"_c") + if rxn_id + "_c" in self.template.reactions: + template_reaction = self.template.reactions.get_by_id(rxn_id + "_c") elif rxn_id in modelseeddb.reactions: msrxn = modelseeddb.reactions.get_by_id(rxn_id) - template_reaction = msrxn.to_template_reaction({0:"c",1:"e"}) + template_reaction = msrxn.to_template_reaction({0: "c", 1: "e"}) if template_reaction: for m in template_reaction.metabolites: if m.compartment not in self.compartments: @@ -803,15 +817,22 @@ def build_from_annotaton_ontology( ] = self.template.compartments.get_by_id(m.compartment) if m.id not in self.template_species_to_model_species: model_metabolite = m.to_metabolite(self.index) - self.template_species_to_model_species[m.id] = model_metabolite + self.template_species_to_model_species[ + m.id + ] = model_metabolite self.base_model.add_metabolites([model_metabolite]) - reaction = template_reaction.to_reaction(self.base_model, self.index) + reaction = template_reaction.to_reaction( + self.base_model, self.index + ) gpr = "" probability = None for gene in residual_reaction_gene_hash[rxn_id]: for item in residual_reaction_gene_hash[rxn_id][gene]: if "probability" in item["scores"]: - if not probability or item["scores"]["probability"] > probability: + if ( + not probability + or item["scores"]["probability"] > probability + ): probability = item["scores"]["probability"] if len(gpr) > 0: gpr += " or " @@ -822,7 +843,7 @@ def build_from_annotaton_ontology( reaction.annotation[SBO_ANNOTATION] = "SBO:0000176" reactions.append(reaction) if not reaction: - print("Reaction ",rxn_id," not found in template or database!") + print("Reaction ", rxn_id, " not found in template or database!") model_or_id.add_reactions(reactions) return model_or_id @@ -911,7 +932,7 @@ def build( annotate_with_rast=True, biomass_classic=False, biomass_gc=0.5, - add_reaction_from_rast_annotation=True + add_reaction_from_rast_annotation=True, ): """ @@ -949,11 +970,11 @@ def build( 
complex_groups = self.build_complex_groups( self.reaction_to_complex_sets.values() ) - + if add_reaction_from_rast_annotation: metabolic_reactions = self.build_metabolic_reactions() cobra_model.add_reactions(metabolic_reactions) - + non_metabolic_reactions = self.build_non_metabolite_reactions( cobra_model, allow_all_non_grp_reactions ) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 16634707..ba0c1704 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -11,7 +11,7 @@ logger = logging.getLogger(__name__) logger.setLevel( - logging.INFO#WARNING + logging.INFO # WARNING ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO @@ -148,11 +148,13 @@ def prefilter(self, media, target): self.gfpkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) - gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes("gf_filter", {}) + gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes( + "gf_filter", {} + ) base_filter = self.mdlutl.get_attributes("gf_filter", {}) for media_id in gf_filter: base_filter[media_id] = gf_filter[media_id] - + # Testing if gapfilling can work after filtering if not self.test_gapfill_database(media, target, before_filtering=False): return False @@ -176,7 +178,7 @@ def run_gapfilling( Name or expression describing the reaction or combination of reactions to the optimized minimum_obj : double Value to use for the minimal objective threshold that the model must be gapfilled to achieve - binary_check : bool + binary_check : bool Indicates if the solution should be checked to ensure it is minimal in the number of reactions involved prefilter : bool Indicates if the gapfilling database should be prefiltered using the tests provided in the MSGapfill constructor before running gapfilling @@ -276,14 +278,14 @@ def run_multi_gapfill( Media-specific minimal objective thresholds that the model must be 
gapfilled to achieve default_minimum_objective : double Default value to use for the minimal objective threshold that the model must be gapfilled to achieve - binary_check : bool + binary_check : bool Indicates if the solution should be checked to ensure it is minimal in the number of reactions involved prefilter : bool Indicates if the gapfilling database should be prefiltered using the tests provided in the MSGapfill constructor before running gapfilling check_for_growth : bool Indicates if the model should be checked to ensure that the resulting gapfilling solution produces a nonzero objective """ - + if not default_minimum_objective: default_minimum_objective = self.default_minimum_objective first = True @@ -347,8 +349,8 @@ def integrate_gapfill_solution( cumulative_solution.append([rxn_id, "<"]) rxn.upper_bound = 0 rxn.lower_bound = -100 - - #Sometimes for whatever reason, the solution includes useless reactions that should be stripped out before saving the final model + + # Sometimes for whatever reason, the solution includes useless reactions that should be stripped out before saving the final model unneeded = self.mdlutl.test_solution( solution, keep_changes=True ) # Strips out unneeded reactions - which undoes some of what is done above @@ -357,11 +359,16 @@ def integrate_gapfill_solution( if item[0] == oitem[0] and item[1] == oitem[1]: cumulative_solution.remove(oitem) break - #Adding the gapfilling solution data to the model, which is needed for saving the model in KBase + # Adding the gapfilling solution data to the model, which is needed for saving the model in KBase self.mdlutl.add_gapfilling(solution) - #Testing which gapfilled reactions are needed to produce each reactant in the objective function + # Testing which gapfilled reactions are needed to produce each reactant in the objective function if link_gaps_to_objective: - logger.info("Gapfilling sensitivity analysis running on succesful run in "+solution["media"].id+" for target 
"+solution["target"]) + logger.info( + "Gapfilling sensitivity analysis running on succesful run in " + + solution["media"].id + + " for target " + + solution["target"] + ) gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) if solution["media"].id not in gf_sensitivity: gf_sensitivity[solution["media"].id] = {} diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index b0d3b2b6..75e356c4 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -13,6 +13,7 @@ logging.INFO ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO + class MSGrowthPhenotype: def __init__( self, @@ -35,7 +36,7 @@ def __init__( self.additional_compounds = additional_compounds self.parent = parent - def build_media(self,include_base_media=True): + def build_media(self, include_base_media=True): """Builds media object to use when simulating the phenotype Parameters ---------- @@ -79,25 +80,30 @@ def simulate( modelutl = model_or_mdlutl if not isinstance(model_or_mdlutl, MSModelUtil): modelutl = MSModelUtil.get(model_or_mdlutl) - - #Setting objective + + # Setting objective if objective: modelutl.model.objective = objective - - #Building full media and adding missing exchanges - output = {"growth": None, "class": None, "missing_transports": [], "baseline_growth": None} + + # Building full media and adding missing exchanges + output = { + "growth": None, + "class": None, + "missing_transports": [], + "baseline_growth": None, + } full_media = self.build_media() if add_missing_exchanges: output["missing_transports"] = modelutl.add_missing_exchanges(full_media) - - #Getting basline growth + + # Getting basline growth output["baseline_growth"] = 0.01 if self.parent: - output["baseline_growth"] = self.parent.baseline_growth(modelutl,objective) + output["baseline_growth"] = self.parent.baseline_growth(modelutl, objective) if output["baseline_growth"] < 1e-5: 
output["baseline_growth"] = 0.01 - - #Building specific media and setting compound exception list + + # Building specific media and setting compound exception list if self.parent and self.parent.atom_limits and len(self.parent.atom_limits) > 0: reaction_exceptions = [] specific_media = self.build_media(False) @@ -105,36 +111,36 @@ def simulate( ex_hash = mediacpd.get_mdl_exchange_hash(modelutl) for mdlcpd in ex_hash: reaction_exceptions.append(ex_hash[mdlcpd]) - modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.parent.atom_limits,exception_reactions=reaction_exceptions) - - #Applying media + modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package( + self.parent.atom_limits, exception_reactions=reaction_exceptions + ) + + # Applying media if self.parent: modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( full_media, self.parent.base_uptake, self.parent.base_excretion ) else: - modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( - full_media,0,1000 - ) - + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(full_media, 0, 1000) + with modelutl.model: - #Applying gene knockouts + # Applying gene knockouts for gene in self.gene_ko: if gene in modelutl.model.genes: geneobj = modelutl.model.genes.get_by_id(gene) geneobj.knock_out() - - #Optimizing model + + # Optimizing model solution = modelutl.model.optimize() output["growth"] = solution.objective_value if solution.objective_value > 0 and pfba: solution = cobra.flux_analysis.pfba(modelutl.model) if save_fluxes: output["fluxes"] = solution.fluxes - - #Determining phenotype class - - if output["growth"] >= output["baseline_growth"]*growth_multiplier: + + # Determining phenotype class + + if output["growth"] >= output["baseline_growth"] * growth_multiplier: output["GROWING"] = True if not self.growth: output["class"] = "GROWTH" @@ -172,44 +178,60 @@ def gapfill_model_for_phenotype( objective : string Expression for objective to be activated by gapfilling """ - #First simulate model without gapfilling 
to assess ungapfilled growth - output = self.simulate(msgapfill.mdlutl,objective,growth_multiplier,add_missing_exchanges) - if output["growth"] >= output["baseline_growth"]*growth_multiplier: - #No gapfilling needed - original model grows without gapfilling - return {"reversed": {}, "new": {},"media": self.build_media(), "target":objective, "minobjective": output["baseline_growth"]*growth_multiplier, "binary_check":False} - - #Now pulling the gapfilling configured model from MSGapfill + # First simulate model without gapfilling to assess ungapfilled growth + output = self.simulate( + msgapfill.mdlutl, objective, growth_multiplier, add_missing_exchanges + ) + if output["growth"] >= output["baseline_growth"] * growth_multiplier: + # No gapfilling needed - original model grows without gapfilling + return { + "reversed": {}, + "new": {}, + "media": self.build_media(), + "target": objective, + "minobjective": output["baseline_growth"] * growth_multiplier, + "binary_check": False, + } + + # Now pulling the gapfilling configured model from MSGapfill gfmodelutl = MSModelUtil.get(msgapfill.gfmodel) - #Saving the gapfill objective because this will be replaced when the simulation runs + # Saving the gapfill objective because this will be replaced when the simulation runs gfobj = gfmodelutl.model.objective - #Running simulate on gapfill model to add missing exchanges and set proper media and uptake limit constraints - output = self.simulate(modelutl,objective,growth_multiplier,add_missing_exchanges) - #If the gapfilling model fails to achieve the minimum growth, then no solution exists - if output["growth"] < output["baseline_growth"]*growth_multiplier: + # Running simulate on gapfill model to add missing exchanges and set proper media and uptake limit constraints + output = self.simulate( + modelutl, objective, growth_multiplier, add_missing_exchanges + ) + # If the gapfilling model fails to achieve the minimum growth, then no solution exists + if output["growth"] < 
output["baseline_growth"] * growth_multiplier: logger.warning( "Gapfilling failed with the specified model, media, and target reaction." ) return None - - #Running the gapfilling itself + + # Running the gapfilling itself full_media = self.build_media() with modelutl.model: - #Applying gene knockouts + # Applying gene knockouts for gene in self.gene_ko: if gene in modelutl.model.genes: geneobj = modelutl.model.genes.get_by_id(gene) geneobj.knock_out() - - gfresults = self.gapfilling.run_gapfilling(media,None,minimum_obj=output["baseline_growth"]*growth_multiplier) + + gfresults = self.gapfilling.run_gapfilling( + media, None, minimum_obj=output["baseline_growth"] * growth_multiplier + ) if gfresults is None: logger.warning( "Gapfilling failed with the specified model, media, and target reaction." ) - + return gfresults + class MSGrowthPhenotypes: - def __init__(self, base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): + def __init__( + self, base_media=None, base_uptake=0, base_excretion=1000, global_atom_limits={} + ): self.base_media = base_media self.phenotypes = DictList() self.base_uptake = base_uptake @@ -219,8 +241,16 @@ def __init__(self, base_media=None, base_uptake=0, base_excretion=1000,global_at self.cached_based_growth = {} @staticmethod - def from_compound_hash(compounds,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): - growthpheno = MSGrowthPhenotypes(base_media, base_uptake, base_excretion,global_atom_limits) + def from_compound_hash( + compounds, + base_media=None, + base_uptake=0, + base_excretion=1000, + global_atom_limits={}, + ): + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) new_phenos = [] for cpd in compounds: newpheno = MSGrowthPhenotype(cpd, None, compounds[cpd], [], [cpd]) @@ -229,8 +259,17 @@ def from_compound_hash(compounds,base_media=None, base_uptake=0, base_excretion= return growthpheno @staticmethod - def 
from_kbase_object(data, kbase_api,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): - growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) + def from_kbase_object( + data, + kbase_api, + base_media=None, + base_uptake=0, + base_excretion=1000, + global_atom_limits={}, + ): + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) new_phenos = [] for pheno in data["phenotypes"]: media = kbase_api.get_from_ws(pheno["media_ref"], None) @@ -248,9 +287,18 @@ def from_kbase_object(data, kbase_api,base_media=None, base_uptake=0, base_excre return growthpheno @staticmethod - def from_kbase_file(filename, kbase_api,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): + def from_kbase_file( + filename, + kbase_api, + base_media=None, + base_uptake=0, + base_excretion=1000, + global_atom_limits={}, + ): # TSV file with the following headers:media mediaws growth geneko addtlCpd - growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) headings = [] new_phenos = [] with open(filename) as f: @@ -282,8 +330,16 @@ def from_kbase_file(filename, kbase_api,base_media=None, base_uptake=0, base_exc return growthpheno @staticmethod - def from_ms_file(filename,base_media=None, base_uptake=0, base_excretion=100,global_atom_limits={}): - growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) + def from_ms_file( + filename, + base_media=None, + base_uptake=0, + base_excretion=100, + global_atom_limits={}, + ): + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) df = pd.read_csv(filename) required_headers = ["Compounds", "Growth"] for item in required_headers: @@ -311,7 +367,7 @@ def build_super_media(self): else: 
super_media.merge(pheno.build_media(), overwrite_overlap=False) return super_media - + def add_phenotypes(self, new_phenotypes): keep_phenos = [] for pheno in new_phenotypes: @@ -321,11 +377,7 @@ def add_phenotypes(self, new_phenotypes): additions = DictList(keep_phenos) self.phenotypes += additions - def baseline_growth( - self, - model_or_mdlutl, - objective - ): + def baseline_growth(self, model_or_mdlutl, objective): """Simulates all the specified phenotype conditions and saves results Parameters ---------- @@ -336,22 +388,22 @@ def baseline_growth( modelutl = model_or_mdlutl if not isinstance(model_or_mdlutl, MSModelUtil): modelutl = MSModelUtil.get(model_or_mdlutl) - #Checking if base growth already computed + # Checking if base growth already computed if modelutl in self.cached_based_growth: if objective in self.cached_based_growth[modelutl]: return self.cached_based_growth[modelutl][objective] else: self.cached_based_growth[modelutl] = {} - #Setting objective + # Setting objective modelutl.objective = objective - #Setting media + # Setting media modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( self.base_media, self.base_uptake, self.base_excretion ) - #Adding uptake limits + # Adding uptake limits if len(self.atom_limits) > 0: modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.atom_limits) - #Simulating + # Simulating self.cached_based_growth[modelutl][objective] = modelutl.model.slim_optimize() return self.cached_based_growth[modelutl][objective] @@ -364,7 +416,7 @@ def simulate_phenotypes( save_fluxes=False, gapfill_negatives=False, msgapfill=None, - test_conditions=None + test_conditions=None, ): """Simulates all the specified phenotype conditions and saves results Parameters @@ -384,14 +436,14 @@ def simulate_phenotypes( modelutl = model_or_mdlutl if not isinstance(model_or_mdlutl, MSModelUtil): modelutl = MSModelUtil.get(model_or_mdlutl) - #Setting objective + # Setting objective modelutl.objective = objective - #Getting basline growth 
- baseline_growth = self.baseline_growth(modelutl,objective) - #Establishing output of the simulation method + # Getting basline growth + baseline_growth = self.baseline_growth(modelutl, objective) + # Establishing output of the simulation method summary = { - "Label": ["Accuracy", "CP", "CN", "FP", "FN","Growth","No growth"], - "Count": [0, 0, 0, 0, 0,0,0], + "Label": ["Accuracy", "CP", "CN", "FP", "FN", "Growth", "No growth"], + "Count": [0, 0, 0, 0, 0, 0, 0], } data = { "Phenotype": [], @@ -400,22 +452,24 @@ def simulate_phenotypes( "Class": [], "Transports missing": [], "Gapfilled reactions": [], - "Gapfilling score":None + "Gapfilling score": None, } - #Running simulations + # Running simulations gapfilling_solutions = {} totalcount = 0 for pheno in self.phenotypes: result = pheno.simulate( - modelutl,objective,growth_multiplier,add_missing_exchanges,save_fluxes + modelutl, + objective, + growth_multiplier, + add_missing_exchanges, + save_fluxes, ) data["Class"].append(result["class"]) data["Phenotype"].append(pheno.id) data["Observed growth"].append(pheno.growth) data["Simulated growth"].append(result["growth"]) - data["Transports missing"].append( - ";".join(result["missing_transports"]) - ) + data["Transports missing"].append(";".join(result["missing_transports"])) if result["class"] == "CP": summary["Count"][1] += 1 summary["Count"][0] += 1 @@ -434,22 +488,25 @@ def simulate_phenotypes( summary["Count"][5] += 1 elif result["class"] == "NOGROWTH": summary["Count"][6] += 1 - #Gapfilling negative growth conditions - if gapfill_negatives and output["class"] in ["NOGROWTH","FN","CN"]: - gapfilling_solutions[pheno] = pheno.gapfill_model_for_phenotype(msgapfill,objective,test_conditions,growth_multiplier,add_missing_exchanges) + # Gapfilling negative growth conditions + if gapfill_negatives and output["class"] in ["NOGROWTH", "FN", "CN"]: + gapfilling_solutions[pheno] = pheno.gapfill_model_for_phenotype( + msgapfill, + objective, + test_conditions, + 
growth_multiplier, + add_missing_exchanges, + ) if gapfilling_solutions[pheno] != None: data["Gapfilling score"] = 0 list = [] for rxn_id in gapfilling_solutions[pheno]["reversed"]: list.append( - gapfilling_solutions[pheno]["reversed"][rxn_id] - + rxn_id + gapfilling_solutions[pheno]["reversed"][rxn_id] + rxn_id ) data["Gapfilling score"] += 0.5 for rxn_id in gapfilling_solutions[pheno]["new"]: - list.append( - gapfilling_solutions[pheno]["new"][rxn_id] + rxn_id - ) + list.append(gapfilling_solutions[pheno]["new"][rxn_id] + rxn_id) data["Gapfilling score"] += 1 data["Gapfilled reactions"].append(";".join(list)) else: @@ -473,9 +530,9 @@ def fit_model_to_phenotypes( minimize_new_false_positives=True, atp_safe=True, integrate_results=True, - global_gapfilling=True + global_gapfilling=True, ): - + """Simulates all the specified phenotype conditions and saves results Parameters ---------- @@ -488,46 +545,46 @@ def fit_model_to_phenotypes( integrate_results : bool Indicates if the resulting modifications to the model should be integrated """ - - - - #Running simulations + + # Running simulations positive_growth = [] negative_growth = [] for pheno in self.phenotypes: with model: result = pheno.simulate( - modelutl,objective,growth_multiplier,add_missing_exchanges,save_fluxes + modelutl, + objective, + growth_multiplier, + add_missing_exchanges, + save_fluxes, ) - #Gapfilling negative growth conditions - if gapfill_negatives and output["class"] in ["NOGROWTH","FN","CN"]: + # Gapfilling negative growth conditions + if gapfill_negatives and output["class"] in ["NOGROWTH", "FN", "CN"]: negative_growth.append(pheno.build_media()) - elif gapfill_negatives and output["class"] in ["GROWTH","FP","CP"]: + elif gapfill_negatives and output["class"] in ["GROWTH", "FP", "CP"]: positive_growth.append(pheno.build_media()) - - - #Create super media for all + + # Create super media for all super_media = self.build_super_media() - #Adding missing exchanges + # Adding missing exchanges 
msgapfill.gfmodel.add_missing_exchanges(super_media) - #Adding elemental constraints + # Adding elemental constraints self.add_elemental_constraints() - #Getting ATP tests - - #Filtering database for ATP tests - - #Penalizing database to avoid creating false positives - - #Building additional tests from current correct negatives - - #Computing base-line growth - - #Computing growth threshold - - #Running global gapfill - - #Integrating solution - + # Getting ATP tests + + # Filtering database for ATP tests + + # Penalizing database to avoid creating false positives + + # Building additional tests from current correct negatives + + # Computing base-line growth + + # Computing growth threshold + + # Running global gapfill + + # Integrating solution def gapfill_all_phenotypes( self, diff --git a/modelseedpy/core/msmedia.py b/modelseedpy/core/msmedia.py index fadc435d..960e82d1 100644 --- a/modelseedpy/core/msmedia.py +++ b/modelseedpy/core/msmedia.py @@ -22,8 +22,8 @@ def maxFlux(self): def minFlux(self): # TODO: will be removed later just for old methods return -self.upper_bound - - def get_mdl_exchange_hash(self,model_or_mdlutl): + + def get_mdl_exchange_hash(self, model_or_mdlutl): modelutl = model_or_mdlutl if not isinstance(model_or_mdlutl, MSModelUtil): modelutl = MSModelUtil.get(model_or_mdlutl) @@ -33,7 +33,8 @@ def get_mdl_exchange_hash(self,model_or_mdlutl): for met in mets: if met in exchange_hash: output[met] = exchange_hash[met] - return output + return output + class MSMedia: def __init__(self, media_id, name=""): diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index c5274fcd..ca83533f 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -15,58 +15,82 @@ logging.INFO ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO + class MSModelReport: - def __init__( - self - ): + def __init__(self): pass def generate_reports(self, model, report_path, 
multi_tab_report_path): self.build_report(model, report_path) self.build_multitab_report(model, multi_tab_report_path) - + # Helper function to build overview data def build_overview_data(self, model): # Get the number of compartments - number_compartments = len(set([metabolite.compartment for metabolite in model.metabolites])) + number_compartments = len( + set([metabolite.compartment for metabolite in model.metabolites]) + ) # Extract gapfilling information - gapfillings_str = model.notes.get('kbase_gapfillings', '[]') + gapfillings_str = model.notes.get("kbase_gapfillings", "[]") pattern = r"\{.*?\}" gapfilling_matches = re.findall(pattern, gapfillings_str) - gapfillings = [eval(gapfilling.replace('false', 'False').replace('true', 'True').replace('null', 'None')) for gapfilling in gapfilling_matches] + gapfillings = [ + eval( + gapfilling.replace("false", "False") + .replace("true", "True") + .replace("null", "None") + ) + for gapfilling in gapfilling_matches + ] core_gapfilling_media = [] gapfilling_media = [] for gapfilling in gapfillings: - media_name = gapfilling.get('id', '').replace('ATP-', '') - target = gapfilling.get('target', '') + media_name = gapfilling.get("id", "").replace("ATP-", "") + target = gapfilling.get("target", "") if target == "rxn00062_c0": core_gapfilling_media.append(media_name) - elif target.startswith('bio'): + elif target.startswith("bio"): gapfilling_media.append(media_name) # Count the number of gapfills number_gapfills = gapfillings_str.count('"media_ref"') # Convert the lists to strings - core_gapfilling_str = "; ".join(core_gapfilling_media) if core_gapfilling_media else "No core gapfilling data found!" - gapfilling_media_str = "; ".join(gapfilling_media) if gapfilling_media else "No genome-scale gapfilling data found!" + core_gapfilling_str = ( + "; ".join(core_gapfilling_media) + if core_gapfilling_media + else "No core gapfilling data found!" 
+ ) + gapfilling_media_str = ( + "; ".join(gapfilling_media) + if gapfilling_media + else "No genome-scale gapfilling data found!" + ) overview = { - 'Model ID': model.id, - 'Full Gapfilling and ATP Analysis Report': 'TBD', # You may replace 'TBD' with actual data when available - 'Genome Scale Template': model.notes.get('kbase_template_refs', 'Data Not Available'), - 'Core Gapfilling Media': core_gapfilling_str, - 'Gapfilling Media': gapfilling_media_str, - 'Source Genome': model.notes.get('kbase_genome_ref', 'Data Not Available'), - 'Total Number of reactions': len(model.reactions), - 'Number compounds': len(model.metabolites), - 'Number compartments': number_compartments, - 'Number biomass': len([rxn for rxn in model.reactions if rxn.annotation.get('sbo') == 'SBO:0000629']), - 'Number gapfills': number_gapfills + "Model ID": model.id, + "Full Gapfilling and ATP Analysis Report": "TBD", # You may replace 'TBD' with actual data when available + "Genome Scale Template": model.notes.get( + "kbase_template_refs", "Data Not Available" + ), + "Core Gapfilling Media": core_gapfilling_str, + "Gapfilling Media": gapfilling_media_str, + "Source Genome": model.notes.get("kbase_genome_ref", "Data Not Available"), + "Total Number of reactions": len(model.reactions), + "Number compounds": len(model.metabolites), + "Number compartments": number_compartments, + "Number biomass": len( + [ + rxn + for rxn in model.reactions + if rxn.annotation.get("sbo") == "SBO:0000629" + ] + ), + "Number gapfills": number_gapfills, } return overview @@ -80,7 +104,9 @@ def extract_gapfilling_data(self, gf_sensitivity, model): for media, media_data in gf_sensitivity.items(): for target, target_data in media_data.items(): - for reaction_id, reaction_data in target_data.get('success', {}).items(): + for reaction_id, reaction_data in target_data.get( + "success", {} + ).items(): for direction, metabolites in reaction_data.items(): # If metabolites is None, set to empty string if metabolites is None: 
@@ -92,18 +118,28 @@ def extract_gapfilling_data(self, gf_sensitivity, model): if isinstance(metabolites, (list, tuple)): for met_id in metabolites: sensitivity_ids.append(met_id) - met_name = model.metabolites.get_by_id(met_id).name if met_id in model.metabolites else met_id + met_name = ( + model.metabolites.get_by_id(met_id).name + if met_id in model.metabolites + else met_id + ) sensitivity_names.append(met_name) else: - metabolites = str(metabolites) + metabolites = str(metabolites) entry = { "reaction_id": reaction_id, - "reaction_name": model.reactions.get_by_id(reaction_id).name if reaction_id in model.reactions else reaction_id, + "reaction_name": model.reactions.get_by_id(reaction_id).name + if reaction_id in model.reactions + else reaction_id, "media": media, "direction": direction, "target": target, - "gapfilling_sensitivity_id": "; ".join(sensitivity_ids) if sensitivity_ids else metabolites, - "gapfilling_sensitivity_name": "; ".join(sensitivity_names) if sensitivity_names else metabolites + "gapfilling_sensitivity_id": "; ".join(sensitivity_ids) + if sensitivity_ids + else metabolites, + "gapfilling_sensitivity_name": "; ".join(sensitivity_names) + if sensitivity_names + else metabolites, } # Update the summary dictionary @@ -122,9 +158,9 @@ def extract_gapfilling_data(self, gf_sensitivity, model): else: gapfilling_dict[reaction_id] = entry - return list(gapfilling_dict.values()), gapfilling_summary + return list(gapfilling_dict.values()), gapfilling_summary - #transform data to be used in tabular format to use in build_model_report + # transform data to be used in tabular format to use in build_model_report def transform_gapfilling_data(self, gapfilling_data): transformed_data = [] for entry in gapfilling_data: @@ -135,20 +171,23 @@ def transform_gapfilling_data(self, gapfilling_data): entry["direction"], entry["target"], entry["gapfilling_sensitivity_id"], - entry["gapfilling_sensitivity_name"] + entry["gapfilling_sensitivity_name"], ] 
transformed_data.append(row) return transformed_data - - + # Extract ATP analysis data def extract_atp_analysis_data(self, atp_analysis, atp_expansion_filter): entries = [] - if atp_analysis and 'core_atp_gapfilling' in atp_analysis: - for media, data in atp_analysis['core_atp_gapfilling'].items(): - score = data.get('score', None) - new_reactions = ["{}: {}".format(k, v) for k, v in data.get('new', {}).items()] - reversed_reactions = ["{}: {}".format(k, v) for k, v in data.get('reversed', {}).items()] + if atp_analysis and "core_atp_gapfilling" in atp_analysis: + for media, data in atp_analysis["core_atp_gapfilling"].items(): + score = data.get("score", None) + new_reactions = [ + "{}: {}".format(k, v) for k, v in data.get("new", {}).items() + ] + reversed_reactions = [ + "{}: {}".format(k, v) for k, v in data.get("reversed", {}).items() + ] # Extracting the "Filtered Reactions" in the required format filtered_reactions = [] @@ -158,27 +197,33 @@ def extract_atp_analysis_data(self, atp_analysis, atp_expansion_filter): if isinstance(sub_v, dict): for reaction, direction_dict in sub_v.items(): direction = list(direction_dict.keys())[0] - filtered_reactions.append(f"{reaction}: {direction}") + filtered_reactions.append( + f"{reaction}: {direction}" + ) filtered_reactions_str = "; ".join(filtered_reactions) if score is not None: - entries.append({ - 'media': media, - 'no_of_gapfilled_reactions': score, - 'gapfilled_reactions': "; ".join(new_reactions), - 'reversed_reaction_by_gapfilling': "; ".join(reversed_reactions), - 'filtered_reactions': filtered_reactions_str - }) + entries.append( + { + "media": media, + "no_of_gapfilled_reactions": score, + "gapfilled_reactions": "; ".join(new_reactions), + "reversed_reaction_by_gapfilling": "; ".join( + reversed_reactions + ), + "filtered_reactions": filtered_reactions_str, + } + ) # Sorting the entries based on the 'no_of_gapfilled_reactions' column - entries.sort(key=lambda x: x['no_of_gapfilled_reactions']) + 
entries.sort(key=lambda x: x["no_of_gapfilled_reactions"]) return entries # Extract ATP production data for the ATP Analysis tab def extract_atp_production_data(self, atp_analysis): atp_production_dict = {} if atp_analysis: - selected_media = atp_analysis.get('selected_media', {}) - core_atp_gapfilling = atp_analysis.get('core_atp_gapfilling', {}) + selected_media = atp_analysis.get("selected_media", {}) + core_atp_gapfilling = atp_analysis.get("core_atp_gapfilling", {}) # First, process selected_media for media, value in selected_media.items(): @@ -187,30 +232,36 @@ def extract_atp_production_data(self, atp_analysis): # Next, process core_atp_gapfilling for media not in selected_media for media, data in core_atp_gapfilling.items(): if media not in atp_production_dict: - if data.get('failed'): - atp_production_dict[media] = 'failed' + if data.get("failed"): + atp_production_dict[media] = "failed" else: # If the media was not processed in selected_media and it's not failed, set as 'Not Integrated' - atp_production_dict[media] = 'Not Integrated' + atp_production_dict[media] = "Not Integrated" + + return atp_production_dict - return atp_production_dict - def build_multitab_report(self, model_or_mdlutl, output_path): - + # Build overview data overview_data = self.build_overview_data(model_or_mdlutl) - + # Get gf_sensitivity attribute from the model - gf_sensitivity = model_or_mdlutl.attributes.get('gf_sensitivity', None) + gf_sensitivity = model_or_mdlutl.attributes.get("gf_sensitivity", None) # Extract gapfilling data - gapfilling_entries, gapfilling_reaction_summary = self.extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) + gapfilling_entries, gapfilling_reaction_summary = self.extract_gapfilling_data( + gf_sensitivity, model_or_mdlutl + ) # Check if ATP_analysis attribute is present in the model - atp_analysis = model_or_mdlutl.attributes.get('ATP_analysis', None) + atp_analysis = model_or_mdlutl.attributes.get("ATP_analysis", None) if atp_analysis: - 
atp_expansion_filter = model_or_mdlutl.attributes.get('atp_expansion_filter', {}) - atp_analysis_entries = self.extract_atp_analysis_data(atp_analysis, atp_expansion_filter) + atp_expansion_filter = model_or_mdlutl.attributes.get( + "atp_expansion_filter", {} + ) + atp_analysis_entries = self.extract_atp_analysis_data( + atp_analysis, atp_expansion_filter + ) else: atp_analysis_entries = [] @@ -222,7 +273,7 @@ def build_multitab_report(self, model_or_mdlutl, output_path): "genes": [], "biomass": [], "gapfilling": gapfilling_entries, # Populated with gapfilling data - "atpanalysis": atp_analysis_entries # Populated with ATP analysis data + "atpanalysis": atp_analysis_entries, # Populated with ATP analysis data } print("Module Path:", module_path + "/../data/") @@ -230,7 +281,11 @@ def build_multitab_report(self, model_or_mdlutl, output_path): exchanges = {r.id for r in model_or_mdlutl.exchanges} # Identify biomass reactions using SBO annotation - biomass_reactions_ids = {rxn.id for rxn in model_or_mdlutl.reactions if rxn.annotation.get('sbo') == 'SBO:0000629'} + biomass_reactions_ids = { + rxn.id + for rxn in model_or_mdlutl.reactions + if rxn.annotation.get("sbo") == "SBO:0000629" + } # Reactions Tab for rxn in model_or_mdlutl.reactions: @@ -241,11 +296,12 @@ def build_multitab_report(self, model_or_mdlutl, output_path): "name": rxn.name, "equation": equation, "genes": rxn.gene_reaction_rule, - "gapfilling": "; ".join(gapfilling_reaction_summary.get(rxn.id, [])) # Empty list results in an empty string + "gapfilling": "; ".join( + gapfilling_reaction_summary.get(rxn.id, []) + ), # Empty list results in an empty string } context["reactions"].append(rxn_data) - # Compounds Tab for cpd in model_or_mdlutl.metabolites: cpd_data = { @@ -253,7 +309,7 @@ def build_multitab_report(self, model_or_mdlutl, output_path): "name": cpd.name, "formula": cpd.formula, "charge": cpd.charge, - "compartment": cpd.compartment + "compartment": cpd.compartment, } 
context["compounds"].append(cpd_data) @@ -261,7 +317,7 @@ def build_multitab_report(self, model_or_mdlutl, output_path): for gene in model_or_mdlutl.genes: gene_data = { "gene": gene.id, - "reactions": "; ".join([rxn.id for rxn in gene.reactions]) + "reactions": "; ".join([rxn.id for rxn in gene.reactions]), } context["genes"].append(gene_data) @@ -271,22 +327,22 @@ def build_multitab_report(self, model_or_mdlutl, output_path): biomass_rxn = model_or_mdlutl.reactions.get_by_id(biomass_rxn_id) for metabolite, coefficient in biomass_rxn.metabolites.items(): compound_id = metabolite.id - compound_name = metabolite.name.split('_')[0] - compartment = compound_id.split('_')[-1] + compound_name = metabolite.name.split("_")[0] + compartment = compound_id.split("_")[-1] biomass_data = { "biomass_reaction_id": biomass_rxn.id, "biomass_compound_id": compound_id, "name": compound_name, "coefficient": coefficient, - "compartment": compartment + "compartment": compartment, } context["biomass"].append(biomass_data) else: print("No biomass reactions found in the model.") # Gapfilling Tab - gf_sensitivity = model_or_mdlutl.attributes.get('gf_sensitivity', None) + gf_sensitivity = model_or_mdlutl.attributes.get("gf_sensitivity", None) gapfilling_data = self.extract_gapfilling_data(gf_sensitivity, model_or_mdlutl) context["gapfilling"] = gapfilling_entries @@ -295,8 +351,8 @@ def build_multitab_report(self, model_or_mdlutl, output_path): # Populate the 'atpanalysis' context with ATP production data for entry in context["atpanalysis"]: - media = entry['media'] - entry['atp_production'] = atp_production_data.get(media, None) + media = entry["media"] + entry["atp_production"] = atp_production_data.get(media, None) # Diagnostics unique_biomass_rxns = biomass_reactions_ids @@ -322,7 +378,7 @@ def build_multitab_report(self, model_or_mdlutl, output_path): print("\nFirst 2 gapfilling entries:") for gf in context["gapfilling"][:2]: print(gf) - + print("\nFirst 2 ATP Analysis entries:") for 
entry in context["atpanalysis"][:2]: print(entry) @@ -330,15 +386,14 @@ def build_multitab_report(self, model_or_mdlutl, output_path): # Render with template env = jinja2.Environment( loader=jinja2.FileSystemLoader(module_path + "/../data/"), - autoescape=jinja2.select_autoescape(['html', 'xml']) + autoescape=jinja2.select_autoescape(["html", "xml"]), ) html = env.get_template("ModelReportTemplate.html").render(context) directory = dirname(output_path) os.makedirs(directory, exist_ok=True) - with open(output_path, 'w') as f: + with open(output_path, "w") as f: f.write(html) - - + def build_report(self, model, output_path): """Builds model HTML report for the Model Summary table Parameters @@ -354,40 +409,95 @@ def build_report(self, model, output_path): # 2. Transform the dictionary into a list of tuples model_summary_list = [(key, value) for key, value in model_summary_data.items()] # 3. Convert to DataFrame - model_summary_df = pd.DataFrame(model_summary_list, columns=['', '']) + model_summary_df = pd.DataFrame(model_summary_list, columns=["", ""]) # Style the DataFrame (as was done previously) - model_summary_df_styled = ( - model_summary_df.style.hide(axis="index") - .set_table_styles([ - {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, - {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, - ]) + model_summary_df_styled = model_summary_df.style.hide( + axis="index" + ).set_table_styles( + [ + { + "selector": "th", + "props": [ + ("border", "none"), + ("background-color", "white"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "td", + "props": [ + ("border", 
"none"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "tr:nth-child(even)", + "props": [("background-color", "white")], + }, + { + "selector": "tr:nth-child(odd)", + "props": [("background-color", "#f2f2f2")], + }, + ] ) # Fetching the gapfilling sensitivity data - gf_sensitivity = model.attributes.get('gf_sensitivity', None) + gf_sensitivity = model.attributes.get("gf_sensitivity", None) gapfilling_data = self.extract_gapfilling_data(gf_sensitivity, model) gapfilling_list = self.transform_gapfilling_data(gapfilling_data[0]) # Convert the gapfilling_list to a DataFrame gapfillings_analysis_df = pd.DataFrame( - gapfilling_list, + gapfilling_list, columns=[ - "Reaction ID", "Reaction Name", "Media", "Direction", "Target", "Gapfilling Sensitivity ID", "Gapfilling Sensitivity Name"] + "Reaction ID", + "Reaction Name", + "Media", + "Direction", + "Target", + "Gapfilling Sensitivity ID", + "Gapfilling Sensitivity Name", + ], ) # Apply style to Gapfillings Analysis DataFrame - gapfillings_analysis_df_styled = ( - gapfillings_analysis_df.style.hide(axis="index") - .set_table_styles([ - {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, - {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, - ]) + gapfillings_analysis_df_styled = gapfillings_analysis_df.style.hide( + axis="index" + ).set_table_styles( + [ + { + "selector": "th", + "props": [ + ("border", "none"), + ("background-color", "white"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "td", + "props": [ + ("border", "none"), + ("font-family", "Oxygen"), + 
("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "tr:nth-child(even)", + "props": [("background-color", "white")], + }, + { + "selector": "tr:nth-child(odd)", + "props": [("background-color", "#f2f2f2")], + }, + ] ) # Legend for Gapfillings Analysis @@ -409,22 +519,48 @@ def build_report(self, model, output_path): """ # Extract ATP analysis data - atp_analysis = model.attributes.get('ATP_analysis', None) - atp_expansion_filter = model.attributes.get('atp_expansion_filter', {}) - atp_analysis_entries = self.extract_atp_analysis_data(atp_analysis, atp_expansion_filter) + atp_analysis = model.attributes.get("ATP_analysis", None) + atp_expansion_filter = model.attributes.get("atp_expansion_filter", {}) + atp_analysis_entries = self.extract_atp_analysis_data( + atp_analysis, atp_expansion_filter + ) # Convert the atp_analysis_entries list to a DataFrame atp_analysis_df = pd.DataFrame(atp_analysis_entries) # Apply style to ATP Analysis DataFrame - atp_analysis_df_styled = ( - atp_analysis_df.style.hide(axis="index") - .set_table_styles([ - {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, - {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, - ]) + atp_analysis_df_styled = atp_analysis_df.style.hide( + axis="index" + ).set_table_styles( + [ + { + "selector": "th", + "props": [ + ("border", "none"), + ("background-color", "white"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "td", + "props": [ + ("border", "none"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "tr:nth-child(even)", 
+ "props": [("background-color", "white")], + }, + { + "selector": "tr:nth-child(odd)", + "props": [("background-color", "#f2f2f2")], + }, + ] ) # Legend for ATP Analysis @@ -437,9 +573,9 @@ def build_report(self, model, output_path):
  • Reversed Reaction by Gapfilling: Reactions that have been reversed during the gapfilling process.
  • Filtered Reactions: Reactions that have been filtered out during the analysis. When a reaction addition would lead to a large increase in ATP production or an infinite energy loop, we filter that reaction out of the gapfilling database and prevent it from being added to the model.
  • - """ - - #ATP analysis explanation text + """ + + # ATP analysis explanation text explanation_text_atp_analysis = """

    During model reconstruction, we analyze the genome’s core metabolism draft model (model without gapfilling) to assess energy biosynthesis capabilities. The goal of this analysis is to ensure the core metabolism model is able to produce ATP before we expand the model to the genome-scale. @@ -453,29 +589,33 @@ def build_report(self, model, output_path): In cases where is known from the literature or unpublished experimental results that an organism is capable of producing ATP in a given media condition that requires gapfilling in this analysis, you can use the parameter “Force ATP media” in the reconstruction app to ensure those reactions are integrated into the model. .

    """ - + # Save the data to HTML with the styled DataFrames and the legends directory = os.path.dirname(output_path) os.makedirs(directory, exist_ok=True) - with open(output_path, 'w', encoding='utf-8') as f: - f.write('

    Model Summary

    ') + with open(output_path, "w", encoding="utf-8") as f: + f.write("

    Model Summary

    ") f.write(model_summary_df_styled.render(escape=False)) - f.write('

    ') - f.write('

    Gapfillings Analysis

    ') + f.write("

    ") + f.write("

    Gapfillings Analysis

    ") # Check for Gapfillings Analysis data if not gapfillings_analysis_df.empty: f.write(gapfillings_analysis_df_styled.render(escape=False)) - f.write(f'

    Legend:

    {annotations_text_gapfillings}') + f.write(f"

    Legend:

    {annotations_text_gapfillings}") else: - f.write('

    Warning: No Gapfillings Analysis data available for this model.

    ') + f.write( + "

    Warning: No Gapfillings Analysis data available for this model.

    " + ) - f.write('

    Core ATP Analysis

    ') + f.write("

    Core ATP Analysis

    ") # Check for ATP Analysis data if not atp_analysis_df.empty: f.write(atp_analysis_df_styled.render(escape=False)) - f.write(f'

    Legend:

    {annotations_text_atp_analysis}') + f.write(f"

    Legend:

    {annotations_text_atp_analysis}") f.write(explanation_text_atp_analysis) else: - f.write('

    Warning: No Core ATP Analysis data available for this model.

    ') \ No newline at end of file + f.write( + "

    Warning: No Core ATP Analysis data available for this model.

    " + ) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 69d70616..610724dd 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -299,9 +299,7 @@ def save_attributes(self, value=None, key=None): else: self.attributes = value if hasattr(self.model, "computed_attributes"): - logger.info( - "Setting FBAModel computed_attributes to mdlutl attributes" - ) + logger.info("Setting FBAModel computed_attributes to mdlutl attributes") self.attributes["gene_count"] = len(self.model.genes) self.model.computed_attributes = self.attributes @@ -902,7 +900,11 @@ def binary_expansion_test(self, reaction_list, condition, currmodel, depth=0): return filtered_list def reaction_expansion_test( - self, reaction_list, condition_list, binary_search=True,attribute_label="gf_filter" + self, + reaction_list, + condition_list, + binary_search=True, + attribute_label="gf_filter", ): """Adds reactions in reaction list one by one and appplies tests, filtering reactions that fail diff --git a/modelseedpy/fbapkg/elementuptakepkg.py b/modelseedpy/fbapkg/elementuptakepkg.py index 8348e602..1f61f7a8 100644 --- a/modelseedpy/fbapkg/elementuptakepkg.py +++ b/modelseedpy/fbapkg/elementuptakepkg.py @@ -16,31 +16,33 @@ def __init__(self, model): {"elements": "string"}, ) - def build_package(self, element_limits,exception_compounds=[],exception_reactions=[]): - #Converting exception compounds list into exception reaction list + def build_package( + self, element_limits, exception_compounds=[], exception_reactions=[] + ): + # Converting exception compounds list into exception reaction list self.parameters = { - "element_limits" : element_limits, - "exception_compounds" : exception_compounds, - "exception_reactions" : exception_reactions + "element_limits": element_limits, + "exception_compounds": exception_compounds, + "exception_reactions": exception_reactions, } exchange_hash = self.modelutl.exchange_hash() for met in exception_compounds: 
if met in exchange_hash: exception_reactions.append(exchange_hash[met]) - #Now building or rebuilding constraints + # Now building or rebuilding constraints for element in element_limits: if element not in self.variables["elements"]: self.build_variable(element, element_limits[element]) for element in element_limits: - #This call will first remove existing constraints then build the new constraint - self.build_constraint(element,exception_reactions) + # This call will first remove existing constraints then build the new constraint + self.build_constraint(element, exception_reactions) def build_variable(self, element, limit): return BaseFBAPkg.build_variable( self, "elements", 0, limit, "continuous", element ) - def build_constraint(self, element,exception_reactions): + def build_constraint(self, element, exception_reactions): coef = {self.variables["elements"][element]: -1} rxnlist = self.modelutl.exchange_list() for reaction in rxnlist: diff --git a/modelseedpy/fbapkg/kbasemediapkg.py b/modelseedpy/fbapkg/kbasemediapkg.py index b377547e..9dc9b315 100644 --- a/modelseedpy/fbapkg/kbasemediapkg.py +++ b/modelseedpy/fbapkg/kbasemediapkg.py @@ -40,7 +40,9 @@ def build_package( self.parameters["default_uptake"] = 0 if self.parameters["default_excretion"] is None: self.parameters["default_excretion"] = 100 - if (self.parameters["media"] and self.parameters["media"].name == "Complete") and self.parameters["default_uptake"] == 0: + if ( + self.parameters["media"] and self.parameters["media"].name == "Complete" + ) and self.parameters["default_uptake"] == 0: self.parameters["default_uptake"] = 100 # First initializing all exchanges to default uptake and excretion From 722d4dda489d134be096c3b497d27cd97891dfcc Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 14 Sep 2023 14:14:27 -0500 Subject: [PATCH 108/146] precommit --- modelseedpy/core/msmodelreport.py | 24 +++++++++++------------ modelseedpy/data/ModelReportTemplate.html | 2 +- 2 files changed, 13 insertions(+), 13 
deletions(-) diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index ca83533f..6faa728b 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -507,13 +507,13 @@ def build_report(self, model, output_path):
  • Reaction Name: The name of the reaction.
  • Media: The media used by gap filling.
  • Direction: The direction of the reaction. Can be ">" for forward, "<" for reverse, or "=" for both directions.
  • -
  • Target: The reaction selected as the objective function target for the gapfilling optimization problem. Targets here can be the model’s biomass reaction, commonly named “bio1” for models created by this app. - Alternatively, “rxn00062” (ATP Production) reaction is shown for cases where gapfilling was applied to guarantee ATP production in a given media. +
  • Target: The reaction selected as the objective function target for the gapfilling optimization problem. Targets here can be the model’s biomass reaction, commonly named “bio1” for models created by this app. + Alternatively, “rxn00062” (ATP Production) reaction is shown for cases where gapfilling was applied to guarantee ATP production in a given media. When reactions are gapfilled for ATP production, we recommend checking the full Core ATP Analysis in the table below.
  • -
  • Gapfilling Sensitivity ID and Name: Gapfilling is necessary when compounds in the biomass objective function can not be produced by the model. +
  • Gapfilling Sensitivity ID and Name: Gapfilling is necessary when compounds in the biomass objective function can not be produced by the model. For each reaction we list the biomass compound(s) that can not be synthesized by the model without gapfilling. In cases where gap filling fails there are two possible scenarios: - 1) FBF (failed before filtering) : the gapfilling immediately failed, even before we filtered out the ATP breaking reactions. This means this objective CANNOT be satisfied with the entire current database. + 1) FBF (failed before filtering) : the gapfilling immediately failed, even before we filtered out the ATP breaking reactions. This means this objective CANNOT be satisfied with the entire current database. 2) FAF (failed after filtering): the gapfilling succeeded before filtering, but failed after filtering out reactions that break ATP. This tells you definitively if the ATP filtering caused the gapfilling to fail
  • """ @@ -577,15 +577,15 @@ def build_report(self, model, output_path): # ATP analysis explanation text explanation_text_atp_analysis = """ -

    During model reconstruction, we analyze the genome’s core metabolism draft model (model without gapfilling) to assess energy biosynthesis capabilities. - The goal of this analysis is to ensure the core metabolism model is able to produce ATP before we expand the model to the genome-scale. - This step is designed to prevent gapfilling from introducing reactions that create energy-generating loops. +

    During model reconstruction, we analyze the genome’s core metabolism draft model (model without gapfilling) to assess energy biosynthesis capabilities. + The goal of this analysis is to ensure the core metabolism model is able to produce ATP before we expand the model to the genome-scale. + This step is designed to prevent gapfilling from introducing reactions that create energy-generating loops. The tests are conducted on a large collection of minimal conditions, with the goal of simulating the model’s capability to produce energy with different electron donor, electron acceptor, and carbon source combinations.

    -

    When the draft model of the core metabolism is capable of producing ATP in at least one of the test media, no gapfilling reactions part of this analysis will be added to the model. While we still report the gapfilling requirements for the test media formulations that fail to produce ATP with that draft core model, we only integrate these solutions in the model when no test media succeeds in producing ATP. - In this case, the integrated gap-filling solution(s) will be displayed in the “Gapfilling Analysis” table above, with the “Target” “rxn00062” (ATP Production) objective function.

    -

    The goal is to display the test results for all media to provide clues for the metabolic capabilities of the genome(s). When many reactions are required for growth on the SO4 testing media conditions, this could be a good indicator that the organism is not capable of performing sulfate reduction. - On the other hand, when only one gapfill reaction is required for ATP production in a given media, multiple scenarios can be considered. - 1) Organism(s) can’t grow on test condition, and we correctly did not add the reaction to the model. 2) Possible issue with the source genome annotation missing a specific gene function 3) Possible issue with the model reconstruction database. We hope this data helps make more informed decisions on reactions that may need to be manually curated in the model. +

    When the draft model of the core metabolism is capable of producing ATP in at least one of the test media, no gapfilling reactions part of this analysis will be added to the model. While we still report the gapfilling requirements for the test media formulations that fail to produce ATP with that draft core model, we only integrate these solutions in the model when no test media succeeds in producing ATP. + In this case, the integrated gap-filling solution(s) will be displayed in the “Gapfilling Analysis” table above, with the “Target” “rxn00062” (ATP Production) objective function.

    +

    The goal is to display the test results for all media to provide clues for the metabolic capabilities of the genome(s). When many reactions are required for growth on the SO4 testing media conditions, this could be a good indicator that the organism is not capable of performing sulfate reduction. + On the other hand, when only one gapfill reaction is required for ATP production in a given media, multiple scenarios can be considered. + 1) Organism(s) can’t grow on test condition, and we correctly did not add the reaction to the model. 2) Possible issue with the source genome annotation missing a specific gene function 3) Possible issue with the model reconstruction database. We hope this data helps make more informed decisions on reactions that may need to be manually curated in the model. In cases where is known from the literature or unpublished experimental results that an organism is capable of producing ATP in a given media condition that requires gapfilling in this analysis, you can use the parameter “Force ATP media” in the reconstruction app to ensure those reactions are integrated into the model. .

    """ diff --git a/modelseedpy/data/ModelReportTemplate.html b/modelseedpy/data/ModelReportTemplate.html index c382c8fc..cab60a0b 100644 --- a/modelseedpy/data/ModelReportTemplate.html +++ b/modelseedpy/data/ModelReportTemplate.html @@ -346,4 +346,4 @@
    - \ No newline at end of file + From 6151d17dbdb0124502649cb2f1f05deca6d34f6d Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 14 Sep 2023 14:50:36 -0500 Subject: [PATCH 109/146] ignore examples --- .github/workflows/pre-commit.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 87de0099..6b54b4a0 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -3,6 +3,8 @@ name: Run Pre-Commit on: pull_request: {} push: + paths-ignore: + - 'examples/**' branches: - dev - main From 06e05f3d470e8a64707c1f36772134285561aab4 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 14 Sep 2023 15:16:58 -0500 Subject: [PATCH 110/146] no examples --- .pre-commit-config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 04cde634..325706ab 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,9 @@ repos: args: - --pytest-test-first - id: check-json + exclude: examples/ - id: pretty-format-json + exclude: examples/ args: - --autofix - --top-keys=_id From 9bcc3b425858a103e2f72ce38b3935771a0ecd80 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Fri, 15 Sep 2023 16:28:52 -0500 Subject: [PATCH 111/146] version bump and atpcorrection media id check --- .../Model Reconstruction/ATPGapfilling.ipynb | 362 ++++++++++++++++-- modelseedpy/__init__.py | 2 +- modelseedpy/core/msatpcorrection.py | 10 +- setup.py | 2 +- tests/core/test_msatpcorreption.py | 33 +- 5 files changed, 365 insertions(+), 44 deletions(-) diff --git a/examples/Model Reconstruction/ATPGapfilling.ipynb b/examples/Model Reconstruction/ATPGapfilling.ipynb index f0116989..d236d609 100644 --- a/examples/Model Reconstruction/ATPGapfilling.ipynb +++ b/examples/Model Reconstruction/ATPGapfilling.ipynb @@ -526,7 +526,13 @@ "cell_type": "code", "execution_count": 60, "id": "6ade9096-f3f4-40f8-a1ea-53b5b63ec2c0", - "metadata": {}, + 
"metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, "outputs": [ { "name": "stderr", @@ -1174,123 +1180,417 @@ }, { "cell_type": "code", - "execution_count": 67, - "id": "7aba6de8-9252-4980-95b0-bd1a72db2e05", + "execution_count": 1, + "id": "e24d8e82-357a-4658-9362-6073f502b6bc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "modelseedpy 0.2.2\n" + ] + } + ], + "source": [ + "import modelseedpy" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1080bc7b-58c2-4105-91a2-2defaa8a1c92", "metadata": {}, "outputs": [], "source": [ - "atp_correction.apply_growth_media_gapfilling()" + "%run /home/fliu/workspace/python3/ModelSEEDpy/tests/core/test_msatpcorreption.py" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3ee9a1dd-9b8c-4204-b846-609cecebffc7", + "metadata": {}, + "outputs": [], + "source": [ + "def get_model(ko):\n", + " def _method(ko=ko, added_compounds=None, added_reactions=None):\n", + " if ko is None:\n", + " ko = []\n", + " with open(\n", + " '/home/fliu/workspace/python3/ModelSEEDpy/tests/test_data/e_coli_core.json',\n", + " \"r\",\n", + " ) as fh:\n", + " model_json = json.load(fh)\n", + " model_json[\"compartments\"] = {\n", + " k + \"0\": v for (k, v) in model_json[\"compartments\"].items()\n", + " }\n", + " metabolites = {}\n", + " for m in model_json[\"metabolites\"]:\n", + " m[\"id\"] += \"0\"\n", + " m[\"compartment\"] += \"0\"\n", + " metabolites[m[\"id\"]] = m\n", + " for r in model_json[\"reactions\"]:\n", + " r[\"metabolites\"] = {i + \"0\": v for (i, v) in r[\"metabolites\"].items()}\n", + " compartments = set(\n", + " [metabolites[k][\"compartment\"] for k in r[\"metabolites\"].keys()]\n", + " )\n", + " if r[\"id\"].endswith(\"_e\"):\n", + " r[\"id\"] += \"0\"\n", + " elif len(compartments) == 1:\n", + " r[\"id\"] += \"_\" + list(compartments)[0]\n", + " else:\n", + " r[\"id\"] += (\n", + " \"_\" + 
\"c0\"\n", + " ) # hack cause there is only combo between e0 and c0\n", + "\n", + " model_json[\"reactions\"] = [\n", + " x for x in model_json[\"reactions\"] if x[\"id\"] not in ko\n", + " ]\n", + "\n", + " if added_compounds:\n", + " for o in added_compounds:\n", + " model_json[\"metabolites\"].append(o)\n", + " if added_reactions:\n", + " for o in added_reactions:\n", + " model_json[\"reactions\"].append(o)\n", + " model = cobra.io.from_json(json.dumps(model_json))\n", + " model.reactions.ATPM_c0.lower_bound = 0\n", + " model.reactions.ATPM_c0.upper_bound = 1000\n", + " return model\n", + "\n", + " return _method(ko)" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "e8107ba2-f470-4e05-8b80-731fc00febe7", + "execution_count": 45, + "id": "928bb140-9110-4a1a-b750-dbd9d6a2acc6", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "95db6e6f-bedc-4c0d-9e73-c6eec5365c16", + "metadata": {}, + "outputs": [], + "source": [ + "model = get_model([\"NADH16_c0\", \"CYTBD_c0\", \"O2t_c0\", \"GLCpts_c0\"])\n", + "with open('/home/fliu/workspace/python3/ModelSEEDpy/tests/test_data/template_core_bigg.json', 'r') as fh:\n", + " template = MSTemplateBuilder.from_dict(json.load(fh)).build()\n", + "media_glucose_aerobic = MSMedia.from_dict(\n", + " {\n", + " \"glc__D\": (-1, 1000),\n", + " \"o2\": (-1000, 1000),\n", + " \"h\": (-1000, 1000),\n", + " \"h2o\": (-1000, 1000),\n", + " }\n", + " )\n", + "media_glucose_aerobic.id = 'glc/o2'\n", + "media_acetate_aerobic = MSMedia.from_dict(\n", + " {\n", + " \"ac\": (-1, 1000),\n", + " \"o2\": (-1000, 1000),\n", + " \"h\": (-1000, 1000),\n", + " \"h2o\": (-1000, 1000),\n", + " }\n", + " )\n", + "media_acetate_aerobic.id = 'ac/o2'\n", + "medias = [media_glucose_aerobic, media_acetate_aerobic]" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "8fdc8faf-fcc8-45cd-b775-e6bc143a42cc", 
+ "metadata": {}, + "outputs": [], + "source": [ + "%run /home/fliu/workspace/python3/ModelSEEDpy/modelseedpy/core/msatpcorrection.py\n", + "atp_correction = MSATPCorrection(\n", + " model,\n", + " template,\n", + " medias,\n", + " atp_hydrolysis_id=\"ATPM_c0\",\n", + " load_default_medias=False,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "fc07b43d-88f5-477c-9149-28756a5cd926", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0" + "[[, 0.01],\n", + " [, 0.01]]" ] }, - "execution_count": 18, + "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "atp_correction.max_gapfilling" + "atp_correction.atp_medias" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "1af1e574-76b2-40f7-82f8-4ffd1bb2c442", + "execution_count": 99, + "id": "369ef2d4-f696-4762-9370-d91276e3b95f", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    Namee_coli_core
    Memory address7ff258653370
    Number of metabolites72
    Number of reactions91
    Number of genes137
    Number of groups0
    Objective expression1.0*BIOMASS_Ecoli_core_w_GAM_c0 - 1.0*BIOMASS_Ecoli_core_w_GAM_c0_reverse_70c47
    Compartmentsextracellular space, cytosol
    " + ], "text/plain": [ - "0" + "" ] }, - "execution_count": 19, + "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "atp_correction.gapfilling_delta" + "model" ] }, { "cell_type": "code", - "execution_count": 43, - "id": "0a344084-edad-456f-9e88-064a404039d4", + "execution_count": 100, + "id": "62862b90-d73b-4597-8e3f-c8bf55e9090e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[]" + "{'glc/o2': 0.0, 'ac/o2': 0.0}" ] }, - "execution_count": 43, + "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "atp_correction.gapfilling_tests" + "atp_correction.evaluate_growth_media()" ] }, { "cell_type": "code", - "execution_count": 44, - "id": "9e78779d-b7e7-4e73-a77c-9813bee3c6a9", + "execution_count": 101, + "id": "e67db875-e06f-464c-b96c-8e4ce7eb6324", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[]" + "{: {'reversed': {},\n", + " 'new': {'GLCpts_c0': '>'},\n", + " 'media': ,\n", + " 'target': 'ATPM_c0',\n", + " 'minobjective': 0.01,\n", + " 'binary_check': False},\n", + " : {'reversed': {},\n", + " 'new': {'CYTBD_c0': '>', 'NADH16_c0': '>', 'O2t_c0': '>'},\n", + " 'media': ,\n", + " 'target': 'ATPM_c0',\n", + " 'minobjective': 0.01,\n", + " 'binary_check': False}}" ] }, - "execution_count": 44, + "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "atp_correction.gapfilling_tests" + "atp_correction.media_gapfill_stats" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "47da598f-b3cd-423d-93eb-0e68f11eaef9", + "metadata": {}, + "outputs": [], + "source": [ + "atp_correction.determine_growth_media()" ] }, { "cell_type": "code", - "execution_count": 68, - "id": "669e1ddb-493b-461e-bef9-d19cb1f5e542", + "execution_count": 105, + "id": "42673388-2500-4922-83b9-3e4dfa7acb17", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[]" + "'glc/o2'" + ] + }, + "execution_count": 105, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "atp_correction.selected_media[0].id" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "c0e29cc8-85d5-450e-a3d6-c1207d297963", + "metadata": {}, + "outputs": [], + "source": [ + "atp_correction.apply_growth_media_gapfilling()" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "43f29d4f-30b3-452f-a5f9-49489b97d646", + "metadata": {}, + "outputs": [], + "source": [ + "media_eval = atp_correction.evaluate_growth_media()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "f8044fd4-70f1-4082-9316-e601ac06ac7e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'glc/o2': 2.75, 'ac/o2': 0.0}" ] }, - "execution_count": 68, + "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "atp_correction.gapfilling_tests" + "media_eval" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "db1e8df2-4a86-408b-a479-5eebf13e9971", + "metadata": {}, + "outputs": [], + "source": [ + "atp_correction.expand_model_to_genome_scale()" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "d76dcb54-1ea2-4e53-8853-521790cd8300", + "metadata": {}, + "outputs": [], + "source": [ + "tests = atp_correction.build_tests()" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "id": "f30e70fa-5258-42fd-b624-aafdce509b80", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "empty {'media': , 'is_max_threshold': True, 'threshold': 1e-05, 'objective': 'ATPM_c0'}\n", + "glc/o2 {'media': , 'is_max_threshold': True, 'threshold': 3.3, 'objective': 'ATPM_c0'}\n" + ] + } + ], + "source": [ + "for t in tests:\n", + " print(t['media'].id, t)" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "c35d3047-da1f-4331-a907-765c2b43048d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'media': ,\n", + " 
'is_max_threshold': True,\n", + " 'threshold': 1e-05,\n", + " 'objective': 'ATPM_c0'},\n", + " {'media': ,\n", + " 'is_max_threshold': True,\n", + " 'threshold': 3.3,\n", + " 'objective': 'ATPM_c0'}]" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tests" ] }, { "cell_type": "code", "execution_count": null, - "id": "e24d8e82-357a-4658-9362-6073f502b6bc", + "id": "7b718e1d-059d-410b-bf1a-05a734f09e0d", "metadata": {}, "outputs": [], "source": [] @@ -1298,7 +1598,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 75c94ad8..88c0ae64 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -14,7 +14,7 @@ __author__ = "Christopher Henry" __email__ = "chenry@anl.gov" -__version__ = "0.2.2" +__version__ = "0.3.3" logger = logging.getLogger(__name__) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index d3321d6a..0007cc96 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -108,10 +108,18 @@ def __init__( self.atp_medias = [] if load_default_medias: self.load_default_medias() + + media_ids = set() for media in atp_medias: if isinstance(media, list): + if media[0].id in media_ids: + raise ValueError('media ids not unique') + media_ids.add(media[0].id) self.atp_medias.append(media) else: + if media.id in media_ids: + raise ValueError('media ids not unique') + media_ids.add(media.id) self.atp_medias.append([media, 0.01]) self.media_hash[media.id] = media if "empty" not in self.media_hash: @@ -290,6 +298,7 @@ def evaluate_growth_media(self): media_list = [] min_objectives = {} for media, minimum_obj in self.atp_medias: + logger.debug("evaluate media %s", media) pkgmgr.getpkg("KBaseMediaPkg").build_package(media) logger.debug("model.medium %s", 
self.model.medium) @@ -300,7 +309,6 @@ def evaluate_growth_media(self): solution.objective_value, solution.status, ) - self.media_gapfill_stats[media] = None output[media.id] = solution.objective_value diff --git a/setup.py b/setup.py index a7555b97..2fb97221 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name="ModelSEEDpy", - version="0.3.1", + version="0.3.3", description="Python package for building and analyzing models using ModelSEED", long_description_content_type="text/x-rst", long_description=readme, diff --git a/tests/core/test_msatpcorreption.py b/tests/core/test_msatpcorreption.py index 3d036193..8ee5dceb 100644 --- a/tests/core/test_msatpcorreption.py +++ b/tests/core/test_msatpcorreption.py @@ -108,7 +108,7 @@ def media_acetate_aerobic(): "h2o": (-1000, 1000), } ) - media.id = "glc/o2" + media.id = "ac/o2" return media @@ -205,12 +205,13 @@ def test_infinite_atp_model_growth_boost( def test_ms_atp_correction1(get_model, template, media_all_aerobic): + atp_hydrolysis_id = 'ATPM_c0' model = get_model(["GLCpts_c0", "NADH16_c0", "CYTBD_c0", "O2t_c0"]) atp_correction = MSATPCorrection( model, template, media_all_aerobic, - atp_hydrolysis_id="ATPM_c0", + atp_hydrolysis_id=atp_hydrolysis_id, load_default_medias=False, ) atp_correction.evaluate_growth_media() @@ -234,9 +235,14 @@ def test_ms_atp_correction1(get_model, template, media_all_aerobic): tests = atp_correction.build_tests() assert tests - assert len(tests) == 1 - assert tests[0]["threshold"] > 0 - assert tests[0]["objective"] == "ATPM_c0" + assert len(tests) == 2 # glucose and empty + for t in tests: + if t['media'].id == 'empty': + assert t["threshold"] <= 1e-05 + else: + assert t["threshold"] > 1e-05 + assert t["objective"] == atp_hydrolysis_id + assert t["is_max_threshold"] is True def test_ms_atp_correction_and_gap_fill1( @@ -248,24 +254,31 @@ def test_ms_atp_correction_and_gap_fill1( ): from modelseedpy import MSGapfill + atp_hydrolysis_id = 'ATPM_c0' + model = 
get_model_with_infinite_atp_loop(["GLCpts_c0", "GLUSy_c0", "GLUDy_c0"]) model.reactions.ATPM_c0.lower_bound = 0 model.reactions.ATPM_c0.upper_bound = 1000 - model.objective = "ATPM_c0" + model.objective = atp_hydrolysis_id atp_correction = MSATPCorrection( model, template, [media_glucose_aerobic], - atp_hydrolysis_id="ATPM_c0", + atp_hydrolysis_id=atp_hydrolysis_id, load_default_medias=False, ) tests = atp_correction.run_atp_correction() # expected tests = [{'media': MSMedia object, 'is_max_threshold': True, 'threshold': 21.0, 'objective': 'ATPM_c0'}] assert tests - assert len(tests) == 1 - assert tests[0]["threshold"] > 0 - assert tests[0]["objective"] == "ATPM_c0" + assert len(tests) == 2 + for t in tests: + if t['media'].id == 'empty': + assert t["threshold"] <= 1e-05 + else: + assert t["threshold"] > 1e-05 + assert t["objective"] == atp_hydrolysis_id + assert t["is_max_threshold"] is True model.objective = "BIOMASS_Ecoli_core_w_GAM_c0" gap_fill = MSGapfill(model, [template_genome_scale], [], tests, {}, []) From a7de5496e575761ffb54414e6d8ba319ce9d5735 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Fri, 15 Sep 2023 16:30:37 -0500 Subject: [PATCH 112/146] black --- modelseedpy/core/msatpcorrection.py | 4 ++-- tests/core/test_msatpcorreption.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 0007cc96..083fc719 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -113,12 +113,12 @@ def __init__( for media in atp_medias: if isinstance(media, list): if media[0].id in media_ids: - raise ValueError('media ids not unique') + raise ValueError("media ids not unique") media_ids.add(media[0].id) self.atp_medias.append(media) else: if media.id in media_ids: - raise ValueError('media ids not unique') + raise ValueError("media ids not unique") media_ids.add(media.id) self.atp_medias.append([media, 0.01]) self.media_hash[media.id] 
= media diff --git a/tests/core/test_msatpcorreption.py b/tests/core/test_msatpcorreption.py index 8ee5dceb..a60d33ec 100644 --- a/tests/core/test_msatpcorreption.py +++ b/tests/core/test_msatpcorreption.py @@ -205,7 +205,7 @@ def test_infinite_atp_model_growth_boost( def test_ms_atp_correction1(get_model, template, media_all_aerobic): - atp_hydrolysis_id = 'ATPM_c0' + atp_hydrolysis_id = "ATPM_c0" model = get_model(["GLCpts_c0", "NADH16_c0", "CYTBD_c0", "O2t_c0"]) atp_correction = MSATPCorrection( model, @@ -237,7 +237,7 @@ def test_ms_atp_correction1(get_model, template, media_all_aerobic): assert tests assert len(tests) == 2 # glucose and empty for t in tests: - if t['media'].id == 'empty': + if t["media"].id == "empty": assert t["threshold"] <= 1e-05 else: assert t["threshold"] > 1e-05 @@ -254,7 +254,7 @@ def test_ms_atp_correction_and_gap_fill1( ): from modelseedpy import MSGapfill - atp_hydrolysis_id = 'ATPM_c0' + atp_hydrolysis_id = "ATPM_c0" model = get_model_with_infinite_atp_loop(["GLCpts_c0", "GLUSy_c0", "GLUDy_c0"]) model.reactions.ATPM_c0.lower_bound = 0 @@ -273,7 +273,7 @@ def test_ms_atp_correction_and_gap_fill1( assert tests assert len(tests) == 2 for t in tests: - if t['media'].id == 'empty': + if t["media"].id == "empty": assert t["threshold"] <= 1e-05 else: assert t["threshold"] > 1e-05 From 640118c1e2cfa676006f626687414681a3ad23f4 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 21 Sep 2023 10:17:33 -0500 Subject: [PATCH 113/146] Adding function template for gapfilling function from PNNL team --- modelseedpy/core/msgapfill.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 16634707..39de2e3e 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -375,6 +375,27 @@ def integrate_gapfill_solution( self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") self.cumulative_gapfilling.extend(cumulative_solution) + def 
compute_reaction_weights_from_expression_data( + self, omics_data, conditions=[] + ): + """Computing reaction weights based on input gene-level omics data + Parameters + ---------- + omics_data : pandas dataframe with genes as rows and conditions as columns + Specifies the reactions to be added to the model to implement the gapfilling solution + conditions : list + Optional array containing the IDs of the columns in omics_data from which data should be used. + If an empty array (or no array) is supplied, data from all columns will be used. When multiple columns are + used, the data from those columns should be normalized first, then added together + """ + #Validitions: + #1.) An conditions listed in the conditions argument should match the columns in the omics_data dataframe + #2.) Most (~80%) of the genes in the model should match genes in the omics_data dataframe + #3.) The omics_data dataframe should have at least 2 columns + #4.) The omics_data dataframe should have at least 2 rows + #5.) 
Logging should be used to report out which genes in the model don't match any genes in the omics_data dataframe + pass + @staticmethod def gapfill( model, From 7debbb4de5e37322a22912e30495ec3795cfc2de Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Fri, 22 Sep 2023 23:48:07 -0500 Subject: [PATCH 114/146] Fixing bug in model reconstruction --- modelseedpy/core/msmodelutl.py | 53 +++++++++++++++++----------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 785c4f9c..19b3497a 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -1046,33 +1046,34 @@ def find_unproducible_biomass_compounds(self, target_rxn="bio1", ko_list=None): output = {} for item in ko_list: logger.debug("KO:" + item[0] + item[1]) - rxnobj = tempmodel.reactions.get_by_id(item[0]) - if item[1] == ">": - original_bound = rxnobj.upper_bound - rxnobj.upper_bound = 0 - if item[0] not in output: - output[item[0]] = {} - output[item[0]][item[1]] = self.run_biomass_dependency_test( - target_rxn_obj, - tempmodel, - original_objective, - min_flex_obj, - rxn_list, - ) - rxnobj.upper_bound = original_bound + if item[0] not in output: + output[item[0]] = {} + if item[0] in tempmodel.reactions: + rxnobj = tempmodel.reactions.get_by_id(item[0]) + if item[1] == ">": + original_bound = rxnobj.upper_bound + rxnobj.upper_bound = 0 + output[item[0]][item[1]] = self.run_biomass_dependency_test( + target_rxn_obj, + tempmodel, + original_objective, + min_flex_obj, + rxn_list, + ) + rxnobj.upper_bound = original_bound + else: + original_bound = rxnobj.lower_bound + rxnobj.lower_bound = 0 + output[item[0]][item[1]] = self.run_biomass_dependency_test( + target_rxn_obj, + tempmodel, + original_objective, + min_flex_obj, + rxn_list, + ) + rxnobj.lower_bound = original_bound else: - original_bound = rxnobj.lower_bound - rxnobj.lower_bound = 0 - if item[0] not in output: - output[item[0]] = {} - 
output[item[0]][item[1]] = self.run_biomass_dependency_test( - target_rxn_obj, - tempmodel, - original_objective, - min_flex_obj, - rxn_list, - ) - rxnobj.lower_bound = original_bound + output[item[0]][item[1]] = [] return output def run_biomass_dependency_test( From 144946445649f60d457fc494d4d19327f2f64183 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sat, 23 Sep 2023 00:01:09 -0500 Subject: [PATCH 115/146] Running black --- modelseedpy/__init__.py | 2 +- modelseedpy/core/annotationontology.py | 288 +++++++++++------ modelseedpy/core/msatpcorrection.py | 50 +-- modelseedpy/core/msbuilder.py | 89 ++++-- modelseedpy/core/msgrowthphenotypes.py | 287 ++++++++++------- modelseedpy/core/msmedia.py | 7 +- modelseedpy/core/msmodelreport.py | 409 +++++++++++++++++-------- modelseedpy/core/msmodelutl.py | 10 +- modelseedpy/fbapkg/elementuptakepkg.py | 20 +- modelseedpy/fbapkg/kbasemediapkg.py | 4 +- 10 files changed, 745 insertions(+), 421 deletions(-) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index efd16995..6409bd1f 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -46,7 +46,7 @@ MSGapfill, MSEquation, MSModelReport, - AnnotationOntology + AnnotationOntology, ) from modelseedpy.core.exceptions import * diff --git a/modelseedpy/core/annotationontology.py b/modelseedpy/core/annotationontology.py index 4750ed13..db64a981 100644 --- a/modelseedpy/core/annotationontology.py +++ b/modelseedpy/core/annotationontology.py @@ -15,34 +15,49 @@ logging.INFO ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO -#Class structure -#AnnotationOntology -> Features/Events/Terms/Ontologies +# Class structure +# AnnotationOntology -> Features/Events/Terms/Ontologies # AnnotationOntologyOntology -> Events/Terms # AnnotationOntologyEvent -> Features/Ontology # AnnotationOntologyFeature -> Term+Event->Evidence # AnnotationOntologyTerm -> Ontology/Events/Featurs # AnnotationOntologyEvidence -> -- 
-allowable_score_types = ["probability","evalue","bitscore","identity","qalignstart","qalignstop","salignstart","salignstop","kmerhits","tmscore","rmsd","hmmscore"] +allowable_score_types = [ + "probability", + "evalue", + "bitscore", + "identity", + "qalignstart", + "qalignstop", + "salignstart", + "salignstop", + "kmerhits", + "tmscore", + "rmsd", + "hmmscore", +] + class AnnotationOntologyEvidence: - def __init__(self,scores={},ref_entity=None,entity_type=None): - self.ref_entity=ref_entity - self.entity_type=entity_type - self.scores=scores + def __init__(self, scores={}, ref_entity=None, entity_type=None): + self.ref_entity = ref_entity + self.entity_type = entity_type + self.scores = scores for item in self.scores: if item not in allowable_score_types: - logger.warning(item+" not an allowable score type!") - + logger.warning(item + " not an allowable score type!") + def to_data(self): return { - "ref_entity":self.ref_entity, - "entity_type":self.entity_type, - "scores":self.scores + "ref_entity": self.ref_entity, + "entity_type": self.entity_type, + "scores": self.scores, } - + + class AnnotationOntologyTerm: - def __init__(self,parent,term_id,ontology): + def __init__(self, parent, term_id, ontology): self.id = term_id self.parent = parent self.ontology = ontology @@ -51,126 +66,170 @@ def __init__(self,parent,term_id,ontology): self.msrxns = set() self.events = {} self.features = {} - - def add_msrxns(self,rxn_ids): + + def add_msrxns(self, rxn_ids): for rxn_id in rxn_ids: if rxn_id[0:6] == "MSRXN:": - rxn_id = rxn_id[6:] - self.msrxns.update([rxn_id]) - - def add_event(self,event): + rxn_id = rxn_id[6:] + self.msrxns.update([rxn_id]) + + def add_event(self, event): self.events[event.id] = event - - def add_feature(self,feature): + + def add_feature(self, feature): self.features[feature.id] = feature - + + class AnnotationOntologyOntology: - def __init__(self,parent,ontology_id): + def __init__(self, parent, ontology_id): self.id = ontology_id self.parent = 
parent self.events = {} self.terms = {} - - def add_event(self,event): + + def add_event(self, event): self.events[event.id] = event - - def add_term(self,term): + + def add_term(self, term): self.terms[term.id] = term + class AnnotationOntologyFeature: - def __init__(self,parent,feature_id,type=None): + def __init__(self, parent, feature_id, type=None): self.id = feature_id self.parent = parent parent.add_feature(self) self.type = type self.event_terms = {} self.term_events = {} - - def add_event_term(self,event,term,scores={},ref_entity=None,entity_type=None): + + def add_event_term(self, event, term, scores={}, ref_entity=None, entity_type=None): if event.id not in self.event_terms: self.event_terms[event.id] = {} - self.event_terms[event.id][term.id] = AnnotationOntologyEvidence(scores,ref_entity,entity_type) + self.event_terms[event.id][term.id] = AnnotationOntologyEvidence( + scores, ref_entity, entity_type + ) if term.id not in self.term_events: self.term_events[term.id] = {} self.term_events[term.id][event.id] = self.event_terms[event.id][term.id] - - def get_associated_terms(self,prioritized_event_list=None,ontologies=None,merge_all=False,translate_to_rast=False): + + def get_associated_terms( + self, + prioritized_event_list=None, + ontologies=None, + merge_all=False, + translate_to_rast=False, + ): output = {} for term_id in self.term_events: term = self.parent.terms[term_id] if not ontologies or term.ontology.id in ontologies: if merge_all or not prioritized_event_list: for event_id in self.term_events[term_id]: - if not prioritized_event_list or event_id in prioritized_event_list: + if ( + not prioritized_event_list + or event_id in prioritized_event_list + ): if term not in output: output[term] = [] - output[term].append(self.term_events[term_id][event_id].to_data()) + output[term].append( + self.term_events[term_id][event_id].to_data() + ) else: for event_id in prioritized_event_list: if event_id in self.term_events[term_id]: - rxns = 
self.parent.terms[term_id].msrxns; + rxns = self.parent.terms[term_id].msrxns if len(rxns) > 0: if term not in output: output[term] = [] - output[term].append(self.term_events[term_id][event_id].to_data()) + output[term].append( + self.term_events[term_id][event_id].to_data() + ) break return output - - def get_associated_reactions(self,prioritized_event_list=None,ontologies=None,merge_all=False): + + def get_associated_reactions( + self, prioritized_event_list=None, ontologies=None, merge_all=False + ): output = {} for term_id in self.term_events: if not ontologies or self.parent.terms[term_id].ontology.id in ontologies: if merge_all or not prioritized_event_list: for event_id in self.term_events[term_id]: - if not prioritized_event_list or event_id in prioritized_event_list: - rxns = self.parent.terms[term_id].msrxns; + if ( + not prioritized_event_list + or event_id in prioritized_event_list + ): + rxns = self.parent.terms[term_id].msrxns for rxn_id in rxns: if rxn_id not in output: output[rxn_id] = [] - output[rxn_id].append(self.term_events[term_id][event_id].to_data()) + output[rxn_id].append( + self.term_events[term_id][event_id].to_data() + ) else: for event_id in prioritized_event_list: if event_id in self.term_events[term_id]: - rxns = self.parent.terms[term_id].msrxns; + rxns = self.parent.terms[term_id].msrxns for rxn_id in rxns: if rxn_id not in output: output[rxn_id] = [] - output[rxn_id].append(self.term_events[term_id][event_id].to_data()) + output[rxn_id].append( + self.term_events[term_id][event_id].to_data() + ) if len(rxns) > 0: break return output - + + class AnnotationOntologyEvent: - def __init__(self,parent,event_id,ontology_id,method,method_version=None,description=None,timestamp=None): + def __init__( + self, + parent, + event_id, + ontology_id, + method, + method_version=None, + description=None, + timestamp=None, + ): self.id = event_id self.parent = parent - #Linking ontology + # Linking ontology self.ontology = 
self.parent.add_ontology(ontology_id) self.ontology.add_event(self) if not description: - self.description = ""#TODO + self.description = "" # TODO else: self.description = description self.method = method self.method_version = method_version self.timestamp = timestamp self.features = {} - + @staticmethod - def from_data(data,parent): + def from_data(data, parent): if "method_version" not in data: data["method_version"] = None if "description" not in data: data["description"] = None if "timestamp" not in data: - data["timestamp"] = None - self = AnnotationOntologyEvent(parent,data["event_id"],data["ontology_id"],data["method"],data["method_version"],data["description"],data["timestamp"]) + data["timestamp"] = None + self = AnnotationOntologyEvent( + parent, + data["event_id"], + data["ontology_id"], + data["method"], + data["method_version"], + data["description"], + data["timestamp"], + ) if "ontology_terms" in data: for feature_id in data["ontology_terms"]: feature = self.parent.add_feature(feature_id) self.add_feature(feature) for item in data["ontology_terms"][feature_id]: - term = self.parent.add_term(item["term"],self.ontology) + term = self.parent.add_term(item["term"], self.ontology) scores = {} ref_entity = None entity_type = None @@ -180,43 +239,42 @@ def from_data(data,parent): if "reference" in item["evidence"]: ref_entity = item["evidence"]["reference"][1] entity_type = item["evidence"]["reference"][0] - feature.add_event_term(self,term,scores,ref_entity,entity_type) + feature.add_event_term(self, term, scores, ref_entity, entity_type) if "modelseed_ids" in item: term.add_msrxns(item["modelseed_ids"]) return self - - def add_feature(self,feature): + + def add_feature(self, feature): self.features[feature.id] = feature - + def to_data(self): data = { - "event_id" : self.event_id, - "description" : self.event_id, - "ontology_id" : self.ontology_id, - "method" : self.method, - "method_version" : self.method_version, - "timestamp" : self.timestamp, - 
"ontology_terms" : {} + "event_id": self.event_id, + "description": self.event_id, + "ontology_id": self.ontology_id, + "method": self.method, + "method_version": self.method_version, + "timestamp": self.timestamp, + "ontology_terms": {}, } for feature in self.features: - data["ontology_terms"][feature] = { - "term":None#TODO - } - + data["ontology_terms"][feature] = {"term": None} # TODO + + class AnnotationOntology: mdlutls = {} @staticmethod - def from_kbase_data(data,genome_ref=None,data_dir=None): - self = AnnotationOntology(genome_ref,data_dir) + def from_kbase_data(data, genome_ref=None, data_dir=None): + self = AnnotationOntology(genome_ref, data_dir) if "feature_types" in data: self.feature_types = data["feature_types"] if "events" in data: for event in data["events"]: - self.events += [AnnotationOntologyEvent.from_data(event,self)] + self.events += [AnnotationOntologyEvent.from_data(event, self)] return self - - def __init__(self,genome_ref,data_dir): + + def __init__(self, genome_ref, data_dir): self.genome_ref = genome_ref self.events = DictList() self.terms = {} @@ -227,20 +285,40 @@ def __init__(self,genome_ref,data_dir): self.noncodings = {} self.feature_types = {} self.term_names = {} - - def get_term_name(self,term): + + def get_term_name(self, term): if term.ontology.id not in self.term_names: self.term_names[term.ontology.id] = {} - if term.ontology.id in ["SSO","AntiSmash","EC","TC","META","RO","KO","GO"]: - with open(self.data_dir + "/"+term.ontology.id+"_dictionary.json") as json_file: + if term.ontology.id in [ + "SSO", + "AntiSmash", + "EC", + "TC", + "META", + "RO", + "KO", + "GO", + ]: + with open( + self.data_dir + "/" + term.ontology.id + "_dictionary.json" + ) as json_file: ontology = json.load(json_file) for item in ontology["term_hash"]: - self.term_names[term.ontology.id][item] = ontology["term_hash"][item]["name"] + self.term_names[term.ontology.id][item] = ontology["term_hash"][ + item + ]["name"] if term.id not in 
self.term_names[term.ontology.id]: return "Unknown" return self.term_names[term.ontology.id][term.id] - - def get_gene_term_hash(self,prioritized_event_list=None,ontologies=None,merge_all=False,cds_features=False,translate_to_rast=True): + + def get_gene_term_hash( + self, + prioritized_event_list=None, + ontologies=None, + merge_all=False, + cds_features=False, + translate_to_rast=True, + ): output = {} feature_hash = self.genes if len(self.genes) == 0 or (cds_features and len(self.cdss) == 0): @@ -249,16 +327,26 @@ def get_gene_term_hash(self,prioritized_event_list=None,ontologies=None,merge_al feature = feature_hash[feature_id] if feature not in output: output[feature] = {} - output[feature] = feature.get_associated_terms(prioritized_event_list,ontologies,merge_all,translate_to_rast) + output[feature] = feature.get_associated_terms( + prioritized_event_list, ontologies, merge_all, translate_to_rast + ) return output - - def get_reaction_gene_hash(self,prioritized_event_list=None,ontologies=None,merge_all=False,cds_features=False): + + def get_reaction_gene_hash( + self, + prioritized_event_list=None, + ontologies=None, + merge_all=False, + cds_features=False, + ): output = {} feature_hash = self.genes if len(self.genes) == 0 or (cds_features and len(self.cdss) == 0): feature_hash = self.cdss for feature_id in feature_hash: - reactions = feature_hash[feature_id].get_associated_reactions(prioritized_event_list,ontologies,merge_all) + reactions = feature_hash[feature_id].get_associated_reactions( + prioritized_event_list, ontologies, merge_all + ) for rxn_id in reactions: if rxn_id not in output: output[rxn_id] = {} @@ -266,32 +354,34 @@ def get_reaction_gene_hash(self,prioritized_event_list=None,ontologies=None,merg output[rxn_id][feature_id] = [] output[rxn_id][feature_id].append(reactions[rxn_id]) return output - - def add_term(self,term_or_id,ontology=None): + + def add_term(self, term_or_id, ontology=None): if not isinstance(term_or_id, 
AnnotationOntologyTerm): if term_or_id in self.terms: return self.terms[term_or_id] else: - return AnnotationOntologyTerm(self,term_or_id,ontology) + return AnnotationOntologyTerm(self, term_or_id, ontology) if term_or_id.id in self.terms: - logger.critical("Term with id "+term_or_id.id+" already in annotation!") + logger.critical("Term with id " + term_or_id.id + " already in annotation!") return self.terms[term_or_id.id] else: - self.terms[term_or_id.id] = term_or_id - - def add_ontology(self,ontology_or_id): + self.terms[term_or_id.id] = term_or_id + + def add_ontology(self, ontology_or_id): if not isinstance(ontology_or_id, AnnotationOntologyOntology): if ontology_or_id in self.ontologies: return self.ontologies[ontology_or_id] else: - return AnnotationOntologyOntology(self,ontology_or_id) + return AnnotationOntologyOntology(self, ontology_or_id) if ontology_or_id.id in self.ontologies: - logger.critical("Ontology with id "+ontology_or_id.id+" already in annotation!") + logger.critical( + "Ontology with id " + ontology_or_id.id + " already in annotation!" 
+ ) return self.ontologies[ontology_or_id.id] else: self.ontologies[ontology_or_id.id] = ontology_or_id - - def get_feature_hash(self,feature_id): + + def get_feature_hash(self, feature_id): feature_hash = self.genes if feature_id in self.feature_types: if self.feature_types[feature_id] == "cds": @@ -299,15 +389,15 @@ def get_feature_hash(self,feature_id): elif self.feature_types[feature_id] == "noncoding": feature_hash = self.noncodings return feature_hash - - def add_feature(self,feature_or_id): + + def add_feature(self, feature_or_id): feature_hash = None if not isinstance(feature_or_id, AnnotationOntologyFeature): feature_hash = self.get_feature_hash(feature_or_id) if feature_or_id in feature_hash: return feature_hash[feature_or_id] else: - feature_or_id = AnnotationOntologyFeature(self,feature_or_id) + feature_or_id = AnnotationOntologyFeature(self, feature_or_id) if not feature_hash: feature_hash = self.get_feature_hash(feature_or_id.id) if feature_or_id.id not in feature_hash: diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 6a0fcec2..d5af6b97 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -49,7 +49,7 @@ default_threshold_multipiers = { "Glc": 2, - "default":1.2, + "default": 1.2, } @@ -119,7 +119,7 @@ def __init__( media.id = "empty" media.name = "empty" self.media_hash[media.id] = media - + self.forced_media = [] for media_id in forced_media: for item in self.atp_medias: @@ -406,11 +406,9 @@ def apply_growth_media_gapfilling(self): and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0 ): self.msgapfill.integrate_gapfill_solution( - stats, - self.cumulative_core_gapfilling, - link_gaps_to_objective=False + stats, self.cumulative_core_gapfilling, link_gaps_to_objective=False ) - #Adding reactions to gapfilling sensitivity structure so we can track all gapfilled reactions + # Adding reactions to gapfilling sensitivity structure so we can track all gapfilled 
reactions gf_sensitivity = self.modelutl.get_attributes("gf_sensitivity", {}) if media.id not in gf_sensitivity: gf_sensitivity[media.id] = {} @@ -418,15 +416,17 @@ def apply_growth_media_gapfilling(self): gf_sensitivity[media.id][self.atp_hydrolysis.id] = {} gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"] = {} for item in stats["new"]: - gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][item] = { - stats["new"][item] : [] - } + gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][ + item + ] = {stats["new"][item]: []} for item in stats["reversed"]: - gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][item] = { - stats["reversed"][item] : [] - } - self.modelutl.save_attributes(gf_sensitivity, "gf_sensitivity") - self.modelutl.save_attributes(len(self.cumulative_core_gapfilling), "total_core_gapfilling") + gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][ + item + ] = {stats["reversed"][item]: []} + self.modelutl.save_attributes(gf_sensitivity, "gf_sensitivity") + self.modelutl.save_attributes( + len(self.cumulative_core_gapfilling), "total_core_gapfilling" + ) def expand_model_to_genome_scale(self): """Restores noncore reactions to model while filtering out reactions that break ATP @@ -444,7 +444,7 @@ def expand_model_to_genome_scale(self): self.restore_noncore_reactions(noncore=True, othercompartment=False) # Extending model with non core reactions while retaining ATP accuracy self.filtered_noncore = self.modelutl.reaction_expansion_test( - self.noncore_reactions, tests,attribute_label="atp_expansion_filter" + self.noncore_reactions, tests, attribute_label="atp_expansion_filter" ) # Removing filtered reactions for item in self.filtered_noncore: @@ -484,7 +484,7 @@ def restore_noncore_reactions(self, noncore=True, othercompartment=True): reaction.lower_bound = self.original_bounds[reaction.id][0] reaction.upper_bound = self.original_bounds[reaction.id][1] - def build_tests(self,multiplier_hash_override={}): + def 
build_tests(self, multiplier_hash_override={}): """Build tests based on ATP media evaluations Parameters @@ -500,16 +500,16 @@ def build_tests(self,multiplier_hash_override={}): Raises ------ """ - #Applying threshold multiplier + # Applying threshold multiplier for key in default_threshold_multipiers: if key not in multiplier_hash_override: multiplier_hash_override[key] = default_threshold_multipiers[key] - #Initialzing atp test attributes + # Initialzing atp test attributes atp_att = self.modelutl.get_attributes( "ATP_analysis", {"tests": {}, "selected_media": {}, "core_atp_gapfilling": {}}, ) - #Initializing tests and adding empty media every time + # Initializing tests and adding empty media every time tests = [] if "empty" in self.media_hash: tests.append( @@ -524,16 +524,16 @@ def build_tests(self,multiplier_hash_override={}): "threshold": 0.00001, "objective": self.atp_hydrolysis.id, } - #Setting objective to ATP hydrolysis + # Setting objective to ATP hydrolysis self.model.objective = self.atp_hydrolysis.id for media in self.selected_media: - #Setting multiplier for test threshold + # Setting multiplier for test threshold multiplier = multiplier_hash_override["default"] if media.id in multiplier_hash_override: - multiplier = multiplier_hash_override[media.id] - #Constraining model exchanges for media + multiplier = multiplier_hash_override[media.id] + # Constraining model exchanges for media self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) - #Computing core ATP production + # Computing core ATP production obj_value = self.model.slim_optimize() logger.debug(f"{media.name} = {obj_value};{multiplier}") threshold = multiplier * obj_value @@ -552,7 +552,7 @@ def build_tests(self,multiplier_hash_override={}): "threshold": multiplier * obj_value, "objective": self.atp_hydrolysis.id, } - #Saving test attributes to the model + # Saving test attributes to the model self.modelutl.save_attributes(atp_att, "ATP_analysis") return tests diff --git 
a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 7f079234..bca4a0f8 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -730,26 +730,28 @@ def build_metabolic_reactions(self): reactions.append(reaction) return reactions - + def build_from_annotaton_ontology( - self, - model_or_id, - anno_ont, - index="0", - allow_all_non_grp_reactions=False, - annotate_with_rast=False, - biomass_classic=False, - biomass_gc=0.5, - add_non_template_reactions=True, - prioritized_event_list=None, - ontologies=None, - merge_all=True, - convert_to_sso=True - ): - #Build base model without annotation + self, + model_or_id, + anno_ont, + index="0", + allow_all_non_grp_reactions=False, + annotate_with_rast=False, + biomass_classic=False, + biomass_gc=0.5, + add_non_template_reactions=True, + prioritized_event_list=None, + ontologies=None, + merge_all=True, + convert_to_sso=True, + ): + # Build base model without annotation self.search_name_to_orginal = {} self.search_name_to_genes = {} - gene_term_hash = anno_ont.get_gene_term_hash(prioritized_event_list,ontologies,merge_all,convert_to_sso) + gene_term_hash = anno_ont.get_gene_term_hash( + prioritized_event_list, ontologies, merge_all, convert_to_sso + ) residual_reaction_gene_hash = {} for gene in gene_term_hash: for term in gene_term_hash[gene]: @@ -767,9 +769,18 @@ def build_from_annotaton_ontology( residual_reaction_gene_hash[rxn_id] = {} if gene not in residual_reaction_gene_hash[rxn_id]: residual_reaction_gene_hash[rxn_id][gene] = [] - residual_reaction_gene_hash[rxn_id][gene] = gene_term_hash[gene][term] - - model_or_id = self.build(model_or_id,index,allow_all_non_grp_reactions,annotate_with_rast,biomass_classic,biomass_gc) + residual_reaction_gene_hash[rxn_id][gene] = gene_term_hash[ + gene + ][term] + + model_or_id = self.build( + model_or_id, + index, + allow_all_non_grp_reactions, + annotate_with_rast, + biomass_classic, + biomass_gc, + ) for rxn in model_or_id.reactions: 
probability = None for gene in rxn.genes(): @@ -779,22 +790,25 @@ def build_from_annotaton_ontology( if rxn.id[0:-3] in term.msrxns: for item in gene_term_hash[gene][term]: if "probability" in item.scores: - if not probability or item.scores["probability"] > probability: + if ( + not probability + or item.scores["probability"] > probability + ): probability = item.scores["probability"] if hasattr(rxn, "probability"): - rxn.probability = probability - + rxn.probability = probability + reactions = [] modelseeddb = ModelSEEDBiochem.get() for rxn_id in residual_reaction_gene_hash: - if rxn_id+"_c0" not in model_or_id.reactions: + if rxn_id + "_c0" not in model_or_id.reactions: reaction = None template_reaction = None - if rxn_id+"_c" in self.template.reactions: - template_reaction = self.template.reactions.get_by_id(rxn_id+"_c") + if rxn_id + "_c" in self.template.reactions: + template_reaction = self.template.reactions.get_by_id(rxn_id + "_c") elif rxn_id in modelseeddb.reactions: msrxn = modelseeddb.reactions.get_by_id(rxn_id) - template_reaction = msrxn.to_template_reaction({0:"c",1:"e"}) + template_reaction = msrxn.to_template_reaction({0: "c", 1: "e"}) if template_reaction: for m in template_reaction.metabolites: if m.compartment not in self.compartments: @@ -803,15 +817,22 @@ def build_from_annotaton_ontology( ] = self.template.compartments.get_by_id(m.compartment) if m.id not in self.template_species_to_model_species: model_metabolite = m.to_metabolite(self.index) - self.template_species_to_model_species[m.id] = model_metabolite + self.template_species_to_model_species[ + m.id + ] = model_metabolite self.base_model.add_metabolites([model_metabolite]) - reaction = template_reaction.to_reaction(self.base_model, self.index) + reaction = template_reaction.to_reaction( + self.base_model, self.index + ) gpr = "" probability = None for gene in residual_reaction_gene_hash[rxn_id]: for item in residual_reaction_gene_hash[rxn_id][gene]: if "probability" in item["scores"]: 
- if not probability or item["scores"]["probability"] > probability: + if ( + not probability + or item["scores"]["probability"] > probability + ): probability = item["scores"]["probability"] if len(gpr) > 0: gpr += " or " @@ -822,7 +843,7 @@ def build_from_annotaton_ontology( reaction.annotation[SBO_ANNOTATION] = "SBO:0000176" reactions.append(reaction) if not reaction: - print("Reaction ",rxn_id," not found in template or database!") + print("Reaction ", rxn_id, " not found in template or database!") model_or_id.add_reactions(reactions) return model_or_id @@ -911,7 +932,7 @@ def build( annotate_with_rast=True, biomass_classic=False, biomass_gc=0.5, - add_reaction_from_rast_annotation=True + add_reaction_from_rast_annotation=True, ): """ @@ -949,11 +970,11 @@ def build( complex_groups = self.build_complex_groups( self.reaction_to_complex_sets.values() ) - + if add_reaction_from_rast_annotation: metabolic_reactions = self.build_metabolic_reactions() cobra_model.add_reactions(metabolic_reactions) - + non_metabolic_reactions = self.build_non_metabolite_reactions( cobra_model, allow_all_non_grp_reactions ) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index b0d3b2b6..75e356c4 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -13,6 +13,7 @@ logging.INFO ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO + class MSGrowthPhenotype: def __init__( self, @@ -35,7 +36,7 @@ def __init__( self.additional_compounds = additional_compounds self.parent = parent - def build_media(self,include_base_media=True): + def build_media(self, include_base_media=True): """Builds media object to use when simulating the phenotype Parameters ---------- @@ -79,25 +80,30 @@ def simulate( modelutl = model_or_mdlutl if not isinstance(model_or_mdlutl, MSModelUtil): modelutl = MSModelUtil.get(model_or_mdlutl) - - #Setting objective + + # Setting objective if 
objective: modelutl.model.objective = objective - - #Building full media and adding missing exchanges - output = {"growth": None, "class": None, "missing_transports": [], "baseline_growth": None} + + # Building full media and adding missing exchanges + output = { + "growth": None, + "class": None, + "missing_transports": [], + "baseline_growth": None, + } full_media = self.build_media() if add_missing_exchanges: output["missing_transports"] = modelutl.add_missing_exchanges(full_media) - - #Getting basline growth + + # Getting basline growth output["baseline_growth"] = 0.01 if self.parent: - output["baseline_growth"] = self.parent.baseline_growth(modelutl,objective) + output["baseline_growth"] = self.parent.baseline_growth(modelutl, objective) if output["baseline_growth"] < 1e-5: output["baseline_growth"] = 0.01 - - #Building specific media and setting compound exception list + + # Building specific media and setting compound exception list if self.parent and self.parent.atom_limits and len(self.parent.atom_limits) > 0: reaction_exceptions = [] specific_media = self.build_media(False) @@ -105,36 +111,36 @@ def simulate( ex_hash = mediacpd.get_mdl_exchange_hash(modelutl) for mdlcpd in ex_hash: reaction_exceptions.append(ex_hash[mdlcpd]) - modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.parent.atom_limits,exception_reactions=reaction_exceptions) - - #Applying media + modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package( + self.parent.atom_limits, exception_reactions=reaction_exceptions + ) + + # Applying media if self.parent: modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( full_media, self.parent.base_uptake, self.parent.base_excretion ) else: - modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( - full_media,0,1000 - ) - + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(full_media, 0, 1000) + with modelutl.model: - #Applying gene knockouts + # Applying gene knockouts for gene in self.gene_ko: if gene in modelutl.model.genes: geneobj 
= modelutl.model.genes.get_by_id(gene) geneobj.knock_out() - - #Optimizing model + + # Optimizing model solution = modelutl.model.optimize() output["growth"] = solution.objective_value if solution.objective_value > 0 and pfba: solution = cobra.flux_analysis.pfba(modelutl.model) if save_fluxes: output["fluxes"] = solution.fluxes - - #Determining phenotype class - - if output["growth"] >= output["baseline_growth"]*growth_multiplier: + + # Determining phenotype class + + if output["growth"] >= output["baseline_growth"] * growth_multiplier: output["GROWING"] = True if not self.growth: output["class"] = "GROWTH" @@ -172,44 +178,60 @@ def gapfill_model_for_phenotype( objective : string Expression for objective to be activated by gapfilling """ - #First simulate model without gapfilling to assess ungapfilled growth - output = self.simulate(msgapfill.mdlutl,objective,growth_multiplier,add_missing_exchanges) - if output["growth"] >= output["baseline_growth"]*growth_multiplier: - #No gapfilling needed - original model grows without gapfilling - return {"reversed": {}, "new": {},"media": self.build_media(), "target":objective, "minobjective": output["baseline_growth"]*growth_multiplier, "binary_check":False} - - #Now pulling the gapfilling configured model from MSGapfill + # First simulate model without gapfilling to assess ungapfilled growth + output = self.simulate( + msgapfill.mdlutl, objective, growth_multiplier, add_missing_exchanges + ) + if output["growth"] >= output["baseline_growth"] * growth_multiplier: + # No gapfilling needed - original model grows without gapfilling + return { + "reversed": {}, + "new": {}, + "media": self.build_media(), + "target": objective, + "minobjective": output["baseline_growth"] * growth_multiplier, + "binary_check": False, + } + + # Now pulling the gapfilling configured model from MSGapfill gfmodelutl = MSModelUtil.get(msgapfill.gfmodel) - #Saving the gapfill objective because this will be replaced when the simulation runs + # Saving the 
gapfill objective because this will be replaced when the simulation runs gfobj = gfmodelutl.model.objective - #Running simulate on gapfill model to add missing exchanges and set proper media and uptake limit constraints - output = self.simulate(modelutl,objective,growth_multiplier,add_missing_exchanges) - #If the gapfilling model fails to achieve the minimum growth, then no solution exists - if output["growth"] < output["baseline_growth"]*growth_multiplier: + # Running simulate on gapfill model to add missing exchanges and set proper media and uptake limit constraints + output = self.simulate( + modelutl, objective, growth_multiplier, add_missing_exchanges + ) + # If the gapfilling model fails to achieve the minimum growth, then no solution exists + if output["growth"] < output["baseline_growth"] * growth_multiplier: logger.warning( "Gapfilling failed with the specified model, media, and target reaction." ) return None - - #Running the gapfilling itself + + # Running the gapfilling itself full_media = self.build_media() with modelutl.model: - #Applying gene knockouts + # Applying gene knockouts for gene in self.gene_ko: if gene in modelutl.model.genes: geneobj = modelutl.model.genes.get_by_id(gene) geneobj.knock_out() - - gfresults = self.gapfilling.run_gapfilling(media,None,minimum_obj=output["baseline_growth"]*growth_multiplier) + + gfresults = self.gapfilling.run_gapfilling( + media, None, minimum_obj=output["baseline_growth"] * growth_multiplier + ) if gfresults is None: logger.warning( "Gapfilling failed with the specified model, media, and target reaction." 
) - + return gfresults + class MSGrowthPhenotypes: - def __init__(self, base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): + def __init__( + self, base_media=None, base_uptake=0, base_excretion=1000, global_atom_limits={} + ): self.base_media = base_media self.phenotypes = DictList() self.base_uptake = base_uptake @@ -219,8 +241,16 @@ def __init__(self, base_media=None, base_uptake=0, base_excretion=1000,global_at self.cached_based_growth = {} @staticmethod - def from_compound_hash(compounds,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): - growthpheno = MSGrowthPhenotypes(base_media, base_uptake, base_excretion,global_atom_limits) + def from_compound_hash( + compounds, + base_media=None, + base_uptake=0, + base_excretion=1000, + global_atom_limits={}, + ): + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) new_phenos = [] for cpd in compounds: newpheno = MSGrowthPhenotype(cpd, None, compounds[cpd], [], [cpd]) @@ -229,8 +259,17 @@ def from_compound_hash(compounds,base_media=None, base_uptake=0, base_excretion= return growthpheno @staticmethod - def from_kbase_object(data, kbase_api,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): - growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) + def from_kbase_object( + data, + kbase_api, + base_media=None, + base_uptake=0, + base_excretion=1000, + global_atom_limits={}, + ): + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) new_phenos = [] for pheno in data["phenotypes"]: media = kbase_api.get_from_ws(pheno["media_ref"], None) @@ -248,9 +287,18 @@ def from_kbase_object(data, kbase_api,base_media=None, base_uptake=0, base_excre return growthpheno @staticmethod - def from_kbase_file(filename, kbase_api,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): + def from_kbase_file( + 
filename, + kbase_api, + base_media=None, + base_uptake=0, + base_excretion=1000, + global_atom_limits={}, + ): # TSV file with the following headers:media mediaws growth geneko addtlCpd - growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) headings = [] new_phenos = [] with open(filename) as f: @@ -282,8 +330,16 @@ def from_kbase_file(filename, kbase_api,base_media=None, base_uptake=0, base_exc return growthpheno @staticmethod - def from_ms_file(filename,base_media=None, base_uptake=0, base_excretion=100,global_atom_limits={}): - growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) + def from_ms_file( + filename, + base_media=None, + base_uptake=0, + base_excretion=100, + global_atom_limits={}, + ): + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) df = pd.read_csv(filename) required_headers = ["Compounds", "Growth"] for item in required_headers: @@ -311,7 +367,7 @@ def build_super_media(self): else: super_media.merge(pheno.build_media(), overwrite_overlap=False) return super_media - + def add_phenotypes(self, new_phenotypes): keep_phenos = [] for pheno in new_phenotypes: @@ -321,11 +377,7 @@ def add_phenotypes(self, new_phenotypes): additions = DictList(keep_phenos) self.phenotypes += additions - def baseline_growth( - self, - model_or_mdlutl, - objective - ): + def baseline_growth(self, model_or_mdlutl, objective): """Simulates all the specified phenotype conditions and saves results Parameters ---------- @@ -336,22 +388,22 @@ def baseline_growth( modelutl = model_or_mdlutl if not isinstance(model_or_mdlutl, MSModelUtil): modelutl = MSModelUtil.get(model_or_mdlutl) - #Checking if base growth already computed + # Checking if base growth already computed if modelutl in self.cached_based_growth: if objective in 
self.cached_based_growth[modelutl]: return self.cached_based_growth[modelutl][objective] else: self.cached_based_growth[modelutl] = {} - #Setting objective + # Setting objective modelutl.objective = objective - #Setting media + # Setting media modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( self.base_media, self.base_uptake, self.base_excretion ) - #Adding uptake limits + # Adding uptake limits if len(self.atom_limits) > 0: modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.atom_limits) - #Simulating + # Simulating self.cached_based_growth[modelutl][objective] = modelutl.model.slim_optimize() return self.cached_based_growth[modelutl][objective] @@ -364,7 +416,7 @@ def simulate_phenotypes( save_fluxes=False, gapfill_negatives=False, msgapfill=None, - test_conditions=None + test_conditions=None, ): """Simulates all the specified phenotype conditions and saves results Parameters @@ -384,14 +436,14 @@ def simulate_phenotypes( modelutl = model_or_mdlutl if not isinstance(model_or_mdlutl, MSModelUtil): modelutl = MSModelUtil.get(model_or_mdlutl) - #Setting objective + # Setting objective modelutl.objective = objective - #Getting basline growth - baseline_growth = self.baseline_growth(modelutl,objective) - #Establishing output of the simulation method + # Getting basline growth + baseline_growth = self.baseline_growth(modelutl, objective) + # Establishing output of the simulation method summary = { - "Label": ["Accuracy", "CP", "CN", "FP", "FN","Growth","No growth"], - "Count": [0, 0, 0, 0, 0,0,0], + "Label": ["Accuracy", "CP", "CN", "FP", "FN", "Growth", "No growth"], + "Count": [0, 0, 0, 0, 0, 0, 0], } data = { "Phenotype": [], @@ -400,22 +452,24 @@ def simulate_phenotypes( "Class": [], "Transports missing": [], "Gapfilled reactions": [], - "Gapfilling score":None + "Gapfilling score": None, } - #Running simulations + # Running simulations gapfilling_solutions = {} totalcount = 0 for pheno in self.phenotypes: result = pheno.simulate( - 
modelutl,objective,growth_multiplier,add_missing_exchanges,save_fluxes + modelutl, + objective, + growth_multiplier, + add_missing_exchanges, + save_fluxes, ) data["Class"].append(result["class"]) data["Phenotype"].append(pheno.id) data["Observed growth"].append(pheno.growth) data["Simulated growth"].append(result["growth"]) - data["Transports missing"].append( - ";".join(result["missing_transports"]) - ) + data["Transports missing"].append(";".join(result["missing_transports"])) if result["class"] == "CP": summary["Count"][1] += 1 summary["Count"][0] += 1 @@ -434,22 +488,25 @@ def simulate_phenotypes( summary["Count"][5] += 1 elif result["class"] == "NOGROWTH": summary["Count"][6] += 1 - #Gapfilling negative growth conditions - if gapfill_negatives and output["class"] in ["NOGROWTH","FN","CN"]: - gapfilling_solutions[pheno] = pheno.gapfill_model_for_phenotype(msgapfill,objective,test_conditions,growth_multiplier,add_missing_exchanges) + # Gapfilling negative growth conditions + if gapfill_negatives and output["class"] in ["NOGROWTH", "FN", "CN"]: + gapfilling_solutions[pheno] = pheno.gapfill_model_for_phenotype( + msgapfill, + objective, + test_conditions, + growth_multiplier, + add_missing_exchanges, + ) if gapfilling_solutions[pheno] != None: data["Gapfilling score"] = 0 list = [] for rxn_id in gapfilling_solutions[pheno]["reversed"]: list.append( - gapfilling_solutions[pheno]["reversed"][rxn_id] - + rxn_id + gapfilling_solutions[pheno]["reversed"][rxn_id] + rxn_id ) data["Gapfilling score"] += 0.5 for rxn_id in gapfilling_solutions[pheno]["new"]: - list.append( - gapfilling_solutions[pheno]["new"][rxn_id] + rxn_id - ) + list.append(gapfilling_solutions[pheno]["new"][rxn_id] + rxn_id) data["Gapfilling score"] += 1 data["Gapfilled reactions"].append(";".join(list)) else: @@ -473,9 +530,9 @@ def fit_model_to_phenotypes( minimize_new_false_positives=True, atp_safe=True, integrate_results=True, - global_gapfilling=True + global_gapfilling=True, ): - + """Simulates 
all the specified phenotype conditions and saves results Parameters ---------- @@ -488,46 +545,46 @@ def fit_model_to_phenotypes( integrate_results : bool Indicates if the resulting modifications to the model should be integrated """ - - - - #Running simulations + + # Running simulations positive_growth = [] negative_growth = [] for pheno in self.phenotypes: with model: result = pheno.simulate( - modelutl,objective,growth_multiplier,add_missing_exchanges,save_fluxes + modelutl, + objective, + growth_multiplier, + add_missing_exchanges, + save_fluxes, ) - #Gapfilling negative growth conditions - if gapfill_negatives and output["class"] in ["NOGROWTH","FN","CN"]: + # Gapfilling negative growth conditions + if gapfill_negatives and output["class"] in ["NOGROWTH", "FN", "CN"]: negative_growth.append(pheno.build_media()) - elif gapfill_negatives and output["class"] in ["GROWTH","FP","CP"]: + elif gapfill_negatives and output["class"] in ["GROWTH", "FP", "CP"]: positive_growth.append(pheno.build_media()) - - - #Create super media for all + + # Create super media for all super_media = self.build_super_media() - #Adding missing exchanges + # Adding missing exchanges msgapfill.gfmodel.add_missing_exchanges(super_media) - #Adding elemental constraints + # Adding elemental constraints self.add_elemental_constraints() - #Getting ATP tests - - #Filtering database for ATP tests - - #Penalizing database to avoid creating false positives - - #Building additional tests from current correct negatives - - #Computing base-line growth - - #Computing growth threshold - - #Running global gapfill - - #Integrating solution - + # Getting ATP tests + + # Filtering database for ATP tests + + # Penalizing database to avoid creating false positives + + # Building additional tests from current correct negatives + + # Computing base-line growth + + # Computing growth threshold + + # Running global gapfill + + # Integrating solution def gapfill_all_phenotypes( self, diff --git 
a/modelseedpy/core/msmedia.py b/modelseedpy/core/msmedia.py index fadc435d..960e82d1 100644 --- a/modelseedpy/core/msmedia.py +++ b/modelseedpy/core/msmedia.py @@ -22,8 +22,8 @@ def maxFlux(self): def minFlux(self): # TODO: will be removed later just for old methods return -self.upper_bound - - def get_mdl_exchange_hash(self,model_or_mdlutl): + + def get_mdl_exchange_hash(self, model_or_mdlutl): modelutl = model_or_mdlutl if not isinstance(model_or_mdlutl, MSModelUtil): modelutl = MSModelUtil.get(model_or_mdlutl) @@ -33,7 +33,8 @@ def get_mdl_exchange_hash(self,model_or_mdlutl): for met in mets: if met in exchange_hash: output[met] = exchange_hash[met] - return output + return output + class MSMedia: def __init__(self, media_id, name=""): diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py index 78595f1c..1435468c 100644 --- a/modelseedpy/core/msmodelreport.py +++ b/modelseedpy/core/msmodelreport.py @@ -15,11 +15,9 @@ logging.INFO ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO + class MSModelReport: - def __init__( - self, - model_or_mdlutl - ): + def __init__(self, model_or_mdlutl): if isinstance(model_or_mdlutl, MSModelUtil): self.model = model_or_mdlutl.model self.modelutl = model_or_mdlutl @@ -27,45 +25,73 @@ def __init__( self.model = model_or_mdlutl self.modelutl = MSModelUtil.get(model_or_mdlutl) - def generate_reports(self,report_path, multi_tab_report_path): + def generate_reports(self, report_path, multi_tab_report_path): self.build_report(report_path) self.build_multitab_report(multi_tab_report_path) - + # Helper function to build overview data def build_overview_data(self): # Get the number of compartments - number_compartments = len(set([metabolite.compartment for metabolite in self.model.metabolites])) + number_compartments = len( + set([metabolite.compartment for metabolite in self.model.metabolites]) + ) # Extract gapfilling information core_gapfilling_media = [] 
gapfilling_media = [] - gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) + gf_sensitivity = self.modelutl.attributes.get("gf_sensitivity", None) if gf_sensitivity: for media in gf_sensitivity: - if "bio1" in self.modelutl.attributes["gf_sensitivity"][media] and "success" in self.modelutl.attributes["gf_sensitivity"][media]["bio1"]: + if ( + "bio1" in self.modelutl.attributes["gf_sensitivity"][media] + and "success" + in self.modelutl.attributes["gf_sensitivity"][media]["bio1"] + ): gapfilling_media.append(media) - if "rxn00062_c0" in self.modelutl.attributes["gf_sensitivity"][media] and "success" in self.modelutl.attributes["gf_sensitivity"][media]["rxn00062_c0"]: + if ( + "rxn00062_c0" in self.modelutl.attributes["gf_sensitivity"][media] + and "success" + in self.modelutl.attributes["gf_sensitivity"][media]["rxn00062_c0"] + ): core_gapfilling_media.append(media) - + # Count the number of gapfills number_gapfills = len(gapfilling_media) # Convert the lists to strings - core_gapfilling_str = "; ".join(core_gapfilling_media) if core_gapfilling_media else "No core gapfilling needed." - gapfilling_media_str = "; ".join(gapfilling_media) if gapfilling_media else "No genome-scale gapfilling." + core_gapfilling_str = ( + "; ".join(core_gapfilling_media) + if core_gapfilling_media + else "No core gapfilling needed." + ) + gapfilling_media_str = ( + "; ".join(gapfilling_media) + if gapfilling_media + else "No genome-scale gapfilling." 
+ ) overview = { - 'Model ID': self.model.id, - 'Full Gapfilling and ATP Analysis Report': 'TBD', # You may replace 'TBD' with actual data when available - 'Genome Scale Template': self.model.notes.get('kbase_template_refs', 'Data Not Available'), - 'Core Gapfilling Media': core_gapfilling_str, - 'Gapfilling Media': gapfilling_media_str, - 'Source Genome': self.model.notes.get('kbase_genome_ref', 'Data Not Available'), - 'Total Number of reactions': self.modelutl.nonexchange_reaction_count(), - 'Number compounds': len(self.model.metabolites), - 'Number compartments': number_compartments, - 'Number biomass': len([rxn for rxn in self.model.reactions if rxn.annotation.get('sbo') == 'SBO:0000629']), - 'Number gapfills': number_gapfills + "Model ID": self.model.id, + "Full Gapfilling and ATP Analysis Report": "TBD", # You may replace 'TBD' with actual data when available + "Genome Scale Template": self.model.notes.get( + "kbase_template_refs", "Data Not Available" + ), + "Core Gapfilling Media": core_gapfilling_str, + "Gapfilling Media": gapfilling_media_str, + "Source Genome": self.model.notes.get( + "kbase_genome_ref", "Data Not Available" + ), + "Total Number of reactions": self.modelutl.nonexchange_reaction_count(), + "Number compounds": len(self.model.metabolites), + "Number compartments": number_compartments, + "Number biomass": len( + [ + rxn + for rxn in self.model.reactions + if rxn.annotation.get("sbo") == "SBO:0000629" + ] + ), + "Number gapfills": number_gapfills, } return overview @@ -79,39 +105,55 @@ def extract_gapfilling_data(self, gf_sensitivity): for media, media_data in gf_sensitivity.items(): for target, target_data in media_data.items(): - gf_data = target_data.get('success', {}) + gf_data = target_data.get("success", {}) if isinstance(gf_data, dict): for reaction_id, reaction_data in gf_data.items(): for direction, metabolites in reaction_data.items(): # If metabolites is None, set to empty string if metabolites is None: metabolites = "" - + # 
Extract both IDs and Names for Gapfilling Sensitivity sensitivity_ids = [] sensitivity_names = [] if isinstance(metabolites, (list, tuple)): for met_id in metabolites: sensitivity_ids.append(met_id) - met_name = self.model.metabolites.get_by_id(met_id).name if met_id in self.model.metabolites else met_id + met_name = ( + self.model.metabolites.get_by_id(met_id).name + if met_id in self.model.metabolites + else met_id + ) sensitivity_names.append(met_name) else: - metabolites = str(metabolites) + metabolites = str(metabolites) entry = { "reaction_id": reaction_id, - "reaction_name": self.model.reactions.get_by_id(reaction_id).name if reaction_id in self.model.reactions else reaction_id, + "reaction_name": self.model.reactions.get_by_id( + reaction_id + ).name + if reaction_id in self.model.reactions + else reaction_id, "media": media, "direction": direction, "target": target, - "gapfilling_sensitivity_id": "; ".join(sensitivity_ids) if sensitivity_ids else metabolites, - "gapfilling_sensitivity_name": "; ".join(sensitivity_names) if sensitivity_names else metabolites + "gapfilling_sensitivity_id": "; ".join(sensitivity_ids) + if sensitivity_ids + else metabolites, + "gapfilling_sensitivity_name": "; ".join( + sensitivity_names + ) + if sensitivity_names + else metabolites, } - + # Update the summary dictionary if reaction_id not in gapfilling_summary: gapfilling_summary[reaction_id] = [] - gapfilling_summary[reaction_id].append(f"{media}: {direction}") - + gapfilling_summary[reaction_id].append( + f"{media}: {direction}" + ) + # Check if reaction_id is already in dictionary if reaction_id in gapfilling_dict: # Update the media @@ -123,9 +165,9 @@ def extract_gapfilling_data(self, gf_sensitivity): else: gapfilling_dict[reaction_id] = entry - return list(gapfilling_dict.values()), gapfilling_summary + return list(gapfilling_dict.values()), gapfilling_summary - #transform data to be used in tabular format to use in build_model_report + # transform data to be used in 
tabular format to use in build_model_report def transform_gapfilling_data(self, gapfilling_data): transformed_data = [] for entry in gapfilling_data: @@ -136,22 +178,28 @@ def transform_gapfilling_data(self, gapfilling_data): entry["direction"], entry["target"], entry["gapfilling_sensitivity_id"], - entry["gapfilling_sensitivity_name"] + entry["gapfilling_sensitivity_name"], ] transformed_data.append(row) return transformed_data - - + # Extract ATP analysis data def extract_atp_analysis_data(self, atp_analysis, atp_expansion_filter): entries = [] - if atp_analysis and 'core_atp_gapfilling' in atp_analysis: - for media, data in atp_analysis['core_atp_gapfilling'].items(): - score = data.get('score', None) - new_reactions = ["{}: {}".format(k, v) for k, v in data.get('new', {}).items()] - reversed_reactions = ["{}: {}".format(k, v) for k, v in data.get('reversed', {}).items()] + if atp_analysis and "core_atp_gapfilling" in atp_analysis: + for media, data in atp_analysis["core_atp_gapfilling"].items(): + score = data.get("score", None) + new_reactions = [ + "{}: {}".format(k, v) for k, v in data.get("new", {}).items() + ] + reversed_reactions = [ + "{}: {}".format(k, v) for k, v in data.get("reversed", {}).items() + ] atp_production = "Not integrated" - if "selected_media" in atp_analysis and media in atp_analysis["selected_media"]: + if ( + "selected_media" in atp_analysis + and media in atp_analysis["selected_media"] + ): atp_production = atp_analysis["selected_media"][media] # Extracting the "Filtered Reactions" in the required format @@ -162,28 +210,34 @@ def extract_atp_analysis_data(self, atp_analysis, atp_expansion_filter): if isinstance(sub_v, dict): for reaction, direction_dict in sub_v.items(): direction = list(direction_dict.keys())[0] - filtered_reactions.append(f"{reaction}: {direction}") + filtered_reactions.append( + f"{reaction}: {direction}" + ) filtered_reactions_str = "; ".join(filtered_reactions) if score is not None: - entries.append({ - 'media': 
media, - 'no_of_gapfilled_reactions': score, - 'atp_production': atp_production, - 'gapfilled_reactions': "; ".join(new_reactions), - 'reversed_reaction_by_gapfilling': "; ".join(reversed_reactions), - 'filtered_reactions': filtered_reactions_str - }) + entries.append( + { + "media": media, + "no_of_gapfilled_reactions": score, + "atp_production": atp_production, + "gapfilled_reactions": "; ".join(new_reactions), + "reversed_reaction_by_gapfilling": "; ".join( + reversed_reactions + ), + "filtered_reactions": filtered_reactions_str, + } + ) # Sorting the entries based on the 'no_of_gapfilled_reactions' column - entries.sort(key=lambda x: x['no_of_gapfilled_reactions']) + entries.sort(key=lambda x: x["no_of_gapfilled_reactions"]) return entries # Extract ATP production data for the ATP Analysis tab def extract_atp_production_data(self, atp_analysis): atp_production_dict = {} if atp_analysis: - selected_media = atp_analysis.get('selected_media', {}) - core_atp_gapfilling = atp_analysis.get('core_atp_gapfilling', {}) + selected_media = atp_analysis.get("selected_media", {}) + core_atp_gapfilling = atp_analysis.get("core_atp_gapfilling", {}) # First, process selected_media for media, value in selected_media.items(): @@ -192,30 +246,36 @@ def extract_atp_production_data(self, atp_analysis): # Next, process core_atp_gapfilling for media not in selected_media for media, data in core_atp_gapfilling.items(): if media not in atp_production_dict: - if data.get('failed'): - atp_production_dict[media] = 'failed' + if data.get("failed"): + atp_production_dict[media] = "failed" else: # If the media was not processed in selected_media and it's not failed, set as 'Not Integrated' - atp_production_dict[media] = 'Not Integrated' + atp_production_dict[media] = "Not Integrated" + + return atp_production_dict - return atp_production_dict - def build_multitab_report(self, output_path): - + # Build overview data overview_data = self.build_overview_data() - + # Get gf_sensitivity attribute 
from the model - gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) + gf_sensitivity = self.modelutl.attributes.get("gf_sensitivity", None) # Extract gapfilling data - gapfilling_entries, gapfilling_reaction_summary = self.extract_gapfilling_data(gf_sensitivity) + gapfilling_entries, gapfilling_reaction_summary = self.extract_gapfilling_data( + gf_sensitivity + ) # Check if ATP_analysis attribute is present in the model - atp_analysis = self.modelutl.attributes.get('ATP_analysis', None) + atp_analysis = self.modelutl.attributes.get("ATP_analysis", None) if atp_analysis: - atp_expansion_filter = self.modelutl.attributes.get('atp_expansion_filter', {}) - atp_analysis_entries = self.extract_atp_analysis_data(atp_analysis, atp_expansion_filter) + atp_expansion_filter = self.modelutl.attributes.get( + "atp_expansion_filter", {} + ) + atp_analysis_entries = self.extract_atp_analysis_data( + atp_analysis, atp_expansion_filter + ) else: atp_analysis_entries = [] @@ -227,7 +287,7 @@ def build_multitab_report(self, output_path): "genes": [], "biomass": [], "gapfilling": gapfilling_entries, # Populated with gapfilling data - "atpanalysis": atp_analysis_entries # Populated with ATP analysis data + "atpanalysis": atp_analysis_entries, # Populated with ATP analysis data } print("Module Path:", module_path + "/../data/") @@ -235,7 +295,11 @@ def build_multitab_report(self, output_path): exchanges = {r.id for r in self.model.exchanges} # Identify biomass reactions using SBO annotation - biomass_reactions_ids = {rxn.id for rxn in self.model.reactions if rxn.annotation.get('sbo') == 'SBO:0000629'} + biomass_reactions_ids = { + rxn.id + for rxn in self.model.reactions + if rxn.annotation.get("sbo") == "SBO:0000629" + } # Reactions Tab for rxn in self.model.reactions: @@ -246,11 +310,12 @@ def build_multitab_report(self, output_path): "name": rxn.name, "equation": equation, "genes": rxn.gene_reaction_rule, - "gapfilling": "; 
".join(gapfilling_reaction_summary.get(rxn.id, [])) # Empty list results in an empty string + "gapfilling": "; ".join( + gapfilling_reaction_summary.get(rxn.id, []) + ), # Empty list results in an empty string } context["reactions"].append(rxn_data) - # Compounds Tab for cpd in self.model.metabolites: cpd_data = { @@ -258,7 +323,7 @@ def build_multitab_report(self, output_path): "name": cpd.name, "formula": cpd.formula, "charge": cpd.charge, - "compartment": cpd.compartment + "compartment": cpd.compartment, } context["compounds"].append(cpd_data) @@ -266,7 +331,7 @@ def build_multitab_report(self, output_path): for gene in self.model.genes: gene_data = { "gene": gene.id, - "reactions": "; ".join([rxn.id for rxn in gene.reactions]) + "reactions": "; ".join([rxn.id for rxn in gene.reactions]), } context["genes"].append(gene_data) @@ -276,22 +341,22 @@ def build_multitab_report(self, output_path): biomass_rxn = self.model.reactions.get_by_id(biomass_rxn_id) for metabolite, coefficient in biomass_rxn.metabolites.items(): compound_id = metabolite.id - compound_name = metabolite.name.split('_')[0] - compartment = compound_id.split('_')[-1] + compound_name = metabolite.name.split("_")[0] + compartment = compound_id.split("_")[-1] biomass_data = { "biomass_reaction_id": biomass_rxn.id, "biomass_compound_id": compound_id, "name": compound_name, "coefficient": coefficient, - "compartment": compartment + "compartment": compartment, } context["biomass"].append(biomass_data) else: print("No biomass reactions found in the model.") # Gapfilling Tab - gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) + gf_sensitivity = self.modelutl.attributes.get("gf_sensitivity", None) gapfilling_data = self.extract_gapfilling_data(gf_sensitivity) context["gapfilling"] = gapfilling_entries @@ -300,8 +365,8 @@ def build_multitab_report(self, output_path): # Populate the 'atpanalysis' context with ATP production data for entry in context["atpanalysis"]: - media = entry['media'] 
- entry['atp_production'] = atp_production_data.get(media, None) + media = entry["media"] + entry["atp_production"] = atp_production_data.get(media, None) # Diagnostics unique_biomass_rxns = biomass_reactions_ids @@ -327,7 +392,7 @@ def build_multitab_report(self, output_path): print("\nFirst 2 gapfilling entries:") for gf in context["gapfilling"][:2]: print(gf) - + print("\nFirst 2 ATP Analysis entries:") for entry in context["atpanalysis"][:2]: print(entry) @@ -335,15 +400,14 @@ def build_multitab_report(self, output_path): # Render with template env = jinja2.Environment( loader=jinja2.FileSystemLoader(module_path + "/../data/"), - autoescape=jinja2.select_autoescape(['html', 'xml']) + autoescape=jinja2.select_autoescape(["html", "xml"]), ) html = env.get_template("ModelReportTemplate.html").render(context) directory = dirname(output_path) os.makedirs(directory, exist_ok=True) - with open(output_path, 'w') as f: + with open(output_path, "w") as f: f.write(html) - - + def build_report(self, output_path): """Builds model HTML report for the Model Summary table Parameters @@ -359,40 +423,95 @@ def build_report(self, output_path): # 2. Transform the dictionary into a list of tuples model_summary_list = [(key, value) for key, value in model_summary_data.items()] # 3. 
Convert to DataFrame - model_summary_df = pd.DataFrame(model_summary_list, columns=['', '']) + model_summary_df = pd.DataFrame(model_summary_list, columns=["", ""]) # Style the DataFrame (as was done previously) - model_summary_df_styled = ( - model_summary_df.style.hide(axis="index") - .set_table_styles([ - {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, - {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, - ]) + model_summary_df_styled = model_summary_df.style.hide( + axis="index" + ).set_table_styles( + [ + { + "selector": "th", + "props": [ + ("border", "none"), + ("background-color", "white"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "td", + "props": [ + ("border", "none"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "tr:nth-child(even)", + "props": [("background-color", "white")], + }, + { + "selector": "tr:nth-child(odd)", + "props": [("background-color", "#f2f2f2")], + }, + ] ) # Fetching the gapfilling sensitivity data - gf_sensitivity = self.modelutl.attributes.get('gf_sensitivity', None) + gf_sensitivity = self.modelutl.attributes.get("gf_sensitivity", None) gapfilling_data = self.extract_gapfilling_data(gf_sensitivity) gapfilling_list = self.transform_gapfilling_data(gapfilling_data[0]) # Convert the gapfilling_list to a DataFrame gapfillings_analysis_df = pd.DataFrame( - gapfilling_list, + gapfilling_list, columns=[ - "Reaction ID", "Reaction Name", "Media", "Direction", "Target", "Gapfilling Sensitivity ID", "Gapfilling Sensitivity Name"] + "Reaction ID", + "Reaction Name", + 
"Media", + "Direction", + "Target", + "Gapfilling Sensitivity ID", + "Gapfilling Sensitivity Name", + ], ) # Apply style to Gapfillings Analysis DataFrame - gapfillings_analysis_df_styled = ( - gapfillings_analysis_df.style.hide(axis="index") - .set_table_styles([ - {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, - {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, - ]) + gapfillings_analysis_df_styled = gapfillings_analysis_df.style.hide( + axis="index" + ).set_table_styles( + [ + { + "selector": "th", + "props": [ + ("border", "none"), + ("background-color", "white"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "td", + "props": [ + ("border", "none"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "tr:nth-child(even)", + "props": [("background-color", "white")], + }, + { + "selector": "tr:nth-child(odd)", + "props": [("background-color", "#f2f2f2")], + }, + ] ) # Legend for Gapfillings Analysis @@ -414,22 +533,48 @@ def build_report(self, output_path): """ # Extract ATP analysis data - atp_analysis = self.modelutl.attributes.get('ATP_analysis', None) - atp_expansion_filter = self.modelutl.attributes.get('atp_expansion_filter', {}) - atp_analysis_entries = self.extract_atp_analysis_data(atp_analysis, atp_expansion_filter) + atp_analysis = self.modelutl.attributes.get("ATP_analysis", None) + atp_expansion_filter = self.modelutl.attributes.get("atp_expansion_filter", {}) + atp_analysis_entries = self.extract_atp_analysis_data( + atp_analysis, atp_expansion_filter + ) # Convert the atp_analysis_entries 
list to a DataFrame atp_analysis_df = pd.DataFrame(atp_analysis_entries) # Apply style to ATP Analysis DataFrame - atp_analysis_df_styled = ( - atp_analysis_df.style.hide(axis="index") - .set_table_styles([ - {'selector': 'th', 'props': [('border', 'none'), ('background-color', 'white'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'td', 'props': [('border', 'none'), ('font-family', 'Oxygen'), ('font-size', '14px'), ('line-height', '20px')]}, - {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]}, - {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]}, - ]) + atp_analysis_df_styled = atp_analysis_df.style.hide( + axis="index" + ).set_table_styles( + [ + { + "selector": "th", + "props": [ + ("border", "none"), + ("background-color", "white"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "td", + "props": [ + ("border", "none"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "tr:nth-child(even)", + "props": [("background-color", "white")], + }, + { + "selector": "tr:nth-child(odd)", + "props": [("background-color", "#f2f2f2")], + }, + ] ) # Legend for ATP Analysis @@ -442,9 +587,9 @@ def build_report(self, output_path):
  • Reversed Reaction by Gapfilling: Reactions that have been reversed during the gapfilling process.
  • Filtered Reactions: Reactions that have been filtered out during the analysis. When a reaction addition would lead to a large increase in ATP production or an infinite energy loop, we filter that reaction out of the gapfilling database and prevent it from being added to the model.
  • - """ - - #ATP analysis explanation text + """ + + # ATP analysis explanation text explanation_text_atp_analysis = """

    During model reconstruction, we analyze the genome’s core metabolism draft model (model without gapfilling) to assess energy biosynthesis capabilities. The goal of this analysis is to ensure the core metabolism model is able to produce ATP before we expand the model to the genome-scale. @@ -458,30 +603,34 @@ def build_report(self, output_path): In cases where is known from the literature or unpublished experimental results that an organism is capable of producing ATP in a given media condition that requires gapfilling in this analysis, you can use the parameter “Force ATP media” in the reconstruction app to ensure those reactions are integrated into the model. .

    """ - + # Save the data to HTML with the styled DataFrames and the legends directory = os.path.dirname(output_path) os.makedirs(directory, exist_ok=True) - with open(output_path, 'w', encoding='utf-8') as f: + with open(output_path, "w", encoding="utf-8") as f: f.write('') - f.write('

    Model Summary

    ') + f.write("

    Model Summary

    ") f.write(model_summary_df_styled.render(escape=False)) - f.write('

    ') - f.write('

    Gapfillings Analysis

    ') + f.write("

    ") + f.write("

    Gapfillings Analysis

    ") # Check for Gapfillings Analysis data if not gapfillings_analysis_df.empty: f.write(gapfillings_analysis_df_styled.render(escape=False)) - f.write(f'

    Legend:

    {annotations_text_gapfillings}') + f.write(f"

    Legend:

    {annotations_text_gapfillings}") else: - f.write('

    Warning: No Gapfillings Analysis data available for this model.

    ') + f.write( + "

    Warning: No Gapfillings Analysis data available for this model.

    " + ) - f.write('

    Core ATP Analysis

    ') + f.write("

    Core ATP Analysis

    ") # Check for ATP Analysis data if not atp_analysis_df.empty: f.write(atp_analysis_df_styled.render(escape=False)) - f.write(f'

    Legend:

    {annotations_text_atp_analysis}') + f.write(f"

    Legend:

    {annotations_text_atp_analysis}") f.write(explanation_text_atp_analysis) else: - f.write('

    Warning: No Core ATP Analysis data available for this model.

    ') \ No newline at end of file + f.write( + "

    Warning: No Core ATP Analysis data available for this model.

    " + ) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 19b3497a..5f6440ce 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -299,9 +299,7 @@ def save_attributes(self, value=None, key=None): else: self.attributes = value if hasattr(self.model, "computed_attributes"): - logger.info( - "Setting FBAModel computed_attributes to mdlutl attributes" - ) + logger.info("Setting FBAModel computed_attributes to mdlutl attributes") self.attributes["gene_count"] = len(self.model.genes) self.model.computed_attributes = self.attributes @@ -902,7 +900,11 @@ def binary_expansion_test(self, reaction_list, condition, currmodel, depth=0): return filtered_list def reaction_expansion_test( - self, reaction_list, condition_list, binary_search=True,attribute_label="gf_filter" + self, + reaction_list, + condition_list, + binary_search=True, + attribute_label="gf_filter", ): """Adds reactions in reaction list one by one and appplies tests, filtering reactions that fail diff --git a/modelseedpy/fbapkg/elementuptakepkg.py b/modelseedpy/fbapkg/elementuptakepkg.py index 8348e602..1f61f7a8 100644 --- a/modelseedpy/fbapkg/elementuptakepkg.py +++ b/modelseedpy/fbapkg/elementuptakepkg.py @@ -16,31 +16,33 @@ def __init__(self, model): {"elements": "string"}, ) - def build_package(self, element_limits,exception_compounds=[],exception_reactions=[]): - #Converting exception compounds list into exception reaction list + def build_package( + self, element_limits, exception_compounds=[], exception_reactions=[] + ): + # Converting exception compounds list into exception reaction list self.parameters = { - "element_limits" : element_limits, - "exception_compounds" : exception_compounds, - "exception_reactions" : exception_reactions + "element_limits": element_limits, + "exception_compounds": exception_compounds, + "exception_reactions": exception_reactions, } exchange_hash = self.modelutl.exchange_hash() for met in exception_compounds: 
if met in exchange_hash: exception_reactions.append(exchange_hash[met]) - #Now building or rebuilding constraints + # Now building or rebuilding constraints for element in element_limits: if element not in self.variables["elements"]: self.build_variable(element, element_limits[element]) for element in element_limits: - #This call will first remove existing constraints then build the new constraint - self.build_constraint(element,exception_reactions) + # This call will first remove existing constraints then build the new constraint + self.build_constraint(element, exception_reactions) def build_variable(self, element, limit): return BaseFBAPkg.build_variable( self, "elements", 0, limit, "continuous", element ) - def build_constraint(self, element,exception_reactions): + def build_constraint(self, element, exception_reactions): coef = {self.variables["elements"][element]: -1} rxnlist = self.modelutl.exchange_list() for reaction in rxnlist: diff --git a/modelseedpy/fbapkg/kbasemediapkg.py b/modelseedpy/fbapkg/kbasemediapkg.py index b377547e..9dc9b315 100644 --- a/modelseedpy/fbapkg/kbasemediapkg.py +++ b/modelseedpy/fbapkg/kbasemediapkg.py @@ -40,7 +40,9 @@ def build_package( self.parameters["default_uptake"] = 0 if self.parameters["default_excretion"] is None: self.parameters["default_excretion"] = 100 - if (self.parameters["media"] and self.parameters["media"].name == "Complete") and self.parameters["default_uptake"] == 0: + if ( + self.parameters["media"] and self.parameters["media"].name == "Complete" + ) and self.parameters["default_uptake"] == 0: self.parameters["default_uptake"] = 100 # First initializing all exchanges to default uptake and excretion From 67eb8c2be275e3cab7a7f95aee013aa8bda389be Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sat, 23 Sep 2023 12:04:01 -0500 Subject: [PATCH 116/146] Beginning implementation of global gapfill method --- modelseedpy/core/msgapfill.py | 87 +++++++++++++++++------------ modelseedpy/fbapkg/gapfillingpkg.py | 
48 +++++++++++++++- 2 files changed, 98 insertions(+), 37 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 39de2e3e..db3ed703 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -11,7 +11,7 @@ logger = logging.getLogger(__name__) logger.setLevel( - logging.INFO#WARNING + logging.INFO # WARNING ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO @@ -148,11 +148,13 @@ def prefilter(self, media, target): self.gfpkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) - gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes("gf_filter", {}) + gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes( + "gf_filter", {} + ) base_filter = self.mdlutl.get_attributes("gf_filter", {}) for media_id in gf_filter: base_filter[media_id] = gf_filter[media_id] - + # Testing if gapfilling can work after filtering if not self.test_gapfill_database(media, target, before_filtering=False): return False @@ -176,7 +178,7 @@ def run_gapfilling( Name or expression describing the reaction or combination of reactions to the optimized minimum_obj : double Value to use for the minimal objective threshold that the model must be gapfilled to achieve - binary_check : bool + binary_check : bool Indicates if the solution should be checked to ensure it is minimal in the number of reactions involved prefilter : bool Indicates if the gapfilling database should be prefiltered using the tests provided in the MSGapfill constructor before running gapfilling @@ -264,6 +266,7 @@ def run_multi_gapfill( binary_check=False, prefilter=True, check_for_growth=True, + simultaneous_gapfilling=False, ): """Run gapfilling across an array of media conditions ultimately using different integration policies: simultaneous gapfilling, independent gapfilling, cumulative gapfilling Parameters @@ -276,32 +279,41 @@ def run_multi_gapfill( 
Media-specific minimal objective thresholds that the model must be gapfilled to achieve default_minimum_objective : double Default value to use for the minimal objective threshold that the model must be gapfilled to achieve - binary_check : bool + binary_check : bool Indicates if the solution should be checked to ensure it is minimal in the number of reactions involved prefilter : bool Indicates if the gapfilling database should be prefiltered using the tests provided in the MSGapfill constructor before running gapfilling check_for_growth : bool Indicates if the model should be checked to ensure that the resulting gapfilling solution produces a nonzero objective """ - + if not default_minimum_objective: default_minimum_objective = self.default_minimum_objective - first = True solution_dictionary = {} - for item in media_list: - minimum_obj = default_minimum_objective - if item in minimum_objectives: - minimum_obj = minimum_objectives[item] - if first: - solution_dictionary[item] = self.run_gapfilling( - item, target, minimum_obj, binary_check, prefilter, check_for_growth - ) - else: - solution_dictionary[item] = self.run_gapfilling( - item, None, minimum_obj, binary_check, False, check_for_growth - ) - false = False - return solution_dictionary + if simultaneous_gapfilling: + for item in media_list: + pass + else: + first = True + for item in media_list: + minimum_obj = default_minimum_objective + if item in minimum_objectives: + minimum_obj = minimum_objectives[item] + if first: + solution_dictionary[item] = self.run_gapfilling( + item, + target, + minimum_obj, + binary_check, + prefilter, + check_for_growth, + ) + else: + solution_dictionary[item] = self.run_gapfilling( + item, None, minimum_obj, binary_check, False, check_for_growth + ) + false = False + return solution_dictionary def integrate_gapfill_solution( self, solution, cumulative_solution=[], link_gaps_to_objective=True @@ -347,8 +359,8 @@ def integrate_gapfill_solution( 
cumulative_solution.append([rxn_id, "<"]) rxn.upper_bound = 0 rxn.lower_bound = -100 - - #Sometimes for whatever reason, the solution includes useless reactions that should be stripped out before saving the final model + + # Sometimes for whatever reason, the solution includes useless reactions that should be stripped out before saving the final model unneeded = self.mdlutl.test_solution( solution, keep_changes=True ) # Strips out unneeded reactions - which undoes some of what is done above @@ -357,11 +369,16 @@ def integrate_gapfill_solution( if item[0] == oitem[0] and item[1] == oitem[1]: cumulative_solution.remove(oitem) break - #Adding the gapfilling solution data to the model, which is needed for saving the model in KBase + # Adding the gapfilling solution data to the model, which is needed for saving the model in KBase self.mdlutl.add_gapfilling(solution) - #Testing which gapfilled reactions are needed to produce each reactant in the objective function + # Testing which gapfilled reactions are needed to produce each reactant in the objective function if link_gaps_to_objective: - logger.info("Gapfilling sensitivity analysis running on succesful run in "+solution["media"].id+" for target "+solution["target"]) + logger.info( + "Gapfilling sensitivity analysis running on succesful run in " + + solution["media"].id + + " for target " + + solution["target"] + ) gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) if solution["media"].id not in gf_sensitivity: gf_sensitivity[solution["media"].id] = {} @@ -375,25 +392,23 @@ def integrate_gapfill_solution( self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") self.cumulative_gapfilling.extend(cumulative_solution) - def compute_reaction_weights_from_expression_data( - self, omics_data, conditions=[] - ): + def compute_reaction_weights_from_expression_data(self, omics_data, conditions=[]): """Computing reaction weights based on input gene-level omics data Parameters ---------- omics_data : pandas 
dataframe with genes as rows and conditions as columns Specifies the reactions to be added to the model to implement the gapfilling solution conditions : list - Optional array containing the IDs of the columns in omics_data from which data should be used. + Optional array containing the IDs of the columns in omics_data from which data should be used. If an empty array (or no array) is supplied, data from all columns will be used. When multiple columns are used, the data from those columns should be normalized first, then added together """ - #Validitions: - #1.) An conditions listed in the conditions argument should match the columns in the omics_data dataframe - #2.) Most (~80%) of the genes in the model should match genes in the omics_data dataframe - #3.) The omics_data dataframe should have at least 2 columns - #4.) The omics_data dataframe should have at least 2 rows - #5.) Logging should be used to report out which genes in the model don't match any genes in the omics_data dataframe + # Validitions: + # 1.) An conditions listed in the conditions argument should match the columns in the omics_data dataframe + # 2.) Most (~80%) of the genes in the model should match genes in the omics_data dataframe + # 3.) The omics_data dataframe should have at least 2 columns + # 4.) The omics_data dataframe should have at least 2 rows + # 5.) 
Logging should be used to report out which genes in the model don't match any genes in the omics_data dataframe pass @staticmethod diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 74a097df..dbc1441e 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -32,8 +32,15 @@ class GapfillingPkg(BaseFBAPkg): """ """ def __init__(self, model): - BaseFBAPkg.__init__(self, model, "gapfilling", {}, {}) + BaseFBAPkg.__init__( + self, + model, + "gapfilling", + {"rmaxf": "reaction", "fmaxf": "reaction"}, + {"rmaxfc": "reaction", "fmaxfc": "reaction"}, + ) self.gapfilling_penalties = None + self.maxflux_variables = {} def build(self, template, minimum_objective=0.01): parameters = { @@ -81,6 +88,7 @@ def build_package(self, parameters): "default_uptake": 100, "minimum_obj": 0.01, "minimize_exchanges": False, + "add_max_flux_variables": False, "blacklist": [], }, ) @@ -165,6 +173,44 @@ def build_package(self, parameters): self.model.solver.update() + # Creating max flux variables and constraints to be used for global gapfilling and other formulations + if self.parameters["add_max_flux_variables"]: + for reaction in self.model.reactions: + if reaction.id in self.gapfilling_penalties: + if "reverse" in self.gapfilling_penalties[reaction.id]: + self.maxflux_variables[reaction.id][ + "reverse" + ] = self.build_variable( + "rmaxf", 0, 1000, "continuous", reaction + ) + self.build_constraint( + "rmaxfc", + 0, + None, + { + reaction.reverse_variable: -1, + self.maxflux_variables[reaction.id]["reverse"]: 1, + }, + reaction, + ) + if "forward" in self.gapfilling_penalties[reaction.id]: + self.maxflux_variables[reaction.id][ + "forward" + ] = self.build_variable( + "fmaxf", 0, 1000, "continuous", reaction + ) + self.build_constraint( + "fmaxfc", + 0, + None, + { + reaction.forward_variable: -1, + self.maxflux_variables[reaction.id]["forward"]: 1, + }, + reaction, + ) + + # Creating the gapfilling objective 
function and saving it under self.parameters["gfobj"] reaction_objective = self.model.problem.Objective(Zero, direction="min") obj_coef = dict() for reaction in self.model.reactions: From dfca4627b620d9e2f90ee499152bf6f5e68c7da4 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 9 Oct 2023 23:43:58 -0500 Subject: [PATCH 117/146] update rast call --- modelseedpy/core/rast_client.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/modelseedpy/core/rast_client.py b/modelseedpy/core/rast_client.py index ebe06cb5..fc575237 100644 --- a/modelseedpy/core/rast_client.py +++ b/modelseedpy/core/rast_client.py @@ -52,10 +52,8 @@ def __init__(self): ) self.stages = [ {"name": "annotate_proteins_kmer_v2", "kmer_v2_parameters": {}}, - { - "name": "annotate_proteins_kmer_v1", - "kmer_v1_parameters": {"annotate_hypothetical_only": 1}, - }, + # {"name": "annotate_proteins_kmer_v1", + # "kmer_v1_parameters": {"annotate_hypothetical_only": 1},}, { "name": "annotate_proteins_similarity", "similarity_parameters": {"annotate_hypothetical_only": 1}, From 017859055703731823e366b46c2c7205ded67623 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Mon, 4 Dec 2023 14:50:18 -0800 Subject: [PATCH 118/146] Created Weighting function for gap fill. Example data required for testing. --- modelseedpy/core/msgapfill.py | 84 ++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index db3ed703..8fe5d505 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -3,6 +3,8 @@ import cobra import re import json +import numpy as np +import pandas as pd from optlang.symbolics import Zero, add from modelseedpy.core import FBAHelper # !!! 
the import is never used from modelseedpy.fbapkg.mspackagemanager import MSPackageManager @@ -402,6 +404,8 @@ def compute_reaction_weights_from_expression_data(self, omics_data, conditions=[ Optional array containing the IDs of the columns in omics_data from which data should be used. If an empty array (or no array) is supplied, data from all columns will be used. When multiple columns are used, the data from those columns should be normalized first, then added together + Returns : + A dictionary with Rxns as the keys and calculated result as the value. """ # Validitions: # 1.) An conditions listed in the conditions argument should match the columns in the omics_data dataframe @@ -409,7 +413,85 @@ def compute_reaction_weights_from_expression_data(self, omics_data, conditions=[ # 3.) The omics_data dataframe should have at least 2 columns # 4.) The omics_data dataframe should have at least 2 rows # 5.) Logging should be used to report out which genes in the model don't match any genes in the omics_data dataframe - pass + + # Assumptions: + # omics_data is an input with the following columns: + # Col 1: Gene + # Col 2: Reactions + # Cols 3-9: Annotations + # Unlike with the MATLAB code, this will not check if rxn is not in draft - this is only making calculations based on the columns in omics_data. + + + # Notes: + # This has only been tested on dummy data and in this case python performs the same as MATLAB. However, this needs to be tested with an example input. + # Two outputs are currently created. + # A table that matches with t in the MATLAB code. + # The requested hash (dictionary) with Rxns and floats. + # Many more inputs were required with the MATLAB code, this has attempted to condense everything to handle the single input. + # Conditions has not been added yet. I think as other questions are answered, the use for this will be more clear. + + #Questions + # When might the example data be updated? + # Are other inputs required? 
+ # Is this output (Dictionary with RXN: Value), correct? + # When will the next steps for setting up the kbase jupyter notebook be ready? + + measuredGeneScore = np.zeros((omics_data.shape[0], len(omics_data.columns[3:10]))) + num_cols = len(omics_data.columns[3:10]) + w = np.full((num_cols, 1), 1/num_cols) + p = np.zeros(len(omics_data["Reactions"])) + + # t is table to match and check against MatLab code. + t = pd.DataFrame() + # rxn_hash is the desired output + rxn_hash = {} + + for rxn in range(0,len(omics_data)): + substr_rxns = [rxn for rxn in omics_data["Reactions"][[rxn]]] + # Get the indices of the rows where the condition is True + mask = omics_data['Reactions'].apply(lambda x: any(substr in x for substr in substr_rxns)) + idx_gene = mask[mask].index + nAG = 0 + nMG = 0 + nCG = 0 + + if len(idx_gene) > 0: + #number of genes that map to a reaction + nAG = len(idx_gene) + for iGene in range(0,nAG): + subset = omics_data.iloc[idx_gene[iGene], 3:9].to_numpy() + # Checking for non-empty elements in the subset + non_empty_check = np.vectorize(lambda x: x is not None and x == x)(subset) # x == x checks for NaN + # Finding the maximum value between the non-empty check and the corresponding row in measuredGeneScore + max_value = np.maximum(non_empty_check, measuredGeneScore[idx_gene[iGene], :]) + # Multiplying by the weight and adding to nMG + nMG += max(sum((max_value * w))) + selected_gene = omics_data['Gene'].iloc[idx_gene[iGene]] + + # Finding reactions associated with genes that contain the selected gene + associated_reactions = omics_data['Reactions'][omics_data['Gene'].str.contains(selected_gene)] + # Checking if there are more than one unique reactions + if len(associated_reactions.unique()) > 1: + nCG +=1 + + p[rxn] = (nMG/nAG) * (1 / (1 + (nCG/nAG))) + + #format table + new_row = { + 'iRxn': rxn, + 'nMG': nMG, + 'nCG': nCG, + 'nAG': nAG, + 'Values': p[rxn] #Values is equivalent to Var5 in the MatLab Code + } + + # Append the new row to the table + t = 
t.append(new_row, ignore_index=True) + # Add item to output rxn dictionary + rxn_hash[omics_data.iloc[rxn, 0]] = p[rxn] + + return rxn_hash + @staticmethod def gapfill( From fb9f1166c83300dc9ba7747234c91a337392980d Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 7 Dec 2023 17:40:45 -0600 Subject: [PATCH 119/146] updated cobra version and template reaction type fix --- modelseedpy/core/msbuilder.py | 8 ++++++-- modelseedpy/core/msgenome.py | 1 + modelseedpy/core/mstemplate.py | 2 +- setup.py | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index bca4a0f8..c180cc6f 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -6,6 +6,7 @@ from modelseedpy.core.exceptions import ModelSEEDError from modelseedpy.core.rast_client import RastClient from modelseedpy.core.msgenome import normalize_role +from modelseedpy.core.mstemplate import TemplateReactionType from modelseedpy.core.msmodel import ( get_gpr_string, get_reaction_constraints_from_direction, @@ -867,9 +868,12 @@ def build_non_metabolite_reactions( reactions = [] for template_reaction in self.template.reactions: + rxn_type = template_reaction.type if ( - template_reaction.type == "universal" - or template_reaction.type == "spontaneous" + rxn_type == "universal" + or rxn_type == "spontaneous" + or rxn_type == TemplateReactionType.UNIVERSAL + or rxn_type == TemplateReactionType.SPONTANEOUS ): reaction_metabolite_ids = {m.id for m in template_reaction.metabolites} if ( diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 78f1e004..f79165fb 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -82,6 +82,7 @@ def add_ontology_term(self, ontology_term, value): class MSGenome: + def __init__(self): self.features = DictList() diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 49fd98c3..dccdce4e 100644 --- 
a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -438,7 +438,7 @@ def get_data(self): map(lambda x: "~/complexes/id/" + x.id, self.complexes) ), # 'status': self.status, - "type": self.type, + "type": self.type if type(self.type) is str else self.type.value, } # def build_reaction_string(self, use_metabolite_names=False, use_compartment_names=None): diff --git a/setup.py b/setup.py index 2fb97221..f2fe0b3b 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ ], install_requires=[ "networkx >= 2.4", - "cobra >= 0.17.1", + "cobra >= 0.28.0", "scikit-learn == 1.2.0", # version lock for pickle ML models "scipy >= 1.5.4", "chemicals >= 1.0.13", From ea79000cb87a86cffe66ad2b856cf90f49f2e324 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 7 Dec 2023 17:41:14 -0600 Subject: [PATCH 120/146] format --- modelseedpy/core/msgenome.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index f79165fb..78f1e004 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -82,7 +82,6 @@ def add_ontology_term(self, ontology_term, value): class MSGenome: - def __init__(self): self.features = DictList() From 7132c50cbfbc10bdffb3e6778f47900ad3276dd8 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Thu, 4 Jan 2024 15:59:47 -0800 Subject: [PATCH 121/146] Linted --- modelseedpy/core/msgapfill.py | 87 +++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 8fe5d505..53a52b07 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -1,9 +1,10 @@ +#!/usr/bin/python # -*- coding: utf-8 -*- import logging import cobra import re import json -import numpy as np +import numpy as np import pandas as pd from optlang.symbolics import Zero, add from modelseedpy.core import FBAHelper # !!! 
the import is never used @@ -405,7 +406,7 @@ def compute_reaction_weights_from_expression_data(self, omics_data, conditions=[ If an empty array (or no array) is supplied, data from all columns will be used. When multiple columns are used, the data from those columns should be normalized first, then added together Returns : - A dictionary with Rxns as the keys and calculated result as the value. + A dictionary with Rxns as the keys and calculated result as the value. """ # Validitions: # 1.) An conditions listed in the conditions argument should match the columns in the omics_data dataframe @@ -413,85 +414,93 @@ def compute_reaction_weights_from_expression_data(self, omics_data, conditions=[ # 3.) The omics_data dataframe should have at least 2 columns # 4.) The omics_data dataframe should have at least 2 rows # 5.) Logging should be used to report out which genes in the model don't match any genes in the omics_data dataframe - - # Assumptions: + + # Assumptions: # omics_data is an input with the following columns: - # Col 1: Gene - # Col 2: Reactions - # Cols 3-9: Annotations + # Col 1: Gene + # Col 2: Reactions + # Cols 3-9: Annotations # Unlike with the MATLAB code, this will not check if rxn is not in draft - this is only making calculations based on the columns in omics_data. - - + # Notes: # This has only been tested on dummy data and in this case python performs the same as MATLAB. However, this needs to be tested with an example input. # Two outputs are currently created. - # A table that matches with t in the MATLAB code. - # The requested hash (dictionary) with Rxns and floats. + # A table that matches with t in the MATLAB code. + # The requested hash (dictionary) with Rxns and floats. # Many more inputs were required with the MATLAB code, this has attempted to condense everything to handle the single input. # Conditions has not been added yet. I think as other questions are answered, the use for this will be more clear. 
- - #Questions + + # Questions # When might the example data be updated? # Are other inputs required? # Is this output (Dictionary with RXN: Value), correct? # When will the next steps for setting up the kbase jupyter notebook be ready? - - measuredGeneScore = np.zeros((omics_data.shape[0], len(omics_data.columns[3:10]))) + + measuredGeneScore = np.zeros( + (omics_data.shape[0], len(omics_data.columns[3:10])) + ) num_cols = len(omics_data.columns[3:10]) - w = np.full((num_cols, 1), 1/num_cols) + w = np.full((num_cols, 1), 1 / num_cols) p = np.zeros(len(omics_data["Reactions"])) - # t is table to match and check against MatLab code. + # t is table to match and check against MatLab code. t = pd.DataFrame() # rxn_hash is the desired output rxn_hash = {} - for rxn in range(0,len(omics_data)): + for rxn in range(0, len(omics_data)): substr_rxns = [rxn for rxn in omics_data["Reactions"][[rxn]]] # Get the indices of the rows where the condition is True - mask = omics_data['Reactions'].apply(lambda x: any(substr in x for substr in substr_rxns)) + mask = omics_data["Reactions"].apply( + lambda x: any(substr in x for substr in substr_rxns) + ) idx_gene = mask[mask].index nAG = 0 nMG = 0 nCG = 0 - + if len(idx_gene) > 0: - #number of genes that map to a reaction + # number of genes that map to a reaction nAG = len(idx_gene) - for iGene in range(0,nAG): + for iGene in range(0, nAG): subset = omics_data.iloc[idx_gene[iGene], 3:9].to_numpy() # Checking for non-empty elements in the subset - non_empty_check = np.vectorize(lambda x: x is not None and x == x)(subset) # x == x checks for NaN + non_empty_check = np.vectorize(lambda x: x is not None and x == x)( + subset + ) # x == x checks for NaN # Finding the maximum value between the non-empty check and the corresponding row in measuredGeneScore - max_value = np.maximum(non_empty_check, measuredGeneScore[idx_gene[iGene], :]) + max_value = np.maximum( + non_empty_check, measuredGeneScore[idx_gene[iGene], :] + ) # Multiplying by the 
weight and adding to nMG nMG += max(sum((max_value * w))) - selected_gene = omics_data['Gene'].iloc[idx_gene[iGene]] + selected_gene = omics_data["Gene"].iloc[idx_gene[iGene]] # Finding reactions associated with genes that contain the selected gene - associated_reactions = omics_data['Reactions'][omics_data['Gene'].str.contains(selected_gene)] + associated_reactions = omics_data["Reactions"][ + omics_data["Gene"].str.contains(selected_gene) + ] # Checking if there are more than one unique reactions if len(associated_reactions.unique()) > 1: - nCG +=1 - - p[rxn] = (nMG/nAG) * (1 / (1 + (nCG/nAG))) - - #format table + nCG += 1 + + p[rxn] = (nMG / nAG) * (1 / (1 + (nCG / nAG))) + + # format table new_row = { - 'iRxn': rxn, - 'nMG': nMG, - 'nCG': nCG, - 'nAG': nAG, - 'Values': p[rxn] #Values is equivalent to Var5 in the MatLab Code + "iRxn": rxn, + "nMG": nMG, + "nCG": nCG, + "nAG": nAG, + "Values": p[rxn], # Values is equivalent to Var5 in the MatLab Code } - + # Append the new row to the table t = t.append(new_row, ignore_index=True) # Add item to output rxn dictionary rxn_hash[omics_data.iloc[rxn, 0]] = p[rxn] - + return rxn_hash - @staticmethod def gapfill( From 7502e5154d14602b65446a0e832fa47adafb7c5b Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Fri, 9 Feb 2024 16:14:12 -0600 Subject: [PATCH 122/146] rast regex --- modelseedpy/core/rast_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/rast_client.py b/modelseedpy/core/rast_client.py index fc575237..97211add 100644 --- a/modelseedpy/core/rast_client.py +++ b/modelseedpy/core/rast_client.py @@ -70,7 +70,7 @@ def annotate_genome(self, genome): for o in res[0]["features"]: feature = genome.features.get_by_id(o["id"]) if "function" in o: - functions = re.split("; | / | @ | => ", o["function"]) + functions = re.split("; | / | @", o["function"]) for function in functions: feature.add_ontology_term("RAST", function) From 491a200662b98100b5db45d875bc99bbd56bf22b Mon Sep 17 
00:00:00 2001 From: Jeremy Date: Wed, 14 Feb 2024 12:23:29 -0800 Subject: [PATCH 123/146] Major updates to weighting / expression data function --- modelseedpy/core/msgapfill.py | 132 ++++++++++++++++------------------ 1 file changed, 60 insertions(+), 72 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 53a52b07..5c79fb9c 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -130,9 +130,9 @@ def test_gapfill_database(self, media, target=None, before_filtering=True): if before_filtering: filter_msg = " before filtering " note = "FBF" - gf_sensitivity[media.id][target][ - note - ] = self.mdlutl.find_unproducible_biomass_compounds(target) + gf_sensitivity[media.id][target][note] = ( + self.mdlutl.find_unproducible_biomass_compounds(target) + ) if target != "rxn00062_c0": self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") logger.warning( @@ -387,71 +387,70 @@ def integrate_gapfill_solution( gf_sensitivity[solution["media"].id] = {} if solution["target"] not in gf_sensitivity[solution["media"].id]: gf_sensitivity[solution["media"].id][solution["target"]] = {} - gf_sensitivity[solution["media"].id][solution["target"]][ - "success" - ] = self.mdlutl.find_unproducible_biomass_compounds( - solution["target"], cumulative_solution + gf_sensitivity[solution["media"].id][solution["target"]]["success"] = ( + self.mdlutl.find_unproducible_biomass_compounds( + solution["target"], cumulative_solution + ) ) self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") self.cumulative_gapfilling.extend(cumulative_solution) - def compute_reaction_weights_from_expression_data(self, omics_data, conditions=[]): + def compute_reaction_weights_from_expression_data(self, omics_data, annoont): """Computing reaction weights based on input gene-level omics data Parameters ---------- omics_data : pandas dataframe with genes as rows and conditions as columns Specifies the reactions to be added to the model to 
implement the gapfilling solution - conditions : list - Optional array containing the IDs of the columns in omics_data from which data should be used. - If an empty array (or no array) is supplied, data from all columns will be used. When multiple columns are - used, the data from those columns should be normalized first, then added together + annoont : annoont object + Contains reaction, feature id, ontologies, probabilities. Restructured into dataframe in function Returns : A dictionary with Rxns as the keys and calculated result as the value. """ - # Validitions: - # 1.) An conditions listed in the conditions argument should match the columns in the omics_data dataframe - # 2.) Most (~80%) of the genes in the model should match genes in the omics_data dataframe - # 3.) The omics_data dataframe should have at least 2 columns - # 4.) The omics_data dataframe should have at least 2 rows - # 5.) Logging should be used to report out which genes in the model don't match any genes in the omics_data dataframe - - # Assumptions: - # omics_data is an input with the following columns: - # Col 1: Gene - # Col 2: Reactions - # Cols 3-9: Annotations - # Unlike with the MATLAB code, this will not check if rxn is not in draft - this is only making calculations based on the columns in omics_data. - # Notes: - # This has only been tested on dummy data and in this case python performs the same as MATLAB. However, this needs to be tested with an example input. - # Two outputs are currently created. - # A table that matches with t in the MATLAB code. - # The requested hash (dictionary) with Rxns and floats. - # Many more inputs were required with the MATLAB code, this has attempted to condense everything to handle the single input. - # Conditions has not been added yet. I think as other questions are answered, the use for this will be more clear. - - # Questions - # When might the example data be updated? - # Are other inputs required? 
- # Is this output (Dictionary with RXN: Value), correct? - # When will the next steps for setting up the kbase jupyter notebook be ready? - - measuredGeneScore = np.zeros( - (omics_data.shape[0], len(omics_data.columns[3:10])) - ) - num_cols = len(omics_data.columns[3:10]) + ### Restructure annoont into Dataframe + rows_list = [] + for reaction, genes in annoont.get_reaction_gene_hash().items(): + for gene, gene_info in genes.items(): + # Initialize the row with 'Gene' and 'Reactions' + row = {"Gene": gene, "Reactions": reaction} + # Loop through each evidence in the gene's evidence list + for evidence in gene_info["evidence"]: + # Construct column name from the event and ontology for uniqueness + column_name = f"{evidence['ontology']}" + if column_name in row: + row[column_name] = f"{row[column_name]}, {evidence['term']}" + else: + row[column_name] = evidence["term"] + rows_list.append(row) + restructured_anoot = pd.DataFrame(rows_list) + + ### Integrate Omics, set weights, find indexes for features + feature_ids_set = set(omics_data["feature_ids"]) + + # Find indices where 'Gene' values are in 'feature_ids' + # isin method returns a boolean series that is True where tbl_supAno['Gene'] is in feature_ids_set + mask = restructured_anoot["Gene"].isin(feature_ids_set) + # Get the indices of True values in the mask + idx_measuredGene = mask[mask].index.tolist() + # Calculate the dimensions for the measuredGeneScore array + num_genes = len(restructured_anoot["Gene"]) + num_columns = len(restructured_anoot.columns[2:]) + # Initialize the measuredGeneScore array with zeros + measuredGeneScore = np.zeros((num_genes, num_columns)) + measuredGeneScore[idx_measuredGene, :] = 1 + num_weights = len(restructured_anoot.columns[3:]) + w = np.repeat(1 / num_weights, num_weights) + + ### Calculate Weights and generate the reaction/weight hash + num_cols = len(restructured_anoot.columns[2:]) w = np.full((num_cols, 1), 1 / num_cols) - p = np.zeros(len(omics_data["Reactions"])) - - # 
t is table to match and check against MatLab code. - t = pd.DataFrame() - # rxn_hash is the desired output - rxn_hash = {} - - for rxn in range(0, len(omics_data)): - substr_rxns = [rxn for rxn in omics_data["Reactions"][[rxn]]] + p = np.zeros(len(restructured_anoot["Reactions"])) + # computed_weights is the rxn_hash ({rxn: weight, ...}) + computed_weights = {} + for rxn in range(0, len(restructured_anoot)): + substr_rxns = [rxn for rxn in restructured_anoot["Reactions"][[rxn]]] # Get the indices of the rows where the condition is True - mask = omics_data["Reactions"].apply( + mask = restructured_anoot["Reactions"].apply( lambda x: any(substr in x for substr in substr_rxns) ) idx_gene = mask[mask].index @@ -463,22 +462,22 @@ def compute_reaction_weights_from_expression_data(self, omics_data, conditions=[ # number of genes that map to a reaction nAG = len(idx_gene) for iGene in range(0, nAG): - subset = omics_data.iloc[idx_gene[iGene], 3:9].to_numpy() + subset = restructured_anoot.iloc[idx_gene[iGene], 2:].to_numpy() # Checking for non-empty elements in the subset non_empty_check = np.vectorize(lambda x: x is not None and x == x)( subset - ) # x == x checks for NaN + ) # Finding the maximum value between the non-empty check and the corresponding row in measuredGeneScore max_value = np.maximum( non_empty_check, measuredGeneScore[idx_gene[iGene], :] ) # Multiplying by the weight and adding to nMG nMG += max(sum((max_value * w))) - selected_gene = omics_data["Gene"].iloc[idx_gene[iGene]] + selected_gene = restructured_anoot["Gene"].iloc[idx_gene[iGene]] # Finding reactions associated with genes that contain the selected gene - associated_reactions = omics_data["Reactions"][ - omics_data["Gene"].str.contains(selected_gene) + associated_reactions = restructured_anoot["Reactions"][ + restructured_anoot["Gene"].str.contains(selected_gene) ] # Checking if there are more than one unique reactions if len(associated_reactions.unique()) > 1: @@ -486,21 +485,10 @@ def 
compute_reaction_weights_from_expression_data(self, omics_data, conditions=[ p[rxn] = (nMG / nAG) * (1 / (1 + (nCG / nAG))) - # format table - new_row = { - "iRxn": rxn, - "nMG": nMG, - "nCG": nCG, - "nAG": nAG, - "Values": p[rxn], # Values is equivalent to Var5 in the MatLab Code - } - - # Append the new row to the table - t = t.append(new_row, ignore_index=True) - # Add item to output rxn dictionary - rxn_hash[omics_data.iloc[rxn, 0]] = p[rxn] + # Add item to output rxn hash dictionary + computed_weights[restructured_anoot.iloc[rxn, 0]] = p[rxn] - return rxn_hash + return computed_weights @staticmethod def gapfill( From cb5008472debb2a4dc0e75a0dbffa16b34e9b61a Mon Sep 17 00:00:00 2001 From: Jeremy Jacobson <85139244+jjacobson95@users.noreply.github.com> Date: Fri, 22 Mar 2024 09:54:04 -0700 Subject: [PATCH 124/146] Update msgapfill.py --- modelseedpy/core/msgapfill.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 5c79fb9c..79200d6e 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -11,6 +11,7 @@ from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.exceptions import GapfillingError +from collections import defaultdict logger = logging.getLogger(__name__) logger.setLevel( @@ -447,12 +448,21 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont): p = np.zeros(len(restructured_anoot["Reactions"])) # computed_weights is the rxn_hash ({rxn: weight, ...}) computed_weights = {} + + # Precompute gene reaction lookups + gene_reaction_lookup = {} + for idx, row in restructured_anoot.iterrows(): + gene = row['Gene'] + reaction = row['Reactions'] + if gene in gene_reaction_lookup: + gene_reaction_lookup[gene].append(reaction) + else: + gene_reaction_lookup[gene] = [reaction] + for rxn in range(0, len(restructured_anoot)): 
substr_rxns = [rxn for rxn in restructured_anoot["Reactions"][[rxn]]] # Get the indices of the rows where the condition is True - mask = restructured_anoot["Reactions"].apply( - lambda x: any(substr in x for substr in substr_rxns) - ) + mask = restructured_anoot["Reactions"] == substr_rxns[0] idx_gene = mask[mask].index nAG = 0 nMG = 0 @@ -476,11 +486,10 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont): selected_gene = restructured_anoot["Gene"].iloc[idx_gene[iGene]] # Finding reactions associated with genes that contain the selected gene - associated_reactions = restructured_anoot["Reactions"][ - restructured_anoot["Gene"].str.contains(selected_gene) - ] + associated_reactions = gene_reaction_lookup.get(selected_gene, []) + # Checking if there are more than one unique reactions - if len(associated_reactions.unique()) > 1: + if len(associated_reactions) > 1: nCG += 1 p[rxn] = (nMG / nAG) * (1 / (1 + (nCG / nAG))) From fc1019ba1703f0547809de3764624fee95d7e00b Mon Sep 17 00:00:00 2001 From: Jeremy Date: Fri, 22 Mar 2024 09:55:18 -0700 Subject: [PATCH 125/146] linted --- modelseedpy/core/msgapfill.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 79200d6e..4e94b069 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -452,13 +452,13 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont): # Precompute gene reaction lookups gene_reaction_lookup = {} for idx, row in restructured_anoot.iterrows(): - gene = row['Gene'] - reaction = row['Reactions'] + gene = row["Gene"] + reaction = row["Reactions"] if gene in gene_reaction_lookup: gene_reaction_lookup[gene].append(reaction) else: gene_reaction_lookup[gene] = [reaction] - + for rxn in range(0, len(restructured_anoot)): substr_rxns = [rxn for rxn in restructured_anoot["Reactions"][[rxn]]] # Get the indices of the rows where the condition is True @@ 
-487,7 +487,7 @@ def compute_reaction_weights_from_expression_data(self, omics_data, annoont): # Finding reactions associated with genes that contain the selected gene associated_reactions = gene_reaction_lookup.get(selected_gene, []) - + # Checking if there are more than one unique reactions if len(associated_reactions) > 1: nCG += 1 From 6738874a02b583e66a83c12e45b4607ed95afa86 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Fri, 22 Mar 2024 10:17:26 -0700 Subject: [PATCH 126/146] re-linted msgapfill.py --- modelseedpy/core/msgapfill.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 4e94b069..ee1b0fe3 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -13,6 +13,7 @@ from modelseedpy.core.exceptions import GapfillingError from collections import defaultdict + logger = logging.getLogger(__name__) logger.setLevel( logging.INFO # WARNING @@ -131,9 +132,9 @@ def test_gapfill_database(self, media, target=None, before_filtering=True): if before_filtering: filter_msg = " before filtering " note = "FBF" - gf_sensitivity[media.id][target][note] = ( - self.mdlutl.find_unproducible_biomass_compounds(target) - ) + gf_sensitivity[media.id][target][ + note + ] = self.mdlutl.find_unproducible_biomass_compounds(target) if target != "rxn00062_c0": self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") logger.warning( @@ -388,10 +389,10 @@ def integrate_gapfill_solution( gf_sensitivity[solution["media"].id] = {} if solution["target"] not in gf_sensitivity[solution["media"].id]: gf_sensitivity[solution["media"].id][solution["target"]] = {} - gf_sensitivity[solution["media"].id][solution["target"]]["success"] = ( - self.mdlutl.find_unproducible_biomass_compounds( - solution["target"], cumulative_solution - ) + gf_sensitivity[solution["media"].id][solution["target"]][ + "success" + ] = self.mdlutl.find_unproducible_biomass_compounds( + 
solution["target"], cumulative_solution ) self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") self.cumulative_gapfilling.extend(cumulative_solution) From 6348c58e6767e980cca9074379d70c29a01879c7 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 9 Apr 2024 17:48:04 -0500 Subject: [PATCH 127/146] fixes --- modelseedpy/community/mscommunity.py | 1 + modelseedpy/core/msbuilder.py | 1 + modelseedpy/core/msmodel.py | 1 - setup.py | 1 - tests/{ => core}/test_advanced.py | 0 tests/{ => core}/test_basic.py | 0 tests/test_data/mock_data.py | 5 ++--- tox.ini | 3 +-- 8 files changed, 5 insertions(+), 7 deletions(-) rename tests/{ => core}/test_advanced.py (100%) rename tests/{ => core}/test_basic.py (100%) diff --git a/modelseedpy/community/mscommunity.py b/modelseedpy/community/mscommunity.py index c2b5ab6f..ccd4acd8 100644 --- a/modelseedpy/community/mscommunity.py +++ b/modelseedpy/community/mscommunity.py @@ -125,6 +125,7 @@ def compute_max_atp(self): class MSCommunity: + def __init__( self, model=None, # the model that will be defined diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index c180cc6f..361ac778 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -1059,6 +1059,7 @@ def build_full_template_model(template, model_id=None, index="0"): :param index: index for the metabolites :return: """ + from modelseedpy.core.msmodel import MSModel model = MSModel(model_id if model_id else template.id, template=template) all_reactions = [] for rxn in template.reactions: diff --git a/modelseedpy/core/msmodel.py b/modelseedpy/core/msmodel.py index 36bfdc7c..d34fc9e6 100644 --- a/modelseedpy/core/msmodel.py +++ b/modelseedpy/core/msmodel.py @@ -113,7 +113,6 @@ def get_set_set(expr_str): # !!! this currently returns dictionaries, not sets? 
return {frozenset({str(x) for x in dnf.inputs})} else: return {frozenset({str(x) for x in o.inputs}) for o in dnf.xs} - return {} class MSModel(Model): diff --git a/setup.py b/setup.py index f2fe0b3b..aeed38f5 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,6 @@ "Topic :: Scientific/Engineering :: Bio-Informatics", "Intended Audience :: Science/Research", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/tests/test_advanced.py b/tests/core/test_advanced.py similarity index 100% rename from tests/test_advanced.py rename to tests/core/test_advanced.py diff --git a/tests/test_basic.py b/tests/core/test_basic.py similarity index 100% rename from tests/test_basic.py rename to tests/core/test_basic.py diff --git a/tests/test_data/mock_data.py b/tests/test_data/mock_data.py index 4c86b371..478aad0e 100644 --- a/tests/test_data/mock_data.py +++ b/tests/test_data/mock_data.py @@ -271,10 +271,9 @@ def remap(model, bigg_to_seed_cpd, bigg_to_seed_rxn, index="0"): def mock_model_ecoli_core(seed=True): - from cobra.io import load_json_model - from os import path + from cobra.io import load_model - model = load_json_model(path.join(path.dirname(__file__), "e_coli_core.json")) + model = load_model("textbook") if not seed: return model bigg_to_seed_cpd = { diff --git a/tox.ini b/tox.ini index d5ff7ef9..ba2e836d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,9 +1,8 @@ [tox] -envlist = py38,py39,py310 +envlist = py39,py310 [gh-actions] python = - 3.8: py38 3.9: py39 3.10: py310 From 6f15296baa538c01a0866916d2ba35a38c4c16e5 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 9 Apr 2024 17:49:18 -0500 Subject: [PATCH 128/146] black --- modelseedpy/community/mscommunity.py | 1 - modelseedpy/core/msbuilder.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/community/mscommunity.py 
b/modelseedpy/community/mscommunity.py index ccd4acd8..c2b5ab6f 100644 --- a/modelseedpy/community/mscommunity.py +++ b/modelseedpy/community/mscommunity.py @@ -125,7 +125,6 @@ def compute_max_atp(self): class MSCommunity: - def __init__( self, model=None, # the model that will be defined diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 361ac778..c49e2e5e 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -1060,6 +1060,7 @@ def build_full_template_model(template, model_id=None, index="0"): :return: """ from modelseedpy.core.msmodel import MSModel + model = MSModel(model_id if model_id else template.id, template=template) all_reactions = [] for rxn in template.reactions: From 184faf563c1f3f3e9181754fac38ec9e4be0afdc Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 9 Apr 2024 18:01:58 -0500 Subject: [PATCH 129/146] added py311 --- .github/workflows/pre-commit.yml | 2 +- .github/workflows/tox.yml | 2 +- .travis.yml | 4 ++-- setup.py | 2 +- tox.ini | 3 ++- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 6b54b4a0..ffde9f98 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v3 diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 9ce47285..c3d816d0 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -11,7 +11,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 - name: Set up Python diff --git a/.travis.yml b/.travis.yml index 20911611..75b2eb81 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 
+1,8 @@ language: python python: - - 3.7 - - 3.8 - 3.9 + - 3.10 + - 3.11 before_install: - python --version - pip install -U pip diff --git a/setup.py b/setup.py index aeed38f5..da01e792 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ ], install_requires=[ "networkx >= 2.4", - "cobra >= 0.28.0", + "cobra >= 0.29.0", "scikit-learn == 1.2.0", # version lock for pickle ML models "scipy >= 1.5.4", "chemicals >= 1.0.13", diff --git a/tox.ini b/tox.ini index ba2e836d..0aa1e6aa 100644 --- a/tox.ini +++ b/tox.ini @@ -1,10 +1,11 @@ [tox] -envlist = py39,py310 +envlist = py39,py310,py311 [gh-actions] python = 3.9: py39 3.10: py310 + 3.11: py311 [testenv] setenv = ARCHIVEINTERFACE_CPCONFIG = {toxinidir}/server.conf From 2b71ba0928319319c57d73f1f57dd06d0b484f08 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Fri, 21 Jun 2024 13:02:31 -0500 Subject: [PATCH 130/146] biochem --- modelseedpy/biochem/modelseed_compound.py | 6 ++++- modelseedpy/biochem/modelseed_reaction.py | 5 ++++- modelseedpy/core/msgenome.py | 27 ++++++++++++++--------- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/modelseedpy/biochem/modelseed_compound.py b/modelseedpy/biochem/modelseed_compound.py index a3ea75f3..c17941ac 100644 --- a/modelseedpy/biochem/modelseed_compound.py +++ b/modelseedpy/biochem/modelseed_compound.py @@ -57,7 +57,9 @@ def __init__( self.flags |= set(flags) def to_template_compartment_compound(self, compartment): - cpd_id = f"{self.seed_id}_{compartment}" + cpd_id = f"{self.seed_id}" + if compartment: + cpd_id += f"_{compartment}" # build Template Compound metabolite = MSTemplateMetabolite( self.seed_id, @@ -71,6 +73,8 @@ def to_template_compartment_compound(self, compartment): self.abbr, ) # build Template Compartment Compound + if compartment is None: + compartment = 'x' res = MSTemplateSpecies(cpd_id, self.charge, compartment, metabolite.id) # assign Compound to Compartment Compound diff --git a/modelseedpy/biochem/modelseed_reaction.py 
b/modelseedpy/biochem/modelseed_reaction.py index b43430ce..d0e3b050 100644 --- a/modelseedpy/biochem/modelseed_reaction.py +++ b/modelseedpy/biochem/modelseed_reaction.py @@ -174,8 +174,11 @@ def to_template_reaction(self, compartment_setup=None): raise ValueError("invalid compartment setup") from modelseedpy.core.msmodel import get_cmp_token + rxn_id = f"{self.id}" reaction_compartment = get_cmp_token(compartment_setup.values()) - rxn_id = f"{self.id}_{reaction_compartment}" + if reaction_compartment: + rxn_id += f"_{reaction_compartment}" + name = f"{self.name}" metabolites = {} for m, v in self.metabolites.items(): diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 78f1e004..51ec3896 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -8,6 +8,21 @@ DEFAULT_SPLIT = " " +def to_fasta(features, filename, l=80, fn_header=None): + with open(filename, "w") as fh: + for feature in features: + h = f">{feature.id}\n" + if fn_header: + h = fn_header(feature) + fh.write(h) + lines = [ + feature.seq[i: i + l] + "\n" for i in range(0, len(feature.seq), l) + ] + for line in lines: + fh.write(line) + return filename + + def normalize_role(s): s = s.strip().lower() s = re.sub(r"[\W_]+", "", s) @@ -111,17 +126,7 @@ def from_fasta( return genome def to_fasta(self, filename, l=80, fn_header=None): - with open(filename, "w") as fh: - for feature in self.features: - h = f">{feature.id}\n" - if fn_header: - h = fn_header(feature) - fh.write(h) - lines = [ - feature.seq[i : i + l] + "\n" for i in range(0, len(feature.seq), l) - ] - for line in lines: - fh.write(line) + to_fasta(self.features, filename, l, fn_header) return filename @staticmethod From f42f91f3e5e1019fa0d00a602368a1a47718609a Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 8 Jul 2024 23:13:29 -0500 Subject: [PATCH 131/146] bump 4.0 swapped pyeda for sympy --- README.rst | 5 - modelseedpy/__init__.py | 2 +- modelseedpy/biochem/modelseed_biochem.py | 50 
++++++++++ modelseedpy/biochem/modelseed_compound.py | 2 +- modelseedpy/core/msgenome.py | 50 +++++++++- modelseedpy/core/msmodel.py | 33 +++++-- setup.py | 5 +- tests/core/test_msmodel.py | 82 +++++++++++++++++ tests/core/test_mstemplate.py | 107 ++++++++++++++++++++++ 9 files changed, 320 insertions(+), 16 deletions(-) create mode 100644 tests/core/test_msmodel.py create mode 100644 tests/core/test_mstemplate.py diff --git a/README.rst b/README.rst index 6f380d9a..f2b4e973 100644 --- a/README.rst +++ b/README.rst @@ -51,8 +51,3 @@ The associated ModelSEED Database, which is required for a few packages, is simp git clone https://github.com/ModelSEED/ModelSEEDDatabase.git and the path to this repository is passed as an argument to the corresponding packages. - -**Windows users** must separately install the ``pyeda`` module: 1) download the appropriate wheel for your Python version from `this website `_ ; and 2) install the wheel through the following commands in a command prompt/powershell console:: - - cd path/to/pyeda/wheel - pip install pyeda_wheel_name.whl diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 74ea066b..995c054f 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -14,7 +14,7 @@ __author__ = "Christopher Henry" __email__ = "chenry@anl.gov" -__version__ = "0.3.3" +__version__ = "0.4.0" logger = logging.getLogger(__name__) diff --git a/modelseedpy/biochem/modelseed_biochem.py b/modelseedpy/biochem/modelseed_biochem.py index 80594e0e..439becb0 100644 --- a/modelseedpy/biochem/modelseed_biochem.py +++ b/modelseedpy/biochem/modelseed_biochem.py @@ -250,6 +250,56 @@ def _load_metabolites( return metabolites +def build_modelseed_reaction( + o, names, aliases, ec_numbers, metabolites_indexed, metabolites +): + if "id" in o and o["id"]: + rxn_names = set() + if o["id"] in names: + rxn_names |= names[o["id"]] + ( + lower_bound, + upper_bound, + ) = get_reaction_constraints_from_direction(o.get("reversibility")) + 
stoichiometry = o.get("stoichiometry") + reaction_metabolites = {} + for s in stoichiometry: + cmp_token = s["compartment"] + value = s["coefficient"] + cpd = metabolites[s["compound"]] + cpd_index_id = f"{cpd.id}_{cmp_token}" + if cpd_index_id not in metabolites_indexed: + cpd_token = cpd.copy() + cpd_token.id = f"{cpd.id}_{cmp_token}" + cpd_token.base_id = cpd.id + cpd_token.compartment = cmp_token + metabolites_indexed[cpd_index_id] = cpd_token + reaction_metabolites[metabolites_indexed[cpd_index_id]] = value + rxn = ModelSEEDReaction2( + o["id"], + o.get("name"), + "", + lower_bound, + upper_bound, + "", + rxn_names, + o.get("deltag"), + o.get("deltagerr"), + o.get("is_obsolete"), + None, + o.get("status"), + o.get("source"), + ) + rxn.add_metabolites(reaction_metabolites) + if rxn.id in aliases: + rxn.annotation.update(aliases[rxn.id]) + if rxn.id in ec_numbers: + rxn.annotation["ec-code"] = ec_numbers[rxn.id] + return rxn + else: + raise ValueError("unable to build reaction") + + def _load_reactions( database_path: str, metabolites: dict, aliases=None, names=None, ec_numbers=None ) -> (dict, dict): diff --git a/modelseedpy/biochem/modelseed_compound.py b/modelseedpy/biochem/modelseed_compound.py index c17941ac..c5c73fed 100644 --- a/modelseedpy/biochem/modelseed_compound.py +++ b/modelseedpy/biochem/modelseed_compound.py @@ -74,7 +74,7 @@ def to_template_compartment_compound(self, compartment): ) # build Template Compartment Compound if compartment is None: - compartment = 'x' + compartment = "x" res = MSTemplateSpecies(cpd_id, self.charge, compartment, metabolite.id) # assign Compound to Compartment Compound diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 51ec3896..99790484 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -16,7 +16,7 @@ def to_fasta(features, filename, l=80, fn_header=None): h = fn_header(feature) fh.write(h) lines = [ - feature.seq[i: i + l] + "\n" for i in range(0, 
len(feature.seq), l) + feature.seq[i : i + l] + "\n" for i in range(0, len(feature.seq), l) ] for line in lines: fh.write(line) @@ -40,6 +40,17 @@ def read_fasta(f, split=DEFAULT_SPLIT, h_func=None): return parse_fasta_str(fh.read(), split, h_func) +def read_fasta2(f, split=DEFAULT_SPLIT, h_func=None): + if f.endswith(".gz"): + import gzip + + with gzip.open(f, "rb") as fh: + return extract_features(fh.read().decode("utf-8"), split, h_func) + else: + with open(f, "r") as fh: + return extract_features(fh.read(), split, h_func) + + def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None): features = [] seq = None @@ -68,6 +79,37 @@ def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None): return features +def extract_features(faa_str, split=DEFAULT_SPLIT, h_func=None): + features = [] + active_seq = None + seq_lines = [] + for line in faa_str.split("\n"): + if line.startswith(">"): + if active_seq is not None: + active_seq.seq = "".join(seq_lines) + features.append(active_seq) + seq_lines = [] + seq_id = line[1:] + desc = None + if h_func: + seq_id, desc = h_func(seq_id) + elif split: + header_data = line[1:].split(split, 1) + seq_id = header_data[0] + if len(header_data) > 1: + desc = header_data[1] + active_seq = MSFeature(seq_id, "", desc) + else: + seq_lines.append(line.strip()) + + # add last sequence + if len(seq_lines) > 0: + active_seq.seq = "".join(seq_lines) + features.append(active_seq) + + return features + + class MSFeature: def __init__(self, feature_id, sequence, description=None, aliases=None): """ @@ -125,6 +167,12 @@ def from_fasta( genome.features += read_fasta(filename, split, h_func) return genome + @staticmethod + def from_fasta2(filename, split=" ", h_func=None): + genome = MSGenome() + genome.features += read_fasta2(filename, split, h_func) + return genome + def to_fasta(self, filename, l=80, fn_header=None): to_fasta(self.features, filename, l, fn_header) return filename diff --git a/modelseedpy/core/msmodel.py 
b/modelseedpy/core/msmodel.py index d34fc9e6..48c9f985 100644 --- a/modelseedpy/core/msmodel.py +++ b/modelseedpy/core/msmodel.py @@ -1,10 +1,12 @@ # -*- coding: utf-8 -*- import logging import re -from cobra.core import Model -from pyeda.inter import ( - expr, -) # wheels must be specially downloaded and installed for Windows https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyeda +from sympy.logic.inference import satisfiable +from sympy import Symbol +import sympy.logic.boolalg as spl +from cobra.core import Model, GPR + +# from pyeda.inter import expr logger = logging.getLogger(__name__) @@ -103,18 +105,37 @@ def get_cmp_token(compartments): return None -def get_set_set(expr_str): # !!! this currently returns dictionaries, not sets?? +def get_set_set_pyeda(expr_str: str, pyeda_expr): if len(expr_str.strip()) == 0: return {} expr_str = expr_str.replace(" or ", " | ") expr_str = expr_str.replace(" and ", " & ") - dnf = expr(expr_str).to_dnf() + dnf = pyeda_expr(expr_str).to_dnf() if len(dnf.inputs) == 1 or dnf.NAME == "And": return {frozenset({str(x) for x in dnf.inputs})} else: return {frozenset({str(x) for x in o.inputs}) for o in dnf.xs} +def get_set_set(expr_str: str): + if expr_str is None or len(expr_str.strip()) == 0: + return {} + gpr = GPR.from_string(expr_str) + expr = gpr.as_symbolic() + expr_model = list(satisfiable(expr, all_models=True)) + dnf = spl.SOPform(tuple(gpr.genes), list(expr_model)) + if type(dnf) == spl.And or type(dnf) == Symbol: + variable_set = set() + variable_set.add(frozenset({atom.name for atom in dnf.atoms()})) + return frozenset(variable_set) + elif type(dnf) == spl.Or: + return frozenset( + {frozenset({atom.name for atom in x.atoms()}) for x in dnf.args} + ) + else: + raise ValueError(f"unable to decode {expr_str} found token of type {type(dnf)}") + + class MSModel(Model): def __init__(self, id_or_model=None, genome=None, template=None): """ diff --git a/setup.py b/setup.py index da01e792..97e79dfd 100644 --- a/setup.py +++ b/setup.py 
@@ -10,7 +10,7 @@ setup( name="ModelSEEDpy", - version="0.3.3", + version="0.4.0", description="Python package for building and analyzing models using ModelSEED", long_description_content_type="text/x-rst", long_description=readme, @@ -40,7 +40,8 @@ "chemicals >= 1.0.13", "chemw >= 0.3.2", "matplotlib >= 3.0.0", - "pyeda", + "Jinja2 >= 3.1.4", + "sympy >=1.12.0", ], tests_require=[ "pytest", diff --git a/tests/core/test_msmodel.py b/tests/core/test_msmodel.py new file mode 100644 index 00000000..68478498 --- /dev/null +++ b/tests/core/test_msmodel.py @@ -0,0 +1,82 @@ +from modelseedpy.core.msmodel import * + + +def test_get_direction_from_constraints1(): + res = get_direction_from_constraints(0, 1000) + + assert res == ">" + + +def test_get_direction_from_constraints2(): + res = get_direction_from_constraints(-1000, 0) + + assert res == "<" + + +def test_get_direction_from_constraints3(): + res = get_direction_from_constraints(-1000, 1000) + + assert res == "=" + + +def test_get_set_set1(): + res = get_set_set("A") + + assert len(res) == 1 + assert {"A"} in res + + +def test_get_set_set2(): + res = get_set_set("A and B") + + assert len(res) == 1 + assert {"A", "B"} in res + + +def test_get_set_set3(): + res = get_set_set("A or B") + + assert len(res) == 2 + assert {"A"} in res + assert {"B"} in res + + +def test_get_set_set4(): + res = get_set_set("A or B or C") + + assert len(res) == 3 + assert {"A"} in res + assert {"B"} in res + assert {"C"} in res + + +def test_get_set_set5(): + res = get_set_set("A or B and C") + + assert len(res) == 2 + assert {"A"} in res + assert {"B", "C"} in res + + +def test_get_set_set6(): + res = get_set_set("A and B or C") + + assert len(res) == 2 + assert {"A", "B"} in res + assert {"C"} in res + + +def test_get_set_set7(): + res = get_set_set("(A or B) and C") + + assert len(res) == 2 + assert {"A", "C"} in res + assert {"B", "C"} in res + + +def test_get_set_set8(): + res = get_set_set("A and (B or C)") + + assert len(res) == 2 + 
assert {"A", "B"} in res + assert {"A", "C"} in res diff --git a/tests/core/test_mstemplate.py b/tests/core/test_mstemplate.py new file mode 100644 index 00000000..8de3eeea --- /dev/null +++ b/tests/core/test_mstemplate.py @@ -0,0 +1,107 @@ +import pytest +from modelseedpy.core.mstemplate import ( + MSTemplate, + MSTemplateMetabolite, + MSTemplateReaction, + MSTemplateSpecies, +) +from modelseedpy.core.mstemplate import ( + NewModelTemplateRole, + NewModelTemplateComplex, + MSTemplateCompartment, +) + + +@pytest.fixture +def empty_template(): + return MSTemplate("test", "test name", "test") + + +def test_empty_template(): + template = MSTemplate("test", "test name", "test") + assert template.id == "test" + assert template.name == "test name" + assert len(template.roles) == 0 + assert len(template.complexes) == 0 + assert len(template.compounds) == 0 + assert len(template.compcompounds) == 0 + assert len(template.reactions) == 0 + + +def test_template_add_role(empty_template): + role = NewModelTemplateRole("role1", "metabolic function") + empty_template.add_roles([role]) + assert len(empty_template.roles) == 1 + + +def test_template_add_role_mult(empty_template): + role_a = NewModelTemplateRole("roleA", "metabolic function A") + role_b = NewModelTemplateRole("roleB", "metabolic function B") + role_c = NewModelTemplateRole("roleC", "metabolic function C") + empty_template.add_roles([role_a, role_b, role_c]) + assert len(empty_template.roles) == 3 + + +def test_template_add_simple_complex(empty_template): + role = NewModelTemplateRole("role1", "metabolic function") + empty_template.add_roles([role]) + + seed_complex = NewModelTemplateComplex("complex1", "example complex") + + seed_complex.add_role(empty_template.roles.role1) + + empty_template.add_complexes([seed_complex]) + + assert len(empty_template.complexes) == 1 + + +def test_template_add_simple_metabolite(empty_template): + cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") + 
empty_template.add_compounds([cpd_apple]) + + assert len(empty_template.compounds) == 1 + + +def test_template_add_simple_metabolite_species(empty_template): + cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") + empty_template.add_compounds([cpd_apple]) + + comp_cpd_apple = MSTemplateSpecies("apple_k", 0, "k", "apple") + empty_template.add_comp_compounds([comp_cpd_apple]) + + assert len(empty_template.compounds) == 1 + assert len(empty_template.compcompounds) == 1 + assert empty_template.compcompounds.apple_k.compound + assert empty_template.compcompounds.apple_k.compound.name == "just a apple" + assert len(empty_template.compounds.apple.species) == 1 + + +def test_template_add_compartment(empty_template): + empty_template.compartments += [MSTemplateCompartment("w", "world", 4)] + + assert len(empty_template.compartments) == 1 + + +def test_template_add_reaction(empty_template): + cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") + cpd_apple_pie = MSTemplateMetabolite("appie", "C1000", "apple pie (10 apples)") + empty_template.add_compounds([cpd_apple, cpd_apple_pie]) + + comp_cpd_apple = MSTemplateSpecies("apple_k", 0, "k", "apple") + comp_cpd_apple_pie = MSTemplateSpecies("appie_k", 0, "k", "appie") + empty_template.add_comp_compounds([comp_cpd_apple, comp_cpd_apple_pie]) + + rxn_make_pie = MSTemplateReaction( + "rxn_pie_k", "rxn00000", "make pie", "pie", 0, 1000 + ) + rxn_make_pie.add_metabolites( + { + empty_template.compcompounds.apple_k: -10, + empty_template.compcompounds.appie_k: 1, + } + ) + + empty_template.add_reactions([rxn_make_pie]) + + assert len(empty_template.reactions) == 1 + assert empty_template.reactions.rxn_pie_k.check_mass_balance() == {} From 9e978484edb1fbbe94336188df9daf56d676c2be Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 8 Jul 2024 23:25:44 -0500 Subject: [PATCH 132/146] mixed line endings --- tests/core/test_msmodel.py | 164 +++++++++++++------------- tests/core/test_mstemplate.py | 214 
+++++++++++++++++----------------- 2 files changed, 189 insertions(+), 189 deletions(-) diff --git a/tests/core/test_msmodel.py b/tests/core/test_msmodel.py index 68478498..0aa63ffa 100644 --- a/tests/core/test_msmodel.py +++ b/tests/core/test_msmodel.py @@ -1,82 +1,82 @@ -from modelseedpy.core.msmodel import * - - -def test_get_direction_from_constraints1(): - res = get_direction_from_constraints(0, 1000) - - assert res == ">" - - -def test_get_direction_from_constraints2(): - res = get_direction_from_constraints(-1000, 0) - - assert res == "<" - - -def test_get_direction_from_constraints3(): - res = get_direction_from_constraints(-1000, 1000) - - assert res == "=" - - -def test_get_set_set1(): - res = get_set_set("A") - - assert len(res) == 1 - assert {"A"} in res - - -def test_get_set_set2(): - res = get_set_set("A and B") - - assert len(res) == 1 - assert {"A", "B"} in res - - -def test_get_set_set3(): - res = get_set_set("A or B") - - assert len(res) == 2 - assert {"A"} in res - assert {"B"} in res - - -def test_get_set_set4(): - res = get_set_set("A or B or C") - - assert len(res) == 3 - assert {"A"} in res - assert {"B"} in res - assert {"C"} in res - - -def test_get_set_set5(): - res = get_set_set("A or B and C") - - assert len(res) == 2 - assert {"A"} in res - assert {"B", "C"} in res - - -def test_get_set_set6(): - res = get_set_set("A and B or C") - - assert len(res) == 2 - assert {"A", "B"} in res - assert {"C"} in res - - -def test_get_set_set7(): - res = get_set_set("(A or B) and C") - - assert len(res) == 2 - assert {"A", "C"} in res - assert {"B", "C"} in res - - -def test_get_set_set8(): - res = get_set_set("A and (B or C)") - - assert len(res) == 2 - assert {"A", "B"} in res - assert {"A", "C"} in res +from modelseedpy.core.msmodel import * + + +def test_get_direction_from_constraints1(): + res = get_direction_from_constraints(0, 1000) + + assert res == ">" + + +def test_get_direction_from_constraints2(): + res = 
get_direction_from_constraints(-1000, 0) + + assert res == "<" + + +def test_get_direction_from_constraints3(): + res = get_direction_from_constraints(-1000, 1000) + + assert res == "=" + + +def test_get_set_set1(): + res = get_set_set("A") + + assert len(res) == 1 + assert {"A"} in res + + +def test_get_set_set2(): + res = get_set_set("A and B") + + assert len(res) == 1 + assert {"A", "B"} in res + + +def test_get_set_set3(): + res = get_set_set("A or B") + + assert len(res) == 2 + assert {"A"} in res + assert {"B"} in res + + +def test_get_set_set4(): + res = get_set_set("A or B or C") + + assert len(res) == 3 + assert {"A"} in res + assert {"B"} in res + assert {"C"} in res + + +def test_get_set_set5(): + res = get_set_set("A or B and C") + + assert len(res) == 2 + assert {"A"} in res + assert {"B", "C"} in res + + +def test_get_set_set6(): + res = get_set_set("A and B or C") + + assert len(res) == 2 + assert {"A", "B"} in res + assert {"C"} in res + + +def test_get_set_set7(): + res = get_set_set("(A or B) and C") + + assert len(res) == 2 + assert {"A", "C"} in res + assert {"B", "C"} in res + + +def test_get_set_set8(): + res = get_set_set("A and (B or C)") + + assert len(res) == 2 + assert {"A", "B"} in res + assert {"A", "C"} in res diff --git a/tests/core/test_mstemplate.py b/tests/core/test_mstemplate.py index 8de3eeea..8be3683d 100644 --- a/tests/core/test_mstemplate.py +++ b/tests/core/test_mstemplate.py @@ -1,107 +1,107 @@ -import pytest -from modelseedpy.core.mstemplate import ( - MSTemplate, - MSTemplateMetabolite, - MSTemplateReaction, - MSTemplateSpecies, -) -from modelseedpy.core.mstemplate import ( - NewModelTemplateRole, - NewModelTemplateComplex, - MSTemplateCompartment, -) - - -@pytest.fixture -def empty_template(): - return MSTemplate("test", "test name", "test") - - -def test_empty_template(): - template = MSTemplate("test", "test name", "test") - assert template.id == "test" - assert template.name == "test name" - assert len(template.roles) 
== 0 - assert len(template.complexes) == 0 - assert len(template.compounds) == 0 - assert len(template.compcompounds) == 0 - assert len(template.reactions) == 0 - - -def test_template_add_role(empty_template): - role = NewModelTemplateRole("role1", "metabolic function") - empty_template.add_roles([role]) - assert len(empty_template.roles) == 1 - - -def test_template_add_role_mult(empty_template): - role_a = NewModelTemplateRole("roleA", "metabolic function A") - role_b = NewModelTemplateRole("roleB", "metabolic function B") - role_c = NewModelTemplateRole("roleC", "metabolic function C") - empty_template.add_roles([role_a, role_b, role_c]) - assert len(empty_template.roles) == 3 - - -def test_template_add_simple_complex(empty_template): - role = NewModelTemplateRole("role1", "metabolic function") - empty_template.add_roles([role]) - - seed_complex = NewModelTemplateComplex("complex1", "example complex") - - seed_complex.add_role(empty_template.roles.role1) - - empty_template.add_complexes([seed_complex]) - - assert len(empty_template.complexes) == 1 - - -def test_template_add_simple_metabolite(empty_template): - cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") - empty_template.add_compounds([cpd_apple]) - - assert len(empty_template.compounds) == 1 - - -def test_template_add_simple_metabolite_species(empty_template): - cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") - empty_template.add_compounds([cpd_apple]) - - comp_cpd_apple = MSTemplateSpecies("apple_k", 0, "k", "apple") - empty_template.add_comp_compounds([comp_cpd_apple]) - - assert len(empty_template.compounds) == 1 - assert len(empty_template.compcompounds) == 1 - assert empty_template.compcompounds.apple_k.compound - assert empty_template.compcompounds.apple_k.compound.name == "just a apple" - assert len(empty_template.compounds.apple.species) == 1 - - -def test_template_add_compartment(empty_template): - empty_template.compartments += [MSTemplateCompartment("w", "world", 
4)] - - assert len(empty_template.compartments) == 1 - - -def test_template_add_reaction(empty_template): - cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") - cpd_apple_pie = MSTemplateMetabolite("appie", "C1000", "apple pie (10 apples)") - empty_template.add_compounds([cpd_apple, cpd_apple_pie]) - - comp_cpd_apple = MSTemplateSpecies("apple_k", 0, "k", "apple") - comp_cpd_apple_pie = MSTemplateSpecies("appie_k", 0, "k", "appie") - empty_template.add_comp_compounds([comp_cpd_apple, comp_cpd_apple_pie]) - - rxn_make_pie = MSTemplateReaction( - "rxn_pie_k", "rxn00000", "make pie", "pie", 0, 1000 - ) - rxn_make_pie.add_metabolites( - { - empty_template.compcompounds.apple_k: -10, - empty_template.compcompounds.appie_k: 1, - } - ) - - empty_template.add_reactions([rxn_make_pie]) - - assert len(empty_template.reactions) == 1 - assert empty_template.reactions.rxn_pie_k.check_mass_balance() == {} +import pytest +from modelseedpy.core.mstemplate import ( + MSTemplate, + MSTemplateMetabolite, + MSTemplateReaction, + MSTemplateSpecies, +) +from modelseedpy.core.mstemplate import ( + NewModelTemplateRole, + NewModelTemplateComplex, + MSTemplateCompartment, +) + + +@pytest.fixture +def empty_template(): + return MSTemplate("test", "test name", "test") + + +def test_empty_template(): + template = MSTemplate("test", "test name", "test") + assert template.id == "test" + assert template.name == "test name" + assert len(template.roles) == 0 + assert len(template.complexes) == 0 + assert len(template.compounds) == 0 + assert len(template.compcompounds) == 0 + assert len(template.reactions) == 0 + + +def test_template_add_role(empty_template): + role = NewModelTemplateRole("role1", "metabolic function") + empty_template.add_roles([role]) + assert len(empty_template.roles) == 1 + + +def test_template_add_role_mult(empty_template): + role_a = NewModelTemplateRole("roleA", "metabolic function A") + role_b = NewModelTemplateRole("roleB", "metabolic function B") + role_c 
= NewModelTemplateRole("roleC", "metabolic function C") + empty_template.add_roles([role_a, role_b, role_c]) + assert len(empty_template.roles) == 3 + + +def test_template_add_simple_complex(empty_template): + role = NewModelTemplateRole("role1", "metabolic function") + empty_template.add_roles([role]) + + seed_complex = NewModelTemplateComplex("complex1", "example complex") + + seed_complex.add_role(empty_template.roles.role1) + + empty_template.add_complexes([seed_complex]) + + assert len(empty_template.complexes) == 1 + + +def test_template_add_simple_metabolite(empty_template): + cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") + empty_template.add_compounds([cpd_apple]) + + assert len(empty_template.compounds) == 1 + + +def test_template_add_simple_metabolite_species(empty_template): + cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") + empty_template.add_compounds([cpd_apple]) + + comp_cpd_apple = MSTemplateSpecies("apple_k", 0, "k", "apple") + empty_template.add_comp_compounds([comp_cpd_apple]) + + assert len(empty_template.compounds) == 1 + assert len(empty_template.compcompounds) == 1 + assert empty_template.compcompounds.apple_k.compound + assert empty_template.compcompounds.apple_k.compound.name == "just a apple" + assert len(empty_template.compounds.apple.species) == 1 + + +def test_template_add_compartment(empty_template): + empty_template.compartments += [MSTemplateCompartment("w", "world", 4)] + + assert len(empty_template.compartments) == 1 + + +def test_template_add_reaction(empty_template): + cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") + cpd_apple_pie = MSTemplateMetabolite("appie", "C1000", "apple pie (10 apples)") + empty_template.add_compounds([cpd_apple, cpd_apple_pie]) + + comp_cpd_apple = MSTemplateSpecies("apple_k", 0, "k", "apple") + comp_cpd_apple_pie = MSTemplateSpecies("appie_k", 0, "k", "appie") + empty_template.add_comp_compounds([comp_cpd_apple, comp_cpd_apple_pie]) + + 
rxn_make_pie = MSTemplateReaction( + "rxn_pie_k", "rxn00000", "make pie", "pie", 0, 1000 + ) + rxn_make_pie.add_metabolites( + { + empty_template.compcompounds.apple_k: -10, + empty_template.compcompounds.appie_k: 1, + } + ) + + empty_template.add_reactions([rxn_make_pie]) + + assert len(empty_template.reactions) == 1 + assert empty_template.reactions.rxn_pie_k.check_mass_balance() == {} From 311563e39959e46f8acf47c4f09485e58f787f4b Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 8 Jul 2024 23:28:49 -0500 Subject: [PATCH 133/146] pragmas --- tests/core/test_msmodel.py | 1 + tests/core/test_mstemplate.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tests/core/test_msmodel.py b/tests/core/test_msmodel.py index 0aa63ffa..ec4027f5 100644 --- a/tests/core/test_msmodel.py +++ b/tests/core/test_msmodel.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from modelseedpy.core.msmodel import * diff --git a/tests/core/test_mstemplate.py b/tests/core/test_mstemplate.py index 8be3683d..9663e8c8 100644 --- a/tests/core/test_mstemplate.py +++ b/tests/core/test_mstemplate.py @@ -1,4 +1,6 @@ +# -*- coding: utf-8 -*- import pytest + from modelseedpy.core.mstemplate import ( MSTemplate, MSTemplateMetabolite, From 43770ce67072924932dc19cf184f921ae1ed04eb Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 9 Jul 2024 00:07:37 -0500 Subject: [PATCH 134/146] readne --- README.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index f2b4e973..3d491ec3 100644 --- a/README.rst +++ b/README.rst @@ -25,6 +25,10 @@ ________________________________________________________________________ :target: https://pepy.tech/project/modelseedpy :alt: Downloads +.. image:: https://img.shields.io/badge/code%20style-black-000000.svg + :target: https://github.com/ambv/black + :alt: Black + Metabolic modeling is an pivotal method for computational research in synthetic biology and precision medicine. 
The metabolic models, such as the constrint-based flux balance analysis (FBA) algorithm, are improved with comprehensive datasets that capture more metabolic chemistry in the model and improve the accuracy of simulation predictions. We therefore developed ModelSEEDpy as a comprehensive suite of packages that bootstrap metabolic modeling with the ModelSEED Database (`Seaver et al., 2021 `_ ). These packages parse and manipulate (e.g. gapfill missing reactions or calculated chemical properties of metabolites), constrain (with kinetic, thermodynamics, and nutrient uptake), and simulate cobrakbase models (both individual models and communities). This is achieved by standardizing COBRA models through the ``cobrakbase`` module into a form that is amenable with the KBase/ModelSEED ecosystem. These functionalities are exemplified in `Python Notebooks `_ . Please submit errors, inquiries, or suggestions as `GitHub issues `_ where they can be addressed by our developers. @@ -33,11 +37,11 @@ Metabolic modeling is an pivotal method for computational research in synthetic Installation ---------------------- -ModelSEEDpy will soon be installable via the ``PyPI`` channel:: +PIP (latest stable version 0.4.0):: pip install modelseedpy -but, until then, the repository must cloned:: +GitHub dev build (latest working version):: git clone https://github.com/ModelSEED/ModelSEEDpy.git From 899e093b786b5fcd0d4336ad1fd475955d54f130 Mon Sep 17 00:00:00 2001 From: jplfaria Date: Sat, 20 Jul 2024 05:40:37 +0000 Subject: [PATCH 135/146] Add pigment and carbohydrate biomass categories --- modelseedpy/core/mstemplate.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 49fd98c3..3144c43e 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -529,6 +529,8 @@ def __init__( lipid, cellwall, cofactor, + pigment, + carbohydrate, energy, other, ): @@ -543,6 +545,8 @@ def 
__init__( :param lipid:float :param cellwall:float :param cofactor:float + :param pigment:float + :param carbohydrate:float :param energy:float :param other:float """ @@ -555,6 +559,8 @@ def __init__( self.lipid = lipid self.cellwall = cellwall self.cofactor = cofactor + self.pigment = pigment + self.carbohydrate = carbohydrate self.energy = energy self.other = other self.templateBiomassComponents = DictList() @@ -573,6 +579,8 @@ def from_table( lipid, cellwall, cofactor, + pigment, + carbohydrate, energy, other, ): @@ -586,6 +594,8 @@ def from_table( lipid, cellwall, cofactor, + pigment, + carbohydrate, energy, other, ) @@ -633,6 +643,8 @@ def from_dict(d, template): d["lipid"], d["cellwall"], d["cofactor"], + d["pigment"], + d["carbohydrate"], d["energy"], d["other"], ) @@ -689,6 +701,8 @@ def get_or_create_reaction(self, model, baseid, compartment=None, index=None): def build_biomass(self, model, index="0", classic=False, GC=0.5, add_to_model=True): types = [ "cofactor", + "pigment", + "carbohydrate", "lipid", "cellwall", "protein", @@ -699,6 +713,8 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5, add_to_model=Tr ] type_abundances = { "cofactor": self.cofactor, + "pigment": self.pigment, + "carbohydrate": self.carbohydrate, "lipid": self.lipid, "cellwall": self.cellwall, "protein": self.protein, @@ -876,6 +892,8 @@ def get_data(self): "lipid": self.lipid, "cellwall": self.cellwall, "cofactor": self.cofactor, + "pigment": self.pigment, + "carbohydrate": self.carbohydrate, "energy": self.energy, "other": self.other, "templateBiomassComponents": [], @@ -1122,6 +1140,8 @@ def overwrite_biomass_from_table( lipid, cellwall, cofactor, + pigment, + carbohydrate, energy, other, ): @@ -1139,6 +1159,8 @@ def overwrite_biomass_from_table( lipid, cellwall, cofactor, + pigment, + carbohydrate, energy, other, ) From ff94ce1803ae615f1e12a1b0d190411d81dfc46e Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 3 Sep 2024 22:50:32 -0500 Subject: [PATCH 136/146] 
genome --- modelseedpy/core/msatpcorrection.py | 55 +++++++++-------------------- modelseedpy/core/msgenome.py | 10 +----- setup.py | 1 + 3 files changed, 18 insertions(+), 48 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 083fc719..bcbc9ccb 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -1,20 +1,7 @@ # -*- coding: utf-8 -*- import logging -import cobra -import copy import json -import time import pandas as pd -from os.path import abspath as _abspath -from os.path import dirname as _dirname -from optlang.symbolics import Zero, add -from modelseedpy.core.rast_client import RastClient -from modelseedpy.core.msgenome import normalize_role -from modelseedpy.core.msmodel import ( - get_gpr_string, - get_reaction_constraints_from_direction, -) -from cobra.core import Gene, Metabolite, Model, Reaction from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.mstemplate import MSTemplateBuilder from modelseedpy.core import FBAHelper, MSGapfill, MSMedia @@ -22,11 +9,6 @@ from modelseedpy.helpers import get_template logger = logging.getLogger(__name__) -logger.setLevel( - logging.INFO -) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO - -_path = _dirname(_abspath(__file__)) min_gap = { "Glc.O2": 5, @@ -90,12 +72,6 @@ def __init__( self.modelutl = MSModelUtil.get(model_or_mdlutl) # Setting atpcorrection attribute in model utl so link is bidirectional self.modelutl.atputl = self - - if default_media_path: - self.default_media_path = default_media_path - else: - self.default_media_path = _path + "/../data/atp_medias.tsv" - self.compartment = compartment if atp_hydrolysis_id and atp_hydrolysis_id in self.model.reactions: @@ -106,21 +82,18 @@ def __init__( self.media_hash = {} self.atp_medias = [] + if load_default_medias: - self.load_default_medias() + self.load_default_medias(default_media_path) media_ids = set() - 
for media in atp_medias: - if isinstance(media, list): - if media[0].id in media_ids: - raise ValueError("media ids not unique") - media_ids.add(media[0].id) - self.atp_medias.append(media) - else: - if media.id in media_ids: - raise ValueError("media ids not unique") - media_ids.add(media.id) - self.atp_medias.append([media, 0.01]) + for media_or_list in atp_medias: + media = media_or_list[0] if isinstance(media_or_list, list) else media_or_list + min_obj = media_or_list[1] if isinstance(media_or_list, list) else 0.01 + if media.id in media_ids: + raise ValueError("media ids not unique") + media_ids.add(media.id) + self.atp_medias.append((media, min_obj)) self.media_hash[media.id] = media if "empty" not in self.media_hash: media = MSMedia.from_dict({}) @@ -164,8 +137,12 @@ def load_default_template(self): get_template("template_core"), None ).build() - def load_default_medias(self): - filename = self.default_media_path + def load_default_medias(self, default_media_path=None): + if default_media_path is None: + import os.path as _path + current_file_path = _path.dirname(_path.abspath(__file__)) + default_media_path = f"{current_file_path}/../data/atp_medias.tsv" + filename = default_media_path medias = pd.read_csv(filename, sep="\t", index_col=0).to_dict() for media_id in medias: media_d = {} @@ -179,7 +156,7 @@ def load_default_medias(self): media.id = media_id media.name = media_id min_obj = 0.01 - self.atp_medias.append([media, min_gap.get(media_d, min_obj)]) + self.atp_medias.append((media, min_gap.get(media_d, min_obj))) @staticmethod def find_reaction_in_template(model_reaction, template, compartment): diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 99790484..86063cc2 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -160,15 +160,7 @@ def add_features(self, feature_list: list): self.features += feature_list @staticmethod - def from_fasta( - filename, contigs=0, split="|", h_func=None - ): # !!! 
the contigs argument is never used - genome = MSGenome() - genome.features += read_fasta(filename, split, h_func) - return genome - - @staticmethod - def from_fasta2(filename, split=" ", h_func=None): + def from_fasta(filename, split=" ", h_func=None): genome = MSGenome() genome.features += read_fasta2(filename, split, h_func) return genome diff --git a/setup.py b/setup.py index 97e79dfd..17f6e136 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ install_requires=[ "networkx >= 2.4", "cobra >= 0.29.0", + "pandas >= 2.2.2", "scikit-learn == 1.2.0", # version lock for pickle ML models "scipy >= 1.5.4", "chemicals >= 1.0.13", From 2961bfb4e74767cb67914dc7215cefc0d220897f Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 26 Sep 2024 12:04:53 -0500 Subject: [PATCH 137/146] reaction pathways --- modelseedpy/biochem/modelseed_biochem.py | 1 + modelseedpy/biochem/modelseed_reaction.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/modelseedpy/biochem/modelseed_biochem.py b/modelseedpy/biochem/modelseed_biochem.py index 439becb0..c970222a 100644 --- a/modelseedpy/biochem/modelseed_biochem.py +++ b/modelseedpy/biochem/modelseed_biochem.py @@ -357,6 +357,7 @@ def _load_reactions( None, o.get("status"), o.get("source"), + pathways=o.get('pathways') ) rxn.add_metabolites(reaction_metabolites) if rxn.id in aliases: diff --git a/modelseedpy/biochem/modelseed_reaction.py b/modelseedpy/biochem/modelseed_reaction.py index d0e3b050..5b828c55 100644 --- a/modelseedpy/biochem/modelseed_reaction.py +++ b/modelseedpy/biochem/modelseed_reaction.py @@ -134,6 +134,7 @@ def __init__( status=None, source=None, flags=None, + pathways=None ): super().__init__(rxn_id, name, subsystem, lower_bound, upper_bound) @@ -165,6 +166,8 @@ def __init__( if flags: self.flags |= set(flags) + self.pathways = pathways + @property def compound_ids(self): return None From 67957d367fe0524969855dd618bf9eb20ccf8fe1 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 26 Sep 2024 12:05:47 -0500 
Subject: [PATCH 138/146] black --- modelseedpy/biochem/modelseed_biochem.py | 2 +- modelseedpy/biochem/modelseed_reaction.py | 2 +- modelseedpy/core/msatpcorrection.py | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/modelseedpy/biochem/modelseed_biochem.py b/modelseedpy/biochem/modelseed_biochem.py index c970222a..634a97ba 100644 --- a/modelseedpy/biochem/modelseed_biochem.py +++ b/modelseedpy/biochem/modelseed_biochem.py @@ -357,7 +357,7 @@ def _load_reactions( None, o.get("status"), o.get("source"), - pathways=o.get('pathways') + pathways=o.get("pathways"), ) rxn.add_metabolites(reaction_metabolites) if rxn.id in aliases: diff --git a/modelseedpy/biochem/modelseed_reaction.py b/modelseedpy/biochem/modelseed_reaction.py index 5b828c55..04b5e086 100644 --- a/modelseedpy/biochem/modelseed_reaction.py +++ b/modelseedpy/biochem/modelseed_reaction.py @@ -134,7 +134,7 @@ def __init__( status=None, source=None, flags=None, - pathways=None + pathways=None, ): super().__init__(rxn_id, name, subsystem, lower_bound, upper_bound) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index bcbc9ccb..448580ea 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -88,7 +88,9 @@ def __init__( media_ids = set() for media_or_list in atp_medias: - media = media_or_list[0] if isinstance(media_or_list, list) else media_or_list + media = ( + media_or_list[0] if isinstance(media_or_list, list) else media_or_list + ) min_obj = media_or_list[1] if isinstance(media_or_list, list) else 0.01 if media.id in media_ids: raise ValueError("media ids not unique") @@ -140,6 +142,7 @@ def load_default_template(self): def load_default_medias(self, default_media_path=None): if default_media_path is None: import os.path as _path + current_file_path = _path.dirname(_path.abspath(__file__)) default_media_path = f"{current_file_path}/../data/atp_medias.tsv" filename = default_media_path From 
aa63f34514c59f235e0f58e0b29729b35a132042 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Fri, 8 Nov 2024 23:45:11 -0600 Subject: [PATCH 139/146] genome --- modelseedpy/core/msgenome.py | 32 ++++++++++++++++++++++---------- modelseedpy/core/mstemplate.py | 30 +++++++++++++++--------------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 86063cc2..c9e3df88 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -8,18 +8,18 @@ DEFAULT_SPLIT = " " -def to_fasta(features, filename, l=80, fn_header=None): +def to_fasta(features, filename, line_size=80, fn_header=None): with open(filename, "w") as fh: for feature in features: - h = f">{feature.id}\n" - if fn_header: - h = fn_header(feature) - fh.write(h) - lines = [ - feature.seq[i : i + l] + "\n" for i in range(0, len(feature.seq), l) - ] - for line in lines: - fh.write(line) + if feature.seq: + h = f">{feature.id}\n" + if fn_header: + h = fn_header(feature) + fh.write(h) + _seq = feature.seq + lines = [_seq[i: i + line_size] + "\n" for i in range(0, len(_seq), line_size)] + for line in lines: + fh.write(line) return filename @@ -188,3 +188,15 @@ def search_for_gene(self, query): return self.features.get_by_id(query) aliases = self.alias_hash() return aliases[query] if query in aliases else None + + def _repr_html_(self): + return f""" + + + + + + + + +
    Memory address{f"{id(self):x}"}
    Features{len(self.features)}
    """ diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 70b94ec1..61312eb0 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -520,23 +520,23 @@ def get_data(self): class MSTemplateBiomass: def __init__( self, - bio_id, - name, - type, - dna, - rna, - protein, - lipid, - cellwall, - cofactor, - pigment, - carbohydrate, - energy, - other, + biomass_id: str, + name: str, + type: str, + dna: float, + rna: float, + protein: float, + lipid: float, + cellwall: float, + cofactor: float, + pigment: float, + carbohydrate: float, + energy: float, + other: float, ): """ - :param bio_id:string + :param biomass_id:string :param name:string :param type:string :param dna:float @@ -550,7 +550,7 @@ def __init__( :param energy:float :param other:float """ - self.id = bio_id + self.id = biomass_id self.name = name self.type = type self.dna = dna From 35ecbc70c7881bb6d9b317b8aa2bdfb1365d4f97 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Fri, 8 Nov 2024 23:46:44 -0600 Subject: [PATCH 140/146] b --- modelseedpy/core/msgenome.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index c9e3df88..7b751738 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -17,7 +17,10 @@ def to_fasta(features, filename, line_size=80, fn_header=None): h = fn_header(feature) fh.write(h) _seq = feature.seq - lines = [_seq[i: i + line_size] + "\n" for i in range(0, len(_seq), line_size)] + lines = [ + _seq[i : i + line_size] + "\n" + for i in range(0, len(_seq), line_size) + ] for line in lines: fh.write(line) return filename From c7938f428720927ef462b90f058c06b1a7c93961 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 28 Nov 2024 19:28:57 -1000 Subject: [PATCH 141/146] pigment fix --- modelseedpy/core/mstemplate.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git 
a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 61312eb0..6bdc27a3 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -637,16 +637,16 @@ def from_dict(d, template): d["id"], d["name"], d["type"], - d["dna"], - d["rna"], - d["protein"], - d["lipid"], - d["cellwall"], - d["cofactor"], - d["pigment"], - d["carbohydrate"], - d["energy"], - d["other"], + d.get("dna", 0), + d.get("rna", 0), + d.get("protein", 0), + d.get("lipid", 0), + d.get("cellwall", 0), + d.get("cofactor", 0), + d.get("pigment", 0), + d.get("carbohydrate", 0), + d.get("energy", 0), + d.get("other", 0) ) for item in d["templateBiomassComponents"]: biocomp = MSTemplateBiomassComponent.from_dict(item, template) From 1ca7860a716295e8e1f842837695bc18559c95f2 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 20 Jan 2025 03:28:20 -0600 Subject: [PATCH 142/146] proj --- modelseedpy/core/rast_client.py | 12 ++++++++---- pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/modelseedpy/core/rast_client.py b/modelseedpy/core/rast_client.py index 97211add..cc36b7bb 100644 --- a/modelseedpy/core/rast_client.py +++ b/modelseedpy/core/rast_client.py @@ -60,7 +60,7 @@ def __init__(self): }, ] - def annotate_genome(self, genome): + def annotate_genome(self, genome, split_terms=True): p_features = [] for f in genome.features: if f.seq and len(f.seq) > 0: @@ -70,9 +70,13 @@ def annotate_genome(self, genome): for o in res[0]["features"]: feature = genome.features.get_by_id(o["id"]) if "function" in o: - functions = re.split("; | / | @", o["function"]) - for function in functions: - feature.add_ontology_term("RAST", function) + rast_function = o["function"] + if split_terms: + functions = re.split("; | / | @", rast_function) + for function in functions: + feature.add_ontology_term("RAST", function) + else: + feature.add_ontology_term("RAST", rast_function) return res[0]["analysis_events"] diff --git a/pyproject.toml b/pyproject.toml 
index 0ed58542..8e0e52df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta" [tool.black] line-length = 88 -python-version = ['py36'] +python-version = ['py38'] include = '\.pyi?$' exclude = ''' ( From 574fe004e7b909ed97f68c43051b118b89a481f2 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 3 Mar 2025 13:16:49 -0600 Subject: [PATCH 143/146] genome loader --- modelseedpy/core/msgenome.py | 142 +++++++++++++++++++++++++++++++++ modelseedpy/core/mstemplate.py | 2 +- 2 files changed, 143 insertions(+), 1 deletion(-) diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 7b751738..c423427c 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -82,6 +82,27 @@ def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None): return features +def read_gbff_records_from_file(filename: str): + if filename.endswith(".gbff"): + with open(filename, "r") as fh: + return read_gbff_records(fh) + elif filename.endswith(".gz"): + import gzip + from io import StringIO + + with gzip.open(filename, "rb") as fh: + return read_gbff_records(StringIO(fh.read().decode("utf-8"))) + + +def read_gbff_records(handler): + from Bio import SeqIO + + gbff_records = [] + for record in SeqIO.parse(handler, "gb"): + gbff_records.append(record) + return gbff_records + + def extract_features(faa_str, split=DEFAULT_SPLIT, h_func=None): features = [] active_seq = None @@ -168,6 +189,45 @@ def from_fasta(filename, split=" ", h_func=None): genome.features += read_fasta2(filename, split, h_func) return genome + @staticmethod + def from_gbff_sequence(filename): + gbff_records = read_gbff_records_from_file(filename) + genome = MSGenome() + features = [] + for rec in gbff_records: + feature = MSFeature(rec.id, str(rec.seq), description=rec.description) + features.append(feature) + genome.features += features + return genome + + @staticmethod + def from_gbff_features( + filename, 
feature_id_qualifier="protein_id", description_qualifier="product" + ): + gbff_records = read_gbff_records_from_file(filename) + genome = MSGenome() + features = [] + for rec in gbff_records: + for f in rec.features: + if f.type == "CDS": + translations = f.qualifiers.get("translation", []) + if len(translations) == 1: + feature_id = f.qualifiers.get(feature_id_qualifier, [None])[0] + description = f.qualifiers.get(description_qualifier, [None])[0] + if feature_id: + feature = MSFeature( + feature_id, translations[0], description=description + ) + features.append(feature) + else: + logger.warning( + f"skip feature: unable to fetch id from qualifier {feature_id_qualifier}" + ) + elif len(translations) > 1: + logger.warning(f"skip feature: with multiple sequences {f}") + genome.features += features + return genome + def to_fasta(self, filename, l=80, fn_header=None): to_fasta(self.features, filename, l, fn_header) return filename @@ -203,3 +263,85 @@ def _repr_html_(self): {len(self.features)} """ + + +class GenomeGff(MSGenome): + def __init__(self, contigs): + self.contigs = contigs + super().__init__() + + @staticmethod + def read_sequence(feature_id, gff_record, expected_sequence, contigs): + from Bio.Seq import Seq + from Bio import Align + + protein_seq_cds = expected_sequence + feature_contig = contigs.features.get_by_id(gff_record.contig_id) + seq = Seq(feature_contig.seq[gff_record.start - 1 : gff_record.end]) + if gff_record.strand == "-": + seq = seq.reverse_complement() + seq_from_dna = str(seq.translate()) + if len(seq_from_dna) > 0 and seq_from_dna[-1] == "*": + seq_from_dna = seq_from_dna[:-1] + if len(protein_seq_cds) > 0 and protein_seq_cds[-1] == "*": + protein_seq_cds = protein_seq_cds[:-1] + eq = protein_seq_cds == seq_from_dna + + score = None + if not eq and len(seq_from_dna) > 0: + try: + aligner = Align.PairwiseAligner() + res = aligner.align(protein_seq_cds, seq_from_dna) + score = res.score + except ValueError as ex: + print("error", 
gff_record) + raise ex + + feature = MSFeature(feature_id, protein_seq_cds) + feature.description = f"score: {score}" + feature.gff = gff_record + return feature + + @staticmethod + def from_fna_faa_gff( + filename_fna, filename_faa, filename_gff, _fn_get_id, prodigal=False + ): + genome_gff_features = _read_gff_features(filename_gff) + genome_faa = MSGenome.from_fasta(filename_faa) + contigs = MSGenome.from_fasta(filename_fna) + + feature_lookup = {} + if prodigal: + for feature in genome_faa.features: + attr = dict( + x.split("=") + for x in feature.description.split(" # ")[-1].split(";") + ) + if attr["ID"] not in feature_lookup: + feature_lookup[attr["ID"]] = feature + else: + raise ValueError("") + else: + feature_lookup = {feature.id: feature for feature in genome_faa.features} + + features = [] + for gff_record in genome_gff_features: + if gff_record.feature_type == "CDS": + feature_id = gff_record.attr.get("ID") + if _fn_get_id: + feature_id = _fn_get_id(gff_record) + + feature_cds = feature_lookup.get(feature_id) + + if feature_cds: + protein_seq_cds = feature_cds.seq + f = GenomeGff.read_sequence( + feature_id, gff_record, protein_seq_cds, contigs + ) + features.append(f) + else: + print(f"not found {feature_id}") + + genome = GenomeGff(contigs) + genome.features += features + return genome diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 6bdc27a3..cffc6106 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -646,7 +646,7 @@ def from_dict(d, template): d.get("pigment", 0), d.get("carbohydrate", 0), d.get("energy", 0), - d.get("other", 0) + d.get("other", 0), ) for item in d["templateBiomassComponents"]: biocomp = MSTemplateBiomassComponent.from_dict(item, template) From 7cc275a490b0df4988b1082b041e4b59137b19b5 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Fri, 4 Apr 2025 01:16:25 -0500 Subject: [PATCH 144/146] mstemplate biomass --- modelseedpy/__init__.py | 2 +- 
modelseedpy/core/mstemplate.py | 20 ++++++++++---------- setup.py | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 995c054f..2c4e20b1 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -14,7 +14,7 @@ __author__ = "Christopher Henry" __email__ = "chenry@anl.gov" -__version__ = "0.4.0" +__version__ = "0.4.2" logger = logging.getLogger(__name__) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index cffc6106..5cce5927 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -523,16 +523,16 @@ def __init__( biomass_id: str, name: str, type: str, - dna: float, - rna: float, - protein: float, - lipid: float, - cellwall: float, - cofactor: float, - pigment: float, - carbohydrate: float, - energy: float, - other: float, + dna: float = 0, + rna: float = 0, + protein: float = 0, + lipid: float = 0, + cellwall: float = 0, + cofactor: float = 0, + pigment: float = 0, + carbohydrate: float = 0, + energy: float = 0, + other: float = 0, ): """ diff --git a/setup.py b/setup.py index 17f6e136..3c27af19 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name="ModelSEEDpy", - version="0.4.0", + version="0.4.2", description="Python package for building and analyzing models using ModelSEED", long_description_content_type="text/x-rst", long_description=readme, From 264e753a767dc368412f8d09c04af27b25ed037c Mon Sep 17 00:00:00 2001 From: Sam Seaver Date: Thu, 17 Apr 2025 15:01:28 -0500 Subject: [PATCH 145/146] Fixing correct use of inheritance for build_constraint() function --- modelseedpy/fbapkg/flexiblebiomasspkg.py | 60 +++++++++++------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/modelseedpy/fbapkg/flexiblebiomasspkg.py b/modelseedpy/fbapkg/flexiblebiomasspkg.py index 223f778d..a116c8bd 100644 --- a/modelseedpy/fbapkg/flexiblebiomasspkg.py +++ b/modelseedpy/fbapkg/flexiblebiomasspkg.py @@ -77,11 
+77,11 @@ def build_package(self, parameters): newrxns = [] class_coef = {"rna": {}, "dna": {}, "protein": {}, "energy": {}} refcpd = { - "cpd00001": None, - "cpd00009": None, - "cpd00012": None, - "cpd00067": None, - "cpd00002": None, + "cpd00001": None, # Water + "cpd00009": None, # Orthophosphate + "cpd00012": None, # Pyrophosphate + "cpd00067": None, # Proton + "cpd00002": None, # ATP } # Finding all reference compounds in the model msid_hash = self.modelutl.msid_hash() @@ -94,10 +94,10 @@ def build_package(self, parameters): met_class[metabolite] = None msid = MSModelUtil.metabolite_msid(metabolite) if ( - msid != "cpd11416" - and msid != "cpd11463" - and msid != "cpd11462" - and msid != "cpd11461" + msid != "cpd11416" # Biomass + and msid != "cpd11463" # Protein + and msid != "cpd11462" # RNA + and msid != "cpd11461" # DNA and msid != None ): if msid in refcpd: @@ -206,10 +206,10 @@ def build_package(self, parameters): self.model.add_reactions( [self.new_reactions[met_class + "_flex"]] ) - self.build_constraint( + super().build_constraint( self.new_reactions[met_class + "_flex"], "flxcls" ) - if parameters["add_total_biomass_constraint"]: + if self.parameters["add_total_biomass_constraint"]: self.build_constraint(self.parameters["bio_rxn"], "flxbio") def build_variable(self, object, type): # !!! can the function be removed? 
@@ -254,7 +254,7 @@ def build_constraint(self, cobra_obj, obj_type): if abs(massdiff) > 0.00001: coef[rxn.forward_variable] = massdiff coef[rxn.reverse_variable] = -massdiff - return BaseFBAPkg.build_constraint(self, obj_type, 0, 0, coef, cobra_obj) + return super().build_constraint(obj_type, 0, 0, coef, cobra_obj) elif obj_type == "flxcpd" or obj_type == "flxcls": first_entry = None second_entry = None @@ -293,7 +293,7 @@ def build_constraint(self, cobra_obj, obj_type): # If the value is positive, lock in the forward variable and set the reverse to zero if first_entry > 0: if product: - const = self.build_constraint( + const = super().build_constraint( "f" + obj_type, 0, 0, @@ -302,7 +302,7 @@ def build_constraint(self, cobra_obj, obj_type): ) object.lower_bound = 0 else: - const = self.build_constraint( + const = super().build_constraint( "f" + obj_type, 0, 0, @@ -313,7 +313,7 @@ def build_constraint(self, cobra_obj, obj_type): # If the value is negative, lock in the reverse variable and set the forward to zero elif first_entry < 0: if product: - const = self.build_constraint( + const = super().build_constraint( "r" + obj_type, 0, 0, @@ -322,7 +322,7 @@ def build_constraint(self, cobra_obj, obj_type): ) object.upper_bound = 0 else: - const = self.build_constraint( + const = super().build_constraint( "r" + obj_type, 0, 0, @@ -337,8 +337,7 @@ def build_constraint(self, cobra_obj, obj_type): elif second_entry >= 0: if first_entry >= 0: if product: - const = BaseFBAPkg.build_constraint( - self, + const = super().build_constraint( "f" + obj_type, 0, None, @@ -347,8 +346,7 @@ def build_constraint(self, cobra_obj, obj_type): ) object.lower_bound = 0 if first_entry > 0: - BaseFBAPkg.build_constraint( - self, + super().build_constraint( "r" + obj_type, 0, None, @@ -356,8 +354,7 @@ def build_constraint(self, cobra_obj, obj_type): cobra_obj, ) else: - const = BaseFBAPkg.build_constraint( - self, + const = super().build_constraint( "f" + obj_type, 0, None, @@ -366,8 +363,7 
@@ def build_constraint(self, cobra_obj, obj_type): ) object.upper_bound = 0 if first_entry > 0: - BaseFBAPkg.build_constraint( - self, + super().build_constraint( "r" + obj_type, 0, None, @@ -376,14 +372,14 @@ def build_constraint(self, cobra_obj, obj_type): ) else: if product: - const = self.build_constraint( + const = super().build_constraint( "f" + obj_type, 0, None, {biovar: second_entry, object.forward_variable: -1}, cobra_obj, ) - self.build_constraint( + super().build_constraint( "r" + obj_type, 0, None, @@ -391,14 +387,14 @@ def build_constraint(self, cobra_obj, obj_type): cobra_obj, ) else: - const = self.build_constraint( + const = super().build_constraint( "f" + obj_type, 0, None, {biovar: second_entry, object.reverse_variable: -1}, cobra_obj, ) - self.build_constraint( + super().build_constraint( "r" + obj_type, 0, None, @@ -408,7 +404,7 @@ def build_constraint(self, cobra_obj, obj_type): else: if second_entry < 0: if product: - const = self.build_constraint( + const = super().build_constraint( "f" + obj_type, 0, None, @@ -416,7 +412,7 @@ def build_constraint(self, cobra_obj, obj_type): cobra_obj, ) else: - const = self.build_constraint( + const = super().build_constraint( "f" + obj_type, 0, None, @@ -424,7 +420,7 @@ def build_constraint(self, cobra_obj, obj_type): cobra_obj, ) if product: - self.build_constraint( + super().build_constraint( "r" + obj_type, 0, None, @@ -433,7 +429,7 @@ def build_constraint(self, cobra_obj, obj_type): ) object.lower_bound = 0 else: - self.build_constraint( + super().build_constraint( "r" + obj_type, 0, None, From 217edfdbad11e2e6ad20874b07c2b143d9efa243 Mon Sep 17 00:00:00 2001 From: Sam Seaver Date: Thu, 17 Apr 2025 15:14:22 -0500 Subject: [PATCH 146/146] Another fix for incorrect call --- modelseedpy/fbapkg/flexiblebiomasspkg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/fbapkg/flexiblebiomasspkg.py b/modelseedpy/fbapkg/flexiblebiomasspkg.py index a116c8bd..aa11ea2d 100644 --- 
a/modelseedpy/fbapkg/flexiblebiomasspkg.py +++ b/modelseedpy/fbapkg/flexiblebiomasspkg.py @@ -206,7 +206,7 @@ def build_package(self, parameters): self.model.add_reactions( [self.new_reactions[met_class + "_flex"]] ) - super().build_constraint( + self.build_constraint( self.new_reactions[met_class + "_flex"], "flxcls" ) if self.parameters["add_total_biomass_constraint"]: