diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 87de0099..ffde9f98 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -3,6 +3,8 @@ name: Run Pre-Commit on: pull_request: {} push: + paths-ignore: + - 'examples/**' branches: - dev - main @@ -13,7 +15,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v3 diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 9ce47285..c3d816d0 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -11,7 +11,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 - name: Set up Python diff --git a/.gitignore b/.gitignore index 6390162b..87619079 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,8 @@ dmypy.json # Pyre type checker .pyre/ + +.pydevproject +.settings/* +*data/* +*.lp diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 04cde634..325706ab 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,9 @@ repos: args: - --pytest-test-first - id: check-json + exclude: examples/ - id: pretty-format-json + exclude: examples/ args: - --autofix - --top-keys=_id diff --git a/.travis.yml b/.travis.yml index 20911611..75b2eb81 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,8 @@ language: python python: - - 3.7 - - 3.8 - 3.9 + - 3.10 + - 3.11 before_install: - python --version - pip install -U pip diff --git a/README.rst b/README.rst index 6f380d9a..3d491ec3 100644 --- a/README.rst +++ b/README.rst @@ -25,6 +25,10 @@ ________________________________________________________________________ :target: https://pepy.tech/project/modelseedpy :alt: Downloads +.. 
image:: https://img.shields.io/badge/code%20style-black-000000.svg + :target: https://github.com/ambv/black + :alt: Black + Metabolic modeling is an pivotal method for computational research in synthetic biology and precision medicine. The metabolic models, such as the constrint-based flux balance analysis (FBA) algorithm, are improved with comprehensive datasets that capture more metabolic chemistry in the model and improve the accuracy of simulation predictions. We therefore developed ModelSEEDpy as a comprehensive suite of packages that bootstrap metabolic modeling with the ModelSEED Database (`Seaver et al., 2021 `_ ). These packages parse and manipulate (e.g. gapfill missing reactions or calculated chemical properties of metabolites), constrain (with kinetic, thermodynamics, and nutrient uptake), and simulate cobrakbase models (both individual models and communities). This is achieved by standardizing COBRA models through the ``cobrakbase`` module into a form that is amenable with the KBase/ModelSEED ecosystem. These functionalities are exemplified in `Python Notebooks `_ . Please submit errors, inquiries, or suggestions as `GitHub issues `_ where they can be addressed by our developers. @@ -33,11 +37,11 @@ Metabolic modeling is an pivotal method for computational research in synthetic Installation ---------------------- -ModelSEEDpy will soon be installable via the ``PyPI`` channel:: +PIP (latest stable version 0.4.0):: pip install modelseedpy -but, until then, the repository must cloned:: +GitHub dev build (latest working version):: git clone https://github.com/ModelSEED/ModelSEEDpy.git @@ -51,8 +55,3 @@ The associated ModelSEED Database, which is required for a few packages, is simp git clone https://github.com/ModelSEED/ModelSEEDDatabase.git and the path to this repository is passed as an argument to the corresponding packages. 
- -**Windows users** must separately install the ``pyeda`` module: 1) download the appropriate wheel for your Python version from `this website `_ ; and 2) install the wheel through the following commands in a command prompt/powershell console:: - - cd path/to/pyeda/wheel - pip install pyeda_wheel_name.whl diff --git a/examples/Model Reconstruction/ATPGapfilling.ipynb b/examples/Model Reconstruction/ATPGapfilling.ipynb index f0116989..d236d609 100644 --- a/examples/Model Reconstruction/ATPGapfilling.ipynb +++ b/examples/Model Reconstruction/ATPGapfilling.ipynb @@ -526,7 +526,13 @@ "cell_type": "code", "execution_count": 60, "id": "6ade9096-f3f4-40f8-a1ea-53b5b63ec2c0", - "metadata": {}, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, "outputs": [ { "name": "stderr", @@ -1174,123 +1180,417 @@ }, { "cell_type": "code", - "execution_count": 67, - "id": "7aba6de8-9252-4980-95b0-bd1a72db2e05", + "execution_count": 1, + "id": "e24d8e82-357a-4658-9362-6073f502b6bc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "modelseedpy 0.2.2\n" + ] + } + ], + "source": [ + "import modelseedpy" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1080bc7b-58c2-4105-91a2-2defaa8a1c92", "metadata": {}, "outputs": [], "source": [ - "atp_correction.apply_growth_media_gapfilling()" + "%run /home/fliu/workspace/python3/ModelSEEDpy/tests/core/test_msatpcorreption.py" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3ee9a1dd-9b8c-4204-b846-609cecebffc7", + "metadata": {}, + "outputs": [], + "source": [ + "def get_model(ko):\n", + " def _method(ko=ko, added_compounds=None, added_reactions=None):\n", + " if ko is None:\n", + " ko = []\n", + " with open(\n", + " '/home/fliu/workspace/python3/ModelSEEDpy/tests/test_data/e_coli_core.json',\n", + " \"r\",\n", + " ) as fh:\n", + " model_json = json.load(fh)\n", + " model_json[\"compartments\"] = {\n", + " k + 
\"0\": v for (k, v) in model_json[\"compartments\"].items()\n", + " }\n", + " metabolites = {}\n", + " for m in model_json[\"metabolites\"]:\n", + " m[\"id\"] += \"0\"\n", + " m[\"compartment\"] += \"0\"\n", + " metabolites[m[\"id\"]] = m\n", + " for r in model_json[\"reactions\"]:\n", + " r[\"metabolites\"] = {i + \"0\": v for (i, v) in r[\"metabolites\"].items()}\n", + " compartments = set(\n", + " [metabolites[k][\"compartment\"] for k in r[\"metabolites\"].keys()]\n", + " )\n", + " if r[\"id\"].endswith(\"_e\"):\n", + " r[\"id\"] += \"0\"\n", + " elif len(compartments) == 1:\n", + " r[\"id\"] += \"_\" + list(compartments)[0]\n", + " else:\n", + " r[\"id\"] += (\n", + " \"_\" + \"c0\"\n", + " ) # hack cause there is only combo between e0 and c0\n", + "\n", + " model_json[\"reactions\"] = [\n", + " x for x in model_json[\"reactions\"] if x[\"id\"] not in ko\n", + " ]\n", + "\n", + " if added_compounds:\n", + " for o in added_compounds:\n", + " model_json[\"metabolites\"].append(o)\n", + " if added_reactions:\n", + " for o in added_reactions:\n", + " model_json[\"reactions\"].append(o)\n", + " model = cobra.io.from_json(json.dumps(model_json))\n", + " model.reactions.ATPM_c0.lower_bound = 0\n", + " model.reactions.ATPM_c0.upper_bound = 1000\n", + " return model\n", + "\n", + " return _method(ko)" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "e8107ba2-f470-4e05-8b80-731fc00febe7", + "execution_count": 45, + "id": "928bb140-9110-4a1a-b750-dbd9d6a2acc6", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "95db6e6f-bedc-4c0d-9e73-c6eec5365c16", + "metadata": {}, + "outputs": [], + "source": [ + "model = get_model([\"NADH16_c0\", \"CYTBD_c0\", \"O2t_c0\", \"GLCpts_c0\"])\n", + "with open('/home/fliu/workspace/python3/ModelSEEDpy/tests/test_data/template_core_bigg.json', 'r') as fh:\n", + " template = 
MSTemplateBuilder.from_dict(json.load(fh)).build()\n", + "media_glucose_aerobic = MSMedia.from_dict(\n", + " {\n", + " \"glc__D\": (-1, 1000),\n", + " \"o2\": (-1000, 1000),\n", + " \"h\": (-1000, 1000),\n", + " \"h2o\": (-1000, 1000),\n", + " }\n", + " )\n", + "media_glucose_aerobic.id = 'glc/o2'\n", + "media_acetate_aerobic = MSMedia.from_dict(\n", + " {\n", + " \"ac\": (-1, 1000),\n", + " \"o2\": (-1000, 1000),\n", + " \"h\": (-1000, 1000),\n", + " \"h2o\": (-1000, 1000),\n", + " }\n", + " )\n", + "media_acetate_aerobic.id = 'ac/o2'\n", + "medias = [media_glucose_aerobic, media_acetate_aerobic]" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "8fdc8faf-fcc8-45cd-b775-e6bc143a42cc", + "metadata": {}, + "outputs": [], + "source": [ + "%run /home/fliu/workspace/python3/ModelSEEDpy/modelseedpy/core/msatpcorrection.py\n", + "atp_correction = MSATPCorrection(\n", + " model,\n", + " template,\n", + " medias,\n", + " atp_hydrolysis_id=\"ATPM_c0\",\n", + " load_default_medias=False,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "fc07b43d-88f5-477c-9149-28756a5cd926", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0" + "[[, 0.01],\n", + " [, 0.01]]" ] }, - "execution_count": 18, + "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "atp_correction.max_gapfilling" + "atp_correction.atp_medias" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "1af1e574-76b2-40f7-82f8-4ffd1bb2c442", + "execution_count": 99, + "id": "369ef2d4-f696-4762-9370-d91276e3b95f", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Namee_coli_core
Memory address7ff258653370
Number of metabolites72
Number of reactions91
Number of genes137
Number of groups0
Objective expression1.0*BIOMASS_Ecoli_core_w_GAM_c0 - 1.0*BIOMASS_Ecoli_core_w_GAM_c0_reverse_70c47
Compartmentsextracellular space, cytosol
" + ], "text/plain": [ - "0" + "" ] }, - "execution_count": 19, + "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "atp_correction.gapfilling_delta" + "model" ] }, { "cell_type": "code", - "execution_count": 43, - "id": "0a344084-edad-456f-9e88-064a404039d4", + "execution_count": 100, + "id": "62862b90-d73b-4597-8e3f-c8bf55e9090e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[]" + "{'glc/o2': 0.0, 'ac/o2': 0.0}" ] }, - "execution_count": 43, + "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "atp_correction.gapfilling_tests" + "atp_correction.evaluate_growth_media()" ] }, { "cell_type": "code", - "execution_count": 44, - "id": "9e78779d-b7e7-4e73-a77c-9813bee3c6a9", + "execution_count": 101, + "id": "e67db875-e06f-464c-b96c-8e4ce7eb6324", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[]" + "{: {'reversed': {},\n", + " 'new': {'GLCpts_c0': '>'},\n", + " 'media': ,\n", + " 'target': 'ATPM_c0',\n", + " 'minobjective': 0.01,\n", + " 'binary_check': False},\n", + " : {'reversed': {},\n", + " 'new': {'CYTBD_c0': '>', 'NADH16_c0': '>', 'O2t_c0': '>'},\n", + " 'media': ,\n", + " 'target': 'ATPM_c0',\n", + " 'minobjective': 0.01,\n", + " 'binary_check': False}}" ] }, - "execution_count": 44, + "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "atp_correction.gapfilling_tests" + "atp_correction.media_gapfill_stats" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "47da598f-b3cd-423d-93eb-0e68f11eaef9", + "metadata": {}, + "outputs": [], + "source": [ + "atp_correction.determine_growth_media()" ] }, { "cell_type": "code", - "execution_count": 68, - "id": "669e1ddb-493b-461e-bef9-d19cb1f5e542", + "execution_count": 105, + "id": "42673388-2500-4922-83b9-3e4dfa7acb17", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[]" + "'glc/o2'" + ] + }, + "execution_count": 105, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "atp_correction.selected_media[0].id" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "c0e29cc8-85d5-450e-a3d6-c1207d297963", + "metadata": {}, + "outputs": [], + "source": [ + "atp_correction.apply_growth_media_gapfilling()" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "43f29d4f-30b3-452f-a5f9-49489b97d646", + "metadata": {}, + "outputs": [], + "source": [ + "media_eval = atp_correction.evaluate_growth_media()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "f8044fd4-70f1-4082-9316-e601ac06ac7e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'glc/o2': 2.75, 'ac/o2': 0.0}" ] }, - "execution_count": 68, + "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "atp_correction.gapfilling_tests" + "media_eval" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "db1e8df2-4a86-408b-a479-5eebf13e9971", + "metadata": {}, + "outputs": [], + "source": [ + "atp_correction.expand_model_to_genome_scale()" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "d76dcb54-1ea2-4e53-8853-521790cd8300", + "metadata": {}, + "outputs": [], + "source": [ + "tests = atp_correction.build_tests()" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "id": "f30e70fa-5258-42fd-b624-aafdce509b80", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "empty {'media': , 'is_max_threshold': True, 'threshold': 1e-05, 'objective': 'ATPM_c0'}\n", + "glc/o2 {'media': , 'is_max_threshold': True, 'threshold': 3.3, 'objective': 'ATPM_c0'}\n" + ] + } + ], + "source": [ + "for t in tests:\n", + " print(t['media'].id, t)" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "c35d3047-da1f-4331-a907-765c2b43048d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'media': ,\n", + " 
'is_max_threshold': True,\n", + " 'threshold': 1e-05,\n", + " 'objective': 'ATPM_c0'},\n", + " {'media': ,\n", + " 'is_max_threshold': True,\n", + " 'threshold': 3.3,\n", + " 'objective': 'ATPM_c0'}]" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tests" ] }, { "cell_type": "code", "execution_count": null, - "id": "e24d8e82-357a-4658-9362-6073f502b6bc", + "id": "7b718e1d-059d-410b-bf1a-05a734f09e0d", "metadata": {}, "outputs": [], "source": [] @@ -1298,7 +1598,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/examples/Model Reconstruction/Biomass.ipynb b/examples/Model Reconstruction/Biomass.ipynb index e4a2c901..3726f959 100644 --- a/examples/Model Reconstruction/Biomass.ipynb +++ b/examples/Model Reconstruction/Biomass.ipynb @@ -2,18 +2,17 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "5434992c-fc67-40f5-ae08-82f44790666c", "metadata": {}, "outputs": [], "source": [ - "from modelseedpy.helpers import get_template\n", - "from modelseedpy.core.mstemplate import MSTemplateBuilder" + "import modelseedpy" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 2, "id": "b243e00a-4a8b-489d-a778-61844a439e63", "metadata": {}, "outputs": [ @@ -21,7 +20,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "cobrakbase 0.2.8\n" + "cobrakbase 0.3.1\n" ] } ], @@ -30,6 +29,157 @@ "kbase = cobrakbase.KBaseAPI()" ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3a177c16-ecb0-4050-bbf5-47aad10f2af9", + "metadata": {}, + "outputs": [], + "source": [ + "template = kbase.get_from_ws('GramNegModelTemplateV3', 'NewKBaseModelTemplates')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4ce52552-dce2-4c44-9884-cf00d15e76ab", + "metadata": {}, + "outputs": [], + "source": [ + "from modelseedpy import 
MSBuilder" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6f216f6a-5e25-4697-bf6b-9ae63475b5c7", + "metadata": {}, + "outputs": [], + "source": [ + "from cobra.core import Model\n", + "model = Model('test')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d9763d58-daba-4751-811f-23581b390025", + "metadata": {}, + "outputs": [], + "source": [ + "biomass = template.biomasses[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d3e884ac-2568-445a-ac04-1508b536c88a", + "metadata": {}, + "outputs": [], + "source": [ + "reaction = biomass.build_biomass(model, '0', True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f5140ac5-273f-4eb5-b806-ddd9178b252e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cpd00010_c0 {'modelseed_template_id': 'cpd00010_c'}\n", + "cpd11493_c0 {'modelseed_template_id': 'cpd11493_c'}\n", + "cpd12370_c0 {'modelseed_template_id': 'cpd12370_c'}\n", + "cpd00003_c0 {'modelseed_template_id': 'cpd00003_c'}\n", + "cpd00006_c0 {'modelseed_template_id': 'cpd00006_c'}\n", + "cpd00205_c0 {'modelseed_template_id': 'cpd00205_c'}\n", + "cpd00254_c0 {'modelseed_template_id': 'cpd00254_c'}\n", + "cpd10516_c0 {'modelseed_template_id': 'cpd10516_c'}\n", + "cpd00063_c0 {'modelseed_template_id': 'cpd00063_c'}\n", + "cpd00009_c0 {'modelseed_template_id': 'cpd00009_c'}\n", + "cpd00099_c0 {'modelseed_template_id': 'cpd00099_c'}\n", + "cpd00149_c0 {'modelseed_template_id': 'cpd00149_c'}\n", + "cpd00058_c0 {'modelseed_template_id': 'cpd00058_c'}\n", + "cpd00015_c0 {'modelseed_template_id': 'cpd00015_c'}\n", + "cpd10515_c0 {'modelseed_template_id': 'cpd10515_c'}\n", + "cpd00030_c0 {'modelseed_template_id': 'cpd00030_c'}\n", + "cpd00048_c0 {'modelseed_template_id': 'cpd00048_c'}\n", + "cpd00034_c0 {'modelseed_template_id': 'cpd00034_c'}\n", + "cpd00016_c0 {'modelseed_template_id': 'cpd00016_c'}\n", + "cpd00220_c0 
{'modelseed_template_id': 'cpd00220_c'}\n", + "cpd00017_c0 {'modelseed_template_id': 'cpd00017_c'}\n", + "cpd00201_c0 {'modelseed_template_id': 'cpd00201_c'}\n", + "cpd00087_c0 {'modelseed_template_id': 'cpd00087_c'}\n", + "cpd00345_c0 {'modelseed_template_id': 'cpd00345_c'}\n", + "cpd00042_c0 {'modelseed_template_id': 'cpd00042_c'}\n", + "cpd00028_c0 {'modelseed_template_id': 'cpd00028_c'}\n", + "cpd00557_c0 {'modelseed_template_id': 'cpd00557_c'}\n", + "cpd00264_c0 {'modelseed_template_id': 'cpd00264_c'}\n", + "cpd00118_c0 {'modelseed_template_id': 'cpd00118_c'}\n", + "cpd00056_c0 {'modelseed_template_id': 'cpd00056_c'}\n", + "cpd15560_c0 {'modelseed_template_id': 'cpd15560_c'}\n", + "cpd15352_c0 {'modelseed_template_id': 'cpd15352_c'}\n", + "cpd15500_c0 {'modelseed_template_id': 'cpd15500_c'}\n", + "cpd00166_c0 {'modelseed_template_id': 'cpd00166_c'}\n", + "cpd01997_c0 {'modelseed_template_id': 'cpd01997_c'}\n", + "cpd03422_c0 {'modelseed_template_id': 'cpd03422_c'}\n", + "cpd00104_c0 {'modelseed_template_id': 'cpd00104_c'}\n", + "cpd00037_c0 {'modelseed_template_id': 'cpd00037_c'}\n", + "cpd00050_c0 {'modelseed_template_id': 'cpd00050_c'}\n", + "cpd15793_c0 {'modelseed_template_id': 'cpd15793_c'}\n", + "cpd15540_c0 {'modelseed_template_id': 'cpd15540_c'}\n", + "cpd15533_c0 {'modelseed_template_id': 'cpd15533_c'}\n", + "cpd15432_c0 {'modelseed_template_id': 'cpd15432_c'}\n", + "cpd02229_c0 {'modelseed_template_id': 'cpd02229_c'}\n", + "cpd15665_c0 {'modelseed_template_id': 'cpd15665_c'}\n", + "cpd15666_c0 {'modelseed_template_id': 'cpd15666_c'}\n", + "cpd00023_c0 {'modelseed_template_id': 'cpd00023_c'}\n", + "cpd00001_c0 {'modelseed_template_id': 'cpd00001_c'}\n", + "cpd00033_c0 {'modelseed_template_id': 'cpd00033_c'}\n", + "cpd00035_c0 {'modelseed_template_id': 'cpd00035_c'}\n", + "cpd00039_c0 {'modelseed_template_id': 'cpd00039_c'}\n", + "cpd00041_c0 {'modelseed_template_id': 'cpd00041_c'}\n", + "cpd00051_c0 {'modelseed_template_id': 'cpd00051_c'}\n", + 
"cpd00053_c0 {'modelseed_template_id': 'cpd00053_c'}\n", + "cpd00054_c0 {'modelseed_template_id': 'cpd00054_c'}\n", + "cpd00060_c0 {'modelseed_template_id': 'cpd00060_c'}\n", + "cpd00065_c0 {'modelseed_template_id': 'cpd00065_c'}\n", + "cpd00066_c0 {'modelseed_template_id': 'cpd00066_c'}\n", + "cpd00069_c0 {'modelseed_template_id': 'cpd00069_c'}\n", + "cpd00084_c0 {'modelseed_template_id': 'cpd00084_c'}\n", + "cpd00107_c0 {'modelseed_template_id': 'cpd00107_c'}\n", + "cpd00119_c0 {'modelseed_template_id': 'cpd00119_c'}\n", + "cpd00129_c0 {'modelseed_template_id': 'cpd00129_c'}\n", + "cpd00132_c0 {'modelseed_template_id': 'cpd00132_c'}\n", + "cpd00156_c0 {'modelseed_template_id': 'cpd00156_c'}\n", + "cpd00161_c0 {'modelseed_template_id': 'cpd00161_c'}\n", + "cpd00322_c0 {'modelseed_template_id': 'cpd00322_c'}\n", + "cpd00115_c0 {'modelseed_template_id': 'cpd00115_c'}\n", + "cpd00012_c0 {'modelseed_template_id': 'cpd00012_c'}\n", + "cpd00241_c0 {'modelseed_template_id': 'cpd00241_c'}\n", + "cpd00356_c0 {'modelseed_template_id': 'cpd00356_c'}\n", + "cpd00357_c0 {'modelseed_template_id': 'cpd00357_c'}\n", + "cpd00002_c0 {'modelseed_template_id': 'cpd00002_c'}\n", + "cpd00038_c0 {'modelseed_template_id': 'cpd00038_c'}\n", + "cpd00052_c0 {'modelseed_template_id': 'cpd00052_c'}\n", + "cpd00062_c0 {'modelseed_template_id': 'cpd00062_c'}\n", + "cpd00008_c0 {'modelseed_template_id': 'cpd00008_c'}\n", + "cpd00067_c0 {'modelseed_template_id': 'cpd00067_c'}\n", + "cpd11416_c0 {'modelseed_template_id': 'cpd11416_c'}\n", + "cpd17041_c0 {'modelseed_template_id': 'cpd17041_c'}\n", + "cpd17042_c0 {'modelseed_template_id': 'cpd17042_c'}\n", + "cpd17043_c0 {'modelseed_template_id': 'cpd17043_c'}\n" + ] + } + ], + "source": [ + "for m in reaction.metabolites:\n", + " print(m, m.notes)" + ] + }, { "cell_type": "code", "execution_count": 42, @@ -551,7 +701,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", 
"name": "python3" }, diff --git a/examples/Model Reconstruction/Gapfilling.ipynb b/examples/Model Reconstruction/Gapfilling.ipynb index eea0c536..88eadaa6 100644 --- a/examples/Model Reconstruction/Gapfilling.ipynb +++ b/examples/Model Reconstruction/Gapfilling.ipynb @@ -2,17 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cobrakbase 0.2.8\n" - ] - } - ], + "outputs": [], "source": [ "import cobra\n", "#If you have CPLEX, uncomment this\n", @@ -20,31 +12,37 @@ "import cobrakbase\n", "#import modelseedpy.fbapkg\n", "from modelseedpy import GapfillingPkg, KBaseMediaPkg\n", - "from modelseedpy import FBAHelper, MSBuilder" + "from modelseedpy import FBAHelper, MSBuilder\n", + "kbase_api = cobrakbase.KBaseAPI()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "model = kbase_api.get_from_ws(\"test_model\",18528)" + ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, "tags": [] }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:modelseedpy.core.msmodelutl:cpd00244 not found in model!\n" + ] + }, { "data": { "text/html": [ - "

Objective

1.0 bio1 = 0.8048653841131165

Uptake

\n", + "

Objective

1.0 bio1 = 0.7997546667881398

Uptake

\n", " \n", " \n", " \n", @@ -58,14 +56,14 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -79,98 +77,98 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -189,28 +187,35 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -218,19 +223,15 @@ "
Metabolite
cpd00009_e0EX_cpd00009_e00.99980.993400.00%
cpd00013_e0EX_cpd00013_e06.0376.09400.00%
cpd00030_e0EX_cpd00030_e00.006390.0063500.00%
cpd00034_e0EX_cpd00034_e00.006390.0063500.00%
cpd00048_e0EX_cpd00048_e00.17550.174400.00%
cpd00058_e0EX_cpd00058_e00.006390.0063500.00%
cpd00063_e0EX_cpd00063_e00.006390.0063500.00%
cpd00067_e0EX_cpd00067_e061.8561.4300.00%
cpd00099_e0EX_cpd00099_e00.006390.0063500.00%
cpd00149_e0EX_cpd00149_e00.006390.0063500.00%
cpd00205_e0EX_cpd00205_e00.006390.0063500.00%
cpd00254_e0EX_cpd00254_e00.006390.0063500.00%
cpd10516_e0EX_cpd10516_e00.025560.025400.00%
cpd17041_c0rxn13782_c00.80490.799800.00%
cpd17042_c0rxn13783_c00.80490.799800.00%
cpd17043_c0rxn13784_c00.80490.799800.00%
cpd00001_e0EX_cpd00001_e0-82.26-81.9500.00%
cpd00007_e0EX_cpd00007_e0-2.928-2.86900.00%
cpd15378_e0EX_cpd15378_e0-0.00639-0.006357100.00%18.92%
cpd03091_c0SK_cpd03091_c0-0.019051081.08%
cpd11416_c0SK_cpd11416_c0-0.8049-0.799800.00%
" ], "text/plain": [ - "" + "" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "kbase_api = cobrakbase.KBaseAPI()\n", - "model = kbase_api.get_from_ws(\"test_model\",18528)\n", - "#If you have CPLEX, uncomment this\n", - "#model.solver = 'optlang-cplex'\n", "template = kbase_api.get_from_ws(\"GramNegModelTemplateV3\",\"NewKBaseModelTemplates\")\n", "media = kbase_api.get_from_ws(\"Carbon-D-Glucose\",\"KBaseMedia\")\n", "model = MSBuilder.gapfill_model(model,\"bio1\",template,media)\n", @@ -17910,7 +17911,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/examples/Model Reconstruction/Genomes.ipynb b/examples/Model Reconstruction/Genomes.ipynb index 60270468..8ea82ef4 100644 --- a/examples/Model Reconstruction/Genomes.ipynb +++ b/examples/Model Reconstruction/Genomes.ipynb @@ -1,223 +1,300 @@ { "cells": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "tags": [] + }, "source": [ - "import modelseedpy\n", - "from modelseedpy.core.msgenome import MSGenome\n", - "from modelseedpy.core.rast_client import RastClient" + "### Genomes\n", + "\n", + "ModelSEEDpy provides its own genome object type `modelseedpy.core.msgenome.MSGenome` to manipulate genomes" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "genome = MS" + "import modelseedpy\n", + "from modelseedpy.core.msgenome import MSGenome" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "1" + "#### Reading faa file\n", + "\n", + "To load a genome we can read a `.faa` file that contains protein sequences" ] }, { "cell_type": 
"code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "genome = MSGenome.from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')" + ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "rast = RastClient()" + "genome" ] }, { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "genome = MSGenome.from_fasta('GCF_000005845.2.faa', split=' ')" + "#### Manipulating genes\n", + "\n", + "Each gene is stored as a `modelseedpy.core.msgenome.MSFeature` in the `.features` of type `cobra.core.dictlist.DictList` similiar to the cobrapy `.reactions` and `.metabolites` in the `cobra.core.Model`" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of features: 3\n" - ] + "data": { + "text/plain": [ + "4285" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print('Number of features:', len(genome.features))" + "len(genome.features)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "for f in genome.features:\n", - " print(f.id, len(f.seq), f.description)" + "gene = genome.features.get_by_id('NP_414542.1')\n", + "gene" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - 
"execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'execution_time': 1622756127.36331,\n", - " 'tool_name': 'kmer_search',\n", - " 'hostname': 'pear',\n", - " 'parameters': ['-a',\n", - " '-g',\n", - " 200,\n", - " '-m',\n", - " 5,\n", - " '-d',\n", - " '/opt/patric-common/data/kmer_metadata_v2',\n", - " '-u',\n", - " 'http://pear.mcs.anl.gov:6100/query'],\n", - " 'id': '9CCA6D20-C4B3-11EB-A893-36A8BEF382BD'},\n", - " {'parameters': ['annotate_hypothetical_only=1',\n", - " 'dataset_name=Release70',\n", - " 'kmer_size=8'],\n", - " 'hostname': 'pear',\n", - " 'tool_name': 'KmerAnnotationByFigfam',\n", - " 'id': '9CE3769E-C4B3-11EB-A893-36A8BEF382BD',\n", - " 'execution_time': 1622756127.52738},\n", - " {'execute_time': 1622756127.88296,\n", - " 'hostname': 'pear',\n", - " 'parameters': [],\n", - " 'tool_name': 'annotate_proteins_similarity',\n", - " 'id': '9D19B7EA-C4B3-11EB-9714-71B3BDF382BD'}]" + "modelseedpy.core.msgenome.MSFeature" ] }, - "execution_count": 14, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "rast.annotate_genome(genome)" + "type(gene)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Equivalent call from the client it self" + "##### Gene annotation\n", + "Annotation is store as an **ontology term**. When loading from a `.faa` file no ontology term is present but we can add them later." 
] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#genome, res = rast.annotate_genome_from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')\n", - "#res" + "gene.ontology_terms" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "'thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655]'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gene.description" + ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "{'annotation': ['thr operon leader peptide [Escherichia coli str. K-12 substr. 
MG1655]']}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gene.add_ontology_term('annotation', gene.description)\n", + "gene.ontology_terms" + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "#### RAST\n", + "It is possible to annotate genomes with RAST by calling the `RastClient`" + ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from modelseedpy.core.rast_client import RastClient\n", + "rast = RastClient()" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 'C54F08A4-CDB3-11ED-A7E9-CAF09D6086F0',\n", + " 'parameters': ['-a',\n", + " '-g',\n", + " 200,\n", + " '-m',\n", + " 5,\n", + " '-d',\n", + " '/opt/patric-common/data/kmer_metadata_v2',\n", + " '-u',\n", + " 'http://pear.mcs.anl.gov:6100/query'],\n", + " 'hostname': 'pear',\n", + " 'tool_name': 'kmer_search',\n", + " 'execution_time': 1680040751.14837},\n", + " {'id': 'C5638324-CDB3-11ED-A7E9-CAF09D6086F0',\n", + " 'parameters': ['annotate_hypothetical_only=1',\n", + " 'dataset_name=Release70',\n", + " 'kmer_size=8'],\n", + " 'tool_name': 'KmerAnnotationByFigfam',\n", + " 'hostname': 'pear',\n", + " 'execution_time': 1680040751.28257},\n", + " {'parameters': [],\n", + " 'id': 'C5944E1E-CDB3-11ED-8217-51F29F6086F0',\n", + " 'execute_time': 1680040751.60236,\n", + " 'tool_name': 'annotate_proteins_similarity',\n", + " 'hostname': 'pear'}]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rast.annotate_genome(genome)" + ] }, { - "cell_type": "code", - "execution_count": 34, + "cell_type": 
"markdown", "metadata": {}, - "outputs": [], "source": [ - "feature = genome.features.get_by_id('YP_588478.1')" + "RAST annotation is stored in the ontology term **RAST** and this is used as default to build metabolic models with the ModelSEED templates" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'RAST': 'DUF1435 domain-containing protein YjjZ [Escherichia coli str. K-12 substr. MG1655]'}" + "{'annotation': ['thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655]'],\n", + " 'RAST': ['Thr operon leader peptide']}" ] }, - "execution_count": 36, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "feature.ontology_terms" + "gene.ontology_terms" ] }, { @@ -225,14 +302,12 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "feature.add_ontology_term('')" - ] + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -246,7 +321,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.10" } }, "nbformat": 4, diff --git a/examples/Model Reconstruction/build_metabolic_model.ipynb b/examples/Model Reconstruction/build_metabolic_model.ipynb index 2f1e8d3f..ea2e8d41 100644 --- a/examples/Model Reconstruction/build_metabolic_model.ipynb +++ b/examples/Model Reconstruction/build_metabolic_model.ipynb @@ -1,12 +1,26 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Build Metabolic Model from Genome .faa file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* MSGenome: to read a faa file\n", + "* MSBuilder: to build metabolic model from the genome" + ] + }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "import modelseedpy\n", "from 
modelseedpy import MSBuilder, MSGenome" ] }, @@ -19,21 +33,1446 @@ "genome = MSGenome.from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MSBuilder.build_metabolic_model` default parameters runs RAST, ML prediction to select template (gram neg, gram pos, cyano [not implemented], archaea [not implemented]), builds draft model and gapfills with complete media" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "type object argument after ** must be a mapping, not str", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_3118582/859642788.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mmodelseedpy\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mRastClient\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mrast\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRastClient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mrast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mannotate_genome\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgenome\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/rast_client.py\u001b[0m in \u001b[0;36mannotate_genome\u001b[0;34m(self, genome)\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m \u001b[0;32mand\u001b[0m 
\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0mp_features\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"id\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"protein_translation\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp_features\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"features\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/rast_client.py\u001b[0m in \u001b[0;36mf\u001b[0;34m(self, p_features)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp_features\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0mparams\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m[\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"features\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mp_features\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"stages\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstages\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrpc_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"GenomeAnnotation.run_pipeline\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/rpcclient.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, method, params, token)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0merr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"error\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 75\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mServerError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"error\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 76\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
77\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mServerError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: type object argument after ** must be a mapping, not str" + ] + } + ], + "source": [ + "from modelseedpy import RastClient\n", + "rast = RastClient()\n", + "rast.annotate_genome(genome)" + ] + }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Number of features: 4285\n" + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:89: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.float):\n", + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:91: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. 
If you specifically wanted the numpy scalar type, use `np.bool_` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.bool):\n" ] } ], "source": [ - "print('Number of features:', len(genome.features))" + "model = MSBuilder.build_metabolic_model('ecoli', genome, classic_biomass=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Objective

1.0 bio1 = 141.02637369025626

Uptake

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00007_e0EX_cpd00007_e0244.300.00%
cpd00024_e0EX_cpd00024_e083.0752.58%
cpd00028_e0EX_cpd00028_e00.3955340.08%
cpd00030_e0EX_cpd00030_e00.395500.00%
cpd00033_e0EX_cpd00033_e079.8120.99%
cpd00034_e0EX_cpd00034_e00.395500.00%
cpd00039_e0EX_cpd00039_e031.4261.17%
cpd00051_e0EX_cpd00051_e034.7461.29%
cpd00054_e0EX_cpd00054_e034.3530.64%
cpd00058_e0EX_cpd00058_e00.395500.00%
cpd00060_e0EX_cpd00060_e031.0950.96%
cpd00063_e0EX_cpd00063_e00.395500.00%
cpd00065_e0EX_cpd00065_e06.647110.45%
cpd00066_e0EX_cpd00066_e021.7691.21%
cpd00069_e0EX_cpd00069_e016.9990.95%
cpd00079_e0EX_cpd00079_e0499.9618.61%
cpd00080_e0EX_cpd00080_e0609.4311.34%
cpd00099_e0EX_cpd00099_e00.395500.00%
cpd00106_e0EX_cpd00106_e0401.249.96%
cpd00107_e0EX_cpd00107_e052.8661.97%
cpd00118_e0EX_cpd00118_e00.395540.01%
cpd00119_e0EX_cpd00119_e011.1660.42%
cpd00129_e0EX_cpd00129_e025.9650.81%
cpd00130_e0EX_cpd00130_e0199.144.94%
cpd00132_e0EX_cpd00132_e028.2840.70%
cpd00136_e0EX_cpd00136_e00.395570.02%
cpd00149_e0EX_cpd00149_e00.395500.00%
cpd00156_e0EX_cpd00156_e049.651.54%
cpd00161_e0EX_cpd00161_e029.7240.74%
cpd00184_e0EX_cpd00184_e0221.11013.71%
cpd00205_e0EX_cpd00205_e00.395500.00%
cpd00208_e0EX_cpd00208_e03.526120.26%
cpd00209_e0EX_cpd00209_e019000.00%
cpd00249_e0EX_cpd00249_e011.5690.65%
cpd00254_e0EX_cpd00254_e00.395500.00%
cpd00264_e0EX_cpd00264_e00.395570.02%
cpd00268_e0EX_cpd00268_e00.197800.00%
cpd00277_e0EX_cpd00277_e022.59101.40%
cpd00305_e0EX_cpd00305_e00.3955120.03%
cpd00322_e0EX_cpd00322_e034.0561.27%
cpd00355_e0EX_cpd00355_e00.791110.05%
cpd00367_e0EX_cpd00367_e012.9990.73%
cpd00383_e0EX_cpd00383_e01.97870.09%
cpd00412_e0EX_cpd00412_e02.76990.15%
cpd00438_e0EX_cpd00438_e02411014.95%
cpd00644_e0EX_cpd00644_e00.79190.04%
cpd00794_e0EX_cpd00794_e014.1121.05%
cpd01080_e0EX_cpd01080_e035.09183.92%
cpd03847_e0EX_cpd03847_e03.526140.31%
cpd10515_e0EX_cpd10515_e00.79100.00%
cpd10516_e0EX_cpd10516_e00.395500.00%
cpd17041_c0rxn13782_c014100.00%
cpd17042_c0rxn13783_c014100.00%
cpd17043_c0rxn13784_c014100.00%

Secretion

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00009_e0EX_cpd00009_e0-100000.00%
cpd00011_e0EX_cpd00011_e0-796.817.50%
cpd00020_e0EX_cpd00020_e0-282.137.97%
cpd00027_e0EX_cpd00027_e0-445.8625.18%
cpd00029_e0EX_cpd00029_e0-49029.22%
cpd00035_e0EX_cpd00035_e0-185.235.23%
cpd00047_e0EX_cpd00047_e0-2.37310.02%
cpd00100_e0EX_cpd00100_e0-4.38630.12%
cpd00108_e0EX_cpd00108_e0-3.52660.20%
cpd00116_e0EX_cpd00116_e0-0.395510.00%
cpd00139_e0EX_cpd00139_e0-1.18720.02%
cpd00151_e0EX_cpd00151_e0-221.1510.40%
cpd00159_e0EX_cpd00159_e0-835.5323.60%
cpd00226_e0EX_cpd00226_e0-220.8510.39%
cpd02701_c0SK_cpd02701_c0-0.3955150.06%
cpd03091_c0SK_cpd03091_c0-0.791100.07%
cpd11416_c0SK_cpd11416_c0-14100.00%
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ignore this below ..." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from modelseedpy import RastClient\n", + "rast = RastClient()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Objective

1.0 bio1 = 141.02637369025626

Uptake

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00007_e0EX_cpd00007_e0244.300.00%
cpd00024_e0EX_cpd00024_e083.0752.58%
cpd00028_e0EX_cpd00028_e00.3955340.08%
cpd00030_e0EX_cpd00030_e00.395500.00%
cpd00033_e0EX_cpd00033_e079.8120.99%
cpd00034_e0EX_cpd00034_e00.395500.00%
cpd00039_e0EX_cpd00039_e031.4261.17%
cpd00051_e0EX_cpd00051_e034.7461.29%
cpd00054_e0EX_cpd00054_e034.3530.64%
cpd00058_e0EX_cpd00058_e00.395500.00%
cpd00060_e0EX_cpd00060_e031.0950.96%
cpd00063_e0EX_cpd00063_e00.395500.00%
cpd00065_e0EX_cpd00065_e06.647110.45%
cpd00066_e0EX_cpd00066_e021.7691.21%
cpd00069_e0EX_cpd00069_e016.9990.95%
cpd00079_e0EX_cpd00079_e0499.9618.61%
cpd00080_e0EX_cpd00080_e0609.4311.34%
cpd00099_e0EX_cpd00099_e00.395500.00%
cpd00106_e0EX_cpd00106_e0401.249.96%
cpd00107_e0EX_cpd00107_e052.8661.97%
cpd00118_e0EX_cpd00118_e00.395540.01%
cpd00119_e0EX_cpd00119_e011.1660.42%
cpd00129_e0EX_cpd00129_e025.9650.81%
cpd00130_e0EX_cpd00130_e0199.144.94%
cpd00132_e0EX_cpd00132_e028.2840.70%
cpd00136_e0EX_cpd00136_e00.395570.02%
cpd00149_e0EX_cpd00149_e00.395500.00%
cpd00156_e0EX_cpd00156_e049.651.54%
cpd00161_e0EX_cpd00161_e029.7240.74%
cpd00184_e0EX_cpd00184_e0221.11013.71%
cpd00205_e0EX_cpd00205_e00.395500.00%
cpd00208_e0EX_cpd00208_e03.526120.26%
cpd00209_e0EX_cpd00209_e019000.00%
cpd00249_e0EX_cpd00249_e011.5690.65%
cpd00254_e0EX_cpd00254_e00.395500.00%
cpd00264_e0EX_cpd00264_e00.395570.02%
cpd00268_e0EX_cpd00268_e00.197800.00%
cpd00277_e0EX_cpd00277_e022.59101.40%
cpd00305_e0EX_cpd00305_e00.3955120.03%
cpd00322_e0EX_cpd00322_e034.0561.27%
cpd00355_e0EX_cpd00355_e00.791110.05%
cpd00367_e0EX_cpd00367_e012.9990.73%
cpd00383_e0EX_cpd00383_e01.97870.09%
cpd00412_e0EX_cpd00412_e02.76990.15%
cpd00438_e0EX_cpd00438_e02411014.95%
cpd00644_e0EX_cpd00644_e00.79190.04%
cpd00794_e0EX_cpd00794_e014.1121.05%
cpd01080_e0EX_cpd01080_e035.09183.92%
cpd03847_e0EX_cpd03847_e03.526140.31%
cpd10515_e0EX_cpd10515_e00.79100.00%
cpd10516_e0EX_cpd10516_e00.395500.00%
cpd17041_c0rxn13782_c014100.00%
cpd17042_c0rxn13783_c014100.00%
cpd17043_c0rxn13784_c014100.00%

Secretion

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00009_e0EX_cpd00009_e0-100000.00%
cpd00011_e0EX_cpd00011_e0-796.817.50%
cpd00020_e0EX_cpd00020_e0-282.137.97%
cpd00027_e0EX_cpd00027_e0-445.8625.18%
cpd00029_e0EX_cpd00029_e0-49029.22%
cpd00035_e0EX_cpd00035_e0-185.235.23%
cpd00047_e0EX_cpd00047_e0-2.37310.02%
cpd00100_e0EX_cpd00100_e0-4.38630.12%
cpd00108_e0EX_cpd00108_e0-3.52660.20%
cpd00116_e0EX_cpd00116_e0-0.395510.00%
cpd00139_e0EX_cpd00139_e0-1.18720.02%
cpd00151_e0EX_cpd00151_e0-221.1510.40%
cpd00159_e0EX_cpd00159_e0-835.5323.60%
cpd00226_e0EX_cpd00226_e0-220.8510.39%
cpd02701_c0SK_cpd02701_c0-0.3955150.06%
cpd03091_c0SK_cpd03091_c0-0.791100.07%
cpd11416_c0SK_cpd11416_c0-14100.00%
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of features: 4285\n" + ] + } + ], + "source": [ + "print('Number of features:', len(genome.features))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "builder = MSBuilder(genome)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "IndexError", + "evalue": "The genomes or genomeSet that you have submitted wasn’t annotated using the RAST annotation pipeline. Please annotate the genomes via ‘Annotate Microbial Genome’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genome/release)or genomeSets via Annotate Multiple Microbial Genomes’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genomes/release) and resubmit the RAST annotated genome/genomeSets into the Predict Phenotype app. 
(", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/ml/predict_phenotype.py\u001b[0m in \u001b[0;36mcreate_indicator_matrix\u001b[0;34m(ref_to_role, master_role_list)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 94\u001b[0;31m \u001b[0mindicators\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmatching_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 95\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: arrays used as indices must be of integer (or boolean) type", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_3016957/3197840996.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbuilder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_select_template\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/msbuilder.py\u001b[0m in \u001b[0;36mauto_select_template\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 664\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 665\u001b[0m \u001b[0mgenome_classifier\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mget_classifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"knn_ACNP_RAST_filter_01_17_2023\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 666\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenome_class\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgenome_classifier\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenome\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 667\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 668\u001b[0m \u001b[0;31m# TODO: update with enum MSGenomeClass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/msgenomeclassifier.py\u001b[0m in \u001b[0;36mclassify\u001b[0;34m(self, genome_or_roles, ontology_term)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0mgenome_or_roles\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0montology_term\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m )\n\u001b[0;32m---> 33\u001b[0;31m indicator_df, master_role_list = create_indicator_matrix(\n\u001b[0m\u001b[1;32m 34\u001b[0m \u001b[0mgenome_or_roles\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m )\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/ml/predict_phenotype.py\u001b[0m in \u001b[0;36mcreate_indicator_matrix\u001b[0;34m(ref_to_role, master_role_list)\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0mindicators\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmatching_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 96\u001b[0;31m raise IndexError(\n\u001b[0m\u001b[1;32m 97\u001b[0m \u001b[0;31m\"\u001b[0m\u001b[0mThe\u001b[0m \u001b[0mgenomes\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mgenomeSet\u001b[0m \u001b[0mthat\u001b[0m \u001b[0myou\u001b[0m \u001b[0mhave\u001b[0m \u001b[0msubmitted\u001b[0m \u001b[0mwasn\u001b[0m\u001b[0;31m’\u001b[0m\u001b[0mt\u001b[0m \u001b[0mannotated\u001b[0m \u001b[0musing\u001b[0m \u001b[0mthe\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0mRAST\u001b[0m \u001b[0mannotation\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mPlease\u001b[0m \u001b[0mannotate\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mgenomes\u001b[0m \u001b[0mvia\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m‘\u001b[0m\u001b[0mAnnotate\u001b[0m \u001b[0mMicrobial\u001b[0m \u001b[0mGenome\u001b[0m\u001b[0;31m’\u001b[0m \u001b[0mapp\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: The genomes or genomeSet that you have submitted wasn’t annotated using the RAST annotation pipeline. Please annotate the genomes via ‘Annotate Microbial Genome’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genome/release)or genomeSets via Annotate Multiple Microbial Genomes’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genomes/release) and resubmit the RAST annotated genome/genomeSets into the Predict Phenotype app. 
(" + ] + } + ], + "source": [ + "builder.auto_select_template()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from cobra.core import Reaction" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "rxn = Reaction('SK_cpd11416_c0', 'SK_cpd11416_c0', '', 0, 1000)\n", + "rxn.add_metabolites({model.metabolites.cpd11416_c0: -1})\n", + "model.add_reactions([rxn])" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:89: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.float):\n", + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:91: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.bool):\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Nameecoli
Memory address7f3dd51e8400
Number of metabolites1458
Number of reactions1772
Number of genes1295
Number of groups1323
Objective expression1.0*bio1 - 1.0*bio1_reverse_b18f7
CompartmentsCytosol, Extracellular
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MSBuilder.gapfill_model(model, \"bio1\", builder.template, None)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Objective

1.0 bio1 = 0.0

Uptake

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux

Secretion

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cpd00010_c0 CoA [c0] 80\n", + "cpd11493_c0 ACP [c0] 39\n", + "cpd12370_c0 apo-ACP [c0] 3\n", + "cpd00003_c0 NAD [c0] 127\n", + "cpd00006_c0 NADP [c0] 89\n", + "cpd00205_c0 K+ [c0] 5\n", + "cpd00254_c0 Mg [c0] 3\n", + "cpd10516_c0 fe3 [c0] 5\n", + "cpd00063_c0 Ca2+ [c0] 2\n", + "cpd00009_c0 Phosphate [c0] 210\n", + "cpd00099_c0 Cl- [c0] 3\n", + "cpd00149_c0 Co2+ [c0] 2\n", + "cpd00058_c0 Cu2+ [c0] 3\n", + "cpd00015_c0 FAD [c0] 13\n", + "cpd10515_c0 Fe2+ [c0] 5\n", + "cpd00030_c0 Mn2+ [c0] 2\n", + "cpd00048_c0 Sulfate [c0] 4\n", + "cpd00034_c0 Zn2+ [c0] 2\n", + "cpd00016_c0 Pyridoxal phosphate [c0] 5\n", + "cpd00220_c0 Riboflavin [c0] 5\n", + "cpd00017_c0 S-Adenosyl-L-methionine [c0] 21\n", + "cpd00201_c0 10-Formyltetrahydrofolate [c0] 7\n", + "cpd00087_c0 Tetrahydrofolate [c0] 12\n", + "cpd00345_c0 5-Methyltetrahydrofolate [c0] 3\n", + "cpd00042_c0 GSH [c0] 13\n", + "cpd00028_c0 Heme [c0] 4\n", + "cpd00557_c0 Siroheme [c0] 2\n", + "cpd00264_c0 Spermidine [c0] 8\n", + "cpd00118_c0 Putrescine [c0] 9\n", + "cpd00056_c0 TPP [c0] 7\n", + "cpd15560_c0 Ubiquinone-8 [c0] 18\n", + "cpd15352_c0 2-Demethylmenaquinone 8 [c0] 7\n", + "cpd15500_c0 Menaquinone 8 [c0] 12\n", + "cpd00166_c0 Calomide [c0] 4\n", + "cpd01997_c0 Dimethylbenzimidazole [c0] 2\n", + "cpd03422_c0 Cobinamide [c0] 2\n", + "cpd00104_c0 BIOT [c0] 5\n", + "cpd00037_c0 UDP-N-acetylglucosamine [c0] 16\n", + "cpd00050_c0 FMN [c0] 11\n", + "cpd15793_c0 Stearoylcardiolipin (B. 
subtilis) [c0] 1\n", + "cpd15540_c0 Phosphatidylglycerol dioctadecanoyl [c0] 3\n", + "cpd15533_c0 phosphatidylethanolamine dioctadecanoyl [c0] 3\n", + "cpd15432_c0 core oligosaccharide lipid A [c0] 2\n", + "cpd02229_c0 Bactoprenyl diphosphate [c0] 5\n", + "cpd15665_c0 Peptidoglycan polymer (n subunits) [c0] 2\n", + "cpd15666_c0 Peptidoglycan polymer (n-1 subunits) [c0] 2\n", + "cpd00023_c0 L-Glutamate [c0] 57\n", + "cpd00001_c0 H2O [c0] 556\n", + "cpd00033_c0 Glycine [c0] 21\n", + "cpd00035_c0 L-Alanine [c0] 17\n", + "cpd00039_c0 L-Lysine [c0] 8\n", + "cpd00041_c0 L-Aspartate [c0] 19\n", + "cpd00051_c0 L-Arginine [c0] 6\n", + "cpd00053_c0 L-Glutamine [c0] 17\n", + "cpd00054_c0 L-Serine [c0] 23\n", + "cpd00060_c0 L-Methionine [c0] 19\n", + "cpd00065_c0 L-Tryptophan [c0] 5\n", + "cpd00066_c0 L-Phenylalanine [c0] 4\n", + "cpd00069_c0 L-Tyrosine [c0] 6\n", + "cpd00084_c0 L-Cysteine [c0] 14\n", + "cpd00107_c0 L-Leucine [c0] 6\n", + "cpd00119_c0 L-Histidine [c0] 4\n", + "cpd00129_c0 L-Proline [c0] 11\n", + "cpd00132_c0 L-Asparagine [c0] 6\n", + "cpd00156_c0 L-Valine [c0] 5\n", + "cpd00161_c0 L-Threonine [c0] 7\n", + "cpd00322_c0 L-Isoleucine [c0] 4\n", + "cpd00115_c0 dATP [c0] 7\n", + "cpd00012_c0 PPi [c0] 134\n", + "cpd00241_c0 dGTP [c0] 8\n", + "cpd00356_c0 dCTP [c0] 6\n", + "cpd00357_c0 TTP [c0] 7\n", + "cpd00002_c0 ATP [c0] 276\n", + "cpd00038_c0 GTP [c0] 20\n", + "cpd00052_c0 CTP [c0] 25\n", + "cpd00062_c0 UTP [c0] 13\n", + "cpd00008_c0 ADP [c0] 214\n", + "cpd00067_c0 H+ [c0] 896\n", + "cpd11416_c0 Biomass [c0] 2\n", + "cpd17041_c0 Protein biosynthesis [c0] 2\n", + "cpd17042_c0 DNA replication [c0] 2\n", + "cpd17043_c0 RNA transcription [c0] 2\n" + ] + } + ], + "source": [ + "for m in model.reactions.bio1.metabolites:\n", + " print(m, m.name, len(m.reactions))" ] }, { diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 7f135055..2c4e20b1 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -5,33 +5,20 @@ # set the warning format to 
be on a single line import sys import logging +import cobra import warnings as _warnings from os import name as _name from os.path import abspath as _abspath from os.path import dirname as _dirname from modelseedpy.helpers import config -logging_hash = { - "debug": logging.DEBUG, - "critical": logging.CRITICAL, - "error": logging.ERROR, - "warning": logging.WARNING, - "info": logging.INFO, -} +__author__ = "Christopher Henry" +__email__ = "chenry@anl.gov" +__version__ = "0.4.2" -# Configuing modelseedpy logger logger = logging.getLogger(__name__) -c_handler = logging.StreamHandler() -c_handler.setLevel(logging_hash[config.get("logging", "console_level")]) -c_format = logging.Formatter("%(name)s - %(levelname)s - %(message)s") -c_handler.setFormatter(c_format) -logger.addHandler(c_handler) -if config.get("logging", "log_file") == "yes": - f_handler = logging.FileHandler(config.get("logging", "filename"), mode="a") - f_handler.setLevel(logging_hash[config.get("logging", "file_level")]) - f_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") - f_handler.setFormatter(f_format) - logger.addHandler(f_handler) + +print("modelseedpy", __version__) if sys.version_info[0] == 2: logger.warning( @@ -41,6 +28,9 @@ "still work but we will no longer actively maintain Python 2 support." 
) +if "e0" not in cobra.medium.annotations.compartment_shortlist["e"]: + cobra.medium.annotations.compartment_shortlist["e"].append("e0") + import modelseedpy from modelseedpy.core import ( RastClient, @@ -48,17 +38,22 @@ MSBuilder, MSMedia, MSGrowthPhenotypes, + MSGrowthPhenotype, MSModelUtil, FBAHelper, MSEditorAPI, MSATPCorrection, MSGapfill, MSEquation, + MSModelReport, + AnnotationOntology, ) from modelseedpy.core.exceptions import * from modelseedpy.community import MSCommunity, MSCompatibility, CommKineticPkg +from modelseedpy.biochem import ModelSEEDBiochem + from modelseedpy.fbapkg import ( BaseFBAPkg, RevBinPkg, @@ -81,5 +76,3 @@ ) from modelseedpy.multiomics import MSExpression - -__version__ = "0.2.2" diff --git a/modelseedpy/biochem/modelseed_biochem.py b/modelseedpy/biochem/modelseed_biochem.py index ccdd8d76..634a97ba 100644 --- a/modelseedpy/biochem/modelseed_biochem.py +++ b/modelseedpy/biochem/modelseed_biochem.py @@ -7,6 +7,7 @@ from modelseedpy.biochem.modelseed_compound import ModelSEEDCompound, ModelSEEDCompound2 from modelseedpy.biochem.modelseed_reaction import ModelSEEDReaction, ModelSEEDReaction2 from modelseedpy.helpers import config +from modelseedpy.core.msmodel import get_reaction_constraints_from_direction logger = logging.getLogger(__name__) @@ -249,6 +250,56 @@ def _load_metabolites( return metabolites +def build_modelseed_reaction( + o, names, aliases, ec_numbers, metabolites_indexed, metabolites +): + if "id" in o and o["id"]: + rxn_names = set() + if o["id"] in names: + rxn_names |= names[o["id"]] + ( + lower_bound, + upper_bound, + ) = get_reaction_constraints_from_direction(o.get("reversibility")) + stoichiometry = o.get("stoichiometry") + reaction_metabolites = {} + for s in stoichiometry: + cmp_token = s["compartment"] + value = s["coefficient"] + cpd = metabolites[s["compound"]] + cpd_index_id = f"{cpd.id}_{cmp_token}" + if cpd_index_id not in metabolites_indexed: + cpd_token = cpd.copy() + cpd_token.id = 
f"{cpd.id}_{cmp_token}" + cpd_token.base_id = cpd.id + cpd_token.compartment = cmp_token + metabolites_indexed[cpd_index_id] = cpd_token + reaction_metabolites[metabolites_indexed[cpd_index_id]] = value + rxn = ModelSEEDReaction2( + o["id"], + o.get("name"), + "", + lower_bound, + upper_bound, + "", + rxn_names, + o.get("deltag"), + o.get("deltagerr"), + o.get("is_obsolete"), + None, + o.get("status"), + o.get("source"), + ) + rxn.add_metabolites(reaction_metabolites) + if rxn.id in aliases: + rxn.annotation.update(aliases[rxn.id]) + if rxn.id in ec_numbers: + rxn.annotation["ec-code"] = ec_numbers[rxn.id] + return rxn + else: + raise ValueError("unable to build reaction") + + def _load_reactions( database_path: str, metabolites: dict, aliases=None, names=None, ec_numbers=None ) -> (dict, dict): @@ -306,6 +357,7 @@ def _load_reactions( None, o.get("status"), o.get("source"), + pathways=o.get("pathways"), ) rxn.add_metabolites(reaction_metabolites) if rxn.id in aliases: diff --git a/modelseedpy/biochem/modelseed_compound.py b/modelseedpy/biochem/modelseed_compound.py index 89c4d5f5..c5c73fed 100644 --- a/modelseedpy/biochem/modelseed_compound.py +++ b/modelseedpy/biochem/modelseed_compound.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from modelseedpy.biochem.seed_object import ModelSEEDObject -from modelseedpy.core.mstemplate import MSTemplateSpecies +from modelseedpy.core.mstemplate import MSTemplateSpecies, MSTemplateMetabolite from cobra.core import Metabolite import pandas as pd @@ -57,8 +57,28 @@ def __init__( self.flags |= set(flags) def to_template_compartment_compound(self, compartment): - cpd_id = f"{self.seed_id}_{compartment}" - res = MSTemplateSpecies(cpd_id, self.charge, compartment, self.id) + cpd_id = f"{self.seed_id}" + if compartment: + cpd_id += f"_{compartment}" + # build Template Compound + metabolite = MSTemplateMetabolite( + self.seed_id, + self.formula, + self.name, + self.charge, + self.mass, + self.delta_g, + self.delta_g_error, + 
self.is_cofactor, + self.abbr, + ) + # build Template Compartment Compound + if compartment is None: + compartment = "x" + res = MSTemplateSpecies(cpd_id, self.charge, compartment, metabolite.id) + + # assign Compound to Compartment Compound + res._template_compound = metabolite res.annotation.update(self.annotation) return res diff --git a/modelseedpy/biochem/modelseed_reaction.py b/modelseedpy/biochem/modelseed_reaction.py index b43430ce..04b5e086 100644 --- a/modelseedpy/biochem/modelseed_reaction.py +++ b/modelseedpy/biochem/modelseed_reaction.py @@ -134,6 +134,7 @@ def __init__( status=None, source=None, flags=None, + pathways=None, ): super().__init__(rxn_id, name, subsystem, lower_bound, upper_bound) @@ -165,6 +166,8 @@ def __init__( if flags: self.flags |= set(flags) + self.pathways = pathways + @property def compound_ids(self): return None @@ -174,8 +177,11 @@ def to_template_reaction(self, compartment_setup=None): raise ValueError("invalid compartment setup") from modelseedpy.core.msmodel import get_cmp_token + rxn_id = f"{self.id}" reaction_compartment = get_cmp_token(compartment_setup.values()) - rxn_id = f"{self.id}_{reaction_compartment}" + if reaction_compartment: + rxn_id += f"_{reaction_compartment}" + name = f"{self.name}" metabolites = {} for m, v in self.metabolites.items(): diff --git a/modelseedpy/core/__init__.py b/modelseedpy/core/__init__.py index 204564ab..bd374a03 100644 --- a/modelseedpy/core/__init__.py +++ b/modelseedpy/core/__init__.py @@ -9,7 +9,9 @@ from modelseedpy.core.mseditorapi import MSEditorAPI, MSEquation from modelseedpy.core.msgapfill import MSGapfill from modelseedpy.core.msatpcorrection import MSATPCorrection -from modelseedpy.core.msgrowthphenotypes import MSGrowthPhenotypes +from modelseedpy.core.msgrowthphenotypes import MSGrowthPhenotypes, MSGrowthPhenotype from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.mstemplate import MSTemplateBuilder +from modelseedpy.core.msmodelreport import 
MSModelReport +from modelseedpy.core.annotationontology import AnnotationOntology from modelseedpy.core.exceptions import * diff --git a/modelseedpy/core/annotationontology.py b/modelseedpy/core/annotationontology.py new file mode 100644 index 00000000..db64a981 --- /dev/null +++ b/modelseedpy/core/annotationontology.py @@ -0,0 +1,405 @@ +# -*- coding: utf-8 -*- +import logging +import re +import time +import json +import sys +import pandas as pd +import cobra +from cobra import DictList + +# from builtins import None + +logger = logging.getLogger(__name__) +logger.setLevel( + logging.INFO +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO + +# Class structure +# AnnotationOntology -> Features/Events/Terms/Ontologies +# AnnotationOntologyOntology -> Events/Terms +# AnnotationOntologyEvent -> Features/Ontology +# AnnotationOntologyFeature -> Term+Event->Evidence +# AnnotationOntologyTerm -> Ontology/Events/Featurs +# AnnotationOntologyEvidence -> -- + +allowable_score_types = [ + "probability", + "evalue", + "bitscore", + "identity", + "qalignstart", + "qalignstop", + "salignstart", + "salignstop", + "kmerhits", + "tmscore", + "rmsd", + "hmmscore", +] + + +class AnnotationOntologyEvidence: + def __init__(self, scores={}, ref_entity=None, entity_type=None): + self.ref_entity = ref_entity + self.entity_type = entity_type + self.scores = scores + for item in self.scores: + if item not in allowable_score_types: + logger.warning(item + " not an allowable score type!") + + def to_data(self): + return { + "ref_entity": self.ref_entity, + "entity_type": self.entity_type, + "scores": self.scores, + } + + +class AnnotationOntologyTerm: + def __init__(self, parent, term_id, ontology): + self.id = term_id + self.parent = parent + self.ontology = ontology + self.ontology.add_term(self) + self.parent.add_term(self) + self.msrxns = set() + self.events = {} + self.features = {} + + def add_msrxns(self, rxn_ids): + for rxn_id in rxn_ids: + if 
rxn_id[0:6] == "MSRXN:": + rxn_id = rxn_id[6:] + self.msrxns.update([rxn_id]) + + def add_event(self, event): + self.events[event.id] = event + + def add_feature(self, feature): + self.features[feature.id] = feature + + +class AnnotationOntologyOntology: + def __init__(self, parent, ontology_id): + self.id = ontology_id + self.parent = parent + self.events = {} + self.terms = {} + + def add_event(self, event): + self.events[event.id] = event + + def add_term(self, term): + self.terms[term.id] = term + + +class AnnotationOntologyFeature: + def __init__(self, parent, feature_id, type=None): + self.id = feature_id + self.parent = parent + parent.add_feature(self) + self.type = type + self.event_terms = {} + self.term_events = {} + + def add_event_term(self, event, term, scores={}, ref_entity=None, entity_type=None): + if event.id not in self.event_terms: + self.event_terms[event.id] = {} + self.event_terms[event.id][term.id] = AnnotationOntologyEvidence( + scores, ref_entity, entity_type + ) + if term.id not in self.term_events: + self.term_events[term.id] = {} + self.term_events[term.id][event.id] = self.event_terms[event.id][term.id] + + def get_associated_terms( + self, + prioritized_event_list=None, + ontologies=None, + merge_all=False, + translate_to_rast=False, + ): + output = {} + for term_id in self.term_events: + term = self.parent.terms[term_id] + if not ontologies or term.ontology.id in ontologies: + if merge_all or not prioritized_event_list: + for event_id in self.term_events[term_id]: + if ( + not prioritized_event_list + or event_id in prioritized_event_list + ): + if term not in output: + output[term] = [] + output[term].append( + self.term_events[term_id][event_id].to_data() + ) + else: + for event_id in prioritized_event_list: + if event_id in self.term_events[term_id]: + rxns = self.parent.terms[term_id].msrxns + if len(rxns) > 0: + if term not in output: + output[term] = [] + output[term].append( + self.term_events[term_id][event_id].to_data() + ) 
+ break + return output + + def get_associated_reactions( + self, prioritized_event_list=None, ontologies=None, merge_all=False + ): + output = {} + for term_id in self.term_events: + if not ontologies or self.parent.terms[term_id].ontology.id in ontologies: + if merge_all or not prioritized_event_list: + for event_id in self.term_events[term_id]: + if ( + not prioritized_event_list + or event_id in prioritized_event_list + ): + rxns = self.parent.terms[term_id].msrxns + for rxn_id in rxns: + if rxn_id not in output: + output[rxn_id] = [] + output[rxn_id].append( + self.term_events[term_id][event_id].to_data() + ) + else: + for event_id in prioritized_event_list: + if event_id in self.term_events[term_id]: + rxns = self.parent.terms[term_id].msrxns + for rxn_id in rxns: + if rxn_id not in output: + output[rxn_id] = [] + output[rxn_id].append( + self.term_events[term_id][event_id].to_data() + ) + if len(rxns) > 0: + break + return output + + +class AnnotationOntologyEvent: + def __init__( + self, + parent, + event_id, + ontology_id, + method, + method_version=None, + description=None, + timestamp=None, + ): + self.id = event_id + self.parent = parent + # Linking ontology + self.ontology = self.parent.add_ontology(ontology_id) + self.ontology.add_event(self) + if not description: + self.description = "" # TODO + else: + self.description = description + self.method = method + self.method_version = method_version + self.timestamp = timestamp + self.features = {} + + @staticmethod + def from_data(data, parent): + if "method_version" not in data: + data["method_version"] = None + if "description" not in data: + data["description"] = None + if "timestamp" not in data: + data["timestamp"] = None + self = AnnotationOntologyEvent( + parent, + data["event_id"], + data["ontology_id"], + data["method"], + data["method_version"], + data["description"], + data["timestamp"], + ) + if "ontology_terms" in data: + for feature_id in data["ontology_terms"]: + feature = 
self.parent.add_feature(feature_id) + self.add_feature(feature) + for item in data["ontology_terms"][feature_id]: + term = self.parent.add_term(item["term"], self.ontology) + scores = {} + ref_entity = None + entity_type = None + if "evidence" in item: + if "scores" in item["evidence"]: + scores = item["evidence"]["scores"] + if "reference" in item["evidence"]: + ref_entity = item["evidence"]["reference"][1] + entity_type = item["evidence"]["reference"][0] + feature.add_event_term(self, term, scores, ref_entity, entity_type) + if "modelseed_ids" in item: + term.add_msrxns(item["modelseed_ids"]) + return self + + def add_feature(self, feature): + self.features[feature.id] = feature + + def to_data(self): + data = { + "event_id": self.event_id, + "description": self.event_id, + "ontology_id": self.ontology_id, + "method": self.method, + "method_version": self.method_version, + "timestamp": self.timestamp, + "ontology_terms": {}, + } + for feature in self.features: + data["ontology_terms"][feature] = {"term": None} # TODO + + +class AnnotationOntology: + mdlutls = {} + + @staticmethod + def from_kbase_data(data, genome_ref=None, data_dir=None): + self = AnnotationOntology(genome_ref, data_dir) + if "feature_types" in data: + self.feature_types = data["feature_types"] + if "events" in data: + for event in data["events"]: + self.events += [AnnotationOntologyEvent.from_data(event, self)] + return self + + def __init__(self, genome_ref, data_dir): + self.genome_ref = genome_ref + self.events = DictList() + self.terms = {} + self.ontologies = {} + self.genes = {} + self.cdss = {} + self.data_dir = data_dir + self.noncodings = {} + self.feature_types = {} + self.term_names = {} + + def get_term_name(self, term): + if term.ontology.id not in self.term_names: + self.term_names[term.ontology.id] = {} + if term.ontology.id in [ + "SSO", + "AntiSmash", + "EC", + "TC", + "META", + "RO", + "KO", + "GO", + ]: + with open( + self.data_dir + "/" + term.ontology.id + 
"_dictionary.json" + ) as json_file: + ontology = json.load(json_file) + for item in ontology["term_hash"]: + self.term_names[term.ontology.id][item] = ontology["term_hash"][ + item + ]["name"] + if term.id not in self.term_names[term.ontology.id]: + return "Unknown" + return self.term_names[term.ontology.id][term.id] + + def get_gene_term_hash( + self, + prioritized_event_list=None, + ontologies=None, + merge_all=False, + cds_features=False, + translate_to_rast=True, + ): + output = {} + feature_hash = self.genes + if len(self.genes) == 0 or (cds_features and len(self.cdss) == 0): + feature_hash = self.cdss + for feature_id in feature_hash: + feature = feature_hash[feature_id] + if feature not in output: + output[feature] = {} + output[feature] = feature.get_associated_terms( + prioritized_event_list, ontologies, merge_all, translate_to_rast + ) + return output + + def get_reaction_gene_hash( + self, + prioritized_event_list=None, + ontologies=None, + merge_all=False, + cds_features=False, + ): + output = {} + feature_hash = self.genes + if len(self.genes) == 0 or (cds_features and len(self.cdss) == 0): + feature_hash = self.cdss + for feature_id in feature_hash: + reactions = feature_hash[feature_id].get_associated_reactions( + prioritized_event_list, ontologies, merge_all + ) + for rxn_id in reactions: + if rxn_id not in output: + output[rxn_id] = {} + if feature_id not in output[rxn_id]: + output[rxn_id][feature_id] = [] + output[rxn_id][feature_id].append(reactions[rxn_id]) + return output + + def add_term(self, term_or_id, ontology=None): + if not isinstance(term_or_id, AnnotationOntologyTerm): + if term_or_id in self.terms: + return self.terms[term_or_id] + else: + return AnnotationOntologyTerm(self, term_or_id, ontology) + if term_or_id.id in self.terms: + logger.critical("Term with id " + term_or_id.id + " already in annotation!") + return self.terms[term_or_id.id] + else: + self.terms[term_or_id.id] = term_or_id + + def add_ontology(self, ontology_or_id): 
+ if not isinstance(ontology_or_id, AnnotationOntologyOntology): + if ontology_or_id in self.ontologies: + return self.ontologies[ontology_or_id] + else: + return AnnotationOntologyOntology(self, ontology_or_id) + if ontology_or_id.id in self.ontologies: + logger.critical( + "Ontology with id " + ontology_or_id.id + " already in annotation!" + ) + return self.ontologies[ontology_or_id.id] + else: + self.ontologies[ontology_or_id.id] = ontology_or_id + + def get_feature_hash(self, feature_id): + feature_hash = self.genes + if feature_id in self.feature_types: + if self.feature_types[feature_id] == "cds": + feature_hash = self.cdss + elif self.feature_types[feature_id] == "noncoding": + feature_hash = self.noncodings + return feature_hash + + def add_feature(self, feature_or_id): + feature_hash = None + if not isinstance(feature_or_id, AnnotationOntologyFeature): + feature_hash = self.get_feature_hash(feature_or_id) + if feature_or_id in feature_hash: + return feature_hash[feature_or_id] + else: + feature_or_id = AnnotationOntologyFeature(self, feature_or_id) + if not feature_hash: + feature_hash = self.get_feature_hash(feature_or_id.id) + if feature_or_id.id not in feature_hash: + feature_hash[feature_or_id.id] = feature_or_id + return feature_hash[feature_or_id.id] diff --git a/modelseedpy/core/fbahelper.py b/modelseedpy/core/fbahelper.py index 6c44108f..502611d9 100644 --- a/modelseedpy/core/fbahelper.py +++ b/modelseedpy/core/fbahelper.py @@ -115,18 +115,24 @@ def modelseed_id_from_cobra_reaction(reaction): @staticmethod def metabolite_mw(metabolite): + fixed_masses = {"cpd11416": 1, "cpd17041": 0, "cpd17042": 0, "cpd17043": 0} + msid = FBAHelper.modelseed_id_from_cobra_metabolite(metabolite) + if msid in fixed_masses: + return fixed_masses[msid] + if not metabolite.formula: + return 0 + formula = re.sub("R\d*", "", metabolite.formula) try: - if not metabolite.formula: - return 0 - formula = re.sub("R\d*", "", metabolite.formula) chem_mw = ChemMW(printing=False) 
chem_mw.mass(formula) return chem_mw.raw_mw except: - warn( + logger.warn( "The compound " + metabolite.id - + " possesses an unconventional formula {metabolite.formula}; hence, the MW cannot be computed." + + " possesses an unconventional formula " + + metabolite.formula + + "; hence, the MW cannot be computed." ) return 0 diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index c5b20e3c..448580ea 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -1,20 +1,7 @@ # -*- coding: utf-8 -*- import logging -import itertools -import cobra import json -import time import pandas as pd -from os.path import abspath as _abspath -from os.path import dirname as _dirname -from optlang.symbolics import Zero, add -from modelseedpy.core.rast_client import RastClient -from modelseedpy.core.msgenome import normalize_role -from modelseedpy.core.msmodel import ( - get_gpr_string, - get_reaction_constraints_from_direction, -) -from cobra.core import Gene, Metabolite, Model, Reaction from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.mstemplate import MSTemplateBuilder from modelseedpy.core import FBAHelper, MSGapfill, MSMedia @@ -23,25 +10,28 @@ logger = logging.getLogger(__name__) -_path = _dirname(_abspath(__file__)) - min_gap = { - "Glc/O2": 5, - "Etho/O2": 0.01, - "Ac/O2": 1, - "Pyr/O2": 3, - "Glyc/O2": 2, - "Fum/O2": 3, - "Succ/O2": 2, - "Akg/O2": 2, - "LLac/O2": 2, - "Dlac/O2": 2, - "For/O2": 2, - "For/NO3": 1.5, - "Pyr/NO": 2.5, - "Pyr/NO2": 2.5, - "Pyr/NO3": 2.5, - "Pyr/SO4": 2.5, + "Glc.O2": 5, + "Etho.O2": 0.01, + "Ac.O2": 1, + "Pyr.O2": 3, + "Glyc.O2": 2, + "Fum.O2": 3, + "Succ.O2": 2, + "Akg.O2": 2, + "LLac.O2": 2, + "Dlac.O2": 2, + "For.O2": 1.875, + "For.NO3": 1.5, + "Pyr.NO": 2.5, + "Pyr.NO2": 2.5, + "Pyr.NO3": 2.5, + "Pyr.SO4": 2.5, +} + +default_threshold_multipiers = { + "Glc": 2, + "default": 1.2, } @@ -82,12 +72,6 @@ def __init__( self.modelutl = 
MSModelUtil.get(model_or_mdlutl) # Setting atpcorrection attribute in model utl so link is bidirectional self.modelutl.atputl = self - - if default_media_path: - self.default_media_path = default_media_path - else: - self.default_media_path = _path + "/../data/atp_medias.tsv" - self.compartment = compartment if atp_hydrolysis_id and atp_hydrolysis_id in self.model.reactions: @@ -96,20 +80,34 @@ def __init__( output = self.modelutl.add_atp_hydrolysis(compartment) self.atp_hydrolysis = output["reaction"] + self.media_hash = {} self.atp_medias = [] + if load_default_medias: - self.load_default_medias() - for media in atp_medias: - if isinstance(media, list): - self.atp_medias.append(media) - else: - self.atp_medias.append([media, 0.01]) + self.load_default_medias(default_media_path) + + media_ids = set() + for media_or_list in atp_medias: + media = ( + media_or_list[0] if isinstance(media_or_list, list) else media_or_list + ) + min_obj = media_or_list[1] if isinstance(media_or_list, list) else 0.01 + if media.id in media_ids: + raise ValueError("media ids not unique") + media_ids.add(media.id) + self.atp_medias.append((media, min_obj)) + self.media_hash[media.id] = media + if "empty" not in self.media_hash: + media = MSMedia.from_dict({}) + media.id = "empty" + media.name = "empty" + self.media_hash[media.id] = media self.forced_media = [] for media_id in forced_media: - for media in self.atp_medias: - if media.id == media_id: - self.forced_media.append(media) + for item in self.atp_medias: + if item[0].id == media_id: + self.forced_media.append(item[0]) break self.max_gapfilling = max_gapfilling @@ -121,7 +119,9 @@ def __init__( self.coretemplate = core_template self.msgapfill = MSGapfill( - self.modelutl, default_gapfill_templates=core_template + self.modelutl, + default_gapfill_templates=[core_template], + default_target=self.atp_hydrolysis.id, ) # These should stay as None until atp correction is actually run self.cumulative_core_gapfilling = None @@ -139,8 
+139,13 @@ def load_default_template(self): get_template("template_core"), None ).build() - def load_default_medias(self): - filename = self.default_media_path + def load_default_medias(self, default_media_path=None): + if default_media_path is None: + import os.path as _path + + current_file_path = _path.dirname(_path.abspath(__file__)) + default_media_path = f"{current_file_path}/../data/atp_medias.tsv" + filename = default_media_path medias = pd.read_csv(filename, sep="\t", index_col=0).to_dict() for media_id in medias: media_d = {} @@ -154,9 +159,7 @@ def load_default_medias(self): media.id = media_id media.name = media_id min_obj = 0.01 - if media_id in min_gap: - min_obj = min_gap[media_id] - self.atp_medias.append([media, min_obj]) + self.atp_medias.append((media, min_gap.get(media_d, min_obj))) @staticmethod def find_reaction_in_template(model_reaction, template, compartment): @@ -208,6 +211,7 @@ def disable_noncore_reactions(self): self.other_compartments = [] # Iterating through reactions and disabling for reaction in self.model.reactions: + gfrxn = self.msgapfill.gfmodel.reactions.get_by_id(reaction.id) if reaction.id == self.atp_hydrolysis.id: continue if FBAHelper.is_ex(reaction): @@ -232,10 +236,12 @@ def disable_noncore_reactions(self): logger.debug(reaction.id + " core but reversible") self.noncore_reactions.append([reaction, "<"]) reaction.lower_bound = 0 + gfrxn.lower_bound = 0 if reaction.upper_bound > 0 and template_reaction.upper_bound <= 0: logger.debug(reaction.id + " core but reversible") self.noncore_reactions.append([reaction, ">"]) reaction.upper_bound = 0 + gfrxn.upper_bound = 0 else: logger.debug(f"{reaction.id} non core") if FBAHelper.rxn_compartment(reaction) != self.compartment: @@ -250,6 +256,8 @@ def disable_noncore_reactions(self): self.noncore_reactions.append([reaction, ">"]) reaction.lower_bound = 0 reaction.upper_bound = 0 + gfrxn.lower_bound = 0 + gfrxn.upper_bound = 0 def evaluate_growth_media(self): """ @@ -265,14 +273,12 
@@ def evaluate_growth_media(self): output = {} with self.model: self.model.objective = self.atp_hydrolysis.id - # self.model.objective = self.model.problem.Objective(Zero,direction="max") - - logger.debug( - f"ATP bounds: ({self.atp_hydrolysis.lower_bound}, {self.atp_hydrolysis.upper_bound})" - ) - # self.model.objective.set_linear_coefficients({self.atp_hydrolysis.forward_variable:1}) pkgmgr = MSPackageManager.get_pkg_mgr(self.model) + # First prescreening model for ATP production without gapfilling + media_list = [] + min_objectives = {} for media, minimum_obj in self.atp_medias: + logger.debug("evaluate media %s", media) pkgmgr.getpkg("KBaseMediaPkg").build_package(media) logger.debug("model.medium %s", self.model.medium) @@ -284,26 +290,36 @@ def evaluate_growth_media(self): solution.status, ) self.media_gapfill_stats[media] = None + output[media.id] = solution.objective_value if ( solution.objective_value < minimum_obj or solution.status != "optimal" ): - self.media_gapfill_stats[media] = self.msgapfill.run_gapfilling( - media, self.atp_hydrolysis.id, minimum_obj - ) - # IF gapfilling fails - need to activate and penalize the noncore and try again + media_list.append(media) + min_objectives[media] = minimum_obj elif solution.objective_value >= minimum_obj: self.media_gapfill_stats[media] = {"reversed": {}, "new": {}} - logger.debug( - "gapfilling stats: %s", - json.dumps(self.media_gapfill_stats[media], indent=2, default=vars), - ) + + # Now running gapfilling on all conditions where initially there was no growth + all_solutions = self.msgapfill.run_multi_gapfill( + media_list, + self.atp_hydrolysis.id, + min_objectives, + check_for_growth=False, + ) + + # Adding the new solutions to the media gapfill stats + for media in all_solutions: + self.media_gapfill_stats[media] = all_solutions[media] if MSATPCorrection.DEBUG: + export_data = {} + for media in self.media_gapfill_stats: + export_data[media.id] = self.media_gapfill_stats[media] with open("debug.json", 
"w") as outfile: - json.dump(self.media_gapfill_stats[media], outfile) + json.dump(export_data, outfile) return output @@ -312,81 +328,91 @@ def determine_growth_media(self, max_gapfilling=None): Decides which of the test media to use as growth conditions for this model :return: """ + atp_att = {"tests": {}, "selected_media": {}, "core_atp_gapfilling": {}} self.selected_media = [] best_score = None for media in self.media_gapfill_stats: - gfscore = 0 + atp_att["core_atp_gapfilling"][media.id] = { + "score": 0, + "new": {}, + "reversed": {}, + } if self.media_gapfill_stats[media]: - gfscore = len( + atp_att["core_atp_gapfilling"][media.id]["score"] = len( self.media_gapfill_stats[media]["new"].keys() ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) - if best_score is None or gfscore < best_score: - best_score = gfscore + atp_att["core_atp_gapfilling"][media.id][ + "new" + ] = self.media_gapfill_stats[media]["new"] + atp_att["core_atp_gapfilling"][media.id][ + "reversed" + ] = self.media_gapfill_stats[media]["reversed"] + else: + atp_att["core_atp_gapfilling"][media.id] = { + "score": 1000, + "failed": True, + } + if ( + best_score is None + or atp_att["core_atp_gapfilling"][media.id]["score"] < best_score + ): + best_score = atp_att["core_atp_gapfilling"][media.id]["score"] + if self.max_gapfilling is None: self.max_gapfilling = best_score - logger.debug(f"max_gapfilling: {self.max_gapfilling}, best_score: {best_score}") + logger.info(f"max_gapfilling: {self.max_gapfilling}, best_score: {best_score}") for media in self.media_gapfill_stats: - gfscore = 0 - if self.media_gapfill_stats[media]: - gfscore = len( - self.media_gapfill_stats[media]["new"].keys() - ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) - - logger.debug(f"media gapfilling score: {media.id}: {gfscore}") - if gfscore <= self.max_gapfilling and gfscore <= ( + if atp_att["core_atp_gapfilling"][media.id][ + "score" + ] <= self.max_gapfilling and 
atp_att["core_atp_gapfilling"][media.id][ + "score" + ] <= ( best_score + self.gapfilling_delta ): self.selected_media.append(media) + atp_att["selected_media"][media.id] = 0 - def determine_growth_media2(self, max_gapfilling=None): - """ - Decides which of the test media to use as growth conditions for this model - :return: - """ - - def scoring_function(media): - return len(self.media_gapfill_stats[media]["new"].keys()) + 0.5 * len( - self.media_gapfill_stats[media]["reversed"].keys() - ) - - if not max_gapfilling: - max_gapfilling = self.max_gapfilling - self.selected_media = [] - media_scores = dict( - (media, scoring_function(media)) - for media in self.media_gapfill_stats - if self.media_gapfill_stats[media] - ) - best_score = min(media_scores.values()) - if max_gapfilling is None or max_gapfilling > ( - best_score + self.gapfilling_delta - ): - max_gapfilling = best_score + self.gapfilling_delta - for media in media_scores: - score = media_scores[media] - logger.debug(score, best_score, max_gapfilling) - if score <= max_gapfilling: - self.selected_media.append(media) + self.modelutl.save_attributes(atp_att, "ATP_analysis") def apply_growth_media_gapfilling(self): """ Applies the gapfilling to all selected growth media :return: """ - self.cumulative_core_gapfilling = ( - [] - ) # TODO: In case someone runs ATP correction twice with different parameters, before resetting this, maybe check if any of these reactions are already in the model and remove them so we're starting fresh??? + self.cumulative_core_gapfilling = [] + # TODO: In case someone runs ATP correction twice with different parameters, + # before resetting this, maybe check if any of these reactions are already in + # the model and remove them so we're starting fresh??? 
for media in self.selected_media: + stats = self.media_gapfill_stats.get(media, None) if ( - media in self.media_gapfill_stats - and self.media_gapfill_stats[media] + stats is not None and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0 ): self.msgapfill.integrate_gapfill_solution( - self.media_gapfill_stats[media], self.cumulative_core_gapfilling + stats, self.cumulative_core_gapfilling, link_gaps_to_objective=False ) + # Adding reactions to gapfilling sensitivity structure so we can track all gapfilled reactions + gf_sensitivity = self.modelutl.get_attributes("gf_sensitivity", {}) + if media.id not in gf_sensitivity: + gf_sensitivity[media.id] = {} + if self.atp_hydrolysis.id not in gf_sensitivity[media.id]: + gf_sensitivity[media.id][self.atp_hydrolysis.id] = {} + gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"] = {} + for item in stats["new"]: + gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][ + item + ] = {stats["new"][item]: []} + for item in stats["reversed"]: + gf_sensitivity[media.id][self.atp_hydrolysis.id]["success"][ + item + ] = {stats["reversed"][item]: []} + self.modelutl.save_attributes(gf_sensitivity, "gf_sensitivity") + self.modelutl.save_attributes( + len(self.cumulative_core_gapfilling), "total_core_gapfilling" + ) def expand_model_to_genome_scale(self): """Restores noncore reactions to model while filtering out reactions that break ATP @@ -404,11 +430,11 @@ def expand_model_to_genome_scale(self): self.restore_noncore_reactions(noncore=True, othercompartment=False) # Extending model with non core reactions while retaining ATP accuracy self.filtered_noncore = self.modelutl.reaction_expansion_test( - self.noncore_reactions, tests + self.noncore_reactions, tests, attribute_label="atp_expansion_filter" ) # Removing filtered reactions for item in self.filtered_noncore: - print("Removing " + item[0].id + " " + item[1]) + logger.info("Removing " + item[0].id + " " + item[1]) if item[1] == ">": item[0].upper_bound = 0 
else: @@ -444,7 +470,7 @@ def restore_noncore_reactions(self, noncore=True, othercompartment=True): reaction.lower_bound = self.original_bounds[reaction.id][0] reaction.upper_bound = self.original_bounds[reaction.id][1] - def build_tests(self, multiplier=None): + def build_tests(self, multiplier_hash_override={}): """Build tests based on ATP media evaluations Parameters @@ -460,22 +486,60 @@ def build_tests(self, multiplier=None): Raises ------ """ - if multiplier is None: - multiplier = self.multiplier + # Applying threshold multiplier + for key in default_threshold_multipiers: + if key not in multiplier_hash_override: + multiplier_hash_override[key] = default_threshold_multipiers[key] + # Initialzing atp test attributes + atp_att = self.modelutl.get_attributes( + "ATP_analysis", + {"tests": {}, "selected_media": {}, "core_atp_gapfilling": {}}, + ) + # Initializing tests and adding empty media every time tests = [] + if "empty" in self.media_hash: + tests.append( + { + "media": self.media_hash["empty"], + "is_max_threshold": True, + "threshold": 0.00001, + "objective": self.atp_hydrolysis.id, + } + ) + atp_att["tests"]["empty"] = { + "threshold": 0.00001, + "objective": self.atp_hydrolysis.id, + } + # Setting objective to ATP hydrolysis self.model.objective = self.atp_hydrolysis.id for media in self.selected_media: + # Setting multiplier for test threshold + multiplier = multiplier_hash_override["default"] + if media.id in multiplier_hash_override: + multiplier = multiplier_hash_override[media.id] + # Constraining model exchanges for media self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) + # Computing core ATP production obj_value = self.model.slim_optimize() - logger.debug(f"{media.name} = {obj_value}") + logger.debug(f"{media.name} = {obj_value};{multiplier}") + threshold = multiplier * obj_value + if threshold == 0: + threshold += 0.00001 tests.append( { "media": media, "is_max_threshold": True, - "threshold": multiplier * obj_value, + 
"threshold": threshold, "objective": self.atp_hydrolysis.id, } ) + atp_att["selected_media"][media.id] = obj_value + atp_att["tests"][media.id] = { + "threshold": multiplier * obj_value, + "objective": self.atp_hydrolysis.id, + } + # Saving test attributes to the model + self.modelutl.save_attributes(atp_att, "ATP_analysis") return tests def run_atp_correction(self): @@ -487,7 +551,7 @@ def run_atp_correction(self): self.evaluate_growth_media() self.determine_growth_media() self.apply_growth_media_gapfilling() - self.evaluate_growth_media() + # self.evaluate_growth_media() self.expand_model_to_genome_scale() return self.build_tests() diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index c8763c2d..c49e2e5e 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -6,6 +6,7 @@ from modelseedpy.core.exceptions import ModelSEEDError from modelseedpy.core.rast_client import RastClient from modelseedpy.core.msgenome import normalize_role +from modelseedpy.core.mstemplate import TemplateReactionType from modelseedpy.core.msmodel import ( get_gpr_string, get_reaction_constraints_from_direction, @@ -13,6 +14,8 @@ from cobra.core import Gene, Metabolite, Model, Reaction, Group from modelseedpy.core import FBAHelper from modelseedpy.fbapkg.mspackagemanager import MSPackageManager +from modelseedpy.biochem.modelseed_biochem import ModelSEEDBiochem +from modelseedpy.biochem.modelseed_to_cobra import modelseed_to_cobra_reaction SBO_ANNOTATION = "sbo" @@ -315,6 +318,7 @@ def __init__( self.reaction_to_complex_sets = None self.compartments = None self.base_model = None + self.compartments_index = None # TODO: implement custom index by compartment self.index = index def build_drains(self): @@ -564,85 +568,6 @@ def get_or_create_metabolite( pass return model.metabolites.get_by_id(full_id) - @staticmethod - def build_biomass_new(model, template, index): - biomasses = [] - types = ["cofactor", "lipid", "cellwall"] - for bio in 
template.biomasses: - # Creating biomass reaction object - metabolites = {} - biorxn = Reaction(bio.id, bio.name, "biomasses", 0, 1000) - # Adding standard compounds for DNA, RNA, protein, and biomass - if bio["type"] == "growth": - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11416", "c", index - ) - metabolites[met] = 1 - if "dna" in bio and bio["dna"] > 0: - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11461", "c", index - ) - metabolites[met] = -1 * bio["dna"] - if "protein" in bio and bio["protein"] > 0: - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11463", "c", index - ) - metabolites[met] = -1 * bio["protein"] - if "rna" in bio and bio["rna"] > 0: - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11462", "c", index - ) - metabolites[met] = -1 * bio["rna"] - bio_type_hash = {} - for type in types: - for comp in bio["templateBiomassComponents"]: - fullid = FBAHelper.id_from_ref(comp["templatecompcompound_ref"]) - (baseid, compartment, ignore_index) = FBAHelper.parse_id(fullid) - comp["met"] = MSBuilder.get_or_create_metabolite( - model, template, baseid, compartment, index - ) - if type not in bio_type_hash: - bio_type_hash[type] = {"items": [], "total_mw": 0} - if FBAHelper.metabolite_mw(comp["met"]): - types[type] += FBAHelper.metabolite_mw(comp["met"]) / 1000 - bio_type_hash[type].append(comp) - for type in bio_type_hash: - compmass = bio[type] - for comp in bio_type_hash[type]: - coef = None - if comp["coefficient_type"] == "MOLFRACTION": - coef = compmass / types[type] * comp["coefficient"] - elif comp["coefficient_type"] == "MOLSPLIT": - coef = compmass / types[type] * comp["coefficient"] - elif comp["coefficient_type"] == "MULTIPLIER": - coef = biorxn[type] * comp["coefficient"] - elif comp["coefficient_type"] == "EXACT": - coef = comp["coefficient"] - if coef: - met = model.metabolites.get_by_id("cpd11416_c0") - if met in metabolites: - metabolites[met] += coef - else: - 
metabolites[met] = coef - metabolites[met] = coef - for count, value in enumerate(comp["linked_compound_refs"]): - met = model.metabolites.get_by_id( - FBAHelper.id_from_ref(value) - ) - if met in metabolites: - metabolites[met] += ( - coef * comp["link_coefficients"][count] - ) - else: - metabolites[met] = ( - coef * comp["link_coefficients"][count] - ) - - biorxn.annotation[SBO_ANNOTATION] = "SBO:0000629" - biorxn.add_metabolites(metabolites) - biomasses.append(biorxn) - return biomasses - def build_static_biomasses(self, model, template): res = [] if template.name.startswith("CoreModel"): @@ -747,7 +672,10 @@ def build_complex_groups(self, complex_sets): group_complexes = {} for complex_set in complex_sets: for complex_id in complex_set: - if complex_id not in group_complexes: + if ( + complex_id not in group_complexes + and complex_id in self.template.complexes + ): cpx = self.template.complexes.get_by_id(complex_id) g = Group(complex_id) g.notes["complex_source"] = cpx.source @@ -804,6 +732,123 @@ def build_metabolic_reactions(self): return reactions + def build_from_annotaton_ontology( + self, + model_or_id, + anno_ont, + index="0", + allow_all_non_grp_reactions=False, + annotate_with_rast=False, + biomass_classic=False, + biomass_gc=0.5, + add_non_template_reactions=True, + prioritized_event_list=None, + ontologies=None, + merge_all=True, + convert_to_sso=True, + ): + # Build base model without annotation + self.search_name_to_orginal = {} + self.search_name_to_genes = {} + gene_term_hash = anno_ont.get_gene_term_hash( + prioritized_event_list, ontologies, merge_all, convert_to_sso + ) + residual_reaction_gene_hash = {} + for gene in gene_term_hash: + for term in gene_term_hash[gene]: + if term.ontology.id == "SSO": + name = anno_ont.get_term_name(term) + f_norm = normalize_role(name) + if f_norm not in self.search_name_to_genes: + self.search_name_to_genes[f_norm] = set() + self.search_name_to_orginal[f_norm] = set() + 
self.search_name_to_orginal[f_norm].add(name) + self.search_name_to_genes[f_norm].add(gene.id) + else: + for rxn_id in term.msrxns: + if rxn_id not in residual_reaction_gene_hash: + residual_reaction_gene_hash[rxn_id] = {} + if gene not in residual_reaction_gene_hash[rxn_id]: + residual_reaction_gene_hash[rxn_id][gene] = [] + residual_reaction_gene_hash[rxn_id][gene] = gene_term_hash[ + gene + ][term] + + model_or_id = self.build( + model_or_id, + index, + allow_all_non_grp_reactions, + annotate_with_rast, + biomass_classic, + biomass_gc, + ) + for rxn in model_or_id.reactions: + probability = None + for gene in rxn.genes(): + annoont_gene = anno_ont.get_feature(gene.id) + if annoont_gene and annoont_gene in gene_term_hash: + for term in gene_term_hash[annoont_gene]: + if rxn.id[0:-3] in term.msrxns: + for item in gene_term_hash[gene][term]: + if "probability" in item.scores: + if ( + not probability + or item.scores["probability"] > probability + ): + probability = item.scores["probability"] + if hasattr(rxn, "probability"): + rxn.probability = probability + + reactions = [] + modelseeddb = ModelSEEDBiochem.get() + for rxn_id in residual_reaction_gene_hash: + if rxn_id + "_c0" not in model_or_id.reactions: + reaction = None + template_reaction = None + if rxn_id + "_c" in self.template.reactions: + template_reaction = self.template.reactions.get_by_id(rxn_id + "_c") + elif rxn_id in modelseeddb.reactions: + msrxn = modelseeddb.reactions.get_by_id(rxn_id) + template_reaction = msrxn.to_template_reaction({0: "c", 1: "e"}) + if template_reaction: + for m in template_reaction.metabolites: + if m.compartment not in self.compartments: + self.compartments[ + m.compartment + ] = self.template.compartments.get_by_id(m.compartment) + if m.id not in self.template_species_to_model_species: + model_metabolite = m.to_metabolite(self.index) + self.template_species_to_model_species[ + m.id + ] = model_metabolite + self.base_model.add_metabolites([model_metabolite]) + reaction = 
template_reaction.to_reaction( + self.base_model, self.index + ) + gpr = "" + probability = None + for gene in residual_reaction_gene_hash[rxn_id]: + for item in residual_reaction_gene_hash[rxn_id][gene]: + if "probability" in item["scores"]: + if ( + not probability + or item["scores"]["probability"] > probability + ): + probability = item["scores"]["probability"] + if len(gpr) > 0: + gpr += " or " + gpr += gene.id + if hasattr(rxn, "probability"): + reaction.probability = probability + reaction.gene_reaction_rule = gpr + reaction.annotation[SBO_ANNOTATION] = "SBO:0000176" + reactions.append(reaction) + if not reaction: + print("Reaction ", rxn_id, " not found in template or database!") + + model_or_id.add_reactions(reactions) + return model_or_id + def build_non_metabolite_reactions( self, cobra_model, allow_all_non_grp_reactions=False ): @@ -823,9 +868,12 @@ def build_non_metabolite_reactions( reactions = [] for template_reaction in self.template.reactions: + rxn_type = template_reaction.type if ( - template_reaction.type == "universal" - or template_reaction.type == "spontaneous" + rxn_type == "universal" + or rxn_type == "spontaneous" + or rxn_type == TemplateReactionType.UNIVERSAL + or rxn_type == TemplateReactionType.SPONTANEOUS ): reaction_metabolite_ids = {m.id for m in template_reaction.metabolites} if ( @@ -886,6 +934,9 @@ def build( index="0", allow_all_non_grp_reactions=False, annotate_with_rast=True, + biomass_classic=False, + biomass_gc=0.5, + add_reaction_from_rast_annotation=True, ): """ @@ -894,8 +945,11 @@ def build( @param index: @param allow_all_non_grp_reactions: @param annotate_with_rast: + @param biomass_classic: + @param biomass_gc: @return: """ + self.index = index if annotate_with_rast: rast = RastClient() @@ -921,8 +975,10 @@ def build( self.reaction_to_complex_sets.values() ) - metabolic_reactions = self.build_metabolic_reactions() - cobra_model.add_reactions(metabolic_reactions) + if add_reaction_from_rast_annotation: + 
metabolic_reactions = self.build_metabolic_reactions() + cobra_model.add_reactions(metabolic_reactions) + non_metabolic_reactions = self.build_non_metabolite_reactions( cobra_model, allow_all_non_grp_reactions ) @@ -930,15 +986,38 @@ def build( cobra_model.add_groups(list(complex_groups.values())) self.add_exchanges_to_model(cobra_model) + biomass_reactions = [] + for rxn_biomass in self.template.biomasses: + reaction = rxn_biomass.build_biomass( + cobra_model, index, biomass_classic, biomass_gc + ) + for m in reaction.metabolites: + if "modelseed_template_id" in m.notes: + self.template_species_to_model_species[ + m.notes["modelseed_template_id"] + ] = m + biomass_reactions.append(reaction) + + if len(biomass_reactions) > 0: + for rxn in biomass_reactions: + if rxn.id not in cobra_model.reactions: + cobra_model.add_reactions([rxn]) + cobra_model.objective = biomass_reactions[0].id + + """ if ( self.template.name.startswith("CoreModel") or self.template.name.startswith("GramNeg") or self.template.name.startswith("GramPos") ): - cobra_model.add_reactions( - self.build_static_biomasses(cobra_model, self.template) - ) + gc = 0.5 + if hasattr(self.genome,"info"): + gc = float(self.genome.info.metadata["GC content"]) + print("Genome custom GC:",gc) + for bio in self.template.biomasses: + bio.build_biomass(cobra_model, index, classic=False, GC=gc,add_to_model=True) cobra_model.objective = "bio1" + """ reactions_sinks = self.build_drains() cobra_model.add_reactions(reactions_sinks) @@ -980,6 +1059,8 @@ def build_full_template_model(template, model_id=None, index="0"): :param index: index for the metabolites :return: """ + from modelseedpy.core.msmodel import MSModel + model = MSModel(model_id if model_id else template.id, template=template) all_reactions = [] for rxn in template.reactions: @@ -994,13 +1075,11 @@ def build_full_template_model(template, model_id=None, index="0"): bio_rxn2 = build_biomass("bio2", model, template, core_atp, index) 
model.add_reactions([bio_rxn1, bio_rxn2]) model.objective = "bio1" - if template.name.startswith("GramNeg"): - bio_rxn1 = build_biomass("bio1", model, template, gramneg, index) - model.add_reactions([bio_rxn1]) - model.objective = "bio1" - if template.name.startswith("GramPos"): - bio_rxn1 = build_biomass("bio1", model, template, grampos, index) - model.add_reactions([bio_rxn1]) + else: + for bio in template.biomasses: + bio.build_biomass( + model, index, classic=False, GC=0.5, add_to_model=True + ) model.objective = "bio1" reactions_sinks = [] @@ -1026,10 +1105,15 @@ def build_metabolic_model( allow_all_non_grp_reactions=False, annotate_with_rast=True, gapfill_model=True, + classic_biomass=False, ): builder = MSBuilder(genome, template) model = builder.build( - model_id, index, allow_all_non_grp_reactions, annotate_with_rast + model_id, + index, + allow_all_non_grp_reactions, + annotate_with_rast, + classic_biomass, ) # Gapfilling model if gapfill_model: diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 9b42e17d..ee1b0fe3 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -1,17 +1,23 @@ +#!/usr/bin/python # -*- coding: utf-8 -*- import logging -import itertools # !!! the import is never used - -logger = logging.getLogger(__name__) - import cobra import re +import json +import numpy as np +import pandas as pd from optlang.symbolics import Zero, add from modelseedpy.core import FBAHelper # !!! 
the import is never used from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.core.msmodelutl import MSModelUtil -from modelseedpy.fbapkg.gapfillingpkg import default_blacklist from modelseedpy.core.exceptions import GapfillingError +from collections import defaultdict + + +logger = logging.getLogger(__name__) +logger.setLevel( + logging.INFO # WARNING +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO class MSGapfill: @@ -33,6 +39,10 @@ def __init__( reaction_scores={}, blacklist=[], atp_gapfilling=False, + minimum_obj=0.01, + default_excretion=100, + default_uptake=100, + default_target=None, ): # Discerning input is model or mdlutl and setting internal links if isinstance(model_or_mdlutl, MSModelUtil): @@ -50,34 +60,34 @@ def __init__( "cpd15302", "cpd03091", ] # the cpd11416 compound is filtered during model extension with templates - self.gfmodel = self.lp_filename = self.last_solution = None + # Cloning model to create gapfilling model + self.gfmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) + self.gfmodelutl = MSModelUtil.get(self.gfmodel) + # Getting package manager for gapfilling model + self.gfpkgmgr = MSPackageManager.get_pkg_mgr(self.gfmodelutl) + # Setting target from input + if default_target: + self.default_target = default_target + self.gfmodel.objective = self.gfmodel.problem.Objective( + self.gfmodel.reactions.get_by_id(default_target).flux_expression, + direction="max", + ) + # Setting parameters for gapfilling + self.lp_filename = self.last_solution = None self.model_penalty = 1 + self.default_minimum_objective = minimum_obj self.default_gapfill_models = default_gapfill_models self.default_gapfill_templates = default_gapfill_templates self.gapfill_templates_by_index, self.gapfill_models_by_index = {}, {} self.gapfill_all_indecies_with_default_templates = True self.gapfill_all_indecies_with_default_models = True - self.blacklist = list(set(default_blacklist + 
blacklist)) + self.blacklist = list(set(blacklist)) self.test_condition_iteration_limit = 10 self.test_conditions = test_conditions self.reaction_scores = reaction_scores self.cumulative_gapfilling = [] - - def run_gapfilling( - self, - media=None, - target=None, - minimum_obj=0.01, - binary_check=False, - prefilter=True, - ): - if target: - self.model.objective = self.model.problem.Objective( - self.model.reactions.get_by_id(target).flux_expression, direction="max" - ) - self.gfmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) - pkgmgr = MSPackageManager.get_pkg_mgr(self.gfmodel) - pkgmgr.getpkg("GapfillingPkg").build_package( + # Building gapfilling package + self.gfpkgmgr.getpkg("GapfillingPkg").build_package( { "auto_sink": self.auto_sink, "model_penalty": self.model_penalty, @@ -87,40 +97,146 @@ def run_gapfilling( "gapfill_models_by_index": self.gapfill_models_by_index, "gapfill_all_indecies_with_default_templates": self.gapfill_all_indecies_with_default_templates, "gapfill_all_indecies_with_default_models": self.gapfill_all_indecies_with_default_models, - "default_excretion": 100, - "default_uptake": 100, + "default_excretion": default_excretion, + "default_uptake": default_uptake, "minimum_obj": minimum_obj, "blacklist": self.blacklist, "reaction_scores": self.reaction_scores, "set_objective": 1, } ) - pkgmgr.getpkg("KBaseMediaPkg").build_package(media) + def test_gapfill_database(self, media, target=None, before_filtering=True): + # Testing if gapfilling can work before filtering + if target: + self.gfmodel.objective = self.gfmodel.problem.Objective( + self.gfmodel.reactions.get_by_id(target).flux_expression, + direction="max", + ) + self.gfpkgmgr.getpkg("GapfillingPkg").reset_original_objective() + else: + target = str(self.gfmodel.objective) + target = target.split(" ")[0] + target = target[13:] + if self.gfpkgmgr.getpkg("GapfillingPkg").test_gapfill_database(): + return True + gf_sensitivity = {} + if target != "rxn00062_c0": + 
gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if media.id not in gf_sensitivity: + gf_sensitivity[media.id] = {} + if target not in gf_sensitivity[media.id]: + gf_sensitivity[media.id][target] = {} + filter_msg = " " + note = "FAF" + if before_filtering: + filter_msg = " before filtering " + note = "FBF" + gf_sensitivity[media.id][target][ + note + ] = self.mdlutl.find_unproducible_biomass_compounds(target) + if target != "rxn00062_c0": + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") + logger.warning( + "No gapfilling solution found" + + filter_msg + + "for " + + media.id + + " activating " + + target + ) + return False + + def prefilter(self, media, target): # Filtering breaking reactions out of the database - if prefilter and self.test_conditions: - pkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( + if self.test_conditions: + self.gfpkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) + gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes( + "gf_filter", {} + ) + base_filter = self.mdlutl.get_attributes("gf_filter", {}) + for media_id in gf_filter: + base_filter[media_id] = gf_filter[media_id] + + # Testing if gapfilling can work after filtering + if not self.test_gapfill_database(media, target, before_filtering=False): + return False + return True + + def run_gapfilling( + self, + media=None, + target=None, + minimum_obj=None, + binary_check=False, + prefilter=True, + check_for_growth=True, + ): + """Run gapfilling on a single media condition to force the model to achieve a nonzero specified objective + Parameters + ---------- + media : MSMedia + Media in which the model should be gapfilled + target : string + Name or expression describing the reaction or combination of reactions to the optimized + minimum_obj : double + Value to use for the minimal objective threshold that the model must be gapfilled to achieve + binary_check : bool + Indicates if the 
solution should be checked to ensure it is minimal in the number of reactions involved + prefilter : bool + Indicates if the gapfilling database should be prefiltered using the tests provided in the MSGapfill constructor before running gapfilling + check_for_growth : bool + Indicates if the model should be checked to ensure that the resulting gapfilling solution produces a nonzero objective + """ + # Setting target and media if specified + if target: + self.gfmodel.objective = self.gfmodel.problem.Objective( + self.gfmodel.reactions.get_by_id(target).flux_expression, + direction="max", + ) + self.gfpkgmgr.getpkg("GapfillingPkg").reset_original_objective() + else: + target = self.default_target + if media: + self.gfpkgmgr.getpkg("KBaseMediaPkg").build_package(media) + if not minimum_obj: + minimum_obj = self.default_minimum_objective + if minimum_obj: + self.gfpkgmgr.getpkg("GapfillingPkg").set_min_objective(minimum_obj) + + # Testing if gapfilling can work before filtering + if not self.test_gapfill_database(media, before_filtering=True): + return None + + # Filtering + if prefilter: + if not self.prefilter(media, target): + return None + # Printing the gapfilling LP file if self.lp_filename: with open(self.lp_filename, "w") as out: out.write(str(self.gfmodel.solver)) + + # Running gapfilling and checking solution sol = self.gfmodel.optimize() logger.debug( - "gapfill solution objective value %f (%s) for media %s", - sol.objective_value, - sol.status, - media, + f"gapfill solution objective value {sol.objective_value} ({sol.status}) for media {media}" ) - if sol.status != "optimal": logger.warning("No solution found for %s", media) return None - self.last_solution = pkgmgr.getpkg("GapfillingPkg").compute_gapfilled_solution() + # Computing solution and ensuring all tests still pass + self.last_solution = self.gfpkgmgr.getpkg( + "GapfillingPkg" + ).compute_gapfilled_solution() if self.test_conditions: - self.last_solution = 
pkgmgr.getpkg("GapfillingPkg").run_test_conditions( + self.last_solution = self.gfpkgmgr.getpkg( + "GapfillingPkg" + ).run_test_conditions( self.test_conditions, self.last_solution, self.test_condition_iteration_limit, @@ -130,24 +246,89 @@ def run_gapfilling( "no solution could be found that satisfied all specified test conditions in specified iterations!" ) return None + + # Running binary check to reduce solution to minimal reaction solution if binary_check: - self.last_solution = pkgmgr.getpkg( + self.last_solution = self.gfpkgmgr.getpkg( "GapfillingPkg" ).binary_check_gapfilling_solution() + # Setting last solution data self.last_solution["media"] = media self.last_solution["target"] = target - self.last_solution["minobjective"] = minimum_obj + self.last_solution["minobjective"] = self.gfpkgmgr.getpkg( + "GapfillingPkg" + ).parameters["minimum_obj"] self.last_solution["binary_check"] = binary_check return self.last_solution - def integrate_gapfill_solution(self, solution, cumulative_solution=[]): + def run_multi_gapfill( + self, + media_list, + target=None, + minimum_objectives={}, + default_minimum_objective=None, + binary_check=False, + prefilter=True, + check_for_growth=True, + simultaneous_gapfilling=False, + ): + """Run gapfilling across an array of media conditions ultimately using different integration policies: simultaneous gapfilling, independent gapfilling, cumulative gapfilling + Parameters + ---------- + media_list : [MSMedia] + List of the medias in which the model should be gapfilled + target : string + Name or expression describing the reaction or combination of reactions to the optimized + minimum_objectives : {string - media ID : double - minimum objective value} + Media-specific minimal objective thresholds that the model must be gapfilled to achieve + default_minimum_objective : double + Default value to use for the minimal objective threshold that the model must be gapfilled to achieve + binary_check : bool + Indicates if the solution 
should be checked to ensure it is minimal in the number of reactions involved + prefilter : bool + Indicates if the gapfilling database should be prefiltered using the tests provided in the MSGapfill constructor before running gapfilling + check_for_growth : bool + Indicates if the model should be checked to ensure that the resulting gapfilling solution produces a nonzero objective + """ + + if not default_minimum_objective: + default_minimum_objective = self.default_minimum_objective + solution_dictionary = {} + if simultaneous_gapfilling: + for item in media_list: + pass + else: + first = True + for item in media_list: + minimum_obj = default_minimum_objective + if item in minimum_objectives: + minimum_obj = minimum_objectives[item] + if first: + solution_dictionary[item] = self.run_gapfilling( + item, + target, + minimum_obj, + binary_check, + prefilter, + check_for_growth, + ) + else: + solution_dictionary[item] = self.run_gapfilling( + item, None, minimum_obj, binary_check, False, check_for_growth + ) + false = False + return solution_dictionary + + def integrate_gapfill_solution( + self, solution, cumulative_solution=[], link_gaps_to_objective=True + ): """Integrating gapfilling solution into model Parameters ---------- solution : dict Specifies the reactions to be added to the model to implement the gapfilling solution - cumulation_solution : list + cumulative_solution : list Optional array to cumulatively track all reactions added to the model when integrating multiple solutions """ for rxn_id in solution["reversed"]: @@ -183,6 +364,8 @@ def integrate_gapfill_solution(self, solution, cumulative_solution=[]): cumulative_solution.append([rxn_id, "<"]) rxn.upper_bound = 0 rxn.lower_bound = -100 + + # Sometimes for whatever reason, the solution includes useless reactions that should be stripped out before saving the final model unneeded = self.mdlutl.test_solution( solution, keep_changes=True ) # Strips out unneeded reactions - which undoes some of what is done 
above @@ -191,84 +374,131 @@ def integrate_gapfill_solution(self, solution, cumulative_solution=[]): if item[0] == oitem[0] and item[1] == oitem[1]: cumulative_solution.remove(oitem) break + # Adding the gapfilling solution data to the model, which is needed for saving the model in KBase self.mdlutl.add_gapfilling(solution) + # Testing which gapfilled reactions are needed to produce each reactant in the objective function + if link_gaps_to_objective: + logger.info( + "Gapfilling sensitivity analysis running on succesful run in " + + solution["media"].id + + " for target " + + solution["target"] + ) + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if solution["media"].id not in gf_sensitivity: + gf_sensitivity[solution["media"].id] = {} + if solution["target"] not in gf_sensitivity[solution["media"].id]: + gf_sensitivity[solution["media"].id][solution["target"]] = {} + gf_sensitivity[solution["media"].id][solution["target"]][ + "success" + ] = self.mdlutl.find_unproducible_biomass_compounds( + solution["target"], cumulative_solution + ) + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") self.cumulative_gapfilling.extend(cumulative_solution) - def link_gapfilling_to_biomass(self, target="bio1"): - def find_dependency( - item, target_rxn, tempmodel, original_objective, min_flex_obj - ): - objective = tempmodel.slim_optimize() - logger.debug("Obj:" + str(objective)) - with open("FlexBiomass2.lp", "w") as out: - out.write(str(tempmodel.solver)) - if objective > 0: - target_rxn.lower_bound = 0.1 - tempmodel.objective = min_flex_obj - solution = tempmodel.optimize() - with open("FlexBiomass3.lp", "w") as out: - out.write(str(tempmodel.solver)) - biocpds = [] - for reaction in tempmodel.reactions: - if ( - reaction.id[0:5] == "FLEX_" - and reaction.forward_variable.primal > Zero - ): - biocpds.append(reaction.id[5:]) - item.append(biocpds) - logger.debug(item[0] + ":" + ",".join(biocpds)) - tempmodel.objective = original_objective - 
target_rxn.lower_bound = 0 - - # Copying model before manipulating it - tempmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.mdlutl.model)) - # Getting target reaction and making sure it exists - target_rxn = tempmodel.reactions.get_by_id(target) - # Constraining objective to be greater than 0.1 - pkgmgr = MSPackageManager.get_pkg_mgr(tempmodel) - # Adding biomass flexibility - pkgmgr.getpkg("FlexibleBiomassPkg").build_package( - { - "bio_rxn_id": target, - "flex_coefficient": [0, 1], - "use_rna_class": None, - "use_dna_class": None, - "use_protein_class": None, - "use_energy_class": [0, 1], - "add_total_biomass_constraint": False, - } - ) - # Creating min flex objective - tempmodel.objective = target_rxn - original_objective = tempmodel.objective - min_flex_obj = tempmodel.problem.Objective(Zero, direction="min") - obj_coef = dict() - for reaction in tempmodel.reactions: - if reaction.id[0:5] == "FLEX_" or reaction.id[0:6] == "energy": - obj_coef[reaction.forward_variable] = 1 - # Temporarily setting flex objective so I can set coefficients - tempmodel.objective = min_flex_obj - min_flex_obj.set_linear_coefficients(obj_coef) - # Restoring biomass object - tempmodel.objective = original_objective - # Knocking out gapfilled reactions one at a time - for item in self.cumulative_gapfilling: - logger.debug("KO:" + item[0] + item[1]) - rxnobj = tempmodel.reactions.get_by_id(item[0]) - if item[1] == ">": - original_bound = rxnobj.upper_bound - rxnobj.upper_bound = 0 - find_dependency( - item, target_rxn, tempmodel, original_objective, min_flex_obj - ) - rxnobj.upper_bound = original_bound + def compute_reaction_weights_from_expression_data(self, omics_data, annoont): + """Computing reaction weights based on input gene-level omics data + Parameters + ---------- + omics_data : pandas dataframe with genes as rows and conditions as columns + Specifies the reactions to be added to the model to implement the gapfilling solution + annoont : annoont object + Contains 
reaction, feature id, ontologies, probabilities. Restructured into dataframe in function + Returns : + A dictionary with Rxns as the keys and calculated result as the value. + """ + + ### Restructure annoont into Dataframe + rows_list = [] + for reaction, genes in annoont.get_reaction_gene_hash().items(): + for gene, gene_info in genes.items(): + # Initialize the row with 'Gene' and 'Reactions' + row = {"Gene": gene, "Reactions": reaction} + # Loop through each evidence in the gene's evidence list + for evidence in gene_info["evidence"]: + # Construct column name from the event and ontology for uniqueness + column_name = f"{evidence['ontology']}" + if column_name in row: + row[column_name] = f"{row[column_name]}, {evidence['term']}" + else: + row[column_name] = evidence["term"] + rows_list.append(row) + restructured_anoot = pd.DataFrame(rows_list) + + ### Integrate Omics, set weights, find indexes for features + feature_ids_set = set(omics_data["feature_ids"]) + + # Find indices where 'Gene' values are in 'feature_ids' + # isin method returns a boolean series that is True where tbl_supAno['Gene'] is in feature_ids_set + mask = restructured_anoot["Gene"].isin(feature_ids_set) + # Get the indices of True values in the mask + idx_measuredGene = mask[mask].index.tolist() + # Calculate the dimensions for the measuredGeneScore array + num_genes = len(restructured_anoot["Gene"]) + num_columns = len(restructured_anoot.columns[2:]) + # Initialize the measuredGeneScore array with zeros + measuredGeneScore = np.zeros((num_genes, num_columns)) + measuredGeneScore[idx_measuredGene, :] = 1 + num_weights = len(restructured_anoot.columns[3:]) + w = np.repeat(1 / num_weights, num_weights) + + ### Calculate Weights and generate the reaction/weight hash + num_cols = len(restructured_anoot.columns[2:]) + w = np.full((num_cols, 1), 1 / num_cols) + p = np.zeros(len(restructured_anoot["Reactions"])) + # computed_weights is the rxn_hash ({rxn: weight, ...}) + computed_weights = {} + + # 
Precompute gene reaction lookups + gene_reaction_lookup = {} + for idx, row in restructured_anoot.iterrows(): + gene = row["Gene"] + reaction = row["Reactions"] + if gene in gene_reaction_lookup: + gene_reaction_lookup[gene].append(reaction) else: - original_bound = rxnobj.lower_bound - rxnobj.lower_bound = 0 - find_dependency( - item, target_rxn, tempmodel, original_objective, min_flex_obj - ) - rxnobj.lower_bound = original_bound + gene_reaction_lookup[gene] = [reaction] + + for rxn in range(0, len(restructured_anoot)): + substr_rxns = [rxn for rxn in restructured_anoot["Reactions"][[rxn]]] + # Get the indices of the rows where the condition is True + mask = restructured_anoot["Reactions"] == substr_rxns[0] + idx_gene = mask[mask].index + nAG = 0 + nMG = 0 + nCG = 0 + + if len(idx_gene) > 0: + # number of genes that map to a reaction + nAG = len(idx_gene) + for iGene in range(0, nAG): + subset = restructured_anoot.iloc[idx_gene[iGene], 2:].to_numpy() + # Checking for non-empty elements in the subset + non_empty_check = np.vectorize(lambda x: x is not None and x == x)( + subset + ) + # Finding the maximum value between the non-empty check and the corresponding row in measuredGeneScore + max_value = np.maximum( + non_empty_check, measuredGeneScore[idx_gene[iGene], :] + ) + # Multiplying by the weight and adding to nMG + nMG += max(sum((max_value * w))) + selected_gene = restructured_anoot["Gene"].iloc[idx_gene[iGene]] + + # Finding reactions associated with genes that contain the selected gene + associated_reactions = gene_reaction_lookup.get(selected_gene, []) + + # Checking if there are more than one unique reactions + if len(associated_reactions) > 1: + nCG += 1 + + p[rxn] = (nMG / nAG) * (1 / (1 + (nCG / nAG))) + + # Add item to output rxn hash dictionary + computed_weights[restructured_anoot.iloc[rxn, 0]] = p[rxn] + + return computed_weights @staticmethod def gapfill( diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 
999e464d..c423427c 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -8,6 +8,24 @@ DEFAULT_SPLIT = " " +def to_fasta(features, filename, line_size=80, fn_header=None): + with open(filename, "w") as fh: + for feature in features: + if feature.seq: + h = f">{feature.id}\n" + if fn_header: + h = fn_header(feature) + fh.write(h) + _seq = feature.seq + lines = [ + _seq[i : i + line_size] + "\n" + for i in range(0, len(_seq), line_size) + ] + for line in lines: + fh.write(line) + return filename + + def normalize_role(s): s = s.strip().lower() s = re.sub(r"[\W_]+", "", s) @@ -15,8 +33,25 @@ def normalize_role(s): def read_fasta(f, split=DEFAULT_SPLIT, h_func=None): - with open(f, "r") as fh: - return parse_fasta_str(fh.read(), split, h_func) + if f.endswith(".gz"): + import gzip + + with gzip.open(f, "rb") as fh: + return parse_fasta_str(fh.read().decode("utf-8"), split, h_func) + else: + with open(f, "r") as fh: + return parse_fasta_str(fh.read(), split, h_func) + + +def read_fasta2(f, split=DEFAULT_SPLIT, h_func=None): + if f.endswith(".gz"): + import gzip + + with gzip.open(f, "rb") as fh: + return extract_features(fh.read().decode("utf-8"), split, h_func) + else: + with open(f, "r") as fh: + return extract_features(fh.read(), split, h_func) def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None): @@ -47,8 +82,60 @@ def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None): return features +def read_gbff_records_from_file(filename: str): + if filename.endswith(".gbff"): + with open(filename, "r") as fh: + return read_gbff_records(fh) + elif filename.endswith(".gz"): + import gzip + from io import StringIO + + with gzip.open(filename, "rb") as fh: + return read_gbff_records(StringIO(fh.read().decode("utf-8"))) + + +def read_gbff_records(handler): + from Bio import SeqIO + + gbff_records = [] + for record in SeqIO.parse(handler, "gb"): + gbff_records.append(record) + return gbff_records + + +def extract_features(faa_str, 
split=DEFAULT_SPLIT, h_func=None): + features = [] + active_seq = None + seq_lines = [] + for line in faa_str.split("\n"): + if line.startswith(">"): + if active_seq is not None: + active_seq.seq = "".join(seq_lines) + features.append(active_seq) + seq_lines = [] + seq_id = line[1:] + desc = None + if h_func: + seq_id, desc = h_func(seq_id) + elif split: + header_data = line[1:].split(split, 1) + seq_id = header_data[0] + if len(header_data) > 1: + desc = header_data[1] + active_seq = MSFeature(seq_id, "", desc) + else: + seq_lines.append(line.strip()) + + # add last sequence + if len(seq_lines) > 0: + active_seq.seq = "".join(seq_lines) + features.append(active_seq) + + return features + + class MSFeature: - def __init__(self, feature_id, sequence, description=None): + def __init__(self, feature_id, sequence, description=None, aliases=None): """ @param feature_id: identifier for the protein coding feature @@ -60,7 +147,7 @@ def __init__(self, feature_id, sequence, description=None): self.seq = sequence self.description = description # temporary replace with proper parsing self.ontology_terms = {} - self.aliases = [] + self.aliases = aliases def add_ontology_term(self, ontology_term, value): """ @@ -97,25 +184,52 @@ def add_features(self, feature_list: list): self.features += feature_list @staticmethod - def from_fasta( - filename, contigs=0, split="|", h_func=None - ): # !!! 
the contigs argument is never used + def from_fasta(filename, split=" ", h_func=None): genome = MSGenome() - genome.features += read_fasta(filename, split, h_func) + genome.features += read_fasta2(filename, split, h_func) + return genome + + @staticmethod + def from_gbff_sequence(filename): + gbff_records = read_gbff_records_from_file(filename) + genome = MSGenome() + features = [] + for rec in gbff_records: + feature = MSFeature(rec.id, str(rec.seq), description=rec.description) + features.append(feature) + genome.features += features + return genome + + @staticmethod + def from_gbff_features( + filename, feature_id_qualifier="protein_id", description_qualifier="product" + ): + gbff_records = read_gbff_records_from_file(filename) + genome = MSGenome() + features = [] + for rec in gbff_records: + for f in rec.features: + if f.type == "CDS": + translations = f.qualifiers.get("translation", []) + if len(translations) == 1: + feature_id = f.qualifiers.get(feature_id_qualifier, [None])[0] + description = f.qualifiers.get(description_qualifier, [None])[0] + if feature_id: + feature = MSFeature( + feature_id, translations[0], description=description + ) + features.append(feature) + else: + logger.warning( + f"skip feature: unable to fetch id from qualifier {feature_id_qualifier}" + ) + elif len(translations) > 1: + logger.warning(f"skip feature: with multiple sequences {f}") + genome.features += features return genome def to_fasta(self, filename, l=80, fn_header=None): - with open(filename, "w") as fh: - for feature in self.features: - h = f">{feature.id}\n" - if fn_header: - h = fn_header(feature) - fh.write(h) - lines = [ - feature.seq[i : i + l] + "\n" for i in range(0, len(feature.seq), l) - ] - for line in lines: - fh.write(line) + to_fasta(self.features, filename, l, fn_header) return filename @staticmethod @@ -137,3 +251,97 @@ def search_for_gene(self, query): return self.features.get_by_id(query) aliases = self.alias_hash() return aliases[query] if query in 
aliases else None + + def _repr_html_(self): + return f""" + + + + + + + + +
Memory address{f"{id(self):x}"}
Features{len(self.features)}
""" + + +class GenomeGff(MSGenome): + def __init__(self, contigs): + self.contigs = contigs + super().__init__() + + @staticmethod + def read_sequence(feature_id, gff_record, expected_sequence, contigs): + from Bio.Seq import Seq + from Bio import Align + + protein_seq_cds = expected_sequence + feature_contig = contigs.features.get_by_id(gff_record.contig_id) + seq = Seq(feature_contig.seq[gff_record.start - 1 : gff_record.end]) + if gff_record.strand == "-": + seq = seq.reverse_complement() + seq_from_dna = str(seq.translate()) + if len(seq_from_dna) > 0 and seq_from_dna[-1] == "*": + seq_from_dna = seq_from_dna[:-1] + if len(protein_seq_cds) > 0 and protein_seq_cds[-1] == "*": + protein_seq_cds = protein_seq_cds[:-1] + eq = protein_seq_cds == seq_from_dna + + score = None + if not eq and len(seq_from_dna) > 0: + try: + aligner = Align.PairwiseAligner() + res = aligner.align(protein_seq_cds, seq_from_dna) + score = res.score + except ValueError as ex: + print("error", gff_record) + raise ex + + feature = MSFeature(feature_id, protein_seq_cds) + feature.description = f"score: {score}" + feature.gff = gff_record + return feature + + @staticmethod + def from_fna_faa_gff( + filename_fna, filename_faa, filename_gff, _fn_get_id, prodigal=False + ): + genome_gff_features = _read_gff_features(filename_gff) + genome_faa = MSGenome.from_fasta(filename_faa) + contigs = MSGenome.from_fasta(filename_fna) + + feature_lookup = {} + if prodigal: + for feature in genome_faa.features: + attr = dict( + x.split("=") + for x in feature.description.split(" # ")[-1].split(";") + ) + if attr["ID"] not in feature_lookup: + feature_lookup[attr["ID"]] = feature + else: + raise ValueError("") + else: + feature_lookup = {feature.id: feature for feature in genome_faa.features} + + features = [] + for gff_record in genome_gff_features: + if gff_record.feature_type == "CDS": + feature_id = gff_record.attr.get("ID") + if _fn_get_id: + feature_id = _fn_get_id(gff_record) + + feature_cds = 
feature_lookup.get(feature_id) + + if feature_cds: + protein_seq_cds = feature_cds.seq + f = GenomeGff.read_sequence( + feature_id, gff_record, protein_seq_cds, contigs + ) + features.append(f) + else: + print(f"not found {feature_id}") + + genome = GenomeGff(contigs) + genome.features += features + return genome diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 6c30bb2a..75e356c4 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -9,6 +9,9 @@ from modelseedpy.core.msgapfill import MSGapfill logger = logging.getLogger(__name__) +logger.setLevel( + logging.INFO +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO class MSGrowthPhenotype: @@ -33,102 +36,221 @@ def __init__( self.additional_compounds = additional_compounds self.parent = parent - def build_media(self): + def build_media(self, include_base_media=True): + """Builds media object to use when simulating the phenotype + Parameters + ---------- + include_base_media : bool + Indicates whether to include the base media for the phenotype set in the formulation + """ cpd_hash = {} for cpd in self.additional_compounds: cpd_hash[cpd] = 100 full_media = MSMedia.from_dict(cpd_hash) - if self.media != None: + if self.media: full_media.merge(self.media, overwrite_overlap=False) - if self.parent != None and self.parent.base_media != None: - full_media.merge(parent.base_media, overwrite_overlap=False) + if include_base_media: + if self.parent and self.parent.base_media: + full_media.merge(self.parent.base_media, overwrite_overlap=False) return full_media def simulate( self, - modelutl, - growth_threshold=0.001, + model_or_mdlutl, + objective, + growth_multiplier=3, add_missing_exchanges=False, save_fluxes=False, pfba=False, ): - if not isinstance(modelutl, MSModelUtil): - modelutl = MSModelUtil(modelutl) - media = self.build_media() - output = {"growth": None, "class": None, 
"missing_transports": []} + """Simulates a single phenotype + Parameters + ---------- + model_or_modelutl : Model | MSModelUtl + Model to use to run the simulations + add_missing_exchanges : bool + Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically + growth_multiplier : double + Indicates a multiplier to use for positive growth above the growth on baseline media + save_fluxes : bool + Indicates if the fluxes should be saved and returned with the results + pfba : bool + Runs pFBA to compute fluxes after initially solving for growth + """ + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + + # Setting objective + if objective: + modelutl.model.objective = objective + + # Building full media and adding missing exchanges + output = { + "growth": None, + "class": None, + "missing_transports": [], + "baseline_growth": None, + } + full_media = self.build_media() if add_missing_exchanges: - output["missing_transports"] = modelutl.add_missing_exchanges(media) - pkgmgr = MSPackageManager.get_pkg_mgr(modelutl.model) - pkgmgr.getpkg("KBaseMediaPkg").build_package( - media, self.parent.base_uptake, self.parent.base_excretion - ) - for gene in self.gene_ko: - if gene in modelutl.model.genes: - geneobj = modelutl.model.genes.get_by_id(gene) - geneobj.knock_out() - solution = modelutl.model.optimize() - output["growth"] = solution.objective_value - if solution.objective_value > 0 and pfba: - solution = cobra.flux_analysis.pfba(modelutl.model) - if save_fluxes: - output["fluxes"] = solution.fluxes - if output["growth"] >= growth_threshold: - if self.growth > 0: + output["missing_transports"] = modelutl.add_missing_exchanges(full_media) + + # Getting basline growth + output["baseline_growth"] = 0.01 + if self.parent: + output["baseline_growth"] = self.parent.baseline_growth(modelutl, objective) + if output["baseline_growth"] < 
1e-5: + output["baseline_growth"] = 0.01 + + # Building specific media and setting compound exception list + if self.parent and self.parent.atom_limits and len(self.parent.atom_limits) > 0: + reaction_exceptions = [] + specific_media = self.build_media(False) + for mediacpd in specific_media.mediacompounds: + ex_hash = mediacpd.get_mdl_exchange_hash(modelutl) + for mdlcpd in ex_hash: + reaction_exceptions.append(ex_hash[mdlcpd]) + modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package( + self.parent.atom_limits, exception_reactions=reaction_exceptions + ) + + # Applying media + if self.parent: + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( + full_media, self.parent.base_uptake, self.parent.base_excretion + ) + else: + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(full_media, 0, 1000) + + with modelutl.model: + # Applying gene knockouts + for gene in self.gene_ko: + if gene in modelutl.model.genes: + geneobj = modelutl.model.genes.get_by_id(gene) + geneobj.knock_out() + + # Optimizing model + solution = modelutl.model.optimize() + output["growth"] = solution.objective_value + if solution.objective_value > 0 and pfba: + solution = cobra.flux_analysis.pfba(modelutl.model) + if save_fluxes: + output["fluxes"] = solution.fluxes + + # Determining phenotype class + + if output["growth"] >= output["baseline_growth"] * growth_multiplier: + output["GROWING"] = True + if not self.growth: + output["class"] = "GROWTH" + elif self.growth > 0: output["class"] = "CP" - else: + elif self.growth == 0: output["class"] = "FP" else: - if self.growth > 0: + output["GROWING"] = False + if not self.growth: + output["class"] = "NOGROWTH" + elif self.growth > 0: output["class"] = "FN" - else: + elif self.growth == 0: output["class"] = "CN" return output def gapfill_model_for_phenotype( self, - modelutl, - default_gapfill_templates, + msgapfill, + objective, test_conditions, - default_gapfill_models=[], - blacklist=[], - growth_threshold=0.001, + growth_multiplier=10, 
add_missing_exchanges=False, ): - if not isinstance(modelutl, MSModelUtil): - modelutl = MSModelUtil(modelutl) - self.gapfilling = MSGapfill( - modelutl.model, - default_gapfill_templates, - default_gapfill_models, - test_conditions, - modelutl.reaction_scores(), - blacklist, + """Gapfills the model to permit this single phenotype to be positive + Parameters + ---------- + msgapfill : MSGapfill + Fully configured gapfilling object + add_missing_exchanges : bool + Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically + growth_multiplier : double + Indicates a multiplier to use for positive growth above the growth on baseline media + objective : string + Expression for objective to be activated by gapfilling + """ + # First simulate model without gapfilling to assess ungapfilled growth + output = self.simulate( + msgapfill.mdlutl, objective, growth_multiplier, add_missing_exchanges ) - media = self.build_media() - if add_missing_exchanges: - modelutl.add_missing_exchanges(media) - for gene in self.gene_ko: - if gene in modelutl.model.genes: - geneobj = modelutl.model.genes.get_by_id(gene) - geneobj.knock_out() - gfresults = self.gapfilling.run_gapfilling(media, None) - if gfresults is None: + if output["growth"] >= output["baseline_growth"] * growth_multiplier: + # No gapfilling needed - original model grows without gapfilling + return { + "reversed": {}, + "new": {}, + "media": self.build_media(), + "target": objective, + "minobjective": output["baseline_growth"] * growth_multiplier, + "binary_check": False, + } + + # Now pulling the gapfilling configured model from MSGapfill + gfmodelutl = MSModelUtil.get(msgapfill.gfmodel) + # Saving the gapfill objective because this will be replaced when the simulation runs + gfobj = gfmodelutl.model.objective + # Running simulate on gapfill model to add missing exchanges and set proper media and uptake limit constraints + output = self.simulate( + 
modelutl, objective, growth_multiplier, add_missing_exchanges + ) + # If the gapfilling model fails to achieve the minimum growth, then no solution exists + if output["growth"] < output["baseline_growth"] * growth_multiplier: logger.warning( "Gapfilling failed with the specified model, media, and target reaction." ) - return self.gapfilling.integrate_gapfill_solution(gfresults) + return None + + # Running the gapfilling itself + full_media = self.build_media() + with modelutl.model: + # Applying gene knockouts + for gene in self.gene_ko: + if gene in modelutl.model.genes: + geneobj = modelutl.model.genes.get_by_id(gene) + geneobj.knock_out() + + gfresults = self.gapfilling.run_gapfilling( + media, None, minimum_obj=output["baseline_growth"] * growth_multiplier + ) + if gfresults is None: + logger.warning( + "Gapfilling failed with the specified model, media, and target reaction." + ) + + return gfresults class MSGrowthPhenotypes: - def __init__(self, base_media=None, base_uptake=0, base_excretion=1000): + def __init__( + self, base_media=None, base_uptake=0, base_excretion=1000, global_atom_limits={} + ): self.base_media = base_media self.phenotypes = DictList() self.base_uptake = base_uptake self.base_excretion = base_excretion + self.atom_limits = global_atom_limits + self.baseline_growth_data = {} + self.cached_based_growth = {} @staticmethod - def from_compound_hash(compounds, base_media, base_uptake=0, base_excretion=1000): - growthpheno = MSGrowthPhenotypes(base_media, base_uptake, base_excretion) + def from_compound_hash( + compounds, + base_media=None, + base_uptake=0, + base_excretion=1000, + global_atom_limits={}, + ): + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) new_phenos = [] for cpd in compounds: newpheno = MSGrowthPhenotype(cpd, None, compounds[cpd], [], [cpd]) @@ -137,8 +259,17 @@ def from_compound_hash(compounds, base_media, base_uptake=0, base_excretion=1000 return growthpheno @staticmethod 
- def from_kbase_object(data, kbase_api): - growthpheno = MSGrowthPhenotypes(None, 0, 1000) + def from_kbase_object( + data, + kbase_api, + base_media=None, + base_uptake=0, + base_excretion=1000, + global_atom_limits={}, + ): + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) new_phenos = [] for pheno in data["phenotypes"]: media = kbase_api.get_from_ws(pheno["media_ref"], None) @@ -156,9 +287,18 @@ def from_kbase_object(data, kbase_api): return growthpheno @staticmethod - def from_kbase_file(filename, kbase_api): + def from_kbase_file( + filename, + kbase_api, + base_media=None, + base_uptake=0, + base_excretion=1000, + global_atom_limits={}, + ): # TSV file with the following headers:media mediaws growth geneko addtlCpd - growthpheno = MSGrowthPhenotypes(base_media, 0, 1000) + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) headings = [] new_phenos = [] with open(filename) as f: @@ -190,8 +330,16 @@ def from_kbase_file(filename, kbase_api): return growthpheno @staticmethod - def from_ms_file(filename, basemedia, base_uptake=0, base_excretion=100): - growthpheno = MSGrowthPhenotypes(base_media, base_uptake, base_excretion) + def from_ms_file( + filename, + base_media=None, + base_uptake=0, + base_excretion=100, + global_atom_limits={}, + ): + growthpheno = MSGrowthPhenotypes( + base_media, base_uptake, base_excretion, global_atom_limits + ) df = pd.read_csv(filename) required_headers = ["Compounds", "Growth"] for item in required_headers: @@ -211,6 +359,15 @@ def from_ms_file(filename, basemedia, base_uptake=0, base_excretion=100): growthpheno.add_phenotypes(new_phenos) return growthpheno + def build_super_media(self): + super_media = None + for pheno in self.phenotypes: + if not super_media: + super_media = pheno.build_media() + else: + super_media.merge(pheno.build_media(), overwrite_overlap=False) + return super_media + def add_phenotypes(self, 
new_phenotypes): keep_phenos = [] for pheno in new_phenotypes: @@ -220,21 +377,73 @@ def add_phenotypes(self, new_phenotypes): additions = DictList(keep_phenos) self.phenotypes += additions + def baseline_growth(self, model_or_mdlutl, objective): + """Simulates all the specified phenotype conditions and saves results + Parameters + ---------- + model_or_modelutl : Model | MSModelUtl + Model to use to run the simulations + """ + # Discerning input is model or mdlutl and setting internal links + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + # Checking if base growth already computed + if modelutl in self.cached_based_growth: + if objective in self.cached_based_growth[modelutl]: + return self.cached_based_growth[modelutl][objective] + else: + self.cached_based_growth[modelutl] = {} + # Setting objective + modelutl.objective = objective + # Setting media + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( + self.base_media, self.base_uptake, self.base_excretion + ) + # Adding uptake limits + if len(self.atom_limits) > 0: + modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.atom_limits) + # Simulating + self.cached_based_growth[modelutl][objective] = modelutl.model.slim_optimize() + return self.cached_based_growth[modelutl][objective] + def simulate_phenotypes( self, - model, - biomass, + model_or_mdlutl, + objective, + growth_multiplier=3, add_missing_exchanges=False, - correct_false_negatives=False, - template=None, - growth_threshold=0.001, save_fluxes=False, + gapfill_negatives=False, + msgapfill=None, + test_conditions=None, ): - model.objective = biomass - modelutl = MSModelUtil(model) + """Simulates all the specified phenotype conditions and saves results + Parameters + ---------- + model_or_mdlutl : Model | MSModelUtl + Model to use to run the simulations + objective : string + Expression for objective to maximize in simulations + growth_multiplier : double + 
Indicates a multiplier to use for positive growth above the growth on baseline media + add_missing_exchanges : bool + Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically + save_fluxes : bool + Indicates if the fluxes should be saved and returned with the results + """ + # Discerning input is model or mdlutl and setting internal links + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + # Setting objective + modelutl.objective = objective + # Getting basline growth + baseline_growth = self.baseline_growth(modelutl, objective) + # Establishing output of the simulation method summary = { - "Label": ["Accuracy", "CP", "CN", "FP", "FN"], - "Count": [0, 0, 0, 0, 0], + "Label": ["Accuracy", "CP", "CN", "FP", "FN", "Growth", "No growth"], + "Count": [0, 0, 0, 0, 0, 0, 0], } data = { "Phenotype": [], @@ -243,53 +452,164 @@ def simulate_phenotypes( "Class": [], "Transports missing": [], "Gapfilled reactions": [], + "Gapfilling score": None, } + # Running simulations + gapfilling_solutions = {} + totalcount = 0 for pheno in self.phenotypes: - with model: - result = pheno.simulate( - modelutl, growth_threshold, add_missing_exchanges, save_fluxes - ) # Result should have "growth" and "class" - if result["class"] == "FN" and correct_false_negatives: - pheno.gapfill_model_for_phenotype(modelutl, [template], None) - if pheno.gapfilling.last_solution != None: - list = [] - for rxn_id in pheno.gapfilling.last_solution["reversed"]: - list.append( - pheno.gapfilling.last_solution["reversed"][rxn_id] - + rxn_id - ) - for rxn_id in pheno.gapfilling.last_solution["new"]: - list.append( - pheno.gapfilling.last_solution["new"][rxn_id] + rxn_id - ) - data["Gapfilled reactions"].append(";".join(list)) - else: - data["Gapfilled reactions"].append(None) + result = pheno.simulate( + modelutl, + objective, + growth_multiplier, + 
add_missing_exchanges, + save_fluxes, + ) + data["Class"].append(result["class"]) + data["Phenotype"].append(pheno.id) + data["Observed growth"].append(pheno.growth) + data["Simulated growth"].append(result["growth"]) + data["Transports missing"].append(";".join(result["missing_transports"])) + if result["class"] == "CP": + summary["Count"][1] += 1 + summary["Count"][0] += 1 + totalcount += 1 + elif result["class"] == "CN": + summary["Count"][2] += 1 + summary["Count"][0] += 1 + totalcount += 1 + elif result["class"] == "FP": + summary["Count"][3] += 1 + totalcount += 1 + elif result["class"] == "FN": + summary["Count"][4] += 1 + totalcount += 1 + elif result["class"] == "GROWTH": + summary["Count"][5] += 1 + elif result["class"] == "NOGROWTH": + summary["Count"][6] += 1 + # Gapfilling negative growth conditions + if gapfill_negatives and output["class"] in ["NOGROWTH", "FN", "CN"]: + gapfilling_solutions[pheno] = pheno.gapfill_model_for_phenotype( + msgapfill, + objective, + test_conditions, + growth_multiplier, + add_missing_exchanges, + ) + if gapfilling_solutions[pheno] != None: + data["Gapfilling score"] = 0 + list = [] + for rxn_id in gapfilling_solutions[pheno]["reversed"]: + list.append( + gapfilling_solutions[pheno]["reversed"][rxn_id] + rxn_id + ) + data["Gapfilling score"] += 0.5 + for rxn_id in gapfilling_solutions[pheno]["new"]: + list.append(gapfilling_solutions[pheno]["new"][rxn_id] + rxn_id) + data["Gapfilling score"] += 1 + data["Gapfilled reactions"].append(";".join(list)) else: data["Gapfilled reactions"].append(None) - result = pheno.simulate( - modelutl, growth_threshold, add_missing_exchanges, save_fluxes - ) # Result should have "growth" and "class" - data["Class"].append(result["class"]) - data["Phenotype"].append(pheno.id) - data["Observed growth"].append(pheno.growth) - data["Simulated growth"].append(result["growth"]) - data["Transports missing"].append( - ";".join(result["missing_transports"]) - ) - if result["class"] == "CP": - 
summary["Count"][1] += 1 - summary["Count"][0] += 1 - if result["class"] == "CN": - summary["Count"][2] += 1 - summary["Count"][0] += 1 - if result["class"] == "FP": - summary["Count"][3] += 1 - if result["class"] == "FN": - summary["Count"][4] += 1 - - summary["Count"][0] = summary["Count"][0] / len(self.phenotypes) + else: + data["Gapfilled reactions"].append(None) + if totalcount == 0: + summary["Count"][0] = None + else: + summary["Count"][0] = summary["Count"][0] / totalcount sdf = pd.DataFrame(summary) df = pd.DataFrame(data) - logger.info(df) return {"details": df, "summary": sdf} + + def fit_model_to_phenotypes( + self, + msgapfill, + objective, + grow_multiplier, + correct_false_positives=False, + minimize_new_false_positives=True, + atp_safe=True, + integrate_results=True, + global_gapfilling=True, + ): + + """Simulates all the specified phenotype conditions and saves results + Parameters + ---------- + msgapfill : MSGapfill + Gapfilling object used for the gapfilling process + correct_false_positives : bool + Indicates if false positives should be corrected + minimize_new_false_positives : bool + Indicates if new false positivies should be avoided + integrate_results : bool + Indicates if the resulting modifications to the model should be integrated + """ + + # Running simulations + positive_growth = [] + negative_growth = [] + for pheno in self.phenotypes: + with model: + result = pheno.simulate( + modelutl, + objective, + growth_multiplier, + add_missing_exchanges, + save_fluxes, + ) + # Gapfilling negative growth conditions + if gapfill_negatives and output["class"] in ["NOGROWTH", "FN", "CN"]: + negative_growth.append(pheno.build_media()) + elif gapfill_negatives and output["class"] in ["GROWTH", "FP", "CP"]: + positive_growth.append(pheno.build_media()) + + # Create super media for all + super_media = self.build_super_media() + # Adding missing exchanges + msgapfill.gfmodel.add_missing_exchanges(super_media) + # Adding elemental constraints + 
self.add_elemental_constraints() + # Getting ATP tests + + # Filtering database for ATP tests + + # Penalizing database to avoid creating false positives + + # Building additional tests from current correct negatives + + # Computing base-line growth + + # Computing growth threshold + + # Running global gapfill + + # Integrating solution + + def gapfill_all_phenotypes( + self, + model_or_mdlutl, + msgapfill=None, # Needed if the gapfilling object in model utl is not initialized + growth_threshold=None, + add_missing_exchanges=False, + ): + mdlutl = MSModelUtil.get(model_or_mdlutl) + # if msgapfill: + # mdlutl.gfutl = msgapfill + # if not mdlutl.gfutl: + # logger.critical( + # "Must either provide a gapfilling object or provide a model utl with an existing gapfilling object" + # ) + # media_list = [] + # for pheno in self.phenotypes: + # + # + # output = mdlutl.gfutl.run_multi_gapfill( + # media_list, + # default_minimum_objective=growth_threshold + # target=mdlutl.primary_biomass(), + # + # binary_check=False, + # prefilter=True, + # check_for_growth=True, + # ) diff --git a/modelseedpy/core/msmedia.py b/modelseedpy/core/msmedia.py index 488aad57..960e82d1 100644 --- a/modelseedpy/core/msmedia.py +++ b/modelseedpy/core/msmedia.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import logging from cobra.core.dictlist import DictList +from modelseedpy.core.msmodelutl import MSModelUtil logger = logging.getLogger(__name__) @@ -22,6 +23,18 @@ def minFlux(self): # TODO: will be removed later just for old methods return -self.upper_bound + def get_mdl_exchange_hash(self, model_or_mdlutl): + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + mets = modelutl.find_met(self.id) + output = {} + exchange_hash = modelutl.exchange_hash() + for met in mets: + if met in exchange_hash: + output[met] = exchange_hash[met] + return output + class MSMedia: def __init__(self, media_id, name=""): diff --git 
a/modelseedpy/core/msmodel.py b/modelseedpy/core/msmodel.py index 36bfdc7c..48c9f985 100644 --- a/modelseedpy/core/msmodel.py +++ b/modelseedpy/core/msmodel.py @@ -1,10 +1,12 @@ # -*- coding: utf-8 -*- import logging import re -from cobra.core import Model -from pyeda.inter import ( - expr, -) # wheels must be specially downloaded and installed for Windows https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyeda +from sympy.logic.inference import satisfiable +from sympy import Symbol +import sympy.logic.boolalg as spl +from cobra.core import Model, GPR + +# from pyeda.inter import expr logger = logging.getLogger(__name__) @@ -103,17 +105,35 @@ def get_cmp_token(compartments): return None -def get_set_set(expr_str): # !!! this currently returns dictionaries, not sets?? +def get_set_set_pyeda(expr_str: str, pyeda_expr): if len(expr_str.strip()) == 0: return {} expr_str = expr_str.replace(" or ", " | ") expr_str = expr_str.replace(" and ", " & ") - dnf = expr(expr_str).to_dnf() + dnf = pyeda_expr(expr_str).to_dnf() if len(dnf.inputs) == 1 or dnf.NAME == "And": return {frozenset({str(x) for x in dnf.inputs})} else: return {frozenset({str(x) for x in o.inputs}) for o in dnf.xs} - return {} + + +def get_set_set(expr_str: str): + if expr_str is None or len(expr_str.strip()) == 0: + return {} + gpr = GPR.from_string(expr_str) + expr = gpr.as_symbolic() + expr_model = list(satisfiable(expr, all_models=True)) + dnf = spl.SOPform(tuple(gpr.genes), list(expr_model)) + if type(dnf) == spl.And or type(dnf) == Symbol: + variable_set = set() + variable_set.add(frozenset({atom.name for atom in dnf.atoms()})) + return frozenset(variable_set) + elif type(dnf) == spl.Or: + return frozenset( + {frozenset({atom.name for atom in x.atoms()}) for x in dnf.args} + ) + else: + raise ValueError(f"unable to decode {expr_str} found token of type {type(dnf)}") class MSModel(Model): diff --git a/modelseedpy/core/msmodelreport.py b/modelseedpy/core/msmodelreport.py new file mode 100644 index 
00000000..df5c34bb --- /dev/null +++ b/modelseedpy/core/msmodelreport.py @@ -0,0 +1,636 @@ +# -*- coding: utf-8 -*- +import pandas as pd +import logging +import os +import re +import jinja2 +from os.path import dirname +from pandas.io.formats.style import Styler +from modelseedpy.core.msmodelutl import MSModelUtil + +module_path = dirname(os.path.abspath(__file__)) + +logger = logging.getLogger(__name__) +logger.setLevel( + logging.INFO +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO + + +class MSModelReport: + def __init__(self, model_or_mdlutl): + if isinstance(model_or_mdlutl, MSModelUtil): + self.model = model_or_mdlutl.model + self.modelutl = model_or_mdlutl + else: + self.model = model_or_mdlutl + self.modelutl = MSModelUtil.get(model_or_mdlutl) + + def generate_reports(self, report_path, multi_tab_report_path): + self.build_report(report_path) + self.build_multitab_report(multi_tab_report_path) + + # Helper function to build overview data + def build_overview_data(self): + # Get the number of compartments + number_compartments = len( + set([metabolite.compartment for metabolite in self.model.metabolites]) + ) + + # Extract gapfilling information + core_gapfilling_media = [] + gapfilling_media = [] + gf_sensitivity = self.modelutl.attributes.get("gf_sensitivity", None) + if gf_sensitivity: + for media in gf_sensitivity: + if ( + "bio1" in self.modelutl.attributes["gf_sensitivity"][media] + and "success" + in self.modelutl.attributes["gf_sensitivity"][media]["bio1"] + ): + gapfilling_media.append(media) + if ( + "rxn00062_c0" in self.modelutl.attributes["gf_sensitivity"][media] + and "success" + in self.modelutl.attributes["gf_sensitivity"][media]["rxn00062_c0"] + ): + core_gapfilling_media.append(media) + + # Count the number of gapfills + number_gapfills = len(gapfilling_media) + + # Convert the lists to strings + core_gapfilling_str = ( + "; ".join(core_gapfilling_media) + if core_gapfilling_media + else "No core 
gapfilling needed." + ) + gapfilling_media_str = ( + "; ".join(gapfilling_media) + if gapfilling_media + else "No genome-scale gapfilling." + ) + + overview = { + "Model ID": self.model.id, + "Full Gapfilling and ATP Analysis Report": "TBD", # You may replace 'TBD' with actual data when available + "Genome Scale Template": self.model.notes.get( + "kbase_template_refs", "Data Not Available" + ), + "Core Gapfilling Media": core_gapfilling_str, + "Gapfilling Media": gapfilling_media_str, + "Source Genome": self.model.notes.get( + "kbase_genome_ref", "Data Not Available" + ), + "Total Number of reactions": self.modelutl.nonexchange_reaction_count(), + "Number compounds": len(self.model.metabolites), + "Number compartments": number_compartments, + "Number biomass": len( + [ + rxn + for rxn in self.model.reactions + if rxn.annotation.get("sbo") == "SBO:0000629" + ] + ), + "Number gapfills": number_gapfills, + } + return overview + + # Helper function for extracting gapfilling data + def extract_gapfilling_data(self, gf_sensitivity): + if gf_sensitivity is None: + return [], {} + + gapfilling_dict = {} + gapfilling_summary = {} + + for media, media_data in gf_sensitivity.items(): + for target, target_data in media_data.items(): + gf_data = target_data.get("success", {}) + if isinstance(gf_data, dict): + for reaction_id, reaction_data in gf_data.items(): + for direction, metabolites in reaction_data.items(): + # If metabolites is None, set to empty string + if metabolites is None: + metabolites = "" + + # Extract both IDs and Names for Gapfilling Sensitivity + sensitivity_ids = [] + sensitivity_names = [] + if isinstance(metabolites, (list, tuple)): + for met_id in metabolites: + sensitivity_ids.append(met_id) + met_name = ( + self.model.metabolites.get_by_id(met_id).name + if met_id in self.model.metabolites + else met_id + ) + sensitivity_names.append(met_name) + else: + metabolites = str(metabolites) + entry = { + "reaction_id": reaction_id, + "reaction_name": 
self.model.reactions.get_by_id( + reaction_id + ).name + if reaction_id in self.model.reactions + else reaction_id, + "media": media, + "direction": direction, + "target": target, + "gapfilling_sensitivity_id": "; ".join(sensitivity_ids) + if sensitivity_ids + else metabolites, + "gapfilling_sensitivity_name": "; ".join( + sensitivity_names + ) + if sensitivity_names + else metabolites, + } + + # Update the summary dictionary + if reaction_id not in gapfilling_summary: + gapfilling_summary[reaction_id] = [] + gapfilling_summary[reaction_id].append( + f"{media}: {direction}" + ) + + # Check if reaction_id is already in dictionary + if reaction_id in gapfilling_dict: + # Update the media + existing_entry = gapfilling_dict[reaction_id] + existing_media = existing_entry["media"].split("; ") + if media not in existing_media: + existing_media.append(media) + existing_entry["media"] = "; ".join(existing_media) + else: + gapfilling_dict[reaction_id] = entry + + return list(gapfilling_dict.values()), gapfilling_summary + + # transform data to be used in tabular format to use in build_model_report + def transform_gapfilling_data(self, gapfilling_data): + transformed_data = [] + for entry in gapfilling_data: + row = [ + entry["reaction_id"], + entry["reaction_name"], + entry["media"], + entry["direction"], + entry["target"], + entry["gapfilling_sensitivity_id"], + entry["gapfilling_sensitivity_name"], + ] + transformed_data.append(row) + return transformed_data + + # Extract ATP analysis data + def extract_atp_analysis_data(self, atp_analysis, atp_expansion_filter): + entries = [] + if atp_analysis and "core_atp_gapfilling" in atp_analysis: + for media, data in atp_analysis["core_atp_gapfilling"].items(): + score = data.get("score", None) + new_reactions = [ + "{}: {}".format(k, v) for k, v in data.get("new", {}).items() + ] + reversed_reactions = [ + "{}: {}".format(k, v) for k, v in data.get("reversed", {}).items() + ] + atp_production = "Not integrated" + if ( + 
"selected_media" in atp_analysis + and media in atp_analysis["selected_media"] + ): + atp_production = atp_analysis["selected_media"][media] + + # Extracting the "Filtered Reactions" in the required format + filtered_reactions = [] + for k, v in atp_expansion_filter.get(media, {}).items(): + if isinstance(v, dict): + for sub_k, sub_v in v.items(): + if isinstance(sub_v, dict): + for reaction, direction_dict in sub_v.items(): + direction = list(direction_dict.keys())[0] + filtered_reactions.append( + f"{reaction}: {direction}" + ) + filtered_reactions_str = "; ".join(filtered_reactions) + + if score is not None: + entries.append( + { + "media": media, + "no_of_gapfilled_reactions": score, + "atp_production": atp_production, + "gapfilled_reactions": "; ".join(new_reactions), + "reversed_reaction_by_gapfilling": "; ".join( + reversed_reactions + ), + "filtered_reactions": filtered_reactions_str, + } + ) + # Sorting the entries based on the 'no_of_gapfilled_reactions' column + entries.sort(key=lambda x: x["no_of_gapfilled_reactions"]) + return entries + + # Extract ATP production data for the ATP Analysis tab + def extract_atp_production_data(self, atp_analysis): + atp_production_dict = {} + if atp_analysis: + selected_media = atp_analysis.get("selected_media", {}) + core_atp_gapfilling = atp_analysis.get("core_atp_gapfilling", {}) + + # First, process selected_media + for media, value in selected_media.items(): + atp_production_dict[media] = round(value, 2) + + # Next, process core_atp_gapfilling for media not in selected_media + for media, data in core_atp_gapfilling.items(): + if media not in atp_production_dict: + if data.get("failed"): + atp_production_dict[media] = "failed" + else: + # If the media was not processed in selected_media and it's not failed, set as 'Not Integrated' + atp_production_dict[media] = "Not Integrated" + + return atp_production_dict + + def build_multitab_report(self, output_path): + + # Build overview data + overview_data = 
self.build_overview_data() + + # Get gf_sensitivity attribute from the model + gf_sensitivity = self.modelutl.attributes.get("gf_sensitivity", None) + + # Extract gapfilling data + gapfilling_entries, gapfilling_reaction_summary = self.extract_gapfilling_data( + gf_sensitivity + ) + + # Check if ATP_analysis attribute is present in the model + atp_analysis = self.modelutl.attributes.get("ATP_analysis", None) + if atp_analysis: + atp_expansion_filter = self.modelutl.attributes.get( + "atp_expansion_filter", {} + ) + atp_analysis_entries = self.extract_atp_analysis_data( + atp_analysis, atp_expansion_filter + ) + else: + atp_analysis_entries = [] + + # Initialize context dictionary + context = { + "overview": overview_data, + "reactions": [], + "compounds": [], + "genes": [], + "biomass": [], + "gapfilling": gapfilling_entries, # Populated with gapfilling data + "atpanalysis": atp_analysis_entries, # Populated with ATP analysis data + } + + print("Module Path:", module_path + "/../data/") + + exchanges = {r.id for r in self.model.exchanges} + + # Identify biomass reactions using SBO annotation + biomass_reactions_ids = { + rxn.id + for rxn in self.model.reactions + if rxn.annotation.get("sbo") == "SBO:0000629" + } + + # Reactions Tab + for rxn in self.model.reactions: + if rxn.id not in exchanges and rxn.id not in biomass_reactions_ids: + equation = rxn.build_reaction_string(use_metabolite_names=True) + rxn_data = { + "id": rxn.id, + "name": rxn.name, + "equation": equation, + "genes": rxn.gene_reaction_rule, + "gapfilling": "; ".join( + gapfilling_reaction_summary.get(rxn.id, []) + ), # Empty list results in an empty string + } + context["reactions"].append(rxn_data) + + # Compounds Tab + for cpd in self.model.metabolites: + cpd_data = { + "id": cpd.id, + "name": cpd.name, + "formula": cpd.formula, + "charge": cpd.charge, + "compartment": cpd.compartment, + } + context["compounds"].append(cpd_data) + + # Genes Tab + for gene in self.model.genes: + gene_data = { + 
"gene": gene.id, + "reactions": "; ".join([rxn.id for rxn in gene.reactions]), + } + context["genes"].append(gene_data) + + # Biomass Tab + if biomass_reactions_ids: + for biomass_rxn_id in biomass_reactions_ids: + biomass_rxn = self.model.reactions.get_by_id(biomass_rxn_id) + for metabolite, coefficient in biomass_rxn.metabolites.items(): + compound_id = metabolite.id + compound_name = metabolite.name.split("_")[0] + compartment = compound_id.split("_")[-1] + + biomass_data = { + "biomass_reaction_id": biomass_rxn.id, + "biomass_compound_id": compound_id, + "name": compound_name, + "coefficient": coefficient, + "compartment": compartment, + } + context["biomass"].append(biomass_data) + else: + print("No biomass reactions found in the model.") + + # Gapfilling Tab + gf_sensitivity = self.modelutl.attributes.get("gf_sensitivity", None) + gapfilling_data = self.extract_gapfilling_data(gf_sensitivity) + context["gapfilling"] = gapfilling_entries + + # Extract ATP Production Data + atp_production_data = self.extract_atp_production_data(atp_analysis) + + # Populate the 'atpanalysis' context with ATP production data + for entry in context["atpanalysis"]: + media = entry["media"] + entry["atp_production"] = atp_production_data.get(media, None) + + # Diagnostics + unique_biomass_rxns = biomass_reactions_ids + print(f"Unique biomass reactions identified: {len(unique_biomass_rxns)}") + print(f"Biomass Reaction IDs: {', '.join(unique_biomass_rxns)}") + + print("\nFirst 2 reactions:") + for rxn in context["reactions"][:2]: + print(rxn) + + print("\nFirst 2 compounds:") + for cpd in context["compounds"][:2]: + print(cpd) + + print("\nFirst 2 genes:") + for gene in context["genes"][:2]: + print(gene) + + print("\nFirst 2 biomass compounds:") + for bm in context["biomass"][:2]: + print(bm) + + print("\nFirst 2 gapfilling entries:") + for gf in context["gapfilling"][:2]: + print(gf) + + print("\nFirst 2 ATP Analysis entries:") + for entry in context["atpanalysis"][:2]: + 
print(entry) + + # Render with template + env = jinja2.Environment( + loader=jinja2.FileSystemLoader(module_path + "/../data/"), + autoescape=jinja2.select_autoescape(["html", "xml"]), + ) + html = env.get_template("ModelReportTemplate.html").render(context) + directory = dirname(output_path) + os.makedirs(directory, exist_ok=True) + with open(output_path, "w") as f: + f.write(html) + + def build_report(self, output_path): + """Builds model HTML report for the Model Summary table + Parameters + ---------- + model : cobra.Model + Model to use to build the report + """ + + # 1. Utilize the build_overview_data method + model_summary_data = self.build_overview_data() + # Remove the unwanted entry + model_summary_data.pop("Full Gapfilling and ATP Analysis Report", None) + # 2. Transform the dictionary into a list of tuples + model_summary_list = [(key, value) for key, value in model_summary_data.items()] + # 3. Convert to DataFrame + model_summary_df = pd.DataFrame(model_summary_list, columns=["", ""]) + + # Style the DataFrame (as was done previously) + model_summary_df_styled = model_summary_df.style.hide( + axis="index" + ).set_table_styles( + [ + { + "selector": "th", + "props": [ + ("border", "none"), + ("background-color", "white"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "td", + "props": [ + ("border", "none"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "tr:nth-child(even)", + "props": [("background-color", "white")], + }, + { + "selector": "tr:nth-child(odd)", + "props": [("background-color", "#f2f2f2")], + }, + ] + ) + + # Fetching the gapfilling sensitivity data + gf_sensitivity = self.modelutl.attributes.get("gf_sensitivity", None) + gapfilling_data = self.extract_gapfilling_data(gf_sensitivity) + gapfilling_list = self.transform_gapfilling_data(gapfilling_data[0]) + + # Convert the gapfilling_list to a DataFrame + 
gapfillings_analysis_df = pd.DataFrame( + gapfilling_list, + columns=[ + "Reaction ID", + "Reaction Name", + "Media", + "Direction", + "Target", + "Gapfilling Sensitivity ID", + "Gapfilling Sensitivity Name", + ], + ) + + # Apply style to Gapfillings Analysis DataFrame + gapfillings_analysis_df_styled = gapfillings_analysis_df.style.hide( + axis="index" + ).set_table_styles( + [ + { + "selector": "th", + "props": [ + ("border", "none"), + ("background-color", "white"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "td", + "props": [ + ("border", "none"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "tr:nth-child(even)", + "props": [("background-color", "white")], + }, + { + "selector": "tr:nth-child(odd)", + "props": [("background-color", "#f2f2f2")], + }, + ] + ) + + # Legend for Gapfillings Analysis + annotations_text_gapfillings = """ +
    +
  • Reaction ID: The identifier of the reaction.
  • +
  • Reaction Name: The name of the reaction.
  • +
  • Media: The media used by gap filling.
  • +
  • Direction: The direction of the reaction. Can be ">" for forward, "<" for reverse, or "=" for both directions.
  • +
  • Target: The reaction selected as the objective function target for the gapfilling optimization problem. Targets here can be the model’s biomass reaction, commonly named “bio1” for models created by this app. + Alternatively, “rxn00062” (ATP Production) reaction is shown for cases where gapfilling was applied to guarantee ATP production in a given media. + When reactions are gapfilled for ATP production, we recommend checking the full Core ATP Analysis in the table below.
  • +
  • Gapfilling Sensitivity ID and Name: Gapfilling is necessary when compounds in the biomass objective function cannot be produced by the model. + For each reaction we list the biomass compound(s) that cannot be synthesized by the model without gapfilling. + In cases where gapfilling fails, there are two possible scenarios: + 1) FBF (failed before filtering): the gapfilling immediately failed, even before we filtered out the ATP breaking reactions. This means this objective CANNOT be satisfied with the entire current database. + 2) FAF (failed after filtering): the gapfilling succeeded before filtering, but failed after filtering out reactions that break ATP. This tells you definitively if the ATP filtering caused the gapfilling to fail.
  • +
+ """ + + # Extract ATP analysis data + atp_analysis = self.modelutl.attributes.get("ATP_analysis", None) + atp_expansion_filter = self.modelutl.attributes.get("atp_expansion_filter", {}) + atp_analysis_entries = self.extract_atp_analysis_data( + atp_analysis, atp_expansion_filter + ) + + # Convert the atp_analysis_entries list to a DataFrame + atp_analysis_df = pd.DataFrame(atp_analysis_entries) + + # Apply style to ATP Analysis DataFrame + atp_analysis_df_styled = atp_analysis_df.style.hide( + axis="index" + ).set_table_styles( + [ + { + "selector": "th", + "props": [ + ("border", "none"), + ("background-color", "white"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "td", + "props": [ + ("border", "none"), + ("font-family", "Oxygen"), + ("font-size", "14px"), + ("line-height", "20px"), + ], + }, + { + "selector": "tr:nth-child(even)", + "props": [("background-color", "white")], + }, + { + "selector": "tr:nth-child(odd)", + "props": [("background-color", "#f2f2f2")], + }, + ] + ) + + # Legend for ATP Analysis + annotations_text_atp_analysis = """ +
    +
  • No. of gapfilled reactions: The number of reactions filled by the gapfilling process.
  • +
  • Media: The media in which the reaction takes place.
  • +
  • ATP Production: ATP production by the core metabolism model.
  • +
  • Gapfilled Reactions: Reactions added during the gapfilling process.
  • +
  • Reversed Reaction by Gapfilling: Reactions that have been reversed during the gapfilling process.
  • +
  • Filtered Reactions: Reactions that have been filtered out during the analysis. When a reaction addition would lead to a large increase in ATP production or an infinite energy loop, we filter that reaction out of the gapfilling database and prevent it from being added to the model.
  • +
+ """ + + # ATP analysis explanation text + explanation_text_atp_analysis = """ +

During model reconstruction, we analyze the genome’s core metabolism draft model (model without gapfilling) to assess energy biosynthesis capabilities. + The goal of this analysis is to ensure the core metabolism model is able to produce ATP before we expand the model to the genome-scale. + This step is designed to prevent gapfilling from introducing reactions that create energy-generating loops. + The tests are conducted on a large collection of minimal conditions, with the goal of simulating the model’s capability to produce energy with different electron donor, electron acceptor, and carbon source combinations.

+

When the draft model of the core metabolism is capable of producing ATP in at least one of the test media, no gapfilling reactions from this analysis will be added to the model. While we still report the gapfilling requirements for the test media formulations that fail to produce ATP with that draft core model, we only integrate these solutions in the model when no test media succeeds in producing ATP. + In this case, the integrated gapfilling solution(s) will be displayed in the “Gapfilling Analysis” table above, with the “Target” “rxn00062” (ATP Production) objective function.

+

The goal is to display the test results for all media to provide clues for the metabolic capabilities of the genome(s). When many reactions are required for growth on the SO4 testing media conditions, this could be a good indicator that the organism is not capable of performing sulfate reduction. + On the other hand, when only one gapfill reaction is required for ATP production in a given media, multiple scenarios can be considered. + 1) Organism(s) can’t grow on test condition, and we correctly did not add the reaction to the model. 2) Possible issue with the source genome annotation missing a specific gene function. 3) Possible issue with the model reconstruction database. We hope this data helps make more informed decisions on reactions that may need to be manually curated in the model. + In cases where it is known from the literature or unpublished experimental results that an organism is capable of producing ATP in a given media condition that requires gapfilling in this analysis, you can use the parameter “Force ATP media” in the reconstruction app to ensure those reactions are integrated into the model. + .

+ """ + + # Save the data to HTML with the styled DataFrames and the legends + directory = os.path.dirname(output_path) + os.makedirs(directory, exist_ok=True) + with open(output_path, "w", encoding="utf-8") as f: + f.write('') + f.write("

Model Summary

") + f.write(model_summary_df_styled.render(escape=False)) + f.write("

") + f.write("

Gapfillings Analysis

") + + # Check for Gapfillings Analysis data + if not gapfillings_analysis_df.empty: + f.write(gapfillings_analysis_df_styled.render(escape=False)) + f.write(f"

Legend:

{annotations_text_gapfillings}") + else: + f.write( + "

Warning: No Gapfillings Analysis data available for this model.

" + ) + + f.write("

Core ATP Analysis

") + + # Check for ATP Analysis data + if not atp_analysis_df.empty: + f.write(atp_analysis_df_styled.render(escape=False)) + f.write(f"

Legend:

{annotations_text_atp_analysis}") + f.write(explanation_text_atp_analysis) + else: + f.write( + "

Warning: No Core ATP Analysis data available for this model.

" + ) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index d4494938..a8c836ee 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -4,18 +4,21 @@ import time import json import sys +import pandas as pd +import cobra from cobra import Model, Reaction, Metabolite +from optlang.symbolics import Zero from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.biochem.modelseed_biochem import ModelSEEDBiochem from modelseedpy.core.fbahelper import FBAHelper +from multiprocessing import Value + +# from builtins import None logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -# handler = logging.StreamHandler(sys.stdout) -# handler.setLevel(logging.DEBUG) -# formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') -# handler.setFormatter(formatter) -# logger.addHandler(handler) +logger.setLevel( + logging.INFO +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO class MSModelUtil: @@ -85,6 +88,8 @@ def search_name(name): @staticmethod def get(model, create_if_missing=True): + if isinstance(model, MSModelUtil): + return model if model in MSModelUtil.mdlutls: return MSModelUtil.mdlutls[model] elif create_if_missing: @@ -96,6 +101,7 @@ def get(model, create_if_missing=True): def __init__(self, model): self.model = model self.pkgmgr = MSPackageManager.get_pkg_mgr(model) + self.wsid = None self.atputl = None self.gfutl = None self.metabolite_hash = None @@ -104,6 +110,16 @@ def __init__(self, model): self.reaction_scores = None self.score = None self.integrated_gapfillings = [] + self.attributes = {} + if hasattr(self.model, "computed_attributes"): + if self.model.computed_attributes: + self.attributes = self.model.computed_attributes + if "pathways" not in self.attributes: + self.attributes["pathways"] = {} + if "auxotrophy" not in self.attributes: + self.attributes["auxotrophy"] = {} + if "fbas" not in 
self.attributes: + self.attributes["fbas"] = {} def compute_automated_reaction_scores(self): """ @@ -269,6 +285,24 @@ def reaction_scores(self): ################################################################################# # Functions related to editing the model ################################################################################# + def get_attributes(self, key=None, default=None): + if not key: + return self.attributes + if key not in self.attributes: + self.attributes[key] = default + return self.attributes[key] + + def save_attributes(self, value=None, key=None): + if value: + if key: + self.attributes[key] = value + else: + self.attributes = value + if hasattr(self.model, "computed_attributes"): + logger.info("Setting FBAModel computed_attributes to mdlutl attributes") + self.attributes["gene_count"] = len(self.model.genes) + self.model.computed_attributes = self.attributes + def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): modelseed = ModelSEEDBiochem.get() output = [] @@ -307,6 +341,30 @@ def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): self.model.add_reactions(output) return output + ################################################################################# + # Functions related to utility functions + ################################################################################# + def build_model_data_hash(self): + data = { + "Model": self.id, + "Genome": self.genome.info.metadata["Name"], + "Genes": self.genome.info.metadata["Number of Protein Encoding Genes"], + } + return data + + def compare_reactions(self, reaction_list, filename): + data = {} + for rxn in reaction_list: + for met in rxn.metabolites: + if met.id not in data: + data[met.id] = {} + for other_rxn in reaction_list: + data[met.id][other_rxn.id] = 0 + data[met.id][rxn.id] = rxn.metabolites[met] + df = pd.DataFrame(data) + df = df.transpose() + df.to_csv(filename) + 
################################################################################# # Functions related to managing biomass reactions ################################################################################# @@ -482,6 +540,7 @@ def convert_cobra_reaction_to_kbreaction( def test_solution(self, solution, keep_changes=False): unneeded = [] + removed_rxns = [] tempmodel = self.model if not keep_changes: tempmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) @@ -499,7 +558,7 @@ def test_solution(self, solution, keep_changes=False): rxnobj.upper_bound = 0 objective = tempmodel.slim_optimize() if objective < solution["minobjective"]: - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " needed:" @@ -509,8 +568,9 @@ def test_solution(self, solution, keep_changes=False): ) rxnobj.upper_bound = original_bound else: + removed_rxns.append(rxnobj) unneeded.append([rxn_id, solution[key][rxn_id], key]) - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " not needed:" @@ -521,7 +581,7 @@ def test_solution(self, solution, keep_changes=False): rxnobj.lower_bound = 0 objective = tempmodel.slim_optimize() if objective < solution["minobjective"]: - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " needed:" @@ -531,14 +591,16 @@ def test_solution(self, solution, keep_changes=False): ) rxnobj.lower_bound = original_bound else: + removed_rxns.append(rxnobj) unneeded.append([rxn_id, solution[key][rxn_id], key]) - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " not needed:" + str(objective) ) if keep_changes: + tempmodel.remove_reactions(removed_rxns) for items in unneeded: del solution[items[2]][items[0]] return unneeded @@ -656,6 +718,7 @@ def test_single_condition(self, condition, apply_condition=True, model=None): if model is None: model = self.model if apply_condition: + print("applying - bad") self.apply_test_condition(condition, model) new_objective = model.slim_optimize() value = new_objective @@ -675,7 
+738,7 @@ def test_single_condition(self, condition, apply_condition=True, model=None): if model.solver.status != "optimal": self.printlp(condition["media"].id + "-Testing-Infeasible.lp") logger.critical( - ondition["media"].id + condition["media"].id + "testing leads to infeasible problem. LP file printed to debug!" ) return False @@ -837,7 +900,11 @@ def binary_expansion_test(self, reaction_list, condition, currmodel, depth=0): return filtered_list def reaction_expansion_test( - self, reaction_list, condition_list, binary_search=True + self, + reaction_list, + condition_list, + binary_search=True, + attribute_label="gf_filter", ): """Adds reactions in reaction list one by one and appplies tests, filtering reactions that fail @@ -856,12 +923,10 @@ def reaction_expansion_test( Raises ------ """ - logger.debug("Expansion started!") + logger.debug(f"Expansion started! Binary = {binary_search}") filtered_list = [] for condition in condition_list: - logger.debug(f"testing condition {condition}") - currmodel = self.model tic = time.perf_counter() new_filtered = [] @@ -872,13 +937,15 @@ def reaction_expansion_test( reaction_list, condition, currmodel ) for item in new_filtered: - filtered_list.append(item) + if item not in filtered_list: + filtered_list.append(item) else: new_filtered = self.linear_expansion_test( reaction_list, condition, currmodel ) for item in new_filtered: - filtered_list.append(item) + if item not in filtered_list: + filtered_list.append(item) # Restoring knockout of newly filtered reactions, which expire after exiting the "with" block above for item in new_filtered: if item[1] == ">": @@ -886,17 +953,157 @@ def reaction_expansion_test( else: item[0].lower_bound = 0 toc = time.perf_counter() - logger.debug( + logger.info( "Expansion time:" + condition["media"].id + ":" + str((toc - tic)) ) - logger.debug( + logger.info( "Filtered count:" + str(len(filtered_list)) + " out of " + str(len(reaction_list)) ) + # Adding filter results to attributes + 
gf_filter_att = self.get_attributes(attribute_label, {}) + if condition["media"].id not in gf_filter_att: + gf_filter_att[condition["media"].id] = {} + if condition["objective"] not in gf_filter_att[condition["media"].id]: + gf_filter_att[condition["media"].id][condition["objective"]] = {} + if ( + condition["threshold"] + not in gf_filter_att[condition["media"].id][condition["objective"]] + ): + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ] = {} + for item in new_filtered: + if ( + item[0].id + not in gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ] + ): + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id] = {} + if ( + item[1] + not in gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id] + ): + if len(item) < 3: + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id][item[1]] = None + else: + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id][item[1]] = item[2] return filtered_list + ################################################################################# + # Functions related to biomass sensitivity analysis + ################################################################################# + def find_unproducible_biomass_compounds(self, target_rxn="bio1", ko_list=None): + # Cloning the model because we don't want to modify the original model with this analysis + tempmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) + # Getting target reaction and making sure it exists + if target_rxn not in tempmodel.reactions: + logger.critical(target_rxn + " not in model!") + return None + target_rxn_obj = tempmodel.reactions.get_by_id(target_rxn) + tempmodel.objective = target_rxn + original_objective = tempmodel.objective + pkgmgr = 
MSPackageManager.get_pkg_mgr(tempmodel) + rxn_list = [target_rxn, "rxn05294_c0", "rxn05295_c0", "rxn05296_c0"] + for rxn in rxn_list: + if rxn in tempmodel.reactions: + pkgmgr.getpkg("FlexibleBiomassPkg").build_package( + { + "bio_rxn_id": rxn, + "flex_coefficient": [0, 1], + "use_rna_class": None, + "use_dna_class": None, + "use_protein_class": None, + "use_energy_class": [0, 1], + "add_total_biomass_constraint": False, + } + ) + + # Creating min flex objective + min_flex_obj = tempmodel.problem.Objective(Zero, direction="min") + obj_coef = dict() + for reaction in tempmodel.reactions: + if reaction.id[0:5] == "FLEX_" or reaction.id[0:6] == "energy": + obj_coef[reaction.forward_variable] = 1 + obj_coef[reaction.reverse_variable] = 1 + # Temporarily setting flex objective so I can set coefficients + tempmodel.objective = min_flex_obj + min_flex_obj.set_linear_coefficients(obj_coef) + if not ko_list: + return self.run_biomass_dependency_test( + target_rxn_obj, tempmodel, original_objective, min_flex_obj, rxn_list + ) + else: + output = {} + for item in ko_list: + logger.debug("KO:" + item[0] + item[1]) + if item[0] not in output: + output[item[0]] = {} + if item[0] in tempmodel.reactions: + rxnobj = tempmodel.reactions.get_by_id(item[0]) + if item[1] == ">": + original_bound = rxnobj.upper_bound + rxnobj.upper_bound = 0 + output[item[0]][item[1]] = self.run_biomass_dependency_test( + target_rxn_obj, + tempmodel, + original_objective, + min_flex_obj, + rxn_list, + ) + rxnobj.upper_bound = original_bound + else: + original_bound = rxnobj.lower_bound + rxnobj.lower_bound = 0 + output[item[0]][item[1]] = self.run_biomass_dependency_test( + target_rxn_obj, + tempmodel, + original_objective, + min_flex_obj, + rxn_list, + ) + rxnobj.lower_bound = original_bound + else: + output[item[0]][item[1]] = [] + return output + + def run_biomass_dependency_test( + self, target_rxn, tempmodel, original_objective, min_flex_obj, rxn_list + ): + tempmodel.objective = original_objective 
+ objective = tempmodel.slim_optimize() + if objective > 0: + target_rxn.lower_bound = 0.1 + tempmodel.objective = min_flex_obj + solution = tempmodel.optimize() + biocpds = [] + for reaction in tempmodel.reactions: + if reaction.id[0:5] == "FLEX_" and ( + reaction.forward_variable.primal > Zero + or reaction.reverse_variable.primal > Zero + ): + logger.debug("Depends on:" + reaction.id) + label = reaction.id[5:] + for item in rxn_list: + if label[0 : len(item)] == item: + biocpds.append(label[len(item) + 1 :]) + target_rxn.lower_bound = 0 + return biocpds + else: + logger.debug("Cannot grow") + return None + def add_atp_hydrolysis(self, compartment): # Searching for ATP hydrolysis compounds coefs = { diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index b0d384eb..5cce5927 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -143,7 +143,7 @@ class MSTemplateSpecies(Metabolite): def __init__( self, comp_cpd_id: str, - charge: int, + charge: float, compartment: str, cpd_id, max_uptake=0, @@ -160,20 +160,30 @@ def __init__( self.cpd_id ) - def to_metabolite(self, index="0"): + def to_metabolite(self, index="0", force=False): """ Create cobra.core.Metabolite instance :param index: compartment index + :@param force: force index :return: cobra.core.Metabolite """ if index is None: index = "" + index = str(index) + + if self.compartment == "e" and index.isnumeric(): + if force: + logger.warning( + f"Forcing numeric index [{index}] to extra cellular compartment not advised" + ) + else: + index = "0" + cpd_id = f"{self.id}{index}" compartment = f"{self.compartment}{index}" - name = f"{self.name}" - if len(str(index)) > 0: - name = f"{self.name} [{compartment}]" + name = f"{self.compound.name} [{compartment}]" metabolite = Metabolite(cpd_id, self.formula, name, self.charge, compartment) + metabolite.notes["modelseed_template_id"] = self.id return metabolite @property @@ -183,8 +193,8 @@ def compound(self): @property 
def name(self): if self._template_compound: - return self._template_compound.name - return "" + return f"{self._template_compound.name} [{self.compartment}]" + return f"{self.id} [{self.compartment}]" @name.setter def name(self, value): @@ -293,15 +303,17 @@ def compartment(self): def to_reaction(self, model=None, index="0"): if index is None: index = "" + index = str(index) rxn_id = f"{self.id}{index}" compartment = f"{self.compartment}{index}" name = f"{self.name}" metabolites = {} for m, v in self.metabolites.items(): - if model and m.id in model.metabolites: - metabolites[model.metabolites.get_by_id(m.id)] = v + _metabolite = m.to_metabolite(index) + if _metabolite.id in model.metabolites: + metabolites[model.metabolites.get_by_id(_metabolite.id)] = v else: - metabolites[m.to_metabolite(index)] = v + metabolites[_metabolite] = v if len(str(index)) > 0: name = f"{self.name} [{compartment}]" @@ -426,7 +438,7 @@ def get_data(self): map(lambda x: "~/complexes/id/" + x.id, self.complexes) ), # 'status': self.status, - "type": self.type, + "type": self.type if type(self.type) is str else self.type.value, } # def build_reaction_string(self, use_metabolite_names=False, use_compartment_names=None): @@ -508,21 +520,23 @@ def get_data(self): class MSTemplateBiomass: def __init__( self, - bio_id, - name, - type, - dna, - rna, - protein, - lipid, - cellwall, - cofactor, - energy, - other, + biomass_id: str, + name: str, + type: str, + dna: float = 0, + rna: float = 0, + protein: float = 0, + lipid: float = 0, + cellwall: float = 0, + cofactor: float = 0, + pigment: float = 0, + carbohydrate: float = 0, + energy: float = 0, + other: float = 0, ): """ - :param bio_id:string + :param biomass_id:string :param name:string :param type:string :param dna:float @@ -531,10 +545,12 @@ def __init__( :param lipid:float :param cellwall:float :param cofactor:float + :param pigment:float + :param carbohydrate:float :param energy:float :param other:float """ - self.id = bio_id + self.id = 
biomass_id self.name = name self.type = type self.dna = dna @@ -543,6 +559,8 @@ def __init__( self.lipid = lipid self.cellwall = cellwall self.cofactor = cofactor + self.pigment = pigment + self.carbohydrate = carbohydrate self.energy = energy self.other = other self.templateBiomassComponents = DictList() @@ -561,6 +579,8 @@ def from_table( lipid, cellwall, cofactor, + pigment, + carbohydrate, energy, other, ): @@ -574,15 +594,21 @@ def from_table( lipid, cellwall, cofactor, + pigment, + carbohydrate, energy, other, ) if isinstance(filename_or_df, str): filename_or_df = pd.read_table(filename_or_df) for index, row in filename_or_df.iterrows(): + if "biomass_id" not in row: + row["biomass_id"] = "bio1" if row["biomass_id"] == bio_id: + if "compartment" not in row: + row["compartment"] = "c" metabolite = template.compcompounds.get_by_id( - row["id"] + "_" + row["compartment"] + f'{row["id"]}_{row["compartment"].lower()}' ) linked_mets = {} if ( @@ -593,14 +619,14 @@ def from_table( for item in array: sub_array = item.split(":") l_met = template.compcompounds.get_by_id( - sub_array[0] + "_" + row["compartment"] + f'{sub_array[0]}_{row["compartment"].lower()}' ) linked_mets[l_met] = float(sub_array[1]) self.add_biomass_component( metabolite, - row["class"], - row["coefficient"], - row["coefficient_type"], + row["class"].lower(), + float(row["coefficient"]), + row["coefficient_type"].upper(), linked_mets, ) return self @@ -611,14 +637,16 @@ def from_dict(d, template): d["id"], d["name"], d["type"], - d["dna"], - d["rna"], - d["protein"], - d["lipid"], - d["cellwall"], - d["cofactor"], - d["energy"], - d["other"], + d.get("dna", 0), + d.get("rna", 0), + d.get("protein", 0), + d.get("lipid", 0), + d.get("cellwall", 0), + d.get("cofactor", 0), + d.get("pigment", 0), + d.get("carbohydrate", 0), + d.get("energy", 0), + d.get("other", 0), ) for item in d["templateBiomassComponents"]: biocomp = MSTemplateBiomassComponent.from_dict(item, template) @@ -645,7 +673,7 @@ def 
get_or_create_metabolite(self, model, baseid, compartment=None, index=None): return model.metabolites.get_by_id(fullid) if tempid in self._template.compcompounds: met = self._template.compcompounds.get_by_id(tempid).to_metabolite(index) - model.metabolites.add(met) + model.add_metabolites([met]) return met logger.error( "Could not find biomass metabolite [%s] in model or template!", @@ -653,25 +681,28 @@ def get_or_create_metabolite(self, model, baseid, compartment=None, index=None): ) def get_or_create_reaction(self, model, baseid, compartment=None, index=None): + logger.debug(f"{baseid}, {compartment}, {index}") fullid = baseid if compartment: fullid += "_" + compartment tempid = fullid if index: fullid += index - if fullid in model.metabolites: + if fullid in model.reactions: return model.reactions.get_by_id(fullid) if tempid in self._template.reactions: rxn = self._template.reactions.get_by_id(tempid).to_reaction(model, index) - model.reactions.add(rxn) + model.add_reactions([rxn]) return rxn newrxn = Reaction(fullid, fullid, "biomasses", 0, 1000) - model.reactions.add(newrxn) + model.add_reactions(newrxn) return newrxn - def build_biomass(self, model, index="0", classic=False, GC=0.5): + def build_biomass(self, model, index="0", classic=False, GC=0.5, add_to_model=True): types = [ "cofactor", + "pigment", + "carbohydrate", "lipid", "cellwall", "protein", @@ -682,6 +713,8 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): ] type_abundances = { "cofactor": self.cofactor, + "pigment": self.pigment, + "carbohydrate": self.carbohydrate, "lipid": self.lipid, "cellwall": self.cellwall, "protein": self.protein, @@ -693,44 +726,90 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): metabolites = {} biorxn = Reaction(self.id, self.name, "biomasses", 0, 1000) # Adding standard compounds for DNA, RNA, protein, and biomass - if not classic and self.type == "growth": - met = self.get_or_create_metabolite(model, "cpd11416", "c", index) - 
metabolites[met] = 1 specific_reactions = {"dna": None, "rna": None, "protein": None} + exclusions = {"cpd17041_c": 1, "cpd17042_c": 1, "cpd17043_c": 1} if not classic and self.dna > 0: met = self.get_or_create_metabolite(model, "cpd11461", "c", index) specific_reactions["dna"] = self.get_or_create_reaction( model, "rxn05294", "c", index ) + specific_reactions["dna"].name = "DNA synthesis" + if "rxn13783_c" + index in model.reactions: + specific_reactions[ + "dna" + ].gene_reaction_rule = model.reactions.get_by_id( + "rxn13783_c" + index + ).gene_reaction_rule + specific_reactions["dna"].notes[ + "modelseed_complex" + ] = model.reactions.get_by_id("rxn13783_c" + index).notes[ + "modelseed_complex" + ] + model.remove_reactions( + [model.reactions.get_by_id("rxn13783_c" + index)] + ) specific_reactions["dna"].subtract_metabolites( specific_reactions["dna"].metabolites ) - specific_reactions["dna"].metabolites[met] = 1 + specific_reactions["dna"].add_metabolites({met: 1}) + metabolites[met] = 1 metabolites[met] = -1 * self.dna if not classic and self.protein > 0: met = self.get_or_create_metabolite(model, "cpd11463", "c", index) specific_reactions["protein"] = self.get_or_create_reaction( model, "rxn05296", "c", index ) + specific_reactions["protein"].name = "Protein synthesis" + if "rxn13782_c" + index in model.reactions: + specific_reactions[ + "protein" + ].gene_reaction_rule = model.reactions.get_by_id( + "rxn13782_c" + index + ).gene_reaction_rule + specific_reactions["protein"].notes[ + "modelseed_complex" + ] = model.reactions.get_by_id("rxn13782_c" + index).notes[ + "modelseed_complex" + ] + model.remove_reactions( + [model.reactions.get_by_id("rxn13782_c" + index)] + ) specific_reactions["protein"].subtract_metabolites( specific_reactions["protein"].metabolites ) - specific_reactions["protein"].metabolites[met] = 1 + specific_reactions["protein"].add_metabolites({met: 1}) metabolites[met] = -1 * self.protein if not classic and self.rna > 0: met = 
self.get_or_create_metabolite(model, "cpd11462", "c", index) specific_reactions["rna"] = self.get_or_create_reaction( model, "rxn05295", "c", index ) + specific_reactions["rna"].name = "mRNA synthesis" + if "rxn13784_c" + index in model.reactions: + specific_reactions[ + "rna" + ].gene_reaction_rule = model.reactions.get_by_id( + "rxn13784_c" + index + ).gene_reaction_rule + specific_reactions["rna"].notes[ + "modelseed_complex" + ] = model.reactions.get_by_id("rxn13784_c" + index).notes[ + "modelseed_complex" + ] + model.remove_reactions( + [model.reactions.get_by_id("rxn13784_c" + index)] + ) specific_reactions["rna"].subtract_metabolites( specific_reactions["rna"].metabolites ) - specific_reactions["rna"].metabolites[met] = 1 + specific_reactions["rna"].add_metabolites({met: 1}) metabolites[met] = -1 * self.rna bio_type_hash = {} for type in types: for comp in self.templateBiomassComponents: - if type == comp.comp_class: + if comp.metabolite.id in exclusions and not classic: + pass + elif type == comp.comp_class: met = self.get_or_create_metabolite( model, comp.metabolite.id, None, index ) @@ -757,13 +836,15 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): coef = comp.coefficient elif comp.coefficient_type == "AT": coef = ( - comp.coefficient + 2 + * comp.coefficient * (1 - GC) * (type_abundances[type] / bio_type_hash[type]["total_mw"]) ) elif comp.coefficient_type == "GC": coef = ( - comp.coefficient + 2 + * comp.coefficient * GC * (type_abundances[type] / bio_type_hash[type]["total_mw"]) ) @@ -776,10 +857,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): metabolites[met] = coef elif not classic: coef = coef / type_abundances[type] - if met in metabolites: - specific_reactions[type].metabolites[met] += coef - else: - specific_reactions[type].metabolites[met] = coef + specific_reactions[type].add_metabolites({met: coef}) for l_met in comp.linked_metabolites: met = self.get_or_create_metabolite( model, l_met.id, None, 
index @@ -792,16 +870,15 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): else: metabolites[met] = coef * comp.linked_metabolites[l_met] elif not classic: - if met in metabolites: - specific_reactions[type].metabolites[met] += ( - coef * comp.linked_metabolites[l_met] - ) - else: - specific_reactions[type].metabolites[met] = ( - coef * comp.linked_metabolites[l_met] - ) + specific_reactions[type].add_metabolites( + {met: coef * comp.linked_metabolites[l_met]} + ) biorxn.annotation[SBO_ANNOTATION] = "SBO:0000629" biorxn.add_metabolites(metabolites) + if add_to_model: + if biorxn.id in model.reactions: + model.remove_reactions([biorxn.id]) + model.add_reactions([biorxn]) return biorxn def get_data(self): @@ -815,6 +892,8 @@ def get_data(self): "lipid": self.lipid, "cellwall": self.cellwall, "cofactor": self.cofactor, + "pigment": self.pigment, + "carbohydrate": self.carbohydrate, "energy": self.energy, "other": self.other, "templateBiomassComponents": [], @@ -822,6 +901,8 @@ def get_data(self): for comp in self.templateBiomassComponents: data["templateBiomassComponents"].append(comp.get_data()) + return data + class NewModelTemplateRole: def __init__(self, role_id, name, features=None, source="", aliases=None): @@ -1059,6 +1140,8 @@ def overwrite_biomass_from_table( lipid, cellwall, cofactor, + pigment, + carbohydrate, energy, other, ): @@ -1076,6 +1159,8 @@ def overwrite_biomass_from_table( lipid, cellwall, cofactor, + pigment, + carbohydrate, energy, other, ) @@ -1254,7 +1339,9 @@ def add_reactions(self, reaction_list: list): if cpx.id not in self.complexes: self.add_complexes([cpx]) complex_replace.add(self.complexes.get_by_id(cpx.id)) + x._metabolites = metabolites_replace + x._update_awareness() x.complexes = complex_replace self.reactions += reaction_list diff --git a/modelseedpy/core/rast_client.py b/modelseedpy/core/rast_client.py index 575cf0d4..cc36b7bb 100644 --- a/modelseedpy/core/rast_client.py +++ b/modelseedpy/core/rast_client.py @@ 
-52,17 +52,15 @@ def __init__(self): ) self.stages = [ {"name": "annotate_proteins_kmer_v2", "kmer_v2_parameters": {}}, - { - "name": "annotate_proteins_kmer_v1", - "kmer_v1_parameters": {"annotate_hypothetical_only": 1}, - }, + # {"name": "annotate_proteins_kmer_v1", + # "kmer_v1_parameters": {"annotate_hypothetical_only": 1},}, { "name": "annotate_proteins_similarity", "similarity_parameters": {"annotate_hypothetical_only": 1}, }, ] - def annotate_genome(self, genome): + def annotate_genome(self, genome, split_terms=True): p_features = [] for f in genome.features: if f.seq and len(f.seq) > 0: @@ -72,9 +70,13 @@ def annotate_genome(self, genome): for o in res[0]["features"]: feature = genome.features.get_by_id(o["id"]) if "function" in o: - functions = re.split("; | / | @ | => ", o["function"]) - for function in functions: - feature.add_ontology_term("RAST", function) + rast_function = o["function"] + if split_terms: + functions = re.split("; | / | @", rast_function) + for function in functions: + feature.add_ontology_term("RAST", function) + else: + feature.add_ontology_term("RAST", rast_function) return res[0]["analysis_events"] @@ -84,6 +86,14 @@ def annotate_genome_from_fasta(self, filepath, split="|"): return genome, res + def annotate_protein_sequence(self, protein_id: str, protein_seq: str): + p_features = [{"id": protein_id, "protein_translation": protein_seq}] + return self.f(p_features) + + def annotate_protein_sequences(self, protein_seqs: dict): + p_features = [{"id": protein_id, "protein_translation": protein_seq}] + return self.f(p_features) + def f1(self, protein_id, protein_seq): p_features = [{"id": protein_id, "protein_translation": protein_seq}] return self.f(p_features) diff --git a/modelseedpy/data/ModelReportTemplate.html b/modelseedpy/data/ModelReportTemplate.html new file mode 100644 index 00000000..cab60a0b --- /dev/null +++ b/modelseedpy/data/ModelReportTemplate.html @@ -0,0 +1,349 @@ + + + + + ModelSEED Reconstruction + + + + + + +
+ + + + + diff --git a/modelseedpy/data/atp_medias.tsv b/modelseedpy/data/atp_medias.tsv index 4a4b7a84..53d15048 100644 --- a/modelseedpy/data/atp_medias.tsv +++ b/modelseedpy/data/atp_medias.tsv @@ -1,30 +1,34 @@ -seed Glc/O2 Ac/O2 Etho/O2 Pyr/O2 Glyc/O2 Fum/O2 Succ/O2 Akg/O2 LLac/O2 Dlac/O2 For/O2 Glc Ac Etho Pyr Glyc Fum Succ Akg Llac Dlac For mal-L For/NO2 For/NO3 For/NO Pyr/NO2 Pyr/NO3 Pyr/NO Ac/NO2 Ac/NO3 Ac/NO Glc/DMSO Glc/TMAO Pyr/DMSO Pyr/TMAO Pyr/SO4 Pyr/SO3 H2/CO2 H2/Ac For/SO4/H2 LLac/SO4/H2 For/SO4 LLac/SO4 H2/SO4 empty Light ANME Methane -EX_cpd00027_e0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00024_e0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00106_e0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00036_e0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00137_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00130_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00159_e0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 -EX_cpd00221_e0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00020_e0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00100_e0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00363_e0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00029_e0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -EX_cpd00047_e0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 -EX_cpd00204_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00011_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -EX_cpd00007_e0 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd11640_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 1000 1000 1000 0 0 1000 0 0 0 0 -EX_cpd00418_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00209_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00075_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00659_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00528_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd08021_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00811_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd00048_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 0 1000 1000 1000 1000 1000 0 0 0 0 -EX_cpd00081_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 -EX_cpd11632_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 -EX_cpd08701_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 -EX_cpd01024_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 +seed Glc.O2 Ac.O2 
Etho.O2 Pyr.O2 Glyc.O2 Fum.O2 Succ.O2 Akg.O2 LLac.O2 Dlac.O2 For.O2 Glc Ac Etho Pyr Glyc Fum Succ Akg Llac Dlac For mal-L For.NO2 For.NO3 For.NO Pyr.NO2 Pyr.NO3 Pyr.NO Ac.NO2 Ac.NO3 Ac.NO Glc.DMSO Glc.TMAO Pyr.DMSO Pyr.TMAO Pyr.SO4 Pyr.SO3 H2.CO2 H2.Ac For.SO4.H2 LLac.SO4.H2 For.SO4 LLac.SO4 H2.SO4 empty Light ANME Methane Methanol Methanol.H2 Methanamine.H2 Dimethylamine.H2 Trimethylamine.H2 +EX_cpd00027_e0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00024_e0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00106_e0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00036_e0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00137_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00130_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00159_e0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 +EX_cpd00221_e0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00020_e0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00100_e0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00363_e0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00029_e0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00047_e0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 
0 0 0 +EX_cpd00204_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00011_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00007_e0 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd11640_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 1000 1000 1000 0 0 1000 0 0 0 0 0 1000 1000 1000 1000 +EX_cpd00418_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00209_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00075_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 1000 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00659_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00528_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd08021_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00811_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd00048_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 0 1000 1000 1000 1000 1000 0 0 0 0 0 0 0 0 0 +EX_cpd00081_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +EX_cpd11632_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 +EX_cpd08701_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1000 0 0 0 0 0 0 
+EX_cpd01024_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 +EX_cpd00116_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 +EX_cpd00187_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +EX_cpd00425_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 +EX_cpd00441_e0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 diff --git a/modelseedpy/fbapkg/basefbapkg.py b/modelseedpy/fbapkg/basefbapkg.py index 662696f3..77effe32 100644 --- a/modelseedpy/fbapkg/basefbapkg.py +++ b/modelseedpy/fbapkg/basefbapkg.py @@ -33,8 +33,13 @@ class BaseFBAPkg: def __init__( self, model, name, variable_types={}, constraint_types={}, reaction_types={} ): - self.model = model - self.modelutl = MSModelUtil.get(model) + if isinstance(model, MSModelUtil): + self.model = model.model + self.modelutl = model + else: + self.model = model + self.modelutl = MSModelUtil.get(model) + self.name = name self.pkgmgr = MSPackageManager.get_pkg_mgr(model) diff --git a/modelseedpy/fbapkg/elementuptakepkg.py b/modelseedpy/fbapkg/elementuptakepkg.py index 66e01035..1f61f7a8 100644 --- a/modelseedpy/fbapkg/elementuptakepkg.py +++ b/modelseedpy/fbapkg/elementuptakepkg.py @@ -16,21 +16,37 @@ def __init__(self, model): {"elements": "string"}, ) - def build_package(self, element_limits): + def build_package( + self, element_limits, exception_compounds=[], exception_reactions=[] + ): + # Converting exception compounds list into exception reaction list + self.parameters = { + "element_limits": element_limits, + "exception_compounds": exception_compounds, + "exception_reactions": exception_reactions, + } + exchange_hash = self.modelutl.exchange_hash() + for met in exception_compounds: + if met in exchange_hash: + 
exception_reactions.append(exchange_hash[met]) + # Now building or rebuilding constraints for element in element_limits: if element not in self.variables["elements"]: self.build_variable(element, element_limits[element]) - self.build_constraint(element) + for element in element_limits: + # This call will first remove existing constraints then build the new constraint + self.build_constraint(element, exception_reactions) def build_variable(self, element, limit): return BaseFBAPkg.build_variable( self, "elements", 0, limit, "continuous", element ) - def build_constraint(self, element): + def build_constraint(self, element, exception_reactions): coef = {self.variables["elements"][element]: -1} - for reaction in self.model.reactions: - if reaction.id[0:3] == "EX_": + rxnlist = self.modelutl.exchange_list() + for reaction in rxnlist: + if reaction not in exception_reactions: total = 0 for metabolite in reaction.metabolites: elements = metabolite.elements diff --git a/modelseedpy/fbapkg/flexiblebiomasspkg.py b/modelseedpy/fbapkg/flexiblebiomasspkg.py index ae8a1cfe..aa11ea2d 100644 --- a/modelseedpy/fbapkg/flexiblebiomasspkg.py +++ b/modelseedpy/fbapkg/flexiblebiomasspkg.py @@ -77,11 +77,11 @@ def build_package(self, parameters): newrxns = [] class_coef = {"rna": {}, "dna": {}, "protein": {}, "energy": {}} refcpd = { - "cpd00001": None, - "cpd00009": None, - "cpd00012": None, - "cpd00067": None, - "cpd00002": None, + "cpd00001": None, # Water + "cpd00009": None, # Orthophosphate + "cpd00012": None, # Pyrophosphate + "cpd00067": None, # Proton + "cpd00002": None, # ATP } # Finding all reference compounds in the model msid_hash = self.modelutl.msid_hash() @@ -93,7 +93,13 @@ def build_package(self, parameters): for metabolite in self.parameters["bio_rxn"].metabolites: met_class[metabolite] = None msid = MSModelUtil.metabolite_msid(metabolite) - if msid != "cpd11416" and msid != None: + if ( + msid != "cpd11416" # Biomass + and msid != "cpd11463" # Protein + and msid != 
"cpd11462" # RNA + and msid != "cpd11461" # DNA + and msid != None + ): if msid in refcpd: met_class[metabolite] = "refcpd" else: @@ -111,20 +117,24 @@ def build_package(self, parameters): self.parameters["use_" + curr_class + "_class"] = None break # Creating FLEX reactions and constraints for unclassified compounds - flexcpds = [] + flexcpds = {} for metabolite in self.parameters["bio_rxn"].metabolites: if not met_class[metabolite]: - flexcpds.append(metabolite) + flexcpds[metabolite] = self.parameters["bio_rxn"].metabolites[ + metabolite + ] elif ( met_class[metabolite] != "refcpd" and not self.parameters["use_" + met_class[metabolite] + "_class"] ): - flexcpds.append(metabolite) + flexcpds[metabolite] = self.parameters["bio_rxn"].metabolites[ + metabolite + ] self.modelutl.add_exchanges_for_metabolites( flexcpds, uptake=1000, excretion=1000, - prefix="FLEX_", + prefix="FLEX_" + self.parameters["bio_rxn"].id + "_", prefix_name="Biomass flex for ", ) for metabolite in flexcpds: @@ -199,31 +209,39 @@ def build_package(self, parameters): self.build_constraint( self.new_reactions[met_class + "_flex"], "flxcls" ) - if parameters["add_total_biomass_constraint"]: + if self.parameters["add_total_biomass_constraint"]: self.build_constraint(self.parameters["bio_rxn"], "flxbio") def build_variable(self, object, type): # !!! can the function be removed? pass def build_constraint(self, cobra_obj, obj_type): - element_mass = FBAHelper.elemental_mass() # !!! 
element_mass is never used if obj_type == "flxbio": # Sum(MW*(vdrn,for-vdrn,ref)) + Sum(massdiff*(vrxn,for-vrxn,ref)) = 0 coef = {} for metabolite in self.parameters["bio_rxn"].metabolites: - if "FLEX_" + metabolite.id in self.model.reactions: + if ( + "FLEX_" + self.parameters["bio_rxn"].id + "_" + metabolite.id + in self.model.reactions + ): mw = FBAHelper.metabolite_mw(metabolite) sign = -1 if self.parameters["bio_rxn"].metabolites[metabolite] > 0: sign = 1 coef[ self.model.reactions.get_by_id( - "FLEX_" + metabolite.id + "FLEX_" + + self.parameters["bio_rxn"].id + + "_" + + metabolite.id ).forward_variable ] = (sign * mw) coef[ self.model.reactions.get_by_id( - "FLEX_" + metabolite.id + "FLEX_" + + self.parameters["bio_rxn"].id + + "_" + + metabolite.id ).reverse_variable ] = (-1 * sign * mw) for met_class in classes: @@ -236,10 +254,13 @@ def build_constraint(self, cobra_obj, obj_type): if abs(massdiff) > 0.00001: coef[rxn.forward_variable] = massdiff coef[rxn.reverse_variable] = -massdiff - return BaseFBAPkg.build_constraint(self, obj_type, 0, 0, coef, cobra_obj) + return super().build_constraint(obj_type, 0, 0, coef, cobra_obj) elif obj_type == "flxcpd" or obj_type == "flxcls": + first_entry = None + second_entry = None + product = False biovar = self.parameters["bio_rxn"].forward_variable - object = cobra_obj + object = None const = None if obj_type == "flxcpd": # 0.75 * abs(bio_coef) * vbio - vdrn,for >= 0 @@ -250,7 +271,11 @@ def build_constraint(self, cobra_obj, obj_type): second_entry = self.parameters["flex_coefficient"][1] * abs( self.parameters["bio_rxn"].metabolites[cobra_obj] ) - object = self.model.reactions.get_by_id("FLEX_" + cobra_obj.id) + if self.parameters["bio_rxn"].metabolites[cobra_obj] > 0: + product = True + object = self.model.reactions.get_by_id( + "FLEX_" + self.parameters["bio_rxn"].id + "_" + cobra_obj.id + ) elif ( cobra_obj.id[0:-5] == None or not self.parameters["use_" + cobra_obj.id[0:-5] + "_class"] @@ -263,87 +288,153 @@ def 
build_constraint(self, cobra_obj, obj_type): second_entry = self.parameters["use_" + cobra_obj.id[0:-5] + "_class"][ 1 ] + object = cobra_obj if first_entry == second_entry: # If the value is positive, lock in the forward variable and set the reverse to zero if first_entry > 0: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, - 0, - 0, - {biovar: second_entry, object.forward_variable: -1}, - cobra_obj, - ) - object.lower_bound = 0 + if product: + const = super().build_constraint( + "f" + obj_type, + 0, + 0, + {biovar: second_entry, object.forward_variable: -1}, + cobra_obj, + ) + object.lower_bound = 0 + else: + const = super().build_constraint( + "f" + obj_type, + 0, + 0, + {biovar: second_entry, object.reverse_variable: -1}, + cobra_obj, + ) + object.upper_bound = 0 # If the value is negative, lock in the reverse variable and set the forward to zero elif first_entry < 0: - const = BaseFBAPkg.build_constraint( - self, - "r" + obj_type, - 0, - 0, - {biovar: -first_entry, object.reverse_variable: -1}, - cobra_obj, - ) - object.upper_bound = 0 + if product: + const = super().build_constraint( + "r" + obj_type, + 0, + 0, + {biovar: -first_entry, object.reverse_variable: -1}, + cobra_obj, + ) + object.upper_bound = 0 + else: + const = super().build_constraint( + "r" + obj_type, + 0, + 0, + {biovar: -first_entry, object.forward_variable: -1}, + cobra_obj, + ) + object.lower_bound = 0 # If the value is zero, lock both variables to zero if first_entry == 0: object.lower_bound = 0 object.upper_bound = 0 elif second_entry >= 0: if first_entry >= 0: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, - 0, - None, - {biovar: second_entry, object.forward_variable: -1}, - cobra_obj, - ) - object.lower_bound = 0 - if first_entry > 0: - BaseFBAPkg.build_constraint( - self, - "r" + obj_type, + if product: + const = super().build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.forward_variable: -1}, + cobra_obj, + ) + 
object.lower_bound = 0 + if first_entry > 0: + super().build_constraint( + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.forward_variable: 1}, + cobra_obj, + ) + else: + const = super().build_constraint( + "f" + obj_type, 0, None, - {biovar: -first_entry, object.forward_variable: 1}, + {biovar: second_entry, object.reverse_variable: -1}, cobra_obj, ) + object.upper_bound = 0 + if first_entry > 0: + super().build_constraint( + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.reverse_variable: 1}, + cobra_obj, + ) else: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, - 0, - None, - {biovar: second_entry, object.forward_variable: -1}, - cobra_obj, - ) - BaseFBAPkg.build_constraint( - self, + if product: + const = super().build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.forward_variable: -1}, + cobra_obj, + ) + super().build_constraint( + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.reverse_variable: -1}, + cobra_obj, + ) + else: + const = super().build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.reverse_variable: -1}, + cobra_obj, + ) + super().build_constraint( + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.forward_variable: -1}, + cobra_obj, + ) + else: + if second_entry < 0: + if product: + const = super().build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.reverse_variable: 1}, + cobra_obj, + ) + else: + const = super().build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.forward_variable: 1}, + cobra_obj, + ) + if product: + super().build_constraint( "r" + obj_type, 0, None, {biovar: -first_entry, object.reverse_variable: -1}, cobra_obj, ) - else: - if second_entry < 0: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, + object.lower_bound = 0 + else: + super().build_constraint( + "r" + obj_type, 0, None, - {biovar: second_entry, object.reverse_variable: 
1}, + {biovar: -first_entry, object.forward_variable: -1}, cobra_obj, ) - BaseFBAPkg.build_constraint( - self, - "r" + obj_type, - 0, - None, - {biovar: -first_entry, object.reverse_variable: -1}, - cobra_obj, - ) - object.upper_bound = 0 + object.upper_bound = 0 return const diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 58140418..dbc1441e 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -3,363 +3,44 @@ from __future__ import absolute_import import logging +import sys import re import json from optlang.symbolics import Zero, add from cobra import Model, Reaction, Metabolite +from cobra.io import ( + load_json_model, + save_json_model, + load_matlab_model, + save_matlab_model, + read_sbml_model, + write_sbml_model, +) from modelseedpy.fbapkg.basefbapkg import BaseFBAPkg from modelseedpy.core.fbahelper import FBAHelper logger = logging.getLogger(__name__) +logger.setLevel( + logging.WARNING +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO -default_blacklist = [ - "rxn12985", - "rxn00238", - "rxn07058", - "rxn05305", - "rxn09037", - "rxn10643", - "rxn11317", - "rxn05254", - "rxn05257", - "rxn05258", - "rxn05259", - "rxn05264", - "rxn05268", - "rxn05269", - "rxn05270", - "rxn05271", - "rxn05272", - "rxn05273", - "rxn05274", - "rxn05275", - "rxn05276", - "rxn05277", - "rxn05278", - "rxn05279", - "rxn05280", - "rxn05281", - "rxn05282", - "rxn05283", - "rxn05284", - "rxn05285", - "rxn05286", - "rxn05963", - "rxn05964", - "rxn05971", - "rxn05989", - "rxn05990", - "rxn06041", - "rxn06042", - "rxn06043", - "rxn06044", - "rxn06045", - "rxn06046", - "rxn06079", - "rxn06080", - "rxn06081", - "rxn06086", - "rxn06087", - "rxn06088", - "rxn06089", - "rxn06090", - "rxn06091", - "rxn06092", - "rxn06138", - "rxn06139", - "rxn06140", - "rxn06141", - "rxn06145", - "rxn06217", - "rxn06218", - "rxn06219", - "rxn06220", - "rxn06221", - "rxn06222", - 
"rxn06223", - "rxn06235", - "rxn06362", - "rxn06368", - "rxn06378", - "rxn06474", - "rxn06475", - "rxn06502", - "rxn06562", - "rxn06569", - "rxn06604", - "rxn06702", - "rxn06706", - "rxn06715", - "rxn06803", - "rxn06811", - "rxn06812", - "rxn06850", - "rxn06901", - "rxn06971", - "rxn06999", - "rxn07123", - "rxn07172", - "rxn07254", - "rxn07255", - "rxn07269", - "rxn07451", - "rxn09037", - "rxn10018", - "rxn10077", - "rxn10096", - "rxn10097", - "rxn10098", - "rxn10099", - "rxn10101", - "rxn10102", - "rxn10103", - "rxn10104", - "rxn10105", - "rxn10106", - "rxn10107", - "rxn10109", - "rxn10111", - "rxn10403", - "rxn10410", - "rxn10416", - "rxn11313", - "rxn11316", - "rxn11318", - "rxn11353", - "rxn05224", - "rxn05795", - "rxn05796", - "rxn05797", - "rxn05798", - "rxn05799", - "rxn05801", - "rxn05802", - "rxn05803", - "rxn05804", - "rxn05805", - "rxn05806", - "rxn05808", - "rxn05812", - "rxn05815", - "rxn05832", - "rxn05836", - "rxn05851", - "rxn05857", - "rxn05869", - "rxn05870", - "rxn05884", - "rxn05888", - "rxn05896", - "rxn05898", - "rxn05900", - "rxn05903", - "rxn05904", - "rxn05905", - "rxn05911", - "rxn05921", - "rxn05925", - "rxn05936", - "rxn05947", - "rxn05956", - "rxn05959", - "rxn05960", - "rxn05980", - "rxn05991", - "rxn05992", - "rxn05999", - "rxn06001", - "rxn06014", - "rxn06017", - "rxn06021", - "rxn06026", - "rxn06027", - "rxn06034", - "rxn06048", - "rxn06052", - "rxn06053", - "rxn06054", - "rxn06057", - "rxn06059", - "rxn06061", - "rxn06102", - "rxn06103", - "rxn06127", - "rxn06128", - "rxn06129", - "rxn06130", - "rxn06131", - "rxn06132", - "rxn06137", - "rxn06146", - "rxn06161", - "rxn06167", - "rxn06172", - "rxn06174", - "rxn06175", - "rxn06187", - "rxn06189", - "rxn06203", - "rxn06204", - "rxn06246", - "rxn06261", - "rxn06265", - "rxn06266", - "rxn06286", - "rxn06291", - "rxn06294", - "rxn06310", - "rxn06320", - "rxn06327", - "rxn06334", - "rxn06337", - "rxn06339", - "rxn06342", - "rxn06343", - "rxn06350", - "rxn06352", - "rxn06358", - "rxn06361", 
- "rxn06369", - "rxn06380", - "rxn06395", - "rxn06415", - "rxn06419", - "rxn06420", - "rxn06421", - "rxn06423", - "rxn06450", - "rxn06457", - "rxn06463", - "rxn06464", - "rxn06466", - "rxn06471", - "rxn06482", - "rxn06483", - "rxn06486", - "rxn06492", - "rxn06497", - "rxn06498", - "rxn06501", - "rxn06505", - "rxn06506", - "rxn06521", - "rxn06534", - "rxn06580", - "rxn06585", - "rxn06593", - "rxn06609", - "rxn06613", - "rxn06654", - "rxn06667", - "rxn06676", - "rxn06693", - "rxn06730", - "rxn06746", - "rxn06762", - "rxn06779", - "rxn06790", - "rxn06791", - "rxn06792", - "rxn06793", - "rxn06794", - "rxn06795", - "rxn06796", - "rxn06797", - "rxn06821", - "rxn06826", - "rxn06827", - "rxn06829", - "rxn06839", - "rxn06841", - "rxn06842", - "rxn06851", - "rxn06866", - "rxn06867", - "rxn06873", - "rxn06885", - "rxn06891", - "rxn06892", - "rxn06896", - "rxn06938", - "rxn06939", - "rxn06944", - "rxn06951", - "rxn06952", - "rxn06955", - "rxn06957", - "rxn06960", - "rxn06964", - "rxn06965", - "rxn07086", - "rxn07097", - "rxn07103", - "rxn07104", - "rxn07105", - "rxn07106", - "rxn07107", - "rxn07109", - "rxn07119", - "rxn07179", - "rxn07186", - "rxn07187", - "rxn07188", - "rxn07195", - "rxn07196", - "rxn07197", - "rxn07198", - "rxn07201", - "rxn07205", - "rxn07206", - "rxn07210", - "rxn07244", - "rxn07245", - "rxn07253", - "rxn07275", - "rxn07299", - "rxn07302", - "rxn07651", - "rxn07723", - "rxn07736", - "rxn07878", - "rxn11417", - "rxn11582", - "rxn11593", - "rxn11597", - "rxn11615", - "rxn11617", - "rxn11619", - "rxn11620", - "rxn11624", - "rxn11626", - "rxn11638", - "rxn11648", - "rxn11651", - "rxn11665", - "rxn11666", - "rxn11667", - "rxn11698", - "rxn11983", - "rxn11986", - "rxn11994", - "rxn12006", - "rxn12007", - "rxn12014", - "rxn12017", - "rxn12022", - "rxn12160", - "rxn12161", - "rxn01267", - "rxn05294", - "rxn04656", -] +base_blacklist = {} +zero_threshold = 1e-8 class GapfillingPkg(BaseFBAPkg): """ """ def __init__(self, model): - BaseFBAPkg.__init__(self, model, 
"gapfilling", {}, {}) + BaseFBAPkg.__init__( + self, + model, + "gapfilling", + {"rmaxf": "reaction", "fmaxf": "reaction"}, + {"rmaxfc": "reaction", "fmaxfc": "reaction"}, + ) self.gapfilling_penalties = None + self.maxflux_variables = {} def build(self, template, minimum_objective=0.01): parameters = { @@ -406,9 +87,9 @@ def build_package(self, parameters): "default_excretion": 100, "default_uptake": 100, "minimum_obj": 0.01, - "set_objective": 1, "minimize_exchanges": False, - "blacklist": default_blacklist, + "add_max_flux_variables": False, + "blacklist": [], }, ) # Adding model reactions to original reaction list @@ -491,29 +172,68 @@ def build_package(self, parameters): ) self.model.solver.update() - if self.parameters["set_objective"] == 1: - reaction_objective = self.model.problem.Objective(Zero, direction="min") - obj_coef = dict() + + # Creating max flux variables and constraints to be used for global gapfilling and other formulations + if self.parameters["add_max_flux_variables"]: for reaction in self.model.reactions: if reaction.id in self.gapfilling_penalties: - if ( - self.parameters["minimize_exchanges"] - or reaction.id[0:3] != "EX_" - ): - # Minimizing gapfilled reactions - if "reverse" in self.gapfilling_penalties[reaction.id]: - obj_coef[reaction.reverse_variable] = abs( - self.gapfilling_penalties[reaction.id]["reverse"] - ) - if "forward" in self.gapfilling_penalties[reaction.id]: - obj_coef[reaction.forward_variable] = abs( - self.gapfilling_penalties[reaction.id]["forward"] - ) - else: - obj_coef[reaction.forward_variable] = 0 - obj_coef[reaction.reverse_variable] = 0 - self.model.objective = reaction_objective - reaction_objective.set_linear_coefficients(obj_coef) + if "reverse" in self.gapfilling_penalties[reaction.id]: + self.maxflux_variables[reaction.id][ + "reverse" + ] = self.build_variable( + "rmaxf", 0, 1000, "continuous", reaction + ) + self.build_constraint( + "rmaxfc", + 0, + None, + { + reaction.reverse_variable: -1, + 
self.maxflux_variables[reaction.id]["reverse"]: 1, + }, + reaction, + ) + if "forward" in self.gapfilling_penalties[reaction.id]: + self.maxflux_variables[reaction.id][ + "forward" + ] = self.build_variable( + "fmaxf", 0, 1000, "continuous", reaction + ) + self.build_constraint( + "fmaxfc", + 0, + None, + { + reaction.forward_variable: -1, + self.maxflux_variables[reaction.id]["forward"]: 1, + }, + reaction, + ) + + # Creating the gapfilling objective function and saving it under self.parameters["gfobj"] + reaction_objective = self.model.problem.Objective(Zero, direction="min") + obj_coef = dict() + for reaction in self.model.reactions: + if reaction.id in self.gapfilling_penalties: + if self.parameters["minimize_exchanges"] or reaction.id[0:3] != "EX_": + # Minimizing gapfilled reactions + if "reverse" in self.gapfilling_penalties[reaction.id]: + obj_coef[reaction.reverse_variable] = abs( + self.gapfilling_penalties[reaction.id]["reverse"] + ) + if "forward" in self.gapfilling_penalties[reaction.id]: + obj_coef[reaction.forward_variable] = abs( + self.gapfilling_penalties[reaction.id]["forward"] + ) + else: + obj_coef[reaction.forward_variable] = 0 + obj_coef[reaction.reverse_variable] = 0 + self.model.objective = reaction_objective + reaction_objective.set_linear_coefficients(obj_coef) + self.parameters["gfobj"] = self.model.objective + + def reset_original_objective(self): + self.parameters["origobj"] = self.model.objective def extend_model_with_model_for_gapfilling(self, source_model, index): new_metabolites = {} @@ -550,6 +270,11 @@ def extend_model_with_model_for_gapfilling(self, source_model, index): if re.search("(.+)_([a-z])\d+$", modelreaction.id) != None: m = re.search("(.+)_([a-z])\d+$", modelreaction.id) if m[1] not in self.parameters["blacklist"]: + if m[1] in base_blacklist: + if base_blacklist[m[1]] == ">" or base_blacklist[m[1]] == "=": + cobra_reaction.upper_bound = 0 + if base_blacklist[m[1]] == "<" or base_blacklist[m[1]] == "=": + 
cobra_reaction.lower_bound = 0 cobra_reaction = modelreaction.copy() cobra_reaction.id = groups[1] + "_" + groups[2] + index if ( @@ -679,6 +404,17 @@ def extend_model_with_template_for_gapfilling(self, template, index): cobra_reaction = self.convert_template_reaction( template_reaction, index, template, 1 ) # TODO: move function out + if template_reaction.reference_id in base_blacklist: + if ( + base_blacklist[template_reaction.reference_id] == ">" + or base_blacklist[template_reaction.reference_id] == "=" + ): + cobra_reaction.upper_bound = 0 + if ( + base_blacklist[template_reaction.reference_id] == "<" + or base_blacklist[template_reaction.reference_id] == "=" + ): + cobra_reaction.lower_bound = 0 new_penalties[cobra_reaction.id] = dict() if ( cobra_reaction.id not in self.model.reactions @@ -843,12 +579,12 @@ def knockout_gf_reactions_outside_solution(self, solution=None, flux_values=None if rxnobj.id in self.gapfilling_penalties: if ( "reverse" in self.gapfilling_penalties[rxnobj.id] - and flux_values[rxnobj.id]["reverse"] <= Zero + and flux_values[rxnobj.id]["reverse"] <= zero_threshold ): rxnobj.lower_bound = 0 if ( "forward" in self.gapfilling_penalties[rxnobj.id] - and flux_values[rxnobj.id]["forward"] <= Zero + and flux_values[rxnobj.id]["forward"] <= zero_threshold ): rxnobj.upper_bound = 0 rxnobj.update_variable_bounds() @@ -879,7 +615,7 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): condition["change"] = False if len(filtered_list) > 0: if max_iterations > 0: - print("Gapfilling test failed " + str(11 - max_iterations)) + logger.warning("Gapfilling test failed " + str(11 - max_iterations)) # Forcing filtered reactions to zero for item in filtered_list: if item[1] == ">": @@ -898,7 +634,34 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): return None return solution + def test_gapfill_database(self): + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 + 
self.model.objective = self.parameters["origobj"] + solution = self.model.optimize() + logger.debug( + "Objective with gapfill database:" + + str(solution.objective_value) + + "; min objective:" + + str(self.parameters["minimum_obj"]) + ) + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] + self.model.objective = self.parameters["gfobj"] + if solution.objective_value < self.parameters["minimum_obj"]: + return False + return True + + def set_min_objective(self, min_objective): + self.parameters["minimum_obj"] = min_objective + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] + def filter_database_based_on_tests(self, test_conditions): + # Setting the minimal growth constraint to zero + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 + # Filtering the database of any reactions that violate the specified tests filetered_list = [] with self.model: rxnlist = [] @@ -908,33 +671,24 @@ def filter_database_based_on_tests(self, test_conditions): rxnlist.append([reaction, "<"]) if "forward" in self.gapfilling_penalties[reaction.id]: rxnlist.append([reaction, ">"]) - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 + filtered_list = self.modelutl.reaction_expansion_test( rxnlist, test_conditions ) # Now constraining filtered reactions to zero for item in filtered_list: - logger.debug("Filtering:", item[0].id, item[1]) + logger.debug("Filtering:" + item[0].id + item[1]) if item[1] == ">": self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: self.model.reactions.get_by_id(item[0].id).lower_bound = 0 # Now testing if the gapfilling minimum objective can still be achieved - gfobj = self.model.objective - self.model.objective = self.parameters["origobj"] - solution = self.model.optimize() - # Restoring the minimum objective constraint - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ - "minimum_obj" - 
] - print( - "Objective after filtering:", - solution.objective_value, - "; min objective:", - self.parameters["minimum_obj"], - ) - if solution.objective_value < self.parameters["minimum_obj"]: + if not self.test_gapfill_database(): # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached + # Restoring the minimum objective constraint + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"][ + "1" + ].lb = self.parameters["minimum_obj"] new_objective = self.model.problem.Objective(Zero, direction="min") filterobjcoef = dict() for item in filtered_list: @@ -945,7 +699,6 @@ def filter_database_based_on_tests(self, test_conditions): else: filterobjcoef[rxn.reverse_variable] = item[3] rxn.lower_bound = item[2] - self.model.objective = new_objective new_objective.set_linear_coefficients(filterobjcoef) solution = self.model.optimize() @@ -967,22 +720,24 @@ def filter_database_based_on_tests(self, test_conditions): else: count += -1 rxn.lower_bound = 0 - print("Reactions unfiltered:", count) + logger.debug("Reactions unfiltered:" + str(count)) # Checking for model reactions that can be removed to enable all tests to pass self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 filtered_list = self.modelutl.reaction_expansion_test( self.parameters["original_reactions"], test_conditions ) for item in filtered_list: - logger.debug("Filtering:", item[0].id, item[1]) + logger.debug("Filtering:" + item[0].id + item[1]) if item[1] == ">": self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: self.model.reactions.get_by_id(item[0].id).lower_bound = 0 - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"][ - "1" - ].lb = self.parameters["minimum_obj"] - self.model.objective = gfobj + # Restoring gapfilling objective function and minimal objective constraint + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] + self.model.objective = 
self.parameters["gfobj"] + return True def compute_gapfilled_solution(self, flux_values=None): if flux_values is None: @@ -991,19 +746,23 @@ def compute_gapfilled_solution(self, flux_values=None): for reaction in self.model.reactions: if reaction.id in self.gapfilling_penalties: if ( - flux_values[reaction.id]["forward"] > Zero + flux_values[reaction.id]["forward"] > zero_threshold and "forward" in self.gapfilling_penalties[reaction.id] ): if "added" in self.gapfilling_penalties[reaction.id]: + logger.debug(f"New gapfilled reaction: {reaction.id} >") output["new"][reaction.id] = ">" else: + logger.debug(f"Reversed gapfilled reaction: {reaction.id} >") output["reversed"][reaction.id] = ">" elif ( - flux_values[reaction.id]["reverse"] > Zero + flux_values[reaction.id]["reverse"] > zero_threshold and "reverse" in self.gapfilling_penalties[reaction.id] ): if "added" in self.gapfilling_penalties[reaction.id]: + logger.debug(f"New gapfilled reaction: {reaction.id} <") output["new"][reaction.id] = "<" else: + logger.debug(f"Reversed gapfilled reaction: {reaction.id} <") output["reversed"][reaction.id] = "<" return output diff --git a/modelseedpy/fbapkg/kbasemediapkg.py b/modelseedpy/fbapkg/kbasemediapkg.py index 4dbf0779..9dc9b315 100644 --- a/modelseedpy/fbapkg/kbasemediapkg.py +++ b/modelseedpy/fbapkg/kbasemediapkg.py @@ -40,7 +40,9 @@ def build_package( self.parameters["default_uptake"] = 0 if self.parameters["default_excretion"] is None: self.parameters["default_excretion"] = 100 - if self.parameters["media"] is None and self.parameters["default_uptake"] == 0: + if ( + self.parameters["media"] and self.parameters["media"].name == "Complete" + ) and self.parameters["default_uptake"] == 0: self.parameters["default_uptake"] = 100 # First initializing all exchanges to default uptake and excretion diff --git a/pyproject.toml b/pyproject.toml index 0ed58542..8e0e52df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta" 
[tool.black] line-length = 88 -python-version = ['py36'] +python-version = ['py38'] include = '\.pyi?$' exclude = ''' ( diff --git a/setup.py b/setup.py index 5fba7f6c..3c27af19 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name="ModelSEEDpy", - version="0.3.1", + version="0.4.2", description="Python package for building and analyzing models using ModelSEED", long_description_content_type="text/x-rst", long_description=readme, @@ -27,20 +27,22 @@ "Topic :: Scientific/Engineering :: Bio-Informatics", "Intended Audience :: Science/Research", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Natural Language :: English", ], install_requires=[ "networkx >= 2.4", - "cobra >= 0.17.1", + "cobra >= 0.29.0", + "pandas >= 2.2.2", "scikit-learn == 1.2.0", # version lock for pickle ML models "scipy >= 1.5.4", "chemicals >= 1.0.13", "chemw >= 0.3.2", "matplotlib >= 3.0.0", - "pyeda", + "Jinja2 >= 3.1.4", + "sympy >=1.12.0", ], tests_require=[ "pytest", diff --git a/tests/test_advanced.py b/tests/core/test_advanced.py similarity index 100% rename from tests/test_advanced.py rename to tests/core/test_advanced.py diff --git a/tests/test_basic.py b/tests/core/test_basic.py similarity index 100% rename from tests/test_basic.py rename to tests/core/test_basic.py diff --git a/tests/core/test_msatpcorreption.py b/tests/core/test_msatpcorreption.py index 108cc3ec..a60d33ec 100644 --- a/tests/core/test_msatpcorreption.py +++ b/tests/core/test_msatpcorreption.py @@ -108,7 +108,7 @@ def media_acetate_aerobic(): "h2o": (-1000, 1000), } ) - media.id = "glc/o2" + media.id = "ac/o2" return media @@ -205,12 +205,13 @@ def test_infinite_atp_model_growth_boost( def test_ms_atp_correction1(get_model, template, media_all_aerobic): + atp_hydrolysis_id = "ATPM_c0" model = 
get_model(["GLCpts_c0", "NADH16_c0", "CYTBD_c0", "O2t_c0"]) atp_correction = MSATPCorrection( model, template, media_all_aerobic, - atp_hydrolysis_id="ATPM_c0", + atp_hydrolysis_id=atp_hydrolysis_id, load_default_medias=False, ) atp_correction.evaluate_growth_media() @@ -234,9 +235,14 @@ def test_ms_atp_correction1(get_model, template, media_all_aerobic): tests = atp_correction.build_tests() assert tests - assert len(tests) == 1 - assert tests[0]["threshold"] > 0 - assert tests[0]["objective"] == "ATPM_c0" + assert len(tests) == 2 # glucose and empty + for t in tests: + if t["media"].id == "empty": + assert t["threshold"] <= 1e-05 + else: + assert t["threshold"] > 1e-05 + assert t["objective"] == atp_hydrolysis_id + assert t["is_max_threshold"] is True def test_ms_atp_correction_and_gap_fill1( @@ -248,33 +254,39 @@ def test_ms_atp_correction_and_gap_fill1( ): from modelseedpy import MSGapfill + atp_hydrolysis_id = "ATPM_c0" + model = get_model_with_infinite_atp_loop(["GLCpts_c0", "GLUSy_c0", "GLUDy_c0"]) model.reactions.ATPM_c0.lower_bound = 0 model.reactions.ATPM_c0.upper_bound = 1000 - + model.objective = atp_hydrolysis_id atp_correction = MSATPCorrection( model, template, [media_glucose_aerobic], - atp_hydrolysis_id="ATPM_c0", + atp_hydrolysis_id=atp_hydrolysis_id, load_default_medias=False, ) tests = atp_correction.run_atp_correction() - # expected tests = [{'media': MSMedia object, 'is_max_threshold': True, 'threshold': 21.0, 'objective': 'ATPM_c0'}] assert tests - assert len(tests) == 1 - assert tests[0]["threshold"] > 0 - assert tests[0]["objective"] == "ATPM_c0" - + assert len(tests) == 2 + for t in tests: + if t["media"].id == "empty": + assert t["threshold"] <= 1e-05 + else: + assert t["threshold"] > 1e-05 + assert t["objective"] == atp_hydrolysis_id + assert t["is_max_threshold"] is True + + model.objective = "BIOMASS_Ecoli_core_w_GAM_c0" gap_fill = MSGapfill(model, [template_genome_scale], [], tests, {}, []) result = gap_fill.run_gapfilling( 
media_genome_scale_glucose_aerobic, "BIOMASS_Ecoli_core_w_GAM_c0", minimum_obj=0.1, ) - # either GLUSy_c0 or GLUDy_c0 should be gap filled for glutamate assert result diff --git a/tests/core/test_msgapfill.py b/tests/core/test_msgapfill.py index 1ee694bd..622a0924 100644 --- a/tests/core/test_msgapfill.py +++ b/tests/core/test_msgapfill.py @@ -1,54 +1,4 @@ # -*- coding: utf-8 -*- -""" -from glob import glob -os.environ["HOME"] = 'C:\\Users\\Andrew Freiburger\\Dropbox\\My PC (DESKTOP-M302P50)\\Documents\\UVic Civil Engineering\\Internships\\Agronne\\cobrakbase' -import cobrakbase -token = 'xx' -kbase = cobrakbase.KBaseAPI(token) -import re - -# define the example individual model and associated API media package -model = kbase.get_from_ws('e_coli_core.kb', 95098) -model.solver = 'optlang-cplex' - -# import the modelseedpy packages -import modelseedpy -from modelseedpy.core.msgapfill import MSGapfill -gapfill = MSGapfill(model) - -def test_init(): - assert type(gapfill.model) is cobrakbase.core.kbasefba.fbamodel.FBAModel - assert type(gapfill.blacklist) is list - assert type(gapfill.solutions) is dict - -def test_run_gapfilling_and_integrate_gapfill_solution(): - solutions = gapfill.run_gapfilling() - - # test that the objective expression is correctly set - if solutions is not None: - assert type(solutions) is dict - - # verify the integrate_gapfill_solution function - model_2 = gapfill.integrate_gapfill_solution(solutions) - assert type(model_2) is cobrakbase.core.kbasefba.fbamodel.FBAModel - - for reaction in solutions['reversed']: - if solution["reversed"][reaction] == ">": - assert reaction.upper_bound == 100 - else: - assert reaction.lower_bound == -100 - - for reaction in solutions['new']: - if solution["new"][reaction] == ">": - assert reaction.upper_bound == 100 - assert reaction.lower_bound == 0 - else: - assert reaction.upper_bound == 0 - assert reaction.lower_bound == -100 - -def test_gapfill(): - pass -""" import os import pytest import json diff --git 
a/tests/core/test_msmodel.py b/tests/core/test_msmodel.py new file mode 100644 index 00000000..ec4027f5 --- /dev/null +++ b/tests/core/test_msmodel.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +from modelseedpy.core.msmodel import * + + +def test_get_direction_from_constraints1(): + res = get_direction_from_constraints(0, 1000) + + assert res == ">" + + +def test_get_direction_from_constraints2(): + res = get_direction_from_constraints(-1000, 0) + + assert res == "<" + + +def test_get_direction_from_constraints3(): + res = get_direction_from_constraints(-1000, 1000) + + assert res == "=" + + +def test_get_set_set1(): + res = get_set_set("A") + + assert len(res) == 1 + assert {"A"} in res + + +def test_get_set_set2(): + res = get_set_set("A and B") + + assert len(res) == 1 + assert {"A", "B"} in res + + +def test_get_set_set3(): + res = get_set_set("A or B") + + assert len(res) == 2 + assert {"A"} in res + assert {"B"} in res + + +def test_get_set_set4(): + res = get_set_set("A or B or C") + + assert len(res) == 3 + assert {"A"} in res + assert {"B"} in res + assert {"C"} in res + + +def test_get_set_set5(): + res = get_set_set("A or B and C") + + assert len(res) == 2 + assert {"A"} in res + assert {"B", "C"} in res + + +def test_get_set_set6(): + res = get_set_set("A and B or C") + + assert len(res) == 2 + assert {"A", "B"} in res + assert {"C"} in res + + +def test_get_set_set7(): + res = get_set_set("(A or B) and C") + + assert len(res) == 2 + assert {"A", "C"} in res + assert {"B", "C"} in res + + +def test_get_set_set8(): + res = get_set_set("A and (B or C)") + + assert len(res) == 2 + assert {"A", "B"} in res + assert {"A", "C"} in res diff --git a/tests/core/test_mstemplate.py b/tests/core/test_mstemplate.py new file mode 100644 index 00000000..9663e8c8 --- /dev/null +++ b/tests/core/test_mstemplate.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +import pytest + +from modelseedpy.core.mstemplate import ( + MSTemplate, + MSTemplateMetabolite, + 
MSTemplateReaction, + MSTemplateSpecies, +) +from modelseedpy.core.mstemplate import ( + NewModelTemplateRole, + NewModelTemplateComplex, + MSTemplateCompartment, +) + + +@pytest.fixture +def empty_template(): + return MSTemplate("test", "test name", "test") + + +def test_empty_template(): + template = MSTemplate("test", "test name", "test") + assert template.id == "test" + assert template.name == "test name" + assert len(template.roles) == 0 + assert len(template.complexes) == 0 + assert len(template.compounds) == 0 + assert len(template.compcompounds) == 0 + assert len(template.reactions) == 0 + + +def test_template_add_role(empty_template): + role = NewModelTemplateRole("role1", "metabolic function") + empty_template.add_roles([role]) + assert len(empty_template.roles) == 1 + + +def test_template_add_role_mult(empty_template): + role_a = NewModelTemplateRole("roleA", "metabolic function A") + role_b = NewModelTemplateRole("roleB", "metabolic function B") + role_c = NewModelTemplateRole("roleC", "metabolic function C") + empty_template.add_roles([role_a, role_b, role_c]) + assert len(empty_template.roles) == 3 + + +def test_template_add_simple_complex(empty_template): + role = NewModelTemplateRole("role1", "metabolic function") + empty_template.add_roles([role]) + + seed_complex = NewModelTemplateComplex("complex1", "example complex") + + seed_complex.add_role(empty_template.roles.role1) + + empty_template.add_complexes([seed_complex]) + + assert len(empty_template.complexes) == 1 + + +def test_template_add_simple_metabolite(empty_template): + cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") + empty_template.add_compounds([cpd_apple]) + + assert len(empty_template.compounds) == 1 + + +def test_template_add_simple_metabolite_species(empty_template): + cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") + empty_template.add_compounds([cpd_apple]) + + comp_cpd_apple = MSTemplateSpecies("apple_k", 0, "k", "apple") + 
empty_template.add_comp_compounds([comp_cpd_apple]) + + assert len(empty_template.compounds) == 1 + assert len(empty_template.compcompounds) == 1 + assert empty_template.compcompounds.apple_k.compound + assert empty_template.compcompounds.apple_k.compound.name == "just a apple" + assert len(empty_template.compounds.apple.species) == 1 + + +def test_template_add_compartment(empty_template): + empty_template.compartments += [MSTemplateCompartment("w", "world", 4)] + + assert len(empty_template.compartments) == 1 + + +def test_template_add_reaction(empty_template): + cpd_apple = MSTemplateMetabolite("apple", "C100", "just a apple") + cpd_apple_pie = MSTemplateMetabolite("appie", "C1000", "apple pie (10 apples)") + empty_template.add_compounds([cpd_apple, cpd_apple_pie]) + + comp_cpd_apple = MSTemplateSpecies("apple_k", 0, "k", "apple") + comp_cpd_apple_pie = MSTemplateSpecies("appie_k", 0, "k", "appie") + empty_template.add_comp_compounds([comp_cpd_apple, comp_cpd_apple_pie]) + + rxn_make_pie = MSTemplateReaction( + "rxn_pie_k", "rxn00000", "make pie", "pie", 0, 1000 + ) + rxn_make_pie.add_metabolites( + { + empty_template.compcompounds.apple_k: -10, + empty_template.compcompounds.appie_k: 1, + } + ) + + empty_template.add_reactions([rxn_make_pie]) + + assert len(empty_template.reactions) == 1 + assert empty_template.reactions.rxn_pie_k.check_mass_balance() == {} diff --git a/tests/test_data/mock_data.py b/tests/test_data/mock_data.py index 4c86b371..478aad0e 100644 --- a/tests/test_data/mock_data.py +++ b/tests/test_data/mock_data.py @@ -271,10 +271,9 @@ def remap(model, bigg_to_seed_cpd, bigg_to_seed_rxn, index="0"): def mock_model_ecoli_core(seed=True): - from cobra.io import load_json_model - from os import path + from cobra.io import load_model - model = load_json_model(path.join(path.dirname(__file__), "e_coli_core.json")) + model = load_model("textbook") if not seed: return model bigg_to_seed_cpd = { diff --git a/tox.ini b/tox.ini index d5ff7ef9..0aa1e6aa 
100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,11 @@ [tox] -envlist = py38,py39,py310 +envlist = py39,py310,py311 [gh-actions] python = - 3.8: py38 3.9: py39 3.10: py310 + 3.11: py311 [testenv] setenv = ARCHIVEINTERFACE_CPCONFIG = {toxinidir}/server.conf