From 303d9e72f5aaef8036ec3cbfaf843bf4578cd100 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Fri, 13 May 2022 17:35:40 -0700 Subject: [PATCH 01/80] adding workbench test --- test/usi_test_data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/usi_test_data.py b/test/usi_test_data.py index 53918b8..f3cfbaf 100644 --- a/test/usi_test_data.py +++ b/test/usi_test_data.py @@ -28,6 +28,8 @@ "mzspec:MassIVE:TASK-f4b86b150a164ee4a440b661e97a7193-spectra/specs_ms.mgf:scan:287215:HPYFYAPELLF[-10.059]FAKR/3", # MassIVE Task USIs disguised as GNPS Task USIs "mzspec:GNPS:TASK-f4b86b150a164ee4a440b661e97a7193-spectra/specs_ms.mgf:scan:287215:HPYFYAPELLF[-10.059]FAKR/3", + # Metabolomics Workbench USIs + "mzspec:ST000003:StemCell+Data+and+Raw+Files/iPSC-T1R1:scan:3", # Legacy cases. "mzspec:GNPSTASK-c95481f0c53d42e78a61bf899e9f9adb:spectra/specs_ms.mgf:scan:1943", "mzspec:GNPSTASK-64b22841ab3548f987b3cfc18696a581:spectra/specs_ms.mgf:scan:1469", From dccf8b89d8c1a97fe977250b593467f537ffbe0d Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 25 Jul 2022 15:32:50 -0700 Subject: [PATCH 02/80] adding more parsing --- metabolomics_spectrum_resolver/parsing.py | 53 ++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index dc1b0e6..0e55daf 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -4,6 +4,8 @@ from typing import Tuple import requests +import pandas as pd +from io import StringIO import urllib.parse import spectrum_utils.spectrum as sus import splash @@ -25,7 +27,7 @@ # PXLnnnnnn # Unofficial: MASSIVEKB # https://github.com/HUPO-PSI/usi/blob/master/CollectionIdentifiers.md - r":(MSV\d{9}|PXD\d{6}|PXL\d{6}|RPXD\d{6}|MassIVE)" + r":(MSV\d{9}|PXD\d{6}|PXL\d{6}|RPXD\d{6}|ST\d{6}|MassIVE)" # msRun identifier r":(.*)" # index flag @@ -116,6 +118,8 @@ def parse_usi(usi: str) -> 
Tuple[sus.MsmsSpectrum, str, str]: spectrum, source_link = _parse_ms2lda(usi) elif collection == "motifdb": spectrum, source_link = _parse_motifdb(usi) + elif collection.startswith("st"): + spectrum, source_link = _parse_metabolomics_workbench(usi) else: raise UsiError(f"Unknown USI collection: {match.group(1)}", 400) splash_key = splash_builder.splash( @@ -590,6 +594,53 @@ def _parse_motifdb(usi: str) -> Tuple[sus.MsmsSpectrum, str]: raise UsiError("Unknown MOTIFDB USI", 404) +# Parse GNPS library. +def _parse_metabolomics_workbench(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + accession = match.group(1) + filename = match.group(2) + index_flag = match.group(3) + index = match.group(4) + + if index_flag.lower() != "scan": + raise UsiError( + "Currently supported MW index flags: scan", 400 + ) + try: + request_url = ( + f"https://www.metabolomicsworkbench.org/" + f"data/ms2.php?A={accession}.zip" + f"&F={urllib.parse.quote_plus(filename)}&S={index}" + ) + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + + response_text = lookup_request.text + response_text = response_text.replace("
", "").replace("

", "").lstrip().rstrip() + + # Parsing the MW Response + precursor_mz = float(response_text.split("\n")[0].split(":")[-1].replace("\"", "")) + charge = int(response_text.split("\n")[2].split(":")[-1].replace("\"", "")) + peaks_df = pd.read_csv(StringIO(response_text), sep=r" +", skiprows=4) + mz = list(peaks_df["m/z"]) + intensity = list(peaks_df["intensity"]) + + source_link = ( + f"https://www.metabolomicsworkbench.org/" + f"data/DRCCMetadata.php?Mode=Study&StudyID={accession}&StudyType=MS&ResultType=1" + ) + + spectrum = sus.MsmsSpectrum( + usi, + float(precursor_mz), + int(charge), + mz, + intensity, + ) + return spectrum, source_link + except requests.exceptions.HTTPError: + raise UsiError("Unknown MW USI", 404) + def _parse_sequence(peptide: str, peptide_clean: str) -> Tuple[str, str, list]: # Parse out gapped sequence (e.g. X+129.04259), faking it # with Glycine as the base residue and adding more mods to From 6f5762e394064c0e8ebf7a1623cffce84f7166b6 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 25 Jul 2022 15:44:02 -0700 Subject: [PATCH 03/80] formatting --- metabolomics_spectrum_resolver/parsing.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 0e55daf..d916915 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -616,7 +616,12 @@ def _parse_metabolomics_workbench(usi: str) -> Tuple[sus.MsmsSpectrum, str]: lookup_request.raise_for_status() response_text = lookup_request.text - response_text = response_text.replace("
", "").replace("

", "").lstrip().rstrip() + response_text = ( ++ response_text.replace("
", "")
++            .replace("

", "") ++ .lstrip() ++ .rstrip() ++ ) # Parsing the MW Response precursor_mz = float(response_text.split("\n")[0].split(":")[-1].replace("\"", "")) From 3e2e0bd98dd27b25b3702ee277f22e120073a7c1 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Sun, 23 Oct 2022 11:53:33 -0700 Subject: [PATCH 04/80] fixing the port --- docker-compose-production.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-production.yml b/docker-compose-production.yml index 5423467..dc110df 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -6,7 +6,7 @@ services: - nginx-net environment: VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org - VIRTUAL_PORT: 5087 + VIRTUAL_PORT: 5000 LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org LETSENCRYPT_EMAIL: mwang87@gmail.com command: /app/run_server.sh From bf8d8a4298448352a81b70e5884b8691954f38a2 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Fri, 11 Nov 2022 14:07:28 -0800 Subject: [PATCH 05/80] adding automated builds --- .github/workflows/build.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..471daab --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,25 @@ +name: Docker Build Test + +on: + - push + - pull_request + schedule: + - cron: '0 0 * * 1' + +jobs: + build-test: + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: [3.8] +# TODO: We probably should switch to using the Docker version. + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Build Docker + run: | + cd basic && docker build . 
From f134587f17b1020129651e141b165048396d677c Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Fri, 11 Nov 2022 14:08:34 -0800 Subject: [PATCH 06/80] upgrading base --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fa855a9..63a95df 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM continuumio/miniconda3:4.8.2 +FROM continuumio/miniconda3:4.10.3 MAINTAINER Mingxun Wang "mwang87@gmail.com" WORKDIR /app From d8cbd087657b646d0f465ea3233841e0dd3ae7b3 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 14 Nov 2022 15:11:18 -0800 Subject: [PATCH 07/80] updating versions of packages --- Dockerfile | 2 +- Makefile | 12 ++++++------ requirements.txt | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 63a95df..be424e5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ RUN apt-get update -y && \ RUN conda create -y -n usi -c conda-forge -c bioconda -c defaults celery \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ joblib matplotlib numba numpy openssl qrcode rdkit requests \ - requests-cache scipy spectrum_utils werkzeug + requests-cache scipy spectrum_utils werkzeug==2.0.0 RUN /bin/bash -c 'source activate usi && pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" && pip install celery-once' RUN echo "source activate usi" > ~/.bashrc diff --git a/Makefile b/Makefile index 5d5f5bc..586efe7 100644 --- a/Makefile +++ b/Makefile @@ -23,19 +23,19 @@ clear-cache: #Docker Compose server-compose-interactive: - docker-compose build - docker-compose up + docker-compose --compatibility build + docker-compose --compatibility up server-compose: - docker-compose build - docker-compose up -d + docker-compose --compatibility build + docker-compose --compatibility up -d server-compose-production-interactive: - docker-compose build + docker-compose --compatibility build docker-compose -f docker-compose.yml -f 
docker-compose-production.yml --compatibility up server-compose-production: - docker-compose build + docker-compose --compatibility build docker-compose -f docker-compose.yml -f docker-compose-production.yml --compatibility up -d attach: diff --git a/requirements.txt b/requirements.txt index fdddf45..62ea414 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,5 +19,5 @@ requests requests_cache scipy spectrum_utils -werkzeug -git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python +werkzeug==2.0.0 +git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python \ No newline at end of file From ac1fec85b7a1a11fde42bb4fa9d42d589362a179 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 14 Nov 2022 15:12:21 -0800 Subject: [PATCH 08/80] updating building --- .github/workflows/build.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 471daab..01b4964 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,10 +1,14 @@ name: Docker Build Test on: - - push - - pull_request + push: + branches: + master + pull_request: + branches: + master schedule: - - cron: '0 0 * * 1' + - cron: '0 0 * * 1' jobs: build-test: From 7400d5ff4ea455bcaa71344619f8ff6dd49683d5 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 14 Nov 2022 15:24:09 -0800 Subject: [PATCH 09/80] using mambda --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index be424e5..855af87 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,8 @@ WORKDIR /app RUN apt-get update -y && \ apt-get install -y libxrender-dev && \ apt-get install -y git-core -RUN conda create -y -n usi -c conda-forge -c bioconda -c defaults celery \ +RUN conda install -c conda-forge mamba +RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ joblib 
matplotlib numba numpy openssl qrcode rdkit requests \ requests-cache scipy spectrum_utils werkzeug==2.0.0 From b32683b9ece85feb9b80e38fe7671e6164da77a4 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 14 Nov 2022 21:33:19 -0800 Subject: [PATCH 10/80] pinning version of spectrum_utils --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 855af87..24dda63 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ RUN conda install -c conda-forge mamba RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ joblib matplotlib numba numpy openssl qrcode rdkit requests \ - requests-cache scipy spectrum_utils werkzeug==2.0.0 + requests-cache scipy spectrum_utils==0.3.5 werkzeug==2.0.0 RUN /bin/bash -c 'source activate usi && pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" && pip install celery-once' RUN echo "source activate usi" > ~/.bashrc From 0e518027dbca0a9f57d3f0a27b29762e9e0c8a3b Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 15 Nov 2022 13:54:29 -0800 Subject: [PATCH 11/80] limiting dns --- docker-compose-production.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docker-compose-production.yml b/docker-compose-production.yml index dc110df..827f08a 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -5,9 +5,11 @@ services: - default - nginx-net environment: - VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + #VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + VIRTUAL_HOST: metabolomics-usi.gnps2.org VIRTUAL_PORT: 5000 - LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + #LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + LETSENCRYPT_HOST: metabolomics-usi.gnps2.org LETSENCRYPT_EMAIL: mwang87@gmail.com command: /app/run_server.sh deploy: From 
92c02986b5ce3bddfb83618bacb12cbc094f6405 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 13 Dec 2022 23:14:48 -0800 Subject: [PATCH 12/80] fixing build --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 01b4964..c7945e3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,4 +26,4 @@ jobs: python-version: ${{ matrix.python-version }} - name: Build Docker run: | - cd basic && docker build . + docker build . From c0a15f3918d97fabfedcdfe27b2544541ac187e5 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 13 Dec 2022 23:38:08 -0800 Subject: [PATCH 13/80] adding a todo --- metabolomics_spectrum_resolver/parsing.py | 11 +++++------ .../templates/homepage.html | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 7e7f107..c0b572f 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -618,16 +618,15 @@ def _parse_metabolomics_workbench(usi: str) -> Tuple[sus.MsmsSpectrum, str]: f"data/ms2.php?A={accession}.zip" f"&F={urllib.parse.quote_plus(filename)}&S={index}" ) + + # TODO: Do some extra exception handling if we don't find the filename directly. We might need to his another API to get the full filename + # Given the just the basename + lookup_request = requests.get(request_url, timeout=timeout) lookup_request.raise_for_status() response_text = lookup_request.text - response_text = ( -+ response_text.replace("
", "")
-+            .replace("

", "") -+ .lstrip() -+ .rstrip() -+ ) + response_text = (response_text.replace("
", "").replace("

", "").lstrip().rstrip()) # Parsing the MW Response precursor_mz = float(response_text.split("\n")[0].split(":")[-1].replace("\"", "")) diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index 2e827bd..a6cf1ad 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -136,7 +136,7 @@


From 0964a5725f4d21be4c89b8902d55b75e94b0e688 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 13 Dec 2022 23:46:49 -0800 Subject: [PATCH 14/80] updating affiliation --- metabolomics_spectrum_resolver/dashinterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/dashinterface.py b/metabolomics_spectrum_resolver/dashinterface.py index 5069ef2..26abbd9 100644 --- a/metabolomics_spectrum_resolver/dashinterface.py +++ b/metabolomics_spectrum_resolver/dashinterface.py @@ -426,7 +426,7 @@ dbc.CardHeader(html.H5("Contributors")), dbc.CardBody( [ - "Mingxun Wang, PhD – UC San Diego", + "Mingxun Wang, PhD – UC Riverside", html.Br(), "Wout Bittremieux, PhD – UC San Diego", html.Br(), From 3bdeb981915ff5c796a87482f241076cfed4e0a7 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Thu, 22 Dec 2022 09:16:02 -0800 Subject: [PATCH 15/80] adding more links --- .../templates/homepage.html | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index a6cf1ad..b2ac2fc 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -103,6 +103,14 @@


+ + +
+ + +
+ + From 43a52bab81bc7a5c84a8193f20889c390d355990 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Thu, 22 Dec 2022 09:36:50 -0800 Subject: [PATCH 16/80] updating --- metabolomics_spectrum_resolver/parsing.py | 42 ++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index c0b572f..3bb18e7 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -45,7 +45,7 @@ # collection identifier # Unofficial proteomics spectral library identifier: MASSIVEKB # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, MOTIFDB - r":(MASSIVEKB|GNPS|MASSBANK|MS2LDA|MOTIFDB)" + r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB)" # msRun identifier r":(.*)" # index flag @@ -322,6 +322,11 @@ def _parse_gnps(usi: str) -> Tuple[sus.MsmsSpectrum, str]: else: return _parse_gnps_library(usi) +def _parse_gnps2(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + ms_run = match.group(2) + if ms_run.lower().startswith("task"): + return _parse_gnps2_task(usi) # Parse GNPS clustered spectra in Molecular Networking. 
def _parse_gnps_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: @@ -362,6 +367,41 @@ def _parse_gnps_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: raise UsiError("Unknown GNPS task USI", 404) +# Parse GNPS2 task spectra +def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + gnps_task_match = gnps_task_pattern.match(match.group(2)) + if gnps_task_match is None: + raise UsiError("Incorrectly formatted GNPS2 task", 400) + task = gnps_task_match.group(1) + filename = gnps_task_match.group(2) + index_flag = match.group(3) + if index_flag.lower() != "scan": + raise UsiError("Currently supported GNPS2 TASK index flags: scan", 400) + scan = match.group(4) + + try: + request_url = ( + f"https://gnps2.org/spectrumpeaks?format=json&usi={usi}" + ) + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + spectrum_dict = lookup_request.json() + mz, intensity = zip(*spectrum_dict["peaks"]) + source_link = ( + f"https://gnps2.org//status?task={task}" + ) + if "precursor" in spectrum_dict: + precursor_mz = float(spectrum_dict["precursor"].get("mz", 0)) + charge = int(spectrum_dict["precursor"].get("charge", 0)) + else: + precursor_mz, charge = 0, 0 + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity) + return spectrum, source_link + except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): + raise UsiError("Unknown GNPS task USI", 404) + # Parse GNPS library. 
def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]: match = _match_usi(usi) From dd4e205ebbcd848c9ee0fcfee314d9bbe7bb340c Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Thu, 22 Dec 2022 10:03:31 -0800 Subject: [PATCH 17/80] small comments --- metabolomics_spectrum_resolver/parsing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 3bb18e7..a00f302 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -112,6 +112,8 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: spectrum, source_link = _parse_msv_pxd(usi) elif collection == "gnps": spectrum, source_link = _parse_gnps(usi) + elif collection == "gnps2": + spectrum, source_link = _parse_gnps2(usi) elif collection == "massbank": spectrum, source_link = _parse_massbank(usi) elif collection == "ms2lda": @@ -389,7 +391,7 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: spectrum_dict = lookup_request.json() mz, intensity = zip(*spectrum_dict["peaks"]) source_link = ( - f"https://gnps2.org//status?task={task}" + f"https://gnps2.org/status?task={task}" ) if "precursor" in spectrum_dict: precursor_mz = float(spectrum_dict["precursor"].get("mz", 0)) @@ -400,7 +402,7 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity) return spectrum, source_link except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): - raise UsiError("Unknown GNPS task USI", 404) + raise UsiError("Unknown GNPS2 task USI", 404) # Parse GNPS library. 
def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]: From 1641d1298994d4959e6c31f28c03149e03075481 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Thu, 22 Dec 2022 11:26:02 -0800 Subject: [PATCH 18/80] cleanup --- metabolomics_spectrum_resolver/templates/homepage.html | 8 -------- 1 file changed, 8 deletions(-) diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index b2ac2fc..9851c66 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -164,14 +164,6 @@

- -
- - From 08217060df42d2471595c86390e28d580efac5af Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 7 Feb 2023 21:26:53 -0800 Subject: [PATCH 19/80] updating parsing --- metabolomics_spectrum_resolver/parsing.py | 30 ++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index a00f302..d25b14e 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -45,7 +45,7 @@ # collection identifier # Unofficial proteomics spectral library identifier: MASSIVEKB # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, MOTIFDB - r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB)" + r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS)" # msRun identifier r":(.*)" # index flag @@ -122,6 +122,8 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: spectrum, source_link = _parse_motifdb(usi) elif collection.startswith("st"): spectrum, source_link = _parse_metabolomics_workbench(usi) + elif collection.startswith("tinymass"): + spectrum, source_link = _parse_tinymass(usi) else: raise UsiError(f"Unknown USI collection: {match.group(1)}", 400) splash_key = splash_builder.splash( @@ -404,6 +406,32 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): raise UsiError("Unknown GNPS2 task USI", 404) +# Parse TINYMASS task spectra +def _parse_tinymass(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + + try: + request_url = ( + f"https://tinymass.gnps2.org/resolve?usi={usi}" + ) + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + spectrum_dict = lookup_request.json() + mz, intensity = zip(*spectrum_dict["peaks"]) + source_link = ( + f"https://tinymass.gnps2.org/resolve?usi={usi}" + ) + if "precursor" in spectrum_dict: + precursor_mz = 
float(spectrum_dict["precursor"]) + charge = 0 + else: + precursor_mz, charge = 0, 0 + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity) + return spectrum, source_link + except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): + raise UsiError("Unknown Tiny Mass task USI", 404) + # Parse GNPS library. def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]: match = _match_usi(usi) From f5e19684417b38e4660194e5ad500530c4a4f506 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 28 Feb 2023 17:16:12 -0800 Subject: [PATCH 20/80] fixing parsing for gnps2 --- metabolomics_spectrum_resolver/parsing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index d25b14e..4c24ebc 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -395,9 +395,9 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: source_link = ( f"https://gnps2.org/status?task={task}" ) - if "precursor" in spectrum_dict: - precursor_mz = float(spectrum_dict["precursor"].get("mz", 0)) - charge = int(spectrum_dict["precursor"].get("charge", 0)) + if "precursor_mz" in spectrum_dict: + precursor_mz = float(spectrum_dict["precursor_mz"]) + charge = 0 else: precursor_mz, charge = 0, 0 From f64d22344a2d66ebabcdf28388d42d57d57c6aad Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Wed, 9 Aug 2023 14:39:19 -0700 Subject: [PATCH 21/80] adding analytics --- metabolomics_spectrum_resolver/dashinterface.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metabolomics_spectrum_resolver/dashinterface.py b/metabolomics_spectrum_resolver/dashinterface.py index 26abbd9..0f6582f 100644 --- a/metabolomics_spectrum_resolver/dashinterface.py +++ b/metabolomics_spectrum_resolver/dashinterface.py @@ -39,6 +39,8 @@ gtag('config', 'UA-8412213-8'); + + {%metas%} {%title%} From 
6a4119e7df91e924818f2a4a39b709ef633c60ed Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Wed, 9 Aug 2023 14:41:49 -0700 Subject: [PATCH 22/80] udpating links --- metabolomics_spectrum_resolver/dashinterface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metabolomics_spectrum_resolver/dashinterface.py b/metabolomics_spectrum_resolver/dashinterface.py index 0f6582f..9e5c869 100644 --- a/metabolomics_spectrum_resolver/dashinterface.py +++ b/metabolomics_spectrum_resolver/dashinterface.py @@ -61,10 +61,10 @@ children=[ dbc.NavbarBrand( html.Img( - src="https://gnps-cytoscape.ucsd.edu/static/img/GNPS_logo.png", + src="https://gnps2.org/static/img/logo.png", width="120px", ), - href="https://gnps.ucsd.edu", + href="https://gnps2.org", ), dbc.Nav( [ From 1c37556f5baa0bf167649a6f76daf33ea799c621 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Wed, 23 Aug 2023 00:36:22 -0700 Subject: [PATCH 23/80] increasing scaling for workers --- run_worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_worker.sh b/run_worker.sh index 2073a4f..60066b7 100755 --- a/run_worker.sh +++ b/run_worker.sh @@ -3,4 +3,4 @@ source activate usi export C_FORCE_ROOT="true" #TODO: Make sure we don't run this worker as root -celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=12,1 -Q worker --max-tasks-per-child 10 --loglevel INFO +celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=32,1 -Q worker --max-tasks-per-child 10 --loglevel INFO From 4d7d1cd22a3dbbfcf9fcf96111668f6166b875a5 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Wed, 23 Aug 2023 00:36:49 -0700 Subject: [PATCH 24/80] increasing scaling for workers --- run_worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_worker.sh b/run_worker.sh index 60066b7..7b9f227 100755 --- a/run_worker.sh +++ b/run_worker.sh @@ -3,4 +3,4 @@ source activate usi export C_FORCE_ROOT="true" #TODO: Make sure we don't run this worker as 
root -celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=32,1 -Q worker --max-tasks-per-child 10 --loglevel INFO +celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=16,1 -Q worker --max-tasks-per-child 10 --loglevel INFO From bed45eaaab563166e8051d0fcc06d46648aeda74 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 18 Sep 2023 16:09:39 -0700 Subject: [PATCH 25/80] adding another dependency --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 24dda63..4274b77 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ MAINTAINER Mingxun Wang "mwang87@gmail.com" WORKDIR /app RUN apt-get update -y && \ apt-get install -y libxrender-dev && \ - apt-get install -y git-core + apt-get install -y git-core libarchive-dev RUN conda install -c conda-forge mamba RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ From a30e82cb33e1be0059cc268457fcc61d16232abf Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 19 Sep 2023 11:27:11 -0700 Subject: [PATCH 26/80] updating restart policy --- docker-compose-production.yml | 2 -- docker-compose.yml | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/docker-compose-production.yml b/docker-compose-production.yml index 827f08a..0d01048 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -5,10 +5,8 @@ services: - default - nginx-net environment: - #VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org VIRTUAL_HOST: metabolomics-usi.gnps2.org VIRTUAL_PORT: 5000 - #LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org LETSENCRYPT_HOST: metabolomics-usi.gnps2.org LETSENCRYPT_EMAIL: mwang87@gmail.com command: /app/run_server.sh diff --git a/docker-compose.yml b/docker-compose.yml index a0432e7..c7e7611 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,7 +12,7 
@@ services: - ./logs/:/app/logs:rw networks: - default - restart: on-failure + restart: always command: /app/run_dev_server.sh metabolomicsusi-worker: @@ -24,7 +24,7 @@ services: - ./tmp:/app/tmp:rw - ./logs:/app/logs:rw command: /app/run_worker.sh - restart: on-failure + restart: always depends_on: - metabolomicsusi-redis networks: @@ -36,7 +36,7 @@ services: image: redis networks: - default - restart: on-failure + restart: always networks: nginx-net: From 107482516515d9809f6190eb0771fb93832bc280 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 19 Sep 2023 11:54:52 -0700 Subject: [PATCH 27/80] bug fix --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 4274b77..f348ab4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ RUN apt-get update -y && \ RUN conda install -c conda-forge mamba RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ - joblib matplotlib numba numpy openssl qrcode rdkit requests \ + joblib matplotlib==3.6.3 numba numpy openssl qrcode rdkit requests \ requests-cache scipy spectrum_utils==0.3.5 werkzeug==2.0.0 RUN /bin/bash -c 'source activate usi && pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" && pip install celery-once' From b8850d5373da91b45ada7df8422bd0f3e59d099c Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Fri, 20 Oct 2023 14:59:49 -0700 Subject: [PATCH 28/80] using a cache layer for gnps library spectrum --- metabolomics_spectrum_resolver/parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 4c24ebc..7f47f33 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -443,8 +443,8 @@ def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]: index = match.group(4) try: 
request_url = ( - f"https://gnps.ucsd.edu/ProteoSAFe/" - f"SpectrumCommentServlet?SpectrumID={index}" + f"https://external.gnps2.org/" + f"gnpsspectrum?SpectrumID={index}" ) lookup_request = requests.get(request_url, timeout=timeout) lookup_request.raise_for_status() From cf15c16224300393c2b7485dd43acc31e76d1585 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 8 Apr 2024 19:59:15 -0700 Subject: [PATCH 29/80] adding robot --- metabolomics_spectrum_resolver/views.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metabolomics_spectrum_resolver/views.py b/metabolomics_spectrum_resolver/views.py index 17becf0..61a20eb 100644 --- a/metabolomics_spectrum_resolver/views.py +++ b/metabolomics_spectrum_resolver/views.py @@ -625,6 +625,10 @@ def generate_qr(): qr_bytes.seek(0) return flask.send_file(qr_bytes, "image/png") +@blueprint.route("/robot.txt") +def robot(): + # Disallow all + return "User-agent: *\nDisallow: /", 200 @blueprint.errorhandler(Exception) def render_error(error): From 3107702361f4ada577b92d7d8b7bfe0053b146e7 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 16 Apr 2024 11:14:13 -0700 Subject: [PATCH 30/80] adding a template for the domain name resolution --- .env_template | 1 + docker-compose-production.yml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 .env_template diff --git a/.env_template b/.env_template new file mode 100644 index 0000000..23398c8 --- /dev/null +++ b/.env_template @@ -0,0 +1 @@ +HOSTNAME=de.metabolomics-usi.gnps2.org \ No newline at end of file diff --git a/docker-compose-production.yml b/docker-compose-production.yml index 0d01048..2dbf163 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -5,9 +5,9 @@ services: - default - nginx-net environment: - VIRTUAL_HOST: metabolomics-usi.gnps2.org + VIRTUAL_HOST: ${HOSTNAME:-metabolomics-usi.gnps2.org} VIRTUAL_PORT: 5000 - LETSENCRYPT_HOST: metabolomics-usi.gnps2.org + LETSENCRYPT_HOST: 
${HOSTNAME:-metabolomics-usi.gnps2.org} LETSENCRYPT_EMAIL: mwang87@gmail.com command: /app/run_server.sh deploy: From dfc87552e0c8a083fce2de63c9e4daa456d8b32f Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Thu, 30 May 2024 10:24:37 -0700 Subject: [PATCH 31/80] updating parsing --- metabolomics_spectrum_resolver/parsing.py | 15 +++++++++++---- .../templates/homepage.html | 10 +++++++++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 7f47f33..eb94118 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -571,22 +571,28 @@ def _parse_msv_pxd(usi: str) -> Tuple[sus.MsmsSpectrum, str]: lookup_request.raise_for_status() lookup_json = lookup_request.json() for spectrum_file in lookup_json["row_data"]: + # Checking if its an actual file we can resolve or if MSV will go to PX directly if any( spectrum_file["file_descriptor"].lower().endswith(extension) for extension in ["mzml", "mzxml", "mgf"] - ): - request_url = ( + ) or spectrum_file["file_descriptor"].startswith("f.ProteomeCentral"): + file_descriptor = spectrum_file['file_descriptor'] + if file_descriptor.startswith("f."): + file_descriptor = file_descriptor[2:] + + peaks_request_url = ( f"https://massive.ucsd.edu/ProteoSAFe/" f"DownloadResultFile?" 
f"task=4f2ac74ea114401787a7e96e143bb4a1&" f"invoke=annotatedSpectrumImageText&block=0&file=FILE->" - f"{urllib.parse.quote(spectrum_file['file_descriptor'])}" + f"{urllib.parse.quote(file_descriptor)}" f"&scan={scan}&peptide=*..*&force=false&" f"format=JSON&uploadfile=True" ) + try: spectrum_request = requests.get( - request_url, timeout=timeout + peaks_request_url, timeout=timeout ) spectrum_request.raise_for_status() spectrum_dict = spectrum_request.json() @@ -643,6 +649,7 @@ def _parse_msv_pxd(usi: str) -> Tuple[sus.MsmsSpectrum, str]: return spectrum, source_link except requests.exceptions.HTTPError: + raise pass raise UsiError("Unsupported/unknown USI", 404) diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index 9851c66..a98b3c8 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -159,9 +159,17 @@


+ + +
+ From e1c19481d186467ce28de1b23a20c5897fc53249 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 16 Jul 2024 20:50:35 -0700 Subject: [PATCH 32/80] limiting logging --- docker-compose-production.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/docker-compose-production.yml b/docker-compose-production.yml index 2dbf163..9bcbe7f 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -14,20 +14,35 @@ services: resources: limits: memory: 16000M + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" metabolomicsusi-worker: deploy: resources: limits: memory: 16000M + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" metabolomicsusi-redis: deploy: resources: limits: memory: 4000M + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" networks: nginx-net: external: - name: nginx-net \ No newline at end of file + name: nginx-net From 700b88acc48da0baeb5432bf63cf9d19ff7ca8fc Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Sat, 14 Sep 2024 12:53:02 -0700 Subject: [PATCH 33/80] support for metabolights --- metabolomics_spectrum_resolver/parsing.py | 55 +++++++++++++++++-- .../templates/homepage.html | 2 +- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index eb94118..50f4b60 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -27,7 +27,7 @@ # PXLnnnnnn # Unofficial: MASSIVEKB # https://github.com/HUPO-PSI/usi/blob/master/CollectionIdentifiers.md - r":(MSV\d{9}|PXD\d{6}|PXL\d{6}|RPXD\d{6}|ST\d{6}|MassIVE)" + r":(MSV\d{9}|PXD\d{6}|PXL\d{6}|RPXD\d{6}|MassIVE)" # msRun identifier r":(.*)" # index flag @@ -44,8 +44,8 @@ r"^mzspec" # collection identifier # Unofficial proteomics spectral library identifier: MASSIVEKB - # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, 
MOTIFDB - r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS)" + # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, MOTIFDB, MTBLS, ST + r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS|MTBLS\d+|ST\d{6}|)" # msRun identifier r":(.*)" # index flag @@ -102,7 +102,6 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: # changes, be sure to change this logic. if ( annotation is not None - or collection.startswith("msv") or collection.startswith("pxd") or collection.startswith("pxl") or collection.startswith("rpxd") @@ -110,10 +109,19 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: or collection == "massive" ): spectrum, source_link = _parse_msv_pxd(usi) + elif collection.startswith("msv"): + # Lets try to use GNPS2 for this first + try: + spectrum, source_link = _parse_gnps2(usi) + except: + spectrum, source_link = _parse_msv_pxd(usi) elif collection == "gnps": spectrum, source_link = _parse_gnps(usi) elif collection == "gnps2": spectrum, source_link = _parse_gnps2(usi) + elif collection.startswith("mtbls"): + # Since they don't have their own resolver, we'll go here to GNPS2 for now + spectrum, source_link = _parse_gnps2(usi) elif collection == "massbank": spectrum, source_link = _parse_massbank(usi) elif collection == "ms2lda": @@ -331,6 +339,9 @@ def _parse_gnps2(usi: str) -> Tuple[sus.MsmsSpectrum, str]: ms_run = match.group(2) if ms_run.lower().startswith("task"): return _parse_gnps2_task(usi) + else: + # We are likely dealing with a dataset on the GNPS2 side + return _parse_gnps2_dataset(usi) # Parse GNPS clustered spectra in Molecular Networking. 
def _parse_gnps_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: @@ -406,6 +417,42 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): raise UsiError("Unknown GNPS2 task USI", 404) +def _parse_gnps2_dataset(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + # TODO + match = _match_usi(usi) + dataset_identifier = match.group(1) + index_flag = match.group(3) + scan = match.group(4) + try: + request_url = ( + f"https://gnps2.org/spectrumpeaks?format=json&usi={usi}" + ) + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + spectrum_dict = lookup_request.json() + mz, intensity = zip(*spectrum_dict["peaks"]) + + if "MTBLS" in dataset_identifier: + source_link = ( + f"https://www.ebi.ac.uk/metabolights/editor/{dataset_identifier}/descriptors" + ) + elif "MSV" in dataset_identifier: + source_link = ( + f"https://massive.ucsd.edu/ProteoSAFe/" + f"QueryMSV?id={dataset_identifier}" + ) + + if "precursor_mz" in spectrum_dict: + precursor_mz = float(spectrum_dict["precursor_mz"]) + charge = 0 + else: + precursor_mz, charge = 0, 0 + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity) + return spectrum, source_link + except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): + raise UsiError("Unknown GNPS2 Dataset USI", 404) + # Parse TINYMASS task spectra def _parse_tinymass(usi: str) -> Tuple[sus.MsmsSpectrum, str]: match = _match_usi(usi) diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index a98b3c8..cede7d7 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -152,7 +152,7 @@


From 4374cf95065dd4c7c9a88c58f3a944b61da8ada7 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 16 Sep 2024 10:55:17 -0700 Subject: [PATCH 34/80] bug fix for parsing --- metabolomics_spectrum_resolver/parsing.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 50f4b60..bd91dd2 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -391,8 +391,10 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: task = gnps_task_match.group(1) filename = gnps_task_match.group(2) index_flag = match.group(3) - if index_flag.lower() != "scan": - raise UsiError("Currently supported GNPS2 TASK index flags: scan", 400) + + if not (index_flag.lower() == "scan" or index_flag.lower() == "nativeid"): + raise UsiError("Currently supported GNPS2 TASK index flags: scan and nativeId", 400) + scan = match.group(4) try: @@ -423,6 +425,10 @@ def _parse_gnps2_dataset(usi: str) -> Tuple[sus.MsmsSpectrum, str]: dataset_identifier = match.group(1) index_flag = match.group(3) scan = match.group(4) + + if not (index_flag.lower() == "scan" or index_flag.lower() == "nativeid"): + raise UsiError("Currently supported GNPS2 Dataset index flags: scan and nativeId", 400) + try: request_url = ( f"https://gnps2.org/spectrumpeaks?format=json&usi={usi}" From 807d13bb1b97f1a2bae230ad2d8dd8feb7a970f4 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 16 Sep 2024 10:56:25 -0700 Subject: [PATCH 35/80] updating examples --- .../templates/homepage.html | 50 ++++++++++--------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index cede7d7..4efe0e0 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -79,30 +79,6 @@

From 9cfe35ead93dec450882c67ad88d62ccd27c9609 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 17 Sep 2024 09:26:24 -0700 Subject: [PATCH 36/80] cleanup --- metabolomics_spectrum_resolver/parsing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index bd91dd2..c74535a 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -420,7 +420,6 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: raise UsiError("Unknown GNPS2 task USI", 404) def _parse_gnps2_dataset(usi: str) -> Tuple[sus.MsmsSpectrum, str]: - # TODO match = _match_usi(usi) dataset_identifier = match.group(1) index_flag = match.group(3) From a809c5ca6c912fd3cee6945267063919edb7ad90 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Thu, 26 Sep 2024 13:26:02 -0700 Subject: [PATCH 37/80] using valkey --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index c7e7611..079f6fe 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -33,7 +33,7 @@ services: metabolomicsusi-redis: container_name: metabolomicsusi-redis - image: redis + image: valkey/valkey:alpine3.20 networks: - default restart: always From 7956e23f34f5d70b0210d87f61d41bff0bc95ee1 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Wed, 9 Oct 2024 08:08:19 -0700 Subject: [PATCH 38/80] multiple gnps2 servers --- metabolomics_spectrum_resolver/parsing.py | 55 ++++++++++++++--------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index c74535a..ed06f2c 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -397,28 +397,39 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: scan = match.group(4) - try: - request_url = ( - 
f"https://gnps2.org/spectrumpeaks?format=json&usi={usi}" - ) - lookup_request = requests.get(request_url, timeout=timeout) - lookup_request.raise_for_status() - spectrum_dict = lookup_request.json() - mz, intensity = zip(*spectrum_dict["peaks"]) - source_link = ( - f"https://gnps2.org/status?task={task}" - ) - if "precursor_mz" in spectrum_dict: - precursor_mz = float(spectrum_dict["precursor_mz"]) - charge = 0 - else: - precursor_mz, charge = 0, 0 - - spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity) - return spectrum, source_link - except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): - raise UsiError("Unknown GNPS2 task USI", 404) - + # We will try in order these GNPS2 URLs to see if the task is actually there + gnps2_server_url_list = [ + "https://gnps2.org", + "https://beta.gnps2.org", + "https://dev.gnps2.org", + "https://de.gnps2.org", + ] + + for gnps2server_url in gnps2_server_url_list: + try: + request_url = ( + f"{gnps2server_url}/spectrumpeaks?format=json&usi={usi}" + ) + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + spectrum_dict = lookup_request.json() + mz, intensity = zip(*spectrum_dict["peaks"]) + source_link = ( + f"{gnps2server_url}/status?task={task}" + ) + if "precursor_mz" in spectrum_dict: + precursor_mz = float(spectrum_dict["precursor_mz"]) + charge = 0 + else: + precursor_mz, charge = 0, 0 + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity) + return spectrum, source_link + except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): + pass + + raise UsiError("Unknown GNPS2 task USI", 404) + def _parse_gnps2_dataset(usi: str) -> Tuple[sus.MsmsSpectrum, str]: match = _match_usi(usi) dataset_identifier = match.group(1) From 3e771d381d11ea232044a5cc677b31b88234d696 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Wed, 9 Oct 2024 11:56:34 -0700 Subject: [PATCH 39/80] using redis --- Dockerfile | 7 ++++++- docker-compose.yml | 
3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index f348ab4..d96a91d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,10 +6,15 @@ RUN apt-get update -y && \ apt-get install -y libxrender-dev && \ apt-get install -y git-core libarchive-dev RUN conda install -c conda-forge mamba -RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery \ +RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery==5.3.6 \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ joblib matplotlib==3.6.3 numba numpy openssl qrcode rdkit requests \ requests-cache scipy spectrum_utils==0.3.5 werkzeug==2.0.0 + +# install redis with pypi +RUN /bin/bash -c 'source activate usi && pip install redis' + +# installing hash RUN /bin/bash -c 'source activate usi && pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" && pip install celery-once' RUN echo "source activate usi" > ~/.bashrc diff --git a/docker-compose.yml b/docker-compose.yml index 079f6fe..76a343f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -33,7 +33,8 @@ services: metabolomicsusi-redis: container_name: metabolomicsusi-redis - image: valkey/valkey:alpine3.20 + #image: valkey/valkey:alpine3.20 + image: redis:alpine networks: - default restart: always From a3bf5042c0a7926c51cd5a188a2f767680f0bf6c Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Mon, 2 Dec 2024 11:50:53 -0800 Subject: [PATCH 40/80] Recreate. 
--- metabolomics_spectrum_resolver/parsing.py | 2 +- test/usi_test_data.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index ed06f2c..4ef6a19 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -553,7 +553,7 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: index = match.group(4) # Clean up the new MassBank accessions if necessary. massbank_accession = re.match( - r"MSBNK-[A-Z0-9_]{1,32}-([A-Z0-9_]{1,64})", index + r"MSBNK-[A-z0-9_]{1,32}-([A-Za-z0-9_]{1,64})", index ) if massbank_accession is not None: index = massbank_accession.group(1) diff --git a/test/usi_test_data.py b/test/usi_test_data.py index 670b4b2..08357aa 100644 --- a/test/usi_test_data.py +++ b/test/usi_test_data.py @@ -10,6 +10,8 @@ "mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00005436077", "mzspec:MASSBANK::accession:SM858102", "mzspec:MASSBANK::accession:MSBNK-AAFC-AC000646", + # New Massbank identifier with lowercase + "mzspec:MASSBANK::accession:MSBNK-Athens_Univ-AU259904", "mzspec:MS2LDA:TASK-190:accession:270684", "mzspec:MOTIFDB::accession:171163", "mzspec:MSV000082791:(-)-epigallocatechin:scan:2", From 6b278cfeb72416cd54e15d09adb77ffe0dab800e Mon Sep 17 00:00:00 2001 From: Ming Wang Date: Tue, 3 Dec 2024 09:55:03 -0800 Subject: [PATCH 41/80] Update parsing.py --- metabolomics_spectrum_resolver/parsing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 4ef6a19..1a7d47e 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -553,7 +553,8 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: index = match.group(4) # Clean up the new MassBank accessions if necessary. 
massbank_accession = re.match( - r"MSBNK-[A-z0-9_]{1,32}-([A-Za-z0-9_]{1,64})", index + # See https://github.com/MassBank/MassBank-web/blob/main/Documentation/MassBankRecordFormat.md#211-accession + r"MSBNK-[A-Za-z0–9_]{1,32}-([A-Z0–9_]{1,64})", index ) if massbank_accession is not None: index = massbank_accession.group(1) From 154fcb644cb4a7defda7135042a4b24a5c9498ee Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Wed, 22 Jan 2025 20:10:50 -0800 Subject: [PATCH 42/80] Try that again. --- metabolomics_spectrum_resolver/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 1a7d47e..4ce4d2e 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -554,7 +554,7 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: # Clean up the new MassBank accessions if necessary. massbank_accession = re.match( # See https://github.com/MassBank/MassBank-web/blob/main/Documentation/MassBankRecordFormat.md#211-accession - r"MSBNK-[A-Za-z0–9_]{1,32}-([A-Z0–9_]{1,64})", index + r"MSBNK-[A-Za-z0-9_]{1,32}-([A-Z-9_]{1,64})", index ) if massbank_accession is not None: index = massbank_accession.group(1) From 4bd7be5760c7ef5bbdb960a3cbed2c5d1150fba5 Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Wed, 22 Jan 2025 20:12:16 -0800 Subject: [PATCH 43/80] Try that again. --- metabolomics_spectrum_resolver/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 4ce4d2e..3b7d64a 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -554,7 +554,7 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: # Clean up the new MassBank accessions if necessary. 
massbank_accession = re.match( # See https://github.com/MassBank/MassBank-web/blob/main/Documentation/MassBankRecordFormat.md#211-accession - r"MSBNK-[A-Za-z0-9_]{1,32}-([A-Z-9_]{1,64})", index + r"MSBNK-[A-Za-z0-9_]{1,32}-([A-Z0-9_]{1,64})", index ) if massbank_accession is not None: index = massbank_accession.group(1) From 0860f6f120b6b43b3a25bfd0dcb5b52198280975 Mon Sep 17 00:00:00 2001 From: YasinEl Date: Thu, 30 Jan 2025 16:08:14 -0800 Subject: [PATCH 44/80] massbankEU/MONA --- metabolomics_spectrum_resolver/parsing.py | 84 +++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 3b7d64a..984fa03 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -17,6 +17,7 @@ MS2LDA_SERVER = "http://ms2lda.org/basicviz/" MOTIFDB_SERVER = "http://ms2lda.org/motifdb/" MASSBANK_SERVER = "https://massbank.us/rest/spectra/" +MASSBANKEUROPE_SERVER = "https://msbi.ipb-halle.de/MassBank3-api/v1/records/" # USI specification: http://www.psidev.info/usi usi_pattern = re.compile( @@ -584,6 +585,89 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: raise UsiError("Unknown MassBank USI", 404) +# Parse MONA entry. 
+def _parse_mona(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + index_flag = match.group(3) + if index_flag.lower() != "accession": + raise UsiError( + "Currently supported MassBank index flags: accession", 400 + ) + + index = match.group(4) + + try: + lookup_request = requests.get( + f"{MASSBANK_SERVER}{index}", timeout=timeout + ) + lookup_request.raise_for_status() + spectrum_dict = lookup_request.json() + + mz, intensity = [], [] + for peak in spectrum_dict["spectrum"].split(): + peak_mz, peak_intensity = peak.split(":") + mz.append(float(peak_mz)) + intensity.append(float(peak_intensity)) + precursor_mz = 0 + for metadata in spectrum_dict["metaData"]: + if metadata["name"] == "precursor m/z": + precursor_mz = float(metadata["value"]) + break + source_link = ( + f"https://massbank.eu/MassBank/" f"RecordDisplay.jsp?id={index}" + ) + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity) + + return spectrum, source_link + + except requests.exceptions.HTTPError: + raise UsiError("Unknown MassBank USI", 404) + +# Parse MassBank entry. 
+def _parse_massbankEurope(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + index_flag = match.group(3) + if index_flag.lower() != "accession": + raise UsiError( + "Currently supported MassBank index flags: accession", 400 + ) + + index = match.group(4) + + try: + # Try requesting from massbankeurope first + lookup_request = requests.get( + f"{MASSBANKEUROPE_SERVER}{index}", timeout=timeout + ) + + lookup_request.raise_for_status() + spectrum_dict = lookup_request.json() + + # If request is successful we know it was massbankeurope and parse accordingly + peaks = spectrum_dict["peak"]["peak"]["values"] + + mz = [peak["mz"] for peak in peaks] + intensity = [peak["intensity"] for peak in peaks] + + precursor_mz = next( + (float(item["value"]) for item in spectrum_dict['mass_spectrometry']['focused_ion'] if item["subtag"] == "PRECURSOR_M/Z"), + 0 + ) + + source_link = ( + f"https://massbank.eu/MassBank/" f"RecordDisplay.jsp?id={index}" + ) + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity) + return spectrum, source_link + + + #show what error + except requests.exceptions.HTTPError: + raise UsiError("Unknown MassBank USI", 404) + + # Parse MS2LDA from ms2lda.org. def _parse_ms2lda(usi: str) -> Tuple[sus.MsmsSpectrum, str]: match = _match_usi(usi) From 7ea9269be3457b1c32539eae52a1313a4be162dd Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Thu, 30 Jan 2025 16:31:30 -0800 Subject: [PATCH 45/80] Commenting and refactoring. 
--- metabolomics_spectrum_resolver/parsing.py | 69 ++++++++++++++++++++++- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 984fa03..a591813 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -16,7 +16,7 @@ MS2LDA_SERVER = "http://ms2lda.org/basicviz/" MOTIFDB_SERVER = "http://ms2lda.org/motifdb/" -MASSBANK_SERVER = "https://massbank.us/rest/spectra/" +MONA_SERVER = "https://massbank.us/rest/spectra/" MASSBANKEUROPE_SERVER = "https://msbi.ipb-halle.de/MassBank3-api/v1/records/" # USI specification: http://www.psidev.info/usi @@ -545,6 +545,23 @@ def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]: # Parse MassBank entry. def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + """ Parse a MassBank or MoNA USI and return the corresponding spectrum/source url. + + Parameters + ---------- + usi : str + The USI to be parsed. + + Returns + ------- + Tuple[sus.MsmsSpectrum, str] + The parsed spectrum and the source link. + + TODO: + ------ + - Determine MoNA/MassBank + - Make call to appropriate endpoint + """ match = _match_usi(usi) index_flag = match.group(3) if index_flag.lower() != "accession": @@ -561,7 +578,7 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: index = massbank_accession.group(1) try: lookup_request = requests.get( - f"{MASSBANK_SERVER}{index}", timeout=timeout + f"{MONA_SERVER}{index}", timeout=timeout ) lookup_request.raise_for_status() spectrum_dict = lookup_request.json() @@ -587,6 +604,29 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: # Parse MONA entry. def _parse_mona(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + """ Parse a MONA USI and return the corresponding spectrum. Performs a web request to + MONA_SERVER. + + Parameters + ---------- + usi : str + The USI to be parsed. 
+ + Globals + ------- + MONA_SERVER : str + The base URL for the MONA server. + + Returns + ------- + Tuple[sus.MsmsSpectrum, str] + The parsed spectrum and the source link. + + Raises + ------ + UsiError + If the USI could not be parsed because it is incorrectly formatted. + """ match = _match_usi(usi) index_flag = match.group(3) if index_flag.lower() != "accession": @@ -598,7 +638,7 @@ def _parse_mona(usi: str) -> Tuple[sus.MsmsSpectrum, str]: try: lookup_request = requests.get( - f"{MASSBANK_SERVER}{index}", timeout=timeout + f"{MONA_SERVER}{index}", timeout=timeout ) lookup_request.raise_for_status() spectrum_dict = lookup_request.json() @@ -626,6 +666,29 @@ def _parse_mona(usi: str) -> Tuple[sus.MsmsSpectrum, str]: # Parse MassBank entry. def _parse_massbankEurope(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + """ Parse a MassBank[EU|JP] USI and return the corresponding spectrum. Performs a web request to + MassBank Server. + + Parameters + ---------- + usi : str + The USI to be parsed. + + Globals + ------- + MassBank Server : str + The base URL for the MONA server. + + Returns + ------- + Tuple[sus.MsmsSpectrum, str] + The parsed spectrum and the source link. + + Raises + ------ + UsiError + If the USI could not be parsed because it is incorrectly formatted. + """ match = _match_usi(usi) index_flag = match.group(3) if index_flag.lower() != "accession": From 752777cd395c1276642bdbb9012b7967fbae0fed Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Thu, 30 Jan 2025 16:51:31 -0800 Subject: [PATCH 46/80] Implement MassBank decision point. 
--- metabolomics_spectrum_resolver/parsing.py | 41 +++++++++-------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index a591813..4555fc8 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -547,6 +547,11 @@ def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]: def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: """ Parse a MassBank or MoNA USI and return the corresponding spectrum/source url. + MassBank USIs are of the form: MSBNK-[A-Za-z0-9_]{1,32}-[A-Z0-9_]{1,64} + + Fall back to MoNA if MassBank EU fails to respond. Note that partial MassBank ids + (e.g., SM858102) will only resolve to MoNA. + Parameters ---------- usi : str @@ -572,34 +577,20 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: # Clean up the new MassBank accessions if necessary. massbank_accession = re.match( # See https://github.com/MassBank/MassBank-web/blob/main/Documentation/MassBankRecordFormat.md#211-accession - r"MSBNK-[A-Za-z0-9_]{1,32}-([A-Z0-9_]{1,64})", index + r"(MSBNK-[A-Za-z0-9_]{1,32}-[A-Z0-9_]{1,64})", index ) if massbank_accession is not None: - index = massbank_accession.group(1) - try: - lookup_request = requests.get( - f"{MONA_SERVER}{index}", timeout=timeout - ) - lookup_request.raise_for_status() - spectrum_dict = lookup_request.json() - mz, intensity = [], [] - for peak in spectrum_dict["spectrum"].split(): - peak_mz, peak_intensity = peak.split(":") - mz.append(float(peak_mz)) - intensity.append(float(peak_intensity)) - precursor_mz = 0 - for metadata in spectrum_dict["metaData"]: - if metadata["name"] == "precursor m/z": - precursor_mz = float(metadata["value"]) - break - source_link = ( - f"https://massbank.eu/MassBank/" f"RecordDisplay.jsp?id={index}" - ) + # It's certiainly MassBank EU/JP + try: + index = massbank_accession.group(1) # The whole thing + return 
_parse_massbankEurope(usi) + + except UsiError: + pass - spectrum = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity) - return spectrum, source_link - except requests.exceptions.HTTPError: - raise UsiError("Unknown MassBank USI", 404) + # Either MassBank EU Failed or it's a MoNA entry, fallback to MoNA. + # Let the exception propagate if it fails + return _parse_mona(usi) # Parse MONA entry. From d3f7ae6e61f6f64a405e17b44973c0d28341d475 Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Thu, 30 Jan 2025 16:52:47 -0800 Subject: [PATCH 47/80] Strip whitespace. --- metabolomics_spectrum_resolver/parsing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 4555fc8..d875c8f 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -93,6 +93,9 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: Tuple[sus.MsmsSpectrum, str, str] A tuple of the `MsmsSpectrum`, its source link, and its SPLASH. """ + # Very basic cleanup + usi = str(usi).strip() + match = _match_usi(usi) try: collection = match.group(1).lower() @@ -548,7 +551,7 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: """ Parse a MassBank or MoNA USI and return the corresponding spectrum/source url. MassBank USIs are of the form: MSBNK-[A-Za-z0-9_]{1,32}-[A-Z0-9_]{1,64} - + Fall back to MoNA if MassBank EU fails to respond. Note that partial MassBank ids (e.g., SM858102) will only resolve to MoNA. From d9f1bab1eb102b7035ccb2aeb2435a76eb5bb090 Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Thu, 30 Jan 2025 16:54:53 -0800 Subject: [PATCH 48/80] Remove todos. 
--- metabolomics_spectrum_resolver/parsing.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index d875c8f..48e3311 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -564,11 +564,6 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: ------- Tuple[sus.MsmsSpectrum, str] The parsed spectrum and the source link. - - TODO: - ------ - - Determine MoNA/MassBank - - Make call to appropriate endpoint """ match = _match_usi(usi) index_flag = match.group(3) From f4b1e788f6e2e2d8d814b65740325cc7905debf1 Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Thu, 30 Jan 2025 17:22:42 -0800 Subject: [PATCH 49/80] Update linkout for MoNA. --- metabolomics_spectrum_resolver/parsing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 48e3311..d56c014 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -580,7 +580,6 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: if massbank_accession is not None: # It's certiainly MassBank EU/JP try: - index = massbank_accession.group(1) # The whole thing return _parse_massbankEurope(usi) except UsiError: @@ -643,7 +642,7 @@ def _parse_mona(usi: str) -> Tuple[sus.MsmsSpectrum, str]: precursor_mz = float(metadata["value"]) break source_link = ( - f"https://massbank.eu/MassBank/" f"RecordDisplay.jsp?id={index}" + f"https://massbank.us/spectra/display/{index}" ) spectrum = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity) From 3d0695eb7f04d1c509f1996b102de925633efd37 Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Thu, 30 Jan 2025 20:19:51 -0800 Subject: [PATCH 50/80] Update linkout. 
--- metabolomics_spectrum_resolver/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index d56c014..241490b 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -707,7 +707,7 @@ def _parse_massbankEurope(usi: str) -> Tuple[sus.MsmsSpectrum, str]: ) source_link = ( - f"https://massbank.eu/MassBank/" f"RecordDisplay.jsp?id={index}" + f"https://massbank.eu/MassBank/" f"RecordDisplay?id={index}" ) spectrum = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity) From f25c3e260ce3953b2a0316f7bfc94c748ed54f09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Meier?= Date: Mon, 10 Feb 2025 13:06:22 +0100 Subject: [PATCH 51/80] Update parsing.py Hi, we want to turn off the preliminary MassBank3-api/ endpoint. This endpoint was only used during development. We deployed our new software now as a replacement for the old software on our dev server https://msbi.ipb-halle.de/. It is now using MassBank-api/ and MassBank/ as planned for the future. Please adjust the URL you want to use to fetch data from MassBank accordingly. 
--- metabolomics_spectrum_resolver/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 241490b..9bd667d 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -17,7 +17,7 @@ MS2LDA_SERVER = "http://ms2lda.org/basicviz/" MOTIFDB_SERVER = "http://ms2lda.org/motifdb/" MONA_SERVER = "https://massbank.us/rest/spectra/" -MASSBANKEUROPE_SERVER = "https://msbi.ipb-halle.de/MassBank3-api/v1/records/" +MASSBANKEUROPE_SERVER = "https://msbi.ipb-halle.de/MassBank-api/v1/records/" # USI specification: http://www.psidev.info/usi usi_pattern = re.compile( From ac90007390bd891ff268b8050b14db55b1aec470 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Wed, 5 Mar 2025 08:44:22 -0800 Subject: [PATCH 52/80] updating parsing --- metabolomics_spectrum_resolver/parsing.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 4ef6a19..ca80c26 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -627,11 +627,20 @@ def _parse_msv_pxd(usi: str) -> Tuple[sus.MsmsSpectrum, str]: scan = match.group(4) try: lookup_url = ( - f"https://massive.ucsd.edu/ProteoSAFe/" + f"https://proteomics3.ucsd.edu/ProteoSAFe/" f"QuerySpectrum?id={urllib.parse.quote_plus(usi)}" ) lookup_request = requests.get(lookup_url, timeout=timeout) - lookup_request.raise_for_status() + try: + lookup_request.raise_for_status() + except: + lookup_url = ( + f"https://proteomics3.ucsd.edu/ProteoSAFe/" + f"QuerySpectrum?id={urllib.parse.quote_plus(usi)}" + ) + lookup_request = requests.get(lookup_url, timeout=timeout) + lookup_request.raise_for_status() + lookup_json = lookup_request.json() for spectrum_file in lookup_json["row_data"]: # Checking if its an actual file we can resolve or if MSV will 
go to PX directly From f69d8c266cf86dfdc630e942a1210ade39ceb936 Mon Sep 17 00:00:00 2001 From: Ming Wang Date: Mon, 30 Jun 2025 19:44:45 -0700 Subject: [PATCH 53/80] Update parsing.py --- metabolomics_spectrum_resolver/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 9ac9cdd..7dcdd74 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -405,7 +405,7 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: gnps2_server_url_list = [ "https://gnps2.org", "https://beta.gnps2.org", - "https://dev.gnps2.org", + "http://dev.gnps2.org:4000", "https://de.gnps2.org", ] From 67303b700ca7c81507d94c49c7a40779ab942aec Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Thu, 10 Jul 2025 21:50:44 -0700 Subject: [PATCH 54/80] adding analytics as a hidden worker --- Dockerfile | 3 ++ metabolomics_spectrum_resolver/tasks.py | 9 +++- .../tasks_analytics.py | 44 +++++++++++++++++++ run_worker.sh | 4 ++ 4 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 metabolomics_spectrum_resolver/tasks_analytics.py diff --git a/Dockerfile b/Dockerfile index d96a91d..cdad5e0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,6 +17,9 @@ RUN /bin/bash -c 'source activate usi && pip install redis' # installing hash RUN /bin/bash -c 'source activate usi && pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" && pip install celery-once' +# installing analytics +RUN /bin/bash -c 'source activate usi && pip install umami-analytics' + RUN echo "source activate usi" > ~/.bashrc COPY . 
/app diff --git a/metabolomics_spectrum_resolver/tasks.py b/metabolomics_spectrum_resolver/tasks.py index 24817d3..02192e1 100644 --- a/metabolomics_spectrum_resolver/tasks.py +++ b/metabolomics_spectrum_resolver/tasks.py @@ -9,7 +9,7 @@ import spectrum_utils.spectrum as sus from metabolomics_spectrum_resolver import drawing, parsing - +from metabolomics_spectrum_resolver import tasks_analytics memory = joblib.Memory("tmp/joblibcache", verbose=0) cached_parse_usi = memory.cache(parsing.parse_usi) @@ -110,6 +110,12 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: A tuple of (i) the `MsmsSpectrum`, (ii) its source link, and (iii) its SPLASH. """ + + # We are going to do the analytics now + tasks_analytics.task_analytics_event( + "parse_usi_or_spectrum" + ) + # First attempt to schedule with Celery. try: return _task_parse_usi.apply_async(args=(usi,)).get() @@ -142,6 +148,7 @@ def _task_parse_usi_or_spectrum( A tuple of (i) the `MsmsSpectrum`, (ii) its source link, and (iii) its SPLASH. 
""" + # noinspection PyTypeChecker return cached_parse_usi_or_spectrum(usi, spectrum) diff --git a/metabolomics_spectrum_resolver/tasks_analytics.py b/metabolomics_spectrum_resolver/tasks_analytics.py new file mode 100644 index 0000000..a4b3d27 --- /dev/null +++ b/metabolomics_spectrum_resolver/tasks_analytics.py @@ -0,0 +1,44 @@ +import io +import sys +from typing import Any, Tuple + +import celery +import umami + +celery_instance = celery.Celery( + "tasks_analytics", + backend="redis://metabolomicsusi-redis", + broker="redis://metabolomicsusi-redis", +) + +umami.set_url_base("https://analytics-api.gnps2.org/") +umami.set_website_id('2e8b3719-51ec-4786-9b29-3e9198c31ea5') +umami.set_hostname('analytics-api.gnps2.org') + +celery_instance.conf.task_routes = { + "metabolomics_spectrum_resolver.tasks_analytics._task_analytics_event": { + "queue": "worker-analytics" + }, +} + +def task_analytics_event(event_type: str) -> str: + """ + Task to log an analytics event using umami. + + Args: + event_type (str): The type of event to log. + + Returns: + str: Confirmation message indicating the event was sent. + """ + + _task_analytics_event.apply_async( + args=([event_type]) + ) + + return f"Event '{event_type}' logged." 
+ +@celery_instance.task(time_limit=10) +def _task_analytics_event(event_type) -> str: + umami.new_event(event_name=event_type) + diff --git a/run_worker.sh b/run_worker.sh index 7b9f227..ee9c33f 100755 --- a/run_worker.sh +++ b/run_worker.sh @@ -2,5 +2,9 @@ source activate usi export C_FORCE_ROOT="true" + +# Running an analytics worker +celery -A metabolomics_spectrum_resolver.tasks_analytics worker --concurrency=1 -Q worker-analytics --loglevel INFO --detach + #TODO: Make sure we don't run this worker as root celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=16,1 -Q worker --max-tasks-per-child 10 --loglevel INFO From 3c9c0b234340d4832bb89990507455baa7bba6ce Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Thu, 10 Jul 2025 21:51:30 -0700 Subject: [PATCH 55/80] bump version --- metabolomics_spectrum_resolver/templates/minimal.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/templates/minimal.html b/metabolomics_spectrum_resolver/templates/minimal.html index 0e1fd4e..e9bc625 100644 --- a/metabolomics_spectrum_resolver/templates/minimal.html +++ b/metabolomics_spectrum_resolver/templates/minimal.html @@ -50,7 +50,7 @@
From 7a9da0a0520cd290ca5b51238a8d5a18b07d84f8 Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Mon, 28 Jul 2025 11:09:58 -0700 Subject: [PATCH 56/80] Update MassBankEU API URL --- metabolomics_spectrum_resolver/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 7dcdd74..44b8b98 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -17,7 +17,7 @@ MS2LDA_SERVER = "http://ms2lda.org/basicviz/" MOTIFDB_SERVER = "http://ms2lda.org/motifdb/" MONA_SERVER = "https://massbank.us/rest/spectra/" -MASSBANKEUROPE_SERVER = "https://msbi.ipb-halle.de/MassBank-api/v1/records/" +MASSBANKEUROPE_SERVER = "https://msbi.ipb-halle.de/MassBank-api/records/" # USI specification: http://www.psidev.info/usi usi_pattern = re.compile( From 7cb6c5946785e8949f7bac65d79ca2595e81890c Mon Sep 17 00:00:00 2001 From: YasinEl Date: Fri, 5 Sep 2025 16:29:05 -0700 Subject: [PATCH 57/80] added norman support --- metabolomics_spectrum_resolver/parsing.py | 87 ++++++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 44b8b98..8803c3f 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -18,6 +18,7 @@ MOTIFDB_SERVER = "http://ms2lda.org/motifdb/" MONA_SERVER = "https://massbank.us/rest/spectra/" MASSBANKEUROPE_SERVER = "https://msbi.ipb-halle.de/MassBank-api/records/" +NORMAN_SERVER = "http://server.norman-data.eu:8770/getScan" # USI specification: http://www.psidev.info/usi usi_pattern = re.compile( @@ -45,8 +46,8 @@ r"^mzspec" # collection identifier # Unofficial proteomics spectral library identifier: MASSIVEKB - # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, MOTIFDB, MTBLS, ST - 
r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS|MTBLS\d+|ST\d{6}|)" + # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, MOTIFDB, MTBLS, ST, NORMAN + r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS|MTBLS\d+|ST\d{6}|NORMAN-[0-9a-fA-F-]+)" # msRun identifier r":(.*)" # index flag @@ -136,6 +137,8 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: spectrum, source_link = _parse_metabolomics_workbench(usi) elif collection.startswith("tinymass"): spectrum, source_link = _parse_tinymass(usi) + elif collection.startswith("norman"): + spectrum, source_link = _parse_norman(usi) else: raise UsiError(f"Unknown USI collection: {match.group(1)}", 400) splash_key = splash_builder.splash( @@ -961,3 +964,83 @@ def _parse_sequence(peptide: str, peptide_clean: str) -> Tuple[str, str, list]: modifications[i] = float(match.group()) previous_mod_len += found_len return peptide, peptide_clean, modifications + +from typing import Tuple +import urllib.parse +import requests + +# import spectrum_utils.spectrum as sus +# class UsiError(Exception): ... + + + +def _parse_norman(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + NORMAN_FILES_BASE = "https://files.dsfp.norman-data.eu/" + + match = _match_usi(usi) + _accession = match.group(1) # not used + file_path = match.group(2) # relative path, e.g. "webform/sample/.../file.mzML" + index_flag = match.group(3) + scan_no = match.group(4) + + if index_flag.lower() != "scan": + raise UsiError("Currently supported index flag: scan", 400) + + # Construct full URL from path + file_url = f"{NORMAN_FILES_BASE}{file_path.lstrip('/')}" + print(f"[DEBUG] Constructed NORMAN file URL: {file_url}") + + if not file_url.lower().endswith(".mzml"): + raise UsiError("NORMAN file URL must point to an .mzML file.", 400) + + # The service expects an URL-encoded file_path (not the full URL!) 
in the query parameters + encoded_path = urllib.parse.quote_plus(file_url, safe=":/") + params = { + "file_path": encoded_path, + "scan_number": str(scan_no), + } + print(f"[DEBUG] Request params: {params}") + + try: + r = requests.post(NORMAN_SERVER, params=params, headers={"accept": "*/*"}, data="") + print(f"[DEBUG] Requesting: {r.url}") # Shows full request URL with params + + r.raise_for_status() + + payload = r.json() + if not isinstance(payload, dict): + raise UsiError("Unexpected response format (not a JSON object).", 502) + + precursor_list = payload.get("precursormz", []) + try: + precursor_mz = float(precursor_list[0]) if precursor_list else 0.0 + except Exception: + precursor_mz = 0.0 + + charge = 0 # not provided by API + spec = payload.get("spectrum", []) + if not isinstance(spec, list) or not spec: + raise UsiError("No peaks in NORMAN scan response.", 502) + + try: + mz = [float(p["mz"]) for p in spec] + intensity = [float(p["intensity"]) for p in spec] + except Exception as e: + raise UsiError(f"Malformed peaks in NORMAN scan response: {e}", 502) + + spectrum = sus.MsmsSpectrum( + usi, + precursor_mz, + charge, + mz, + intensity, + ) + return spectrum, file_url # return the constructed full URL + + except requests.exceptions.HTTPError as e: + status = getattr(e.response, "status_code", 502) + raise UsiError(f"NORMAN scan lookup failed (HTTP {status}).", status) + except ValueError as e: + raise UsiError(f"NORMAN scan parsing error (invalid JSON): {e}", 502) + except Exception as e: + raise UsiError(f"NORMAN scan parsing error: {e}", 502) From 41cafd0a8008bf1582af3a329bc28235357de662 Mon Sep 17 00:00:00 2001 From: YasinEl Date: Fri, 5 Sep 2025 16:33:08 -0700 Subject: [PATCH 58/80] bug fix --- metabolomics_spectrum_resolver/parsing.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 8803c3f..1d4fd20 100644 --- 
a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -965,15 +965,6 @@ def _parse_sequence(peptide: str, peptide_clean: str) -> Tuple[str, str, list]: previous_mod_len += found_len return peptide, peptide_clean, modifications -from typing import Tuple -import urllib.parse -import requests - -# import spectrum_utils.spectrum as sus -# class UsiError(Exception): ... - - - def _parse_norman(usi: str) -> Tuple[sus.MsmsSpectrum, str]: NORMAN_FILES_BASE = "https://files.dsfp.norman-data.eu/" From ff07df031ae5b57b20c683b79bab2a807418f0cd Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Sep 2025 12:17:00 -0700 Subject: [PATCH 59/80] updating --- .gitmodules | 3 ++ .../mass-spec-package | 1 + metabolomics_spectrum_resolver/parsing.py | 32 ++++++++++++++++++- test/test_unit.py | 4 +++ 4 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 .gitmodules create mode 160000 metabolomics_spectrum_resolver/mass-spec-package diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e8a8b6e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "metabolomics_spectrum_resolver/mass-spec-package"] + path = metabolomics_spectrum_resolver/mass-spec-package + url = https://github.com/AkJay1722/mass-spec-package.git diff --git a/metabolomics_spectrum_resolver/mass-spec-package b/metabolomics_spectrum_resolver/mass-spec-package new file mode 160000 index 0000000..ac5f476 --- /dev/null +++ b/metabolomics_spectrum_resolver/mass-spec-package @@ -0,0 +1 @@ +Subproject commit ac5f4766e6debaa206e210cf9f62ea29e4d89c05 diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 44b8b98..2a85e4c 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -12,6 +12,9 @@ from metabolomics_spectrum_resolver.error import UsiError +from metabolomics_spectrum_resolver.zenodo_mzml_repo import mzml_repo + + timeout = 45 # seconds 
MS2LDA_SERVER = "http://ms2lda.org/basicviz/" @@ -46,7 +49,7 @@ # collection identifier # Unofficial proteomics spectral library identifier: MASSIVEKB # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, MOTIFDB, MTBLS, ST - r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS|MTBLS\d+|ST\d{6}|)" + r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS|MTBLS\d+|ST\d{6}|ZENODO-\d+|)" # msRun identifier r":(.*)" # index flag @@ -136,6 +139,8 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: spectrum, source_link = _parse_metabolomics_workbench(usi) elif collection.startswith("tinymass"): spectrum, source_link = _parse_tinymass(usi) + elif collection.startswith("zenodo"): + spectrum, source_link = _parse_zenodo(usi) else: raise UsiError(f"Unknown USI collection: {match.group(1)}", 400) splash_key = splash_builder.splash( @@ -473,6 +478,31 @@ def _parse_gnps2_dataset(usi: str) -> Tuple[sus.MsmsSpectrum, str]: except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): raise UsiError("Unknown GNPS2 Dataset USI", 404) +# parsing from Zenodo +def _parse_zenodo(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + zenodo_id = match.group(1).split("-")[-1] + filename = match.group(2) + index_flag = match.group(3) + if index_flag.lower() == "scan": + scan = match.group(4) + + zenodo_obj = mzml_repo(zenodo_id) + zenodo_obj.partial_indexing = False + scan_obj = zenodo_obj.get_scan(filename, int(scan)) + + # get peaks + intensity_list = scan_obj["intensities"] + mz_list = scan_obj["mz"] + charge = scan_obj["charge"] + precursor_mz = 0 + + source_link = f"https://zenodo.org/record/{zenodo_id}" + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz_list, intensity_list) + + return spectrum, source_link + # Parse TINYMASS task spectra def _parse_tinymass(usi: str) -> Tuple[sus.MsmsSpectrum, str]: match = _match_usi(usi) diff --git a/test/test_unit.py b/test/test_unit.py index 98782bf..1d303b9 100644 --- 
a/test/test_unit.py +++ b/test/test_unit.py @@ -219,6 +219,10 @@ def test_parse_motifdb(): assert exc_info.value.error_code == 404 +# def test_zenodo(): +# usi = "" +# spectrum, _, splash_key = parsing.parse_usi(usi) + def test_parse_timeout(): with unittest.mock.patch( "metabolomics_spectrum_resolver.parsing.requests.get", From c274e9c6c82d1126b8455a07ef527dafc1ee39a1 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Sep 2025 12:20:38 -0700 Subject: [PATCH 60/80] updating --- test/test_unit.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test/test_unit.py b/test/test_unit.py index 1d303b9..91b402b 100644 --- a/test/test_unit.py +++ b/test/test_unit.py @@ -218,11 +218,6 @@ def test_parse_motifdb(): parsing.parse_usi(usi.replace(":171163", ":this_index_does_not_exist")) assert exc_info.value.error_code == 404 - -# def test_zenodo(): -# usi = "" -# spectrum, _, splash_key = parsing.parse_usi(usi) - def test_parse_timeout(): with unittest.mock.patch( "metabolomics_spectrum_resolver.parsing.requests.get", From a7c794650218c75348d74cec50b0124aa487b590 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Sep 2025 15:17:29 -0700 Subject: [PATCH 61/80] bug fix for charge --- metabolomics_spectrum_resolver/mass-spec-package | 2 +- metabolomics_spectrum_resolver/parsing.py | 12 +++++++++++- .../templates/homepage.html | 6 ++++++ .../templates/minimal.html | 2 +- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/metabolomics_spectrum_resolver/mass-spec-package b/metabolomics_spectrum_resolver/mass-spec-package index ac5f476..c1cc52a 160000 --- a/metabolomics_spectrum_resolver/mass-spec-package +++ b/metabolomics_spectrum_resolver/mass-spec-package @@ -1 +1 @@ -Subproject commit ac5f4766e6debaa206e210cf9f62ea29e4d89c05 +Subproject commit c1cc52a73645122d2eaac3ba52b1eeb346c36dc6 diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 2a85e4c..0ae35ca 100644 --- 
a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -495,7 +495,17 @@ def _parse_zenodo(usi: str) -> Tuple[sus.MsmsSpectrum, str]: intensity_list = scan_obj["intensities"] mz_list = scan_obj["mz"] charge = scan_obj["charge"] - precursor_mz = 0 + precursor_mz = scan_obj["precursor_mz"] + + try: + charge = int(charge) + except: + charge = 0 + + try: + precursor_mz = float(precursor_mz) + except: + precursor_mz = 0 source_link = f"https://zenodo.org/record/{zenodo_id}" diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index 4efe0e0..6abe738 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -141,6 +141,12 @@

+ +
diff --git a/metabolomics_spectrum_resolver/templates/minimal.html b/metabolomics_spectrum_resolver/templates/minimal.html index e9bc625..a6be389 100644 --- a/metabolomics_spectrum_resolver/templates/minimal.html +++ b/metabolomics_spectrum_resolver/templates/minimal.html @@ -50,7 +50,7 @@
From 1279fe03b0295dd21dd2bb2f75ac121d3c383a26 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Sep 2025 15:21:19 -0700 Subject: [PATCH 62/80] missing file --- metabolomics_spectrum_resolver/zenodo_mzml_repo.py | 1 + 1 file changed, 1 insertion(+) create mode 120000 metabolomics_spectrum_resolver/zenodo_mzml_repo.py diff --git a/metabolomics_spectrum_resolver/zenodo_mzml_repo.py b/metabolomics_spectrum_resolver/zenodo_mzml_repo.py new file mode 120000 index 0000000..e34961a --- /dev/null +++ b/metabolomics_spectrum_resolver/zenodo_mzml_repo.py @@ -0,0 +1 @@ +mass-spec-package/zenodo_mzml_repo.py \ No newline at end of file From 755440d78c456a61c73bdab53282d6912fee8d3a Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Sep 2025 15:22:14 -0700 Subject: [PATCH 63/80] formatting --- metabolomics_spectrum_resolver/templates/homepage.html | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index 6abe738..6a2bda5 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -141,6 +141,8 @@

+
+
From 0cb0889430505ab6cdf60f00abeaf941a6c954c9 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Sep 2025 15:25:53 -0700 Subject: [PATCH 64/80] extending timing --- metabolomics_spectrum_resolver/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/tasks.py b/metabolomics_spectrum_resolver/tasks.py index 02192e1..297f05a 100644 --- a/metabolomics_spectrum_resolver/tasks.py +++ b/metabolomics_spectrum_resolver/tasks.py @@ -126,7 +126,7 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: return parsing.parse_usi(usi) -@celery_instance.task(time_limit=30, base=celery_once.QueueOnce) +@celery_instance.task(time_limit=45, base=celery_once.QueueOnce) def _task_parse_usi_or_spectrum( usi: str, spectrum: dict ) -> Tuple[sus.MsmsSpectrum, str, str]: From 9e05f4de68e88c21f0df5cdbcfe6941aaad72857 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 9 Sep 2025 20:58:49 -0700 Subject: [PATCH 65/80] updating examples --- .../templates/homepage.html | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index 6a2bda5..3adeaf9 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -160,16 +160,8 @@


- -
- - From adb1715f47b8c01d9d3e90d5f7ef82444c281475 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 15 Sep 2025 10:18:03 -0700 Subject: [PATCH 66/80] adding health testing --- docker-compose-production.yml | 20 ++++++++++++++++++++ docker-compose.yml | 29 +++++++++++++++++++++++++++++ logsapi/.gitkeep | 0 3 files changed, 49 insertions(+) create mode 100644 logsapi/.gitkeep diff --git a/docker-compose-production.yml b/docker-compose-production.yml index 9bcbe7f..49d40b4 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -20,6 +20,26 @@ services: max-size: "10m" max-file: "3" + metabolomicsusi-api1: + networks: + - default + - nginx-net + environment: + VIRTUAL_HOST: ${HOSTNAME:-api.metabolomics-usi.gnps2.org} + VIRTUAL_PORT: 5000 + LETSENCRYPT_HOST: ${HOSTNAME:-api.metabolomics-usi.gnps2.org} + LETSENCRYPT_EMAIL: mwang87@gmail.com + command: /app/run_server.sh + deploy: + resources: + limits: + memory: 16000M + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + metabolomicsusi-worker: deploy: resources: diff --git a/docker-compose.yml b/docker-compose.yml index 76a343f..845ea49 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,6 +14,35 @@ services: - default restart: always command: /app/run_dev_server.sh + labels: + autoheal: true + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5000/"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + + metabolomicsusi-api1: + build: + context: . 
+ dockerfile: Dockerfile + container_name: metabolomicsusi-api1 + volumes: + - ./tmp:/app/tmp:rw + - ./logsapi/:/app/logs:rw + networks: + - default + restart: always + command: /app/run_dev_server.sh + labels: + autoheal: true + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5000/"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s metabolomicsusi-worker: build: diff --git a/logsapi/.gitkeep b/logsapi/.gitkeep new file mode 100644 index 0000000..e69de29 From c2dd4fe07094915fb0ca1cadfcaf2efaa12d6287 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 15 Sep 2025 10:34:04 -0700 Subject: [PATCH 67/80] updating --- Dockerfile | 12 +++++++++--- docker-compose.yml | 6 ++++-- metabolomics_spectrum_resolver/views.py | 1 - 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index cdad5e0..f1eed3f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,17 @@ -FROM continuumio/miniconda3:4.10.3 +FROM ubuntu:22.04 MAINTAINER Mingxun Wang "mwang87@gmail.com" WORKDIR /app RUN apt-get update -y && \ apt-get install -y libxrender-dev && \ - apt-get install -y git-core libarchive-dev -RUN conda install -c conda-forge mamba + apt-get install -y git-core libarchive-dev build-essential wget vim curl + +# Install Mamba +ENV CONDA_DIR /opt/conda +RUN wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -O ~/miniforge.sh && /bin/bash ~/miniforge.sh -b -p /opt/conda +ENV PATH=$CONDA_DIR/bin:$PATH +RUN echo "export PATH=$CONDA_DIR:$PATH" >> ~/.bashrc + RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery==5.3.6 \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ joblib matplotlib==3.6.3 numba numpy openssl qrcode rdkit requests \ diff --git a/docker-compose.yml b/docker-compose.yml index 845ea49..38b67f4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,7 +17,7 @@ services: labels: autoheal: true healthcheck: - test: ["CMD", "curl", "-f", 
"http://localhost:5000/"] + test: ["CMD", "curl", "-f", "http://localhost:5000/heartbeat"] interval: 30s timeout: 10s retries: 3 @@ -28,6 +28,8 @@ services: context: . dockerfile: Dockerfile container_name: metabolomicsusi-api1 + ports: + - "5088:5000" volumes: - ./tmp:/app/tmp:rw - ./logsapi/:/app/logs:rw @@ -38,7 +40,7 @@ services: labels: autoheal: true healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:5000/"] + test: ["CMD", "curl", "-f", "http://localhost:5000/heartbeat"] interval: 30s timeout: 10s retries: 3 diff --git a/metabolomics_spectrum_resolver/views.py b/metabolomics_spectrum_resolver/views.py index 61a20eb..7be9950 100644 --- a/metabolomics_spectrum_resolver/views.py +++ b/metabolomics_spectrum_resolver/views.py @@ -39,7 +39,6 @@ def render_homepage(): return flask.render_template("homepage.html") - @blueprint.route("/contributors", methods=["GET"]) def render_contributors(): return flask.render_template("contributors.html") From 532122b993e4a7fc7d468f2e3b11259772164f24 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 16 Sep 2025 10:58:16 -0700 Subject: [PATCH 68/80] stopping load test --- .github/workflows/loadtest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/loadtest.yml b/.github/workflows/loadtest.yml index 921ec01..74c04fb 100644 --- a/.github/workflows/loadtest.yml +++ b/.github/workflows/loadtest.yml @@ -38,7 +38,7 @@ jobs: run: | export PATH="$HOME/miniconda/bin:$PATH" source ~/.bashrc - locust -f ./test/locustfile.py --headless -u 4 -r 10 \ - -H https://metabolomics-usi.ucsd.edu/ -t 120s + # locust -f ./test/locustfile.py --headless -u 4 -r 10 \ + # -H https://metabolomics-usi.ucsd.edu/ -t 120s From 15269ad200708617d8dcf1a96eb656ee9c90fe40 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 16 Sep 2025 11:04:04 -0700 Subject: [PATCH 69/80] updating workers count --- run_worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_worker.sh b/run_worker.sh 
index ee9c33f..23601fb 100755 --- a/run_worker.sh +++ b/run_worker.sh @@ -7,4 +7,4 @@ export C_FORCE_ROOT="true" celery -A metabolomics_spectrum_resolver.tasks_analytics worker --concurrency=1 -Q worker-analytics --loglevel INFO --detach #TODO: Make sure we don't run this worker as root -celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=16,1 -Q worker --max-tasks-per-child 10 --loglevel INFO +celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=32,1 -Q worker --max-tasks-per-child 10 --loglevel INFO From 535204f6842f5268234423b77c493e2e68e2cea0 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 16 Sep 2025 11:04:22 -0700 Subject: [PATCH 70/80] adding some debugging --- metabolomics_spectrum_resolver/tasks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metabolomics_spectrum_resolver/tasks.py b/metabolomics_spectrum_resolver/tasks.py index 297f05a..4574f70 100644 --- a/metabolomics_spectrum_resolver/tasks.py +++ b/metabolomics_spectrum_resolver/tasks.py @@ -116,6 +116,10 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: "parse_usi_or_spectrum" ) + # Debugging logging + import sys + sys.stderr.write(f"Parsing USI: {usi}\n", file=sys.stderr, flush=True) + # First attempt to schedule with Celery. 
try: return _task_parse_usi.apply_async(args=(usi,)).get() From 39a6d900188e6b29b6c72776f41b94d2a45a75bc Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 16 Sep 2025 11:06:33 -0700 Subject: [PATCH 71/80] bug fix --- metabolomics_spectrum_resolver/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/tasks.py b/metabolomics_spectrum_resolver/tasks.py index 4574f70..b223b3e 100644 --- a/metabolomics_spectrum_resolver/tasks.py +++ b/metabolomics_spectrum_resolver/tasks.py @@ -118,7 +118,7 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: # Debugging logging import sys - sys.stderr.write(f"Parsing USI: {usi}\n", file=sys.stderr, flush=True) + sys.stderr.write("Parsing {}\n".format(usi), file=sys.stderr, flush=True) # First attempt to schedule with Celery. try: From 40ebe237e7540cb14755eb68dcbb075f071d267b Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 16 Sep 2025 11:07:26 -0700 Subject: [PATCH 72/80] bug fix --- metabolomics_spectrum_resolver/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/tasks.py b/metabolomics_spectrum_resolver/tasks.py index b223b3e..77418c0 100644 --- a/metabolomics_spectrum_resolver/tasks.py +++ b/metabolomics_spectrum_resolver/tasks.py @@ -118,7 +118,7 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: # Debugging logging import sys - sys.stderr.write("Parsing {}\n".format(usi), file=sys.stderr, flush=True) + print("Parsing {}\n".format(usi), file=sys.stderr, flush=True) # First attempt to schedule with Celery. 
try: From 9399e97b43dbb0fe7843b8a23955f411330ddb93 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 16 Sep 2025 11:10:02 -0700 Subject: [PATCH 73/80] adding more redis --- docker-compose-production.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-production.yml b/docker-compose-production.yml index 49d40b4..8e7399b 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -55,7 +55,7 @@ services: deploy: resources: limits: - memory: 4000M + memory: 8000M logging: driver: "json-file" options: From 4e5d78cbdc794be936822326f1b51eb5be7c7441 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 22 Dec 2025 16:21:17 -0800 Subject: [PATCH 74/80] adding more server for the parsing --- metabolomics_spectrum_resolver/parsing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 9f389de..5c748f3 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -413,8 +413,11 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: gnps2_server_url_list = [ "https://gnps2.org", "https://beta.gnps2.org", - "http://dev.gnps2.org:4000", + "http://dev2.gnps2.org", "https://de.gnps2.org", + "https://br.gnps2.org", + "https://kr.gnps2.org", + "https://gnps2.jgi.doe.gov", ] for gnps2server_url in gnps2_server_url_list: From d37d5f5eee1bf7aee46b9b4cd34483677bbca65c Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Sun, 18 Jan 2026 15:09:56 -0800 Subject: [PATCH 75/80] Update MassBank API Endpoint. 
--- metabolomics_spectrum_resolver/parsing.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 5c748f3..9d4fd5c 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -14,13 +14,17 @@ from metabolomics_spectrum_resolver.zenodo_mzml_repo import mzml_repo +import logging + +# Init logging +logging.basicConfig(level=logging.INFO) timeout = 45 # seconds MS2LDA_SERVER = "http://ms2lda.org/basicviz/" MOTIFDB_SERVER = "http://ms2lda.org/motifdb/" MONA_SERVER = "https://massbank.us/rest/spectra/" -MASSBANKEUROPE_SERVER = "https://msbi.ipb-halle.de/MassBank-api/records/" +MASSBANKEUROPE_SERVER = "https://massbank.eu/MassBank-api/records/" NORMAN_SERVER = "http://server.norman-data.eu:8770/getScan" # USI specification: http://www.psidev.info/usi From dfcf4f4325e94d6722ec163cda6aed2d4a83d966 Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Sun, 18 Jan 2026 15:11:01 -0800 Subject: [PATCH 76/80] If EU and MONA fail, raise MONA error from EU error. Distinct UsiError for MONA. --- metabolomics_spectrum_resolver/parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 9d4fd5c..b5761fa 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -633,7 +633,7 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: return _parse_massbankEurope(usi) except UsiError: - pass + return _parse_mona(usi) # Either MassBank EU Failed or it's a MoNA entry, fallback to MoNA. 
# Let the exception propagate if it fails @@ -700,7 +700,7 @@ def _parse_mona(usi: str) -> Tuple[sus.MsmsSpectrum, str]: return spectrum, source_link except requests.exceptions.HTTPError: - raise UsiError("Unknown MassBank USI", 404) + raise UsiError("Unknown MONA USI", 404) # Parse MassBank entry. def _parse_massbankEurope(usi: str) -> Tuple[sus.MsmsSpectrum, str]: From 76c35f0c69321211653325f8830a7a13cc4e0a2c Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Sun, 18 Jan 2026 15:01:52 -0800 Subject: [PATCH 77/80] Checkout submodiles in unit tests. --- .github/workflows/unittest.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 7a3c890..6e1dfa6 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -14,6 +14,8 @@ jobs: # TODO: We probably should switch to using the Docker version. steps: - uses: actions/checkout@v2 + with: + submodules: 'true' - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: From 5123d8190a7347b6f33877523caa3f93efaa01f4 Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Sun, 18 Jan 2026 15:14:44 -0800 Subject: [PATCH 78/80] Update dependencies in unittest workflow --- .github/workflows/unittest.yml | 2 +- requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 6e1dfa6..91af374 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -34,7 +34,7 @@ jobs: apt-get update -y && apt-get install -y git-core source activate usi pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" - pip install celery celery-once joblib + pip install celery celery-once joblib umami-analytics echo "source activate usi" > ~/.bashrc - name: Run unit and integration tests run: | diff --git a/requirements.txt b/requirements.txt index 62ea414..849d8a4 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -18,6 +18,7 @@ redis requests requests_cache scipy +umami-analytics spectrum_utils werkzeug==2.0.0 git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python \ No newline at end of file From 9487fafe1f244e5b01a3d53e22429bdfde10bb64 Mon Sep 17 00:00:00 2001 From: Michael Strobel Date: Sun, 18 Jan 2026 16:20:31 -0800 Subject: [PATCH 79/80] Quote plus for GNPS2 tasks. --- metabolomics_spectrum_resolver/parsing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index b5761fa..9d11fe1 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -284,7 +284,6 @@ def _match_usi(usi: str) -> re.Match: raise UsiError(f"Incorrectly formatted USI: {usi}", 400) return match - def _convert_legacy_usi(usi: str) -> str: """ Convert a legacy format metabolomics USI to the proper metabolomics USI @@ -413,6 +412,9 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: scan = match.group(4) + # Reconstruct the USI for URL usage + request_usi = f"mzspec:GNPS2:TASK-{task}-{urllib.parse.quote_plus(filename)}:scan:{scan}" + # We will try in order these GNPS2 URLs to see if the task is actually there gnps2_server_url_list = [ "https://gnps2.org", @@ -427,7 +429,7 @@ def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: for gnps2server_url in gnps2_server_url_list: try: request_url = ( - f"{gnps2server_url}/spectrumpeaks?format=json&usi={usi}" + f"{gnps2server_url}/spectrumpeaks?format=json&usi={request_usi}" ) lookup_request = requests.get(request_url, timeout=timeout) lookup_request.raise_for_status() From 0f6e3c3441d757590c0961496b4322ab8b8a640b Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 19 Jan 2026 16:07:05 -0800 Subject: [PATCH 80/80] updating --- .dockerignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/.dockerignore b/.dockerignore index 8c3d5a0..79554af 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,5 @@ logs +logsapi output temp -tmp \ No newline at end of file +tmp