From 303d9e72f5aaef8036ec3cbfaf843bf4578cd100 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Fri, 13 May 2022 17:35:40 -0700 Subject: [PATCH 01/14] adding workbench test --- test/usi_test_data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/usi_test_data.py b/test/usi_test_data.py index 53918b8..f3cfbaf 100644 --- a/test/usi_test_data.py +++ b/test/usi_test_data.py @@ -28,6 +28,8 @@ "mzspec:MassIVE:TASK-f4b86b150a164ee4a440b661e97a7193-spectra/specs_ms.mgf:scan:287215:HPYFYAPELLF[-10.059]FAKR/3", # MassIVE Task USIs disguised as GNPS Task USIs "mzspec:GNPS:TASK-f4b86b150a164ee4a440b661e97a7193-spectra/specs_ms.mgf:scan:287215:HPYFYAPELLF[-10.059]FAKR/3", + # Metabolomics Workbench USIs + "mzspec:ST000003:StemCell+Data+and+Raw+Files/iPSC-T1R1:scan:3", # Legacy cases. "mzspec:GNPSTASK-c95481f0c53d42e78a61bf899e9f9adb:spectra/specs_ms.mgf:scan:1943", "mzspec:GNPSTASK-64b22841ab3548f987b3cfc18696a581:spectra/specs_ms.mgf:scan:1469", From dccf8b89d8c1a97fe977250b593467f537ffbe0d Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 25 Jul 2022 15:32:50 -0700 Subject: [PATCH 02/14] adding more parsing --- metabolomics_spectrum_resolver/parsing.py | 53 ++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index dc1b0e6..0e55daf 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -4,6 +4,8 @@ from typing import Tuple import requests +import pandas as pd +from io import StringIO import urllib.parse import spectrum_utils.spectrum as sus import splash @@ -25,7 +27,7 @@ # PXLnnnnnn # Unofficial: MASSIVEKB # https://github.com/HUPO-PSI/usi/blob/master/CollectionIdentifiers.md - r":(MSV\d{9}|PXD\d{6}|PXL\d{6}|RPXD\d{6}|MassIVE)" + r":(MSV\d{9}|PXD\d{6}|PXL\d{6}|RPXD\d{6}|ST\d{6}|MassIVE)" # msRun identifier r":(.*)" # index flag @@ -116,6 +118,8 @@ def parse_usi(usi: str) -> 
Tuple[sus.MsmsSpectrum, str, str]: spectrum, source_link = _parse_ms2lda(usi) elif collection == "motifdb": spectrum, source_link = _parse_motifdb(usi) + elif collection.startswith("st"): + spectrum, source_link = _parse_metabolomics_workbench(usi) else: raise UsiError(f"Unknown USI collection: {match.group(1)}", 400) splash_key = splash_builder.splash( @@ -590,6 +594,53 @@ def _parse_motifdb(usi: str) -> Tuple[sus.MsmsSpectrum, str]: raise UsiError("Unknown MOTIFDB USI", 404) +# Parse Metabolomics Workbench. +def _parse_metabolomics_workbench(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + accession = match.group(1) + filename = match.group(2) + index_flag = match.group(3) + index = match.group(4) + + if index_flag.lower() != "scan": + raise UsiError( + "Currently supported MW index flags: scan", 400 + ) + try: + request_url = ( + f"https://www.metabolomicsworkbench.org/" + f"data/ms2.php?A={accession}.zip" + f"&F={urllib.parse.quote_plus(filename)}&S={index}" + ) + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + + response_text = lookup_request.text + response_text = response_text.replace("
", "").replace("

", "").lstrip().rstrip() + + # Parsing the MW Response + precursor_mz = float(response_text.split("\n")[0].split(":")[-1].replace("\"", "")) + charge = int(response_text.split("\n")[2].split(":")[-1].replace("\"", "")) + peaks_df = pd.read_csv(StringIO(response_text), sep=r" +", skiprows=4) + mz = list(peaks_df["m/z"]) + intensity = list(peaks_df["intensity"]) + + source_link = ( + f"https://www.metabolomicsworkbench.org/" + f"data/DRCCMetadata.php?Mode=Study&StudyID={accession}&StudyType=MS&ResultType=1" + ) + + spectrum = sus.MsmsSpectrum( + usi, + float(precursor_mz), + int(charge), + mz, + intensity, + ) + return spectrum, source_link + except requests.exceptions.HTTPError: + raise UsiError("Unknown MW USI", 404) + def _parse_sequence(peptide: str, peptide_clean: str) -> Tuple[str, str, list]: # Parse out gapped sequence (e.g. X+129.04259), faking it # with Glycine as the base residue and adding more mods to From 6f5762e394064c0e8ebf7a1623cffce84f7166b6 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 25 Jul 2022 15:44:02 -0700 Subject: [PATCH 03/14] formatting --- metabolomics_spectrum_resolver/parsing.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 0e55daf..d916915 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -616,7 +616,12 @@ def _parse_metabolomics_workbench(usi: str) -> Tuple[sus.MsmsSpectrum, str]: lookup_request.raise_for_status() response_text = lookup_request.text - response_text = response_text.replace("
", "").replace("

", "").lstrip().rstrip() + response_text = ( ++ response_text.replace("
", "")
++            .replace("

", "") ++ .lstrip() ++ .rstrip() ++ ) # Parsing the MW Response precursor_mz = float(response_text.split("\n")[0].split(":")[-1].replace("\"", "")) From 3e2e0bd98dd27b25b3702ee277f22e120073a7c1 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Sun, 23 Oct 2022 11:53:33 -0700 Subject: [PATCH 04/14] fixing the port --- docker-compose-production.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-production.yml b/docker-compose-production.yml index 5423467..dc110df 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -6,7 +6,7 @@ services: - nginx-net environment: VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org - VIRTUAL_PORT: 5087 + VIRTUAL_PORT: 5000 LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org LETSENCRYPT_EMAIL: mwang87@gmail.com command: /app/run_server.sh From bf8d8a4298448352a81b70e5884b8691954f38a2 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Fri, 11 Nov 2022 14:07:28 -0800 Subject: [PATCH 05/14] adding automated builds --- .github/workflows/build.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..471daab --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,25 @@ +name: Docker Build Test + +on: + - push + - pull_request + schedule: + - cron: '0 0 * * 1' + +jobs: + build-test: + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: [3.8] +# TODO: We probably should switch to using the Docker version. + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Build Docker + run: | + cd basic && docker build . 
From f134587f17b1020129651e141b165048396d677c Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Fri, 11 Nov 2022 14:08:34 -0800 Subject: [PATCH 06/14] upgrading base --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fa855a9..63a95df 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM continuumio/miniconda3:4.8.2 +FROM continuumio/miniconda3:4.10.3 MAINTAINER Mingxun Wang "mwang87@gmail.com" WORKDIR /app From d8cbd087657b646d0f465ea3233841e0dd3ae7b3 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 14 Nov 2022 15:11:18 -0800 Subject: [PATCH 07/14] updating versions of packages --- Dockerfile | 2 +- Makefile | 12 ++++++------ requirements.txt | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 63a95df..be424e5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ RUN apt-get update -y && \ RUN conda create -y -n usi -c conda-forge -c bioconda -c defaults celery \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ joblib matplotlib numba numpy openssl qrcode rdkit requests \ - requests-cache scipy spectrum_utils werkzeug + requests-cache scipy spectrum_utils werkzeug==2.0.0 RUN /bin/bash -c 'source activate usi && pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" && pip install celery-once' RUN echo "source activate usi" > ~/.bashrc diff --git a/Makefile b/Makefile index 5d5f5bc..586efe7 100644 --- a/Makefile +++ b/Makefile @@ -23,19 +23,19 @@ clear-cache: #Docker Compose server-compose-interactive: - docker-compose build - docker-compose up + docker-compose --compatibility build + docker-compose --compatibility up server-compose: - docker-compose build - docker-compose up -d + docker-compose --compatibility build + docker-compose --compatibility up -d server-compose-production-interactive: - docker-compose build + docker-compose --compatibility build docker-compose -f docker-compose.yml -f 
docker-compose-production.yml --compatibility up server-compose-production: - docker-compose build + docker-compose --compatibility build docker-compose -f docker-compose.yml -f docker-compose-production.yml --compatibility up -d attach: diff --git a/requirements.txt b/requirements.txt index fdddf45..62ea414 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,5 +19,5 @@ requests requests_cache scipy spectrum_utils -werkzeug -git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python +werkzeug==2.0.0 +git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python \ No newline at end of file From ac1fec85b7a1a11fde42bb4fa9d42d589362a179 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 14 Nov 2022 15:12:21 -0800 Subject: [PATCH 08/14] updating building --- .github/workflows/build.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 471daab..01b4964 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,10 +1,14 @@ name: Docker Build Test on: - - push - - pull_request + push: + branches: + master + pull_request: + branches: + master schedule: - - cron: '0 0 * * 1' + - cron: '0 0 * * 1' jobs: build-test: From 7400d5ff4ea455bcaa71344619f8ff6dd49683d5 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 14 Nov 2022 15:24:09 -0800 Subject: [PATCH 09/14] using mambda --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index be424e5..855af87 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,8 @@ WORKDIR /app RUN apt-get update -y && \ apt-get install -y libxrender-dev && \ apt-get install -y git-core -RUN conda create -y -n usi -c conda-forge -c bioconda -c defaults celery \ +RUN conda install -c conda-forge mamba +RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ joblib 
matplotlib numba numpy openssl qrcode rdkit requests \ requests-cache scipy spectrum_utils werkzeug==2.0.0 From b32683b9ece85feb9b80e38fe7671e6164da77a4 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Mon, 14 Nov 2022 21:33:19 -0800 Subject: [PATCH 10/14] pinning version of spectrum_utils --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 855af87..24dda63 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ RUN conda install -c conda-forge mamba RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ joblib matplotlib numba numpy openssl qrcode rdkit requests \ - requests-cache scipy spectrum_utils werkzeug==2.0.0 + requests-cache scipy spectrum_utils==0.3.5 werkzeug==2.0.0 RUN /bin/bash -c 'source activate usi && pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" && pip install celery-once' RUN echo "source activate usi" > ~/.bashrc From 0e518027dbca0a9f57d3f0a27b29762e9e0c8a3b Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 15 Nov 2022 13:54:29 -0800 Subject: [PATCH 11/14] limiting dns --- docker-compose-production.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docker-compose-production.yml b/docker-compose-production.yml index dc110df..827f08a 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -5,9 +5,11 @@ services: - default - nginx-net environment: - VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + #VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + VIRTUAL_HOST: metabolomics-usi.gnps2.org VIRTUAL_PORT: 5000 - LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + #LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + LETSENCRYPT_HOST: metabolomics-usi.gnps2.org LETSENCRYPT_EMAIL: mwang87@gmail.com command: /app/run_server.sh deploy: From 
92c02986b5ce3bddfb83618bacb12cbc094f6405 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 13 Dec 2022 23:14:48 -0800 Subject: [PATCH 12/14] fixing build --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 01b4964..c7945e3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,4 +26,4 @@ jobs: python-version: ${{ matrix.python-version }} - name: Build Docker run: | - cd basic && docker build . + docker build . From c0a15f3918d97fabfedcdfe27b2544541ac187e5 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 13 Dec 2022 23:38:08 -0800 Subject: [PATCH 13/14] adding a todo --- metabolomics_spectrum_resolver/parsing.py | 11 +++++------ .../templates/homepage.html | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index 7e7f107..c0b572f 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -618,16 +618,15 @@ def _parse_metabolomics_workbench(usi: str) -> Tuple[sus.MsmsSpectrum, str]: f"data/ms2.php?A={accession}.zip" f"&F={urllib.parse.quote_plus(filename)}&S={index}" ) + + # TODO: Do some extra exception handling if we don't find the filename directly. We might need to hit another API to get the full filename + # given just the basename + lookup_request = requests.get(request_url, timeout=timeout) lookup_request.raise_for_status() response_text = lookup_request.text - response_text = ( -+ response_text.replace("
", "")
-+            .replace("

", "") -+ .lstrip() -+ .rstrip() -+ ) + response_text = (response_text.replace("
", "").replace("

", "").lstrip().rstrip()) # Parsing the MW Response precursor_mz = float(response_text.split("\n")[0].split(":")[-1].replace("\"", "")) diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index 2e827bd..a6cf1ad 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -136,7 +136,7 @@


From 0964a5725f4d21be4c89b8902d55b75e94b0e688 Mon Sep 17 00:00:00 2001 From: Mingxun Wang Date: Tue, 13 Dec 2022 23:46:49 -0800 Subject: [PATCH 14/14] updating affiliation --- metabolomics_spectrum_resolver/dashinterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metabolomics_spectrum_resolver/dashinterface.py b/metabolomics_spectrum_resolver/dashinterface.py index 5069ef2..26abbd9 100644 --- a/metabolomics_spectrum_resolver/dashinterface.py +++ b/metabolomics_spectrum_resolver/dashinterface.py @@ -426,7 +426,7 @@ dbc.CardHeader(html.H5("Contributors")), dbc.CardBody( [ - "Mingxun Wang, PhD – UC San Diego", + "Mingxun Wang, PhD – UC Riverside", html.Br(), "Wout Bittremieux, PhD – UC San Diego", html.Br(),