diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..c7945e3 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,29 @@ +name: Docker Build Test + +on: + push: + branches: + master + pull_request: + branches: + master + schedule: + - cron: '0 0 * * 1' + +jobs: + build-test: + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: [3.8] +# TODO: We probably should switch to using the Docker version. + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Build Docker + run: | + docker build . diff --git a/Dockerfile b/Dockerfile index fa855a9..24dda63 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,15 @@ -FROM continuumio/miniconda3:4.8.2 +FROM continuumio/miniconda3:4.10.3 MAINTAINER Mingxun Wang "mwang87@gmail.com" WORKDIR /app RUN apt-get update -y && \ apt-get install -y libxrender-dev && \ apt-get install -y git-core -RUN conda create -y -n usi -c conda-forge -c bioconda -c defaults celery \ +RUN conda install -c conda-forge mamba +RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ joblib matplotlib numba numpy openssl qrcode rdkit requests \ - requests-cache scipy spectrum_utils werkzeug + requests-cache scipy spectrum_utils==0.3.5 werkzeug==2.0.0 RUN /bin/bash -c 'source activate usi && pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" && pip install celery-once' RUN echo "source activate usi" > ~/.bashrc diff --git a/Makefile b/Makefile index 5d5f5bc..586efe7 100644 --- a/Makefile +++ b/Makefile @@ -23,19 +23,19 @@ clear-cache: #Docker Compose server-compose-interactive: - docker-compose build - docker-compose up + docker-compose --compatibility build + docker-compose --compatibility up server-compose: - 
docker-compose build - docker-compose up -d + docker-compose --compatibility build + docker-compose --compatibility up -d server-compose-production-interactive: - docker-compose build + docker-compose --compatibility build docker-compose -f docker-compose.yml -f docker-compose-production.yml --compatibility up server-compose-production: - docker-compose build + docker-compose --compatibility build docker-compose -f docker-compose.yml -f docker-compose-production.yml --compatibility up -d attach: diff --git a/docker-compose-production.yml b/docker-compose-production.yml index 5423467..827f08a 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -5,9 +5,11 @@ services: - default - nginx-net environment: - VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org - VIRTUAL_PORT: 5087 - LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + #VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + VIRTUAL_HOST: metabolomics-usi.gnps2.org + VIRTUAL_PORT: 5000 + #LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + LETSENCRYPT_HOST: metabolomics-usi.gnps2.org LETSENCRYPT_EMAIL: mwang87@gmail.com command: /app/run_server.sh deploy: diff --git a/metabolomics_spectrum_resolver/dashinterface.py b/metabolomics_spectrum_resolver/dashinterface.py index 5069ef2..26abbd9 100644 --- a/metabolomics_spectrum_resolver/dashinterface.py +++ b/metabolomics_spectrum_resolver/dashinterface.py @@ -426,7 +426,7 @@ dbc.CardHeader(html.H5("Contributors")), dbc.CardBody( [ - "Mingxun Wang, PhD – UC San Diego", + "Mingxun Wang, PhD – UC Riverside", html.Br(), "Wout Bittremieux, PhD – UC San Diego", html.Br(), diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index f488e4c..c0b572f 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -4,6 +4,8 @@ from typing import Tuple import requests +import pandas as pd 
+from io import StringIO import urllib.parse import spectrum_utils.spectrum as sus import splash @@ -25,7 +27,7 @@ # PXLnnnnnn # Unofficial: MASSIVEKB # https://github.com/HUPO-PSI/usi/blob/master/CollectionIdentifiers.md - r":(MSV\d{9}|PXD\d{6}|PXL\d{6}|RPXD\d{6}|MassIVE)" + r":(MSV\d{9}|PXD\d{6}|PXL\d{6}|RPXD\d{6}|ST\d{6}|MassIVE)" # msRun identifier r":(.*)" # index flag @@ -116,6 +118,8 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: spectrum, source_link = _parse_ms2lda(usi) elif collection == "motifdb": spectrum, source_link = _parse_motifdb(usi) + elif collection.startswith("st"): + spectrum, source_link = _parse_metabolomics_workbench(usi) else: raise UsiError(f"Unknown USI collection: {match.group(1)}", 400) splash_key = splash_builder.splash( @@ -596,6 +600,57 @@ def _parse_motifdb(usi: str) -> Tuple[sus.MsmsSpectrum, str]: raise UsiError("Unknown MOTIFDB USI", 404) +# Parse Metabolomics Workbench (ST accession) USIs; only the "scan" index flag is supported. +def _parse_metabolomics_workbench(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + accession = match.group(1) + filename = match.group(2) + index_flag = match.group(3) + index = match.group(4) + + if index_flag.lower() != "scan": + raise UsiError( + "Currently supported MW index flags: scan", 400 + ) + try: + request_url = ( + f"https://www.metabolomicsworkbench.org/" + f"data/ms2.php?A={accession}.zip" + f"&F={urllib.parse.quote_plus(filename)}&S={index}" + ) + + # TODO: Do some extra exception handling if we don't find the filename directly. We might need to hit another API to get the full filename + # given just the basename. + + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + + response_text = lookup_request.text + response_text = (response_text.replace("
", "").replace("

", "").lstrip().rstrip()) + + # Parsing the MW response: precursor m/z is taken from the first line, charge from the third line, and the peak table (columns "m/z" and "intensity") is read after skipping four rows. NOTE(review): only requests HTTPError is handled below; a response in any other shape fails inside float()/int()/read_csv rather than raising UsiError - confirm this is intended. + precursor_mz = float(response_text.split("\n")[0].split(":")[-1].replace("\"", "")) + charge = int(response_text.split("\n")[2].split(":")[-1].replace("\"", "")) + peaks_df = pd.read_csv(StringIO(response_text), sep=r" +", skiprows=4) + mz = list(peaks_df["m/z"]) + intensity = list(peaks_df["intensity"]) + + source_link = ( + f"https://www.metabolomicsworkbench.org/" + f"data/DRCCMetadata.php?Mode=Study&StudyID={accession}&StudyType=MS&ResultType=1" + ) + + spectrum = sus.MsmsSpectrum( + usi, + float(precursor_mz), + int(charge), + mz, + intensity, + ) + return spectrum, source_link + except requests.exceptions.HTTPError: + raise UsiError("Unknown MW USI", 404) + def _parse_sequence(peptide: str, peptide_clean: str) -> Tuple[str, str, list]: # Parse out gapped sequence (e.g. X+129.04259), faking it # with Glycine as the base residue and adding more mods to diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index 2e827bd..a6cf1ad 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -136,7 +136,7 @@


- +
diff --git a/requirements.txt b/requirements.txt index fdddf45..62ea414 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,5 +19,5 @@ requests requests_cache scipy spectrum_utils -werkzeug -git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python +werkzeug==2.0.0 +git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python \ No newline at end of file diff --git a/test/usi_test_data.py b/test/usi_test_data.py index b80eee0..670b4b2 100644 --- a/test/usi_test_data.py +++ b/test/usi_test_data.py @@ -29,6 +29,8 @@ "mzspec:MassIVE:TASK-f4b86b150a164ee4a440b661e97a7193-spectra/specs_ms.mgf:scan:287215:HPYFYAPELLF[-10.059]FAKR/3", # MassIVE Task USIs disguised as GNPS Task USIs "mzspec:GNPS:TASK-f4b86b150a164ee4a440b661e97a7193-spectra/specs_ms.mgf:scan:287215:HPYFYAPELLF[-10.059]FAKR/3", + # Metabolomics Workbench USIs + "mzspec:ST000003:StemCell+Data+and+Raw+Files/iPSC-T1R1:scan:3", # Legacy cases. "mzspec:GNPSTASK-c95481f0c53d42e78a61bf899e9f9adb:spectra/specs_ms.mgf:scan:1943", "mzspec:GNPSTASK-64b22841ab3548f987b3cfc18696a581:spectra/specs_ms.mgf:scan:1469",