AltinLab · ljwoods2 · Jun 13, 2025 · Jun 13, 2025 · Jun 13, 2025 · Jun 13, 2025
diff --git a/.github/workflows/gh-ci.yml b/.github/workflows/gh-ci.yml
@@ -0,0 +1,76 @@
+name: GH Actions CI
+'on':
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+      - develop
+  schedule:
+    # Weekly tests run at midnight (UTC) every Sunday, against main by default:
+    #   Scheduled workflows run on the latest commit on the default or base branch.
+    #   (from https://help.github.com/en/actions/reference/events-that-trigger-workflows#scheduled-events-schedule)
+    - cron: 0 0 * * 0
+
+concurrency:
+  # Specific group naming so CI is only cancelled
+  # within same PR or on merge to main
+  group: ${{ github.ref }}-${{ github.head_ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+defaults:
+  run:
+    shell: bash -l {0}
+
+jobs:
+  main-tests:
+    if: github.repository == 'ljwoods2/TCRdock'
+    # needs: environment-config
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - macOS-latest
+          - ubuntu-latest
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Build information
+        run: |
+          uname -a
+          df -h
+          ulimit -a
+
+      # Builds the conda test env for this matrix entry; input reference: https://github.com/conda-incubator/setup-miniconda
+      - name: Create & activate conda env
+        uses: conda-incubator/setup-miniconda@v3  # v2 runs on a Node.js runtime that GitHub has deprecated
+        with:
+          python-version: ${{ matrix.python-version }}  # one env per Python in the job matrix
+          add-pip-as-python-dependency: true
+          auto-activate-base: false
+          miniconda-version: "latest"
+          environment-file: devtools/conda-envs/test_env.yaml
+          activate-environment: tcrdock-test  # same name as declared in the environment file
+          auto-update-conda: true
+          show-channel-urls: true
+
+      - name: Install package
+        run: |
+          python --version
+          python -m pip install .
+
+      - name: Python information
+        run: |
+          which python
+          which pip
+          pip list
+          conda info
+          conda list
+
+      - name: Run tests
+        run: |
+          pytest -v -x --color=yes tcrdock/tests
diff --git a/.gitignore b/.gitignore
@@ -2,11 +2,21 @@
 *pyc
 __pycache__
 *.fasta.pdb
+*.fasta.blast
+*.fasta
+*.tsv
 *.phr
 *.pin
 *.pot
 *.psq
 *.ptf
 *.pto
 imgt_prot_blast_db_*fasta
-ncbi-blast-*
+ncbi-blast-*
+*.egg-info
+**/*.log
+**/*.ipynb
+.vscode
+.nextflow
+build
+tmp
diff --git a/README.md b/README.md
@@ -137,14 +137,12 @@ The non-AlphaFold Python package requirements are listed in `requirements.txt`.
 Those specific
 package versions should work, but there should also be plenty of flexibility on the
 versions. The TCR and MHC parsing code also requires the NCBI BLAST+ software
-to be installed, which can be done by running the script
-`download_blast.py`. A potential installation route would be:
+to be installed, which can be done using conda. A potential installation route would be:
 
 ```
-conda create --name tcrdock_test python=3.8
+conda create -c conda-forge -c bioconda --name tcrdock_test python blast
 source activate tcrdock_test   # or: conda activate tcrdock_test
-pip3 install -r requirements.txt
-python download_blast.py
+pip install <path/to/cloned/repo>
 ```
 
 To run the AlphaFold simulations, you will need a Python environment that satisfies

diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml
@@ -0,0 +1,15 @@
+name: tcrdock-test
+channels:
+  - conda-forge  # listed first = highest priority, as Bioconda's documented setup expects
+  - bioconda  # provides the `blast` package
+  - nodefaults  # keep the Anaconda `defaults` channel out of the solve
+dependencies:
+  ### Base depends ###
+  - python>=3.10.0
+  - pip
+
+  ## main deps
+  - blast
+
+  ## testing
+  - pytest
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,33 @@
+[build-system]
+requires = ["setuptools >= 62.3.0"]  # >=61 reads the [project] table; >=62.3 expands recursive "**" package-data globs
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "tcrdock"
+version = "0.1.0"
+
+dependencies = [
+    "biopython",
+    "numpy",
+    "pandas",
+    "scipy",
+    "matplotlib",
+]
+
+[tool.setuptools.packages.find]
+where   = ["."]
+include = ["tcrdock*"] 
+
+[tool.setuptools.package-data]
+tcrdock = [
+    "**/*.alfas",
+    "**/*.fasta",
+    "**/*.txt",
+    "**/*.tsv",
+    "**/tcr/*.pdb",
+    "**/tcr/*.json",
+    "**/ternary/*.pdb",
+    "**/ternary/*.json",
+    "**/pmhc/*.pdb",
+    "**/pmhc/*.json",
+]
diff --git a/tcrdock/blast.py b/tcrdock/blast.py
@@ -5,63 +5,64 @@
 from os.path import exists, isdir
 from .util import amino_acids
 
-path_to_blast_executables = Path(__file__).parents[1] / 'ncbi-blast-2.11.0+' / 'bin'
-assert isdir( path_to_blast_executables ),\
-    'You need to download blast; please run download_blast.py in TCRdock/ folder'
+blastp_exe = str("blastp")
+makeblastdb_exe = str("makeblastdb")
 
-blastp_exe = str(path_to_blast_executables / 'blastp')
-makeblastdb_exe = str(path_to_blast_executables / 'makeblastdb')
+blast_fields = (
+    "evalue bitscore qaccver saccver pident length mismatch"
+    " gapopen qstart qend qlen qseq sstart send slen sseq"
+)
 
-blast_fields = ('evalue bitscore qaccver saccver pident length mismatch'
-                ' gapopen qstart qend qlen qseq sstart send slen sseq')
 
-def make_blast_dbs(fastafile, dbtype='prot'):
-    assert dbtype in ['prot','nucl']
+def make_blast_dbs(fastafile, dbtype="prot"):
+    assert dbtype in ["prot", "nucl"]
 
-    cmd = f'{makeblastdb_exe} -in {fastafile} -dbtype {dbtype}'
+    cmd = f"{makeblastdb_exe} -in {fastafile} -dbtype {dbtype}"
     print(cmd)
     system(cmd)
 
 
 def check_for_blast_dbs(fastafile):
-    return exists(str(fastafile)+'.phr')
+    return exists(str(fastafile) + ".phr")
+
 
 def blast_file_and_read_hits(
-        fname,
-        dbfile,
-        evalue = 1e-3,
-        num_alignments = 10000,
-        verbose=False,
-        clobber=False,
-        extra_blast_args=''
+    fname,
+    dbfile,
+    evalue=1e-3,
+    num_alignments=10000,
+    verbose=False,
+    clobber=False,
+    extra_blast_args="",
 ):
-    assert exists(dbfile), f'missing file for BLAST-ing against: {dbfile}'
+    assert exists(dbfile), f"missing file for BLAST-ing against: {dbfile}"
 
     # check for blast database files
     if not check_for_blast_dbs(dbfile):
         # maybe we haven't set up the blast files yet...
-        print('WARNING: missing blast db files, trying to create...')
+        print("WARNING: missing blast db files, trying to create...")
         make_blast_dbs(dbfile)
-        assert check_for_blast_dbs(dbfile), 'Failed to create blast db files!'
+        assert check_for_blast_dbs(dbfile), "Failed to create blast db files!"
 
-    outfile = fname+'.blast'
+    outfile = fname + ".blast"
     assert clobber or not exists(outfile)
 
-    cmd = (f'{blastp_exe} -query {fname} -db {dbfile} {extra_blast_args} '
-           f' -outfmt "10 delim=, {blast_fields}" -evalue {evalue}'
-           f' -num_alignments {num_alignments} -out {outfile}')
+    cmd = (
+        f"{blastp_exe} -query {fname} -db {dbfile} {extra_blast_args} "
+        f' -outfmt "10 delim=, {blast_fields}" -evalue {evalue}'
+        f" -num_alignments {num_alignments} -out {outfile}"
+    )
 
     if not verbose:
-        cmd += ' 2> /dev/null'
+        cmd += " 2> /dev/null"
 
     if verbose:
         print(cmd)
     system(cmd)
 
-    blast_hits = pd.read_csv(
-        outfile, header=None, names=blast_fields.split())
-    #blast_hits.rename(columns={'saccver':'pdb_chain', 'qaccver':'allele'}, inplace = True)
-    #blast_hits.sort_values('pident', ascending=False, inplace=True)
+    blast_hits = pd.read_csv(outfile, header=None, names=blast_fields.split())
+    # blast_hits.rename(columns={'saccver':'pdb_chain', 'qaccver':'allele'}, inplace = True)
+    # blast_hits.sort_values('pident', ascending=False, inplace=True)
 
     if exists(outfile):
         remove(outfile)
@@ -70,41 +71,41 @@ def blast_file_and_read_hits(
 
 
 def blast_sequence_and_read_hits(
-        query_sequence,
-        dbfile,
-        tmpfile_prefix = '',
-        evalue = 1e-3,
-        num_alignments = 10000,
-        verbose=False,
+    query_sequence,
+    dbfile,
+    tmpfile_prefix="",
+    evalue=1e-3,
+    num_alignments=10000,
+    verbose=False,
 ):
-    tmpfile = f'{tmpfile_prefix}tmp_fasta_{random.random()}.fasta'
+    tmpfile = f"{tmpfile_prefix}tmp_fasta_{random.random()}.fasta"
 
-    out = open(tmpfile, 'w')
-    out.write(f'>tmp\n{query_sequence}\n')
+    out = open(tmpfile, "w")
+    out.write(f">tmp\n{query_sequence}\n")
     out.close()
 
     blast_hits = blast_file_and_read_hits(
-        tmpfile, dbfile, evalue, num_alignments, verbose, clobber=True)
+        tmpfile, dbfile, evalue, num_alignments, verbose, clobber=True
+    )
 
     if exists(tmpfile):
         remove(tmpfile)
 
     return blast_hits
 
 
-
 def setup_query_to_hit_map(hit):
-    ''' hit is a single row from blast_hits
+    """hit is a single row from blast_hits
 
     query2hit_align is a dictionary mapping from full-length, 0-indexed positions
     in query sequence to full-length, 0-indexed positions in hit sequence
-    '''
+    """
 
     query2hit_align = {}
-    for ii,(qaa,haa) in enumerate(zip(hit.qseq, hit.sseq)):
+    for ii, (qaa, haa) in enumerate(zip(hit.qseq, hit.sseq)):
         if qaa in amino_acids and haa in amino_acids:
-            qpos = hit.qstart + ii - hit.qseq[:ii].count('-') - 1 #0-idx
-            hpos = hit.sstart + ii - hit.sseq[:ii].count('-') - 1 #
+            qpos = hit.qstart + ii - hit.qseq[:ii].count("-") - 1  # 0-idx
+            hpos = hit.sstart + ii - hit.sseq[:ii].count("-") - 1  #
             query2hit_align[qpos] = hpos
 
     return query2hit_align
diff --git a/tcrdock/db/__init__.py b/tcrdock/db/__init__.py
diff --git a/tcrdock/db/new_imgt_hla/__init__.py b/tcrdock/db/new_imgt_hla/__init__.py
diff --git a/tcrdock/db/pdb/__init__.py b/tcrdock/db/pdb/__init__.py
diff --git a/tcrdock/db/pdb/pmhc/__init__.py b/tcrdock/db/pdb/pmhc/__init__.py
diff --git a/tcrdock/db/pdb/tcr/__init__.py b/tcrdock/db/pdb/tcr/__init__.py
diff --git a/tcrdock/db/pdb/ternary/__init__.py b/tcrdock/db/pdb/ternary/__init__.py