diff --git a/envs/bp3.yaml b/envs/bp3.yaml index c6deaf6..385cc55 100644 --- a/envs/bp3.yaml +++ b/envs/bp3.yaml @@ -5,9 +5,6 @@ dependencies: - pip - python==3.8.8 - - - - pip: - bp3==0.0.12.7 - fair-esm==1.0.3 diff --git a/envs/env.yaml b/envs/env.yaml index 5a3a278..f7351c9 100644 --- a/envs/env.yaml +++ b/envs/env.yaml @@ -9,6 +9,10 @@ dependencies: - matplotlib - polars - biopython + - plotnine + - pyarrow + - matplotlib + - scikit-learn - pip: - torch diff --git a/notebooks/example_code.ipynb b/notebooks/example_code.ipynb deleted file mode 100644 index 903ad99..0000000 --- a/notebooks/example_code.ipynb +++ /dev/null @@ -1,284 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "bae1dc08", - "metadata": {}, - "source": [ - "## Environment:\n", - "\n", - "This notebook will run with the 'envs/env.yaml` environment (epident-experiments)" - ] - }, - { - "cell_type": "markdown", - "id": "257bb096", - "metadata": {}, - "source": [ - "## Bepipred 3 dataset\n", - "\n", - "- job_name: unique identifier for protein, comes from hash of seq\n", - "- seq: amino acid sequence of protein\n", - "- test: boolean indicating if seq is part of test set\n", - "- epitope_boolmask: boolean array the same length as seq indiciating if the AA at that position is an epitope residue\n", - "- raw_protein_id: original ID assigned to protein in BP3C50ID set\n", - "- RSA: relative solvent accessiblity of the protein at each AA, calculated by FreeSASA\n", - "- SA: absolute solvent accessibility of the protein at each AA, calculated by FreeSASA" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "777f08b2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
| job_name | seq | test | epitope_boolmask | raw_protein_id | RSA | SA |
|---|---|---|---|---|---|---|
| str | str | bool | list[bool] | str | list[f64] | list[f64] |
| "bf2a62534941cf895971e1daa33a46… | "LIQTPSSLLVQTNHTAKMSCEVKSISKLTS… | true | [false, false, … false] | "3b9k_B" | [0.205823, 0.471213, … 1.001547] | [36.957627, 82.806331, … 152.205138] |
| "d4febd28417e8a4bf6266337c7a2de… | "GNVDLVFLFDGSMSLQPDEFQKILDFMKDV… | true | [false, false, … false] | "3hi6_A" | [0.840245, 0.294451, … 0.605393] | [68.13546, 42.698276, … 129.669178] |
| "17d233a2b305a3544cf6c164f8ad67… | "DERETWSGKVDFLLSVIGFAVDLANVWRFP… | true | [false, false, … false] | "4xp9_C" | [1.100846, 1.039373, … 1.03129] | [157.156786, 181.038055, … 185.178502] |
| "34e0c5de18ccd222f24d4bc9d0f0e4… | "KAMHVAQPAVVLASSRGIASFVCEYASPGK… | true | [true, true, … false] | "5ggv_Y" | [0.731129, 0.872878, … 1.227995] | [149.866882, 94.934212, … 168.493256] |
| "f4c930a3f1b5fb78cef62c5021adc0… | "GSHHHHHHGSGTDITNQLTNVTVGIDSGTT… | true | [false, false, … false] | "5jq6_A" | [1.462795, 0.796439, … 0.90302] | [118.618012, 94.250586, … 119.379304] |
| … | … | … | … | … | … | … |
| "2c282aeeb88596bf1f1f99be1bb7f0… | "LDKIDLSYETTESGDTAVSEDSYDKYASQN… | false | [false, false, … false] | "7jum_A" | [0.474081, 0.908022, … 0.986187] | [85.126053, 129.629226, … 143.007018] |
| "5196520df0000bf1b3fafa8c0e9ecc… | "TDRQLAEEYLYRYGYTRVASLGPALLLLQK… | false | [false, false, … false] | "5th9_A" | [0.871364, 0.432033, … 0.256134] | [122.513787, 61.677101, … 54.861323] |
| "96836e4358c57e3f571a4f2bb8a8f8… | "LPWLNVSADGDNVHLVLNVSEEQHFGLSLY… | false | [false, false, … true] | "6hga_B" | [0.93068, 0.110532, … 1.220648] | [167.112849, 15.166135, … 223.341912] |
| "9d838eec0c24655e9902a3ac128a34… | "CSSPPCECHQEEDFRVTCKDIQRIPSLPPS… | false | [false, false, … false] | "2xwt_C" | [0.479508, 1.059907, … 0.528113] | [63.390948, 125.429402, … 74.252646] |
| "cb56653d3f7b5272b7874963549242… | "CSVVVGENYSIKCDATKCTIEDKNRGIIKT… | false | [false, false, … false] | "6vtw_A" | [0.628252, 0.682526, … 0.637367] | [83.054881, 80.770091, … 113.980313] |