From 23d6bd4b7fab4a34c5ac6726e54a3271acf0d440 Mon Sep 17 00:00:00 2001 From: Jacob Sesate Date: Tue, 30 Sep 2025 22:19:08 -0700 Subject: [PATCH 1/3] preliminary model pipeline (regression) --- envs/bp3.yaml | 3 - envs/env.yaml | 4 + notebooks/best_esm_embedding_vars.pkl | Bin 0 -> 30592 bytes notebooks/example_code.ipynb | 274 +++++++----- notebooks/regression.ipynb | 587 ++++++++++++++++++++++++++ 5 files changed, 764 insertions(+), 104 deletions(-) create mode 100644 notebooks/best_esm_embedding_vars.pkl create mode 100644 notebooks/regression.ipynb diff --git a/envs/bp3.yaml b/envs/bp3.yaml index c6deaf6..385cc55 100644 --- a/envs/bp3.yaml +++ b/envs/bp3.yaml @@ -5,9 +5,6 @@ dependencies: - pip - python==3.8.8 - - - - pip: - bp3==0.0.12.7 - fair-esm==1.0.3 diff --git a/envs/env.yaml b/envs/env.yaml index 5a3a278..f7351c9 100644 --- a/envs/env.yaml +++ b/envs/env.yaml @@ -9,6 +9,10 @@ dependencies: - matplotlib - polars - biopython + - plotnine + - pyarrow + - matplotlib + - scikit-learn - pip: - torch diff --git a/notebooks/best_esm_embedding_vars.pkl b/notebooks/best_esm_embedding_vars.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4b87fb82c1c79cc413603859b400d56db207d565 GIT binary patch literal 30592 zcmY-1cXW-}{s!=^M~^zX6VZDo>WiqaLlA^SoFIfKA$rT!89@*d{pg+OVU!o6_cBC3 zqm1aCi1wS`*pK_{`^UX&eb#fI?{|OSGk4aVb?3^|^M(u)|NWy&bjirv{fG1m8&SS% z_W^@~%l8}7cX02ZL4$%uL`S9#?;g}QXi)Ur=*TqxW&Zuw9)m}O1^>UK>{&B<+SDo0 zZKF$?sUlO7G(Ubqp*+rVk(IKfq(*Ngp$5(H8pwE?F?&`H%&$ExUq7L@=__;^RHQ@Jf7X}WQCQ=m!_b6Z1ICD zlt19Pw!pP;KkhLa|5&c_W4I^Ng>~@vocC$?8@w#%lOE)Tf8=*r(=Rzy{|es6F*gR@ zk@Lp2P`HEUT5IlfQEqmi=QinfZkei&SD#8iACTx@N8nAmE9EE2k}su*AB1ByNi627OA&x;8R9t zss%ULt=dZ#s%`$Eu@zs&D7K&EH^t@c2LFfMxL684pX_+Y+{El)9c8Sb%i6oGH#y!){DSaqFPTGR5`qS*eD z$2y%8;Z3>b^|1!1IYc$mFB9SgwQUlUW7N@txW@MUopN&l)mR(uBi@c{WUIg6S^1~q za85?eI;!!R-X3fb+X8E$B{jEbdB)Jqq50dyMCy@Q&4=rijWg5FX6+VPpHi_i&H&>I6T>J+E7Tj#g*(<{tMxiSQ%* zEWeqTl$+kPzC64K@w{B)3Rn%F$2D#_fB6IU=riHSkLEQK%ZFgBNn^cRc18R=*H}-t 
z&Q-4}%W85I~(?f@7MAlg5c>mzwUY~xG&GO9W12W%uQ?BcKzZP#IC&A&{B=3 zUX1-RM{OU)Zu7qnAI4K%m#)IEvp4gOgXd>=?OF|A&;7FhQg}GO)-ikuyc_q}zQw>T zc2}FZ@QU1DZ}L?5Yxa=%$?$GGH@1U6+@JeJWHsexBRZ!T?TNTI|MZP>uV<@sPSfi9 zUxSz9bvoa@0`JZ}va1FE!t1>D+yhVGeAN8)%FXMvru18dcqy*2U0VuY%r!};qEXYC zY8+$dBEEua#JxW75&SBnO8~qV*N07O0k6yX<9^wcn^$P8=$0I%j=7tUY5ARpH{p6m z(f#mpyk^w0-OA0pR39?aiFkFcu~PiKk_Dny`WQ;GOwsuA~d_iJU*#VH$iO=Z*H&;F~xvuIGh& z^RtW*?r=ADt7}T-WZR{9)%tdNtq|_V;9GbdIb^K((H~(?cvE>?|g5EUuIu%t2lflujv??2EI}25BWYrJ+~LT2uy~bDfh(#_;LfZyol67vO%&JuQ40d+KpNBGsPf`MuVc@9-f2wgrWkNibYuu}SovvOrTidctBK!q! zY1B9hpTT+O_1*B<+#eRO3+~Bz>q0YlTJCr3uL~c>J#tbJcy-Pj|9B`jYtVD$!{5M< ziG45TMoTT;#@R!_ZS11Px8K!uCi6C{5)Z&%YumJ*t=xR~zp--1>u|(JalO&D4}7)u z4evIEXV>am)`9QUdOnth|HgT7DldEsuW!WurrbR9zcqzulo8yX_;oJVGdga5uh!ocYW1a$d~qg1m?3Dg5Ezc&_bPQ}}PZr{(|4 zJ8;kZ0rgbgyh~@W_LfJ@DqeHkpaO`mrnr&$%T#r>x!Tlw$KY3Zsugzt9?9DqpZCG* z@YpyRK5WmPZR@UNh2iFu@kqO?89~+iA4eD!Cy*T?|%FjJp|_*T@tz5f7kvT+MLssQovR)j}>cN@B-}e`AK*q zZT*gi;jegZ!1;-@aTm2UUp-ZFMJVy61lV*{J2)X zJQM0m(>7LeI>i6xxDlQT@nIAfdml|u%V*=a8XGplzi^FQyaHZ}w{%sO%FPv2A2lKv z@e5qzcBwObJ0BrWF1Xo)Y622!Bff@foX!-O`X|MW{cpyrM{VIp$)y|N1vM|eTDjSa zYQ&YLh_B+Qa@_Cm&wK-;W>3_lr5Y>A3p|2YaTckM`kh=qx(aF#7 zPaBaAY!Dm0e{nbBzW!gN`m8(nrN+l3_*1^GYw{;}D!zbq>@K_=FDdd|fd_Gqh&-&^ zl(bIb=~;-ITq9?Uf#2d9Sz{EuD6ikLau4_^?uozO4gQIHjM=r7n}cW_(Xk@pc{whs zmxrHZmkUb4vuhqz67^T9zG7fe#076D3wXl2bB*m$0eBhBcjSZ5;2u#mJG?sQt=DPc zBiLov&k^d!kbO@}%tzdZYvgJPKgf@g1xCPc@Qr1LUw(kcip_z@Z)G=J zt>HI#Zq%U~@KW3_E|!K*;vTuLIQ$;>h-Mz}F`PFV!_8WB2I-d#aZj$1&C|jAXlF=# zI9gr#ExR!*0e(zde_f1nb0m*lFaq(RyruZo4c>t3#gH!W@0!2s0QX|=k+TlG1-oqL zsocy+>$vwVjCenei)~*=sn`C_ZuR~QAI0lid2TB=t5LnEcnR_KT0B!2{16}C78VMh zqaAagKk5%qy=-0^@ddo4%>Qkq+Wst$6}g|notzhg9>d>JUQV|kfrn`MPJ7@{yuM7a z5tM(7ZDSI!%0`@>(di>{sF6}U$<&j~Ng?waMM+}ud#5d-gyP-n=@^_L&tfDdJt zC6>bbY3@E9{zm&ML#M*?@H(=_hv7fxni|i!rP!W|>qWIii0`27rCWP&W#$ksv^3*( zai#|1@f0_Vm&4Qz&hhTD!*zHCUdA|d1^$TZo%d$Ib8(L?zNvC^AdM9_tAaBV+uFY# zsxcB^v?yo2VYhrmCwi+iE)ue`mL$FAJ8(V9Mgdn5jiYvlEM 
z@ND$QiHP-28ls++lk+0&U+{XIceS_-f5+>HcYEQ*`S{}2B=~3UkwyE$E$-Prq#Hb% zd*sd*@K4<1>|O}IhWEG5=7z^;{`=y=>bZL}!8P{0%zlE!H7V$dr)p>X-K9BWb1AG(r7$s-IV>vILMZ%A=mptPIuc57f zyevGDT~zUecj39>^zuPJ_cs^QbB$9=!M?%rYKeP4o=V9Bs0prc&MnA3O8*F$_W#UuB6 zBL14=a?_76^*b1#dG7b{_UzVzlknpFC}Zy*%FQn{Hstya#6R#`g>xhPZ+4k)5xg{y z6{jb{k8@s}904E4A5bnErrh+Wb)x?63tmesJ9I}&m^RhBD?F4RD*{`?!`P+00NjVS z5q0PHS4Xa|^=}^pzs#?aW_9J}K3=na1;j^dHF*X6DX$}*U+AZPIoqj5#II0n&(Bju z=>_m2np+v*X|;O4y?xcI;Y>;kz&py|jPRD)oBF4KAK+68zeT;(TV34a`l~a%h?YV|qUyXaj!^B|a{Wvdt#=|Fa zzx=14a`POmDbmzOyscK_pWH*$duTTsody0@^V;9Lt9(k$i{6Fz(AL>D2mVcamg9Hj zWvU6azn_ppnI zPmZ7c<_B6|@6V_;oG6wlwPB zbG_K#RUIdz7GG2bUX+g}^PUV;`K;_BemeY&_FAhx{0QfLn!o9ydXlxd0j=RvmUOSGf8T<~fFY>JOQ9U!+E&s9bL~YHqMdA0g{z08utDbAzBVxbVmG{u{Q7zyr zw7kz7Q{`K0bNgn1-_bnjQ*)J%)p~q?xzxOl1K0sSDp>55dZa(99?Bp6Mo?83jk8f00-e0>;{35ue1KIe$LE}4`vk2$a6fH*w-gQ4*nT|L zHUJz%?2O!`*zT`AD&ZA;wbmZx)<|uctZjLBuD9~mTF;+@nkrA!{9r&c<#F1p%65a# z*7Cj+m78;EeewD#;#akG0^^$htT*#fT&C~YLQQ?BO>H^aRK8Pt?b+jY<(sr^3fzZJ z(|qUHR%&c>8Y?E2ZLN4F^VyU>s=a`=P0q~l=iF{x-vr;G<(oc$x6(Y|6@0JuDz|s= z5?cO!cpJ55DPFVcaK-j?ypG5q;YYbfu2~PC$L@Ih6h2P-R!u%DH^=hW@X;+uYLF(?qaXw-Ko9mDZ(yxPKE3Li?HZ=9aMgUmTxfxK1w@}*DUxX z?YSZ2I;x(3wEdqK@KgRo%Qx{*ZdT><89Ka z=QXqJMZAF)&u;{(niZOFxC9T-*6jQUzLj0BsOL~U<@vcHb^!bVyNnnH-^o29Y6RRv z^Fg!Wdo-_q8NQACMTaNw`r5mEvj?d?uW4UB?+*AScB62*ZYuwm)?cYEJY3tS!gl3m zQ$GLB9f-T}Ix_e&d^x*VoB*%E^>XwRcnMy|r&%(*GUr8k_wH)_7h3-xt(BVzw5II1 z9Pvt8&EVUJhfrL+PTNDZ3yxbAtHU4jmf~4Q zE%+(^hSu9e_z8B=;4}Otd)UR8VD+rc>_(40@XYLDemwjUpFwzJ>8W}K@bPW+^T1DO z`GYl;o5krouJVHsKc+pm`F8kYt{01n^-^P}XlpiXque}5^&(&e;yzkD&2G4%jSV^o z&!GAH6Cppxnk~6L#HH9Cq&?REE8-0(F59Kz(&AXL3+Jk>}qTVEC5r)}ds z3I1C9t{D^IlePL(dHSiIdc2O9oDXi-`UA?sTWR?^4d8!ip4VHsxt`arKMe8jTD;II z_&Q!w#=GEiv~}jcM12_5i|5}E57X)!*!rv27Spcd;iKG)qZ-@o7{qh(He%@q#J5mf zOiw#NwV&m7Yj9ckb*(13sd6)lYD)Io0B%AoGVDgnEN-#R9e_V!7gZmirV`gw$Q`DZ z?5SO{O<#CtUS2Ml3ICh(#?F24`Px{w>+p+O&oakAHMgslA2$I0R`W9R;M2KBSj&`~ zt@v5}-ymL)YZ4;Az;Ch}=iLVVT*vgJnxsF{E4JU~*NTF@;QP78Dl!-~yQsz*HX3n% 
zUfwDk19$T8QfAnOnod-6vHNqxziQhY${wz6Igsn)E?0-=VYhD9ftTbS<7Q)cLw51E z3GdCXm5FWPx42(Sh=%9TdL|x)kJa+mkHZghzcJKpu=*%H=?vmhM#c7p+SO)cgYRPx zTh$a{5|)`a*yB(d9F3#Eqo{Uiv{_Hs9%-T`b!jqpWuAraT`1n z=bbaF!X4}~xi>sJ_c(+4C^t9I`3>Jyh}-yC;@DUCDeYNX^9@z&U*NplSr$Hs^G2_B z@JzgpI5hy?mh)oVNO&uLmKeE2xw)Lycm9vJ5<8}>M@ta5%f<2VKY4pAOXx85hlCr| z$ZZ1@+l%rtGIS#1*(qLe`F6zf@l+9i2tJW-P;uOG)J&ur5qJUIm^tDlTFz*@4^KB- zeUZt$q_yFfuj9NUPZ08hu>wZbY8hRfUX}k=QHA^ zxn77gBh@d*L2>b-4EW!_mi_DhC{Qe_3|>eqvwJDFf8|}|w-$(dQCzg>fq0Czi$`yG zaX!5$wi-2Ws7BuSiuhoT$89V-N?q~?UrcuJR&Khu#_&bFi?+@AAow3#fki^Ol@w@S<1~cx^ev@6U{pA(3BNVIbuJxpt3csY~ z=gx!==lv`8pAR>*{MeQ7sq7+XE&RRq-g8^vg?Jx%a|iryt;aYG57By7U4b9td~C)v zQr)u)_lxNC@bvsH`NISLk=^K80^XDRWu_&0~{2iai5jX`tg!3Y1DSR96FZ(*-)!1$F zmrv$BjaBQBk7loV{W^T5*5gP}ZuX@!Tka3QC5auCKBFZyZ(nl#H^d_;?%N>iIQ1VS ztB9=e&jNP%K5j?xr)2bP~BIjk5mhj(spAI>@z=v^<%QF~$m(D3f zvk-VS?zcS%g&$=Xp5x%d*p2Ej@CbG}av8h_&ux2bIlL0POuq$wh2Q1V?5J`xm_C>F z;yB`;__-G!pMYQBda>v-{2;ISvi&u93C>$L6O?Ozh1Gg=2l4T|rEolezv3DZm3yN4 z>NVKUtu74T%mS$zT6f zB`$4>N$URNwSR||3ci59qg?C79{4rx5qHYL8&h6RDN+?aiu1BU zJ$QZXd)e*qMBc~MJOVz2do0Hy_#*DHUhRO#vkR|d@Ida70Vm*IoVV?D!FSNRgta{x zUXNYG6qu~;zlrx3T}mi7GtmdO@_Kt{%dE(5E zi0|YYdH*xw$0;6`CiU-Xn>oDYS63EzMxJVwDgyt&?s`y0xml6w#n#4%`*OYGY&blL zT@;%EAId(VWArK1TWHdeX$iN;!u)+1hs<5u7i_(L9RoxBa-#baf%+jMoE2Anr8 z*HUhFq_K|Mp@ZTzM5-ljqr(7TlVF#DaXUj z-c)0x?~M2^o@$gE3LnTduADRAx%rqP<2-m>UPpK>hBwpl`Io?dW4DTIg=gk{tjtIYm?j#0zu1 z)xHtDIlHLQ174Qh_OLhn0FRYP!{F|GhGrF(D>uv1I*I%DBEFel<=f~GJcP%}KioD2Sidt^PgS!$m*yr+>m zJ$xefxbG|pPscq8)$77La!;7i68<~C%P@W5<2i3FYXjfTF1GiAJ9)0~=mX!#dx}as z;CDH1l-mRUp!M(G2T#FyQR#$o^9lVNgnybSb+$D8Ecq&ja&r;I-+JdnygjwcWwk26 z+h{eNtH8hUtJ1#mh6nTZnR58RCsDs}#010Busf@S!}D{@vpEvLEND)<&YMDzF$@GU$wX`TBVb+knKxbj)~>hNyd;|y;MPolgq4u!%c=i^32 z!JqM5QDrW?318Xg>00<4?w@*Ty>fFnogq?gM0_pBh2M7gWgdI-aXfr6yF8JA`oUEH zKKwo6yLrothm+u&d93vOj+#KKDS2AVRY(5FHS0R1fS2YP_eojd2RMIqZYAaB392`u zt0BIGYefGV@E80|Ea8Qkwp5dpwk_f#`8GMHwu2wzZQ?>5@FSeJuJ(cN<8_Q|(eT@x zmzkEqpKxB}ii4-*^**|_)mM4Qc~L60a9$30~&cw^4{JZP%i 
zOh@ayzG_E23y*b<90M=RHO8IA@T=@Tf9^s39jZ49>_@yjf5G9Y&cP?~ShrKR;Q2Xk zw7&V65%EU>&#%Gf;N!T* zTGlC6&Aq~Td1@$p9lOhWCcH7v6|VX4?(A{{D;eU#RkK-0!zz1w0$)WrNgWd+Y5wxVk&w*Sz+%0JEFp1i#gxEP*~ zr|y5X1U{B~PPQq$SZ)7=d!$Dbcrnf=O^Sf;;@8R_d*N%?qYk`;x8!{)u1kiybAQ~~ zMoZK_t2pmiV!{)6PhmR)AHZ(yHI}NLliXuPc*9GwORrY&N1FFssoeZdpGBmKLwq>L z9m{eoQ)8p}41Q4y;F9xBd#F?8|00k7{B;yO755kwCd1G3b8Cj?Uaoo`@j3l&=Y{)l z-nH)mJcj#SOP|4;vWIlozf$$L<9^4if~$VI`5T?bTNYlg_&%}GW5PbwvYlQl\n", - "shape: (358, 7)
job_nameseqtestepitope_boolmaskraw_protein_idRSASA
strstrboollist[bool]strlist[f64]list[f64]
"bf2a62534941cf895971e1daa33a46…"LIQTPSSLLVQTNHTAKMSCEVKSISKLTS…true[false, false, … false]"3b9k_B"[0.205823, 0.471213, … 1.001547][36.957627, 82.806331, … 152.205138]
"d4febd28417e8a4bf6266337c7a2de…"GNVDLVFLFDGSMSLQPDEFQKILDFMKDV…true[false, false, … false]"3hi6_A"[0.840245, 0.294451, … 0.605393][68.13546, 42.698276, … 129.669178]
"17d233a2b305a3544cf6c164f8ad67…"DERETWSGKVDFLLSVIGFAVDLANVWRFP…true[false, false, … false]"4xp9_C"[1.100846, 1.039373, … 1.03129][157.156786, 181.038055, … 185.178502]
"34e0c5de18ccd222f24d4bc9d0f0e4…"KAMHVAQPAVVLASSRGIASFVCEYASPGK…true[true, true, … false]"5ggv_Y"[0.731129, 0.872878, … 1.227995][149.866882, 94.934212, … 168.493256]
"f4c930a3f1b5fb78cef62c5021adc0…"GSHHHHHHGSGTDITNQLTNVTVGIDSGTT…true[false, false, … false]"5jq6_A"[1.462795, 0.796439, … 0.90302][118.618012, 94.250586, … 119.379304]
"2c282aeeb88596bf1f1f99be1bb7f0…"LDKIDLSYETTESGDTAVSEDSYDKYASQN…false[false, false, … false]"7jum_A"[0.474081, 0.908022, … 0.986187][85.126053, 129.629226, … 143.007018]
"5196520df0000bf1b3fafa8c0e9ecc…"TDRQLAEEYLYRYGYTRVASLGPALLLLQK…false[false, false, … false]"5th9_A"[0.871364, 0.432033, … 0.256134][122.513787, 61.677101, … 54.861323]
"96836e4358c57e3f571a4f2bb8a8f8…"LPWLNVSADGDNVHLVLNVSEEQHFGLSLY…false[false, false, … true]"6hga_B"[0.93068, 0.110532, … 1.220648][167.112849, 15.166135, … 223.341912]
"9d838eec0c24655e9902a3ac128a34…"CSSPPCECHQEEDFRVTCKDIQRIPSLPPS…false[false, false, … false]"2xwt_C"[0.479508, 1.059907, … 0.528113][63.390948, 125.429402, … 74.252646]
"cb56653d3f7b5272b7874963549242…"CSVVVGENYSIKCDATKCTIEDKNRGIIKT…false[false, false, … false]"6vtw_A"[0.628252, 0.682526, … 0.637367][83.054881, 80.770091, … 113.980313]
" - ], "text/plain": [ - "shape: (358, 7)\n", - "┌───────────────┬──────────────┬───────┬──────────────┬──────────────┬──────────────┬──────────────┐\n", - "│ job_name ┆ seq ┆ test ┆ epitope_bool ┆ raw_protein_ ┆ RSA ┆ SA │\n", - "│ --- ┆ --- ┆ --- ┆ mask ┆ id ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ bool ┆ --- ┆ --- ┆ list[f64] ┆ list[f64] │\n", - "│ ┆ ┆ ┆ list[bool] ┆ str ┆ ┆ │\n", - "╞═══════════════╪══════════════╪═══════╪══════════════╪══════════════╪══════════════╪══════════════╡\n", - "│ bf2a62534941c ┆ LIQTPSSLLVQT ┆ true ┆ [false, ┆ 3b9k_B ┆ [0.205823, ┆ [36.957627, │\n", - "│ f895971e1daa3 ┆ NHTAKMSCEVKS ┆ ┆ false, … ┆ ┆ 0.471213, … ┆ 82.806331, … │\n", - "│ 3a46… ┆ ISKLTS… ┆ ┆ false] ┆ ┆ 1.00154… ┆ 152.2… │\n", - "│ d4febd28417e8 ┆ GNVDLVFLFDGS ┆ true ┆ [false, ┆ 3hi6_A ┆ [0.840245, ┆ [68.13546, │\n", - "│ a4bf6266337c7 ┆ MSLQPDEFQKIL ┆ ┆ false, … ┆ ┆ 0.294451, … ┆ 42.698276, … │\n", - "│ a2de… ┆ DFMKDV… ┆ ┆ false] ┆ ┆ 0.60539… ┆ 129.66… │\n", - "│ 17d233a2b305a ┆ DERETWSGKVDF ┆ true ┆ [false, ┆ 4xp9_C ┆ [1.100846, ┆ [157.156786, │\n", - "│ 3544cf6c164f8 ┆ LLSVIGFAVDLA ┆ ┆ false, … ┆ ┆ 1.039373, … ┆ 181.038055, │\n", - "│ ad67… ┆ NVWRFP… ┆ ┆ false] ┆ ┆ 1.03129… ┆ … 185… │\n", - "│ 34e0c5de18ccd ┆ KAMHVAQPAVVL ┆ true ┆ [true, true, ┆ 5ggv_Y ┆ [0.731129, ┆ [149.866882, │\n", - "│ 222f24d4bc9d0 ┆ ASSRGIASFVCE ┆ ┆ … false] ┆ ┆ 0.872878, … ┆ 94.934212, … │\n", - "│ f0e4… ┆ YASPGK… ┆ ┆ ┆ ┆ 1.22799… ┆ 168.… │\n", - "│ f4c930a3f1b5f ┆ GSHHHHHHGSGT ┆ true ┆ [false, ┆ 5jq6_A ┆ [1.462795, ┆ [118.618012, │\n", - "│ b78cef62c5021 ┆ DITNQLTNVTVG ┆ ┆ false, … ┆ ┆ 0.796439, … ┆ 94.250586, … │\n", - "│ adc0… ┆ IDSGTT… ┆ ┆ false] ┆ ┆ 0.90302… ┆ 119.… │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2c282aeeb8859 ┆ LDKIDLSYETTE ┆ false ┆ [false, ┆ 7jum_A ┆ [0.474081, ┆ [85.126053, │\n", - "│ 6bf1f1f99be1b ┆ SGDTAVSEDSYD ┆ ┆ false, … ┆ ┆ 0.908022, … ┆ 129.629226, │\n", - "│ b7f0… ┆ KYASQN… ┆ ┆ false] ┆ ┆ 0.98618… ┆ … 143.… │\n", - "│ 5196520df0000 ┆ TDRQLAEEYLYR ┆ false ┆ [false, 
┆ 5th9_A ┆ [0.871364, ┆ [122.513787, │\n", - "│ bf1b3fafa8c0e ┆ YGYTRVASLGPA ┆ ┆ false, … ┆ ┆ 0.432033, … ┆ 61.677101, … │\n", - "│ 9ecc… ┆ LLLLQK… ┆ ┆ false] ┆ ┆ 0.25613… ┆ 54.8… │\n", - "│ 96836e4358c57 ┆ LPWLNVSADGDN ┆ false ┆ [false, ┆ 6hga_B ┆ [0.93068, ┆ [167.112849, │\n", - "│ e3f571a4f2bb8 ┆ VHLVLNVSEEQH ┆ ┆ false, … ┆ ┆ 0.110532, … ┆ 15.166135, … │\n", - "│ a8f8… ┆ FGLSLY… ┆ ┆ true] ┆ ┆ 1.220648… ┆ 223.… │\n", - "│ 9d838eec0c246 ┆ CSSPPCECHQEE ┆ false ┆ [false, ┆ 2xwt_C ┆ [0.479508, ┆ [63.390948, │\n", - "│ 55e9902a3ac12 ┆ DFRVTCKDIQRI ┆ ┆ false, … ┆ ┆ 1.059907, … ┆ 125.429402, │\n", - "│ 8a34… ┆ PSLPPS… ┆ ┆ false] ┆ ┆ 0.52811… ┆ … 74.2… │\n", - "│ cb56653d3f7b5 ┆ CSVVVGENYSIK ┆ false ┆ [false, ┆ 6vtw_A ┆ [0.628252, ┆ [83.054881, │\n", - "│ 272b787496354 ┆ CDATKCTIEDKN ┆ ┆ false, … ┆ ┆ 0.682526, … ┆ 80.770091, … │\n", - "│ 9242… ┆ RGIIKT… ┆ ┆ false] ┆ ┆ 0.63736… ┆ 113.9… │\n", - "└───────────────┴──────────────┴───────┴──────────────┴──────────────┴──────────────┴──────────────┘" + "117" ] }, - "execution_count": 22, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -95,8 +47,9 @@ "import polars as pl\n", "\n", "bp3 = pl.read_parquet(\"../data/bp3c50id/bp3c50id.rsa.parquet\")\n", + "#bp3 = pl.read_parquet(\"../data/bp3c50id/bp3c50id.bp3.parquet\")\n", "\n", - "bp3" + "len(bp3.select(\"seq\")[0].item())" ] }, { @@ -109,24 +62,32 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 3, "id": "d1cd1126", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/jsesate/miniconda3/envs/epident-experiments/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, { "data": { "text/plain": [ - "array([[-316. , -916. , -148. , ..., 114.5 , -175. , 56.75],\n", - " [-498. , -616. 
, -120.5 , ..., 198. , -235. , 105.5 ],\n", - " [-508. , -608. , -107. , ..., 288. , -241. , -6.03],\n", + "array([[-576. , -422. , -53.75 , ..., 12.125, -142. , 58.5 ],\n", + " [-652. , -294. , 72. , ..., 262. , -286. , -12.06 ],\n", + " [-600. , -450. , 18.88 , ..., 36.25 , -109.5 , -71. ],\n", " ...,\n", - " [-370. , -528. , -242. , ..., 163. , -460. , -227. ],\n", - " [-370. , -528. , -242. , ..., 163. , -460. , -227. ],\n", - " [-370. , -528. , -242. , ..., 163. , -460. , -227. ]],\n", - " shape=(256, 384), dtype=float16)" + " [-378. , -616. , -304. , ..., 192. , -584. , -216. ],\n", + " [-378. , -616. , -304. , ..., 192. , -584. , -216. ],\n", + " [-378. , -616. , -304. , ..., 192. , -584. , -216. ]],\n", + " shape=(768, 384), dtype=float16)" ] }, - "execution_count": 17, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -137,14 +98,15 @@ "\n", "\n", "INF_DIR = Path(\"../data/bp3c50id/inference\")\n", - "sample_job_name = bp3.select(\"job_name\")[0].item()\n", + "sample_job_name = bp3.select(\"job_name\")[2].item()\n", "\n", "af3_output = AF3Output(INF_DIR / sample_job_name)\n", "\n", "af3_single_embed = af3_output.get_single_embeddings()\n", "af3_pairwise_embed = af3_output.get_pair_embeddings()\n", "\n", - "af3_single_embed" + "af3_single_embed\n", + "#af3_pairwise_embed" ] }, { @@ -157,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "id": "8e84ca95", "metadata": {}, "outputs": [ @@ -165,40 +127,128 @@ "name": "stdout", "output_type": "stream", "text": [ - "Alpha carbon pLDDT: [89.05000305 90.90000153 93.55000305 92.66999817 95.01999664 96.\n", - " 97.02999878 96.05000305 97.12000275 96.63999939 96.61000061 95.05999756\n", - " 93.01999664 93.45999908 92.69000244 93.5 93.80999756 93.55000305\n", - " 92.87999725 91.15000153 88.44000244 82.69999695 76.98000336 72.58000183\n", - " 68.90000153 72.23999786 69.37999725 71.36000061 77.56999969 84.30999756\n", - " 87.66999817 91.04000092 92.34999847 
93.26999664 93. 92.87000275\n", - " 86.84999847 79.66000366 76.95999908 76.51000214 80.98999786 81.33999634\n", - " 84.69999695 89.47000122 90.79000092 90.59999847 90.86000061 90.16999817\n", - " 89.73999786 88.19000244 84.12999725 81.91999817 78.51000214 80.23999786\n", - " 81.58000183 82.37999725 84.91000366 85.86000061 87. 85.43000031\n", - " 80.48000336 74.48000336 74.86000061 60.11000061 59.43999863 62.54999924\n", - " 57.09000015 70.15000153 77.37999725 75.16000366 75.26999664 73.08999634\n", - " 64.12999725 57.47999954 55.90000153 61.22000122 75.45999908 83.11000061\n", - " 84.58000183 87.79000092 90.01999664 90.48999786 90.80000305 88.38999939\n", - " 89.47000122 91.26999664 91.04000092 91.87999725 89.41000366 90.81999969\n", - " 93.27999878 93.80999756 95.09999847 95.44000244 95.37999725 93.48999786\n", - " 92.26000214 88.79000092 80.73000336 69.51999664 64.80000305 66.72000122\n", - " 77.19999695 84.18000031 89.05000305 91.90000153 93.55000305 94.54000092\n", - " 96.01000214 96.37999725 96.51000214 95.43000031 96.23999786 95.08999634\n", - " 96.22000122 91.58999634 80.5 ]\n" + ", , , ..., , , ]>\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/home/lwoods/miniconda3/envs/epident-experiments/lib/python3.13/site-packages/MDAnalysis/coordinates/MMCIF.py:139: UserWarning: 1 A^3 CRYST1 record, this is usually a placeholder. Unit cell dimensions will be set to None.\n", + "/home/jsesate/miniconda3/envs/epident-experiments/lib/python3.13/site-packages/MDAnalysis/coordinates/MMCIF.py:139: UserWarning: 1 A^3 CRYST1 record, this is usually a placeholder. 
Unit cell dimensions will be set to None.\n", " warnings.warn(\n" ] } ], "source": [ "u = af3_output.get_mda_universe()\n", - "\n", + "print(u.atoms)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a1a6ac7f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Alpha carbon pLDDT: [69.04000092 90.44000244 93.47000122 93.87999725 94.23000336 94.08999634\n", + " 91.55000305 92.23999786 94.29000092 93.56999969 91.97000122 93.80000305\n", + " 93.52999878 90.19999695 91. 92.70999908 90.81999969 88.83999634\n", + " 90.33000183 88.86000061 89.34999847 87.77999878 88.33999634 89.69999695\n", + " 90.81999969 90.26000214 90.75 93.15000153 95.62999725 96.09999847\n", + " 95.29000092 96.18000031 97.12000275 96.65000153 95.94000244 97.08999634\n", + " 96.94999695 97.33000183 97.62999725 98.05000305 98.09999847 98.19000244\n", + " 98.18000031 97.77999878 97.97000122 97.88999939 97.70999908 97.19999695\n", + " 97.48999786 97.93000031 97.41000366 96.66999817 97.23999786 98.01999664\n", + " 98.25 98.01999664 97.88999939 98.31999969 98.58000183 98.08999634\n", + " 98.12999725 98.5 98.31999969 98.08999634 98.23999786 97.91999817\n", + " 97.12999725 97.04000092 97.29000092 96.72000122 97.30999756 98.01000214\n", + " 97.95999908 98.12999725 98.37999725 97.58999634 98. 
97.86000061\n", + " 97.90000153 98.09999847 98.05999756 98.34999847 98.19000244 98.15000153\n", + " 98.44000244 98.16999817 98.34999847 97.55000305 96.83000183 96.48000336\n", + " 94.91999817 94.91000366 93.54000092 93.41999817 92.83000183 89.87000275\n", + " 80.34999847 80.37999725 85.29000092 86.36000061 90.30999756 92.72000122\n", + " 93.12000275 92.09999847 93.58999634 95.87999725 96.54000092 95.41000366\n", + " 96.63999939 97.59999847 97.80000305 97.23999786 97.68000031 97.38999939\n", + " 97.58000183 97.44000244 96.58000183 95.87999725 96.11000061 97.19000244\n", + " 97.37000275 97.44999695 96.08000183 95.90000153 95.65000153 95.75\n", + " 96.72000122 97.31999969 97.5 96.66999817 94.75 93.80999756\n", + " 93.05000305 92.70999908 92.97000122 91.61000061 92.26999664 88.43000031\n", + " 82.11000061 77.48999786 66.93000031 59.59999847 48.08000183 57.41999817\n", + " 79.16000366 87.52999878 91.86000061 93.05999756 95.01999664 95.37999725\n", + " 94.76000214 95.16999817 97.20999908 96.45999908 95.58999634 96.59999847\n", + " 97.58000183 97.56999969 96.81999969 96.83000183 95.56999969 95.34999847\n", + " 96.54000092 95.52999878 94.69999695 94.08999634 94.77999878 94.33999634\n", + " 95. 
95.41000366 95.45999908 97.09999847 97.37000275 97.44000244\n", + " 97.97000122 97.76000214 97.47000122 97.43000031 97.73999786 97.30999756\n", + " 96.16000366 96.79000092 96.31999969 95.18000031 94.66999817 94.41000366\n", + " 93.44000244 92.30999756 92.55000305 90.62999725 87.55999756 87.79000092\n", + " 85.22000122 86.43000031 87.26999664 89.08000183 87.33000183 88.08999634\n", + " 86.70999908 83.44000244 84.87999725 87.94000244 87.48000336 88.58000183\n", + " 87.36000061 87.08000183 85.84999847 87.62000275 87.05999756 85.34999847\n", + " 81.41000366 71.08999634 67.79000092 78.40000153 86.98999786 91.79000092\n", + " 94.26999664 93.15000153 94.30000305 95.83999634 95.81999969 95.61000061\n", + " 97.04000092 97.33000183 96.23999786 96.62999725 95.73999786 97.\n", + " 97.66000366 96.62999725 96.76000214 97.79000092 97.37999725 97.20999908\n", + " 97.55999756 97.65000153 97.44999695 97.56999969 97.19999695 97.69999695\n", + " 97.44000244 96.91999817 97.19999695 97.23999786 95.76999664 93.5\n", + " 93.81999969 96.08999634 95.79000092 91.38999939 90.23999786 94.62000275\n", + " 95.98000336 95.19999695 94.20999908 95.11000061 95.69000244 95.41999817\n", + " 94.19000244 94.29000092 94.69999695 91.04000092 95.23000336 93.01000214\n", + " 94.48999786 94.90000153 94.91000366 95.66000366 96.55999756 95.18000031\n", + " 94.94999695 96.66000366 96.34999847 95.84999847 96.72000122 97.22000122\n", + " 97.44000244 97.66999817 97.73999786 98.18000031 98.13999939 98.36000061\n", + " 98.19000244 97.91999817 97.62000275 97.90000153 96.94999695 94.11000061\n", + " 95.08000183 96.16999817 94.48000336 94.62000275 95.84999847 95.51000214\n", + " 93.18000031 94.61000061 96.16999817 95.02999878 94.23000336 95.73000336\n", + " 96.83999634 94.52999878 94.37000275 95.80999756 96.30999756 94.91999817\n", + " 94.83000183 95.44000244 94.79000092 92.84999847 93.26000214 93.66000366\n", + " 92.20999908 90.30000305 88.19000244 91.63999939 92.29000092 92.55999756\n", + " 91. 
90.01000214 91.12000275 88.22000122 90.73999786 88.94999695\n", + " 85.81999969 90.08000183 88.94999695 90.79000092 91.98999786 94.23000336\n", + " 94.36000061 93.69000244 93.88999939 95.73000336 94.08000183 95.01000214\n", + " 95.55999756 95.31999969 94.98000336 95.44999695 96.22000122 96.25\n", + " 96.37999725 97.37000275 96.01000214 96.04000092 96.40000153 96.04000092\n", + " 93.55000305 93.52999878 94.94000244 94.05999756 90.54000092 90.91999817\n", + " 91.87999725 90.5 87.94999695 87.12999725 88.26000214 86.23999786\n", + " 85.59999847 87.66000366 83.66999817 73.52999878 62.49000168 59.43000031\n", + " 79.93000031 90.08000183 93.12999725 94.76999664 93.79000092 94.80999756\n", + " 96.12000275 96.48000336 95.48999786 96.97000122 96.41000366 96.83000183\n", + " 95.59999847 96.20999908 95.30000305 94.04000092 94.41000366 93.81999969\n", + " 93.06999969 93.01000214 95.01000214 94.43000031 93.26999664 94.30000305\n", + " 94.73000336 93.43000031 93.98000336 95.16999817 94.77999878 94.44000244\n", + " 94.77999878 95.48999786 94.08999634 93.91000366 95.23999786 93.80000305\n", + " 94.30999756 95.76999664 96.63999939 96.91999817 95.34999847 93.51999664\n", + " 94.73000336 93.76999664 81.12999725 72.05000305 78.80999756 89.94999695\n", + " 91.45999908 91.19999695 89.76000214 91.05999756 92.36000061 87.91000366\n", + " 89.88999939 89.41999817 93.61000061 94.76999664 95.18000031 95.79000092\n", + " 96.72000122 97.25 97.56999969 97.98000336 98.41999817 98.40000153\n", + " 98.31999969 98.52999878 98.37999725 98.45999908 98.23999786 98.23999786\n", + " 98.11000061 98.01000214 97.75 98.08999634 97.45999908 97.56999969\n", + " 98.40000153 98.54000092 98.06999969 98.22000122 98.61000061 98.61000061\n", + " 98.09999847 98.26999664 98.12999725 97.94999695 98.19000244 98.36000061\n", + " 98.44999695 98.12000275 98.48999786 98.66999817 98.76999664 98.62000275\n", + " 98.62000275 98.51000214 98.58000183 98.38999939 98.33000183 98.08000183\n", + " 97.44000244 97.51999664 96.91999817 
94.95999908 95.26000214 94.94000244\n", + " 91.88999939 93.51999664 93.58999634 92.65000153 91.22000122 90.36000061\n", + " 89.41000366 89.48999786 87.58000183 87.56999969 89.16000366 91.04000092\n", + " 92.59999847 93.04000092 92.51999664 90.72000122 87.91000366 89.26000214\n", + " 92.86000061 94.83999634 95.48999786 94.84999847 95.52999878 95.25\n", + " 94.43000031 94.58999634 95.19000244 94.87000275 93.94000244 95.72000122\n", + " 96.05999756 95.48999786 94.76999664 95.88999939 96.84999847 96.04000092\n", + " 97.12000275 98.13999939 98.31999969 98.27999878 98.26000214 98.41999817\n", + " 98.22000122 97.93000031 98.06999969 98.09999847 97.38999939 96.72000122\n", + " 96.98000336 96.84999847 95.95999908 96.23000336 95.18000031 96.51999664\n", + " 96.20999908 96.54000092 96.80000305 96.88999939 97.33999634 97.05999756\n", + " 96.90000153 97.41999817 97.54000092 97.27999878 97.19000244 96.43000031\n", + " 92.98999786 93.41000366 92.15000153 89.19000244 82.48999786]\n" + ] + } + ], + "source": [ "# select all alpha carbons in topology\n", "calphas = u.select_atoms(\"name CA\")\n", "\n", @@ -219,29 +269,29 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 6, "id": "ae7add4b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "tensor([[-7.9217e-02, -8.2230e-02, 5.8380e-02, ..., 2.4681e-01,\n", - " 9.6495e-02, 1.1700e+02],\n", - " [ 2.7191e-01, 1.3160e-01, -1.2749e-01, ..., 8.3813e-02,\n", - " 2.6999e-02, 1.1700e+02],\n", - " [ 7.5211e-02, -1.2474e-01, -3.1285e-01, ..., -7.0912e-02,\n", - " -1.3021e-01, 1.1700e+02],\n", + "tensor([[ 2.8654e-01, 1.0777e-01, -1.2961e-01, ..., -1.4857e-01,\n", + " -2.2857e-01, 1.8300e+02],\n", + " [-5.2170e-02, -7.7566e-02, -2.3158e-01, ..., -1.2442e-01,\n", + " 2.3262e-02, 1.8300e+02],\n", + " [ 7.1554e-02, -6.7267e-03, 7.6057e-02, ..., -2.8993e-01,\n", + " 4.3512e-02, 1.8300e+02],\n", " ...,\n", - " [-9.3008e-02, 1.5062e-01, 3.5336e-01, ..., -3.2767e-01,\n", - " -1.1053e-01, 1.1700e+02],\n", - " [ 
6.3777e-02, 1.2429e-01, 2.3989e-01, ..., -2.6909e-01,\n", - " 8.8695e-02, 1.1700e+02],\n", - " [ 7.8697e-02, -1.0143e-02, 3.3305e-01, ..., -1.6285e-01,\n", - " 1.1192e-01, 1.1700e+02]])" + " [ 1.0423e-02, 7.0801e-02, -1.1883e-01, ..., -2.1286e-01,\n", + " 5.9898e-02, 1.8300e+02],\n", + " [-1.7578e-01, 1.4636e-01, -3.6238e-02, ..., -1.2346e-01,\n", + " 1.2382e-02, 1.8300e+02],\n", + " [ 7.2781e-02, 1.7082e-01, -1.5018e-01, ..., -2.6568e-01,\n", + " -3.6069e-01, 1.8300e+02]])" ] }, - "execution_count": 21, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -252,12 +302,34 @@ "\n", "ESM_ENCODING_DIR = Path(\"/tgen_labs/altin/esm_encodings\")\n", "\n", - "sample_job_name = bp3.select(\"job_name\")[0].item()\n", + "sample_job_name = bp3.select(\"job_name\")[1].item()\n", "\n", "esm_2_embed = torch.load(ESM_ENCODING_DIR / (sample_job_name + \".pt\"))\n", "\n", "esm_2_embed" ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b68e6889", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([183, 1281])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "esm_2_embed.shape" + ] } ], "metadata": { diff --git a/notebooks/regression.ipynb b/notebooks/regression.ipynb new file mode 100644 index 0000000..53aa301 --- /dev/null +++ b/notebooks/regression.ipynb @@ -0,0 +1,587 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "97313f50", + "metadata": {}, + "source": [ + "# Machine Learning on Embeddings for Epitope Prediction\n", + "\n", + "The goal of this notebook is to put together a basic machine learning pipeline that can make epitope predictions using embeddings from AF3 and ESM" + ] + }, + { + "cell_type": "markdown", + "id": "0fa5456c", + "metadata": {}, + "source": [ + "## Environment:\n", + "\n", + "This notebook will run with the 'envs/env.yaml` environment (epident-experiments)" + ] + }, + { + "cell_type": "markdown", + "id": 
"a5097707", + "metadata": {}, + "source": [ + "## Bepipred 3 dataset\n", + "\n", + "- job_name: unique identifier for protein, comes from hash of seq\n", + "- seq: amino acid sequence of protein\n", + "- train: boolean indicating if seq is part of train set\n", + "- epitope_boolmask: boolean array the same length as seq indiciating if the AA at that position is an epitope residue\n", + "- raw_protein_id: original ID assigned to protein in BP3C50ID set\n", + "- RSA: relative solvent accessiblity of the protein at each AA, calculated by FreeSASA\n", + "- SA: absolute solvent accessibility of the protein at each AA, calculated by FreeSASA" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "47295d86", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (358, 7)\n", + "┌───────────────┬──────────────┬───────┬──────────────┬──────────────┬──────────────┬──────────────┐\n", + "│ job_name ┆ seq ┆ train ┆ epitope_bool ┆ raw_protein_ ┆ RSA ┆ SA │\n", + "│ --- ┆ --- ┆ --- ┆ mask ┆ id ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ bool ┆ --- ┆ --- ┆ list[f64] ┆ list[f64] │\n", + "│ ┆ ┆ ┆ list[bool] ┆ str ┆ ┆ │\n", + "╞═══════════════╪══════════════╪═══════╪══════════════╪══════════════╪══════════════╪══════════════╡\n", + "│ bf2a62534941c ┆ LIQTPSSLLVQT ┆ true ┆ [false, ┆ 3b9k_B ┆ [0.205823, ┆ [36.957627, │\n", + "│ f895971e1daa3 ┆ NHTAKMSCEVKS ┆ ┆ false, … ┆ ┆ 0.471213, … ┆ 82.806331, … │\n", + "│ 3a46… ┆ ISKLTS… ┆ ┆ false] ┆ ┆ 1.00154… ┆ 152.2… │\n", + "│ d4febd28417e8 ┆ GNVDLVFLFDGS ┆ true ┆ [false, ┆ 3hi6_A ┆ [0.840245, ┆ [68.13546, │\n", + "│ a4bf6266337c7 ┆ MSLQPDEFQKIL ┆ ┆ false, … ┆ ┆ 0.294451, … ┆ 42.698276, … │\n", + "│ a2de… ┆ DFMKDV… ┆ ┆ false] ┆ ┆ 0.60539… ┆ 129.66… │\n", + "│ 17d233a2b305a ┆ DERETWSGKVDF ┆ true ┆ [false, ┆ 4xp9_C ┆ [1.100846, ┆ [157.156786, │\n", + "│ 3544cf6c164f8 ┆ LLSVIGFAVDLA ┆ ┆ false, … ┆ ┆ 1.039373, … ┆ 181.038055, │\n", + "│ ad67… ┆ NVWRFP… ┆ ┆ false] ┆ ┆ 1.03129… ┆ … 185… │\n", 
+ "│ 34e0c5de18ccd ┆ KAMHVAQPAVVL ┆ true ┆ [true, true, ┆ 5ggv_Y ┆ [0.731129, ┆ [149.866882, │\n", + "│ 222f24d4bc9d0 ┆ ASSRGIASFVCE ┆ ┆ … false] ┆ ┆ 0.872878, … ┆ 94.934212, … │\n", + "│ f0e4… ┆ YASPGK… ┆ ┆ ┆ ┆ 1.22799… ┆ 168.… │\n", + "│ f4c930a3f1b5f ┆ GSHHHHHHGSGT ┆ true ┆ [false, ┆ 5jq6_A ┆ [1.462795, ┆ [118.618012, │\n", + "│ b78cef62c5021 ┆ DITNQLTNVTVG ┆ ┆ false, … ┆ ┆ 0.796439, … ┆ 94.250586, … │\n", + "│ adc0… ┆ IDSGTT… ┆ ┆ false] ┆ ┆ 0.90302… ┆ 119.… │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2c282aeeb8859 ┆ LDKIDLSYETTE ┆ false ┆ [false, ┆ 7jum_A ┆ [0.474081, ┆ [85.126053, │\n", + "│ 6bf1f1f99be1b ┆ SGDTAVSEDSYD ┆ ┆ false, … ┆ ┆ 0.908022, … ┆ 129.629226, │\n", + "│ b7f0… ┆ KYASQN… ┆ ┆ false] ┆ ┆ 0.98618… ┆ … 143.… │\n", + "│ 5196520df0000 ┆ TDRQLAEEYLYR ┆ false ┆ [false, ┆ 5th9_A ┆ [0.871364, ┆ [122.513787, │\n", + "│ bf1b3fafa8c0e ┆ YGYTRVASLGPA ┆ ┆ false, … ┆ ┆ 0.432033, … ┆ 61.677101, … │\n", + "│ 9ecc… ┆ LLLLQK… ┆ ┆ false] ┆ ┆ 0.25613… ┆ 54.8… │\n", + "│ 96836e4358c57 ┆ LPWLNVSADGDN ┆ false ┆ [false, ┆ 6hga_B ┆ [0.93068, ┆ [167.112849, │\n", + "│ e3f571a4f2bb8 ┆ VHLVLNVSEEQH ┆ ┆ false, … ┆ ┆ 0.110532, … ┆ 15.166135, … │\n", + "│ a8f8… ┆ FGLSLY… ┆ ┆ true] ┆ ┆ 1.220648… ┆ 223.… │\n", + "│ 9d838eec0c246 ┆ CSSPPCECHQEE ┆ false ┆ [false, ┆ 2xwt_C ┆ [0.479508, ┆ [63.390948, │\n", + "│ 55e9902a3ac12 ┆ DFRVTCKDIQRI ┆ ┆ false, … ┆ ┆ 1.059907, … ┆ 125.429402, │\n", + "│ 8a34… ┆ PSLPPS… ┆ ┆ false] ┆ ┆ 0.52811… ┆ … 74.2… │\n", + "│ cb56653d3f7b5 ┆ CSVVVGENYSIK ┆ false ┆ [false, ┆ 6vtw_A ┆ [0.628252, ┆ [83.054881, │\n", + "│ 272b787496354 ┆ CDATKCTIEDKN ┆ ┆ false, … ┆ ┆ 0.682526, … ┆ 80.770091, … │\n", + "│ 9242… ┆ RGIIKT… ┆ ┆ false] ┆ ┆ 0.63736… ┆ 113.9… │\n", + "└───────────────┴──────────────┴───────┴──────────────┴──────────────┴──────────────┴──────────────┘\n" + ] + } + ], + "source": [ + "# --- Imports ---\n", + "\n", + "import polars as pl\n", + "import pandas as pd\n", + "import pickle\n", + "from mdaf3.AF3OutputParser import AF3Output\n", + "from 
pathlib import Path\n", + "import torch\n", + "import numpy as np\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import StratifiedKFold\n", + "from sklearn.metrics import roc_auc_score\n", + "from plotnine import *\n", + "theme_set(theme_classic())\n", + "\n", + "# --- Bepipred3 Data ---\n", + "\n", + "bp3 = pl.read_parquet(\"../data/bp3c50id/bp3c50id.rsa.parquet\")\n", + "bp3 = bp3.rename({\"test\" : \"train\"})\n", + "print(bp3)" + ] + }, + { + "cell_type": "markdown", + "id": "1c40a25c", + "metadata": {}, + "source": [ + "## Getting structural embeddings for a protein\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "615ee18f", + "metadata": {}, + "outputs": [], + "source": [ + "# --- AF3 Embeddings ---\n", + "\n", + "INF_DIR = Path(\"../data/bp3c50id/inference\")\n", + "sample_job_name = bp3.select(\"job_name\")[0].item()\n", + "af3_output = AF3Output(INF_DIR / sample_job_name)\n", + "\n", + "af3_single_embed = af3_output.get_single_embeddings()\n", + "af3_pairwise_embed = af3_output.get_pair_embeddings()\n", + "\n", + "#af3_single_embed\n", + "#af3_pairwise_embed" + ] + }, + { + "cell_type": "markdown", + "id": "6769fd0f", + "metadata": {}, + "source": [ + "## Getting LM embeddings for a protein\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4e432eef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (358, 5)\n", + "┌─────────────────────┬────────────────────┬────────────────────┬────────────────────┬─────────────┐\n", + "│ esm_emb ┆ seq ┆ train_boolmask ┆ epitope_boolmask ┆ RSA │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ object ┆ str ┆ list[bool] ┆ list[bool] ┆ list[f64] │\n", + "╞═════════════════════╪════════════════════╪════════════════════╪════════════════════╪═════════════╡\n", + "│ tensor([[-7.9217e-0 ┆ LIQTPSSLLVQTNHTAKM ┆ [true, true, … ┆ [false, false, … ┆ [0.205823, │\n", + "│ 2, -8.2230e… ┆ 
SCEVKSISKLTS… ┆ true] ┆ false] ┆ 0.471213, … │\n", + "│ ┆ ┆ ┆ ┆ 1.00154… │\n", + "│ tensor([[ ┆ GNVDLVFLFDGSMSLQPD ┆ [true, true, … ┆ [false, false, … ┆ [0.840245, │\n", + "│ 2.8654e-01, ┆ EFQKILDFMKDV… ┆ true] ┆ false] ┆ 0.294451, … │\n", + "│ 1.0777e… ┆ ┆ ┆ ┆ 0.60539… │\n", + "│ tensor([[ ┆ DERETWSGKVDFLLSVIG ┆ [true, true, … ┆ [false, false, … ┆ [1.100846, │\n", + "│ 1.1028e-01, ┆ FAVDLANVWRFP… ┆ true] ┆ false] ┆ 1.039373, … │\n", + "│ -3.8646e… ┆ ┆ ┆ ┆ 1.03129… │\n", + "│ tensor([[-2.5486e-0 ┆ KAMHVAQPAVVLASSRGI ┆ [true, true, … ┆ [true, true, … ┆ [0.731129, │\n", + "│ 1, 5.5604e… ┆ ASFVCEYASPGK… ┆ true] ┆ false] ┆ 0.872878, … │\n", + "│ ┆ ┆ ┆ ┆ 1.22799… │\n", + "│ tensor([[-1.5629e-0 ┆ GSHHHHHHGSGTDITNQL ┆ [true, true, … ┆ [false, false, … ┆ [1.462795, │\n", + "│ 1, 7.9395e… ┆ TNVTVGIDSGTT… ┆ true] ┆ false] ┆ 0.796439, … │\n", + "│ ┆ ┆ ┆ ┆ 0.90302… │\n", + "│ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ tensor([[-5.8743e-0 ┆ LDKIDLSYETTESGDTAV ┆ [false, false, … ┆ [false, false, … ┆ [0.474081, │\n", + "│ 2, -4.0843e… ┆ SEDSYDKYASQN… ┆ false] ┆ false] ┆ 0.908022, … │\n", + "│ ┆ ┆ ┆ ┆ 0.98618… │\n", + "│ tensor([[ ┆ TDRQLAEEYLYRYGYTRV ┆ [false, false, … ┆ [false, false, … ┆ [0.871364, │\n", + "│ 4.7727e-03, ┆ ASLGPALLLLQK… ┆ false] ┆ false] ┆ 0.432033, … │\n", + "│ 1.5819e… ┆ ┆ ┆ ┆ 0.25613… │\n", + "│ tensor([[ ┆ LPWLNVSADGDNVHLVLN ┆ [false, false, … ┆ [false, false, … ┆ [0.93068, │\n", + "│ 1.3285e-01, ┆ VSEEQHFGLSLY… ┆ false] ┆ true] ┆ 0.110532, … │\n", + "│ 7.9789e… ┆ ┆ ┆ ┆ 1.220648… │\n", + "│ tensor([[ ┆ CSSPPCECHQEEDFRVTC ┆ [false, false, … ┆ [false, false, … ┆ [0.479508, │\n", + "│ 1.1656e-01, ┆ KDIQRIPSLPPS… ┆ false] ┆ false] ┆ 1.059907, … │\n", + "│ 1.4177e… ┆ ┆ ┆ ┆ 0.52811… │\n", + "│ tensor([[ ┆ CSVVVGENYSIKCDATKC ┆ [false, false, … ┆ [false, false, … ┆ [0.628252, │\n", + "│ 6.2200e-02, ┆ TIEDKNRGIIKT… ┆ false] ┆ false] ┆ 0.682526, … │\n", + "│ 1.6793e… ┆ ┆ ┆ ┆ 0.63736… │\n", + 
"└─────────────────────┴────────────────────┴────────────────────┴────────────────────┴─────────────┘\n" + ] + } + ], + "source": [ + "# --- ESM Embeddings ---\n", + "\n", + "ESM_ENCODING_DIR = Path(\"/tgen_labs/altin/esm_encodings\")\n", + "esm_2_embed = torch.load(ESM_ENCODING_DIR / (sample_job_name + \".pt\")).tolist()\n", + "\n", + "if \"esm_emb\" not in bp3.columns:\n", + " esm_embeddings = []\n", + " for job_num in range(bp3.shape[0]):\n", + " job_name = bp3.select(\"job_name\")[job_num].item()\n", + " esm_embeddings.append(torch.load(ESM_ENCODING_DIR / (job_name + \".pt\")))\n", + " esm_emb = pl.Series(\"esm_emb\", esm_embeddings)\n", + " bp3.insert_column(0, esm_emb)\n", + " bp3 = bp3.drop(['job_name', \"raw_protein_id\", 'SA'])\n", + "\n", + "if \"train_boolmask\" not in bp3:\n", + " train_boolmask = []\n", + " for (esm_emb, seq, train, epitope_boolmask, rsa) in bp3.iter_rows():\n", + " train_bools = []\n", + " for num in range(len(seq)):\n", + " train_bools.append(train)\n", + " train_boolmask.append(train_bools)\n", + " train_boolmask = pl.Series(\"train_boolmask\", train_boolmask)\n", + " bp3.insert_column(2, train_boolmask)\n", + " bp3 = bp3.drop(['train'])\n", + " \n", + "print(bp3)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d3709f2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (84_439, 5)
embeddingsresiduestrain_boolsepitope_boolsrsa_vals
list[f64]strboolboolf64
[-0.079217, -0.08223, … 117.0]"L"truefalse0.205823
[0.271906, 0.131599, … 117.0]"I"truefalse0.471213
[0.075211, -0.124738, … 117.0]"Q"truefalse0.046812
[0.033206, 0.13658, … 117.0]"T"truefalse0.437416
[-0.153488, 0.178101, … 117.0]"P"truefalse0.312792
[-0.102616, 0.023357, … 47.0]"V"falsefalse0.09529
[0.006365, -0.054578, … 47.0]"Q"falsefalse0.559269
[-0.021138, 0.060409, … 47.0]"K"falsefalse0.883928
[-0.013476, 0.081914, … 47.0]"A"falsefalse0.828726
[-0.079727, 0.132829, … 47.0]"Q"falsefalse0.637367
" + ], + "text/plain": [ + "shape: (84_439, 5)\n", + "┌────────────────────────────────┬──────────┬─────────────┬───────────────┬──────────┐\n", + "│ embeddings ┆ residues ┆ train_bools ┆ epitope_bools ┆ rsa_vals │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ list[f64] ┆ str ┆ bool ┆ bool ┆ f64 │\n", + "╞════════════════════════════════╪══════════╪═════════════╪═══════════════╪══════════╡\n", + "│ [-0.079217, -0.08223, … 117.0] ┆ L ┆ true ┆ false ┆ 0.205823 │\n", + "│ [0.271906, 0.131599, … 117.0] ┆ I ┆ true ┆ false ┆ 0.471213 │\n", + "│ [0.075211, -0.124738, … 117.0] ┆ Q ┆ true ┆ false ┆ 0.046812 │\n", + "│ [0.033206, 0.13658, … 117.0] ┆ T ┆ true ┆ false ┆ 0.437416 │\n", + "│ [-0.153488, 0.178101, … 117.0] ┆ P ┆ true ┆ false ┆ 0.312792 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ [-0.102616, 0.023357, … 47.0] ┆ V ┆ false ┆ false ┆ 0.09529 │\n", + "│ [0.006365, -0.054578, … 47.0] ┆ Q ┆ false ┆ false ┆ 0.559269 │\n", + "│ [-0.021138, 0.060409, … 47.0] ┆ K ┆ false ┆ false ┆ 0.883928 │\n", + "│ [-0.013476, 0.081914, … 47.0] ┆ A ┆ false ┆ false ┆ 0.828726 │\n", + "│ [-0.079727, 0.132829, … 47.0] ┆ Q ┆ false ┆ false ┆ 0.637367 │\n", + "└────────────────────────────────┴──────────┴─────────────┴───────────────┴──────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# --- Transform to Per-Residue Basis ---\n", + " \n", + "embs = []\n", + "residues = []\n", + "train_bools = []\n", + "epitope_bools = []\n", + "rsa_vals = []\n", + "for (esm_emb, seq, train_boolmask, epitope_boolmask, rsa) in bp3.iter_rows():\n", + " embs.extend(esm_emb.tolist())\n", + " residues.extend(seq)\n", + " train_bools.extend(train_boolmask)\n", + " epitope_bools.extend(epitope_boolmask)\n", + " rsa_vals.extend(rsa)\n", + "\n", + "data = {\n", + " \"embeddings\" : embs,\n", + " \"residues\" : residues,\n", + " \"train_bools\" : train_bools,\n", + " \"epitope_bools\" : epitope_bools,\n", + " \"rsa_vals\" : rsa_vals\n", + "}\n", + "\n", 
+ "bp3_res = pl.DataFrame(data)\n", + "bp3_res\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f35c6fd", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Extract N Most Informative Features of Embedding ---\n", + "\n", + "# emb_var 1280 : 81 median diff\n", + "# emb_var 1280 : 1.13 median diff\n", + "# emb_var 1280 : 0.37 median diff\n", + "\n", + "\n", + "\n", + "num_emb_vars = bp3_res.select(\"embeddings\")[0].item().shape[0]\n", + "\n", + "med_diff = []\n", + "for var in range(num_emb_vars):\n", + " var_epi = []\n", + " var_nepi = []\n", + " for (embedding, residue, train_bool, epitope_bool, rsa) in bp3_res.iter_rows():\n", + " if epitope_bool == True:\n", + " var_epi.append(embedding[var])\n", + " else:\n", + " var_nepi.append(embedding[var])\n", + " med_diff.append(np.median(var_nepi) - np.median(var_epi))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "da101158", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1281\n", + "[(np.float64(81.0), 1280), (np.float64(1.130703330039978), 234), (np.float64(0.3712773323059082), 1160), (np.float64(0.045004742220044136), 839), (np.float64(0.044218819588422775), 553), (np.float64(0.04032979533076286), 696), (np.float64(0.03747392725199461), 414), (np.float64(0.03660698514431715), 1251), (np.float64(0.03643316403031349), 600), (np.float64(0.0351506844162941), 381), (np.float64(0.034491033758968115), 756), (np.float64(0.033380577340722084), 300), (np.float64(0.03337776567786932), 1014), (np.float64(0.03284870833158493), 1186), (np.float64(0.03240643069148064), 855), (np.float64(0.032072730362415314), 608), (np.float64(0.031966139416908845), 86), (np.float64(0.031056914827786386), 396), (np.float64(0.030288565903902054), 655), (np.float64(0.030203919857740402), 166), (np.float64(0.030178461922332644), 423), (np.float64(0.029451459646224976), 809), (np.float64(0.028364425525069237), 77), 
(np.float64(0.02815424744039774), 249), (np.float64(0.028020352125167847), 145), (np.float64(0.02772002387791872), 370), (np.float64(0.027637861669063568), 564), (np.float64(0.026874929666519165), 1194), (np.float64(0.02658862737007439), 1088), (np.float64(0.025990422815084457), 230), (np.float64(0.025970193557441235), 968), (np.float64(0.025764848105609417), 998), (np.float64(0.025550130754709244), 1174), (np.float64(0.025247633457183838), 803), (np.float64(0.02497640997171402), 236), (np.float64(0.024945996701717377), 812), (np.float64(0.02489049779251218), 1127), (np.float64(0.024773985147476196), 880), (np.float64(0.024766715243458748), 628), (np.float64(0.024555565789341927), 1067), (np.float64(0.02454405650496483), 683), (np.float64(0.024141178466379642), 152), (np.float64(0.024102460592985153), 1006), (np.float64(0.023943433538079262), 583), (np.float64(0.023688404820859432), 191), (np.float64(0.023484595119953156), 1053), (np.float64(0.023418080061674118), 609), (np.float64(0.023391427472233772), 153), (np.float64(0.0233256034553051), 1107), (np.float64(0.023274637758731842), 271), (np.float64(0.022974496707320213), 168), (np.float64(0.022787262685596943), 192), (np.float64(0.02268359251320362), 260), (np.float64(0.02266924805007875), 593), (np.float64(0.022517642006278038), 921), (np.float64(0.022394230589270592), 727), (np.float64(0.022389421239495277), 789), (np.float64(0.022317957365885377), 1162), (np.float64(0.022150119300931692), 277), (np.float64(0.02212531678378582), 899), (np.float64(0.022036344325897517), 1094), (np.float64(0.02189606800675392), 1111), (np.float64(0.02164968801662326), 412), (np.float64(0.021564900875091553), 1165), (np.float64(0.021400924772024155), 36), (np.float64(0.02118323463946581), 681), (np.float64(0.021175827831029892), 853), (np.float64(0.020939030684530735), 1125), (np.float64(0.020914054475724697), 695), (np.float64(0.02084817737340927), 973), (np.float64(0.020482300780713558), 1092), 
(np.float64(0.020417840220034122), 450), (np.float64(0.020330642815679312), 68), (np.float64(0.020232222974300385), 1049), (np.float64(0.020162058994174004), 405), (np.float64(0.02016097353771329), 483), (np.float64(0.01997297373600304), 892), (np.float64(0.01992867747321725), 75), (np.float64(0.01989478268660605), 462), (np.float64(0.01922575756907463), 1055), (np.float64(0.0190451480448246), 971), (np.float64(0.018956223502755165), 1212), (np.float64(0.018928367644548416), 775), (np.float64(0.01885544741526246), 340), (np.float64(0.01867196150124073), 22), (np.float64(0.018671827390789986), 237), (np.float64(0.018619922921061516), 1077), (np.float64(0.01853789109736681), 1168), (np.float64(0.01844160445034504), 375), (np.float64(0.01838996820151806), 471), (np.float64(0.018250529188662767), 283), (np.float64(0.018210260197520256), 1193), (np.float64(0.01818249374628067), 883), (np.float64(0.018178826197981834), 354), (np.float64(0.018157916143536568), 448), (np.float64(0.01814010553061962), 1072), (np.float64(0.018098924192599952), 1264), (np.float64(0.018092042300850153), 866), (np.float64(0.017926552798599005), 347), (np.float64(0.017782390117645264), 67), (np.float64(0.017713487148284912), 580), (np.float64(0.017520148307085037), 905), (np.float64(0.017419555690139532), 1069), (np.float64(0.01740977691952139), 611), (np.float64(0.017335407435894012), 507), (np.float64(0.01730018761008978), 728), (np.float64(0.017248489893972874), 215), (np.float64(0.017228491604328156), 297), (np.float64(0.017207475379109383), 668), (np.float64(0.01716914726421237), 90), (np.float64(0.017164206132292747), 418), (np.float64(0.017009243369102478), 1145), (np.float64(0.016953904181718826), 615), (np.float64(0.01688589807599783), 1085), (np.float64(0.016667735180817544), 211), (np.float64(0.016580134630203247), 950), (np.float64(0.016412150114774704), 582), (np.float64(0.016342705115675926), 301), (np.float64(0.016321652568876743), 25), (np.float64(0.01623747358098626), 566), 
(np.float64(0.01623537763953209), 659), (np.float64(0.016207854729145765), 1187), (np.float64(0.016190076246857643), 1129), (np.float64(0.016053708270192146), 410), (np.float64(0.015998678281903267), 374), (np.float64(0.01599816046655178), 944), (np.float64(0.01595117896795273), 137), (np.float64(0.01590883918106556), 984), (np.float64(0.01588423765497282), 213), (np.float64(0.01587132178246975), 717), (np.float64(0.015777988824993372), 974), (np.float64(0.015773339197039604), 346), (np.float64(0.015697740018367767), 571), (np.float64(0.015640379322576337), 219), (np.float64(0.015593299642205238), 351), (np.float64(0.015562902670353651), 1114), (np.float64(0.015533394180238247), 388), (np.float64(0.01539867208339274), 1137), (np.float64(0.01523844338953495), 428), (np.float64(0.015186004340648651), 836), (np.float64(0.015168139711022377), 776), (np.float64(0.01514124684035778), 66), (np.float64(0.015134465182200074), 589), (np.float64(0.015024304389953613), 1198), (np.float64(0.014992635697126389), 732), (np.float64(0.014886489138007164), 200), (np.float64(0.014842655509710312), 777), (np.float64(0.014836808666586876), 810), (np.float64(0.014810930006206036), 762), (np.float64(0.014797125943005085), 722), (np.float64(0.014722553140018135), 1146), (np.float64(0.014658856205642223), 939), (np.float64(0.014645400457084179), 871), (np.float64(0.014637693762779236), 1228), (np.float64(0.014579221606254578), 460), (np.float64(0.014410372823476791), 265), (np.float64(0.014292508363723755), 907), (np.float64(0.014270318672060966), 1052), (np.float64(0.014171725139021873), 1183), (np.float64(0.014159210142679513), 718), (np.float64(0.01415821723639965), 363), (np.float64(0.014137148391455412), 350), (np.float64(0.013962782919406891), 813), (np.float64(0.01390154892578721), 33), (np.float64(0.013901078724302351), 715), (np.float64(0.013888734392821789), 1151), (np.float64(0.013806648552417755), 110), (np.float64(0.013695838861167431), 664), (np.float64(0.013664640951901674), 
401), (np.float64(0.01362670212984085), 1005), (np.float64(0.013555938377976418), 397), (np.float64(0.013532337732613087), 657), (np.float64(0.01352951256558299), 291), (np.float64(0.013322470709681511), 774), (np.float64(0.013301345519721508), 631), (np.float64(0.013249438256025314), 822), (np.float64(0.013248251751065254), 55), (np.float64(0.013229165226221085), 231), (np.float64(0.013227107585407794), 314), (np.float64(0.013184343464672565), 288), (np.float64(0.013178281486034393), 646), (np.float64(0.013057534699328244), 1008), (np.float64(0.012972468510270119), 243), (np.float64(0.012916450956254266), 379), (np.float64(0.012898104265332222), 1189), (np.float64(0.012832388281822205), 786), (np.float64(0.012810321524739265), 441), (np.float64(0.012777255848050117), 791), (np.float64(0.012744493782520294), 686), (np.float64(0.01274031586945057), 203), (np.float64(0.012716792523860931), 85), (np.float64(0.012634440790861845), 1004), (np.float64(0.01250067725777626), 500), (np.float64(0.012495343806222081), 563), (np.float64(0.012440497055649757), 990), (np.float64(0.012401574291288853), 1032), (np.float64(0.012400735169649124), 793), (np.float64(0.012385524809360504), 801), (np.float64(0.012380607426166534), 765), (np.float64(0.01234420482069254), 279), (np.float64(0.012343136593699455), 854), (np.float64(0.012326118245255202), 222), (np.float64(0.012296153232455254), 1188), (np.float64(0.012261290132300928), 165), (np.float64(0.012197593227028847), 403), (np.float64(0.012096164748072624), 979), (np.float64(0.012080555781722069), 652), (np.float64(0.012075373902916908), 1084), (np.float64(0.011993632419034839), 248), (np.float64(0.011948315426707268), 307), (np.float64(0.011938711628317833), 207), (np.float64(0.011916568502783775), 20), (np.float64(0.011913990136235952), 1243), (np.float64(0.0119064562022686), 819), (np.float64(0.011905217543244362), 845), (np.float64(0.011900685727596283), 1041), (np.float64(0.011890262365341187), 214), 
(np.float64(0.011802103370428085), 976), (np.float64(0.011791346594691277), 1120), (np.float64(0.011742846050765365), 339), (np.float64(0.011728386860340834), 59), (np.float64(0.01166495680809021), 253), (np.float64(0.011663809418678284), 1021), (np.float64(0.011610973626375198), 117), (np.float64(0.011607056483626366), 719), (np.float64(0.011598647572100163), 568), (np.float64(0.011571774259209633), 415), (np.float64(0.011481711699161679), 278), (np.float64(0.011479867622256279), 977), (np.float64(0.01147252693772316), 469), (np.float64(0.01137647032737732), 472), (np.float64(0.011283046565949917), 561), (np.float64(0.011235762620344758), 188), (np.float64(0.011222146451473236), 1259), (np.float64(0.011220555752515793), 118), (np.float64(0.011210506781935692), 150), (np.float64(0.01115427166223526), 282), (np.float64(0.011089973151683807), 730), (np.float64(0.0110830869525671), 183), (np.float64(0.011028191074728966), 399), (np.float64(0.01101304218173027), 92), (np.float64(0.010961954947561026), 123), (np.float64(0.010941036212898325), 1218), (np.float64(0.010906634852290154), 312), (np.float64(0.01089153066277504), 820), (np.float64(0.01083652675151825), 425), (np.float64(0.010806520935148), 220), (np.float64(0.01080569438636303), 1244), (np.float64(0.010785584338009357), 1254), (np.float64(0.010722700506448746), 700), (np.float64(0.010713106952607632), 323), (np.float64(0.01068238914012909), 284), (np.float64(0.010637138038873672), 835), (np.float64(0.010622672736644745), 1263), (np.float64(0.010598766501061618), 16), (np.float64(0.010591836180537939), 129), (np.float64(0.010561587288975716), 276), (np.float64(0.010534115135669708), 266), (np.float64(0.010474463924765587), 159), (np.float64(0.010459182783961296), 451), (np.float64(0.010430864989757538), 1247), (np.float64(0.01038616243749857), 444), (np.float64(0.010379638464655727), 131), (np.float64(0.0101566631346941), 913), (np.float64(0.010046117007732391), 4), (np.float64(0.009935624897480011), 1209), 
(np.float64(0.009927066043019295), 667), (np.float64(0.009867895394563675), 966), (np.float64(0.009789712727069855), 42), (np.float64(0.009780844673514366), 993), (np.float64(0.009772205725312233), 119), (np.float64(0.009715504944324493), 273), (np.float64(0.009608583524823189), 1019), (np.float64(0.009587500244379044), 570), (np.float64(0.009521863423287868), 162), (np.float64(0.009470891673117876), 975), (np.float64(0.009450562531128526), 1202), (np.float64(0.009449162287637591), 100), (np.float64(0.009391489322297275), 934), (np.float64(0.009280447848141193), 196), (np.float64(0.009250136092305183), 430), (np.float64(0.00911640003323555), 285), (np.float64(0.009104061871767044), 103), (np.float64(0.009091995656490326), 932), (np.float64(0.009053241461515427), 761), (np.float64(0.008984031155705452), 6), (np.float64(0.008899634703993797), 121), (np.float64(0.008806407451629639), 106), (np.float64(0.008774453774094582), 262), (np.float64(0.008766988146817312), 1255), (np.float64(0.008755201008170843), 426), (np.float64(0.008747384359594434), 1246), (np.float64(0.008727488107979298), 56), (np.float64(0.008709082379937172), 1018), (np.float64(0.008707253262400627), 338), (np.float64(0.008586497977375984), 47), (np.float64(0.008559728041291237), 242), (np.float64(0.008496899157762527), 739), (np.float64(0.008455098606646061), 1153), (np.float64(0.008453513495624065), 796), (np.float64(0.008421202190220356), 180), (np.float64(0.008398983627557755), 578), (np.float64(0.008393193129450083), 138), (np.float64(0.008391544222831726), 27), (np.float64(0.00835740938782692), 639), (np.float64(0.00834231125190854), 187), (np.float64(0.00831507908878848), 64), (np.float64(0.008314916864037514), 98), (np.float64(0.008291925652883947), 1203), (np.float64(0.008287357166409492), 490), (np.float64(0.008284620009362698), 771), (np.float64(0.008237404748797417), 595), (np.float64(0.008215129375457764), 353), (np.float64(0.008181661367416382), 128), (np.float64(0.008086762623861432), 
587), (np.float64(0.008051972836256027), 475), (np.float64(0.008047517389059067), 1060), (np.float64(0.008003609604202211), 713), (np.float64(0.008001109352335334), 503), (np.float64(0.00792611576616764), 114), (np.float64(0.007904700934886932), 281), (np.float64(0.007900640368461609), 456), (np.float64(0.00789184495806694), 274), (np.float64(0.007889870554208755), 212), (np.float64(0.007881866302341223), 512), (np.float64(0.00787946954369545), 355), (np.float64(0.00786761287599802), 299), (np.float64(0.007863425649702549), 653), (np.float64(0.007847219705581665), 310), (np.float64(0.007825737819075584), 377), (np.float64(0.007806180045008659), 1025), (np.float64(0.0077806273475289345), 542), (np.float64(0.007778293918818235), 197), (np.float64(0.0077378100249916315), 654), (np.float64(0.007702145725488663), 594), (np.float64(0.007652724161744118), 613), (np.float64(0.007633641362190247), 417), (np.float64(0.0076003409922122955), 329), (np.float64(0.007589031883981079), 888), (np.float64(0.007576806470751762), 923), (np.float64(0.007573945447802544), 1108), (np.float64(0.007561119273304939), 43), (np.float64(0.007557529956102371), 1090), (np.float64(0.00754080805927515), 19), (np.float64(0.007507447153329849), 724), (np.float64(0.0074921175837516785), 317), (np.float64(0.0074846018105745316), 952), (np.float64(0.007444923743605614), 376), (np.float64(0.007425621151924133), 45), (np.float64(0.007377568632364273), 687), (np.float64(0.007334676454775035), 1002), (np.float64(0.0073283761739730835), 206), (np.float64(0.007327833212912083), 560), (np.float64(0.007297255098819733), 783), (np.float64(0.00729526299983263), 61), (np.float64(0.007223796099424362), 592), (np.float64(0.0072027649730443954), 446), (np.float64(0.007195578888058662), 1241), (np.float64(0.00719551183283329), 957), (np.float64(0.007170367985963821), 943), (np.float64(0.007156253792345524), 729), (np.float64(0.007133243838325143), 1065), (np.float64(0.007120907306671143), 901), 
(np.float64(0.007117012515664101), 735), (np.float64(0.0071118175983428955), 893), (np.float64(0.007111807353794575), 58), (np.float64(0.00709662027657032), 547), (np.float64(0.007087027654051781), 362), (np.float64(0.007054241374135017), 498), (np.float64(0.007042464800179005), 1221), (np.float64(0.006980568170547485), 122), (np.float64(0.00695190392434597), 886), (np.float64(0.006930340081453323), 321), (np.float64(0.006925065070390701), 510), (np.float64(0.006918858736753464), 313), (np.float64(0.006893233919981867), 330), (np.float64(0.00686962716281414), 11), (np.float64(0.006865903967991471), 365), (np.float64(0.006832782179117203), 394), (np.float64(0.006829655729234219), 891), (np.float64(0.006801697425544262), 53), (np.float64(0.006791293621063232), 937), (np.float64(0.006698655895888805), 703), (np.float64(0.006694257725030184), 1170), (np.float64(0.006679002195596695), 189), (np.float64(0.006648337468504906), 409), (np.float64(0.006589038297533989), 972), (np.float64(0.006570897996425629), 416), (np.float64(0.006568379700183868), 1011), (np.float64(0.006532957944727968), 360), (np.float64(0.006522510200738907), 656), (np.float64(0.0065031107515096664), 395), (np.float64(0.006459635682404041), 607), (np.float64(0.006452583707869053), 750), (np.float64(0.006433840841054916), 303), (np.float64(0.006367001682519913), 287), (np.float64(0.006355821620672941), 171), (np.float64(0.00631660595536232), 525), (np.float64(0.006311326549621299), 630), (np.float64(0.006254343315958977), 621), (np.float64(0.006236137822270393), 1265), (np.float64(0.0062336549162864685), 280), (np.float64(0.006093231961131096), 1), (np.float64(0.006068775430321693), 515), (np.float64(0.006007391959428787), 179), (np.float64(0.005940468981862068), 50), (np.float64(0.0059141237288713455), 328), (np.float64(0.0059125833213329315), 184), (np.float64(0.005842448212206364), 1229), (np.float64(0.005820596590638161), 559), (np.float64(0.0058171721175313), 485), 
(np.float64(0.005750535521656275), 1045), (np.float64(0.005683631170541048), 113), (np.float64(0.00566563755273819), 13), (np.float64(0.005600292701274157), 331), (np.float64(0.005594167858362198), 629), (np.float64(0.005578726530075073), 531), (np.float64(0.005515436641871929), 598), (np.float64(0.0054828329011797905), 758), (np.float64(0.005473967641592026), 1276), (np.float64(0.005444618873298168), 32), (np.float64(0.005436782957985997), 755), (np.float64(0.005431024357676506), 785), (np.float64(0.005363805568777025), 532), (np.float64(0.005330108106136322), 830), (np.float64(0.00532750366255641), 1267), (np.float64(0.0053042881190776825), 294), (np.float64(0.005254607647657394), 1017), (np.float64(0.005230085924267769), 1216), (np.float64(0.0052208518027327955), 404), (np.float64(0.005198197439312935), 1015), (np.float64(0.005155934486538172), 678), (np.float64(0.005148696713149548), 663), (np.float64(0.005032102577388287), 359), (np.float64(0.004991074092686176), 714), (np.float64(0.0049825385212898254), 627), (np.float64(0.004934079828672111), 904), (np.float64(0.00489051453769207), 83), (np.float64(0.00488685816526413), 584), (np.float64(0.0048807961866259575), 411), (np.float64(0.004836020991206169), 1027), (np.float64(0.0048017119988799095), 684), (np.float64(0.004774243570864201), 1176), (np.float64(0.0047730617225170135), 1219), (np.float64(0.004693788127042353), 823), (np.float64(0.00468137301504612), 908), (np.float64(0.00467224046587944), 605), (np.float64(0.0045488253235816956), 156), (np.float64(0.004545961506664753), 84), (np.float64(0.004537271335721016), 708), (np.float64(0.0045341793447732925), 926), (np.float64(0.004530996084213257), 182), (np.float64(0.0044933343306183815), 30), (np.float64(0.004464704543352127), 9), (np.float64(0.004407189786434174), 569), (np.float64(0.0043976083397865295), 1149), (np.float64(0.0043945867801085114), 95), (np.float64(0.004293292760848999), 172), (np.float64(0.0042603835463523865), 387), 
(np.float64(0.004210165643598884), 173), (np.float64(0.004184460442047566), 496), (np.float64(0.004150079563260078), 367), (np.float64(0.00413903035223484), 341), (np.float64(0.0040975576266646385), 550), (np.float64(0.004085277207195759), 251), (np.float64(0.004077760153450072), 1269), (np.float64(0.004069924354553223), 1249), (np.float64(0.004054168239235878), 298), (np.float64(0.004032887518405914), 1222), (np.float64(0.004022389650344849), 897), (np.float64(0.004020331427454948), 91), (np.float64(0.004010087111964822), 167), (np.float64(0.004000985994935036), 368), (np.float64(0.003997102379798889), 619), (np.float64(0.003981940448284149), 624), (np.float64(0.003933241590857506), 848), (np.float64(0.003733748570084572), 319), (np.float64(0.0037168003618717194), 1234), (np.float64(0.0036879992112517357), 186), (np.float64(0.003684638999402523), 380), (np.float64(0.0036621559411287308), 746), (np.float64(0.0036391839385032654), 178), (np.float64(0.0036176294088363647), 1051), (np.float64(0.003613073378801346), 1058), (np.float64(0.0035451650619506836), 518), (np.float64(0.0035063475370407104), 1200), (np.float64(0.0035057906061410904), 931), (np.float64(0.0035018213093280792), 751), (np.float64(0.0034943416249006987), 143), (np.float64(0.00347183458507061), 828), (np.float64(0.003445371985435486), 1258), (np.float64(0.0034322869032621384), 1040), (np.float64(0.003407709300518036), 366), (np.float64(0.003358466550707817), 985), (np.float64(0.0033377669751644135), 826), (np.float64(0.003312641754746437), 17), (np.float64(0.0032705982448533177), 14), (np.float64(0.0032210182398557663), 255), (np.float64(0.0032125506550073624), 39), (np.float64(0.003189890761859715), 348), (np.float64(0.0031792623922228813), 1100), (np.float64(0.0031745098531246185), 701), (np.float64(0.0031258314847946167), 480), (np.float64(0.003095071529969573), 546), (np.float64(0.0030517131090164185), 158), (np.float64(0.003032959997653961), 10), (np.float64(0.003000635653734207), 912), 
(np.float64(0.0029672272503376007), 15), (np.float64(0.002951675094664097), 290), (np.float64(0.0028542475774884224), 804), (np.float64(0.0028172172605991364), 1124), (np.float64(0.0027712839655578136), 309), (np.float64(0.0026745274662971497), 1133), (np.float64(0.0026256144046783447), 900), (np.float64(0.0026158811524510384), 205), (np.float64(0.002593526616692543), 1106), (np.float64(0.0025607850402593613), 545), (np.float64(0.0024483129382133484), 754), (np.float64(0.0024405624717473984), 514), (np.float64(0.002425260841846466), 868), (np.float64(0.0023892847821116447), 509), (np.float64(0.0023853182792663574), 29), (np.float64(0.002379018100327812), 1154), (np.float64(0.0022975997999310493), 790), (np.float64(0.0022021420300006866), 267), (np.float64(0.002168288454413414), 534), (np.float64(0.0021635047160089016), 1007), (np.float64(0.002135808579623699), 768), (np.float64(0.002078305697068572), 951), (np.float64(0.002071140334010124), 245), (np.float64(0.0020304229110479355), 486), (np.float64(0.002023644745349884), 1250), (np.float64(0.0020101824775338173), 28), (np.float64(0.0019969623535871506), 1245), (np.float64(0.001986062154173851), 1022), (np.float64(0.0019746623001992702), 1239), (np.float64(0.001973463222384453), 244), (np.float64(0.0019369125366210938), 1035), (np.float64(0.0019024861976504326), 548), (np.float64(0.0018787621520459652), 527), (np.float64(0.0018643662333488464), 459), (np.float64(0.0018462538719177246), 706), (np.float64(0.001827546686399728), 1274), (np.float64(0.001788940280675888), 382), (np.float64(0.0017553488723933697), 1206), (np.float64(0.0017540152184665203), 218), (np.float64(0.001738311955705285), 769), (np.float64(0.001677100546658039), 136), (np.float64(0.001619689166545868), 1113), (np.float64(0.0016147047281265259), 1279), (np.float64(0.0016042403876781464), 601), (np.float64(0.0015992438420653343), 596), (np.float64(0.0015618205070495605), 671), (np.float64(0.0015448471531271935), 928), 
(np.float64(0.001539589837193489), 626), (np.float64(0.001523636281490326), 1095), (np.float64(0.0015228185802698135), 63), (np.float64(0.0015169456601142883), 1180), (np.float64(0.0015078596770763397), 694), (np.float64(0.001481717685237527), 125), (np.float64(0.0014607235789299011), 1068), (np.float64(0.0014108158648014069), 250), (np.float64(0.0014008060097694397), 748), (np.float64(0.0013657081872224808), 204), (np.float64(0.001345967873930931), 1061), (np.float64(0.0013415142893791199), 371), (np.float64(0.0013408008962869644), 37), (np.float64(0.0012580184265971184), 960), (np.float64(0.001243335660547018), 1132), (np.float64(0.0011727940291166306), 1191), (np.float64(0.0011576693505048752), 342), (np.float64(0.001130678690969944), 1223), (np.float64(0.0011231759563088417), 127), (np.float64(0.0010981885716319084), 1235), (np.float64(0.0010806471109390259), 521), (np.float64(0.0010549724102020264), 989), (np.float64(0.0010166391730308533), 910), (np.float64(0.0010066886898130178), 488), (np.float64(0.000985151156783104), 738), (np.float64(0.0009567076340317726), 982), (np.float64(0.0009152404963970184), 612), (np.float64(0.0008938983082771301), 930), (np.float64(0.0008527413010597229), 335), (np.float64(0.000825216993689537), 477), (np.float64(0.0007924996316432953), 920), (np.float64(0.0007924232631921768), 896), (np.float64(0.0007629562169313431), 581), (np.float64(0.0007573636248707771), 80), (np.float64(0.000741329975426197), 1013), (np.float64(0.0007395949214696884), 1075), (np.float64(0.0007391385734081268), 163), (np.float64(0.0007354002445936203), 858), (np.float64(0.0007210071198642254), 916), (np.float64(0.0007144920527935028), 124), (np.float64(0.0006856508553028107), 1131), (np.float64(0.0006828904151916504), 21), (np.float64(0.0006541721522808075), 1097), (np.float64(0.0006299437955021858), 909), (np.float64(0.0006266646087169647), 870), (np.float64(0.000575515441596508), 270), (np.float64(0.0005712788552045822), 421), 
(np.float64(0.00056855333968997), 308), (np.float64(0.0004581841640174389), 261), (np.float64(0.0004357830621302128), 487), (np.float64(0.0004345737397670746), 194), (np.float64(0.0004092305898666382), 879), (np.float64(0.00037114880979061127), 235), (np.float64(0.00035897456109523773), 516), (np.float64(0.0003486813511699438), 1134), (np.float64(0.00034497492015361786), 704), (np.float64(0.000283312052488327), 88), (np.float64(0.00022942200303077698), 135), (np.float64(0.0002283584326505661), 1119), (np.float64(0.00020135263912379742), 229), (np.float64(0.00017299503087997437), 1159), (np.float64(8.101761341094971e-05), 702), (np.float64(-1.817755401134491e-05), 316), (np.float64(-2.541765570640564e-05), 1036), (np.float64(-4.7507346607744694e-05), 433), (np.float64(-0.00012370478361845016), 46), (np.float64(-0.0001281891018152237), 1155), (np.float64(-0.00013002753257751465), 461), (np.float64(-0.00014249759260565042), 447), (np.float64(-0.00016005896031856537), 1063), (np.float64(-0.0001608431339263916), 857), (np.float64(-0.0001817811280488968), 493), (np.float64(-0.00020514801144599915), 662), (np.float64(-0.0002080760896205902), 1080), (np.float64(-0.0002263011410832405), 1214), (np.float64(-0.000227256678044796), 1181), (np.float64(-0.00023396313190460205), 811), (np.float64(-0.00026110000908374786), 427), (np.float64(-0.00028219353407621384), 1205), (np.float64(-0.0003402328584343195), 419), (np.float64(-0.00036205508513376117), 62), (np.float64(-0.0003917478024959564), 537), (np.float64(-0.0005154851824045181), 492), (np.float64(-0.0005159936845302582), 637), (np.float64(-0.0005178460851311684), 856), (np.float64(-0.0005494393408298492), 109), (np.float64(-0.0005625635385513306), 97), (np.float64(-0.000569885887671262), 1115), (np.float64(-0.0005779080092906952), 933), (np.float64(-0.0005808547139167786), 506), (np.float64(-0.0006279787048697472), 1103), (np.float64(-0.0007155854254961014), 1256), (np.float64(-0.0007434776052832603), 688), 
(np.float64(-0.0007441248744726181), 1169), (np.float64(-0.000748196616768837), 147), (np.float64(-0.0007578060030937195), 468), (np.float64(-0.0007839640602469444), 26), (np.float64(-0.0008056208025664091), 740), (np.float64(-0.0008387919515371323), 948), (np.float64(-0.0009087081998586655), 1196), (np.float64(-0.0009119641035795212), 1161), (np.float64(-0.0009306315332651138), 73), (np.float64(-0.000951351597905159), 142), (np.float64(-0.0009772293269634247), 540), (np.float64(-0.0009796805679798126), 1260), (np.float64(-0.001017771428450942), 691), (np.float64(-0.0010307496413588524), 590), (np.float64(-0.001033630222082138), 953), (np.float64(-0.0010345056653022766), 551), (np.float64(-0.0010652430355548859), 41), (np.float64(-0.0010662563145160675), 1031), (np.float64(-0.0010681245476007462), 707), (np.float64(-0.0010780454613268375), 661), (np.float64(-0.001093613333068788), 385), (np.float64(-0.0010970290750265121), 126), (np.float64(-0.0011139344424009323), 767), (np.float64(-0.0011796094477176666), 766), (np.float64(-0.0011811880394816399), 1009), (np.float64(-0.0012204386293888092), 992), (np.float64(-0.0013030210830038413), 802), (np.float64(-0.0013143308460712433), 1099), (np.float64(-0.00135769322514534), 938), (np.float64(-0.0013707373291254044), 1147), (np.float64(-0.0013769203796982765), 389), (np.float64(-0.0014346614480018616), 742), (np.float64(-0.001471739262342453), 489), (np.float64(-0.0014727767556905746), 52), (np.float64(-0.0015113605186343193), 232), (np.float64(-0.0015142625197768211), 1177), (np.float64(-0.0015361960977315903), 289), (np.float64(-0.0016520395874977112), 825), (np.float64(-0.0016639144159853458), 252), (np.float64(-0.0016933688893914223), 987), (np.float64(-0.0017160698771476746), 946), (np.float64(-0.00173667399212718), 1252), (np.float64(-0.0017373105511069298), 733), (np.float64(-0.0017650823428994045), 649), (np.float64(-0.0017763301730155945), 522), (np.float64(-0.0017936015501618385), 115), 
(np.float64(-0.0018141400068998337), 577), (np.float64(-0.0018463386222720146), 674), (np.float64(-0.0018810424953699112), 1116), (np.float64(-0.0018857363611459732), 1199), (np.float64(-0.0019829915836453438), 814), (np.float64(-0.00200633704662323), 995), (np.float64(-0.002010106109082699), 1204), (np.float64(-0.0020110905170440674), 1057), (np.float64(-0.0020224787294864655), 1207), (np.float64(-0.002053378149867058), 945), (np.float64(-0.002064388245344162), 575), (np.float64(-0.0020839348435401917), 470), (np.float64(-0.00216059572994709), 465), (np.float64(-0.0022006616927683353), 981), (np.float64(-0.002226443961262703), 1139), (np.float64(-0.00228223018348217), 71), (np.float64(-0.002282954752445221), 520), (np.float64(-0.002297590486705303), 673), (np.float64(-0.002300553023815155), 1079), (np.float64(-0.0023086005821824074), 807), (np.float64(-0.002316609607078135), 1074), (np.float64(-0.0023270510137081146), 749), (np.float64(-0.002357354387640953), 216), (np.float64(-0.002396269701421261), 1073), (np.float64(-0.0024244561791419983), 104), (np.float64(-0.002444714307785034), 1024), (np.float64(-0.0024672462604939938), 999), (np.float64(-0.002483328804373741), 34), (np.float64(-0.0025172159948851913), 838), (np.float64(-0.002524329349398613), 935), (np.float64(-0.0025281142443418503), 967), (np.float64(-0.0025460217148065567), 457), (np.float64(-0.002552484627813101), 455), (np.float64(-0.002560259774327278), 474), (np.float64(-0.0026358672184869647), 429), (np.float64(-0.0026509244926273823), 269), (np.float64(-0.0026514939963817596), 1253), (np.float64(-0.0026922840625047684), 623), (np.float64(-0.00269710854627192), 1225), (np.float64(-0.002712603658437729), 38), (np.float64(-0.0027409472968429327), 965), (np.float64(-0.00277055986225605), 743), (np.float64(-0.0028255488723516464), 391), (np.float64(-0.0028463639318943024), 190), (np.float64(-0.0028850361704826355), 336), (np.float64(-0.002894410863518715), 832), (np.float64(-0.002898396924138069), 
778), (np.float64(-0.0030250325798988342), 76), (np.float64(-0.00303669273853302), 1121), (np.float64(-0.0030500199645757675), 65), (np.float64(-0.0030538206920027733), 185), (np.float64(-0.0030581308528780937), 393), (np.float64(-0.003087669610977173), 210), (np.float64(-0.003108654171228409), 51), (np.float64(-0.0031100647029234096), 473), (np.float64(-0.0031547900289297104), 889), (np.float64(-0.0031569916754961014), 195), (np.float64(-0.003162076696753502), 1273), (np.float64(-0.0031907064840197563), 843), (np.float64(-0.0033862628042697906), 130), (np.float64(-0.0034333346411585808), 442), (np.float64(-0.0034815147519111633), 752), (np.float64(-0.0035286154597997665), 902), (np.float64(-0.0035514887422323227), 1086), (np.float64(-0.003552686423063278), 333), (np.float64(-0.0035611912608146667), 1102), (np.float64(-0.003571145236492157), 1237), (np.float64(-0.0035725105553865433), 169), (np.float64(-0.003596900962293148), 1278), (np.float64(-0.0036182962357997894), 936), (np.float64(-0.003647194243967533), 402), (np.float64(-0.0036538057029247284), 112), (np.float64(-0.003661118447780609), 1136), (np.float64(-0.0036774892359972), 549), (np.float64(-0.0037003178149461746), 176), (np.float64(-0.0037345218006521463), 788), (np.float64(-0.003779228776693344), 174), (np.float64(-0.003787299618124962), 716), (np.float64(-0.00380537798628211), 983), (np.float64(-0.0038185007870197296), 144), (np.float64(-0.003865445323754102), 70), (np.float64(-0.003870982676744461), 911), (np.float64(-0.003874685149639845), 1001), (np.float64(-0.0038925185799598694), 49), (np.float64(-0.003927405923604965), 358), (np.float64(-0.00400954857468605), 602), (np.float64(-0.004042258486151695), 481), (np.float64(-0.00408430490642786), 918), (np.float64(-0.0041113197803497314), 720), (np.float64(-0.004242537543177605), 863), (np.float64(-0.004256729036569595), 1010), (np.float64(-0.004292648285627365), 651), (np.float64(-0.0043027400970458984), 78), (np.float64(-0.004380635917186737), 797), 
(np.float64(-0.004390287562273443), 986), (np.float64(-0.004402928985655308), 35), (np.float64(-0.004403814557008445), 482), (np.float64(-0.004492867738008499), 956), (np.float64(-0.0045052869245409966), 181), (np.float64(-0.004513232968747616), 603), (np.float64(-0.00453865434974432), 1184), (np.float64(-0.00456337071955204), 89), (np.float64(-0.00461997021920979), 508), (np.float64(-0.004656549543142319), 625), (np.float64(-0.004690955160185695), 536), (np.float64(-0.0046967072412371635), 296), (np.float64(-0.0047052884474396706), 322), (np.float64(-0.004711035639047623), 873), (np.float64(-0.004717739298939705), 731), (np.float64(-0.0047803036868572235), 275), (np.float64(-0.004825130105018616), 638), (np.float64(-0.004828300327062607), 961), (np.float64(-0.004890882410109043), 102), (np.float64(-0.004894081503152847), 1178), (np.float64(-0.004899552091956139), 1210), (np.float64(-0.0049577930476516485), 557), (np.float64(-0.004958644509315491), 1217), (np.float64(-0.0049695546622388065), 139), (np.float64(-0.004982003942131996), 361), (np.float64(-0.004986512009054422), 494), (np.float64(-0.00502120703458786), 504), (np.float64(-0.0050244322046637535), 787), (np.float64(-0.005043705925345421), 1164), (np.float64(-0.005074195563793182), 604), (np.float64(-0.005097134271636605), 406), (np.float64(-0.005099068395793438), 1268), (np.float64(-0.005101373419165611), 922), (np.float64(-0.005114092491567135), 692), (np.float64(-0.005152318626642227), 384), (np.float64(-0.005154757760465145), 711), (np.float64(-0.0051582116866484284), 268), (np.float64(-0.005177075508981943), 352), (np.float64(-0.005228022113442421), 530), (np.float64(-0.00522840628400445), 170), (np.float64(-0.00528366956859827), 1056), (np.float64(-0.005368012934923172), 969), (np.float64(-0.005396544001996517), 1231), (np.float64(-0.005413727834820747), 882), (np.float64(-0.005450633354485035), 524), (np.float64(-0.005477700382471085), 850), (np.float64(-0.005487922579050064), 334), 
(np.float64(-0.005521275103092194), 161), (np.float64(-0.00552127743139863), 175), (np.float64(-0.0055327690206468105), 865), (np.float64(-0.005547152832150459), 1220), (np.float64(-0.005717332474887371), 8), (np.float64(-0.005721753463149071), 292), (np.float64(-0.00574151985347271), 18), (np.float64(-0.005785588873550296), 794), (np.float64(-0.005798071622848511), 644), (np.float64(-0.00580454315058887), 132), (np.float64(-0.005812995135784149), 69), (np.float64(-0.005842681974172592), 1227), (np.float64(-0.005866500549018383), 390), (np.float64(-0.005866686813533306), 1192), (np.float64(-0.005951972212642431), 875), (np.float64(-0.005956872366368771), 2), (np.float64(-0.0059717330150306225), 753), (np.float64(-0.005976843181997538), 1070), (np.float64(-0.005981167778372765), 970), (np.float64(-0.005995499901473522), 372), (np.float64(-0.006027504801750183), 258), (np.float64(-0.00602865032851696), 1248), (np.float64(-0.006082542240619659), 1262), (np.float64(-0.0061778719536960125), 1043), (np.float64(-0.0062213437631726265), 476), (np.float64(-0.006271482445299625), 618), (np.float64(-0.0063001555390655994), 111), (np.float64(-0.006341526517644525), 1066), (np.float64(-0.006352424621582031), 96), (np.float64(-0.006389547139406204), 148), (np.float64(-0.006395917385816574), 154), (np.float64(-0.006402084603905678), 764), (np.float64(-0.006419172510504723), 57), (np.float64(-0.006443299353122711), 1175), (np.float64(-0.0064479149878025055), 806), (np.float64(-0.006505150347948074), 241), (np.float64(-0.0065887924283742905), 884), (np.float64(-0.006634398130699992), 511), (np.float64(-0.006656968966126442), 224), (np.float64(-0.006672145798802376), 1117), (np.float64(-0.006685070693492889), 443), (np.float64(-0.006695944350212812), 842), (np.float64(-0.0067441752180457115), 666), (np.float64(-0.0068170540034770966), 398), (np.float64(-0.006845368072390556), 0), (np.float64(-0.006857441738247871), 1182), (np.float64(-0.006921172142028809), 610), 
(np.float64(-0.006955621996894479), 808), (np.float64(-0.007136133732274175), 439), (np.float64(-0.0071418872103095055), 792), (np.float64(-0.007211441406980157), 1166), (np.float64(-0.007216833531856537), 239), (np.float64(-0.007237443060148507), 586), (np.float64(-0.007238788530230522), 306), (np.float64(-0.00724145770072937), 107), (np.float64(-0.007245765998959541), 817), (np.float64(-0.007268328219652176), 1098), (np.float64(-0.007300347089767456), 555), (np.float64(-0.007359273731708527), 193), (np.float64(-0.007387600839138031), 201), (np.float64(-0.0074022915214300156), 99), (np.float64(-0.0074034701101481915), 1232), (np.float64(-0.0074781812727451324), 257), (np.float64(-0.007478212472051382), 432), (np.float64(-0.007487598806619644), 1089), (np.float64(-0.007495214231312275), 1242), (np.float64(-0.007495550438761711), 815), (np.float64(-0.007613290101289749), 599), (np.float64(-0.0076307556591928005), 263), (np.float64(-0.00764109194278717), 680), (np.float64(-0.007670147344470024), 526), (np.float64(-0.007670966908335686), 454), (np.float64(-0.0076980628073215485), 721), (np.float64(-0.007704330608248711), 1144), (np.float64(-0.007711821002885699), 576), (np.float64(-0.007724279537796974), 648), (np.float64(-0.0077279843389987946), 1012), (np.float64(-0.007735062390565872), 286), (np.float64(-0.007739881053566933), 737), (np.float64(-0.007761240005493164), 872), (np.float64(-0.0077701956033706665), 709), (np.float64(-0.007822123123332858), 349), (np.float64(-0.007830768823623657), 1033), (np.float64(-0.007877346128225327), 1270), (np.float64(-0.007887596264481544), 614), (np.float64(-0.00789869949221611), 246), (np.float64(-0.007918609771877527), 1110), (np.float64(-0.007958957925438881), 528), (np.float64(-0.007987448945641518), 1109), (np.float64(-0.00801955908536911), 1236), (np.float64(-0.008025538176298141), 1091), (np.float64(-0.008026821538805962), 881), (np.float64(-0.008048209361732006), 829), (np.float64(-0.008056597784161568), 499), 
(np.float64(-0.008070695213973522), 885), (np.float64(-0.00811498612165451), 689), (np.float64(-0.008118484169244766), 565), (np.float64(-0.00811863038688898), 1158), (np.float64(-0.008136065676808357), 1172), (np.float64(-0.008224183460697532), 675), (np.float64(-0.00822520349174738), 108), (np.float64(-0.008245592936873436), 40), (np.float64(-0.008287797681987286), 772), (np.float64(-0.008296319516375661), 140), (np.float64(-0.008329878211952746), 164), (np.float64(-0.008337317034602165), 392), (np.float64(-0.008361676707863808), 146), (np.float64(-0.008454600349068642), 225), (np.float64(-0.008487144485116005), 779), (np.float64(-0.008503612130880356), 533), (np.float64(-0.008577090688049793), 847), (np.float64(-0.008597470819950104), 562), (np.float64(-0.008647900074720383), 846), (np.float64(-0.008654453791677952), 925), (np.float64(-0.00868566706776619), 72), (np.float64(-0.008686518296599388), 302), (np.float64(-0.008748093619942665), 440), (np.float64(-0.008753710426390171), 827), (np.float64(-0.008770633023232222), 903), (np.float64(-0.008915271610021591), 1118), (np.float64(-0.008982018567621708), 431), (np.float64(-0.008990546382847242), 710), (np.float64(-0.009008477441966534), 541), (np.float64(-0.009019730612635612), 782), (np.float64(-0.009068425744771957), 157), (np.float64(-0.009090296924114227), 959), (np.float64(-0.00919034518301487), 697), (np.float64(-0.00919616175815463), 1028), (np.float64(-0.009215446189045906), 824), (np.float64(-0.009354566223919392), 927), (np.float64(-0.009406345896422863), 1042), (np.float64(-0.00941525585949421), 816), (np.float64(-0.009422685950994492), 650), (np.float64(-0.00945487868739292), 1143), (np.float64(-0.009470253251492977), 478), (np.float64(-0.009476244449615479), 821), (np.float64(-0.009483122266829014), 963), (np.float64(-0.009523652493953705), 887), (np.float64(-0.00956575758755207), 895), (np.float64(-0.00958152487874031), 356), (np.float64(-0.009585897030774504), 305), 
(np.float64(-0.009609293192625046), 723), (np.float64(-0.009649815503507853), 101), (np.float64(-0.009656770154833794), 1271), (np.float64(-0.009658633265644312), 463), (np.float64(-0.009679041802883148), 958), (np.float64(-0.009686156583484262), 295), (np.float64(-0.009690960869193077), 3), (np.float64(-0.009700579568743706), 642), (np.float64(-0.009711027145385742), 495), (np.float64(-0.009808659553527832), 311), (np.float64(-0.009825988119700924), 1197), (np.float64(-0.009919967502355576), 770), (np.float64(-0.00992558989673853), 915), (np.float64(-0.00995855126529932), 860), (np.float64(-0.00996008887887001), 1130), (np.float64(-0.009978827089071274), 332), (np.float64(-0.009996309876441956), 324), (np.float64(-0.01000710017979145), 844), (np.float64(-0.010009054094552994), 800), (np.float64(-0.010019579902291298), 947), (np.float64(-0.010040998458862305), 202), (np.float64(-0.01004641002509743), 994), (np.float64(-0.010080473497509956), 151), (np.float64(-0.01013161102309823), 869), (np.float64(-0.010141927748918533), 919), (np.float64(-0.010255863424390554), 898), (np.float64(-0.010261062532663345), 665), (np.float64(-0.010276105254888535), 861), (np.float64(-0.010293344035744667), 458), (np.float64(-0.010300910100340843), 726), (np.float64(-0.010341383516788483), 325), (np.float64(-0.010353345642215572), 635), (np.float64(-0.010372515767812729), 1087), (np.float64(-0.010401349514722824), 1000), (np.float64(-0.010496689938008785), 647), (np.float64(-0.01055026613175869), 1179), (np.float64(-0.01059710793197155), 1208), (np.float64(-0.010630078613758087), 606), (np.float64(-0.010634157806634903), 1081), (np.float64(-0.010682531632483006), 435), (np.float64(-0.010732075199484825), 318), (np.float64(-0.010796718299388885), 940), (np.float64(-0.010817267000675201), 12), (np.float64(-0.010851098224520683), 134), (np.float64(-0.010936714708805084), 505), (np.float64(-0.01104088919237256), 991), (np.float64(-0.011065786704421043), 79), 
(np.float64(-0.011071410030126572), 5), (np.float64(-0.011135176755487919), 867), (np.float64(-0.011154929175972939), 1126), (np.float64(-0.011161897331476212), 1272), (np.float64(-0.01126299798488617), 1238), (np.float64(-0.011272568255662918), 1112), (np.float64(-0.011295948177576065), 634), (np.float64(-0.01129804365336895), 780), (np.float64(-0.01131666952278465), 705), (np.float64(-0.01133042573928833), 874), (np.float64(-0.011340262368321419), 1185), (np.float64(-0.011386333149857819), 479), (np.float64(-0.011390786617994308), 1101), (np.float64(-0.011419500224292278), 227), (np.float64(-0.011442882008850574), 620), (np.float64(-0.01144443266093731), 660), (np.float64(-0.011451411992311478), 1224), (np.float64(-0.011496592778712511), 633), (np.float64(-0.011498132022097707), 1030), (np.float64(-0.011539971455931664), 798), (np.float64(-0.011559644713997841), 217), (np.float64(-0.011612750589847565), 209), (np.float64(-0.0116298608481884), 579), (np.float64(-0.011648551328107715), 1034), (np.float64(-0.011675120331346989), 149), (np.float64(-0.011696023866534233), 567), (np.float64(-0.011698195710778236), 160), (np.float64(-0.011711067520081997), 1167), (np.float64(-0.0117823276668787), 1163), (np.float64(-0.011843113228678703), 1029), (np.float64(-0.011853933800011873), 535), (np.float64(-0.01186610758304596), 591), (np.float64(-0.011869622394442558), 1062), (np.float64(-0.011904150247573853), 670), (np.float64(-0.011912490415852517), 74), (np.float64(-0.011915481183677912), 864), (np.float64(-0.011936145718209445), 841), (np.float64(-0.012056197971105576), 315), (np.float64(-0.012068057432770729), 964), (np.float64(-0.012071516364812851), 1078), (np.float64(-0.012117337435483932), 337), (np.float64(-0.012127349153161049), 452), (np.float64(-0.012201700359582901), 877), (np.float64(-0.01221482828259468), 1128), (np.float64(-0.012246077458257787), 449), (np.float64(-0.012495806440711021), 682), (np.float64(-0.012575287837535143), 177), 
(np.float64(-0.012676802929490805), 436), (np.float64(-0.012683648616075516), 198), (np.float64(-0.012702982407063246), 914), (np.float64(-0.012723691645078361), 523), (np.float64(-0.012760473415255547), 757), (np.float64(-0.012779026292264462), 48), (np.float64(-0.012828223407268524), 852), (np.float64(-0.012916180305182934), 1016), (np.float64(-0.012964524328708649), 636), (np.float64(-0.012966942158527672), 1150), (np.float64(-0.012986363843083382), 247), (np.float64(-0.013018159195780754), 1054), (np.float64(-0.013066044077277184), 996), (np.float64(-0.013116424903273582), 693), (np.float64(-0.01320071890950203), 833), (np.float64(-0.013295786455273628), 87), (np.float64(-0.013389321975409985), 373), (np.float64(-0.013403642922639847), 572), (np.float64(-0.013429042883217335), 862), (np.float64(-0.013557696132920682), 81), (np.float64(-0.01362999901175499), 677), (np.float64(-0.013825366098899394), 364), (np.float64(-0.013825431524310261), 105), (np.float64(-0.013843605294823647), 949), (np.float64(-0.013897279277443886), 369), (np.float64(-0.013940966688096523), 1046), (np.float64(-0.013956151902675629), 573), (np.float64(-0.013969846069812775), 1096), (np.float64(-0.01397152990102768), 1047), (np.float64(-0.01400591991841793), 781), (np.float64(-0.014015112072229385), 466), (np.float64(-0.014026038348674774), 1148), (np.float64(-0.014037872664630413), 617), (np.float64(-0.014065070077776909), 544), (np.float64(-0.014098634012043476), 929), (np.float64(-0.01418947521597147), 1266), (np.float64(-0.014249518513679504), 54), (np.float64(-0.014250874519348145), 543), (np.float64(-0.014288587495684624), 585), (np.float64(-0.01435130089521408), 556), (np.float64(-0.014362127520143986), 1213), (np.float64(-0.014388982206583023), 272), (np.float64(-0.01446759165264666), 44), (np.float64(-0.0145144232083112), 941), (np.float64(-0.01452496147248894), 616), (np.float64(-0.01462503895163536), 320), (np.float64(-0.014764860272407532), 859), 
(np.float64(-0.014927219599485397), 878), (np.float64(-0.014956824481487274), 622), (np.float64(-0.014958196319639683), 1215), (np.float64(-0.014981732238084078), 1050), (np.float64(-0.015076996758580208), 763), (np.float64(-0.015152443200349808), 497), (np.float64(-0.01515391655266285), 1156), (np.float64(-0.015214354265481234), 519), (np.float64(-0.015253475634381175), 343), (np.float64(-0.015282157342880964), 906), (np.float64(-0.015289867296814919), 386), (np.float64(-0.015292404219508171), 434), (np.float64(-0.01533450186252594), 672), (np.float64(-0.015339143574237823), 1195), (np.float64(-0.015478499233722687), 734), (np.float64(-0.015486100688576698), 501), (np.float64(-0.015492841601371765), 513), (np.float64(-0.015516646206378937), 233), (np.float64(-0.015645429491996765), 773), (np.float64(-0.0157010480761528), 155), (np.float64(-0.015702321310527623), 597), (np.float64(-0.015805164322955534), 849), (np.float64(-0.015820898115634918), 1277), (np.float64(-0.015887961140833795), 357), (np.float64(-0.015930459601804614), 645), (np.float64(-0.016012540087103844), 517), (np.float64(-0.016064459457993507), 1048), (np.float64(-0.016075864201411605), 1201), (np.float64(-0.016258132178336382), 574), (np.float64(-0.01628638431429863), 1083), (np.float64(-0.016317928209900856), 1037), (np.float64(-0.016351506114006042), 378), (np.float64(-0.016361628659069538), 413), (np.float64(-0.016448184847831726), 1076), (np.float64(-0.01659383624792099), 1211), (np.float64(-0.016631217673420906), 383), (np.float64(-0.01673525758087635), 133), (np.float64(-0.016851751133799553), 1171), (np.float64(-0.017033321782946587), 345), (np.float64(-0.01705419272184372), 1138), (np.float64(-0.01710225734859705), 1140), (np.float64(-0.017209792509675026), 1105), (np.float64(-0.01725015789270401), 818), (np.float64(-0.017269250005483627), 228), (np.float64(-0.017377035692334175), 208), (np.float64(-0.017424164339900017), 1226), (np.float64(-0.017525162547826767), 1141), 
(np.float64(-0.01753891631960869), 942), (np.float64(-0.017641677986830473), 744), (np.float64(-0.017784817813662812), 1122), (np.float64(-0.017843062058091164), 760), (np.float64(-0.0179891474545002), 1152), (np.float64(-0.018129284493625164), 1003), (np.float64(-0.01813964545726776), 326), (np.float64(-0.01821867097169161), 658), (np.float64(-0.018225931096822023), 120), (np.float64(-0.018232353730127215), 679), (np.float64(-0.01825845241546631), 978), (np.float64(-0.01828348310664296), 924), (np.float64(-0.018405072391033173), 116), (np.float64(-0.018510638969019055), 438), (np.float64(-0.018527057953178883), 685), (np.float64(-0.01854758709669113), 94), (np.float64(-0.018557699862867594), 502), (np.float64(-0.01855983817949891), 554), (np.float64(-0.01861389074474573), 445), (np.float64(-0.018743810476735234), 199), (np.float64(-0.018759075552225113), 669), (np.float64(-0.018766134977340698), 1261), (np.float64(-0.01877213642001152), 82), (np.float64(-0.01886759651824832), 890), (np.float64(-0.018873335095122457), 453), (np.float64(-0.018922503106296062), 962), (np.float64(-0.01896221563220024), 698), (np.float64(-0.019119519740343094), 988), (np.float64(-0.019161410629749298), 745), (np.float64(-0.01922638714313507), 1026), (np.float64(-0.019319428130984306), 917), (np.float64(-0.01936191599816084), 344), (np.float64(-0.019529331475496292), 1071), (np.float64(-0.01953260414302349), 484), (np.float64(-0.019547119736671448), 93), (np.float64(-0.019778557121753693), 1020), (np.float64(-0.019798152148723602), 643), (np.float64(-0.019840769469738007), 1233), (np.float64(-0.019868917763233185), 238), (np.float64(-0.019939441233873367), 24), (np.float64(-0.019996959250420332), 240), (np.float64(-0.02009878307580948), 1173), (np.float64(-0.020234012976288795), 1142), (np.float64(-0.020388811826705933), 795), (np.float64(-0.020517916418612003), 223), (np.float64(-0.020581429824233055), 420), (np.float64(-0.020620129944290966), 1039), (np.float64(-0.02065638266503811), 
840), (np.float64(-0.020701369736343622), 293), (np.float64(-0.020729095675051212), 837), (np.float64(-0.02078204322606325), 7), (np.float64(-0.02080471720546484), 1038), (np.float64(-0.02098647691309452), 799), (np.float64(-0.02110620215535164), 980), (np.float64(-0.021208827383816242), 221), (np.float64(-0.021225396543741226), 558), (np.float64(-0.021289877127856016), 1157), (np.float64(-0.021346226800233126), 539), (np.float64(-0.02135976031422615), 464), (np.float64(-0.021395526826381683), 1044), (np.float64(-0.021515470929443836), 831), (np.float64(-0.021526120603084564), 1082), (np.float64(-0.021593546494841576), 31), (np.float64(-0.021753717213869095), 256), (np.float64(-0.02176509378477931), 491), (np.float64(-0.022058885544538498), 1064), (np.float64(-0.022263603284955025), 834), (np.float64(-0.022382635856047273), 954), (np.float64(-0.02256319299340248), 955), (np.float64(-0.022585909813642502), 747), (np.float64(-0.0230522045167163), 1275), (np.float64(-0.023085430613718927), 408), (np.float64(-0.023244470125064254), 641), (np.float64(-0.023779388517141342), 467), (np.float64(-0.02389063686132431), 424), (np.float64(-0.023980213329195976), 1093), (np.float64(-0.024013042449951172), 1123), (np.float64(-0.02409262489527464), 304), (np.float64(-0.024205811321735382), 1190), (np.float64(-0.025242964271456003), 699), (np.float64(-0.0252546314150095), 759), (np.float64(-0.025966100860387087), 264), (np.float64(-0.02601127838715911), 529), (np.float64(-0.026383422315120697), 60), (np.float64(-0.026427260600030422), 141), (np.float64(-0.02657921239733696), 1023), (np.float64(-0.026649098843336105), 1059), (np.float64(-0.02696368470788002), 538), (np.float64(-0.026969667291268706), 894), (np.float64(-0.02746322425082326), 1257), (np.float64(-0.027611277997493744), 805), (np.float64(-0.027830212842673063), 437), (np.float64(-0.028053276240825653), 422), (np.float64(-0.02825447265058756), 588), (np.float64(-0.02829993050545454), 784), 
(np.float64(-0.02857894729822874), 676), (np.float64(-0.028608759865164757), 1240), (np.float64(-0.029095172882080078), 1135), (np.float64(-0.029543783515691757), 712), (np.float64(-0.029558178037405014), 552), (np.float64(-0.029595278203487396), 741), (np.float64(-0.029920198023319244), 254), (np.float64(-0.02996830642223358), 632), (np.float64(-0.030348291620612144), 400), (np.float64(-0.0308592370711267), 640), (np.float64(-0.032068658620119095), 725), (np.float64(-0.0321959547836741), 259), (np.float64(-0.03227374702692032), 1230), (np.float64(-0.033374167047441006), 997), (np.float64(-0.033400426618754864), 1104), (np.float64(-0.03494056686758995), 407), (np.float64(-0.035000767558813095), 327), (np.float64(-0.03854627627879381), 851), (np.float64(-0.039288025349378586), 23), (np.float64(-0.04906845884397626), 226), (np.float64(-0.10356737673282623), 690), (np.float64(-0.17738884687423706), 736), (np.float64(-0.18347840011119843), 876)]\n" + ] + } + ], + "source": [ + "print(len(med_diff))\n", + "n_features = 100\n", + "indexed_list = [(value, index) for index, value in enumerate(med_diff)]\n", + "sorted_indexed_list = sorted(indexed_list, key=lambda x: x[0], reverse=True)\n", + "largest_10_indices = [index for value, index in sorted_indexed_list[:n_features]]\n", + "print(sorted_indexed_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "82e761fb", + "metadata": {}, + "outputs": [], + "source": [ + "with open('best_embedding_vars.pkl', 'wb') as f:\n", + " pickle.dump(sorted_indexed_list, f)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "c866b393", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (84_439, 8)
emb_1160emb_234emb_1280embeddingsresiduestrain_boolsepitope_boolsrsa_vals
f64f64f64list[f64]strboolboolf64
-5.638327-0.626679117.0[-0.079217, -0.08223, … 117.0]"L"truefalse0.205823
-7.059569-2.33806117.0[0.271906, 0.131599, … 117.0]"I"truefalse0.471213
-3.4552060.347305117.0[0.075211, -0.124738, … 117.0]"Q"truefalse0.046812
-5.985601-1.758423117.0[0.033206, 0.13658, … 117.0]"T"truefalse0.437416
-4.628117-0.632118117.0[-0.153488, 0.178101, … 117.0]"P"truefalse0.312792
-8.687856-2.58618547.0[-0.102616, 0.023357, … 47.0]"V"falsefalse0.09529
-8.632407-3.70580147.0[0.006365, -0.054578, … 47.0]"Q"falsefalse0.559269
-8.433228-4.29324847.0[-0.021138, 0.060409, … 47.0]"K"falsefalse0.883928
-8.325913-3.82434947.0[-0.013476, 0.081914, … 47.0]"A"falsefalse0.828726
-8.116076-4.16655747.0[-0.079727, 0.132829, … 47.0]"Q"falsefalse0.637367
" + ], + "text/plain": [ + "shape: (84_439, 8)\n", + "┌───────────┬───────────┬──────────┬─────────────┬──────────┬─────────────┬─────────────┬──────────┐\n", + "│ emb_1160 ┆ emb_234 ┆ emb_1280 ┆ embeddings ┆ residues ┆ train_bools ┆ epitope_boo ┆ rsa_vals │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ls ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ list[f64] ┆ str ┆ bool ┆ --- ┆ f64 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ bool ┆ │\n", + "╞═══════════╪═══════════╪══════════╪═════════════╪══════════╪═════════════╪═════════════╪══════════╡\n", + "│ -5.638327 ┆ -0.626679 ┆ 117.0 ┆ [-0.079217, ┆ L ┆ true ┆ false ┆ 0.205823 │\n", + "│ ┆ ┆ ┆ -0.08223, … ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ 117.0] ┆ ┆ ┆ ┆ │\n", + "│ -7.059569 ┆ -2.33806 ┆ 117.0 ┆ [0.271906, ┆ I ┆ true ┆ false ┆ 0.471213 │\n", + "│ ┆ ┆ ┆ 0.131599, … ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ 117.0] ┆ ┆ ┆ ┆ │\n", + "│ -3.455206 ┆ 0.347305 ┆ 117.0 ┆ [0.075211, ┆ Q ┆ true ┆ false ┆ 0.046812 │\n", + "│ ┆ ┆ ┆ -0.124738, ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ … 117.0] ┆ ┆ ┆ ┆ │\n", + "│ -5.985601 ┆ -1.758423 ┆ 117.0 ┆ [0.033206, ┆ T ┆ true ┆ false ┆ 0.437416 │\n", + "│ ┆ ┆ ┆ 0.13658, … ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ 117.0] ┆ ┆ ┆ ┆ │\n", + "│ -4.628117 ┆ -0.632118 ┆ 117.0 ┆ [-0.153488, ┆ P ┆ true ┆ false ┆ 0.312792 │\n", + "│ ┆ ┆ ┆ 0.178101, … ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ 117.0] ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ -8.687856 ┆ -2.586185 ┆ 47.0 ┆ [-0.102616, ┆ V ┆ false ┆ false ┆ 0.09529 │\n", + "│ ┆ ┆ ┆ 0.023357, … ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ 47.0] ┆ ┆ ┆ ┆ │\n", + "│ -8.632407 ┆ -3.705801 ┆ 47.0 ┆ [0.006365, ┆ Q ┆ false ┆ false ┆ 0.559269 │\n", + "│ ┆ ┆ ┆ -0.054578, ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ … 47.0] ┆ ┆ ┆ ┆ │\n", + "│ -8.433228 ┆ -4.293248 ┆ 47.0 ┆ [-0.021138, ┆ K ┆ false ┆ false ┆ 0.883928 │\n", + "│ ┆ ┆ ┆ 0.060409, … ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ 47.0] ┆ ┆ ┆ ┆ │\n", + "│ -8.325913 ┆ -3.824349 ┆ 47.0 ┆ [-0.013476, ┆ A ┆ false ┆ false ┆ 0.828726 │\n", + "│ ┆ ┆ ┆ 0.081914, … ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ 47.0] ┆ ┆ ┆ ┆ │\n", + "│ -8.116076 ┆ -4.166557 ┆ 47.0 ┆ [-0.079727, ┆ Q ┆ false ┆ 
false ┆ 0.637367 │\n", + "│ ┆ ┆ ┆ 0.132829, … ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ 47.0] ┆ ┆ ┆ ┆ │\n", + "└───────────┴───────────┴──────────┴─────────────┴──────────┴─────────────┴─────────────┴──────────┘" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "emb_1280 = []\n", + "emb_234 = []\n", + "emb_1160 = []\n", + "for (embedding, residue, train_bool, epitope_bool, rsa) in bp3_res.iter_rows():\n", + " emb_1280.append(embedding[1280])\n", + " emb_234.append(embedding[234])\n", + " emb_1160.append(embedding[1160])\n", + "\n", + "bp3_res.insert_column(0, pl.Series(\"emb_1280\", emb_1280))\n", + "bp3_res.insert_column(0, pl.Series(\"emb_234\", emb_234))\n", + "bp3_res.insert_column(0, pl.Series(\"emb_1160\", emb_1160))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "d3becb0b", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "" + }, + "metadata": { + "image/png": { + "height": 480, + "width": 640 + } + }, + "output_type": "display_data" + } + ], + "source": [ + "(\n", + "ggplot(bp3_res, aes(x = epitope_bools, y = rsa_vals))\n", + "+ geom_boxplot()\n", + "+ labs(\n", + " x = \"Epitope Status\",\n", + " y = \"Embedding var 1160\"\n", + ")\n", + "#+ geom_jitter()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "0a766107", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Cross-Validation Fold Details ---\n", + "Fold 1: Train AUC = 0.6385, Test AUC = 0.6482\n", + "Fold 2: Train AUC = 0.6405, Test AUC = 0.6400\n", + "Fold 3: Train AUC = 0.6416, Test AUC = 0.6355\n", + "Fold 4: Train AUC = 0.6409, Test AUC = 0.6383\n", + "Fold 5: Train AUC = 0.6405, Test AUC = 0.6399\n", + "\n", + "--- Overfitting Check ---\n", + "Average Training AUC across folds: 0.6404 (+/- 0.0010)\n", + "Average Test (Validation) AUC across folds: 0.6404 (+/- 0.0042)\n" + ] + } + ], + "source": [ + "# --- BP3 CV Evaluation ---\n", 
+ "\n", + "agg_features = [\n", + " #\"emb_1280\",\n", + " #\"emb_1160\",\n", + " \"emb_234\",\n", + " #\"rsa_vals\"\n", + "]\n", + "\n", + "train_df = bp3_res.to_pandas()\n", + "X_df = train_df[agg_features]\n", + "y_df = train_df[\"epitope_bools\"]\n", + "\n", + "X = X_df.values\n", + "y = y_df.values\n", + "\n", + "n_splits = 5\n", + "cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=1)\n", + "\n", + "train_auc_scores = []\n", + "test_auc_scores = []\n", + "\n", + "\n", + "print(\"--- Cross-Validation Fold Details ---\")\n", + "for fold, (train_index, test_index) in enumerate(cv.split(X, y)):\n", + " X_train, X_test = X[train_index], X[test_index]\n", + " y_train, y_test = y[train_index], y[test_index]\n", + "\n", + " # --- Choose Classifier ---\n", + "\n", + " neg_count = (y_train == 0).sum()\n", + " pos_count = (y_train == 1).sum()\n", + " scale_pos_weight_value = neg_count / pos_count if pos_count > 0 else 1\n", + "\n", + " clf = LogisticRegression(class_weight=\"balanced\")\n", + " clf.fit(X_train, y_train)\n", + "\n", + " # --- Training AUC Calculation ---\n", + " y_train_proba = clf.predict_proba(X_train)[:, 1]\n", + " train_auc = roc_auc_score(y_train, y_train_proba)\n", + " train_auc_scores.append(train_auc)\n", + "\n", + " # --- Test AUC Calculation ---\n", + " y_test_proba = clf.predict_proba(X_test)[:, 1]\n", + " test_auc = roc_auc_score(y_test, y_test_proba)\n", + " test_auc_scores.append(test_auc)\n", + "\n", + " print(f\"Fold {fold+1}: Train AUC = {train_auc:.4f}, Test AUC = {test_auc:.4f}\")\n", + "\n", + "# Mean ROC data\n", + "mean_auc_test = np.mean(test_auc_scores)\n", + "std_auc_test = np.std(test_auc_scores)\n", + "\n", + "# --- Overfitting Check Section ---\n", + "print(\"\\n--- Overfitting Check ---\")\n", + "mean_train_auc = np.mean(train_auc_scores)\n", + "std_train_auc = np.std(train_auc_scores)\n", + "\n", + "print(\n", + " f\"Average Training AUC across folds: {mean_train_auc:.4f} (+/- {std_train_auc:.4f})\"\n", + 
")\n", + "print(\n", + " f\"Average Test (Validation) AUC across folds: {mean_auc_test:.4f} (+/- {std_auc_test:.4f})\"\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "epident-experiments", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d76a30ce434b92d5551674ff15dfa8c7fdd9adeb Mon Sep 17 00:00:00 2001 From: Jacob Sesate Date: Wed, 1 Oct 2025 23:53:03 -0700 Subject: [PATCH 2/3] basic neural net implemented AUC 0.76 --- notebooks/regression.ipynb | 346 ++++++++++++++++++++++++------------- 1 file changed, 230 insertions(+), 116 deletions(-) diff --git a/notebooks/regression.ipynb b/notebooks/regression.ipynb index 53aa301..cddf591 100644 --- a/notebooks/regression.ipynb +++ b/notebooks/regression.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 48, "id": "47295d86", "metadata": {}, "outputs": [ @@ -98,8 +98,12 @@ "from pathlib import Path\n", "import torch\n", "import numpy as np\n", + "from scipy import stats\n", "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.neural_network import MLPClassifier\n", "from sklearn.model_selection import StratifiedKFold\n", + "from sklearn.preprocessing import StandardScaler \n", "from sklearn.metrics import roc_auc_score\n", "from plotnine import *\n", "theme_set(theme_classic())\n", @@ -229,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "7d3709f2", "metadata": {}, "outputs": [ @@ -243,27 +247,27 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (84_439, 5)
embeddingsresiduestrain_boolsepitope_boolsrsa_vals
list[f64]strboolboolf64
[-0.079217, -0.08223, … 117.0]"L"truefalse0.205823
[0.271906, 0.131599, … 117.0]"I"truefalse0.471213
[0.075211, -0.124738, … 117.0]"Q"truefalse0.046812
[0.033206, 0.13658, … 117.0]"T"truefalse0.437416
[-0.153488, 0.178101, … 117.0]"P"truefalse0.312792
[-0.102616, 0.023357, … 47.0]"V"falsefalse0.09529
[0.006365, -0.054578, … 47.0]"Q"falsefalse0.559269
[-0.021138, 0.060409, … 47.0]"K"falsefalse0.883928
[-0.013476, 0.081914, … 47.0]"A"falsefalse0.828726
[-0.079727, 0.132829, … 47.0]"Q"falsefalse0.637367
" + "shape: (84_439, 6)
indexembeddingsresiduestrain_boolsepitope_boolsrsa_vals
u32list[f64]strboolboolf64
0[-0.079217, -0.08223, … 117.0]"L"truefalse0.205823
1[0.271906, 0.131599, … 117.0]"I"truefalse0.471213
2[0.075211, -0.124738, … 117.0]"Q"truefalse0.046812
3[0.033206, 0.13658, … 117.0]"T"truefalse0.437416
4[-0.153488, 0.178101, … 117.0]"P"truefalse0.312792
84434[-0.102616, 0.023357, … 47.0]"V"falsefalse0.09529
84435[0.006365, -0.054578, … 47.0]"Q"falsefalse0.559269
84436[-0.021138, 0.060409, … 47.0]"K"falsefalse0.883928
84437[-0.013476, 0.081914, … 47.0]"A"falsefalse0.828726
84438[-0.079727, 0.132829, … 47.0]"Q"falsefalse0.637367
" ], "text/plain": [ - "shape: (84_439, 5)\n", - "┌────────────────────────────────┬──────────┬─────────────┬───────────────┬──────────┐\n", - "│ embeddings ┆ residues ┆ train_bools ┆ epitope_bools ┆ rsa_vals │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ list[f64] ┆ str ┆ bool ┆ bool ┆ f64 │\n", - "╞════════════════════════════════╪══════════╪═════════════╪═══════════════╪══════════╡\n", - "│ [-0.079217, -0.08223, … 117.0] ┆ L ┆ true ┆ false ┆ 0.205823 │\n", - "│ [0.271906, 0.131599, … 117.0] ┆ I ┆ true ┆ false ┆ 0.471213 │\n", - "│ [0.075211, -0.124738, … 117.0] ┆ Q ┆ true ┆ false ┆ 0.046812 │\n", - "│ [0.033206, 0.13658, … 117.0] ┆ T ┆ true ┆ false ┆ 0.437416 │\n", - "│ [-0.153488, 0.178101, … 117.0] ┆ P ┆ true ┆ false ┆ 0.312792 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ [-0.102616, 0.023357, … 47.0] ┆ V ┆ false ┆ false ┆ 0.09529 │\n", - "│ [0.006365, -0.054578, … 47.0] ┆ Q ┆ false ┆ false ┆ 0.559269 │\n", - "│ [-0.021138, 0.060409, … 47.0] ┆ K ┆ false ┆ false ┆ 0.883928 │\n", - "│ [-0.013476, 0.081914, … 47.0] ┆ A ┆ false ┆ false ┆ 0.828726 │\n", - "│ [-0.079727, 0.132829, … 47.0] ┆ Q ┆ false ┆ false ┆ 0.637367 │\n", - "└────────────────────────────────┴──────────┴─────────────┴───────────────┴──────────┘" + "shape: (84_439, 6)\n", + "┌───────┬────────────────────────────────┬──────────┬─────────────┬───────────────┬──────────┐\n", + "│ index ┆ embeddings ┆ residues ┆ train_bools ┆ epitope_bools ┆ rsa_vals │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u32 ┆ list[f64] ┆ str ┆ bool ┆ bool ┆ f64 │\n", + "╞═══════╪════════════════════════════════╪══════════╪═════════════╪═══════════════╪══════════╡\n", + "│ 0 ┆ [-0.079217, -0.08223, … 117.0] ┆ L ┆ true ┆ false ┆ 0.205823 │\n", + "│ 1 ┆ [0.271906, 0.131599, … 117.0] ┆ I ┆ true ┆ false ┆ 0.471213 │\n", + "│ 2 ┆ [0.075211, -0.124738, … 117.0] ┆ Q ┆ true ┆ false ┆ 0.046812 │\n", + "│ 3 ┆ [0.033206, 0.13658, … 117.0] ┆ T ┆ true ┆ false ┆ 0.437416 │\n", + "│ 4 ┆ [-0.153488, 0.178101, … 117.0] ┆ P ┆ true ┆ 
false ┆ 0.312792 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 84434 ┆ [-0.102616, 0.023357, … 47.0] ┆ V ┆ false ┆ false ┆ 0.09529 │\n", + "│ 84435 ┆ [0.006365, -0.054578, … 47.0] ┆ Q ┆ false ┆ false ┆ 0.559269 │\n", + "│ 84436 ┆ [-0.021138, 0.060409, … 47.0] ┆ K ┆ false ┆ false ┆ 0.883928 │\n", + "│ 84437 ┆ [-0.013476, 0.081914, … 47.0] ┆ A ┆ false ┆ false ┆ 0.828726 │\n", + "│ 84438 ┆ [-0.079727, 0.132829, … 47.0] ┆ Q ┆ false ┆ false ┆ 0.637367 │\n", + "└───────┴────────────────────────────────┴──────────┴─────────────┴───────────────┴──────────┘" ] }, "execution_count": 4, @@ -294,10 +298,101 @@ " \"rsa_vals\" : rsa_vals\n", "}\n", "\n", - "bp3_res = pl.DataFrame(data)\n", + "bp3_res = pl.DataFrame(data).with_row_index()\n", "bp3_res\n" ] }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1930cc74", + "metadata": {}, + "outputs": [], + "source": [ + "var_list = []\n", + "for var in range(len(bp3_res.select(\"embeddings\")[0].item().to_list())):\n", + " var_list.append(\"var\" + str(var))\n", + "\n", + "var_names = []\n", + "for var in range(bp3_res.height):\n", + " var_names.append(var_list)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "4745d9c4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (84_439, 1_286)
indexfield_0field_1field_2field_3field_4field_5field_6field_7field_8field_9field_10field_11field_12field_13field_14field_15field_16field_17field_18field_19field_20field_21field_22field_23field_24field_25field_26field_27field_28field_29field_30field_31field_32field_33field_34field_35field_1248field_1249field_1250field_1251field_1252field_1253field_1254field_1255field_1256field_1257field_1258field_1259field_1260field_1261field_1262field_1263field_1264field_1265field_1266field_1267field_1268field_1269field_1270field_1271field_1272field_1273field_1274field_1275field_1276field_1277field_1278field_1279field_1280residuestrain_boolsepitope_boolsrsa_vals
u32f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64strboolboolf64
0-0.079217-0.082230.05838-0.014281-0.174122-0.0797250.3110820.159403-0.0609380.0907240.1115890.2618340.0248810.1987760.0182710.118280.4828110.2449080.2902090.064428-0.2587630.0223620.16690.3730520.144847-0.003963-0.0595780.0051040.0854770.1407550.0165580.155509-0.3468790.3512540.0651720.068091-0.063390.166055-0.1355060.020469-0.0469170.2641240.108552-0.164620.0830870.034967-0.0664350.118048-0.23741-0.019455-0.0298530.1063120.154892-0.1098440.137809-0.0545360.3200510.080654-0.236098-0.390364-0.207083-0.0288720.1334240.464410.105135-0.0357880.2468070.096495117.0"L"truefalse0.205823
10.2719060.131599-0.1274880.218813-0.318881-0.326190.2830680.242302-0.119848-0.1710260.3600550.0485740.051404-0.1205290.077561-0.309660.4949730.2468520.169573-0.0658120.102363-0.0204510.131842-0.1283080.2764130.028705-0.048571-0.099105-0.303759-0.0387120.0078540.247406-0.098611-0.053378-0.306477-0.2419840.053280.227172-0.1237880.5895190.0819920.1237220.1227750.044760.020768-0.143507-0.0116570.201338-0.0628580.0836410.104042-0.1092360.13792-0.0313770.134023-0.0809850.2332330.1425810.0503230.062281-0.1553070.1702650.0127590.1842510.210409-0.0662360.0838130.026999117.0"I"truefalse0.471213
20.075211-0.124738-0.3128460.2247690.048075-0.4120290.6330580.185147-0.1606330.340180.1290260.192040.383036-0.4132610.087679-0.4428050.1652230.1389720.0108170.0660110.031720.015374-0.1541990.069085-0.0745260.3421120.755198-0.0395840.108980.26805-0.161822-0.0933520.091560.4409210.022243-0.135373-0.1222560.535823-0.2992450.0576280.0856830.4711250.2723950.040267-0.290351-0.3300090.0617870.039009-0.1362310.3900630.0817050.4074170.0824210.109496-0.500798-0.3719280.2104990.054817-0.037059-0.100564-0.172441-0.105397-0.1155310.156894-0.043453-0.367025-0.070912-0.130206117.0"Q"truefalse0.046812
30.0332060.136580.0296490.1156010.0805040.205870.1748430.082550.2366840.4095610.445139-0.0430160.395532-0.2801860.365009-0.2886930.028138-0.0088780.1798280.069011-0.096423-0.1835030.0277350.1868930.022349-0.217998-0.129381-0.1035670.090453-0.170609-0.1468670.2056910.0643410.008259-0.1148860.089-0.2271270.322171-0.0757570.385428-0.1696710.104973-0.01161-0.0593590.2851-0.235702-0.0502770.098363-0.0797740.2296830.108128-0.089006-0.140968-0.207516-0.022658-0.1277320.0386160.0099310.187821-0.034119-0.089407-0.252221-0.0862750.2001420.02470.14825-0.084408-0.163228117.0"T"truefalse0.437416
4-0.1534880.1781010.0767670.277805-0.062598-0.017160.254210.138347-0.0164130.1441550.2623250.2390950.683909-0.2604520.1629660.1167010.409806-0.16385-0.0955090.39366-0.202302-0.118709-0.2035110.071376-0.020334-0.2410920.009244-0.014142-0.097118-0.131135-0.1037880.1977430.1497180.314476-0.1884190.228339-0.1422420.34187-0.3194510.152567-0.198857-0.0463830.295451-0.0821090.181405-0.146599-0.1490760.018108-0.3292110.3177270.307067-0.0665610.22278-0.182435-0.404469-0.1866190.0405970.474888-0.049860.046669-0.072711-0.144304-0.0246760.2585580.335825-0.0847740.0455890.106432117.0"P"truefalse0.312792
84434-0.1026160.023357-0.0229380.006425-0.0996420.045261-0.003071-0.082837-0.090808-0.1531860.0087460.0178390.181520.156434-0.0236290.166111-0.2914430.074171-0.1138640.0061950.0794160.143738-0.048027-0.116025-0.0343350.057570.184138-0.0125280.079836-0.1289980.1673430.032930.225176-0.184082-0.162267-0.1480910.0273350.0810630.065476-0.2209970.023829-0.114449-0.0338310.005947-0.3300150.234441-0.0757680.168944-0.21160.1855420.173111-0.1442210.0150830.093506-0.058342-0.0795020.10016-0.116845-0.085286-0.1211330.105609-0.023850.1043860.0417190.245388-0.067179-0.1235210.20393847.0"V"falsefalse0.09529
844350.006365-0.054578-0.0537970.183082-0.1151550.0193550.090079-0.005044-0.04882-0.129596-0.0905350.001260.049185-0.03601-0.0768110.16542-0.1476350.048409-0.0586080.030474-0.0037960.1716430.04543-0.0643590.1949120.0840960.002967-0.1797760.019379-0.161040.1505540.1286140.105592-0.379701-0.087574-0.122342-0.0494710.1047860.094349-0.062865-0.0714850.0493470.048873-0.035775-0.2011580.0096620.1017040.2761-0.1476280.203010.105058-0.0860.2310710.1699140.1716310.1136050.00657-0.185583-0.0043140.019410.025256-0.039905-0.0449910.11730.097847-0.062373-0.1105870.09324247.0"Q"falsefalse0.559269
84436-0.0211380.060409-0.2595480.0769380.010231-0.0687950.021015-0.0664180.0059920.004406-0.1588240.0077660.1891180.143350.0666660.149638-0.1291980.0466210.147404-0.022679-0.0864470.078762-0.0401140.2426050.260802-0.1055870.06337-0.008405-0.089341-0.1711930.034680.0635870.1250310.008744-0.0501390.084319-0.001104-0.158556-0.067502-0.1237910.031946-0.042822-0.0305550.101412-0.0052190.1428040.0601460.0524940.008135-0.0347240.432915-0.0676560.029798-0.0277150.1387210.0757760.2627080.11978-0.054657-0.149773-0.019055-0.0687770.16741-0.2558310.133178-0.049465-0.1390630.12068947.0"K"falsefalse0.883928
84437-0.0134760.081914-0.1234640.0470740.0734790.075831-0.138856-0.14434-0.190178-0.0476-0.009502-0.0443610.0717220.204098-0.1246570.281845-0.147479-0.1260140.008064-0.016273-0.0285630.148271-0.1270830.072913-0.0924460.0746180.05848-0.005448-0.059232-0.210546-0.214381-0.1220720.172205-0.258711-0.034651-0.0623120.1819-0.0791290.149473-0.187563-0.0270180.0289720.2022190.08421-0.179594-0.059007-0.1277280.039554-0.3321860.3169070.078125-0.2054740.2915440.134560.096681-0.0128570.0573390.170203-0.005569-0.1839280.066849-0.0263670.3147210.0152160.1480860.106204-0.0894170.28617347.0"A"falsefalse0.828726
84438-0.0797270.1328290.0570060.167942-0.1868890.0727410.0841750.109943-0.014526-0.182992-0.02501-0.081883-0.0899010.108202-0.1623410.124711-0.158898-0.0049580.033675-0.063126-0.051280.039882-0.1784010.0441210.1524980.0217820.057184-0.107421-0.074607-0.061235-0.0654660.0953980.161038-0.3890950.093764-0.154974-0.0366020.260034-0.16953-0.2636530.0202410.1568090.1356280.12855-0.0786950.109689-0.0404330.10314-0.1188650.2668410.0570620.1268890.1231020.1239330.1648730.133015-0.089658-0.107521-0.039538-0.147168-0.089189-0.023737-0.1751490.1075640.231201-0.01757-0.2529760.13080747.0"Q"falsefalse0.637367
" + ], + "text/plain": [ + "shape: (84_439, 1_286)\n", + "┌───────┬───────────┬───────────┬───────────┬───┬──────────┬─────────────┬──────────────┬──────────┐\n", + "│ index ┆ field_0 ┆ field_1 ┆ field_2 ┆ … ┆ residues ┆ train_bools ┆ epitope_bool ┆ rsa_vals │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ s ┆ --- │\n", + "│ u32 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ str ┆ bool ┆ --- ┆ f64 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ bool ┆ │\n", + "╞═══════╪═══════════╪═══════════╪═══════════╪═══╪══════════╪═════════════╪══════════════╪══════════╡\n", + "│ 0 ┆ -0.079217 ┆ -0.08223 ┆ 0.05838 ┆ … ┆ L ┆ true ┆ false ┆ 0.205823 │\n", + "│ 1 ┆ 0.271906 ┆ 0.131599 ┆ -0.127488 ┆ … ┆ I ┆ true ┆ false ┆ 0.471213 │\n", + "│ 2 ┆ 0.075211 ┆ -0.124738 ┆ -0.312846 ┆ … ┆ Q ┆ true ┆ false ┆ 0.046812 │\n", + "│ 3 ┆ 0.033206 ┆ 0.13658 ┆ 0.029649 ┆ … ┆ T ┆ true ┆ false ┆ 0.437416 │\n", + "│ 4 ┆ -0.153488 ┆ 0.178101 ┆ 0.076767 ┆ … ┆ P ┆ true ┆ false ┆ 0.312792 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 84434 ┆ -0.102616 ┆ 0.023357 ┆ -0.022938 ┆ … ┆ V ┆ false ┆ false ┆ 0.09529 │\n", + "│ 84435 ┆ 0.006365 ┆ -0.054578 ┆ -0.053797 ┆ … ┆ Q ┆ false ┆ false ┆ 0.559269 │\n", + "│ 84436 ┆ -0.021138 ┆ 0.060409 ┆ -0.259548 ┆ … ┆ K ┆ false ┆ false ┆ 0.883928 │\n", + "│ 84437 ┆ -0.013476 ┆ 0.081914 ┆ -0.123464 ┆ … ┆ A ┆ false ┆ false ┆ 0.828726 │\n", + "│ 84438 ┆ -0.079727 ┆ 0.132829 ┆ 0.057006 ┆ … ┆ Q ┆ false ┆ false ┆ 0.637367 │\n", + "└───────┴───────────┴───────────┴───────────┴───┴──────────┴─────────────┴──────────────┴──────────┘" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bp3_res" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e8468437", + "metadata": {}, + "outputs": [], + "source": [ + "bp3_res = bp3_res.with_columns(\n", + " pl.col(\"embeddings\").list.to_struct(upper_bound=1281)\n", + ").unnest(\"embeddings\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68686bd6", + "metadata": {}, + 
"outputs": [], + "source": [ + "#with open('bp3_res.pkl', 'wb') as f:\n", + "# pickle.dump(bp3_res, f)\n", + "#\n", + "#with open(\"bp3_res.pkl\", 'rb') as f:\n", + "# bp3_res = pickle.load(f)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -307,15 +402,10 @@ "source": [ "# --- Extract N Most Informative Features of Embedding ---\n", "\n", - "# emb_var 1280 : 81 median diff\n", - "# emb_var 1280 : 1.13 median diff\n", - "# emb_var 1280 : 0.37 median diff\n", - "\n", - "\n", - "\n", "num_emb_vars = bp3_res.select(\"embeddings\")[0].item().shape[0]\n", "\n", - "med_diff = []\n", + "u_vals = []\n", + "p_vals = []\n", "for var in range(num_emb_vars):\n", " var_epi = []\n", " var_nepi = []\n", @@ -324,47 +414,73 @@ " var_epi.append(embedding[var])\n", " else:\n", " var_nepi.append(embedding[var])\n", - " med_diff.append(np.median(var_nepi) - np.median(var_epi))\n" + " u, p = stats.mannwhitneyu(var_epi, var_nepi, alternative=\"two-sided\")\n", + " u_vals.append(u)\n", + " p_vals.append(p)\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 22, - "id": "da101158", + "execution_count": 26, + "id": "8feee99b", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"best_esm_embedding_vars.pkl\", 'rb') as file:\n", + " # Load the pickled data from the file\n", + " p_vals = pickle.load(file)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "7f40786a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "1281\n", "[(np.float64(81.0), 1280), (np.float64(1.130703330039978), 234), (np.float64(0.3712773323059082), 1160), (np.float64(0.045004742220044136), 839), (np.float64(0.044218819588422775), 553), (np.float64(0.04032979533076286), 696), (np.float64(0.03747392725199461), 414), (np.float64(0.03660698514431715), 1251), (np.float64(0.03643316403031349), 600), (np.float64(0.0351506844162941), 381), (np.float64(0.034491033758968115), 756), (np.float64(0.033380577340722084), 300), 
(np.float64(0.03337776567786932), 1014), (np.float64(0.03284870833158493), 1186), (np.float64(0.03240643069148064), 855), (np.float64(0.032072730362415314), 608), (np.float64(0.031966139416908845), 86), (np.float64(0.031056914827786386), 396), (np.float64(0.030288565903902054), 655), (np.float64(0.030203919857740402), 166), (np.float64(0.030178461922332644), 423), (np.float64(0.029451459646224976), 809), (np.float64(0.028364425525069237), 77), (np.float64(0.02815424744039774), 249), (np.float64(0.028020352125167847), 145), (np.float64(0.02772002387791872), 370), (np.float64(0.027637861669063568), 564), (np.float64(0.026874929666519165), 1194), (np.float64(0.02658862737007439), 1088), (np.float64(0.025990422815084457), 230), (np.float64(0.025970193557441235), 968), (np.float64(0.025764848105609417), 998), (np.float64(0.025550130754709244), 1174), (np.float64(0.025247633457183838), 803), (np.float64(0.02497640997171402), 236), (np.float64(0.024945996701717377), 812), (np.float64(0.02489049779251218), 1127), (np.float64(0.024773985147476196), 880), (np.float64(0.024766715243458748), 628), (np.float64(0.024555565789341927), 1067), (np.float64(0.02454405650496483), 683), (np.float64(0.024141178466379642), 152), (np.float64(0.024102460592985153), 1006), (np.float64(0.023943433538079262), 583), (np.float64(0.023688404820859432), 191), (np.float64(0.023484595119953156), 1053), (np.float64(0.023418080061674118), 609), (np.float64(0.023391427472233772), 153), (np.float64(0.0233256034553051), 1107), (np.float64(0.023274637758731842), 271), (np.float64(0.022974496707320213), 168), (np.float64(0.022787262685596943), 192), (np.float64(0.02268359251320362), 260), (np.float64(0.02266924805007875), 593), (np.float64(0.022517642006278038), 921), (np.float64(0.022394230589270592), 727), (np.float64(0.022389421239495277), 789), (np.float64(0.022317957365885377), 1162), (np.float64(0.022150119300931692), 277), (np.float64(0.02212531678378582), 899), (np.float64(0.022036344325897517), 
1094), (np.float64(0.02189606800675392), 1111), (np.float64(0.02164968801662326), 412), (np.float64(0.021564900875091553), 1165), (np.float64(0.021400924772024155), 36), (np.float64(0.02118323463946581), 681), (np.float64(0.021175827831029892), 853), (np.float64(0.020939030684530735), 1125), (np.float64(0.020914054475724697), 695), (np.float64(0.02084817737340927), 973), (np.float64(0.020482300780713558), 1092), (np.float64(0.020417840220034122), 450), (np.float64(0.020330642815679312), 68), (np.float64(0.020232222974300385), 1049), (np.float64(0.020162058994174004), 405), (np.float64(0.02016097353771329), 483), (np.float64(0.01997297373600304), 892), (np.float64(0.01992867747321725), 75), (np.float64(0.01989478268660605), 462), (np.float64(0.01922575756907463), 1055), (np.float64(0.0190451480448246), 971), (np.float64(0.018956223502755165), 1212), (np.float64(0.018928367644548416), 775), (np.float64(0.01885544741526246), 340), (np.float64(0.01867196150124073), 22), (np.float64(0.018671827390789986), 237), (np.float64(0.018619922921061516), 1077), (np.float64(0.01853789109736681), 1168), (np.float64(0.01844160445034504), 375), (np.float64(0.01838996820151806), 471), (np.float64(0.018250529188662767), 283), (np.float64(0.018210260197520256), 1193), (np.float64(0.01818249374628067), 883), (np.float64(0.018178826197981834), 354), (np.float64(0.018157916143536568), 448), (np.float64(0.01814010553061962), 1072), (np.float64(0.018098924192599952), 1264), (np.float64(0.018092042300850153), 866), (np.float64(0.017926552798599005), 347), (np.float64(0.017782390117645264), 67), (np.float64(0.017713487148284912), 580), (np.float64(0.017520148307085037), 905), (np.float64(0.017419555690139532), 1069), (np.float64(0.01740977691952139), 611), (np.float64(0.017335407435894012), 507), (np.float64(0.01730018761008978), 728), (np.float64(0.017248489893972874), 215), (np.float64(0.017228491604328156), 297), (np.float64(0.017207475379109383), 668), (np.float64(0.01716914726421237), 
90), (np.float64(0.017164206132292747), 418), (np.float64(0.017009243369102478), 1145), (np.float64(0.016953904181718826), 615), (np.float64(0.01688589807599783), 1085), (np.float64(0.016667735180817544), 211), (np.float64(0.016580134630203247), 950), (np.float64(0.016412150114774704), 582), (np.float64(0.016342705115675926), 301), (np.float64(0.016321652568876743), 25), (np.float64(0.01623747358098626), 566), (np.float64(0.01623537763953209), 659), (np.float64(0.016207854729145765), 1187), (np.float64(0.016190076246857643), 1129), (np.float64(0.016053708270192146), 410), (np.float64(0.015998678281903267), 374), (np.float64(0.01599816046655178), 944), (np.float64(0.01595117896795273), 137), (np.float64(0.01590883918106556), 984), (np.float64(0.01588423765497282), 213), (np.float64(0.01587132178246975), 717), (np.float64(0.015777988824993372), 974), (np.float64(0.015773339197039604), 346), (np.float64(0.015697740018367767), 571), (np.float64(0.015640379322576337), 219), (np.float64(0.015593299642205238), 351), (np.float64(0.015562902670353651), 1114), (np.float64(0.015533394180238247), 388), (np.float64(0.01539867208339274), 1137), (np.float64(0.01523844338953495), 428), (np.float64(0.015186004340648651), 836), (np.float64(0.015168139711022377), 776), (np.float64(0.01514124684035778), 66), (np.float64(0.015134465182200074), 589), (np.float64(0.015024304389953613), 1198), (np.float64(0.014992635697126389), 732), (np.float64(0.014886489138007164), 200), (np.float64(0.014842655509710312), 777), (np.float64(0.014836808666586876), 810), (np.float64(0.014810930006206036), 762), (np.float64(0.014797125943005085), 722), (np.float64(0.014722553140018135), 1146), (np.float64(0.014658856205642223), 939), (np.float64(0.014645400457084179), 871), (np.float64(0.014637693762779236), 1228), (np.float64(0.014579221606254578), 460), (np.float64(0.014410372823476791), 265), (np.float64(0.014292508363723755), 907), (np.float64(0.014270318672060966), 1052), 
(np.float64(0.014171725139021873), 1183), (np.float64(0.014159210142679513), 718), (np.float64(0.01415821723639965), 363), (np.float64(0.014137148391455412), 350), (np.float64(0.013962782919406891), 813), (np.float64(0.01390154892578721), 33), (np.float64(0.013901078724302351), 715), (np.float64(0.013888734392821789), 1151), (np.float64(0.013806648552417755), 110), (np.float64(0.013695838861167431), 664), (np.float64(0.013664640951901674), 401), (np.float64(0.01362670212984085), 1005), (np.float64(0.013555938377976418), 397), (np.float64(0.013532337732613087), 657), (np.float64(0.01352951256558299), 291), (np.float64(0.013322470709681511), 774), (np.float64(0.013301345519721508), 631), (np.float64(0.013249438256025314), 822), (np.float64(0.013248251751065254), 55), (np.float64(0.013229165226221085), 231), (np.float64(0.013227107585407794), 314), (np.float64(0.013184343464672565), 288), (np.float64(0.013178281486034393), 646), (np.float64(0.013057534699328244), 1008), (np.float64(0.012972468510270119), 243), (np.float64(0.012916450956254266), 379), (np.float64(0.012898104265332222), 1189), (np.float64(0.012832388281822205), 786), (np.float64(0.012810321524739265), 441), (np.float64(0.012777255848050117), 791), (np.float64(0.012744493782520294), 686), (np.float64(0.01274031586945057), 203), (np.float64(0.012716792523860931), 85), (np.float64(0.012634440790861845), 1004), (np.float64(0.01250067725777626), 500), (np.float64(0.012495343806222081), 563), (np.float64(0.012440497055649757), 990), (np.float64(0.012401574291288853), 1032), (np.float64(0.012400735169649124), 793), (np.float64(0.012385524809360504), 801), (np.float64(0.012380607426166534), 765), (np.float64(0.01234420482069254), 279), (np.float64(0.012343136593699455), 854), (np.float64(0.012326118245255202), 222), (np.float64(0.012296153232455254), 1188), (np.float64(0.012261290132300928), 165), (np.float64(0.012197593227028847), 403), (np.float64(0.012096164748072624), 979), 
(np.float64(0.012080555781722069), 652), (np.float64(0.012075373902916908), 1084), (np.float64(0.011993632419034839), 248), (np.float64(0.011948315426707268), 307), (np.float64(0.011938711628317833), 207), (np.float64(0.011916568502783775), 20), (np.float64(0.011913990136235952), 1243), (np.float64(0.0119064562022686), 819), (np.float64(0.011905217543244362), 845), (np.float64(0.011900685727596283), 1041), (np.float64(0.011890262365341187), 214), (np.float64(0.011802103370428085), 976), (np.float64(0.011791346594691277), 1120), (np.float64(0.011742846050765365), 339), (np.float64(0.011728386860340834), 59), (np.float64(0.01166495680809021), 253), (np.float64(0.011663809418678284), 1021), (np.float64(0.011610973626375198), 117), (np.float64(0.011607056483626366), 719), (np.float64(0.011598647572100163), 568), (np.float64(0.011571774259209633), 415), (np.float64(0.011481711699161679), 278), (np.float64(0.011479867622256279), 977), (np.float64(0.01147252693772316), 469), (np.float64(0.01137647032737732), 472), (np.float64(0.011283046565949917), 561), (np.float64(0.011235762620344758), 188), (np.float64(0.011222146451473236), 1259), (np.float64(0.011220555752515793), 118), (np.float64(0.011210506781935692), 150), (np.float64(0.01115427166223526), 282), (np.float64(0.011089973151683807), 730), (np.float64(0.0110830869525671), 183), (np.float64(0.011028191074728966), 399), (np.float64(0.01101304218173027), 92), (np.float64(0.010961954947561026), 123), (np.float64(0.010941036212898325), 1218), (np.float64(0.010906634852290154), 312), (np.float64(0.01089153066277504), 820), (np.float64(0.01083652675151825), 425), (np.float64(0.010806520935148), 220), (np.float64(0.01080569438636303), 1244), (np.float64(0.010785584338009357), 1254), (np.float64(0.010722700506448746), 700), (np.float64(0.010713106952607632), 323), (np.float64(0.01068238914012909), 284), (np.float64(0.010637138038873672), 835), (np.float64(0.010622672736644745), 1263), (np.float64(0.010598766501061618), 16), 
(np.float64(0.010591836180537939), 129), (np.float64(0.010561587288975716), 276), (np.float64(0.010534115135669708), 266), (np.float64(0.010474463924765587), 159), (np.float64(0.010459182783961296), 451), (np.float64(0.010430864989757538), 1247), (np.float64(0.01038616243749857), 444), (np.float64(0.010379638464655727), 131), (np.float64(0.0101566631346941), 913), (np.float64(0.010046117007732391), 4), (np.float64(0.009935624897480011), 1209), (np.float64(0.009927066043019295), 667), (np.float64(0.009867895394563675), 966), (np.float64(0.009789712727069855), 42), (np.float64(0.009780844673514366), 993), (np.float64(0.009772205725312233), 119), (np.float64(0.009715504944324493), 273), (np.float64(0.009608583524823189), 1019), (np.float64(0.009587500244379044), 570), (np.float64(0.009521863423287868), 162), (np.float64(0.009470891673117876), 975), (np.float64(0.009450562531128526), 1202), (np.float64(0.009449162287637591), 100), (np.float64(0.009391489322297275), 934), (np.float64(0.009280447848141193), 196), (np.float64(0.009250136092305183), 430), (np.float64(0.00911640003323555), 285), (np.float64(0.009104061871767044), 103), (np.float64(0.009091995656490326), 932), (np.float64(0.009053241461515427), 761), (np.float64(0.008984031155705452), 6), (np.float64(0.008899634703993797), 121), (np.float64(0.008806407451629639), 106), (np.float64(0.008774453774094582), 262), (np.float64(0.008766988146817312), 1255), (np.float64(0.008755201008170843), 426), (np.float64(0.008747384359594434), 1246), (np.float64(0.008727488107979298), 56), (np.float64(0.008709082379937172), 1018), (np.float64(0.008707253262400627), 338), (np.float64(0.008586497977375984), 47), (np.float64(0.008559728041291237), 242), (np.float64(0.008496899157762527), 739), (np.float64(0.008455098606646061), 1153), (np.float64(0.008453513495624065), 796), (np.float64(0.008421202190220356), 180), (np.float64(0.008398983627557755), 578), (np.float64(0.008393193129450083), 138), (np.float64(0.008391544222831726), 
27), (np.float64(0.00835740938782692), 639), (np.float64(0.00834231125190854), 187), (np.float64(0.00831507908878848), 64), (np.float64(0.008314916864037514), 98), (np.float64(0.008291925652883947), 1203), (np.float64(0.008287357166409492), 490), (np.float64(0.008284620009362698), 771), (np.float64(0.008237404748797417), 595), (np.float64(0.008215129375457764), 353), (np.float64(0.008181661367416382), 128), (np.float64(0.008086762623861432), 587), (np.float64(0.008051972836256027), 475), (np.float64(0.008047517389059067), 1060), (np.float64(0.008003609604202211), 713), (np.float64(0.008001109352335334), 503), (np.float64(0.00792611576616764), 114), (np.float64(0.007904700934886932), 281), (np.float64(0.007900640368461609), 456), (np.float64(0.00789184495806694), 274), (np.float64(0.007889870554208755), 212), (np.float64(0.007881866302341223), 512), (np.float64(0.00787946954369545), 355), (np.float64(0.00786761287599802), 299), (np.float64(0.007863425649702549), 653), (np.float64(0.007847219705581665), 310), (np.float64(0.007825737819075584), 377), (np.float64(0.007806180045008659), 1025), (np.float64(0.0077806273475289345), 542), (np.float64(0.007778293918818235), 197), (np.float64(0.0077378100249916315), 654), (np.float64(0.007702145725488663), 594), (np.float64(0.007652724161744118), 613), (np.float64(0.007633641362190247), 417), (np.float64(0.0076003409922122955), 329), (np.float64(0.007589031883981079), 888), (np.float64(0.007576806470751762), 923), (np.float64(0.007573945447802544), 1108), (np.float64(0.007561119273304939), 43), (np.float64(0.007557529956102371), 1090), (np.float64(0.00754080805927515), 19), (np.float64(0.007507447153329849), 724), (np.float64(0.0074921175837516785), 317), (np.float64(0.0074846018105745316), 952), (np.float64(0.007444923743605614), 376), (np.float64(0.007425621151924133), 45), (np.float64(0.007377568632364273), 687), (np.float64(0.007334676454775035), 1002), (np.float64(0.0073283761739730835), 206), 
(np.float64(0.007327833212912083), 560), (np.float64(0.007297255098819733), 783), (np.float64(0.00729526299983263), 61), (np.float64(0.007223796099424362), 592), (np.float64(0.0072027649730443954), 446), (np.float64(0.007195578888058662), 1241), (np.float64(0.00719551183283329), 957), (np.float64(0.007170367985963821), 943), (np.float64(0.007156253792345524), 729), (np.float64(0.007133243838325143), 1065), (np.float64(0.007120907306671143), 901), (np.float64(0.007117012515664101), 735), (np.float64(0.0071118175983428955), 893), (np.float64(0.007111807353794575), 58), (np.float64(0.00709662027657032), 547), (np.float64(0.007087027654051781), 362), (np.float64(0.007054241374135017), 498), (np.float64(0.007042464800179005), 1221), (np.float64(0.006980568170547485), 122), (np.float64(0.00695190392434597), 886), (np.float64(0.006930340081453323), 321), (np.float64(0.006925065070390701), 510), (np.float64(0.006918858736753464), 313), (np.float64(0.006893233919981867), 330), (np.float64(0.00686962716281414), 11), (np.float64(0.006865903967991471), 365), (np.float64(0.006832782179117203), 394), (np.float64(0.006829655729234219), 891), (np.float64(0.006801697425544262), 53), (np.float64(0.006791293621063232), 937), (np.float64(0.006698655895888805), 703), (np.float64(0.006694257725030184), 1170), (np.float64(0.006679002195596695), 189), (np.float64(0.006648337468504906), 409), (np.float64(0.006589038297533989), 972), (np.float64(0.006570897996425629), 416), (np.float64(0.006568379700183868), 1011), (np.float64(0.006532957944727968), 360), (np.float64(0.006522510200738907), 656), (np.float64(0.0065031107515096664), 395), (np.float64(0.006459635682404041), 607), (np.float64(0.006452583707869053), 750), (np.float64(0.006433840841054916), 303), (np.float64(0.006367001682519913), 287), (np.float64(0.006355821620672941), 171), (np.float64(0.00631660595536232), 525), (np.float64(0.006311326549621299), 630), (np.float64(0.006254343315958977), 621), 
(np.float64(0.006236137822270393), 1265), (np.float64(0.0062336549162864685), 280), (np.float64(0.006093231961131096), 1), (np.float64(0.006068775430321693), 515), (np.float64(0.006007391959428787), 179), (np.float64(0.005940468981862068), 50), (np.float64(0.0059141237288713455), 328), (np.float64(0.0059125833213329315), 184), (np.float64(0.005842448212206364), 1229), (np.float64(0.005820596590638161), 559), (np.float64(0.0058171721175313), 485), (np.float64(0.005750535521656275), 1045), (np.float64(0.005683631170541048), 113), (np.float64(0.00566563755273819), 13), (np.float64(0.005600292701274157), 331), (np.float64(0.005594167858362198), 629), (np.float64(0.005578726530075073), 531), (np.float64(0.005515436641871929), 598), (np.float64(0.0054828329011797905), 758), (np.float64(0.005473967641592026), 1276), (np.float64(0.005444618873298168), 32), (np.float64(0.005436782957985997), 755), (np.float64(0.005431024357676506), 785), (np.float64(0.005363805568777025), 532), (np.float64(0.005330108106136322), 830), (np.float64(0.00532750366255641), 1267), (np.float64(0.0053042881190776825), 294), (np.float64(0.005254607647657394), 1017), (np.float64(0.005230085924267769), 1216), (np.float64(0.0052208518027327955), 404), (np.float64(0.005198197439312935), 1015), (np.float64(0.005155934486538172), 678), (np.float64(0.005148696713149548), 663), (np.float64(0.005032102577388287), 359), (np.float64(0.004991074092686176), 714), (np.float64(0.0049825385212898254), 627), (np.float64(0.004934079828672111), 904), (np.float64(0.00489051453769207), 83), (np.float64(0.00488685816526413), 584), (np.float64(0.0048807961866259575), 411), (np.float64(0.004836020991206169), 1027), (np.float64(0.0048017119988799095), 684), (np.float64(0.004774243570864201), 1176), (np.float64(0.0047730617225170135), 1219), (np.float64(0.004693788127042353), 823), (np.float64(0.00468137301504612), 908), (np.float64(0.00467224046587944), 605), (np.float64(0.0045488253235816956), 156), 
(np.float64(0.004545961506664753), 84), (np.float64(0.004537271335721016), 708), (np.float64(0.0045341793447732925), 926), (np.float64(0.004530996084213257), 182), (np.float64(0.0044933343306183815), 30), (np.float64(0.004464704543352127), 9), (np.float64(0.004407189786434174), 569), (np.float64(0.0043976083397865295), 1149), (np.float64(0.0043945867801085114), 95), (np.float64(0.004293292760848999), 172), (np.float64(0.0042603835463523865), 387), (np.float64(0.004210165643598884), 173), (np.float64(0.004184460442047566), 496), (np.float64(0.004150079563260078), 367), (np.float64(0.00413903035223484), 341), (np.float64(0.0040975576266646385), 550), (np.float64(0.004085277207195759), 251), (np.float64(0.004077760153450072), 1269), (np.float64(0.004069924354553223), 1249), (np.float64(0.004054168239235878), 298), (np.float64(0.004032887518405914), 1222), (np.float64(0.004022389650344849), 897), (np.float64(0.004020331427454948), 91), (np.float64(0.004010087111964822), 167), (np.float64(0.004000985994935036), 368), (np.float64(0.003997102379798889), 619), (np.float64(0.003981940448284149), 624), (np.float64(0.003933241590857506), 848), (np.float64(0.003733748570084572), 319), (np.float64(0.0037168003618717194), 1234), (np.float64(0.0036879992112517357), 186), (np.float64(0.003684638999402523), 380), (np.float64(0.0036621559411287308), 746), (np.float64(0.0036391839385032654), 178), (np.float64(0.0036176294088363647), 1051), (np.float64(0.003613073378801346), 1058), (np.float64(0.0035451650619506836), 518), (np.float64(0.0035063475370407104), 1200), (np.float64(0.0035057906061410904), 931), (np.float64(0.0035018213093280792), 751), (np.float64(0.0034943416249006987), 143), (np.float64(0.00347183458507061), 828), (np.float64(0.003445371985435486), 1258), (np.float64(0.0034322869032621384), 1040), (np.float64(0.003407709300518036), 366), (np.float64(0.003358466550707817), 985), (np.float64(0.0033377669751644135), 826), (np.float64(0.003312641754746437), 17), 
(np.float64(0.0032705982448533177), 14), (np.float64(0.0032210182398557663), 255), (np.float64(0.0032125506550073624), 39), (np.float64(0.003189890761859715), 348), (np.float64(0.0031792623922228813), 1100), (np.float64(0.0031745098531246185), 701), (np.float64(0.0031258314847946167), 480), (np.float64(0.003095071529969573), 546), (np.float64(0.0030517131090164185), 158), (np.float64(0.003032959997653961), 10), (np.float64(0.003000635653734207), 912), (np.float64(0.0029672272503376007), 15), (np.float64(0.002951675094664097), 290), (np.float64(0.0028542475774884224), 804), (np.float64(0.0028172172605991364), 1124), (np.float64(0.0027712839655578136), 309), (np.float64(0.0026745274662971497), 1133), (np.float64(0.0026256144046783447), 900), (np.float64(0.0026158811524510384), 205), (np.float64(0.002593526616692543), 1106), (np.float64(0.0025607850402593613), 545), (np.float64(0.0024483129382133484), 754), (np.float64(0.0024405624717473984), 514), (np.float64(0.002425260841846466), 868), (np.float64(0.0023892847821116447), 509), (np.float64(0.0023853182792663574), 29), (np.float64(0.002379018100327812), 1154), (np.float64(0.0022975997999310493), 790), (np.float64(0.0022021420300006866), 267), (np.float64(0.002168288454413414), 534), (np.float64(0.0021635047160089016), 1007), (np.float64(0.002135808579623699), 768), (np.float64(0.002078305697068572), 951), (np.float64(0.002071140334010124), 245), (np.float64(0.0020304229110479355), 486), (np.float64(0.002023644745349884), 1250), (np.float64(0.0020101824775338173), 28), (np.float64(0.0019969623535871506), 1245), (np.float64(0.001986062154173851), 1022), (np.float64(0.0019746623001992702), 1239), (np.float64(0.001973463222384453), 244), (np.float64(0.0019369125366210938), 1035), (np.float64(0.0019024861976504326), 548), (np.float64(0.0018787621520459652), 527), (np.float64(0.0018643662333488464), 459), (np.float64(0.0018462538719177246), 706), (np.float64(0.001827546686399728), 1274), (np.float64(0.001788940280675888), 
382), (np.float64(0.0017553488723933697), 1206), (np.float64(0.0017540152184665203), 218), (np.float64(0.001738311955705285), 769), (np.float64(0.001677100546658039), 136), (np.float64(0.001619689166545868), 1113), (np.float64(0.0016147047281265259), 1279), (np.float64(0.0016042403876781464), 601), (np.float64(0.0015992438420653343), 596), (np.float64(0.0015618205070495605), 671), (np.float64(0.0015448471531271935), 928), (np.float64(0.001539589837193489), 626), (np.float64(0.001523636281490326), 1095), (np.float64(0.0015228185802698135), 63), (np.float64(0.0015169456601142883), 1180), (np.float64(0.0015078596770763397), 694), (np.float64(0.001481717685237527), 125), (np.float64(0.0014607235789299011), 1068), (np.float64(0.0014108158648014069), 250), (np.float64(0.0014008060097694397), 748), (np.float64(0.0013657081872224808), 204), (np.float64(0.001345967873930931), 1061), (np.float64(0.0013415142893791199), 371), (np.float64(0.0013408008962869644), 37), (np.float64(0.0012580184265971184), 960), (np.float64(0.001243335660547018), 1132), (np.float64(0.0011727940291166306), 1191), (np.float64(0.0011576693505048752), 342), (np.float64(0.001130678690969944), 1223), (np.float64(0.0011231759563088417), 127), (np.float64(0.0010981885716319084), 1235), (np.float64(0.0010806471109390259), 521), (np.float64(0.0010549724102020264), 989), (np.float64(0.0010166391730308533), 910), (np.float64(0.0010066886898130178), 488), (np.float64(0.000985151156783104), 738), (np.float64(0.0009567076340317726), 982), (np.float64(0.0009152404963970184), 612), (np.float64(0.0008938983082771301), 930), (np.float64(0.0008527413010597229), 335), (np.float64(0.000825216993689537), 477), (np.float64(0.0007924996316432953), 920), (np.float64(0.0007924232631921768), 896), (np.float64(0.0007629562169313431), 581), (np.float64(0.0007573636248707771), 80), (np.float64(0.000741329975426197), 1013), (np.float64(0.0007395949214696884), 1075), (np.float64(0.0007391385734081268), 163), 
(np.float64(0.0007354002445936203), 858), (np.float64(0.0007210071198642254), 916), (np.float64(0.0007144920527935028), 124), (np.float64(0.0006856508553028107), 1131), (np.float64(0.0006828904151916504), 21), (np.float64(0.0006541721522808075), 1097), (np.float64(0.0006299437955021858), 909), (np.float64(0.0006266646087169647), 870), (np.float64(0.000575515441596508), 270), (np.float64(0.0005712788552045822), 421), (np.float64(0.00056855333968997), 308), (np.float64(0.0004581841640174389), 261), (np.float64(0.0004357830621302128), 487), (np.float64(0.0004345737397670746), 194), (np.float64(0.0004092305898666382), 879), (np.float64(0.00037114880979061127), 235), (np.float64(0.00035897456109523773), 516), (np.float64(0.0003486813511699438), 1134), (np.float64(0.00034497492015361786), 704), (np.float64(0.000283312052488327), 88), (np.float64(0.00022942200303077698), 135), (np.float64(0.0002283584326505661), 1119), (np.float64(0.00020135263912379742), 229), (np.float64(0.00017299503087997437), 1159), (np.float64(8.101761341094971e-05), 702), (np.float64(-1.817755401134491e-05), 316), (np.float64(-2.541765570640564e-05), 1036), (np.float64(-4.7507346607744694e-05), 433), (np.float64(-0.00012370478361845016), 46), (np.float64(-0.0001281891018152237), 1155), (np.float64(-0.00013002753257751465), 461), (np.float64(-0.00014249759260565042), 447), (np.float64(-0.00016005896031856537), 1063), (np.float64(-0.0001608431339263916), 857), (np.float64(-0.0001817811280488968), 493), (np.float64(-0.00020514801144599915), 662), (np.float64(-0.0002080760896205902), 1080), (np.float64(-0.0002263011410832405), 1214), (np.float64(-0.000227256678044796), 1181), (np.float64(-0.00023396313190460205), 811), (np.float64(-0.00026110000908374786), 427), (np.float64(-0.00028219353407621384), 1205), (np.float64(-0.0003402328584343195), 419), (np.float64(-0.00036205508513376117), 62), (np.float64(-0.0003917478024959564), 537), (np.float64(-0.0005154851824045181), 492), 
(np.float64(-0.0005159936845302582), 637), (np.float64(-0.0005178460851311684), 856), (np.float64(-0.0005494393408298492), 109), (np.float64(-0.0005625635385513306), 97), (np.float64(-0.000569885887671262), 1115), (np.float64(-0.0005779080092906952), 933), (np.float64(-0.0005808547139167786), 506), (np.float64(-0.0006279787048697472), 1103), (np.float64(-0.0007155854254961014), 1256), (np.float64(-0.0007434776052832603), 688), (np.float64(-0.0007441248744726181), 1169), (np.float64(-0.000748196616768837), 147), (np.float64(-0.0007578060030937195), 468), (np.float64(-0.0007839640602469444), 26), (np.float64(-0.0008056208025664091), 740), (np.float64(-0.0008387919515371323), 948), (np.float64(-0.0009087081998586655), 1196), (np.float64(-0.0009119641035795212), 1161), (np.float64(-0.0009306315332651138), 73), (np.float64(-0.000951351597905159), 142), (np.float64(-0.0009772293269634247), 540), (np.float64(-0.0009796805679798126), 1260), (np.float64(-0.001017771428450942), 691), (np.float64(-0.0010307496413588524), 590), (np.float64(-0.001033630222082138), 953), (np.float64(-0.0010345056653022766), 551), (np.float64(-0.0010652430355548859), 41), (np.float64(-0.0010662563145160675), 1031), (np.float64(-0.0010681245476007462), 707), (np.float64(-0.0010780454613268375), 661), (np.float64(-0.001093613333068788), 385), (np.float64(-0.0010970290750265121), 126), (np.float64(-0.0011139344424009323), 767), (np.float64(-0.0011796094477176666), 766), (np.float64(-0.0011811880394816399), 1009), (np.float64(-0.0012204386293888092), 992), (np.float64(-0.0013030210830038413), 802), (np.float64(-0.0013143308460712433), 1099), (np.float64(-0.00135769322514534), 938), (np.float64(-0.0013707373291254044), 1147), (np.float64(-0.0013769203796982765), 389), (np.float64(-0.0014346614480018616), 742), (np.float64(-0.001471739262342453), 489), (np.float64(-0.0014727767556905746), 52), (np.float64(-0.0015113605186343193), 232), (np.float64(-0.0015142625197768211), 1177), 
(np.float64(-0.0015361960977315903), 289), (np.float64(-0.0016520395874977112), 825), (np.float64(-0.0016639144159853458), 252), (np.float64(-0.0016933688893914223), 987), (np.float64(-0.0017160698771476746), 946), (np.float64(-0.00173667399212718), 1252), (np.float64(-0.0017373105511069298), 733), (np.float64(-0.0017650823428994045), 649), (np.float64(-0.0017763301730155945), 522), (np.float64(-0.0017936015501618385), 115), (np.float64(-0.0018141400068998337), 577), (np.float64(-0.0018463386222720146), 674), (np.float64(-0.0018810424953699112), 1116), (np.float64(-0.0018857363611459732), 1199), (np.float64(-0.0019829915836453438), 814), (np.float64(-0.00200633704662323), 995), (np.float64(-0.002010106109082699), 1204), (np.float64(-0.0020110905170440674), 1057), (np.float64(-0.0020224787294864655), 1207), (np.float64(-0.002053378149867058), 945), (np.float64(-0.002064388245344162), 575), (np.float64(-0.0020839348435401917), 470), (np.float64(-0.00216059572994709), 465), (np.float64(-0.0022006616927683353), 981), (np.float64(-0.002226443961262703), 1139), (np.float64(-0.00228223018348217), 71), (np.float64(-0.002282954752445221), 520), (np.float64(-0.002297590486705303), 673), (np.float64(-0.002300553023815155), 1079), (np.float64(-0.0023086005821824074), 807), (np.float64(-0.002316609607078135), 1074), (np.float64(-0.0023270510137081146), 749), (np.float64(-0.002357354387640953), 216), (np.float64(-0.002396269701421261), 1073), (np.float64(-0.0024244561791419983), 104), (np.float64(-0.002444714307785034), 1024), (np.float64(-0.0024672462604939938), 999), (np.float64(-0.002483328804373741), 34), (np.float64(-0.0025172159948851913), 838), (np.float64(-0.002524329349398613), 935), (np.float64(-0.0025281142443418503), 967), (np.float64(-0.0025460217148065567), 457), (np.float64(-0.002552484627813101), 455), (np.float64(-0.002560259774327278), 474), (np.float64(-0.0026358672184869647), 429), (np.float64(-0.0026509244926273823), 269), 
(np.float64(-0.0026514939963817596), 1253), (np.float64(-0.0026922840625047684), 623), (np.float64(-0.00269710854627192), 1225), (np.float64(-0.002712603658437729), 38), (np.float64(-0.0027409472968429327), 965), (np.float64(-0.00277055986225605), 743), (np.float64(-0.0028255488723516464), 391), (np.float64(-0.0028463639318943024), 190), (np.float64(-0.0028850361704826355), 336), (np.float64(-0.002894410863518715), 832), (np.float64(-0.002898396924138069), 778), (np.float64(-0.0030250325798988342), 76), (np.float64(-0.00303669273853302), 1121), (np.float64(-0.0030500199645757675), 65), (np.float64(-0.0030538206920027733), 185), (np.float64(-0.0030581308528780937), 393), (np.float64(-0.003087669610977173), 210), (np.float64(-0.003108654171228409), 51), (np.float64(-0.0031100647029234096), 473), (np.float64(-0.0031547900289297104), 889), (np.float64(-0.0031569916754961014), 195), (np.float64(-0.003162076696753502), 1273), (np.float64(-0.0031907064840197563), 843), (np.float64(-0.0033862628042697906), 130), (np.float64(-0.0034333346411585808), 442), (np.float64(-0.0034815147519111633), 752), (np.float64(-0.0035286154597997665), 902), (np.float64(-0.0035514887422323227), 1086), (np.float64(-0.003552686423063278), 333), (np.float64(-0.0035611912608146667), 1102), (np.float64(-0.003571145236492157), 1237), (np.float64(-0.0035725105553865433), 169), (np.float64(-0.003596900962293148), 1278), (np.float64(-0.0036182962357997894), 936), (np.float64(-0.003647194243967533), 402), (np.float64(-0.0036538057029247284), 112), (np.float64(-0.003661118447780609), 1136), (np.float64(-0.0036774892359972), 549), (np.float64(-0.0037003178149461746), 176), (np.float64(-0.0037345218006521463), 788), (np.float64(-0.003779228776693344), 174), (np.float64(-0.003787299618124962), 716), (np.float64(-0.00380537798628211), 983), (np.float64(-0.0038185007870197296), 144), (np.float64(-0.003865445323754102), 70), (np.float64(-0.003870982676744461), 911), (np.float64(-0.003874685149639845), 1001), 
(np.float64(-0.0038925185799598694), 49), (np.float64(-0.003927405923604965), 358), (np.float64(-0.00400954857468605), 602), (np.float64(-0.004042258486151695), 481), (np.float64(-0.00408430490642786), 918), (np.float64(-0.0041113197803497314), 720), (np.float64(-0.004242537543177605), 863), (np.float64(-0.004256729036569595), 1010), (np.float64(-0.004292648285627365), 651), (np.float64(-0.0043027400970458984), 78), (np.float64(-0.004380635917186737), 797), (np.float64(-0.004390287562273443), 986), (np.float64(-0.004402928985655308), 35), (np.float64(-0.004403814557008445), 482), (np.float64(-0.004492867738008499), 956), (np.float64(-0.0045052869245409966), 181), (np.float64(-0.004513232968747616), 603), (np.float64(-0.00453865434974432), 1184), (np.float64(-0.00456337071955204), 89), (np.float64(-0.00461997021920979), 508), (np.float64(-0.004656549543142319), 625), (np.float64(-0.004690955160185695), 536), (np.float64(-0.0046967072412371635), 296), (np.float64(-0.0047052884474396706), 322), (np.float64(-0.004711035639047623), 873), (np.float64(-0.004717739298939705), 731), (np.float64(-0.0047803036868572235), 275), (np.float64(-0.004825130105018616), 638), (np.float64(-0.004828300327062607), 961), (np.float64(-0.004890882410109043), 102), (np.float64(-0.004894081503152847), 1178), (np.float64(-0.004899552091956139), 1210), (np.float64(-0.0049577930476516485), 557), (np.float64(-0.004958644509315491), 1217), (np.float64(-0.0049695546622388065), 139), (np.float64(-0.004982003942131996), 361), (np.float64(-0.004986512009054422), 494), (np.float64(-0.00502120703458786), 504), (np.float64(-0.0050244322046637535), 787), (np.float64(-0.005043705925345421), 1164), (np.float64(-0.005074195563793182), 604), (np.float64(-0.005097134271636605), 406), (np.float64(-0.005099068395793438), 1268), (np.float64(-0.005101373419165611), 922), (np.float64(-0.005114092491567135), 692), (np.float64(-0.005152318626642227), 384), (np.float64(-0.005154757760465145), 711), 
(np.float64(-0.0051582116866484284), 268), (np.float64(-0.005177075508981943), 352), (np.float64(-0.005228022113442421), 530), (np.float64(-0.00522840628400445), 170), (np.float64(-0.00528366956859827), 1056), (np.float64(-0.005368012934923172), 969), (np.float64(-0.005396544001996517), 1231), (np.float64(-0.005413727834820747), 882), (np.float64(-0.005450633354485035), 524), (np.float64(-0.005477700382471085), 850), (np.float64(-0.005487922579050064), 334), (np.float64(-0.005521275103092194), 161), (np.float64(-0.00552127743139863), 175), (np.float64(-0.0055327690206468105), 865), (np.float64(-0.005547152832150459), 1220), (np.float64(-0.005717332474887371), 8), (np.float64(-0.005721753463149071), 292), (np.float64(-0.00574151985347271), 18), (np.float64(-0.005785588873550296), 794), (np.float64(-0.005798071622848511), 644), (np.float64(-0.00580454315058887), 132), (np.float64(-0.005812995135784149), 69), (np.float64(-0.005842681974172592), 1227), (np.float64(-0.005866500549018383), 390), (np.float64(-0.005866686813533306), 1192), (np.float64(-0.005951972212642431), 875), (np.float64(-0.005956872366368771), 2), (np.float64(-0.0059717330150306225), 753), (np.float64(-0.005976843181997538), 1070), (np.float64(-0.005981167778372765), 970), (np.float64(-0.005995499901473522), 372), (np.float64(-0.006027504801750183), 258), (np.float64(-0.00602865032851696), 1248), (np.float64(-0.006082542240619659), 1262), (np.float64(-0.0061778719536960125), 1043), (np.float64(-0.0062213437631726265), 476), (np.float64(-0.006271482445299625), 618), (np.float64(-0.0063001555390655994), 111), (np.float64(-0.006341526517644525), 1066), (np.float64(-0.006352424621582031), 96), (np.float64(-0.006389547139406204), 148), (np.float64(-0.006395917385816574), 154), (np.float64(-0.006402084603905678), 764), (np.float64(-0.006419172510504723), 57), (np.float64(-0.006443299353122711), 1175), (np.float64(-0.0064479149878025055), 806), (np.float64(-0.006505150347948074), 241), 
(np.float64(-0.0065887924283742905), 884), (np.float64(-0.006634398130699992), 511), (np.float64(-0.006656968966126442), 224), (np.float64(-0.006672145798802376), 1117), (np.float64(-0.006685070693492889), 443), (np.float64(-0.006695944350212812), 842), (np.float64(-0.0067441752180457115), 666), (np.float64(-0.0068170540034770966), 398), (np.float64(-0.006845368072390556), 0), (np.float64(-0.006857441738247871), 1182), (np.float64(-0.006921172142028809), 610), (np.float64(-0.006955621996894479), 808), (np.float64(-0.007136133732274175), 439), (np.float64(-0.0071418872103095055), 792), (np.float64(-0.007211441406980157), 1166), (np.float64(-0.007216833531856537), 239), (np.float64(-0.007237443060148507), 586), (np.float64(-0.007238788530230522), 306), (np.float64(-0.00724145770072937), 107), (np.float64(-0.007245765998959541), 817), (np.float64(-0.007268328219652176), 1098), (np.float64(-0.007300347089767456), 555), (np.float64(-0.007359273731708527), 193), (np.float64(-0.007387600839138031), 201), (np.float64(-0.0074022915214300156), 99), (np.float64(-0.0074034701101481915), 1232), (np.float64(-0.0074781812727451324), 257), (np.float64(-0.007478212472051382), 432), (np.float64(-0.007487598806619644), 1089), (np.float64(-0.007495214231312275), 1242), (np.float64(-0.007495550438761711), 815), (np.float64(-0.007613290101289749), 599), (np.float64(-0.0076307556591928005), 263), (np.float64(-0.00764109194278717), 680), (np.float64(-0.007670147344470024), 526), (np.float64(-0.007670966908335686), 454), (np.float64(-0.0076980628073215485), 721), (np.float64(-0.007704330608248711), 1144), (np.float64(-0.007711821002885699), 576), (np.float64(-0.007724279537796974), 648), (np.float64(-0.0077279843389987946), 1012), (np.float64(-0.007735062390565872), 286), (np.float64(-0.007739881053566933), 737), (np.float64(-0.007761240005493164), 872), (np.float64(-0.0077701956033706665), 709), (np.float64(-0.007822123123332858), 349), (np.float64(-0.007830768823623657), 1033), 
(np.float64(-0.007877346128225327), 1270), (np.float64(-0.007887596264481544), 614), (np.float64(-0.00789869949221611), 246), (np.float64(-0.007918609771877527), 1110), (np.float64(-0.007958957925438881), 528), (np.float64(-0.007987448945641518), 1109), (np.float64(-0.00801955908536911), 1236), (np.float64(-0.008025538176298141), 1091), (np.float64(-0.008026821538805962), 881), (np.float64(-0.008048209361732006), 829), (np.float64(-0.008056597784161568), 499), (np.float64(-0.008070695213973522), 885), (np.float64(-0.00811498612165451), 689), (np.float64(-0.008118484169244766), 565), (np.float64(-0.00811863038688898), 1158), (np.float64(-0.008136065676808357), 1172), (np.float64(-0.008224183460697532), 675), (np.float64(-0.00822520349174738), 108), (np.float64(-0.008245592936873436), 40), (np.float64(-0.008287797681987286), 772), (np.float64(-0.008296319516375661), 140), (np.float64(-0.008329878211952746), 164), (np.float64(-0.008337317034602165), 392), (np.float64(-0.008361676707863808), 146), (np.float64(-0.008454600349068642), 225), (np.float64(-0.008487144485116005), 779), (np.float64(-0.008503612130880356), 533), (np.float64(-0.008577090688049793), 847), (np.float64(-0.008597470819950104), 562), (np.float64(-0.008647900074720383), 846), (np.float64(-0.008654453791677952), 925), (np.float64(-0.00868566706776619), 72), (np.float64(-0.008686518296599388), 302), (np.float64(-0.008748093619942665), 440), (np.float64(-0.008753710426390171), 827), (np.float64(-0.008770633023232222), 903), (np.float64(-0.008915271610021591), 1118), (np.float64(-0.008982018567621708), 431), (np.float64(-0.008990546382847242), 710), (np.float64(-0.009008477441966534), 541), (np.float64(-0.009019730612635612), 782), (np.float64(-0.009068425744771957), 157), (np.float64(-0.009090296924114227), 959), (np.float64(-0.00919034518301487), 697), (np.float64(-0.00919616175815463), 1028), (np.float64(-0.009215446189045906), 824), (np.float64(-0.009354566223919392), 927), 
(np.float64(-0.009406345896422863), 1042), (np.float64(-0.00941525585949421), 816), (np.float64(-0.009422685950994492), 650), (np.float64(-0.00945487868739292), 1143), (np.float64(-0.009470253251492977), 478), (np.float64(-0.009476244449615479), 821), (np.float64(-0.009483122266829014), 963), (np.float64(-0.009523652493953705), 887), (np.float64(-0.00956575758755207), 895), (np.float64(-0.00958152487874031), 356), (np.float64(-0.009585897030774504), 305), (np.float64(-0.009609293192625046), 723), (np.float64(-0.009649815503507853), 101), (np.float64(-0.009656770154833794), 1271), (np.float64(-0.009658633265644312), 463), (np.float64(-0.009679041802883148), 958), (np.float64(-0.009686156583484262), 295), (np.float64(-0.009690960869193077), 3), (np.float64(-0.009700579568743706), 642), (np.float64(-0.009711027145385742), 495), (np.float64(-0.009808659553527832), 311), (np.float64(-0.009825988119700924), 1197), (np.float64(-0.009919967502355576), 770), (np.float64(-0.00992558989673853), 915), (np.float64(-0.00995855126529932), 860), (np.float64(-0.00996008887887001), 1130), (np.float64(-0.009978827089071274), 332), (np.float64(-0.009996309876441956), 324), (np.float64(-0.01000710017979145), 844), (np.float64(-0.010009054094552994), 800), (np.float64(-0.010019579902291298), 947), (np.float64(-0.010040998458862305), 202), (np.float64(-0.01004641002509743), 994), (np.float64(-0.010080473497509956), 151), (np.float64(-0.01013161102309823), 869), (np.float64(-0.010141927748918533), 919), (np.float64(-0.010255863424390554), 898), (np.float64(-0.010261062532663345), 665), (np.float64(-0.010276105254888535), 861), (np.float64(-0.010293344035744667), 458), (np.float64(-0.010300910100340843), 726), (np.float64(-0.010341383516788483), 325), (np.float64(-0.010353345642215572), 635), (np.float64(-0.010372515767812729), 1087), (np.float64(-0.010401349514722824), 1000), (np.float64(-0.010496689938008785), 647), (np.float64(-0.01055026613175869), 1179), 
(np.float64(-0.01059710793197155), 1208), (np.float64(-0.010630078613758087), 606), (np.float64(-0.010634157806634903), 1081), (np.float64(-0.010682531632483006), 435), (np.float64(-0.010732075199484825), 318), (np.float64(-0.010796718299388885), 940), (np.float64(-0.010817267000675201), 12), (np.float64(-0.010851098224520683), 134), (np.float64(-0.010936714708805084), 505), (np.float64(-0.01104088919237256), 991), (np.float64(-0.011065786704421043), 79), (np.float64(-0.011071410030126572), 5), (np.float64(-0.011135176755487919), 867), (np.float64(-0.011154929175972939), 1126), (np.float64(-0.011161897331476212), 1272), (np.float64(-0.01126299798488617), 1238), (np.float64(-0.011272568255662918), 1112), (np.float64(-0.011295948177576065), 634), (np.float64(-0.01129804365336895), 780), (np.float64(-0.01131666952278465), 705), (np.float64(-0.01133042573928833), 874), (np.float64(-0.011340262368321419), 1185), (np.float64(-0.011386333149857819), 479), (np.float64(-0.011390786617994308), 1101), (np.float64(-0.011419500224292278), 227), (np.float64(-0.011442882008850574), 620), (np.float64(-0.01144443266093731), 660), (np.float64(-0.011451411992311478), 1224), (np.float64(-0.011496592778712511), 633), (np.float64(-0.011498132022097707), 1030), (np.float64(-0.011539971455931664), 798), (np.float64(-0.011559644713997841), 217), (np.float64(-0.011612750589847565), 209), (np.float64(-0.0116298608481884), 579), (np.float64(-0.011648551328107715), 1034), (np.float64(-0.011675120331346989), 149), (np.float64(-0.011696023866534233), 567), (np.float64(-0.011698195710778236), 160), (np.float64(-0.011711067520081997), 1167), (np.float64(-0.0117823276668787), 1163), (np.float64(-0.011843113228678703), 1029), (np.float64(-0.011853933800011873), 535), (np.float64(-0.01186610758304596), 591), (np.float64(-0.011869622394442558), 1062), (np.float64(-0.011904150247573853), 670), (np.float64(-0.011912490415852517), 74), (np.float64(-0.011915481183677912), 864), 
(np.float64(-0.011936145718209445), 841), (np.float64(-0.012056197971105576), 315), (np.float64(-0.012068057432770729), 964), (np.float64(-0.012071516364812851), 1078), (np.float64(-0.012117337435483932), 337), (np.float64(-0.012127349153161049), 452), (np.float64(-0.012201700359582901), 877), (np.float64(-0.01221482828259468), 1128), (np.float64(-0.012246077458257787), 449), (np.float64(-0.012495806440711021), 682), (np.float64(-0.012575287837535143), 177), (np.float64(-0.012676802929490805), 436), (np.float64(-0.012683648616075516), 198), (np.float64(-0.012702982407063246), 914), (np.float64(-0.012723691645078361), 523), (np.float64(-0.012760473415255547), 757), (np.float64(-0.012779026292264462), 48), (np.float64(-0.012828223407268524), 852), (np.float64(-0.012916180305182934), 1016), (np.float64(-0.012964524328708649), 636), (np.float64(-0.012966942158527672), 1150), (np.float64(-0.012986363843083382), 247), (np.float64(-0.013018159195780754), 1054), (np.float64(-0.013066044077277184), 996), (np.float64(-0.013116424903273582), 693), (np.float64(-0.01320071890950203), 833), (np.float64(-0.013295786455273628), 87), (np.float64(-0.013389321975409985), 373), (np.float64(-0.013403642922639847), 572), (np.float64(-0.013429042883217335), 862), (np.float64(-0.013557696132920682), 81), (np.float64(-0.01362999901175499), 677), (np.float64(-0.013825366098899394), 364), (np.float64(-0.013825431524310261), 105), (np.float64(-0.013843605294823647), 949), (np.float64(-0.013897279277443886), 369), (np.float64(-0.013940966688096523), 1046), (np.float64(-0.013956151902675629), 573), (np.float64(-0.013969846069812775), 1096), (np.float64(-0.01397152990102768), 1047), (np.float64(-0.01400591991841793), 781), (np.float64(-0.014015112072229385), 466), (np.float64(-0.014026038348674774), 1148), (np.float64(-0.014037872664630413), 617), (np.float64(-0.014065070077776909), 544), (np.float64(-0.014098634012043476), 929), (np.float64(-0.01418947521597147), 1266), 
(np.float64(-0.014249518513679504), 54), (np.float64(-0.014250874519348145), 543), (np.float64(-0.014288587495684624), 585), (np.float64(-0.01435130089521408), 556), (np.float64(-0.014362127520143986), 1213), (np.float64(-0.014388982206583023), 272), (np.float64(-0.01446759165264666), 44), (np.float64(-0.0145144232083112), 941), (np.float64(-0.01452496147248894), 616), (np.float64(-0.01462503895163536), 320), (np.float64(-0.014764860272407532), 859), (np.float64(-0.014927219599485397), 878), (np.float64(-0.014956824481487274), 622), (np.float64(-0.014958196319639683), 1215), (np.float64(-0.014981732238084078), 1050), (np.float64(-0.015076996758580208), 763), (np.float64(-0.015152443200349808), 497), (np.float64(-0.01515391655266285), 1156), (np.float64(-0.015214354265481234), 519), (np.float64(-0.015253475634381175), 343), (np.float64(-0.015282157342880964), 906), (np.float64(-0.015289867296814919), 386), (np.float64(-0.015292404219508171), 434), (np.float64(-0.01533450186252594), 672), (np.float64(-0.015339143574237823), 1195), (np.float64(-0.015478499233722687), 734), (np.float64(-0.015486100688576698), 501), (np.float64(-0.015492841601371765), 513), (np.float64(-0.015516646206378937), 233), (np.float64(-0.015645429491996765), 773), (np.float64(-0.0157010480761528), 155), (np.float64(-0.015702321310527623), 597), (np.float64(-0.015805164322955534), 849), (np.float64(-0.015820898115634918), 1277), (np.float64(-0.015887961140833795), 357), (np.float64(-0.015930459601804614), 645), (np.float64(-0.016012540087103844), 517), (np.float64(-0.016064459457993507), 1048), (np.float64(-0.016075864201411605), 1201), (np.float64(-0.016258132178336382), 574), (np.float64(-0.01628638431429863), 1083), (np.float64(-0.016317928209900856), 1037), (np.float64(-0.016351506114006042), 378), (np.float64(-0.016361628659069538), 413), (np.float64(-0.016448184847831726), 1076), (np.float64(-0.01659383624792099), 1211), (np.float64(-0.016631217673420906), 383), 
(np.float64(-0.01673525758087635), 133), (np.float64(-0.016851751133799553), 1171), (np.float64(-0.017033321782946587), 345), (np.float64(-0.01705419272184372), 1138), (np.float64(-0.01710225734859705), 1140), (np.float64(-0.017209792509675026), 1105), (np.float64(-0.01725015789270401), 818), (np.float64(-0.017269250005483627), 228), (np.float64(-0.017377035692334175), 208), (np.float64(-0.017424164339900017), 1226), (np.float64(-0.017525162547826767), 1141), (np.float64(-0.01753891631960869), 942), (np.float64(-0.017641677986830473), 744), (np.float64(-0.017784817813662812), 1122), (np.float64(-0.017843062058091164), 760), (np.float64(-0.0179891474545002), 1152), (np.float64(-0.018129284493625164), 1003), (np.float64(-0.01813964545726776), 326), (np.float64(-0.01821867097169161), 658), (np.float64(-0.018225931096822023), 120), (np.float64(-0.018232353730127215), 679), (np.float64(-0.01825845241546631), 978), (np.float64(-0.01828348310664296), 924), (np.float64(-0.018405072391033173), 116), (np.float64(-0.018510638969019055), 438), (np.float64(-0.018527057953178883), 685), (np.float64(-0.01854758709669113), 94), (np.float64(-0.018557699862867594), 502), (np.float64(-0.01855983817949891), 554), (np.float64(-0.01861389074474573), 445), (np.float64(-0.018743810476735234), 199), (np.float64(-0.018759075552225113), 669), (np.float64(-0.018766134977340698), 1261), (np.float64(-0.01877213642001152), 82), (np.float64(-0.01886759651824832), 890), (np.float64(-0.018873335095122457), 453), (np.float64(-0.018922503106296062), 962), (np.float64(-0.01896221563220024), 698), (np.float64(-0.019119519740343094), 988), (np.float64(-0.019161410629749298), 745), (np.float64(-0.01922638714313507), 1026), (np.float64(-0.019319428130984306), 917), (np.float64(-0.01936191599816084), 344), (np.float64(-0.019529331475496292), 1071), (np.float64(-0.01953260414302349), 484), (np.float64(-0.019547119736671448), 93), (np.float64(-0.019778557121753693), 1020), (np.float64(-0.019798152148723602), 
643), (np.float64(-0.019840769469738007), 1233), (np.float64(-0.019868917763233185), 238), (np.float64(-0.019939441233873367), 24), (np.float64(-0.019996959250420332), 240), (np.float64(-0.02009878307580948), 1173), (np.float64(-0.020234012976288795), 1142), (np.float64(-0.020388811826705933), 795), (np.float64(-0.020517916418612003), 223), (np.float64(-0.020581429824233055), 420), (np.float64(-0.020620129944290966), 1039), (np.float64(-0.02065638266503811), 840), (np.float64(-0.020701369736343622), 293), (np.float64(-0.020729095675051212), 837), (np.float64(-0.02078204322606325), 7), (np.float64(-0.02080471720546484), 1038), (np.float64(-0.02098647691309452), 799), (np.float64(-0.02110620215535164), 980), (np.float64(-0.021208827383816242), 221), (np.float64(-0.021225396543741226), 558), (np.float64(-0.021289877127856016), 1157), (np.float64(-0.021346226800233126), 539), (np.float64(-0.02135976031422615), 464), (np.float64(-0.021395526826381683), 1044), (np.float64(-0.021515470929443836), 831), (np.float64(-0.021526120603084564), 1082), (np.float64(-0.021593546494841576), 31), (np.float64(-0.021753717213869095), 256), (np.float64(-0.02176509378477931), 491), (np.float64(-0.022058885544538498), 1064), (np.float64(-0.022263603284955025), 834), (np.float64(-0.022382635856047273), 954), (np.float64(-0.02256319299340248), 955), (np.float64(-0.022585909813642502), 747), (np.float64(-0.0230522045167163), 1275), (np.float64(-0.023085430613718927), 408), (np.float64(-0.023244470125064254), 641), (np.float64(-0.023779388517141342), 467), (np.float64(-0.02389063686132431), 424), (np.float64(-0.023980213329195976), 1093), (np.float64(-0.024013042449951172), 1123), (np.float64(-0.02409262489527464), 304), (np.float64(-0.024205811321735382), 1190), (np.float64(-0.025242964271456003), 699), (np.float64(-0.0252546314150095), 759), (np.float64(-0.025966100860387087), 264), (np.float64(-0.02601127838715911), 529), (np.float64(-0.026383422315120697), 60), 
(np.float64(-0.026427260600030422), 141), (np.float64(-0.02657921239733696), 1023), (np.float64(-0.026649098843336105), 1059), (np.float64(-0.02696368470788002), 538), (np.float64(-0.026969667291268706), 894), (np.float64(-0.02746322425082326), 1257), (np.float64(-0.027611277997493744), 805), (np.float64(-0.027830212842673063), 437), (np.float64(-0.028053276240825653), 422), (np.float64(-0.02825447265058756), 588), (np.float64(-0.02829993050545454), 784), (np.float64(-0.02857894729822874), 676), (np.float64(-0.028608759865164757), 1240), (np.float64(-0.029095172882080078), 1135), (np.float64(-0.029543783515691757), 712), (np.float64(-0.029558178037405014), 552), (np.float64(-0.029595278203487396), 741), (np.float64(-0.029920198023319244), 254), (np.float64(-0.02996830642223358), 632), (np.float64(-0.030348291620612144), 400), (np.float64(-0.0308592370711267), 640), (np.float64(-0.032068658620119095), 725), (np.float64(-0.0321959547836741), 259), (np.float64(-0.03227374702692032), 1230), (np.float64(-0.033374167047441006), 997), (np.float64(-0.033400426618754864), 1104), (np.float64(-0.03494056686758995), 407), (np.float64(-0.035000767558813095), 327), (np.float64(-0.03854627627879381), 851), (np.float64(-0.039288025349378586), 23), (np.float64(-0.04906845884397626), 226), (np.float64(-0.10356737673282623), 690), (np.float64(-0.17738884687423706), 736), (np.float64(-0.18347840011119843), 876)]\n" ] } ], "source": [ - "print(len(med_diff))\n", - "n_features = 100\n", - "indexed_list = [(value, index) for index, value in enumerate(med_diff)]\n", - "sorted_indexed_list = sorted(indexed_list, key=lambda x: x[0], reverse=True)\n", - "largest_10_indices = [index for value, index in sorted_indexed_list[:n_features]]\n", - "print(sorted_indexed_list)" + "print(p_vals)" ] }, { "cell_type": "code", - "execution_count": 24, - "id": "82e761fb", + "execution_count": 5, + "id": "da101158", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": 
"name 'p_vals' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(\u001b[43mp_vals\u001b[49m))\n\u001b[32m 2\u001b[39m indexed_list = [(value, index) \u001b[38;5;28;01mfor\u001b[39;00m index, value \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(p_vals)]\n\u001b[32m 3\u001b[39m sorted_indexed_list = \u001b[38;5;28msorted\u001b[39m(indexed_list, key=\u001b[38;5;28;01mlambda\u001b[39;00m x: x[\u001b[32m0\u001b[39m], reverse=\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "\u001b[31mNameError\u001b[39m: name 'p_vals' is not defined" + ] + } + ], "source": [ - "with open('best_embedding_vars.pkl', 'wb') as f:\n", - " pickle.dump(sorted_indexed_list, f)\n" + "print(len(p_vals))\n", + "indexed_list = [(value, index) for index, value in enumerate(p_vals)]\n", + "sorted_indexed_list = sorted(indexed_list, key=lambda x: x[0], reverse=False)\n", + "print(sorted_indexed_list)\n", + "\n", + "#with open('best_embedding_vars_whitney2.pkl', 'wb') as f:\n", + "# pickle.dump(sorted_indexed_list, f)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "c866b393", "metadata": {}, "outputs": [ @@ -378,78 +494,79 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (84_439, 8)
emb_1160emb_234emb_1280embeddingsresiduestrain_boolsepitope_boolsrsa_vals
f64f64f64list[f64]strboolboolf64
-5.638327-0.626679117.0[-0.079217, -0.08223, … 117.0]"L"truefalse0.205823
-7.059569-2.33806117.0[0.271906, 0.131599, … 117.0]"I"truefalse0.471213
-3.4552060.347305117.0[0.075211, -0.124738, … 117.0]"Q"truefalse0.046812
-5.985601-1.758423117.0[0.033206, 0.13658, … 117.0]"T"truefalse0.437416
-4.628117-0.632118117.0[-0.153488, 0.178101, … 117.0]"P"truefalse0.312792
-8.687856-2.58618547.0[-0.102616, 0.023357, … 47.0]"V"falsefalse0.09529
-8.632407-3.70580147.0[0.006365, -0.054578, … 47.0]"Q"falsefalse0.559269
-8.433228-4.29324847.0[-0.021138, 0.060409, … 47.0]"K"falsefalse0.883928
-8.325913-3.82434947.0[-0.013476, 0.081914, … 47.0]"A"falsefalse0.828726
-8.116076-4.16655747.0[-0.079727, 0.132829, … 47.0]"Q"falsefalse0.637367
" + "shape: (84_439, 15)
emb_1271emb_1272emb_1273emb_1274emb_1275emb_1276emb_1277emb_1278emb_1279emb_1280embeddingsresiduestrain_boolsepitope_boolsrsa_vals
f64f64f64f64f64f64f64f64f64f64list[f64]strboolboolf64
-0.390364-0.207083-0.0288720.1334240.464410.105135-0.0357880.2468070.096495117.0[-0.079217, -0.08223, … 117.0]"L"truefalse0.205823
0.062281-0.1553070.1702650.0127590.1842510.210409-0.0662360.0838130.026999117.0[0.271906, 0.131599, … 117.0]"I"truefalse0.471213
-0.100564-0.172441-0.105397-0.1155310.156894-0.043453-0.367025-0.070912-0.130206117.0[0.075211, -0.124738, … 117.0]"Q"truefalse0.046812
-0.034119-0.089407-0.252221-0.0862750.2001420.02470.14825-0.084408-0.163228117.0[0.033206, 0.13658, … 117.0]"T"truefalse0.437416
0.046669-0.072711-0.144304-0.0246760.2585580.335825-0.0847740.0455890.106432117.0[-0.153488, 0.178101, … 117.0]"P"truefalse0.312792
-0.1211330.105609-0.023850.1043860.0417190.245388-0.067179-0.1235210.20393847.0[-0.102616, 0.023357, … 47.0]"V"falsefalse0.09529
0.019410.025256-0.039905-0.0449910.11730.097847-0.062373-0.1105870.09324247.0[0.006365, -0.054578, … 47.0]"Q"falsefalse0.559269
-0.149773-0.019055-0.0687770.16741-0.2558310.133178-0.049465-0.1390630.12068947.0[-0.021138, 0.060409, … 47.0]"K"falsefalse0.883928
-0.1839280.066849-0.0263670.3147210.0152160.1480860.106204-0.0894170.28617347.0[-0.013476, 0.081914, … 47.0]"A"falsefalse0.828726
-0.147168-0.089189-0.023737-0.1751490.1075640.231201-0.01757-0.2529760.13080747.0[-0.079727, 0.132829, … 47.0]"Q"falsefalse0.637367
" ], "text/plain": [ - "shape: (84_439, 8)\n", - "┌───────────┬───────────┬──────────┬─────────────┬──────────┬─────────────┬─────────────┬──────────┐\n", - "│ emb_1160 ┆ emb_234 ┆ emb_1280 ┆ embeddings ┆ residues ┆ train_bools ┆ epitope_boo ┆ rsa_vals │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ls ┆ --- │\n", - "│ f64 ┆ f64 ┆ f64 ┆ list[f64] ┆ str ┆ bool ┆ --- ┆ f64 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ bool ┆ │\n", - "╞═══════════╪═══════════╪══════════╪═════════════╪══════════╪═════════════╪═════════════╪══════════╡\n", - "│ -5.638327 ┆ -0.626679 ┆ 117.0 ┆ [-0.079217, ┆ L ┆ true ┆ false ┆ 0.205823 │\n", - "│ ┆ ┆ ┆ -0.08223, … ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ ┆ 117.0] ┆ ┆ ┆ ┆ │\n", - "│ -7.059569 ┆ -2.33806 ┆ 117.0 ┆ [0.271906, ┆ I ┆ true ┆ false ┆ 0.471213 │\n", - "│ ┆ ┆ ┆ 0.131599, … ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ ┆ 117.0] ┆ ┆ ┆ ┆ │\n", - "│ -3.455206 ┆ 0.347305 ┆ 117.0 ┆ [0.075211, ┆ Q ┆ true ┆ false ┆ 0.046812 │\n", - "│ ┆ ┆ ┆ -0.124738, ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ ┆ … 117.0] ┆ ┆ ┆ ┆ │\n", - "│ -5.985601 ┆ -1.758423 ┆ 117.0 ┆ [0.033206, ┆ T ┆ true ┆ false ┆ 0.437416 │\n", - "│ ┆ ┆ ┆ 0.13658, … ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ ┆ 117.0] ┆ ┆ ┆ ┆ │\n", - "│ -4.628117 ┆ -0.632118 ┆ 117.0 ┆ [-0.153488, ┆ P ┆ true ┆ false ┆ 0.312792 │\n", - "│ ┆ ┆ ┆ 0.178101, … ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ ┆ 117.0] ┆ ┆ ┆ ┆ │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ -8.687856 ┆ -2.586185 ┆ 47.0 ┆ [-0.102616, ┆ V ┆ false ┆ false ┆ 0.09529 │\n", - "│ ┆ ┆ ┆ 0.023357, … ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ ┆ 47.0] ┆ ┆ ┆ ┆ │\n", - "│ -8.632407 ┆ -3.705801 ┆ 47.0 ┆ [0.006365, ┆ Q ┆ false ┆ false ┆ 0.559269 │\n", - "│ ┆ ┆ ┆ -0.054578, ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ ┆ … 47.0] ┆ ┆ ┆ ┆ │\n", - "│ -8.433228 ┆ -4.293248 ┆ 47.0 ┆ [-0.021138, ┆ K ┆ false ┆ false ┆ 0.883928 │\n", - "│ ┆ ┆ ┆ 0.060409, … ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ ┆ 47.0] ┆ ┆ ┆ ┆ │\n", - "│ -8.325913 ┆ -3.824349 ┆ 47.0 ┆ [-0.013476, ┆ A ┆ false ┆ false ┆ 0.828726 │\n", - "│ ┆ ┆ ┆ 0.081914, … ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ ┆ 47.0] ┆ ┆ ┆ ┆ │\n", - "│ -8.116076 ┆ -4.166557 ┆ 47.0 ┆ [-0.079727, ┆ Q ┆ false ┆ 
false ┆ 0.637367 │\n", - "│ ┆ ┆ ┆ 0.132829, … ┆ ┆ ┆ ┆ │\n", - "│ ┆ ┆ ┆ 47.0] ┆ ┆ ┆ ┆ │\n", - "└───────────┴───────────┴──────────┴─────────────┴──────────┴─────────────┴─────────────┴──────────┘" + "shape: (84_439, 15)\n", + "┌───────────┬───────────┬───────────┬───────────┬───┬──────────┬────────────┬───────────┬──────────┐\n", + "│ emb_1271 ┆ emb_1272 ┆ emb_1273 ┆ emb_1274 ┆ … ┆ residues ┆ train_bool ┆ epitope_b ┆ rsa_vals │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ s ┆ ools ┆ --- │\n", + "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ str ┆ --- ┆ --- ┆ f64 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ bool ┆ bool ┆ │\n", + "╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪══════════╪════════════╪═══════════╪══════════╡\n", + "│ -0.390364 ┆ -0.207083 ┆ -0.028872 ┆ 0.133424 ┆ … ┆ L ┆ true ┆ false ┆ 0.205823 │\n", + "│ 0.062281 ┆ -0.155307 ┆ 0.170265 ┆ 0.012759 ┆ … ┆ I ┆ true ┆ false ┆ 0.471213 │\n", + "│ -0.100564 ┆ -0.172441 ┆ -0.105397 ┆ -0.115531 ┆ … ┆ Q ┆ true ┆ false ┆ 0.046812 │\n", + "│ -0.034119 ┆ -0.089407 ┆ -0.252221 ┆ -0.086275 ┆ … ┆ T ┆ true ┆ false ┆ 0.437416 │\n", + "│ 0.046669 ┆ -0.072711 ┆ -0.144304 ┆ -0.024676 ┆ … ┆ P ┆ true ┆ false ┆ 0.312792 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ -0.121133 ┆ 0.105609 ┆ -0.02385 ┆ 0.104386 ┆ … ┆ V ┆ false ┆ false ┆ 0.09529 │\n", + "│ 0.01941 ┆ 0.025256 ┆ -0.039905 ┆ -0.044991 ┆ … ┆ Q ┆ false ┆ false ┆ 0.559269 │\n", + "│ -0.149773 ┆ -0.019055 ┆ -0.068777 ┆ 0.16741 ┆ … ┆ K ┆ false ┆ false ┆ 0.883928 │\n", + "│ -0.183928 ┆ 0.066849 ┆ -0.026367 ┆ 0.314721 ┆ … ┆ A ┆ false ┆ false ┆ 0.828726 │\n", + "│ -0.147168 ┆ -0.089189 ┆ -0.023737 ┆ -0.175149 ┆ … ┆ Q ┆ false ┆ false ┆ 0.637367 │\n", + "└───────────┴───────────┴───────────┴───────────┴───┴──────────┴────────────┴───────────┴──────────┘" ] }, - "execution_count": 26, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "emb_1280 = []\n", - "emb_234 = []\n", - "emb_1160 = []\n", - "for (embedding, residue, train_bool, epitope_bool, rsa) in 
bp3_res.iter_rows():\n", + "emb_1279 = []\n", + "emb_1278 = []\n", + "emb_1277 = []\n", + "emb_1276 = []\n", + "emb_1275 = []\n", + "emb_1274 = []\n", + "emb_1273 = []\n", + "emb_1272 = []\n", + "emb_1271 = []\n", + "for (idx, embedding, residue, train_bool, epitope_bool, rsa) in bp3_res.iter_rows():\n", " emb_1280.append(embedding[1280])\n", - " emb_234.append(embedding[234])\n", - " emb_1160.append(embedding[1160])\n", + " emb_1279.append(embedding[1279])\n", + " emb_1278.append(embedding[1278])\n", + " emb_1277.append(embedding[1277])\n", + " emb_1276.append(embedding[1276])\n", + " emb_1275.append(embedding[1275])\n", + " emb_1274.append(embedding[1274])\n", + " emb_1273.append(embedding[1273])\n", + " emb_1272.append(embedding[1272])\n", + " emb_1271.append(embedding[1271])\n", "\n", "bp3_res.insert_column(0, pl.Series(\"emb_1280\", emb_1280))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_234\", emb_234))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1160\", emb_1160))\n" + "bp3_res.insert_column(0, pl.Series(\"emb_1279\", emb_1279))\n", + "bp3_res.insert_column(0, pl.Series(\"emb_1278\", emb_1278))\n", + "bp3_res.insert_column(0, pl.Series(\"emb_1277\", emb_1277))\n", + "bp3_res.insert_column(0, pl.Series(\"emb_1276\", emb_1276))\n", + "bp3_res.insert_column(0, pl.Series(\"emb_1275\", emb_1275))\n", + "bp3_res.insert_column(0, pl.Series(\"emb_1274\", emb_1274))\n", + "bp3_res.insert_column(0, pl.Series(\"emb_1273\", emb_1273))\n", + "bp3_res.insert_column(0, pl.Series(\"emb_1272\", emb_1272))\n", + "bp3_res.insert_column(0, pl.Series(\"emb_1271\", emb_1271))\n" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 42, "id": "d3becb0b", "metadata": {}, "outputs": [ { "data": { - "image/png": "" + "image/png": "" }, "metadata": { "image/png": { @@ -462,7 +579,7 @@ ], "source": [ "(\n", - "ggplot(bp3_res, aes(x = epitope_bools, y = rsa_vals))\n", + "ggplot(bp3_res, aes(x = epitope_bools, y = emb_1276))\n", "+ geom_boxplot()\n", "+ 
labs(\n", " x = \"Epitope Status\",\n", @@ -474,7 +591,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": null, "id": "0a766107", "metadata": {}, "outputs": [ @@ -482,28 +599,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "--- Cross-Validation Fold Details ---\n", - "Fold 1: Train AUC = 0.6385, Test AUC = 0.6482\n", - "Fold 2: Train AUC = 0.6405, Test AUC = 0.6400\n", - "Fold 3: Train AUC = 0.6416, Test AUC = 0.6355\n", - "Fold 4: Train AUC = 0.6409, Test AUC = 0.6383\n", - "Fold 5: Train AUC = 0.6405, Test AUC = 0.6399\n", - "\n", - "--- Overfitting Check ---\n", - "Average Training AUC across folds: 0.6404 (+/- 0.0010)\n", - "Average Test (Validation) AUC across folds: 0.6404 (+/- 0.0042)\n" + "--- Cross-Validation Fold Details ---\n" ] } ], "source": [ "# --- BP3 CV Evaluation ---\n", "\n", - "agg_features = [\n", - " #\"emb_1280\",\n", - " #\"emb_1160\",\n", - " \"emb_234\",\n", - " #\"rsa_vals\"\n", - "]\n", + "agg_features = []\n", + "for emb in range(1281):\n", + " field = \"field_\" + str(emb)\n", + " agg_features.append(field)\n", + "agg_features.append(\"rsa_vals\")\n", "\n", "train_df = bp3_res.to_pandas()\n", "X_df = train_df[agg_features]\n", @@ -524,13 +631,20 @@ " X_train, X_test = X[train_index], X[test_index]\n", " y_train, y_test = y[train_index], y[test_index]\n", "\n", - " # --- Choose Classifier ---\n", + " # --- Scale Features ---\n", + " scaler = StandardScaler() \n", + " scaler.fit(X_train) \n", + " X_train = scaler.transform(X_train) \n", + " X_test = scaler.transform(X_test) \n", "\n", + " # --- Choose Classifier ---\n", " neg_count = (y_train == 0).sum()\n", " pos_count = (y_train == 1).sum()\n", " scale_pos_weight_value = neg_count / pos_count if pos_count > 0 else 1\n", "\n", - " clf = LogisticRegression(class_weight=\"balanced\")\n", + " #clf = RandomForestClassifier(class_weight=\"balanced\")\n", + " #clf = LogisticRegression(class_weight=\"balanced\", penalty=\"l2\", max_iter=10000, 
n_jobs=-1)\n", + " clf = MLPClassifier(alpha=5)\n", " clf.fit(X_train, y_train)\n", "\n", " # --- Training AUC Calculation ---\n", @@ -579,7 +693,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.13.7" } }, "nbformat": 4, From 595ad6df0844aaffa2922f99f0074c658393eeb3 Mon Sep 17 00:00:00 2001 From: Jacob Sesate Date: Tue, 14 Oct 2025 16:28:04 -0700 Subject: [PATCH 3/3] added network representation and analysis; finalized LR model --- notebooks/best_esm_embedding_vars.pkl | Bin 30592 -> 0 bytes notebooks/example_code.ipynb | 356 --------- notebooks/regression.ipynb | 1056 +++++++++++++------------ 3 files changed, 535 insertions(+), 877 deletions(-) delete mode 100644 notebooks/best_esm_embedding_vars.pkl delete mode 100644 notebooks/example_code.ipynb diff --git a/notebooks/best_esm_embedding_vars.pkl b/notebooks/best_esm_embedding_vars.pkl deleted file mode 100644 index 4b87fb82c1c79cc413603859b400d56db207d565..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 30592 zcmY-1cXW-}{s!=^M~^zX6VZDo>WiqaLlA^SoFIfKA$rT!89@*d{pg+OVU!o6_cBC3 zqm1aCi1wS`*pK_{`^UX&eb#fI?{|OSGk4aVb?3^|^M(u)|NWy&bjirv{fG1m8&SS% z_W^@~%l8}7cX02ZL4$%uL`S9#?;g}QXi)Ur=*TqxW&Zuw9)m}O1^>UK>{&B<+SDo0 zZKF$?sUlO7G(Ubqp*+rVk(IKfq(*Ngp$5(H8pwE?F?&`H%&$ExUq7L@=__;^RHQ@Jf7X}WQCQ=m!_b6Z1ICD zlt19Pw!pP;KkhLa|5&c_W4I^Ng>~@vocC$?8@w#%lOE)Tf8=*r(=Rzy{|es6F*gR@ zk@Lp2P`HEUT5IlfQEqmi=QinfZkei&SD#8iACTx@N8nAmE9EE2k}su*AB1ByNi627OA&x;8R9t zss%ULt=dZ#s%`$Eu@zs&D7K&EH^t@c2LFfMxL684pX_+Y+{El)9c8Sb%i6oGH#y!){DSaqFPTGR5`qS*eD z$2y%8;Z3>b^|1!1IYc$mFB9SgwQUlUW7N@txW@MUopN&l)mR(uBi@c{WUIg6S^1~q za85?eI;!!R-X3fb+X8E$B{jEbdB)Jqq50dyMCy@Q&4=rijWg5FX6+VPpHi_i&H&>I6T>J+E7Tj#g*(<{tMxiSQ%* zEWeqTl$+kPzC64K@w{B)3Rn%F$2D#_fB6IU=riHSkLEQK%ZFgBNn^cRc18R=*H}-t z&Q-4}%W85I~(?f@7MAlg5c>mzwUY~xG&GO9W12W%uQ?BcKzZP#IC&A&{B=3 zUX1-RM{OU)Zu7qnAI4K%m#)IEvp4gOgXd>=?OF|A&;7FhQg}GO)-ikuyc_q}zQw>T 
zc2}FZ@QU1DZ}L?5Yxa=%$?$GGH@1U6+@JeJWHsexBRZ!T?TNTI|MZP>uV<@sPSfi9 zUxSz9bvoa@0`JZ}va1FE!t1>D+yhVGeAN8)%FXMvru18dcqy*2U0VuY%r!};qEXYC zY8+$dBEEua#JxW75&SBnO8~qV*N07O0k6yX<9^wcn^$P8=$0I%j=7tUY5ARpH{p6m z(f#mpyk^w0-OA0pR39?aiFkFcu~PiKk_Dny`WQ;GOwsuA~d_iJU*#VH$iO=Z*H&;F~xvuIGh& z^RtW*?r=ADt7}T-WZR{9)%tdNtq|_V;9GbdIb^K((H~(?cvE>?|g5EUuIu%t2lflujv??2EI}25BWYrJ+~LT2uy~bDfh(#_;LfZyol67vO%&JuQ40d+KpNBGsPf`MuVc@9-f2wgrWkNibYuu}SovvOrTidctBK!q! zY1B9hpTT+O_1*B<+#eRO3+~Bz>q0YlTJCr3uL~c>J#tbJcy-Pj|9B`jYtVD$!{5M< ziG45TMoTT;#@R!_ZS11Px8K!uCi6C{5)Z&%YumJ*t=xR~zp--1>u|(JalO&D4}7)u z4evIEXV>am)`9QUdOnth|HgT7DldEsuW!WurrbR9zcqzulo8yX_;oJVGdga5uh!ocYW1a$d~qg1m?3Dg5Ezc&_bPQ}}PZr{(|4 zJ8;kZ0rgbgyh~@W_LfJ@DqeHkpaO`mrnr&$%T#r>x!Tlw$KY3Zsugzt9?9DqpZCG* z@YpyRK5WmPZR@UNh2iFu@kqO?89~+iA4eD!Cy*T?|%FjJp|_*T@tz5f7kvT+MLssQovR)j}>cN@B-}e`AK*q zZT*gi;jegZ!1;-@aTm2UUp-ZFMJVy61lV*{J2)X zJQM0m(>7LeI>i6xxDlQT@nIAfdml|u%V*=a8XGplzi^FQyaHZ}w{%sO%FPv2A2lKv z@e5qzcBwObJ0BrWF1Xo)Y622!Bff@foX!-O`X|MW{cpyrM{VIp$)y|N1vM|eTDjSa zYQ&YLh_B+Qa@_Cm&wK-;W>3_lr5Y>A3p|2YaTckM`kh=qx(aF#7 zPaBaAY!Dm0e{nbBzW!gN`m8(nrN+l3_*1^GYw{;}D!zbq>@K_=FDdd|fd_Gqh&-&^ zl(bIb=~;-ITq9?Uf#2d9Sz{EuD6ikLau4_^?uozO4gQIHjM=r7n}cW_(Xk@pc{whs zmxrHZmkUb4vuhqz67^T9zG7fe#076D3wXl2bB*m$0eBhBcjSZ5;2u#mJG?sQt=DPc zBiLov&k^d!kbO@}%tzdZYvgJPKgf@g1xCPc@Qr1LUw(kcip_z@Z)G=J zt>HI#Zq%U~@KW3_E|!K*;vTuLIQ$;>h-Mz}F`PFV!_8WB2I-d#aZj$1&C|jAXlF=# zI9gr#ExR!*0e(zde_f1nb0m*lFaq(RyruZo4c>t3#gH!W@0!2s0QX|=k+TlG1-oqL zsocy+>$vwVjCenei)~*=sn`C_ZuR~QAI0lid2TB=t5LnEcnR_KT0B!2{16}C78VMh zqaAagKk5%qy=-0^@ddo4%>Qkq+Wst$6}g|notzhg9>d>JUQV|kfrn`MPJ7@{yuM7a z5tM(7ZDSI!%0`@>(di>{sF6}U$<&j~Ng?waMM+}ud#5d-gyP-n=@^_L&tfDdJt zC6>bbY3@E9{zm&ML#M*?@H(=_hv7fxni|i!rP!W|>qWIii0`27rCWP&W#$ksv^3*( zai#|1@f0_Vm&4Qz&hhTD!*zHCUdA|d1^$TZo%d$Ib8(L?zNvC^AdM9_tAaBV+uFY# zsxcB^v?yo2VYhrmCwi+iE)ue`mL$FAJ8(V9Mgdn5jiYvlEM z@ND$QiHP-28ls++lk+0&U+{XIceS_-f5+>HcYEQ*`S{}2B=~3UkwyE$E$-Prq#Hb% zd*sd*@K4<1>|O}IhWEG5=7z^;{`=y=>bZL}!8P{0%zlE!H7V$dr)p>X-K9BWb1AG(r7$s-IV>vILMZ%A=mptPIuc57f 
zyevGDT~zUecj39>^zuPJ_cs^QbB$9=!M?%rYKeP4o=V9Bs0prc&MnA3O8*F$_W#UuB6 zBL14=a?_76^*b1#dG7b{_UzVzlknpFC}Zy*%FQn{Hstya#6R#`g>xhPZ+4k)5xg{y z6{jb{k8@s}904E4A5bnErrh+Wb)x?63tmesJ9I}&m^RhBD?F4RD*{`?!`P+00NjVS z5q0PHS4Xa|^=}^pzs#?aW_9J}K3=na1;j^dHF*X6DX$}*U+AZPIoqj5#II0n&(Bju z=>_m2np+v*X|;O4y?xcI;Y>;kz&py|jPRD)oBF4KAK+68zeT;(TV34a`l~a%h?YV|qUyXaj!^B|a{Wvdt#=|Fa zzx=14a`POmDbmzOyscK_pWH*$duTTsody0@^V;9Lt9(k$i{6Fz(AL>D2mVcamg9Hj zWvU6azn_ppnI zPmZ7c<_B6|@6V_;oG6wlwPB zbG_K#RUIdz7GG2bUX+g}^PUV;`K;_BemeY&_FAhx{0QfLn!o9ydXlxd0j=RvmUOSGf8T<~fFY>JOQ9U!+E&s9bL~YHqMdA0g{z08utDbAzBVxbVmG{u{Q7zyr zw7kz7Q{`K0bNgn1-_bnjQ*)J%)p~q?xzxOl1K0sSDp>55dZa(99?Bp6Mo?83jk8f00-e0>;{35ue1KIe$LE}4`vk2$a6fH*w-gQ4*nT|L zHUJz%?2O!`*zT`AD&ZA;wbmZx)<|uctZjLBuD9~mTF;+@nkrA!{9r&c<#F1p%65a# z*7Cj+m78;EeewD#;#akG0^^$htT*#fT&C~YLQQ?BO>H^aRK8Pt?b+jY<(sr^3fzZJ z(|qUHR%&c>8Y?E2ZLN4F^VyU>s=a`=P0q~l=iF{x-vr;G<(oc$x6(Y|6@0JuDz|s= z5?cO!cpJ55DPFVcaK-j?ypG5q;YYbfu2~PC$L@Ih6h2P-R!u%DH^=hW@X;+uYLF(?qaXw-Ko9mDZ(yxPKE3Li?HZ=9aMgUmTxfxK1w@}*DUxX z?YSZ2I;x(3wEdqK@KgRo%Qx{*ZdT><89Ka z=QXqJMZAF)&u;{(niZOFxC9T-*6jQUzLj0BsOL~U<@vcHb^!bVyNnnH-^o29Y6RRv z^Fg!Wdo-_q8NQACMTaNw`r5mEvj?d?uW4UB?+*AScB62*ZYuwm)?cYEJY3tS!gl3m zQ$GLB9f-T}Ix_e&d^x*VoB*%E^>XwRcnMy|r&%(*GUr8k_wH)_7h3-xt(BVzw5II1 z9Pvt8&EVUJhfrL+PTNDZ3yxbAtHU4jmf~4Q zE%+(^hSu9e_z8B=;4}Otd)UR8VD+rc>_(40@XYLDemwjUpFwzJ>8W}K@bPW+^T1DO z`GYl;o5krouJVHsKc+pm`F8kYt{01n^-^P}XlpiXque}5^&(&e;yzkD&2G4%jSV^o z&!GAH6Cppxnk~6L#HH9Cq&?REE8-0(F59Kz(&AXL3+Jk>}qTVEC5r)}ds z3I1C9t{D^IlePL(dHSiIdc2O9oDXi-`UA?sTWR?^4d8!ip4VHsxt`arKMe8jTD;II z_&Q!w#=GEiv~}jcM12_5i|5}E57X)!*!rv27Spcd;iKG)qZ-@o7{qh(He%@q#J5mf zOiw#NwV&m7Yj9ckb*(13sd6)lYD)Io0B%AoGVDgnEN-#R9e_V!7gZmirV`gw$Q`DZ z?5SO{O<#CtUS2Ml3ICh(#?F24`Px{w>+p+O&oakAHMgslA2$I0R`W9R;M2KBSj&`~ zt@v5}-ymL)YZ4;Az;Ch}=iLVVT*vgJnxsF{E4JU~*NTF@;QP78Dl!-~yQsz*HX3n% zUfwDk19$T8QfAnOnod-6vHNqxziQhY${wz6Igsn)E?0-=VYhD9ftTbS<7Q)cLw51E z3GdCXm5FWPx42(Sh=%9TdL|x)kJa+mkHZghzcJKpu=*%H=?vmhM#c7p+SO)cgYRPx zTh$a{5|)`a*yB(d9F3#Eqo{Uiv{_Hs9%-T`b!jqpWuAraT`1n 
z=bbaF!X4}~xi>sJ_c(+4C^t9I`3>Jyh}-yC;@DUCDeYNX^9@z&U*NplSr$Hs^G2_B z@JzgpI5hy?mh)oVNO&uLmKeE2xw)Lycm9vJ5<8}>M@ta5%f<2VKY4pAOXx85hlCr| z$ZZ1@+l%rtGIS#1*(qLe`F6zf@l+9i2tJW-P;uOG)J&ur5qJUIm^tDlTFz*@4^KB- zeUZt$q_yFfuj9NUPZ08hu>wZbY8hRfUX}k=QHA^ zxn77gBh@d*L2>b-4EW!_mi_DhC{Qe_3|>eqvwJDFf8|}|w-$(dQCzg>fq0Czi$`yG zaX!5$wi-2Ws7BuSiuhoT$89V-N?q~?UrcuJR&Khu#_&bFi?+@AAow3#fki^Ol@w@S<1~cx^ev@6U{pA(3BNVIbuJxpt3csY~ z=gx!==lv`8pAR>*{MeQ7sq7+XE&RRq-g8^vg?Jx%a|iryt;aYG57By7U4b9td~C)v zQr)u)_lxNC@bvsH`NISLk=^K80^XDRWu_&0~{2iai5jX`tg!3Y1DSR96FZ(*-)!1$F zmrv$BjaBQBk7loV{W^T5*5gP}ZuX@!Tka3QC5auCKBFZyZ(nl#H^d_;?%N>iIQ1VS ztB9=e&jNP%K5j?xr)2bP~BIjk5mhj(spAI>@z=v^<%QF~$m(D3f zvk-VS?zcS%g&$=Xp5x%d*p2Ej@CbG}av8h_&ux2bIlL0POuq$wh2Q1V?5J`xm_C>F z;yB`;__-G!pMYQBda>v-{2;ISvi&u93C>$L6O?Ozh1Gg=2l4T|rEolezv3DZm3yN4 z>NVKUtu74T%mS$zT6f zB`$4>N$URNwSR||3ci59qg?C79{4rx5qHYL8&h6RDN+?aiu1BU zJ$QZXd)e*qMBc~MJOVz2do0Hy_#*DHUhRO#vkR|d@Ida70Vm*IoVV?D!FSNRgta{x zUXNYG6qu~;zlrx3T}mi7GtmdO@_Kt{%dE(5E zi0|YYdH*xw$0;6`CiU-Xn>oDYS63EzMxJVwDgyt&?s`y0xml6w#n#4%`*OYGY&blL zT@;%EAId(VWArK1TWHdeX$iN;!u)+1hs<5u7i_(L9RoxBa-#baf%+jMoE2Anr8 z*HUhFq_K|Mp@ZTzM5-ljqr(7TlVF#DaXUj z-c)0x?~M2^o@$gE3LnTduADRAx%rqP<2-m>UPpK>hBwpl`Io?dW4DTIg=gk{tjtIYm?j#0zu1 z)xHtDIlHLQ174Qh_OLhn0FRYP!{F|GhGrF(D>uv1I*I%DBEFel<=f~GJcP%}KioD2Sidt^PgS!$m*yr+>m zJ$xefxbG|pPscq8)$77La!;7i68<~C%P@W5<2i3FYXjfTF1GiAJ9)0~=mX!#dx}as z;CDH1l-mRUp!M(G2T#FyQR#$o^9lVNgnybSb+$D8Ecq&ja&r;I-+JdnygjwcWwk26 z+h{eNtH8hUtJ1#mh6nTZnR58RCsDs}#010Busf@S!}D{@vpEvLEND)<&YMDzF$@GU$wX`TBVb+knKxbj)~>hNyd;|y;MPolgq4u!%c=i^32 z!JqM5QDrW?318Xg>00<4?w@*Ty>fFnogq?gM0_pBh2M7gWgdI-aXfr6yF8JA`oUEH zKKwo6yLrothm+u&d93vOj+#KKDS2AVRY(5FHS0R1fS2YP_eojd2RMIqZYAaB392`u zt0BIGYefGV@E80|Ea8Qkwp5dpwk_f#`8GMHwu2wzZQ?>5@FSeJuJ(cN<8_Q|(eT@x zmzkEqpKxB}ii4-*^**|_)mM4Qc~L60a9$30~&cw^4{JZP%i zOh@ayzG_E23y*b<90M=RHO8IA@T=@Tf9^s39jZ49>_@yjf5G9Y&cP?~ShrKR;Q2Xk zw7&V65%EU>&#%Gf;N!T* zTGlC6&Aq~Td1@$p9lOhWCcH7v6|VX4?(A{{D;eU#RkK-0!zz1w0$)WrNgWd+Y5wxVk&w*Sz+%0JEFp1i#gxEP*~ 
zr|y5X1U{B~PPQq$SZ)7=d!$Dbcrnf=O^Sf;;@8R_d*N%?qYk`;x8!{)u1kiybAQ~~ zMoZK_t2pmiV!{)6PhmR)AHZ(yHI}NLliXuPc*9GwORrY&N1FFssoeZdpGBmKLwq>L z9m{eoQ)8p}41Q4y;F9xBd#F?8|00k7{B;yO755kwCd1G3b8Cj?Uaoo`@j3l&=Y{)l z-nH)mJcj#SOP|4;vWIlozf$$L<9^4if~$VI`5T?bTNYlg_&%}GW5PbwvYlQl, , , ..., , , ]>\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/jsesate/miniconda3/envs/epident-experiments/lib/python3.13/site-packages/MDAnalysis/coordinates/MMCIF.py:139: UserWarning: 1 A^3 CRYST1 record, this is usually a placeholder. Unit cell dimensions will be set to None.\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "u = af3_output.get_mda_universe()\n", - "print(u.atoms)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a1a6ac7f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Alpha carbon pLDDT: [69.04000092 90.44000244 93.47000122 93.87999725 94.23000336 94.08999634\n", - " 91.55000305 92.23999786 94.29000092 93.56999969 91.97000122 93.80000305\n", - " 93.52999878 90.19999695 91. 92.70999908 90.81999969 88.83999634\n", - " 90.33000183 88.86000061 89.34999847 87.77999878 88.33999634 89.69999695\n", - " 90.81999969 90.26000214 90.75 93.15000153 95.62999725 96.09999847\n", - " 95.29000092 96.18000031 97.12000275 96.65000153 95.94000244 97.08999634\n", - " 96.94999695 97.33000183 97.62999725 98.05000305 98.09999847 98.19000244\n", - " 98.18000031 97.77999878 97.97000122 97.88999939 97.70999908 97.19999695\n", - " 97.48999786 97.93000031 97.41000366 96.66999817 97.23999786 98.01999664\n", - " 98.25 98.01999664 97.88999939 98.31999969 98.58000183 98.08999634\n", - " 98.12999725 98.5 98.31999969 98.08999634 98.23999786 97.91999817\n", - " 97.12999725 97.04000092 97.29000092 96.72000122 97.30999756 98.01000214\n", - " 97.95999908 98.12999725 98.37999725 97.58999634 98. 
97.86000061\n", - " 97.90000153 98.09999847 98.05999756 98.34999847 98.19000244 98.15000153\n", - " 98.44000244 98.16999817 98.34999847 97.55000305 96.83000183 96.48000336\n", - " 94.91999817 94.91000366 93.54000092 93.41999817 92.83000183 89.87000275\n", - " 80.34999847 80.37999725 85.29000092 86.36000061 90.30999756 92.72000122\n", - " 93.12000275 92.09999847 93.58999634 95.87999725 96.54000092 95.41000366\n", - " 96.63999939 97.59999847 97.80000305 97.23999786 97.68000031 97.38999939\n", - " 97.58000183 97.44000244 96.58000183 95.87999725 96.11000061 97.19000244\n", - " 97.37000275 97.44999695 96.08000183 95.90000153 95.65000153 95.75\n", - " 96.72000122 97.31999969 97.5 96.66999817 94.75 93.80999756\n", - " 93.05000305 92.70999908 92.97000122 91.61000061 92.26999664 88.43000031\n", - " 82.11000061 77.48999786 66.93000031 59.59999847 48.08000183 57.41999817\n", - " 79.16000366 87.52999878 91.86000061 93.05999756 95.01999664 95.37999725\n", - " 94.76000214 95.16999817 97.20999908 96.45999908 95.58999634 96.59999847\n", - " 97.58000183 97.56999969 96.81999969 96.83000183 95.56999969 95.34999847\n", - " 96.54000092 95.52999878 94.69999695 94.08999634 94.77999878 94.33999634\n", - " 95. 
95.41000366 95.45999908 97.09999847 97.37000275 97.44000244\n", - " 97.97000122 97.76000214 97.47000122 97.43000031 97.73999786 97.30999756\n", - " 96.16000366 96.79000092 96.31999969 95.18000031 94.66999817 94.41000366\n", - " 93.44000244 92.30999756 92.55000305 90.62999725 87.55999756 87.79000092\n", - " 85.22000122 86.43000031 87.26999664 89.08000183 87.33000183 88.08999634\n", - " 86.70999908 83.44000244 84.87999725 87.94000244 87.48000336 88.58000183\n", - " 87.36000061 87.08000183 85.84999847 87.62000275 87.05999756 85.34999847\n", - " 81.41000366 71.08999634 67.79000092 78.40000153 86.98999786 91.79000092\n", - " 94.26999664 93.15000153 94.30000305 95.83999634 95.81999969 95.61000061\n", - " 97.04000092 97.33000183 96.23999786 96.62999725 95.73999786 97.\n", - " 97.66000366 96.62999725 96.76000214 97.79000092 97.37999725 97.20999908\n", - " 97.55999756 97.65000153 97.44999695 97.56999969 97.19999695 97.69999695\n", - " 97.44000244 96.91999817 97.19999695 97.23999786 95.76999664 93.5\n", - " 93.81999969 96.08999634 95.79000092 91.38999939 90.23999786 94.62000275\n", - " 95.98000336 95.19999695 94.20999908 95.11000061 95.69000244 95.41999817\n", - " 94.19000244 94.29000092 94.69999695 91.04000092 95.23000336 93.01000214\n", - " 94.48999786 94.90000153 94.91000366 95.66000366 96.55999756 95.18000031\n", - " 94.94999695 96.66000366 96.34999847 95.84999847 96.72000122 97.22000122\n", - " 97.44000244 97.66999817 97.73999786 98.18000031 98.13999939 98.36000061\n", - " 98.19000244 97.91999817 97.62000275 97.90000153 96.94999695 94.11000061\n", - " 95.08000183 96.16999817 94.48000336 94.62000275 95.84999847 95.51000214\n", - " 93.18000031 94.61000061 96.16999817 95.02999878 94.23000336 95.73000336\n", - " 96.83999634 94.52999878 94.37000275 95.80999756 96.30999756 94.91999817\n", - " 94.83000183 95.44000244 94.79000092 92.84999847 93.26000214 93.66000366\n", - " 92.20999908 90.30000305 88.19000244 91.63999939 92.29000092 92.55999756\n", - " 91. 
90.01000214 91.12000275 88.22000122 90.73999786 88.94999695\n", - " 85.81999969 90.08000183 88.94999695 90.79000092 91.98999786 94.23000336\n", - " 94.36000061 93.69000244 93.88999939 95.73000336 94.08000183 95.01000214\n", - " 95.55999756 95.31999969 94.98000336 95.44999695 96.22000122 96.25\n", - " 96.37999725 97.37000275 96.01000214 96.04000092 96.40000153 96.04000092\n", - " 93.55000305 93.52999878 94.94000244 94.05999756 90.54000092 90.91999817\n", - " 91.87999725 90.5 87.94999695 87.12999725 88.26000214 86.23999786\n", - " 85.59999847 87.66000366 83.66999817 73.52999878 62.49000168 59.43000031\n", - " 79.93000031 90.08000183 93.12999725 94.76999664 93.79000092 94.80999756\n", - " 96.12000275 96.48000336 95.48999786 96.97000122 96.41000366 96.83000183\n", - " 95.59999847 96.20999908 95.30000305 94.04000092 94.41000366 93.81999969\n", - " 93.06999969 93.01000214 95.01000214 94.43000031 93.26999664 94.30000305\n", - " 94.73000336 93.43000031 93.98000336 95.16999817 94.77999878 94.44000244\n", - " 94.77999878 95.48999786 94.08999634 93.91000366 95.23999786 93.80000305\n", - " 94.30999756 95.76999664 96.63999939 96.91999817 95.34999847 93.51999664\n", - " 94.73000336 93.76999664 81.12999725 72.05000305 78.80999756 89.94999695\n", - " 91.45999908 91.19999695 89.76000214 91.05999756 92.36000061 87.91000366\n", - " 89.88999939 89.41999817 93.61000061 94.76999664 95.18000031 95.79000092\n", - " 96.72000122 97.25 97.56999969 97.98000336 98.41999817 98.40000153\n", - " 98.31999969 98.52999878 98.37999725 98.45999908 98.23999786 98.23999786\n", - " 98.11000061 98.01000214 97.75 98.08999634 97.45999908 97.56999969\n", - " 98.40000153 98.54000092 98.06999969 98.22000122 98.61000061 98.61000061\n", - " 98.09999847 98.26999664 98.12999725 97.94999695 98.19000244 98.36000061\n", - " 98.44999695 98.12000275 98.48999786 98.66999817 98.76999664 98.62000275\n", - " 98.62000275 98.51000214 98.58000183 98.38999939 98.33000183 98.08000183\n", - " 97.44000244 97.51999664 96.91999817 
94.95999908 95.26000214 94.94000244\n", - " 91.88999939 93.51999664 93.58999634 92.65000153 91.22000122 90.36000061\n", - " 89.41000366 89.48999786 87.58000183 87.56999969 89.16000366 91.04000092\n", - " 92.59999847 93.04000092 92.51999664 90.72000122 87.91000366 89.26000214\n", - " 92.86000061 94.83999634 95.48999786 94.84999847 95.52999878 95.25\n", - " 94.43000031 94.58999634 95.19000244 94.87000275 93.94000244 95.72000122\n", - " 96.05999756 95.48999786 94.76999664 95.88999939 96.84999847 96.04000092\n", - " 97.12000275 98.13999939 98.31999969 98.27999878 98.26000214 98.41999817\n", - " 98.22000122 97.93000031 98.06999969 98.09999847 97.38999939 96.72000122\n", - " 96.98000336 96.84999847 95.95999908 96.23000336 95.18000031 96.51999664\n", - " 96.20999908 96.54000092 96.80000305 96.88999939 97.33999634 97.05999756\n", - " 96.90000153 97.41999817 97.54000092 97.27999878 97.19000244 96.43000031\n", - " 92.98999786 93.41000366 92.15000153 89.19000244 82.48999786]\n" - ] - } - ], - "source": [ - "# select all alpha carbons in topology\n", - "calphas = u.select_atoms(\"name CA\")\n", - "\n", - "# get their plddt\n", - "print(f\"Alpha carbon pLDDT: {calphas.tempfactors}\")\n", - "\n", - "# get the contact probability array\n", - "contact_probs = af3_output.get_contact_prob_ndarr()" - ] - }, - { - "cell_type": "markdown", - "id": "91ae6e70", - "metadata": {}, - "source": [ - "## Getting LM embeddings for a protein\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "ae7add4b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[ 2.8654e-01, 1.0777e-01, -1.2961e-01, ..., -1.4857e-01,\n", - " -2.2857e-01, 1.8300e+02],\n", - " [-5.2170e-02, -7.7566e-02, -2.3158e-01, ..., -1.2442e-01,\n", - " 2.3262e-02, 1.8300e+02],\n", - " [ 7.1554e-02, -6.7267e-03, 7.6057e-02, ..., -2.8993e-01,\n", - " 4.3512e-02, 1.8300e+02],\n", - " ...,\n", - " [ 1.0423e-02, 7.0801e-02, -1.1883e-01, ..., -2.1286e-01,\n", - " 5.9898e-02, 1.8300e+02],\n", 
- " [-1.7578e-01, 1.4636e-01, -3.6238e-02, ..., -1.2346e-01,\n", - " 1.2382e-02, 1.8300e+02],\n", - " [ 7.2781e-02, 1.7082e-01, -1.5018e-01, ..., -2.6568e-01,\n", - " -3.6069e-01, 1.8300e+02]])" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pathlib import Path\n", - "import torch\n", - "\n", - "ESM_ENCODING_DIR = Path(\"/tgen_labs/altin/esm_encodings\")\n", - "\n", - "sample_job_name = bp3.select(\"job_name\")[1].item()\n", - "\n", - "esm_2_embed = torch.load(ESM_ENCODING_DIR / (sample_job_name + \".pt\"))\n", - "\n", - "esm_2_embed" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b68e6889", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([183, 1281])" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "esm_2_embed.shape" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "epident-experiments", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/regression.ipynb b/notebooks/regression.ipynb index cddf591..6c95435 100644 --- a/notebooks/regression.ipynb +++ b/notebooks/regression.ipynb @@ -20,631 +20,578 @@ "This notebook will run with the 'envs/env.yaml` environment (epident-experiments)" ] }, - { - "cell_type": "markdown", - "id": "a5097707", - "metadata": {}, - "source": [ - "## Bepipred 3 dataset\n", - "\n", - "- job_name: unique identifier for protein, comes from hash of seq\n", - "- seq: amino acid sequence of protein\n", - "- train: boolean indicating if seq is part of train set\n", - "- epitope_boolmask: boolean array the same length as seq 
indiciating if the AA at that position is an epitope residue\n", - "- raw_protein_id: original ID assigned to protein in BP3C50ID set\n", - "- RSA: relative solvent accessiblity of the protein at each AA, calculated by FreeSASA\n", - "- SA: absolute solvent accessibility of the protein at each AA, calculated by FreeSASA" - ] - }, { "cell_type": "code", - "execution_count": 48, + "execution_count": null, "id": "47295d86", "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "shape: (358, 7)\n", - "┌───────────────┬──────────────┬───────┬──────────────┬──────────────┬──────────────┬──────────────┐\n", - "│ job_name ┆ seq ┆ train ┆ epitope_bool ┆ raw_protein_ ┆ RSA ┆ SA │\n", - "│ --- ┆ --- ┆ --- ┆ mask ┆ id ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ bool ┆ --- ┆ --- ┆ list[f64] ┆ list[f64] │\n", - "│ ┆ ┆ ┆ list[bool] ┆ str ┆ ┆ │\n", - "╞═══════════════╪══════════════╪═══════╪══════════════╪══════════════╪══════════════╪══════════════╡\n", - "│ bf2a62534941c ┆ LIQTPSSLLVQT ┆ true ┆ [false, ┆ 3b9k_B ┆ [0.205823, ┆ [36.957627, │\n", - "│ f895971e1daa3 ┆ NHTAKMSCEVKS ┆ ┆ false, … ┆ ┆ 0.471213, … ┆ 82.806331, … │\n", - "│ 3a46… ┆ ISKLTS… ┆ ┆ false] ┆ ┆ 1.00154… ┆ 152.2… │\n", - "│ d4febd28417e8 ┆ GNVDLVFLFDGS ┆ true ┆ [false, ┆ 3hi6_A ┆ [0.840245, ┆ [68.13546, │\n", - "│ a4bf6266337c7 ┆ MSLQPDEFQKIL ┆ ┆ false, … ┆ ┆ 0.294451, … ┆ 42.698276, … │\n", - "│ a2de… ┆ DFMKDV… ┆ ┆ false] ┆ ┆ 0.60539… ┆ 129.66… │\n", - "│ 17d233a2b305a ┆ DERETWSGKVDF ┆ true ┆ [false, ┆ 4xp9_C ┆ [1.100846, ┆ [157.156786, │\n", - "│ 3544cf6c164f8 ┆ LLSVIGFAVDLA ┆ ┆ false, … ┆ ┆ 1.039373, … ┆ 181.038055, │\n", - "│ ad67… ┆ NVWRFP… ┆ ┆ false] ┆ ┆ 1.03129… ┆ … 185… │\n", - "│ 34e0c5de18ccd ┆ KAMHVAQPAVVL ┆ true ┆ [true, true, ┆ 5ggv_Y ┆ [0.731129, ┆ [149.866882, │\n", - "│ 222f24d4bc9d0 ┆ ASSRGIASFVCE ┆ ┆ … false] ┆ ┆ 0.872878, … ┆ 94.934212, … │\n", - "│ f0e4… ┆ YASPGK… ┆ ┆ ┆ ┆ 1.22799… ┆ 168.… │\n", - "│ f4c930a3f1b5f ┆ GSHHHHHHGSGT ┆ true ┆ [false, ┆ 
5jq6_A ┆ [1.462795, ┆ [118.618012, │\n", - "│ b78cef62c5021 ┆ DITNQLTNVTVG ┆ ┆ false, … ┆ ┆ 0.796439, … ┆ 94.250586, … │\n", - "│ adc0… ┆ IDSGTT… ┆ ┆ false] ┆ ┆ 0.90302… ┆ 119.… │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2c282aeeb8859 ┆ LDKIDLSYETTE ┆ false ┆ [false, ┆ 7jum_A ┆ [0.474081, ┆ [85.126053, │\n", - "│ 6bf1f1f99be1b ┆ SGDTAVSEDSYD ┆ ┆ false, … ┆ ┆ 0.908022, … ┆ 129.629226, │\n", - "│ b7f0… ┆ KYASQN… ┆ ┆ false] ┆ ┆ 0.98618… ┆ … 143.… │\n", - "│ 5196520df0000 ┆ TDRQLAEEYLYR ┆ false ┆ [false, ┆ 5th9_A ┆ [0.871364, ┆ [122.513787, │\n", - "│ bf1b3fafa8c0e ┆ YGYTRVASLGPA ┆ ┆ false, … ┆ ┆ 0.432033, … ┆ 61.677101, … │\n", - "│ 9ecc… ┆ LLLLQK… ┆ ┆ false] ┆ ┆ 0.25613… ┆ 54.8… │\n", - "│ 96836e4358c57 ┆ LPWLNVSADGDN ┆ false ┆ [false, ┆ 6hga_B ┆ [0.93068, ┆ [167.112849, │\n", - "│ e3f571a4f2bb8 ┆ VHLVLNVSEEQH ┆ ┆ false, … ┆ ┆ 0.110532, … ┆ 15.166135, … │\n", - "│ a8f8… ┆ FGLSLY… ┆ ┆ true] ┆ ┆ 1.220648… ┆ 223.… │\n", - "│ 9d838eec0c246 ┆ CSSPPCECHQEE ┆ false ┆ [false, ┆ 2xwt_C ┆ [0.479508, ┆ [63.390948, │\n", - "│ 55e9902a3ac12 ┆ DFRVTCKDIQRI ┆ ┆ false, … ┆ ┆ 1.059907, … ┆ 125.429402, │\n", - "│ 8a34… ┆ PSLPPS… ┆ ┆ false] ┆ ┆ 0.52811… ┆ … 74.2… │\n", - "│ cb56653d3f7b5 ┆ CSVVVGENYSIK ┆ false ┆ [false, ┆ 6vtw_A ┆ [0.628252, ┆ [83.054881, │\n", - "│ 272b787496354 ┆ CDATKCTIEDKN ┆ ┆ false, … ┆ ┆ 0.682526, … ┆ 80.770091, … │\n", - "│ 9242… ┆ RGIIKT… ┆ ┆ false] ┆ ┆ 0.63736… ┆ 113.9… │\n", - "└───────────────┴──────────────┴───────┴──────────────┴──────────────┴──────────────┴──────────────┘\n" + "/home/jsesate/miniconda3/envs/epident-experiments/lib/python3.13/site-packages/MDAnalysis/coordinates/MMCIF.py:139: UserWarning: 1 A^3 CRYST1 record, this is usually a placeholder. 
Unit cell dimensions will be set to None.\n" ] } ], "source": [ - "# --- Imports ---\n", + "###################\n", + "# --- Imports --- #\n", + "###################\n", "\n", - "import polars as pl\n", - "import pandas as pd\n", "import pickle\n", - "from mdaf3.AF3OutputParser import AF3Output\n", + "import sys\n", + "import os\n", "from pathlib import Path\n", + "\n", + "import polars as pl\n", + "import polars.selectors as cs\n", + "import pandas as pd\n", "import torch\n", "import numpy as np\n", - "from scipy import stats\n", + "\n", + "from mdaf3.AF3OutputParser import AF3Output\n", + "from MDAnalysis.lib import distances\n", + "import networkx as nx\n", + "\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.neural_network import MLPClassifier\n", - "from sklearn.model_selection import StratifiedKFold\n", + "from sklearn.model_selection import StratifiedKFold, KFold, GridSearchCV\n", "from sklearn.preprocessing import StandardScaler \n", + "from sklearn.decomposition import PCA\n", "from sklearn.metrics import roc_auc_score\n", + "\n", "from plotnine import *\n", + "import matplotlib.pyplot as plt\n", "theme_set(theme_classic())\n", "\n", - "# --- Bepipred3 Data ---\n", + "ESM_ENCODING_DIR = Path(\"/tgen_labs/altin/esm_encodings\")\n", + "INF_DIR = Path(\"../data/bp3c50id/inference\")\n", + "\n", + "NUM_ESM_EMB_VARS = 1280\n", + "NUM_AF3_EMB_VARS = 348\n", + "\n", + "#################################\n", + "# --- Import Bepipred3 Data --- #\n", + "#################################\n", + "# - job_name: unique identifier for protein, comes from hash of seq\n", + "# - seq: amino acid sequence of protein\n", + "# - train: boolean indicating if seq is part of train set\n", + "# - epitope_boolmask: boolean array the same length as seq indiciating if the AA at that position is an epitope residue\n", + "# - raw_protein_id: original ID assigned to protein in BP3C50ID set\n", + "# - RSA: 
relative solvent accessiblity of the protein at each AA, calculated by FreeSASA\n", + "# - SA: absolute solvent accessibility of the protein at each AA, calculated by FreeSASA\n", "\n", "bp3 = pl.read_parquet(\"../data/bp3c50id/bp3c50id.rsa.parquet\")\n", - "bp3 = bp3.rename({\"test\" : \"train\"})\n", - "print(bp3)" - ] - }, - { - "cell_type": "markdown", - "id": "1c40a25c", - "metadata": {}, - "source": [ - "## Getting structural embeddings for a protein\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "615ee18f", - "metadata": {}, - "outputs": [], - "source": [ - "# --- AF3 Embeddings ---\n", "\n", - "INF_DIR = Path(\"../data/bp3c50id/inference\")\n", - "sample_job_name = bp3.select(\"job_name\")[0].item()\n", - "af3_output = AF3Output(INF_DIR / sample_job_name)\n", + "# train and test labels were swapped\n", + "bp3 = bp3.rename({\"test\" : \"train\"})\n", + "bp3 = bp3.with_row_index()\n", + "\n", + "########################\n", + "# --- Num Residues --- #\n", + "########################\n", + "\n", + "if \"seq_len\" not in bp3.columns:\n", + " seq_lens = []\n", + " for cols in bp3.iter_rows(named=True):\n", + " seq_len = len(cols['seq'])\n", + " seq_lens.append(seq_len)\n", + " seq_lens = pl.Series(\"seq_len\", seq_lens)\n", + " bp3.insert_column(3, seq_lens)\n", + "\n", + "#####################\n", + "# --- AF3 PTM --- #\n", + "#####################\n", + "\n", + "if \"ptms\" not in bp3.columns:\n", + " ptms = []\n", + " for cols in bp3.iter_rows(named=True):\n", + " job_name = cols['job_name']\n", + " af3_output = AF3Output(INF_DIR / job_name)\n", + " ptm = af3_output.get_summary_metrics()['ptm']\n", + " ptms.append(ptm)\n", + " ptms = pl.Series(\"ptm\", ptms)\n", + " bp3.insert_column(1, ptms)\n", + "\n", + "#####################\n", + "# --- AF3 pLDDT --- #\n", + "#####################\n", + "\n", + "if \"ptms\" not in bp3.columns:\n", + " pLDDTs = []\n", + " for cols in bp3.iter_rows(named=True):\n", + " job_name = 
cols['job_name']\n", + " af3_output = AF3Output(INF_DIR / job_name)\n", + " u = af3_output.get_mda_universe()\n", + " ca_atoms = u.select_atoms(\"protein and name CA\")\n", + " cur_protein_pLDDTs = []\n", + " for residue in ca_atoms:\n", + " pLDDT = residue.tempfactor\n", + " cur_protein_pLDDTs.append(pLDDT)\n", + " pLDDTs.append(cur_protein_pLDDTs)\n", + " pLDDTs = pl.Series(\"pLDDT\", pLDDTs)\n", + " bp3.insert_column(1, pLDDTs)\n", + "\n", + "##########################\n", + "# --- ESM Embeddings --- #\n", + "##########################\n", "\n", - "af3_single_embed = af3_output.get_single_embeddings()\n", - "af3_pairwise_embed = af3_output.get_pair_embeddings()\n", + "if \"esm_emb\" not in bp3.columns:\n", + " esm_embeddings = []\n", + " for cols in bp3.iter_rows(named=True):\n", + " job_name = cols['job_name']\n", + " # remove last column of embedding (sequence lengths)\n", + " embedding = torch.load(ESM_ENCODING_DIR / (job_name + \".pt\"))[:,:-1]\n", + " esm_embeddings.append(embedding)\n", + " esm_emb = pl.Series(\"esm_emb\", esm_embeddings)\n", + " bp3.insert_column(1, esm_emb)\n", + "\n", + "##########################\n", + "# --- AF3 Embeddings --- #\n", + "##########################\n", + "\n", + "if \"af_emb\" not in bp3.columns:\n", + " af_embeddings = []\n", + " for cols in bp3.iter_rows(named=True):\n", + " job_name = cols['job_name']\n", + " af3_output = AF3Output(INF_DIR / job_name)\n", + " af3_single_embed = af3_output.get_single_embeddings()\n", + " # remove alphafold embedding padding \n", + " num_tokens = cols['seq_len']\n", + " if af3_single_embed.shape[0] != num_tokens:\n", + " diff = int(af3_single_embed.shape[0] - num_tokens)\n", + " af_emb = af3_single_embed[:-diff,:]\n", + " af3_tensor = torch.from_numpy(af_emb)\n", + " af_embeddings.append(af3_tensor)\n", + " af_emb = pl.Series(\"af_emb\", af_embeddings)\n", + " bp3.insert_column(1, af_emb)\n", + " # TEMP: for some reason, row 192 has an esm embedding smaller than af3 embedding... 
\n", + " bp3 = bp3.filter(pl.col(\"index\") != 192).drop(\"index\")\n", + " bp3 = bp3.with_row_index()\n", + "\n", + "#########################################\n", + "# --- AF3 Protein Structure Network --- #\n", + "#########################################\n", + "# For each AF3 structure, we generate a graph whose nodes are residues\n", + "# and whose edges are weighted by the distance between residues. Edges are only drawn when\n", + "# the probability of contact is at least cutoff_prob (0.825) AND the\n", + "# predicted alignment error is at most cutoff_pae (10).\n", + "\n", + "#cutoff_dist = 10 # Angstroms\n", + "cutoff_prob = 0.825 # Probability of Contact\n", + "cutoff_pae = 10 # Predicted Alignment Error\n", + "\n", + "if \"af_graph\" not in bp3.columns:\n", + " af_graphs = []\n", + " for cols in bp3.iter_rows(named=True):\n", + " job_name = cols['job_name']\n", + " af3_output = AF3Output(INF_DIR / job_name)\n", + " u = af3_output.get_mda_universe()\n", + "\n", + " ca_atoms = u.select_atoms('protein and name CA')\n", + " ca_positions = ca_atoms.positions\n", + "\n", + " dist_array_flat = distances.self_distance_array(ca_positions)\n", + " n_residues = len(ca_atoms)\n", + " distance_matrix = np.zeros((n_residues, n_residues))\n", + " triu_indices = np.triu_indices(n_residues, k=1)\n", + " distance_matrix[triu_indices] = dist_array_flat\n", + " distance_matrix.T[triu_indices] = dist_array_flat\n", + "\n", + " contact_probability_matrix = af3_output.get_contact_prob_ndarr()\n", + " pae_mtx = af3_output.get_pae_ndarr()\n", + "\n", + " cont_adj_mtx = (contact_probability_matrix >= cutoff_prob).astype(int)\n", + " pae_adj_mtx = (pae_mtx <= cutoff_pae).astype(int)\n", + " adj_mtx = distance_matrix * pae_adj_mtx * cont_adj_mtx\n", + " np.fill_diagonal(adj_mtx, 0)\n", + "\n", + " resids = ca_atoms.resids\n", + " G = nx.Graph()\n", + " for i, resid in enumerate(resids):\n", + " G.add_node(resid)\n", + " \n", + " af_graphs.append(G)\n", + "\n", + " # Iterate over the upper 
triangle of the adjacency matrix to find contacts (edges)\n", + " for i in range(n_residues):\n", + " for j in range(i + 1, n_residues):\n", + " if adj_mtx[i, j] > 1:\n", + " distance = distance_matrix[i, j]\n", + " G.add_edge(resids[i], resids[j], weight=distance)\n", + " af_graphs = pl.Series(\"af_graph\", af_graphs)\n", + " bp3.insert_column(1, af_graphs)\n", "\n", - "#af3_single_embed\n", - "#af3_pairwise_embed" - ] - }, - { - "cell_type": "markdown", - "id": "6769fd0f", - "metadata": {}, - "source": [ - "## Getting LM embeddings for a protein\n" + " \n", + "###############################################\n", + "# --- Analysis of AF3 Network's Structure --- #\n", + "###############################################\n", + "\n", + "closeness_centrality = []\n", + "betweenness_centrality = []\n", + "load_centrality = []\n", + "eigenvector_centrality = []\n", + "degree_centrality = []\n", + "clustering = []\n", + "coreness = []\n", + "triangles = []\n", + "density = []\n", + "lapl_n1 = []\n", + "lapl_f = []\n", + "induced_subgraphs = []\n", + "SUBGRAPH_DISTANCE_CUTOFF = 15\n", + "for cols in bp3.iter_rows(named=True):\n", + " af_graph = cols['af_graph']\n", + " closeness_centrality.append(pl.Series(list(nx.closeness_centrality(af_graph).values())))\n", + " betweenness_centrality.append(pl.Series('betweenness_centrality', list(nx.betweenness_centrality(af_graph).values())))\n", + " load_centrality.append(pl.Series('load_centrality', list(nx.load_centrality(af_graph).values())))\n", + " eigenvector_centrality.append(pl.Series('eigenvector_centrality', list(nx.eigenvector_centrality(af_graph, max_iter=10000).values())))\n", + " degree_centrality.append(pl.Series('degree_centrality', list(nx.degree_centrality(af_graph).values())))\n", + " clustering.append(pl.Series('clustering', list(nx.clustering(af_graph).values()), dtype=pl.Float64))\n", + " coreness.append(pl.Series('coreness', list(nx.core_number(af_graph).values()), dtype=pl.Float64))\n", + " 
triangles.append(pl.Series('triangles', list(nx.triangles(af_graph).values()), dtype=pl.Float64))\n", + " density.append(nx.density(af_graph))\n", + "\n", + " lapl_mtx = nx.laplacian_matrix(af_graph).toarray()\n", + " eigvals, _ = np.linalg.eig(lapl_mtx)\n", + " # remove near-zero eigvals to pull the Fiedler value\n", + " eigvals = np.real(eigvals[np.abs(eigvals) >= 1e-3])\n", + " lapl_f.append(np.min(eigvals))\n", + " lapl_n1.append(np.max(eigvals))\n", + "closeness_centrality = pl.Series('closeness_centrality', closeness_centrality)\n", + "betweenness_centrality = pl.Series('betweenness_centrality', betweenness_centrality)\n", + "load_centrality = pl.Series('load_centrality', load_centrality)\n", + "eigenvector_centrality = pl.Series('eigenvector_centrality', eigenvector_centrality)\n", + "degree_centrality = pl.Series('degree_centrality', degree_centrality)\n", + "clustering = pl.Series('clustering', clustering)\n", + "coreness = pl.Series('coreness', coreness)\n", + "triangles = pl.Series('triangles', triangles)\n", + "density = pl.Series('density', density)\n", + "lapl_n1 = pl.Series(\"lapl_n1\", lapl_n1)\n", + "lapl_f = pl.Series(\"lapl_f\", lapl_f)\n", + "\n", + "graph_features = pl.DataFrame([\n", + " closeness_centrality, betweenness_centrality, load_centrality, eigenvector_centrality, degree_centrality, \n", + " clustering, coreness, triangles, density, lapl_f, lapl_n1]).with_row_index()\n", + "\n", + "if 'clustering' not in bp3.columns:\n", + " bp3 = bp3.join(graph_features, on='index', how='full').drop(['index_right'])\n", + "\n", + "##########################################\n", + "# --- Transform to Per-Residue Basis --- #\n", + "##########################################\n", + "\n", + "af_emb = []\n", + "esm_emb = []\n", + "af_graph = []\n", + "epitope = []\n", + "rsa = []\n", + "sa = []\n", + "closeness_centrality = []\n", + "betweenness_centrality = []\n", + "load_centrality = []\n", + "eigenvector_centrality = []\n", + "degree_centrality = []\n", + 
"clustering = []\n", + "coreness = []\n", + "triangles = []\n", + "pLDDT = []\n", + "ptm = []\n", + "job_name = []\n", + "seq = []\n", + "seq_len = []\n", + "train = []\n", + "raw_protein_id = []\n", + "density = []\n", + "lapl_f = []\n", + "lapl_n1 = []\n", + "bp3_res = bp3.drop(\"index\")\n", + "for cols in bp3.iter_rows(named=True):\n", + " # Residue Features\n", + " af_emb.extend(cols['af_emb'])\n", + " esm_emb.extend(cols['esm_emb'])\n", + " epitope.extend(cols['epitope_boolmask'])\n", + " rsa.extend(cols['RSA'])\n", + " sa.extend(cols['SA'])\n", + " closeness_centrality.extend(cols['closeness_centrality'])\n", + " betweenness_centrality.extend(cols['betweenness_centrality'])\n", + " load_centrality.extend(cols['load_centrality'])\n", + " eigenvector_centrality.extend(cols['eigenvector_centrality'])\n", + " degree_centrality.extend(cols['degree_centrality'])\n", + " clustering.extend(cols['clustering'])\n", + " coreness.extend(cols['coreness'])\n", + " triangles.extend(cols['triangles'])\n", + " pLDDT.extend(cols['pLDDT'])\n", + "\n", + " # Global Features\n", + " for repeats in range(cols['seq_len']):\n", + " af_graph.append(cols['af_graph'])\n", + " ptm.append(cols['ptm'])\n", + " job_name.append(cols['job_name'])\n", + " seq.append(cols['seq'])\n", + " seq_len.append(cols['seq_len'])\n", + " train.append(cols['train'])\n", + " raw_protein_id.append(cols['raw_protein_id'])\n", + " density.append(cols['density'])\n", + " lapl_f.append(cols['lapl_f'])\n", + " lapl_n1.append(cols['lapl_n1'])\n", + "\n", + "af_emb = pl.Series('af_emb', af_emb)\n", + "esm_emb = pl.Series('esm_emb', esm_emb)\n", + "af_graph = pl.Series('af_graph', af_graph)\n", + "epitope = pl.Series('epitope', epitope)\n", + "rsa = pl.Series('rsa', rsa)\n", + "sa = pl.Series('sa', sa)\n", + "closeness_centrality = pl.Series('closeness_centrality', closeness_centrality)\n", + "betweenness_centrality = pl.Series('betweenness_centrality', betweenness_centrality)\n", + "load_centrality = 
pl.Series('load_centrality', load_centrality)\n", + "eigenvector_centrality = pl.Series('eigenvector_centrality', eigenvector_centrality)\n", + "degree_centrality = pl.Series('degree_centrality', degree_centrality)\n", + "clustering = pl.Series('clustering', clustering)\n", + "coreness = pl.Series('coreness', coreness)\n", + "triangles = pl.Series('triangles', triangles)\n", + "pLDDT = pl.Series('pLDDT', pLDDT)\n", + "ptm = pl.Series('ptm', ptm)\n", + "job_name = pl.Series('job_name', job_name)\n", + "seq = pl.Series('seq', seq)\n", + "seq_len = pl.Series('seq_len', seq_len)\n", + "train = pl.Series('train', train)\n", + "raw_protein_id = pl.Series('raw_protein_id', raw_protein_id)\n", + "density = pl.Series('density', density)\n", + "lapl_f = pl.Series('lapl_f', lapl_f)\n", + "lapl_n1 = pl.Series('lapl_n1', lapl_n1)\n", + "\n", + "bp3_res = pl.DataFrame([\n", + " job_name, raw_protein_id, seq, seq_len, esm_emb, af_emb, af_graph,\n", + " closeness_centrality, betweenness_centrality, load_centrality, eigenvector_centrality,\n", + " degree_centrality, clustering, coreness, triangles, density, lapl_f, lapl_n1, \n", + " ptm, pLDDT, rsa, sa, epitope, train\n", + " ]).with_row_index()\n", + "\n", + "#####################################\n", + "# --- Explode Embedding Columns --- #\n", + "#####################################\n", + "\n", + "bp3_esm_res = bp3_res.select(\n", + " pl.col('index'),\n", + " pl.col(\"esm_emb\").map_batches(\n", + " lambda s: pl.Series(\n", + " np.stack([t.cpu().numpy() for t in s.to_list()]),\n", + " dtype=pl.List(pl.Float64)\n", + " ),\n", + " return_dtype=pl.List(pl.Float64)\n", + " )\n", + ").with_columns(\n", + " pl.col(\"esm_emb\").list.to_struct(\n", + " fields=[f\"esm_{i}\" for i in range(NUM_ESM_EMB_VARS)]\n", + " )\n", + ").unnest(\"esm_emb\")\n", + "\n", + "bp3_af3_res = bp3_res.select(\n", + " pl.col(\"index\"),\n", + " pl.col(\"af_emb\").map_batches(\n", + " lambda s: pl.Series(\n", + " np.stack([t.cpu().numpy() for t in 
s.to_list()]),\n", + " dtype=pl.List(pl.Float64)\n", + " ),\n", + " return_dtype=pl.List(pl.Float64)\n", + " )\n", + ").with_columns(\n", + " pl.col(\"af_emb\").list.to_struct(\n", + " fields=[f\"af3_{i}\" for i in range(NUM_AF3_EMB_VARS)]\n", + " )\n", + ").unnest(\"af_emb\")\n", + "\n", + "bp3_no_emb_res = bp3_res.drop(['af_emb', 'esm_emb'])\n", + "bp3_res = bp3_no_emb_res.join(bp3_esm_res, on='index', how='full').drop(['index_right'])\n", + "bp3_res = bp3_res.join(bp3_af3_res, on='index', how='full').drop(['index_right'])\n", + "bp3_df = bp3_res.drop('af_graph')\n" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "4e432eef", + "execution_count": null, + "id": "68686bd6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "shape: (358, 5)\n", - "┌─────────────────────┬────────────────────┬────────────────────┬────────────────────┬─────────────┐\n", - "│ esm_emb ┆ seq ┆ train_boolmask ┆ epitope_boolmask ┆ RSA │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ object ┆ str ┆ list[bool] ┆ list[bool] ┆ list[f64] │\n", - "╞═════════════════════╪════════════════════╪════════════════════╪════════════════════╪═════════════╡\n", - "│ tensor([[-7.9217e-0 ┆ LIQTPSSLLVQTNHTAKM ┆ [true, true, … ┆ [false, false, … ┆ [0.205823, │\n", - "│ 2, -8.2230e… ┆ SCEVKSISKLTS… ┆ true] ┆ false] ┆ 0.471213, … │\n", - "│ ┆ ┆ ┆ ┆ 1.00154… │\n", - "│ tensor([[ ┆ GNVDLVFLFDGSMSLQPD ┆ [true, true, … ┆ [false, false, … ┆ [0.840245, │\n", - "│ 2.8654e-01, ┆ EFQKILDFMKDV… ┆ true] ┆ false] ┆ 0.294451, … │\n", - "│ 1.0777e… ┆ ┆ ┆ ┆ 0.60539… │\n", - "│ tensor([[ ┆ DERETWSGKVDFLLSVIG ┆ [true, true, … ┆ [false, false, … ┆ [1.100846, │\n", - "│ 1.1028e-01, ┆ FAVDLANVWRFP… ┆ true] ┆ false] ┆ 1.039373, … │\n", - "│ -3.8646e… ┆ ┆ ┆ ┆ 1.03129… │\n", - "│ tensor([[-2.5486e-0 ┆ KAMHVAQPAVVLASSRGI ┆ [true, true, … ┆ [true, true, … ┆ [0.731129, │\n", - "│ 1, 5.5604e… ┆ ASFVCEYASPGK… ┆ true] ┆ false] ┆ 0.872878, … │\n", - "│ ┆ ┆ ┆ ┆ 1.22799… │\n", - "│ 
tensor([[-1.5629e-0 ┆ GSHHHHHHGSGTDITNQL ┆ [true, true, … ┆ [false, false, … ┆ [1.462795, │\n", - "│ 1, 7.9395e… ┆ TNVTVGIDSGTT… ┆ true] ┆ false] ┆ 0.796439, … │\n", - "│ ┆ ┆ ┆ ┆ 0.90302… │\n", - "│ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ tensor([[-5.8743e-0 ┆ LDKIDLSYETTESGDTAV ┆ [false, false, … ┆ [false, false, … ┆ [0.474081, │\n", - "│ 2, -4.0843e… ┆ SEDSYDKYASQN… ┆ false] ┆ false] ┆ 0.908022, … │\n", - "│ ┆ ┆ ┆ ┆ 0.98618… │\n", - "│ tensor([[ ┆ TDRQLAEEYLYRYGYTRV ┆ [false, false, … ┆ [false, false, … ┆ [0.871364, │\n", - "│ 4.7727e-03, ┆ ASLGPALLLLQK… ┆ false] ┆ false] ┆ 0.432033, … │\n", - "│ 1.5819e… ┆ ┆ ┆ ┆ 0.25613… │\n", - "│ tensor([[ ┆ LPWLNVSADGDNVHLVLN ┆ [false, false, … ┆ [false, false, … ┆ [0.93068, │\n", - "│ 1.3285e-01, ┆ VSEEQHFGLSLY… ┆ false] ┆ true] ┆ 0.110532, … │\n", - "│ 7.9789e… ┆ ┆ ┆ ┆ 1.220648… │\n", - "│ tensor([[ ┆ CSSPPCECHQEEDFRVTC ┆ [false, false, … ┆ [false, false, … ┆ [0.479508, │\n", - "│ 1.1656e-01, ┆ KDIQRIPSLPPS… ┆ false] ┆ false] ┆ 1.059907, … │\n", - "│ 1.4177e… ┆ ┆ ┆ ┆ 0.52811… │\n", - "│ tensor([[ ┆ CSVVVGENYSIKCDATKC ┆ [false, false, … ┆ [false, false, … ┆ [0.628252, │\n", - "│ 6.2200e-02, ┆ TIEDKNRGIIKT… ┆ false] ┆ false] ┆ 0.682526, … │\n", - "│ 1.6793e… ┆ ┆ ┆ ┆ 0.63736… │\n", - "└─────────────────────┴────────────────────┴────────────────────┴────────────────────┴─────────────┘\n" + "Num Epitope Residues: 10811\n", + "Num Non-Epitope Residues: 70702\n", + "Column Names: ['index', 'job_name', 'raw_protein_id', 'seq', 'seq_len', 'closeness_centrality', 'betweenness_centrality', 'load_centrality', 'eigenvector_centrality', 'degree_centrality', 'clustering', 'coreness', 'triangles', 'density', 'lapl_f', 'lapl_n1', 'ptm', 'pLDDT', 'rsa', 'sa', 'epitope', 'train', 'esm_0', 'esm_1', 'esm_2', 'esm_3', 'esm_4', 'esm_5', 'esm_6', 'esm_7', 'esm_8', 'esm_9', 'esm_10', 'esm_11', 'esm_12', 'esm_13', 'esm_14', 'esm_15', 'esm_16', 'esm_17', 'esm_18', 'esm_19', 'esm_20', 'esm_21', 'esm_22', 'esm_23', 'esm_24', 'esm_25', 'esm_26', 'esm_27', 
'esm_28', 'esm_29', 'esm_30', 'esm_31', 'esm_32', 'esm_33', 'esm_34', 'esm_35', 'esm_36', 'esm_37', 'esm_38', 'esm_39', 'esm_40', 'esm_41', 'esm_42', 'esm_43', 'esm_44', 'esm_45', 'esm_46', 'esm_47', 'esm_48', 'esm_49', 'esm_50', 'esm_51', 'esm_52', 'esm_53', 'esm_54', 'esm_55', 'esm_56', 'esm_57', 'esm_58', 'esm_59', 'esm_60', 'esm_61', 'esm_62', 'esm_63', 'esm_64', 'esm_65', 'esm_66', 'esm_67', 'esm_68', 'esm_69', 'esm_70', 'esm_71', 'esm_72', 'esm_73', 'esm_74', 'esm_75', 'esm_76', 'esm_77', 'esm_78', 'esm_79', 'esm_80', 'esm_81', 'esm_82', 'esm_83', 'esm_84', 'esm_85', 'esm_86', 'esm_87', 'esm_88', 'esm_89', 'esm_90', 'esm_91', 'esm_92', 'esm_93', 'esm_94', 'esm_95', 'esm_96', 'esm_97', 'esm_98', 'esm_99', 'esm_100', 'esm_101', 'esm_102', 'esm_103', 'esm_104', 'esm_105', 'esm_106', 'esm_107', 'esm_108', 'esm_109', 'esm_110', 'esm_111', 'esm_112', 'esm_113', 'esm_114', 'esm_115', 'esm_116', 'esm_117', 'esm_118', 'esm_119', 'esm_120', 'esm_121', 'esm_122', 'esm_123', 'esm_124', 'esm_125', 'esm_126', 'esm_127', 'esm_128', 'esm_129', 'esm_130', 'esm_131', 'esm_132', 'esm_133', 'esm_134', 'esm_135', 'esm_136', 'esm_137', 'esm_138', 'esm_139', 'esm_140', 'esm_141', 'esm_142', 'esm_143', 'esm_144', 'esm_145', 'esm_146', 'esm_147', 'esm_148', 'esm_149', 'esm_150', 'esm_151', 'esm_152', 'esm_153', 'esm_154', 'esm_155', 'esm_156', 'esm_157', 'esm_158', 'esm_159', 'esm_160', 'esm_161', 'esm_162', 'esm_163', 'esm_164', 'esm_165', 'esm_166', 'esm_167', 'esm_168', 'esm_169', 'esm_170', 'esm_171', 'esm_172', 'esm_173', 'esm_174', 'esm_175', 'esm_176', 'esm_177', 'esm_178', 'esm_179', 'esm_180', 'esm_181', 'esm_182', 'esm_183', 'esm_184', 'esm_185', 'esm_186', 'esm_187', 'esm_188', 'esm_189', 'esm_190', 'esm_191', 'esm_192', 'esm_193', 'esm_194', 'esm_195', 'esm_196', 'esm_197', 'esm_198', 'esm_199', 'esm_200', 'esm_201', 'esm_202', 'esm_203', 'esm_204', 'esm_205', 'esm_206', 'esm_207', 'esm_208', 'esm_209', 'esm_210', 'esm_211', 'esm_212', 'esm_213', 'esm_214', 'esm_215', 
'esm_216', 'esm_217', 'esm_218', 'esm_219', 'esm_220', 'esm_221', 'esm_222', 'esm_223', 'esm_224', 'esm_225', 'esm_226', 'esm_227', 'esm_228', 'esm_229', 'esm_230', 'esm_231', 'esm_232', 'esm_233', 'esm_234', 'esm_235', 'esm_236', 'esm_237', 'esm_238', 'esm_239', 'esm_240', 'esm_241', 'esm_242', 'esm_243', 'esm_244', 'esm_245', 'esm_246', 'esm_247', 'esm_248', 'esm_249', 'esm_250', 'esm_251', 'esm_252', 'esm_253', 'esm_254', 'esm_255', 'esm_256', 'esm_257', 'esm_258', 'esm_259', 'esm_260', 'esm_261', 'esm_262', 'esm_263', 'esm_264', 'esm_265', 'esm_266', 'esm_267', 'esm_268', 'esm_269', 'esm_270', 'esm_271', 'esm_272', 'esm_273', 'esm_274', 'esm_275', 'esm_276', 'esm_277', 'esm_278', 'esm_279', 'esm_280', 'esm_281', 'esm_282', 'esm_283', 'esm_284', 'esm_285', 'esm_286', 'esm_287', 'esm_288', 'esm_289', 'esm_290', 'esm_291', 'esm_292', 'esm_293', 'esm_294', 'esm_295', 'esm_296', 'esm_297', 'esm_298', 'esm_299', 'esm_300', 'esm_301', 'esm_302', 'esm_303', 'esm_304', 'esm_305', 'esm_306', 'esm_307', 'esm_308', 'esm_309', 'esm_310', 'esm_311', 'esm_312', 'esm_313', 'esm_314', 'esm_315', 'esm_316', 'esm_317', 'esm_318', 'esm_319', 'esm_320', 'esm_321', 'esm_322', 'esm_323', 'esm_324', 'esm_325', 'esm_326', 'esm_327', 'esm_328', 'esm_329', 'esm_330', 'esm_331', 'esm_332', 'esm_333', 'esm_334', 'esm_335', 'esm_336', 'esm_337', 'esm_338', 'esm_339', 'esm_340', 'esm_341', 'esm_342', 'esm_343', 'esm_344', 'esm_345', 'esm_346', 'esm_347', 'esm_348', 'esm_349', 'esm_350', 'esm_351', 'esm_352', 'esm_353', 'esm_354', 'esm_355', 'esm_356', 'esm_357', 'esm_358', 'esm_359', 'esm_360', 'esm_361', 'esm_362', 'esm_363', 'esm_364', 'esm_365', 'esm_366', 'esm_367', 'esm_368', 'esm_369', 'esm_370', 'esm_371', 'esm_372', 'esm_373', 'esm_374', 'esm_375', 'esm_376', 'esm_377', 'esm_378', 'esm_379', 'esm_380', 'esm_381', 'esm_382', 'esm_383', 'esm_384', 'esm_385', 'esm_386', 'esm_387', 'esm_388', 'esm_389', 'esm_390', 'esm_391', 'esm_392', 'esm_393', 'esm_394', 'esm_395', 'esm_396', 
'esm_397', 'esm_398', 'esm_399', 'esm_400', 'esm_401', 'esm_402', 'esm_403', 'esm_404', 'esm_405', 'esm_406', 'esm_407', 'esm_408', 'esm_409', 'esm_410', 'esm_411', 'esm_412', 'esm_413', 'esm_414', 'esm_415', 'esm_416', 'esm_417', 'esm_418', 'esm_419', 'esm_420', 'esm_421', 'esm_422', 'esm_423', 'esm_424', 'esm_425', 'esm_426', 'esm_427', 'esm_428', 'esm_429', 'esm_430', 'esm_431', 'esm_432', 'esm_433', 'esm_434', 'esm_435', 'esm_436', 'esm_437', 'esm_438', 'esm_439', 'esm_440', 'esm_441', 'esm_442', 'esm_443', 'esm_444', 'esm_445', 'esm_446', 'esm_447', 'esm_448', 'esm_449', 'esm_450', 'esm_451', 'esm_452', 'esm_453', 'esm_454', 'esm_455', 'esm_456', 'esm_457', 'esm_458', 'esm_459', 'esm_460', 'esm_461', 'esm_462', 'esm_463', 'esm_464', 'esm_465', 'esm_466', 'esm_467', 'esm_468', 'esm_469', 'esm_470', 'esm_471', 'esm_472', 'esm_473', 'esm_474', 'esm_475', 'esm_476', 'esm_477', 'esm_478', 'esm_479', 'esm_480', 'esm_481', 'esm_482', 'esm_483', 'esm_484', 'esm_485', 'esm_486', 'esm_487', 'esm_488', 'esm_489', 'esm_490', 'esm_491', 'esm_492', 'esm_493', 'esm_494', 'esm_495', 'esm_496', 'esm_497', 'esm_498', 'esm_499', 'esm_500', 'esm_501', 'esm_502', 'esm_503', 'esm_504', 'esm_505', 'esm_506', 'esm_507', 'esm_508', 'esm_509', 'esm_510', 'esm_511', 'esm_512', 'esm_513', 'esm_514', 'esm_515', 'esm_516', 'esm_517', 'esm_518', 'esm_519', 'esm_520', 'esm_521', 'esm_522', 'esm_523', 'esm_524', 'esm_525', 'esm_526', 'esm_527', 'esm_528', 'esm_529', 'esm_530', 'esm_531', 'esm_532', 'esm_533', 'esm_534', 'esm_535', 'esm_536', 'esm_537', 'esm_538', 'esm_539', 'esm_540', 'esm_541', 'esm_542', 'esm_543', 'esm_544', 'esm_545', 'esm_546', 'esm_547', 'esm_548', 'esm_549', 'esm_550', 'esm_551', 'esm_552', 'esm_553', 'esm_554', 'esm_555', 'esm_556', 'esm_557', 'esm_558', 'esm_559', 'esm_560', 'esm_561', 'esm_562', 'esm_563', 'esm_564', 'esm_565', 'esm_566', 'esm_567', 'esm_568', 'esm_569', 'esm_570', 'esm_571', 'esm_572', 'esm_573', 'esm_574', 'esm_575', 'esm_576', 'esm_577', 
'esm_578', 'esm_579', 'esm_580', 'esm_581', 'esm_582', 'esm_583', 'esm_584', 'esm_585', 'esm_586', 'esm_587', 'esm_588', 'esm_589', 'esm_590', 'esm_591', 'esm_592', 'esm_593', 'esm_594', 'esm_595', 'esm_596', 'esm_597', 'esm_598', 'esm_599', 'esm_600', 'esm_601', 'esm_602', 'esm_603', 'esm_604', 'esm_605', 'esm_606', 'esm_607', 'esm_608', 'esm_609', 'esm_610', 'esm_611', 'esm_612', 'esm_613', 'esm_614', 'esm_615', 'esm_616', 'esm_617', 'esm_618', 'esm_619', 'esm_620', 'esm_621', 'esm_622', 'esm_623', 'esm_624', 'esm_625', 'esm_626', 'esm_627', 'esm_628', 'esm_629', 'esm_630', 'esm_631', 'esm_632', 'esm_633', 'esm_634', 'esm_635', 'esm_636', 'esm_637', 'esm_638', 'esm_639', 'esm_640', 'esm_641', 'esm_642', 'esm_643', 'esm_644', 'esm_645', 'esm_646', 'esm_647', 'esm_648', 'esm_649', 'esm_650', 'esm_651', 'esm_652', 'esm_653', 'esm_654', 'esm_655', 'esm_656', 'esm_657', 'esm_658', 'esm_659', 'esm_660', 'esm_661', 'esm_662', 'esm_663', 'esm_664', 'esm_665', 'esm_666', 'esm_667', 'esm_668', 'esm_669', 'esm_670', 'esm_671', 'esm_672', 'esm_673', 'esm_674', 'esm_675', 'esm_676', 'esm_677', 'esm_678', 'esm_679', 'esm_680', 'esm_681', 'esm_682', 'esm_683', 'esm_684', 'esm_685', 'esm_686', 'esm_687', 'esm_688', 'esm_689', 'esm_690', 'esm_691', 'esm_692', 'esm_693', 'esm_694', 'esm_695', 'esm_696', 'esm_697', 'esm_698', 'esm_699', 'esm_700', 'esm_701', 'esm_702', 'esm_703', 'esm_704', 'esm_705', 'esm_706', 'esm_707', 'esm_708', 'esm_709', 'esm_710', 'esm_711', 'esm_712', 'esm_713', 'esm_714', 'esm_715', 'esm_716', 'esm_717', 'esm_718', 'esm_719', 'esm_720', 'esm_721', 'esm_722', 'esm_723', 'esm_724', 'esm_725', 'esm_726', 'esm_727', 'esm_728', 'esm_729', 'esm_730', 'esm_731', 'esm_732', 'esm_733', 'esm_734', 'esm_735', 'esm_736', 'esm_737', 'esm_738', 'esm_739', 'esm_740', 'esm_741', 'esm_742', 'esm_743', 'esm_744', 'esm_745', 'esm_746', 'esm_747', 'esm_748', 'esm_749', 'esm_750', 'esm_751', 'esm_752', 'esm_753', 'esm_754', 'esm_755', 'esm_756', 'esm_757', 'esm_758', 
'esm_759', 'esm_760', 'esm_761', 'esm_762', 'esm_763', 'esm_764', 'esm_765', 'esm_766', 'esm_767', 'esm_768', 'esm_769', 'esm_770', 'esm_771', 'esm_772', 'esm_773', 'esm_774', 'esm_775', 'esm_776', 'esm_777', 'esm_778', 'esm_779', 'esm_780', 'esm_781', 'esm_782', 'esm_783', 'esm_784', 'esm_785', 'esm_786', 'esm_787', 'esm_788', 'esm_789', 'esm_790', 'esm_791', 'esm_792', 'esm_793', 'esm_794', 'esm_795', 'esm_796', 'esm_797', 'esm_798', 'esm_799', 'esm_800', 'esm_801', 'esm_802', 'esm_803', 'esm_804', 'esm_805', 'esm_806', 'esm_807', 'esm_808', 'esm_809', 'esm_810', 'esm_811', 'esm_812', 'esm_813', 'esm_814', 'esm_815', 'esm_816', 'esm_817', 'esm_818', 'esm_819', 'esm_820', 'esm_821', 'esm_822', 'esm_823', 'esm_824', 'esm_825', 'esm_826', 'esm_827', 'esm_828', 'esm_829', 'esm_830', 'esm_831', 'esm_832', 'esm_833', 'esm_834', 'esm_835', 'esm_836', 'esm_837', 'esm_838', 'esm_839', 'esm_840', 'esm_841', 'esm_842', 'esm_843', 'esm_844', 'esm_845', 'esm_846', 'esm_847', 'esm_848', 'esm_849', 'esm_850', 'esm_851', 'esm_852', 'esm_853', 'esm_854', 'esm_855', 'esm_856', 'esm_857', 'esm_858', 'esm_859', 'esm_860', 'esm_861', 'esm_862', 'esm_863', 'esm_864', 'esm_865', 'esm_866', 'esm_867', 'esm_868', 'esm_869', 'esm_870', 'esm_871', 'esm_872', 'esm_873', 'esm_874', 'esm_875', 'esm_876', 'esm_877', 'esm_878', 'esm_879', 'esm_880', 'esm_881', 'esm_882', 'esm_883', 'esm_884', 'esm_885', 'esm_886', 'esm_887', 'esm_888', 'esm_889', 'esm_890', 'esm_891', 'esm_892', 'esm_893', 'esm_894', 'esm_895', 'esm_896', 'esm_897', 'esm_898', 'esm_899', 'esm_900', 'esm_901', 'esm_902', 'esm_903', 'esm_904', 'esm_905', 'esm_906', 'esm_907', 'esm_908', 'esm_909', 'esm_910', 'esm_911', 'esm_912', 'esm_913', 'esm_914', 'esm_915', 'esm_916', 'esm_917', 'esm_918', 'esm_919', 'esm_920', 'esm_921', 'esm_922', 'esm_923', 'esm_924', 'esm_925', 'esm_926', 'esm_927', 'esm_928', 'esm_929', 'esm_930', 'esm_931', 'esm_932', 'esm_933', 'esm_934', 'esm_935', 'esm_936', 'esm_937', 'esm_938', 'esm_939', 
'esm_940', 'esm_941', 'esm_942', 'esm_943', 'esm_944', 'esm_945', 'esm_946', 'esm_947', 'esm_948', 'esm_949', 'esm_950', 'esm_951', 'esm_952', 'esm_953', 'esm_954', 'esm_955', 'esm_956', 'esm_957', 'esm_958', 'esm_959', 'esm_960', 'esm_961', 'esm_962', 'esm_963', 'esm_964', 'esm_965', 'esm_966', 'esm_967', 'esm_968', 'esm_969', 'esm_970', 'esm_971', 'esm_972', 'esm_973', 'esm_974', 'esm_975', 'esm_976', 'esm_977', 'esm_978', 'esm_979', 'esm_980', 'esm_981', 'esm_982', 'esm_983', 'esm_984', 'esm_985', 'esm_986', 'esm_987', 'esm_988', 'esm_989', 'esm_990', 'esm_991', 'esm_992', 'esm_993', 'esm_994', 'esm_995', 'esm_996', 'esm_997', 'esm_998', 'esm_999', 'esm_1000', 'esm_1001', 'esm_1002', 'esm_1003', 'esm_1004', 'esm_1005', 'esm_1006', 'esm_1007', 'esm_1008', 'esm_1009', 'esm_1010', 'esm_1011', 'esm_1012', 'esm_1013', 'esm_1014', 'esm_1015', 'esm_1016', 'esm_1017', 'esm_1018', 'esm_1019', 'esm_1020', 'esm_1021', 'esm_1022', 'esm_1023', 'esm_1024', 'esm_1025', 'esm_1026', 'esm_1027', 'esm_1028', 'esm_1029', 'esm_1030', 'esm_1031', 'esm_1032', 'esm_1033', 'esm_1034', 'esm_1035', 'esm_1036', 'esm_1037', 'esm_1038', 'esm_1039', 'esm_1040', 'esm_1041', 'esm_1042', 'esm_1043', 'esm_1044', 'esm_1045', 'esm_1046', 'esm_1047', 'esm_1048', 'esm_1049', 'esm_1050', 'esm_1051', 'esm_1052', 'esm_1053', 'esm_1054', 'esm_1055', 'esm_1056', 'esm_1057', 'esm_1058', 'esm_1059', 'esm_1060', 'esm_1061', 'esm_1062', 'esm_1063', 'esm_1064', 'esm_1065', 'esm_1066', 'esm_1067', 'esm_1068', 'esm_1069', 'esm_1070', 'esm_1071', 'esm_1072', 'esm_1073', 'esm_1074', 'esm_1075', 'esm_1076', 'esm_1077', 'esm_1078', 'esm_1079', 'esm_1080', 'esm_1081', 'esm_1082', 'esm_1083', 'esm_1084', 'esm_1085', 'esm_1086', 'esm_1087', 'esm_1088', 'esm_1089', 'esm_1090', 'esm_1091', 'esm_1092', 'esm_1093', 'esm_1094', 'esm_1095', 'esm_1096', 'esm_1097', 'esm_1098', 'esm_1099', 'esm_1100', 'esm_1101', 'esm_1102', 'esm_1103', 'esm_1104', 'esm_1105', 'esm_1106', 'esm_1107', 'esm_1108', 'esm_1109', 'esm_1110', 
'esm_1111', 'esm_1112', 'esm_1113', 'esm_1114', 'esm_1115', 'esm_1116', 'esm_1117', 'esm_1118', 'esm_1119', 'esm_1120', 'esm_1121', 'esm_1122', 'esm_1123', 'esm_1124', 'esm_1125', 'esm_1126', 'esm_1127', 'esm_1128', 'esm_1129', 'esm_1130', 'esm_1131', 'esm_1132', 'esm_1133', 'esm_1134', 'esm_1135', 'esm_1136', 'esm_1137', 'esm_1138', 'esm_1139', 'esm_1140', 'esm_1141', 'esm_1142', 'esm_1143', 'esm_1144', 'esm_1145', 'esm_1146', 'esm_1147', 'esm_1148', 'esm_1149', 'esm_1150', 'esm_1151', 'esm_1152', 'esm_1153', 'esm_1154', 'esm_1155', 'esm_1156', 'esm_1157', 'esm_1158', 'esm_1159', 'esm_1160', 'esm_1161', 'esm_1162', 'esm_1163', 'esm_1164', 'esm_1165', 'esm_1166', 'esm_1167', 'esm_1168', 'esm_1169', 'esm_1170', 'esm_1171', 'esm_1172', 'esm_1173', 'esm_1174', 'esm_1175', 'esm_1176', 'esm_1177', 'esm_1178', 'esm_1179', 'esm_1180', 'esm_1181', 'esm_1182', 'esm_1183', 'esm_1184', 'esm_1185', 'esm_1186', 'esm_1187', 'esm_1188', 'esm_1189', 'esm_1190', 'esm_1191', 'esm_1192', 'esm_1193', 'esm_1194', 'esm_1195', 'esm_1196', 'esm_1197', 'esm_1198', 'esm_1199', 'esm_1200', 'esm_1201', 'esm_1202', 'esm_1203', 'esm_1204', 'esm_1205', 'esm_1206', 'esm_1207', 'esm_1208', 'esm_1209', 'esm_1210', 'esm_1211', 'esm_1212', 'esm_1213', 'esm_1214', 'esm_1215', 'esm_1216', 'esm_1217', 'esm_1218', 'esm_1219', 'esm_1220', 'esm_1221', 'esm_1222', 'esm_1223', 'esm_1224', 'esm_1225', 'esm_1226', 'esm_1227', 'esm_1228', 'esm_1229', 'esm_1230', 'esm_1231', 'esm_1232', 'esm_1233', 'esm_1234', 'esm_1235', 'esm_1236', 'esm_1237', 'esm_1238', 'esm_1239', 'esm_1240', 'esm_1241', 'esm_1242', 'esm_1243', 'esm_1244', 'esm_1245', 'esm_1246', 'esm_1247', 'esm_1248', 'esm_1249', 'esm_1250', 'esm_1251', 'esm_1252', 'esm_1253', 'esm_1254', 'esm_1255', 'esm_1256', 'esm_1257', 'esm_1258', 'esm_1259', 'esm_1260', 'esm_1261', 'esm_1262', 'esm_1263', 'esm_1264', 'esm_1265', 'esm_1266', 'esm_1267', 'esm_1268', 'esm_1269', 'esm_1270', 'esm_1271', 'esm_1272', 'esm_1273', 'esm_1274', 'esm_1275', 'esm_1276', 
'esm_1277', 'esm_1278', 'esm_1279', 'af3_0', 'af3_1', 'af3_2', 'af3_3', 'af3_4', 'af3_5', 'af3_6', 'af3_7', 'af3_8', 'af3_9', 'af3_10', 'af3_11', 'af3_12', 'af3_13', 'af3_14', 'af3_15', 'af3_16', 'af3_17', 'af3_18', 'af3_19', 'af3_20', 'af3_21', 'af3_22', 'af3_23', 'af3_24', 'af3_25', 'af3_26', 'af3_27', 'af3_28', 'af3_29', 'af3_30', 'af3_31', 'af3_32', 'af3_33', 'af3_34', 'af3_35', 'af3_36', 'af3_37', 'af3_38', 'af3_39', 'af3_40', 'af3_41', 'af3_42', 'af3_43', 'af3_44', 'af3_45', 'af3_46', 'af3_47', 'af3_48', 'af3_49', 'af3_50', 'af3_51', 'af3_52', 'af3_53', 'af3_54', 'af3_55', 'af3_56', 'af3_57', 'af3_58', 'af3_59', 'af3_60', 'af3_61', 'af3_62', 'af3_63', 'af3_64', 'af3_65', 'af3_66', 'af3_67', 'af3_68', 'af3_69', 'af3_70', 'af3_71', 'af3_72', 'af3_73', 'af3_74', 'af3_75', 'af3_76', 'af3_77', 'af3_78', 'af3_79', 'af3_80', 'af3_81', 'af3_82', 'af3_83', 'af3_84', 'af3_85', 'af3_86', 'af3_87', 'af3_88', 'af3_89', 'af3_90', 'af3_91', 'af3_92', 'af3_93', 'af3_94', 'af3_95', 'af3_96', 'af3_97', 'af3_98', 'af3_99', 'af3_100', 'af3_101', 'af3_102', 'af3_103', 'af3_104', 'af3_105', 'af3_106', 'af3_107', 'af3_108', 'af3_109', 'af3_110', 'af3_111', 'af3_112', 'af3_113', 'af3_114', 'af3_115', 'af3_116', 'af3_117', 'af3_118', 'af3_119', 'af3_120', 'af3_121', 'af3_122', 'af3_123', 'af3_124', 'af3_125', 'af3_126', 'af3_127', 'af3_128', 'af3_129', 'af3_130', 'af3_131', 'af3_132', 'af3_133', 'af3_134', 'af3_135', 'af3_136', 'af3_137', 'af3_138', 'af3_139', 'af3_140', 'af3_141', 'af3_142', 'af3_143', 'af3_144', 'af3_145', 'af3_146', 'af3_147', 'af3_148', 'af3_149', 'af3_150', 'af3_151', 'af3_152', 'af3_153', 'af3_154', 'af3_155', 'af3_156', 'af3_157', 'af3_158', 'af3_159', 'af3_160', 'af3_161', 'af3_162', 'af3_163', 'af3_164', 'af3_165', 'af3_166', 'af3_167', 'af3_168', 'af3_169', 'af3_170', 'af3_171', 'af3_172', 'af3_173', 'af3_174', 'af3_175', 'af3_176', 'af3_177', 'af3_178', 'af3_179', 'af3_180', 'af3_181', 'af3_182', 'af3_183', 'af3_184', 'af3_185', 'af3_186', 'af3_187', 
'af3_188', 'af3_189', 'af3_190', 'af3_191', 'af3_192', 'af3_193', 'af3_194', 'af3_195', 'af3_196', 'af3_197', 'af3_198', 'af3_199', 'af3_200', 'af3_201', 'af3_202', 'af3_203', 'af3_204', 'af3_205', 'af3_206', 'af3_207', 'af3_208', 'af3_209', 'af3_210', 'af3_211', 'af3_212', 'af3_213', 'af3_214', 'af3_215', 'af3_216', 'af3_217', 'af3_218', 'af3_219', 'af3_220', 'af3_221', 'af3_222', 'af3_223', 'af3_224', 'af3_225', 'af3_226', 'af3_227', 'af3_228', 'af3_229', 'af3_230', 'af3_231', 'af3_232', 'af3_233', 'af3_234', 'af3_235', 'af3_236', 'af3_237', 'af3_238', 'af3_239', 'af3_240', 'af3_241', 'af3_242', 'af3_243', 'af3_244', 'af3_245', 'af3_246', 'af3_247', 'af3_248', 'af3_249', 'af3_250', 'af3_251', 'af3_252', 'af3_253', 'af3_254', 'af3_255', 'af3_256', 'af3_257', 'af3_258', 'af3_259', 'af3_260', 'af3_261', 'af3_262', 'af3_263', 'af3_264', 'af3_265', 'af3_266', 'af3_267', 'af3_268', 'af3_269', 'af3_270', 'af3_271', 'af3_272', 'af3_273', 'af3_274', 'af3_275', 'af3_276', 'af3_277', 'af3_278', 'af3_279', 'af3_280', 'af3_281', 'af3_282', 'af3_283', 'af3_284', 'af3_285', 'af3_286', 'af3_287', 'af3_288', 'af3_289', 'af3_290', 'af3_291', 'af3_292', 'af3_293', 'af3_294', 'af3_295', 'af3_296', 'af3_297', 'af3_298', 'af3_299', 'af3_300', 'af3_301', 'af3_302', 'af3_303', 'af3_304', 'af3_305', 'af3_306', 'af3_307', 'af3_308', 'af3_309', 'af3_310', 'af3_311', 'af3_312', 'af3_313', 'af3_314', 'af3_315', 'af3_316', 'af3_317', 'af3_318', 'af3_319', 'af3_320', 'af3_321', 'af3_322', 'af3_323', 'af3_324', 'af3_325', 'af3_326', 'af3_327', 'af3_328', 'af3_329', 'af3_330', 'af3_331', 'af3_332', 'af3_333', 'af3_334', 'af3_335', 'af3_336', 'af3_337', 'af3_338', 'af3_339', 'af3_340', 'af3_341', 'af3_342', 'af3_343', 'af3_344', 'af3_345', 'af3_346', 'af3_347']\n" ] } ], "source": [ - "# --- ESM Embeddings ---\n", + "##########################\n", + "# --- Save DataFrame --- #\n", + "##########################\n", "\n", - "ESM_ENCODING_DIR = Path(\"/tgen_labs/altin/esm_encodings\")\n", - 
"esm_2_embed = torch.load(ESM_ENCODING_DIR / (sample_job_name + \".pt\")).tolist()\n", + "#with open('bp3_pae10.pkl', 'wb') as f:\n", + "# pickle.dump(bp3_df, f)\n", "\n", - "if \"esm_emb\" not in bp3.columns:\n", - " esm_embeddings = []\n", - " for job_num in range(bp3.shape[0]):\n", - " job_name = bp3.select(\"job_name\")[job_num].item()\n", - " esm_embeddings.append(torch.load(ESM_ENCODING_DIR / (job_name + \".pt\")))\n", - " esm_emb = pl.Series(\"esm_emb\", esm_embeddings)\n", - " bp3.insert_column(0, esm_emb)\n", - " bp3 = bp3.drop(['job_name', \"raw_protein_id\", 'SA'])\n", - "\n", - "if \"train_boolmask\" not in bp3:\n", - " train_boolmask = []\n", - " for (esm_emb, seq, train, epitope_boolmask, rsa) in bp3.iter_rows():\n", - " train_bools = []\n", - " for num in range(len(seq)):\n", - " train_bools.append(train)\n", - " train_boolmask.append(train_bools)\n", - " train_boolmask = pl.Series(\"train_boolmask\", train_boolmask)\n", - " bp3.insert_column(2, train_boolmask)\n", - " bp3 = bp3.drop(['train'])\n", - " \n", - "print(bp3)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "7d3709f2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (84_439, 6)
indexembeddingsresiduestrain_boolsepitope_boolsrsa_vals
u32list[f64]strboolboolf64
0[-0.079217, -0.08223, … 117.0]"L"truefalse0.205823
1[0.271906, 0.131599, … 117.0]"I"truefalse0.471213
2[0.075211, -0.124738, … 117.0]"Q"truefalse0.046812
3[0.033206, 0.13658, … 117.0]"T"truefalse0.437416
4[-0.153488, 0.178101, … 117.0]"P"truefalse0.312792
84434[-0.102616, 0.023357, … 47.0]"V"falsefalse0.09529
84435[0.006365, -0.054578, … 47.0]"Q"falsefalse0.559269
84436[-0.021138, 0.060409, … 47.0]"K"falsefalse0.883928
84437[-0.013476, 0.081914, … 47.0]"A"falsefalse0.828726
84438[-0.079727, 0.132829, … 47.0]"Q"falsefalse0.637367
" - ], - "text/plain": [ - "shape: (84_439, 6)\n", - "┌───────┬────────────────────────────────┬──────────┬─────────────┬───────────────┬──────────┐\n", - "│ index ┆ embeddings ┆ residues ┆ train_bools ┆ epitope_bools ┆ rsa_vals │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ u32 ┆ list[f64] ┆ str ┆ bool ┆ bool ┆ f64 │\n", - "╞═══════╪════════════════════════════════╪══════════╪═════════════╪═══════════════╪══════════╡\n", - "│ 0 ┆ [-0.079217, -0.08223, … 117.0] ┆ L ┆ true ┆ false ┆ 0.205823 │\n", - "│ 1 ┆ [0.271906, 0.131599, … 117.0] ┆ I ┆ true ┆ false ┆ 0.471213 │\n", - "│ 2 ┆ [0.075211, -0.124738, … 117.0] ┆ Q ┆ true ┆ false ┆ 0.046812 │\n", - "│ 3 ┆ [0.033206, 0.13658, … 117.0] ┆ T ┆ true ┆ false ┆ 0.437416 │\n", - "│ 4 ┆ [-0.153488, 0.178101, … 117.0] ┆ P ┆ true ┆ false ┆ 0.312792 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 84434 ┆ [-0.102616, 0.023357, … 47.0] ┆ V ┆ false ┆ false ┆ 0.09529 │\n", - "│ 84435 ┆ [0.006365, -0.054578, … 47.0] ┆ Q ┆ false ┆ false ┆ 0.559269 │\n", - "│ 84436 ┆ [-0.021138, 0.060409, … 47.0] ┆ K ┆ false ┆ false ┆ 0.883928 │\n", - "│ 84437 ┆ [-0.013476, 0.081914, … 47.0] ┆ A ┆ false ┆ false ┆ 0.828726 │\n", - "│ 84438 ┆ [-0.079727, 0.132829, … 47.0] ┆ Q ┆ false ┆ false ┆ 0.637367 │\n", - "└───────┴────────────────────────────────┴──────────┴─────────────┴───────────────┴──────────┘" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# --- Transform to Per-Residue Basis ---\n", - " \n", - "embs = []\n", - "residues = []\n", - "train_bools = []\n", - "epitope_bools = []\n", - "rsa_vals = []\n", - "for (esm_emb, seq, train_boolmask, epitope_boolmask, rsa) in bp3.iter_rows():\n", - " embs.extend(esm_emb.tolist())\n", - " residues.extend(seq)\n", - " train_bools.extend(train_boolmask)\n", - " epitope_bools.extend(epitope_boolmask)\n", - " rsa_vals.extend(rsa)\n", - "\n", - "data = {\n", - " \"embeddings\" : embs,\n", - " \"residues\" : residues,\n", - " \"train_bools\" : 
train_bools,\n", - " \"epitope_bools\" : epitope_bools,\n", - " \"rsa_vals\" : rsa_vals\n", - "}\n", - "\n", - "bp3_res = pl.DataFrame(data).with_row_index()\n", - "bp3_res\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "1930cc74", - "metadata": {}, - "outputs": [], - "source": [ - "var_list = []\n", - "for var in range(len(bp3_res.select(\"embeddings\")[0].item().to_list())):\n", - " var_list.append(\"var\" + str(var))\n", + "with open(\"bp3_pae10.pkl\", 'rb') as f:\n", + " bp3_all_df = pickle.load(f)\n", + "bp3_train_df = bp3_all_df.filter(pl.col(\"train\") == True)\n", + "print(f\"Num Epitope Residues: {len(bp3_train_df.filter(pl.col(\"epitope\") == True))}\")\n", + "print(f\"Num Non-Epitope Residues: {len(bp3_train_df.filter(pl.col(\"epitope\") == False))}\")\n", "\n", - "var_names = []\n", - "for var in range(bp3_res.height):\n", - " var_names.append(var_list)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "4745d9c4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (84_439, 1_286)
indexfield_0field_1field_2field_3field_4field_5field_6field_7field_8field_9field_10field_11field_12field_13field_14field_15field_16field_17field_18field_19field_20field_21field_22field_23field_24field_25field_26field_27field_28field_29field_30field_31field_32field_33field_34field_35field_1248field_1249field_1250field_1251field_1252field_1253field_1254field_1255field_1256field_1257field_1258field_1259field_1260field_1261field_1262field_1263field_1264field_1265field_1266field_1267field_1268field_1269field_1270field_1271field_1272field_1273field_1274field_1275field_1276field_1277field_1278field_1279field_1280residuestrain_boolsepitope_boolsrsa_vals
u32f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64strboolboolf64
0-0.079217-0.082230.05838-0.014281-0.174122-0.0797250.3110820.159403-0.0609380.0907240.1115890.2618340.0248810.1987760.0182710.118280.4828110.2449080.2902090.064428-0.2587630.0223620.16690.3730520.144847-0.003963-0.0595780.0051040.0854770.1407550.0165580.155509-0.3468790.3512540.0651720.068091-0.063390.166055-0.1355060.020469-0.0469170.2641240.108552-0.164620.0830870.034967-0.0664350.118048-0.23741-0.019455-0.0298530.1063120.154892-0.1098440.137809-0.0545360.3200510.080654-0.236098-0.390364-0.207083-0.0288720.1334240.464410.105135-0.0357880.2468070.096495117.0"L"truefalse0.205823
10.2719060.131599-0.1274880.218813-0.318881-0.326190.2830680.242302-0.119848-0.1710260.3600550.0485740.051404-0.1205290.077561-0.309660.4949730.2468520.169573-0.0658120.102363-0.0204510.131842-0.1283080.2764130.028705-0.048571-0.099105-0.303759-0.0387120.0078540.247406-0.098611-0.053378-0.306477-0.2419840.053280.227172-0.1237880.5895190.0819920.1237220.1227750.044760.020768-0.143507-0.0116570.201338-0.0628580.0836410.104042-0.1092360.13792-0.0313770.134023-0.0809850.2332330.1425810.0503230.062281-0.1553070.1702650.0127590.1842510.210409-0.0662360.0838130.026999117.0"I"truefalse0.471213
20.075211-0.124738-0.3128460.2247690.048075-0.4120290.6330580.185147-0.1606330.340180.1290260.192040.383036-0.4132610.087679-0.4428050.1652230.1389720.0108170.0660110.031720.015374-0.1541990.069085-0.0745260.3421120.755198-0.0395840.108980.26805-0.161822-0.0933520.091560.4409210.022243-0.135373-0.1222560.535823-0.2992450.0576280.0856830.4711250.2723950.040267-0.290351-0.3300090.0617870.039009-0.1362310.3900630.0817050.4074170.0824210.109496-0.500798-0.3719280.2104990.054817-0.037059-0.100564-0.172441-0.105397-0.1155310.156894-0.043453-0.367025-0.070912-0.130206117.0"Q"truefalse0.046812
30.0332060.136580.0296490.1156010.0805040.205870.1748430.082550.2366840.4095610.445139-0.0430160.395532-0.2801860.365009-0.2886930.028138-0.0088780.1798280.069011-0.096423-0.1835030.0277350.1868930.022349-0.217998-0.129381-0.1035670.090453-0.170609-0.1468670.2056910.0643410.008259-0.1148860.089-0.2271270.322171-0.0757570.385428-0.1696710.104973-0.01161-0.0593590.2851-0.235702-0.0502770.098363-0.0797740.2296830.108128-0.089006-0.140968-0.207516-0.022658-0.1277320.0386160.0099310.187821-0.034119-0.089407-0.252221-0.0862750.2001420.02470.14825-0.084408-0.163228117.0"T"truefalse0.437416
4-0.1534880.1781010.0767670.277805-0.062598-0.017160.254210.138347-0.0164130.1441550.2623250.2390950.683909-0.2604520.1629660.1167010.409806-0.16385-0.0955090.39366-0.202302-0.118709-0.2035110.071376-0.020334-0.2410920.009244-0.014142-0.097118-0.131135-0.1037880.1977430.1497180.314476-0.1884190.228339-0.1422420.34187-0.3194510.152567-0.198857-0.0463830.295451-0.0821090.181405-0.146599-0.1490760.018108-0.3292110.3177270.307067-0.0665610.22278-0.182435-0.404469-0.1866190.0405970.474888-0.049860.046669-0.072711-0.144304-0.0246760.2585580.335825-0.0847740.0455890.106432117.0"P"truefalse0.312792
84434-0.1026160.023357-0.0229380.006425-0.0996420.045261-0.003071-0.082837-0.090808-0.1531860.0087460.0178390.181520.156434-0.0236290.166111-0.2914430.074171-0.1138640.0061950.0794160.143738-0.048027-0.116025-0.0343350.057570.184138-0.0125280.079836-0.1289980.1673430.032930.225176-0.184082-0.162267-0.1480910.0273350.0810630.065476-0.2209970.023829-0.114449-0.0338310.005947-0.3300150.234441-0.0757680.168944-0.21160.1855420.173111-0.1442210.0150830.093506-0.058342-0.0795020.10016-0.116845-0.085286-0.1211330.105609-0.023850.1043860.0417190.245388-0.067179-0.1235210.20393847.0"V"falsefalse0.09529
844350.006365-0.054578-0.0537970.183082-0.1151550.0193550.090079-0.005044-0.04882-0.129596-0.0905350.001260.049185-0.03601-0.0768110.16542-0.1476350.048409-0.0586080.030474-0.0037960.1716430.04543-0.0643590.1949120.0840960.002967-0.1797760.019379-0.161040.1505540.1286140.105592-0.379701-0.087574-0.122342-0.0494710.1047860.094349-0.062865-0.0714850.0493470.048873-0.035775-0.2011580.0096620.1017040.2761-0.1476280.203010.105058-0.0860.2310710.1699140.1716310.1136050.00657-0.185583-0.0043140.019410.025256-0.039905-0.0449910.11730.097847-0.062373-0.1105870.09324247.0"Q"falsefalse0.559269
84436-0.0211380.060409-0.2595480.0769380.010231-0.0687950.021015-0.0664180.0059920.004406-0.1588240.0077660.1891180.143350.0666660.149638-0.1291980.0466210.147404-0.022679-0.0864470.078762-0.0401140.2426050.260802-0.1055870.06337-0.008405-0.089341-0.1711930.034680.0635870.1250310.008744-0.0501390.084319-0.001104-0.158556-0.067502-0.1237910.031946-0.042822-0.0305550.101412-0.0052190.1428040.0601460.0524940.008135-0.0347240.432915-0.0676560.029798-0.0277150.1387210.0757760.2627080.11978-0.054657-0.149773-0.019055-0.0687770.16741-0.2558310.133178-0.049465-0.1390630.12068947.0"K"falsefalse0.883928
84437-0.0134760.081914-0.1234640.0470740.0734790.075831-0.138856-0.14434-0.190178-0.0476-0.009502-0.0443610.0717220.204098-0.1246570.281845-0.147479-0.1260140.008064-0.016273-0.0285630.148271-0.1270830.072913-0.0924460.0746180.05848-0.005448-0.059232-0.210546-0.214381-0.1220720.172205-0.258711-0.034651-0.0623120.1819-0.0791290.149473-0.187563-0.0270180.0289720.2022190.08421-0.179594-0.059007-0.1277280.039554-0.3321860.3169070.078125-0.2054740.2915440.134560.096681-0.0128570.0573390.170203-0.005569-0.1839280.066849-0.0263670.3147210.0152160.1480860.106204-0.0894170.28617347.0"A"falsefalse0.828726
84438-0.0797270.1328290.0570060.167942-0.1868890.0727410.0841750.109943-0.014526-0.182992-0.02501-0.081883-0.0899010.108202-0.1623410.124711-0.158898-0.0049580.033675-0.063126-0.051280.039882-0.1784010.0441210.1524980.0217820.057184-0.107421-0.074607-0.061235-0.0654660.0953980.161038-0.3890950.093764-0.154974-0.0366020.260034-0.16953-0.2636530.0202410.1568090.1356280.12855-0.0786950.109689-0.0404330.10314-0.1188650.2668410.0570620.1268890.1231020.1239330.1648730.133015-0.089658-0.107521-0.039538-0.147168-0.089189-0.023737-0.1751490.1075640.231201-0.01757-0.2529760.13080747.0"Q"falsefalse0.637367
" - ], - "text/plain": [ - "shape: (84_439, 1_286)\n", - "┌───────┬───────────┬───────────┬───────────┬───┬──────────┬─────────────┬──────────────┬──────────┐\n", - "│ index ┆ field_0 ┆ field_1 ┆ field_2 ┆ … ┆ residues ┆ train_bools ┆ epitope_bool ┆ rsa_vals │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ s ┆ --- │\n", - "│ u32 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ str ┆ bool ┆ --- ┆ f64 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ bool ┆ │\n", - "╞═══════╪═══════════╪═══════════╪═══════════╪═══╪══════════╪═════════════╪══════════════╪══════════╡\n", - "│ 0 ┆ -0.079217 ┆ -0.08223 ┆ 0.05838 ┆ … ┆ L ┆ true ┆ false ┆ 0.205823 │\n", - "│ 1 ┆ 0.271906 ┆ 0.131599 ┆ -0.127488 ┆ … ┆ I ┆ true ┆ false ┆ 0.471213 │\n", - "│ 2 ┆ 0.075211 ┆ -0.124738 ┆ -0.312846 ┆ … ┆ Q ┆ true ┆ false ┆ 0.046812 │\n", - "│ 3 ┆ 0.033206 ┆ 0.13658 ┆ 0.029649 ┆ … ┆ T ┆ true ┆ false ┆ 0.437416 │\n", - "│ 4 ┆ -0.153488 ┆ 0.178101 ┆ 0.076767 ┆ … ┆ P ┆ true ┆ false ┆ 0.312792 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 84434 ┆ -0.102616 ┆ 0.023357 ┆ -0.022938 ┆ … ┆ V ┆ false ┆ false ┆ 0.09529 │\n", - "│ 84435 ┆ 0.006365 ┆ -0.054578 ┆ -0.053797 ┆ … ┆ Q ┆ false ┆ false ┆ 0.559269 │\n", - "│ 84436 ┆ -0.021138 ┆ 0.060409 ┆ -0.259548 ┆ … ┆ K ┆ false ┆ false ┆ 0.883928 │\n", - "│ 84437 ┆ -0.013476 ┆ 0.081914 ┆ -0.123464 ┆ … ┆ A ┆ false ┆ false ┆ 0.828726 │\n", - "│ 84438 ┆ -0.079727 ┆ 0.132829 ┆ 0.057006 ┆ … ┆ Q ┆ false ┆ false ┆ 0.637367 │\n", - "└───────┴───────────┴───────────┴───────────┴───┴──────────┴─────────────┴──────────────┴──────────┘" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bp3_res" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "e8468437", - "metadata": {}, - "outputs": [], - "source": [ - "bp3_res = bp3_res.with_columns(\n", - " pl.col(\"embeddings\").list.to_struct(upper_bound=1281)\n", - ").unnest(\"embeddings\")" + "print(f\"Column Names: {bp3_all_df.columns}\")\n", + "bp3_test_df = bp3_all_df.filter(pl.col(\"train\") 
== False)" ] }, { "cell_type": "code", "execution_count": null, - "id": "68686bd6", + "id": "d3becb0b", "metadata": {}, "outputs": [], "source": [ - "#with open('bp3_res.pkl', 'wb') as f:\n", - "# pickle.dump(bp3_res, f)\n", - "#\n", - "#with open(\"bp3_res.pkl\", 'rb') as f:\n", - "# bp3_res = pickle.load(f)" + "#######################################################################\n", + "# --- Visualize Variable Distributions for Epitope vs. NonEpitope --- #\n", + "#######################################################################\n", + "\n", + "bp3_plot = bp3_train_df.to_pandas()\n", + "plot_var = \"pLDDT\"\n", + "( #\n", + "ggplot(aes(x = bp3_plot[\"epitope\"], y = bp3_plot[plot_var]))\n", + "+ geom_boxplot() \n", + "+ labs(\n", + " x = \"Epitope Status\",\n", + " y = f\"{plot_var}\"\n", + ")\n", + "#+ geom_jitter()\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "6f35c6fd", - "metadata": {}, - "outputs": [], - "source": [ - "# --- Extract N Most Informative Features of Embedding ---\n", - "\n", - "num_emb_vars = bp3_res.select(\"embeddings\")[0].item().shape[0]\n", - "\n", - "u_vals = []\n", - "p_vals = []\n", - "for var in range(num_emb_vars):\n", - " var_epi = []\n", - " var_nepi = []\n", - " for (embedding, residue, train_bool, epitope_bool, rsa) in bp3_res.iter_rows():\n", - " if epitope_bool == True:\n", - " var_epi.append(embedding[var])\n", - " else:\n", - " var_nepi.append(embedding[var])\n", - " u, p = stats.mannwhitneyu(var_epi, var_nepi, alternative=\"two-sided\")\n", - " u_vals.append(u)\n", - " p_vals.append(p)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "8feee99b", - "metadata": {}, - "outputs": [], - "source": [ - "with open(\"best_esm_embedding_vars.pkl\", 'rb') as file:\n", - " # Load the pickled data from the file\n", - " p_vals = pickle.load(file)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "7f40786a", + "id": "0a766107", "metadata": {}, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ - "[(np.float64(81.0), 1280), (np.float64(1.130703330039978), 234), (np.float64(0.3712773323059082), 1160), (np.float64(0.045004742220044136), 839), (np.float64(0.044218819588422775), 553), (np.float64(0.04032979533076286), 696), (np.float64(0.03747392725199461), 414), (np.float64(0.03660698514431715), 1251), (np.float64(0.03643316403031349), 600), (np.float64(0.0351506844162941), 381), (np.float64(0.034491033758968115), 756), (np.float64(0.033380577340722084), 300), (np.float64(0.03337776567786932), 1014), (np.float64(0.03284870833158493), 1186), (np.float64(0.03240643069148064), 855), (np.float64(0.032072730362415314), 608), (np.float64(0.031966139416908845), 86), (np.float64(0.031056914827786386), 396), (np.float64(0.030288565903902054), 655), (np.float64(0.030203919857740402), 166), (np.float64(0.030178461922332644), 423), (np.float64(0.029451459646224976), 809), (np.float64(0.028364425525069237), 77), (np.float64(0.02815424744039774), 249), (np.float64(0.028020352125167847), 145), (np.float64(0.02772002387791872), 370), (np.float64(0.027637861669063568), 564), (np.float64(0.026874929666519165), 1194), (np.float64(0.02658862737007439), 1088), (np.float64(0.025990422815084457), 230), (np.float64(0.025970193557441235), 968), (np.float64(0.025764848105609417), 998), (np.float64(0.025550130754709244), 1174), (np.float64(0.025247633457183838), 803), (np.float64(0.02497640997171402), 236), (np.float64(0.024945996701717377), 812), (np.float64(0.02489049779251218), 1127), (np.float64(0.024773985147476196), 880), (np.float64(0.024766715243458748), 628), (np.float64(0.024555565789341927), 1067), (np.float64(0.02454405650496483), 683), (np.float64(0.024141178466379642), 152), (np.float64(0.024102460592985153), 1006), (np.float64(0.023943433538079262), 583), (np.float64(0.023688404820859432), 191), (np.float64(0.023484595119953156), 1053), (np.float64(0.023418080061674118), 609), (np.float64(0.023391427472233772), 153), 
(np.float64(0.0233256034553051), 1107), (np.float64(0.023274637758731842), 271), (np.float64(0.022974496707320213), 168), (np.float64(0.022787262685596943), 192), (np.float64(0.02268359251320362), 260), (np.float64(0.02266924805007875), 593), (np.float64(0.022517642006278038), 921), (np.float64(0.022394230589270592), 727), (np.float64(0.022389421239495277), 789), (np.float64(0.022317957365885377), 1162), (np.float64(0.022150119300931692), 277), (np.float64(0.02212531678378582), 899), (np.float64(0.022036344325897517), 1094), (np.float64(0.02189606800675392), 1111), (np.float64(0.02164968801662326), 412), (np.float64(0.021564900875091553), 1165), (np.float64(0.021400924772024155), 36), (np.float64(0.02118323463946581), 681), (np.float64(0.021175827831029892), 853), (np.float64(0.020939030684530735), 1125), (np.float64(0.020914054475724697), 695), (np.float64(0.02084817737340927), 973), (np.float64(0.020482300780713558), 1092), (np.float64(0.020417840220034122), 450), (np.float64(0.020330642815679312), 68), (np.float64(0.020232222974300385), 1049), (np.float64(0.020162058994174004), 405), (np.float64(0.02016097353771329), 483), (np.float64(0.01997297373600304), 892), (np.float64(0.01992867747321725), 75), (np.float64(0.01989478268660605), 462), (np.float64(0.01922575756907463), 1055), (np.float64(0.0190451480448246), 971), (np.float64(0.018956223502755165), 1212), (np.float64(0.018928367644548416), 775), (np.float64(0.01885544741526246), 340), (np.float64(0.01867196150124073), 22), (np.float64(0.018671827390789986), 237), (np.float64(0.018619922921061516), 1077), (np.float64(0.01853789109736681), 1168), (np.float64(0.01844160445034504), 375), (np.float64(0.01838996820151806), 471), (np.float64(0.018250529188662767), 283), (np.float64(0.018210260197520256), 1193), (np.float64(0.01818249374628067), 883), (np.float64(0.018178826197981834), 354), (np.float64(0.018157916143536568), 448), (np.float64(0.01814010553061962), 1072), (np.float64(0.018098924192599952), 1264), 
(np.float64(0.018092042300850153), 866), (np.float64(0.017926552798599005), 347), (np.float64(0.017782390117645264), 67), (np.float64(0.017713487148284912), 580), (np.float64(0.017520148307085037), 905), (np.float64(0.017419555690139532), 1069), (np.float64(0.01740977691952139), 611), (np.float64(0.017335407435894012), 507), (np.float64(0.01730018761008978), 728), (np.float64(0.017248489893972874), 215), (np.float64(0.017228491604328156), 297), (np.float64(0.017207475379109383), 668), (np.float64(0.01716914726421237), 90), (np.float64(0.017164206132292747), 418), (np.float64(0.017009243369102478), 1145), (np.float64(0.016953904181718826), 615), (np.float64(0.01688589807599783), 1085), (np.float64(0.016667735180817544), 211), (np.float64(0.016580134630203247), 950), (np.float64(0.016412150114774704), 582), (np.float64(0.016342705115675926), 301), (np.float64(0.016321652568876743), 25), (np.float64(0.01623747358098626), 566), (np.float64(0.01623537763953209), 659), (np.float64(0.016207854729145765), 1187), (np.float64(0.016190076246857643), 1129), (np.float64(0.016053708270192146), 410), (np.float64(0.015998678281903267), 374), (np.float64(0.01599816046655178), 944), (np.float64(0.01595117896795273), 137), (np.float64(0.01590883918106556), 984), (np.float64(0.01588423765497282), 213), (np.float64(0.01587132178246975), 717), (np.float64(0.015777988824993372), 974), (np.float64(0.015773339197039604), 346), (np.float64(0.015697740018367767), 571), (np.float64(0.015640379322576337), 219), (np.float64(0.015593299642205238), 351), (np.float64(0.015562902670353651), 1114), (np.float64(0.015533394180238247), 388), (np.float64(0.01539867208339274), 1137), (np.float64(0.01523844338953495), 428), (np.float64(0.015186004340648651), 836), (np.float64(0.015168139711022377), 776), (np.float64(0.01514124684035778), 66), (np.float64(0.015134465182200074), 589), (np.float64(0.015024304389953613), 1198), (np.float64(0.014992635697126389), 732), (np.float64(0.014886489138007164), 200), 
(np.float64(0.014842655509710312), 777), (np.float64(0.014836808666586876), 810), (np.float64(0.014810930006206036), 762), (np.float64(0.014797125943005085), 722), (np.float64(0.014722553140018135), 1146), (np.float64(0.014658856205642223), 939), (np.float64(0.014645400457084179), 871), (np.float64(0.014637693762779236), 1228), (np.float64(0.014579221606254578), 460), (np.float64(0.014410372823476791), 265), (np.float64(0.014292508363723755), 907), (np.float64(0.014270318672060966), 1052), (np.float64(0.014171725139021873), 1183), (np.float64(0.014159210142679513), 718), (np.float64(0.01415821723639965), 363), (np.float64(0.014137148391455412), 350), (np.float64(0.013962782919406891), 813), (np.float64(0.01390154892578721), 33), (np.float64(0.013901078724302351), 715), (np.float64(0.013888734392821789), 1151), (np.float64(0.013806648552417755), 110), (np.float64(0.013695838861167431), 664), (np.float64(0.013664640951901674), 401), (np.float64(0.01362670212984085), 1005), (np.float64(0.013555938377976418), 397), (np.float64(0.013532337732613087), 657), (np.float64(0.01352951256558299), 291), (np.float64(0.013322470709681511), 774), (np.float64(0.013301345519721508), 631), (np.float64(0.013249438256025314), 822), (np.float64(0.013248251751065254), 55), (np.float64(0.013229165226221085), 231), (np.float64(0.013227107585407794), 314), (np.float64(0.013184343464672565), 288), (np.float64(0.013178281486034393), 646), (np.float64(0.013057534699328244), 1008), (np.float64(0.012972468510270119), 243), (np.float64(0.012916450956254266), 379), (np.float64(0.012898104265332222), 1189), (np.float64(0.012832388281822205), 786), (np.float64(0.012810321524739265), 441), (np.float64(0.012777255848050117), 791), (np.float64(0.012744493782520294), 686), (np.float64(0.01274031586945057), 203), (np.float64(0.012716792523860931), 85), (np.float64(0.012634440790861845), 1004), (np.float64(0.01250067725777626), 500), (np.float64(0.012495343806222081), 563), 
(np.float64(0.012440497055649757), 990), (np.float64(0.012401574291288853), 1032), (np.float64(0.012400735169649124), 793), (np.float64(0.012385524809360504), 801), (np.float64(0.012380607426166534), 765), (np.float64(0.01234420482069254), 279), (np.float64(0.012343136593699455), 854), (np.float64(0.012326118245255202), 222), (np.float64(0.012296153232455254), 1188), (np.float64(0.012261290132300928), 165), (np.float64(0.012197593227028847), 403), (np.float64(0.012096164748072624), 979), (np.float64(0.012080555781722069), 652), (np.float64(0.012075373902916908), 1084), (np.float64(0.011993632419034839), 248), (np.float64(0.011948315426707268), 307), (np.float64(0.011938711628317833), 207), (np.float64(0.011916568502783775), 20), (np.float64(0.011913990136235952), 1243), (np.float64(0.0119064562022686), 819), (np.float64(0.011905217543244362), 845), (np.float64(0.011900685727596283), 1041), (np.float64(0.011890262365341187), 214), (np.float64(0.011802103370428085), 976), (np.float64(0.011791346594691277), 1120), (np.float64(0.011742846050765365), 339), (np.float64(0.011728386860340834), 59), (np.float64(0.01166495680809021), 253), (np.float64(0.011663809418678284), 1021), (np.float64(0.011610973626375198), 117), (np.float64(0.011607056483626366), 719), (np.float64(0.011598647572100163), 568), (np.float64(0.011571774259209633), 415), (np.float64(0.011481711699161679), 278), (np.float64(0.011479867622256279), 977), (np.float64(0.01147252693772316), 469), (np.float64(0.01137647032737732), 472), (np.float64(0.011283046565949917), 561), (np.float64(0.011235762620344758), 188), (np.float64(0.011222146451473236), 1259), (np.float64(0.011220555752515793), 118), (np.float64(0.011210506781935692), 150), (np.float64(0.01115427166223526), 282), (np.float64(0.011089973151683807), 730), (np.float64(0.0110830869525671), 183), (np.float64(0.011028191074728966), 399), (np.float64(0.01101304218173027), 92), (np.float64(0.010961954947561026), 123), (np.float64(0.010941036212898325), 
1218), (np.float64(0.010906634852290154), 312), (np.float64(0.01089153066277504), 820), (np.float64(0.01083652675151825), 425), (np.float64(0.010806520935148), 220), (np.float64(0.01080569438636303), 1244), (np.float64(0.010785584338009357), 1254), (np.float64(0.010722700506448746), 700), (np.float64(0.010713106952607632), 323), (np.float64(0.01068238914012909), 284), (np.float64(0.010637138038873672), 835), (np.float64(0.010622672736644745), 1263), (np.float64(0.010598766501061618), 16), (np.float64(0.010591836180537939), 129), (np.float64(0.010561587288975716), 276), (np.float64(0.010534115135669708), 266), (np.float64(0.010474463924765587), 159), (np.float64(0.010459182783961296), 451), (np.float64(0.010430864989757538), 1247), (np.float64(0.01038616243749857), 444), (np.float64(0.010379638464655727), 131), (np.float64(0.0101566631346941), 913), (np.float64(0.010046117007732391), 4), (np.float64(0.009935624897480011), 1209), (np.float64(0.009927066043019295), 667), (np.float64(0.009867895394563675), 966), (np.float64(0.009789712727069855), 42), (np.float64(0.009780844673514366), 993), (np.float64(0.009772205725312233), 119), (np.float64(0.009715504944324493), 273), (np.float64(0.009608583524823189), 1019), (np.float64(0.009587500244379044), 570), (np.float64(0.009521863423287868), 162), (np.float64(0.009470891673117876), 975), (np.float64(0.009450562531128526), 1202), (np.float64(0.009449162287637591), 100), (np.float64(0.009391489322297275), 934), (np.float64(0.009280447848141193), 196), (np.float64(0.009250136092305183), 430), (np.float64(0.00911640003323555), 285), (np.float64(0.009104061871767044), 103), (np.float64(0.009091995656490326), 932), (np.float64(0.009053241461515427), 761), (np.float64(0.008984031155705452), 6), (np.float64(0.008899634703993797), 121), (np.float64(0.008806407451629639), 106), (np.float64(0.008774453774094582), 262), (np.float64(0.008766988146817312), 1255), (np.float64(0.008755201008170843), 426), 
(np.float64(0.008747384359594434), 1246), (np.float64(0.008727488107979298), 56), (np.float64(0.008709082379937172), 1018), (np.float64(0.008707253262400627), 338), (np.float64(0.008586497977375984), 47), (np.float64(0.008559728041291237), 242), (np.float64(0.008496899157762527), 739), (np.float64(0.008455098606646061), 1153), (np.float64(0.008453513495624065), 796), (np.float64(0.008421202190220356), 180), (np.float64(0.008398983627557755), 578), (np.float64(0.008393193129450083), 138), (np.float64(0.008391544222831726), 27), (np.float64(0.00835740938782692), 639), (np.float64(0.00834231125190854), 187), (np.float64(0.00831507908878848), 64), (np.float64(0.008314916864037514), 98), (np.float64(0.008291925652883947), 1203), (np.float64(0.008287357166409492), 490), (np.float64(0.008284620009362698), 771), (np.float64(0.008237404748797417), 595), (np.float64(0.008215129375457764), 353), (np.float64(0.008181661367416382), 128), (np.float64(0.008086762623861432), 587), (np.float64(0.008051972836256027), 475), (np.float64(0.008047517389059067), 1060), (np.float64(0.008003609604202211), 713), (np.float64(0.008001109352335334), 503), (np.float64(0.00792611576616764), 114), (np.float64(0.007904700934886932), 281), (np.float64(0.007900640368461609), 456), (np.float64(0.00789184495806694), 274), (np.float64(0.007889870554208755), 212), (np.float64(0.007881866302341223), 512), (np.float64(0.00787946954369545), 355), (np.float64(0.00786761287599802), 299), (np.float64(0.007863425649702549), 653), (np.float64(0.007847219705581665), 310), (np.float64(0.007825737819075584), 377), (np.float64(0.007806180045008659), 1025), (np.float64(0.0077806273475289345), 542), (np.float64(0.007778293918818235), 197), (np.float64(0.0077378100249916315), 654), (np.float64(0.007702145725488663), 594), (np.float64(0.007652724161744118), 613), (np.float64(0.007633641362190247), 417), (np.float64(0.0076003409922122955), 329), (np.float64(0.007589031883981079), 888), (np.float64(0.007576806470751762), 
923), (np.float64(0.007573945447802544), 1108), (np.float64(0.007561119273304939), 43), (np.float64(0.007557529956102371), 1090), (np.float64(0.00754080805927515), 19), (np.float64(0.007507447153329849), 724), (np.float64(0.0074921175837516785), 317), (np.float64(0.0074846018105745316), 952), (np.float64(0.007444923743605614), 376), (np.float64(0.007425621151924133), 45), (np.float64(0.007377568632364273), 687), (np.float64(0.007334676454775035), 1002), (np.float64(0.0073283761739730835), 206), (np.float64(0.007327833212912083), 560), (np.float64(0.007297255098819733), 783), (np.float64(0.00729526299983263), 61), (np.float64(0.007223796099424362), 592), (np.float64(0.0072027649730443954), 446), (np.float64(0.007195578888058662), 1241), (np.float64(0.00719551183283329), 957), (np.float64(0.007170367985963821), 943), (np.float64(0.007156253792345524), 729), (np.float64(0.007133243838325143), 1065), (np.float64(0.007120907306671143), 901), (np.float64(0.007117012515664101), 735), (np.float64(0.0071118175983428955), 893), (np.float64(0.007111807353794575), 58), (np.float64(0.00709662027657032), 547), (np.float64(0.007087027654051781), 362), (np.float64(0.007054241374135017), 498), (np.float64(0.007042464800179005), 1221), (np.float64(0.006980568170547485), 122), (np.float64(0.00695190392434597), 886), (np.float64(0.006930340081453323), 321), (np.float64(0.006925065070390701), 510), (np.float64(0.006918858736753464), 313), (np.float64(0.006893233919981867), 330), (np.float64(0.00686962716281414), 11), (np.float64(0.006865903967991471), 365), (np.float64(0.006832782179117203), 394), (np.float64(0.006829655729234219), 891), (np.float64(0.006801697425544262), 53), (np.float64(0.006791293621063232), 937), (np.float64(0.006698655895888805), 703), (np.float64(0.006694257725030184), 1170), (np.float64(0.006679002195596695), 189), (np.float64(0.006648337468504906), 409), (np.float64(0.006589038297533989), 972), (np.float64(0.006570897996425629), 416), 
(np.float64(0.006568379700183868), 1011), (np.float64(0.006532957944727968), 360), (np.float64(0.006522510200738907), 656), (np.float64(0.0065031107515096664), 395), (np.float64(0.006459635682404041), 607), (np.float64(0.006452583707869053), 750), (np.float64(0.006433840841054916), 303), (np.float64(0.006367001682519913), 287), (np.float64(0.006355821620672941), 171), (np.float64(0.00631660595536232), 525), (np.float64(0.006311326549621299), 630), (np.float64(0.006254343315958977), 621), (np.float64(0.006236137822270393), 1265), (np.float64(0.0062336549162864685), 280), (np.float64(0.006093231961131096), 1), (np.float64(0.006068775430321693), 515), (np.float64(0.006007391959428787), 179), (np.float64(0.005940468981862068), 50), (np.float64(0.0059141237288713455), 328), (np.float64(0.0059125833213329315), 184), (np.float64(0.005842448212206364), 1229), (np.float64(0.005820596590638161), 559), (np.float64(0.0058171721175313), 485), (np.float64(0.005750535521656275), 1045), (np.float64(0.005683631170541048), 113), (np.float64(0.00566563755273819), 13), (np.float64(0.005600292701274157), 331), (np.float64(0.005594167858362198), 629), (np.float64(0.005578726530075073), 531), (np.float64(0.005515436641871929), 598), (np.float64(0.0054828329011797905), 758), (np.float64(0.005473967641592026), 1276), (np.float64(0.005444618873298168), 32), (np.float64(0.005436782957985997), 755), (np.float64(0.005431024357676506), 785), (np.float64(0.005363805568777025), 532), (np.float64(0.005330108106136322), 830), (np.float64(0.00532750366255641), 1267), (np.float64(0.0053042881190776825), 294), (np.float64(0.005254607647657394), 1017), (np.float64(0.005230085924267769), 1216), (np.float64(0.0052208518027327955), 404), (np.float64(0.005198197439312935), 1015), (np.float64(0.005155934486538172), 678), (np.float64(0.005148696713149548), 663), (np.float64(0.005032102577388287), 359), (np.float64(0.004991074092686176), 714), (np.float64(0.0049825385212898254), 627), 
(np.float64(0.004934079828672111), 904), (np.float64(0.00489051453769207), 83), (np.float64(0.00488685816526413), 584), (np.float64(0.0048807961866259575), 411), (np.float64(0.004836020991206169), 1027), (np.float64(0.0048017119988799095), 684), (np.float64(0.004774243570864201), 1176), (np.float64(0.0047730617225170135), 1219), (np.float64(0.004693788127042353), 823), (np.float64(0.00468137301504612), 908), (np.float64(0.00467224046587944), 605), (np.float64(0.0045488253235816956), 156), (np.float64(0.004545961506664753), 84), (np.float64(0.004537271335721016), 708), (np.float64(0.0045341793447732925), 926), (np.float64(0.004530996084213257), 182), (np.float64(0.0044933343306183815), 30), (np.float64(0.004464704543352127), 9), (np.float64(0.004407189786434174), 569), (np.float64(0.0043976083397865295), 1149), (np.float64(0.0043945867801085114), 95), (np.float64(0.004293292760848999), 172), (np.float64(0.0042603835463523865), 387), (np.float64(0.004210165643598884), 173), (np.float64(0.004184460442047566), 496), (np.float64(0.004150079563260078), 367), (np.float64(0.00413903035223484), 341), (np.float64(0.0040975576266646385), 550), (np.float64(0.004085277207195759), 251), (np.float64(0.004077760153450072), 1269), (np.float64(0.004069924354553223), 1249), (np.float64(0.004054168239235878), 298), (np.float64(0.004032887518405914), 1222), (np.float64(0.004022389650344849), 897), (np.float64(0.004020331427454948), 91), (np.float64(0.004010087111964822), 167), (np.float64(0.004000985994935036), 368), (np.float64(0.003997102379798889), 619), (np.float64(0.003981940448284149), 624), (np.float64(0.003933241590857506), 848), (np.float64(0.003733748570084572), 319), (np.float64(0.0037168003618717194), 1234), (np.float64(0.0036879992112517357), 186), (np.float64(0.003684638999402523), 380), (np.float64(0.0036621559411287308), 746), (np.float64(0.0036391839385032654), 178), (np.float64(0.0036176294088363647), 1051), (np.float64(0.003613073378801346), 1058), 
(np.float64(0.0035451650619506836), 518), (np.float64(0.0035063475370407104), 1200), (np.float64(0.0035057906061410904), 931), (np.float64(0.0035018213093280792), 751), (np.float64(0.0034943416249006987), 143), (np.float64(0.00347183458507061), 828), (np.float64(0.003445371985435486), 1258), (np.float64(0.0034322869032621384), 1040), (np.float64(0.003407709300518036), 366), (np.float64(0.003358466550707817), 985), (np.float64(0.0033377669751644135), 826), (np.float64(0.003312641754746437), 17), (np.float64(0.0032705982448533177), 14), (np.float64(0.0032210182398557663), 255), (np.float64(0.0032125506550073624), 39), (np.float64(0.003189890761859715), 348), (np.float64(0.0031792623922228813), 1100), (np.float64(0.0031745098531246185), 701), (np.float64(0.0031258314847946167), 480), (np.float64(0.003095071529969573), 546), (np.float64(0.0030517131090164185), 158), (np.float64(0.003032959997653961), 10), (np.float64(0.003000635653734207), 912), (np.float64(0.0029672272503376007), 15), (np.float64(0.002951675094664097), 290), (np.float64(0.0028542475774884224), 804), (np.float64(0.0028172172605991364), 1124), (np.float64(0.0027712839655578136), 309), (np.float64(0.0026745274662971497), 1133), (np.float64(0.0026256144046783447), 900), (np.float64(0.0026158811524510384), 205), (np.float64(0.002593526616692543), 1106), (np.float64(0.0025607850402593613), 545), (np.float64(0.0024483129382133484), 754), (np.float64(0.0024405624717473984), 514), (np.float64(0.002425260841846466), 868), (np.float64(0.0023892847821116447), 509), (np.float64(0.0023853182792663574), 29), (np.float64(0.002379018100327812), 1154), (np.float64(0.0022975997999310493), 790), (np.float64(0.0022021420300006866), 267), (np.float64(0.002168288454413414), 534), (np.float64(0.0021635047160089016), 1007), (np.float64(0.002135808579623699), 768), (np.float64(0.002078305697068572), 951), (np.float64(0.002071140334010124), 245), (np.float64(0.0020304229110479355), 486), (np.float64(0.002023644745349884), 
1250), (np.float64(0.0020101824775338173), 28), (np.float64(0.0019969623535871506), 1245), (np.float64(0.001986062154173851), 1022), (np.float64(0.0019746623001992702), 1239), (np.float64(0.001973463222384453), 244), (np.float64(0.0019369125366210938), 1035), (np.float64(0.0019024861976504326), 548), (np.float64(0.0018787621520459652), 527), (np.float64(0.0018643662333488464), 459), (np.float64(0.0018462538719177246), 706), (np.float64(0.001827546686399728), 1274), (np.float64(0.001788940280675888), 382), (np.float64(0.0017553488723933697), 1206), (np.float64(0.0017540152184665203), 218), (np.float64(0.001738311955705285), 769), (np.float64(0.001677100546658039), 136), (np.float64(0.001619689166545868), 1113), (np.float64(0.0016147047281265259), 1279), (np.float64(0.0016042403876781464), 601), (np.float64(0.0015992438420653343), 596), (np.float64(0.0015618205070495605), 671), (np.float64(0.0015448471531271935), 928), (np.float64(0.001539589837193489), 626), (np.float64(0.001523636281490326), 1095), (np.float64(0.0015228185802698135), 63), (np.float64(0.0015169456601142883), 1180), (np.float64(0.0015078596770763397), 694), (np.float64(0.001481717685237527), 125), (np.float64(0.0014607235789299011), 1068), (np.float64(0.0014108158648014069), 250), (np.float64(0.0014008060097694397), 748), (np.float64(0.0013657081872224808), 204), (np.float64(0.001345967873930931), 1061), (np.float64(0.0013415142893791199), 371), (np.float64(0.0013408008962869644), 37), (np.float64(0.0012580184265971184), 960), (np.float64(0.001243335660547018), 1132), (np.float64(0.0011727940291166306), 1191), (np.float64(0.0011576693505048752), 342), (np.float64(0.001130678690969944), 1223), (np.float64(0.0011231759563088417), 127), (np.float64(0.0010981885716319084), 1235), (np.float64(0.0010806471109390259), 521), (np.float64(0.0010549724102020264), 989), (np.float64(0.0010166391730308533), 910), (np.float64(0.0010066886898130178), 488), (np.float64(0.000985151156783104), 738), 
(np.float64(0.0009567076340317726), 982), (np.float64(0.0009152404963970184), 612), (np.float64(0.0008938983082771301), 930), (np.float64(0.0008527413010597229), 335), (np.float64(0.000825216993689537), 477), (np.float64(0.0007924996316432953), 920), (np.float64(0.0007924232631921768), 896), (np.float64(0.0007629562169313431), 581), (np.float64(0.0007573636248707771), 80), (np.float64(0.000741329975426197), 1013), (np.float64(0.0007395949214696884), 1075), (np.float64(0.0007391385734081268), 163), (np.float64(0.0007354002445936203), 858), (np.float64(0.0007210071198642254), 916), (np.float64(0.0007144920527935028), 124), (np.float64(0.0006856508553028107), 1131), (np.float64(0.0006828904151916504), 21), (np.float64(0.0006541721522808075), 1097), (np.float64(0.0006299437955021858), 909), (np.float64(0.0006266646087169647), 870), (np.float64(0.000575515441596508), 270), (np.float64(0.0005712788552045822), 421), (np.float64(0.00056855333968997), 308), (np.float64(0.0004581841640174389), 261), (np.float64(0.0004357830621302128), 487), (np.float64(0.0004345737397670746), 194), (np.float64(0.0004092305898666382), 879), (np.float64(0.00037114880979061127), 235), (np.float64(0.00035897456109523773), 516), (np.float64(0.0003486813511699438), 1134), (np.float64(0.00034497492015361786), 704), (np.float64(0.000283312052488327), 88), (np.float64(0.00022942200303077698), 135), (np.float64(0.0002283584326505661), 1119), (np.float64(0.00020135263912379742), 229), (np.float64(0.00017299503087997437), 1159), (np.float64(8.101761341094971e-05), 702), (np.float64(-1.817755401134491e-05), 316), (np.float64(-2.541765570640564e-05), 1036), (np.float64(-4.7507346607744694e-05), 433), (np.float64(-0.00012370478361845016), 46), (np.float64(-0.0001281891018152237), 1155), (np.float64(-0.00013002753257751465), 461), (np.float64(-0.00014249759260565042), 447), (np.float64(-0.00016005896031856537), 1063), (np.float64(-0.0001608431339263916), 857), (np.float64(-0.0001817811280488968), 493), 
(np.float64(-0.00020514801144599915), 662), (np.float64(-0.0002080760896205902), 1080), (np.float64(-0.0002263011410832405), 1214), (np.float64(-0.000227256678044796), 1181), (np.float64(-0.00023396313190460205), 811), (np.float64(-0.00026110000908374786), 427), (np.float64(-0.00028219353407621384), 1205), (np.float64(-0.0003402328584343195), 419), (np.float64(-0.00036205508513376117), 62), (np.float64(-0.0003917478024959564), 537), (np.float64(-0.0005154851824045181), 492), (np.float64(-0.0005159936845302582), 637), (np.float64(-0.0005178460851311684), 856), (np.float64(-0.0005494393408298492), 109), (np.float64(-0.0005625635385513306), 97), (np.float64(-0.000569885887671262), 1115), (np.float64(-0.0005779080092906952), 933), (np.float64(-0.0005808547139167786), 506), (np.float64(-0.0006279787048697472), 1103), (np.float64(-0.0007155854254961014), 1256), (np.float64(-0.0007434776052832603), 688), (np.float64(-0.0007441248744726181), 1169), (np.float64(-0.000748196616768837), 147), (np.float64(-0.0007578060030937195), 468), (np.float64(-0.0007839640602469444), 26), (np.float64(-0.0008056208025664091), 740), (np.float64(-0.0008387919515371323), 948), (np.float64(-0.0009087081998586655), 1196), (np.float64(-0.0009119641035795212), 1161), (np.float64(-0.0009306315332651138), 73), (np.float64(-0.000951351597905159), 142), (np.float64(-0.0009772293269634247), 540), (np.float64(-0.0009796805679798126), 1260), (np.float64(-0.001017771428450942), 691), (np.float64(-0.0010307496413588524), 590), (np.float64(-0.001033630222082138), 953), (np.float64(-0.0010345056653022766), 551), (np.float64(-0.0010652430355548859), 41), (np.float64(-0.0010662563145160675), 1031), (np.float64(-0.0010681245476007462), 707), (np.float64(-0.0010780454613268375), 661), (np.float64(-0.001093613333068788), 385), (np.float64(-0.0010970290750265121), 126), (np.float64(-0.0011139344424009323), 767), (np.float64(-0.0011796094477176666), 766), (np.float64(-0.0011811880394816399), 1009), 
(np.float64(-0.0012204386293888092), 992), (np.float64(-0.0013030210830038413), 802), (np.float64(-0.0013143308460712433), 1099), (np.float64(-0.00135769322514534), 938), (np.float64(-0.0013707373291254044), 1147), (np.float64(-0.0013769203796982765), 389), (np.float64(-0.0014346614480018616), 742), (np.float64(-0.001471739262342453), 489), (np.float64(-0.0014727767556905746), 52), (np.float64(-0.0015113605186343193), 232), (np.float64(-0.0015142625197768211), 1177), (np.float64(-0.0015361960977315903), 289), (np.float64(-0.0016520395874977112), 825), (np.float64(-0.0016639144159853458), 252), (np.float64(-0.0016933688893914223), 987), (np.float64(-0.0017160698771476746), 946), (np.float64(-0.00173667399212718), 1252), (np.float64(-0.0017373105511069298), 733), (np.float64(-0.0017650823428994045), 649), (np.float64(-0.0017763301730155945), 522), (np.float64(-0.0017936015501618385), 115), (np.float64(-0.0018141400068998337), 577), (np.float64(-0.0018463386222720146), 674), (np.float64(-0.0018810424953699112), 1116), (np.float64(-0.0018857363611459732), 1199), (np.float64(-0.0019829915836453438), 814), (np.float64(-0.00200633704662323), 995), (np.float64(-0.002010106109082699), 1204), (np.float64(-0.0020110905170440674), 1057), (np.float64(-0.0020224787294864655), 1207), (np.float64(-0.002053378149867058), 945), (np.float64(-0.002064388245344162), 575), (np.float64(-0.0020839348435401917), 470), (np.float64(-0.00216059572994709), 465), (np.float64(-0.0022006616927683353), 981), (np.float64(-0.002226443961262703), 1139), (np.float64(-0.00228223018348217), 71), (np.float64(-0.002282954752445221), 520), (np.float64(-0.002297590486705303), 673), (np.float64(-0.002300553023815155), 1079), (np.float64(-0.0023086005821824074), 807), (np.float64(-0.002316609607078135), 1074), (np.float64(-0.0023270510137081146), 749), (np.float64(-0.002357354387640953), 216), (np.float64(-0.002396269701421261), 1073), (np.float64(-0.0024244561791419983), 104), 
(np.float64(-0.002444714307785034), 1024), (np.float64(-0.0024672462604939938), 999), (np.float64(-0.002483328804373741), 34), (np.float64(-0.0025172159948851913), 838), (np.float64(-0.002524329349398613), 935), (np.float64(-0.0025281142443418503), 967), (np.float64(-0.0025460217148065567), 457), (np.float64(-0.002552484627813101), 455), (np.float64(-0.002560259774327278), 474), (np.float64(-0.0026358672184869647), 429), (np.float64(-0.0026509244926273823), 269), (np.float64(-0.0026514939963817596), 1253), (np.float64(-0.0026922840625047684), 623), (np.float64(-0.00269710854627192), 1225), (np.float64(-0.002712603658437729), 38), (np.float64(-0.0027409472968429327), 965), (np.float64(-0.00277055986225605), 743), (np.float64(-0.0028255488723516464), 391), (np.float64(-0.0028463639318943024), 190), (np.float64(-0.0028850361704826355), 336), (np.float64(-0.002894410863518715), 832), (np.float64(-0.002898396924138069), 778), (np.float64(-0.0030250325798988342), 76), (np.float64(-0.00303669273853302), 1121), (np.float64(-0.0030500199645757675), 65), (np.float64(-0.0030538206920027733), 185), (np.float64(-0.0030581308528780937), 393), (np.float64(-0.003087669610977173), 210), (np.float64(-0.003108654171228409), 51), (np.float64(-0.0031100647029234096), 473), (np.float64(-0.0031547900289297104), 889), (np.float64(-0.0031569916754961014), 195), (np.float64(-0.003162076696753502), 1273), (np.float64(-0.0031907064840197563), 843), (np.float64(-0.0033862628042697906), 130), (np.float64(-0.0034333346411585808), 442), (np.float64(-0.0034815147519111633), 752), (np.float64(-0.0035286154597997665), 902), (np.float64(-0.0035514887422323227), 1086), (np.float64(-0.003552686423063278), 333), (np.float64(-0.0035611912608146667), 1102), (np.float64(-0.003571145236492157), 1237), (np.float64(-0.0035725105553865433), 169), (np.float64(-0.003596900962293148), 1278), (np.float64(-0.0036182962357997894), 936), (np.float64(-0.003647194243967533), 402), (np.float64(-0.0036538057029247284), 
112), (np.float64(-0.003661118447780609), 1136), (np.float64(-0.0036774892359972), 549), (np.float64(-0.0037003178149461746), 176), (np.float64(-0.0037345218006521463), 788), (np.float64(-0.003779228776693344), 174), (np.float64(-0.003787299618124962), 716), (np.float64(-0.00380537798628211), 983), (np.float64(-0.0038185007870197296), 144), (np.float64(-0.003865445323754102), 70), (np.float64(-0.003870982676744461), 911), (np.float64(-0.003874685149639845), 1001), (np.float64(-0.0038925185799598694), 49), (np.float64(-0.003927405923604965), 358), (np.float64(-0.00400954857468605), 602), (np.float64(-0.004042258486151695), 481), (np.float64(-0.00408430490642786), 918), (np.float64(-0.0041113197803497314), 720), (np.float64(-0.004242537543177605), 863), (np.float64(-0.004256729036569595), 1010), (np.float64(-0.004292648285627365), 651), (np.float64(-0.0043027400970458984), 78), (np.float64(-0.004380635917186737), 797), (np.float64(-0.004390287562273443), 986), (np.float64(-0.004402928985655308), 35), (np.float64(-0.004403814557008445), 482), (np.float64(-0.004492867738008499), 956), (np.float64(-0.0045052869245409966), 181), (np.float64(-0.004513232968747616), 603), (np.float64(-0.00453865434974432), 1184), (np.float64(-0.00456337071955204), 89), (np.float64(-0.00461997021920979), 508), (np.float64(-0.004656549543142319), 625), (np.float64(-0.004690955160185695), 536), (np.float64(-0.0046967072412371635), 296), (np.float64(-0.0047052884474396706), 322), (np.float64(-0.004711035639047623), 873), (np.float64(-0.004717739298939705), 731), (np.float64(-0.0047803036868572235), 275), (np.float64(-0.004825130105018616), 638), (np.float64(-0.004828300327062607), 961), (np.float64(-0.004890882410109043), 102), (np.float64(-0.004894081503152847), 1178), (np.float64(-0.004899552091956139), 1210), (np.float64(-0.0049577930476516485), 557), (np.float64(-0.004958644509315491), 1217), (np.float64(-0.0049695546622388065), 139), (np.float64(-0.004982003942131996), 361), 
(np.float64(-0.004986512009054422), 494), (np.float64(-0.00502120703458786), 504), (np.float64(-0.0050244322046637535), 787), (np.float64(-0.005043705925345421), 1164), (np.float64(-0.005074195563793182), 604), (np.float64(-0.005097134271636605), 406), (np.float64(-0.005099068395793438), 1268), (np.float64(-0.005101373419165611), 922), (np.float64(-0.005114092491567135), 692), (np.float64(-0.005152318626642227), 384), (np.float64(-0.005154757760465145), 711), (np.float64(-0.0051582116866484284), 268), (np.float64(-0.005177075508981943), 352), (np.float64(-0.005228022113442421), 530), (np.float64(-0.00522840628400445), 170), (np.float64(-0.00528366956859827), 1056), (np.float64(-0.005368012934923172), 969), (np.float64(-0.005396544001996517), 1231), (np.float64(-0.005413727834820747), 882), (np.float64(-0.005450633354485035), 524), (np.float64(-0.005477700382471085), 850), (np.float64(-0.005487922579050064), 334), (np.float64(-0.005521275103092194), 161), (np.float64(-0.00552127743139863), 175), (np.float64(-0.0055327690206468105), 865), (np.float64(-0.005547152832150459), 1220), (np.float64(-0.005717332474887371), 8), (np.float64(-0.005721753463149071), 292), (np.float64(-0.00574151985347271), 18), (np.float64(-0.005785588873550296), 794), (np.float64(-0.005798071622848511), 644), (np.float64(-0.00580454315058887), 132), (np.float64(-0.005812995135784149), 69), (np.float64(-0.005842681974172592), 1227), (np.float64(-0.005866500549018383), 390), (np.float64(-0.005866686813533306), 1192), (np.float64(-0.005951972212642431), 875), (np.float64(-0.005956872366368771), 2), (np.float64(-0.0059717330150306225), 753), (np.float64(-0.005976843181997538), 1070), (np.float64(-0.005981167778372765), 970), (np.float64(-0.005995499901473522), 372), (np.float64(-0.006027504801750183), 258), (np.float64(-0.00602865032851696), 1248), (np.float64(-0.006082542240619659), 1262), (np.float64(-0.0061778719536960125), 1043), (np.float64(-0.0062213437631726265), 476), 
(np.float64(-0.006271482445299625), 618), (np.float64(-0.0063001555390655994), 111), (np.float64(-0.006341526517644525), 1066), (np.float64(-0.006352424621582031), 96), (np.float64(-0.006389547139406204), 148), (np.float64(-0.006395917385816574), 154), (np.float64(-0.006402084603905678), 764), (np.float64(-0.006419172510504723), 57), (np.float64(-0.006443299353122711), 1175), (np.float64(-0.0064479149878025055), 806), (np.float64(-0.006505150347948074), 241), (np.float64(-0.0065887924283742905), 884), (np.float64(-0.006634398130699992), 511), (np.float64(-0.006656968966126442), 224), (np.float64(-0.006672145798802376), 1117), (np.float64(-0.006685070693492889), 443), (np.float64(-0.006695944350212812), 842), (np.float64(-0.0067441752180457115), 666), (np.float64(-0.0068170540034770966), 398), (np.float64(-0.006845368072390556), 0), (np.float64(-0.006857441738247871), 1182), (np.float64(-0.006921172142028809), 610), (np.float64(-0.006955621996894479), 808), (np.float64(-0.007136133732274175), 439), (np.float64(-0.0071418872103095055), 792), (np.float64(-0.007211441406980157), 1166), (np.float64(-0.007216833531856537), 239), (np.float64(-0.007237443060148507), 586), (np.float64(-0.007238788530230522), 306), (np.float64(-0.00724145770072937), 107), (np.float64(-0.007245765998959541), 817), (np.float64(-0.007268328219652176), 1098), (np.float64(-0.007300347089767456), 555), (np.float64(-0.007359273731708527), 193), (np.float64(-0.007387600839138031), 201), (np.float64(-0.0074022915214300156), 99), (np.float64(-0.0074034701101481915), 1232), (np.float64(-0.0074781812727451324), 257), (np.float64(-0.007478212472051382), 432), (np.float64(-0.007487598806619644), 1089), (np.float64(-0.007495214231312275), 1242), (np.float64(-0.007495550438761711), 815), (np.float64(-0.007613290101289749), 599), (np.float64(-0.0076307556591928005), 263), (np.float64(-0.00764109194278717), 680), (np.float64(-0.007670147344470024), 526), (np.float64(-0.007670966908335686), 454), 
(np.float64(-0.0076980628073215485), 721), (np.float64(-0.007704330608248711), 1144), (np.float64(-0.007711821002885699), 576), (np.float64(-0.007724279537796974), 648), (np.float64(-0.0077279843389987946), 1012), (np.float64(-0.007735062390565872), 286), (np.float64(-0.007739881053566933), 737), (np.float64(-0.007761240005493164), 872), (np.float64(-0.0077701956033706665), 709), (np.float64(-0.007822123123332858), 349), (np.float64(-0.007830768823623657), 1033), (np.float64(-0.007877346128225327), 1270), (np.float64(-0.007887596264481544), 614), (np.float64(-0.00789869949221611), 246), (np.float64(-0.007918609771877527), 1110), (np.float64(-0.007958957925438881), 528), (np.float64(-0.007987448945641518), 1109), (np.float64(-0.00801955908536911), 1236), (np.float64(-0.008025538176298141), 1091), (np.float64(-0.008026821538805962), 881), (np.float64(-0.008048209361732006), 829), (np.float64(-0.008056597784161568), 499), (np.float64(-0.008070695213973522), 885), (np.float64(-0.00811498612165451), 689), (np.float64(-0.008118484169244766), 565), (np.float64(-0.00811863038688898), 1158), (np.float64(-0.008136065676808357), 1172), (np.float64(-0.008224183460697532), 675), (np.float64(-0.00822520349174738), 108), (np.float64(-0.008245592936873436), 40), (np.float64(-0.008287797681987286), 772), (np.float64(-0.008296319516375661), 140), (np.float64(-0.008329878211952746), 164), (np.float64(-0.008337317034602165), 392), (np.float64(-0.008361676707863808), 146), (np.float64(-0.008454600349068642), 225), (np.float64(-0.008487144485116005), 779), (np.float64(-0.008503612130880356), 533), (np.float64(-0.008577090688049793), 847), (np.float64(-0.008597470819950104), 562), (np.float64(-0.008647900074720383), 846), (np.float64(-0.008654453791677952), 925), (np.float64(-0.00868566706776619), 72), (np.float64(-0.008686518296599388), 302), (np.float64(-0.008748093619942665), 440), (np.float64(-0.008753710426390171), 827), (np.float64(-0.008770633023232222), 903), 
(np.float64(-0.008915271610021591), 1118), (np.float64(-0.008982018567621708), 431), (np.float64(-0.008990546382847242), 710), (np.float64(-0.009008477441966534), 541), (np.float64(-0.009019730612635612), 782), (np.float64(-0.009068425744771957), 157), (np.float64(-0.009090296924114227), 959), (np.float64(-0.00919034518301487), 697), (np.float64(-0.00919616175815463), 1028), (np.float64(-0.009215446189045906), 824), (np.float64(-0.009354566223919392), 927), (np.float64(-0.009406345896422863), 1042), (np.float64(-0.00941525585949421), 816), (np.float64(-0.009422685950994492), 650), (np.float64(-0.00945487868739292), 1143), (np.float64(-0.009470253251492977), 478), (np.float64(-0.009476244449615479), 821), (np.float64(-0.009483122266829014), 963), (np.float64(-0.009523652493953705), 887), (np.float64(-0.00956575758755207), 895), (np.float64(-0.00958152487874031), 356), (np.float64(-0.009585897030774504), 305), (np.float64(-0.009609293192625046), 723), (np.float64(-0.009649815503507853), 101), (np.float64(-0.009656770154833794), 1271), (np.float64(-0.009658633265644312), 463), (np.float64(-0.009679041802883148), 958), (np.float64(-0.009686156583484262), 295), (np.float64(-0.009690960869193077), 3), (np.float64(-0.009700579568743706), 642), (np.float64(-0.009711027145385742), 495), (np.float64(-0.009808659553527832), 311), (np.float64(-0.009825988119700924), 1197), (np.float64(-0.009919967502355576), 770), (np.float64(-0.00992558989673853), 915), (np.float64(-0.00995855126529932), 860), (np.float64(-0.00996008887887001), 1130), (np.float64(-0.009978827089071274), 332), (np.float64(-0.009996309876441956), 324), (np.float64(-0.01000710017979145), 844), (np.float64(-0.010009054094552994), 800), (np.float64(-0.010019579902291298), 947), (np.float64(-0.010040998458862305), 202), (np.float64(-0.01004641002509743), 994), (np.float64(-0.010080473497509956), 151), (np.float64(-0.01013161102309823), 869), (np.float64(-0.010141927748918533), 919), 
(np.float64(-0.010255863424390554), 898), (np.float64(-0.010261062532663345), 665), (np.float64(-0.010276105254888535), 861), (np.float64(-0.010293344035744667), 458), (np.float64(-0.010300910100340843), 726), (np.float64(-0.010341383516788483), 325), (np.float64(-0.010353345642215572), 635), (np.float64(-0.010372515767812729), 1087), (np.float64(-0.010401349514722824), 1000), (np.float64(-0.010496689938008785), 647), (np.float64(-0.01055026613175869), 1179), (np.float64(-0.01059710793197155), 1208), (np.float64(-0.010630078613758087), 606), (np.float64(-0.010634157806634903), 1081), (np.float64(-0.010682531632483006), 435), (np.float64(-0.010732075199484825), 318), (np.float64(-0.010796718299388885), 940), (np.float64(-0.010817267000675201), 12), (np.float64(-0.010851098224520683), 134), (np.float64(-0.010936714708805084), 505), (np.float64(-0.01104088919237256), 991), (np.float64(-0.011065786704421043), 79), (np.float64(-0.011071410030126572), 5), (np.float64(-0.011135176755487919), 867), (np.float64(-0.011154929175972939), 1126), (np.float64(-0.011161897331476212), 1272), (np.float64(-0.01126299798488617), 1238), (np.float64(-0.011272568255662918), 1112), (np.float64(-0.011295948177576065), 634), (np.float64(-0.01129804365336895), 780), (np.float64(-0.01131666952278465), 705), (np.float64(-0.01133042573928833), 874), (np.float64(-0.011340262368321419), 1185), (np.float64(-0.011386333149857819), 479), (np.float64(-0.011390786617994308), 1101), (np.float64(-0.011419500224292278), 227), (np.float64(-0.011442882008850574), 620), (np.float64(-0.01144443266093731), 660), (np.float64(-0.011451411992311478), 1224), (np.float64(-0.011496592778712511), 633), (np.float64(-0.011498132022097707), 1030), (np.float64(-0.011539971455931664), 798), (np.float64(-0.011559644713997841), 217), (np.float64(-0.011612750589847565), 209), (np.float64(-0.0116298608481884), 579), (np.float64(-0.011648551328107715), 1034), (np.float64(-0.011675120331346989), 149), 
(np.float64(-0.011696023866534233), 567), (np.float64(-0.011698195710778236), 160), (np.float64(-0.011711067520081997), 1167), (np.float64(-0.0117823276668787), 1163), (np.float64(-0.011843113228678703), 1029), (np.float64(-0.011853933800011873), 535), (np.float64(-0.01186610758304596), 591), (np.float64(-0.011869622394442558), 1062), (np.float64(-0.011904150247573853), 670), (np.float64(-0.011912490415852517), 74), (np.float64(-0.011915481183677912), 864), (np.float64(-0.011936145718209445), 841), (np.float64(-0.012056197971105576), 315), (np.float64(-0.012068057432770729), 964), (np.float64(-0.012071516364812851), 1078), (np.float64(-0.012117337435483932), 337), (np.float64(-0.012127349153161049), 452), (np.float64(-0.012201700359582901), 877), (np.float64(-0.01221482828259468), 1128), (np.float64(-0.012246077458257787), 449), (np.float64(-0.012495806440711021), 682), (np.float64(-0.012575287837535143), 177), (np.float64(-0.012676802929490805), 436), (np.float64(-0.012683648616075516), 198), (np.float64(-0.012702982407063246), 914), (np.float64(-0.012723691645078361), 523), (np.float64(-0.012760473415255547), 757), (np.float64(-0.012779026292264462), 48), (np.float64(-0.012828223407268524), 852), (np.float64(-0.012916180305182934), 1016), (np.float64(-0.012964524328708649), 636), (np.float64(-0.012966942158527672), 1150), (np.float64(-0.012986363843083382), 247), (np.float64(-0.013018159195780754), 1054), (np.float64(-0.013066044077277184), 996), (np.float64(-0.013116424903273582), 693), (np.float64(-0.01320071890950203), 833), (np.float64(-0.013295786455273628), 87), (np.float64(-0.013389321975409985), 373), (np.float64(-0.013403642922639847), 572), (np.float64(-0.013429042883217335), 862), (np.float64(-0.013557696132920682), 81), (np.float64(-0.01362999901175499), 677), (np.float64(-0.013825366098899394), 364), (np.float64(-0.013825431524310261), 105), (np.float64(-0.013843605294823647), 949), (np.float64(-0.013897279277443886), 369), 
(np.float64(-0.013940966688096523), 1046), (np.float64(-0.013956151902675629), 573), (np.float64(-0.013969846069812775), 1096), (np.float64(-0.01397152990102768), 1047), (np.float64(-0.01400591991841793), 781), (np.float64(-0.014015112072229385), 466), (np.float64(-0.014026038348674774), 1148), (np.float64(-0.014037872664630413), 617), (np.float64(-0.014065070077776909), 544), (np.float64(-0.014098634012043476), 929), (np.float64(-0.01418947521597147), 1266), (np.float64(-0.014249518513679504), 54), (np.float64(-0.014250874519348145), 543), (np.float64(-0.014288587495684624), 585), (np.float64(-0.01435130089521408), 556), (np.float64(-0.014362127520143986), 1213), (np.float64(-0.014388982206583023), 272), (np.float64(-0.01446759165264666), 44), (np.float64(-0.0145144232083112), 941), (np.float64(-0.01452496147248894), 616), (np.float64(-0.01462503895163536), 320), (np.float64(-0.014764860272407532), 859), (np.float64(-0.014927219599485397), 878), (np.float64(-0.014956824481487274), 622), (np.float64(-0.014958196319639683), 1215), (np.float64(-0.014981732238084078), 1050), (np.float64(-0.015076996758580208), 763), (np.float64(-0.015152443200349808), 497), (np.float64(-0.01515391655266285), 1156), (np.float64(-0.015214354265481234), 519), (np.float64(-0.015253475634381175), 343), (np.float64(-0.015282157342880964), 906), (np.float64(-0.015289867296814919), 386), (np.float64(-0.015292404219508171), 434), (np.float64(-0.01533450186252594), 672), (np.float64(-0.015339143574237823), 1195), (np.float64(-0.015478499233722687), 734), (np.float64(-0.015486100688576698), 501), (np.float64(-0.015492841601371765), 513), (np.float64(-0.015516646206378937), 233), (np.float64(-0.015645429491996765), 773), (np.float64(-0.0157010480761528), 155), (np.float64(-0.015702321310527623), 597), (np.float64(-0.015805164322955534), 849), (np.float64(-0.015820898115634918), 1277), (np.float64(-0.015887961140833795), 357), (np.float64(-0.015930459601804614), 645), 
(np.float64(-0.016012540087103844), 517), (np.float64(-0.016064459457993507), 1048), (np.float64(-0.016075864201411605), 1201), (np.float64(-0.016258132178336382), 574), (np.float64(-0.01628638431429863), 1083), (np.float64(-0.016317928209900856), 1037), (np.float64(-0.016351506114006042), 378), (np.float64(-0.016361628659069538), 413), (np.float64(-0.016448184847831726), 1076), (np.float64(-0.01659383624792099), 1211), (np.float64(-0.016631217673420906), 383), (np.float64(-0.01673525758087635), 133), (np.float64(-0.016851751133799553), 1171), (np.float64(-0.017033321782946587), 345), (np.float64(-0.01705419272184372), 1138), (np.float64(-0.01710225734859705), 1140), (np.float64(-0.017209792509675026), 1105), (np.float64(-0.01725015789270401), 818), (np.float64(-0.017269250005483627), 228), (np.float64(-0.017377035692334175), 208), (np.float64(-0.017424164339900017), 1226), (np.float64(-0.017525162547826767), 1141), (np.float64(-0.01753891631960869), 942), (np.float64(-0.017641677986830473), 744), (np.float64(-0.017784817813662812), 1122), (np.float64(-0.017843062058091164), 760), (np.float64(-0.0179891474545002), 1152), (np.float64(-0.018129284493625164), 1003), (np.float64(-0.01813964545726776), 326), (np.float64(-0.01821867097169161), 658), (np.float64(-0.018225931096822023), 120), (np.float64(-0.018232353730127215), 679), (np.float64(-0.01825845241546631), 978), (np.float64(-0.01828348310664296), 924), (np.float64(-0.018405072391033173), 116), (np.float64(-0.018510638969019055), 438), (np.float64(-0.018527057953178883), 685), (np.float64(-0.01854758709669113), 94), (np.float64(-0.018557699862867594), 502), (np.float64(-0.01855983817949891), 554), (np.float64(-0.01861389074474573), 445), (np.float64(-0.018743810476735234), 199), (np.float64(-0.018759075552225113), 669), (np.float64(-0.018766134977340698), 1261), (np.float64(-0.01877213642001152), 82), (np.float64(-0.01886759651824832), 890), (np.float64(-0.018873335095122457), 453), 
(np.float64(-0.018922503106296062), 962), (np.float64(-0.01896221563220024), 698), (np.float64(-0.019119519740343094), 988), (np.float64(-0.019161410629749298), 745), (np.float64(-0.01922638714313507), 1026), (np.float64(-0.019319428130984306), 917), (np.float64(-0.01936191599816084), 344), (np.float64(-0.019529331475496292), 1071), (np.float64(-0.01953260414302349), 484), (np.float64(-0.019547119736671448), 93), (np.float64(-0.019778557121753693), 1020), (np.float64(-0.019798152148723602), 643), (np.float64(-0.019840769469738007), 1233), (np.float64(-0.019868917763233185), 238), (np.float64(-0.019939441233873367), 24), (np.float64(-0.019996959250420332), 240), (np.float64(-0.02009878307580948), 1173), (np.float64(-0.020234012976288795), 1142), (np.float64(-0.020388811826705933), 795), (np.float64(-0.020517916418612003), 223), (np.float64(-0.020581429824233055), 420), (np.float64(-0.020620129944290966), 1039), (np.float64(-0.02065638266503811), 840), (np.float64(-0.020701369736343622), 293), (np.float64(-0.020729095675051212), 837), (np.float64(-0.02078204322606325), 7), (np.float64(-0.02080471720546484), 1038), (np.float64(-0.02098647691309452), 799), (np.float64(-0.02110620215535164), 980), (np.float64(-0.021208827383816242), 221), (np.float64(-0.021225396543741226), 558), (np.float64(-0.021289877127856016), 1157), (np.float64(-0.021346226800233126), 539), (np.float64(-0.02135976031422615), 464), (np.float64(-0.021395526826381683), 1044), (np.float64(-0.021515470929443836), 831), (np.float64(-0.021526120603084564), 1082), (np.float64(-0.021593546494841576), 31), (np.float64(-0.021753717213869095), 256), (np.float64(-0.02176509378477931), 491), (np.float64(-0.022058885544538498), 1064), (np.float64(-0.022263603284955025), 834), (np.float64(-0.022382635856047273), 954), (np.float64(-0.02256319299340248), 955), (np.float64(-0.022585909813642502), 747), (np.float64(-0.0230522045167163), 1275), (np.float64(-0.023085430613718927), 408), 
(np.float64(-0.023244470125064254), 641), (np.float64(-0.023779388517141342), 467), (np.float64(-0.02389063686132431), 424), (np.float64(-0.023980213329195976), 1093), (np.float64(-0.024013042449951172), 1123), (np.float64(-0.02409262489527464), 304), (np.float64(-0.024205811321735382), 1190), (np.float64(-0.025242964271456003), 699), (np.float64(-0.0252546314150095), 759), (np.float64(-0.025966100860387087), 264), (np.float64(-0.02601127838715911), 529), (np.float64(-0.026383422315120697), 60), (np.float64(-0.026427260600030422), 141), (np.float64(-0.02657921239733696), 1023), (np.float64(-0.026649098843336105), 1059), (np.float64(-0.02696368470788002), 538), (np.float64(-0.026969667291268706), 894), (np.float64(-0.02746322425082326), 1257), (np.float64(-0.027611277997493744), 805), (np.float64(-0.027830212842673063), 437), (np.float64(-0.028053276240825653), 422), (np.float64(-0.02825447265058756), 588), (np.float64(-0.02829993050545454), 784), (np.float64(-0.02857894729822874), 676), (np.float64(-0.028608759865164757), 1240), (np.float64(-0.029095172882080078), 1135), (np.float64(-0.029543783515691757), 712), (np.float64(-0.029558178037405014), 552), (np.float64(-0.029595278203487396), 741), (np.float64(-0.029920198023319244), 254), (np.float64(-0.02996830642223358), 632), (np.float64(-0.030348291620612144), 400), (np.float64(-0.0308592370711267), 640), (np.float64(-0.032068658620119095), 725), (np.float64(-0.0321959547836741), 259), (np.float64(-0.03227374702692032), 1230), (np.float64(-0.033374167047441006), 997), (np.float64(-0.033400426618754864), 1104), (np.float64(-0.03494056686758995), 407), (np.float64(-0.035000767558813095), 327), (np.float64(-0.03854627627879381), 851), (np.float64(-0.039288025349378586), 23), (np.float64(-0.04906845884397626), 226), (np.float64(-0.10356737673282623), 690), (np.float64(-0.17738884687423706), 736), (np.float64(-0.18347840011119843), 876)]\n" + "Num Features: 1632\n" ] } ], "source": [ - "print(p_vals)" - ] - }, - { - 
"cell_type": "code", - "execution_count": 5, - "id": "da101158", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'p_vals' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(\u001b[43mp_vals\u001b[49m))\n\u001b[32m 2\u001b[39m indexed_list = [(value, index) \u001b[38;5;28;01mfor\u001b[39;00m index, value \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(p_vals)]\n\u001b[32m 3\u001b[39m sorted_indexed_list = \u001b[38;5;28msorted\u001b[39m(indexed_list, key=\u001b[38;5;28;01mlambda\u001b[39;00m x: x[\u001b[32m0\u001b[39m], reverse=\u001b[38;5;28;01mFalse\u001b[39;00m)\n", - "\u001b[31mNameError\u001b[39m: name 'p_vals' is not defined" - ] - } - ], - "source": [ - "print(len(p_vals))\n", - "indexed_list = [(value, index) for index, value in enumerate(p_vals)]\n", - "sorted_indexed_list = sorted(indexed_list, key=lambda x: x[0], reverse=False)\n", - "print(sorted_indexed_list)\n", + "#############################\n", + "# --- Feature Selection --- #\n", + "#############################\n", "\n", - "#with open('best_embedding_vars_whitney2.pkl', 'wb') as f:\n", - "# pickle.dump(sorted_indexed_list, f)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c866b393", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (84_439, 15)
emb_1271emb_1272emb_1273emb_1274emb_1275emb_1276emb_1277emb_1278emb_1279emb_1280embeddingsresiduestrain_boolsepitope_boolsrsa_vals
f64f64f64f64f64f64f64f64f64f64list[f64]strboolboolf64
-0.390364-0.207083-0.0288720.1334240.464410.105135-0.0357880.2468070.096495117.0[-0.079217, -0.08223, … 117.0]"L"truefalse0.205823
0.062281-0.1553070.1702650.0127590.1842510.210409-0.0662360.0838130.026999117.0[0.271906, 0.131599, … 117.0]"I"truefalse0.471213
-0.100564-0.172441-0.105397-0.1155310.156894-0.043453-0.367025-0.070912-0.130206117.0[0.075211, -0.124738, … 117.0]"Q"truefalse0.046812
-0.034119-0.089407-0.252221-0.0862750.2001420.02470.14825-0.084408-0.163228117.0[0.033206, 0.13658, … 117.0]"T"truefalse0.437416
0.046669-0.072711-0.144304-0.0246760.2585580.335825-0.0847740.0455890.106432117.0[-0.153488, 0.178101, … 117.0]"P"truefalse0.312792
-0.1211330.105609-0.023850.1043860.0417190.245388-0.067179-0.1235210.20393847.0[-0.102616, 0.023357, … 47.0]"V"falsefalse0.09529
0.019410.025256-0.039905-0.0449910.11730.097847-0.062373-0.1105870.09324247.0[0.006365, -0.054578, … 47.0]"Q"falsefalse0.559269
-0.149773-0.019055-0.0687770.16741-0.2558310.133178-0.049465-0.1390630.12068947.0[-0.021138, 0.060409, … 47.0]"K"falsefalse0.883928
-0.1839280.066849-0.0263670.3147210.0152160.1480860.106204-0.0894170.28617347.0[-0.013476, 0.081914, … 47.0]"A"falsefalse0.828726
-0.147168-0.089189-0.023737-0.1751490.1075640.231201-0.01757-0.2529760.13080747.0[-0.079727, 0.132829, … 47.0]"Q"falsefalse0.637367
" - ], - "text/plain": [ - "shape: (84_439, 15)\n", - "┌───────────┬───────────┬───────────┬───────────┬───┬──────────┬────────────┬───────────┬──────────┐\n", - "│ emb_1271 ┆ emb_1272 ┆ emb_1273 ┆ emb_1274 ┆ … ┆ residues ┆ train_bool ┆ epitope_b ┆ rsa_vals │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ s ┆ ools ┆ --- │\n", - "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ str ┆ --- ┆ --- ┆ f64 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ bool ┆ bool ┆ │\n", - "╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪══════════╪════════════╪═══════════╪══════════╡\n", - "│ -0.390364 ┆ -0.207083 ┆ -0.028872 ┆ 0.133424 ┆ … ┆ L ┆ true ┆ false ┆ 0.205823 │\n", - "│ 0.062281 ┆ -0.155307 ┆ 0.170265 ┆ 0.012759 ┆ … ┆ I ┆ true ┆ false ┆ 0.471213 │\n", - "│ -0.100564 ┆ -0.172441 ┆ -0.105397 ┆ -0.115531 ┆ … ┆ Q ┆ true ┆ false ┆ 0.046812 │\n", - "│ -0.034119 ┆ -0.089407 ┆ -0.252221 ┆ -0.086275 ┆ … ┆ T ┆ true ┆ false ┆ 0.437416 │\n", - "│ 0.046669 ┆ -0.072711 ┆ -0.144304 ┆ -0.024676 ┆ … ┆ P ┆ true ┆ false ┆ 0.312792 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ -0.121133 ┆ 0.105609 ┆ -0.02385 ┆ 0.104386 ┆ … ┆ V ┆ false ┆ false ┆ 0.09529 │\n", - "│ 0.01941 ┆ 0.025256 ┆ -0.039905 ┆ -0.044991 ┆ … ┆ Q ┆ false ┆ false ┆ 0.559269 │\n", - "│ -0.149773 ┆ -0.019055 ┆ -0.068777 ┆ 0.16741 ┆ … ┆ K ┆ false ┆ false ┆ 0.883928 │\n", - "│ -0.183928 ┆ 0.066849 ┆ -0.026367 ┆ 0.314721 ┆ … ┆ A ┆ false ┆ false ┆ 0.828726 │\n", - "│ -0.147168 ┆ -0.089189 ┆ -0.023737 ┆ -0.175149 ┆ … ┆ Q ┆ false ┆ false ┆ 0.637367 │\n", - "└───────────┴───────────┴───────────┴───────────┴───┴──────────┴────────────┴───────────┴──────────┘" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "emb_1280 = []\n", - "emb_1279 = []\n", - "emb_1278 = []\n", - "emb_1277 = []\n", - "emb_1276 = []\n", - "emb_1275 = []\n", - "emb_1274 = []\n", - "emb_1273 = []\n", - "emb_1272 = []\n", - "emb_1271 = []\n", - "for (idx, embedding, residue, train_bool, epitope_bool, rsa) in bp3_res.iter_rows():\n", - " 
emb_1280.append(embedding[1280])\n", - " emb_1279.append(embedding[1279])\n", - " emb_1278.append(embedding[1278])\n", - " emb_1277.append(embedding[1277])\n", - " emb_1276.append(embedding[1276])\n", - " emb_1275.append(embedding[1275])\n", - " emb_1274.append(embedding[1274])\n", - " emb_1273.append(embedding[1273])\n", - " emb_1272.append(embedding[1272])\n", - " emb_1271.append(embedding[1271])\n", - "\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1280\", emb_1280))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1279\", emb_1279))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1278\", emb_1278))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1277\", emb_1277))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1276\", emb_1276))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1275\", emb_1275))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1274\", emb_1274))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1273\", emb_1273))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1272\", emb_1272))\n", - "bp3_res.insert_column(0, pl.Series(\"emb_1271\", emb_1271))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "d3becb0b", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "" - }, - "metadata": { - "image/png": { - "height": 480, - "width": 640 - } - }, - "output_type": "display_data" - } - ], - "source": [ - "(\n", - "ggplot(bp3_res, aes(x = epitope_bools, y = emb_1276))\n", - "+ geom_boxplot()\n", - "+ labs(\n", - " x = \"Epitope Status\",\n", - " y = \"Embedding var 1160\"\n", - ")\n", - "#+ geom_jitter()\n", - ")" + "agg_features = []\n", + "\n", + "for emb in range(NUM_ESM_EMB_VARS):\n", + " esm_vars = f\"esm_{emb}\"\n", + " agg_features.append(esm_vars)\n", + "\n", + "for emb in range(NUM_AF3_EMB_VARS):\n", + " af3_vars = f\"af3_{emb}\"\n", + " agg_features.append(af3_vars)\n", + "\n", + "#agg_features.extend([\n", + "# 'closeness_centrality', 'betweenness_centrality', 'load_centrality', \n", + "# 'eigenvector_centrality', 
'degree_centrality', 'clustering', \n", + "# 'coreness', 'triangles', 'density', 'lapl_n1', 'lapl_f'])\n", + "agg_features.append(\"seq_len\") \n", + "#agg_features.append('pLDDT')\n", + "agg_features.append(\"ptm\") \n", + "agg_features.append(\"rsa\")\n", + "agg_features.append(\"sa\")\n", + "\n", + "print(f\"Num Features: {len(agg_features)}\")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "0a766107", + "execution_count": 187, + "id": "54f52a38", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "--- Cross-Validation Fold Details ---\n" + "--- Cross-Validation Fold Details ---\n", + "Fold 1: Train AUC = 0.7720, Test AUC = 0.7450\n", + "Fold 2: Train AUC = 0.7781, Test AUC = 0.7380\n", + "Fold 3: Train AUC = 0.7792, Test AUC = 0.7384\n", + "Fold 4: Train AUC = 0.7748, Test AUC = 0.7583\n", + "Fold 5: Train AUC = 0.7717, Test AUC = 0.7499\n", + "\n", + "--- Overfitting Check ---\n", + "PCA Variance Kept: 95.0% with 1103.6000 (+/- 3.3823) components\n", + "Average Training AUC across folds: 0.7752 (+/- 0.0031)\n", + "Average Test (Validation) AUC across folds: 0.7459 (+/- 0.0076)\n" ] } ], "source": [ - "# --- BP3 CV Evaluation ---\n", - "\n", - "agg_features = []\n", - "for emb in range(1281):\n", - " field = \"field_\" + str(emb)\n", - " agg_features.append(field)\n", - "agg_features.append(\"rsa_vals\")\n", + "###################################\n", + "# --- 5-Fold Cross Validation --- #\n", + "###################################\n", "\n", - "train_df = bp3_res.to_pandas()\n", + "train_df = bp3_train_df.to_pandas()\n", "X_df = train_df[agg_features]\n", - "y_df = train_df[\"epitope_bools\"]\n", + "y_df = train_df[\"epitope\"]\n", "\n", "X = X_df.values\n", "y = y_df.values\n", "\n", "n_splits = 5\n", - "cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=1)\n", + "cv = KFold(\n", + " n_splits=n_splits, \n", + " shuffle=False,\n", + " #random_state=11\n", + " )\n", "\n", "train_auc_scores = 
[]\n", "test_auc_scores = []\n", - "\n", + "components = []\n", "\n", "print(\"--- Cross-Validation Fold Details ---\")\n", "for fold, (train_index, test_index) in enumerate(cv.split(X, y)):\n", + "\n", + " # --- Cross Validation ---\n", " X_train, X_test = X[train_index], X[test_index]\n", " y_train, y_test = y[train_index], y[test_index]\n", "\n", - " # --- Scale Features ---\n", + " # --- Scale Features (Required for PCA) ---\n", " scaler = StandardScaler() \n", " scaler.fit(X_train) \n", " X_train = scaler.transform(X_train) \n", " X_test = scaler.transform(X_test) \n", "\n", - " # --- Choose Classifier ---\n", - " neg_count = (y_train == 0).sum()\n", - " pos_count = (y_train == 1).sum()\n", - " scale_pos_weight_value = neg_count / pos_count if pos_count > 0 else 1\n", + " # --- Calibrate PCA ---\n", + " pca = PCA(n_components=None, random_state=11) # n_components=None keeps all 1280 components\n", + " pca.fit(X_train)\n", + "\n", + " # Calculate the cumulative explained variance\n", + " cumulative_variance = np.cumsum(pca.explained_variance_ratio_)\n", + " variance_threshold = 0.95\n", + " optimal_k = np.where(cumulative_variance >= variance_threshold)[0][0] + 1\n", + " components.append(optimal_k)\n", + " pca_final = PCA(n_components=optimal_k, random_state=11)\n", + "\n", + " # --- Enable PCA ---\n", + " #pca_final = PCA(n_components=1111, random_state=11)\n", + " X_train = pca_final.fit_transform(X_train)\n", + " X_test = pca_final.transform(X_test)\n", + "\n", + " # Lasso Regularization (best so far)\n", + " clf = LogisticRegression(solver=\"saga\", class_weight=\"balanced\", penalty='l1', C=0.0025, max_iter=500, n_jobs=-1, random_state=11)\n", + "\n", + " # Ridge Regularization\n", + " #clf = LogisticRegression(solver=\"saga\", class_weight=\"balanced\", penalty='l2', C=0.000025, max_iter=500, n_jobs=-1, random_state=11)\n", "\n", - " #clf = RandomForestClassifier(class_weight=\"balanced\")\n", - " #clf = LogisticRegression(class_weight=\"balanced\", 
penalty=\"l2\", max_iter=10000, n_jobs=-1)\n", - " clf = MLPClassifier(alpha=5)\n", " clf.fit(X_train, y_train)\n", "\n", " # --- Training AUC Calculation ---\n", @@ -663,18 +610,85 @@ "mean_auc_test = np.mean(test_auc_scores)\n", "std_auc_test = np.std(test_auc_scores)\n", "\n", + "# Mean PCA Components\n", + "mean_components = np.mean(components)\n", + "std_components = np.std(components)\n", + "\n", "# --- Overfitting Check Section ---\n", "print(\"\\n--- Overfitting Check ---\")\n", "mean_train_auc = np.mean(train_auc_scores)\n", "std_train_auc = np.std(train_auc_scores)\n", "\n", - "print(\n", - " f\"Average Training AUC across folds: {mean_train_auc:.4f} (+/- {std_train_auc:.4f})\"\n", - ")\n", + "print(f\"PCA Variance Kept: {variance_threshold*100}% with {mean_components:.4f} (+/- {std_components:.4f}) components\")\n", + "\n", + "print(f\"Average Training AUC across folds: {mean_train_auc:.4f} (+/- {std_train_auc:.4f})\")\n", "print(\n", " f\"Average Test (Validation) AUC across folds: {mean_auc_test:.4f} (+/- {std_auc_test:.4f})\"\n", ")" ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "9ec60245", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PCA Variance Kept: 95.0% with 1024 components\n", + "Train AUC = 0.7421, Test AUC = 0.7703\n" + ] + } + ], + "source": [ + "################################\n", + "# --- BP3 Final Evaluation --- #\n", + "################################\n", + "\n", + "train_df = bp3_train_df.to_pandas()\n", + "X_train = train_df[agg_features]\n", + "y_train = train_df[\"epitope\"]\n", + "\n", + "test_df = bp3_test_df.to_pandas()\n", + "X_test = test_df[agg_features]\n", + "y_test = test_df[\"epitope\"]\n", + "\n", + "# --- Scale Features (Required for PCA) ---\n", + "scaler = StandardScaler() \n", + "scaler.fit(X_train) \n", + "X_train = scaler.transform(X_train) \n", + "X_test = scaler.transform(X_test) \n", + "\n", + "# --- PCA ---\n", + "pca = 
PCA(n_components=None, random_state=11) # n_components=None keeps all 1280 components\n", + "pca.fit(X_train)\n", + "cumulative_variance = np.cumsum(pca.explained_variance_ratio_)\n", + "variance_threshold = 0.95\n", + "optimal_k = np.where(cumulative_variance >= variance_threshold)[0][0] + 1\n", + "components.append(optimal_k)\n", + "pca_final = PCA(n_components=optimal_k, random_state=11)\n", + "X_train = pca_final.fit_transform(X_train)\n", + "X_test = pca_final.transform(X_test)\n", + "\n", + "# --- Fit Model ---\n", + "clf = LogisticRegression(solver=\"saga\", class_weight=\"balanced\", penalty='l1', C=0.0025, max_iter=500, n_jobs=-1, random_state=11)\n", + "clf.fit(X_train, y_train)\n", + "\n", + "# --- Training AUC Calculation ---\n", + "y_train_proba = clf.predict_proba(X_train)[:, 1]\n", + "train_auc = roc_auc_score(y_train, y_train_proba)\n", + "train_auc_scores.append(train_auc)\n", + "\n", + "# --- Test AUC Calculation ---\n", + "y_test_proba = clf.predict_proba(X_test)[:, 1]\n", + "test_auc = roc_auc_score(y_test, y_test_proba)\n", + "test_auc_scores.append(test_auc)\n", + "\n", + "print(f\"PCA Variance Kept: {variance_threshold*100}% with {optimal_k} components\")\n", + "print(f\"Train AUC = {train_auc:.4f}, Test AUC = {test_auc:.4f}\")" + ] } ], "metadata": {