diff --git a/.gitignore b/.gitignore index dd99524..e21e2e1 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,9 @@ wheels/ # OS dependent files .DS_Store + +# unrelated files +.coverage +*.ipynb +!examples/*.ipynb + diff --git a/Makefile b/Makefile index 05f5a18..b2d4d6a 100644 --- a/Makefile +++ b/Makefile @@ -13,12 +13,18 @@ test: ## Run tests without regression uv run ruff check uv run pytest tests -m "not slow" +fix-and-test: + $(MAKE) ruff-fix + $(MAKE) test-all + test-all: - uv run ruff check uv run pytest tests test-coverage: ## Run tests and calculate test coverage uv run pytest --cov=bbttest tests +ruff-fix: + uv run ruff check --fix + help: @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' diff --git a/README.md b/README.md index a8f4d74..0076b35 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ pip install git+https://github.com/scikit-fingerprints/bbt-test To generate results from BBT model you need to first fit posterior MCMC samples. BBT-Test supports unpaired (1 metric readout per algorithm per dataset) and paired (multiple metric readouts per algorithm per dataset) data. +For hands-on example of using the package, check out our example notebook: [01_simple_bbt_comparison.ipynb](examples/01_simple_bbt_comparison.ipynb). 
+ ### Unpaired posterior fitting Start with single dataframe in shape (n_datasets, n_algorithms), optionally this dataframe can contain a dataset column: diff --git a/bbttest/bbt/_types.py b/bbttest/bbt/_types.py index 9edc5ba..deac7ea 100644 --- a/bbttest/bbt/_types.py +++ b/bbttest/bbt/_types.py @@ -20,6 +20,8 @@ "in_rope", "weak_interpretation", "strong_interpretation", + "weak_interpretation_raw", + "strong_interpretation_raw", ] ALL_PROPERTIES_COLUMNS: list[ReportedPropertyColumnType] = list( diff --git a/bbttest/bbt/_utils.py b/bbttest/bbt/_utils.py index c224112..dc120fc 100644 --- a/bbttest/bbt/_utils.py +++ b/bbttest/bbt/_utils.py @@ -31,6 +31,9 @@ def _validate_params(func): @wraps(func) def wrapper(*args, **kwargs): + for kwarg in kwargs: + if kwarg not in sig.parameters: + raise ValueError(f"Unexpected keyword argument '{kwarg}'") bound_args = sig.bind(*args, **kwargs) bound_args.apply_defaults() for name, value in bound_args.arguments.items(): diff --git a/bbttest/bbt/py_bbt.py b/bbttest/bbt/py_bbt.py index db6126a..c800c17 100644 --- a/bbttest/bbt/py_bbt.py +++ b/bbttest/bbt/py_bbt.py @@ -4,7 +4,12 @@ import numpy as np import pandas as pd -from ._types import HyperPriorType, ReportedPropertyColumnType, TieSolverType +from ._types import ( + ALL_PROPERTIES_COLUMNS, + HyperPriorType, + ReportedPropertyColumnType, + TieSolverType, +) from ._utils import _validate_params from .alg import _construct_win_table, _get_pwin, _hdi from .model import _mcmcbbt_pymc @@ -79,6 +84,8 @@ class PyBBT: `_ """ + ALL_PROPERTIES_COLUMNS = ALL_PROPERTIES_COLUMNS + _WEAK_INTERPRETATION_THRESHOLD = 0.95 _STRONG_INTERPRETATION_BETTER_THRESHOLD = 0.70 _STRONG_INTERPRETATION_EQUAL_THRESHOLD = 0.55 @@ -231,7 +238,7 @@ def posterior_table( out_table["strong_interpretation_raw"] = np.where( out_table["mean"] > self._STRONG_INTERPRETATION_BETTER_THRESHOLD, - out_table["left_model"] + ">", + ">", np.where( out_table["mean"] <= self._STRONG_INTERPRETATION_EQUAL_THRESHOLD, "=", @@ 
-262,6 +269,7 @@ def posterior_table( ) return out_table[["pair", *columns]] + @_validate_params def rope_comparison_control_table( self, rope_values: Sequence[tuple[float, float]], @@ -306,8 +314,9 @@ def rope_comparison_control_table( selected_models=selected_models, columns=( "left_model", - "weak_interpretation", - "strong_interpretation", + "right_model", + "weak_interpretation_raw", + "strong_interpretation_raw", ), ) better_models: list[str] = [] @@ -316,18 +325,29 @@ def rope_comparison_control_table( unknown_models: list[str] = [] for _, row in posterior_df.iterrows(): interpretation_col = ( - "weak_interpretation" + "weak_interpretation_raw" if interpretation == "weak" - else "strong_interpretation" + else "strong_interpretation_raw" ) - if row[interpretation_col] == f"{row['left_model']} better": - better_models.append(row["left_model"]) - elif row[interpretation_col] == "Equivalent": - equivalent_models.append(row["left_model"]) - elif row[interpretation_col] == "Unknown": - unknown_models.append(row["left_model"]) + non_control_model = ( + row["right_model"] + if row["left_model"] == control_model + else row["left_model"] + ) + + if row[interpretation_col] == ">": + if row["left_model"] == control_model: + worse_models.append(non_control_model) + else: + better_models.append(non_control_model) + elif row[interpretation_col] == "=": + equivalent_models.append(non_control_model) + elif row[interpretation_col] == "?": + unknown_models.append(non_control_model) else: - worse_models.append(row["left_model"]) + raise RuntimeError( + f"Unexpected interpretation value {row[interpretation_col]} in row {row['pair']}. Please report this as a bug." 
+ ) if not return_as_array: better_models_str = join_char.join(better_models) equivalent_models_str = join_char.join(equivalent_models) diff --git a/datasets/benchmarking_mol.csv b/datasets/benchmarking_mol.csv new file mode 100644 index 0000000..9f8d95e --- /dev/null +++ b/datasets/benchmarking_mol.csv @@ -0,0 +1,26 @@ +dataset,AtomPair_count,CDDD,CLAMP,ChemBERTa-10M-MTR,ChemFM-3B,ChemGPT-4.7M,ECFP_count,GEM,GNN-GraphCL-sum,GraphFP-CP,GraphMVP_CP-max,MoLFormer-XL-both-10pct,SELFormer-Lite,SimSon,TT,chemformer_mask,coati,grover_large,mat_masking_2M,mol2vec,mol_r_tag_1024,molbert,rmat_4M,unimolv1,unimolv2 +AMES,0.8145254459652627,0.8355244864790773,0.8459975719125106,0.8426452446689773,0.8194070767001508,0.7335193561651882,0.8481701227750689,0.7126309502829505,0.7789569014470619,0.5807867786719928,0.8080058352425151,0.8166421899782647,0.7205085276782393,0.7463177269968083,0.8370175644715973,0.8017074937829212,0.8062239323268519,0.8149405706005599,0.8176819597015802,0.8063541483091503,0.8127807476159705,0.8539838258043041,0.8554191388121953,0.7725518416260353,0.8058469913254617 +Bioavailability_Ma,0.7120053209178583,0.6687728633189225,0.641835716661124,0.744595942800133,0.6704356501496508,0.695710009976721,0.6910542068506818,0.7005320917858331,0.6769205187894911,0.6614566012637179,0.648819421350183,0.7223145992683738,0.6696042567342868,0.6311938809444629,0.6915530428999002,0.7031925507149982,0.6217159960093117,0.8839374792151646,0.6867309610907882,0.7263052876621217,0.5874625872963086,0.6835716661124045,0.7283006318589956,0.6601263717991352,0.7653807781842367 
+CYP1A2_Veith,0.9288641730543384,0.923714467918524,0.9507841998257334,0.9261630403212568,0.9038250262031344,0.8679914508328177,0.9278665597494602,0.8753829446008915,0.8971167081286542,0.7630870449178547,0.8894167750571418,0.9142961774994002,0.8789724582959755,0.8582107363396432,0.9151504628168052,0.9044431676116632,0.8970137897940371,0.8569611941052419,0.9269052519920696,0.9133440250539848,0.9013234161310284,0.9293585599009964,0.9308565583603784,0.901898622283398,0.9156012830064784 +CYP2C19_Veith,0.8581617924425365,0.8721135770601274,0.9223048361905296,0.8712846316196391,0.8536529402778946,0.7899815755263134,0.8744442262188677,0.770266453429103,0.8349491378530968,0.6038650867134916,0.8503511873030125,0.8550894880739722,0.7905159164383093,0.7917155336082976,0.8512889337810075,0.8396796448534616,0.8488420015649891,0.7794356686587545,0.8746050896439495,0.8515386461908725,0.8513609482213056,0.8851874713578931,0.8807565568992016,0.8265592996829495,0.8631962564961079 +CYP2C9_Substrate_CarbonMangels,0.5963103635377102,0.6551817688551276,0.6977753662506783,0.692078133478025,0.667932718393923,0.6524688008681498,0.6272381985892567,0.5973955507325014,0.6587086272381986,0.5879001627780792,0.6740368963646228,0.6250678241996744,0.6367335865436788,0.5755561584373305,0.5826098752034725,0.6405317417254477,0.6724091155724362,0.5721649484536082,0.6245252306022789,0.644872490504612,0.6049918610960392,0.6466359196961478,0.6663049376017363,0.5949538795442213,0.6793271839392295 +CYP2C9_Veith,0.8717376922652849,0.8812034842053036,0.9155552755310185,0.8796941081962307,0.8565025635371301,0.8259968654753372,0.8819434052908887,0.8048740701046527,0.8287907665409181,0.6694722500059067,0.8384621139927386,0.8659746560292346,0.8167854583257859,0.8095870775676718,0.8695888102194955,0.863039386326227,0.8663519015853764,0.8590219968024698,0.876420183818607,0.8636198246871382,0.8579658667590747,0.8845333259826892,0.8793838060060013,0.8477920502784055,0.8649319146590219 
+CYP2D6_Substrate_CarbonMangels,0.7936046511627907,0.8100353892821032,0.8431496461071789,0.8145854398382204,0.7834934277047523,0.7296511627906976,0.804726996966633,0.7669362992922144,0.8211577350859454,0.4970930232558139,0.7930990899898888,0.8303842264914054,0.7306622851365014,0.7831142568250758,0.7832406471183013,0.7906976744186046,0.8312689585439839,0.7520222446916077,0.8303842264914054,0.8120576339737109,0.782355915065723,0.8224216380182002,0.8162285136501517,0.7554347826086957,0.7960060667340749 +CYP2D6_Veith,0.8584982043680441,0.8681169595964715,0.8934522769316356,0.8601299390219312,0.8567858136286035,0.7904575578371347,0.872068073736207,0.7947070531932723,0.8134802170269133,0.5143666067260971,0.8281027517167855,0.8552481777411953,0.8198453264451658,0.758300079932249,0.847481927349548,0.8301848161816788,0.8464835497777305,0.8185724079667531,0.8598931410271655,0.842437302819137,0.8385404764292931,0.8682570736938108,0.8752777723202793,0.8240435791034559,0.8543082241287849 +CYP3A4_Substrate_CarbonMangels,0.6839963833634719,0.6589059674502713,0.64376130198915,0.6583408679927667,0.6338155515370705,0.6499773960216997,0.6637658227848102,0.6177667269439421,0.6317811934900542,0.523508137432188,0.6778933092224231,0.6306509945750453,0.66873869801085,0.6353978300180831,0.589624773960217,0.6421790235081375,0.6017179023508138,0.5752712477396021,0.6808318264014466,0.6348327305605785,0.6192359855334539,0.6772151898734177,0.6462477396021701,0.6625226039783002,0.60623869801085 +CYP3A4_Veith,0.8669962030469048,0.8633300413060452,0.903643473444684,0.8622570183575008,0.8446845993180165,0.8035443789746224,0.87512161594053,0.7692566913791147,0.7938341218628425,0.6597229358655252,0.831473004264065,0.8499916587146413,0.7942605283703799,0.7804099908579513,0.8613992005712112,0.8433620051115396,0.8393291604663111,0.8504447573353264,0.8771281955464209,0.8439282115616888,0.8600512488572438,0.8658360970792156,0.8824839680495404,0.8243925876001788,0.8605303722882482 
+DILI,0.9286956521739133,0.9234782608695652,0.888695652173913,0.8965217391304348,0.8860869565217392,0.8130434782608695,0.9152173913043478,0.8910869565217392,0.895,0.6552173913043479,0.9021739130434784,0.9204347826086956,0.7130434782608696,0.8843478260869565,0.9169565217391304,0.8817391304347826,0.883695652173913,0.8956521739130435,0.9352173913043478,0.9221739130434784,0.9230434782608696,0.9215217391304348,0.912608695652174,0.9221739130434782,0.9208695652173912 +HIA_Hou,0.9868312757201646,0.94320987654321,0.9728395061728394,0.9641975308641976,0.9716049382716048,0.8934156378600823,0.9497942386831276,0.920576131687243,0.8,0.6325102880658435,0.931275720164609,0.9806584362139916,0.8987654320987655,0.9238683127572016,0.9139917695473252,0.962962962962963,0.937448559670782,0.9530864197530864,0.9786008230452676,0.9925925925925928,0.9613168724279836,0.9732510288065844,0.9872427983539096,0.9386831275720164,0.9485596707818932 +PAMPA_NCATS,0.7048673705897502,0.7622971928920937,0.7482616533608034,0.7296420293587433,0.7262941024980686,0.6829770795776461,0.7644604687097605,0.6549060005150656,0.6740664434715427,0.4690703064640741,0.6934329126963689,0.7164563481843935,0.7187226371362349,0.5711048158640227,0.728920937419521,0.7227916559361318,0.7376770538243627,0.6671130569147568,0.7455060520216328,0.7203193407159412,0.6732938449652331,0.739943342776204,0.726191089363894,0.7263456090651559,0.7356682977079578 +Pgp_Broccatelli,0.903425753132498,0.91762196747534,0.9329512130098642,0.928918954945348,0.9049586776859504,0.8626699546787523,0.9277525993068516,0.8675686483604372,0.9162556651559584,0.7138762996534258,0.8774993335110637,0.9292188749666755,0.8533724340175954,0.8679018928285791,0.9264196214342842,0.8866302319381498,0.8844308184484136,0.8661690215942415,0.922354038922954,0.8943615035990402,0.9253199146894162,0.9186883497733938,0.9265529192215408,0.8856971474273527,0.8776992801919489 
+SARSCoV2_3CLPro_Diamond,0.7265745007680491,0.7402457757296466,0.7379416282642088,0.7714285714285715,0.7204301075268817,0.6728110599078341,0.7331797235023042,0.7503840245775729,0.7152073732718893,0.6287250384024577,0.7516129032258064,0.7485407066052226,0.6359447004608295,0.7585253456221198,0.708141321044547,0.7268817204301076,0.7084485407066051,0.7397849462365591,0.7156682027649769,0.7247311827956988,0.7185867895545314,0.72642089093702,0.7658986175115207,0.6924731182795699,0.7353302611367126 +SARSCoV2_Vitro_Touret,0.5645802805107808,0.6276952061963575,0.6324052752773708,0.5295164329076826,0.52857441909148,0.5626962528783757,0.5938873770148628,0.6161817039983253,0.5480427046263345,0.5545321331379527,0.5520200962947457,0.5869792756960435,0.4967552857441909,0.629997906635964,0.5688716767845928,0.55714883818296,0.4975926313585932,0.486079129160561,0.6083315888633033,0.5449026585723257,0.4785430186309399,0.6133556625497174,0.5830018840276324,0.517479589700649,0.5534854511199497 +hERG,0.8435415403274712,0.8770466949666464,0.9003941782898727,0.8424802910855064,0.751516070345664,0.7612189205579138,0.8045785324439054,0.7328684050939963,0.7152819890842934,0.6704063068526379,0.8154942389326865,0.8308065494238932,0.7897210430563978,0.747725894481504,0.8423286840509401,0.832929047907823,0.8606731352334749,0.9530018192844149,0.8211036992116435,0.8233778047301394,0.8521831412977562,0.8573377804730139,0.8365676167374165,0.8091267434808975,0.78077622801698 +hERG_Karim,0.8707022332602238,0.8634323749475452,0.8596954388812743,0.8684191029915945,0.8326721774356292,0.7821298268757524,0.8783917297552222,0.7649976419540876,0.8106986726938284,0.5694962849028115,0.846036188402616,0.8413800843810004,0.7679469962144103,0.7809715370312728,0.8624753345080254,0.8289131260501322,0.8316733943310697,0.6962339270006486,0.8532706622045324,0.8538285211320167,0.8334478722574932,0.8761459190877873,0.8699798916459933,0.7781623386065469,0.8066255837753172 
+ogbg-molbace,0.8709789601808381,0.8112502173535037,0.8342027473482871,0.8262910798122066,0.864545296470179,0.789340984176665,0.8596765779864372,0.7625630325160841,0.7815162580420796,0.6442357850808555,0.8177708224656582,0.8365501651886628,0.7723874108850635,0.7122239610502521,0.8730655538167275,0.8125543383759346,0.8334202747348287,0.8535037384802643,0.838810641627543,0.823074247956877,0.8135106937923839,0.8194227090940706,0.8302903842809947,0.8036863154234045,0.8258563728047296 +ogbg-molbbbp,0.7381365740740741,0.7231867283950617,0.7009066358024691,0.7208236882716049,0.6943479938271606,0.6887056327160495,0.7201003086419753,0.6756847993827161,0.7038966049382716,0.5511188271604938,0.6868248456790123,0.714940200617284,0.646701388888889,0.7098765432098765,0.679783950617284,0.6837384259259259,0.6879822530864199,0.7307098765432098,0.7261766975308642,0.7304205246913581,0.7055844907407406,0.7426215277777777,0.7265142746913581,0.6687885802469136,0.6610725308641976 +ogbg-molclintox,0.685350415319918,0.9361415215653563,0.761158030097661,0.7393515622284781,0.918466096687867,0.7579775310186634,0.7629661488200743,0.6992831821499322,0.6730128940326,0.5836126750773294,0.7893932679943002,0.8836496020574844,0.8100345810308275,0.7490446773016369,0.6624643763250269,0.7107483578354709,0.8784537587321448,0.6691351266812637,0.8898203176589163,0.8933166510270044,0.8905540784763494,0.871610537656831,0.8641139262503041,0.9166136655892676,0.8237766308692177 +ogbg-molhiv,0.8114312752274087,0.8144556673554917,0.93112651847274,0.7870854979818073,0.7817841209756851,0.7468844221105528,0.7893238571621699,0.7427734827986084,0.7545863935059914,0.5222922303826827,0.7017665249323541,0.7801743950250101,0.7613857750289911,0.7546559721685351,0.8008690781977249,0.7757971378358021,0.7881602580196605,0.7304640877575852,0.7759180517974488,0.7801533433886375,0.7757557015848473,0.7794491689215307,0.7774246231155778,0.771157713578528,0.7609693778792924 
+ogbg-molmuv,0.7847982062603639,0.7505515332745804,0.9538875049933596,0.7610820032921798,0.6592362259713641,0.6078312235391929,0.7394131645651287,0.6466112111932127,0.6648343348475036,0.5777699271769144,0.6569077928851594,0.7981076630230599,0.597234324900685,0.6090277349727774,0.7801389611725246,0.7427803814620861,0.7555934977158004,0.6485992802102507,0.7541811833214094,0.784767265466513,0.7583653622561716,0.7660191429147382,0.7735443844162032,0.7465818892952342,0.7388285736382725 +ogbg-molsider,0.6869266781940169,0.639254436731632,0.6921749849321256,0.6917469549305515,0.6728463472551298,0.6329586662880131,0.6863617997542489,0.652717459185894,0.6409775235393228,0.5307502431492865,0.6729195664622901,0.6655429178829346,0.5950587662813914,0.5991447413702715,0.6808944388372183,0.6298043585963656,0.6395070055414712,0.5790117403415879,0.655327943527307,0.6825739326709382,0.6628242686972917,0.6399717638635757,0.6873461835744622,0.6144490489451263,0.6218491127682689 +ogbg-moltox21,0.7793677101920942,0.7833803433436679,0.8320623962565512,0.7887222679343062,0.7582697383714505,0.7083346524640269,0.7817977043382754,0.7198347490077192,0.7551421822081971,0.5520488086890106,0.759697908862892,0.7502529731163675,0.7122280489638442,0.7210298631507531,0.7561706408668748,0.7466120217303461,0.748534989937518,0.689452274900356,0.7703849859168921,0.7778255851570117,0.7703945599045738,0.7638956942207354,0.7797656511703458,0.749669751655326,0.7250482371061816 diff --git a/examples/01_simple_bbt_comparison.ipynb b/examples/01_simple_bbt_comparison.ipynb new file mode 100644 index 0000000..13375a5 --- /dev/null +++ b/examples/01_simple_bbt_comparison.ipynb @@ -0,0 +1,1381 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "53a3a983", + "metadata": {}, + "source": [ + "# Analyzing multi-dataset multi-algorithm evaluation with `bbt-test`\n", + "\n", + "---\n", + "\n", + "## Introduction\n", + "\n", + "`bbt-test` enables researchers to compare their evaluation results using Bayesian 
Testing. In contrast to NHST (Null Hypothesis Significance Testing), Bayesian Testing:\n", + "\n", + "### Provides actual probability of one algorithm being better than another\n", + "\n", + "The Bayesian Bradley-Terry (BBT) model provides the actual probability of algorithm $A_1$ being better than algorithm $A_2$ given the observed data $D$:\n", + "\n", + "$$ P(A_1 > A_2 | D) $$\n", + "\n", + "while NHST reports the probability of a certain statistic $t(D)$ being larger than a critical value $\\tau$, given the null hypothesis $H_0$ that one algorithm is no better than another:\n", + "\n", + "$$ P(t(D) > \\tau | A_1 \\leq A_2) $$\n", + "\n", + "### Can differentiate between \"no difference\" and \"inconclusive\"\n", + "\n", + "BBT result interpretations fall into 4 categories:\n", + "- $A_1$ is better than $A_2$\n", + "- $A_2$ is better than $A_1$\n", + "- No difference between $A_1$ and $A_2$\n", + "- Inconclusive results\n", + "\n", + "NHST can only differentiate between \"significant difference\" and \"no significant difference\".\n", + "\n", + "### Does not rely on an arbitrary significance level\n", + "\n", + "NHST relies on an arbitrary significance level $\\alpha$ (e.g., 0.05) to determine whether results are statistically significant. Bayesian Testing instead relies on ROPE (Region of Practical Equivalence) to determine whether results are practically significant.\n", + "\n", + "If you are interested in learning more about Bayesian Testing and the Bradley-Terry model, we recommend the following articles:\n", + "\n", + "[Wainer, Jacques. \"A Bayesian Bradley-Terry model to compare multiple ML algorithms on multiple data sets.\" Journal of Machine Learning Research 24.341 (2023): 1-34.](https://www.jmlr.org/papers/volume24/22-0907/22-0907.pdf)\n", + "\n", + "[Benavoli, Alessio, et al. 
\"Time for a change: a tutorial for comparing multiple classifiers through Bayesian analysis.\" Journal of Machine Learning Research 18.77 (2017): 1-36.](https://www.jmlr.org/papers/volume18/16-305/16-305.pdf)\n", + "\n", + "## Practical guide\n", + "\n", + "In this notebook we will show how to use `bbt-test` to analyze the results of a comparison of 25 models on 25 datasets, taken from [Benchmarking pretrained molecular embedding models for molecular representation learning](https://arxiv.org/pdf/2508.06199?), reproducing tables from the original article.\n", + "\n", + "We will start by loading and preparing the data, then fit the BBT model and analyze the results, and finally compare how different ROPE values affect the interpretation.\n", + "\n", + "Steps:\n", + "1. [Loading the data](#loading-the-data)\n", + "2. [Fitting the BBT model](#fitting-the-bbt-model)\n", + "3. [Analyzing the results given the ROPE value](#analyzing-the-results-given-the-rope-value)\n", + "4. [Comparing the results across different ROPE values](#comparing-the-results-across-different-rope-values)\n", + "5. [Weak vs strong interpretation](#weak-vs-strong-interpretation)" + ] + }, + { + "cell_type": "markdown", + "id": "a23ee3f7", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "## Necessary imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b6a15859", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "import bbttest" + ] + }, + { + "cell_type": "markdown", + "id": "a6492d9f", + "metadata": {}, + "source": [ + "## Loading the data\n", + "\n", + "`PyBBT` expects data in wide format, where each row corresponds to a dataset, columns correspond to algorithms, and values correspond to the algorithm's performance on that dataset. An additional column indicating the dataset name can be included, but it is not required when each dataset has only one run." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "308e8c0f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetAtomPair_countCDDDCLAMPChemBERTa-10M-MTRChemFM-3BChemGPT-4.7MECFP_countGEMGNN-GraphCL-sum...chemformer_maskcoatigrover_largemat_masking_2Mmol2vecmol_r_tag_1024molbertrmat_4Munimolv1unimolv2
0AMES0.8145250.8355240.8459980.8426450.8194070.7335190.8481700.7126310.778957...0.8017070.8062240.8149410.8176820.8063540.8127810.8539840.8554190.7725520.805847
1Bioavailability_Ma0.7120050.6687730.6418360.7445960.6704360.6957100.6910540.7005320.676921...0.7031930.6217160.8839370.6867310.7263050.5874630.6835720.7283010.6601260.765381
2CYP1A2_Veith0.9288640.9237140.9507840.9261630.9038250.8679910.9278670.8753830.897117...0.9044430.8970140.8569610.9269050.9133440.9013230.9293590.9308570.9018990.915601
3CYP2C19_Veith0.8581620.8721140.9223050.8712850.8536530.7899820.8744440.7702660.834949...0.8396800.8488420.7794360.8746050.8515390.8513610.8851870.8807570.8265590.863196
4CYP2C9_Substrate_CarbonMangels0.5963100.6551820.6977750.6920780.6679330.6524690.6272380.5973960.658709...0.6405320.6724090.5721650.6245250.6448720.6049920.6466360.6663050.5949540.679327
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " dataset AtomPair_count CDDD CLAMP \\\n", + "0 AMES 0.814525 0.835524 0.845998 \n", + "1 Bioavailability_Ma 0.712005 0.668773 0.641836 \n", + "2 CYP1A2_Veith 0.928864 0.923714 0.950784 \n", + "3 CYP2C19_Veith 0.858162 0.872114 0.922305 \n", + "4 CYP2C9_Substrate_CarbonMangels 0.596310 0.655182 0.697775 \n", + "\n", + " ChemBERTa-10M-MTR ChemFM-3B ChemGPT-4.7M ECFP_count GEM \\\n", + "0 0.842645 0.819407 0.733519 0.848170 0.712631 \n", + "1 0.744596 0.670436 0.695710 0.691054 0.700532 \n", + "2 0.926163 0.903825 0.867991 0.927867 0.875383 \n", + "3 0.871285 0.853653 0.789982 0.874444 0.770266 \n", + "4 0.692078 0.667933 0.652469 0.627238 0.597396 \n", + "\n", + " GNN-GraphCL-sum ... chemformer_mask coati grover_large \\\n", + "0 0.778957 ... 0.801707 0.806224 0.814941 \n", + "1 0.676921 ... 0.703193 0.621716 0.883937 \n", + "2 0.897117 ... 0.904443 0.897014 0.856961 \n", + "3 0.834949 ... 0.839680 0.848842 0.779436 \n", + "4 0.658709 ... 0.640532 0.672409 0.572165 \n", + "\n", + " mat_masking_2M mol2vec mol_r_tag_1024 molbert rmat_4M unimolv1 \\\n", + "0 0.817682 0.806354 0.812781 0.853984 0.855419 0.772552 \n", + "1 0.686731 0.726305 0.587463 0.683572 0.728301 0.660126 \n", + "2 0.926905 0.913344 0.901323 0.929359 0.930857 0.901899 \n", + "3 0.874605 0.851539 0.851361 0.885187 0.880757 0.826559 \n", + "4 0.624525 0.644872 0.604992 0.646636 0.666305 0.594954 \n", + "\n", + " unimolv2 \n", + "0 0.805847 \n", + "1 0.765381 \n", + "2 0.915601 \n", + "3 0.863196 \n", + "4 0.679327 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"../datasets/benchmarking_mol.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "c52f0f09", + "metadata": {}, + "source": [ + "## Fitting the BBT model\n", + "\n", + "With the data loaded, we can set up the BBT model and fit the posterior distribution. 
Once fitted, the model object stores the sampling results, allowing multiple analyses to be run without re-fitting.\n", + "\n", + "We set `local_rope_value` to $0.01$, meaning that a difference of $0.01$ or less in the reported metric is considered practically negligible. If multiple runs per dataset are provided, PyBBT will calculate this threshold automatically using in-dataset variance.\n", + "\n", + "We use `spread` as the `tie_solver` (the default). The authors of the BBT model recommend any of `spread`, `add`, or `forget`, noting similar performance across all three. We also use the default prior distribution — `log_normal` with scale $1.0$, corresponding to $\\sigma \\sim \\text{LogNormal}(0, 1)$.\n", + "\n", + "Finally, we set `maximize=True` to indicate that higher values of the reported metric are better." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "04f4b768", + "metadata": {}, + "outputs": [], + "source": [ + "model = bbttest.PyBBT(\n", + " local_rope_value=0.01,\n", + " hyper_prior=\"log_normal\",\n", + " tie_solver=\"spread\",\n", + " maximize=True,\n", + " scale=1.0,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f8c3bd1f", + "metadata": {}, + "source": [ + "Next, we fit the model via MCMC sampling to obtain the posterior distribution. **Remember** — if the dataframe contains a dataset column, it must be specified via `dataset_col` in `fit()`. Otherwise, PyBBT will treat it as just another algorithm, leading to errors or incorrect results.\n", + "\n", + "By default, PyBBT uses 4 chains with 1000 draws each (PyMC defaults)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "45c21e4c", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cbfb06390f72471982dab8313ac0033d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Constructing win table: 0%| | 0/300 [00:00NUTS: [sigma, beta]\n", + ">Metropolis: [win1_rep]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "862e8135ca2b450fb3c34d062917bf2c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 13 seconds.\n",
+      "The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details\n",
+      "The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       ""
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.fit(\n",
+    "    data=df,\n",
+    "    dataset_col=\"dataset\",\n",
+    "    random_seed=42,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31a81d92",
+   "metadata": {},
+   "source": [
+    "Once the model is fitted, we can run multiple analyses on the results."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "394eac1c",
+   "metadata": {},
+   "source": [
+    "## Analyzing the results given the ROPE value\n",
+    "\n",
+    "We will perform the analysis using the same ROPE as in the original article: the interval from $35\\%$ to $65\\%$, passed below as `rope_value=(0.35, 0.65)`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "833130bc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pairmeandeltaabove_50in_ropeweak_interpretation
0CLAMP > rmat_4M0.550.110.911.00Equivalent
1CLAMP > molbert0.600.111.000.95CLAMP better
2CLAMP > CDDD0.610.101.000.87CLAMP better
3CLAMP > ChemBERTa-10M-MTR0.620.111.000.84CLAMP better
4CLAMP > ECFP_count0.620.111.000.83CLAMP better
.....................
295ChemGPT-4.7M > SELFormer-Lite0.560.120.940.99Equivalent
296ChemGPT-4.7M > GraphFP-CP0.870.071.000.00ChemGPT-4.7M better
297SimSon > SELFormer-Lite0.560.120.931.00Equivalent
298SimSon > GraphFP-CP0.870.071.000.00SimSon better
299SELFormer-Lite > GraphFP-CP0.850.091.000.00SELFormer-Lite better
\n", + "

300 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " pair mean delta above_50 in_rope \\\n", + "0 CLAMP > rmat_4M 0.55 0.11 0.91 1.00 \n", + "1 CLAMP > molbert 0.60 0.11 1.00 0.95 \n", + "2 CLAMP > CDDD 0.61 0.10 1.00 0.87 \n", + "3 CLAMP > ChemBERTa-10M-MTR 0.62 0.11 1.00 0.84 \n", + "4 CLAMP > ECFP_count 0.62 0.11 1.00 0.83 \n", + ".. ... ... ... ... ... \n", + "295 ChemGPT-4.7M > SELFormer-Lite 0.56 0.12 0.94 0.99 \n", + "296 ChemGPT-4.7M > GraphFP-CP 0.87 0.07 1.00 0.00 \n", + "297 SimSon > SELFormer-Lite 0.56 0.12 0.93 1.00 \n", + "298 SimSon > GraphFP-CP 0.87 0.07 1.00 0.00 \n", + "299 SELFormer-Lite > GraphFP-CP 0.85 0.09 1.00 0.00 \n", + "\n", + " weak_interpretation \n", + "0 Equivalent \n", + "1 CLAMP better \n", + "2 CLAMP better \n", + "3 CLAMP better \n", + "4 CLAMP better \n", + ".. ... \n", + "295 Equivalent \n", + "296 ChemGPT-4.7M better \n", + "297 Equivalent \n", + "298 SimSon better \n", + "299 SELFormer-Lite better \n", + "\n", + "[300 rows x 6 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.posterior_table(rope_value=(0.35, 0.65))" + ] + }, + { + "cell_type": "markdown", + "id": "d03d68a3", + "metadata": {}, + "source": [ + "In this case, the result table contains 300 rows, corresponding to all pairwise comparisons. To make it more readable, we can filter the results to include only comparisons against a specific model (called control)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "33410e75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pairmeandeltaabove_50in_ropeweak_interpretation
0CLAMP > ECFP_count0.620.111.000.83CLAMP better
1rmat_4M > ECFP_count0.570.110.980.99Equivalent
2molbert > ECFP_count0.520.110.741.00Equivalent
3CDDD > ECFP_count0.500.110.551.00Equivalent
4ChemBERTa-10M-MTR > ECFP_count0.500.100.521.00Equivalent
5ECFP_count > mat_masking_2M0.510.110.631.00Equivalent
6ECFP_count > AtomPair_count0.550.110.941.00Equivalent
7ECFP_count > MoLFormer-XL-both-10pct0.600.101.000.93ECFP_count better
8ECFP_count > mol2vec0.640.101.000.66ECFP_count better
9ECFP_count > TT0.700.091.000.03ECFP_count better
10ECFP_count > ChemFM-3B0.740.081.000.00ECFP_count better
11ECFP_count > unimolv20.760.081.000.00ECFP_count better
12ECFP_count > mol_r_tag_10240.760.081.000.00ECFP_count better
13ECFP_count > coati0.780.071.000.00ECFP_count better
14ECFP_count > GraphMVP_CP-max0.800.071.000.00ECFP_count better
15ECFP_count > chemformer_mask0.810.071.000.00ECFP_count better
16ECFP_count > unimolv10.860.051.000.00ECFP_count better
17ECFP_count > GNN-GraphCL-sum0.880.051.000.00ECFP_count better
18ECFP_count > grover_large0.880.051.000.00ECFP_count better
19ECFP_count > GEM0.920.031.000.00ECFP_count better
20ECFP_count > ChemGPT-4.7M0.920.031.000.00ECFP_count better
21ECFP_count > SimSon0.920.031.000.00ECFP_count better
22ECFP_count > SELFormer-Lite0.940.031.000.00ECFP_count better
23ECFP_count > GraphFP-CP0.990.011.000.00ECFP_count better
\n", + "
" + ], + "text/plain": [ + " pair mean delta above_50 in_rope \\\n", + "0 CLAMP > ECFP_count 0.62 0.11 1.00 0.83 \n", + "1 rmat_4M > ECFP_count 0.57 0.11 0.98 0.99 \n", + "2 molbert > ECFP_count 0.52 0.11 0.74 1.00 \n", + "3 CDDD > ECFP_count 0.50 0.11 0.55 1.00 \n", + "4 ChemBERTa-10M-MTR > ECFP_count 0.50 0.10 0.52 1.00 \n", + "5 ECFP_count > mat_masking_2M 0.51 0.11 0.63 1.00 \n", + "6 ECFP_count > AtomPair_count 0.55 0.11 0.94 1.00 \n", + "7 ECFP_count > MoLFormer-XL-both-10pct 0.60 0.10 1.00 0.93 \n", + "8 ECFP_count > mol2vec 0.64 0.10 1.00 0.66 \n", + "9 ECFP_count > TT 0.70 0.09 1.00 0.03 \n", + "10 ECFP_count > ChemFM-3B 0.74 0.08 1.00 0.00 \n", + "11 ECFP_count > unimolv2 0.76 0.08 1.00 0.00 \n", + "12 ECFP_count > mol_r_tag_1024 0.76 0.08 1.00 0.00 \n", + "13 ECFP_count > coati 0.78 0.07 1.00 0.00 \n", + "14 ECFP_count > GraphMVP_CP-max 0.80 0.07 1.00 0.00 \n", + "15 ECFP_count > chemformer_mask 0.81 0.07 1.00 0.00 \n", + "16 ECFP_count > unimolv1 0.86 0.05 1.00 0.00 \n", + "17 ECFP_count > GNN-GraphCL-sum 0.88 0.05 1.00 0.00 \n", + "18 ECFP_count > grover_large 0.88 0.05 1.00 0.00 \n", + "19 ECFP_count > GEM 0.92 0.03 1.00 0.00 \n", + "20 ECFP_count > ChemGPT-4.7M 0.92 0.03 1.00 0.00 \n", + "21 ECFP_count > SimSon 0.92 0.03 1.00 0.00 \n", + "22 ECFP_count > SELFormer-Lite 0.94 0.03 1.00 0.00 \n", + "23 ECFP_count > GraphFP-CP 0.99 0.01 1.00 0.00 \n", + "\n", + " weak_interpretation \n", + "0 CLAMP better \n", + "1 Equivalent \n", + "2 Equivalent \n", + "3 Equivalent \n", + "4 Equivalent \n", + "5 Equivalent \n", + "6 Equivalent \n", + "7 ECFP_count better \n", + "8 ECFP_count better \n", + "9 ECFP_count better \n", + "10 ECFP_count better \n", + "11 ECFP_count better \n", + "12 ECFP_count better \n", + "13 ECFP_count better \n", + "14 ECFP_count better \n", + "15 ECFP_count better \n", + "16 ECFP_count better \n", + "17 ECFP_count better \n", + "18 ECFP_count better \n", + "19 ECFP_count better \n", + "20 ECFP_count better \n", + "21 ECFP_count better 
\n", + "22 ECFP_count better \n", + "23 ECFP_count better " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.posterior_table(\n", + " rope_value=(0.35, 0.65),\n", + " control_model=\"ECFP_count\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "be05ca39", + "metadata": {}, + "source": [ + "We can also filter the results to include only a specific list of models:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e47c3389", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pairmeandeltaabove_50in_ropeweak_interpretation
0CLAMP > ECFP_count0.620.111.00.83CLAMP better
1ECFP_count > TT0.700.091.00.03ECFP_count better
2ECFP_count > ChemFM-3B0.740.081.00.00ECFP_count better
\n", + "
" + ], + "text/plain": [ + " pair mean delta above_50 in_rope weak_interpretation\n", + "0 CLAMP > ECFP_count 0.62 0.11 1.0 0.83 CLAMP better\n", + "1 ECFP_count > TT 0.70 0.09 1.0 0.03 ECFP_count better\n", + "2 ECFP_count > ChemFM-3B 0.74 0.08 1.0 0.00 ECFP_count better" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.posterior_table(\n", + " rope_value=(0.35, 0.65),\n", + " control_model=\"ECFP_count\",\n", + " selected_models=[\"CLAMP\", \"TT\", \"ChemFM-3B\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b6d878d8", + "metadata": {}, + "source": [ + "## Comparing the results across different ROPE values\n", + "\n", + "While the BBT model does not depend on a single fixed ROPE value, the choice of ROPE does influence the interpretation. To assess the sensitivity of the results to this choice, we can run the analysis across multiple values. Keep in mind that a larger ROPE requires a greater performance difference before declaring one model superior.\n", + "\n", + "We compare BBT interpretations against the control model (`ECFP_count`) for the following ROPE values:\n", + "- $45\\% - 55\\%$ (default recommended by the BBT authors)\n", + "- $40\\% - 60\\%$\n", + "- $35\\% - 65\\%$ (used in the original article)\n", + "\n", + "And a more extreme value:\n", + "- $20\\% - 80\\%$" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4a6d68df", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rope_valuebetter_modelsequivalent_modelsworse_modelsunknown_models
0(0.45, 0.55)CLAMP, rmat_4MMoLFormer-XL-both-10pct, mol2vec, TT, ChemFM-3...molbert, CDDD, ChemBERTa-10M-MTR, mat_masking_...
1(0.4, 0.6)CLAMP, rmat_4Mmolbert, CDDD, ChemBERTa-10M-MTR, mat_masking_2MMoLFormer-XL-both-10pct, mol2vec, TT, ChemFM-3...AtomPair_count
2(0.35, 0.65)CLAMPrmat_4M, molbert, CDDD, ChemBERTa-10M-MTR, mat...MoLFormer-XL-both-10pct, mol2vec, TT, ChemFM-3...
3(0.2, 0.8)CLAMP, rmat_4M, molbert, CDDD, ChemBERTa-10M-M...mol_r_tag_1024, coati, GraphMVP_CP-max, chemfo...
\n", + "
" + ], + "text/plain": [ + " rope_value better_models \\\n", + "0 (0.45, 0.55) CLAMP, rmat_4M \n", + "1 (0.4, 0.6) CLAMP, rmat_4M \n", + "2 (0.35, 0.65) CLAMP \n", + "3 (0.2, 0.8) \n", + "\n", + " equivalent_models \\\n", + "0 \n", + "1 molbert, CDDD, ChemBERTa-10M-MTR, mat_masking_2M \n", + "2 rmat_4M, molbert, CDDD, ChemBERTa-10M-MTR, mat... \n", + "3 CLAMP, rmat_4M, molbert, CDDD, ChemBERTa-10M-M... \n", + "\n", + " worse_models \\\n", + "0 MoLFormer-XL-both-10pct, mol2vec, TT, ChemFM-3... \n", + "1 MoLFormer-XL-both-10pct, mol2vec, TT, ChemFM-3... \n", + "2 MoLFormer-XL-both-10pct, mol2vec, TT, ChemFM-3... \n", + "3 mol_r_tag_1024, coati, GraphMVP_CP-max, chemfo... \n", + "\n", + " unknown_models \n", + "0 molbert, CDDD, ChemBERTa-10M-MTR, mat_masking_... \n", + "1 AtomPair_count \n", + "2 \n", + "3 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.rope_comparison_control_table(\n", + " rope_values=((0.45, 0.55), (0.40, 0.60), (0.35, 0.65), (0.20, 0.80)),\n", + " control_model=\"ECFP_count\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5ed9d931", + "metadata": {}, + "source": [ + "## Weak vs strong interpretation\n", + "\n", + "There are two ways to interpret the posterior distribution of the BBT model. As the authors suggest:\n", + "\n", + "> We believe that there are two main approaches to utilizing the BBT model in research. For researchers or audiences who are more familiar with the frequentist approach, we recommend using the summary values related to the weak interpretation of the parameters (the `above.50` and `in.rope` values) and a $0.95$ probability threshold. 
This approach utilizes familiar threshold numbers such as $0.95$ or $95\\%$.\n", + "\n", + "> If researchers and their audiences are more comfortable with Bayesian results, we recommend following the strong interpretation (and the mean summary of the probabilities) and choosing a threshold of $0.70$.\n", + "\n", + "> — Wainer (2023)\n", + "\n", + "Note: the paper uses R-style column names (`above.50`, `in.rope`); in `bbt-test` these are `above_50` and `in_rope`.\n", + "\n", + "Before exploring the two interpretations, let's revisit all the columns returned by `posterior_table`. By default it returns only the subset needed for weak interpretation. Let's look at the full table:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9fbb7442", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pairleft_modelright_modelmedianmeanhdi_lowhdi_highdeltaabove_50in_ropeweak_interpretationstrong_interpretation
0CLAMP > ECFP_countCLAMPECFP_count0.620.620.560.670.111.00.83CLAMP betterUnknown
1ECFP_count > TTECFP_countTT0.700.700.660.740.091.00.03ECFP_count betterECFP_count better
2ECFP_count > ChemFM-3BECFP_countChemFM-3B0.740.740.700.780.081.00.00ECFP_count betterECFP_count better
\n", + "
" + ], + "text/plain": [ + " pair left_model right_model median mean hdi_low \\\n", + "0 CLAMP > ECFP_count CLAMP ECFP_count 0.62 0.62 0.56 \n", + "1 ECFP_count > TT ECFP_count TT 0.70 0.70 0.66 \n", + "2 ECFP_count > ChemFM-3B ECFP_count ChemFM-3B 0.74 0.74 0.70 \n", + "\n", + " hdi_high delta above_50 in_rope weak_interpretation \\\n", + "0 0.67 0.11 1.0 0.83 CLAMP better \n", + "1 0.74 0.09 1.0 0.03 ECFP_count better \n", + "2 0.78 0.08 1.0 0.00 ECFP_count better \n", + "\n", + " strong_interpretation \n", + "0 Unknown \n", + "1 ECFP_count better \n", + "2 ECFP_count better " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.posterior_table(\n", + " rope_value=(0.35, 0.65),\n", + " control_model=\"ECFP_count\",\n", + " selected_models=[\"CLAMP\", \"TT\", \"ChemFM-3B\"],\n", + " columns=bbttest.PyBBT.ALL_PROPERTIES_COLUMNS,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "01684204", + "metadata": {}, + "source": [ + "The key columns describing the posterior distribution for each model pair are:\n", + "- `mean`: the average probability of `left_model` being better than `right_model`.\n", + "- `median`: the median probability of `left_model` being better than `right_model`.\n", + "- `hdi_low` and `hdi_high`: the lower and upper bounds of the Highest Density Interval (HDI) of the posterior.\n", + "- `above_50`: proportion of posterior samples where $P(\\text{left\\_model} > \\text{right\\_model}) > 0.5$.\n", + "- `in_rope`: proportion of posterior samples where $P(\\text{left\\_model} > \\text{right\\_model})$ falls within the ROPE.\n", + "\n", + "**Weak interpretation** uses `above_50` and `in_rope` as follows:\n", + "1. If `in_rope` $\\geq 0.95$ → models are **equivalent**.\n", + "2. Else if `above_50` $\\geq 0.95$ → `left_model` is **better**.\n", + "3. 
Otherwise → result is **inconclusive** (Unknown).\n", + "\n", + "Model pairs are always ordered by the internal posterior $\\beta$ estimate of global performance, so `left_model` is always the globally stronger model in the pair.\n", + "\n", + "**Strong interpretation** uses `mean`:\n", + "1. If `mean` $> 0.70$ → `left_model` is **better**.\n", + "2. If `mean` $\\leq 0.55$ → models are **equivalent**.\n", + "3. If $0.55 <$ `mean` $\\leq 0.70$ → result is **inconclusive** (Unknown)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "bbt-test (3.11.13)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index d25f523..bbd70c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ dependencies = [ [dependency-groups] dev = [ - "coverage", "jupyter", "mypy", "pre-commit", @@ -39,6 +38,7 @@ dev = [ test = [ "pre-commit", "pytest", + "pytest-cov", "ruff", "mypy", ] diff --git a/tests/bbt/test__utils.py b/tests/bbt/test__utils.py index 1a1f0dd..ccd3531 100644 --- a/tests/bbt/test__utils.py +++ b/tests/bbt/test__utils.py @@ -30,3 +30,10 @@ def test_literal_validation(self, params, should_raise): mock_fun(**params) else: mock_fun(**params) + + def test_unexpected_kwarg(self): + """Test if _validate_params raises an error for unexpected keyword arguments.""" + with pytest.raises( + ValueError, match="Unexpected keyword argument 'unexpected_kwarg'" + ): + mock_fun(param_lit="option1", param_str="valid string", unexpected_kwarg=42) diff --git a/tests/regression/test_benchmarking_mol.py b/tests/regression/test_benchmarking_mol.py index 8ce10a3..dcd1d46 100644 --- a/tests/regression/test_benchmarking_mol.py +++ 
b/tests/regression/test_benchmarking_mol.py @@ -106,6 +106,130 @@ def _extract_interpretations(results): return interpretations +# ---- SCORES FROM THE ARTICLE ---- +ROPE_1_VALUE = (0.45, 0.55) +ROPE_1_BETTER_MODELS = ["CLAMP", "rmat_4M"] +ROPE_1_EQUIVALENT_MODELS = [] +ROPE_1_UNKNOWN_MODELS = [ + "AtomPair_count", + "CDDD", + "ChemBERTa-10M-MTR", + "mat_masking_2M", + "molbert", +] +ROPE_1_WORSE_MODELS = [ + "ChemFM-3B", + "ChemGPT-4.7M", + "GEM", + "GNN-GraphCL-sum", + "GraphFP-CP", + "GraphMVP_CP-max", + "MoLFormer-XL-both-10pct", + "SELFormer-Lite", + "SimSon", + "TT", + "chemformer_mask", + "coati", + "grover_large", + "mol2vec", + "mol_r_tag_1024", + "unimolv1", + "unimolv2", +] + +ROPE_2_VALUE = (0.4, 0.6) +ROPE_2_BETTER_MODELS = ["CLAMP", "rmat_4M"] +ROPE_2_EQUIVALENT_MODELS = [ + "CDDD", + "ChemBERTa-10M-MTR", + "mat_masking_2M", + "molbert", +] +ROPE_2_UNKNOWN_MODELS = ["AtomPair_count"] +ROPE_2_WORSE_MODELS = [ + "ChemFM-3B", + "ChemGPT-4.7M", + "GEM", + "GNN-GraphCL-sum", + "GraphFP-CP", + "GraphMVP_CP-max", + "MoLFormer-XL-both-10pct", + "SELFormer-Lite", + "SimSon", + "TT", + "chemformer_mask", + "coati", + "grover_large", + "mol2vec", + "mol_r_tag_1024", + "unimolv1", + "unimolv2", +] + +ROPE_3_VALUE = (0.35, 0.65) +ROPE_3_BETTER_MODELS = ["CLAMP"] +ROPE_3_EQUIVALENT_MODELS = [ + "AtomPair_count", + "CDDD", + "ChemBERTa-10M-MTR", + "mat_masking_2M", + "molbert", + "rmat_4M", +] +ROPE_3_UNKNOWN_MODELS = [] +ROPE_3_WORSE_MODELS = [ + "ChemFM-3B", + "ChemGPT-4.7M", + "GEM", + "GNN-GraphCL-sum", + "GraphFP-CP", + "GraphMVP_CP-max", + "MoLFormer-XL-both-10pct", + "SELFormer-Lite", + "SimSon", + "TT", + "chemformer_mask", + "coati", + "grover_large", + "mol2vec", + "mol_r_tag_1024", + "unimolv1", + "unimolv2", +] + +ROPE_4_VALUE = (0.3, 0.7) +ROPE_4_BETTER_MODELS = [] +ROPE_4_EQUIVALENT_MODELS = [ + "AtomPair_count", + "CDDD", + "CLAMP", + "ChemBERTa-10M-MTR", + "MoLFormer-XL-both-10pct", + "mat_masking_2M", + "mol2vec", + "molbert", + "rmat_4M", +] 
+ROPE_4_UNKNOWN_MODELS = [] +ROPE_4_WORSE_MODELS = [ + "ChemFM-3B", + "ChemGPT-4.7M", + "GEM", + "GNN-GraphCL-sum", + "GraphFP-CP", + "GraphMVP_CP-max", + "SELFormer-Lite", + "SimSon", + "TT", + "chemformer_mask", + "coati", + "grover_large", + "mol_r_tag_1024", + "unimolv1", + "unimolv2", +] + + class TestWeakInterpretationAgainstECFP: """Test weak interpretation results against ECFP baseline for different ROPE values.""" @@ -114,130 +238,32 @@ class TestWeakInterpretationAgainstECFP: "rope,better_models,equivalent_models,unknown_models,worse_models", [ ( - (0.45, 0.55), - ["CLAMP", "rmat_4M"], - [], - [ - "AtomPair_count", - "CDDD", - "ChemBERTa-10M-MTR", - "mat_masking_2M", - "molbert", - ], - [ - "ChemFM-3B", - "ChemGPT-4.7M", - "GEM", - "GNN-GraphCL-sum", - "GraphFP-CP", - "GraphMVP_CP-max", - "MoLFormer-XL-both-10pct", - "SELFormer-Lite", - "SimSon", - "TT", - "chemformer_mask", - "coati", - "grover_large", - "mol2vec", - "mol_r_tag_1024", - "unimolv1", - "unimolv2", - ], + ROPE_1_VALUE, + ROPE_1_BETTER_MODELS, + ROPE_1_EQUIVALENT_MODELS, + ROPE_1_UNKNOWN_MODELS, + ROPE_1_WORSE_MODELS, ), ( - (0.4, 0.6), - ["CLAMP", "rmat_4M"], - [ - "CDDD", - "ChemBERTa-10M-MTR", - "mat_masking_2M", - "molbert", - ], - ["AtomPair_count"], - [ - "ChemFM-3B", - "ChemGPT-4.7M", - "GEM", - "GNN-GraphCL-sum", - "GraphFP-CP", - "GraphMVP_CP-max", - "MoLFormer-XL-both-10pct", - "SELFormer-Lite", - "SimSon", - "TT", - "chemformer_mask", - "coati", - "grover_large", - "mol2vec", - "mol_r_tag_1024", - "unimolv1", - "unimolv2", - ], + ROPE_2_VALUE, + ROPE_2_BETTER_MODELS, + ROPE_2_EQUIVALENT_MODELS, + ROPE_2_UNKNOWN_MODELS, + ROPE_2_WORSE_MODELS, ), ( - (0.35, 0.65), - ["CLAMP"], - [ - "AtomPair_count", - "CDDD", - "ChemBERTa-10M-MTR", - "mat_masking_2M", - "molbert", - "rmat_4M", - ], - [], - [ - "ChemFM-3B", - "ChemGPT-4.7M", - "GEM", - "GNN-GraphCL-sum", - "GraphFP-CP", - "GraphMVP_CP-max", - "MoLFormer-XL-both-10pct", - "SELFormer-Lite", - "SimSon", - "TT", - "chemformer_mask", - 
"coati", - "grover_large", - "mol2vec", - "mol_r_tag_1024", - "unimolv1", - "unimolv2", - ], + ROPE_3_VALUE, + ROPE_3_BETTER_MODELS, + ROPE_3_EQUIVALENT_MODELS, + ROPE_3_UNKNOWN_MODELS, + ROPE_3_WORSE_MODELS, ), ( - (0.3, 0.7), - [], - [ - "AtomPair_count", - "CDDD", - "CLAMP", - "ChemBERTa-10M-MTR", - "MoLFormer-XL-both-10pct", - "mat_masking_2M", - "mol2vec", - "molbert", - "rmat_4M", - ], - [], - [ - "ChemFM-3B", - "ChemGPT-4.7M", - "GEM", - "GNN-GraphCL-sum", - "GraphFP-CP", - "GraphMVP_CP-max", - "SELFormer-Lite", - "SimSon", - "TT", - "chemformer_mask", - "coati", - "grover_large", - "mol_r_tag_1024", - "unimolv1", - "unimolv2", - ], + ROPE_4_VALUE, + ROPE_4_BETTER_MODELS, + ROPE_4_EQUIVALENT_MODELS, + ROPE_4_UNKNOWN_MODELS, + ROPE_4_WORSE_MODELS, ), ], ids=["rope_0.45_0.55", "rope_0.4_0.6", "rope_0.35_0.65", "rope_0.3_0.7"], @@ -300,3 +326,75 @@ def test_weak_interpretation_for_rope( assert interpretations[model] == "ECFP better", ( f"Model {model} should be worse than ECFP for ROPE {rope}" ) + + @pytest.mark.slow + def test_multiple_rope_table( + self, + fitted_model, + ): + """ + Test posterior table with multiple ROPE values. + + Parameters + ---------- + fitted_model : PyBBT + Fitted PyBBT model fixture. 
+ """ + # Given + expected_dataframe = pd.DataFrame( + { + "rope_value": [ + ROPE_1_VALUE, + ROPE_2_VALUE, + ROPE_3_VALUE, + ROPE_4_VALUE, + ], + "better_models": [ + sorted(ROPE_1_BETTER_MODELS), + sorted(ROPE_2_BETTER_MODELS), + sorted(ROPE_3_BETTER_MODELS), + sorted(ROPE_4_BETTER_MODELS), + ], + "equivalent_models": [ + sorted(ROPE_1_EQUIVALENT_MODELS), + sorted(ROPE_2_EQUIVALENT_MODELS), + sorted(ROPE_3_EQUIVALENT_MODELS), + sorted(ROPE_4_EQUIVALENT_MODELS), + ], + "unknown_models": [ + sorted(ROPE_1_UNKNOWN_MODELS), + sorted(ROPE_2_UNKNOWN_MODELS), + sorted(ROPE_3_UNKNOWN_MODELS), + sorted(ROPE_4_UNKNOWN_MODELS), + ], + "worse_models": [ + sorted(ROPE_1_WORSE_MODELS), + sorted(ROPE_2_WORSE_MODELS), + sorted(ROPE_3_WORSE_MODELS), + sorted(ROPE_4_WORSE_MODELS), + ], + } + ) + + # When + + results = fitted_model.rope_comparison_control_table( + rope_values=(ROPE_1_VALUE, ROPE_2_VALUE, ROPE_3_VALUE, ROPE_4_VALUE), + control_model="ECFP_count", + return_as_array=True, # Return as array as order does not matter and we need to sort + ) + for column in [ + "better_models", + "equivalent_models", + "unknown_models", + "worse_models", + ]: + results[column] = results[column].apply(sorted) + + # Then + # Order columns to match expected dataframe, ignore index + results = results[expected_dataframe.columns] + pd.testing.assert_frame_equal( + results.reset_index(drop=True), + expected_dataframe.reset_index(drop=True), + ) diff --git a/uv.lock b/uv.lock index 8e39870..cb01286 100644 --- a/uv.lock +++ b/uv.lock @@ -155,7 +155,6 @@ dependencies = [ [package.dev-dependencies] dev = [ - { name = "coverage" }, { name = "jupyter" }, { name = "mypy" }, { name = "pre-commit" }, @@ -166,6 +165,7 @@ test = [ { name = "mypy" }, { name = "pre-commit" }, { name = "pytest" }, + { name = "pytest-cov" }, { name = "ruff" }, ] @@ -178,7 +178,6 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ - { name = "coverage" }, { name = "jupyter" }, { name = "mypy" }, { name = "pre-commit" 
}, @@ -189,6 +188,7 @@ test = [ { name = "mypy" }, { name = "pre-commit" }, { name = "pytest" }, + { name = "pytest-cov" }, { name = "ruff" }, ] @@ -600,6 +600,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/a3/43b749004e3c09452e39bb56347a008f0a0668aad37324a99b5c8ca91d9e/coverage-7.12.0-py3-none-any.whl", hash = "sha256:159d50c0b12e060b15ed3d39f87ed43d4f7f7ad40b8a534f4dd331adbb51104a", size = 209503, upload-time = "2025-11-18T13:34:18.892Z" }, ] +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version <= '3.11'" }, +] + [[package]] name = "cycler" version = "0.12.1" @@ -2185,6 +2190,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, ] +[[package]] +name = "pytest-cov" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage", extra = ["toml"] }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -2730,6 +2749,60 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289", size = 26610, upload-time = "2024-10-24T14:58:28.029Z" }, ] +[[package]] +name = "tomli" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/30/31573e9457673ab10aa432461bee537ce6cef177667deca369efb79df071/tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c", size = 17477, upload-time = "2026-01-11T11:22:38.165Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/d9/3dc2289e1f3b32eb19b9785b6a006b28ee99acb37d1d47f78d4c10e28bf8/tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867", size = 153663, upload-time = "2026-01-11T11:21:45.27Z" }, + { url = "https://files.pythonhosted.org/packages/51/32/ef9f6845e6b9ca392cd3f64f9ec185cc6f09f0a2df3db08cbe8809d1d435/tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9", size = 148469, upload-time = "2026-01-11T11:21:46.873Z" }, + { url = "https://files.pythonhosted.org/packages/d6/c2/506e44cce89a8b1b1e047d64bd495c22c9f71f21e05f380f1a950dd9c217/tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95", size = 236039, upload-time = "2026-01-11T11:21:48.503Z" }, + { url = "https://files.pythonhosted.org/packages/b3/40/e1b65986dbc861b7e986e8ec394598187fa8aee85b1650b01dd925ca0be8/tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76", size = 243007, upload-time = 
"2026-01-11T11:21:49.456Z" }, + { url = "https://files.pythonhosted.org/packages/9c/6f/6e39ce66b58a5b7ae572a0f4352ff40c71e8573633deda43f6a379d56b3e/tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d", size = 240875, upload-time = "2026-01-11T11:21:50.755Z" }, + { url = "https://files.pythonhosted.org/packages/aa/ad/cb089cb190487caa80204d503c7fd0f4d443f90b95cf4ef5cf5aa0f439b0/tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576", size = 246271, upload-time = "2026-01-11T11:21:51.81Z" }, + { url = "https://files.pythonhosted.org/packages/0b/63/69125220e47fd7a3a27fd0de0c6398c89432fec41bc739823bcc66506af6/tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a", size = 96770, upload-time = "2026-01-11T11:21:52.647Z" }, + { url = "https://files.pythonhosted.org/packages/1e/0d/a22bb6c83f83386b0008425a6cd1fa1c14b5f3dd4bad05e98cf3dbbf4a64/tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa", size = 107626, upload-time = "2026-01-11T11:21:53.459Z" }, + { url = "https://files.pythonhosted.org/packages/2f/6d/77be674a3485e75cacbf2ddba2b146911477bd887dda9d8c9dfb2f15e871/tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614", size = 94842, upload-time = "2026-01-11T11:21:54.831Z" }, + { url = "https://files.pythonhosted.org/packages/3c/43/7389a1869f2f26dba52404e1ef13b4784b6b37dac93bac53457e3ff24ca3/tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1", size = 154894, upload-time = "2026-01-11T11:21:56.07Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/05/2f9bf110b5294132b2edf13fe6ca6ae456204f3d749f623307cbb7a946f2/tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8", size = 149053, upload-time = "2026-01-11T11:21:57.467Z" }, + { url = "https://files.pythonhosted.org/packages/e8/41/1eda3ca1abc6f6154a8db4d714a4d35c4ad90adc0bcf700657291593fbf3/tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a", size = 243481, upload-time = "2026-01-11T11:21:58.661Z" }, + { url = "https://files.pythonhosted.org/packages/d2/6d/02ff5ab6c8868b41e7d4b987ce2b5f6a51d3335a70aa144edd999e055a01/tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1", size = 251720, upload-time = "2026-01-11T11:22:00.178Z" }, + { url = "https://files.pythonhosted.org/packages/7b/57/0405c59a909c45d5b6f146107c6d997825aa87568b042042f7a9c0afed34/tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b", size = 247014, upload-time = "2026-01-11T11:22:01.238Z" }, + { url = "https://files.pythonhosted.org/packages/2c/0e/2e37568edd944b4165735687cbaf2fe3648129e440c26d02223672ee0630/tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51", size = 251820, upload-time = "2026-01-11T11:22:02.727Z" }, + { url = "https://files.pythonhosted.org/packages/5a/1c/ee3b707fdac82aeeb92d1a113f803cf6d0f37bdca0849cb489553e1f417a/tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729", size = 97712, upload-time = "2026-01-11T11:22:03.777Z" }, + { url = 
"https://files.pythonhosted.org/packages/69/13/c07a9177d0b3bab7913299b9278845fc6eaaca14a02667c6be0b0a2270c8/tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da", size = 108296, upload-time = "2026-01-11T11:22:04.86Z" }, + { url = "https://files.pythonhosted.org/packages/18/27/e267a60bbeeee343bcc279bb9e8fbed0cbe224bc7b2a3dc2975f22809a09/tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3", size = 94553, upload-time = "2026-01-11T11:22:05.854Z" }, + { url = "https://files.pythonhosted.org/packages/34/91/7f65f9809f2936e1f4ce6268ae1903074563603b2a2bd969ebbda802744f/tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0", size = 154915, upload-time = "2026-01-11T11:22:06.703Z" }, + { url = "https://files.pythonhosted.org/packages/20/aa/64dd73a5a849c2e8f216b755599c511badde80e91e9bc2271baa7b2cdbb1/tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e", size = 149038, upload-time = "2026-01-11T11:22:07.56Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8a/6d38870bd3d52c8d1505ce054469a73f73a0fe62c0eaf5dddf61447e32fa/tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4", size = 242245, upload-time = "2026-01-11T11:22:08.344Z" }, + { url = "https://files.pythonhosted.org/packages/59/bb/8002fadefb64ab2669e5b977df3f5e444febea60e717e755b38bb7c41029/tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e", size = 250335, upload-time = "2026-01-11T11:22:09.951Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/3d/4cdb6f791682b2ea916af2de96121b3cb1284d7c203d97d92d6003e91c8d/tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c", size = 245962, upload-time = "2026-01-11T11:22:11.27Z" }, + { url = "https://files.pythonhosted.org/packages/f2/4a/5f25789f9a460bd858ba9756ff52d0830d825b458e13f754952dd15fb7bb/tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f", size = 250396, upload-time = "2026-01-11T11:22:12.325Z" }, + { url = "https://files.pythonhosted.org/packages/aa/2f/b73a36fea58dfa08e8b3a268750e6853a6aac2a349241a905ebd86f3047a/tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86", size = 97530, upload-time = "2026-01-11T11:22:13.865Z" }, + { url = "https://files.pythonhosted.org/packages/3b/af/ca18c134b5d75de7e8dc551c5234eaba2e8e951f6b30139599b53de9c187/tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87", size = 108227, upload-time = "2026-01-11T11:22:15.224Z" }, + { url = "https://files.pythonhosted.org/packages/22/c3/b386b832f209fee8073c8138ec50f27b4460db2fdae9ffe022df89a57f9b/tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132", size = 94748, upload-time = "2026-01-11T11:22:16.009Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c4/84047a97eb1004418bc10bdbcfebda209fca6338002eba2dc27cc6d13563/tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6", size = 154725, upload-time = "2026-01-11T11:22:17.269Z" }, + { url = "https://files.pythonhosted.org/packages/a8/5d/d39038e646060b9d76274078cddf146ced86dc2b9e8bbf737ad5983609a0/tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc", size = 148901, upload-time = "2026-01-11T11:22:18.287Z" }, + { url = "https://files.pythonhosted.org/packages/73/e5/383be1724cb30f4ce44983d249645684a48c435e1cd4f8b5cded8a816d3c/tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66", size = 243375, upload-time = "2026-01-11T11:22:19.154Z" }, + { url = "https://files.pythonhosted.org/packages/31/f0/bea80c17971c8d16d3cc109dc3585b0f2ce1036b5f4a8a183789023574f2/tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d", size = 250639, upload-time = "2026-01-11T11:22:20.168Z" }, + { url = "https://files.pythonhosted.org/packages/2c/8f/2853c36abbb7608e3f945d8a74e32ed3a74ee3a1f468f1ffc7d1cb3abba6/tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702", size = 246897, upload-time = "2026-01-11T11:22:21.544Z" }, + { url = "https://files.pythonhosted.org/packages/49/f0/6c05e3196ed5337b9fe7ea003e95fd3819a840b7a0f2bf5a408ef1dad8ed/tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8", size = 254697, upload-time = "2026-01-11T11:22:23.058Z" }, + { url = "https://files.pythonhosted.org/packages/f3/f5/2922ef29c9f2951883525def7429967fc4d8208494e5ab524234f06b688b/tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776", size = 98567, upload-time = "2026-01-11T11:22:24.033Z" }, + { url = "https://files.pythonhosted.org/packages/7b/31/22b52e2e06dd2a5fdbc3ee73226d763b184ff21fc24e20316a44ccc4d96b/tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475", 
size = 108556, upload-time = "2026-01-11T11:22:25.378Z" }, + { url = "https://files.pythonhosted.org/packages/48/3d/5058dff3255a3d01b705413f64f4306a141a8fd7a251e5a495e3f192a998/tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2", size = 96014, upload-time = "2026-01-11T11:22:26.138Z" }, + { url = "https://files.pythonhosted.org/packages/b8/4e/75dab8586e268424202d3a1997ef6014919c941b50642a1682df43204c22/tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9", size = 163339, upload-time = "2026-01-11T11:22:27.143Z" }, + { url = "https://files.pythonhosted.org/packages/06/e3/b904d9ab1016829a776d97f163f183a48be6a4deb87304d1e0116a349519/tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0", size = 159490, upload-time = "2026-01-11T11:22:28.399Z" }, + { url = "https://files.pythonhosted.org/packages/e3/5a/fc3622c8b1ad823e8ea98a35e3c632ee316d48f66f80f9708ceb4f2a0322/tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df", size = 269398, upload-time = "2026-01-11T11:22:29.345Z" }, + { url = "https://files.pythonhosted.org/packages/fd/33/62bd6152c8bdd4c305ad9faca48f51d3acb2df1f8791b1477d46ff86e7f8/tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d", size = 276515, upload-time = "2026-01-11T11:22:30.327Z" }, + { url = "https://files.pythonhosted.org/packages/4b/ff/ae53619499f5235ee4211e62a8d7982ba9e439a0fb4f2f351a93d67c1dd2/tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f", size = 273806, upload-time = "2026-01-11T11:22:32.56Z" }, 
+ { url = "https://files.pythonhosted.org/packages/47/71/cbca7787fa68d4d0a9f7072821980b39fbb1b6faeb5f5cf02f4a5559fa28/tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b", size = 281340, upload-time = "2026-01-11T11:22:33.505Z" }, + { url = "https://files.pythonhosted.org/packages/f5/00/d595c120963ad42474cf6ee7771ad0d0e8a49d0f01e29576ee9195d9ecdf/tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087", size = 108106, upload-time = "2026-01-11T11:22:34.451Z" }, + { url = "https://files.pythonhosted.org/packages/de/69/9aa0c6a505c2f80e519b43764f8b4ba93b5a0bbd2d9a9de6e2b24271b9a5/tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd", size = 120504, upload-time = "2026-01-11T11:22:35.764Z" }, + { url = "https://files.pythonhosted.org/packages/b3/9f/f1668c281c58cfae01482f7114a4b88d345e4c140386241a1a24dcc9e7bc/tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4", size = 99561, upload-time = "2026-01-11T11:22:36.624Z" }, + { url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" }, +] + [[package]] name = "toolz" version = "1.1.0"