From 75568bbbce9e8a61dc06c52099f341c354cb3aa0 Mon Sep 17 00:00:00 2001 From: RaulFD-creator Date: Fri, 19 Dec 2025 10:19:04 +0000 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=20Add=20logistic=20re?= =?UTF-8?q?gression?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data/h_param_search/logreg_class.yml | 31 +++++++++++++++++++ autopeptideml/train/architectures.py | 11 +++++-- 2 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 autopeptideml/data/h_param_search/logreg_class.yml diff --git a/autopeptideml/data/h_param_search/logreg_class.yml b/autopeptideml/data/h_param_search/logreg_class.yml new file mode 100644 index 0000000..9bac92b --- /dev/null +++ b/autopeptideml/data/h_param_search/logreg_class.yml @@ -0,0 +1,31 @@ +penalty: + type: fixed + value: l2 + +solver: + type: categorical + values: + - liblinear + - lbfgs + - saga + - newton-cg + +C: + type: float + min: 1e-3 + max: 1e3 + log: True + +fit_intercept: + type: categorical + values: + - True + - False + +max_iter: + type: fixed + value: 1000 + +tol: + type: fixed + value: 1e-4 diff --git a/autopeptideml/train/architectures.py b/autopeptideml/train/architectures.py index 0efcde5..711bc8e 100644 --- a/autopeptideml/train/architectures.py +++ b/autopeptideml/train/architectures.py @@ -26,8 +26,11 @@ class OnnxModel: :type path: str """ def __init__(self, path: str): + so = rt.SessionOptions() + so.log_severity_level = 3 # 0 = verbose, 1 = info, 2 = warning, 3 = error, 4 = fatal self.session = rt.InferenceSession( - path, providers=['CPUExecutionProvider'] + path, providers=['CPUExecutionProvider'], + sess_options=so ) def predict(self, x: np.ndarray): @@ -237,13 +240,14 @@ def load_sklearn_models(task: str) -> Dict[str, Callable]: raise ImportError("This function requires scikit-learn", "Please try: `pip install scikit-learn`") - from sklearn import (svm, ensemble, neighbors) + from sklearn import (svm, ensemble, neighbors, linear_model) if 'class' in task: arch = { 'knn': neighbors.KNeighborsClassifier, 'svm': svm.SVC, 'rf': ensemble.RandomForestClassifier, 'gradboost': ensemble.GradientBoostingClassifier, + 'logreg': linear_model.LogisticRegression } elif 'reg' in task: @@ -252,7 +256,8 @@ def load_sklearn_models(task: str) -> Dict[str, Callable]: 'svm': svm.SVR, 'rf': ensemble.RandomForestRegressor, 'adaboost': ensemble.AdaBoostRegressor, - 'gradboost': ensemble.GradientBoostingRegressor + 'gradboost': ensemble.GradientBoostingRegressor, + 'linreg': linear_model.LinearRegression } else: raise NotImplementedError( From e870d3c3b9d8ddcb12e81021c41ef490959c534d Mon Sep 17 00:00:00 2001 From: RaulFD-creator Date: Fri, 19 Dec 2025 10:19:36 +0000 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=AA=B2=20Enable=20custom=20hpspace=20?= =?UTF-8?q?to=20be=20applied=20to=20only=20specific=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- autopeptideml/train/trainer.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/autopeptideml/train/trainer.py b/autopeptideml/train/trainer.py index fa16321..b3b672a 100644 --- a/autopeptideml/train/trainer.py +++ b/autopeptideml/train/trainer.py @@ -260,11 +260,10 @@ def _get_hpspace(self, models: List[str], custom_hpspace: dict) -> dict: if models is None: models = ALL_MODELS for model in models: + config_path = osp.join(config_dir, f'{model}_{self.task}.yml') + hpspace = yaml.safe_load(open(config_path)) if model in custom_hpspace: - hpspace = custom_hpspace[model] - else: - config_path = osp.join(config_dir, f'{model}_{self.task}.yml') - hpspace = yaml.safe_load(open(config_path)) + hpspace.update(custom_hpspace[model]) if 'n_jobs' in hpspace: hpspace['n_jobs'] = {'type': 'fixed', 'value': self.n_jobs} if 'random_state' in hpspace: @@ -417,7 +416,6 @@ def _hpo_step(self, trial) -> dict: if self.task == 'reg' and h_m['name'] == 'svm': if 'probability' in h_m['variables']: del h_m['variables']['probability'] - arch = arch(**h_m['variables']) train_x, train_y = x[h_m['representation']][train_idx], y[train_idx] arch.fit(train_x, train_y) From 499f09ea871158ea7c83fd91b6085839a3223255 Mon Sep 17 00:00:00 2001 From: RaulFD-creator Date: Fri, 19 Dec 2025 10:25:20 +0000 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=20Add=20functionality?= =?UTF-8?q?=20for=20downloading=20databases?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- autopeptideml/db/__init__.py | 2 +- autopeptideml/db/negative_sampling.py | 28 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/autopeptideml/db/__init__.py b/autopeptideml/db/__init__.py index cd5ff9d..e40f11c 100644 --- a/autopeptideml/db/__init__.py +++ b/autopeptideml/db/__init__.py @@ -1 +1 @@ -from .negative_sampling import add_negatives_from_db +from .negative_sampling import add_negatives_from_db, setup_databases diff --git a/autopeptideml/db/negative_sampling.py b/autopeptideml/db/negative_sampling.py index 2dc0def..5c5e69f 100644 --- a/autopeptideml/db/negative_sampling.py +++ b/autopeptideml/db/negative_sampling.py @@ -43,6 +43,34 @@ def _length(input_str: List[str], n_jobs: int, MATCHING = {'mw': _mw, 'length': _length} +def setup_databases(): + try: + import gdown + except ImportError: + raise ImportError("This module requires gdown. Try: `pip install gdown`") + + db_dir = osp.join(osp.dirname(__file__), '..', 'data', 'dbs') + if not osp.isdir(db_dir): + os.makedirs(db_dir, exist_ok=True) + + verbose = True + + print("Downloading canonical database...") + path = osp.join(db_dir, 'canonical.csv') + FILE_ID = "189VtkbQ2bVpQlAe2UMBSzt_O4F7EyBWl" + gdown.download(id=FILE_ID, output=path, quiet=verbose) + + print("Downloading non-canonical database...") + path = osp.join(db_dir, 'non-canonical.csv') + FILE_ID = "1U4RXDNx_aijVDJ1oTaRKjo78Yakd3Mg4" + gdown.download(id=FILE_ID, output=path, quiet=verbose) + + print("Downloading negative database...") + path = osp.join(db_dir, 'both.csv') + FILE_ID = "189VtkbQ2bVpQlAe2UMBSzt_O4F7EyBWl" + gdown.download(id=FILE_ID, output=path, quiet=verbose) + + def get_neg_db(target_db: str, verbose: bool, return_path: bool = False) -> pd.DataFrame: """ Retrieves a precompiled database of negative samples. From f48af8c7fc04469dd0f235dbeb64564f1cda429f Mon Sep 17 00:00:00 2001 From: RaulFD-creator Date: Fri, 9 Jan 2026 15:17:26 +0000 Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=20Specify=20support?= =?UTF-8?q?=20for=20linear=20regression=20and=20logreg?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- autopeptideml/train/architectures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autopeptideml/train/architectures.py b/autopeptideml/train/architectures.py index 711bc8e..29cad8c 100644 --- a/autopeptideml/train/architectures.py +++ b/autopeptideml/train/architectures.py @@ -12,7 +12,7 @@ from skl2onnx import to_onnx -SKLEARN_MODELS = ['knn', 'svm', 'rf', 'gradboost'] +SKLEARN_MODELS = ['knn', 'svm', 'rf', 'gradboost', 'logreg', 'linreg'] ALL_MODELS = SKLEARN_MODELS + ['lightgbm', 'xgboost']