From 75568bbbce9e8a61dc06c52099f341c354cb3aa0 Mon Sep 17 00:00:00 2001
From: RaulFD-creator <raulfdz9@gmail.com>
Date: Fri, 19 Dec 2025 10:19:04 +0000
Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=20Add=20logistic=20re?=
 =?UTF-8?q?gression?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../data/h_param_search/logreg_class.yml      | 31 +++++++++++++++++++
 autopeptideml/train/architectures.py          | 11 +++++--
 2 files changed, 39 insertions(+), 3 deletions(-)
 create mode 100644 autopeptideml/data/h_param_search/logreg_class.yml

diff --git a/autopeptideml/data/h_param_search/logreg_class.yml b/autopeptideml/data/h_param_search/logreg_class.yml
new file mode 100644
index 0000000..9bac92b
--- /dev/null
+++ b/autopeptideml/data/h_param_search/logreg_class.yml
@@ -0,0 +1,31 @@
+penalty:
+  type: fixed
+  value: l2
+
+solver:
+  type: categorical
+  values:
+    - liblinear
+    - lbfgs
+    - saga
+    - newton-cg
+
+C:
+  type: float
+  min: 0.001  # decimal on purpose: PyYAML loads a dot-less `1e-3` as a string
+  max: 1000.0  # decimal on purpose: PyYAML loads `1e3` as a string
+  log: True
+
+fit_intercept:
+  type: categorical
+  values:
+    - True
+    - False
+
+max_iter:
+  type: fixed
+  value: 1000
+
+tol:
+  type: fixed
+  value: 0.0001  # decimal on purpose: PyYAML loads `1e-4` as a string
diff --git a/autopeptideml/train/architectures.py b/autopeptideml/train/architectures.py
index 0efcde5..711bc8e 100644
--- a/autopeptideml/train/architectures.py
+++ b/autopeptideml/train/architectures.py
@@ -26,8 +26,11 @@ class OnnxModel:
     :type path: str
     """
     def __init__(self, path: str):
+        so = rt.SessionOptions()
+        so.log_severity_level = 3  # 3 = error and above; hides verbose (0), info (1), warning (2)
         self.session = rt.InferenceSession(
-            path, providers=['CPUExecutionProvider']
+            path, providers=['CPUExecutionProvider'],
+            sess_options=so
         )
 
     def predict(self, x: np.ndarray):
@@ -237,13 +240,14 @@ def load_sklearn_models(task: str) -> Dict[str, Callable]:
         raise ImportError("This function requires scikit-learn",
                           "Please try: `pip install scikit-learn`")
 
-    from sklearn import (svm, ensemble, neighbors)
+    from sklearn import (svm, ensemble, neighbors, linear_model)
     if 'class' in task:
         arch = {
             'knn': neighbors.KNeighborsClassifier,
             'svm': svm.SVC,
             'rf': ensemble.RandomForestClassifier,
             'gradboost': ensemble.GradientBoostingClassifier,
+            'logreg': linear_model.LogisticRegression
 
         }
     elif 'reg' in task:
@@ -252,7 +256,8 @@ def load_sklearn_models(task: str) -> Dict[str, Callable]:
             'svm': svm.SVR,
             'rf': ensemble.RandomForestRegressor,
             'adaboost': ensemble.AdaBoostRegressor,
-            'gradboost': ensemble.GradientBoostingRegressor
+            'gradboost': ensemble.GradientBoostingRegressor,
+            'linreg': linear_model.LinearRegression
         }
     else:
         raise NotImplementedError(

From e870d3c3b9d8ddcb12e81021c41ef490959c534d Mon Sep 17 00:00:00 2001
From: RaulFD-creator <raulfdz9@gmail.com>
Date: Fri, 19 Dec 2025 10:19:36 +0000
Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=AA=B2=20Enable=20custom=20hpspace=20?=
 =?UTF-8?q?to=20be=20applied=20to=20only=20specific=20models?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 autopeptideml/train/trainer.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/autopeptideml/train/trainer.py b/autopeptideml/train/trainer.py
index fa16321..b3b672a 100644
--- a/autopeptideml/train/trainer.py
+++ b/autopeptideml/train/trainer.py
@@ -260,11 +260,10 @@ def _get_hpspace(self, models: List[str], custom_hpspace: dict) -> dict:
         if models is None:
             models = ALL_MODELS
         for model in models:
+            config_path = osp.join(config_dir, f'{model}_{self.task}.yml')
+            hpspace = yaml.safe_load(open(config_path))
             if model in custom_hpspace:
-                hpspace = custom_hpspace[model]
-            else:
-                config_path = osp.join(config_dir, f'{model}_{self.task}.yml')
-                hpspace = yaml.safe_load(open(config_path))
+                hpspace.update(custom_hpspace[model])
             if 'n_jobs' in hpspace:
                 hpspace['n_jobs'] = {'type': 'fixed', 'value': self.n_jobs}
             if 'random_state' in hpspace:
@@ -417,7 +416,6 @@ def _hpo_step(self, trial) -> dict:
                 if self.task == 'reg' and h_m['name'] == 'svm':
                     if 'probability' in h_m['variables']:
                         del h_m['variables']['probability']
-
                 arch = arch(**h_m['variables'])
                 train_x, train_y = x[h_m['representation']][train_idx], y[train_idx]
                 arch.fit(train_x, train_y)

From 499f09ea871158ea7c83fd91b6085839a3223255 Mon Sep 17 00:00:00 2001
From: RaulFD-creator <raulfdz9@gmail.com>
Date: Fri, 19 Dec 2025 10:25:20 +0000
Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=20Add=20functionality?=
 =?UTF-8?q?=20for=20downloading=20databases?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 autopeptideml/db/__init__.py          |  2 +-
 autopeptideml/db/negative_sampling.py | 28 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/autopeptideml/db/__init__.py b/autopeptideml/db/__init__.py
index cd5ff9d..e40f11c 100644
--- a/autopeptideml/db/__init__.py
+++ b/autopeptideml/db/__init__.py
@@ -1 +1 @@
-from .negative_sampling import add_negatives_from_db
+from .negative_sampling import add_negatives_from_db, setup_databases
diff --git a/autopeptideml/db/negative_sampling.py b/autopeptideml/db/negative_sampling.py
index 2dc0def..5c5e69f 100644
--- a/autopeptideml/db/negative_sampling.py
+++ b/autopeptideml/db/negative_sampling.py
@@ -43,6 +43,34 @@ def _length(input_str: List[str], n_jobs: int,
 MATCHING = {'mw': _mw, 'length': _length}
 
 
+def setup_databases(verbose: bool = True):
+    """Download the canonical, non-canonical and negative databases.
+
+    :param verbose: Show gdown's download output, defaults to True
+    :raises ImportError: If the optional `gdown` package is missing
+    """
+    try:
+        import gdown
+    except ImportError as err:
+        raise ImportError(
+            "This module requires gdown. Try: `pip install gdown`"
+        ) from err
+
+    db_dir = osp.join(osp.dirname(__file__), '..', 'data', 'dbs')
+    os.makedirs(db_dir, exist_ok=True)  # no-op when the folder exists
+    # NOTE(review): 'both.csv' reuses the canonical file id -- confirm.
+    databases = [
+        ('canonical', 'canonical.csv', "189VtkbQ2bVpQlAe2UMBSzt_O4F7EyBWl"),
+        ('non-canonical', 'non-canonical.csv', "1U4RXDNx_aijVDJ1oTaRKjo78Yakd3Mg4"),
+        ('negative', 'both.csv', "189VtkbQ2bVpQlAe2UMBSzt_O4F7EyBWl"),
+    ]
+    for label, filename, file_id in databases:
+        print(f"Downloading {label} database...")
+        path = osp.join(db_dir, filename)
+        # gdown's `quiet` flag is the inverse of `verbose`.
+        gdown.download(id=file_id, output=path, quiet=not verbose)
+
+
 def get_neg_db(target_db: str, verbose: bool, return_path: bool = False) -> pd.DataFrame:
     """
     Retrieves a precompiled database of negative samples.

From f48af8c7fc04469dd0f235dbeb64564f1cda429f Mon Sep 17 00:00:00 2001
From: RaulFD-creator <raulfdz9@gmail.com>
Date: Fri, 9 Jan 2026 15:17:26 +0000
Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=20Specify=20support?=
 =?UTF-8?q?=20for=20linear=20regression=20and=20logreg?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 autopeptideml/train/architectures.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autopeptideml/train/architectures.py b/autopeptideml/train/architectures.py
index 711bc8e..29cad8c 100644
--- a/autopeptideml/train/architectures.py
+++ b/autopeptideml/train/architectures.py
@@ -12,7 +12,7 @@
 from skl2onnx import to_onnx
 
 
-SKLEARN_MODELS = ['knn', 'svm', 'rf', 'gradboost']
+SKLEARN_MODELS = ['knn', 'svm', 'rf', 'gradboost', 'logreg', 'linreg']
 ALL_MODELS = SKLEARN_MODELS + ['lightgbm', 'xgboost']