Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions autopeptideml/data/h_param_search/logreg_class.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Hyperparameter search space for LogisticRegression (classification).
# NOTE: scientific-notation numbers are written with an explicit mantissa dot
# (1.0e-3, not 1e-3) because PyYAML's yaml.safe_load — used by the trainer to
# read this file — otherwise parses them as strings, not floats.
penalty:
  type: fixed
  value: l2

solver:
  type: categorical
  values:
    - liblinear
    - lbfgs
    - saga
    - newton-cg

C:
  type: float
  min: 1.0e-3
  max: 1000.0
  log: True

fit_intercept:
  type: categorical
  values:
    - True
    - False

max_iter:
  type: fixed
  value: 1000

tol:
  type: fixed
  value: 1.0e-4
2 changes: 1 addition & 1 deletion autopeptideml/db/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .negative_sampling import add_negatives_from_db
from .negative_sampling import add_negatives_from_db, setup_databases
28 changes: 28 additions & 0 deletions autopeptideml/db/negative_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,34 @@ def _length(input_str: List[str], n_jobs: int,
MATCHING = {'mw': _mw, 'length': _length}


def setup_databases(verbose: bool = True):
    """Download the precompiled negative-sampling databases from Google Drive
    into ``autopeptideml/data/dbs``.

    Fetches three CSV files (``canonical.csv``, ``non-canonical.csv`` and
    ``both.csv``) via :func:`gdown.download`, creating the target directory
    if it does not exist.

    :param verbose: If ``True`` (default), print progress messages and show
        gdown's download progress bar.
    :type verbose: bool
    :raises ImportError: If the optional ``gdown`` dependency is missing.
    """
    try:
        import gdown
    except ImportError:
        raise ImportError("This module requires gdown. Try: `pip install gdown`")

    db_dir = osp.join(osp.dirname(__file__), '..', 'data', 'dbs')
    # exist_ok=True makes a prior isdir() check unnecessary.
    os.makedirs(db_dir, exist_ok=True)

    # NOTE(review): 'canonical.csv' and 'both.csv' share the same Drive file
    # ID — this looks like a copy-paste mistake; confirm the intended ID for
    # the negative ('both.csv') database.
    downloads = [
        ("canonical", 'canonical.csv', "189VtkbQ2bVpQlAe2UMBSzt_O4F7EyBWl"),
        ("non-canonical", 'non-canonical.csv', "1U4RXDNx_aijVDJ1oTaRKjo78Yakd3Mg4"),
        ("negative", 'both.csv', "189VtkbQ2bVpQlAe2UMBSzt_O4F7EyBWl"),
    ]
    for label, filename, file_id in downloads:
        if verbose:
            print(f"Downloading {label} database...")
        # Bug fix: the original passed quiet=verbose, which *silenced* gdown
        # exactly when verbose output was requested.
        gdown.download(id=file_id,
                       output=osp.join(db_dir, filename),
                       quiet=not verbose)


def get_neg_db(target_db: str, verbose: bool, return_path: bool = False) -> pd.DataFrame:
"""
Retrieves a precompiled database of negative samples.
Expand Down
13 changes: 9 additions & 4 deletions autopeptideml/train/architectures.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from skl2onnx import to_onnx


# Registry of supported model identifiers. SKLEARN_MODELS maps to classes
# resolved in load_sklearn_models(); ALL_MODELS additionally includes the
# gradient-boosting libraries with their own bindings.
# (Diff residue removed: the pre-merge SKLEARN_MODELS line duplicated this one
# without 'logreg'/'linreg'.)
SKLEARN_MODELS = ['knn', 'svm', 'rf', 'gradboost', 'logreg', 'linreg']
ALL_MODELS = SKLEARN_MODELS + ['lightgbm', 'xgboost']


Expand All @@ -26,8 +26,11 @@ class OnnxModel:
:type path: str
"""
def __init__(self, path: str):
    """Load an ONNX model for CPU inference.

    :param path: Filesystem path to the serialized ``.onnx`` model.
    :type path: str
    """
    # Diff residue removed: the page pasted both the pre- and post-merge
    # InferenceSession call; this is the post-merge version.
    so = rt.SessionOptions()
    # Suppress warnings and below: 0=verbose, 1=info, 2=warning, 3=error, 4=fatal.
    so.log_severity_level = 3
    self.session = rt.InferenceSession(
        path, providers=['CPUExecutionProvider'],
        sess_options=so
    )

def predict(self, x: np.ndarray):
Expand Down Expand Up @@ -237,13 +240,14 @@ def load_sklearn_models(task: str) -> Dict[str, Callable]:
raise ImportError("This function requires scikit-learn",
"Please try: `pip install scikit-learn`")

from sklearn import (svm, ensemble, neighbors)
from sklearn import (svm, ensemble, neighbors, linear_model)
if 'class' in task:
arch = {
'knn': neighbors.KNeighborsClassifier,
'svm': svm.SVC,
'rf': ensemble.RandomForestClassifier,
'gradboost': ensemble.GradientBoostingClassifier,
'logreg': linear_model.LogisticRegression

}
elif 'reg' in task:
Expand All @@ -252,7 +256,8 @@ def load_sklearn_models(task: str) -> Dict[str, Callable]:
'svm': svm.SVR,
'rf': ensemble.RandomForestRegressor,
'adaboost': ensemble.AdaBoostRegressor,
'gradboost': ensemble.GradientBoostingRegressor
'gradboost': ensemble.GradientBoostingRegressor,
'linreg': linear_model.LinearRegression
}
else:
raise NotImplementedError(
Expand Down
8 changes: 3 additions & 5 deletions autopeptideml/train/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,11 +260,10 @@ def _get_hpspace(self, models: List[str], custom_hpspace: dict) -> dict:
if models is None:
models = ALL_MODELS
for model in models:
config_path = osp.join(config_dir, f'{model}_{self.task}.yml')
hpspace = yaml.safe_load(open(config_path))
if model in custom_hpspace:
hpspace = custom_hpspace[model]
else:
config_path = osp.join(config_dir, f'{model}_{self.task}.yml')
hpspace = yaml.safe_load(open(config_path))
hpspace.update(custom_hpspace[model])
if 'n_jobs' in hpspace:
hpspace['n_jobs'] = {'type': 'fixed', 'value': self.n_jobs}
if 'random_state' in hpspace:
Expand Down Expand Up @@ -417,7 +416,6 @@ def _hpo_step(self, trial) -> dict:
if self.task == 'reg' and h_m['name'] == 'svm':
if 'probability' in h_m['variables']:
del h_m['variables']['probability']

arch = arch(**h_m['variables'])
train_x, train_y = x[h_m['representation']][train_idx], y[train_idx]
arch.fit(train_x, train_y)
Expand Down