Skip to content
1,007 changes: 0 additions & 1,007 deletions RATE_LIMIT_SECURITY_AUDIT.md

This file was deleted.

240 changes: 133 additions & 107 deletions docs/ISSUES.md

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions liminallm/api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,8 @@ def _get_system_settings(runtime) -> dict:
"chat_rate_limit_per_minute": 60,
"chat_rate_limit_window_seconds": 60,
"login_rate_limit_per_minute": 10,
"refresh_rate_limit_per_minute": 20,
"refresh_rate_limit_window_seconds": 60,
"signup_rate_limit_per_minute": 5,
"reset_rate_limit_per_minute": 5,
"mfa_rate_limit_per_minute": 5,
Expand Down Expand Up @@ -1109,13 +1111,21 @@ async def oauth_callback(
async def refresh_tokens(
body: TokenRefreshRequest,
response: Response,
request: Request,
authorization: Optional[str] = Header(None),
x_tenant_id: Optional[str] = Header(
None, convert_underscores=False, alias="X-Tenant-ID"
),
):
runtime = get_runtime()
tenant_hint = body.tenant_id or x_tenant_id
client_ip = request.client.host if request.client else "unknown"
await _enforce_rate_limit(
runtime,
f"refresh:{client_ip}",
_get_rate_limit(runtime, "refresh_rate_limit_per_minute"),
_get_rate_limit(runtime, "refresh_rate_limit_window_seconds"),
)
user, session, tokens = await runtime.auth.refresh_tokens(
body.refresh_token, tenant_hint=tenant_hint
)
Expand Down Expand Up @@ -3133,6 +3143,8 @@ async def update_system_settings(
"chat_rate_limit_per_minute",
"chat_rate_limit_window_seconds",
"login_rate_limit_per_minute",
"refresh_rate_limit_per_minute",
"refresh_rate_limit_window_seconds",
"signup_rate_limit_per_minute",
"reset_rate_limit_per_minute",
"mfa_rate_limit_per_minute",
Expand Down Expand Up @@ -3191,6 +3203,8 @@ async def update_system_settings(
"chat_rate_limit_per_minute",
"chat_rate_limit_window_seconds",
"login_rate_limit_per_minute",
"refresh_rate_limit_per_minute",
"refresh_rate_limit_window_seconds",
"signup_rate_limit_per_minute",
"reset_rate_limit_per_minute",
"mfa_rate_limit_per_minute",
Expand Down
54 changes: 40 additions & 14 deletions liminallm/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@

from liminallm.api.error_handling import register_exception_handlers
from liminallm.api.routes import get_admin_user, router
from liminallm.config import Settings
from liminallm.logging import get_logger, set_correlation_id

logger = get_logger(__name__)

_settings = Settings.from_env()

# Version info per SPEC §18
__version__ = "0.1.0"
__build__ = os.getenv("BUILD_SHA", "dev")
__build__ = _settings.build_sha


_cleanup_task: asyncio.Task | None = None
Expand Down Expand Up @@ -77,9 +80,8 @@ async def lifespan(app: FastAPI):


def _allowed_origins() -> List[str]:
env_value = os.getenv("CORS_ALLOW_ORIGINS")
if env_value:
return [origin.strip() for origin in env_value.split(",") if origin.strip()]
if _settings.cors_allow_origins:
return _settings.cors_allow_origins
# Default to common local dev hosts; avoid wildcard when credentials are enabled.
return [
"http://localhost",
Expand All @@ -91,10 +93,7 @@ def _allowed_origins() -> List[str]:


def _allow_credentials() -> bool:
flag = os.getenv("CORS_ALLOW_CREDENTIALS")
if flag is None:
return False
return flag.lower() in {"1", "true", "yes", "on"}
return _settings.cors_allow_credentials


app.add_middleware(
Expand Down Expand Up @@ -199,12 +198,7 @@ async def add_security_headers(request, call_next):
response.headers.setdefault(
"Permissions-Policy", "camera=(), microphone=(), geolocation=(), payment=()"
)
if request.url.scheme == "https" and os.getenv("ENABLE_HSTS", "false").lower() in {
"1",
"true",
"yes",
"on",
}:
if request.url.scheme == "https" and _settings.enable_hsts:
response.headers.setdefault(
"Strict-Transport-Security", "max-age=63072000; includeSubDomains"
)
Expand Down Expand Up @@ -432,6 +426,38 @@ async def metrics() -> Response:
lines.append('# TYPE liminallm_database_healthy gauge')
lines.append(f'liminallm_database_healthy {db_healthy}')

# Training job activity
list_jobs = getattr(runtime.store, "list_training_jobs", None)
if callable(list_jobs):
try:
jobs = list_jobs()
active = len([j for j in jobs if j.status in {"queued", "running"}])
lines.append('# HELP liminallm_training_jobs_active Active training jobs')
lines.append('# TYPE liminallm_training_jobs_active gauge')
lines.append(f'liminallm_training_jobs_active {active}')
except Exception as exc:
logger.warning("metrics_training_jobs_failed", error=str(exc))

# Preference event ingestion rate proxy
if hasattr(runtime.store, "list_preference_events"):
try:
events = runtime.store.list_preference_events(user_id=None) # type: ignore[arg-type]
lines.append('# HELP liminallm_preference_events_total Total recorded preference events')
lines.append('# TYPE liminallm_preference_events_total counter')
lines.append(f'liminallm_preference_events_total {len(events)}')
except Exception as exc:
logger.warning("metrics_preference_events_failed", error=str(exc))

# Adapter usage counts
if hasattr(runtime.store, "list_artifacts"):
try:
adapters = runtime.store.list_artifacts(kind="adapter", owner_user_id=None) # type: ignore[arg-type]
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Wrong parameter name in metrics adapter count query

The call list_artifacts(kind="adapter", ...) uses parameter name kind, but the actual function signature uses kind_filter. Since there's no **kwargs in the function, this raises TypeError: got an unexpected keyword argument 'kind'. The exception is caught and logged as a warning, but the adapter count metric will never be populated.

Fix in Cursor Fix in Web

lines.append('# HELP liminallm_adapters_total Adapters stored in system')
lines.append('# TYPE liminallm_adapters_total gauge')
lines.append(f'liminallm_adapters_total {len(adapters)}')
except Exception as exc:
logger.warning("metrics_adapters_failed", error=str(exc))

except Exception as exc:
logger.error("metrics_collection_failed", error=str(exc))

Expand Down
69 changes: 68 additions & 1 deletion liminallm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing import Any

from dotenv import dotenv_values
from pydantic import BaseModel, ConfigDict, Field, field_validator
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator

from liminallm.logging import get_logger

Expand Down Expand Up @@ -338,6 +338,24 @@ class Settings(BaseModel):
"ALLOW_SIGNUP",
description="Allow new user signups (overridable via admin UI)",
)
log_level: str = env_field("INFO", "LOG_LEVEL")
log_json: bool = env_field(True, "LOG_JSON")
log_dev_mode: bool = env_field(False, "LOG_DEV_MODE")
build_sha: str = env_field("dev", "BUILD_SHA")
cors_allow_origins: list[str] = env_field(
[
"http://localhost",
"http://localhost:3000",
"http://localhost:5173",
"http://127.0.0.1:3000",
"http://127.0.0.1:5173",
],
"CORS_ALLOW_ORIGINS",
description="Comma-separated CORS origins (overridable via admin UI)",
)
cors_allow_credentials: bool = env_field(False, "CORS_ALLOW_CREDENTIALS")
enable_hsts: bool = env_field(False, "ENABLE_HSTS")
mfa_secret_key: str | None = env_field(None, "MFA_SECRET_KEY")
use_memory_store: bool = env_field(False, "USE_MEMORY_STORE")
allow_redis_fallback_dev: bool = env_field(False, "ALLOW_REDIS_FALLBACK_DEV")
test_mode: bool = env_field(
Expand Down Expand Up @@ -459,9 +477,58 @@ class Settings(BaseModel):
"TRAINING_WORKER_POLL_INTERVAL",
description="Training worker poll interval in seconds (overridable via admin UI)",
)
max_active_training_jobs: int = env_field(
10,
"MAX_ACTIVE_TRAINING_JOBS",
description="Global cap on simultaneously active training jobs",
)

model_config = ConfigDict(extra="ignore")

@field_validator("cors_allow_origins", mode="before")
@classmethod
def _parse_cors_origins(cls, value: Any) -> list[str]:
    """Coerce the CORS_ALLOW_ORIGINS value into a clean list of origins.

    Accepts either a comma-separated string (as read from the environment)
    or an already-parsed list (e.g. the field default). ``None`` and any
    unrecognized type collapse to an empty list.

    Consistency fix: the list branch now applies the same strip/drop-empty
    cleanup as the string branch, so list-sourced values cannot carry blank
    or whitespace-padded entries.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [v.strip() for v in value.split(",") if v.strip()]
    if isinstance(value, list):
        # Mirror the string path: strip padding and drop empty entries.
        return [str(v).strip() for v in value if str(v).strip()]
    return []

@field_validator(
    "smtp_port", "training_worker_poll_interval", "tmp_cleanup_interval_seconds", "tmp_max_age_hours", "max_active_training_jobs"
)
@classmethod
def _validate_positive_int(cls, value: int) -> int:
    """Reject zero or negative values for fields that must be positive ints."""
    if value > 0:
        return value
    raise ValueError("must be positive")

@field_validator("smtp_port")
@classmethod
def _validate_smtp_port(cls, value: int) -> int:
    """Ensure the SMTP port falls within the valid TCP port range (1-65535)."""
    if value < 1 or value > 65535:
        raise ValueError("smtp_port must be between 1 and 65535")
    return value

@field_validator("log_level")
@classmethod
def _normalize_log_level(cls, value: str) -> str:
    """Upper-case the configured log level; empty/None falls back to INFO."""
    if not value:
        return "INFO"
    return value.upper()

@model_validator(mode="after")
def _validate_required_pairs(self):
    """Enforce settings that only make sense in pairs.

    Each OAuth provider that has a client_id configured must also supply
    its client_secret, and configuring an SMTP host or user requires an
    SMTP password.
    """
    oauth_pairs = (
        (
            self.oauth_google_client_id,
            self.oauth_google_client_secret,
            "oauth_google_client_secret required when client_id is set",
        ),
        (
            self.oauth_github_client_id,
            self.oauth_github_client_secret,
            "oauth_github_client_secret required when client_id is set",
        ),
        (
            self.oauth_microsoft_client_id,
            self.oauth_microsoft_client_secret,
            "oauth_microsoft_client_secret required when client_id is set",
        ),
    )
    for client_id, client_secret, message in oauth_pairs:
        if client_id and not client_secret:
            raise ValueError(message)
    if (self.smtp_host or self.smtp_user) and not self.smtp_password:
        raise ValueError("smtp_password required when smtp_host or smtp_user is set")
    return self

@classmethod
def from_env(cls) -> "Settings":
env_file_values = dotenv_values(".env")
Expand Down
33 changes: 25 additions & 8 deletions liminallm/service/auth.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import base64
import contextlib
import hashlib
import hmac
import json
Expand Down Expand Up @@ -130,6 +131,7 @@ def __init__(
# Issue 28.4: Thread-safe lock for mutable state dictionaries
# Protects all in-memory fallback state from concurrent access
import threading

self._state_lock = threading.Lock()
self._mfa_challenges: dict[str, tuple[str, datetime]] = {}
# Issue 11.1: In-memory fallback for MFA lockout when Redis unavailable
Expand All @@ -153,6 +155,13 @@ def _now(self) -> datetime:

return datetime.now(timezone.utc)

@contextlib.contextmanager
def _with_state_lock(self):
"""Context manager for thread-safe state dictionary access (Issue 28.4)."""

with self._state_lock:
yield

def cleanup_expired_states(self) -> int:
"""Clean up expired OAuth states, MFA challenges, and email verification tokens.

Expand Down Expand Up @@ -406,7 +415,8 @@ async def start_oauth(
expires_at = self._now() + timedelta(minutes=10)
# Issue 28.4: Thread-safe state mutation
with self._state_lock:
self._oauth_states[state] = (provider, expires_at, tenant_id)
with self._with_state_lock():
self._oauth_states[state] = (provider, expires_at, tenant_id)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Double lock acquisition causes deadlock

The code uses with self._state_lock: immediately followed by with self._with_state_lock():, but _with_state_lock() internally acquires the same self._state_lock. Since self._state_lock is a non-reentrant threading.Lock(), not an RLock, this double acquisition causes an immediate deadlock when the code executes. This affects OAuth state management, password reset token handling, and other auth flows.

Additional Locations (2)

Fix in Cursor Fix in Web

if self.cache:
await self.cache.set_oauth_state(state, provider, expires_at, tenant_id)

Expand Down Expand Up @@ -644,14 +654,17 @@ async def complete_oauth(
# Issue 28.4: Thread-safe state mutation
with self._state_lock:
if stored is None:
stored = self._oauth_states.pop(state, None)
with self._with_state_lock():
stored = self._oauth_states.pop(state, None)
else:
self._oauth_states.pop(state, None)
with self._with_state_lock():
self._oauth_states.pop(state, None)
now = self._now()

async def _clear_oauth_state() -> None:
with self._state_lock:
self._oauth_states.pop(state, None)
with self._with_state_lock():
self._oauth_states.pop(state, None)
if self.cache and not cache_state_used:
await self.cache.pop_oauth_state(state)

Expand Down Expand Up @@ -1201,7 +1214,8 @@ async def initiate_password_reset(self, email: str) -> str:
else:
# Issue 11.2: In-memory fallback for password reset tokens
with self._state_lock:
self._password_reset_tokens[token] = (email, expires_at)
with self._with_state_lock():
self._password_reset_tokens[token] = (email, expires_at)
self.logger.info(
"password_reset_requested",
email_hash=hashlib.sha256(email.encode()).hexdigest(),
Expand All @@ -1215,15 +1229,18 @@ async def complete_password_reset(self, token: str, new_password: str) -> bool:
else:
# Issue 11.2: In-memory fallback for password reset tokens
with self._state_lock:
stored = self._password_reset_tokens.get(token)
with self._with_state_lock():
stored = self._password_reset_tokens.get(token)
if stored:
stored_email, expires_at = stored
if expires_at <= self._now() - self._clock_skew_leeway:
# Remove expired token to prevent memory leak
self._password_reset_tokens.pop(token, None)
with self._with_state_lock():
self._password_reset_tokens.pop(token, None)
else:
email = stored_email
self._password_reset_tokens.pop(token, None)
with self._with_state_lock():
self._password_reset_tokens.pop(token, None)
if not email:
self.logger.warning("password_reset_invalid_token", token_prefix=token[:8])
return False
Expand Down
2 changes: 2 additions & 0 deletions liminallm/service/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ def promote_skill_adapters(
if ratio < positive_ratio:
continue
owner_id = cluster.user_id
visibility = "private" if owner_id else "global"
schema = {
"kind": "adapter.lora",
"scope": "per-user" if cluster.user_id else "global",
Expand All @@ -431,6 +432,7 @@ def promote_skill_adapters(
schema=schema,
description=cluster.description or "Cluster skill adapter",
owner_user_id=owner_id,
visibility=visibility,
)
if self.training and cluster.user_id:
self.training.ensure_user_adapter(
Expand Down
Loading
Loading