From cc6cc67fd06d1b66553a1a3d04bc4bc5a51c8ac7 Mon Sep 17 00:00:00 2001 From: Keenan Date: Fri, 30 Jan 2026 08:40:24 -0500 Subject: [PATCH 01/17] managed identity refactor --- backend/api/citations/router.py | 93 +++++++++----- backend/api/core/cit_utils.py | 24 +++- backend/api/core/config.py | 13 +- backend/api/services/azure_openai_client.py | 44 +++++-- backend/api/services/cit_db_service.py | 44 +++---- backend/api/services/postgres_auth.py | 132 ++++++++++++++++++++ backend/api/services/sr_db_service.py | 80 ++++++------ backend/api/services/storage.py | 13 +- backend/api/services/user_db.py | 18 +-- backend/api/sr/router.py | 22 +++- backend/main.py | 12 +- 11 files changed, 370 insertions(+), 125 deletions(-) create mode 100644 backend/api/services/postgres_auth.py diff --git a/backend/api/citations/router.py b/backend/api/citations/router.py index 2833a4c1..c18d7752 100644 --- a/backend/api/citations/router.py +++ b/backend/api/citations/router.py @@ -31,6 +31,8 @@ import psycopg2 import psycopg2.extras +from api.services.postgres_auth import pgsql_entra_auth_configured + from ..services.sr_db_service import srdb_service @@ -42,6 +44,34 @@ router = APIRouter() +def _get_db_conn_str() -> Optional[str]: + """ + Get database connection string for PostgreSQL. + + If POSTGRES_URI is set, returns it directly (local development). + If Entra ID env variables are configured (POSTGRES_HOST, POSTGRES_DATABASE, POSTGRES_USER), + returns None to signal that connect_postgres() should use Entra ID authentication. 
+ """ + if settings.POSTGRES_URI: + return settings.POSTGRES_URI + + # If Entra ID config is available, return None to let connect_postgres use token auth + if settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER: + return None + + # No configuration available - return None, let downstream handle the error + return None + + +def _is_postgres_configured() -> bool: + """ + Check if PostgreSQL is configured via Entra ID env vars or connection string. + """ + has_entra_config = settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER + has_uri_config = settings.POSTGRES_URI + return bool(has_entra_config or has_uri_config) + + class UploadResult(BaseModel): sr_id: str db_name: str @@ -88,7 +118,7 @@ async def upload_screening_csv( - The SR must exist and the user must be a member of the SR (or owner). """ - db_conn_str = settings.POSTGRES_URI + db_conn_str = _get_db_conn_str() try: sr, screening, _ = await load_sr_and_check(sr_id, current_user, db_conn_str, srdb_service, require_screening=False) except HTTPException: @@ -96,13 +126,13 @@ async def upload_screening_csv( except Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to load systematic review or screening: {e}") - # Check admin DSN (use centralized settings) - admin_dsn = settings.POSTGRES_URI - if not admin_dsn: + # Check admin DSN (use centralized settings) - need either Entra ID config or POSTGRES_URI + if not _is_postgres_configured(): raise HTTPException( status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Postgres admin DSN not configured. Set POSTGRES_ADMIN_DSN or DATABASE_URL in configuration/environment.", + detail="Postgres not configured. 
Set POSTGRES_HOST/DATABASE/USER for Entra ID auth, or POSTGRES_URI for local dev.", ) + admin_dsn = _get_db_conn_str() # Read CSV content include_columns = None @@ -132,20 +162,22 @@ async def upload_screening_csv( # Save DB connection metadata into SR Mongo doc try: - parsed = _parse_dsn(admin_dsn) - # construct a connection string for the new DB (do not alter credentials - reuse admin DSN but point to DB) - if "://" in admin_dsn: - import urllib.parse as up - - p = up.urlparse(admin_dsn) - new_path = "/" + db_name - new_p = p._replace(path=new_path) - db_conn = up.urlunparse(new_p) - else: - if "dbname=" in admin_dsn: - db_conn = re.sub(r"dbname=[^ ]+", f"dbname={db_name}", admin_dsn) + db_conn = None + if not pgsql_entra_auth_configured(): + parsed = _parse_dsn(admin_dsn) + # construct a connection string for the new DB (do not alter credentials - reuse admin DSN but point to DB) + if "://" in admin_dsn: + import urllib.parse as up + + p = up.urlparse(admin_dsn) + new_path = "/" + db_name + new_p = p._replace(path=new_path) + db_conn = up.urlunparse(new_p) else: - db_conn = f"{admin_dsn} dbname={db_name}" + if "dbname=" in admin_dsn: + db_conn = re.sub(r"dbname=[^ ]+", f"dbname={db_name}", admin_dsn) + else: + db_conn = f"{admin_dsn} dbname={db_name}" screening_info = { "screening_db": { @@ -159,7 +191,7 @@ async def upload_screening_csv( # Update SR document with screening DB info using PostgreSQL await run_in_threadpool( srdb_service.update_screening_db_info, - settings.POSTGRES_URI, + _get_db_conn_str(), sr_id, screening_info["screening_db"] ) @@ -189,7 +221,7 @@ async def list_citation_ids( Returns a simple list of integers (the 'id' primary key from the citations table). 
""" - db_conn_str = settings.POSTGRES_URI + db_conn_str = _get_db_conn_str() try: sr, screening, db_conn = await load_sr_and_check(sr_id, current_user, db_conn_str, srdb_service) except HTTPException: @@ -226,7 +258,7 @@ async def get_citation_by_id( Returns: a JSON object representing the citation row (keys are DB column names). """ - db_conn_str = settings.POSTGRES_URI + db_conn_str = _get_db_conn_str() try: sr, screening, db_conn = await load_sr_and_check(sr_id, current_user, db_conn_str, srdb_service) except HTTPException: @@ -277,7 +309,7 @@ async def build_combined_citation( the format ": \\n" for each included column, in the order provided. """ - db_conn_str = settings.POSTGRES_URI + db_conn_str = _get_db_conn_str() try: sr, screening, db_conn = await load_sr_and_check(sr_id, current_user, db_conn_str, srdb_service) except HTTPException: @@ -330,7 +362,7 @@ async def upload_citation_fulltext( to the storage path (container/blob). """ - db_conn_str = settings.POSTGRES_URI + db_conn_str = _get_db_conn_str() try: sr, screening, db_conn = await load_sr_and_check(sr_id, current_user, db_conn_str, srdb_service) except HTTPException: @@ -431,7 +463,7 @@ async def hard_delete_screening_resources(sr_id: str, current_user: Dict[str, An - POSTGRES_ADMIN_DSN or DATABASE_URL must be configured in settings. 
""" - db_conn_str = settings.POSTGRES_URI + db_conn_str = _get_db_conn_str() try: sr, screening, db_conn = await load_sr_and_check(sr_id, current_user, db_conn_str, srdb_service) except HTTPException: @@ -447,12 +479,13 @@ async def hard_delete_screening_resources(sr_id: str, current_user: Dict[str, An return {"status": "no_screening_db", "message": "No screening DB configured for this SR", "deleted_db": False, "deleted_files": 0} db_conn = screening.get("connection_string") - if not db_conn: + if not db_conn and not pgsql_entra_auth_configured(): return {"status": "no_screening_db", "message": "Incomplete screening DB metadata", "deleted_db": False, "deleted_files": 0} - admin_dsn = settings.POSTGRES_ADMIN_DSN or settings.DATABASE_URL - if not admin_dsn: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Postgres admin DSN not configured. Set POSTGRES_ADMIN_DSN or DATABASE_URL in configuration/environment.") + # Check if Postgres is configured for admin operations + if not _is_postgres_configured(): + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Postgres not configured. Set POSTGRES_HOST/DATABASE/USER for Entra ID auth, or POSTGRES_URI for local dev.") + admin_dsn = _get_db_conn_str() # 1) collect fulltext URLs from the screening DB try: @@ -538,7 +571,7 @@ async def hard_delete_screening_resources(sr_id: str, current_user: Dict[str, An try: await run_in_threadpool( srdb_service.clear_screening_db_info, - settings.POSTGRES_URI, + _get_db_conn_str(), sr_id ) except Exception: @@ -574,7 +607,7 @@ async def export_citations_csv( Content-Disposition. 
""" - db_conn_str = settings.POSTGRES_URI + db_conn_str = _get_db_conn_str() try: sr, screening, db_conn = await load_sr_and_check( sr_id, current_user, db_conn_str, srdb_service diff --git a/backend/api/core/cit_utils.py b/backend/api/core/cit_utils.py index 4cfd3e56..a3fd562d 100644 --- a/backend/api/core/cit_utils.py +++ b/backend/api/core/cit_utils.py @@ -13,10 +13,24 @@ from fastapi import HTTPException, status from fastapi.concurrency import run_in_threadpool +from api.services.postgres_auth import pgsql_entra_auth_configured + +from .config import settings + + +def _is_postgres_configured(db_conn_str: Optional[str] = None) -> bool: + """ + Check if PostgreSQL is configured via Entra ID env vars or connection string. + """ + has_entra_config = settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER + has_uri_config = db_conn_str or settings.POSTGRES_URI + return bool(has_entra_config or has_uri_config) + + async def load_sr_and_check( sr_id: str, current_user: Dict[str, Any], - db_conn_str: str, + db_conn_str: Optional[str], srdb_service, require_screening: bool = True, require_visible: bool = True, @@ -27,7 +41,7 @@ async def load_sr_and_check( Args: sr_id: SR id string current_user: current user dict (must contain "id" and "email") - db_conn_str: PostgreSQL connection string + db_conn_str: PostgreSQL connection string (can be None if using Entra ID auth) srdb_service: SR DB service instance (must implement get_systematic_review and user_has_sr_permission) require_screening: if True, also ensure the SR has a configured screening_db and return its connection string require_visible: if True, require the SR 'visible' flag to be True; set False for endpoints like hard-delete @@ -38,10 +52,10 @@ async def load_sr_and_check( Raises HTTPException with appropriate status codes on failure so routers can just propagate. 
""" # ensure DB helper present and call it - if not db_conn_str: + if not _is_postgres_configured(db_conn_str): raise HTTPException( status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Server misconfiguration: PostgreSQL connection not available", + detail="Server misconfiguration: PostgreSQL connection not available. Set POSTGRES_HOST/DATABASE/USER for Entra ID auth, or POSTGRES_URI for local dev.", ) try: await run_in_threadpool(srdb_service.ensure_db_available, db_conn_str) @@ -82,7 +96,7 @@ async def load_sr_and_check( if not screening: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No screening database configured for this systematic review") db_conn = screening.get("connection_string") - if not db_conn: + if not db_conn and not pgsql_entra_auth_configured(): raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Screening DB connection info missing") return sr, screening, db_conn diff --git a/backend/api/core/config.py b/backend/api/core/config.py index c2c2fff7..5afb720f 100644 --- a/backend/api/core/config.py +++ b/backend/api/core/config.py @@ -28,6 +28,9 @@ class Settings(BaseSettings): # Storage settings STORAGE_TYPE: str = os.getenv("STORAGE_TYPE", "azure") + AZURE_STORAGE_ACCOUNT_NAME: Optional[str] = os.getenv( + "AZURE_STORAGE_ACCOUNT_NAME" + ) AZURE_STORAGE_CONNECTION_STRING: Optional[str] = os.getenv( "AZURE_STORAGE_CONNECTION_STRING" ) @@ -120,8 +123,14 @@ def convert_max_file_size(cls, v): DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true" # Database and external system environment variables - # Postgres DSN used for systematic reviews and screening databases - POSTGRES_URI: str = os.getenv("POSTGRES_URI") + # Postgres settings for Entra ID authentication + POSTGRES_HOST: Optional[str] = os.getenv("POSTGRES_HOST") + POSTGRES_DATABASE: Optional[str] = os.getenv("POSTGRES_DATABASE") + POSTGRES_USER: Optional[str] = os.getenv("POSTGRES_USER") # Entra ID user (e.g., 
user@tenant.onmicrosoft.com) + POSTGRES_PORT: int = int(os.getenv("POSTGRES_PORT", "5432")) + POSTGRES_SSL_MODE: str = os.getenv("POSTGRES_SSL_MODE", "require") + # Legacy: Postgres DSN used for systematic reviews and screening databases (fallback) + POSTGRES_URI: Optional[str] = os.getenv("POSTGRES_URI") # Databricks settings DATABRICKS_INSTANCE: str = os.getenv("DATABRICKS_INSTANCE") diff --git a/backend/api/services/azure_openai_client.py b/backend/api/services/azure_openai_client.py index c34e07be..73e38b77 100644 --- a/backend/api/services/azure_openai_client.py +++ b/backend/api/services/azure_openai_client.py @@ -1,10 +1,32 @@ """Azure OpenAI client service for chat completions""" +import time from typing import Dict, List, Any, Optional +from azure.identity import DefaultAzureCredential, get_bearer_token_provider from openai import AzureOpenAI from ..core.config import settings +# Token cache TTL in seconds (9 minutes) +TOKEN_CACHE_TTL = 9 * 60 + + +class CachedTokenProvider: + """Simple in-memory token cache wrapper""" + + def __init__(self, token_provider): + self._token_provider = token_provider + self._cached_token: Optional[str] = None + self._token_expiry: float = 0 + + def __call__(self) -> str: + """Return cached token or fetch a new one if expired""" + current_time = time.time() + if self._cached_token is None or current_time >= self._token_expiry: + self._cached_token = self._token_provider() + self._token_expiry = current_time + TOKEN_CACHE_TTL + return self._cached_token + class AzureOpenAIClient: """Client for Azure OpenAI chat completions""" @@ -12,24 +34,28 @@ class AzureOpenAIClient: def __init__(self): self.default_model = settings.DEFAULT_CHAT_MODEL + # Create token provider for Azure OpenAI using DefaultAzureCredential + # Wrapped with caching to avoid fetching a new token on every request + self._credential = DefaultAzureCredential() + self._token_provider = CachedTokenProvider( + get_bearer_token_provider( + self._credential, 
"https://cognitiveservices.azure.com/.default" + ) + ) + self.model_configs = { "gpt-4o": { - "api_key": settings.AZURE_OPENAI_API_KEY, "endpoint": settings.AZURE_OPENAI_ENDPOINT, "deployment": settings.AZURE_OPENAI_DEPLOYMENT_NAME, "api_version": settings.AZURE_OPENAI_API_VERSION, }, "gpt-4o-mini": { - "api_key": settings.AZURE_OPENAI_GPT4O_MINI_API_KEY - or settings.AZURE_OPENAI_API_KEY, "endpoint": settings.AZURE_OPENAI_GPT4O_MINI_ENDPOINT or settings.AZURE_OPENAI_ENDPOINT, "deployment": settings.AZURE_OPENAI_GPT4O_MINI_DEPLOYMENT, "api_version": settings.AZURE_OPENAI_GPT4O_MINI_API_VERSION, }, "gpt-4.1-mini": { - "api_key": settings.AZURE_OPENAI_GPT41_MINI_API_KEY - or settings.AZURE_OPENAI_API_KEY, "endpoint": settings.AZURE_OPENAI_GPT41_MINI_ENDPOINT or settings.AZURE_OPENAI_ENDPOINT, "deployment": settings.AZURE_OPENAI_GPT41_MINI_DEPLOYMENT, @@ -49,13 +75,13 @@ def _get_official_client(self, model: str) -> AzureOpenAI: """Get official Azure OpenAI client instance""" if model not in self._official_clients: config = self._get_model_config(model) - if not config.get("api_key"): + if not config.get("endpoint"): raise ValueError( - f"Azure OpenAI API key not configured for model {model}" + f"Azure OpenAI endpoint not configured for model {model}" ) self._official_clients[model] = AzureOpenAI( - api_key=config["api_key"], + azure_ad_token_provider=self._token_provider, azure_endpoint=config["endpoint"], api_version=config["api_version"], ) @@ -303,7 +329,7 @@ def get_available_models(self) -> List[str]: return [ model for model, config in self.model_configs.items() - if config.get("api_key") and config.get("endpoint") + if config.get("endpoint") ] def is_configured(self) -> bool: diff --git a/backend/api/services/cit_db_service.py b/backend/api/services/cit_db_service.py index 1aaf520c..09a5d468 100644 --- a/backend/api/services/cit_db_service.py +++ b/backend/api/services/cit_db_service.py @@ -26,14 +26,7 @@ except Exception: settings = None - -def 
_ensure_psycopg2(): - try: - import psycopg2 - import psycopg2.extras # noqa: F401 - return psycopg2 - except Exception: - raise RuntimeError("psycopg2 is not installed on the server environment") +from .postgres_auth import _ensure_psycopg2, connect_postgres # ----------------------- @@ -122,14 +115,13 @@ def __init__(self): def _ensure_psycopg2(self): return _ensure_psycopg2() - def _connect(self, db_conn_str: str): + def _connect(self, db_conn_str: Optional[str] = None): """ - Connect and return a psycopg2 connection. Raises RuntimeError if psycopg2 missing. + Connect and return a psycopg2 connection using Entra ID auth (preferred) or connection string. + Raises RuntimeError if psycopg2 missing. Caller is responsible for closing the connection. """ - psycopg2 = self._ensure_psycopg2() - conn = psycopg2.connect(db_conn_str) - return conn + return connect_postgres(db_conn_str) # ----------------------- # Generic column ops @@ -284,7 +276,7 @@ def get_citation_by_id(self, db_conn_str: str, citation_id: int) -> Optional[Dic psycopg2 = self._ensure_psycopg2() conn = None try: - conn = psycopg2.connect(db_conn_str) + conn = connect_postgres(db_conn_str) try: cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) except Exception: @@ -329,7 +321,7 @@ def list_citation_ids(self, db_conn_str: str, filter_step=None) -> List[int]: psycopg2 = self._ensure_psycopg2() conn = None try: - conn = psycopg2.connect(db_conn_str) + conn = connect_postgres(db_conn_str) cur = conn.cursor() if filter_step is not None: @@ -386,7 +378,7 @@ def list_fulltext_urls(self, db_conn_str: str) -> List[str]: psycopg2 = self._ensure_psycopg2() conn = None try: - conn = psycopg2.connect(db_conn_str) + conn = connect_postgres(db_conn_str) cur = conn.cursor() cur.execute('SELECT fulltext_url FROM "citations" WHERE fulltext_url IS NOT NULL') rows = cur.fetchall() @@ -446,7 +438,7 @@ def get_column_value(self, db_conn_str: str, citation_id: int, column: str) -> A psycopg2 = 
self._ensure_psycopg2() conn = None try: - conn = psycopg2.connect(db_conn_str) + conn = connect_postgres(db_conn_str) try: cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) except Exception: @@ -502,7 +494,7 @@ def drop_database(self, admin_dsn: str, db_name: str) -> None: psycopg2 = self._ensure_psycopg2() conn = None try: - conn = psycopg2.connect(admin_dsn) + conn = connect_postgres(admin_dsn) conn.autocommit = True cur = conn.cursor() try: @@ -551,16 +543,20 @@ def create_db_and_table_sync(self, admin_dsn: str, db_name: str, columns: List[s - fulltext_md5 TEXT - created_at TIMESTAMP WITH TIME ZONE DEFAULT now() """ - if not admin_dsn: + # Check if Entra ID config is available + has_entra_config = settings and settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER + + if not admin_dsn and not has_entra_config: # try falling back to settings if available - admin_dsn = (settings.POSTGRES_ADMIN_DSN if settings else None) or (settings.DATABASE_URL if settings else None) - if not admin_dsn: - raise RuntimeError("Postgres admin DSN not configured") + admin_dsn = (settings.POSTGRES_ADMIN_DSN if settings else None) or (settings.DATABASE_URL if settings else None) or (settings.POSTGRES_URI if settings else None) + + if not admin_dsn and not has_entra_config: + raise RuntimeError("Postgres not configured. 
Set POSTGRES_HOST/DATABASE/USER for Entra ID auth, or POSTGRES_URI for local dev.") psycopg2 = self._ensure_psycopg2() conn = None try: - conn = psycopg2.connect(admin_dsn) + conn = connect_postgres(admin_dsn) conn.autocommit = True cur = conn.cursor() cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,)) @@ -573,7 +569,7 @@ def create_db_and_table_sync(self, admin_dsn: str, db_name: str, columns: List[s # connect to created DB db_dsn = _construct_db_dsn_from_admin(admin_dsn, db_name) - conn = psycopg2.connect(db_dsn) + conn = connect_postgres(db_dsn) cur = conn.cursor() # Create table diff --git a/backend/api/services/postgres_auth.py b/backend/api/services/postgres_auth.py new file mode 100644 index 00000000..553e3ed3 --- /dev/null +++ b/backend/api/services/postgres_auth.py @@ -0,0 +1,132 @@ +""" +PostgreSQL authentication helper using Azure Entra ID (DefaultAzureCredential). + +This module provides a centralized way to connect to Azure Database for PostgreSQL +using Entra ID authentication, with fallback to connection string for local development. +""" + +from typing import Optional +import logging +import datetime +from functools import lru_cache + +logger = logging.getLogger(__name__) + +# Azure PostgreSQL OAuth scope +POSTGRES_SCOPE = "https://ossrdbms-aad.database.windows.net/.default" +_current_pgsql_token = None +_pgsql_token_expiration = None + +def _ensure_psycopg2(): + """Ensure psycopg2 is available.""" + try: + import psycopg2 + import psycopg2.extras # noqa: F401 + return psycopg2 + except ImportError: + raise RuntimeError("psycopg2 is not installed on the server environment") + +@lru_cache(maxsize=2) +def get_postgres_token() -> str: + """ + Get an access token for Azure Database for PostgreSQL using DefaultAzureCredential. + + Returns: + Access token string to use as password for PostgreSQL connection. 
+ """ + from azure.identity import DefaultAzureCredential + global _current_pgsql_token, _pgsql_token_expiration + + credential = DefaultAzureCredential() + current_epoch_time = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) + if not _current_pgsql_token or current_epoch_time >= _pgsql_token_expiration: + token = credential.get_token(POSTGRES_SCOPE) + _current_pgsql_token = token.token + _pgsql_token_expiration = token.expires_on - 60 # Refresh 1 minute before expiry + return _current_pgsql_token + +def connect_with_entra_id( + host: str, + database: str, + user: str, + port: int = 5432, + sslmode: str = "require", +): + """ + Connect to Azure Database for PostgreSQL using Entra ID authentication. + + Args: + host: PostgreSQL server hostname (e.g., myserver.postgres.database.azure.com) + database: Database name + user: Entra ID user (e.g., user@tenant.onmicrosoft.com or managed identity client ID) + port: PostgreSQL port (default 5432) + sslmode: SSL mode (default "require") + + Returns: + psycopg2 connection object + """ + psycopg2 = _ensure_psycopg2() + + # Get access token from Entra ID + access_token = get_postgres_token() + + conn = psycopg2.connect( + host=host, + database=database, + user=user, + password=access_token, + port=port, + sslmode=sslmode, + ) + + return conn + + +def connect_postgres(db_conn_str: Optional[str] = None): + """ + Connect to PostgreSQL using Entra ID authentication (preferred) or connection string (fallback). + + If POSTGRES_HOST, POSTGRES_DATABASE, and POSTGRES_USER are configured, uses Entra ID authentication. + Otherwise, falls back to the provided connection string (for local development). 
+ + Args: + db_conn_str: Optional connection string (used as fallback for local dev) + + Returns: + psycopg2 connection object + """ + from ..core.config import settings + + # Prefer Entra ID authentication if configured + if settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER: + logger.debug("Connecting to PostgreSQL using Entra ID authentication") + return connect_with_entra_id( + host=settings.POSTGRES_HOST, + database=settings.POSTGRES_DATABASE, + user=settings.POSTGRES_USER, + port=settings.POSTGRES_PORT, + sslmode=settings.POSTGRES_SSL_MODE, + ) + + # Fallback to connection string (local development) + if db_conn_str: + logger.debug("Connecting to PostgreSQL using connection string") + psycopg2 = _ensure_psycopg2() + return psycopg2.connect(db_conn_str) + + # Check for POSTGRES_URI as final fallback + if settings.POSTGRES_URI: + logger.debug("Connecting to PostgreSQL using POSTGRES_URI") + psycopg2 = _ensure_psycopg2() + return psycopg2.connect(settings.POSTGRES_URI) + + raise RuntimeError( + "PostgreSQL not configured. Set POSTGRES_HOST, POSTGRES_DATABASE, and POSTGRES_USER " + "for Entra ID auth, or POSTGRES_URI for local development." 
+ ) + + +def pgsql_entra_auth_configured(): + from ..core.config import settings + has_entra_config = settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER + return has_entra_config \ No newline at end of file diff --git a/backend/api/services/sr_db_service.py b/backend/api/services/sr_db_service.py index 5b9d9c3c..6ab70e4d 100644 --- a/backend/api/services/sr_db_service.py +++ b/backend/api/services/sr_db_service.py @@ -16,16 +16,10 @@ from fastapi import HTTPException, status -logger = logging.getLogger(__name__) - +from .postgres_auth import _ensure_psycopg2, connect_postgres +from ..core.config import settings -def _ensure_psycopg2(): - try: - import psycopg2 - import psycopg2.extras # noqa: F401 - return psycopg2 - except Exception: - raise RuntimeError("psycopg2 is not installed on the server environment") +logger = logging.getLogger(__name__) class SRDBService: @@ -36,24 +30,45 @@ def __init__(self): def _ensure_psycopg2(self): return _ensure_psycopg2() - def _connect(self, db_conn_str: str): + def _connect(self, db_conn_str: Optional[str] = None): """ - Connect and return a psycopg2 connection. Raises RuntimeError if psycopg2 missing. + Connect and return a psycopg2 connection using Entra ID auth (preferred) or connection string. + Raises RuntimeError if psycopg2 missing. Caller is responsible for closing the connection. """ - psycopg2 = self._ensure_psycopg2() - conn = psycopg2.connect(db_conn_str) - return conn + return connect_postgres(db_conn_str) + + def _is_postgres_configured(self, db_conn_str: Optional[str] = None) -> bool: + """ + Check if PostgreSQL is configured via Entra ID env vars or connection string. 
+ """ + has_entra_config = settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER + has_uri_config = db_conn_str or settings.POSTGRES_URI + return bool(has_entra_config or has_uri_config) + + def _ensure_postgres_configured(self, db_conn_str: Optional[str] = None) -> None: + """ + Raise HTTPException if PostgreSQL is not configured. + """ + if not self._is_postgres_configured(db_conn_str): + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Systematic review DB not configured. Set POSTGRES_HOST/DATABASE/USER for Entra ID auth, or POSTGRES_URI for local dev." + ) def ensure_db_available(self, db_conn_str: Optional[str] = None) -> None: """ Raise an HTTPException (503) if the PostgreSQL connection is not available. Routers call this to provide consistent error messages when Postgres is not configured. """ - if not db_conn_str: + # Check if any Postgres config is available + has_entra_config = settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER + has_uri_config = db_conn_str or settings.POSTGRES_URI + + if not has_entra_config and not has_uri_config: raise HTTPException( status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="PostgreSQL connection not configured. Set POSTGRES_URI environment variable.", + detail="PostgreSQL connection not configured. Set POSTGRES_HOST/DATABASE/USER for Entra ID auth, or POSTGRES_URI for local dev.", ) # Try to connect to verify availability try: @@ -217,8 +232,7 @@ def create_systematic_review( """ Create a new SR document and insert into the table. Returns the created document. 
""" - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) sr_id = str(uuid.uuid4()) now = datetime.utcnow().isoformat() @@ -303,8 +317,7 @@ def add_user(self, db_conn_str: str, sr_id: str, target_user_id: str, requester_ requester must be a member or owner. Returns a dict with update result metadata. """ - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) sr = self.get_systematic_review(db_conn_str, sr_id) if not sr or not sr.get("visible", True): @@ -371,8 +384,7 @@ def remove_user(self, db_conn_str: str, sr_id: str, target_user_id: str, request Remove a user id from the SR's users list. Owner cannot be removed. Enforces requester permissions (must be a member or owner). """ - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) sr = self.get_systematic_review(db_conn_str, sr_id) if not sr or not sr.get("visible", True): @@ -444,8 +456,7 @@ def user_has_sr_permission(self, db_conn_str: str, sr_id: str, user_id: str) -> Note: this check deliberately ignores the SR's 'visible' flag so membership checks work regardless of whether the SR is hidden/soft-deleted. """ - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) doc = self.get_systematic_review(db_conn_str, sr_id, ignore_visibility=True) if not doc: @@ -462,8 +473,7 @@ def update_criteria(self, db_conn_str: str, sr_id: str, criteria_obj: Dict[str, The requester must be a member or owner. Returns the updated SR document. 
""" - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) sr = self.get_systematic_review(db_conn_str, sr_id) if not sr or not sr.get("visible", True): @@ -530,8 +540,7 @@ def list_systematic_reviews_for_user(self, db_conn_str: str, user_email: str) -> """ Return all SR documents where the user is a member (regardless of visible flag). """ - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) conn = None try: @@ -595,8 +604,7 @@ def get_systematic_review(self, db_conn_str: str, sr_id: str, ignore_visibility: Return SR document by id. Returns None if not found. If ignore_visibility is False, only returns visible SRs. """ - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) conn = None try: @@ -665,8 +673,7 @@ def set_visibility(self, db_conn_str: str, sr_id: str, visible: bool, requester_ Set the visible flag on the SR. Only owner is allowed to change visibility. Returns update metadata. """ - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) sr = self.get_systematic_review(db_conn_str, sr_id, ignore_visibility=True) if not sr: @@ -726,8 +733,7 @@ def hard_delete_systematic_review(self, db_conn_str: str, sr_id: str, requester_ Permanently remove the SR document. Only owner may hard delete. Returns deletion metadata (deleted_count). 
""" - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) sr = self.get_systematic_review(db_conn_str, sr_id, ignore_visibility=True) if not sr: @@ -771,8 +777,7 @@ def update_screening_db_info(self, db_conn_str: str, sr_id: str, screening_db: D """ Update the screening_db field in the SR document with screening database metadata. """ - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) conn = None try: @@ -809,8 +814,7 @@ def clear_screening_db_info(self, db_conn_str: str, sr_id: str) -> None: """ Remove the screening_db field from the SR document. """ - if not db_conn_str: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Systematic review DB not configured") + self._ensure_postgres_configured(db_conn_str) conn = None try: diff --git a/backend/api/services/storage.py b/backend/api/services/storage.py index 9c34ee29..e1b36467 100644 --- a/backend/api/services/storage.py +++ b/backend/api/services/storage.py @@ -6,6 +6,7 @@ from datetime import datetime, timezone from typing import Dict, List, Optional, Any +from azure.identity import DefaultAzureCredential from azure.storage.blob import BlobServiceClient from azure.core.exceptions import ResourceNotFoundError @@ -19,11 +20,13 @@ class AzureStorageService: """Service for managing user data in Azure Blob Storage""" def __init__(self): - if not settings.AZURE_STORAGE_CONNECTION_STRING: - raise ValueError("Azure Storage connection string not configured") + if not settings.AZURE_STORAGE_ACCOUNT_NAME: + raise ValueError("AZURE_STORAGE_ACCOUNT_NAME is not configured") - self.blob_service_client = BlobServiceClient.from_connection_string( - settings.AZURE_STORAGE_CONNECTION_STRING + account_url = 
f"https://{settings.AZURE_STORAGE_ACCOUNT_NAME}.blob.core.windows.net" + credential = DefaultAzureCredential() + self.blob_service_client = BlobServiceClient( + account_url=account_url, credential=credential ) self.container_name = settings.AZURE_STORAGE_CONTAINER_NAME @@ -354,5 +357,5 @@ async def delete_file_hash_metadata(self, user_id: str, document_id: str) -> boo # Global storage service instance storage_service = ( - AzureStorageService() if settings.AZURE_STORAGE_CONNECTION_STRING else None + AzureStorageService() if settings.AZURE_STORAGE_ACCOUNT_NAME else None ) diff --git a/backend/api/services/user_db.py b/backend/api/services/user_db.py index d42a9a66..2a9fb41f 100644 --- a/backend/api/services/user_db.py +++ b/backend/api/services/user_db.py @@ -5,6 +5,7 @@ from datetime import datetime from typing import Dict, List, Optional, Any +from azure.identity import DefaultAzureCredential from azure.storage.blob import BlobServiceClient from azure.core.exceptions import ResourceNotFoundError from passlib.context import CryptContext @@ -17,12 +18,15 @@ class UserDatabaseService: """Service for managing user data in Azure Blob Storage""" def __init__(self): - if not settings.AZURE_STORAGE_CONNECTION_STRING: - raise ValueError("Azure Storage connection string not configured") + if not settings.AZURE_STORAGE_ACCOUNT_NAME: + raise ValueError("AZURE_STORAGE_ACCOUNT_NAME is not configured") - self.blob_service_client = BlobServiceClient.from_connection_string( - settings.AZURE_STORAGE_CONNECTION_STRING + account_url = f"https://{settings.AZURE_STORAGE_ACCOUNT_NAME}.blob.core.windows.net" + credential = DefaultAzureCredential() + self.blob_service_client = BlobServiceClient( + account_url=account_url, credential=credential ) + self.container_name = settings.AZURE_STORAGE_CONTAINER_NAME self.pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") @@ -108,8 +112,8 @@ async def create_user(self, user_data: UserCreate) -> Optional[UserRead]: if await 
self._save_user_registry(registry): # Create user directory structure in storage from .storage import storage_service - from .dual_milvus_manager import dual_milvus_manager - from .base_knowledge_manager import base_knowledge_manager + # from .dual_milvus_manager import dual_milvus_manager + # from .base_knowledge_manager import base_knowledge_manager if storage_service: await storage_service.create_user_directory(user_id) @@ -245,7 +249,7 @@ async def get_user_count(self) -> int: # Global user database service instance user_db_service = ( - UserDatabaseService() if settings.AZURE_STORAGE_CONNECTION_STRING else None + UserDatabaseService() if settings.AZURE_STORAGE_ACCOUNT_NAME else None ) # Alias for backward compatibility diff --git a/backend/api/sr/router.py b/backend/api/sr/router.py index cf6d26ae..20199992 100644 --- a/backend/api/sr/router.py +++ b/backend/api/sr/router.py @@ -29,8 +29,26 @@ router = APIRouter() # Helper to get database connection string -def _get_db_conn_str() -> str: - return settings.POSTGRES_URI +def _get_db_conn_str() -> Optional[str]: + """ + Get database connection string for PostgreSQL. + + If POSTGRES_URI is set, returns it directly (local development). + If Entra ID env variables are configured (POSTGRES_HOST, POSTGRES_DATABASE, POSTGRES_USER), + returns None to signal that connect_postgres() should use Entra ID authentication. + """ + if settings.POSTGRES_URI: + return settings.POSTGRES_URI + + # If Entra ID config is available, return None to let connect_postgres use token auth + if settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER: + return None + + # No configuration available + raise ValueError( + "PostgreSQL not configured. Set POSTGRES_URI for local development, " + "or POSTGRES_HOST, POSTGRES_DATABASE, and POSTGRES_USER for Entra ID authentication." 
+ ) class SystematicReviewCreate(BaseModel): name: str diff --git a/backend/main.py b/backend/main.py index bd2c8c78..e89158e2 100644 --- a/backend/main.py +++ b/backend/main.py @@ -31,11 +31,17 @@ async def startup_event(): print("📚 Initializing systematic review database...", flush=True) # Ensure systematic review table exists in PostgreSQL try: - if settings.POSTGRES_URI: - await run_in_threadpool(srdb_service.ensure_table_exists, settings.POSTGRES_URI) + # Check if Entra ID or POSTGRES_URI is configured + has_entra_config = settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER + has_uri_config = settings.POSTGRES_URI + + if has_entra_config or has_uri_config: + # Pass connection string if available, otherwise None for Entra ID auth + db_conn_str = settings.POSTGRES_URI if has_uri_config else None + await run_in_threadpool(srdb_service.ensure_table_exists, db_conn_str) print("✓ Systematic review table initialized", flush=True) else: - print("⚠️ POSTGRES_URI not configured - skipping SR table initialization", flush=True) + print("⚠️ PostgreSQL not configured - skipping SR table initialization", flush=True) except Exception as e: print(f"⚠️ Failed to ensure SR table exists: {e}", flush=True) print("🎯 CAN-SR Backend ready!", flush=True) From 13ec748ed40f6f629a1aca2cf3e680b3da198e08 Mon Sep 17 00:00:00 2001 From: Keenan Date: Fri, 30 Jan 2026 09:25:09 -0500 Subject: [PATCH 02/17] change backend url on web app dockerfile --- frontend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 7d59e395..6eb0724f 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -16,7 +16,7 @@ RUN npm ci FROM base AS build # Public build-time environment variables -ARG NEXT_PUBLIC_BACKEND_URL="https://grep-exp-can-sr-api-dv.phac-aspc.gc.ca" +ARG NEXT_PUBLIC_BACKEND_URL="https://was-sdse-spib-hail-api-dt.azurewebsites.net" ENV NEXT_PUBLIC_BACKEND_URL=${NEXT_PUBLIC_BACKEND_URL} From 
f68fd51bbea2eaa2eecb2075293386fcd0504034 Mon Sep 17 00:00:00 2001 From: Keenan Date: Fri, 30 Jan 2026 09:36:19 -0500 Subject: [PATCH 03/17] remove system --- frontend/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 6eb0724f..e044f708 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -31,8 +31,8 @@ FROM base AS run ENV NODE_ENV=production ENV PORT=$PORT -RUN addgroup --system --gid 1001 nodejs -RUN adduser --system --uid 1001 nextjs +RUN addgroup --gid 1001 nodejs +RUN adduser --uid 1001 nextjs RUN mkdir .next RUN chown nextjs:nodejs .next From f0fe10c1560fdc62345416d61e35a51dce5648a2 Mon Sep 17 00:00:00 2001 From: Keenan Date: Fri, 30 Jan 2026 09:49:52 -0500 Subject: [PATCH 04/17] fixes --- frontend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/Dockerfile b/frontend/Dockerfile index e044f708..03f48d16 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -32,7 +32,7 @@ ENV NODE_ENV=production ENV PORT=$PORT RUN addgroup --gid 1001 nodejs -RUN adduser --uid 1001 nextjs +RUN adduser --uid 1001 --ingroup nodejs --disabled-password --gecos "" nextjs RUN mkdir .next RUN chown nextjs:nodejs .next From 85ea28810934cc999da854b9e2a90fa6fb40d341 Mon Sep 17 00:00:00 2001 From: Keenan Date: Fri, 30 Jan 2026 10:13:38 -0500 Subject: [PATCH 05/17] change next config --- frontend/next.config.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/next.config.ts b/frontend/next.config.ts index 9f367351..1d65f5ba 100644 --- a/frontend/next.config.ts +++ b/frontend/next.config.ts @@ -2,6 +2,7 @@ import type { NextConfig } from 'next' const nextConfig: NextConfig = { /* config options here */ + output: 'standalone', eslint: { // Disable ESLint during builds for production deployment ignoreDuringBuilds: true, From b255f6283d9f27a0b1e15b2856aa3a47632fadd5 Mon Sep 17 00:00:00 2001 From: Keenan Date: Mon, 2 Feb 2026 09:01:46 -0500 Subject: 
[PATCH 06/17] azure ssh access --- backend/Dockerfile | 14 +++++++++++++- backend/entrypoint.sh | 10 ++++++++++ backend/sshd_config | 12 ++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 backend/entrypoint.sh create mode 100644 backend/sshd_config diff --git a/backend/Dockerfile b/backend/Dockerfile index 610e9252..18c4a472 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -38,12 +38,24 @@ RUN mkdir -p uploads RUN useradd -m -u 1001 appuser && chown -R appuser:appuser /app USER appuser +COPY sshd_config /etc/ssh/ +COPY entrypoint.sh /entrypoint.sh + +RUN apt-get update \ + && apt-get install -y --no-install-recommends dialog \ + && apt-get install -y --no-install-recommends openssh-server \ + && echo "root:Docker!" | chpasswd \ + && chmod u+x /entrypoint.sh + +USER root + # Expose port -EXPOSE 8000 +EXPOSE 8000 2222 # Health check HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" # Run the application +ENTRYPOINT ["/entrypoint.sh"] CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/backend/entrypoint.sh b/backend/entrypoint.sh new file mode 100644 index 00000000..b599c0a2 --- /dev/null +++ b/backend/entrypoint.sh @@ -0,0 +1,10 @@ +#!/bin/sh +set -e + +# Get env vars in the Dockerfile to show up in the SSH session +eval $(printenv | sed -n "s/^\([^=]\+\)=\(.*\)$/export \1=\2/p" | sed 's/"/\\\"/g' | sed '/=/s//="/' | sed 's/$/"/' >> /etc/profile) + +echo "Starting SSH ..." 
+service ssh start + +exec "$@" \ No newline at end of file diff --git a/backend/sshd_config b/backend/sshd_config new file mode 100644 index 00000000..9c224d52 --- /dev/null +++ b/backend/sshd_config @@ -0,0 +1,12 @@ +Port 2222 +ListenAddress 0.0.0.0 +LoginGraceTime 180 +X11Forwarding yes +Ciphers aes128-cbc,3des-cbc,aes256-cbc,aes128-ctr,aes192-ctr,aes256-ctr +MACs hmac-sha1,hmac-sha1-96 +StrictModes yes +SyslogFacility DAEMON +PasswordAuthentication yes +PermitEmptyPasswords no +PermitRootLogin yes +Subsystem sftp internal-sftp \ No newline at end of file From 7ee890a8f0118d0f12c4809a4f1845a7eafc7f72 Mon Sep 17 00:00:00 2001 From: Keenan Date: Mon, 2 Feb 2026 09:13:11 -0500 Subject: [PATCH 07/17] updates --- backend/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index 18c4a472..f113ca6e 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -39,13 +39,13 @@ RUN useradd -m -u 1001 appuser && chown -R appuser:appuser /app USER appuser COPY sshd_config /etc/ssh/ -COPY entrypoint.sh /entrypoint.sh +# COPY entrypoint.sh /entrypoint.sh RUN apt-get update \ && apt-get install -y --no-install-recommends dialog \ && apt-get install -y --no-install-recommends openssh-server \ && echo "root:Docker!" 
| chpasswd \ - && chmod u+x /entrypoint.sh + && chmod u+x ./entrypoint.sh USER root From 940239f58144ae9164c999caea2dc66e52a2dd9e Mon Sep 17 00:00:00 2001 From: Keenan Date: Mon, 2 Feb 2026 09:26:46 -0500 Subject: [PATCH 08/17] updates --- backend/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index f113ca6e..7c3b25b8 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -36,7 +36,7 @@ RUN mkdir -p uploads # Create non-root user for security RUN useradd -m -u 1001 appuser && chown -R appuser:appuser /app -USER appuser + COPY sshd_config /etc/ssh/ # COPY entrypoint.sh /entrypoint.sh @@ -47,11 +47,11 @@ RUN apt-get update \ && echo "root:Docker!" | chpasswd \ && chmod u+x ./entrypoint.sh -USER root - # Expose port EXPOSE 8000 2222 +USER appuser + # Health check HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" From 856ef5463483c5e339ffb3a97fa3dffa63d9e46c Mon Sep 17 00:00:00 2001 From: Keenan Date: Mon, 2 Feb 2026 09:34:40 -0500 Subject: [PATCH 09/17] more updates --- backend/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index 7c3b25b8..980dd466 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -39,13 +39,13 @@ RUN useradd -m -u 1001 appuser && chown -R appuser:appuser /app COPY sshd_config /etc/ssh/ -# COPY entrypoint.sh /entrypoint.sh +COPY entrypoint.sh /entrypoint.sh RUN apt-get update \ && apt-get install -y --no-install-recommends dialog \ && apt-get install -y --no-install-recommends openssh-server \ && echo "root:Docker!" 
| chpasswd \ - && chmod u+x ./entrypoint.sh + && chmod u+x /entrypoint.sh # Expose port EXPOSE 8000 2222 From dbdce0110630770e9207cbf6e327ff0c6053b064 Mon Sep 17 00:00:00 2001 From: Keenan Date: Mon, 2 Feb 2026 09:47:57 -0500 Subject: [PATCH 10/17] update dockerignore --- backend/.dockerignore | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/.dockerignore b/backend/.dockerignore index 5813e6e2..ea71973c 100644 --- a/backend/.dockerignore +++ b/backend/.dockerignore @@ -23,4 +23,5 @@ dmypy.json .DS_Store deploy.sh *.sh +!entrypoint.sh *.pem \ No newline at end of file From f6779771f74a3f436b2df41d923bcd4d8a8a818d Mon Sep 17 00:00:00 2001 From: Keenan Date: Mon, 2 Feb 2026 10:01:30 -0500 Subject: [PATCH 11/17] change root location --- backend/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index 980dd466..6a01e65f 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -47,11 +47,11 @@ RUN apt-get update \ && echo "root:Docker!" 
| chpasswd \ && chmod u+x /entrypoint.sh +USER root + # Expose port EXPOSE 8000 2222 -USER appuser - # Health check HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" From 634195c122981fa69d2eb3342ba2e34b40d0d715 Mon Sep 17 00:00:00 2001 From: Keenan Date: Wed, 4 Feb 2026 12:33:34 -0500 Subject: [PATCH 12/17] changes --- backend/api/services/azure_openai_client.py | 23 ++++++++++++--------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/backend/api/services/azure_openai_client.py b/backend/api/services/azure_openai_client.py index 6bf2d3f0..99e05ec5 100644 --- a/backend/api/services/azure_openai_client.py +++ b/backend/api/services/azure_openai_client.py @@ -126,17 +126,20 @@ async def chat_completion( try: client = self._get_official_client(model) + request_kwargs = { + "model": deployment, + "messages": messages, + "top_p": top_p, + "frequency_penalty": frequency_penalty, + "presence_penalty": presence_penalty, + "stream": stream, + } + + if model != "gpt-5-mini": + request_kwargs["max_tokens"] = max_tokens + request_kwargs["temperature"] = temperature - response = client.chat.completions.create( - model=deployment, - messages=messages, - max_tokens=max_tokens, - temperature=temperature, - top_p=top_p, - frequency_penalty=frequency_penalty, - presence_penalty=presence_penalty, - stream=stream, - ) + response = client.chat.completions.create(**request_kwargs) if stream: return response From e62eb209b53ad5e6a35a746cd30667934ccb7cfc Mon Sep 17 00:00:00 2001 From: Keenan Date: Thu, 5 Feb 2026 12:53:38 -0500 Subject: [PATCH 13/17] remove @lru_cache --- backend/api/services/postgres_auth.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/api/services/postgres_auth.py b/backend/api/services/postgres_auth.py index 553e3ed3..8403a037 100644 --- a/backend/api/services/postgres_auth.py +++ b/backend/api/services/postgres_auth.py @@ 
-26,7 +26,6 @@ def _ensure_psycopg2(): except ImportError: raise RuntimeError("psycopg2 is not installed on the server environment") -@lru_cache(maxsize=2) def get_postgres_token() -> str: """ Get an access token for Azure Database for PostgreSQL using DefaultAzureCredential. From 4edc06de6016edc056574060114f96a0cefcfc21 Mon Sep 17 00:00:00 2001 From: Keenan Date: Thu, 5 Feb 2026 13:27:15 -0500 Subject: [PATCH 14/17] add turbopack --- backend/api/services/postgres_auth.py | 1 + frontend/next.config.ts | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/api/services/postgres_auth.py b/backend/api/services/postgres_auth.py index 8403a037..568bf20f 100644 --- a/backend/api/services/postgres_auth.py +++ b/backend/api/services/postgres_auth.py @@ -39,6 +39,7 @@ def get_postgres_token() -> str: credential = DefaultAzureCredential() current_epoch_time = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) if not _current_pgsql_token or current_epoch_time >= _pgsql_token_expiration: + print("GETTING FRESH PGSQL TOKEN") token = credential.get_token(POSTGRES_SCOPE) _current_pgsql_token = token.token _pgsql_token_expiration = token.expires_on - 60 # Refresh 1 minute before expiry diff --git a/frontend/next.config.ts b/frontend/next.config.ts index 1d65f5ba..eb104311 100644 --- a/frontend/next.config.ts +++ b/frontend/next.config.ts @@ -3,10 +3,6 @@ import type { NextConfig } from 'next' const nextConfig: NextConfig = { /* config options here */ output: 'standalone', - eslint: { - // Disable ESLint during builds for production deployment - ignoreDuringBuilds: true, - }, typescript: { // Disable TypeScript errors during builds for production deployment ignoreBuildErrors: true, @@ -19,6 +15,7 @@ const nextConfig: NextConfig = { } return config }, + turbopack: {}, } export default nextConfig From 91dc1e67a97924734cf6db6b3bfc5f92111f02cd Mon Sep 17 00:00:00 2001 From: Keenan Date: Fri, 6 Feb 2026 13:00:26 -0500 Subject: [PATCH 15/17] database 
changes --- .gitignore | 5 +- backend/api/citations/router.py | 7 +- backend/api/core/cit_utils.py | 18 +-- backend/api/core/config.py | 4 +- backend/api/services/cit_db_service.py | 85 +++++----- backend/api/services/postgres_auth.py | 209 +++++++++++-------------- backend/api/services/sr_db_service.py | 145 ++++++----------- backend/api/sr/router.py | 2 - 8 files changed, 181 insertions(+), 294 deletions(-) diff --git a/.gitignore b/.gitignore index fe155de0..df559c58 100644 --- a/.gitignore +++ b/.gitignore @@ -164,4 +164,7 @@ frontend/app/can-sr/setup/test.yaml backend/api/citations/test.ipynb file1.csv */criteria_config_measles_updated.yaml -AGENTS_ROADMAP.md \ No newline at end of file +AGENTS_ROADMAP.md + +*/logfile +logfile \ No newline at end of file diff --git a/backend/api/citations/router.py b/backend/api/citations/router.py index 2af51a50..247a2859 100644 --- a/backend/api/citations/router.py +++ b/backend/api/citations/router.py @@ -27,9 +27,6 @@ from fastapi.concurrency import run_in_threadpool from pydantic import BaseModel -from api.services.postgres_auth import pgsql_entra_auth_configured - - from ..services.sr_db_service import srdb_service from ..core.security import get_current_active_user @@ -459,9 +456,7 @@ async def hard_delete_screening_resources(sr_id: str, current_user: Dict[str, An if not screening: return {"status": "no_screening_db", "message": "No screening table configured for this SR", "deleted_table": False, "deleted_files": 0} - db_conn = screening.get("connection_string") - if not db_conn and not pgsql_entra_auth_configured(): - return {"status": "no_screening_db", "message": "Incomplete screening DB metadata", "deleted_table": False, "deleted_files": 0} + db_conn = None table_name = screening.get("table_name") if not table_name: diff --git a/backend/api/core/cit_utils.py b/backend/api/core/cit_utils.py index a3fd562d..12442fa8 100644 --- a/backend/api/core/cit_utils.py +++ b/backend/api/core/cit_utils.py @@ -13,8 +13,6 @@ from 
fastapi import HTTPException, status from fastapi.concurrency import run_in_threadpool -from api.services.postgres_auth import pgsql_entra_auth_configured - from .config import settings @@ -51,18 +49,6 @@ async def load_sr_and_check( Raises HTTPException with appropriate status codes on failure so routers can just propagate. """ - # ensure DB helper present and call it - if not _is_postgres_configured(db_conn_str): - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Server misconfiguration: PostgreSQL connection not available. Set POSTGRES_HOST/DATABASE/USER for Entra ID auth, or POSTGRES_URI for local dev.", - ) - try: - await run_in_threadpool(srdb_service.ensure_db_available, db_conn_str) - except HTTPException: - raise - except Exception as e: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) # fetch SR try: @@ -95,8 +81,6 @@ async def load_sr_and_check( if require_screening: if not screening: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No screening database configured for this systematic review") - db_conn = screening.get("connection_string") - if not db_conn and not pgsql_entra_auth_configured(): - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Screening DB connection info missing") + db_conn = None return sr, screening, db_conn diff --git a/backend/api/core/config.py b/backend/api/core/config.py index 1285ade7..420727d1 100644 --- a/backend/api/core/config.py +++ b/backend/api/core/config.py @@ -116,7 +116,9 @@ def convert_max_file_size(cls, v): POSTGRES_DATABASE: Optional[str] = os.getenv("POSTGRES_DATABASE") POSTGRES_USER: Optional[str] = os.getenv("POSTGRES_USER") # Entra ID user (e.g., user@tenant.onmicrosoft.com) POSTGRES_PORT: int = int(os.getenv("POSTGRES_PORT", "5432")) - POSTGRES_SSL_MODE: str = os.getenv("POSTGRES_SSL_MODE", "require") + POSTGRES_SSL_MODE: Optional[str] = os.getenv("POSTGRES_SSL_MODE") + POSTGRES_PASSWORD: 
Optional[str] = os.getenv("POSTGRES_PASSWORD") + AZURE_DB: bool = os.getenv("AZURE_DB", "false").lower() == "true" # Legacy: Postgres DSN used for systematic reviews and screening databases (fallback) POSTGRES_URI: Optional[str] = os.getenv("POSTGRES_URI") diff --git a/backend/api/services/cit_db_service.py b/backend/api/services/cit_db_service.py index 495c17ee..6005fef0 100644 --- a/backend/api/services/cit_db_service.py +++ b/backend/api/services/cit_db_service.py @@ -13,6 +13,8 @@ can surface a 503 with an actionable message. """ from typing import Any, Dict, List, Optional +import psycopg2 +import psycopg2.extras import json import re import os @@ -26,7 +28,7 @@ except Exception: settings = None -from .postgres_auth import _ensure_psycopg2, connect_postgres +from .postgres_auth import postgres_server # ----------------------- @@ -131,16 +133,7 @@ def __init__(self): # ----------------------- # Low level connection helpers # ----------------------- - def _ensure_psycopg2(self): - return _ensure_psycopg2() - def _connect(self, db_conn_str: Optional[str] = None): - """ - Connect and return a psycopg2 connection using Entra ID auth (preferred) or connection string. - Raises RuntimeError if psycopg2 missing. - Caller is responsible for closing the connection. 
- """ - return connect_postgres(db_conn_str) # ----------------------- # Generic column ops @@ -154,7 +147,7 @@ def create_column(self, db_conn_str: str, col: str, col_type: str, table_name: s table_name = _validate_ident(table_name, kind="table_name") conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() try: cur.execute(f'ALTER TABLE "{table_name}" ADD COLUMN IF NOT EXISTS "{col}" {col_type}') @@ -170,13 +163,13 @@ def create_column(self, db_conn_str: str, col: str, col_type: str, table_name: s except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -194,7 +187,7 @@ def update_jsonb_column( table_name = _validate_ident(table_name, kind="table_name") conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() try: cur.execute(f'ALTER TABLE "{table_name}" ADD COLUMN IF NOT EXISTS "{col}" JSONB') @@ -211,14 +204,14 @@ def update_jsonb_column( except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass return rows or 0 finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -236,7 +229,7 @@ def update_text_column( table_name = _validate_ident(table_name, kind="table_name") conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() try: cur.execute(f'ALTER TABLE "{table_name}" ADD COLUMN IF NOT EXISTS "{col}" TEXT') @@ -253,14 +246,14 @@ def update_text_column( except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass return rows or 0 finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -278,7 +271,7 @@ def dump_citations_csv(self, db_conn_str: str, table_name: str = "citations") -> table_name = _validate_ident(table_name, kind="table_name") conn = None try: - conn = 
self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() buf = io.StringIO() @@ -294,7 +287,7 @@ def dump_citations_csv(self, db_conn_str: str, table_name: str = "citations") -> except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -302,7 +295,7 @@ def dump_citations_csv(self, db_conn_str: str, table_name: str = "citations") -> finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -311,10 +304,9 @@ def get_citation_by_id(self, db_conn_str: str, citation_id: int, table_name: str Return a dict mapping column -> value for the citation row, or None. """ table_name = _validate_ident(table_name, kind="table_name") - psycopg2 = self._ensure_psycopg2() conn = None try: - conn = connect_postgres(db_conn_str) + conn = postgres_server.conn try: cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) except Exception: @@ -327,7 +319,7 @@ def get_citation_by_id(self, db_conn_str: str, citation_id: int, table_name: str except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass return None @@ -341,14 +333,14 @@ def get_citation_by_id(self, db_conn_str: str, citation_id: int, table_name: str except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass return result finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -357,10 +349,9 @@ def list_citation_ids(self, db_conn_str: str, filter_step=None, table_name: str Return list of integer primary keys (id) from citations table ordered by id. 
""" table_name = _validate_ident(table_name, kind="table_name") - psycopg2 = self._ensure_psycopg2() conn = None try: - conn = connect_postgres(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() if filter_step is not None: @@ -399,14 +390,14 @@ def list_citation_ids(self, db_conn_str: str, filter_step=None, table_name: str except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass return [int(r[0]) for r in rows] finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -415,10 +406,9 @@ def list_fulltext_urls(self, db_conn_str: str, table_name: str = "citations") -> Return list of fulltext_url values (non-null) from citations table. """ table_name = _validate_ident(table_name, kind="table_name") - psycopg2 = self._ensure_psycopg2() conn = None try: - conn = connect_postgres(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() cur.execute(f'SELECT fulltext_url FROM "{table_name}" WHERE fulltext_url IS NOT NULL') rows = cur.fetchall() @@ -427,14 +417,14 @@ def list_fulltext_urls(self, db_conn_str: str, table_name: str = "citations") -> except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass return [r[0] for r in rows if r and r[0]] finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -466,14 +456,14 @@ def attach_fulltext( md5 = hashlib.md5(file_bytes).hexdigest() if file_bytes is not None else "" # update both columns in one statement - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() cur.execute(f'UPDATE "{table_name}" SET "fulltext_url" = %s WHERE id = %s', (azure_path, int(citation_id))) rows = cur.rowcount conn.commit() cur.close() - conn.close() + postgres_server.close() return rows # ----------------------- @@ -484,10 +474,9 @@ def get_column_value(self, db_conn_str: str, citation_id: int, column: str, tabl Return the value stored in `column` for the citation row (or 
None). """ table_name = _validate_ident(table_name, kind="table_name") - psycopg2 = self._ensure_psycopg2() conn = None try: - conn = connect_postgres(db_conn_str) + conn = postgres_server.conn try: cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) except Exception: @@ -500,7 +489,7 @@ def get_column_value(self, db_conn_str: str, citation_id: int, column: str, tabl except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass return None @@ -514,14 +503,14 @@ def get_column_value(self, db_conn_str: str, citation_id: int, column: str, tabl except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass return val finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -541,7 +530,7 @@ def drop_table(self, db_conn_str: str, table_name: str, cascade: bool = True) -> table_name = _validate_ident(table_name, kind="table_name") conn = None try: - conn = connect_postgres(admin_dsn) + conn = postgres_server.conn conn.autocommit = True cur = conn.cursor() cas = " CASCADE" if cascade else "" @@ -553,7 +542,7 @@ def drop_table(self, db_conn_str: str, table_name: str, cascade: bool = True) -> finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -570,11 +559,9 @@ def create_table_and_insert_sync( is per-upload (e.g. sr___citations) inside the shared DB. 
""" table_name = _validate_ident(table_name, kind="table_name") - - psycopg2 = self._ensure_psycopg2() conn = None try: - conn = connect_postgres(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() # Create table @@ -629,14 +616,14 @@ def _row_has_data(row: dict) -> bool: except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass return inserted finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass diff --git a/backend/api/services/postgres_auth.py b/backend/api/services/postgres_auth.py index 568bf20f..9334c520 100644 --- a/backend/api/services/postgres_auth.py +++ b/backend/api/services/postgres_auth.py @@ -6,127 +6,100 @@ """ from typing import Optional + +import psycopg2 +from ..core.config import settings import logging import datetime -from functools import lru_cache +from azure.identity import DefaultAzureCredential logger = logging.getLogger(__name__) -# Azure PostgreSQL OAuth scope -POSTGRES_SCOPE = "https://ossrdbms-aad.database.windows.net/.default" -_current_pgsql_token = None -_pgsql_token_expiration = None - -def _ensure_psycopg2(): - """Ensure psycopg2 is available.""" - try: - import psycopg2 - import psycopg2.extras # noqa: F401 - return psycopg2 - except ImportError: - raise RuntimeError("psycopg2 is not installed on the server environment") - -def get_postgres_token() -> str: - """ - Get an access token for Azure Database for PostgreSQL using DefaultAzureCredential. - - Returns: - Access token string to use as password for PostgreSQL connection. 
- """ - from azure.identity import DefaultAzureCredential - global _current_pgsql_token, _pgsql_token_expiration - - credential = DefaultAzureCredential() - current_epoch_time = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) - if not _current_pgsql_token or current_epoch_time >= _pgsql_token_expiration: - print("GETTING FRESH PGSQL TOKEN") - token = credential.get_token(POSTGRES_SCOPE) - _current_pgsql_token = token.token - _pgsql_token_expiration = token.expires_on - 60 # Refresh 1 minute before expiry - return _current_pgsql_token - -def connect_with_entra_id( - host: str, - database: str, - user: str, - port: int = 5432, - sslmode: str = "require", -): - """ - Connect to Azure Database for PostgreSQL using Entra ID authentication. - - Args: - host: PostgreSQL server hostname (e.g., myserver.postgres.database.azure.com) - database: Database name - user: Entra ID user (e.g., user@tenant.onmicrosoft.com or managed identity client ID) - port: PostgreSQL port (default 5432) - sslmode: SSL mode (default "require") - - Returns: - psycopg2 connection object - """ - psycopg2 = _ensure_psycopg2() - - # Get access token from Entra ID - access_token = get_postgres_token() - - conn = psycopg2.connect( - host=host, - database=database, - user=user, - password=access_token, - port=port, - sslmode=sslmode, - ) - - return conn - - -def connect_postgres(db_conn_str: Optional[str] = None): - """ - Connect to PostgreSQL using Entra ID authentication (preferred) or connection string (fallback). - - If POSTGRES_HOST, POSTGRES_DATABASE, and POSTGRES_USER are configured, uses Entra ID authentication. - Otherwise, falls back to the provided connection string (for local development). 
- - Args: - db_conn_str: Optional connection string (used as fallback for local dev) - - Returns: - psycopg2 connection object - """ - from ..core.config import settings - - # Prefer Entra ID authentication if configured - if settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER: - logger.debug("Connecting to PostgreSQL using Entra ID authentication") - return connect_with_entra_id( - host=settings.POSTGRES_HOST, - database=settings.POSTGRES_DATABASE, - user=settings.POSTGRES_USER, - port=settings.POSTGRES_PORT, - sslmode=settings.POSTGRES_SSL_MODE, + +class PostgresServer: + """Manages a persistent PostgreSQL connection with automatic Azure token refresh.""" + + _AZURE_POSTGRES_SCOPE = "https://ossrdbms-aad.database.windows.net/.default" + _TOKEN_REFRESH_BUFFER_SECONDS = 60 + + def __init__(self): + self._verify_config() + self._credential = DefaultAzureCredential() if settings.AZURE_DB else None + self._token: Optional[str] = None + self._token_expiration: int = 0 + self._conn = None + + @property + def conn(self): + """Return an open connection, reconnecting only when necessary.""" + if self._conn is None or self._conn.closed: + print("local database") + self._conn = self._connect() + elif settings.AZURE_DB and self._is_token_expired(): + logger.info("Azure token expired — reconnecting to PostgreSQL") + print("cloud database") + self.close() + self._conn = self._connect() + print(self._conn) + return self._conn + + def close(self): + """Safely close the current connection (idempotent).""" + if self._conn and not self._conn.closed: + try: + self._conn.close() + except Exception: + logger.warning("Failed to close PostgreSQL connection", exc_info=True) + self._conn = None + + @staticmethod + def _verify_config(): + """Validate that all required PostgreSQL settings are present.""" + required = [settings.POSTGRES_HOST, settings.POSTGRES_DATABASE, settings.POSTGRES_USER] + if not all(required): + raise RuntimeError("POSTGRES_HOST, 
POSTGRES_DATABASE, and POSTGRES_USER are required")
+        if not settings.AZURE_DB and not settings.POSTGRES_PASSWORD:
+            raise RuntimeError("POSTGRES_PASSWORD is required when AZURE_DB is False")
+
+    def _is_token_expired(self) -> bool:
+        """Check whether the cached Azure token needs refreshing."""
+        now = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
+        return not self._token or now >= self._token_expiration
+
+    def _refresh_azure_token(self) -> str:
+        """Return a valid Azure token, fetching a new one only if expired."""
+        if self._is_token_expired():
+            logger.info("Fetching fresh Azure PostgreSQL token")
+            token = self._credential.get_token(self._AZURE_POSTGRES_SCOPE)
+            self._token = token.token
+            self._token_expiration = token.expires_on - self._TOKEN_REFRESH_BUFFER_SECONDS
+        return self._token
+
+    def _build_connect_kwargs(self) -> dict:
+        """Assemble psycopg2.connect() keyword arguments from settings."""
+        kwargs = {
+            "host": settings.POSTGRES_HOST,
+            "database": settings.POSTGRES_DATABASE,
+            "user": settings.POSTGRES_USER,
+            "port": settings.POSTGRES_PORT,
+        }
+        if settings.POSTGRES_SSL_MODE:
+            kwargs["sslmode"] = settings.POSTGRES_SSL_MODE
+        if settings.AZURE_DB:
+            kwargs["password"] = self._refresh_azure_token()
+        elif settings.POSTGRES_PASSWORD:
+            kwargs["password"] = settings.POSTGRES_PASSWORD
+        return kwargs
+
+    def _connect(self):
+        """Create a new psycopg2 connection."""
+        return psycopg2.connect(**self._build_connect_kwargs())
+
+    def __repr__(self) -> str:
+        status = "open" if self._conn and not self._conn.closed else "closed"
+        return (
+            f"<PostgresServer status={status}>"
         )
-
-    # Fallback to connection string (local development)
-    if db_conn_str:
-        logger.debug("Connecting to PostgreSQL using connection string")
-        psycopg2 = _ensure_psycopg2()
-        return psycopg2.connect(db_conn_str)
-
-    # Check for POSTGRES_URI as final fallback
-    if settings.POSTGRES_URI:
-        logger.debug("Connecting to PostgreSQL using POSTGRES_URI")
-        psycopg2 = _ensure_psycopg2()
-        return 
psycopg2.connect(settings.POSTGRES_URI) - - raise RuntimeError( - "PostgreSQL not configured. Set POSTGRES_HOST, POSTGRES_DATABASE, and POSTGRES_USER " - "for Entra ID auth, or POSTGRES_URI for local development." - ) - - -def pgsql_entra_auth_configured(): - from ..core.config import settings - has_entra_config = settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER - return has_entra_config \ No newline at end of file + +postgres_server = PostgresServer() \ No newline at end of file diff --git a/backend/api/services/sr_db_service.py b/backend/api/services/sr_db_service.py index 6ab70e4d..8a0bd43b 100644 --- a/backend/api/services/sr_db_service.py +++ b/backend/api/services/sr_db_service.py @@ -16,7 +16,7 @@ from fastapi import HTTPException, status -from .postgres_auth import _ensure_psycopg2, connect_postgres +from .postgres_auth import postgres_server from ..core.config import settings logger = logging.getLogger(__name__) @@ -27,60 +27,6 @@ def __init__(self): # Service is stateless; connection strings passed per-call pass - def _ensure_psycopg2(self): - return _ensure_psycopg2() - - def _connect(self, db_conn_str: Optional[str] = None): - """ - Connect and return a psycopg2 connection using Entra ID auth (preferred) or connection string. - Raises RuntimeError if psycopg2 missing. - Caller is responsible for closing the connection. - """ - return connect_postgres(db_conn_str) - - def _is_postgres_configured(self, db_conn_str: Optional[str] = None) -> bool: - """ - Check if PostgreSQL is configured via Entra ID env vars or connection string. - """ - has_entra_config = settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER - has_uri_config = db_conn_str or settings.POSTGRES_URI - return bool(has_entra_config or has_uri_config) - - def _ensure_postgres_configured(self, db_conn_str: Optional[str] = None) -> None: - """ - Raise HTTPException if PostgreSQL is not configured. 
- """ - if not self._is_postgres_configured(db_conn_str): - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Systematic review DB not configured. Set POSTGRES_HOST/DATABASE/USER for Entra ID auth, or POSTGRES_URI for local dev." - ) - - def ensure_db_available(self, db_conn_str: Optional[str] = None) -> None: - """ - Raise an HTTPException (503) if the PostgreSQL connection is not available. - Routers call this to provide consistent error messages when Postgres is not configured. - """ - # Check if any Postgres config is available - has_entra_config = settings.POSTGRES_HOST and settings.POSTGRES_DATABASE and settings.POSTGRES_USER - has_uri_config = db_conn_str or settings.POSTGRES_URI - - if not has_entra_config and not has_uri_config: - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="PostgreSQL connection not configured. Set POSTGRES_HOST/DATABASE/USER for Entra ID auth, or POSTGRES_URI for local dev.", - ) - # Try to connect to verify availability - try: - conn = self._connect(db_conn_str) - conn.close() - except Exception as e: - logger.warning(f"PostgreSQL connection failed: {e}") - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail=f"PostgreSQL connection failed: {e}", - ) - def ensure_table_exists(self, db_conn_str: str) -> None: """ Ensure the systematic_reviews table exists in PostgreSQL. 
@@ -89,7 +35,7 @@ def ensure_table_exists(self, db_conn_str: str) -> None: """ conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() create_table_sql = """ @@ -117,7 +63,7 @@ def ensure_table_exists(self, db_conn_str: str) -> None: except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -128,7 +74,7 @@ def ensure_table_exists(self, db_conn_str: str) -> None: finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -232,7 +178,6 @@ def create_systematic_review( """ Create a new SR document and insert into the table. Returns the created document. """ - self._ensure_postgres_configured(db_conn_str) sr_id = str(uuid.uuid4()) now = datetime.utcnow().isoformat() @@ -243,7 +188,7 @@ def create_systematic_review( conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() insert_sql = """ @@ -295,7 +240,7 @@ def create_systematic_review( except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -307,7 +252,7 @@ def create_systematic_review( finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -317,7 +262,7 @@ def add_user(self, db_conn_str: str, sr_id: str, target_user_id: str, requester_ requester must be a member or owner. Returns a dict with update result metadata. 
""" - self._ensure_postgres_configured(db_conn_str) + sr = self.get_systematic_review(db_conn_str, sr_id) if not sr or not sr.get("visible", True): @@ -330,7 +275,7 @@ def add_user(self, db_conn_str: str, sr_id: str, target_user_id: str, requester_ conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() # Get current users array @@ -361,7 +306,7 @@ def add_user(self, db_conn_str: str, sr_id: str, target_user_id: str, requester_ except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -375,7 +320,7 @@ def add_user(self, db_conn_str: str, sr_id: str, target_user_id: str, requester_ finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -384,7 +329,7 @@ def remove_user(self, db_conn_str: str, sr_id: str, target_user_id: str, request Remove a user id from the SR's users list. Owner cannot be removed. Enforces requester permissions (must be a member or owner). """ - self._ensure_postgres_configured(db_conn_str) + sr = self.get_systematic_review(db_conn_str, sr_id) if not sr or not sr.get("visible", True): @@ -400,7 +345,7 @@ def remove_user(self, db_conn_str: str, sr_id: str, target_user_id: str, request conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() # Get current users array @@ -431,7 +376,7 @@ def remove_user(self, db_conn_str: str, sr_id: str, target_user_id: str, request except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -445,7 +390,7 @@ def remove_user(self, db_conn_str: str, sr_id: str, target_user_id: str, request finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -456,7 +401,7 @@ def user_has_sr_permission(self, db_conn_str: str, sr_id: str, user_id: str) -> Note: this check deliberately ignores the SR's 'visible' flag so membership checks work regardless of whether the SR is hidden/soft-deleted. 
""" - self._ensure_postgres_configured(db_conn_str) + doc = self.get_systematic_review(db_conn_str, sr_id, ignore_visibility=True) if not doc: @@ -473,7 +418,7 @@ def update_criteria(self, db_conn_str: str, sr_id: str, criteria_obj: Dict[str, The requester must be a member or owner. Returns the updated SR document. """ - self._ensure_postgres_configured(db_conn_str) + sr = self.get_systematic_review(db_conn_str, sr_id) if not sr or not sr.get("visible", True): @@ -486,7 +431,7 @@ def update_criteria(self, db_conn_str: str, sr_id: str, criteria_obj: Dict[str, conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() updated_at = datetime.utcnow().isoformat() @@ -516,7 +461,7 @@ def update_criteria(self, db_conn_str: str, sr_id: str, criteria_obj: Dict[str, except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -532,7 +477,7 @@ def update_criteria(self, db_conn_str: str, sr_id: str, criteria_obj: Dict[str, finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -540,11 +485,11 @@ def list_systematic_reviews_for_user(self, db_conn_str: str, user_email: str) -> """ Return all SR documents where the user is a member (regardless of visible flag). 
""" - self._ensure_postgres_configured(db_conn_str) + conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() # Query using jsonb operator to check if user_email is in users array @@ -583,7 +528,7 @@ def list_systematic_reviews_for_user(self, db_conn_str: str, user_email: str) -> except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -595,7 +540,7 @@ def list_systematic_reviews_for_user(self, db_conn_str: str, user_email: str) -> finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -604,11 +549,11 @@ def get_systematic_review(self, db_conn_str: str, sr_id: str, ignore_visibility: Return SR document by id. Returns None if not found. If ignore_visibility is False, only returns visible SRs. """ - self._ensure_postgres_configured(db_conn_str) + conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() if ignore_visibility: @@ -625,7 +570,7 @@ def get_systematic_review(self, db_conn_str: str, sr_id: str, ignore_visibility: except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass return None @@ -652,7 +597,7 @@ def get_systematic_review(self, db_conn_str: str, sr_id: str, ignore_visibility: except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -664,7 +609,7 @@ def get_systematic_review(self, db_conn_str: str, sr_id: str, ignore_visibility: finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -673,7 +618,7 @@ def set_visibility(self, db_conn_str: str, sr_id: str, visible: bool, requester_ Set the visible flag on the SR. Only owner is allowed to change visibility. Returns update metadata. 
""" - self._ensure_postgres_configured(db_conn_str) + sr = self.get_systematic_review(db_conn_str, sr_id, ignore_visibility=True) if not sr: @@ -684,7 +629,7 @@ def set_visibility(self, db_conn_str: str, sr_id: str, visible: bool, requester_ conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() updated_at = datetime.utcnow().isoformat() @@ -700,7 +645,7 @@ def set_visibility(self, db_conn_str: str, sr_id: str, visible: bool, requester_ except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -712,7 +657,7 @@ def set_visibility(self, db_conn_str: str, sr_id: str, visible: bool, requester_ finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -733,7 +678,7 @@ def hard_delete_systematic_review(self, db_conn_str: str, sr_id: str, requester_ Permanently remove the SR document. Only owner may hard delete. Returns deletion metadata (deleted_count). """ - self._ensure_postgres_configured(db_conn_str) + sr = self.get_systematic_review(db_conn_str, sr_id, ignore_visibility=True) if not sr: @@ -744,7 +689,7 @@ def hard_delete_systematic_review(self, db_conn_str: str, sr_id: str, requester_ conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() cur.execute("DELETE FROM systematic_reviews WHERE id = %s", (sr_id,)) @@ -756,7 +701,7 @@ def hard_delete_systematic_review(self, db_conn_str: str, sr_id: str, requester_ except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -768,7 +713,7 @@ def hard_delete_systematic_review(self, db_conn_str: str, sr_id: str, requester_ finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -777,11 +722,11 @@ def update_screening_db_info(self, db_conn_str: str, sr_id: str, screening_db: D """ Update the screening_db field in the SR document with screening database metadata. 
""" - self._ensure_postgres_configured(db_conn_str) + conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() updated_at = datetime.utcnow().isoformat() @@ -796,7 +741,7 @@ def update_screening_db_info(self, db_conn_str: str, sr_id: str, screening_db: D except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -806,7 +751,7 @@ def update_screening_db_info(self, db_conn_str: str, sr_id: str, screening_db: D finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass @@ -814,11 +759,11 @@ def clear_screening_db_info(self, db_conn_str: str, sr_id: str) -> None: """ Remove the screening_db field from the SR document. """ - self._ensure_postgres_configured(db_conn_str) + conn = None try: - conn = self._connect(db_conn_str) + conn = postgres_server.conn cur = conn.cursor() updated_at = datetime.utcnow().isoformat() @@ -833,7 +778,7 @@ def clear_screening_db_info(self, db_conn_str: str, sr_id: str) -> None: except Exception: pass try: - conn.close() + postgres_server.close() except Exception: pass @@ -843,7 +788,7 @@ def clear_screening_db_info(self, db_conn_str: str, sr_id: str) -> None: finally: if conn: try: - conn.close() + postgres_server.close() except Exception: pass diff --git a/backend/api/sr/router.py b/backend/api/sr/router.py index 20199992..1db2b27e 100644 --- a/backend/api/sr/router.py +++ b/backend/api/sr/router.py @@ -99,7 +99,6 @@ async def create_systematic_review( The created SR is stored in PostgreSQL and the creating user is added as the first member. """ db_conn_str = _get_db_conn_str() - await run_in_threadpool(srdb_service.ensure_db_available, db_conn_str) if not name or not name.strip(): raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="name is required") @@ -265,7 +264,6 @@ async def list_systematic_reviews_for_user( Hidden/deleted SRs (visible == False) are excluded. 
""" db_conn_str = _get_db_conn_str() - await run_in_threadpool(srdb_service.ensure_db_available, db_conn_str) user_id = current_user.get("email") results = [] From 23e8234f4cc16173cb2411ce1b33b9039d7d336f Mon Sep 17 00:00:00 2001 From: Keenan Date: Fri, 6 Feb 2026 13:39:53 -0500 Subject: [PATCH 16/17] updates --- backend/api/services/cit_db_service.py | 155 ++++------------------- backend/api/services/sr_db_service.py | 164 ++++--------------------- 2 files changed, 43 insertions(+), 276 deletions(-) diff --git a/backend/api/services/cit_db_service.py b/backend/api/services/cit_db_service.py index 6005fef0..b130b688 100644 --- a/backend/api/services/cit_db_service.py +++ b/backend/api/services/cit_db_service.py @@ -158,20 +158,10 @@ def create_column(self, db_conn_str: str, col: str, col_type: str, table_name: s except Exception: pass conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def update_jsonb_column( self, @@ -199,21 +189,11 @@ def update_jsonb_column( cur.execute(f'UPDATE "{table_name}" SET "{col}" = %s WHERE id = %s', (json.dumps(data), int(citation_id))) rows = cur.rowcount conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return rows or 0 finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def update_text_column( self, @@ -241,21 +221,11 @@ def update_text_column( cur.execute(f'UPDATE "{table_name}" SET "{col}" = %s WHERE id = %s', (text_value, int(citation_id))) rows = cur.rowcount conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return rows or 0 finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass # ----------------------- # Citation row helpers @@ -282,22 +252,12 @@ def 
dump_citations_csv(self, db_conn_str: str, table_name: str = "citations") -> ) csv_text = buf.getvalue() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return csv_text.encode("utf-8") finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def get_citation_by_id(self, db_conn_str: str, citation_id: int, table_name: str = "citations") -> Optional[Dict[str, Any]]: """ @@ -314,35 +274,17 @@ def get_citation_by_id(self, db_conn_str: str, citation_id: int, table_name: str cur.execute(f'SELECT * FROM "{table_name}" WHERE id = %s', (citation_id,)) row = cur.fetchone() if row is None: - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass return None if isinstance(row, dict): result = row else: cols = [desc[0] for desc in cur.description] result = {cols[i]: row[i] for i in range(len(cols))} - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return result finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def list_citation_ids(self, db_conn_str: str, filter_step=None, table_name: str = "citations") -> List[int]: """ @@ -385,21 +327,11 @@ def list_citation_ids(self, db_conn_str: str, filter_step=None, table_name: str cur.execute(query) rows = cur.fetchall() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return [int(r[0]) for r in rows] finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def list_fulltext_urls(self, db_conn_str: str, table_name: str = "citations") -> List[str]: """ @@ -412,21 +344,11 @@ def list_fulltext_urls(self, db_conn_str: str, table_name: str = "citations") -> cur = conn.cursor() cur.execute(f'SELECT fulltext_url FROM "{table_name}" WHERE fulltext_url IS NOT NULL') rows = cur.fetchall() - try: - cur.close() - except 
Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return [r[0] for r in rows if r and r[0]] finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def update_citation_fulltext(self, db_conn_str: str, citation_id: int, fulltext_path: str) -> int: """ @@ -462,8 +384,8 @@ def attach_fulltext( rows = cur.rowcount conn.commit() - cur.close() - postgres_server.close() + + return rows # ----------------------- @@ -484,35 +406,17 @@ def get_column_value(self, db_conn_str: str, citation_id: int, column: str, tabl cur.execute(f'SELECT "{column}" FROM "{table_name}" WHERE id = %s', (citation_id,)) row = cur.fetchone() if not row: - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass return None # row may be dict or tuple if isinstance(row, dict): val = list(row.values())[0] if row else None else: val = row[0] if row and len(row) > 0 else None - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return val finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def set_column_value(self, db_conn_str: str, citation_id: int, column: str, value: Any, table_name: str = "citations") -> int: """ @@ -535,16 +439,9 @@ def drop_table(self, db_conn_str: str, table_name: str, cascade: bool = True) -> cur = conn.cursor() cas = " CASCADE" if cascade else "" cur.execute(f'DROP TABLE IF EXISTS "{table_name}"{cas}') - try: - cur.close() - except Exception: - pass finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def create_table_and_insert_sync( self, @@ -611,21 +508,11 @@ def _row_has_data(row: dict) -> bool: inserted = len(values) conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return inserted finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass # NOTE: 
legacy per-database helpers (drop_database, create_db_and_table_sync) were # intentionally removed in favor of per-upload tables in a shared database. diff --git a/backend/api/services/sr_db_service.py b/backend/api/services/sr_db_service.py index 8a0bd43b..1dfaedce 100644 --- a/backend/api/services/sr_db_service.py +++ b/backend/api/services/sr_db_service.py @@ -58,14 +58,7 @@ def ensure_table_exists(self, db_conn_str: str) -> None: cur.execute(create_table_sql) conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + logger.info("Ensured systematic_reviews table exists") except Exception as e: @@ -73,10 +66,7 @@ def ensure_table_exists(self, db_conn_str: str) -> None: raise finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def build_criteria_parsed(self, criteria_obj: Optional[Dict[str, Any]]) -> Dict[str, Any]: """ @@ -235,14 +225,7 @@ def create_systematic_review( if sr_doc.get('updated_at') and isinstance(sr_doc['updated_at'], dt): sr_doc['updated_at'] = sr_doc['updated_at'].isoformat() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return sr_doc @@ -251,10 +234,7 @@ def create_systematic_review( raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to create systematic review: {e}") finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def add_user(self, db_conn_str: str, sr_id: str, target_user_id: str, requester_id: str) -> Dict[str, Any]: """ @@ -301,14 +281,7 @@ def add_user(self, db_conn_str: str, sr_id: str, target_user_id: str, requester_ modified_count = cur.rowcount conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return {"matched_count": 1, "modified_count": modified_count, "added_user_id": target_user_id} @@ -319,10 +292,7 @@ def add_user(self, 
db_conn_str: str, sr_id: str, target_user_id: str, requester_ raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to add user: {e}") finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def remove_user(self, db_conn_str: str, sr_id: str, target_user_id: str, requester_id: str) -> Dict[str, Any]: """ @@ -371,14 +341,7 @@ def remove_user(self, db_conn_str: str, sr_id: str, target_user_id: str, request modified_count = cur.rowcount conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return {"matched_count": 1, "modified_count": modified_count, "removed_user_id": target_user_id} @@ -389,10 +352,7 @@ def remove_user(self, db_conn_str: str, sr_id: str, target_user_id: str, request raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to remove user: {e}") finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def user_has_sr_permission(self, db_conn_str: str, sr_id: str, user_id: str) -> bool: """ @@ -456,14 +416,7 @@ def update_criteria(self, db_conn_str: str, sr_id: str, criteria_obj: Dict[str, conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + # Return fresh doc doc = self.get_systematic_review(db_conn_str, sr_id) @@ -476,10 +429,7 @@ def update_criteria(self, db_conn_str: str, sr_id: str, criteria_obj: Dict[str, raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to update criteria: {e}") finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def list_systematic_reviews_for_user(self, db_conn_str: str, user_email: str) -> List[Dict[str, Any]]: """ @@ -523,14 +473,7 @@ def list_systematic_reviews_for_user(self, db_conn_str: str, user_email: str) -> results.append(doc) - try: - cur.close() - except Exception: - pass - try: - 
postgres_server.close() - except Exception: - pass + return results @@ -539,10 +482,7 @@ def list_systematic_reviews_for_user(self, db_conn_str: str, user_email: str) -> raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to list systematic reviews: {e}") finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def get_systematic_review(self, db_conn_str: str, sr_id: str, ignore_visibility: bool = False) -> Optional[Dict[str, Any]]: """ @@ -565,14 +505,6 @@ def get_systematic_review(self, db_conn_str: str, sr_id: str, ignore_visibility: row = cur.fetchone() if not row: - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass return None cols = [desc[0] for desc in cur.description] @@ -592,14 +524,7 @@ def get_systematic_review(self, db_conn_str: str, sr_id: str, ignore_visibility: if doc.get('updated_at') and isinstance(doc['updated_at'], dt): doc['updated_at'] = doc['updated_at'].isoformat() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return doc @@ -608,10 +533,7 @@ def get_systematic_review(self, db_conn_str: str, sr_id: str, ignore_visibility: return None finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def set_visibility(self, db_conn_str: str, sr_id: str, visible: bool, requester_id: str) -> Dict[str, Any]: """ @@ -640,14 +562,7 @@ def set_visibility(self, db_conn_str: str, sr_id: str, visible: bool, requester_ modified_count = cur.rowcount conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + return {"matched_count": 1, "modified_count": modified_count, "visible": visible} @@ -656,10 +571,7 @@ def set_visibility(self, db_conn_str: str, sr_id: str, visible: bool, requester_ raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to set visibility: 
{e}") finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def soft_delete_systematic_review(self, db_conn_str: str, sr_id: str, requester_id: str) -> Dict[str, Any]: """ @@ -695,16 +607,7 @@ def hard_delete_systematic_review(self, db_conn_str: str, sr_id: str, requester_ cur.execute("DELETE FROM systematic_reviews WHERE id = %s", (sr_id,)) deleted_count = cur.rowcount conn.commit() - - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass - + return {"deleted_count": deleted_count} except Exception as e: @@ -712,10 +615,7 @@ def hard_delete_systematic_review(self, db_conn_str: str, sr_id: str, requester_ raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to hard-delete systematic review: {e}") finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def update_screening_db_info(self, db_conn_str: str, sr_id: str, screening_db: Dict[str, Any]) -> None: @@ -736,24 +636,14 @@ def update_screening_db_info(self, db_conn_str: str, sr_id: str, screening_db: D ) conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + except Exception as e: logger.exception(f"Failed to update screening DB info: {e}") raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to update screening DB info: {e}") finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass def clear_screening_db_info(self, db_conn_str: str, sr_id: str) -> None: """ @@ -773,24 +663,14 @@ def clear_screening_db_info(self, db_conn_str: str, sr_id: str) -> None: ) conn.commit() - try: - cur.close() - except Exception: - pass - try: - postgres_server.close() - except Exception: - pass + except Exception as e: logger.exception(f"Failed to clear screening DB info: {e}") raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, 
detail=f"Failed to clear screening DB info: {e}") finally: if conn: - try: - postgres_server.close() - except Exception: - pass + pass # module-level instance From 04c940c39d56202e90547131cc63957b353e2895 Mon Sep 17 00:00:00 2001 From: Keenan Date: Mon, 9 Feb 2026 09:27:42 -0500 Subject: [PATCH 17/17] support entra and non entra --- backend/api/core/config.py | 3 +++ backend/api/services/azure_openai_client.py | 30 ++++++++++++++------- backend/api/services/storage.py | 21 +++++++++------ backend/api/services/user_db.py | 21 +++++++++------ frontend/tsconfig.json | 24 +++++++++++++---- 5 files changed, 68 insertions(+), 31 deletions(-) diff --git a/backend/api/core/config.py b/backend/api/core/config.py index 420727d1..3fb7d32a 100644 --- a/backend/api/core/config.py +++ b/backend/api/core/config.py @@ -135,6 +135,9 @@ def convert_max_file_size(cls, v): REDIRECT_URI: str = os.getenv("REDIRECT_URI") SSO_LOGIN_URL: str = os.getenv("SSO_LOGIN_URL") + # Entra + USE_ENTRA_AUTH: bool = os.getenv("USE_ENTRA_AUTH", "false").lower() == "true" + class Config: case_sensitive = True env_file = ".env" diff --git a/backend/api/services/azure_openai_client.py b/backend/api/services/azure_openai_client.py index 99e05ec5..dd89e685 100644 --- a/backend/api/services/azure_openai_client.py +++ b/backend/api/services/azure_openai_client.py @@ -36,12 +36,16 @@ def __init__(self): # Create token provider for Azure OpenAI using DefaultAzureCredential # Wrapped with caching to avoid fetching a new token on every request - self._credential = DefaultAzureCredential() - self._token_provider = CachedTokenProvider( - get_bearer_token_provider( - self._credential, "https://cognitiveservices.azure.com/.default" + if not settings.AZURE_OPENAI_API_KEY and not settings.USE_ENTRA_AUTH: + raise ValueError("Azure OpenAI API key or Entra auth must be configured") + + if settings.USE_ENTRA_AUTH: + self._credential = DefaultAzureCredential() + self._token_provider = CachedTokenProvider( + 
get_bearer_token_provider( + self._credential, "https://cognitiveservices.azure.com/.default" + ) ) - ) self.model_configs = { "gpt-4.1-mini": { @@ -75,11 +79,17 @@ def _get_official_client(self, model: str) -> AzureOpenAI: f"Azure OpenAI endpoint not configured for model {model}" ) - self._official_clients[model] = AzureOpenAI( - azure_ad_token_provider=self._token_provider, - azure_endpoint=config["endpoint"], - api_version=config["api_version"], - ) + azure_openai_kwargs = { + "azure_endpoint": config["endpoint"], + "api_version": config["api_version"], + } + if settings.USE_ENTRA_AUTH: + azure_openai_kwargs["azure_ad_token_provider"] = self._token_provider + + if settings.AZURE_OPENAI_API_KEY: + azure_openai_kwargs["api_key"] = settings.AZURE_OPENAI_API_KEY + + self._official_clients[model] = AzureOpenAI(**azure_openai_kwargs) return self._official_clients[model] diff --git a/backend/api/services/storage.py b/backend/api/services/storage.py index e1b36467..b7595416 100644 --- a/backend/api/services/storage.py +++ b/backend/api/services/storage.py @@ -20,14 +20,19 @@ class AzureStorageService: """Service for managing user data in Azure Blob Storage""" def __init__(self): - if not settings.AZURE_STORAGE_ACCOUNT_NAME: - raise ValueError("AZURE_STORAGE_ACCOUNT_NAME is not configured") - - account_url = f"https://{settings.AZURE_STORAGE_ACCOUNT_NAME}.blob.core.windows.net" - credential = DefaultAzureCredential() - self.blob_service_client = BlobServiceClient( - account_url=account_url, credential=credential - ) + if not settings.AZURE_STORAGE_ACCOUNT_NAME and not settings.AZURE_STORAGE_CONNECTION_STRING: + raise ValueError("AZURE_STORAGE_ACCOUNT_NAME or AZURE_STORAGE_CONNECTION_STRING must be configured") + + if settings.AZURE_STORAGE_ACCOUNT_NAME: + account_url = f"https://{settings.AZURE_STORAGE_ACCOUNT_NAME}.blob.core.windows.net" + credential = DefaultAzureCredential() + self.blob_service_client = BlobServiceClient( + account_url=account_url, 
credential=credential + ) + elif settings.AZURE_STORAGE_CONNECTION_STRING: + self.blob_service_client = BlobServiceClient.from_connection_string( + settings.AZURE_STORAGE_CONNECTION_STRING + ) self.container_name = settings.AZURE_STORAGE_CONTAINER_NAME self._ensure_container_exists() diff --git a/backend/api/services/user_db.py b/backend/api/services/user_db.py index 2a9fb41f..00300523 100644 --- a/backend/api/services/user_db.py +++ b/backend/api/services/user_db.py @@ -18,14 +18,19 @@ class UserDatabaseService: """Service for managing user data in Azure Blob Storage""" def __init__(self): - if not settings.AZURE_STORAGE_ACCOUNT_NAME: - raise ValueError("AZURE_STORAGE_ACCOUNT_NAME is not configured") - - account_url = f"https://{settings.AZURE_STORAGE_ACCOUNT_NAME}.blob.core.windows.net" - credential = DefaultAzureCredential() - self.blob_service_client = BlobServiceClient( - account_url=account_url, credential=credential - ) + if not settings.AZURE_STORAGE_ACCOUNT_NAME and not settings.AZURE_STORAGE_CONNECTION_STRING: + raise ValueError("AZURE_STORAGE_ACCOUNT_NAME or AZURE_STORAGE_CONNECTION_STRING must be configured") + + if settings.AZURE_STORAGE_ACCOUNT_NAME: + account_url = f"https://{settings.AZURE_STORAGE_ACCOUNT_NAME}.blob.core.windows.net" + credential = DefaultAzureCredential() + self.blob_service_client = BlobServiceClient( + account_url=account_url, credential=credential + ) + elif settings.AZURE_STORAGE_CONNECTION_STRING: + self.blob_service_client = BlobServiceClient.from_connection_string( + settings.AZURE_STORAGE_CONNECTION_STRING + ) self.container_name = settings.AZURE_STORAGE_CONTAINER_NAME self.pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json index d8b93235..e7ff3a26 100644 --- a/frontend/tsconfig.json +++ b/frontend/tsconfig.json @@ -1,7 +1,11 @@ { "compilerOptions": { "target": "ES2017", - "lib": ["dom", "dom.iterable", "esnext"], + "lib": [ + "dom", + 
"dom.iterable", + "esnext" + ], "allowJs": true, "skipLibCheck": true, "strict": true, @@ -11,7 +15,7 @@ "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, - "jsx": "preserve", + "jsx": "react-jsx", "incremental": true, "plugins": [ { @@ -19,9 +23,19 @@ } ], "paths": { - "@/*": ["./*"] + "@/*": [ + "./*" + ] } }, - "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], - "exclude": ["node_modules"] + "include": [ + "next-env.d.ts", + "**/*.ts", + "**/*.tsx", + ".next/types/**/*.ts", + ".next/dev/types/**/*.ts" + ], + "exclude": [ + "node_modules" + ] }