diff --git a/backend/.env.example b/backend/.env.example index b278619..3776dc5 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -3,11 +3,12 @@ MONGODB_URI=mongodb://localhost:27017/licenguard MONGODB_DB=licenguard MCP_HTTP_URL=http://127.0.0.1:3333/mcp - +LOG_LEVEL=INFO # Git Providers Configuration GITHUB_API_URL=https://api.github.com GITHUB_TOKEN=github_pat_xxxxxx BITBUCKET_API_URL=https://api.bitbucket.org/2.0 +BITBUCKET_USER=your_bitbucket_username BITBUCKET_BASIC_TOKEN=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx \ No newline at end of file diff --git a/backend/Dockerfile b/backend/Dockerfile index 777e274..eddef9c 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -10,6 +10,7 @@ RUN apt-get update \ build-essential \ curl \ git \ + openssh-client \ && rm -rf /var/lib/apt/lists/* COPY requirements.txt ./ diff --git a/backend/app/main.py b/backend/app/main.py index 66bd295..169e007 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -21,6 +21,14 @@ def create_app() -> FastAPI: log_fmt = '%(asctime)s %(levelname)s %(message)s' log_config['formatters']['default']['fmt'] = log_fmt log_config['formatters']['access']['fmt'] = '%(asctime)s %(levelname)s %(client_addr)s - "%(request_line)s" %(status_code)s' + + # Ensure 'app' logger captures INFO logs + log_config['loggers']['app'] = { + 'handlers': ['default'], + 'level': 'INFO', + 'propagate': False + } + logging.config.dictConfig(log_config) app = FastAPI(title='LicenGuard API', version='0.1.0') diff --git a/backend/app/services/repo_scanner.py b/backend/app/services/repo_scanner.py index 5533bb6..0d447b7 100644 --- a/backend/app/services/repo_scanner.py +++ b/backend/app/services/repo_scanner.py @@ -1,3 +1,4 @@ +import logging import os import shutil import subprocess @@ -5,6 +6,8 @@ from typing import List, Dict, Any from urllib.parse import urlparse, urlunparse +logger = logging.getLogger(__name__) + DEP_FILES = { # JavaScript / Node @@ -42,6 +45,7 @@ def _with_host_auth(repo_url: str) -> tuple[str, str | None]: Insert provider-specific auth into the clone URL if available and applicable. Returns (clone_url, secret_used) so we can redact the secret from errors. """ + logger.info(f"Preparing auth for repo URL: {repo_url}") parsed = urlparse(repo_url) if parsed.scheme not in ("http", "https") or parsed.username or parsed.password: return repo_url, None @@ -55,6 +59,7 @@ def _with_host_auth(repo_url: str) -> tuple[str, str | None]: for key in ("REPO_SCAN_GITHUB_TOKEN", "GITHUB_TOKEN", "GH_TOKEN"): token = os.getenv(key) if token: + logger.info(f"Found GitHub token in env var {key}") break if not token: return repo_url, None @@ -68,11 +73,14 @@ def _with_host_auth(repo_url: str) -> tuple[str, str | None]: for key in ("REPO_SCAN_BITBUCKET_USER", "BITBUCKET_USER", "BITBUCKET_USERNAME"): username = os.getenv(key) if username: + logger.info(f"Found Bitbucket username in env var {key}") break + app_pw = None - for key in ("REPO_SCAN_BITBUCKET_APP_PASSWORD", "BITBUCKET_APP_PASSWORD", "BITBUCKET_TOKEN"): + for key in ("REPO_SCAN_BITBUCKET_APP_PASSWORD", "BITBUCKET_APP_PASSWORD", "BITBUCKET_TOKEN" ,"BITBUCKET_BASIC_TOKEN"): app_pw = os.getenv(key) if app_pw: + logger.info(f"Found Bitbucket app password in env var {key}") break if not username or not app_pw: return repo_url, None @@ -105,12 +113,71 @@ def clone_and_scan(repo_url: str) -> Dict[str, Any]: stderr = (result.stderr or b"").decode(errors="ignore").strip() if secret_used: stderr = stderr.replace(secret_used, "***redacted***") + + logger.warning(f"HTTPS clone failed for {repo_url}: {stderr}") + hint = "Check that the repository URL is correct and reachable." lower_url = repo_url.lower() - if not secret_used and "github.com" in lower_url: - hint += " If it is private, set GITHUB_TOKEN (or GH_TOKEN/REPO_SCAN_GITHUB_TOKEN)." - if not secret_used and "bitbucket.org" in lower_url: - hint += " If it is private, set BITBUCKET_USER (or REPO_SCAN_BITBUCKET_USER/BITBUCKET_USERNAME) and BITBUCKET_APP_PASSWORD (or REPO_SCAN_BITBUCKET_APP_PASSWORD/BITBUCKET_TOKEN)." + is_github = "github.com" in lower_url + is_bitbucket = "bitbucket.org" in lower_url + + if not secret_used: + if is_github: + hint += " For private repos, set GITHUB_TOKEN." + elif is_bitbucket: + hint += " For private repos, set BITBUCKET_USER and BITBUCKET_APP_PASSWORD." + else: + hint += " For private repos, ensure authentication credentials are provided via environment variables." + + if "terminal prompts disabled" in stderr: + hint += " Terminal prompts are disabled. You must provide credentials (env vars) or use SSH with keys." + + # If HTTPS clone failed and we did not inject HTTP auth, try SSH fallback + try_ssh_fallback = False + has_ssh = shutil.which("ssh") is not None + + if not has_ssh: + hint += " SSH client not found, SSH fallback disabled." + + try: + parsed = urlparse(repo_url) + host = (parsed.hostname or "").lower() + if (parsed.scheme in ("http", "https")) and (not secret_used) and host in ("github.com", "www.github.com", "bitbucket.org", "www.bitbucket.org") and has_ssh: + try_ssh_fallback = True + except Exception: + try_ssh_fallback = False + + if try_ssh_fallback: + # Construct SSH clone URL: git@host:owner/repo.git + path = parsed.path.lstrip('/') + ssh_url = f"git@{host}:{path}" + logger.info(f"Attempting SSH fallback for {repo_url} -> {ssh_url}") + + # Disable strict host key checking for this operation to avoid "Host key verification failed" + # in non-interactive environments (containers). + ssh_env = env.copy() + ssh_env["GIT_SSH_COMMAND"] = "ssh -o StrictHostKeyChecking=no" + + try: + ssh_result = subprocess.run( + ["git", "clone", "--depth", "1", ssh_url, tmpdir], + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + env=ssh_env, + ) + if ssh_result.returncode == 0: + # Success with SSH fallback + return {"files": find_dependency_files(tmpdir), "root": tmpdir} + ssh_stderr = (ssh_result.stderr or b"").decode(errors="ignore").strip() + logger.warning(f"SSH fallback failed: {ssh_stderr}") + except Exception as e: + ssh_stderr = str(e) + logger.error(f"SSH fallback exception: {e}") + + # Redact nothing for SSH attempt + full_err = f"HTTPS clone stderr: {stderr or 'unknown'}, SSH clone stderr: {ssh_stderr or 'unknown'}. {hint}" + raise RuntimeError(f"git clone failed: {full_err}") + raise RuntimeError(f"git clone failed: {stderr or 'unknown error'}. {hint}") files = find_dependency_files(tmpdir)