Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ MONGODB_URI=mongodb://localhost:27017/licenguard
MONGODB_DB=licenguard
MCP_HTTP_URL=http://127.0.0.1:3333/mcp


LOG_LEVEL=INFO

# Git Providers Configuration
GITHUB_API_URL=https://api.github.com
GITHUB_TOKEN=github_pat_xxxxxx

BITBUCKET_API_URL=https://api.bitbucket.org/2.0
BITBUCKET_USER=your_bitbucket_username
BITBUCKET_BASIC_TOKEN=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1 change: 1 addition & 0 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ RUN apt-get update \
build-essential \
curl \
git \
openssh-client \
&& rm -rf /var/lib/apt/lists/*

COPY requirements.txt ./
Expand Down
8 changes: 8 additions & 0 deletions backend/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ def create_app() -> FastAPI:
log_fmt = '%(asctime)s %(levelname)s %(message)s'
log_config['formatters']['default']['fmt'] = log_fmt
log_config['formatters']['access']['fmt'] = '%(asctime)s %(levelname)s %(client_addr)s - "%(request_line)s" %(status_code)s'

# Ensure 'app' logger captures INFO logs
log_config['loggers']['app'] = {
'handlers': ['default'],
'level': 'INFO',
'propagate': False
}

logging.config.dictConfig(log_config)

app = FastAPI(title='LicenGuard API', version='0.1.0')
Expand Down
77 changes: 72 additions & 5 deletions backend/app/services/repo_scanner.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import logging
import os
import shutil
import subprocess
import tempfile
from typing import List, Dict, Any
from urllib.parse import urlparse, urlunparse

logger = logging.getLogger(__name__)


DEP_FILES = {
# JavaScript / Node
Expand Down Expand Up @@ -42,6 +45,7 @@ def _with_host_auth(repo_url: str) -> tuple[str, str | None]:
Insert provider-specific auth into the clone URL if available and applicable.
Returns (clone_url, secret_used) so we can redact the secret from errors.
"""
logger.info(f"Preparing auth for repo URL: {repo_url}")
parsed = urlparse(repo_url)
if parsed.scheme not in ("http", "https") or parsed.username or parsed.password:
return repo_url, None
Expand All @@ -55,6 +59,7 @@ def _with_host_auth(repo_url: str) -> tuple[str, str | None]:
for key in ("REPO_SCAN_GITHUB_TOKEN", "GITHUB_TOKEN", "GH_TOKEN"):
token = os.getenv(key)
if token:
logger.info(f"Found GitHub token in env var {key}")
break
if not token:
return repo_url, None
Expand All @@ -68,11 +73,14 @@ def _with_host_auth(repo_url: str) -> tuple[str, str | None]:
for key in ("REPO_SCAN_BITBUCKET_USER", "BITBUCKET_USER", "BITBUCKET_USERNAME"):
username = os.getenv(key)
if username:
logger.info(f"Found Bitbucket username in env var {key}")
break

app_pw = None
for key in ("REPO_SCAN_BITBUCKET_APP_PASSWORD", "BITBUCKET_APP_PASSWORD", "BITBUCKET_TOKEN"):
for key in ("REPO_SCAN_BITBUCKET_APP_PASSWORD", "BITBUCKET_APP_PASSWORD", "BITBUCKET_TOKEN" ,"BITBUCKET_BASIC_TOKEN"):
app_pw = os.getenv(key)
if app_pw:
logger.info(f"Found Bitbucket app password in env var {key}")
break
if not username or not app_pw:
return repo_url, None
Expand Down Expand Up @@ -105,12 +113,71 @@ def clone_and_scan(repo_url: str) -> Dict[str, Any]:
stderr = (result.stderr or b"").decode(errors="ignore").strip()
if secret_used:
stderr = stderr.replace(secret_used, "***redacted***")

logger.warning(f"HTTPS clone failed for {repo_url}: {stderr}")

hint = "Check that the repository URL is correct and reachable."
lower_url = repo_url.lower()
if not secret_used and "github.com" in lower_url:
hint += " If it is private, set GITHUB_TOKEN (or GH_TOKEN/REPO_SCAN_GITHUB_TOKEN)."
if not secret_used and "bitbucket.org" in lower_url:
hint += " If it is private, set BITBUCKET_USER (or REPO_SCAN_BITBUCKET_USER/BITBUCKET_USERNAME) and BITBUCKET_APP_PASSWORD (or REPO_SCAN_BITBUCKET_APP_PASSWORD/BITBUCKET_TOKEN)."
is_github = "github.com" in lower_url
is_bitbucket = "bitbucket.org" in lower_url

if not secret_used:
if is_github:
hint += " For private repos, set GITHUB_TOKEN."
elif is_bitbucket:
hint += " For private repos, set BITBUCKET_USER and BITBUCKET_APP_PASSWORD."
else:
hint += " For private repos, ensure authentication credentials are provided via environment variables."

if "terminal prompts disabled" in stderr:
hint += " Terminal prompts are disabled. You must provide credentials (env vars) or use SSH with keys."

# If HTTPS clone failed and we did not inject HTTP auth, try SSH fallback
try_ssh_fallback = False
has_ssh = shutil.which("ssh") is not None

if not has_ssh:
hint += " SSH client not found, SSH fallback disabled."

try:
parsed = urlparse(repo_url)
host = (parsed.hostname or "").lower()
if (parsed.scheme in ("http", "https")) and (not secret_used) and host in ("github.com", "www.github.com", "bitbucket.org", "www.bitbucket.org") and has_ssh:
try_ssh_fallback = True
except Exception:
try_ssh_fallback = False

if try_ssh_fallback:
# Construct scp-style SSH clone URL: git@host:<path>, where <path> is the HTTPS URL path copied as-is (e.g. owner/repo or owner/repo.git)
path = parsed.path.lstrip('/')
ssh_url = f"git@{host}:{path}"
logger.info(f"Attempting SSH fallback for {repo_url} -> {ssh_url}")

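# Disable strict host key checking for this operation to avoid "Host key verification failed"
# in non-interactive environments (containers).
# NOTE(review): StrictHostKeyChecking=no accepts unknown or changed host keys, so the
# server's identity is not verified; consider a pre-populated known_hosts file instead.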
ssh_env = env.copy()
ssh_env["GIT_SSH_COMMAND"] = "ssh -o StrictHostKeyChecking=no"

try:
ssh_result = subprocess.run(
["git", "clone", "--depth", "1", ssh_url, tmpdir],
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
env=ssh_env,
)
if ssh_result.returncode == 0:
# Success with SSH fallback
return {"files": find_dependency_files(tmpdir), "root": tmpdir}
ssh_stderr = (ssh_result.stderr or b"").decode(errors="ignore").strip()
logger.warning(f"SSH fallback failed: {ssh_stderr}")
except Exception as e:
ssh_stderr = str(e)
logger.error(f"SSH fallback exception: {e}")

# Nothing to redact for the SSH attempt: the fallback only runs when no secret was injected
full_err = f"HTTPS clone stderr: {stderr or 'unknown'}, SSH clone stderr: {ssh_stderr or 'unknown'}. {hint}"
raise RuntimeError(f"git clone failed: {full_err}")

raise RuntimeError(f"git clone failed: {stderr or 'unknown error'}. {hint}")

files = find_dependency_files(tmpdir)
Expand Down
Loading