diff --git a/apps/web/src/api/client.js b/apps/web/src/api/client.js
index ee15a7f..a586d31 100644
--- a/apps/web/src/api/client.js
+++ b/apps/web/src/api/client.js
@@ -42,8 +42,8 @@ export function addVersion(libraryId, payload) {
});
}
-export async function scanRepository(url) {
- const res = await fetch(`${API_BASE}/libraries/repositories/scan`, {
+export async function cloneRepository(url) {
+ const res = await fetch(`${API_BASE}/libraries/repositories/clone`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url })
@@ -55,6 +55,21 @@ export async function scanRepository(url) {
return res.json();
}
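+// Fetch parsed dependency-file reports for a repository. Pass `root` for an
+// already-cloned checkout, or `url` to have the backend clone it first.
+// Response shape (per the backend handler): { root, files: [{ path, report }] }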
+export async function listRepositoryPackages({ root, url } = {}) {
+ const body = JSON.stringify(root ? { root } : { url });
+ const res = await fetch(`${API_BASE}/libraries/repositories/list-packages`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body,
+ });
+ if (!res.ok) {
+ const message = await res.text();
+ throw new Error(message || `Request failed with status ${res.status}`);
+ }
+ return res.json();
+}
+
+
export async function analyzeFileUpload(file) {
const form = new FormData();
form.append('file', file);
diff --git a/apps/web/src/components/ImportModal.jsx b/apps/web/src/components/ImportModal.jsx
index 1b88fb1..d52acfa 100644
--- a/apps/web/src/components/ImportModal.jsx
+++ b/apps/web/src/components/ImportModal.jsx
@@ -1,6 +1,9 @@
import { useState, useEffect, useRef } from 'react';
import { createLibrary, searchLibraries } from '../api/client.js';
+
+// TODO: Move the RiskGauge component into a shared module.
+
const RiskGauge = ({ score, level }) => {
if (score === undefined || score === null || Number.isNaN(score)) return null;
const clamped = Math.min(100, Math.max(0, Number(score)));
diff --git a/apps/web/src/components/RepoModal.jsx b/apps/web/src/components/RepoModal.jsx
index c104b68..c63ac4f 100644
--- a/apps/web/src/components/RepoModal.jsx
+++ b/apps/web/src/components/RepoModal.jsx
@@ -1,5 +1,5 @@
import React, { useCallback, useEffect, useRef, useState } from 'react';
-import { scanRepository, searchLibraries, createLibrary } from '../api/client.js';
+import { cloneRepository, listRepositoryPackages, searchLibraries, createLibrary } from '../api/client.js';
const RiskBar = ({ score, explanation }) => {
if (score === undefined || score === null || Number.isNaN(score)) return null;
@@ -22,6 +22,7 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
const [error, setError] = useState(null);
const [loading, setLoading] = useState(false);
const [files, setFiles] = useState([]);
+ const [statusMessage, setStatusMessage] = useState('');
const [depJobs, setDepJobs] = useState([]);
const [processing, setProcessing] = useState(false);
const inputRef = useRef(null);
@@ -38,6 +39,9 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
return '(N/A)';
};
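+  // Progress counters rendered next to the status message as "(processed/total)".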
+ const totalJobs = depJobs.length;
+ const processedJobs = depJobs.filter(j => j.status !== 'pending').length;
+
const resetState = () => {
setRepoUrl('');
setError(null);
@@ -74,6 +78,13 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
return cleaned || null;
}, []);
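+  // Flipped on unmount so the sequential import loop below can stop early.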
+ const cancelledRef = React.useRef(false);
+ useEffect(() => {
+ return () => {
+ cancelledRef.current = true;
+ };
+ }, []);
+
const computeRisk = useCallback((match = {}) => {
const summaries = Array.isArray(match.licenseSummary ?? match.license_summary)
? (match.licenseSummary ?? match.license_summary)
@@ -123,9 +134,18 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
setError(null);
setFiles([]);
setDepJobs([]);
- const res = await scanRepository(repoUrl);
- const scannedFiles = res.files ?? [];
+
+ setStatusMessage('Cloning repository...');
+ const cloneRes = await cloneRepository(repoUrl);
+ setStatusMessage('Cloning repository completed.');
+ const root = cloneRes.root;
+
+ setStatusMessage('Listing dependency files...');
+ const listRes = await listRepositoryPackages({ root });
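+      // Fall back to the clone response's file list if list-packages returns nothing.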
+ const scannedFiles = listRes.files ?? cloneRes.files ?? [];
setFiles(scannedFiles);
+
+ setStatusMessage('Scanning dependencies...');
const jobs = [];
scannedFiles.forEach((file, fIdx) => {
const deps = Array.isArray(file?.report?.dependencies) ? file.report.dependencies : [];
@@ -145,24 +165,154 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
});
});
setDepJobs(jobs);
+ // Process jobs sequentially: check local DB, fallback to MCP, persist if needed
+ setProcessing(true);
+ const updateJob = (id, patch) =>
+ setDepJobs(prev => prev.map(j => (j.id === id ? { ...j, ...patch } : j)));
+
+ for (const next of jobs) {
+ if (cancelledRef.current) break;
+ const q = next.version ? `${next.name} ${next.version}` : next.name;
+ try {
+ updateJob(next.id, { status: 'searching', message: null });
+ const res = await searchLibraries(q);
+ let match = null;
+ let existing = false;
+
+ if (res?.source === 'mongo' && Array.isArray(res?.results) && res.results.length > 0) {
+ existing = true;
+ const lib = res.results[0];
+ const v = lib.versions?.[0];
+ match = {
+ name: lib.name,
+ version: v?.version,
+ ecosystem: lib.ecosystem,
+ description: lib.description,
+ repository: lib.repository_url,
+ license: v?.license_name,
+ license_url: v?.license_url,
+ licenseSummary: v?.license_summary ?? [],
+ evidence: v?.evidence ?? [],
+ confidence: v?.confidence,
+ risk_level: v?.risk_level,
+ risk_score: v?.risk_score,
+ risk_score_explanation: v?.risk_score_explanation
+ };
+ } else if (res?.source === 'mcp' && Array.isArray(res?.results) && res.results.length > 0) {
+ const lib = res.results[0];
+ const v = lib.versions?.[0];
+ match = {
+ name: lib.name,
+ version: v?.version,
+ ecosystem: lib.ecosystem,
+ description: lib.description,
+ repository: lib.repository_url,
+ license: v?.license_name,
+ license_url: v?.license_url,
+ licenseSummary: v?.license_summary ?? [],
+ evidence: v?.evidence ?? [],
+ confidence: v?.confidence,
+ risk_level: v?.risk_level,
+ risk_score: v?.risk_score,
+ risk_score_explanation: v?.risk_score_explanation,
+ officialSite: lib.officialSite
+ };
+ } else if (res?.discovery?.matches?.length) {
+ match = res.discovery.bestMatch ?? res.discovery.matches[0];
+ }
+
+ if (!match) {
+ updateJob(next.id, { status: 'error', message: 'Eşleşme bulunamadı' });
+ continue;
+ }
+
+ const computedRisk = computeRisk(match);
+ const risk = {
+ level: match.risk_level ?? computedRisk.level,
+ score: match.risk_score ?? computedRisk.score,
+ explanation: match.risk_score_explanation ?? computedRisk.explanation
+ };
+
+ if (existing || res?.source === 'mongo') {
+ updateJob(next.id, {
+ status: 'done',
+ message: 'Zaten kayıtlı',
+ match,
+ risk_level: risk.level,
+ risk_score: risk.score,
+ risk_score_explanation: risk.explanation
+ });
+ continue;
+ }
+
+ updateJob(next.id, { status: 'importing', match, risk_level: risk.level, risk_score: risk.score, risk_score_explanation: risk.explanation });
+
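+      // Persist the MCP/discovery result into the local DB.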
+ const payload = {
+ name: match.name ?? next.name,
+ ecosystem: match.ecosystem ?? res?.discovery?.query?.ecosystem ?? 'unknown',
+ description: match.description,
+ repository_url: match.repository ?? match.officialSite ?? null,
+ officialSite: match.officialSite ?? match.repository ?? null,
+ versions: [
+ {
+ version: normalizeVersion(match.version ?? next.version) ?? 'unknown',
+ license_name: match.license ?? null,
+ license_url: match.license_url ?? null,
+ notes: match.summary ?? null,
+ license_summary: Array.isArray(match.licenseSummary)
+ ? match.licenseSummary
+ .map(item =>
+ typeof item === 'object' && item !== null
+ ? { summary: item.summary ?? '', emoji: item.emoji ?? null }
+ : { summary: item, emoji: null }
+ )
+ .filter(entry => typeof entry.summary === 'string' && entry.summary.length > 0)
+ : [],
+ confidence: match.confidence ?? null,
+ evidence: Array.isArray(match.evidence) ? match.evidence : [],
+ risk_level: risk.level,
+ risk_score: risk.score,
+ risk_score_explanation: risk.explanation
+ }
+ ]
+ };
+
+ await createLibrary(payload);
+ updateJob(next.id, { status: 'done', message: 'Eklendi', match });
+ if (onImported) onImported();
+ } catch (err) {
+ updateJob(next.id, { status: 'error', message: err?.message ?? String(err) });
+ }
+ }
+
+ setProcessing(false);
} catch (err) {
- setError(err.message);
+ setError(err?.message ?? String(err));
} finally {
setLoading(false);
}
};
useEffect(() => {
+ let cancelled = false;
+
const processNext = async () => {
+ if (cancelled) return;
if (processing) return;
+
const next = depJobs.find(job => job.status === 'pending');
if (!next) return;
+
setProcessing(true);
const updateJob = (id, patch) =>
setDepJobs(jobs => jobs.map(j => (j.id === id ? { ...j, ...patch } : j)));
+
const q = next.version ? `${next.name} ${next.version}` : next.name;
+
try {
updateJob(next.id, { status: 'searching', message: null });
+
+ // 1) Check local DB
const res = await searchLibraries(q);
let match = null;
let existing = false;
@@ -187,6 +337,7 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
risk_score_explanation: v?.risk_score_explanation
};
} else if (res?.source === 'mcp' && Array.isArray(res?.results) && res.results.length > 0) {
+ // MCP returned direct results
const lib = res.results[0];
const v = lib.versions?.[0];
match = {
@@ -206,6 +357,7 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
officialSite: lib.officialSite
};
} else if (res?.discovery?.matches?.length) {
+ // discovery bestMatch or matches
match = res.discovery.bestMatch ?? res.discovery.matches[0];
}
@@ -235,7 +387,9 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
return;
}
+ // persist MCP/discovery result into local DB
updateJob(next.id, { status: 'importing', match, risk_level: risk.level, risk_score: risk.score, risk_score_explanation: risk.explanation });
+
const payload = {
name: match.name ?? next.name,
ecosystem: match.ecosystem ?? res?.discovery?.query?.ecosystem ?? 'unknown',
@@ -250,12 +404,12 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
notes: match.summary ?? null,
license_summary: Array.isArray(match.licenseSummary)
? match.licenseSummary
- .map(item =>
- typeof item === 'object' && item !== null
- ? { summary: item.summary ?? '', emoji: item.emoji ?? null }
- : { summary: item, emoji: null }
- )
- .filter(entry => typeof entry.summary === 'string' && entry.summary.length > 0)
+ .map(item =>
+ typeof item === 'object' && item !== null
+ ? { summary: item.summary ?? '', emoji: item.emoji ?? null }
+ : { summary: item, emoji: null }
+ )
+ .filter(entry => typeof entry.summary === 'string' && entry.summary.length > 0)
: [],
confidence: match.confidence ?? null,
evidence: Array.isArray(match.evidence) ? match.evidence : [],
@@ -270,12 +424,19 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
updateJob(next.id, { status: 'done', message: 'Eklendi', match });
if (onImported) onImported();
} catch (err) {
- updateJob(next.id, { status: 'error', message: err.message });
+ updateJob(next.id, { status: 'error', message: err?.message ?? String(err) });
} finally {
+        // Announce completion only when no other jobs remain pending.
+        if (!depJobs.some(j => j.status === 'pending' && j.id !== next.id)) {
+          setStatusMessage('Scanning completed.');
+        }
setProcessing(false);
}
};
+
+ // try to drive processing whenever jobs change
processNext();
+
+ return () => {
+ cancelled = true;
+ };
}, [depJobs, processing, computeRisk, normalizeVersion, onImported]);
if (!isOpen) return null;
@@ -296,11 +457,11 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) {
✕
-
+
Repo linkini girin, link geçerliyse tarayalım.
{error &&
{error}
}
+ {statusMessage && (
+              <p>
+                {statusMessage}{(loading || processing) && totalJobs > 0 ? ` (${processedJobs}/${totalJobs})` : ''}
+              </p>
+ )}
{files.length > 0 && (
-
+
Bulunan dependency dosyaları:
+
{files.map((f, idx) => {
const deps = jobsByFile(f.path);
diff --git a/apps/web/src/styles.css b/apps/web/src/styles.css
index 75237ef..af0b98e 100644
--- a/apps/web/src/styles.css
+++ b/apps/web/src/styles.css
@@ -136,7 +136,7 @@ body {
display: flex;
justify-content: space-between;
align-items: center;
- margin-bottom: 1rem;
+ margin-bottom: 0.5rem;
}
.modal-header .close {
diff --git a/backend/app/services/file_analyzer.py b/backend/app/services/file_analyzer.py
index 9ea4706..75aae9d 100644
--- a/backend/app/services/file_analyzer.py
+++ b/backend/app/services/file_analyzer.py
@@ -1,4 +1,5 @@
import json
+import xml.etree.ElementTree as ET
from typing import List, Dict, Any
@@ -11,6 +12,8 @@ def detect_package_manager(filename: str, content: str) -> str:
return "pypi"
if lowered.endswith("pom.xml"):
return "maven"
+ if lowered.find("packages.config") != -1 or "nuget" in snippet:
+ return "nuget"
if lowered.endswith(".csproj") or "nuget" in snippet:
return "nuget"
return "unknown"
@@ -27,18 +30,95 @@ def parse_package_json(text: str) -> List[Dict[str, Any]]:
deps.append({"name": name, "version": version})
return deps
+def parse_csproj_file(content: str) -> List[Dict[str, Any]]:
+ """Parse a .csproj file content and extract PackageReference entries.
-def parse_requirements(text: str) -> List[Dict[str, Any]]:
- deps = []
- for raw in text.splitlines():
- line = raw.strip()
- if not line or line.startswith("#"):
+ Returns a list of dicts with keys: `name` and `version` (version may be None).
+    Handles both attribute-style entries (``<PackageReference Include="..." Version="..." />``)
+    and nested ``<Version>`` child elements. Works with XML namespaces.
+ """
+ try:
+ root = ET.fromstring(content)
+ except ET.ParseError:
+ return []
+
+ deps: List[Dict[str, Any]] = []
+
+ def local_name(tag: str) -> str:
+ return tag.split('}')[-1] if '}' in tag else tag
+
+ # Iterate through all elements and find PackageReference nodes
+ for elem in root.iter():
+ if local_name(elem.tag) != 'PackageReference':
continue
- if "==" in line:
- name, ver = line.split("==", 1)
- deps.append({"name": name.strip(), "version": ver.strip()})
- else:
- deps.append({"name": line, "version": None})
+
+ # Name can be in Include or Update attribute
+ name = elem.get('Include') or elem.get('Update')
+
+ # Version can be an attribute or a child element
+ version = elem.get('Version')
+ if version is None:
+ for child in list(elem):
+ if local_name(child.tag) == 'Version' and (child.text or '').strip():
+ version = (child.text or '').strip()
+ break
+
+ if name:
+ deps.append({"name": name, "version": version})
+
+ return deps
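+
+# Example:
+#   parse_csproj_file('<ItemGroup><PackageReference Include="Newtonsoft.Json" Version="13.0.1" /></ItemGroup>')
+#   -> [{"name": "Newtonsoft.Json", "version": "13.0.1"}]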
+
+def parse_requirements(text: str, filename: str="") -> List[Dict[str, Any]]:
+ # Support two common formats:
+ # 1) pip-style requirements (lines with optional ==version)
+ # 2) NuGet packages.config XML
+ text_stripped = text.strip()
+ deps: List[Dict[str, Any]] = []
+
+ if filename and filename.lower().endswith(".csproj"):
+ deps = parse_csproj_file(content=text_stripped)
+ elif (filename and filename.lower().find("packages.config") != -1):
+ return parse_packages_config(text)
+ else:
+ # Default: pip-style requirements
+ deps = []
+ for raw in text.splitlines():
+ line = raw.strip()
+ if not line or line.startswith("#"):
+ continue
+ if "==" in line:
+ name, ver = line.split("==", 1)
+ deps.append({"name": name.strip(), "version": ver.strip()})
+ else:
+ deps.append({"name": line, "version": None})
+ return deps
+
+
+def parse_packages_config(text: str) -> List[Dict[str, Any]]:
+ """Parse a NuGet `packages.config` XML or fall back to pip-style lines.
+
+ Returns list of {name, version}.
+ """
+ deps: List[Dict[str, Any]] = []
+ try:
+ root = ET.fromstring(text)
+        # Handle <package id="..." version="..." /> entries
+ for pkg in root.findall('.//package'):
+ name = pkg.get('id') or pkg.get('Id') or pkg.get('name')
+ version = pkg.get('version') or pkg.get('Version')
+ if name:
+ deps.append({"name": name, "version": version})
+ except ET.ParseError:
+ # Fall back to pip-style parsing if XML parsing fails
+ for raw in text.splitlines():
+ line = raw.strip()
+ if not line or line.startswith("#"):
+ continue
+ if "==" in line:
+ name, ver = line.split("==", 1)
+ deps.append({"name": name.strip(), "version": ver.strip()})
+ else:
+ deps.append({"name": line, "version": None})
return deps
@@ -55,6 +135,7 @@ def analyze_file(filename: str, content: str) -> Dict[str, Any]:
elif manager == "maven":
result["ecosystem"] = "maven"
elif manager == "nuget":
+ result["dependencies"] = parse_requirements(content, filename=filename)
result["ecosystem"] = "nuget"
else:
result["ecosystem"] = "unknown"
diff --git a/backend/app/services/repo_scanner.py b/backend/app/services/repo_scanner.py
index 0d447b7..8aebb99 100644
--- a/backend/app/services/repo_scanner.py
+++ b/backend/app/services/repo_scanner.py
@@ -17,7 +17,8 @@
# Java / Kotlin
"pom.xml", "build.gradle", "build.gradle.kts",
# .NET
- "csproj",
+ "packages.config",
+ ".csproj",
# Go
"go.mod", "vendor/modules.txt",
}
@@ -25,6 +26,7 @@
def is_dependency_file(filename: str) -> bool:
lower = filename.lower()
+
if lower.endswith(".csproj"):
return True
return lower in DEP_FILES
@@ -91,97 +93,143 @@ def _with_host_auth(repo_url: str) -> tuple[str, str | None]:
return repo_url, None
-def clone_and_scan(repo_url: str) -> Dict[str, Any]:
- tmpdir = tempfile.mkdtemp(prefix="repo-scan-")
- try:
+# NOTE: `clone_and_scan` removed — use `clone_repository` + `scan_repository` instead.
+
+
+def clone_repository(repo_url: str, target_dir: str | None = None) -> str:
+ """
+ Clone the repository and return the path to the cloned repo (root directory).
+ If `target_dir` is not provided a temp dir will be created.
+ """
+ tmpdir = target_dir or tempfile.mkdtemp(prefix="repo-scan-")
+ created_tmp = target_dir is None
+
+ # Prepare minimal env for non-interactive containers
+ env = os.environ.copy()
+ env["HOME"] = tmpdir
+ env["GIT_TERMINAL_PROMPT"] = "0"
+
+ clone_url, secret_used = _with_host_auth(repo_url)
+ result = subprocess.run(
+ ["git", "clone", "--depth", "1", clone_url, tmpdir],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.PIPE,
+ env=env,
+ )
+ if result.returncode == 0:
+ return tmpdir
+
+ stderr = (result.stderr or b"").decode(errors="ignore").strip()
+ if secret_used:
+ stderr = stderr.replace(secret_used, "***redacted***")
+
+ logger.warning(f"HTTPS clone failed for {repo_url}: {stderr}")
+
+ hint = "Check that the repository URL is correct and reachable."
+ lower_url = repo_url.lower()
+ is_github = "github.com" in lower_url
+ is_bitbucket = "bitbucket.org" in lower_url
+
+ if not secret_used:
+ if is_github:
+ hint += " For private repos, set GITHUB_TOKEN."
+ elif is_bitbucket:
+ hint += " For private repos, set BITBUCKET_USER and BITBUCKET_APP_PASSWORD."
+ else:
+ hint += " For private repos, ensure authentication credentials are provided via environment variables."
+
+ if "terminal prompts disabled" in stderr:
+ hint += " Terminal prompts are disabled. You must provide credentials (env vars) or use SSH with keys."
+
+ # Try SSH fallback if appropriate
+ try_ssh_fallback = False
+ has_ssh = shutil.which("ssh") is not None
+ if not has_ssh:
+ hint += " SSH client not found, SSH fallback disabled."
- # Prepare environment for OpenShift/K8s compatibility
- # 1. Set HOME to temp dir as random UIDs might not have a writable home
- # 2. Disable terminal prompts
- env = os.environ.copy()
- env["HOME"] = tmpdir
- env["GIT_TERMINAL_PROMPT"] = "0"
-
- clone_url, secret_used = _with_host_auth(repo_url)
- result = subprocess.run(
- ["git", "clone", "--depth", "1", clone_url, tmpdir],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.PIPE,
- env=env,
- )
- if result.returncode != 0:
- stderr = (result.stderr or b"").decode(errors="ignore").strip()
- if secret_used:
- stderr = stderr.replace(secret_used, "***redacted***")
-
- logger.warning(f"HTTPS clone failed for {repo_url}: {stderr}")
-
- hint = "Check that the repository URL is correct and reachable."
- lower_url = repo_url.lower()
- is_github = "github.com" in lower_url
- is_bitbucket = "bitbucket.org" in lower_url
-
- if not secret_used:
- if is_github:
- hint += " For private repos, set GITHUB_TOKEN."
- elif is_bitbucket:
- hint += " For private repos, set BITBUCKET_USER and BITBUCKET_APP_PASSWORD."
- else:
- hint += " For private repos, ensure authentication credentials are provided via environment variables."
-
- if "terminal prompts disabled" in stderr:
- hint += " Terminal prompts are disabled. You must provide credentials (env vars) or use SSH with keys."
-
- # If HTTPS clone failed and we did not inject HTTP auth, try SSH fallback
- try_ssh_fallback = False
- has_ssh = shutil.which("ssh") is not None
-
- if not has_ssh:
- hint += " SSH client not found, SSH fallback disabled."
-
- try:
- parsed = urlparse(repo_url)
- host = (parsed.hostname or "").lower()
- if (parsed.scheme in ("http", "https")) and (not secret_used) and host in ("github.com", "www.github.com", "bitbucket.org", "www.bitbucket.org") and has_ssh:
- try_ssh_fallback = True
- except Exception:
- try_ssh_fallback = False
-
- if try_ssh_fallback:
- # Construct SSH clone URL: git@host:owner/repo.git
- path = parsed.path.lstrip('/')
- ssh_url = f"git@{host}:{path}"
- logger.info(f"Attempting SSH fallback for {repo_url} -> {ssh_url}")
-
- # Disable strict host key checking for this operation to avoid "Host key verification failed"
- # in non-interactive environments (containers).
- ssh_env = env.copy()
- ssh_env["GIT_SSH_COMMAND"] = "ssh -o StrictHostKeyChecking=no"
-
- try:
- ssh_result = subprocess.run(
- ["git", "clone", "--depth", "1", ssh_url, tmpdir],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.PIPE,
- env=ssh_env,
- )
- if ssh_result.returncode == 0:
- # Success with SSH fallback
- return {"files": find_dependency_files(tmpdir), "root": tmpdir}
- ssh_stderr = (ssh_result.stderr or b"").decode(errors="ignore").strip()
- logger.warning(f"SSH fallback failed: {ssh_stderr}")
- except Exception as e:
- ssh_stderr = str(e)
- logger.error(f"SSH fallback exception: {e}")
-
- # Redact nothing for SSH attempt
- full_err = f"HTTPS clone stderr: {stderr or 'unknown'}, SSH clone stderr: {ssh_stderr or 'unknown'}. {hint}"
- raise RuntimeError(f"git clone failed: {full_err}")
-
- raise RuntimeError(f"git clone failed: {stderr or 'unknown error'}. {hint}")
-
- files = find_dependency_files(tmpdir)
- return {"files": files, "root": tmpdir}
+ try:
+ parsed = urlparse(repo_url)
+ host = (parsed.hostname or "").lower()
+ if (parsed.scheme in ("http", "https")) and (not secret_used) and host in ("github.com", "www.github.com", "bitbucket.org", "www.bitbucket.org") and has_ssh:
+ try_ssh_fallback = True
except Exception:
+ try_ssh_fallback = False
+
+ if try_ssh_fallback:
+ path = parsed.path.lstrip('/')
+ ssh_url = f"git@{host}:{path}"
+ logger.info(f"Attempting SSH fallback for {repo_url} -> {ssh_url}")
+
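+        # Disable strict host key checking to avoid "Host key verification failed"
+        # in non-interactive environments (containers).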
+ ssh_env = env.copy()
+ ssh_env["GIT_SSH_COMMAND"] = "ssh -o StrictHostKeyChecking=no"
+
+ try:
+ ssh_result = subprocess.run(
+ ["git", "clone", "--depth", "1", ssh_url, tmpdir],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.PIPE,
+ env=ssh_env,
+ )
+ if ssh_result.returncode == 0:
+ return tmpdir
+ ssh_stderr = (ssh_result.stderr or b"").decode(errors="ignore").strip()
+ logger.warning(f"SSH fallback failed: {ssh_stderr}")
+ except Exception as e:
+ ssh_stderr = str(e)
+ logger.error(f"SSH fallback exception: {e}")
+
+ full_err = f"HTTPS clone stderr: {stderr or 'unknown'}, SSH clone stderr: {ssh_stderr or 'unknown'}. {hint}"
+ # cleanup if we created tmpdir here
+ if created_tmp:
+ shutil.rmtree(tmpdir, ignore_errors=True)
+ raise RuntimeError(f"git clone failed: {full_err}")
+
+ # No SSH fallback possible
+ if created_tmp:
shutil.rmtree(tmpdir, ignore_errors=True)
- raise
+ raise RuntimeError(f"git clone failed: {stderr or 'unknown error'}. {hint}")
+
+
+def scan_repository(root: str) -> Dict[str, Any]:
+ """
+ Scan a cloned repository directory for dependency files and return
+ the same shape as the original `clone_and_scan` (files + root).
+ """
+ if not root or not os.path.isdir(root):
+ raise RuntimeError("scan_repository: invalid root path")
+ files = find_dependency_files(root)
+ return {"files": files, "root": root}
+
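+# Typical flow (the clone is left on disk; callers handle cleanup):
+#   root = clone_repository("https://github.com/org/repo")
+#   scan_repository(root)   # -> {"files": [...], "root": root}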
+
+def list_repository_packages(root: str) -> List[Dict[str, Any]]:
+ """
+ Return a list of dependency-file summaries found in a cloned repository.
+    Each item is {"path": relative_path, "report": <file_analyzer report>}.
+
+ This uses the local file analyzer (no MCP HTTP calls) so it's safe to call
+ in non-networked contexts and suitable for UI previews.
+ """
+ if not root or not os.path.isdir(root):
+ raise RuntimeError("list_repository_packages: invalid root path")
+
+ summaries: List[Dict[str, Any]] = []
+ try:
+ # Import locally to avoid circular imports at module import time
+ from .file_analyzer import analyze_file as local_analyze_file
+ except Exception:
+ local_analyze_file = None
+
+ for rel in find_dependency_files(root):
+ full = os.path.join(root, rel)
+ try:
+ with open(full, 'r', encoding='utf-8', errors='ignore') as fh:
+ content = fh.read()
+ if local_analyze_file:
+ report = local_analyze_file(rel, content)
+ else:
+ report = {"packageManager": "unknown", "dependencies": []}
+ except Exception as e:
+ report = {"error": str(e), "packageManager": "unknown", "dependencies": []}
+ summaries.append({"path": rel, "report": report})
+
+ return summaries
diff --git a/backend/app/views/library_view.py b/backend/app/views/library_view.py
index 172bdcd..541abd6 100644
--- a/backend/app/views/library_view.py
+++ b/backend/app/views/library_view.py
@@ -21,7 +21,11 @@
)
from ..models.repository_scan import RepositoryScanCreate
from ..services.mcp_client import get_mcp_http_client, MCPClientError
-from ..services.repo_scanner import clone_and_scan
+from ..services.repo_scanner import (
+ clone_repository,
+ scan_repository,
+ list_repository_packages,
+)
from urllib.parse import urlparse
@@ -232,67 +236,6 @@ def get_comp(vm, key):
return enriched
-async def perform_repo_scan(repo_url: str, client) -> Dict[str, Any]:
- """
- Clone and scan a repository, enrich dependencies using Mongo/MCP, and persist MCP hits.
- Returns analyzed files and a deduplicated dependency list.
- """
- try:
- scan_result = clone_and_scan(repo_url)
- except Exception as error:
- raise HTTPException(status_code=502, detail=f'Repo scan failed: {error}')
-
- analyzed_files: List[Dict[str, Any]] = []
- resolved_index: Dict[tuple, Dict[str, Any]] = {}
- tmpdir = scan_result.get('root')
- try:
- for relpath in scan_result.get('files', []):
- full_path = os.path.join(scan_result['root'], relpath)
- try:
- with open(full_path, 'r', encoding='utf-8', errors='ignore') as fh:
- content = fh.read()
- report = await client.analyze_file({"filename": relpath, "content": content}) or {}
- except Exception as exc:
- report = {"error": str(exc), "dependencies": []}
-
- enriched_deps = []
- for dep in report.get('dependencies', []) or []:
- enriched = await resolve_dependency_entry(dep, relpath, report)
- key = (enriched['name'].lower(), normalize_version(enriched.get('version')).lower())
- existing = resolved_index.get(key)
- if existing:
- sources = set(existing.get('sources', [])) | set(enriched.get('sources', []))
- existing['sources'] = sorted(sources)
- if existing.get('risk_score') is None and enriched.get('risk_score') is not None:
- existing['risk_score'] = enriched.get('risk_score')
- existing['risk_level'] = enriched.get('risk_level')
- if not existing.get('risk_score_explanation') and enriched.get('risk_score_explanation'):
- existing['risk_score_explanation'] = enriched.get('risk_score_explanation')
- for key in ("license_risk_score", "security_risk_score", "maintenance_risk_score", "usage_context_risk_score"):
- if existing.get(key) is None and enriched.get(key) is not None:
- existing[key] = enriched.get(key)
- if not existing.get('library_id') and enriched.get('library_id'):
- existing['library_id'] = enriched.get('library_id')
- if not existing.get('repository_url') and enriched.get('repository_url'):
- existing['repository_url'] = enriched.get('repository_url')
- else:
- resolved_index[key] = enriched
- enriched_deps.append({**enriched})
-
- report['dependencies'] = enriched_deps
- analyzed_files.append({"path": relpath, "report": report})
- finally:
- if tmpdir:
- shutil.rmtree(tmpdir, ignore_errors=True)
-
- dependencies = []
- for dep in resolved_index.values():
- dep.pop('_key', None)
- dependencies.append(dep)
-
- return {"analyzed_files": analyzed_files, "dependencies": dependencies}
-
-
@router.get('/', response_model=List[LibraryDocument])
async def handle_list_libraries(limit: int = Query(50, ge=1, le=500, description='Max items to return')):
return await list_libraries(limit)
@@ -341,44 +284,63 @@ async def handle_analyze_file(file: UploadFile = File(...)):
return {"file": file.filename, **(report or {})}
-@router.post('/repositories/scan')
-async def handle_repo_scan(payload: dict):
+
+@router.post('/repositories/clone')
+async def handle_repo_clone(payload: dict):
+ """Clone a repository and return a preview list of dependency files and parsed packages.
+
+ Response: { url, root, files: [ { path, report }, ... ] }
+ """
repo_url = payload.get('url')
if not repo_url:
raise HTTPException(status_code=400, detail='url is required')
- client = get_mcp_http_client()
- if not client:
- raise HTTPException(status_code=503, detail='MCP HTTP client not configured')
+ if not isinstance(repo_url, str):
+ raise HTTPException(status_code=400, detail='url must be a string')
+ # ensure static type checkers see a plain str
+ repo_url = str(repo_url)
- scan_data = await perform_repo_scan(repo_url, client)
- # Persist summarized scan to repository_scans collection
- platform, repo_name = _infer_repo_meta(repo_url)
try:
- payload = RepositoryScanCreate(
- repository_url=repo_url,
- repository_platform=platform,
- repository_name=repo_name,
- dependencies=[
- {
- "library_path": file.get("path"),
- "libraries": [
- {
- "library_name": dep.get("name"),
- "library_version": normalize_version(dep.get("version")) or dep.get("version") or "unknown"
- }
- for dep in (file.get("report", {}).get("dependencies") or [])
- if dep.get("name")
- ],
- }
- for file in scan_data.get("analyzed_files", [])
- ],
- )
- await create_repository_scan(payload)
- except Exception as exc:
- # Do not block response; just log.
- print(f'{datetime.utcnow().isoformat()} [repo_scan] failed to persist scan: {exc}')
+ root = clone_repository(repo_url)
+ except Exception as error:
+ raise HTTPException(status_code=502, detail=f'Repo clone failed: {error}')
- return {"url": repo_url, "files": scan_data["analyzed_files"], "dependencies": scan_data["dependencies"]}
+ try:
+ summaries = list_repository_packages(root)
+ except Exception as error:
+ # cleanup cloned repo on failure
+ shutil.rmtree(root, ignore_errors=True)
+ raise HTTPException(status_code=500, detail=f'Failed to list repository packages: {error}')
+
+ # Note: we keep the cloned repo on disk for now so the UI can request a follow-up scan if needed.
+ return {"url": repo_url, "root": root, "files": summaries}
+
+
+@router.post('/repositories/list-packages')
+async def handle_repo_list_packages(payload: dict):
+ """Return parsed dependency-file summaries for an existing cloned repo (by `root`) or for a repo URL.
+
+ Request body: { "root": "/path/to/clone" } OR { "url": "https://..." }
+ Response: { url?, root, files: [ { path, report }, ... ] }
+ """
+ root = payload.get('root')
+ repo_url = payload.get('url')
+
+ if not root and not repo_url:
+ raise HTTPException(status_code=400, detail='root or url is required')
+
+ try:
+ if root:
+ summaries = list_repository_packages(root)
+ return {"root": root, "files": summaries}
+ # else clone then list
+ elif repo_url:
+ if not isinstance(repo_url, str):
+ raise HTTPException(status_code=400, detail='url must be a string')
+ root = clone_repository(str(repo_url))
+ summaries = list_repository_packages(root)
+ return {"url": repo_url, "root": root, "files": summaries}
+    except HTTPException:
+        raise
+    except Exception as error:
+        raise HTTPException(status_code=500, detail=str(error))
@router.post('/repositories/scan/highest-risk')
@@ -391,14 +353,58 @@ async def handle_repo_scan_highest_risk(payload: dict):
repo_url = payload.get('url')
if not repo_url:
raise HTTPException(status_code=400, detail='url is required')
+ if not isinstance(repo_url, str):
+ raise HTTPException(status_code=400, detail='url must be a string')
+ repo_url = str(repo_url)
client = get_mcp_http_client()
if not client:
raise HTTPException(status_code=503, detail='MCP HTTP client not configured')
- scan_data = await perform_repo_scan(repo_url, client)
- dependencies = scan_data["dependencies"]
- analyzed_files = scan_data["analyzed_files"]
+ # Use existing route handlers to support both UI and CI flows:
+ # - clone the repository (handle_repo_clone)
+ # - list packages for the cloned repo (handle_repo_list_packages)
+ # Then enrich each dependency using `resolve_dependency_entry`.
+ try:
+ clone_res = await handle_repo_clone({"url": repo_url})
+ # clone_res should contain `root` and `files`
+ root = clone_res.get("root")
+ scanned_files = clone_res.get("files") or []
+
+ # If clone returned no files, try listing packages explicitly
+ if not scanned_files and root:
+ list_res = await handle_repo_list_packages({"root": root})
+ scanned_files = list_res.get("files") or []
+
+ dependencies = []
+ analyzed_files = []
+
+ for f in scanned_files:
+ relpath = f.get("path")
+ report = f.get("report") or {}
+ # collect analyzed file summary
+ analyzed_files.append({"path": relpath, "report": report})
+
+            deps = report.get("dependencies") if isinstance(report.get("dependencies"), list) else []
+ for dep in deps:
+ try:
+ enriched = await resolve_dependency_entry(dep, relpath, report)
+ except Exception:
+ # fallback: still include minimal dep info
+ enriched = {
+ "name": dep.get("name"),
+ "version": dep.get("version"),
+ "ecosystem": dep.get("ecosystem") or report.get("ecosystem"),
+ "sources": [relpath]
+ }
+ # attach source file info
+ enriched["file"] = relpath
+ dependencies.append(enriched)
+ except HTTPException:
+ # propagate HTTP errors as-is
+ raise
+ except Exception as exc:
+ raise HTTPException(status_code=500, detail=str(exc))
# Persist summarized scan to repository_scans collection
platform, repo_name = _infer_repo_meta(repo_url)
diff --git a/servers/mcp-licenguard/src/services/fileAnalyzer.js b/servers/mcp-licenguard/src/services/fileAnalyzer.js
index 9d30e87..8ada463 100644
--- a/servers/mcp-licenguard/src/services/fileAnalyzer.js
+++ b/servers/mcp-licenguard/src/services/fileAnalyzer.js
@@ -4,7 +4,7 @@ export function detectPackageManager(filename, content) {
if (lowered.includes("package.json") || snippet.includes('"dependencies"')) return "npm";
if (lowered.endsWith("requirements.txt") || snippet.includes("pip") || snippet.includes("==")) return "pypi";
if (lowered.endsWith("pom.xml")) return "maven";
- if (lowered.endsWith(".csproj") || snippet.includes("nuget")) return "nuget";
+ if (lowered.endsWith(".csproj") || lowered.includes("packages.config") || snippet.includes("nuget")) return "nuget";
if (lowered.endsWith("go.mod") || snippet.includes("module ") || snippet.includes("require")) return "go";
return "unknown";
}
@@ -73,6 +73,50 @@ export function parseGoMod(text) {
return deps;
}
+export function parseNuget(text) {
+ // Parse two common NuGet formats:
+  // 1) packages.config entries:
+  //      <package id="..." version="..." />
+  // 2) SDK-style .csproj PackageReference entries:
+  //      <PackageReference Include="..." Version="..." />
+  //    or
+  //      <PackageReference Include="...">
+  //        <Version>1.2.3</Version>
+  //      </PackageReference>
+ const deps = [];
+ if (!text) return deps;
+
+ // packages.config
+  const pkgRegex = /<package\b[^>]*\bid=["']([^"']+)["'][^>]*\bversion=["']([^"']+)["'][^>]*>/gi;
+ let m;
+ while ((m = pkgRegex.exec(text))) {
+ deps.push({ name: m[1], version: m[2] });
+ }
+
+ // PackageReference Include="..." Version="..." (single tag)
+  const prInlineRegex = /<PackageReference\b[^>]*\bInclude=["']([^"']+)["'][^>]*\bVersion=["']([^"']+)["'][^>]*\/?>/gi;
+ while ((m = prInlineRegex.exec(text))) {
+ deps.push({ name: m[1], version: m[2] });
+ }
+
+ // PackageReference with nested tag
+  const prBlockRegex = /<PackageReference\b[^>]*\bInclude=["']([^"']+)["'][^>]*>([\s\S]*?)<\/PackageReference>/gi;
+ let inner;
+ while ((m = prBlockRegex.exec(text))) {
+ const includeName = m[1];
+ inner = m[2];
+    const verMatch = /<Version>([^<]+)<\/Version>/i.exec(inner);
+ const version = verMatch ? verMatch[1].trim() : null;
+ deps.push({ name: includeName, version });
+ }
+
+ // Deduplicate by name (keep first seen version)
+ const seen = new Map();
+ for (const d of deps) {
+ const key = (d.name || '').toLowerCase();
+ if (!seen.has(key)) seen.set(key, d);
+ }
+ return Array.from(seen.values());
+}
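+
+// Example (inline PackageReference form):
+//   parseNuget('<PackageReference Include="Serilog" Version="3.1.1" />')
+//   -> [{ name: "Serilog", version: "3.1.1" }]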
+
export function analyzeFile({ filename, content }) {
const manager = detectPackageManager(filename || "unknown", content || "");
const result = { packageManager: manager, ecosystem: manager, dependencies: [] };
@@ -82,6 +126,11 @@ export function analyzeFile({ filename, content }) {
result.dependencies = parseRequirements(content || "");
} else if (manager === "go") {
result.dependencies = parseGoMod(content || "");
+ } else if (manager === "maven") {
+ result.dependencies = parseMaven(content || "");
+ } else if (manager === "nuget") {
+ result.dependencies = parseNuget(content || "");
}
+
return result;
}
diff --git a/servers/mcp-licenguard/src/services/libraryDiscovery.js b/servers/mcp-licenguard/src/services/libraryDiscovery.js
index 60cf8cf..1d18a3c 100644
--- a/servers/mcp-licenguard/src/services/libraryDiscovery.js
+++ b/servers/mcp-licenguard/src/services/libraryDiscovery.js
@@ -11,7 +11,7 @@ When searching:
- Maven/Java: inspect Maven Central/pom.xml for group/artifact, versions, license, repo.
- Go: inspect go.mod and GitHub; prefer module path language (Go) over similarly named npm packages.
- Rust: use crates.io/Cargo.toml for license/repo/version.
-- NuGet/.NET: use nuget.org metadata or .csproj for license/repo/version.
+- NuGet/.NET: use nuget.org metadata or .csproj/packages.config for license/repo/version.
- Ruby: rubygems.org gem info for license/repo/version.
- PHP: packagist/ composer.json for license/repo/version.
- iOS: cocoapods specs for license/repo/version.
@@ -59,7 +59,7 @@ If the user provides ecosystem/version as unknown or omits them, infer them wher
- cocoapods → "iOS / Swift / Obj-C"
- gradle → "Java / Kotlin"
- go → "Go"
-- Use repository metadata (and official site if present) to infer ecosystem: check GitHub/GitLab language badges, repo descriptions ("A Commander for modern Go CLI interactions"), presence of go.mod (Go), Cargo.toml (Rust), package.json (JavaScript), requirements.txt (Python), pom.xml (Java), .csproj (C#/.NET), etc. Prefer the repository’s language over similarly named packages in other ecosystems.
+- Use repository metadata (and official site if present) to infer ecosystem: check GitHub/GitLab language badges, repo descriptions ("A Commander for modern Go CLI interactions"), presence of go.mod (Go), Cargo.toml (Rust), package.json (JavaScript), requirements.txt (Python), pom.xml (Java), .csproj/packages.config (C#/.NET), etc. Prefer the repository’s language over similarly named packages in other ecosystems.
- If the user passes a module path with a slash (e.g., "spf13/cobra") or a repo/module already indicates a language (e.g., go.mod / Cargo.toml), keep the original name (do not drop the owner/namespace) and set ecosystem accordingly (e.g., "Go"). Avoid renaming/shortening package names; preserve the user-provided name unless authoritative evidence shows it is incorrect.
- If repository URL is found and officialSite is empty/unknown, set officialSite to repository URL.
- When repo/site content clearly shows a language/framework (e.g., go.mod + "Go CLI" in README, Maven pom.xml, Cargo.toml, package.json), trust that signal over similarly named packages in other ecosystems. Describe the project in that language’s context (e.g., a Go repo should not be described as JavaScript). Preserve the full module name (including owner/namespace) provided by the user.