From 217d8aeb39e1e186b4f975354a9787cbd4e2fc07 Mon Sep 17 00:00:00 2001 From: "murat.oguz" Date: Thu, 4 Dec 2025 17:00:32 +0300 Subject: [PATCH 1/4] Refactor repository handling: rename scanRepository to cloneRepository, add listRepositoryPackages, and improve UI feedback in RepoModal --- apps/web/src/api/client.js | 35 ++++- apps/web/src/components/ImportModal.jsx | 3 + apps/web/src/components/RepoModal.jsx | 199 ++++++++++++++++++++++-- apps/web/src/styles.css | 2 +- 4 files changed, 220 insertions(+), 19 deletions(-) diff --git a/apps/web/src/api/client.js b/apps/web/src/api/client.js index ee15a7f..9ae71cb 100644 --- a/apps/web/src/api/client.js +++ b/apps/web/src/api/client.js @@ -42,8 +42,8 @@ export function addVersion(libraryId, payload) { }); } -export async function scanRepository(url) { - const res = await fetch(`${API_BASE}/libraries/repositories/scan`, { +export async function cloneRepository(url) { + const res = await fetch(`${API_BASE}/libraries/repositories/clone`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ url }) @@ -55,6 +55,37 @@ export async function scanRepository(url) { return res.json(); } +export async function listRepositoryPackages({ root, url } = {}) { + const body = JSON.stringify(root ? { root } : { url }); + const res = await fetch(`${API_BASE}/libraries/repositories/list-packages`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body, + }); + if (!res.ok) { + const message = await res.text(); + throw new Error(message || `Request failed with status ${res.status}`); + } + return res.json(); +} + +// Backwards-compatible scanRepository: accepts string url or object { url } | { root } +export async function scanRepository(input) { + let body; + if (typeof input === 'string') body = JSON.stringify({ url: input }); + else body = JSON.stringify(input || {}); + const res = await fetch(`${API_BASE}/libraries/repositories/scan`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body, + }); + if (!res.ok) { + const message = await res.text(); + throw new Error(message || `Request failed with status ${res.status}`); + } + return res.json(); +} + export async function analyzeFileUpload(file) { const form = new FormData(); form.append('file', file); diff --git a/apps/web/src/components/ImportModal.jsx b/apps/web/src/components/ImportModal.jsx index 1b88fb1..d52acfa 100644 --- a/apps/web/src/components/ImportModal.jsx +++ b/apps/web/src/components/ImportModal.jsx @@ -1,6 +1,9 @@ import { useState, useEffect, useRef } from 'react'; import { createLibrary, searchLibraries } from '../api/client.js'; + +//  TODO: RiskScore Gauge component'ı ortak bir yere taşı. 
+ const RiskGauge = ({ score, level }) => { if (score === undefined || score === null || Number.isNaN(score)) return null; const clamped = Math.min(100, Math.max(0, Number(score))); diff --git a/apps/web/src/components/RepoModal.jsx b/apps/web/src/components/RepoModal.jsx index c104b68..c63ac4f 100644 --- a/apps/web/src/components/RepoModal.jsx +++ b/apps/web/src/components/RepoModal.jsx @@ -1,5 +1,5 @@ import React, { useCallback, useEffect, useRef, useState } from 'react'; -import { scanRepository, searchLibraries, createLibrary } from '../api/client.js'; +import { cloneRepository, listRepositoryPackages, searchLibraries, createLibrary } from '../api/client.js'; const RiskBar = ({ score, explanation }) => { if (score === undefined || score === null || Number.isNaN(score)) return null; @@ -22,6 +22,7 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { const [error, setError] = useState(null); const [loading, setLoading] = useState(false); const [files, setFiles] = useState([]); + const [statusMessage, setStatusMessage] = useState(''); const [depJobs, setDepJobs] = useState([]); const [processing, setProcessing] = useState(false); const inputRef = useRef(null); @@ -38,6 +39,9 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { return '(N/A)'; }; + const totalJobs = depJobs.length; + const processedJobs = depJobs.filter(j => j.status !== 'pending').length; + const resetState = () => { setRepoUrl(''); setError(null); @@ -74,6 +78,13 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { return cleaned || null; }, []); + const cancelledRef = React.useRef(false); + useEffect(() => { + return () => { + cancelledRef.current = true; + }; + }, []); + const computeRisk = useCallback((match = {}) => { const summaries = Array.isArray(match.licenseSummary ?? match.license_summary) ? (match.licenseSummary ?? match.license_summary) @@ -123,9 +134,18 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { setError(null); setFiles([]); setDepJobs([]); - const res = await scanRepository(repoUrl); - const scannedFiles = res.files ?? []; + + setStatusMessage('Cloning repository...'); + const cloneRes = await cloneRepository(repoUrl); + setStatusMessage('Cloning repository completed.'); + const root = cloneRes.root; + + setStatusMessage('Listing dependency files...'); + const listRes = await listRepositoryPackages({ root }); + const scannedFiles = listRes.files ?? cloneRes.files ?? []; setFiles(scannedFiles); + + setStatusMessage('Scanning dependencies...'); const jobs = []; scannedFiles.forEach((file, fIdx) => { const deps = Array.isArray(file?.report?.dependencies) ? file.report.dependencies : []; @@ -145,24 +165,154 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { }); }); setDepJobs(jobs); + // Process jobs sequentially: check local DB, fallback to MCP, persist if needed + setProcessing(true); + const updateJob = (id, patch) => + setDepJobs(prev => prev.map(j => (j.id === id ? { ...j, ...patch } : j))); + + for (const next of jobs) { + if (cancelledRef.current) break; + const q = next.version ? 
`${next.name} ${next.version}` : next.name; + try { + updateJob(next.id, { status: 'searching', message: null }); + const res = await searchLibraries(q); + let match = null; + let existing = false; + + if (res?.source === 'mongo' && Array.isArray(res?.results) && res.results.length > 0) { + existing = true; + const lib = res.results[0]; + const v = lib.versions?.[0]; + match = { + name: lib.name, + version: v?.version, + ecosystem: lib.ecosystem, + description: lib.description, + repository: lib.repository_url, + license: v?.license_name, + license_url: v?.license_url, + licenseSummary: v?.license_summary ?? [], + evidence: v?.evidence ?? [], + confidence: v?.confidence, + risk_level: v?.risk_level, + risk_score: v?.risk_score, + risk_score_explanation: v?.risk_score_explanation + }; + } else if (res?.source === 'mcp' && Array.isArray(res?.results) && res.results.length > 0) { + const lib = res.results[0]; + const v = lib.versions?.[0]; + match = { + name: lib.name, + version: v?.version, + ecosystem: lib.ecosystem, + description: lib.description, + repository: lib.repository_url, + license: v?.license_name, + license_url: v?.license_url, + licenseSummary: v?.license_summary ?? [], + evidence: v?.evidence ?? [], + confidence: v?.confidence, + risk_level: v?.risk_level, + risk_score: v?.risk_score, + risk_score_explanation: v?.risk_score_explanation, + officialSite: lib.officialSite + }; + } else if (res?.discovery?.matches?.length) { + match = res.discovery.bestMatch ?? res.discovery.matches[0]; + } + + if (!match) { + updateJob(next.id, { status: 'error', message: 'Eşleşme bulunamadı' }); + continue; + } + + const computedRisk = computeRisk(match); + const risk = { + level: match.risk_level ?? computedRisk.level, + score: match.risk_score ?? computedRisk.score, + explanation: match.risk_score_explanation ?? computedRisk.explanation + }; + + if (existing || res?.source === 'mongo') { + updateJob(next.id, { + status: 'done', + message: 'Zaten kayıtlı', + match, + risk_level: risk.level, + risk_score: risk.score, + risk_score_explanation: risk.explanation + }); + continue; + } + + updateJob(next.id, { status: 'importing', match, risk_level: risk.level, risk_score: risk.score, risk_score_explanation: risk.explanation }); + + const payload = { + name: match.name ?? next.name, + ecosystem: match.ecosystem ?? res?.discovery?.query?.ecosystem ?? 'unknown', + description: match.description, + repository_url: match.repository ?? match.officialSite ?? null, + officialSite: match.officialSite ?? match.repository ?? null, + versions: [ + { + version: normalizeVersion(match.version ?? next.version) ?? 'unknown', + license_name: match.license ?? null, + license_url: match.license_url ?? null, + notes: match.summary ?? null, + license_summary: Array.isArray(match.licenseSummary) + ? match.licenseSummary + .map(item => + typeof item === 'object' && item !== null + ? { summary: item.summary ?? '', emoji: item.emoji ?? null } + : { summary: item, emoji: null } + ) + .filter(entry => typeof entry.summary === 'string' && entry.summary.length > 0) + : [], + confidence: match.confidence ?? null, + evidence: Array.isArray(match.evidence) ? match.evidence : [], + risk_level: risk.level, + risk_score: risk.score, + risk_score_explanation: risk.explanation + } + ] + }; + + await createLibrary(payload); + updateJob(next.id, { status: 'done', message: 'Eklendi', match }); + if (onImported) onImported(); + } catch (err) { + updateJob(next.id, { status: 'error', message: err?.message ?? 
String(err) }); + } + } + + setProcessing(false); } catch (err) { - setError(err.message); + setError(err?.message ?? String(err)); } finally { setLoading(false); } }; useEffect(() => { + let cancelled = false; + const processNext = async () => { + if (cancelled) return; if (processing) return; + const next = depJobs.find(job => job.status === 'pending'); if (!next) return; + setProcessing(true); const updateJob = (id, patch) => setDepJobs(jobs => jobs.map(j => (j.id === id ? { ...j, ...patch } : j))); + const q = next.version ? `${next.name} ${next.version}` : next.name; + try { updateJob(next.id, { status: 'searching', message: null }); + + // 1) Check local DB const res = await searchLibraries(q); let match = null; let existing = false; @@ -187,6 +337,7 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { risk_score_explanation: v?.risk_score_explanation }; } else if (res?.source === 'mcp' && Array.isArray(res?.results) && res.results.length > 0) { + // MCP returned direct results const lib = res.results[0]; const v = lib.versions?.[0]; match = { @@ -206,6 +357,7 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { officialSite: lib.officialSite }; } else if (res?.discovery?.matches?.length) { + // discovery bestMatch or matches match = res.discovery.bestMatch ?? res.discovery.matches[0]; } @@ -235,7 +387,9 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { return; } + // persist MCP/discovery result into local DB updateJob(next.id, { status: 'importing', match, risk_level: risk.level, risk_score: risk.score, risk_score_explanation: risk.explanation }); + const payload = { name: match.name ?? next.name, ecosystem: match.ecosystem ?? res?.discovery?.query?.ecosystem ?? 'unknown', @@ -250,12 +404,12 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { notes: match.summary ?? null, license_summary: Array.isArray(match.licenseSummary) ? match.licenseSummary - .map(item => - typeof item === 'object' && item !== null - ? { summary: item.summary ?? '', emoji: item.emoji ?? null } - : { summary: item, emoji: null } - ) - .filter(entry => typeof entry.summary === 'string' && entry.summary.length > 0) + .map(item => + typeof item === 'object' && item !== null + ? { summary: item.summary ?? '', emoji: item.emoji ?? null } + : { summary: item, emoji: null } + ) + .filter(entry => typeof entry.summary === 'string' && entry.summary.length > 0) : [], confidence: match.confidence ?? null, evidence: Array.isArray(match.evidence) ? match.evidence : [], @@ -270,12 +424,19 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { updateJob(next.id, { status: 'done', message: 'Eklendi', match }); if (onImported) onImported(); } catch (err) { - updateJob(next.id, { status: 'error', message: err.message }); + updateJob(next.id, { status: 'error', message: err?.message ?? String(err) }); } finally { + setStatusMessage('Scanning completed.'); setProcessing(false); } }; + + // try to drive processing whenever jobs change processNext(); + + return () => { + cancelled = true; + }; }, [depJobs, processing, computeRisk, normalizeVersion, onImported]); if (!isOpen) return null; @@ -296,11 +457,11 @@ export default function RepoLinkModal({ isOpen, onClose, onImported }) { ✕ -
+

          Enter the repo link; if it's valid, we'll scan it.
Repo linkini girin, link geçerliyse tarayalım.

-
{error &&

{error}

} + {statusMessage && ( +
+ {statusMessage}{(loading || processing) && totalJobs > 0 ? ` (${processedJobs}/${totalJobs})` : ''} +
+ )} {files.length > 0 && ( -
+


Dependency files found:

+
    {files.map((f, idx) => { const deps = jobsByFile(f.path); diff --git a/apps/web/src/styles.css b/apps/web/src/styles.css index 75237ef..af0b98e 100644 --- a/apps/web/src/styles.css +++ b/apps/web/src/styles.css @@ -136,7 +136,7 @@ body { display: flex; justify-content: space-between; align-items: center; - margin-bottom: 1rem; + margin-bottom: 0.5rem; } .modal-header .close { From 358c73b5abb79705217ee0969505c6f69d5bd6c5 Mon Sep 17 00:00:00 2001 From: "murat.oguz" Date: Thu, 4 Dec 2025 17:00:59 +0300 Subject: [PATCH 2/4] Enhance repository scanning and package management support: - Add NuGet package detection in file_analyzer.py and fileAnalyzer.js - Implement parsing for packages.config and .csproj formats - Refactor repo_scanner.py to use clone_repository and scan_repository functions - Update library_view.py to handle repository cloning and listing packages - Modify libraryDiscovery.js documentation for NuGet support --- backend/app/services/file_analyzer.py | 34 +++ backend/app/services/repo_scanner.py | 237 +++++++++++------- backend/app/views/library_view.py | 199 ++++++++++----- .../src/services/fileAnalyzer.js | 51 +++- .../src/services/libraryDiscovery.js | 4 +- 5 files changed, 364 insertions(+), 161 deletions(-) diff --git a/backend/app/services/file_analyzer.py b/backend/app/services/file_analyzer.py index 9ea4706..3379678 100644 --- a/backend/app/services/file_analyzer.py +++ b/backend/app/services/file_analyzer.py @@ -1,4 +1,5 @@ import json +import xml.etree.ElementTree as ET from typing import List, Dict, Any @@ -11,6 +12,8 @@ def detect_package_manager(filename: str, content: str) -> str: return "pypi" if lowered.endswith("pom.xml"): return "maven" + if lowered.find("packages.config") != -1 or "nuget" in snippet: + return "nuget" if lowered.endswith(".csproj") or "nuget" in snippet: return "nuget" return "unknown" @@ -29,6 +32,36 @@ def parse_package_json(text: str) -> List[Dict[str, Any]]: def parse_requirements(text: str) -> List[Dict[str, Any]]: + # Support two common formats: + # 1) pip-style requirements (lines with optional ==version) + # 2) NuGet packages.config XML + text_stripped = text.strip() + # Heuristic: if it looks like XML and contains , parse as packages.config + if text_stripped.startswith(" + for pkg in root.findall('.//package'): + name = pkg.get('id') or pkg.get('Id') or pkg.get('name') + version = pkg.get('version') or pkg.get('Version') + if name: + deps.append({"name": name, "version": version}) + except ET.ParseError: + # Fall back to pip-style parsing if XML parsing fails + deps = [] + for raw in text.splitlines(): + line = raw.strip() + if not line or line.startswith("#"): + continue + if "==" in line: + name, ver = line.split("==", 1) + deps.append({"name": name.strip(), "version": ver.strip()}) + else: + deps.append({"name": line, "version": None}) + return deps + + # Default: pip-style requirements deps = [] for raw in text.splitlines(): line = raw.strip() @@ -55,6 +88,7 @@ def analyze_file(filename: str, content: str) -> Dict[str, Any]: elif manager == "maven": result["ecosystem"] = "maven" elif manager == "nuget": + result["dependencies"] = parse_requirements(content) result["ecosystem"] = "nuget" else: result["ecosystem"] = "unknown" diff --git a/backend/app/services/repo_scanner.py b/backend/app/services/repo_scanner.py index 0d447b7..33818d2 100644 --- a/backend/app/services/repo_scanner.py +++ b/backend/app/services/repo_scanner.py @@ -17,7 +17,9 @@ # Java / Kotlin "pom.xml", "build.gradle", "build.gradle.kts", # .NET - "csproj", + 
"packages.config", + # TODO: Disable until .csproj support is added + # "csproj", # Go "go.mod", "vendor/modules.txt", } @@ -25,8 +27,9 @@ def is_dependency_file(filename: str) -> bool: lower = filename.lower() - if lower.endswith(".csproj"): - return True + # TODO: Disable until .csproj support is added + # if lower.endswith(".csproj"): + # return True return lower in DEP_FILES @@ -91,97 +94,143 @@ def _with_host_auth(repo_url: str) -> tuple[str, str | None]: return repo_url, None -def clone_and_scan(repo_url: str) -> Dict[str, Any]: - tmpdir = tempfile.mkdtemp(prefix="repo-scan-") - try: +# NOTE: `clone_and_scan` removed — use `clone_repository` + `scan_repository` instead. + + +def clone_repository(repo_url: str, target_dir: str | None = None) -> str: + """ + Clone the repository and return the path to the cloned repo (root directory). + If `target_dir` is not provided a temp dir will be created. + """ + tmpdir = target_dir or tempfile.mkdtemp(prefix="repo-scan-") + created_tmp = target_dir is None + + # Prepare minimal env for non-interactive containers + env = os.environ.copy() + env["HOME"] = tmpdir + env["GIT_TERMINAL_PROMPT"] = "0" + + clone_url, secret_used = _with_host_auth(repo_url) + result = subprocess.run( + ["git", "clone", "--depth", "1", clone_url, tmpdir], + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + env=env, + ) + if result.returncode == 0: + return tmpdir + + stderr = (result.stderr or b"").decode(errors="ignore").strip() + if secret_used: + stderr = stderr.replace(secret_used, "***redacted***") + + logger.warning(f"HTTPS clone failed for {repo_url}: {stderr}") + + hint = "Check that the repository URL is correct and reachable." + lower_url = repo_url.lower() + is_github = "github.com" in lower_url + is_bitbucket = "bitbucket.org" in lower_url + + if not secret_used: + if is_github: + hint += " For private repos, set GITHUB_TOKEN." + elif is_bitbucket: + hint += " For private repos, set BITBUCKET_USER and BITBUCKET_APP_PASSWORD." + else: + hint += " For private repos, ensure authentication credentials are provided via environment variables." + + if "terminal prompts disabled" in stderr: + hint += " Terminal prompts are disabled. You must provide credentials (env vars) or use SSH with keys." + + # Try SSH fallback if appropriate + try_ssh_fallback = False + has_ssh = shutil.which("ssh") is not None + if not has_ssh: + hint += " SSH client not found, SSH fallback disabled." - # Prepare environment for OpenShift/K8s compatibility - # 1. Set HOME to temp dir as random UIDs might not have a writable home - # 2. Disable terminal prompts - env = os.environ.copy() - env["HOME"] = tmpdir - env["GIT_TERMINAL_PROMPT"] = "0" - - clone_url, secret_used = _with_host_auth(repo_url) - result = subprocess.run( - ["git", "clone", "--depth", "1", clone_url, tmpdir], - stdout=subprocess.DEVNULL, - stderr=subprocess.PIPE, - env=env, - ) - if result.returncode != 0: - stderr = (result.stderr or b"").decode(errors="ignore").strip() - if secret_used: - stderr = stderr.replace(secret_used, "***redacted***") - - logger.warning(f"HTTPS clone failed for {repo_url}: {stderr}") - - hint = "Check that the repository URL is correct and reachable." - lower_url = repo_url.lower() - is_github = "github.com" in lower_url - is_bitbucket = "bitbucket.org" in lower_url - - if not secret_used: - if is_github: - hint += " For private repos, set GITHUB_TOKEN." - elif is_bitbucket: - hint += " For private repos, set BITBUCKET_USER and BITBUCKET_APP_PASSWORD." 
- else: - hint += " For private repos, ensure authentication credentials are provided via environment variables." - - if "terminal prompts disabled" in stderr: - hint += " Terminal prompts are disabled. You must provide credentials (env vars) or use SSH with keys." - - # If HTTPS clone failed and we did not inject HTTP auth, try SSH fallback - try_ssh_fallback = False - has_ssh = shutil.which("ssh") is not None - - if not has_ssh: - hint += " SSH client not found, SSH fallback disabled." - - try: - parsed = urlparse(repo_url) - host = (parsed.hostname or "").lower() - if (parsed.scheme in ("http", "https")) and (not secret_used) and host in ("github.com", "www.github.com", "bitbucket.org", "www.bitbucket.org") and has_ssh: - try_ssh_fallback = True - except Exception: - try_ssh_fallback = False - - if try_ssh_fallback: - # Construct SSH clone URL: git@host:owner/repo.git - path = parsed.path.lstrip('/') - ssh_url = f"git@{host}:{path}" - logger.info(f"Attempting SSH fallback for {repo_url} -> {ssh_url}") - - # Disable strict host key checking for this operation to avoid "Host key verification failed" - # in non-interactive environments (containers). - ssh_env = env.copy() - ssh_env["GIT_SSH_COMMAND"] = "ssh -o StrictHostKeyChecking=no" - - try: - ssh_result = subprocess.run( - ["git", "clone", "--depth", "1", ssh_url, tmpdir], - stdout=subprocess.DEVNULL, - stderr=subprocess.PIPE, - env=ssh_env, - ) - if ssh_result.returncode == 0: - # Success with SSH fallback - return {"files": find_dependency_files(tmpdir), "root": tmpdir} - ssh_stderr = (ssh_result.stderr or b"").decode(errors="ignore").strip() - logger.warning(f"SSH fallback failed: {ssh_stderr}") - except Exception as e: - ssh_stderr = str(e) - logger.error(f"SSH fallback exception: {e}") - - # Redact nothing for SSH attempt - full_err = f"HTTPS clone stderr: {stderr or 'unknown'}, SSH clone stderr: {ssh_stderr or 'unknown'}. {hint}" - raise RuntimeError(f"git clone failed: {full_err}") - - raise RuntimeError(f"git clone failed: {stderr or 'unknown error'}. {hint}") - - files = find_dependency_files(tmpdir) - return {"files": files, "root": tmpdir} + try: + parsed = urlparse(repo_url) + host = (parsed.hostname or "").lower() + if (parsed.scheme in ("http", "https")) and (not secret_used) and host in ("github.com", "www.github.com", "bitbucket.org", "www.bitbucket.org") and has_ssh: + try_ssh_fallback = True except Exception: + try_ssh_fallback = False + + if try_ssh_fallback: + path = parsed.path.lstrip('/') + ssh_url = f"git@{host}:{path}" + logger.info(f"Attempting SSH fallback for {repo_url} -> {ssh_url}") + + ssh_env = env.copy() + ssh_env["GIT_SSH_COMMAND"] = "ssh -o StrictHostKeyChecking=no" + + try: + ssh_result = subprocess.run( + ["git", "clone", "--depth", "1", ssh_url, tmpdir], + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + env=ssh_env, + ) + if ssh_result.returncode == 0: + return tmpdir + ssh_stderr = (ssh_result.stderr or b"").decode(errors="ignore").strip() + logger.warning(f"SSH fallback failed: {ssh_stderr}") + except Exception as e: + ssh_stderr = str(e) + logger.error(f"SSH fallback exception: {e}") + + full_err = f"HTTPS clone stderr: {stderr or 'unknown'}, SSH clone stderr: {ssh_stderr or 'unknown'}. 
{hint}" + # cleanup if we created tmpdir here + if created_tmp: + shutil.rmtree(tmpdir, ignore_errors=True) + raise RuntimeError(f"git clone failed: {full_err}") + + # No SSH fallback possible + if created_tmp: shutil.rmtree(tmpdir, ignore_errors=True) - raise + raise RuntimeError(f"git clone failed: {stderr or 'unknown error'}. {hint}") + + +def scan_repository(root: str) -> Dict[str, Any]: + """ + Scan a cloned repository directory for dependency files and return + the same shape as the original `clone_and_scan` (files + root). + """ + if not root or not os.path.isdir(root): + raise RuntimeError("scan_repository: invalid root path") + files = find_dependency_files(root) + return {"files": files, "root": root} + + +def list_repository_packages(root: str) -> List[Dict[str, Any]]: + """ + Return a list of dependency-file summaries found in a cloned repository. + Each item is {"path": relative_path, "report": }. + + This uses the local file analyzer (no MCP HTTP calls) so it's safe to call + in non-networked contexts and suitable for UI previews. + """ + if not root or not os.path.isdir(root): + raise RuntimeError("list_repository_packages: invalid root path") + + summaries: List[Dict[str, Any]] = [] + try: + # Import locally to avoid circular imports at module import time + from .file_analyzer import analyze_file as local_analyze_file + except Exception: + local_analyze_file = None + + for rel in find_dependency_files(root): + full = os.path.join(root, rel) + try: + with open(full, 'r', encoding='utf-8', errors='ignore') as fh: + content = fh.read() + if local_analyze_file: + report = local_analyze_file(rel, content) + else: + report = {"packageManager": "unknown", "dependencies": []} + except Exception as e: + report = {"error": str(e), "packageManager": "unknown", "dependencies": []} + summaries.append({"path": rel, "report": report}) + + return summaries diff --git a/backend/app/views/library_view.py b/backend/app/views/library_view.py index 172bdcd..bc4fad2 100644 --- a/backend/app/views/library_view.py +++ b/backend/app/views/library_view.py @@ -21,7 +21,11 @@ ) from ..models.repository_scan import RepositoryScanCreate from ..services.mcp_client import get_mcp_http_client, MCPClientError -from ..services.repo_scanner import clone_and_scan +from ..services.repo_scanner import ( + clone_repository, + scan_repository, + list_repository_packages, +) from urllib.parse import urlparse @@ -231,66 +235,81 @@ def get_comp(vm, key): } return enriched - -async def perform_repo_scan(repo_url: str, client) -> Dict[str, Any]: - """ - Clone and scan a repository, enrich dependencies using Mongo/MCP, and persist MCP hits. - Returns analyzed files and a deduplicated dependency list. 
- """ - try: - scan_result = clone_and_scan(repo_url) - except Exception as error: - raise HTTPException(status_code=502, detail=f'Repo scan failed: {error}') - - analyzed_files: List[Dict[str, Any]] = [] - resolved_index: Dict[tuple, Dict[str, Any]] = {} - tmpdir = scan_result.get('root') - try: - for relpath in scan_result.get('files', []): - full_path = os.path.join(scan_result['root'], relpath) - try: - with open(full_path, 'r', encoding='utf-8', errors='ignore') as fh: - content = fh.read() - report = await client.analyze_file({"filename": relpath, "content": content}) or {} - except Exception as exc: - report = {"error": str(exc), "dependencies": []} - - enriched_deps = [] - for dep in report.get('dependencies', []) or []: - enriched = await resolve_dependency_entry(dep, relpath, report) - key = (enriched['name'].lower(), normalize_version(enriched.get('version')).lower()) - existing = resolved_index.get(key) - if existing: - sources = set(existing.get('sources', [])) | set(enriched.get('sources', [])) - existing['sources'] = sorted(sources) - if existing.get('risk_score') is None and enriched.get('risk_score') is not None: - existing['risk_score'] = enriched.get('risk_score') - existing['risk_level'] = enriched.get('risk_level') - if not existing.get('risk_score_explanation') and enriched.get('risk_score_explanation'): - existing['risk_score_explanation'] = enriched.get('risk_score_explanation') - for key in ("license_risk_score", "security_risk_score", "maintenance_risk_score", "usage_context_risk_score"): - if existing.get(key) is None and enriched.get(key) is not None: - existing[key] = enriched.get(key) - if not existing.get('library_id') and enriched.get('library_id'): - existing['library_id'] = enriched.get('library_id') - if not existing.get('repository_url') and enriched.get('repository_url'): - existing['repository_url'] = enriched.get('repository_url') - else: - resolved_index[key] = enriched - enriched_deps.append({**enriched}) - - report['dependencies'] = enriched_deps - analyzed_files.append({"path": relpath, "report": report}) - finally: - if tmpdir: - shutil.rmtree(tmpdir, ignore_errors=True) - - dependencies = [] - for dep in resolved_index.values(): - dep.pop('_key', None) - dependencies.append(dep) - - return {"analyzed_files": analyzed_files, "dependencies": dependencies} +# TODO: buna gerek kalmadı galiba +# async def perform_repo_scan(repo_url: str | None, client, root: str | None = None) -> Dict[str, Any]: +# """ +# Clone and scan a repository, enrich dependencies using Mongo/MCP, and persist MCP hits. +# Returns analyzed files and a deduplicated dependency list. +# """ +# # If a root path is provided, use it (do not re-clone). Otherwise clone. 
+# tmpdir = None +# created_tmp = False + +# if root: +# tmpdir = root +# else: +# if not repo_url: +# raise HTTPException(status_code=400, detail='url is required when no root is provided') +# try: +# tmpdir = clone_repository(repo_url) +# created_tmp = True +# except Exception as error: +# raise HTTPException(status_code=502, detail=f'Repo clone failed: {error}') + +# try: +# scan_result = scan_repository(tmpdir) +# except Exception as error: +# raise HTTPException(status_code=502, detail=f'Repo scan failed: {error}') + +# analyzed_files: List[Dict[str, Any]] = [] +# resolved_index: Dict[tuple, Dict[str, Any]] = {} +# tmpdir = scan_result.get('root') +# try: +# for relpath in scan_result.get('files', []): +# full_path = os.path.join(scan_result['root'], relpath) +# try: +# with open(full_path, 'r', encoding='utf-8', errors='ignore') as fh: +# content = fh.read() +# report = await client.analyze_file({"filename": relpath, "content": content}) or {} +# except Exception as exc: +# report = {"error": str(exc), "dependencies": []} + +# enriched_deps = [] +# for dep in report.get('dependencies', []) or []: +# enriched = await resolve_dependency_entry(dep, relpath, report) +# key = (enriched['name'].lower(), normalize_version(enriched.get('version')).lower()) +# existing = resolved_index.get(key) +# if existing: +# sources = set(existing.get('sources', [])) | set(enriched.get('sources', [])) +# existing['sources'] = sorted(sources) +# if existing.get('risk_score') is None and enriched.get('risk_score') is not None: +# existing['risk_score'] = enriched.get('risk_score') +# existing['risk_level'] = enriched.get('risk_level') +# if not existing.get('risk_score_explanation') and enriched.get('risk_score_explanation'): +# existing['risk_score_explanation'] = enriched.get('risk_score_explanation') +# for key in ("license_risk_score", "security_risk_score", "maintenance_risk_score", "usage_context_risk_score"): +# if existing.get(key) is None and enriched.get(key) is not None: +# existing[key] = enriched.get(key) +# if not existing.get('library_id') and enriched.get('library_id'): +# existing['library_id'] = enriched.get('library_id') +# if not existing.get('repository_url') and enriched.get('repository_url'): +# existing['repository_url'] = enriched.get('repository_url') +# else: +# resolved_index[key] = enriched +# enriched_deps.append({**enriched}) + +# report['dependencies'] = enriched_deps +# analyzed_files.append({"path": relpath, "report": report}) +# finally: +# if created_tmp and tmpdir: +# shutil.rmtree(tmpdir, ignore_errors=True) + +# dependencies = [] +# for dep in resolved_index.values(): +# dep.pop('_key', None) +# dependencies.append(dep) + +# return {"analyzed_files": analyzed_files, "dependencies": dependencies} @router.get('/', response_model=List[LibraryDocument]) @@ -344,13 +363,14 @@ async def handle_analyze_file(file: UploadFile = File(...)): @router.post('/repositories/scan') async def handle_repo_scan(payload: dict): repo_url = payload.get('url') - if not repo_url: - raise HTTPException(status_code=400, detail='url is required') + root = payload.get('root') + if not repo_url and not root: + raise HTTPException(status_code=400, detail='url or root is required') client = get_mcp_http_client() if not client: raise HTTPException(status_code=503, detail='MCP HTTP client not configured') - scan_data = await perform_repo_scan(repo_url, client) + scan_data = await perform_repo_scan(repo_url, client, root=root) # Persist summarized scan to repository_scans collection platform, repo_name 
= _infer_repo_meta(repo_url)
     try:
         payload = RepositoryScanCreate(
@@ -381,6 +401,57 @@ async def handle_repo_scan(payload: dict):
     return {"url": repo_url, "files": scan_data["analyzed_files"], "dependencies": scan_data["dependencies"]}
 
 
+@router.post('/repositories/clone')
+async def handle_repo_clone(payload: dict):
+    """Clone a repository and return a preview list of dependency files and parsed packages.
+
+    Response: { url, root, files: [ { path, report }, ... ] }
+    """
+    repo_url = payload.get('url')
+    if not repo_url:
+        raise HTTPException(status_code=400, detail='url is required')
+
+    try:
+        root = clone_repository(repo_url)
+    except Exception as error:
+        raise HTTPException(status_code=502, detail=f'Repo clone failed: {error}')
+
+    try:
+        summaries = list_repository_packages(root)
+    except Exception as error:
+        # cleanup cloned repo on failure
+        shutil.rmtree(root, ignore_errors=True)
+        raise HTTPException(status_code=500, detail=f'Failed to list repository packages: {error}')
+
+    # Note: we keep the cloned repo on disk for now so the UI can request a follow-up scan if needed.
+    return {"url": repo_url, "root": root, "files": summaries}
+
+
+@router.post('/repositories/list-packages')
+async def handle_repo_list_packages(payload: dict):
+    """Return parsed dependency-file summaries for an existing cloned repo (by `root`) or for a repo URL.
+
+    Request body: { "root": "/path/to/clone" } OR { "url": "https://..." }
+    Response: { url?, root, files: [ { path, report }, ... ] }
+    """
+    root = payload.get('root')
+    repo_url = payload.get('url')
+
+    if not root and not repo_url:
+        raise HTTPException(status_code=400, detail='root or url is required')
+
+    try:
+        if root:
+            summaries = list_repository_packages(root)
+            return {"root": root, "files": summaries}
+        # else clone then list
+        root = clone_repository(repo_url)
+        summaries = list_repository_packages(root)
+        return {"url": repo_url, "root": root, "files": summaries}
+    except Exception as error:
+        raise HTTPException(status_code=500, detail=str(error))
+
+
 @router.post('/repositories/scan/highest-risk')
 async def handle_repo_scan_highest_risk(payload: dict):
     """
diff --git a/servers/mcp-licenguard/src/services/fileAnalyzer.js b/servers/mcp-licenguard/src/services/fileAnalyzer.js
index 9d30e87..8ada463 100644
--- a/servers/mcp-licenguard/src/services/fileAnalyzer.js
+++ b/servers/mcp-licenguard/src/services/fileAnalyzer.js
@@ -4,7 +4,7 @@ export function detectPackageManager(filename, content) {
   if (lowered.includes("package.json") || snippet.includes('"dependencies"')) return "npm";
   if (lowered.endsWith("requirements.txt") || snippet.includes("pip") || snippet.includes("==")) return "pypi";
   if (lowered.endsWith("pom.xml")) return "maven";
-  if (lowered.endsWith(".csproj") || snippet.includes("nuget")) return "nuget";
+  if (lowered.endsWith(".csproj") || lowered.includes("packages.config") || snippet.includes("nuget")) return "nuget";
   if (lowered.endsWith("go.mod") || snippet.includes("module ") || snippet.includes("require")) return "go";
   return "unknown";
 }
@@ -73,6 +73,50 @@ export function parseGoMod(text) {
   return deps;
 }
 
+export function parseNuget(text) {
+  // Parse two common NuGet formats:
+  // 1) packages.config entries: <package id="..." version="..." />
+  // 2) SDK-style .csproj PackageReference entries:
+  //    <PackageReference Include="..." Version="..." />
+  // or
+  //    <PackageReference Include="...">
+  //      <Version>1.2.3</Version>
+  //    </PackageReference>
+
+  const deps = [];
+  if (!text) return deps;
+
+  // packages.config
+  const pkgRegex = /<package\b[^>]*\bid=["']([^"']+)["'][^>]*\bversion=["']([^"']+)["'][^>]*>/gi;
+  let m;
+  while ((m = pkgRegex.exec(text))) {
+    deps.push({ name: m[1], version: m[2] });
+  }
+
+  // <PackageReference Include="..." Version="..." /> (single tag)
+  const prInlineRegex = /<PackageReference\b[^>]*\bInclude=["']([^"']+)["'][^>]*\bVersion=["']([^"']+)["'][^>]*\/?>/gi;
+  while ((m = prInlineRegex.exec(text))) {
+    deps.push({ name: m[1], version: m[2] });
+  }
+
+  // PackageReference with nested <Version> tag
+  const prBlockRegex = /<PackageReference\b[^>]*\bInclude=["']([^"']+)["'][^>]*>([\s\S]*?)<\/PackageReference>/gi;
+  let inner;
+  while ((m = prBlockRegex.exec(text))) {
+    const includeName = m[1];
+    inner = m[2];
+    const verMatch = /<Version>([^<]+)<\/Version>/i.exec(inner);
+    const version = verMatch ? verMatch[1].trim() : null;
+    deps.push({ name: includeName, version });
+  }
+
+  // Deduplicate by name (keep first seen version)
+  const seen = new Map();
+  for (const d of deps) {
+    const key = (d.name || '').toLowerCase();
+    if (!seen.has(key)) seen.set(key, d);
+  }
+  return Array.from(seen.values());
+}
+
 export function analyzeFile({ filename, content }) {
   const manager = detectPackageManager(filename || "unknown", content || "");
   const result = { packageManager: manager, ecosystem: manager, dependencies: [] };
@@ -82,6 +126,11 @@ export function analyzeFile({ filename, content }) {
     result.dependencies = parseRequirements(content || "");
   } else if (manager === "go") {
     result.dependencies = parseGoMod(content || "");
+  } else if (manager === "maven") {
+    result.dependencies = parseMaven(content || "");
+  } else if (manager === "nuget") {
+    result.dependencies = parseNuget(content || "");
   }
+
   return result;
 }
diff --git a/servers/mcp-licenguard/src/services/libraryDiscovery.js b/servers/mcp-licenguard/src/services/libraryDiscovery.js
index 60cf8cf..1d18a3c 100644
--- a/servers/mcp-licenguard/src/services/libraryDiscovery.js
+++ b/servers/mcp-licenguard/src/services/libraryDiscovery.js
@@ -11,7 +11,7 @@ When searching:
 - Maven/Java: inspect Maven Central/pom.xml for group/artifact, versions, license, repo.
 - Go: inspect go.mod and GitHub; prefer module path language (Go) over similarly named npm packages.
 - Rust: use crates.io/Cargo.toml for license/repo/version.
-- NuGet/.NET: use nuget.org metadata or .csproj for license/repo/version.
+- NuGet/.NET: use nuget.org metadata or .csproj/packages.config for license/repo/version.
 - Ruby: rubygems.org gem info for license/repo/version.
 - PHP: packagist/ composer.json for license/repo/version.
 - iOS: cocoapods specs for license/repo/version.
@@ -59,7 +59,7 @@ If the user provides ecosystem/version as unknown or omits them, infer them wher
 - cocoapods → "iOS / Swift / Obj-C"
 - gradle → "Java / Kotlin"
 - go → "Go"
-- Use repository metadata (and official site if present) to infer ecosystem: check GitHub/GitLab language badges, repo descriptions ("A Commander for modern Go CLI interactions"), presence of go.mod (Go), Cargo.toml (Rust), package.json (JavaScript), requirements.txt (Python), pom.xml (Java), .csproj (C#/.NET), etc. Prefer the repository’s language over similarly named packages in other ecosystems.
+- Use repository metadata (and official site if present) to infer ecosystem: check GitHub/GitLab language badges, repo descriptions ("A Commander for modern Go CLI interactions"), presence of go.mod (Go), Cargo.toml (Rust), package.json (JavaScript), requirements.txt (Python), pom.xml (Java), .csproj/packages.config (C#/.NET), etc. Prefer the repository’s language over similarly named packages in other ecosystems. 
- If the user passes a module path with a slash (e.g., "spf13/cobra") or a repo/module already indicates a language (e.g., go.mod / Cargo.toml), keep the original name (do not drop the owner/namespace) and set ecosystem accordingly (e.g., "Go"). Avoid renaming/shortening package names; preserve the user-provided name unless authoritative evidence shows it is incorrect. - If repository URL is found and officialSite is empty/unknown, set officialSite to repository URL. - When repo/site content clearly shows a language/framework (e.g., go.mod + "Go CLI" in README, Maven pom.xml, Cargo.toml, package.json), trust that signal over similarly named packages in other ecosystems. Describe the project in that language’s context (e.g., a Go repo should not be described as JavaScript). Preserve the full module name (including owner/namespace) provided by the user. From 0199236227ae1384fd45bafbedd2807c64f5822f Mon Sep 17 00:00:00 2001 From: "murat.oguz" Date: Thu, 4 Dec 2025 17:31:39 +0300 Subject: [PATCH 3/4] Remove deprecated scanRepository function and clean up related code in library_view --- apps/web/src/api/client.js | 16 --- backend/app/views/library_view.py | 163 ++++++++---------------------- 2 files changed, 44 insertions(+), 135 deletions(-) diff --git a/apps/web/src/api/client.js b/apps/web/src/api/client.js index 9ae71cb..a586d31 100644 --- a/apps/web/src/api/client.js +++ b/apps/web/src/api/client.js @@ -69,22 +69,6 @@ export async function listRepositoryPackages({ root, url } = {}) { return res.json(); } -// Backwards-compatible scanRepository: accepts string url or object { url } | { root } -export async function scanRepository(input) { - let body; - if (typeof input === 'string') body = JSON.stringify({ url: input }); - else body = JSON.stringify(input || {}); - const res = await fetch(`${API_BASE}/libraries/repositories/scan`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body, - }); - if (!res.ok) { - const message = await res.text(); - throw new Error(message || `Request failed with status ${res.status}`); - } - return res.json(); -} export async function analyzeFileUpload(file) { const form = new FormData(); diff --git a/backend/app/views/library_view.py b/backend/app/views/library_view.py index bc4fad2..f328aa4 100644 --- a/backend/app/views/library_view.py +++ b/backend/app/views/library_view.py @@ -235,82 +235,6 @@ def get_comp(vm, key): } return enriched -# TODO: buna gerek kalmadı galiba -# async def perform_repo_scan(repo_url: str | None, client, root: str | None = None) -> Dict[str, Any]: -# """ -# Clone and scan a repository, enrich dependencies using Mongo/MCP, and persist MCP hits. -# Returns analyzed files and a deduplicated dependency list. -# """ -# # If a root path is provided, use it (do not re-clone). Otherwise clone. 
-# tmpdir = None -# created_tmp = False - -# if root: -# tmpdir = root -# else: -# if not repo_url: -# raise HTTPException(status_code=400, detail='url is required when no root is provided') -# try: -# tmpdir = clone_repository(repo_url) -# created_tmp = True -# except Exception as error: -# raise HTTPException(status_code=502, detail=f'Repo clone failed: {error}') - -# try: -# scan_result = scan_repository(tmpdir) -# except Exception as error: -# raise HTTPException(status_code=502, detail=f'Repo scan failed: {error}') - -# analyzed_files: List[Dict[str, Any]] = [] -# resolved_index: Dict[tuple, Dict[str, Any]] = {} -# tmpdir = scan_result.get('root') -# try: -# for relpath in scan_result.get('files', []): -# full_path = os.path.join(scan_result['root'], relpath) -# try: -# with open(full_path, 'r', encoding='utf-8', errors='ignore') as fh: -# content = fh.read() -# report = await client.analyze_file({"filename": relpath, "content": content}) or {} -# except Exception as exc: -# report = {"error": str(exc), "dependencies": []} - -# enriched_deps = [] -# for dep in report.get('dependencies', []) or []: -# enriched = await resolve_dependency_entry(dep, relpath, report) -# key = (enriched['name'].lower(), normalize_version(enriched.get('version')).lower()) -# existing = resolved_index.get(key) -# if existing: -# sources = set(existing.get('sources', [])) | set(enriched.get('sources', [])) -# existing['sources'] = sorted(sources) -# if existing.get('risk_score') is None and enriched.get('risk_score') is not None: -# existing['risk_score'] = enriched.get('risk_score') -# existing['risk_level'] = enriched.get('risk_level') -# if not existing.get('risk_score_explanation') and enriched.get('risk_score_explanation'): -# existing['risk_score_explanation'] = enriched.get('risk_score_explanation') -# for key in ("license_risk_score", "security_risk_score", "maintenance_risk_score", "usage_context_risk_score"): -# if existing.get(key) is None and enriched.get(key) is not None: -# existing[key] = enriched.get(key) -# if not existing.get('library_id') and enriched.get('library_id'): -# existing['library_id'] = enriched.get('library_id') -# if not existing.get('repository_url') and enriched.get('repository_url'): -# existing['repository_url'] = enriched.get('repository_url') -# else: -# resolved_index[key] = enriched -# enriched_deps.append({**enriched}) - -# report['dependencies'] = enriched_deps -# analyzed_files.append({"path": relpath, "report": report}) -# finally: -# if created_tmp and tmpdir: -# shutil.rmtree(tmpdir, ignore_errors=True) - -# dependencies = [] -# for dep in resolved_index.values(): -# dep.pop('_key', None) -# dependencies.append(dep) - -# return {"analyzed_files": analyzed_files, "dependencies": dependencies} - @router.get('/', response_model=List[LibraryDocument]) async def handle_list_libraries(limit: int = Query(50, ge=1, le=500, description='Max items to return')): @@ -360,46 +284,6 @@ async def handle_analyze_file(file: UploadFile = File(...)): return {"file": file.filename, **(report or {})} -@router.post('/repositories/scan') -async def handle_repo_scan(payload: dict): - repo_url = payload.get('url') - root = payload.get('root') - if not repo_url and not root: - raise HTTPException(status_code=400, detail='url or root is required') - client = get_mcp_http_client() - if not client: - raise HTTPException(status_code=503, detail='MCP HTTP client not configured') - - scan_data = await perform_repo_scan(repo_url, client, root=root) - # Persist summarized scan to repository_scans 
collection
-    platform, repo_name = _infer_repo_meta(repo_url)
-    try:
-        payload = RepositoryScanCreate(
-            repository_url=repo_url,
-            repository_platform=platform,
-            repository_name=repo_name,
-            dependencies=[
-                {
-                    "library_path": file.get("path"),
-                    "libraries": [
-                        {
-                            "library_name": dep.get("name"),
-                            "library_version": normalize_version(dep.get("version")) or dep.get("version") or "unknown"
-                        }
-                        for dep in (file.get("report", {}).get("dependencies") or [])
-                        if dep.get("name")
-                    ],
-                }
-                for file in scan_data.get("analyzed_files", [])
-            ],
-        )
-        await create_repository_scan(payload)
-    except Exception as exc:
-        # Do not block response; just log.
-        print(f'{datetime.utcnow().isoformat()} [repo_scan] failed to persist scan: {exc}')
-
-    return {"url": repo_url, "files": scan_data["analyzed_files"], "dependencies": scan_data["dependencies"]}
-
 
 @router.post('/repositories/clone')
 async def handle_repo_clone(payload: dict):
@@ -467,9 +351,50 @@ async def handle_repo_scan_highest_risk(payload: dict):
     if not client:
         raise HTTPException(status_code=503, detail='MCP HTTP client not configured')
 
-    scan_data = await perform_repo_scan(repo_url, client)
-    dependencies = scan_data["dependencies"]
-    analyzed_files = scan_data["analyzed_files"]
+    # Use existing route handlers to support both UI and CI flows:
+    # - clone the repository (handle_repo_clone)
+    # - list packages for the cloned repo (handle_repo_list_packages)
+    # Then enrich each dependency using `resolve_dependency_entry`.
+    try:
+        clone_res = await handle_repo_clone({"url": repo_url})
+        # clone_res should contain `root` and `files`
+        root = clone_res.get("root")
+        scanned_files = clone_res.get("files") or []
+
+        # If clone returned no files, try listing packages explicitly
+        if not scanned_files and root:
+            list_res = await handle_repo_list_packages({"root": root})
+            scanned_files = list_res.get("files") or []
+
+        dependencies = []
+        analyzed_files = []
+
+        for f in scanned_files:
+            relpath = f.get("path")
+            report = f.get("report") or {}
+            # collect analyzed file summary
+            analyzed_files.append({"path": relpath, "report": report})
+
+            deps = report.get("dependencies") if isinstance(report.get("dependencies"), list) else []
+            for dep in deps:
+                try:
+                    enriched = await resolve_dependency_entry(dep, relpath, report)
+                except Exception:
+                    # fallback: still include minimal dep info
+                    enriched = {
+                        "name": dep.get("name"),
+                        "version": dep.get("version"),
+                        "ecosystem": dep.get("ecosystem") or report.get("ecosystem"),
+                        "sources": [relpath]
+                    }
+                # attach source file info
+                enriched["file"] = relpath
+                dependencies.append(enriched)
+    except HTTPException:
+        # propagate HTTP errors as-is
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
 
     # Persist summarized scan to repository_scans collection
     platform, repo_name = _infer_repo_meta(repo_url)
 

From c8f20dbd14e40c6552e8f2a54231541bfe3d9997 Mon Sep 17 00:00:00 2001
From: "murat.oguz"
Date: Thu, 4 Dec 2025 18:28:10 +0300
Subject: [PATCH 4/4] Add support for parsing .csproj files and enhance repo URL validation

---
 backend/app/services/file_analyzer.py | 123 ++++++++++++++++++--------
 backend/app/services/repo_scanner.py  |   9 +-
 backend/app/views/library_view.py     |  16 +++-
 3 files changed, 102 insertions(+), 46 deletions(-)

diff --git a/backend/app/services/file_analyzer.py b/backend/app/services/file_analyzer.py
index 3379678..75aae9d 100644
--- a/backend/app/services/file_analyzer.py
+++ b/backend/app/services/file_analyzer.py
@@ -30,48 +30,95 @@ def 
parse_package_json(text: str) -> List[Dict[str, Any]]: deps.append({"name": name, "version": version}) return deps +def parse_csproj_file(content: str) -> List[Dict[str, Any]]: + """Parse a .csproj file content and extract PackageReference entries. -def parse_requirements(text: str) -> List[Dict[str, Any]]: + Returns a list of dicts with keys: `name` and `version` (version may be None). + Handles both attribute-style (``) + and nested `` child elements. Works with XML namespaces. + """ + try: + root = ET.fromstring(content) + except ET.ParseError: + return [] + + deps: List[Dict[str, Any]] = [] + + def local_name(tag: str) -> str: + return tag.split('}')[-1] if '}' in tag else tag + + # Iterate through all elements and find PackageReference nodes + for elem in root.iter(): + if local_name(elem.tag) != 'PackageReference': + continue + + # Name can be in Include or Update attribute + name = elem.get('Include') or elem.get('Update') + + # Version can be an attribute or a child element + version = elem.get('Version') + if version is None: + for child in list(elem): + if local_name(child.tag) == 'Version' and (child.text or '').strip(): + version = (child.text or '').strip() + break + + if name: + deps.append({"name": name, "version": version}) + + return deps + +def parse_requirements(text: str, filename: str="") -> List[Dict[str, Any]]: # Support two common formats: # 1) pip-style requirements (lines with optional ==version) # 2) NuGet packages.config XML text_stripped = text.strip() - # Heuristic: if it looks like XML and contains , parse as packages.config - if text_stripped.startswith(" - for pkg in root.findall('.//package'): - name = pkg.get('id') or pkg.get('Id') or pkg.get('name') - version = pkg.get('version') or pkg.get('Version') - if name: - deps.append({"name": name, "version": version}) - except ET.ParseError: - # Fall back to pip-style parsing if XML parsing fails - deps = [] - for raw in text.splitlines(): - line = raw.strip() - if not line or line.startswith("#"): - continue - if "==" in line: - name, ver = line.split("==", 1) - deps.append({"name": name.strip(), "version": ver.strip()}) - else: - deps.append({"name": line, "version": None}) - return deps - - # Default: pip-style requirements - deps = [] - for raw in text.splitlines(): - line = raw.strip() - if not line or line.startswith("#"): - continue - if "==" in line: - name, ver = line.split("==", 1) - deps.append({"name": name.strip(), "version": ver.strip()}) - else: - deps.append({"name": line, "version": None}) + deps: List[Dict[str, Any]] = [] + + if filename and filename.lower().endswith(".csproj"): + deps = parse_csproj_file(content=text_stripped) + elif (filename and filename.lower().find("packages.config") != -1): + return parse_packages_config(text) + else: + # Default: pip-style requirements + deps = [] + for raw in text.splitlines(): + line = raw.strip() + if not line or line.startswith("#"): + continue + if "==" in line: + name, ver = line.split("==", 1) + deps.append({"name": name.strip(), "version": ver.strip()}) + else: + deps.append({"name": line, "version": None}) + return deps + + +def parse_packages_config(text: str) -> List[Dict[str, Any]]: + """Parse a NuGet `packages.config` XML or fall back to pip-style lines. + + Returns list of {name, version}. 
+ """ + deps: List[Dict[str, Any]] = [] + try: + root = ET.fromstring(text) + # Handle + for pkg in root.findall('.//package'): + name = pkg.get('id') or pkg.get('Id') or pkg.get('name') + version = pkg.get('version') or pkg.get('Version') + if name: + deps.append({"name": name, "version": version}) + except ET.ParseError: + # Fall back to pip-style parsing if XML parsing fails + for raw in text.splitlines(): + line = raw.strip() + if not line or line.startswith("#"): + continue + if "==" in line: + name, ver = line.split("==", 1) + deps.append({"name": name.strip(), "version": ver.strip()}) + else: + deps.append({"name": line, "version": None}) return deps @@ -88,7 +135,7 @@ def analyze_file(filename: str, content: str) -> Dict[str, Any]: elif manager == "maven": result["ecosystem"] = "maven" elif manager == "nuget": - result["dependencies"] = parse_requirements(content) + result["dependencies"] = parse_requirements(content, filename=filename) result["ecosystem"] = "nuget" else: result["ecosystem"] = "unknown" diff --git a/backend/app/services/repo_scanner.py b/backend/app/services/repo_scanner.py index 33818d2..8aebb99 100644 --- a/backend/app/services/repo_scanner.py +++ b/backend/app/services/repo_scanner.py @@ -18,8 +18,7 @@ "pom.xml", "build.gradle", "build.gradle.kts", # .NET "packages.config", - # TODO: Disable until .csproj support is added - # "csproj", + ".csproj", # Go "go.mod", "vendor/modules.txt", } @@ -27,9 +26,9 @@ def is_dependency_file(filename: str) -> bool: lower = filename.lower() - # TODO: Disable until .csproj support is added - # if lower.endswith(".csproj"): - # return True + + if lower.endswith(".csproj"): + return True return lower in DEP_FILES diff --git a/backend/app/views/library_view.py b/backend/app/views/library_view.py index f328aa4..541abd6 100644 --- a/backend/app/views/library_view.py +++ b/backend/app/views/library_view.py @@ -294,6 +294,10 @@ async def handle_repo_clone(payload: dict): repo_url = payload.get('url') if not repo_url: raise HTTPException(status_code=400, detail='url is required') + if not isinstance(repo_url, str): + raise HTTPException(status_code=400, detail='url must be a string') + # ensure static type checkers see a plain str + repo_url = str(repo_url) try: root = clone_repository(repo_url) @@ -329,9 +333,12 @@ async def handle_repo_list_packages(payload: dict): summaries = list_repository_packages(root) return {"root": root, "files": summaries} # else clone then list - root = clone_repository(repo_url) - summaries = list_repository_packages(root) - return {"url": repo_url, "root": root, "files": summaries} + elif repo_url: + if not isinstance(repo_url, str): + raise HTTPException(status_code=400, detail='url must be a string') + root = clone_repository(str(repo_url)) + summaries = list_repository_packages(root) + return {"url": repo_url, "root": root, "files": summaries} except Exception as error: raise HTTPException(status_code=500, detail=str(error)) @@ -346,6 +353,9 @@ async def handle_repo_scan_highest_risk(payload: dict): repo_url = payload.get('url') if not repo_url: raise HTTPException(status_code=400, detail='url is required') + if not isinstance(repo_url, str): + raise HTTPException(status_code=400, detail='url must be a string') + repo_url = str(repo_url) client = get_mcp_http_client() if not client: