diff --git a/deployment/config/java-shared/application.properties.sample b/deployment/config/java-shared/application.properties.sample index 765c71cd..3f794130 100644 --- a/deployment/config/java-shared/application.properties.sample +++ b/deployment/config/java-shared/application.properties.sample @@ -232,6 +232,7 @@ llm.sync.anthropic.api-key= #RAG codecrow.rag.api.url=http://host.docker.internal:8001 codecrow.rag.api.enabled=true +codecrow.rag.api.secret=change-me-to-a-random-secret # RAG API timeouts (in seconds) codecrow.rag.api.timeout.connect=30 codecrow.rag.api.timeout.read=120 diff --git a/deployment/config/mcp-client/.env.sample b/deployment/config/mcp-client/.env.sample index 05066c29..43168803 100644 --- a/deployment/config/mcp-client/.env.sample +++ b/deployment/config/mcp-client/.env.sample @@ -6,6 +6,7 @@ RAG_API_URL=http://host.docker.internal:8001 # Shared secret for authenticating requests between internal services. # Must match the SERVICE_SECRET configured on rag-pipeline. # Leave empty to disable auth (dev mode only). +# IMPORTANT: Avoid $ { } characters in the secret — they can cause dotenv parsing issues. SERVICE_SECRET=change-me-to-a-random-secret # === Concurrency === diff --git a/deployment/config/rag-pipeline/.env.sample b/deployment/config/rag-pipeline/.env.sample index 1152a685..67003ea4 100644 --- a/deployment/config/rag-pipeline/.env.sample +++ b/deployment/config/rag-pipeline/.env.sample @@ -2,6 +2,7 @@ # Shared secret for authenticating incoming requests from mcp-client. # Must match the SERVICE_SECRET configured on mcp-client. # Leave empty to disable auth (dev mode only). +# IMPORTANT: Avoid $ { } characters in the secret — they can cause dotenv parsing issues. 
SERVICE_SECRET=change-me-to-a-random-secret # === Path Traversal Guard === diff --git a/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/processor/analysis/BranchAnalysisProcessor.java b/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/processor/analysis/BranchAnalysisProcessor.java index 80f54fd0..41979a48 100644 --- a/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/processor/analysis/BranchAnalysisProcessor.java +++ b/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/processor/analysis/BranchAnalysisProcessor.java @@ -224,10 +224,10 @@ public Map process(BranchProcessRequest request, Consumer existingFiles = updateBranchFiles(changedFiles, project, request.getTargetBranchName()); Branch branch = createOrUpdateProjectBranch(project, request); - mapCodeAnalysisIssuesToBranch(changedFiles, branch, project); + mapCodeAnalysisIssuesToBranch(changedFiles, existingFiles, branch, project); // Always update branch issue counts after mapping (even on first analysis) // Previously this was only done in reanalyzeCandidateIssues() which could be skipped @@ -282,10 +282,15 @@ public Set parseFilePathsFromDiff(String rawDiff) { return files; } - private void updateBranchFiles(Set changedFiles, Project project, String branchName) { + /** + * Updates branch file records for changed files. 
+ * @return the set of file paths confirmed to exist in the branch (used to avoid redundant API calls) + */ + private Set updateBranchFiles(Set changedFiles, Project project, String branchName) { VcsInfo vcsInfo = getVcsInfo(project); EVcsProvider provider = getVcsProvider(project); VcsOperationsService operationsService = vcsServiceFactory.getOperationsService(provider); + Set filesExistingInBranch = new HashSet<>(); for (String filePath : changedFiles) { try { @@ -303,9 +308,12 @@ private void updateBranchFiles(Set changedFiles, Project project, String log.debug("Skipping file {} - does not exist in branch {}", filePath, branchName); continue; } + filesExistingInBranch.add(filePath); } catch (Exception e) { log.warn("Failed to check file existence for {} in branch {}: {}. Proceeding anyway.", filePath, branchName, e.getMessage()); + // On error, assume the file exists so we don't skip it + filesExistingInBranch.add(filePath); } List relatedIssues = codeAnalysisIssueRepository @@ -314,7 +322,14 @@ private void updateBranchFiles(Set changedFiles, Project project, String .filter(issue -> branchName.equals(issue.getAnalysis().getBranchName()) || branchName.equals(issue.getAnalysis().getSourceBranchName())) .toList(); - long unresolvedCount = branchSpecific.stream().filter(i -> !i.isResolved()).count(); + + // Deduplicate by content key before counting — multiple analyses may + // report the same logical issue with different DB ids + Set seenKeys = new HashSet<>(); + long unresolvedCount = branchSpecific.stream() + .filter(i -> !i.isResolved()) + .filter(i -> seenKeys.add(buildIssueContentKey(i))) + .count(); Optional projectFileOptional = branchFileRepository .findByProjectIdAndBranchNameAndFilePath(project.getId(), branchName, filePath); @@ -333,6 +348,7 @@ private void updateBranchFiles(Set changedFiles, Project project, String branchFileRepository.save(branchFile); } } + return filesExistingInBranch; } private Branch createOrUpdateProjectBranch(Project project, 
BranchProcessRequest request) { @@ -348,31 +364,14 @@ private Branch createOrUpdateProjectBranch(Project project, BranchProcessRequest return branchRepository.save(branch); } - private void mapCodeAnalysisIssuesToBranch(Set changedFiles, Branch branch, Project project) { - VcsInfo vcsInfo = getVcsInfo(project); - EVcsProvider provider = getVcsProvider(project); - VcsOperationsService operationsService = vcsServiceFactory.getOperationsService(provider); - + private void mapCodeAnalysisIssuesToBranch(Set changedFiles, Set filesExistingInBranch, + Branch branch, Project project) { for (String filePath : changedFiles) { - try { - OkHttpClient client = vcsClientProvider.getHttpClient(vcsInfo.vcsConnection()); - - boolean fileExistsInBranch = operationsService.checkFileExistsInBranch( - client, - vcsInfo.workspace(), - vcsInfo.repoSlug(), - branch.getBranchName(), - filePath - ); - - if (!fileExistsInBranch) { - log.debug("Skipping issue mapping for file {} - does not exist in branch {}", + // Use cached file existence from updateBranchFiles to avoid redundant API calls + if (!filesExistingInBranch.contains(filePath)) { + log.debug("Skipping issue mapping for file {} - does not exist in branch {} (cached)", filePath, branch.getBranchName()); - continue; - } - } catch (Exception e) { - log.warn("Failed to check file existence for {} in branch {}: {}. Proceeding with mapping.", - filePath, branch.getBranchName(), e.getMessage()); + continue; } List allIssues = codeAnalysisIssueRepository.findByProjectIdAndFilePath(project.getId(), filePath); @@ -387,27 +386,67 @@ private void mapCodeAnalysisIssuesToBranch(Set changedFiles, Branch bran }) .toList(); + // Content-based deduplication: build a map of existing BranchIssues by content key + // to prevent the same logical issue from being linked multiple times across analyses. + // Key = "filePath:lineNumber:severity:category" (see buildIssueContentKey) — unique per logical issue. 
+ List existingBranchIssues = branchIssueRepository + .findUnresolvedByBranchIdAndFilePath(branch.getId(), filePath); + Map contentKeyMap = new HashMap<>(); + for (BranchIssue bi : existingBranchIssues) { + String key = buildIssueContentKey(bi.getCodeAnalysisIssue()); + contentKeyMap.putIfAbsent(key, bi); + } + + int skipped = 0; for (CodeAnalysisIssue issue : branchSpecificIssues) { + // Tier 1: exact ID match — same CodeAnalysisIssue already linked Optional existing = branchIssueRepository .findByBranchIdAndCodeAnalysisIssueId(branch.getId(), issue.getId()); - BranchIssue bc; + if (existing.isPresent()) { - bc = existing.get(); - bc.setSeverity(issue.getSeverity()); - branchIssueRepository.saveAndFlush(bc); - } else { - bc = new BranchIssue(); - bc.setBranch(branch); - bc.setCodeAnalysisIssue(issue); - bc.setResolved(issue.isResolved()); + BranchIssue bc = existing.get(); bc.setSeverity(issue.getSeverity()); - bc.setFirstDetectedPrNumber(issue.getAnalysis() != null ? issue.getAnalysis().getPrNumber() : null); branchIssueRepository.saveAndFlush(bc); + continue; } + + // Tier 2: content-based dedup — same logical issue from a different analysis + String contentKey = buildIssueContentKey(issue); + if (contentKeyMap.containsKey(contentKey)) { + skipped++; + continue; + } + + // No match — create new BranchIssue + BranchIssue bc = new BranchIssue(); + bc.setBranch(branch); + bc.setCodeAnalysisIssue(issue); + bc.setResolved(issue.isResolved()); + bc.setSeverity(issue.getSeverity()); + bc.setFirstDetectedPrNumber(issue.getAnalysis() != null ? issue.getAnalysis().getPrNumber() : null); + branchIssueRepository.saveAndFlush(bc); + // Register in map so subsequent issues in this batch also dedup + contentKeyMap.put(contentKey, bc); + } + + if (skipped > 0) { + log.debug("Skipped {} duplicate issue(s) for file {} in branch {}", + skipped, filePath, branch.getBranchName()); } } } + /** + * Builds a content key for deduplication of branch issues. 
+ * Two CodeAnalysisIssue records with the same key represent the same logical issue. + */ + private String buildIssueContentKey(CodeAnalysisIssue issue) { + return issue.getFilePath() + ":" + + issue.getLineNumber() + ":" + + issue.getSeverity() + ":" + + issue.getIssueCategory(); + } + private void reanalyzeCandidateIssues(Set changedFiles, Branch branch, Project project, BranchProcessRequest request, Consumer> consumer) { List candidateBranchIssues = new ArrayList<>(); for (String filePath : changedFiles) { diff --git a/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/processor/analysis/PullRequestAnalysisProcessor.java b/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/processor/analysis/PullRequestAnalysisProcessor.java index 5c7783c0..052a8585 100644 --- a/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/processor/analysis/PullRequestAnalysisProcessor.java +++ b/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/processor/analysis/PullRequestAnalysisProcessor.java @@ -33,6 +33,8 @@ import java.util.Map; import java.util.Optional; +import org.rostilos.codecrow.analysisengine.util.DiffFingerprintUtil; + /** * Generic service that handles pull request analysis. * Uses VCS-specific services via VcsServiceFactory for provider-specific operations. @@ -151,6 +153,29 @@ public Map process( return Map.of("status", "cached", "cached", true); } + // --- Fallback cache: same commit hash, any PR number (handles close/reopen) --- + Optional commitHashHit = codeAnalysisService.getAnalysisByCommitHash( + project.getId(), request.getCommitHash()); + if (commitHashHit.isPresent()) { + log.info("Commit-hash cache hit for project={}, commit={} (source PR={}). 
Cloning for PR={}.", + project.getId(), request.getCommitHash(), + commitHashHit.get().getPrNumber(), request.getPullRequestId()); + CodeAnalysis cloned = codeAnalysisService.cloneAnalysisForPr( + commitHashHit.get(), project, request.getPullRequestId(), + request.getCommitHash(), request.getTargetBranchName(), + request.getSourceBranchName(), commitHashHit.get().getDiffFingerprint()); + try { + reportingService.postAnalysisResults(cloned, project, + request.getPullRequestId(), pullRequest.getId(), + request.getPlaceholderCommentId()); + } catch (IOException e) { + log.error("Failed to post commit-hash cached results to VCS: {}", e.getMessage(), e); + } + publishAnalysisCompletedEvent(project, request, correlationId, startTime, + AnalysisCompletedEvent.CompletionStatus.SUCCESS, 0, 0, null); + return Map.of("status", "cached_by_commit", "cached", true); + } + // Get all previous analyses for this PR to provide full issue history to AI List allPrAnalyses = codeAnalysisService.getAllPrAnalyses( project.getId(), @@ -170,6 +195,34 @@ public Map process( AiAnalysisRequest aiRequest = aiClientService.buildAiAnalysisRequest( project, request, previousAnalysis, allPrAnalyses); + // --- Diff fingerprint cache: same code changes, different PR/commit --- + String diffFingerprint = DiffFingerprintUtil.compute(aiRequest.getRawDiff()); + if (diffFingerprint != null) { + Optional fingerprintHit = codeAnalysisService.getAnalysisByDiffFingerprint( + project.getId(), diffFingerprint); + if (fingerprintHit.isPresent()) { + log.info("Diff fingerprint cache hit for project={}, fingerprint={} (source PR={}). Cloning for PR={}.", + project.getId(), diffFingerprint.substring(0, 8) + "...", + fingerprintHit.get().getPrNumber(), request.getPullRequestId()); + // TODO: Option B — LIGHTWEIGHT mode: instead of full clone, reuse Stage 1 issues + // but re-run Stage 2 cross-file analysis against the new target branch context. 
+ CodeAnalysis cloned = codeAnalysisService.cloneAnalysisForPr( + fingerprintHit.get(), project, request.getPullRequestId(), + request.getCommitHash(), request.getTargetBranchName(), + request.getSourceBranchName(), diffFingerprint); + try { + reportingService.postAnalysisResults(cloned, project, + request.getPullRequestId(), pullRequest.getId(), + request.getPlaceholderCommentId()); + } catch (IOException e) { + log.error("Failed to post fingerprint-cached results to VCS: {}", e.getMessage(), e); + } + publishAnalysisCompletedEvent(project, request, correlationId, startTime, + AnalysisCompletedEvent.CompletionStatus.SUCCESS, 0, 0, null); + return Map.of("status", "cached_by_fingerprint", "cached", true); + } + } + Map aiResponse = aiAnalysisClient.performAnalysis(aiRequest, event -> { try { log.debug("Received event from AI client: type={}", event.get("type")); @@ -188,7 +241,8 @@ public Map process( request.getSourceBranchName(), request.getCommitHash(), request.getPrAuthorId(), - request.getPrAuthorUsername() + request.getPrAuthorUsername(), + diffFingerprint ); int issuesFound = newAnalysis.getIssues() != null ? newAnalysis.getIssues().size() : 0; diff --git a/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/util/DiffFingerprintUtil.java b/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/util/DiffFingerprintUtil.java new file mode 100644 index 00000000..6f8a381e --- /dev/null +++ b/java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/util/DiffFingerprintUtil.java @@ -0,0 +1,102 @@ +package org.rostilos.codecrow.analysisengine.util; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Computes a content-based fingerprint of a unified diff. + *

+ * Only actual change lines ({@code +} / {@code -}) are included — context lines, + * hunk headers ({@code @@}), and file headers ({@code +++} / {@code ---} / {@code diff --git}) + * are excluded. The change lines are sorted to make the fingerprint stable regardless + * of file ordering within the diff. + *

+ * This allows detecting that two PRs carry the same code changes even if they target + * different branches (different merge-base → different context/hunk headers). + */ +public final class DiffFingerprintUtil { + + private DiffFingerprintUtil() { /* utility */ } + + /** + * Compute a SHA-256 hex digest of the normalised change lines in the given diff. + * + * @param rawDiff the filtered unified diff (may be {@code null} or empty) + * @return 64-char lowercase hex string, or {@code null} if the diff is blank + */ + public static String compute(String rawDiff) { + if (rawDiff == null || rawDiff.isBlank()) { + return null; + } + + List changeLines = extractChangeLines(rawDiff); + if (changeLines.isEmpty()) { + return null; + } + + // Sort for stability across different file orderings + Collections.sort(changeLines); + + try { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + for (String line : changeLines) { + digest.update(line.getBytes(StandardCharsets.UTF_8)); + digest.update((byte) '\n'); + } + return bytesToHex(digest.digest()); + } catch (NoSuchAlgorithmException e) { + // SHA-256 is guaranteed by the JVM spec — should never happen + throw new IllegalStateException("SHA-256 not available", e); + } + } + + /** + * Extract only the actual change lines from a unified diff. + * A "change line" starts with exactly one {@code +} or {@code -} and is NOT + * a file header ({@code +++}, {@code ---}) or a diff metadata line. 
+ */ + private static List extractChangeLines(String diff) { + List lines = new ArrayList<>(); + // Normalise line endings + String normalised = diff.replace("\r\n", "\n").replace("\r", "\n"); + for (String raw : normalised.split("\n")) { + String line = trimTrailingWhitespace(raw); + if (line.isEmpty()) { + continue; + } + char first = line.charAt(0); + if (first != '+' && first != '-') { + continue; + } + // Skip file-level headers: "+++", "---", "diff --git" + if (line.startsWith("+++") || line.startsWith("---")) { + continue; + } + if (line.startsWith("diff ")) { + continue; + } + lines.add(line); + } + return lines; + } + + private static String trimTrailingWhitespace(String s) { + int end = s.length(); + while (end > 0 && Character.isWhitespace(s.charAt(end - 1))) { + end--; + } + return s.substring(0, end); + } + + private static String bytesToHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(bytes.length * 2); + for (byte b : bytes) { + sb.append(String.format("%02x", b & 0xff)); + } + return sb.toString(); + } +} diff --git a/java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/processor/analysis/BranchAnalysisProcessorTest.java b/java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/processor/analysis/BranchAnalysisProcessorTest.java index c38168d3..6e7d5bac 100644 --- a/java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/processor/analysis/BranchAnalysisProcessorTest.java +++ b/java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/processor/analysis/BranchAnalysisProcessorTest.java @@ -28,7 +28,6 @@ import org.rostilos.codecrow.core.persistence.repository.branch.BranchRepository; import org.rostilos.codecrow.core.persistence.repository.codeanalysis.CodeAnalysisIssueRepository; import org.rostilos.codecrow.vcsclient.VcsClientProvider; -import org.springframework.context.ApplicationEventPublisher; import 
java.io.IOException; import java.util.*; @@ -73,9 +72,6 @@ class BranchAnalysisProcessorTest { @Mock private RagOperationsService ragOperationsService; - @Mock - private ApplicationEventPublisher eventPublisher; - @Mock private VcsOperationsService operationsService; @@ -255,14 +251,14 @@ void shouldThrowAnalysisLockedExceptionWhenLockCannotBeAcquired() throws IOExcep when(projectService.getProjectWithConnections(1L)).thenReturn(project); when(project.getId()).thenReturn(1L); - when(project.getName()).thenReturn("Test Project"); when(analysisLockService.acquireLockWithWait(any(), anyString(), any(), anyString(), any(), any())) .thenReturn(Optional.empty()); assertThatThrownBy(() -> processor.process(request, consumer)) .isInstanceOf(AnalysisLockedException.class); - verify(eventPublisher, times(2)).publishEvent(any()); + // No consumer or event interactions should occur when lock is not acquired + verifyNoInteractions(consumer); } // Note: Full process() integration tests are complex and require extensive mocking. 
diff --git a/java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/processor/analysis/PullRequestAnalysisProcessorTest.java b/java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/processor/analysis/PullRequestAnalysisProcessorTest.java index 0b36db83..0a8c555d 100644 --- a/java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/processor/analysis/PullRequestAnalysisProcessorTest.java +++ b/java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/processor/analysis/PullRequestAnalysisProcessorTest.java @@ -140,10 +140,13 @@ void shouldSuccessfullyProcessPRAnalysis() throws Exception { when(codeAnalysisService.getCodeAnalysisCache(anyLong(), anyString(), anyLong())) .thenReturn(Optional.empty()); - when(codeAnalysisService.getPreviousVersionCodeAnalysis(anyLong(), anyLong())) + when(codeAnalysisService.getAnalysisByCommitHash(anyLong(), anyString())) .thenReturn(Optional.empty()); + when(codeAnalysisService.getAllPrAnalyses(anyLong(), anyLong())) + .thenReturn(List.of()); - when(aiClientService.buildAiAnalysisRequest(any(), any(), any())).thenReturn(aiAnalysisRequest); + when(aiClientService.buildAiAnalysisRequest(any(), any(), any(), anyList())).thenReturn(aiAnalysisRequest); + when(aiAnalysisRequest.getRawDiff()).thenReturn(""); Map aiResponse = Map.of( "comment", "Review comment", @@ -151,7 +154,8 @@ void shouldSuccessfullyProcessPRAnalysis() throws Exception { ); when(aiAnalysisClient.performAnalysis(any(), any())).thenReturn(aiResponse); - when(codeAnalysisService.createAnalysisFromAiResponse(any(), any(), anyLong(), anyString(), anyString(), anyString(), any(), any())) + when(codeAnalysisService.createAnalysisFromAiResponse( + any(), any(), anyLong(), anyString(), anyString(), anyString(), any(), any(), any())) .thenReturn(codeAnalysis); Map result = processor.process(request, consumer, project); diff --git 
a/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/model/codeanalysis/CodeAnalysis.java b/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/model/codeanalysis/CodeAnalysis.java index ae4377fa..14bd145d 100644 --- a/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/model/codeanalysis/CodeAnalysis.java +++ b/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/model/codeanalysis/CodeAnalysis.java @@ -38,6 +38,9 @@ public class CodeAnalysis { @Column(name = "commit_hash", length = 40) private String commitHash; + @Column(name = "diff_fingerprint", length = 64) + private String diffFingerprint; + @Column(name = "target_branch_name") private String branchName; @@ -113,6 +116,9 @@ public void updateIssueCounts() { public String getCommitHash() { return commitHash; } public void setCommitHash(String commitHash) { this.commitHash = commitHash; } + public String getDiffFingerprint() { return diffFingerprint; } + public void setDiffFingerprint(String diffFingerprint) { this.diffFingerprint = diffFingerprint; } + public String getBranchName() { return branchName; } public void setBranchName(String branchName) { this.branchName = branchName; } diff --git a/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/persistence/repository/codeanalysis/CodeAnalysisRepository.java b/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/persistence/repository/codeanalysis/CodeAnalysisRepository.java index 27aff62d..516ca3c9 100644 --- a/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/persistence/repository/codeanalysis/CodeAnalysisRepository.java +++ b/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/persistence/repository/codeanalysis/CodeAnalysisRepository.java @@ -112,6 +112,46 @@ Page searchAnalyses( @Query("SELECT ca FROM CodeAnalysis ca WHERE ca.id = :id") Optional findByIdWithIssues(@Param("id") Long id); + /** + * Find the most recent ACCEPTED analysis for a 
project with the same diff fingerprint. + * Used for content-based cache: reuse analysis when the same code changes appear in a different PR. + */ + @org.springframework.data.jpa.repository.EntityGraph(attributePaths = { + "issues", + "project", + "project.workspace", + "project.vcsBinding", + "project.vcsBinding.vcsConnection", + "project.aiBinding" + }) + @Query("SELECT ca FROM CodeAnalysis ca WHERE ca.project.id = :projectId " + + "AND ca.diffFingerprint = :diffFingerprint " + + "AND ca.status = org.rostilos.codecrow.core.model.codeanalysis.AnalysisStatus.ACCEPTED " + + "ORDER BY ca.createdAt DESC LIMIT 1") + Optional findTopByProjectIdAndDiffFingerprint( + @Param("projectId") Long projectId, + @Param("diffFingerprint") String diffFingerprint); + + /** + * Find the most recent ACCEPTED analysis for a project + commit hash (any PR number). + * Fallback cache for close/reopen scenarios where the same commit gets a new PR number. + */ + @org.springframework.data.jpa.repository.EntityGraph(attributePaths = { + "issues", + "project", + "project.workspace", + "project.vcsBinding", + "project.vcsBinding.vcsConnection", + "project.aiBinding" + }) + @Query("SELECT ca FROM CodeAnalysis ca WHERE ca.project.id = :projectId " + + "AND ca.commitHash = :commitHash " + + "AND ca.status = org.rostilos.codecrow.core.model.codeanalysis.AnalysisStatus.ACCEPTED " + + "ORDER BY ca.createdAt DESC LIMIT 1") + Optional findTopByProjectIdAndCommitHash( + @Param("projectId") Long projectId, + @Param("commitHash") String commitHash); + /** * Find all analyses for a PR across all versions, ordered by version descending. * Used to provide LLM with full issue history including resolved issues. 
diff --git a/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/service/CodeAnalysisService.java b/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/service/CodeAnalysisService.java index d0fd4624..f34a58ea 100644 --- a/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/service/CodeAnalysisService.java +++ b/java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/service/CodeAnalysisService.java @@ -24,23 +24,20 @@ @Transactional public class CodeAnalysisService { - private final CodeAnalysisRepository analysisRepository; - private final CodeAnalysisIssueRepository issueRepository; private final CodeAnalysisRepository codeAnalysisRepository; + private final CodeAnalysisIssueRepository issueRepository; private final QualityGateRepository qualityGateRepository; private final QualityGateEvaluator qualityGateEvaluator; private static final Logger log = LoggerFactory.getLogger(CodeAnalysisService.class); @Autowired public CodeAnalysisService( - CodeAnalysisRepository analysisRepository, - CodeAnalysisIssueRepository issueRepository, CodeAnalysisRepository codeAnalysisRepository, + CodeAnalysisIssueRepository issueRepository, QualityGateRepository qualityGateRepository ) { - this.analysisRepository = analysisRepository; - this.issueRepository = issueRepository; this.codeAnalysisRepository = codeAnalysisRepository; + this.issueRepository = issueRepository; this.qualityGateRepository = qualityGateRepository; this.qualityGateEvaluator = new QualityGateEvaluator(); } @@ -54,6 +51,21 @@ public CodeAnalysis createAnalysisFromAiResponse( String commitHash, String vcsAuthorId, String vcsAuthorUsername + ) { + return createAnalysisFromAiResponse(project, analysisData, pullRequestId, + targetBranchName, sourceBranchName, commitHash, vcsAuthorId, vcsAuthorUsername, null); + } + + public CodeAnalysis createAnalysisFromAiResponse( + Project project, + Map analysisData, + Long pullRequestId, + String targetBranchName, + String 
sourceBranchName, + String commitHash, + String vcsAuthorId, + String vcsAuthorUsername, + String diffFingerprint ) { try { // Check if analysis already exists for this commit (handles webhook retries) @@ -74,6 +86,7 @@ public CodeAnalysis createAnalysisFromAiResponse( analysis.setBranchName(targetBranchName); analysis.setSourceBranchName(sourceBranchName); analysis.setPrVersion(previousVersion + 1); + analysis.setDiffFingerprint(diffFingerprint); return fillAnalysisData(analysis, analysisData, commitHash, vcsAuthorId, vcsAuthorUsername); } catch (Exception e) { @@ -107,11 +120,11 @@ private CodeAnalysis fillAnalysisData( Object issuesObj = analysisData.get("issues"); if (issuesObj == null) { log.warn("No issues found in analysis data"); - return analysisRepository.save(analysis); + return codeAnalysisRepository.save(analysis); } // Save analysis first to get its ID for resolution tracking - CodeAnalysis savedAnalysis = analysisRepository.save(analysis); + CodeAnalysis savedAnalysis = codeAnalysisRepository.save(analysis); Long analysisId = savedAnalysis.getId(); Long prNumber = savedAnalysis.getPrNumber(); @@ -168,17 +181,22 @@ else if (issuesObj instanceof Map) { log.info("Successfully created analysis with {} issues", savedAnalysis.getIssues().size()); - // Evaluate quality gate - QualityGate qualityGate = getQualityGateForAnalysis(savedAnalysis); - if (qualityGate != null) { - QualityGateResult qgResult = qualityGateEvaluator.evaluate(savedAnalysis, qualityGate); - savedAnalysis.setAnalysisResult(qgResult.result()); - log.info("Quality gate '{}' evaluated with result: {}", qualityGate.getName(), qgResult.result()); - } else { - log.info("No quality gate found for analysis, skipping evaluation"); + // Evaluate quality gate — wrapped defensively so a QG failure + // (e.g. 
detached entity, lazy-init) does not abort the entire analysis + try { + QualityGate qualityGate = getQualityGateForAnalysis(savedAnalysis); + if (qualityGate != null) { + QualityGateResult qgResult = qualityGateEvaluator.evaluate(savedAnalysis, qualityGate); + savedAnalysis.setAnalysisResult(qgResult.result()); + log.info("Quality gate '{}' evaluated with result: {}", qualityGate.getName(), qgResult.result()); + } else { + log.info("No quality gate found for analysis, skipping evaluation"); + } + } catch (Exception qgEx) { + log.warn("Quality gate evaluation failed, analysis will be saved without QG result: {}", qgEx.getMessage()); } - return analysisRepository.save(savedAnalysis); + return codeAnalysisRepository.save(savedAnalysis); } catch (Exception e) { log.error("Error creating analysis from AI response: {}", e.getMessage(), e); @@ -230,6 +248,100 @@ public Optional getCodeAnalysisCache(Long projectId, String commit return codeAnalysisRepository.findByProjectIdAndCommitHashAndPrNumber(projectId, commitHash, prNumber).stream().findFirst(); } + /** + * Fallback cache lookup by commit hash only (ignoring PR number). + * Handles close/reopen scenarios where the same commit gets a new PR number. + */ + public Optional getAnalysisByCommitHash(Long projectId, String commitHash) { + return codeAnalysisRepository.findTopByProjectIdAndCommitHash(projectId, commitHash); + } + + /** + * Content-based cache lookup by diff fingerprint. + * Handles branch-cascade flows where the same code changes appear in different PRs + * (e.g. feature→release analyzed, then release→main opens with the same changes). + */ + public Optional getAnalysisByDiffFingerprint(Long projectId, String diffFingerprint) { + if (diffFingerprint == null || diffFingerprint.isBlank()) { + return Optional.empty(); + } + return codeAnalysisRepository.findTopByProjectIdAndDiffFingerprint(projectId, diffFingerprint); + } + + /** + * Clone an existing analysis for a new PR. 
+ * Creates a new CodeAnalysis row with cloned issues, linked to the new PR identity. + * Used when a fingerprint/commit-hash cache hit matches a different PR. + * + * // TODO: Option B — LIGHTWEIGHT mode: instead of full clone, reuse Stage 1 issues + * // but re-run Stage 2 cross-file analysis against the new target branch context. + * // This would catch interaction differences when target branches differ. + * + * // TODO: Consider tracking storage growth from cloned analyses. If it becomes significant, + * // explore referencing the original analysis instead of deep-copying issues. + */ + public CodeAnalysis cloneAnalysisForPr( + CodeAnalysis source, + Project project, + Long newPrNumber, + String commitHash, + String targetBranchName, + String sourceBranchName, + String diffFingerprint + ) { + // Guard against duplicates (same idempotency check as createAnalysisFromAiResponse) + Optional existing = codeAnalysisRepository + .findByProjectIdAndCommitHashAndPrNumber(project.getId(), commitHash, newPrNumber); + if (existing.isPresent()) { + log.info("Cloned analysis already exists for project={}, commit={}, pr={}. 
Returning existing.", + project.getId(), commitHash, newPrNumber); + return existing.get(); + } + + int previousVersion = codeAnalysisRepository.findMaxPrVersion(project.getId(), newPrNumber).orElse(0); + + CodeAnalysis clone = new CodeAnalysis(); + clone.setProject(project); + clone.setAnalysisType(source.getAnalysisType()); + clone.setPrNumber(newPrNumber); + clone.setCommitHash(commitHash); + clone.setDiffFingerprint(diffFingerprint); + clone.setBranchName(targetBranchName); + clone.setSourceBranchName(sourceBranchName); + clone.setComment(source.getComment()); + clone.setStatus(source.getStatus()); + clone.setAnalysisResult(source.getAnalysisResult()); + clone.setPrVersion(previousVersion + 1); + + // Save first to get an ID + CodeAnalysis saved = codeAnalysisRepository.save(clone); + + // Deep-copy issues + for (CodeAnalysisIssue srcIssue : source.getIssues()) { + CodeAnalysisIssue issueClone = new CodeAnalysisIssue(); + issueClone.setSeverity(srcIssue.getSeverity()); + issueClone.setFilePath(srcIssue.getFilePath()); + issueClone.setLineNumber(srcIssue.getLineNumber()); + issueClone.setReason(srcIssue.getReason()); + issueClone.setSuggestedFixDescription(srcIssue.getSuggestedFixDescription()); + issueClone.setSuggestedFixDiff(srcIssue.getSuggestedFixDiff()); + issueClone.setIssueCategory(srcIssue.getIssueCategory()); + issueClone.setResolved(srcIssue.isResolved()); + issueClone.setResolvedDescription(srcIssue.getResolvedDescription()); + issueClone.setVcsAuthorId(srcIssue.getVcsAuthorId()); + issueClone.setVcsAuthorUsername(srcIssue.getVcsAuthorUsername()); + saved.addIssue(issueClone); + } + + saved.updateIssueCounts(); + CodeAnalysis result = codeAnalysisRepository.save(saved); + log.info("Cloned analysis {} → {} for PR {} (fingerprint={}, {} issues)", + source.getId(), result.getId(), newPrNumber, + diffFingerprint != null ? diffFingerprint.substring(0, 8) + "..." 
: "null", + result.getIssues().size()); + return result; + } + public Optional getPreviousVersionCodeAnalysis(Long projectId, Long prNumber) { return codeAnalysisRepository.findByProjectIdAndPrNumberWithMaxPrVersion(projectId, prNumber); } @@ -400,40 +512,40 @@ public CodeAnalysis createAnalysis(Project project, AnalysisType analysisType) { analysis.setProject(project); analysis.setAnalysisType(analysisType); analysis.setStatus(AnalysisStatus.PENDING); - return analysisRepository.save(analysis); + return codeAnalysisRepository.save(analysis); } public CodeAnalysis saveAnalysis(CodeAnalysis analysis) { analysis.updateIssueCounts(); - return analysisRepository.save(analysis); + return codeAnalysisRepository.save(analysis); } public Optional findById(Long id) { - return analysisRepository.findById(id); + return codeAnalysisRepository.findById(id); } public List findByProjectId(Long projectId) { - return analysisRepository.findByProjectIdOrderByCreatedAtDesc(projectId); + return codeAnalysisRepository.findByProjectIdOrderByCreatedAtDesc(projectId); } public List findByProjectIdAndType(Long projectId, AnalysisType analysisType) { - return analysisRepository.findByProjectIdAndAnalysisTypeOrderByCreatedAtDesc(projectId, analysisType); + return codeAnalysisRepository.findByProjectIdAndAnalysisTypeOrderByCreatedAtDesc(projectId, analysisType); } public Optional findByProjectIdAndPrNumber(Long projectId, Long prNumber) { - return analysisRepository.findByProjectIdAndPrNumber(projectId, prNumber); + return codeAnalysisRepository.findByProjectIdAndPrNumber(projectId, prNumber); } public Optional findByProjectIdAndPrNumberAndPrVersion(Long projectId, Long prNumber, int prVersion) { - return analysisRepository.findByProjectIdAndPrNumberAndPrVersion(projectId, prNumber, prVersion); + return codeAnalysisRepository.findByProjectIdAndPrNumberAndPrVersion(projectId, prNumber, prVersion); } public List findByProjectIdAndDateRange(Long projectId, OffsetDateTime startDate, 
OffsetDateTime endDate) { - return analysisRepository.findByProjectIdAndDateRange(projectId, startDate, endDate); + return codeAnalysisRepository.findByProjectIdAndDateRange(projectId, startDate, endDate); } public List findByProjectIdWithHighSeverityIssues(Long projectId) { - return analysisRepository.findByProjectIdWithHighSeverityIssues(projectId); + return codeAnalysisRepository.findByProjectIdWithHighSeverityIssues(projectId); } /** @@ -451,16 +563,16 @@ public org.springframework.data.domain.Page searchAnalyses( Long prNumber, AnalysisStatus status, org.springframework.data.domain.Pageable pageable) { - return analysisRepository.searchAnalyses(projectId, prNumber, status, pageable); + return codeAnalysisRepository.searchAnalyses(projectId, prNumber, status, pageable); } public Optional findLatestByProjectId(Long projectId) { - return analysisRepository.findLatestByProjectId(projectId); + return codeAnalysisRepository.findLatestByProjectId(projectId); } public AnalysisStats getProjectAnalysisStats(Long projectId) { - long totalAnalyses = analysisRepository.countByProjectId(projectId); - Double avgIssues = analysisRepository.getAverageIssuesPerAnalysis(projectId); + long totalAnalyses = codeAnalysisRepository.countByProjectId(projectId); + Double avgIssues = codeAnalysisRepository.getAverageIssuesPerAnalysis(projectId); long highSeverityCount = issueRepository.countByProjectIdAndSeverity(projectId, IssueSeverity.HIGH); long mediumSeverityCount = issueRepository.countByProjectIdAndSeverity(projectId, IssueSeverity.MEDIUM); @@ -493,11 +605,11 @@ public void markIssueAsResolved(Long issueId) { } public void deleteAnalysis(Long analysisId) { - analysisRepository.deleteById(analysisId); + codeAnalysisRepository.deleteById(analysisId); } public void deleteAllAnalysesByProjectId(Long projectId) { - analysisRepository.deleteByProjectId(projectId); + codeAnalysisRepository.deleteByProjectId(projectId); } public static class AnalysisStats { diff --git 
a/java-ecosystem/libs/core/src/main/resources/db/migration/1.4.0/V1.4.0__add_diff_fingerprint_to_code_analysis.sql b/java-ecosystem/libs/core/src/main/resources/db/migration/1.4.0/V1.4.0__add_diff_fingerprint_to_code_analysis.sql new file mode 100644 index 00000000..9a68490a --- /dev/null +++ b/java-ecosystem/libs/core/src/main/resources/db/migration/1.4.0/V1.4.0__add_diff_fingerprint_to_code_analysis.sql @@ -0,0 +1,9 @@ +-- Add diff_fingerprint column for content-based analysis caching. +-- Allows reusing analysis results when the same code changes appear in different PRs +-- (e.g. close/reopen with a new PR number, or branch-cascade flows like feature→release→main). +ALTER TABLE code_analysis ADD COLUMN IF NOT EXISTS diff_fingerprint VARCHAR(64); + +-- Index for fingerprint-based cache lookups: (project_id, diff_fingerprint) +CREATE INDEX IF NOT EXISTS idx_code_analysis_project_diff_fingerprint + ON code_analysis (project_id, diff_fingerprint) + WHERE diff_fingerprint IS NOT NULL; diff --git a/java-ecosystem/libs/core/src/main/resources/db/migration/1.4.0/V1.4.1__deduplicate_branch_issues.sql b/java-ecosystem/libs/core/src/main/resources/db/migration/1.4.0/V1.4.1__deduplicate_branch_issues.sql new file mode 100644 index 00000000..8ce9d6e9 --- /dev/null +++ b/java-ecosystem/libs/core/src/main/resources/db/migration/1.4.0/V1.4.1__deduplicate_branch_issues.sql @@ -0,0 +1,59 @@ +-- V1.4.1: Remove duplicate branch_issue rows that accumulated because the +-- deduplication key was based on code_analysis_issue_id (database PK) rather +-- than on the issue's semantic content. Each PR analysis creates fresh +-- CodeAnalysisIssue rows with new IDs, so the same logical issue +-- (same file, line, severity, category) ended up with N BranchIssue rows. +-- +-- Strategy: for each (branch_id, file_path, line_number, severity, category) +-- group keep only the row with the LOWEST id (oldest / first-detected) and +-- delete the rest. 
Afterwards recompute the denormalized counters on branch. + +-- 1. Delete duplicate branch_issues, keeping the first (lowest id) per group +DELETE FROM branch_issue +WHERE id NOT IN ( + SELECT keeper_id FROM ( + SELECT MIN(bi.id) AS keeper_id + FROM branch_issue bi + JOIN code_analysis_issue cai ON bi.code_analysis_issue_id = cai.id + GROUP BY bi.branch_id, + cai.file_path, + cai.line_number, + cai.severity, + COALESCE(cai.issue_category, '__NONE__') + ) AS keepers +); + +-- 2. Recompute denormalized branch issue counts +UPDATE branch b SET + total_issues = COALESCE(sub.total_unresolved, 0), + high_severity_count = COALESCE(sub.high_count, 0), + medium_severity_count = COALESCE(sub.medium_count, 0), + low_severity_count = COALESCE(sub.low_count, 0), + info_severity_count = COALESCE(sub.info_count, 0), + resolved_count = COALESCE(sub.resolved_total, 0), + updated_at = NOW() +FROM ( + SELECT + bi.branch_id, + COUNT(*) FILTER (WHERE bi.is_resolved = false) AS total_unresolved, + COUNT(*) FILTER (WHERE bi.is_resolved = false AND cai.severity = 'HIGH') AS high_count, + COUNT(*) FILTER (WHERE bi.is_resolved = false AND cai.severity = 'MEDIUM') AS medium_count, + COUNT(*) FILTER (WHERE bi.is_resolved = false AND cai.severity = 'LOW') AS low_count, + COUNT(*) FILTER (WHERE bi.is_resolved = false AND cai.severity = 'INFO') AS info_count, + COUNT(*) FILTER (WHERE bi.is_resolved = true) AS resolved_total + FROM branch_issue bi + JOIN code_analysis_issue cai ON bi.code_analysis_issue_id = cai.id + GROUP BY bi.branch_id +) sub +WHERE b.id = sub.branch_id; + +-- Also zero-out branches that lost all issues after cleanup +UPDATE branch SET + total_issues = 0, + high_severity_count = 0, + medium_severity_count = 0, + low_severity_count = 0, + info_severity_count = 0, + resolved_count = 0, + updated_at = NOW() +WHERE id NOT IN (SELECT DISTINCT branch_id FROM branch_issue); diff --git 
a/java-ecosystem/libs/rag-engine/src/main/java/org/rostilos/codecrow/ragengine/client/RagPipelineClient.java b/java-ecosystem/libs/rag-engine/src/main/java/org/rostilos/codecrow/ragengine/client/RagPipelineClient.java index efb34d84..44456325 100644 --- a/java-ecosystem/libs/rag-engine/src/main/java/org/rostilos/codecrow/ragengine/client/RagPipelineClient.java +++ b/java-ecosystem/libs/rag-engine/src/main/java/org/rostilos/codecrow/ragengine/client/RagPipelineClient.java @@ -22,16 +22,19 @@ public class RagPipelineClient { private final ObjectMapper objectMapper; private final String ragApiUrl; private final boolean ragEnabled; + private final String serviceSecret; public RagPipelineClient( @Value("${codecrow.rag.api.url:http://rag-pipeline:8001}") String ragApiUrl, @Value("${codecrow.rag.api.enabled:false}") boolean ragEnabled, @Value("${codecrow.rag.api.timeout.connect:30}") int connectTimeout, @Value("${codecrow.rag.api.timeout.read:120}") int readTimeout, - @Value("${codecrow.rag.api.timeout.indexing:14400}") int indexingTimeout + @Value("${codecrow.rag.api.timeout.indexing:14400}") int indexingTimeout, + @Value("${codecrow.rag.api.secret:}") String serviceSecret ) { this.ragApiUrl = ragApiUrl; this.ragEnabled = ragEnabled; + this.serviceSecret = serviceSecret != null ? 
serviceSecret : ""; this.httpClient = new OkHttpClient.Builder() .connectTimeout(connectTimeout, java.util.concurrent.TimeUnit.SECONDS) @@ -209,10 +212,11 @@ public void deleteIndex(String workspace, String project, String branch) throws } String url = String.format("%s/index/%s/%s/%s", ragApiUrl, workspace, project, branch); - Request request = new Request.Builder() + Request.Builder builder = new Request.Builder() .url(url) - .delete() - .build(); + .delete(); + addAuthHeader(builder); + Request request = builder.build(); try (Response response = httpClient.newCall(request).execute()) { if (!response.isSuccessful()) { @@ -241,10 +245,11 @@ public boolean deleteBranch(String workspace, String project, String branch) thr String encodedBranch = java.net.URLEncoder.encode(branch, java.nio.charset.StandardCharsets.UTF_8); String url = String.format("%s/index/%s/%s/branch/%s", ragApiUrl, workspace, project, encodedBranch); - Request request = new Request.Builder() + Request.Builder builder = new Request.Builder() .url(url) - .delete() - .build(); + .delete(); + addAuthHeader(builder); + Request request = builder.build(); try (Response response = httpClient.newCall(request).execute()) { if (response.isSuccessful()) { @@ -271,10 +276,11 @@ public List getIndexedBranches(String workspace, String project) { try { String url = String.format("%s/index/%s/%s/branches", ragApiUrl, workspace, project); - Request request = new Request.Builder() + Request.Builder builder = new Request.Builder() .url(url) - .get() - .build(); + .get(); + addAuthHeader(builder); + Request request = builder.build(); try (Response response = httpClient.newCall(request).execute()) { if (response.isSuccessful() && response.body() != null) { @@ -311,10 +317,11 @@ public List> getIndexedBranchesWithStats(String workspace, S try { String url = String.format("%s/index/%s/%s/branches", ragApiUrl, workspace, project); - Request request = new Request.Builder() + Request.Builder builder = new Request.Builder() 
.url(url) - .get() - .build(); + .get(); + addAuthHeader(builder); + Request request = builder.build(); try (Response response = httpClient.newCall(request).execute()) { if (response.isSuccessful() && response.body() != null) { @@ -376,10 +383,11 @@ public boolean isHealthy() { } try { - Request request = new Request.Builder() + Request.Builder builder = new Request.Builder() .url(ragApiUrl + "/health") - .get() - .build(); + .get(); + addAuthHeader(builder); + Request request = builder.build(); try (Response response = httpClient.newCall(request).execute()) { return response.isSuccessful(); @@ -398,15 +406,25 @@ private Map postLongRunning(String url, Map payl return doRequest(url, payload, longRunningHttpClient); } + /** + * Adds the x-service-secret header to the request if a secret is configured. + */ + private void addAuthHeader(Request.Builder builder) { + if (!serviceSecret.isEmpty()) { + builder.addHeader("x-service-secret", serviceSecret); + } + } + @SuppressWarnings("unchecked") private Map doRequest(String url, Map payload, OkHttpClient client) throws IOException { String json = objectMapper.writeValueAsString(payload); RequestBody body = RequestBody.create(json, JSON); - Request request = new Request.Builder() + Request.Builder builder = new Request.Builder() .url(url) - .post(body) - .build(); + .post(body); + addAuthHeader(builder); + Request request = builder.build(); try (Response response = client.newCall(request).execute()) { String responseBody = response.body() != null ? 
response.body().string() : "{}"; diff --git a/java-ecosystem/libs/rag-engine/src/test/java/org/rostilos/codecrow/ragengine/client/RagPipelineClientTest.java b/java-ecosystem/libs/rag-engine/src/test/java/org/rostilos/codecrow/ragengine/client/RagPipelineClientTest.java index 274fc1c5..f2694048 100644 --- a/java-ecosystem/libs/rag-engine/src/test/java/org/rostilos/codecrow/ragengine/client/RagPipelineClientTest.java +++ b/java-ecosystem/libs/rag-engine/src/test/java/org/rostilos/codecrow/ragengine/client/RagPipelineClientTest.java @@ -33,7 +33,8 @@ void setUp() throws IOException { true, // enabled 5, // connect timeout 10, // read timeout - 20 // indexing timeout + 20, // indexing timeout + "test-secret" // service secret ); objectMapper = new ObjectMapper(); @@ -71,7 +72,7 @@ void testDeleteFiles_WhenDisabled() throws Exception { RagPipelineClient disabledClient = new RagPipelineClient( mockWebServer.url("/").toString(), false, // disabled - 5, 10, 20 + 5, 10, 20, "" ); List files = List.of("file1.java"); @@ -123,7 +124,7 @@ void testSemanticSearch_WhenDisabled() throws Exception { RagPipelineClient disabledClient = new RagPipelineClient( mockWebServer.url("/").toString(), false, - 5, 10, 20 + 5, 10, 20, "" ); Map result = disabledClient.semanticSearch( @@ -159,7 +160,7 @@ void testGetPRContext_WhenDisabled() throws Exception { RagPipelineClient disabledClient = new RagPipelineClient( mockWebServer.url("/").toString(), false, - 5, 10, 20 + 5, 10, 20, "" ); Map result = disabledClient.getPRContext( @@ -226,7 +227,7 @@ void testUpdateFiles_WhenDisabled() throws Exception { RagPipelineClient disabledClient = new RagPipelineClient( mockWebServer.url("/").toString(), false, - 5, 10, 20 + 5, 10, 20, "" ); Map result = disabledClient.updateFiles( @@ -243,7 +244,8 @@ void testConstructor_WithDefaults() { true, 30, 120, - 14400 + 14400, + "" ); assertThat(defaultClient).isNotNull(); @@ -268,4 +270,35 @@ void testNetworkError_ThrowsIOException() throws IOException { )) 
.isInstanceOf(IOException.class); } + + @Test + void testServiceSecretHeader_SentOnRequests() throws Exception { + Map mockResponse = Map.of("status", "success"); + mockWebServer.enqueue(new MockResponse() + .setBody(objectMapper.writeValueAsString(mockResponse)) + .addHeader("Content-Type", "application/json")); + + client.deleteFiles(List.of("file.java"), "ws", "proj", "main"); + + RecordedRequest request = mockWebServer.takeRequest(); + assertThat(request.getHeader("x-service-secret")).isEqualTo("test-secret"); + } + + @Test + void testServiceSecretHeader_NotSentWhenEmpty() throws Exception { + RagPipelineClient noSecretClient = new RagPipelineClient( + mockWebServer.url("/").toString(), + true, 5, 10, 20, "" + ); + + Map mockResponse = Map.of("status", "success"); + mockWebServer.enqueue(new MockResponse() + .setBody(objectMapper.writeValueAsString(mockResponse)) + .addHeader("Content-Type", "application/json")); + + noSecretClient.deleteFiles(List.of("file.java"), "ws", "proj", "main"); + + RecordedRequest request = mockWebServer.takeRequest(); + assertThat(request.getHeader("x-service-secret")).isNull(); + } } diff --git a/java-ecosystem/libs/vcs-client/src/main/java/org/rostilos/codecrow/vcsclient/bitbucket/cloud/actions/CheckFileExistsInBranchAction.java b/java-ecosystem/libs/vcs-client/src/main/java/org/rostilos/codecrow/vcsclient/bitbucket/cloud/actions/CheckFileExistsInBranchAction.java index 143c017b..c61a5e93 100644 --- a/java-ecosystem/libs/vcs-client/src/main/java/org/rostilos/codecrow/vcsclient/bitbucket/cloud/actions/CheckFileExistsInBranchAction.java +++ b/java-ecosystem/libs/vcs-client/src/main/java/org/rostilos/codecrow/vcsclient/bitbucket/cloud/actions/CheckFileExistsInBranchAction.java @@ -15,10 +15,14 @@ /** * Action to check if a file exists in a specific branch on Bitbucket Cloud. * Uses the Bitbucket Cloud API to verify file existence without downloading content. 
+ * Includes retry logic with exponential backoff for rate-limited (429) responses. */ public class CheckFileExistsInBranchAction { private static final Logger log = LoggerFactory.getLogger(CheckFileExistsInBranchAction.class); + private static final int MAX_RETRIES = 3; + private static final long INITIAL_BACKOFF_MS = 2_000; + private final OkHttpClient authorizedOkHttpClient; public CheckFileExistsInBranchAction(OkHttpClient authorizedOkHttpClient) { @@ -28,18 +32,19 @@ public CheckFileExistsInBranchAction(OkHttpClient authorizedOkHttpClient) { /** * Checks if a file exists in the specified branch. * Uses HEAD request to check existence without downloading file content. + * Retries with exponential backoff on 429 (rate-limit) responses. * * @param workspace workspace or team slug * @param repoSlug repository slug * @param branchName branch name (or commit hash) * @param filePath file path relative to repository root * @return true if file exists in the branch, false otherwise - * @throws IOException on network errors + * @throws IOException on network errors after retries are exhausted */ public boolean fileExists(String workspace, String repoSlug, String branchName, String filePath) throws IOException { String ws = Optional.ofNullable(workspace).orElse(""); String encodedPath = encodeFilePath(filePath); - + // Use Bitbucket Cloud API endpoint to check file existence with HEAD request String apiUrl = String.format("%s/repositories/%s/%s/src/%s/%s", BitbucketCloudConfig.BITBUCKET_API_BASE, ws, repoSlug, branchName, encodedPath); @@ -49,22 +54,53 @@ public boolean fileExists(String workspace, String repoSlug, String branchName, .head() .build(); - try (Response resp = authorizedOkHttpClient.newCall(req).execute()) { - if (resp.isSuccessful()) { - return true; - } else if (resp.code() == 404) { - log.debug("File not found: {} in branch {} (workspace: {}, repo: {})", - filePath, branchName, workspace, repoSlug); - return false; - } else { - String msg = 
String.format("Unexpected response %d when checking file existence: %s in branch %s", - resp.code(), filePath, branchName); - log.warn(msg); - throw new IOException(msg); + int attempt = 0; + long backoffMs = INITIAL_BACKOFF_MS; + + while (true) { + try (Response resp = authorizedOkHttpClient.newCall(req).execute()) { + if (resp.isSuccessful()) { + return true; + } else if (resp.code() == 404) { + log.debug("File not found: {} in branch {} (workspace: {}, repo: {})", + filePath, branchName, workspace, repoSlug); + return false; + } else if (resp.code() == 429 && attempt < MAX_RETRIES) { + // Rate limited — honour Retry-After header if present, otherwise exponential backoff + long waitMs = backoffMs; + String retryAfter = resp.header("Retry-After"); + if (retryAfter != null) { + try { + waitMs = Long.parseLong(retryAfter.trim()) * 1_000; + } catch (NumberFormatException ignored) { + // Use default backoff + } + } + log.info("Rate limited (429) checking file {}. Retrying in {}ms (attempt {}/{})", + filePath, waitMs, attempt + 1, MAX_RETRIES); + try { + Thread.sleep(waitMs); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted while waiting for rate-limit backoff", ie); + } + attempt++; + backoffMs *= 2; // Exponential backoff + } else { + String msg = String.format("Unexpected response %d when checking file existence: %s in branch %s", + resp.code(), filePath, branchName); + log.warn(msg); + throw new IOException(msg); + } + } catch (IOException e) { + if (attempt < MAX_RETRIES && e.getMessage() != null && e.getMessage().contains("429")) { + attempt++; + backoffMs *= 2; + continue; + } + log.error("Failed to check file existence for {}: {}", filePath, e.getMessage(), e); + throw e; } - } catch (IOException e) { - log.error("Failed to check file existence for {}: {}", filePath, e.getMessage(), e); - throw e; } } diff --git 
a/java-ecosystem/services/pipeline-agent/src/main/java/org/rostilos/codecrow/pipelineagent/generic/processor/WebhookAsyncProcessor.java b/java-ecosystem/services/pipeline-agent/src/main/java/org/rostilos/codecrow/pipelineagent/generic/processor/WebhookAsyncProcessor.java index 064c0061..a1c67990 100644 --- a/java-ecosystem/services/pipeline-agent/src/main/java/org/rostilos/codecrow/pipelineagent/generic/processor/WebhookAsyncProcessor.java +++ b/java-ecosystem/services/pipeline-agent/src/main/java/org/rostilos/codecrow/pipelineagent/generic/processor/WebhookAsyncProcessor.java @@ -309,6 +309,13 @@ private void initializeProjectAssociations(Project project) { aiConn.getProviderKey(); } } + + // Force initialization of Quality Gate (lazy @ManyToOne) — accessed by + // CodeAnalysisService.getQualityGateForAnalysis() after session closes + var qualityGate = project.getQualityGate(); + if (qualityGate != null) { + qualityGate.getName(); + } } /** diff --git a/python-ecosystem/mcp-client/api/middleware.py b/python-ecosystem/mcp-client/api/middleware.py index 9e78d0d5..96bef76c 100644 --- a/python-ecosystem/mcp-client/api/middleware.py +++ b/python-ecosystem/mcp-client/api/middleware.py @@ -23,6 +23,10 @@ class ServiceSecretMiddleware(BaseHTTPMiddleware): def __init__(self, app, secret: str | None = None): super().__init__(app) self.secret = secret or os.environ.get("SERVICE_SECRET", "") + if self.secret: + logger.info("ServiceSecretMiddleware: secret configured (length=%d)", len(self.secret)) + else: + logger.warning("ServiceSecretMiddleware: no secret configured — auth disabled") async def dispatch(self, request: Request, call_next): # Skip auth for health/doc endpoints @@ -36,7 +40,13 @@ async def dispatch(self, request: Request, call_next): provided = request.headers.get("x-service-secret", "") if provided != self.secret: logger.warning( - f"Unauthorized request to {request.url.path} from {request.client.host if request.client else 'unknown'}" + "Unauthorized request 
to %s from %s — " + "provided_len=%d expected_len=%d match=%s", + request.url.path, + request.client.host if request.client else "unknown", + len(provided), + len(self.secret), + provided == self.secret, ) return JSONResponse( status_code=401, diff --git a/python-ecosystem/mcp-client/service/command/command_service.py b/python-ecosystem/mcp-client/service/command/command_service.py index a0e59991..f8743e6a 100644 --- a/python-ecosystem/mcp-client/service/command/command_service.py +++ b/python-ecosystem/mcp-client/service/command/command_service.py @@ -28,7 +28,7 @@ class CommandService: MAX_STEPS_ASK = 40 def __init__(self): - load_dotenv() + load_dotenv(interpolate=False) self.default_jar_path = os.environ.get( "MCP_SERVER_JAR", "/app/codecrow-vcs-mcp-1.0.jar" diff --git a/python-ecosystem/mcp-client/service/review/review_service.py b/python-ecosystem/mcp-client/service/review/review_service.py index 1466f4c8..b3f4eb4e 100644 --- a/python-ecosystem/mcp-client/service/review/review_service.py +++ b/python-ecosystem/mcp-client/service/review/review_service.py @@ -34,7 +34,7 @@ class ReviewService: MAX_CONCURRENT_REVIEWS = int(os.environ.get("MAX_CONCURRENT_REVIEWS", "4")) def __init__(self): - load_dotenv() + load_dotenv(interpolate=False) self.default_jar_path = os.environ.get( "MCP_SERVER_JAR", #"/var/www/html/codecrow/codecrow-public/java-ecosystem/mcp-servers/vcs-mcp/target/codecrow-vcs-mcp-1.0.jar", diff --git a/python-ecosystem/rag-pipeline/main.py b/python-ecosystem/rag-pipeline/main.py index 4918eaa5..3e18e67c 100644 --- a/python-ecosystem/rag-pipeline/main.py +++ b/python-ecosystem/rag-pipeline/main.py @@ -12,7 +12,7 @@ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) -load_dotenv() +load_dotenv(interpolate=False) # Validate critical environment variables before starting def validate_environment(): diff --git a/python-ecosystem/rag-pipeline/src/rag_pipeline/api/middleware.py 
b/python-ecosystem/rag-pipeline/src/rag_pipeline/api/middleware.py index 9e78d0d5..96bef76c 100644 --- a/python-ecosystem/rag-pipeline/src/rag_pipeline/api/middleware.py +++ b/python-ecosystem/rag-pipeline/src/rag_pipeline/api/middleware.py @@ -23,6 +23,10 @@ class ServiceSecretMiddleware(BaseHTTPMiddleware): def __init__(self, app, secret: str | None = None): super().__init__(app) self.secret = secret or os.environ.get("SERVICE_SECRET", "") + if self.secret: + logger.info("ServiceSecretMiddleware: secret configured (length=%d)", len(self.secret)) + else: + logger.warning("ServiceSecretMiddleware: no secret configured — auth disabled") async def dispatch(self, request: Request, call_next): # Skip auth for health/doc endpoints @@ -36,7 +40,13 @@ async def dispatch(self, request: Request, call_next): provided = request.headers.get("x-service-secret", "") if provided != self.secret: logger.warning( - f"Unauthorized request to {request.url.path} from {request.client.host if request.client else 'unknown'}" + "Unauthorized request to %s from %s — " + "provided_len=%d expected_len=%d match=%s", + request.url.path, + request.client.host if request.client else "unknown", + len(provided), + len(self.secret), + provided == self.secret, ) return JSONResponse( status_code=401, diff --git a/python-ecosystem/rag-pipeline/src/rag_pipeline/models/config.py b/python-ecosystem/rag-pipeline/src/rag_pipeline/models/config.py index 18826395..2cffc100 100644 --- a/python-ecosystem/rag-pipeline/src/rag_pipeline/models/config.py +++ b/python-ecosystem/rag-pipeline/src/rag_pipeline/models/config.py @@ -51,7 +51,7 @@ def get_embedding_dim_for_model(model: str) -> int: class RAGConfig(BaseModel): """Configuration for RAG pipeline""" - load_dotenv() + load_dotenv(interpolate=False) # Qdrant for vector storage qdrant_url: str = Field(default_factory=lambda: os.getenv("QDRANT_URL", "http://localhost:6333")) qdrant_collection_prefix: str = Field(default_factory=lambda: 
os.getenv("QDRANT_COLLECTION_PREFIX", "codecrow"))