From c03aae2b95f27b5d6b9abcf76d7af92c6c159916 Mon Sep 17 00:00:00 2001 From: Keith Lang Date: Thu, 2 Apr 2026 11:02:23 +1100 Subject: [PATCH] fix(discovery): resolve Claude desktop sessions with parent-directory CWD When the Claude desktop app launches a session, the manifest's `cwd` often points to a parent directory (e.g. ~/Documents/GitHub) rather than the specific repo. Since that directory isn't a git repo, `resolve_repo_root_with_fallbacks` fails and the session is silently skipped, leaving the repo invisible on the Cadence dashboard. Add a transcript-scanning fallback: when CWD resolution fails, read the session's JSONL transcript and extract absolute file paths from tool-call inputs (file_path, path, old_path, new_path, directory fields) and cd commands. Try each candidate directory against git rev-parse until a repo root is found. Safety bounds: - Max 20 candidate directories tried (caps git subprocess calls) - Max 16 levels of JSON nesting (prevents stack overflow) - Skips text/output/result/stdout fields (avoids noise from command output) - Only absolute paths considered (relative paths filtered out) The fallback is wired into both the backfill loop and the background monitor tick path. Co-Authored-By: Claude Opus 4.6 --- src/main.rs | 132 +++++++++++++++++++++------ src/scanner.rs | 242 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 344 insertions(+), 30 deletions(-) diff --git a/src/main.rs b/src/main.rs index 5c47a77d..5126e969 100644 --- a/src/main.rs +++ b/src/main.rs @@ -779,6 +779,43 @@ async fn session_log_content_async(log: &agents::SessionLog) -> Option { } } +/// Maximum number of candidate directories to try when resolving a repo +/// from transcript content. Prevents excessive `git rev-parse` calls when +/// a transcript references many unique directories. 
+const TRANSCRIPT_CWD_MAX_CANDIDATES: usize = 20; + +/// Attempt to resolve a session's repo root by scanning transcript content +/// for file paths when the recorded `cwd` doesn't resolve to a git repo. +/// +/// This is a fallback for sessions (typically Claude desktop app) where the +/// manifest's `cwd` points to a parent directory rather than a specific repo. +/// Reads the transcript, extracts absolute file paths from tool-call inputs, +/// and tries to resolve each to a git repo root. +/// +/// Caps the number of candidates tried at [`TRANSCRIPT_CWD_MAX_CANDIDATES`] +/// to bound the cost of git subprocess calls. +async fn resolve_repo_from_transcript( + log: &agents::SessionLog, + repo_root_cache: &std::collections::HashMap, +) -> Option { + let content = session_log_content_async(log).await?; + let candidate_cwds = scanner::extract_candidate_cwds_from_transcript(&content); + + for candidate_cwd in candidate_cwds.iter().take(TRANSCRIPT_CWD_MAX_CANDIDATES) { + // Check cache first. + if let Some(cached) = repo_root_cache.get(candidate_cwd.as_str()) { + return Some(cached.clone()); + } + + let cwd_path = std::path::Path::new(&candidate_cwd); + if let Ok(resolution) = git::resolve_repo_root_with_fallbacks(cwd_path).await { + return Some(resolution); + } + } + + None +} + /// Parse a duration string like "7d", "30d", "1d" into seconds. /// /// Currently only supports the `d` format (number of days). 
@@ -1437,36 +1474,55 @@ async fn run_backfill_inner_with_invocation( let resolved = match git::resolve_repo_root_with_fallbacks(cwd_path).await { Ok(resolution) => resolution, Err(diagnostics) => { - ::tracing::warn!( - event = "session_discovery_skipped", - file = file_path.as_str(), - session_id = ?metadata.session_id, - cwd = cwd.as_str(), - requested_cwd = diagnostics.requested_cwd.to_string_lossy().to_string(), - reason = "repo_root_lookup_failed", - error = ?diagnostics.direct_error, - cwd_exists = diagnostics.cwd_exists, - nearest_existing_ancestor = ?diagnostics - .nearest_existing_ancestor - .map(|path| path.to_string_lossy().to_string()), - ancestor_error = ?diagnostics.ancestor_error, - candidate_repo_names = ?diagnostics.candidate_repo_names, - candidate_owner_repo_roots = ?diagnostics - .candidate_owner_repo_roots - .into_iter() - .map(|path| path.to_string_lossy().to_string()) - .collect::>(), - matched_worktree_owner_repo_root = ?diagnostics - .matched_worktree_owner_repo_root - .map(|path| path.to_string_lossy().to_string()), - matched_worktree_path = ?diagnostics - .matched_worktree_path - .map(|path| path.to_string_lossy().to_string()), - ); - if let Some(ref pb) = progress { - pb.inc(1); + // Fallback: scan transcript content for file paths that + // reveal the actual working directory. This handles cases + // like Claude desktop sessions where the manifest's `cwd` + // points to a parent directory (e.g. ~/Documents/GitHub) + // rather than the specific repo. 
+ let transcript_resolution = + resolve_repo_from_transcript(log, &repo_root_cache).await; + + if let Some(resolution) = transcript_resolution { + ::tracing::info!( + event = "session_cwd_resolved_from_transcript", + file = file_path.as_str(), + session_id = ?metadata.session_id, + original_cwd = cwd.as_str(), + resolved_repo = resolution.repo_root.to_string_lossy().to_string(), + ); + resolution + } else { + ::tracing::warn!( + event = "session_discovery_skipped", + file = file_path.as_str(), + session_id = ?metadata.session_id, + cwd = cwd.as_str(), + requested_cwd = diagnostics.requested_cwd.to_string_lossy().to_string(), + reason = "repo_root_lookup_failed", + error = ?diagnostics.direct_error, + cwd_exists = diagnostics.cwd_exists, + nearest_existing_ancestor = ?diagnostics + .nearest_existing_ancestor + .map(|path| path.to_string_lossy().to_string()), + ancestor_error = ?diagnostics.ancestor_error, + candidate_repo_names = ?diagnostics.candidate_repo_names, + candidate_owner_repo_roots = ?diagnostics + .candidate_owner_repo_roots + .into_iter() + .map(|path| path.to_string_lossy().to_string()) + .collect::>(), + matched_worktree_owner_repo_root = ?diagnostics + .matched_worktree_owner_repo_root + .map(|path| path.to_string_lossy().to_string()), + matched_worktree_path = ?diagnostics + .matched_worktree_path + .map(|path| path.to_string_lossy().to_string()), + ); + if let Some(ref pb) = progress { + pb.inc(1); + } + continue; } - continue; } }; repo_root_cache.insert(cwd.clone(), resolved.clone()); @@ -1801,10 +1857,26 @@ async fn upload_incremental_sessions_globally( let resolved_repo = if let Some(cached) = repo_root_cache.get(&cwd) { cached.clone() } else { - let resolved = git::resolve_repo_root_with_fallbacks(std::path::Path::new(&cwd)) + let mut resolved = git::resolve_repo_root_with_fallbacks(std::path::Path::new(&cwd)) .await .ok() .map(|resolution| resolution.repo_root); + + // Fallback: scan transcript for file paths when CWD doesn't + // resolve to 
a repo (e.g. Claude desktop app parent-dir CWD). + if resolved.is_none() + && let Some(content) = session_log_content_async(&parsed.log).await + { + let candidates = scanner::extract_candidate_cwds_from_transcript(&content); + for candidate_cwd in candidates.iter().take(TRANSCRIPT_CWD_MAX_CANDIDATES) { + let cwd_path = std::path::Path::new(&candidate_cwd); + if let Ok(resolution) = git::resolve_repo_root_with_fallbacks(cwd_path).await { + resolved = Some(resolution.repo_root); + break; + } + } + } + repo_root_cache.insert(cwd.clone(), resolved.clone()); resolved }; diff --git a/src/scanner.rs b/src/scanner.rs index 779aa74d..2ba555ab 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -628,6 +628,146 @@ fn looks_like_file(path: &Path) -> bool { false } +/// Extract candidate working directories from a session transcript. +/// +/// Scans JSONL content line-by-line for absolute file paths in tool-call +/// inputs (e.g. `file_path`, `path`, `command` fields). Returns deduplicated +/// parent directories that could be resolved to git repo roots. +/// +/// This is used as a fallback when the session's recorded `cwd` doesn't +/// resolve to a git repository (e.g. Claude desktop app sessions where `cwd` +/// points to a parent directory like `~/Documents/GitHub`). +pub fn extract_candidate_cwds_from_transcript(content: &str) -> Vec { + let mut seen = std::collections::HashSet::new(); + let mut candidates = Vec::new(); + + for line in content.lines() { + let value: serde_json::Value = match serde_json::from_str(line) { + Ok(v) => v, + Err(_) => continue, + }; + + collect_absolute_paths_from_value(&value, &mut seen, &mut candidates); + } + + candidates +} + +/// Maximum recursion depth when walking nested JSON structures. +/// Prevents stack overflow on pathologically deep transcripts. +const TRANSCRIPT_PATH_MAX_DEPTH: usize = 16; + +/// Collect absolute file paths from a JSON value, resolving them to their +/// parent directory. Targets tool-call input fields. 
+/// +/// Recurses into nested objects and arrays up to `TRANSCRIPT_PATH_MAX_DEPTH` +/// to handle varied transcript formats. +fn collect_absolute_paths_from_value( + value: &serde_json::Value, + seen: &mut std::collections::HashSet, + candidates: &mut Vec, +) { + collect_paths_recursive(value, seen, candidates, 0); +} + +fn collect_paths_recursive( + value: &serde_json::Value, + seen: &mut std::collections::HashSet, + candidates: &mut Vec, + depth: usize, +) { + if depth > TRANSCRIPT_PATH_MAX_DEPTH { + return; + } + + // Check known path-bearing fields in tool-call inputs. + let path_keys = ["file_path", "path", "old_path", "new_path", "directory"]; + for key in &path_keys { + if let Some(path_str) = value.get(key).and_then(|v| v.as_str()) { + add_absolute_path_candidate(path_str, seen, candidates); + } + } + + // Check "command" field for `cd` commands that reveal working directories. + if let Some(cmd) = value.get("command").and_then(|v| v.as_str()) { + extract_cd_targets(cmd, seen, candidates); + } + + // Recurse into nested structures (e.g., content arrays, tool inputs). + match value { + serde_json::Value::Object(map) => { + for (key, val) in map { + // Skip large content fields unlikely to contain tool-call paths. + if key == "text" || key == "output" || key == "result" || key == "stdout" { + continue; + } + collect_paths_recursive(val, seen, candidates, depth + 1); + } + } + serde_json::Value::Array(items) => { + for item in items { + collect_paths_recursive(item, seen, candidates, depth + 1); + } + } + _ => {} + } +} + +/// Extract absolute directory paths from shell command strings. +/// +/// Splits on `&&` and `;` to isolate individual commands, then looks for +/// `cd /absolute/path` patterns. Handles quoted paths. +fn extract_cd_targets( + cmd: &str, + seen: &mut std::collections::HashSet, + candidates: &mut Vec, +) { + // Split on && and ; to get individual command segments. 
+ let segments: Vec<&str> = cmd.split("&&").flat_map(|part| part.split(';')).collect(); + + for segment in segments { + let trimmed = segment.trim(); + if let Some(rest) = trimmed.strip_prefix("cd ") { + let dir = rest.trim().trim_matches('"').trim_matches('\''); + // Guard against picking up flags or relative paths. + if dir.starts_with('/') || is_windows_drive_path(dir) { + add_dir_candidate(dir, seen, candidates); + } + } + } +} + +fn add_absolute_path_candidate( + path_str: &str, + seen: &mut std::collections::HashSet, + candidates: &mut Vec, +) { + if !path_str.starts_with('/') && !is_windows_drive_path(path_str) { + return; + } + + let path = Path::new(path_str); + let dir = if looks_like_file(path) { + path.parent().map(|p| p.to_string_lossy().to_string()) + } else { + Some(path_str.to_string()) + }; + + if let Some(dir) = dir { + add_dir_candidate(&dir, seen, candidates); + } +} + +fn add_dir_candidate( + dir: &str, + seen: &mut std::collections::HashSet, + candidates: &mut Vec, +) { + if !dir.is_empty() && seen.insert(dir.to_string()) { + candidates.push(dir.to_string()); + } +} + #[cfg(test)] fn collect_timestamp_candidates(value: &serde_json::Value, out: &mut Vec) { match value { @@ -1398,4 +1538,106 @@ also not json {{{{ let range = session_time_range(&file).await; assert!(range.is_none()); } + + // ----------------------------------------------------------------------- + // extract_candidate_cwds_from_transcript + // ----------------------------------------------------------------------- + + #[test] + fn test_extract_candidate_cwds_from_tool_use_file_paths() { + let content = [ + r#"{"type":"tool_use","name":"Read","input":{"file_path":"/Users/foo/code/my-repo/src/main.rs"}}"#, + r#"{"type":"tool_use","name":"Write","input":{"file_path":"/Users/foo/code/my-repo/README.md"}}"#, + r#"{"type":"tool_use","name":"Edit","input":{"file_path":"/Users/foo/code/other-repo/lib.rs"}}"#, + ] + .join("\n"); + + let candidates = 
extract_candidate_cwds_from_transcript(&content); + + assert!(candidates.contains(&"/Users/foo/code/my-repo/src".to_string())); + assert!(candidates.contains(&"/Users/foo/code/my-repo".to_string())); + assert!(candidates.contains(&"/Users/foo/code/other-repo".to_string())); + } + + #[test] + fn test_extract_candidate_cwds_from_cd_commands() { + let content = r#"{"type":"tool_use","name":"Bash","input":{"command":"cd /Users/foo/code/my-repo && cargo build"}}"#; + + let candidates = extract_candidate_cwds_from_transcript(content); + + assert!(candidates.contains(&"/Users/foo/code/my-repo".to_string())); + } + + #[test] + fn test_extract_candidate_cwds_deduplicates() { + let content = [ + r#"{"type":"tool_use","name":"Read","input":{"file_path":"/Users/foo/repo/src/a.rs"}}"#, + r#"{"type":"tool_use","name":"Read","input":{"file_path":"/Users/foo/repo/src/b.rs"}}"#, + ] + .join("\n"); + + let candidates = extract_candidate_cwds_from_transcript(&content); + + // /Users/foo/repo/src should appear only once. + let count = candidates + .iter() + .filter(|c| *c == "/Users/foo/repo/src") + .count(); + assert_eq!(count, 1); + } + + #[test] + fn test_extract_candidate_cwds_ignores_relative_paths() { + let content = r#"{"type":"tool_use","name":"Read","input":{"file_path":"src/main.rs"}}"#; + + let candidates = extract_candidate_cwds_from_transcript(content); + assert!(candidates.is_empty()); + } + + #[test] + fn test_extract_candidate_cwds_skips_output_fields() { + // Paths in "output" or "result" fields should not be extracted. 
+ let content = + r#"{"type":"tool_result","output":"/Users/foo/code/some-repo/build/output.log"}"#; + + let candidates = extract_candidate_cwds_from_transcript(content); + assert!(candidates.is_empty()); + } + + #[test] + fn test_extract_candidate_cwds_handles_directory_paths() { + let content = + r#"{"type":"tool_use","name":"Glob","input":{"directory":"/Users/foo/code/my-repo"}}"#; + + let candidates = extract_candidate_cwds_from_transcript(content); + assert!(candidates.contains(&"/Users/foo/code/my-repo".to_string())); + } + + #[test] + fn test_extract_candidate_cwds_cd_with_semicolons_and_chained_commands() { + // Ensure cd targets are extracted cleanly from complex shell commands + // without picking up non-path fragments. + let content = r#"{"type":"tool_use","name":"Bash","input":{"command":"cd /Users/foo/repo1 && cargo test; cd /Users/foo/repo2 && make"}}"#; + + let candidates = extract_candidate_cwds_from_transcript(content); + + assert!(candidates.contains(&"/Users/foo/repo1".to_string())); + assert!(candidates.contains(&"/Users/foo/repo2".to_string())); + // Should NOT contain fragments like "/Users/foo/repo1 && cargo test" + assert!(!candidates.iter().any(|c| c.contains("&&"))); + assert!(!candidates.iter().any(|c| c.contains(";"))); + } + + #[test] + fn test_extract_candidate_cwds_empty_transcript() { + let candidates = extract_candidate_cwds_from_transcript(""); + assert!(candidates.is_empty()); + } + + #[test] + fn test_extract_candidate_cwds_malformed_json_lines() { + let content = "not json at all\n{invalid json}\n{\"valid\":\"but no paths\"}"; + let candidates = extract_candidate_cwds_from_transcript(content); + assert!(candidates.is_empty()); + } }