From c03aae2b95f27b5d6b9abcf76d7af92c6c159916 Mon Sep 17 00:00:00 2001
From: Keith Lang <songcarver@gmail.com>
Date: Thu, 2 Apr 2026 11:02:23 +1100
Subject: [PATCH] fix(discovery): resolve Claude desktop sessions with
 parent-directory CWD

When the Claude desktop app launches a session, the manifest's `cwd`
often points to a parent directory (e.g. ~/Documents/GitHub) rather
than the specific repo. Since that directory isn't a git repo,
`resolve_repo_root_with_fallbacks` fails and the session is silently
skipped, leaving the repo invisible on the Cadence dashboard.

Add a transcript-scanning fallback: when CWD resolution fails, read the
session's JSONL transcript and extract absolute paths from tool-call
inputs (file_path, path, old_path, new_path, directory fields) and from
cd commands. Try each candidate directory against git rev-parse until
a repo root is found.

Safety bounds:
- Max 20 candidate directories tried (caps git subprocess calls)
- Max 16 levels of JSON nesting (prevents stack overflow)
- Skips text/output/result/stdout fields (avoids command-output noise)
- Only absolute paths considered (relative paths filtered out)

The fallback is wired into both the backfill loop and the background
monitor tick path.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/main.rs    | 132 +++++++++++++++++++++------
 src/scanner.rs | 242 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 344 insertions(+), 30 deletions(-)
diff --git a/src/main.rs b/src/main.rs
index 5c47a77d..5126e969 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -779,6 +779,43 @@ async fn session_log_content_async(log: &agents::SessionLog) -> Option<String> {
     }
 }
 
+/// Upper bound on how many transcript-derived directories are probed for a
+/// single session. Each probe is a repo-root lookup, so the cap keeps a
+/// transcript that references many unique directories from fanning out.
+const TRANSCRIPT_CWD_MAX_CANDIDATES: usize = 20;
+
+/// Fallback repo resolution driven by the session transcript.
+///
+/// Used when the recorded `cwd` does not resolve to a git repo (typically
+/// Claude desktop app sessions whose manifest `cwd` is a parent directory).
+/// Loads the transcript, asks the scanner for candidate directories and
+/// returns the first one that is either present in `repo_root_cache` or
+/// resolves through `git::resolve_repo_root_with_fallbacks`.
+///
+/// At most [`TRANSCRIPT_CWD_MAX_CANDIDATES`] candidates are examined; `None`
+/// means the transcript was unavailable or no candidate resolved.
+async fn resolve_repo_from_transcript(
+    log: &agents::SessionLog,
+    repo_root_cache: &std::collections::HashMap<String, git::RepoRootResolution>,
+) -> Option<git::RepoRootResolution> {
+    let transcript = session_log_content_async(log).await?;
+    let dirs = scanner::extract_candidate_cwds_from_transcript(&transcript);
+
+    for dir in dirs.into_iter().take(TRANSCRIPT_CWD_MAX_CANDIDATES) {
+        // A cached resolution answers immediately, without another lookup.
+        if let Some(hit) = repo_root_cache.get(&dir) {
+            return Some(hit.clone());
+        }
+
+        match git::resolve_repo_root_with_fallbacks(std::path::Path::new(&dir)).await {
+            Ok(found) => return Some(found),
+            Err(_) => continue,
+        }
+    }
+
+    None
+}
+
 /// Parse a duration string like "7d", "30d", "1d" into seconds.
 ///
 /// Currently only supports the `<N>d` format (number of days).
@@ -1437,36 +1474,55 @@ async fn run_backfill_inner_with_invocation(
             let resolved = match git::resolve_repo_root_with_fallbacks(cwd_path).await {
                 Ok(resolution) => resolution,
                 Err(diagnostics) => {
-                    ::tracing::warn!(
-                        event = "session_discovery_skipped",
-                        file = file_path.as_str(),
-                        session_id = ?metadata.session_id,
-                        cwd = cwd.as_str(),
-                        requested_cwd = diagnostics.requested_cwd.to_string_lossy().to_string(),
-                        reason = "repo_root_lookup_failed",
-                        error = ?diagnostics.direct_error,
-                        cwd_exists = diagnostics.cwd_exists,
-                        nearest_existing_ancestor = ?diagnostics
-                            .nearest_existing_ancestor
-                            .map(|path| path.to_string_lossy().to_string()),
-                        ancestor_error = ?diagnostics.ancestor_error,
-                        candidate_repo_names = ?diagnostics.candidate_repo_names,
-                        candidate_owner_repo_roots = ?diagnostics
-                            .candidate_owner_repo_roots
-                            .into_iter()
-                            .map(|path| path.to_string_lossy().to_string())
-                            .collect::<Vec<_>>(),
-                        matched_worktree_owner_repo_root = ?diagnostics
-                            .matched_worktree_owner_repo_root
-                            .map(|path| path.to_string_lossy().to_string()),
-                        matched_worktree_path = ?diagnostics
-                            .matched_worktree_path
-                            .map(|path| path.to_string_lossy().to_string()),
-                    );
-                    if let Some(ref pb) = progress {
-                        pb.inc(1);
+                    // Fallback: scan transcript content for file paths that
+                    // reveal the actual working directory. This handles cases
+                    // like Claude desktop sessions where the manifest's `cwd`
+                    // points to a parent directory (e.g. ~/Documents/GitHub)
+                    // rather than the specific repo.
+                    let transcript_resolution =
+                        resolve_repo_from_transcript(log, &repo_root_cache).await;
+
+                    if let Some(resolution) = transcript_resolution {
+                        ::tracing::info!(
+                            event = "session_cwd_resolved_from_transcript",
+                            file = file_path.as_str(),
+                            session_id = ?metadata.session_id,
+                            original_cwd = cwd.as_str(),
+                            resolved_repo = resolution.repo_root.to_string_lossy().to_string(),
+                        );
+                        resolution
+                    } else {
+                        ::tracing::warn!(
+                            event = "session_discovery_skipped",
+                            file = file_path.as_str(),
+                            session_id = ?metadata.session_id,
+                            cwd = cwd.as_str(),
+                            requested_cwd = diagnostics.requested_cwd.to_string_lossy().to_string(),
+                            reason = "repo_root_lookup_failed",
+                            error = ?diagnostics.direct_error,
+                            cwd_exists = diagnostics.cwd_exists,
+                            nearest_existing_ancestor = ?diagnostics
+                                .nearest_existing_ancestor
+                                .map(|path| path.to_string_lossy().to_string()),
+                            ancestor_error = ?diagnostics.ancestor_error,
+                            candidate_repo_names = ?diagnostics.candidate_repo_names,
+                            candidate_owner_repo_roots = ?diagnostics
+                                .candidate_owner_repo_roots
+                                .into_iter()
+                                .map(|path| path.to_string_lossy().to_string())
+                                .collect::<Vec<_>>(),
+                            matched_worktree_owner_repo_root = ?diagnostics
+                                .matched_worktree_owner_repo_root
+                                .map(|path| path.to_string_lossy().to_string()),
+                            matched_worktree_path = ?diagnostics
+                                .matched_worktree_path
+                                .map(|path| path.to_string_lossy().to_string()),
+                        );
+                        if let Some(ref pb) = progress {
+                            pb.inc(1);
+                        }
+                        continue;
                     }
-                    continue;
                 }
             };
             repo_root_cache.insert(cwd.clone(), resolved.clone());
@@ -1801,10 +1857,26 @@ async fn upload_incremental_sessions_globally(
         let resolved_repo = if let Some(cached) = repo_root_cache.get(&cwd) {
             cached.clone()
         } else {
-            let resolved = git::resolve_repo_root_with_fallbacks(std::path::Path::new(&cwd))
+            let mut resolved = git::resolve_repo_root_with_fallbacks(std::path::Path::new(&cwd))
                 .await
                 .ok()
                 .map(|resolution| resolution.repo_root);
+
+            // Fallback: scan transcript for file paths when CWD doesn't
+            // resolve to a repo (e.g. Claude desktop app parent-dir CWD).
+            if resolved.is_none()
+                && let Some(content) = session_log_content_async(&parsed.log).await
+            {
+                let candidates = scanner::extract_candidate_cwds_from_transcript(&content);
+                for candidate_cwd in candidates.iter().take(TRANSCRIPT_CWD_MAX_CANDIDATES) {
+                    let cwd_path = std::path::Path::new(&candidate_cwd);
+                    if let Ok(resolution) = git::resolve_repo_root_with_fallbacks(cwd_path).await {
+                        resolved = Some(resolution.repo_root);
+                        break;
+                    }
+                }
+            }
+
             repo_root_cache.insert(cwd.clone(), resolved.clone());
             resolved
         };
diff --git a/src/scanner.rs b/src/scanner.rs
index 779aa74d..2ba555ab 100644
--- a/src/scanner.rs
+++ b/src/scanner.rs
@@ -628,6 +628,146 @@ fn looks_like_file(path: &Path) -> bool {
     false
 }
 
+/// Derive candidate working directories from a session transcript.
+///
+/// Treats `content` as JSONL: each line that parses as JSON is searched for
+/// absolute paths under tool-call keys (e.g. `file_path`, `path`) and for
+/// `cd` targets in `command` strings. Directories come back deduplicated,
+/// in the order they were first encountered.
+///
+/// Intended as a fallback for sessions whose recorded `cwd` is not inside a
+/// git repository (e.g. Claude desktop sessions rooted at `~/Documents/GitHub`).
+pub fn extract_candidate_cwds_from_transcript(content: &str) -> Vec<String> {
+    let mut visited = std::collections::HashSet::new();
+    let mut ordered = Vec::new();
+
+    // Parse lazily, one line at a time; anything that is not valid JSON
+    // (blank lines included) is dropped without aborting the scan.
+    let records = content
+        .lines()
+        .filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok());
+    for record in records {
+        collect_absolute_paths_from_value(&record, &mut visited, &mut ordered);
+    }
+
+    ordered
+}
+
+/// Deepest nesting level (the root value is level 0) that the transcript
+/// path walk still inspects; anything nested further is ignored.
+const TRANSCRIPT_PATH_MAX_DEPTH: usize = 16;
+
+/// Collect directory candidates from one parsed transcript value.
+///
+/// Entry point for the recursive walk: starts `collect_paths_recursive` at
+/// depth 0. New directories are appended to `candidates`; `seen` holds the
+/// directories already recorded so that each is added only once.
+fn collect_absolute_paths_from_value(
+    value: &serde_json::Value,
+    seen: &mut std::collections::HashSet<String>,
+    candidates: &mut Vec<String>,
+) {
+    collect_paths_recursive(value, seen, candidates, 0);
+}
+
+/// Walk `value` depth-first, recording directory candidates as they are found.
+///
+/// Every visited node is probed for the known path keys and for a `command`
+/// string; objects and arrays are then descended into. Object members named
+/// `text`, `output`, `result` or `stdout` are never descended into.
+fn collect_paths_recursive(
+    value: &serde_json::Value,
+    seen: &mut std::collections::HashSet<String>,
+    candidates: &mut Vec<String>,
+    depth: usize,
+) {
+    const PATH_KEYS: [&str; 5] = ["file_path", "path", "old_path", "new_path", "directory"];
+    const SKIPPED_KEYS: [&str; 4] = ["text", "output", "result", "stdout"];
+
+    // Anything nested deeper than the limit is ignored outright.
+    if depth > TRANSCRIPT_PATH_MAX_DEPTH {
+        return;
+    }
+
+    // String values stored under a path-bearing key become candidates.
+    for raw in PATH_KEYS.iter().filter_map(|k| value.get(*k)).filter_map(|v| v.as_str()) {
+        add_absolute_path_candidate(raw, seen, candidates);
+    }
+
+    // A `command` string may contain `cd` invocations that name a directory.
+    if let Some(shell) = value.get("command").and_then(serde_json::Value::as_str) {
+        extract_cd_targets(shell, seen, candidates);
+    }
+
+    // Descend into containers; scalar values end the walk.
+    let next = depth + 1;
+    match value {
+        serde_json::Value::Object(members) => members
+            .iter()
+            .filter(|(name, _)| !SKIPPED_KEYS.contains(&name.as_str()))
+            .for_each(|(_, child)| collect_paths_recursive(child, seen, candidates, next)),
+        serde_json::Value::Array(items) => items
+            .iter()
+            .for_each(|child| collect_paths_recursive(child, seen, candidates, next)),
+        _ => {}
+    }
+}
+
+/// Extract absolute directory targets of `cd` invocations in a shell command.
+///
+/// Statements are separated on newlines, `;`, `&&` and `||`. Only the first word
+/// after `cd` is the target (quotes may enclose spaces); later tokens are ignored.
+fn extract_cd_targets(
+    cmd: &str,
+    seen: &mut std::collections::HashSet<String>,
+    candidates: &mut Vec<String>,
+) {
+    let parts = cmd.split(['\n', ';']).flat_map(|s| s.split("&&")).flat_map(|s| s.split("||"));
+    for rest in parts.filter_map(|s| s.trim().strip_prefix("cd ")).map(str::trim_start) {
+        let dir = match rest.chars().next() {
+            // Quoted target: everything up to the closing quote, spaces included.
+            Some(q @ ('"' | '\'')) => rest[1..].split(q).next().unwrap_or_default(),
+            // Bare target: the first word only, so `cd /repo || exit 1` yields `/repo`.
+            _ => rest.split_whitespace().next().unwrap_or_default(),
+        };
+        // Guard against picking up flags or relative paths.
+        if dir.starts_with('/') || is_windows_drive_path(dir) {
+            add_dir_candidate(dir, seen, candidates);
+        }
+    }
+}
+
+/// Record the directory implied by an absolute `path_str`; relative paths are ignored.
+///
+/// A path accepted by `looks_like_file` contributes its parent, any other path itself.
+fn add_absolute_path_candidate(
+    path_str: &str,
+    seen: &mut std::collections::HashSet<String>,
+    candidates: &mut Vec<String>,
+) {
+    let is_absolute = path_str.starts_with('/') || is_windows_drive_path(path_str);
+    if !is_absolute {
+        return;
+    }
+
+    let as_path = Path::new(path_str);
+    match (looks_like_file(as_path), as_path.parent()) {
+        (true, Some(parent)) => add_dir_candidate(&parent.to_string_lossy(), seen, candidates),
+        (true, None) => {}
+        (false, _) => add_dir_candidate(path_str, seen, candidates),
+    }
+}
+
+/// Append `dir` to `candidates` unless it is empty or already present in `seen`.
+fn add_dir_candidate(
+    dir: &str,
+    seen: &mut std::collections::HashSet<String>,
+    candidates: &mut Vec<String>,
+) {
+    let fresh = !dir.is_empty() && seen.insert(dir.to_owned());
+    candidates.extend(fresh.then(|| dir.to_owned()));
+}
+
 #[cfg(test)]
 fn collect_timestamp_candidates(value: &serde_json::Value, out: &mut Vec<serde_json::Value>) {
     match value {
@@ -1398,4 +1538,106 @@ also not json {{{{
         let range = session_time_range(&file).await;
         assert!(range.is_none());
     }
+
+    // -----------------------------------------------------------------------
+    // extract_candidate_cwds_from_transcript
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn test_extract_candidate_cwds_from_tool_use_file_paths() {
+        // Read and Write touch `my-repo`; Edit touches `other-repo`.
+        let transcript = [
+            r#"{"type":"tool_use","name":"Read","input":{"file_path":"/Users/foo/code/my-repo/src/main.rs"}}"#,
+            r#"{"type":"tool_use","name":"Write","input":{"file_path":"/Users/foo/code/my-repo/README.md"}}"#,
+            r#"{"type":"tool_use","name":"Edit","input":{"file_path":"/Users/foo/code/other-repo/lib.rs"}}"#,
+        ]
+        .join("\n");
+        let found = extract_candidate_cwds_from_transcript(&transcript);
+        let has = |dir: &str| found.iter().any(|c| c == dir);
+        assert!(has("/Users/foo/code/my-repo/src"));
+        assert!(has("/Users/foo/code/my-repo"));
+        assert!(has("/Users/foo/code/other-repo"));
+    }
+
+    #[test]
+    fn test_extract_candidate_cwds_from_cd_commands() {
+        // The `cd` target of a chained Bash command must be reported.
+        let transcript = r#"{"type":"tool_use","name":"Bash","input":{"command":"cd /Users/foo/code/my-repo && cargo build"}}"#;
+        let found = extract_candidate_cwds_from_transcript(transcript);
+
+        assert!(found.iter().any(|dir| dir == "/Users/foo/code/my-repo"));
+    }
+
+    #[test]
+    fn test_extract_candidate_cwds_deduplicates() {
+        // Both reads target files in the same directory, so the shared
+        // parent should be encountered twice during the scan.
+        let transcript = [
+            r#"{"type":"tool_use","name":"Read","input":{"file_path":"/Users/foo/repo/src/a.rs"}}"#,
+            r#"{"type":"tool_use","name":"Read","input":{"file_path":"/Users/foo/repo/src/b.rs"}}"#,
+        ]
+        .join("\n");
+
+        let found = extract_candidate_cwds_from_transcript(&transcript);
+
+        // /Users/foo/repo/src must be reported exactly once.
+        let shared_parent = "/Users/foo/repo/src";
+        let hits = found.iter().filter(|dir| *dir == shared_parent).count();
+        assert_eq!(hits, 1);
+    }
+
+    #[test]
+    fn test_extract_candidate_cwds_ignores_relative_paths() {
+        // `src/main.rs` is not absolute, so nothing may be reported.
+        let transcript = r#"{"type":"tool_use","name":"Read","input":{"file_path":"src/main.rs"}}"#;
+        let found = extract_candidate_cwds_from_transcript(transcript);
+        assert_eq!(found, Vec::<String>::new());
+    }
+
+    #[test]
+    fn test_extract_candidate_cwds_skips_output_fields() {
+        // Nest path-bearing keys under the skipped fields: a bare string value
+        // would be ignored even without the skip list, proving nothing.
+        let content = r#"{"output":{"file_path":"/Users/foo/repo/a.rs"},"result":[{"path":"/Users/foo/repo"}]}"#;
+
+        let candidates = extract_candidate_cwds_from_transcript(content);
+        assert!(candidates.is_empty());
+    }
+
+    #[test]
+    fn test_extract_candidate_cwds_handles_directory_paths() {
+        // The `directory` value itself is expected among the candidates.
+        let transcript =
+            r#"{"type":"tool_use","name":"Glob","input":{"directory":"/Users/foo/code/my-repo"}}"#;
+        let found = extract_candidate_cwds_from_transcript(transcript);
+        assert!(found.iter().any(|dir| dir == "/Users/foo/code/my-repo"));
+    }
+
+    #[test]
+    fn test_extract_candidate_cwds_cd_with_semicolons_and_chained_commands() {
+        // Two `cd` invocations separated by `&&` and `;` must each yield a
+        // clean directory, with no shell syntax leaking into a candidate.
+        let transcript = r#"{"type":"tool_use","name":"Bash","input":{"command":"cd /Users/foo/repo1 && cargo test; cd /Users/foo/repo2 && make"}}"#;
+        let found = extract_candidate_cwds_from_transcript(transcript);
+
+        for expected in ["/Users/foo/repo1", "/Users/foo/repo2"] {
+            assert!(found.iter().any(|dir| dir == expected));
+        }
+        // No candidate may carry an operator, e.g. "/Users/foo/repo1 && cargo test".
+        assert!(found.iter().all(|dir| !dir.contains("&&")));
+        assert!(found.iter().all(|dir| !dir.contains(';')));
+    }
+
+    #[test]
+    fn test_extract_candidate_cwds_empty_transcript() {
+        // An empty transcript has no lines and therefore no candidates.
+        assert!(extract_candidate_cwds_from_transcript("").is_empty());
+    }
+
+    #[test]
+    fn test_extract_candidate_cwds_malformed_json_lines() {
+        // Two unparseable lines followed by valid JSON that holds no paths.
+        let transcript = "not json at all\n{invalid json}\n{\"valid\":\"but no paths\"}";
+        assert!(extract_candidate_cwds_from_transcript(transcript).is_empty());
+    }
 }