From 9ddf3ff85df75e3058fde65a28a06d825cae968d Mon Sep 17 00:00:00 2001 From: Nicolas Arnaud-Cormos Date: Sun, 22 Mar 2026 21:45:23 +0100 Subject: [PATCH] fix(update): use consistent diff engine for patch-ID matching collect_patch_ids used `git log -p` for upstream commits while batch_patch_ids used `git diff-tree -p` for feature commits. These produce different patch-IDs when diff.algorithm is set (e.g. histogram), because diff-tree ignores the config and always uses Myers. Switch collect_patch_ids to rev-list + diff-tree so both sides use the same engine. Add --empty=drop as a safety net for any commits that slip through filtering. Add integration tests that reproduce the bug by setting diff.algorithm=histogram with a markdown file rewrite. Co-Authored-By: Claude Opus 4.6 --- src/weave.rs | 57 ++++--- tests/integration/test_update.sh | 257 +++++++++++++++++++++++++++++++ 2 files changed, 290 insertions(+), 24 deletions(-) diff --git a/src/weave.rs b/src/weave.rs index 090c3d2..4ad16f0 100644 --- a/src/weave.rs +++ b/src/weave.rs @@ -1147,7 +1147,7 @@ pub fn run_rebase( // Build args string for logging let upstream_arg = upstream.unwrap_or("--root"); let log_args = format!( - "rebase --interactive --autostash --keep-empty --no-autosquash --rebase-merges --update-refs {}", + "rebase --interactive --autostash --keep-empty --empty=drop --no-autosquash --rebase-merges --update-refs {}", upstream_arg ); @@ -1158,6 +1158,7 @@ pub fn run_rebase( "--interactive", "--autostash", "--keep-empty", + "--empty=drop", "--no-autosquash", "--rebase-merges", "--update-refs", @@ -1216,11 +1217,38 @@ pub fn run_rebase( Ok(RebaseOutcome::Completed) } -/// Collect patch-IDs for all commits in the range `base..head` using a -/// single `git log -p | git patch-id --stable` pipeline (2 processes total). -/// Returns `None` if either command fails. +/// Collect patch-IDs for all commits in the range `base..head`. +/// +/// Uses `git rev-list` to enumerate commits, then `git diff-tree -p --stdin` +/// piped to `git patch-id --stable` — the same method used by `batch_patch_ids` +/// for feature commits. Using the same diff engine for both sides is critical: +/// `git diff-tree` ignores the `diff.algorithm` config (always uses Myers), +/// while `git log -p` respects it. When the repo is configured with a +/// non-default algorithm (e.g. `histogram`), the two commands produce +/// different hunks for the same commit, leading to different patch-IDs. fn collect_patch_ids(workdir: &Path, base: &Oid, head: &Oid) -> Option> { - let pairs = run_patch_id_pipeline(workdir, &["log", "-p", &format!("{}..{}", base, head)])?; + use std::process::Command; + + let output = Command::new("git") + .current_dir(workdir) + .args(["rev-list", &format!("{}..{}", base, head)]) + .output() + .ok()?; + + if !output.status.success() || output.stdout.is_empty() { + return Some(HashSet::new()); + } + + let oids: Vec = String::from_utf8_lossy(&output.stdout) + .lines() + .filter_map(|line| Oid::from_str(line.trim()).ok()) + .collect(); + + if oids.is_empty() { + return Some(HashSet::new()); + } + + let pairs = batch_patch_ids(workdir, &oids)?; Some(pairs.into_iter().map(|(_, pid)| pid).collect()) } @@ -1305,25 +1333,6 @@ fn pipe_to_patch_id(workdir: &Path, diff: &[u8]) -> Option ) } -/// Run `git | git patch-id --stable` and return `(commit_hex, patch_id)` pairs. -fn run_patch_id_pipeline(workdir: &Path, args: &[&str]) -> Option> { - use std::process::{Command, Stdio}; - - let log = Command::new("git") - .current_dir(workdir) - .args(args) - .stdout(Stdio::piped()) - .stderr(Stdio::null()) - .output() - .ok()?; - - if !log.status.success() || log.stdout.is_empty() { - return None; - } - - pipe_to_patch_id(workdir, &log.stdout) -} - #[cfg(test)] #[path = "weave_test.rs"] mod tests; diff --git a/tests/integration/test_update.sh b/tests/integration/test_update.sh index 54bb3e6..394a1b7 100644 --- a/tests/integration/test_update.sh +++ b/tests/integration/test_update.sh @@ -231,4 +231,261 @@ assert_exit_ok $? "submodule_ok" assert_contains "$out" "Updating submodules" "submodule_spinner_start" assert_contains "$out" "Updated submodules" "submodule_spinner_stop" +# ══════════════════════════════════════════════════════════════════════════════ +# CHERRY-PICKED UPSTREAM COMMITS ARE FILTERED +# ══════════════════════════════════════════════════════════════════════════════ + +describe "cherry-picked feature commit is filtered from rebase" +setup_repo_with_remote +# Enable histogram diff algorithm — this triggers different hunk merging +# between `git log -p` (respects config) and `git diff-tree -p` (ignores it, +# always uses Myers). The resulting patch-IDs differ, which was the root cause +# of the bug where cherry-picked commits weren't detected during update. +git -C "$WORK" config diff.algorithm histogram + +# Push a doc file to upstream so the merge-base includes it. +# The doc has sections that produce different diff hunks under histogram +# vs Myers when rewritten — this is key to triggering the bug. +SETUP="$TMPROOT/setup" +git clone -q "$TMPROOT/remote.git" "$SETUP" +git -C "$SETUP" config user.email "test@test.com" +git -C "$SETUP" config user.name "Test" +git -C "$SETUP" config core.autocrlf false +cat > "$SETUP/doc.md" << 'ORIGINAL' +# Title + +## Overview + +Some overview text here. + +## CLI + +```bash +command [--yes] +``` + +**Arguments:** + +- `--yes` / `-y`: Skip the prompt. + +**Behavior:** + +- Validates the current state +- Fetches all changes +- Rebases local commits +- Updates submodules +- On conflict, reports the error +- After success, proposes to remove branches + +## What Happens + +1. **Validation**: + - HEAD must be on a branch + - Must have upstream tracking + +2. **Fetch**: + - All changes are fetched + - Tags are force-updated + +## Conflict Handling + +When conflicts occur: +- The operation pauses +- User resolves manually + +## Prerequisites + +- Git 2.38 or later +- Must be in a git repository +ORIGINAL +git -C "$SETUP" add doc.md +git -C "$SETUP" commit -q -m "Add doc" +git -C "$SETUP" push -q origin +rm -rf "$SETUP" +# Fetch so WORK sees the new upstream base +git -C "$WORK" fetch -q origin +git -C "$WORK" rebase -q "$(git -C "$WORK" rev-parse --abbrev-ref --symbolic-full-name @{u})" + +create_feature_branch "cherry-feat" +switch_to cherry-feat +# Rewrite the doc (produces different hunks under histogram vs Myers) +cat > "$WORK/doc.md" << 'UPDATED' +# Title + +## Overview + +Some overview text here. + +## CLI + +```bash +command [--yes] +``` + +**Flags:** + +- `--yes` / `-y`: Skip the prompt. + +## What Happens + +### Normal Update + +**What changes:** + +1. **Validation**: + - HEAD must be on a branch + - Must have upstream tracking + +2. **Fetch**: + - All changes are fetched + - Tags are force-updated + - Deleted remote branches are pruned + +**What stays the same:** +- Feature branch refs are kept in sync +- Merge topology is preserved + +## Conflict Recovery + +When conflicts occur: +- State is saved +- User resolves manually +- Continue or abort + +## Prerequisites + +- Git 2.38 or later +- Must be in a git repository +UPDATED +git -C "$WORK" add doc.md +git -C "$WORK" commit -q -m "Rewrite doc" +switch_to integration +weave_branch "cherry-feat" + +# Upstream recreates the same change (simulates cherry-pick with different OID) +OTHER="$TMPROOT/other" +git clone -q "$TMPROOT/remote.git" "$OTHER" +git -C "$OTHER" config user.email "upstream@test.com" +git -C "$OTHER" config user.name "Upstream" +git -C "$OTHER" config core.autocrlf false +base_branch=$(git -C "$OTHER" rev-parse --abbrev-ref HEAD) +cat > "$OTHER/doc.md" << 'UPDATED' +# Title + +## Overview + +Some overview text here. + +## CLI + +```bash +command [--yes] +``` + +**Flags:** + +- `--yes` / `-y`: Skip the prompt. + +## What Happens + +### Normal Update + +**What changes:** + +1. **Validation**: + - HEAD must be on a branch + - Must have upstream tracking + +2. **Fetch**: + - All changes are fetched + - Tags are force-updated + - Deleted remote branches are pruned + +**What stays the same:** +- Feature branch refs are kept in sync +- Merge topology is preserved + +## Conflict Recovery + +When conflicts occur: +- State is saved +- User resolves manually +- Continue or abort + +## Prerequisites + +- Git 2.38 or later +- Must be in a git repository +UPDATED +git -C "$OTHER" add doc.md +git -C "$OTHER" commit -q -m "Rewrite doc" +git -C "$OTHER" push -q origin "$base_branch" + +out=$(gl update 2>&1) +assert_exit_ok $? "cherry_pick_filter_ok" +assert_contains "$out" "Rebased onto upstream" "cherry_pick_rebased" +# The feature commit message should still be in history (from the upstream copy) +assert_log_contains "Rewrite doc" "cherry_pick_commit_in_log" + +describe "partially cherry-picked branch keeps remaining commits" +setup_repo_with_remote +create_feature_branch "partial-cherry" +switch_to partial-cherry +commit_file "Partial F1" "pf1.txt" +commit_file "Partial F2" "pf2.txt" +commit_file "Partial F3" "pf3.txt" +switch_to integration +weave_branch "partial-cherry" + +# Upstream cherry-picks only F1 +OTHER="$TMPROOT/other" +git clone -q "$TMPROOT/remote.git" "$OTHER" +git -C "$OTHER" config user.email "upstream@test.com" +git -C "$OTHER" config user.name "Upstream" +git -C "$OTHER" config core.autocrlf false +# Recreate F1's change on upstream (simulates cherry-pick with different OID) +base_branch=$(git -C "$OTHER" rev-parse --abbrev-ref HEAD) +echo "Partial F1" > "$OTHER/pf1.txt" +git -C "$OTHER" add pf1.txt +git -C "$OTHER" commit -q -m "Partial F1" +git -C "$OTHER" push -q origin "$base_branch" + +out=$(gl update 2>&1) +assert_exit_ok $? "partial_cherry_ok" +assert_contains "$out" "Rebased onto upstream" "partial_cherry_rebased" +# F2 and F3 should still be on the branch +assert_contains "$(git -C "$WORK" log partial-cherry --oneline)" "Partial F2" "partial_cherry_f2_kept" +assert_contains "$(git -C "$WORK" log partial-cherry --oneline)" "Partial F3" "partial_cherry_f3_kept" + +describe "fully cherry-picked branch is handled gracefully" +setup_repo_with_remote +create_feature_branch "full-cherry" +switch_to full-cherry +commit_file "Full F1" "ff1.txt" +commit_file "Full F2" "ff2.txt" +switch_to integration +weave_branch "full-cherry" + +# Upstream cherry-picks both commits +OTHER="$TMPROOT/other" +git clone -q "$TMPROOT/remote.git" "$OTHER" +git -C "$OTHER" config user.email "upstream@test.com" +git -C "$OTHER" config user.name "Upstream" +git -C "$OTHER" config core.autocrlf false +# Recreate both changes on upstream (simulates cherry-pick with different OIDs) +base_branch=$(git -C "$OTHER" rev-parse --abbrev-ref HEAD) +echo "Full F1" > "$OTHER/ff1.txt" +git -C "$OTHER" add ff1.txt +git -C "$OTHER" commit -q -m "Full F1" +echo "Full F2" > "$OTHER/ff2.txt" +git -C "$OTHER" add ff2.txt +git -C "$OTHER" commit -q -m "Full F2" +git -C "$OTHER" push -q origin "$base_branch" + +out=$(gl update 2>&1) +assert_exit_ok $? "full_cherry_ok" +assert_contains "$out" "Rebased onto upstream" "full_cherry_rebased" +assert_log_contains "Full F1" "full_cherry_f1_in_log" +assert_log_contains "Full F2" "full_cherry_f2_in_log" + pass