From 798ddd947ffe9d608d9aa5803dc7c409834e7159 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 23 Jun 2025 18:32:10 -0400 Subject: [PATCH 01/42] pack-objects: use standard option incompatibility functions pack-objects has a handful of explicit checks for pairs of command-line options which are mutually incompatible. Many of these pre-date a699367bb8 (i18n: factorize more 'incompatible options' messages, 2022-01-31). Convert the explicit checks into die_for_incompatible_opt2() calls, which simplifies the implementation and standardizes pack-objects' output when given incompatible options (e.g., --stdin-packs with --filter gives different output than --keep-unreachable with --unpack-unreachable). There is one minor piece of test fallout in t5331 that expects the old format, which has been corrected. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 20 +++++++++++--------- t/t5331-pack-objects-stdin.sh | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 67941c8a603ae4..e7274e0e009416 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -5010,9 +5010,10 @@ int cmd_pack_objects(int argc, strvec_push(&rp, "--unpacked"); } - if (exclude_promisor_objects && exclude_promisor_objects_best_effort) - die(_("options '%s' and '%s' cannot be used together"), - "--exclude-promisor-objects", "--exclude-promisor-objects-best-effort"); + die_for_incompatible_opt2(exclude_promisor_objects, + "--exclude-promisor-objects", + exclude_promisor_objects_best_effort, + "--exclude-promisor-objects-best-effort"); if (exclude_promisor_objects) { use_internal_rev_list = 1; fetch_if_missing = 0; @@ -5050,13 +5051,14 @@ int cmd_pack_objects(int argc, if (!pack_to_stdout && thin) die(_("--thin cannot be used to build an indexable pack")); - if (keep_unreachable && unpack_unreachable) - die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", 
"--unpack-unreachable"); + die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable", + unpack_unreachable, "--unpack-unreachable"); if (!rev_list_all || !rev_list_reflog || !rev_list_index) unpack_unreachable_expiration = 0; - if (stdin_packs && filter_options.choice) - die(_("cannot use --filter with --stdin-packs")); + die_for_incompatible_opt2(stdin_packs, "--stdin-packs", + filter_options.choice, "--filter"); + if (stdin_packs && use_internal_rev_list) die(_("cannot use internal rev list with --stdin-packs")); @@ -5064,8 +5066,8 @@ int cmd_pack_objects(int argc, if (cruft) { if (use_internal_rev_list) die(_("cannot use internal rev list with --cruft")); - if (stdin_packs) - die(_("cannot use --stdin-packs with --cruft")); + die_for_incompatible_opt2(stdin_packs, "--stdin-packs", + cruft, "--cruft"); } /* diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index b48c0cbe8fcfab..8fd07deb8d001e 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -64,7 +64,7 @@ test_expect_success '--stdin-packs is incompatible with --filter' ' cd stdin-packs && test_must_fail git pack-objects --stdin-packs --stdout \ --filter=blob:none err && - test_grep "cannot use --filter with --stdin-packs" err + test_grep "options .--stdin-packs. and .--filter. cannot be used together" err ) ' From 9809d4ae9f5b577e0afd18082b095414ce046c00 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 23 Jun 2025 18:32:13 -0400 Subject: [PATCH 02/42] pack-objects: limit scope in 'add_object_entry_from_pack()' In add_object_entry_from_pack() we declare 'revs' (given to us through the miscellaneous context argument) earlier in the "if (p)" conditional than is necessary. Move it down as far as it can go to reduce its scope. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index e7274e0e009416..d04a36a6bf3e5f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3725,7 +3725,6 @@ static int add_object_entry_from_pack(const struct object_id *oid, return 0; if (p) { - struct rev_info *revs = _data; struct object_info oi = OBJECT_INFO_INIT; oi.typep = &type; @@ -3733,6 +3732,7 @@ static int add_object_entry_from_pack(const struct object_id *oid, die(_("could not get type of object %s in pack %s"), oid_to_hex(oid), p->pack_name); } else if (type == OBJ_COMMIT) { + struct rev_info *revs = _data; /* * commits in included packs are used as starting points for the * subsequent revision walk From 67e1a7827bf81f84ba8933d494e441139bd3f34d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 23 Jun 2025 18:32:15 -0400 Subject: [PATCH 03/42] pack-objects: factor out handling '--stdin-packs' At the bottom of cmd_pack_objects() we check which mode the command is running in (e.g., generating a cruft pack, handling '--stdin-packs', using the internal rev-list, etc.) and handle the mode appropriately. The '--stdin-packs' case is handled inline (dating back to its introduction in 339bce27f4 (builtin/pack-objects.c: add '--stdin-packs' option, 2021-02-22)) since it is relatively short. Extract the body of "if (stdin_packs)" into its own function to prepare for the implementation to become lengthier in a following commit. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index d04a36a6bf3e5f..7ce04b71ddf72b 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3909,6 +3909,17 @@ static void read_packs_list_from_stdin(void) string_list_clear(&exclude_packs, 0); } +static void add_unreachable_loose_objects(void); + +static void read_stdin_packs(int rev_list_unpacked) +{ + /* avoids adding objects in excluded packs */ + ignore_packed_keep_in_core = 1; + read_packs_list_from_stdin(); + if (rev_list_unpacked) + add_unreachable_loose_objects(); +} + static void add_cruft_object_entry(const struct object_id *oid, enum object_type type, struct packed_git *pack, off_t offset, const char *name, uint32_t mtime) @@ -4004,7 +4015,6 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep) } } -static void add_unreachable_loose_objects(void); static void add_objects_in_unpacked_packs(void); static void enumerate_cruft_objects(void) @@ -5135,11 +5145,7 @@ int cmd_pack_objects(int argc, progress_state = start_progress(the_repository, _("Enumerating objects"), 0); if (stdin_packs) { - /* avoids adding objects in excluded packs */ - ignore_packed_keep_in_core = 1; - read_packs_list_from_stdin(); - if (rev_list_unpacked) - add_unreachable_loose_objects(); + read_stdin_packs(rev_list_unpacked); } else if (cruft) { read_cruft_objects(); } else if (!use_internal_rev_list) { From 97ec43247c01bc125fa9618e54f93a7dd0b52ab4 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 23 Jun 2025 18:32:18 -0400 Subject: [PATCH 04/42] pack-objects: declare 'rev_info' for '--stdin-packs' earlier Once 'read_packs_list_from_stdin()' has called for_each_object_in_pack() on each of the input packs, we do a reachability traversal to discover names for any objects we picked up so we can generate name hash values and 
hopefully get higher quality deltas as a result. A future commit will change the purpose of this reachability traversal to find and pack objects which are reachable from commits in the input packs, but are packed in an unknown (not included nor excluded) pack. Extract the code which initializes and performs the reachability traversal to take place in the caller, not the callee, which prepares us to share this code for the '--unpacked' case (see the function add_unreachable_loose_objects() for more details). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 67 +++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 7ce04b71ddf72b..4258ac1792ab30 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3793,7 +3793,7 @@ static int pack_mtime_cmp(const void *_a, const void *_b) return 0; } -static void read_packs_list_from_stdin(void) +static void read_packs_list_from_stdin(struct rev_info *revs) { struct strbuf buf = STRBUF_INIT; struct string_list include_packs = STRING_LIST_INIT_DUP; @@ -3801,24 +3801,6 @@ static void read_packs_list_from_stdin(void) struct string_list_item *item = NULL; struct packed_git *p; - struct rev_info revs; - - repo_init_revisions(the_repository, &revs, NULL); - /* - * Use a revision walk to fill in the namehash of objects in the include - * packs. To save time, we'll avoid traversing through objects that are - * in excluded packs. - * - * That may cause us to avoid populating all of the namehash fields of - * all included objects, but our goal is best-effort, since this is only - * an optimization during delta selection. 
- */ - revs.no_kept_objects = 1; - revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; - revs.blob_objects = 1; - revs.tree_objects = 1; - revs.tag_objects = 1; - revs.ignore_missing_links = 1; while (strbuf_getline(&buf, stdin) != EOF) { if (!buf.len) @@ -3888,22 +3870,10 @@ static void read_packs_list_from_stdin(void) struct packed_git *p = item->util; for_each_object_in_pack(p, add_object_entry_from_pack, - &revs, + revs, FOR_EACH_OBJECT_PACK_ORDER); } - if (prepare_revision_walk(&revs)) - die(_("revision walk setup failed")); - traverse_commit_list(&revs, - show_commit_pack_hint, - show_object_pack_hint, - NULL); - - trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found", - stdin_packs_found_nr); - trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints", - stdin_packs_hints_nr); - strbuf_release(&buf); string_list_clear(&include_packs, 0); string_list_clear(&exclude_packs, 0); @@ -3913,11 +3883,42 @@ static void add_unreachable_loose_objects(void); static void read_stdin_packs(int rev_list_unpacked) { + struct rev_info revs; + + repo_init_revisions(the_repository, &revs, NULL); + /* + * Use a revision walk to fill in the namehash of objects in the include + * packs. To save time, we'll avoid traversing through objects that are + * in excluded packs. + * + * That may cause us to avoid populating all of the namehash fields of + * all included objects, but our goal is best-effort, since this is only + * an optimization during delta selection. 
+ */ + revs.no_kept_objects = 1; + revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs.blob_objects = 1; + revs.tree_objects = 1; + revs.tag_objects = 1; + revs.ignore_missing_links = 1; + /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; - read_packs_list_from_stdin(); + read_packs_list_from_stdin(&revs); if (rev_list_unpacked) add_unreachable_loose_objects(); + + if (prepare_revision_walk(&revs)) + die(_("revision walk setup failed")); + traverse_commit_list(&revs, + show_commit_pack_hint, + show_object_pack_hint, + NULL); + + trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found", + stdin_packs_found_nr); + trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints", + stdin_packs_hints_nr); } static void add_cruft_object_entry(const struct object_id *oid, enum object_type type, From d6220cce6beda5404effa7107b7544a3d8c6266a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 23 Jun 2025 18:32:21 -0400 Subject: [PATCH 05/42] pack-objects: perform name-hash traversal for unpacked objects With '--unpacked', pack-objects adds loose objects (which don't appear in any of the excluded packs from '--stdin-packs') to the output pack without considering them as reachability tips for the name-hash traversal. This was an oversight in the original implementation of '--stdin-packs', since the code which enumerates and adds loose objects to the output pack (`add_unreachable_loose_objects()`) did not have access to the 'rev_info' struct found in `read_packs_list_from_stdin()`. Excluding unpacked objects from that traversal doesn't affect the correctness of the resulting pack, but it does make it harder to discover good deltas for loose objects. Now that the 'rev_info' struct is declared outside of `read_packs_list_from_stdin()`, we can pass it to `add_unreachable_loose_objects()` and add any loose objects as tips to the above-mentioned traversal, in theory producing slightly tighter packs as a result.
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 4258ac1792ab30..3437dbd7f1a855 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3879,7 +3879,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) string_list_clear(&exclude_packs, 0); } -static void add_unreachable_loose_objects(void); +static void add_unreachable_loose_objects(struct rev_info *revs); static void read_stdin_packs(int rev_list_unpacked) { @@ -3906,7 +3906,7 @@ static void read_stdin_packs(int rev_list_unpacked) ignore_packed_keep_in_core = 1; read_packs_list_from_stdin(&revs); if (rev_list_unpacked) - add_unreachable_loose_objects(); + add_unreachable_loose_objects(&revs); if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); @@ -4025,7 +4025,7 @@ static void enumerate_cruft_objects(void) _("Enumerating cruft objects"), 0); add_objects_in_unpacked_packs(); - add_unreachable_loose_objects(); + add_unreachable_loose_objects(NULL); stop_progress(&progress_state); } @@ -4303,8 +4303,9 @@ static void add_objects_in_unpacked_packs(void) } static int add_loose_object(const struct object_id *oid, const char *path, - void *data UNUSED) + void *data) { + struct rev_info *revs = data; enum object_type type = oid_object_info(the_repository, oid, NULL); if (type < 0) { @@ -4325,6 +4326,10 @@ static int add_loose_object(const struct object_id *oid, const char *path, } else { add_object_entry(oid, type, "", 0); } + + if (revs && type == OBJ_COMMIT) + add_pending_oid(revs, NULL, oid, 0); + return 0; } @@ -4333,11 +4338,10 @@ static int add_loose_object(const struct object_id *oid, const char *path, * add_object_entry will weed out duplicates, so we just add every * loose object we find. 
*/ -static void add_unreachable_loose_objects(void) +static void add_unreachable_loose_objects(struct rev_info *revs) { for_each_loose_file_in_objdir(repo_get_object_directory(the_repository), - add_loose_object, - NULL, NULL, NULL); + add_loose_object, NULL, NULL, revs); } static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) @@ -4684,7 +4688,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) if (keep_unreachable) add_objects_in_unpacked_packs(); if (pack_loose_unreachable) - add_unreachable_loose_objects(); + add_unreachable_loose_objects(NULL); if (unpack_unreachable) loosen_unused_packed_objects(); From 8ed5d87bdd03469249373dead5d12ff4590bcccc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 23 Jun 2025 18:32:24 -0400 Subject: [PATCH 06/42] pack-objects: fix typo in 'show_object_pack_hint()' Noticed-by: Elijah Newren Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 3437dbd7f1a855..9580b4ea1a1539 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3767,7 +3767,7 @@ static void show_object_pack_hint(struct object *object, const char *name, * would typically pick up during a reachability traversal. * * Make a best-effort attempt to fill in the ->hash and ->no_try_delta - * here using a now in order to perhaps improve the delta selection + * fields here in order to perhaps improve the delta selection * process. */ oe->hash = pack_name_hash_fn(name); From 63195f013b845b02063b21162fa60fcfb8b631ef Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 23 Jun 2025 18:32:27 -0400 Subject: [PATCH 07/42] pack-objects: swap 'show_{object,commit}_pack_hint' show_commit_pack_hint() has heretofore been a noop, so its position within its compilation unit only needs to appear before its first use. 
But the following commit will sometimes have `show_commit_pack_hint()` call `show_object_pack_hint()`, so reorder the former to appear after the latter to minimize the code movement in that patch. Suggested-by: Elijah Newren Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 9580b4ea1a1539..f44447a3f9caf3 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3748,12 +3748,6 @@ static int add_object_entry_from_pack(const struct object_id *oid, return 0; } -static void show_commit_pack_hint(struct commit *commit UNUSED, - void *data UNUSED) -{ - /* nothing to do; commits don't have a namehash */ -} - static void show_object_pack_hint(struct object *object, const char *name, void *data UNUSED) { @@ -3776,6 +3770,12 @@ static void show_object_pack_hint(struct object *object, const char *name, stdin_packs_hints_nr++; } +static void show_commit_pack_hint(struct commit *commit UNUSED, + void *data UNUSED) +{ + /* nothing to do; commits don't have a namehash */ +} + static int pack_mtime_cmp(const void *_a, const void *_b) { struct packed_git *a = ((const struct string_list_item*)_a)->util; From cd846bacc7dce3e71137e320adb01f5923353800 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 23 Jun 2025 18:32:30 -0400 Subject: [PATCH 08/42] pack-objects: introduce '--stdin-packs=follow' When invoked with '--stdin-packs', pack-objects will generate a pack which contains the objects found in the "included" packs, less any objects from "excluded" packs. Packs that exist in the repository but weren't specified as either included or excluded are in practice treated like the latter, at least in the sense that pack-objects won't include objects from those packs. 
This behavior forces us to include any cruft pack(s) in a repository's multi-pack index for the reasons described in ddee3703b3 (builtin/repack.c: add cruft packs to MIDX during geometric repack, 2022-05-20). The full details are in ddee3703b3, but the gist is if you have a once-unreachable object in a cruft pack which later becomes reachable via one or more commits in a pack generated with '--stdin-packs', you *have* to include that object in the MIDX via the copy in the cruft pack, otherwise we cannot generate reachability bitmaps for any commits which reach that object. Note that the traversal here is best-effort, similar to the existing traversal which provides name-hash hints. This means that the object traversal may hand us back a blob that does not actually exist. We *won't* see missing trees/commits with 'ignore_missing_links' because: - missing commit parents are discarded at the commit traversal stage by revision.c::process_parents() - missing tag objects are discarded by revision.c::handle_commit() - missing tree objects are discarded by the list-objects code in list-objects.c::process_tree() But we have to handle potentially-missing blobs specially by making a separate check to ensure they exist in the repository. Failing to do so would mean that we'd add an object to the packing list which doesn't actually exist, rendering us unable to write out the pack. This prepares us for new repacking behavior which will "resurrect" objects found in cruft or otherwise unspecified packs when generating new packs. In the context of geometric repacking, this may be used to maintain a sequence of geometrically-repacked packs, the union of which is closed under reachability, even in the case described earlier. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.adoc | 10 ++- builtin/pack-objects.c | 86 +++++++++++++++----- t/t5331-pack-objects-stdin.sh | 120 ++++++++++++++++++++++++++++ 3 files changed, 193 insertions(+), 23 deletions(-) diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index b1c5aa27da4d55..eba014c40615eb 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -87,13 +87,21 @@ base-name:: reference was included in the resulting packfile. This can be useful to send new tags to native Git clients. ---stdin-packs:: +--stdin-packs[=<mode>]:: Read the basenames of packfiles (e.g., `pack-1234abcd.pack`) from the standard input, instead of object names or revision arguments. The resulting pack contains all objects listed in the included packs (those not beginning with `^`), excluding any objects listed in the excluded packs (beginning with `^`). + +When `mode` is "follow", objects from packs not listed on stdin receive +special treatment. Objects within unlisted packs will be included if +those objects are (1) reachable from the included packs, and (2) not +found in any excluded packs. This mode is useful, for example, to +resurrect once-unreachable objects found in cruft packs to generate +packs which are closed under reachability up to the boundary set by the +excluded packs. ++ Incompatible with `--revs`, or options that imply `--revs` (such as `--all`), with the exception of `--unpacked`, which is compatible.
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index f44447a3f9caf3..4ae52c6a29144b 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -284,6 +284,12 @@ static struct oidmap configured_exclusions; static struct oidset excluded_by_config; static int name_hash_version = -1; +enum stdin_packs_mode { + STDIN_PACKS_MODE_NONE, + STDIN_PACKS_MODE_STANDARD, + STDIN_PACKS_MODE_FOLLOW, +}; + /** * Check whether the name_hash_version chosen by user input is appropriate, * and also validate whether it is compatible with other features. @@ -3749,31 +3755,47 @@ static int add_object_entry_from_pack(const struct object_id *oid, } static void show_object_pack_hint(struct object *object, const char *name, - void *data UNUSED) + void *data) { - struct object_entry *oe = packlist_find(&to_pack, &object->oid); - if (!oe) - return; + enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data; + if (mode == STDIN_PACKS_MODE_FOLLOW) { + if (object->type == OBJ_BLOB && + !has_object(the_repository, &object->oid, 0)) + return; + add_object_entry(&object->oid, object->type, name, 0); + } else { + struct object_entry *oe = packlist_find(&to_pack, &object->oid); + if (!oe) + return; - /* - * Our 'to_pack' list was constructed by iterating all objects packed in - * included packs, and so doesn't have a non-zero hash field that you - * would typically pick up during a reachability traversal. - * - * Make a best-effort attempt to fill in the ->hash and ->no_try_delta - * fields here in order to perhaps improve the delta selection - * process. - */ - oe->hash = pack_name_hash_fn(name); - oe->no_try_delta = name && no_try_delta(name); + /* + * Our 'to_pack' list was constructed by iterating all + * objects packed in included packs, and so doesn't have + * a non-zero hash field that you would typically pick + * up during a reachability traversal. 
+ * + * Make a best-effort attempt to fill in the ->hash and + * ->no_try_delta fields here in order to perhaps + * improve the delta selection process. + */ + oe->hash = pack_name_hash_fn(name); + oe->no_try_delta = name && no_try_delta(name); - stdin_packs_hints_nr++; + stdin_packs_hints_nr++; + } } -static void show_commit_pack_hint(struct commit *commit UNUSED, - void *data UNUSED) +static void show_commit_pack_hint(struct commit *commit, void *data) { + enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data; + + if (mode == STDIN_PACKS_MODE_FOLLOW) { + show_object_pack_hint((struct object *)commit, "", data); + return; + } + /* nothing to do; commits don't have a namehash */ + } static int pack_mtime_cmp(const void *_a, const void *_b) @@ -3881,7 +3903,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) static void add_unreachable_loose_objects(struct rev_info *revs); -static void read_stdin_packs(int rev_list_unpacked) +static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) { struct rev_info revs; @@ -3913,7 +3935,7 @@ static void read_stdin_packs(int rev_list_unpacked) traverse_commit_list(&revs, show_commit_pack_hint, show_object_pack_hint, - NULL); + &mode); trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found", stdin_packs_found_nr); @@ -4795,6 +4817,23 @@ static int is_not_in_promisor_pack(struct commit *commit, void *data) { return is_not_in_promisor_pack_obj((struct object *) commit, data); } +static int parse_stdin_packs_mode(const struct option *opt, const char *arg, + int unset) +{ + enum stdin_packs_mode *mode = opt->value; + + if (unset) + *mode = STDIN_PACKS_MODE_NONE; + else if (!arg || !*arg) + *mode = STDIN_PACKS_MODE_STANDARD; + else if (!strcmp(arg, "follow")) + *mode = STDIN_PACKS_MODE_FOLLOW; + else + die(_("invalid value for '%s': '%s'"), opt->long_name, arg); + + return 0; +} + int cmd_pack_objects(int argc, const char **argv, const char *prefix, @@ -4805,7 +4844,7 @@ int 
cmd_pack_objects(int argc, struct strvec rp = STRVEC_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; int rev_list_index = 0; - int stdin_packs = 0; + enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; @@ -4860,6 +4899,9 @@ int cmd_pack_objects(int argc, OPT_SET_INT_F(0, "indexed-objects", &rev_list_index, N_("include objects referred to by the index"), 1, PARSE_OPT_NONEG), + OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"), + N_("read packs from stdin"), + PARSE_OPT_OPTARG, parse_stdin_packs_mode), OPT_BOOL(0, "stdin-packs", &stdin_packs, N_("read packs from stdin")), OPT_BOOL(0, "stdout", &pack_to_stdout, @@ -5150,7 +5192,7 @@ int cmd_pack_objects(int argc, progress_state = start_progress(the_repository, _("Enumerating objects"), 0); if (stdin_packs) { - read_stdin_packs(rev_list_unpacked); + read_stdin_packs(stdin_packs, rev_list_unpacked); } else if (cruft) { read_cruft_objects(); } else if (!use_internal_rev_list) { diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index 8fd07deb8d001e..4a8df5a389d29f 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -236,4 +236,124 @@ test_expect_success 'pack-objects --stdin with packfiles from main and alternate test_cmp expected-objects actual-objects ' +objdir=.git/objects +packdir=$objdir/pack + +objects_in_packs () { + for p in "$@" + do + git show-index <"$packdir/pack-$p.idx" || return 1 + done >objects.raw && + + cut -d' ' -f2 objects.raw | sort && + rm -f objects.raw +} + +test_expect_success '--stdin-packs=follow walks into unknown packs' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + cd repo && + + for c in A B C D + do + test_commit "$c" || return 1 + done && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + B="$(echo A..B | git pack-objects --revs 
$packdir/pack)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + D="$(echo C..D | git pack-objects --revs $packdir/pack)" && + test_commit E && + + git prune-packed && + + cat >in <<-EOF && + pack-$B.pack + ^pack-$C.pack + pack-$D.pack + EOF + + # With just --stdin-packs, pack "A" is unknown to us, so + # only objects from packs "B" and "D" are included in + # the output pack. + P=$(git pack-objects --stdin-packs $packdir/pack expect && + objects_in_packs $P >actual && + test_cmp expect actual && + + # But with --stdin-packs=follow, objects from both + # included packs reach objects from the unknown pack, so + # objects from pack "A" are included in the output pack + # in addition to the above. + P=$(git pack-objects --stdin-packs=follow $packdir/pack expect && + objects_in_packs $P >actual && + test_cmp expect actual && + + # And with --unpacked, we will pick up objects from unknown + # packs that are reachable from loose objects. Loose object E + # reaches objects in pack A, but there are three excluded packs + # in between. + # + # The resulting pack should include objects reachable from E + # that are not present in packs B, C, or D, along with those + # present in pack A.
+ cat >in <<-EOF && + ^pack-$B.pack + ^pack-$C.pack + ^pack-$D.pack + EOF + + P=$(git pack-objects --stdin-packs=follow --unpacked \ + $packdir/pack expect.raw && + sort expect.raw >expect && + objects_in_packs $P >actual && + test_cmp expect actual + ) +' + +stdin_packs__follow_with_only () { + rm -fr stdin_packs__follow_with_only && + git init stdin_packs__follow_with_only && + ( + cd stdin_packs__follow_with_only && + + test_commit A && + test_commit B && + + git rev-parse "$@" >B.objects && + + echo A | git pack-objects --revs $packdir/pack && + B="$(git pack-objects $packdir/pack objs && + for obj in $(cat objs) + do + rm -f $objdir/$(test_oid_to_path $obj) || return 1 + done && + + ( cd $packdir && ls pack-*.pack ) >in && + git pack-objects --stdin-packs=follow --stdout >/dev/null Date: Mon, 23 Jun 2025 18:32:32 -0400 Subject: [PATCH 09/42] repack: exclude cruft pack(s) from the MIDX where possible In ddee3703b3 (builtin/repack.c: add cruft packs to MIDX during geometric repack, 2022-05-20), repack began adding cruft pack(s) to the MIDX with '--write-midx' to ensure that the resulting MIDX was always closed under reachability in order to generate reachability bitmaps. While the previous patch added the '--stdin-packs=follow' option to pack-objects, it is not yet on by default. Given that, suppose you have a once-unreachable object packed in a cruft pack, which later becomes reachable from one or more objects in a geometrically repacked pack. That once-unreachable object *won't* appear in the new pack, since the cruft pack was not specified as included or excluded when the geometrically repacked pack was created with 'pack-objects --stdin-packs' (*not* '--stdin-packs=follow', which is not on). If that new pack is included in a MIDX without the cruft pack, then trying to generate bitmaps for that MIDX may fail. This happens when the bitmap selection process picks one or more commits which reach the once-unreachable objects. 
To mitigate this failure mode, commit ddee3703b3 ensures that the MIDX will be closed under reachability by including cruft pack(s). If cruft pack(s) were not included, we would fail to generate a MIDX bitmap. But ddee3703b3 alludes to the fact that this is sub-optimal by saying "[...] it's desirable to avoid including cruft packs in the MIDX because it causes the MIDX to store a bunch of objects which are likely to get thrown away.", which is true, but hides an even larger problem. If repositories rarely prune their unreachable objects and/or have many of them, the MIDX must keep track of a large number of objects which bloats the MIDX and slows down object lookup. This is doubly unfortunate because the vast majority of objects in cruft pack(s) are unlikely to be read. But any object lookups that go through the MIDX must binary search over them anyway, slowing down object lookups using the MIDX. This patch causes geometrically-repacked packs to contain a copy of any once-unreachable object(s) with 'git pack-objects --stdin-packs=follow', allowing us to avoid including any cruft packs in the MIDX. This is because a sequence of geometrically-repacked packs that were all generated with '--stdin-packs=follow' is guaranteed to have its union be closed under reachability. Note that you cannot guarantee that a collection of packs is closed under reachability if not all of them were generated with "following" as above. One tell-tale sign that not all geometrically-repacked packs in the MIDX were generated with "following" is to see if there is a pack in the existing MIDX that is not going to be somehow represented (either verbatim or as part of a geometric rollup) in the new MIDX. If there is, then starting to generate packs with "following" during geometric repacking won't work, since it's open to the same race as described above.
But if you're starting from scratch (e.g., building the first MIDX after an all-into-one '--cruft' repack), then you can guarantee that the union of subsequently generated packs from geometric repacking *is* closed under reachability. (One exception here is when "starting from scratch" results in a noop repack, e.g., because the non-cruft pack(s) in a repository already form a geometric progression. Since we can't tell whether or not those were generated with '--stdin-packs=follow', they may depend on once-unreachable objects, so we have to include the cruft pack in the MIDX in this case.) Detect when this is the case and avoid including cruft packs in the MIDX where possible. The existing behavior remains the default, and the new behavior is available with the config 'repack.midxMustContainCruft' set to 'false'. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/repack.adoc | 7 ++ builtin/repack.c | 187 +++++++++++++++++++++++++++---- t/t7704-repack-cruft.sh | 145 ++++++++++++++++++++++++ 3 files changed, 319 insertions(+), 20 deletions(-) diff --git a/Documentation/config/repack.adoc b/Documentation/config/repack.adoc index c79af6d7b8b5d7..e9e78dcb198292 100644 --- a/Documentation/config/repack.adoc +++ b/Documentation/config/repack.adoc @@ -39,3 +39,10 @@ repack.cruftThreads:: a cruft pack and the respective parameters are not given over the command line. See similarly named `pack.*` configuration variables for defaults and meaning. + +repack.midxMustContainCruft:: + When set to true, linkgit:git-repack[1] will unconditionally include + cruft pack(s), if any, in the multi-pack index when invoked with + `--write-midx`. When false, cruft packs are only included in the MIDX + when necessary (e.g., because they might be required to form a + reachability closure with MIDX bitmaps). Defaults to true.
diff --git a/builtin/repack.c b/builtin/repack.c index 5ddc6e7f9573d4..8d1540a0fda8e1 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -39,6 +39,7 @@ static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; static char *packdir, *packtmp_name, *packtmp; +static int midx_must_contain_cruft = 1; static const char *const git_repack_usage[] = { N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n" @@ -108,6 +109,10 @@ static int repack_config(const char *var, const char *value, free(cruft_po_args->threads); return git_config_string(&cruft_po_args->threads, var, value); } + if (!strcmp(var, "repack.midxmustcontaincruft")) { + midx_must_contain_cruft = git_config_bool(var, value); + return 0; + } return git_default_config(var, value, ctx, cb); } @@ -690,6 +695,77 @@ static void free_pack_geometry(struct pack_geometry *geometry) free(geometry->pack); } +static int midx_has_unknown_packs(char **midx_pack_names, + size_t midx_pack_names_nr, + struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + size_t i; + + string_list_sort(include); + + for (i = 0; i < midx_pack_names_nr; i++) { + const char *pack_name = midx_pack_names[i]; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. + * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. 
+ */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + struct midx_snapshot_ref_data { struct tempfile *f; struct oidset seen; @@ -758,6 +834,8 @@ static void midx_snapshot_refs(struct tempfile *f) static void midx_included_packs(struct string_list *include, struct existing_packs *existing, + char **midx_pack_names, + size_t midx_pack_names_nr, struct string_list *names, struct pack_geometry *geometry) { @@ -811,26 +889,56 @@ static void midx_included_packs(struct string_list *include, } } - for_each_string_list_item(item, &existing->cruft_packs) { + if (midx_must_contain_cruft || + midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, + include, geometry, existing)) { /* - * When doing a --geometric repack, there is no need to check - * for deleted packs, since we're by definition not doing an - * ALL_INTO_ONE repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any packs which are - * enqueued for deletion. + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. 
* - * So we could omit the conditional below in the --geometric - * case, but doing so is unnecessary since no packs are marked - * as pending deletion (since we only call - * `mark_packs_for_deletion()` when doing an all-into-one - * repack). + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. */ - if (pack_is_marked_for_deletion(item)) - continue; + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `mark_packs_for_deletion()` when doing an + * all-into-one repack). + */ + if (pack_is_marked_for_deletion(item)) + continue; - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. 
+ */ + ; } strbuf_release(&buf); @@ -1145,6 +1253,8 @@ int cmd_repack(int argc, struct tempfile *refs_snapshot = NULL; int i, ext, ret; int show_progress; + char **midx_pack_names = NULL; + size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ int delete_redundant = 0; @@ -1361,7 +1471,10 @@ int cmd_repack(int argc, !(pack_everything & PACK_CRUFT)) strvec_push(&cmd.args, "--pack-loose-unreachable"); } else if (geometry.split_factor) { - strvec_push(&cmd.args, "--stdin-packs"); + if (midx_must_contain_cruft) + strvec_push(&cmd.args, "--stdin-packs"); + else + strvec_push(&cmd.args, "--stdin-packs=follow"); strvec_push(&cmd.args, "--unpacked"); } else { strvec_push(&cmd.args, "--unpacked"); @@ -1401,8 +1514,25 @@ int cmd_repack(int argc, if (ret) goto cleanup; - if (!names.nr && !po_args.quiet) - printf_ln(_("Nothing new to pack.")); + if (!names.nr) { + if (!po_args.quiet) + printf_ln(_("Nothing new to pack.")); + /* + * If we didn't write any new packs, the non-cruft packs + * may refer to once-unreachable objects in the cruft + * pack(s). + * + * If there isn't already a MIDX, the one we write + * must include the cruft pack(s), in case the + * non-cruft pack(s) refer to once-cruft objects. + * + * If there is already a MIDX, we can punt here, since + * midx_has_unknown_packs() will make the decision for + * us. 
+ */ + if (!get_local_multi_pack_index(the_repository)) + midx_must_contain_cruft = 1; + } if (pack_everything & PACK_CRUFT) { const char *pack_prefix = find_pack_prefix(packdir, packtmp); @@ -1483,6 +1613,19 @@ int cmd_repack(int argc, string_list_sort(&names); + if (get_local_multi_pack_index(the_repository)) { + struct multi_pack_index *m = + get_local_multi_pack_index(the_repository); + + ALLOC_ARRAY(midx_pack_names, + m->num_packs + m->num_packs_in_base); + + for (; m; m = m->base_midx) + for (uint32_t i = 0; i < m->num_packs; i++) + midx_pack_names[midx_pack_names_nr++] = + xstrdup(m->pack_names[i]); + } + close_object_store(the_repository->objects); /* @@ -1524,7 +1667,8 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; - midx_included_packs(&include, &existing, &names, &geometry); + midx_included_packs(&include, &existing, midx_pack_names, + midx_pack_names_nr, &names, &geometry); ret = write_midx_included_packs(&include, &geometry, &names, refs_snapshot ? 
get_tempfile_path(refs_snapshot) : NULL, @@ -1575,6 +1719,9 @@ int cmd_repack(int argc, string_list_clear(&names, 1); existing_packs_release(&existing); free_pack_geometry(&geometry); + for (size_t i = 0; i < midx_pack_names_nr; i++) + free(midx_pack_names[i]); + free(midx_pack_names); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh index 8aebfb45f5d1ac..aa2e2e6ad887f2 100755 --- a/t/t7704-repack-cruft.sh +++ b/t/t7704-repack-cruft.sh @@ -724,4 +724,149 @@ test_expect_success 'cruft repack respects --quiet' ' ) ' +setup_cruft_exclude_tests() { + git init "$1" && + ( + cd "$1" && + + git config repack.midxMustContainCruft false && + + test_commit one && + + test_commit --no-tag two && + two="$(git rev-parse HEAD)" && + test_commit --no-tag three && + three="$(git rev-parse HEAD)" && + git reset --hard one && + git reflog expire --all --expire=all && + + GIT_TEST_MULTI_PACK_INDEX=0 git repack --cruft -d && + + git merge $two && + test_commit four + ) +} + +test_expect_success 'repack --write-midx excludes cruft where possible' ' + setup_cruft_exclude_tests exclude-cruft-when-possible && + ( + cd exclude-cruft-when-possible && + + GIT_TEST_MULTI_PACK_INDEX=0 \ + git repack -d --geometric=2 --write-midx --write-bitmap-index && + + test-tool read-midx --show-objects $objdir >midx && + cruft="$(ls $packdir/*.mtimes)" && + test_grep ! 
"$(basename "$cruft" .mtimes).idx" midx && + + git rev-list --all --objects --no-object-names >reachable.raw && + sort reachable.raw >reachable.objects && + awk "/\.pack$/ { print \$1 }" midx.objects && + + test_cmp reachable.objects midx.objects + ) +' + +test_expect_success 'repack --write-midx includes cruft when instructed' ' + setup_cruft_exclude_tests exclude-cruft-when-instructed && + ( + cd exclude-cruft-when-instructed && + + GIT_TEST_MULTI_PACK_INDEX=0 \ + git -c repack.midxMustContainCruft=true repack \ + -d --geometric=2 --write-midx --write-bitmap-index && + + test-tool read-midx --show-objects $objdir >midx && + cruft="$(ls $packdir/*.mtimes)" && + test_grep "$(basename "$cruft" .mtimes).idx" midx && + + git cat-file --batch-check="%(objectname)" --batch-all-objects \ + >all.objects && + awk "/\.pack$/ { print \$1 }" midx.objects && + + test_cmp all.objects midx.objects + ) +' + +test_expect_success 'repack --write-midx includes cruft when necessary' ' + setup_cruft_exclude_tests exclude-cruft-when-necessary && + ( + cd exclude-cruft-when-necessary && + + test_path_is_file $(ls $packdir/pack-*.mtimes) && + ( cd $packdir && ls pack-*.idx ) | sort >packs.all && + git multi-pack-index write --stdin-packs --bitmap midx && + awk "/\.pack$/ { print \$1 }" midx.objects && + git cat-file --batch-all-objects --batch-check="%(objectname)" \ + >expect.objects && + test_cmp expect.objects midx.objects && + + grep "^pack-" midx >midx.packs && + test_line_count = "$(($(wc -l packs.all && + cruft="$(ls $packdir/pack-*.mtimes)" && + cruft="${cruft%.mtimes}.idx" && + + for idx in $(grep -v $cruft out && + wc -l sizes.raw && + + # Make sure that there are two non-cruft packs, and + # that one of them contains at least twice as many + # objects as the other, ensuring that they are already + # in a geometric progression. 
+ sort -n sizes.raw >sizes && + test_line_count = 2 sizes && + s1=$(head -n 1 sizes) && + s2=$(tail -n 1 sizes) && + test "$s2" -gt "$((2 * $s1))" && + + git -c repack.midxMustContainCruft=false repack --geometric=2 \ + --write-midx --write-bitmap-index + ) +' + test_done From ca6daa1368eb9b0b48f64ef57907821318d7971c Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:27 +0000 Subject: [PATCH 10/42] hash: add a constant for the default hash algorithm Right now, SHA-1 is the default hash algorithm in Git. However, this may change in the future. We have many places in our code that use the SHA-1 constant to indicate the default hash if none is specified, but it will end up being more practical to specify this explicitly and clearly using a constant for whatever the default hash algorithm is. Then, if we decide to change it in the future, we can simply replace the constant representing the default with a new value. For these reasons, introduce GIT_HASH_DEFAULT to represent the default hash algorithm. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- hash.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hash.h b/hash.h index d6422ddf454c17..0d3d85e04cbd5c 100644 --- a/hash.h +++ b/hash.h @@ -174,6 +174,8 @@ static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *s #define GIT_HASH_SHA256 2 /* Number of algorithms supported (including unknown). */ #define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1) +/* Default hash algorithm if unspecified. */ +#define GIT_HASH_DEFAULT GIT_HASH_SHA1 /* "sha1", big-endian */ #define GIT_SHA1_FORMAT_ID 0x73686131 From 1f68f3da877a91fefd6cc84b79986af2ef73d21e Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:28 +0000 Subject: [PATCH 11/42] hash: add a constant for the legacy hash algorithm We have a variety of uses of GIT_HASH_SHA1 littered throughout our code.
Some of these really mean to represent specifically SHA-1, but some actually represent the original hash algorithm used in Git which is implied by older, legacy formats and protocols which do not contain hash information. For instance, the bundle v1 and v2 formats do not contain hash algorithm information, and thus SHA-1 is implied by the use of these formats. Add a constant for documentary purposes which indicates this value. It will always be the same as SHA-1, since this is an essential part of these formats, but its use indicates this particular reason and not any other reason why SHA-1 might be used. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- hash.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hash.h b/hash.h index 0d3d85e04cbd5c..953e840d155bc6 100644 --- a/hash.h +++ b/hash.h @@ -176,6 +176,8 @@ static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *s #define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1) /* Default hash algorithm if unspecified. */ #define GIT_HASH_DEFAULT GIT_HASH_SHA1 +/* Legacy hash algorithm. Implied for older data formats which don't specify. */ +#define GIT_HASH_SHA1_LEGACY GIT_HASH_SHA1 /* "sha1", big-endian */ #define GIT_SHA1_FORMAT_ID 0x73686131 From dc9c16c2fc8222364277696cb4d70782281d3c06 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:29 +0000 Subject: [PATCH 12/42] builtin: use default hash when outside a repository We have some commands that can operate inside or outside a repository. If we're operating outside a repository, we clearly cannot use the repository's hash algorithm as a default since it doesn't exist, so instead, let's pick the default instead of specifically SHA-1. Right now this results in no functional change since the default is SHA-1, but that may change in the future. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- builtin/apply.c | 2 +- builtin/diff.c | 2 +- builtin/hash-object.c | 2 +- builtin/index-pack.c | 2 +- builtin/ls-remote.c | 2 +- builtin/patch-id.c | 2 +- builtin/shortlog.c | 2 +- builtin/show-index.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/builtin/apply.c b/builtin/apply.c index a1e20c593d0903..d642a402516f30 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -29,7 +29,7 @@ int cmd_apply(int argc, * cf. https://lore.kernel.org/git/xmqqcypfcmn4.fsf@gitster.g/ */ if (!the_hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); argc = apply_parse_options(argc, argv, &state, &force_apply, &options, diff --git a/builtin/diff.c b/builtin/diff.c index fa963808c31848..357702df9efcb7 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -483,7 +483,7 @@ int cmd_diff(int argc, * configurable via a command line option. */ if (nongit) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); init_diff_ui_defaults(); git_config(git_diff_ui_config, NULL); diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 6a99ec250d028f..213a302e05bd28 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -104,7 +104,7 @@ int cmd_hash_object(int argc, prefix = setup_git_directory_gently(&nongit); if (nongit && !the_hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); if (vpath && prefix) { vpath_free = prefix_filename(prefix, vpath); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index bb7925bd29f0aa..352ce7f88ae0ec 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -2034,7 +2034,7 @@ int cmd_index_pack(int argc, * choice but to guess the object hash. 
*/ if (!the_repository->hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); opts.flags &= ~(WRITE_REV | WRITE_REV_VERIFY); if (rev_index) { diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index 01a4d4daa1faba..df09000b30de50 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -112,7 +112,7 @@ int cmd_ls_remote(int argc, * depending on what object hash the remote uses. */ if (!the_repository->hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); packet_trace_identity("ls-remote"); diff --git a/builtin/patch-id.c b/builtin/patch-id.c index cdef2ec10abcd5..26f04b0335da7e 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -254,7 +254,7 @@ int cmd_patch_id(int argc, * the code that computes patch IDs to always use SHA1. */ if (!the_hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); generate_id_list(opts ? opts > 1 : config.stable, opts ? opts == 3 : config.verbatim); diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 30075b67be8dac..795a6316257ce4 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -418,7 +418,7 @@ int cmd_shortlog(int argc, * git/nongit so that we do not have to do this. */ if (nongit && !the_hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); git_config(git_default_config, NULL); shortlog_init(&log); diff --git a/builtin/show-index.c b/builtin/show-index.c index 9d4ecf5e7ba010..2c3e2940ce6bb1 100644 --- a/builtin/show-index.c +++ b/builtin/show-index.c @@ -47,7 +47,7 @@ int cmd_show_index(int argc, * the index file passed in and use that instead. 
*/ if (!the_hash_algo) - repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); hashsz = the_hash_algo->rawsz; From 667d251a04c1dd769fb5a71bbe94d6d15ae594f1 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:30 +0000 Subject: [PATCH 13/42] Use legacy hash for legacy formats We have a large variety of data formats and protocols where no hash algorithm was defined and the default was assumed to always be SHA-1. Instead of explicitly stating SHA-1, let's use the constant to represent the legacy hash algorithm (which is still SHA-1) so that it's clear for documentary purposes that it's a legacy fallback option and not an intentional choice to use SHA-1. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 2 +- bundle.c | 4 ++-- connect.c | 6 +++--- fetch-pack.c | 2 +- pkt-line.c | 2 +- remote-curl.c | 2 +- serve.c | 2 +- setup.c | 4 ++-- transport.c | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index a317d6c278de21..24b33a3a5c35a6 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2136,7 +2136,7 @@ static struct command *read_head_info(struct packet_reader *reader, use_push_options = 1; hash = parse_feature_value(feature_list, "object-format", &len, NULL); if (!hash) { - hash = hash_algos[GIT_HASH_SHA1].name; + hash = hash_algos[GIT_HASH_SHA1_LEGACY].name; len = strlen(hash); } if (xstrncmpz(the_hash_algo->name, hash, len)) diff --git a/bundle.c b/bundle.c index b0a3fee2efa7b3..61e81bb0c37e7d 100644 --- a/bundle.c +++ b/bundle.c @@ -95,7 +95,7 @@ int read_bundle_header_fd(int fd, struct bundle_header *header, * by an "object-format=" capability, which is being handled in * `parse_capability()`. 
*/ - header->hash_algo = &hash_algos[GIT_HASH_SHA1]; + header->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; /* The bundle header ends with an empty line */ while (!strbuf_getwholeline_fd(&buf, fd, '\n') && @@ -507,7 +507,7 @@ int create_bundle(struct repository *r, const char *path, * SHA1. * 2. @filter is required because we parsed an object filter. */ - if (the_hash_algo != &hash_algos[GIT_HASH_SHA1] || revs.filter.choice) + if (the_hash_algo != &hash_algos[GIT_HASH_SHA1_LEGACY] || revs.filter.choice) min_version = 3; if (argc > 1) { diff --git a/connect.c b/connect.c index 3280435331038e..e77287f426cdfd 100644 --- a/connect.c +++ b/connect.c @@ -251,7 +251,7 @@ static void process_capabilities(struct packet_reader *reader, size_t *linelen) reader->hash_algo = &hash_algos[hash_algo]; free(hash_name); } else { - reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; + reader->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; } } @@ -500,7 +500,7 @@ static void send_capabilities(int fd_out, struct packet_reader *reader) reader->hash_algo = &hash_algos[hash_algo]; packet_write_fmt(fd_out, "object-format=%s", reader->hash_algo->name); } else { - reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; + reader->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; } if (server_feature_v2("promisor-remote", &promisor_remote_info)) { char *reply = promisor_remote_reply(promisor_remote_info); @@ -665,7 +665,7 @@ int server_supports_hash(const char *desired, int *feature_supported) if (feature_supported) *feature_supported = !!hash; if (!hash) { - hash = hash_algos[GIT_HASH_SHA1].name; + hash = hash_algos[GIT_HASH_SHA1_LEGACY].name; len = strlen(hash); } while (hash) { diff --git a/fetch-pack.c b/fetch-pack.c index fa4231fee74c9f..95f66ffc1db02d 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1342,7 +1342,7 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf, die(_("mismatched algorithms: client %s; server %s"), the_hash_algo->name, hash_name); packet_buf_write(req_buf, 
"object-format=%s", the_hash_algo->name); - } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) { + } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1_LEGACY) { die(_("the server does not support algorithm '%s'"), the_hash_algo->name); } diff --git a/pkt-line.c b/pkt-line.c index a5bcbc96fb340f..fc583feb26510d 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -617,7 +617,7 @@ void packet_reader_init(struct packet_reader *reader, int fd, reader->buffer_size = sizeof(packet_buffer); reader->options = options; reader->me = "git"; - reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; + reader->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; strbuf_init(&reader->scratch, 0); } diff --git a/remote-curl.c b/remote-curl.c index b8bc3a80cf4142..84f46947802da8 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -285,7 +285,7 @@ static const struct git_hash_algo *detect_hash_algo(struct discovery *heads) * back to SHA1, which may or may not be correct. */ if (!p) - return &hash_algos[GIT_HASH_SHA1]; + return &hash_algos[GIT_HASH_SHA1_LEGACY]; algo = hash_algo_by_length((p - heads->buf) / 2); if (algo == GIT_HASH_UNKNOWN) diff --git a/serve.c b/serve.c index e3ccf1505ca1a0..53ecab3b42b44f 100644 --- a/serve.c +++ b/serve.c @@ -14,7 +14,7 @@ static int advertise_sid = -1; static int advertise_object_info = -1; -static int client_hash_algo = GIT_HASH_SHA1; +static int client_hash_algo = GIT_HASH_SHA1_LEGACY; static int always_advertise(struct repository *r UNUSED, struct strbuf *value UNUSED) diff --git a/setup.c b/setup.c index f93bd6a24a5d9c..3d2b3e745b579e 100644 --- a/setup.c +++ b/setup.c @@ -2222,11 +2222,11 @@ void initialize_repository_version(int hash_algo, * version will get adjusted by git-clone(1) once it has learned about * the remote repository's format. 
*/ - if (hash_algo != GIT_HASH_SHA1 || + if (hash_algo != GIT_HASH_SHA1_LEGACY || ref_storage_format != REF_STORAGE_FORMAT_FILES) target_version = GIT_REPO_VERSION_READ; - if (hash_algo != GIT_HASH_SHA1 && hash_algo != GIT_HASH_UNKNOWN) + if (hash_algo != GIT_HASH_SHA1_LEGACY && hash_algo != GIT_HASH_UNKNOWN) git_config_set("extensions.objectformat", hash_algos[hash_algo].name); else if (reinit) diff --git a/transport.c b/transport.c index 6c2801bcbd9d02..c123ac1e38b815 100644 --- a/transport.c +++ b/transport.c @@ -1243,7 +1243,7 @@ struct transport *transport_get(struct remote *remote, const char *url) ret->smart_options->receivepack = remote->receivepack; } - ret->hash_algo = &hash_algos[GIT_HASH_SHA1]; + ret->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; return ret; } From d6e616cee741fc3f67fd3b7c328175b932d0aaa5 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:31 +0000 Subject: [PATCH 14/42] setup: use the default algorithm to initialize repo format When we define a new repository format with REPOSITORY_FORMAT_INIT, we always use GIT_HASH_SHA1, and this value ends up getting used as the default value to initialize a repository if none of the command line, environment, or config tell us to do otherwise. Because we might not always want to use SHA-1 as the default, let's instead specify the default hash algorithm constant so that we will use whatever the specified default is. However, we also need to continue to read older repositories. If we're in a v0 repository or extensions.objectformat is not set, then we must continue to default to the original hash algorithm: SHA-1. If an algorithm is set explicitly, however, it will override the hash_algo member of the repository_format struct and we'll get the right value. Similarly, if the repository was initialized before Git 0.99.3, then it may lack a core.repositoryformatversion key, and some repositories lack a config file altogether. 
In both cases, format->version is -1 and we need to assume that SHA-1 is in use. Because clear_repository_format reinitializes the struct repository_format and therefore sets the hash_algo member to the default (which could in the future not be SHA-1), we need to reset this member explicitly. We know, however, that at the point we call read_repository_format, we are actually reading an existing repository and not initializing a new one or operating outside of a repository, so we are not changing the default behavior back to SHA-1 if the default algorithm is different. It is potentially questionable that we ignore all repository configuration if there is a config file but it doesn't have core.repositoryformatversion set, in which case we reset all of the configuration to the default. However, it is unclear what the right thing to do instead with such an old repository is and a simple git init will add the missing entry, so for now, we simply honor what the existing code does and reset the value to the default, simply adding our initialization to SHA-1. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- setup.c | 5 ++++- setup.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.c b/setup.c index 3d2b3e745b579e..03a61bd06afb77 100644 --- a/setup.c +++ b/setup.c @@ -835,9 +835,12 @@ static void init_repository_format(struct repository_format *format) int read_repository_format(struct repository_format *format, const char *path) { clear_repository_format(format); + format->hash_algo = GIT_HASH_SHA1_LEGACY; git_config_from_file(check_repo_format, path, format); - if (format->version == -1) + if (format->version == -1) { clear_repository_format(format); + format->hash_algo = GIT_HASH_SHA1_LEGACY; + } return format->version; } diff --git a/setup.h b/setup.h index 18dc3b73686ce2..8522fa8575da71 100644 --- a/setup.h +++ b/setup.h @@ -149,7 +149,7 @@ struct repository_format { { \ .version = -1, \ .is_bare = -1, \ - .hash_algo = GIT_HASH_SHA1, \ + .hash_algo = GIT_HASH_DEFAULT, \ .ref_storage_format = REF_STORAGE_FORMAT_FILES, \ .unknown_extensions = STRING_LIST_INIT_DUP, \ .v1_only_extensions = STRING_LIST_INIT_DUP, \ From c470ac4ac41b02994f2f10b4134c40661d7435be Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:32 +0000 Subject: [PATCH 15/42] t: default to compile-time default hash if not set Right now, the default compile-time hash is SHA-1. However, in the future, this might change and it would be helpful to gracefully handle this case in our testsuite. To avoid making these assumptions, let's introduce a variable that contains the built-in default hash and use it in our setup code as the fallback value if no hash was explicitly set. For now, this is always SHA-1, but in a future commit, we'll allow adjusting this and the variable will be more useful. To allow us to make our tests more robust, allow test_oid to take the --hash=builtin option to specify this hash, whatever it is. Additionally, add a DEFAULT_HASH_ALGORITHM prerequisite to check for the compile-time hash. 
Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/test-lib-functions.sh | 5 ++++- t/test-lib.sh | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index bee4a2ca3472e1..6ec95ea51f08dc 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1695,7 +1695,7 @@ test_set_hash () { # Detect the hash algorithm in use. test_detect_hash () { - case "$GIT_TEST_DEFAULT_HASH" in + case "${GIT_TEST_DEFAULT_HASH:-$GIT_TEST_BUILTIN_HASH}" in "sha256") test_hash_algo=sha256 test_compat_hash_algo=sha1 @@ -1767,6 +1767,9 @@ test_oid () { --hash=compat) algo="$test_compat_hash_algo" && shift;; + --hash=builtin) + algo="$GIT_TEST_BUILTIN_HASH" && + shift;; --hash=*) algo="${1#--hash=}" && shift;; diff --git a/t/test-lib.sh b/t/test-lib.sh index 92d0db13d7429d..be7189067898fc 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -536,7 +536,8 @@ export GIT_COMMITTER_EMAIL GIT_COMMITTER_NAME export GIT_COMMITTER_DATE GIT_AUTHOR_DATE export EDITOR -GIT_DEFAULT_HASH="${GIT_TEST_DEFAULT_HASH:-sha1}" +GIT_TEST_BUILTIN_HASH=sha1 +GIT_DEFAULT_HASH="${GIT_TEST_DEFAULT_HASH:-$GIT_TEST_BUILTIN_HASH}" export GIT_DEFAULT_HASH GIT_DEFAULT_REF_FORMAT="${GIT_TEST_DEFAULT_REF_FORMAT:-files}" export GIT_DEFAULT_REF_FORMAT @@ -1895,6 +1896,10 @@ test_lazy_prereq SHA1 ' esac ' +test_lazy_prereq DEFAULT_HASH_ALGORITHM ' + test "$GIT_TEST_BUILTIN_HASH" = "$GIT_DEFAULT_HASH" +' + test_lazy_prereq DEFAULT_REPO_FORMAT ' test_have_prereq SHA1,REFFILES ' From 6866b422608ebfd25ba65935fd2d5378029ec3ea Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:33 +0000 Subject: [PATCH 16/42] t1007: choose the built-in hash outside of a repo Right now, the built-in default hash is always SHA-1, but that will change in a future commit. Instead of assuming that operating outside of a repository will always use SHA-1, simply ask test_oid for the built-in hash instead, which will always be correct. 
Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t1007-hash-object.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index dbbe9fb0d4b19b..b4e8d04885b163 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -252,9 +252,9 @@ test_expect_success '--literally complains about non-standard types' ' test_must_fail git hash-object -t bogus --literally --stdin ' -test_expect_success '--stdin outside of repository (uses SHA-1)' ' +test_expect_success '--stdin outside of repository (uses default hash)' ' nongit git hash-object --stdin <hello >actual && - echo "$(test_oid --hash=sha1 hello)" >expect && + echo "$(test_oid --hash=builtin hello)" >expect && test_cmp expect actual ' From f957ce078f61266b3212b88d9c357a1b7f071a6f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:34 +0000 Subject: [PATCH 17/42] t4042: choose the built-in hash outside of a repo Right now, the built-in default hash is always SHA-1, but that will change in a future commit. Instead of assuming that operating outside of a repository will always use SHA-1, provide constants for both algorithms and then simply ask test_oid for the built-in hash instead, which will always be correct. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- t/t4042-diff-textconv-caching.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh index ff0e73531b90ed..31018ceba288d0 100755 --- a/t/t4042-diff-textconv-caching.sh +++ b/t/t4042-diff-textconv-caching.sh @@ -120,6 +120,14 @@ test_expect_success 'log notes cache and still use cache for -p' ' ' test_expect_success 'caching is silently ignored outside repo' ' + test_oid_cache <<-\EOM && + oid1 sha1:5626abf + oid1 sha256:a4ed1f3 + oid2 sha1:f719efd + oid2 sha256:aa9e7dc + EOM + oid1=$(test_oid --hash=builtin oid1) && + oid2=$(test_oid --hash=builtin oid2) && mkdir -p non-repo && echo one >non-repo/one && echo two >non-repo/two && @@ -129,9 +137,9 @@ test_expect_success 'caching is silently ignored outside repo' ' -c diff.test.textconv="tr a-z A-Z <" \ -c diff.test.cachetextconv=true \ diff --no-index one two >actual && - cat >expect <<-\EOF && + cat >expect <<-EOF && diff --git a/one b/two - index 5626abf..f719efd 100644 + index $oid1..$oid2 100644 --- a/one +++ b/two @@ -1 +1 @@ From 9d619f2ef8c95a791d34f5d3cb2793dcc0b8610d Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:35 +0000 Subject: [PATCH 18/42] t5300: choose the built-in hash outside of a repo Right now, the built-in default hash is always SHA-1, but that will change in a future commit. Instead of assuming that operating outside of a repository will always use SHA-1, look up the default hash algorithm for operating outside of a repository using an appropriate environment variable, which will always be correct. Additionally, for operations outside of a repository, use the DEFAULT_HASH_ALGORITHM prerequisite rather than SHA1. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- t/t5300-pack-object.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index a5932b6a8be0ac..5013373891abf2 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -525,7 +525,7 @@ test_expect_success 'index-pack --strict works in non-repo' ' test_path_is_file foo.idx ' -test_expect_success SHA1 'show-index works OK outside a repository' ' +test_expect_success DEFAULT_HASH_ALGORITHM 'show-index works OK outside a repository' ' nongit git show-index Date: Tue, 1 Jul 2025 21:22:36 +0000 Subject: [PATCH 19/42] help: add a build option for default hash We'd like users to be able to determine the hash algorithm that is the builtin default in their version of Git. This is useful for troubleshooting, especially when we decide to change the default. Add an entry for the default hash in the output of git version --build-options so that users can easily access that information and include it in bug reports. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- help.c | 1 + 1 file changed, 1 insertion(+) diff --git a/help.c b/help.c index 21b778707a6a65..bd0be2ee574cc9 100644 --- a/help.c +++ b/help.c @@ -810,6 +810,7 @@ void get_version_info(struct strbuf *buf, int show_build_options) SHA1_UNSAFE_BACKEND); #endif strbuf_addf(buf, "SHA-256: %s\n", SHA256_BACKEND); + strbuf_addf(buf, "default-hash: %s\n", hash_algos[GIT_HASH_DEFAULT].name); } } From c79bb70a2e7d9158ec165ea16ad45371cd6e350d Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 1 Jul 2025 21:22:37 +0000 Subject: [PATCH 20/42] Enable SHA-256 by default in breaking changes mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our document on breaking changes indicates that we intend to default to SHA-256 in Git 3.0. Since most people choose the default option, this is an important security upgrade to our defaults. 
To allow people to test this case, when WITH_BREAKING_CHANGES is set in the configuration, build Git with SHA-256 as the default hash. Update the testsuite to use the build options information to automatically choose the right value. Note that if the command substitution for GIT_TEST_BUILTIN_HASH fails, so does the testsuite—and quite spectacularly at that. Thus, the case where the Git binary is somehow subtly broken will not go undetected. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- hash.h | 8 +++++++- t/test-lib.sh | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hash.h b/hash.h index 953e840d155bc6..3fcbe9bcba6528 100644 --- a/hash.h +++ b/hash.h @@ -174,8 +174,14 @@ static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *s #define GIT_HASH_SHA256 2 /* Number of algorithms supported (including unknown). */ #define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1) + /* Default hash algorithm if unspecified. */ -#define GIT_HASH_DEFAULT GIT_HASH_SHA1 +#ifdef WITH_BREAKING_CHANGES +# define GIT_HASH_DEFAULT GIT_HASH_SHA256 +#else +# define GIT_HASH_DEFAULT GIT_HASH_SHA1 +#endif + /* Legacy hash algorithm. Implied for older data formats which don't specify. 
*/ #define GIT_HASH_SHA1_LEGACY GIT_HASH_SHA1 diff --git a/t/test-lib.sh b/t/test-lib.sh index be7189067898fc..315543f2933a09 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -536,7 +536,7 @@ export GIT_COMMITTER_EMAIL GIT_COMMITTER_NAME export GIT_COMMITTER_DATE GIT_AUTHOR_DATE export EDITOR -GIT_TEST_BUILTIN_HASH=sha1 +GIT_TEST_BUILTIN_HASH=$("$GIT_BUILD_DIR/git" version --build-options | sed -ne 's/^default-hash: //p') GIT_DEFAULT_HASH="${GIT_TEST_DEFAULT_HASH:-$GIT_TEST_BUILTIN_HASH}" export GIT_DEFAULT_HASH GIT_DEFAULT_REF_FORMAT="${GIT_TEST_DEFAULT_REF_FORMAT:-files}" From 026f2e3be23b2e5f227e1973b480481a1d48f6a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Mon, 7 Jul 2025 18:53:24 +0000 Subject: [PATCH 21/42] doc: convert git-log to new documentation format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Switch the synopsis to a synopsis block which will automatically format placeholders in italics and keywords in monospace - Use _<placeholder>_ instead of <placeholder> in the description - Use `backticks` for keywords and more complex option descriptions. The new rendering engine will apply synopsis rules to these spans. We also transform inline descriptions of possible values of option --decorate into a list, which is more readable and extensible. Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/git-log.adoc | 86 ++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/Documentation/git-log.adoc b/Documentation/git-log.adoc index ae8a7e2d638949..b6f3d92c435f56 100644 --- a/Documentation/git-log.adoc +++ b/Documentation/git-log.adoc @@ -8,8 +8,8 @@ git-log - Show commit logs SYNOPSIS -------- -[verse] -'git log' [<options>] [<revision-range>] [[--] <path>...] +[synopsis] +git log [<options>] [<revision-range>] [[--] <path>...] DESCRIPTION ----------- @@ -27,28 +27,34 @@ each commit introduces are shown. 
OPTIONS ------- ---follow:: +`--follow`:: Continue listing the history of a file beyond renames (works only for a single file). ---no-decorate:: ---decorate[=short|full|auto|no]:: - Print out the ref names of any commits that are shown. If 'short' is - specified, the ref name prefixes 'refs/heads/', 'refs/tags/' and - 'refs/remotes/' will not be printed. If 'full' is specified, the - full ref name (including prefix) will be printed. If 'auto' is - specified, then if the output is going to a terminal, the ref names - are shown as if 'short' were given, otherwise no ref names are - shown. The option `--decorate` is short-hand for `--decorate=short`. - Default to configuration value of `log.decorate` if configured, - otherwise, `auto`. - ---decorate-refs=<pattern>:: ---decorate-refs-exclude=<pattern>:: +`--no-decorate`:: +`--decorate[=(short|full|auto|no)]`:: + Print out the ref names of any commits that are shown. Possible values + are: ++ +---- +`short`;; the ref name prefixes `refs/heads/`, `refs/tags/` and + `refs/remotes/` are not printed. +`full`;; the full ref name (including prefix) is printed. +`auto`;; if the output is going to a terminal, the ref names + are shown as if `short` were given, otherwise no ref names are + shown. +---- ++ +The option `--decorate` is short-hand for `--decorate=short`. Default to +configuration value of `log.decorate` if configured, otherwise, `auto`. + +`--decorate-refs=<pattern>`:: +`--decorate-refs-exclude=<pattern>`:: For each candidate reference, do not use it for decoration if it - matches any patterns given to `--decorate-refs-exclude` or if it - doesn't match any of the patterns given to `--decorate-refs`. The - `log.excludeDecoration` config option allows excluding refs from + matches any of the _<pattern>_ parameters given to + `--decorate-refs-exclude` or if it doesn't match any of the + _<pattern>_ parameters given to `--decorate-refs`. 
+ The `log.excludeDecoration` config option allows excluding refs from the decorations, but an explicit `--decorate-refs` pattern will override a match in `log.excludeDecoration`. + @@ -56,51 +62,51 @@ If none of these options or config settings are given, then references are used as decoration if they match `HEAD`, `refs/heads/`, `refs/remotes/`, `refs/stash/`, or `refs/tags/`. ---clear-decorations:: +`--clear-decorations`:: When specified, this option clears all previous `--decorate-refs` or `--decorate-refs-exclude` options and relaxes the default decoration filter to include all references. This option is assumed if the config value `log.initialDecorationSet` is set to `all`. ---source:: +`--source`:: Print out the ref name given on the command line by which each commit was reached. ---[no-]mailmap:: ---[no-]use-mailmap:: +`--[no-]mailmap`:: +`--[no-]use-mailmap`:: Use mailmap file to map author and committer names and email addresses to canonical real names and email addresses. See linkgit:git-shortlog[1]. ---full-diff:: +`--full-diff`:: Without this flag, `git log -p <path>...` shows commits that touch the specified paths, and diffs about the same specified paths. With this, the full diff is shown for commits that touch - the specified paths; this means that "<path>..." limits only + the specified paths; this means that "`<path>...`" limits only commits, and doesn't limit diff for those commits. + Note that this affects all diff-based output types, e.g. those produced by `--stat`, etc. ---log-size:: - Include a line ``log size <number>'' in the output for each commit, - where <number> is the length of that commit's message in bytes. +`--log-size`:: + Include a line `log size <number>` in the output for each commit, + where _<number>_ is the length of that commit's message in bytes. Intended to speed up tools that read log messages from `git log` output by allowing them to allocate space in advance. include::line-range-options.adoc[] -<revision-range>:: +_<revision-range>_:: Show only commits in the specified revision range. 
When no - <revision-range> is specified, it defaults to `HEAD` (i.e. the + _<revision-range>_ is specified, it defaults to `HEAD` (i.e. the whole history leading to the current commit). `origin..HEAD` specifies all the commits reachable from the current commit (i.e. `HEAD`), but not from `origin`. For a complete list of - ways to spell <revision-range>, see the 'Specifying Ranges' + ways to spell _<revision-range>_, see the 'Specifying Ranges' section of linkgit:gitrevisions[7]. -[--] <path>...:: +`[--] <path>...`:: Show only commits that are enough to explain how the files that match the specified paths came to be. See 'History Simplification' below for details and other simplification @@ -145,14 +151,14 @@ EXAMPLES `git log --since="2 weeks ago" -- gitk`:: - Show the changes during the last two weeks to the file 'gitk'. + Show the changes during the last two weeks to the file `gitk`. The `--` is necessary to avoid confusion with the *branch* named - 'gitk' + `gitk` `git log --name-status release..test`:: - Show the commits that are in the "test" branch but not yet - in the "release" branch, along with the list of paths + Show the commits that are in the "`test`" branch but not yet + in the "`release`" branch, along with the list of paths each commit modifies. `git log --follow builtin/rev-list.c`:: @@ -164,7 +170,7 @@ EXAMPLES `git log --branches --not --remotes=origin`:: Shows all commits that are in any of local branches but not in - any of remote-tracking branches for 'origin' (what you have that + any of remote-tracking branches for `origin` (what you have that origin doesn't). `git log master --not --remotes=*/master`:: @@ -200,11 +206,11 @@ CONFIGURATION See linkgit:git-config[1] for core variables and linkgit:git-diff[1] for settings related to diff generation. -format.pretty:: +`format.pretty`:: Default for the `--format` option. (See 'Pretty Formats' above.) Defaults to `medium`. -i18n.logOutputEncoding:: +`i18n.logOutputEncoding`:: Encoding to use when displaying logs. (See 'Discussion' above.) 
Defaults to the value of `i18n.commitEncoding` if set, and UTF-8 otherwise. From ffe24e00a5bd398c91f42a062f51c0d4cd84d489 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Mon, 7 Jul 2025 18:53:25 +0000 Subject: [PATCH 22/42] doc: git-log convert rev-list-description to new doc format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use `backticks` for commit ranges. The new rendering engine will apply synopsis rules to these spans. Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/rev-list-description.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/rev-list-description.adoc b/Documentation/rev-list-description.adoc index a9efa7fa2722ad..82c680e5701302 100644 --- a/Documentation/rev-list-description.adoc +++ b/Documentation/rev-list-description.adoc @@ -26,8 +26,8 @@ endif::git-log[] means "list all the commits which are reachable from 'foo' or 'bar', but not from 'baz'". -A special notation "'<commit1>'..'<commit2>'" can be used as a -short-hand for "^'<commit1>' '<commit2>'". For example, either of +A special notation "`<commit1>..<commit2>`" can be used as a +short-hand for "`^<commit1> <commit2>`". For example, either of the following may be used interchangeably: ifdef::git-rev-list[] @@ -43,7 +43,7 @@ $ git log HEAD ^origin ----------------------------------------------------------------------- endif::git-log[] -Another special notation is "'<commit1>'...'<commit2>'" which is useful +Another special notation is "`<commit1>...<commit2>`" which is useful for merges. The resulting set of commits is the symmetric difference between the two operands. 
The following two commands are equivalent: From 0c2585672248ed361c53ba21c949ab57b349daec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Mon, 7 Jul 2025 18:53:26 +0000 Subject: [PATCH 23/42] doc: git-log: convert line range options to new doc format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit format placeholders in italics and keywords in monospace - Use _<placeholder>_ instead of <placeholder> in the description - Use `backticks` for keywords and more complex option descriptions. The new rendering engine will apply synopsis rules to these spans. Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/line-range-options.adoc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/line-range-options.adoc b/Documentation/line-range-options.adoc index f275df3b69fa9d..c44ba05320f9bf 100644 --- a/Documentation/line-range-options.adoc +++ b/Documentation/line-range-options.adoc @@ -1,12 +1,12 @@ --L<start>,<end>:<file>:: --L:<funcname>:<file>:: +`-L<start>,<end>:<file>`:: +`-L:<funcname>:<file>`:: - Trace the evolution of the line range given by '<start>,<end>', - or by the function name regex '<funcname>', within the '<file>'. You may + Trace the evolution of the line range given by `<start>,<end>`, + or by the function name regex _<funcname>_, within the _<file>_. You may not give any pathspec limiters. This is currently limited to a walk starting from a single revision, i.e., you may only give zero or one positive revision arguments, and - '<start>' and '<end>' (or '<funcname>') must exist in the starting revision. + _<start>_ and _<end>_ (or _<funcname>_) must exist in the starting revision. You can specify this option more than once. Implies `--patch`. 
Patch output can be suppressed using `--no-patch`, but other diff formats (namely `--raw`, `--numstat`, `--shortstat`, `--dirstat`, `--summary`, From 204f7308949cf60795ee26231a48e6df2f80fcfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Mon, 7 Jul 2025 18:53:27 +0000 Subject: [PATCH 24/42] doc: git-log: convert line range format to new doc format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use _<placeholder>_ instead of <placeholder> in the description - Use `backticks` for keywords and more complex option descriptions. The new rendering engine will apply synopsis rules to these spans. Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/line-range-format.adoc | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Documentation/line-range-format.adoc b/Documentation/line-range-format.adoc index 9b51e9fb661436..3cc2a14544cf39 100644 --- a/Documentation/line-range-format.adoc +++ b/Documentation/line-range-format.adoc @@ -1,30 +1,30 @@ -'<start>' and '<end>' can take one of these forms: +_<start>_ and _<end>_ can take one of these forms: -- number +- _<number>_ + -If '<start>' or '<end>' is a number, it specifies an +If _<start>_ or _<end>_ is a number, it specifies an absolute line number (lines count from 1). + -- `/regex/` +- `/<regex>/` + This form will use the first line matching the given -POSIX regex. If '<start>' is a regex, it will search from the end of +POSIX _<regex>_. If _<start>_ is a regex, it will search from the end of the previous `-L` range, if any, otherwise from the start of file. -If '<start>' is `^/regex/`, it will search from the start of file. -If '<end>' is a regex, it will search -starting at the line given by '<start>'. +If _<start>_ is `^/<regex>/`, it will search from the start of file. +If _<end>_ is a regex, it will search starting at the line given by +_<start>_. + -- +offset or -offset +- `+<offset>` or `-<offset>` + -This is only valid for '<end>' and will specify a number -of lines before or after the line given by '<start>'. 
+This is only valid for _<end>_ and will specify a number +of lines before or after the line given by _<start>_. + -If `:<funcname>` is given in place of '<start>' and '<end>', it is a +If `:<funcname>` is given in place of _<start>_ and _<end>_, it is a regular expression that denotes the range from the first funcname line -that matches '<funcname>', up to the next funcname line. `:<funcname>` +that matches _<funcname>_, up to the next funcname line. `:<funcname>` searches from the end of the previous `-L` range, if any, otherwise from the start of file. `^:<funcname>` searches from the start of file. The function names are determined in the same way as `git diff` From d9d297a5f7bef919658976087b740a1c31f653e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Mon, 7 Jul 2025 18:53:28 +0000 Subject: [PATCH 25/42] doc: git-log: convert rev list options to new doc format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix some malformed synopsis of options - Use _<placeholder>_ instead of <placeholder> in the description - Use `backticks` for keywords and more complex option descriptions. The new rendering engine will apply synopsis rules to these spans. - Add the '%' sign to the characters of keywords. 
Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/asciidoc.conf.in | 2 +- Documentation/asciidoctor-extensions.rb.in | 4 +- Documentation/rev-list-options.adoc | 390 ++++++++++----------- 3 files changed, 198 insertions(+), 198 deletions(-) diff --git a/Documentation/asciidoc.conf.in b/Documentation/asciidoc.conf.in index 9d9139306e6f75..ff9ea0a2944511 100644 --- a/Documentation/asciidoc.conf.in +++ b/Documentation/asciidoc.conf.in @@ -43,7 +43,7 @@ ifdef::doctype-book[] endif::doctype-book[] [literal-inlinemacro] -{eval:re.sub(r'(<[-a-zA-Z0-9.]+>)', r'\1', re.sub(r'([\[\s|()>]|^|\]|>)(\.?([-a-zA-Z0-9:+=~@\\\*\/_^\$]+\.?)+|,)',r'\1\2', re.sub(r'(\.\.\.?)([^\]$.])', r'\1\2', macros.passthroughs[int(attrs['passtext'][1:-1])] if attrs['passtext'][1:-1].isnumeric() else attrs['passtext'][1:-1])))} +{eval:re.sub(r'(<[-a-zA-Z0-9.]+>)', r'\1', re.sub(r'([\[\s|()>]|^|\]|>)(\.?([-a-zA-Z0-9:+=~@\\\*\/_^\$%]+\.?)+|,)',r'\1\2', re.sub(r'(\.\.\.?)([^\]$.])', r'\1\2', macros.passthroughs[int(attrs['passtext'][1:-1])] if attrs['passtext'][1:-1].isnumeric() else attrs['passtext'][1:-1])))} endif::backend-docbook[] diff --git a/Documentation/asciidoctor-extensions.rb.in b/Documentation/asciidoctor-extensions.rb.in index 8b7b1613496748..fe64a62d9683be 100644 --- a/Documentation/asciidoctor-extensions.rb.in +++ b/Documentation/asciidoctor-extensions.rb.in @@ -73,7 +73,7 @@ module Git elsif type == :monospaced node.text.gsub(/(\.\.\.?)([^\]$\.])/, '\1\2') .gsub(/^\.\.\.?$/, '\0') - .gsub(%r{([\[\s|()>.]|^|\]|>)(\.?([-a-zA-Z0-9:+=~@/_^\$\\\*]+\.{0,2})+|,)}, '\1\2') + .gsub(%r{([\[\s|()>.]|^|\]|>)(\.?([-a-zA-Z0-9:+=~@/_^\$\\\*%]+\.{0,2})+|,)}, '\1\2') .gsub(/(<[-a-zA-Z0-9.]+>)/, '\1') else open, close, supports_phrase = QUOTE_TAGS[type] @@ -102,7 +102,7 @@ module Git if node.type == :monospaced node.text.gsub(/(\.\.\.?)([^\]$.])/, '\1\2') .gsub(/^\.\.\.?$/, '\0') - .gsub(%r{([\[\s|()>.]|^|\]|>)(\.?([-a-zA-Z0-9:+=~@,/_^\$\\\*]+\.{0,2})+)}, '\1\2') + 
.gsub(%r{([\[\s|()>.]|^|\]|>)(\.?([-a-zA-Z0-9:+=~@,/_^\$\\\*%]+\.{0,2})+)}, '\1\2') .gsub(/(<[-a-zA-Z0-9.]+>)/, '\1') else diff --git a/Documentation/rev-list-options.adoc b/Documentation/rev-list-options.adoc index d38875efdada38..e4214035815d8f 100644 --- a/Documentation/rev-list-options.adoc +++ b/Documentation/rev-list-options.adoc @@ -6,60 +6,60 @@ special notations explained in the description, additional commit limiting may be applied. Using more options generally further limits the output (e.g. -`--since=` limits to commits newer than ``, and using it +`--since=` limits to commits newer than __, and using it with `--grep=` further limits to commits whose log message -has a line that matches ``), unless otherwise noted. +has a line that matches __), unless otherwise noted. Note that these are applied before commit ordering and formatting options, such as `--reverse`. --:: --n :: ---max-count=:: - Limit the number of commits to output. +`-`:: +`-n `:: +`--max-count=`:: + Limit the output to __ commits. ---skip=:: - Skip 'number' commits before starting to show the commit output. +`--skip=`:: + Skip __ commits before starting to show the commit output. ---since=:: ---after=:: - Show commits more recent than a specific date. +`--since=`:: +`--after=`:: + Show commits more recent than __. ---since-as-filter=:: - Show all commits more recent than a specific date. This visits +`--since-as-filter=`:: + Show all commits more recent than __. This visits all commits in the range, rather than stopping at the first commit which - is older than a specific date. + is older than __. ---until=:: ---before=:: - Show commits older than a specific date. +`--until=`:: +`--before=`:: + Show commits older than __. ifdef::git-rev-list[] ---max-age=:: ---min-age=:: +`--max-age=`:: +`--min-age=`:: Limit the commits output to specified time range. 
endif::git-rev-list[] ---author=:: ---committer=:: +`--author=`:: +`--committer=`:: Limit the commits output to ones with author/committer - header lines that match the specified pattern (regular - expression). With more than one `--author=`, - commits whose author matches any of the given patterns are + header lines that match the __ regular + expression. With more than one `--author=`, + commits whose author matches any of the __ are chosen (similarly for multiple `--committer=`). ---grep-reflog=:: +`--grep-reflog=`:: Limit the commits output to ones with reflog entries that - match the specified pattern (regular expression). With + match the __ regular expression. With more than one `--grep-reflog`, commits whose reflog message matches any of the given patterns are chosen. It is an error to use this option unless `--walk-reflogs` is in use. ---grep=:: +`--grep=`:: Limit the commits output to ones with a log message that - matches the specified pattern (regular expression). With + matches the __ regular expression. With more than one `--grep=`, commits whose message - matches any of the given patterns are chosen (but see + matches any of the __ are chosen (but see `--all-match`). ifndef::git-rev-list[] + @@ -67,35 +67,35 @@ When `--notes` is in effect, the message from the notes is matched as if it were part of the log message. endif::git-rev-list[] ---all-match:: +`--all-match`:: Limit the commits output to ones that match all given `--grep`, instead of ones that match at least one. ---invert-grep:: +`--invert-grep`:: Limit the commits output to ones with a log message that do not - match the pattern specified with `--grep=`. + match the __ specified with `--grep=`. --i:: ---regexp-ignore-case:: +`-i`:: +`--regexp-ignore-case`:: Match the regular expression limiting patterns without regard to letter case. ---basic-regexp:: +`--basic-regexp`:: Consider the limiting patterns to be basic regular expressions; this is the default. 
--E:: ---extended-regexp:: +`-E`:: +`--extended-regexp`:: Consider the limiting patterns to be extended regular expressions instead of the default basic regular expressions. --F:: ---fixed-strings:: +`-F`:: +`--fixed-strings`:: Consider the limiting patterns to be fixed strings (don't interpret pattern as a regular expression). --P:: ---perl-regexp:: +`-P`:: +`--perl-regexp`:: Consider the limiting patterns to be Perl-compatible regular expressions. + @@ -103,20 +103,20 @@ Support for these types of regular expressions is an optional compile-time dependency. If Git wasn't compiled with support for them providing this option will cause it to die. ---remove-empty:: +`--remove-empty`:: Stop when a given path disappears from the tree. ---merges:: +`--merges`:: Print only merge commits. This is exactly the same as `--min-parents=2`. ---no-merges:: +`--no-merges`:: Do not print commits with more than one parent. This is exactly the same as `--max-parents=1`. ---min-parents=:: ---max-parents=:: ---no-min-parents:: ---no-max-parents:: +`--min-parents=`:: +`--max-parents=`:: +`--no-min-parents`:: +`--no-max-parents`:: Show only commits which have at least (or at most) that many parent commits. In particular, `--max-parents=1` is the same as `--no-merges`, `--min-parents=2` is the same as `--merges`. `--max-parents=0` @@ -126,7 +126,7 @@ providing this option will cause it to die. again. Equivalent forms are `--min-parents=0` (any commit has 0 or more parents) and `--max-parents=-1` (negative numbers denote no upper limit). ---first-parent:: +`--first-parent`:: When finding commits to include, follow only the first parent commit upon seeing a merge commit. This option can give a better overview when viewing the evolution of @@ -141,14 +141,14 @@ This option also changes default diff format for merge commits to `first-parent`, see `--diff-merges=first-parent` for details. 
endif::git-log[] ---exclude-first-parent-only:: +`--exclude-first-parent-only`:: When finding commits to exclude (with a '{caret}'), follow only the first parent commit upon seeing a merge commit. This can be used to find the set of changes in a topic branch from the point where it diverged from the remote branch, given that arbitrary merges can be valid topic branch changes. ---not:: +`--not`:: Reverses the meaning of the '{caret}' prefix (or lack thereof) for all following revision specifiers, up to the next `--not`. When used on the command line before --stdin, the revisions passed @@ -156,37 +156,37 @@ endif::git-log[] via standard input, the revisions passed on the command line will not be affected by it. ---all:: +`--all`:: Pretend as if all the refs in `refs/`, along with `HEAD`, are - listed on the command line as ''. + listed on the command line as __. ---branches[=]:: +`--branches[=]`:: Pretend as if all the refs in `refs/heads` are listed - on the command line as ''. If '' is given, limit - branches to ones matching given shell glob. If pattern lacks '?', + on the command line as __. If __ is given, limit + branches to ones matching given shell glob. If __ lacks '?', '{asterisk}', or '[', '/{asterisk}' at the end is implied. ---tags[=]:: +`--tags[=]`:: Pretend as if all the refs in `refs/tags` are listed - on the command line as ''. If '' is given, limit + on the command line as __. If __ is given, limit tags to ones matching given shell glob. If pattern lacks '?', '{asterisk}', or '[', '/{asterisk}' at the end is implied. ---remotes[=]:: +`--remotes[=]`:: Pretend as if all the refs in `refs/remotes` are listed - on the command line as ''. If '' is given, limit + on the command line as __. If __ is given, limit remote-tracking branches to ones matching given shell glob. If pattern lacks '?', '{asterisk}', or '[', '/{asterisk}' at the end is implied. ---glob=:: - Pretend as if all the refs matching shell glob '' - are listed on the command line as ''. 
Leading 'refs/', +`--glob=`:: + Pretend as if all the refs matching shell glob __ + are listed on the command line as __. Leading 'refs/', is automatically prepended if missing. If pattern lacks '?', '{asterisk}', or '[', '/{asterisk}' at the end is implied. ---exclude=:: +`--exclude=`:: - Do not include refs matching '' that the next `--all`, + Do not include refs matching __ that the next `--all`, `--branches`, `--tags`, `--remotes`, or `--glob` would otherwise consider. Repetitions of this option accumulate exclusion patterns up to the next `--all`, `--branches`, `--tags`, `--remotes`, or @@ -199,7 +199,7 @@ respectively, and they must begin with `refs/` when applied to `--glob` or `--all`. If a trailing '/{asterisk}' is intended, it must be given explicitly. ---exclude-hidden=[fetch|receive|uploadpack]:: +`--exclude-hidden=(fetch|receive|uploadpack)`:: Do not include refs that would be hidden by `git-fetch`, `git-receive-pack` or `git-upload-pack` by consulting the appropriate `fetch.hideRefs`, `receive.hideRefs` or `uploadpack.hideRefs` @@ -207,11 +207,11 @@ explicitly. linkgit:git-config[1]). This option affects the next pseudo-ref option `--all` or `--glob` and is cleared after processing them. ---reflog:: +`--reflog`:: Pretend as if all objects mentioned by reflogs are listed on the - command line as ``. + command line as __. ---alternate-refs:: +`--alternate-refs`:: Pretend as if all objects mentioned as ref tips of alternate repositories were listed on the command line. An alternate repository is any repository whose object directory is specified @@ -219,7 +219,7 @@ explicitly. be modified by `core.alternateRefsCommand`, etc. See linkgit:git-config[1]. ---single-worktree:: +`--single-worktree`:: By default, all working trees will be examined by the following options when there are more than one (see linkgit:git-worktree[1]): `--all`, `--reflog` and @@ -227,19 +227,19 @@ explicitly. This option forces them to examine the current working tree only. 
---ignore-missing:: +`--ignore-missing`:: Upon seeing an invalid object name in the input, pretend as if the bad input was not given. ifndef::git-rev-list[] ---bisect:: +`--bisect`:: Pretend as if the bad bisection ref `refs/bisect/bad` was listed and as if it was followed by `--not` and the good bisection refs `refs/bisect/good-*` on the command line. endif::git-rev-list[] ---stdin:: +`--stdin`:: In addition to getting arguments from the command line, read them from standard input as well. This accepts commits and pseudo-options like `--all` and `--glob=`. When a `--` separator @@ -249,15 +249,15 @@ endif::git-rev-list[] influence any subsequent command line arguments. ifdef::git-rev-list[] ---quiet:: +`--quiet`:: Don't print anything to standard output. This form is primarily meant to allow the caller to test the exit status to see if a range of objects is fully connected (or not). It is faster than redirecting stdout to `/dev/null` as the output does not have to be formatted. ---disk-usage:: ---disk-usage=human:: +`--disk-usage`:: +`--disk-usage=human`:: Suppress normal output; instead, print the sum of the bytes used for on-disk storage by the selected commits or objects. This is equivalent to piping the output into `git cat-file @@ -269,11 +269,11 @@ ifdef::git-rev-list[] in human-readable string(e.g. 12.24 Kib, 3.50 Mib). endif::git-rev-list[] ---cherry-mark:: +`--cherry-mark`:: Like `--cherry-pick` (see below) but mark equivalent commits with `=` rather than omitting them, and inequivalent ones with `+`. ---cherry-pick:: +`--cherry-pick`:: Omit any commit that introduces the same change as another commit on the ``other side'' when the set of commits are limited with symmetric difference. @@ -286,8 +286,8 @@ cherry-picked from the other branch (for example, ``3rd on b'' may be cherry-picked from branch A). With this option, such pairs of commits are excluded from the output. 
---left-only:: ---right-only:: +`--left-only`:: +`--right-only`:: List only commits on the respective side of a symmetric difference, i.e. only those which would be marked `<` resp. `>` by `--left-right`. @@ -298,20 +298,20 @@ commits from `B` which are in `A` or are patch-equivalent to a commit in More precisely, `--cherry-pick --right-only --no-merges` gives the exact list. ---cherry:: +`--cherry`:: A synonym for `--right-only --cherry-mark --no-merges`; useful to limit the output to the commits on our side and mark those that have been applied to the other side of a forked history with `git log --cherry upstream...mybranch`, similar to `git cherry upstream mybranch`. --g:: ---walk-reflogs:: +`-g`:: +`--walk-reflogs`:: Instead of walking the commit ancestry chain, walk reflog entries from the most recent one to older ones. When this option is used you cannot specify commits to - exclude (that is, '{caret}commit', 'commit1..commit2', - and 'commit1\...commit2' notations cannot be used). + exclude (that is, `^`, `..`, + and `...` notations cannot be used). + With `--pretty` format other than `oneline` and `reference` (for obvious reasons), this causes the output to have two extra lines of information @@ -340,29 +340,29 @@ See also linkgit:git-reflog[1]. + Under `--pretty=reference`, this information will not be shown at all. ---merge:: +`--merge`:: Show commits touching conflicted paths in the range `HEAD...`, where `` is the first existing pseudoref in `MERGE_HEAD`, `CHERRY_PICK_HEAD`, `REVERT_HEAD` or `REBASE_HEAD`. Only works when the index has unmerged entries. This option can be used to show relevant commits when resolving conflicts from a 3-way merge. ---boundary:: +`--boundary`:: Output excluded boundary commits. Boundary commits are prefixed with `-`. ifdef::git-rev-list[] ---use-bitmap-index:: +`--use-bitmap-index`:: Try to speed up the traversal using the pack bitmap index (if one is available). 
Note that when traversing with `--objects`, trees and blobs will not have their associated path printed. ---progress=
<header>:: +`--progress=<header>
`:: Show progress reports on stderr as objects are considered. The `<header>
` text will be printed with each progress update. --z:: +`-z`:: Instead of being newline-delimited, each outputted object and its accompanying metadata is delimited using NUL bytes. Output is printed in the following form: @@ -397,56 +397,56 @@ is how to do it, as there are various strategies to simplify the history. The following options select the commits to be shown: -:: +``:: Commits modifying the given are selected. ---simplify-by-decoration:: +`--simplify-by-decoration`:: Commits that are referred by some branch or tag are selected. Note that extra commits can be shown to give a meaningful history. The following options affect the way the simplification is performed: -Default mode:: +`Default mode`:: Simplifies the history to the simplest history explaining the final state of the tree. Simplest because it prunes some side branches if the end result is the same (i.e. merging branches with the same content) ---show-pulls:: +`--show-pulls`:: Include all commits from the default mode, but also any merge commits that are not TREESAME to the first parent but are TREESAME to a later parent. This mode is helpful for showing the merge commits that "first introduced" a change to a branch. ---full-history:: +`--full-history`:: Same as the default mode, but does not prune some history. ---dense:: +`--dense`:: Only the selected commits are shown, plus some to have a meaningful history. ---sparse:: +`--sparse`:: All commits in the simplified history are shown. ---simplify-merges:: +`--simplify-merges`:: Additional option to `--full-history` to remove some needless merges from the resulting history, as there are no selected commits contributing to this merge. ---ancestry-path[=]:: - When given a range of commits to display (e.g. 'commit1..commit2' - or 'commit2 {caret}commit1'), and a commit in that range, +`--ancestry-path[=]`:: + When given a range of commits to display (e.g. 
`..` + or ` ^`), and a commit __ in that range, only display commits in that range - that are ancestors of , descendants of , or - itself. If no commit is specified, use 'commit1' (the - excluded part of the range) as . Can be passed multiple + that are ancestors of __, descendants of __, or + __ itself. If no commit is specified, use __ (the + excluded part of the range) as __. Can be passed multiple times; if so, a commit is included if it is any of the commits given or if it is an ancestor or descendant of one of them. A more detailed explanation follows. -Suppose you specified `foo` as the . We shall call commits +Suppose you specified `foo` as the __. We shall call commits that modify `foo` !TREESAME, and the rest TREESAME. (In a diff filtered for `foo`, they look different and equal, respectively.) @@ -466,22 +466,22 @@ The horizontal line of history A---Q is taken to be the first parent of each merge. The commits are: * `I` is the initial commit, in which `foo` exists with contents - ``asdf'', and a file `quux` exists with contents ``quux''. Initial + `asdf`, and a file `quux` exists with contents `quux`. Initial commits are compared to an empty tree, so `I` is !TREESAME. -* In `A`, `foo` contains just ``foo''. +* In `A`, `foo` contains just `foo`. * `B` contains the same change as `A`. Its merge `M` is trivial and hence TREESAME to all parents. -* `C` does not change `foo`, but its merge `N` changes it to ``foobar'', +* `C` does not change `foo`, but its merge `N` changes it to `foobar`, so it is not TREESAME to any parent. -* `D` sets `foo` to ``baz''. Its merge `O` combines the strings from - `N` and `D` to ``foobarbaz''; i.e., it is not TREESAME to any parent. +* `D` sets `foo` to `baz`. Its merge `O` combines the strings from + `N` and `D` to `foobarbaz`; i.e., it is not TREESAME to any parent. -* `E` changes `quux` to ``xyzzy'', and its merge `P` combines the - strings to ``quux xyzzy''. `P` is TREESAME to `O`, but not to `E`. 
+* `E` changes `quux` to `xyzzy`, and its merge `P` combines the + strings to `quux xyzzy`. `P` is TREESAME to `O`, but not to `E`. * `X` is an independent root commit that added a new file `side`, and `Y` modified it. `Y` is TREESAME to `X`. Its merge `Q` added `side` to `P`, and @@ -517,7 +517,7 @@ Parent/child relations are only visible with `--parents`, but that does not affect the commits selected in default mode, so we have shown the parent lines. ---full-history without parent rewriting:: +`--full-history` without parent rewriting:: This mode differs from the default in one point: always follow all parents of a merge, even if it is TREESAME to one of them. Even if more than one side of the merge has commits that are @@ -536,7 +536,7 @@ Note that without parent rewriting, it is not really possible to talk about the parent/child relationships between the commits, so we show them disconnected. ---full-history with parent rewriting:: +`--full-history` with parent rewriting:: Ordinary commits are only included if they are !TREESAME (though this can be changed, see `--sparse` below). + @@ -560,18 +560,18 @@ rewritten to contain `E`'s parent `I`. The same happened for `C` and In addition to the above settings, you can change whether TREESAME affects inclusion: ---dense:: +`--dense`:: Commits that are walked are included if they are not TREESAME to any parent. ---sparse:: +`--sparse`:: All commits that are walked are included. + Note that without `--full-history`, this still simplifies merges: if one of the parents is TREESAME, we follow only that one, so the other sides of the merge are never walked. ---simplify-merges:: +`--simplify-merges`:: First, build a history graph in the same way that `--full-history` with parent rewriting does (see above). 
+ @@ -618,9 +618,9 @@ Note the major differences in `N`, `P`, and `Q` over `--full-history`: There is another simplification mode available: ---ancestry-path[=]:: +`--ancestry-path[=]`:: Limit the displayed commits to those which are an ancestor of - , or which are a descendant of , or are + __, or which are a descendant of __, or are __ itself. + As an example use case, consider the following commit history: @@ -636,15 +636,15 @@ As an example use case, consider the following commit history: A regular 'D..M' computes the set of commits that are ancestors of `M`, but excludes the ones that are ancestors of `D`. This is useful to see what happened to the history leading to `M` since `D`, in the sense -that ``what does `M` have that did not exist in `D`''. The result in this +that "what does `M` have that did not exist in `D`". The result in this example would be all the commits, except `A` and `B` (and `D` itself, of course). + When we want to find out what commits in `M` are contaminated with the bug introduced by `D` and need fixing, however, we might want to view -only the subset of 'D..M' that are actually descendants of `D`, i.e. +only the subset of `D..M` that are actually descendants of `D`, i.e. excluding `C` and `K`. This is exactly what the `--ancestry-path` -option does. Applied to the 'D..M' range, it results in: +option does. Applied to the `D..M` range, it results in: + ----------------------------------------------------------------------- E-------F @@ -655,7 +655,7 @@ option does. Applied to the 'D..M' range, it results in: ----------------------------------------------------------------------- + We can also use `--ancestry-path=D` instead of `--ancestry-path` which -means the same thing when applied to the 'D..M' range but is just more +means the same thing when applied to the `D..M` range but is just more explicit. + If we instead are interested in a given topic within this range, and all @@ -770,7 +770,7 @@ into the important branch. 
This commit may have information about why the change `X` came to override the changes from `A` and `B` in its commit message. ---show-pulls:: +`--show-pulls`:: In addition to the commits shown in the default history, show each merge commit that is not TREESAME to its first parent but is TREESAME to a later parent. @@ -819,7 +819,7 @@ ifdef::git-rev-list[] Bisection Helpers ~~~~~~~~~~~~~~~~~ ---bisect:: +`--bisect`:: Limit output to the one commit object which is roughly halfway between included and excluded commits. Note that the bad bisection ref `refs/bisect/bad` is added to the included commits (if it @@ -843,7 +843,7 @@ introduces a regression is thus reduced to a binary search: repeatedly generate and test new 'midpoint's until the commit chain is of length one. ---bisect-vars:: +`--bisect-vars`:: This calculates the same as `--bisect`, except that refs in `refs/bisect/` are not used, and except that this outputs text ready to be eval'ed by the shell. These lines will assign the @@ -855,7 +855,7 @@ one. `bisect_bad`, and the number of commits we are bisecting right now to `bisect_all`. ---bisect-all:: +`--bisect-all`:: This outputs all the commit objects between the included and excluded commits, ordered by their distance to the included and excluded commits. Refs in `refs/bisect/` are not used. The farthest @@ -878,15 +878,15 @@ Commit Ordering By default, the commits are shown in reverse chronological order. ---date-order:: +`--date-order`:: Show no parents before all of its children are shown, but otherwise show commits in the commit timestamp order. ---author-date-order:: +`--author-date-order`:: Show no parents before all of its children are shown, but otherwise show commits in the author timestamp order. ---topo-order:: +`--topo-order`:: Show no parents before all of its children are shown, and avoid showing commits on multiple lines of history intermixed. 
@@ -910,8 +910,8 @@ With `--topo-order`, they would show 8 6 5 3 7 4 2 1 (or 8 7 4 2 6 5 avoid showing the commits from two parallel development track mixed together. ---reverse:: - Output the commits chosen to be shown (see Commit Limiting +`--reverse`:: + Output the commits chosen to be shown (see 'Commit Limiting' section above) in reverse order. Cannot be combined with `--walk-reflogs`. endif::git-shortlog[] @@ -923,39 +923,39 @@ Object Traversal These options are mostly targeted for packing of Git repositories. ifdef::git-rev-list[] ---objects:: +`--objects`:: Print the object IDs of any object referenced by the listed - commits. `--objects foo ^bar` thus means ``send me + commits. `--objects foo ^bar` thus means "send me all object IDs which I need to download if I have the commit - object _bar_ but not _foo_''. See also `--object-names` below. + object `bar` but not `foo`". See also `--object-names` below. ---in-commit-order:: +`--in-commit-order`:: Print tree and blob ids in order of the commits. The tree and blob ids are printed after they are first referenced by a commit. ---objects-edge:: +`--objects-edge`:: Similar to `--objects`, but also print the IDs of excluded - commits prefixed with a ``-'' character. This is used by + commits prefixed with a "`-`" character. This is used by linkgit:git-pack-objects[1] to build a ``thin'' pack, which records objects in deltified form based on objects contained in these excluded commits to reduce network traffic. ---objects-edge-aggressive:: +`--objects-edge-aggressive`:: Similar to `--objects-edge`, but it tries harder to find excluded commits at the cost of increased time. This is used instead of `--objects-edge` to build ``thin'' packs for shallow repositories. ---indexed-objects:: +`--indexed-objects`:: Pretend as if all trees and blobs used by the index are listed on the command line. Note that you probably want to use `--objects`, too. 
---unpacked:: +`--unpacked`:: Only useful with `--objects`; print the object IDs that are not in packs. ---object-names:: +`--object-names`:: Only useful with `--objects`; print the names of the object IDs that are found. This is the default behavior. Note that the "name" of each object is ambiguous, and mostly intended as a @@ -964,52 +964,52 @@ ifdef::git-rev-list[] to remove newlines; and if an object would appear multiple times with different names, only one name is shown. ---no-object-names:: +`--no-object-names`:: Only useful with `--objects`; does not print the names of the object IDs that are found. This inverts `--object-names`. This flag allows the output to be more easily parsed by commands such as linkgit:git-cat-file[1]. ---filter=:: +`--filter=`:: Only useful with one of the `--objects*`; omits objects (usually - blobs) from the list of printed objects. The '' + blobs) from the list of printed objects. The __ may be one of the following: + -The form '--filter=blob:none' omits all blobs. +The form `--filter=blob:none` omits all blobs. + -The form '--filter=blob:limit=[kmg]' omits blobs of size at least n -bytes or units. n may be zero. The suffixes k, m, and g can be used -to name units in KiB, MiB, or GiB. For example, 'blob:limit=1k' +The form `--filter=blob:limit=[kmg]` omits blobs of size at least __ +bytes or units. __ may be zero. The suffixes `k`, `m`, and `g` can be used +to name units in KiB, MiB, or GiB. For example, `blob:limit=1k` is the same as 'blob:limit=1024'. + -The form '--filter=object:type=(tag|commit|tree|blob)' omits all objects +The form `--filter=object:type=(tag|commit|tree|blob)` omits all objects which are not of the requested type. 
+ -The form '--filter=sparse:oid=' uses a sparse-checkout -specification contained in the blob (or blob-expression) '' +The form `--filter=sparse:oid=` uses a sparse-checkout +specification contained in the blob (or blob-expression) __ to omit blobs that would not be required for a sparse checkout on the requested refs. + -The form '--filter=tree:' omits all blobs and trees whose depth -from the root tree is >= (minimum depth if an object is located -at multiple depths in the commits traversed). =0 will not include +The form `--filter=tree:` omits all blobs and trees whose depth +from the root tree is >= __ (minimum depth if an object is located +at multiple depths in the commits traversed). __=0 will not include any trees or blobs unless included explicitly in the command-line (or -standard input when --stdin is used). =1 will include only the +standard input when `--stdin` is used). __=1 will include only the tree and blobs which are referenced directly by a commit reachable from - or an explicitly-given object. =2 is like =1 +__ or an explicitly-given object. __=2 is like =1 while also including trees and blobs one more level removed from an explicitly-given commit or tree. + -Note that the form '--filter=sparse:path=' that wants to read +Note that the form `--filter=sparse:path=` that wants to read from an arbitrary path on the filesystem has been dropped for security reasons. + -Multiple '--filter=' flags can be specified to combine filters. Only +Multiple `--filter=` flags can be specified to combine filters. Only objects which are accepted by every filter are included. + -The form '--filter=combine:++...' can also be +The form `--filter=combine:++...` can also be used to combined several filters, but this is harder than just repeating -the '--filter' flag and is usually not necessary. Filters are joined by +the `--filter` flag and is usually not necessary. Filters are joined by '{plus}' and individual filters are %-encoded (i.e. URL-encoded). 
Besides the '{plus}' and '%' characters, the following characters are reserved and also must be encoded: `~!@#$^&*()[]{}\;",<>?`+'`+ @@ -1017,52 +1017,52 @@ as well as all characters with ASCII code <= `0x20`, which includes space and newline. + Other arbitrary characters can also be encoded. For instance, -'combine:tree:3+blob:none' and 'combine:tree%3A3+blob%3Anone' are +`combine:tree:3+blob:none` and `combine:tree%3A3+blob%3Anone` are equivalent. ---no-filter:: +`--no-filter`:: Turn off any previous `--filter=` argument. ---filter-provided-objects:: +`--filter-provided-objects`:: Filter the list of explicitly provided objects, which would otherwise always be printed even if they did not match any of the filters. Only useful with `--filter=`. ---filter-print-omitted:: +`--filter-print-omitted`:: Only useful with `--filter=`; prints a list of the objects omitted by the filter. Object IDs are prefixed with a ``~'' character. ---missing=:: +`--missing=`:: A debug option to help with future "partial clone" development. This option specifies how missing objects are handled. + -The form '--missing=error' requests that rev-list stop with an error if +The form `--missing=error` requests that rev-list stop with an error if a missing object is encountered. This is the default action. + -The form '--missing=allow-any' will allow object traversal to continue +The form `--missing=allow-any` will allow object traversal to continue if a missing object is encountered. Missing objects will silently be omitted from the results. + -The form '--missing=allow-promisor' is like 'allow-any', but will only +The form `--missing=allow-promisor` is like `allow-any`, but will only allow object traversal to continue for EXPECTED promisor missing objects. Unexpected missing objects will raise an error. + -The form '--missing=print' is like 'allow-any', but will also print a +The form `--missing=print` is like `allow-any`, but will also print a list of the missing objects. 
Object IDs are prefixed with a ``?'' character. + -The form '--missing=print-info' is like 'print', but will also print additional +The form `--missing=print-info` is like `print`, but will also print additional information about the missing object inferred from its containing object. The information is all printed on the same line with the missing object ID in the form: `? [=]...`. The `=` pairs containing -additional information are separated from each other by a SP. The value is -encoded in a token specific fashion, but SP or LF contained in value are always +additional information are separated from each other by a _SP_. The value is +encoded in a token specific fashion, but _SP_ or _LF_ contained in value are always expected to be represented in such a way that the resulting encoded value does not have either of these two problematic bytes. Each `=` may be one of the following: + -- * The `path=` shows the path of the missing object inferred from a - containing object. A path containing SP or special characters is enclosed in + containing object. A path containing _SP_ or special characters is enclosed in double-quotes in the C style as needed. + * The `type=` shows the type of the missing object inferred from a @@ -1073,7 +1073,7 @@ If some tips passed to the traversal are missing, they will be considered as missing too, and the traversal will ignore them. In case we cannot get their Object ID though, an error will be raised. ---exclude-promisor-objects:: +`--exclude-promisor-objects`:: (For internal use only.) Prefilter object traversal at promisor boundary. This is used with partial clone. This is stronger than `--missing=allow-promisor` because it limits the @@ -1081,7 +1081,7 @@ we cannot get their Object ID though, an error will be raised. objects. endif::git-rev-list[] ---no-walk[=(sorted|unsorted)]:: +`--no-walk[=(sorted|unsorted)]`:: Only show the given commits, but do not traverse their ancestors. This has no effect if a range is specified. 
If the argument `unsorted` is given, the commits are shown in the order they were @@ -1090,7 +1090,7 @@ endif::git-rev-list[] by commit time. Cannot be combined with `--graph`. ---do-walk:: +`--do-walk`:: Overrides a previous `--no-walk`. endif::git-shortlog[] @@ -1106,10 +1106,10 @@ endif::git-rev-list[] include::pretty-options.adoc[] ---relative-date:: +`--relative-date`:: Synonym for `--date=relative`. ---date=:: +`--date=`:: Only takes effect for dates shown in human-readable format, such as when using `--pretty`. `log.date` config variable sets a default value for the log command's `--date` option. By default, dates @@ -1159,12 +1159,12 @@ omitted. 1970). As with `--raw`, this is always in UTC and therefore `-local` has no effect. -`--date=format:...` feeds the format `...` to your system `strftime`, -except for %s, %z, and %Z, which are handled internally. +`--date=format:` feeds the __ to your system `strftime`, +except for `%s`, `%z`, and `%Z`, which are handled internally. Use `--date=format:%c` to show the date in your system locale's -preferred format. See the `strftime` manual for a complete list of +preferred format. See the `strftime`(3) manual for a complete list of format placeholders. When using `-local`, the correct syntax is -`--date=format-local:...`. +`--date=format-local:`. `--date=default` is the default format, and is based on ctime(3) output. It shows a single line with three-letter day of the week, @@ -1174,33 +1174,33 @@ the local time zone is used, e.g. `Thu Jan 1 00:00:00 1970 +0000`. -- ifdef::git-rev-list[] ---header:: +`--header`:: Print the contents of the commit in raw-format; each record is separated with a NUL character. ---no-commit-header:: +`--no-commit-header`:: Suppress the header line containing "commit" and the object ID printed before the specified format. This has no effect on the built-in formats; only custom formats are affected. ---commit-header:: +`--commit-header`:: Overrides a previous `--no-commit-header`. 
endif::git-rev-list[] ---parents:: +`--parents`:: Print also the parents of the commit (in the form "commit parent..."). Also enables parent rewriting, see 'History Simplification' above. ---children:: +`--children`:: Print also the children of the commit (in the form "commit child..."). Also enables parent rewriting, see 'History Simplification' above. ifdef::git-rev-list[] ---timestamp:: +`--timestamp`:: Print the raw commit timestamp. endif::git-rev-list[] ---left-right:: +`--left-right`:: Mark which side of a symmetric difference a commit is reachable from. Commits from the left side are prefixed with `<` and those from the right with `>`. If combined with `--boundary`, those @@ -1229,7 +1229,7 @@ you would get an output like this: -xxxxxxx... 1st on a ----------------------------------------------------------------------- ---graph:: +`--graph`:: Draw a text-based graphical representation of the commit history on the left hand side of the output. This may cause extra lines to be printed in between commits, in order for the graph history @@ -1241,15 +1241,15 @@ This enables parent rewriting, see 'History Simplification' above. This implies the `--topo-order` option by default, but the `--date-order` option may also be specified. ---show-linear-break[=]:: - When --graph is not used, all history branches are flattened +`--show-linear-break[=]`:: + When `--graph` is not used, all history branches are flattened which can make it hard to see that the two consecutive commits do not belong to a linear branch. This option puts a barrier - in between them in that case. If `` is specified, it + in between them in that case. If __ is specified, it is the string that will be shown instead of the default one. ifdef::git-rev-list[] ---count:: +`--count`:: Print a number stating how many commits would have been listed, and suppress all other output. 
When used together with `--left-right`, instead print the counts for left and From 06db6a3c4a9e23e575f9e5e7f812358deaf11834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Mon, 7 Jul 2025 18:53:29 +0000 Subject: [PATCH 26/42] doc: git-log: convert pretty options to new doc format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use __ instead of in the description - Use `backticks` for keywords and more complex option descriptions. The new rendering engine will apply synopsis rules to these spans. Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/pretty-options.adoc | 71 ++++++++++++++++--------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/Documentation/pretty-options.adoc b/Documentation/pretty-options.adoc index 23888cd612c9fb..388b245a8a68d4 100644 --- a/Documentation/pretty-options.adoc +++ b/Documentation/pretty-options.adoc @@ -1,38 +1,38 @@ ---pretty[=]:: ---format=:: +`--pretty[=]`:: +`--format=`:: Pretty-print the contents of the commit logs in a given format, - where '' can be one of 'oneline', 'short', 'medium', - 'full', 'fuller', 'reference', 'email', 'raw', 'format:' - and 'tformat:'. When '' is none of the above, - and has '%placeholder' in it, it acts as if - '--pretty=tformat:' were given. + where '' can be one of `oneline`, `short`, `medium`, + `full`, `fuller`, `reference`, `email`, `raw`, `format:` + and `tformat:`. When __ is none of the above, + and has `%` in it, it acts as if + `--pretty=tformat:` were given. + See the "PRETTY FORMATS" section for some additional details for each -format. When '=' part is omitted, it defaults to 'medium'. +format. When `=` part is omitted, it defaults to `medium`. + -Note: you can specify the default pretty format in the repository +NOTE: you can specify the default pretty format in the repository configuration (see linkgit:git-config[1]). 
---abbrev-commit:: +`--abbrev-commit`:: Instead of showing the full 40-byte hexadecimal commit object name, show a prefix that names the object uniquely. - "--abbrev=" (which also modifies diff output, if it is displayed) + `--abbrev=` (which also modifies diff output, if it is displayed) option can be used to specify the minimum length of the prefix. + -This should make "--pretty=oneline" a whole lot more readable for +This should make `--pretty=oneline` a whole lot more readable for people using 80-column terminals. ---no-abbrev-commit:: +`--no-abbrev-commit`:: Show the full 40-byte hexadecimal commit object name. This negates `--abbrev-commit`, either explicit or implied by other options such - as "--oneline". It also overrides the `log.abbrevCommit` variable. + as `--oneline`. It also overrides the `log.abbrevCommit` variable. ---oneline:: - This is a shorthand for "--pretty=oneline --abbrev-commit" +`--oneline`:: + This is a shorthand for `--pretty=oneline --abbrev-commit` used together. ---encoding=:: +`--encoding=`:: Commit objects record the character encoding used for the log message in their encoding header; this option can be used to tell the command to re-code the commit log message in the encoding @@ -44,22 +44,22 @@ people using 80-column terminals. to convert the commit, we will quietly output the original object verbatim. ---expand-tabs=:: ---expand-tabs:: ---no-expand-tabs:: +`--expand-tabs=`:: +`--expand-tabs`:: +`--no-expand-tabs`:: Perform a tab expansion (replace each tab with enough spaces - to fill to the next display column that is a multiple of '') + to fill to the next display column that is a multiple of __) in the log message before showing it in the output. `--expand-tabs` is a short-hand for `--expand-tabs=8`, and `--no-expand-tabs` is a short-hand for `--expand-tabs=0`, which disables tab expansion. + By default, tabs are expanded in pretty formats that indent the log -message by 4 spaces (i.e. 
'medium', which is the default, 'full', -and 'fuller'). +message by 4 spaces (i.e. `medium`, which is the default, `full`, +and `fuller`). ifndef::git-rev-list[] ---notes[=]:: +`--notes[=]`:: Show the notes (see linkgit:git-notes[1]) that annotate the commit, when showing the commit log message. This is the default for `git log`, `git show` and `git whatchanged` commands when @@ -75,28 +75,29 @@ to display. The ref can specify the full refname when it begins with `refs/notes/`; when it begins with `notes/`, `refs/` and otherwise `refs/notes/` is prefixed to form the full name of the ref. + -Multiple --notes options can be combined to control which notes are -being displayed. Examples: "--notes=foo" will show only notes from -"refs/notes/foo"; "--notes=foo --notes" will show both notes from +Multiple `--notes` options can be combined to control which notes are +being displayed. Examples: "`--notes=foo`" will show only notes from +`refs/notes/foo`; "`--notes=foo --notes`" will show both notes from "refs/notes/foo" and from the default notes ref(s). ---no-notes:: +`--no-notes`:: Do not show notes. This negates the above `--notes` option, by resetting the list of notes refs from which notes are shown. Options are parsed in the order given on the command line, so e.g. - "--notes --notes=foo --no-notes --notes=bar" will only show notes - from "refs/notes/bar". + "`--notes --notes=foo --no-notes --notes=bar`" will only show notes + from `refs/notes/bar`. ---show-notes-by-default:: +`--show-notes-by-default`:: Show the default notes unless options for displaying specific notes are given. ---show-notes[=]:: ---[no-]standard-notes:: - These options are deprecated. Use the above --notes/--no-notes +`--show-notes[=]`:: +`--standard-notes`:: +`--no-standard-notes`:: + These options are deprecated. Use the above `--notes`/`--no-notes` options instead. 
endif::git-rev-list[] ---show-signature:: +`--show-signature`:: Check the validity of a signed commit object by passing the signature to `gpg --verify` and show the output. From ca484a90e27c79083ecca3adf2bc8d3c42a5f006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Mon, 7 Jul 2025 18:53:30 +0000 Subject: [PATCH 27/42] doc: git-log: convert pretty formats to new doc format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use _<placeholder>_ instead of <placeholder> in the description - Use `backticks` for keywords and more complex option descriptions. The new rendering engine will apply synopsis rules to these spans. For all the formats in the form of %(foo), the formatting needs to be heavier because we do not want the parentheses to be rendered as syntax elements, but as keywords, i.e. we need to circumvent the syntax highlighting of synopsis. In this particular case, this requires the heavy escaping of the parts that contain parentheses with ++. Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/pretty-formats.adoc | 283 +++++++++++++++--------------- 1 file changed, 143 insertions(+), 140 deletions(-) diff --git a/Documentation/pretty-formats.adoc b/Documentation/pretty-formats.adoc index 07475de8c33702..9ed0417fc811e5 100644 --- a/Documentation/pretty-formats.adoc +++ b/Documentation/pretty-formats.adoc @@ -2,11 +2,11 @@ PRETTY FORMATS -------------- If the commit is a merge, and if the pretty-format -is not 'oneline', 'email' or 'raw', an additional line is -inserted before the 'Author:' line. This line begins with +is not `oneline`, `email` or `raw`, an additional line is +inserted before the `Author:` line. This line begins with "Merge: " and the hashes of ancestral commits are printed, separated by spaces.
Note that the listed commits may not -necessarily be the list of the *direct* parent commits if you +necessarily be the list of the 'direct' parent commits if you have limited your view of history: for example, if you are only interested in changes related to a certain directory or file. @@ -14,24 +14,24 @@ file. There are several built-in formats, and you can define additional formats by setting a pretty. config option to either another format name, or a -'format:' string, as described below (see +`format:` string, as described below (see linkgit:git-config[1]). Here are the details of the built-in formats: -* 'oneline' +* `oneline` + This is designed to be as compact as possible. -* 'short' +* `short` commit Author: -* 'medium' +* `medium` commit Author: @@ -41,7 +41,7 @@ This is designed to be as compact as possible. -* 'full' +* `full` commit Author: @@ -51,7 +51,7 @@ This is designed to be as compact as possible. -* 'fuller' +* `fuller` commit Author: @@ -63,18 +63,18 @@ This is designed to be as compact as possible. -* 'reference' +* `reference` (, ) + This format is used to refer to another commit in a commit message and -is the same as `--pretty='format:%C(auto)%h (%s, %ad)'`. By default, +is the same as ++--pretty=\'format:%C(auto)%h (%s, %ad)'++. By default, the date is formatted with `--date=short` unless another `--date` option is explicitly specified. As with any `format:` with format placeholders, its output is not affected by other options like `--decorate` and `--walk-reflogs`. -* 'email' +* `email` From From: @@ -83,30 +83,30 @@ placeholders, its output is not affected by other options like -* 'mboxrd' +* `mboxrd` + -Like 'email', but lines in the commit message starting with "From " +Like `email`, but lines in the commit message starting with "From " (preceded by zero or more ">") are quoted with ">" so they aren't confused as starting a new commit. 
-* 'raw' +* `raw` + -The 'raw' format shows the entire commit exactly as +The `raw` format shows the entire commit exactly as stored in the commit object. Notably, the hashes are -displayed in full, regardless of whether --abbrev or ---no-abbrev are used, and 'parents' information show the +displayed in full, regardless of whether `--abbrev` or +`--no-abbrev` are used, and 'parents' information shows the true parent commits, without taking grafts or history simplification into account. Note that this format affects the way commits are displayed, but not the way the diff is shown e.g. with `git log --raw`. To get full object names in a raw diff format, use `--no-abbrev`. -* 'format:' +* `format:` + -The 'format:' format allows you to specify which information +The `format:` format allows you to specify which information you want to show. It works a little bit like printf format, -with the notable exception that you get a newline with '%n' -instead of '\n'. +with the notable exception that you get a newline with `%n` +instead of `\n`. + E.g, 'format:"The author of %h was %an, %ar%nThe title was >>%s<<%n"' would show something like this: @@ -120,158 +120,161 @@ The title was >>t4119: test autocomputing -p for traditional diff input.<< The placeholders are: - Placeholders that expand to a single literal character: -'%n':: newline -'%%':: a raw '%' -'%x00':: '%x' followed by two hexadecimal digits is replaced with a +++%n++:: newline +++%%++:: a raw ++%++ +++%x00++:: ++%x++ followed by two hexadecimal digits is replaced with a byte with the hexadecimal digits' value (we will call this "literal formatting code" in the rest of this document).
- Placeholders that affect formatting of later placeholders: -'%Cred':: switch color to red -'%Cgreen':: switch color to green -'%Cblue':: switch color to blue -'%Creset':: reset color -'%C(...)':: color specification, as described under Values in the +++%Cred++:: switch color to red +++%Cgreen++:: switch color to green +++%Cblue++:: switch color to blue +++%Creset++:: reset color +++%C(++__++)++:: color specification, as described under Values in the "CONFIGURATION FILE" section of linkgit:git-config[1]. By default, colors are shown only when enabled for log output (by `color.diff`, `color.ui`, or `--color`, and respecting the `auto` settings of the former if we are going to a - terminal). `%C(auto,...)` is accepted as a historical - synonym for the default (e.g., `%C(auto,red)`). Specifying - `%C(always,...)` will show the colors even when color is + terminal). ++%C(auto,++__++)++ is accepted as a historical + synonym for the default (e.g., ++%C(auto,red)++). Specifying + ++%C(always,++__++)++ will show the colors even when color is not otherwise enabled (though consider just using - `--color=always` to enable color for the whole output, + `--color=always` to enable color for the whole output, including this format and anything else git might color). - `auto` alone (i.e. `%C(auto)`) will turn on auto coloring + `auto` alone (i.e. ++%C(auto)++) will turn on auto coloring on the next placeholders until the color is switched again. -'%m':: left (`<`), right (`>`) or boundary (`-`) mark -'%w([[,[,]]])':: switch line wrapping, like the -w option of +++%m++:: left (`<`), right (`>`) or boundary (`-`) mark +++%w(++`[[,[,]]]`++)++:: switch line wrapping, like the `-w` option of linkgit:git-shortlog[1]. -'%<( [,trunc|ltrunc|mtrunc])':: make the next placeholder take at +++%<(++`[,(trunc|ltrunc|mtrunc)]`++)++:: make the next placeholder take at least N column widths, padding spaces on the right if necessary. 
Optionally - truncate (with ellipsis '..') at the left (ltrunc) `..ft`, + truncate (with ellipsis `..`) at the left (ltrunc) `..ft`, the middle (mtrunc) `mi..le`, or the end (trunc) `rig..`, if the output is longer than - N columns. + __ columns. Note 1: that truncating - only works correctly with N >= 2. - Note 2: spaces around the N and M (see below) + only works correctly with __ >= 2. + Note 2: spaces around the __ and __ (see below) values are optional. Note 3: Emojis and other wide characters will take two display columns, which may over-run column boundaries. Note 4: decomposed character combining marks may be misplaced at padding boundaries. -'%<|( )':: make the next placeholder take at least until Mth +++%<|(++__ ++)++:: make the next placeholder take at least until __ th display column, padding spaces on the right if necessary. - Use negative M values for column positions measured + Use negative __ values for column positions measured from the right hand edge of the terminal window. -'%>( )', '%>|( )':: similar to '%<( )', '%<|( )' respectively, +++%>(++__++)++:: +++%>|(++__++)++:: similar to ++%<(++__++)++, ++%<|(++__++)++ respectively, but padding spaces on the left -'%>>( )', '%>>|( )':: similar to '%>( )', '%>|( )' +++%>>(++__++)++:: +++%>>|(++__++)++:: similar to ++%>(++__++)++, ++%>|(++__++)++ respectively, except that if the next placeholder takes more spaces than given and there are spaces on its left, use those spaces -'%><( )', '%><|( )':: similar to '%<( )', '%<|( )' +++%><(++__++)++:: +++%><|(++__++)++:: similar to ++%<(++__++)++, ++%<|(++__++)++ respectively, but padding both sides (i.e. 
the text is centered) - Placeholders that expand to information extracted from the commit: -'%H':: commit hash -'%h':: abbreviated commit hash -'%T':: tree hash -'%t':: abbreviated tree hash -'%P':: parent hashes -'%p':: abbreviated parent hashes -'%an':: author name -'%aN':: author name (respecting .mailmap, see linkgit:git-shortlog[1] ++%H+:: commit hash ++%h+:: abbreviated commit hash ++%T+:: tree hash ++%t+:: abbreviated tree hash ++%P+:: parent hashes ++%p+:: abbreviated parent hashes ++%an+:: author name ++%aN+:: author name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) -'%ae':: author email -'%aE':: author email (respecting .mailmap, see linkgit:git-shortlog[1] ++%ae+:: author email ++%aE+:: author email (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) -'%al':: author email local-part (the part before the '@' sign) -'%aL':: author local-part (see '%al') respecting .mailmap, see ++%al+:: author email local-part (the part before the `@` sign) ++%aL+:: author local-part (see +%al+) respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) -'%ad':: author date (format respects --date= option) -'%aD':: author date, RFC2822 style -'%ar':: author date, relative -'%at':: author date, UNIX timestamp -'%ai':: author date, ISO 8601-like format -'%aI':: author date, strict ISO 8601 format -'%as':: author date, short format (`YYYY-MM-DD`) -'%ah':: author date, human style (like the `--date=human` option of ++%ad+:: author date (format respects --date= option) ++%aD+:: author date, RFC2822 style ++%ar+:: author date, relative ++%at+:: author date, UNIX timestamp ++%ai+:: author date, ISO 8601-like format ++%aI+:: author date, strict ISO 8601 format ++%as+:: author date, short format (`YYYY-MM-DD`) ++%ah+:: author date, human style (like the `--date=human` option of linkgit:git-rev-list[1]) -'%cn':: committer name -'%cN':: committer name (respecting .mailmap, see ++%cn+:: committer name ++%cN+:: 
committer name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) -'%ce':: committer email -'%cE':: committer email (respecting .mailmap, see ++%ce+:: committer email ++%cE+:: committer email (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) -'%cl':: committer email local-part (the part before the '@' sign) -'%cL':: committer local-part (see '%cl') respecting .mailmap, see ++%cl+:: committer email local-part (the part before the `@` sign) ++%cL+:: committer local-part (see +%cl+) respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) -'%cd':: committer date (format respects --date= option) -'%cD':: committer date, RFC2822 style -'%cr':: committer date, relative -'%ct':: committer date, UNIX timestamp -'%ci':: committer date, ISO 8601-like format -'%cI':: committer date, strict ISO 8601 format -'%cs':: committer date, short format (`YYYY-MM-DD`) -'%ch':: committer date, human style (like the `--date=human` option of ++%cd+:: committer date (format respects --date= option) ++%cD+:: committer date, RFC2822 style ++%cr+:: committer date, relative ++%ct+:: committer date, UNIX timestamp ++%ci+:: committer date, ISO 8601-like format ++%cI+:: committer date, strict ISO 8601 format ++%cs+:: committer date, short format (`YYYY-MM-DD`) ++%ch+:: committer date, human style (like the `--date=human` option of linkgit:git-rev-list[1]) -'%d':: ref names, like the --decorate option of linkgit:git-log[1] -'%D':: ref names without the " (", ")" wrapping. -'%(decorate[:])':: ++%d+:: ref names, like the --decorate option of linkgit:git-log[1] ++%D+:: ref names without the " (", ")" wrapping. +++%(decorate++`[: