From cbc1d8e26547bcd2dac89328bff720efca6e1617 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 25 Apr 2025 08:38:26 -0700 Subject: [PATCH 01/18] ci: update the message for unavailable third-party software An earlier fix added an extra message immediately after failing to download a third-party package. But near the end of the script, their availability is checked again and given a message. Remove the new ones added with a recent fix, as they are redundant. If we were to add more places to download these software (e.g. for other platforms we currently do not download them on), the existing warning near the end of the script will also trigger. While at it, as Dscho suggests, rewrite the WARNING: label on the warning message to ::warning::, which presumably should be shown a bit more prominently in the CI summary. Suggested-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- ci/install-dependencies.sh | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index e51304c3b0eed2..be20271d3cbc78 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -71,7 +71,6 @@ ubuntu-*|i386/ubuntu-*|debian-*) chmod a+x "$CUSTOM_PATH/p4d" "$CUSTOM_PATH/p4" || { rm -f "$CUSTOM_PATH/p4" rm -f "$CUSTOM_PATH/p4d" - echo >&2 "P4 download (optional) failed" } wget --quiet \ @@ -79,16 +78,12 @@ ubuntu-*|i386/ubuntu-*|debian-*) tar -xzf "git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz" \ -C "$CUSTOM_PATH" --strip-components=1 \ "git-lfs-$LINUX_GIT_LFS_VERSION/git-lfs" && - rm "git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz" || { - rm -f "$CUSTOM_PATH/git-lfs" - echo >&2 "LFS download (optional) failed" - } + rm "git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz" || + rm -f "$CUSTOM_PATH/git-lfs" wget --quiet "$JGITWHENCE" --output-document="$CUSTOM_PATH/jgit" && - chmod a+x "$CUSTOM_PATH/jgit" || { - rm -f "$CUSTOM_PATH/jgit" - echo >&2 "JGit download (optional) 
failed" - } + chmod a+x "$CUSTOM_PATH/jgit" || + rm -f "$CUSTOM_PATH/jgit" ;; esac ;; @@ -151,7 +146,7 @@ then echo "$(tput setaf 6)Perforce Client Version$(tput sgr0)" p4 -V else - echo >&2 "WARNING: perforce wasn't installed, see above for clues why" + echo >&2 "::warning:: perforce wasn't installed, see above for clues why" fi if type git-lfs >/dev/null 2>&1 @@ -159,7 +154,7 @@ then echo "$(tput setaf 6)Git-LFS Version$(tput sgr0)" git-lfs version else - echo >&2 "WARNING: git-lfs wasn't installed, see above for clues why" + echo >&2 "::warning:: git-lfs wasn't installed, see above for clues why" fi if type jgit >/dev/null 2>&1 @@ -167,7 +162,7 @@ then echo "$(tput setaf 6)JGit Version$(tput sgr0)" jgit version else - echo >&2 "WARNING: JGit wasn't installed, see above for clues why" + echo >&2 "::warning:: JGit wasn't installed, see above for clues why" fi end_group "Install dependencies" From 956acbefbd5464748930d3f96a4fcaf43feb4291 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 25 Apr 2025 08:38:27 -0700 Subject: [PATCH 02/18] ci: download JGit from maven, not eclipse.org As Matthias Sohn, JGit maintainer, recommends, update the JGit download link from repo.eclipse.org to one on maven.org. Helped-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- ci/install-dependencies.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index be20271d3cbc78..d9004ab24f036c 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -9,7 +9,7 @@ begin_group "Install dependencies" P4WHENCE=https://cdist2.perforce.com/perforce/r23.2 LFSWHENCE=https://github.com/github/git-lfs/releases/download/v$LINUX_GIT_LFS_VERSION -JGITWHENCE=https://repo.eclipse.org/content/groups/releases//org/eclipse/jgit/org.eclipse.jgit.pgm/6.8.0.202311291450-r/org.eclipse.jgit.pgm-6.8.0.202311291450-r.sh 
+JGITWHENCE=https://repo1.maven.org/maven2/org/eclipse/jgit/org.eclipse.jgit.pgm/6.8.0.202311291450-r/org.eclipse.jgit.pgm-6.8.0.202311291450-r.sh # Make sudo a no-op and execute the command directly when running as root. # While using sudo would be fine on most platforms when we are root already, From 2cfe0541e711be39a47d093cba608c0700d027ec Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 25 Apr 2025 16:11:28 +0200 Subject: [PATCH 03/18] meson: report detected runtime executable paths Git needs to know about a couple of executable paths to pick at runtime. This includes the system shell, but may also optionally include the Perl and Python interpreters. Meson detects the location of these paths automatically via `find_program()`, which does a lookup via the `PATH` environment variable. As such, it may not be immediately obvious to the developer which paths have been autodetected. Improve this by exposing runtime executable paths at setup time. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/meson.build b/meson.build index c47cb79af0815a..a180c66ee69f4c 100644 --- a/meson.build +++ b/meson.build @@ -2080,3 +2080,9 @@ summary({ 'sha256': sha256_backend, 'zlib': zlib_backend, }, section: 'Backends') + +summary({ + 'perl': target_perl, + 'python': target_python, + 'shell': target_shell, +}, section: 'Runtime executable paths') From 4cba20fbdc68f4f968defc796647b103b72c9609 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 25 Apr 2025 16:11:29 +0200 Subject: [PATCH 04/18] meson: prefer shell at "/bin/sh" Meson detects the path of the target shell via `find_program("sh")`, which essentially does a lookup via `PATH`. This may easily lead to a subtly-broken Git distribution when the build host has its shell in a location that the target host doesn't know about. 
Fix the issue by appending "/bin" to the custom program path, which causes us to prefer "/bin/sh" over a `PATH`-based lookup. While "/bin/sh" isn't standardized, this path tends to work alright on Linux and BSD distributions. Furthermore, "/bin/sh" is also the path we pick in our Makefile by default, which further demonstrates that this shell fulfills our needs. Note that we intentionally append, not prepend, to the custom program path. This is because the program path can be configured by the user via the `-Dsane_tool_path=` build option, which should take precedence over any defaults we pick for the user. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index a180c66ee69f4c..6a90310a2ca311 100644 --- a/meson.build +++ b/meson.build @@ -236,7 +236,11 @@ sed = find_program('sed', dirs: program_path, native: true) shell = find_program('sh', dirs: program_path, native: true) tar = find_program('tar', dirs: program_path, native: true) -target_shell = find_program('sh', dirs: program_path, native: false) +# Detect the target shell that is used by Git at runtime. Note that we prefer +# "/bin/sh" over a PATH-based lookup, which provides a working shell on most +# supported systems. This path is also the default shell path used by our +# Makefile. This lookup can be overridden via `program_path`. +target_shell = find_program('sh', dirs: program_path + [ '/bin' ], native: false) # Sanity-check that programs required for the build exist. foreach tool : ['cat', 'cut', 'grep', 'sort', 'tr', 'uname'] From d235c468a5d713f8244727d1862eaa1f029b3821 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Fri, 25 Apr 2025 10:09:09 +0000 Subject: [PATCH 05/18] send-email: retrieve Message-ID from outlook SMTP server The script generates a Message-ID along with the other headers when gen_header is called, and is sent along with the email. 
For most email providers, including gmail, the Message-ID goes unchanged to the recipient. But, this does not seem to be the case with Outlook. In Outlook, when we send our own Message-ID as a part of the headers, it discards it. Then it generates a new random Message-ID and that is what the recipient gets. This is a problem because the Message-ID is crucial when we are sending multiple emails in a thread. The current implementation for threads in the script replies to the Message-ID it generated, but due to Outlook's behavior, it is not the same as the one that the recipient got, thus breaking threads. So a need arises to retrieve the Message-ID from the server response and set it in the In-Reply-To and References email headers instead of using the self-generated one for the purpose of replies. The $smtp->message variable in this script for outlook is something like this: 2.0.0 OK <Message-ID> [Hostname=Some-hostname] The Message-ID here is the one the recipient gets, rather than the one the script generated. This patch uses the fact above and retrieves the Message-ID from the server response. It then changes the value of the $message_id variable to the one received from the server. This value will be used when the next and subsequent messages are sent as replies to the message, thus preserving the threading of the messages. Signed-off-by: Aditya Garg Signed-off-by: Junio C Hamano --- git-send-email.perl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/git-send-email.perl b/git-send-email.perl index 1f613fa979df45..618474916e8c3e 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -1574,6 +1574,11 @@ sub gen_header { return ($recipients_ref, $to, $date, $gitversion, $cc, $ccline, $header); } +sub is_outlook { + my ($host) = @_; + return ($host eq 'smtp.office365.com' || $host eq 'smtp-mail.outlook.com'); +} + # Prepares the email, then asks the user what to do. # # If the user chooses to send the email, it's sent and 1 is returned. 
@@ -1737,6 +1742,22 @@ sub send_message { $smtp->datasend("$line") or die $smtp->message; } $smtp->dataend() or die $smtp->message; + + # Outlook discards the Message-ID header we set while sending the email + # and generates a new random Message-ID. So in order to avoid breaking + # threads, we simply retrieve the Message-ID from the server response + # and assign it to the $message_id variable, which will then be + # assigned to $in_reply_to by the caller when the next message is sent + # as a response to this message. + if (is_outlook($smtp_server)) { + if ($smtp->message =~ /<([^>]+)>/) { + $message_id = "<$1>"; + printf __("Outlook reassigned Message-ID to: %s\n"), $message_id; + } else { + warn __("Warning: Could not retrieve Message-ID from server response.\n"); + } + } + $smtp->code =~ /250|200/ or die sprintf(__("Failed to send %s\n"), $subject).$smtp->message; } if ($quiet) { From 89d557b950c7a0581c12452e8f9576c45546246b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 28 Apr 2025 20:24:43 +0000 Subject: [PATCH 06/18] test-tool: add pack-deltas helper When trying to demonstrate certain behavior in tests, it can be helpful to create packfiles that have specific delta structures. 'git pack-objects' uses various algorithms to select deltas based on their compression rates, but that does not always demonstrate all possible packfile shapes. This becomes especially important when wanting to test 'git index-pack' and its ability to parse certain pack shapes. We have prior art in t/lib-pack.sh, where certain delta structures are produced by manually writing certain opaque pack contents. However, producing these script updates is cumbersome and difficult to do as a contributor. Instead, create a new test-tool, 'test-tool pack-deltas', that reads a list of instructions for which objects to include in a packfile and how those objects should be written in delta form. 
At the moment, this only supports REF_DELTAs as those are the kinds of deltas needed to exercise a bug in 'git index-pack'. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Makefile | 1 + t/helper/meson.build | 1 + t/helper/test-pack-deltas.c | 148 ++++++++++++++++++++++++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + 5 files changed, 152 insertions(+) create mode 100644 t/helper/test-pack-deltas.c diff --git a/Makefile b/Makefile index 13f9062a056944..c4d21ccd3d1b6b 100644 --- a/Makefile +++ b/Makefile @@ -821,6 +821,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o TEST_BUILTINS_OBJS += test-mktemp.o TEST_BUILTINS_OBJS += test-name-hash.o TEST_BUILTINS_OBJS += test-online-cpus.o +TEST_BUILTINS_OBJS += test-pack-deltas.o TEST_BUILTINS_OBJS += test-pack-mtimes.o TEST_BUILTINS_OBJS += test-parse-options.o TEST_BUILTINS_OBJS += test-parse-pathspec-file.o diff --git a/t/helper/meson.build b/t/helper/meson.build index d2cabaa2bcfcc9..d4e8b26df8d6de 100644 --- a/t/helper/meson.build +++ b/t/helper/meson.build @@ -36,6 +36,7 @@ test_tool_sources = [ 'test-mktemp.c', 'test-name-hash.c', 'test-online-cpus.c', + 'test-pack-deltas.c', 'test-pack-mtimes.c', 'test-parse-options.c', 'test-parse-pathspec-file.c', diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c new file mode 100644 index 00000000000000..4caa024b1ebe73 --- /dev/null +++ b/t/helper/test-pack-deltas.c @@ -0,0 +1,148 @@ +#define USE_THE_REPOSITORY_VARIABLE + +#include "test-tool.h" +#include "git-compat-util.h" +#include "delta.h" +#include "git-zlib.h" +#include "hash.h" +#include "hex.h" +#include "pack.h" +#include "pack-objects.h" +#include "parse-options.h" +#include "setup.h" +#include "strbuf.h" +#include "string-list.h" + +static const char *usage_str[] = { + "test-tool pack-deltas --num-objects ", + NULL +}; + +static unsigned long do_compress(void **pptr, unsigned long size) +{ + git_zstream stream; + void *in, *out; + unsigned long maxsize; + + 
git_deflate_init(&stream, 1); + maxsize = git_deflate_bound(&stream, size); + + in = *pptr; + out = xmalloc(maxsize); + *pptr = out; + + stream.next_in = in; + stream.avail_in = size; + stream.next_out = out; + stream.avail_out = maxsize; + while (git_deflate(&stream, Z_FINISH) == Z_OK) + ; /* nothing */ + git_deflate_end(&stream); + + free(in); + return stream.total_out; +} + +static void write_ref_delta(struct hashfile *f, + struct object_id *oid, + struct object_id *base) +{ + unsigned char header[MAX_PACK_OBJECT_HEADER]; + unsigned long size, base_size, delta_size, compressed_size, hdrlen; + enum object_type type; + void *base_buf, *delta_buf; + void *buf = repo_read_object_file(the_repository, + oid, &type, + &size); + + if (!buf) + die("unable to read %s", oid_to_hex(oid)); + + base_buf = repo_read_object_file(the_repository, + base, &type, + &base_size); + + if (!base_buf) + die("unable to read %s", oid_to_hex(base)); + + delta_buf = diff_delta(base_buf, base_size, + buf, size, &delta_size, 0); + + compressed_size = do_compress(&delta_buf, delta_size); + + hdrlen = encode_in_pack_object_header(header, sizeof(header), + OBJ_REF_DELTA, delta_size); + hashwrite(f, header, hdrlen); + hashwrite(f, base->hash, the_repository->hash_algo->rawsz); + hashwrite(f, delta_buf, compressed_size); + + free(buf); + free(base_buf); + free(delta_buf); +} + +int cmd__pack_deltas(int argc, const char **argv) +{ + int num_objects = -1; + struct hashfile *f; + struct strbuf line = STRBUF_INIT; + struct option options[] = { + OPT_INTEGER('n', "num-objects", &num_objects, N_("the number of objects to write")), + OPT_END() + }; + + argc = parse_options(argc, argv, NULL, + options, usage_str, 0); + + if (argc || num_objects < 0) + usage_with_options(usage_str, options); + + setup_git_directory(); + + f = hashfd(the_repository->hash_algo, 1, ""); + write_pack_header(f, num_objects); + + /* Read each line from stdin into 'line' */ + while (strbuf_getline_lf(&line, stdin) != EOF) { + 
const char *type_str, *content_oid_str, *base_oid_str = NULL; + struct object_id content_oid, base_oid; + struct string_list items = STRING_LIST_INIT_NODUP; + /* + * Tokenize into two or three parts: + * 1. REF_DELTA, OFS_DELTA, or FULL. + * 2. The object ID for the content object. + * 3. The object ID for the base object (optional). + */ + if (string_list_split_in_place(&items, line.buf, " ", 3) < 0) + die("invalid input format: %s", line.buf); + + if (items.nr < 2) + die("invalid input format: %s", line.buf); + + type_str = items.items[0].string; + content_oid_str = items.items[1].string; + + if (get_oid_hex(content_oid_str, &content_oid)) + die("invalid object: %s", content_oid_str); + if (items.nr >= 3) { + base_oid_str = items.items[2].string; + if (get_oid_hex(base_oid_str, &base_oid)) + die("invalid object: %s", base_oid_str); + } + string_list_clear(&items, 0); + + if (!strcmp(type_str, "REF_DELTA")) + write_ref_delta(f, &content_oid, &base_oid); + else if (!strcmp(type_str, "OFS_DELTA")) + die("OFS_DELTA not implemented"); + else if (!strcmp(type_str, "FULL")) + die("FULL not implemented"); + else + die("unknown pack type: %s", type_str); + } + + finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK, + CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); + strbuf_release(&line); + return 0; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index 50dc4dac4ed625..74812ed86d385a 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -46,6 +46,7 @@ static struct test_cmd cmds[] = { { "mktemp", cmd__mktemp }, { "name-hash", cmd__name_hash }, { "online-cpus", cmd__online_cpus }, + { "pack-deltas", cmd__pack_deltas }, { "pack-mtimes", cmd__pack_mtimes }, { "parse-options", cmd__parse_options }, { "parse-options-flags", cmd__parse_options_flags }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 6d62a5b53d9596..2571a3ccfe8991 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -39,6 +39,7 @@ int cmd__mergesort(int argc, const char 
**argv); int cmd__mktemp(int argc, const char **argv); int cmd__name_hash(int argc, const char **argv); int cmd__online_cpus(int argc, const char **argv); +int cmd__pack_deltas(int argc, const char **argv); int cmd__pack_mtimes(int argc, const char **argv); int cmd__parse_options(int argc, const char **argv); int cmd__parse_options_flags(int argc, const char **argv); From fd7fd7afc975a42dd60c96f57b83f2a4fc7e58c0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 28 Apr 2025 20:24:44 +0000 Subject: [PATCH 07/18] t5309: create failing test for 'git index-pack' This new test demonstrates some behavior where a valid packfile is being rejected by the Git client due to the order in which it is resolving REF_DELTAs. The thin packfile has a REF_DELTA chain A->B->C where C is not included in the packfile. However, the client repository contains both C and B already. Thus, 'git index-pack' is able to resolve A before resolving B. When resolving B, it then attempts to resolve any other REF_DELTAs that are pointing to B as a base. This "revisits" A and complains as if there is a cycle, but it did not actually detect a cycle. A fix will arrive in the next change. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5309-pack-delta-cycles.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/t/t5309-pack-delta-cycles.sh b/t/t5309-pack-delta-cycles.sh index 60fc710bacb20e..6a9367633026c1 100755 --- a/t/t5309-pack-delta-cycles.sh +++ b/t/t5309-pack-delta-cycles.sh @@ -75,4 +75,28 @@ test_expect_success 'failover to a duplicate object in the same pack' ' test_must_fail git index-pack --fix-thin --stdin B->C with B on disk' ' + git init server && + ( + cd server && + test_commit_bulk 4 + ) && + + A=$(git -C server rev-parse HEAD^{tree}) && + B=$(git -C server rev-parse HEAD~1^{tree}) && + C=$(git -C server rev-parse HEAD~2^{tree}) && + git -C server reset --hard HEAD~1 && + + test-tool -C server pack-deltas --num-objects=2 >thin.pack <<-EOF && + REF_DELTA $A $B + REF_DELTA $B $C + EOF + + git clone "file://$(pwd)/server" client && + ( + cd client && + git index-pack --fix-thin --stdin <../thin.pack + ) +' + test_done From 98f8854c948340e77532a3fe6978c005cf8f05e3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 28 Apr 2025 20:24:45 +0000 Subject: [PATCH 08/18] index-pack: allow revisiting REF_DELTA chains As detailed in the previous changes to t5309-pack-delta-cycles.sh, the logic within 'git index-pack' to analyze an incoming thin packfile with REF_DELTAs is suspect. The algorithm is overly cautious around delta cycles, and that leads in fact to failing even when there is no cycle. This change adjusts the algorithm to no longer fail in these cases. In fact, these cycle cases will no longer fail but more importantly the valid cases will no longer fail, either. The resulting packfile from the --fix-thin operation will not have cycles either since REF_DELTAs are forbidden from the on-disk format and OFS_DELTAs are impossible to write as a cycle. The crux of the matter is how the algorithm works when the REF_DELTAs point to base objects that exist in the local repository. 
When reading the thin packfile, the object IDs for the delta objects are unknown so we do not have the delta chain structure automatically. Instead, we need to start somewhere by selecting a delta whose base is inside our current object database. Consider the case where the packfile has two REF_DELTA objects, A and B, and the delta chain looks like "A depends on B" and "B depends on C" for some third object C, where C is already in the current repository. The algorithm _should_ start with all objects that depend on C, finding B, and then moving on to all objects depending on B, finding A. However, if the repository also already has object B, then the delta chain can be analyzed in a different order. The deltas with base B can be analyzed first, finding A, and then the deltas with base C are analyzed, finding B. The algorithm currently continues to look for objects that depend on B, finding A again. This fails due to A's 'real_type' member already being overwritten from OBJ_REF_DELTA to the correct object type. This scenario is possible in a typical 'git fetch' where the client does not advertise B as a 'have' but requests A as a 'want' (and C is noticed as a common object based on other 'have's). The reason this isn't typically seen is that most Git servers use OFS_DELTAs to represent deltas within a packfile. However, if a server uses only REF_DELTAs, then this kind of issue can occur. There is nothing in the explicit packfile format that states this use of inter-pack REF_DELTA is incorrect, only that REF_DELTAs should not be used in the on-disk representation to avoid cycles. This die() was introduced in ab791dd138 (index-pack: fix race condition with duplicate bases, 2014-08-29). Several refactors have adjusted the error message and the surrounding logic, but this issue has existed for a longer time as that was only a conversion from an assert(). 
The tests in t5309 originated in 3b910d0c5e (add tests for indexing packs with delta cycles, 2013-08-23) and b2ef3d9ebb (test index-pack on packs with recoverable delta cycles, 2013-08-23). These changes make note that the current behavior of handling "resolvable" cycles is mostly a documentation-only test, not that this behavior is the best way for Git to handle the situation. The fix here is somewhat complicated due to the amount of state being adjusted by the loop within threaded_second_pass(). Instead of trying to resume the start of the loop while adjusting the necessary context, I chose to scan the REF_DELTAs depending on the current 'parent' and skip any that have already been processed. This necessarily leaves us in a state where 'child' and 'child_obj' could be left as NULL and that must be handled later. There is also some careful handling around skipping REF_DELTAs when there are also OFS_DELTAs depending on that parent. There may be value in extending 'test-tool pack-deltas' to allow writing OFS_DELTAs in order to exercise this logic across the delta types. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 58 ++++++++++++++++++++---------------- t/t5309-pack-delta-cycles.sh | 12 ++++++-- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index de127c0ff13a28..dbe79701fb8b6f 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1109,8 +1109,8 @@ static void *threaded_second_pass(void *data) set_thread_data(data); for (;;) { struct base_data *parent = NULL; - struct object_entry *child_obj; - struct base_data *child; + struct object_entry *child_obj = NULL; + struct base_data *child = NULL; counter_lock(); display_progress(progress, nr_resolved_deltas); @@ -1137,15 +1137,18 @@ static void *threaded_second_pass(void *data) parent = list_first_entry(&work_head, struct base_data, list); - if (parent->ref_first <= parent->ref_last) { + while (parent->ref_first <= parent->ref_last) { int offset = ref_deltas[parent->ref_first++].obj_no; child_obj = objects + offset; - if (child_obj->real_type != OBJ_REF_DELTA) - die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)", - (uintmax_t) child_obj->idx.offset, - oid_to_hex(&parent->obj->idx.oid)); + if (child_obj->real_type != OBJ_REF_DELTA) { + child_obj = NULL; + continue; + } child_obj->real_type = parent->obj->real_type; - } else { + break; + } + + if (!child_obj && parent->ofs_first <= parent->ofs_last) { child_obj = objects + ofs_deltas[parent->ofs_first++].obj_no; assert(child_obj->real_type == OBJ_OFS_DELTA); @@ -1178,29 +1181,32 @@ static void *threaded_second_pass(void *data) } work_unlock(); - if (parent) { - child = resolve_delta(child_obj, parent); - if (!child->children_remaining) - FREE_AND_NULL(child->data); - } else { - child = make_base(child_obj, NULL); - if (child->children_remaining) { - /* - * Since this child has its own delta children, - * we will need this data in the future. 
- * Inflate now so that future iterations will - * have access to this object's data while - * outside the work mutex. - */ - child->data = get_data_from_pack(child_obj); - child->size = child_obj->size; + if (child_obj) { + if (parent) { + child = resolve_delta(child_obj, parent); + if (!child->children_remaining) + FREE_AND_NULL(child->data); + } else{ + child = make_base(child_obj, NULL); + if (child->children_remaining) { + /* + * Since this child has its own delta children, + * we will need this data in the future. + * Inflate now so that future iterations will + * have access to this object's data while + * outside the work mutex. + */ + child->data = get_data_from_pack(child_obj); + child->size = child_obj->size; + } } } work_lock(); if (parent) parent->retain_data--; - if (child->data) { + + if (child && child->data) { /* * This child has its own children, so add it to * work_head. @@ -1209,7 +1215,7 @@ static void *threaded_second_pass(void *data) base_cache_used += child->size; prune_base_data(NULL); free_base_data(child); - } else { + } else if (child) { /* * This child does not have its own children. 
It may be * the last descendant of its ancestors; free those diff --git a/t/t5309-pack-delta-cycles.sh b/t/t5309-pack-delta-cycles.sh index 6a9367633026c1..6b03675d91b5e1 100755 --- a/t/t5309-pack-delta-cycles.sh +++ b/t/t5309-pack-delta-cycles.sh @@ -60,7 +60,10 @@ test_expect_success 'index-pack detects REF_DELTA cycles' ' test_expect_success 'failover to an object in another pack' ' clear_packs && git index-pack --stdin recoverable.pack && pack_trailer recoverable.pack && - test_must_fail git index-pack --fix-thin --stdin B->C with B on disk' ' +test_expect_success 'index-pack works with thin pack A->B->C with B on disk' ' git init server && ( cd server && From daec3c08e3e40c436cab013c5005a6e8b2e7923e Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Tue, 29 Apr 2025 16:37:09 +0000 Subject: [PATCH 09/18] send-email: add --[no-]outlook-id-fix option Add an option to allow users to specifically enable or disable retrieving the Message-ID from the Outlook SMTP server. This can be used for other hosts mimicking the behaviour of Outlook, or for users who set a custom domain to be a CNAME for the Outlook SMTP server. While at it, let's also add the missing * in the description of --no-smtp-auth. Helped-by: Junio C Hamano Signed-off-by: Aditya Garg Signed-off-by: Junio C Hamano --- Documentation/git-send-email.adoc | 13 +++++++++++++ git-send-email.perl | 14 ++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/Documentation/git-send-email.adoc b/Documentation/git-send-email.adoc index 7f223db42dd313..92389036fa368c 100644 --- a/Documentation/git-send-email.adoc +++ b/Documentation/git-send-email.adoc @@ -115,6 +115,19 @@ illustration below where `[PATCH v2 0/3]` is in reply to `[PATCH 0/2]`: Only necessary if --compose is also set. If --compose is not set, this will be prompted for. +--[no-]outlook-id-fix:: + Microsoft Outlook SMTP servers discard the Message-ID sent via email and + assign a new random Message-ID, thus breaking threads. 
++ +With `--outlook-id-fix`, 'git send-email' uses a mechanism specific to +Outlook servers to learn the Message-ID the server assigned to fix the +threading. Use it only when you know that the server reports the +rewritten Message-ID the same way as Outlook servers do. ++ +Without this option specified, the fix is done by default when talking +to 'smtp.office365.com' or 'smtp-mail.outlook.com'. Use +`--no-outlook-id-fix` to disable even when talking to these two servers. + --subject=:: Specify the initial subject of the email thread. Only necessary if --compose is also set. If --compose diff --git a/git-send-email.perl b/git-send-email.perl index 618474916e8c3e..4215f8f7e951e3 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -41,6 +41,8 @@ sub usage { --subject * Email "Subject:" --reply-to * Email "Reply-To:" --in-reply-to * Email "In-Reply-To:" + --[no-]outlook-id-fix * The SMTP host is an Outlook server that munges the + Message-ID. Retrieve it from the server. --[no-]xmailer * Add "X-Mailer:" header (default). --[no-]annotate * Review each patch that will be sent in an editor. --compose * Open an editor for introduction. @@ -68,7 +70,7 @@ sub usage { --smtp-auth * Space-separated list of allowed AUTH mechanisms, or "none" to disable authentication. This setting forces to use one of the listed mechanisms. - --no-smtp-auth Disable SMTP authentication. Shorthand for + --no-smtp-auth * Disable SMTP authentication. Shorthand for `--smtp-auth=none` --smtp-debug <0|1> * Disable, enable Net::SMTP debug. 
@@ -290,6 +292,7 @@ sub do_edit { my $mailmap = 0; my $target_xfer_encoding = 'auto'; my $forbid_sendmail_variables = 1; +my $outlook_id_fix = 'auto'; my %config_bool_settings = ( "thread" => \$thread, @@ -305,6 +308,7 @@ sub do_edit { "xmailer" => \$use_xmailer, "forbidsendmailvariables" => \$forbid_sendmail_variables, "mailmap" => \$mailmap, + "outlookidfix" => \$outlook_id_fix, ); my %config_settings = ( @@ -551,6 +555,7 @@ sub config_regexp { "relogin-delay=i" => \$relogin_delay, "git-completion-helper" => \$git_completion_helper, "v=s" => \$reroll_count, + "outlook-id-fix!" => \$outlook_id_fix, ); $rc = GetOptions(%options); @@ -1576,7 +1581,12 @@ sub gen_header { sub is_outlook { my ($host) = @_; - return ($host eq 'smtp.office365.com' || $host eq 'smtp-mail.outlook.com'); + if ($outlook_id_fix eq 'auto') { + $outlook_id_fix = + ($host eq 'smtp.office365.com' || + $host eq 'smtp-mail.outlook.com') ? 1 : 0; + } + return $outlook_id_fix; } # Prepares the email, then asks the user what to do. From ddb28da58fd657fa672f4605e50e140ce4c662f8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 29 Apr 2025 09:52:15 +0200 Subject: [PATCH 10/18] object-store: move `struct packed_git` into "packfile.h" The "object-store.h" header contains the definition of `struct packed_git`. As this structure hosts all kinds of information about a specific packfile it is arguably a bit out of place in a generic place like "object-store.h". Move the structure as well as `pack_map_entry_cmp()` into "packfile.h". 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-store.h | 59 +------------------------------------------------- pack-objects.h | 1 + packfile.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 59 deletions(-) diff --git a/object-store.h b/object-store.h index 46961dc954257b..e04469a85fba4a 100644 --- a/object-store.h +++ b/object-store.h @@ -92,65 +92,8 @@ struct oidtree *odb_loose_cache(struct object_directory *odb, /* Empty the loose object cache for the specified object directory. */ void odb_clear_loose_cache(struct object_directory *odb); -struct packed_git { - struct hashmap_entry packmap_ent; - struct packed_git *next; - struct list_head mru; - struct pack_window *windows; - off_t pack_size; - const void *index_data; - size_t index_size; - uint32_t num_objects; - size_t crc_offset; - struct oidset bad_objects; - int index_version; - time_t mtime; - int pack_fd; - int index; /* for builtin/pack-objects.c */ - unsigned pack_local:1, - pack_keep:1, - pack_keep_in_core:1, - freshened:1, - do_not_close:1, - pack_promisor:1, - multi_pack_index:1, - is_cruft:1; - unsigned char hash[GIT_MAX_RAWSZ]; - struct revindex_entry *revindex; - const uint32_t *revindex_data; - const uint32_t *revindex_map; - size_t revindex_size; - /* - * mtimes_map points at the beginning of the memory mapped region of - * this pack's corresponding .mtimes file, and mtimes_size is the size - * of that .mtimes file - */ - const uint32_t *mtimes_map; - size_t mtimes_size; - - /* repo denotes the repository this packfile belongs to */ - struct repository *repo; - - /* something like ".git/objects/pack/xxxxx.pack" */ - char pack_name[FLEX_ARRAY]; /* more */ -}; - +struct packed_git; struct multi_pack_index; - -static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, - const struct hashmap_entry *entry, - const struct hashmap_entry *entry2, - const void *keydata) -{ - const char *key = keydata; - const struct packed_git 
*pg1, *pg2; - - pg1 = container_of(entry, const struct packed_git, packmap_ent); - pg2 = container_of(entry2, const struct packed_git, packmap_ent); - - return strcmp(pg1->pack_name, key ? key : pg2->pack_name); -} - struct cached_object_entry; struct raw_object_store { diff --git a/pack-objects.h b/pack-objects.h index d1c4ae7f9b6189..475a2d67ce30eb 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -4,6 +4,7 @@ #include "object-store.h" #include "thread-utils.h" #include "pack.h" +#include "packfile.h" struct repository; diff --git a/packfile.h b/packfile.h index 25097213d06d61..05499382397576 100644 --- a/packfile.h +++ b/packfile.h @@ -1,13 +1,70 @@ #ifndef PACKFILE_H #define PACKFILE_H +#include "list.h" #include "object.h" #include "oidset.h" /* in object-store.h */ -struct packed_git; struct object_info; +struct packed_git { + struct hashmap_entry packmap_ent; + struct packed_git *next; + struct list_head mru; + struct pack_window *windows; + off_t pack_size; + const void *index_data; + size_t index_size; + uint32_t num_objects; + size_t crc_offset; + struct oidset bad_objects; + int index_version; + time_t mtime; + int pack_fd; + int index; /* for builtin/pack-objects.c */ + unsigned pack_local:1, + pack_keep:1, + pack_keep_in_core:1, + freshened:1, + do_not_close:1, + pack_promisor:1, + multi_pack_index:1, + is_cruft:1; + unsigned char hash[GIT_MAX_RAWSZ]; + struct revindex_entry *revindex; + const uint32_t *revindex_data; + const uint32_t *revindex_map; + size_t revindex_size; + /* + * mtimes_map points at the beginning of the memory mapped region of + * this pack's corresponding .mtimes file, and mtimes_size is the size + * of that .mtimes file + */ + const uint32_t *mtimes_map; + size_t mtimes_size; + + /* repo denotes the repository this packfile belongs to */ + struct repository *repo; + + /* something like ".git/objects/pack/xxxxx.pack" */ + char pack_name[FLEX_ARRAY]; /* more */ +}; + +static inline int pack_map_entry_cmp(const void *cmp_data 
UNUSED, + const struct hashmap_entry *entry, + const struct hashmap_entry *entry2, + const void *keydata) +{ + const char *key = keydata; + const struct packed_git *pg1, *pg2; + + pg1 = container_of(entry, const struct packed_git, packmap_ent); + pg2 = container_of(entry2, const struct packed_git, packmap_ent); + + return strcmp(pg1->pack_name, key ? key : pg2->pack_name); +} + struct pack_window { struct pack_window *next; unsigned char *base; From 56ef85e82ffa39ac86db39bc0ac11c67451d0e5b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 29 Apr 2025 09:52:16 +0200 Subject: [PATCH 11/18] object-store: drop `loose_object_path()` The function `loose_object_path()` is a trivial wrapper around `odb_loose_path()`, with the only exception that it always uses the primary object database of the given repository. This doesn't really add a ton of value though, so let's drop the function and inline it at every callsite. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- http-walker.c | 3 ++- http.c | 4 ++-- object-file.c | 4 ++-- object-file.h | 4 ++++ object-store.c | 6 ------ object-store.h | 7 ------- 6 files changed, 10 insertions(+), 18 deletions(-) diff --git a/http-walker.c b/http-walker.c index 882cae19c2468c..95458e2f6384bc 100644 --- a/http-walker.c +++ b/http-walker.c @@ -9,6 +9,7 @@ #include "list.h" #include "transport.h" #include "packfile.h" +#include "object-file.h" #include "object-store.h" struct alt_base { @@ -540,7 +541,7 @@ static int fetch_object(struct walker *walker, const struct object_id *oid) ret = error("File %s has bad hash", hex); } else if (req->rename < 0) { struct strbuf buf = STRBUF_INIT; - loose_object_path(the_repository, &buf, &req->oid); + odb_loose_path(the_repository->objects->odb, &buf, &req->oid); ret = error("unable to write sha1 filename %s", buf.buf); strbuf_release(&buf); } diff --git a/http.c b/http.c index 0c41138042562f..3c029cf8947df7 100644 --- a/http.c +++ b/http.c @@ -2662,7 +2662,7 @@ struct 
http_object_request *new_http_object_request(const char *base_url, oidcpy(&freq->oid, oid); freq->localfile = -1; - loose_object_path(the_repository, &filename, oid); + odb_loose_path(the_repository->objects->odb, &filename, oid); strbuf_addf(&freq->tmpfile, "%s.temp", filename.buf); strbuf_addf(&prevfile, "%s.prev", filename.buf); @@ -2814,7 +2814,7 @@ int finish_http_object_request(struct http_object_request *freq) unlink_or_warn(freq->tmpfile.buf); return -1; } - loose_object_path(the_repository, &filename, &freq->oid); + odb_loose_path(the_repository->objects->odb, &filename, &freq->oid); freq->rename = finalize_object_file(freq->tmpfile.buf, filename.buf); strbuf_release(&filename); diff --git a/object-file.c b/object-file.c index 9cc3a24a40da1a..dc56a4766df4d1 100644 --- a/object-file.c +++ b/object-file.c @@ -932,7 +932,7 @@ static int write_loose_object(const struct object_id *oid, char *hdr, if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) prepare_loose_object_bulk_checkin(); - loose_object_path(the_repository, &filename, oid); + odb_loose_path(the_repository->objects->odb, &filename, oid); fd = start_loose_object_common(&tmp_file, filename.buf, flags, &stream, compressed, sizeof(compressed), @@ -1079,7 +1079,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, goto cleanup; } - loose_object_path(the_repository, &filename, oid); + odb_loose_path(the_repository->objects->odb, &filename, oid); /* We finally know the object path, and create the missing dir. */ dirlen = directory_size(filename.buf); diff --git a/object-file.h b/object-file.h index c002fbe23451b3..0a7b6b9f9d9288 100644 --- a/object-file.h +++ b/object-file.h @@ -25,6 +25,10 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa struct object_directory; +/* + * Put in `buf` the name of the file in the local object database that + * would be used to store a loose object with the specified oid. 
+ */ const char *odb_loose_path(struct object_directory *odb, struct strbuf *buf, const struct object_id *oid); diff --git a/object-store.c b/object-store.c index 6ab50d25d3eb4f..e5cfb8c007915a 100644 --- a/object-store.c +++ b/object-store.c @@ -96,12 +96,6 @@ int odb_pack_keep(const char *name) return open(name, O_RDWR|O_CREAT|O_EXCL, 0600); } -const char *loose_object_path(struct repository *r, struct strbuf *buf, - const struct object_id *oid) -{ - return odb_loose_path(r->objects->odb, buf, oid); -} - /* * Return non-zero iff the path is usable as an alternate object database. */ diff --git a/object-store.h b/object-store.h index e04469a85fba4a..5668de62d01a5d 100644 --- a/object-store.h +++ b/object-store.h @@ -196,13 +196,6 @@ int odb_mkstemp(struct strbuf *temp_filename, const char *pattern); */ int odb_pack_keep(const char *name); -/* - * Put in `buf` the name of the file in the local object database that - * would be used to store a loose object with the specified oid. - */ -const char *loose_object_path(struct repository *r, struct strbuf *buf, - const struct object_id *oid); - void *map_loose_object(struct repository *r, const struct object_id *oid, unsigned long *size); From 0b8ed25b66aedc9f4fe44d1a5cab2719290b22a9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 29 Apr 2025 09:52:17 +0200 Subject: [PATCH 12/18] object-store: move and rename `odb_pack_keep()` The function `odb_pack_keep()` creates a file at the passed-in path. If this fails, then the function re-tries by first creating any potentially missing leading directories and then trying to create the file once again. As such, this function doesn't host any kind of logic that is specific to the object store, but is rather a generic helper function. Rename the function to `safe_create_file_with_leading_directories()` and move it into "path.c". While at it, refactor it so that it loses its dependency on `the_repository`. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 3 ++- builtin/index-pack.c | 2 +- object-store.c | 13 ------------- object-store.h | 7 ------- path.c | 14 ++++++++++++++ path.h | 7 +++++++ 6 files changed, 24 insertions(+), 22 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index c1e198f4e34df9..b2839c5f439b7b 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -811,7 +811,8 @@ static char *keep_pack(const char *curr_index_name) int keep_fd; odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep"); - keep_fd = odb_pack_keep(name.buf); + keep_fd = safe_create_file_with_leading_directories(pack_data->repo, + name.buf); if (keep_fd < 0) die_errno("cannot create keep file"); write_or_die(keep_fd, keep_msg, strlen(keep_msg)); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 60a8ee05dbc982..f49431d626b173 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1565,7 +1565,7 @@ static void write_special_file(const char *suffix, const char *msg, else filename = odb_pack_name(the_repository, &name_buf, hash, suffix); - fd = odb_pack_keep(filename); + fd = safe_create_file_with_leading_directories(the_repository, filename); if (fd < 0) { if (errno != EEXIST) die_errno(_("cannot write %s file '%s'"), diff --git a/object-store.c b/object-store.c index e5cfb8c007915a..0cbad5a19a0c04 100644 --- a/object-store.c +++ b/object-store.c @@ -83,19 +83,6 @@ int odb_mkstemp(struct strbuf *temp_filename, const char *pattern) return xmkstemp_mode(temp_filename->buf, mode); } -int odb_pack_keep(const char *name) -{ - int fd; - - fd = open(name, O_RDWR|O_CREAT|O_EXCL, 0600); - if (0 <= fd) - return fd; - - /* slow path */ - safe_create_leading_directories_const(the_repository, name); - return open(name, O_RDWR|O_CREAT|O_EXCL, 0600); -} - /* * Return non-zero iff the path is usable as an alternate object database. 
*/ diff --git a/object-store.h b/object-store.h index 5668de62d01a5d..aa8fc63043ec87 100644 --- a/object-store.h +++ b/object-store.h @@ -189,13 +189,6 @@ void raw_object_store_clear(struct raw_object_store *o); */ int odb_mkstemp(struct strbuf *temp_filename, const char *pattern); -/* - * Create a pack .keep file named "name" (which should generally be the output - * of odb_pack_name). Returns a file descriptor opened for writing, or -1 on - * error. - */ -int odb_pack_keep(const char *name); - void *map_loose_object(struct repository *r, const struct object_id *oid, unsigned long *size); diff --git a/path.c b/path.c index 4505bb78e8b470..3b598b2847ff03 100644 --- a/path.c +++ b/path.c @@ -1011,6 +1011,20 @@ enum scld_error safe_create_leading_directories_const(struct repository *repo, return result; } +int safe_create_file_with_leading_directories(struct repository *repo, + const char *path) +{ + int fd; + + fd = open(path, O_RDWR|O_CREAT|O_EXCL, 0600); + if (0 <= fd) + return fd; + + /* slow path */ + safe_create_leading_directories_const(repo, path); + return open(path, O_RDWR|O_CREAT|O_EXCL, 0600); +} + static int have_same_root(const char *path1, const char *path2) { int is_abs1, is_abs2; diff --git a/path.h b/path.h index fd1a194b060135..e67348f25397cc 100644 --- a/path.h +++ b/path.h @@ -266,6 +266,13 @@ enum scld_error safe_create_leading_directories_const(struct repository *repo, const char *path); enum scld_error safe_create_leading_directories_no_share(char *path); +/* + * Create a file, potentially creating its leading directories in case they + * don't exist. Returns the return value of the open(3p) call. 
+ */ +int safe_create_file_with_leading_directories(struct repository *repo, + const char *path); + # ifdef USE_THE_REPOSITORY_VARIABLE # include "strbuf.h" # include "repository.h" From 1a793261c53507f7c46f748cc76378a9c5bb05cf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 29 Apr 2025 09:52:18 +0200 Subject: [PATCH 13/18] object-store: move function declarations to their respective subsystems We carry declarations for a couple of functions in "object-store.h" that are not defined in "object-store.c", but in a different subsystem. Move these declarations to the respective headers whose matching code files carry the corresponding definition. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/count-objects.c | 2 +- builtin/gc.c | 2 +- convert.c | 2 +- diffcore-rename.c | 2 +- dir.c | 2 +- log-tree.c | 2 +- object-file.h | 77 +++++++++++++++++++++++++++++++++ object-name.c | 2 +- object-store.h | 95 +---------------------------------------- packfile.h | 19 +++++++++ prune-packed.c | 2 +- reachable.c | 2 +- 12 files changed, 106 insertions(+), 103 deletions(-) diff --git a/builtin/count-objects.c b/builtin/count-objects.c index 0bb5360b2f262c..a88c0c9c09af64 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -12,7 +12,7 @@ #include "parse-options.h" #include "quote.h" #include "packfile.h" -#include "object-store.h" +#include "object-file.h" static unsigned long garbage; static off_t size_garbage; diff --git a/builtin/gc.c b/builtin/gc.c index b5ce1d32766e13..4d428f3253d3c2 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -28,7 +28,7 @@ #include "commit.h" #include "commit-graph.h" #include "packfile.h" -#include "object-store.h" +#include "object-file.h" #include "pack.h" #include "pack-objects.h" #include "path.h" diff --git a/convert.c b/convert.c index 8783e17941ff70..b5f7cf6306c3c3 100644 --- a/convert.c +++ b/convert.c @@ -8,7 +8,7 @@ #include "copy.h" #include "gettext.h" #include "hex.h" -#include 
"object-store.h" +#include "object-file.h" #include "attr.h" #include "run-command.h" #include "quote.h" diff --git a/diffcore-rename.c b/diffcore-rename.c index 179731462b53fd..7723bc3334e084 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -8,7 +8,7 @@ #include "git-compat-util.h" #include "diff.h" #include "diffcore.h" -#include "object-store.h" +#include "object-file.h" #include "hashmap.h" #include "mem-pool.h" #include "oid-array.h" diff --git a/dir.c b/dir.c index 5c4675b4ac40e8..e11342e13660fb 100644 --- a/dir.c +++ b/dir.c @@ -17,7 +17,7 @@ #include "environment.h" #include "gettext.h" #include "name-hash.h" -#include "object-store.h" +#include "object-file.h" #include "path.h" #include "refs.h" #include "repository.h" diff --git a/log-tree.c b/log-tree.c index a4d4ab59ca0714..1d05dc1c7010d5 100644 --- a/log-tree.c +++ b/log-tree.c @@ -9,7 +9,7 @@ #include "environment.h" #include "hex.h" #include "object-name.h" -#include "object-store.h" +#include "object-file.h" #include "repository.h" #include "tmp-objdir.h" #include "commit.h" diff --git a/object-file.h b/object-file.h index 0a7b6b9f9d9288..a85b2e5b494c8f 100644 --- a/object-file.h +++ b/object-file.h @@ -3,6 +3,7 @@ #include "git-zlib.h" #include "object.h" +#include "object-store.h" struct index_state; @@ -25,6 +26,16 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa struct object_directory; +/* + * Populate and return the loose object cache array corresponding to the + * given object ID. + */ +struct oidtree *odb_loose_cache(struct object_directory *odb, + const struct object_id *oid); + +/* Empty the loose object cache for the specified object directory. */ +void odb_clear_loose_cache(struct object_directory *odb); + /* * Put in `buf` the name of the file in the local object database that * would be used to store a loose object with the specified oid. 
@@ -42,6 +53,68 @@ int has_loose_object_nonlocal(const struct object_id *); int has_loose_object(const struct object_id *); +void *map_loose_object(struct repository *r, const struct object_id *oid, + unsigned long *size); + +/* + * Iterate over the files in the loose-object parts of the object + * directory "path", triggering the following callbacks: + * + * - loose_object is called for each loose object we find. + * + * - loose_cruft is called for any files that do not appear to be + * loose objects. Note that we only look in the loose object + * directories "objects/[0-9a-f]{2}/", so we will not report + * "objects/foobar" as cruft. + * + * - loose_subdir is called for each top-level hashed subdirectory + * of the object directory (e.g., "$OBJDIR/f0"). It is called + * after the objects in the directory are processed. + * + * Any callback that is NULL will be ignored. Callbacks returning non-zero + * will end the iteration. + * + * In the "buf" variant, "path" is a strbuf which will also be used as a + * scratch buffer, but restored to its original contents before + * the function returns. 
+ */ +typedef int each_loose_object_fn(const struct object_id *oid, + const char *path, + void *data); +typedef int each_loose_cruft_fn(const char *basename, + const char *path, + void *data); +typedef int each_loose_subdir_fn(unsigned int nr, + const char *path, + void *data); +int for_each_file_in_obj_subdir(unsigned int subdir_nr, + struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); +int for_each_loose_file_in_objdir(const char *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); +int for_each_loose_file_in_objdir_buf(struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); + +/* + * Iterate over all accessible loose objects without respect to + * reachability. By default, this includes both local and alternate objects. + * The order in which objects are visited is unspecified. + * + * Any flags specific to packs are ignored. 
+ */ +int for_each_loose_object(each_loose_object_fn, void *, + enum for_each_object_flags flags); + + /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial " " part of the loose object @@ -158,6 +231,10 @@ int finalize_object_file(const char *tmpfile, const char *filename); int finalize_object_file_flags(const char *tmpfile, const char *filename, enum finalize_object_file_flags flags); +void hash_object_file(const struct git_hash_algo *algo, const void *buf, + unsigned long len, enum object_type type, + struct object_id *oid); + /* Helper to check and "touch" a file */ int check_and_freshen_file(const char *fn, int freshen); diff --git a/object-name.c b/object-name.c index 2c751a5352a75d..9288b2dd2453a0 100644 --- a/object-name.c +++ b/object-name.c @@ -19,7 +19,7 @@ #include "oidtree.h" #include "packfile.h" #include "pretty.h" -#include "object-store.h" +#include "object-file.h" #include "read-cache-ll.h" #include "repo-settings.h" #include "repository.h" diff --git a/object-store.h b/object-store.h index aa8fc63043ec87..9dc39a7c91e7f2 100644 --- a/object-store.h +++ b/object-store.h @@ -82,16 +82,6 @@ struct object_directory *set_temporary_primary_odb(const char *dir, int will_des */ void restore_primary_odb(struct object_directory *restore_odb, const char *old_path); -/* - * Populate and return the loose object cache array corresponding to the - * given object ID. - */ -struct oidtree *odb_loose_cache(struct object_directory *odb, - const struct object_id *oid); - -/* Empty the loose object cache for the specified object directory. 
*/ -void odb_clear_loose_cache(struct object_directory *odb); - struct packed_git; struct multi_pack_index; struct cached_object_entry; @@ -189,9 +179,6 @@ void raw_object_store_clear(struct raw_object_store *o); */ int odb_mkstemp(struct strbuf *temp_filename, const char *pattern); -void *map_loose_object(struct repository *r, const struct object_id *oid, - unsigned long *size); - void *repo_read_object_file(struct repository *r, const struct object_id *oid, enum object_type *type, @@ -200,10 +187,6 @@ void *repo_read_object_file(struct repository *r, /* Read and unpack an object file into memory, write memory to an object file */ int oid_object_info(struct repository *r, const struct object_id *, unsigned long *); -void hash_object_file(const struct git_hash_algo *algo, const void *buf, - unsigned long len, enum object_type type, - struct object_id *oid); - /* * Add an object file to the in-memory object store, without writing it * to disk. @@ -340,56 +323,7 @@ static inline void obj_read_unlock(void) if(obj_read_use_lock) pthread_mutex_unlock(&obj_read_mutex); } - -/* - * Iterate over the files in the loose-object parts of the object - * directory "path", triggering the following callbacks: - * - * - loose_object is called for each loose object we find. - * - * - loose_cruft is called for any files that do not appear to be - * loose objects. Note that we only look in the loose object - * directories "objects/[0-9a-f]{2}/", so we will not report - * "objects/foobar" as cruft. - * - * - loose_subdir is called for each top-level hashed subdirectory - * of the object directory (e.g., "$OBJDIR/f0"). It is called - * after the objects in the directory are processed. - * - * Any callback that is NULL will be ignored. Callbacks returning non-zero - * will end the iteration. - * - * In the "buf" variant, "path" is a strbuf which will also be used as a - * scratch buffer, but restored to its original contents before - * the function returns. 
- */ -typedef int each_loose_object_fn(const struct object_id *oid, - const char *path, - void *data); -typedef int each_loose_cruft_fn(const char *basename, - const char *path, - void *data); -typedef int each_loose_subdir_fn(unsigned int nr, - const char *path, - void *data); -int for_each_file_in_obj_subdir(unsigned int subdir_nr, - struct strbuf *path, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data); -int for_each_loose_file_in_objdir(const char *path, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data); -int for_each_loose_file_in_objdir_buf(struct strbuf *path, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data); - -/* Flags for for_each_*_object() below. */ +/* Flags for for_each_*_object(). */ enum for_each_object_flags { /* Iterate only over local objects, not alternates. */ FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), @@ -409,33 +343,6 @@ enum for_each_object_flags { FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; -/* - * Iterate over all accessible loose objects without respect to - * reachability. By default, this includes both local and alternate objects. - * The order in which objects are visited is unspecified. - * - * Any flags specific to packs are ignored. - */ -int for_each_loose_object(each_loose_object_fn, void *, - enum for_each_object_flags flags); - -/* - * Iterate over all accessible packed objects without respect to reachability. - * By default, this includes both local and alternate packs. - * - * Note that some objects may appear twice if they are found in multiple packs. - * Each pack is visited in an unspecified order. By default, objects within a - * pack are visited in pack-idx order (i.e., sorted by oid). 
- */ -typedef int each_packed_object_fn(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *data); -int for_each_object_in_pack(struct packed_git *p, - each_packed_object_fn, void *data, - enum for_each_object_flags flags); -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags); void *read_object_with_reference(struct repository *r, const struct object_id *oid, diff --git a/packfile.h b/packfile.h index 05499382397576..3a3c77cf05a63d 100644 --- a/packfile.h +++ b/packfile.h @@ -3,6 +3,7 @@ #include "list.h" #include "object.h" +#include "object-store.h" #include "oidset.h" /* in object-store.h */ @@ -117,6 +118,24 @@ void for_each_file_in_pack_dir(const char *objdir, each_file_in_pack_dir_fn fn, void *data); +/* + * Iterate over all accessible packed objects without respect to reachability. + * By default, this includes both local and alternate packs. + * + * Note that some objects may appear twice if they are found in multiple packs. + * Each pack is visited in an unspecified order. By default, objects within a + * pack are visited in pack-idx order (i.e., sorted by oid). 
+ */ +typedef int each_packed_object_fn(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *data); +int for_each_object_in_pack(struct packed_git *p, + each_packed_object_fn, void *data, + enum for_each_object_flags flags); +int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, + void *data, enum for_each_object_flags flags); + /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 diff --git a/prune-packed.c b/prune-packed.c index c1d95a519d7479..92fb4fbb0ed3d1 100644 --- a/prune-packed.c +++ b/prune-packed.c @@ -2,7 +2,7 @@ #include "git-compat-util.h" #include "gettext.h" -#include "object-store.h" +#include "object-file.h" #include "packfile.h" #include "progress.h" #include "prune-packed.h" diff --git a/reachable.c b/reachable.c index e5f56f40181d88..9dc748f0b9a0f7 100644 --- a/reachable.c +++ b/reachable.c @@ -14,7 +14,7 @@ #include "list-objects.h" #include "packfile.h" #include "worktree.h" -#include "object-store.h" +#include "object-file.h" #include "pack-bitmap.h" #include "pack-mtimes.h" #include "config.h" From f8fc4cacd37afa254a8822258f76de53ae2dfbb2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 29 Apr 2025 09:52:19 +0200 Subject: [PATCH 14/18] object-store: allow fetching objects via `has_object()` We're about to fully remove `repo_has_object_file()` in favor of `has_object()`. The latter function does not yet have a way to fetch missing objects via a promisor remote though, which means that it cannot fully replace all use cases of `repo_has_object_file()`. Introduce a new flag `HAS_OBJECT_FETCH_PROMISOR` that causes the function to optionally fetch missing objects which are part of a promisor pack. This flag will be used in the subsequent commit. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-store.c | 9 ++++++--- object-store.h | 10 +++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/object-store.c b/object-store.c index 0cbad5a19a0c04..0d873868a6d7cb 100644 --- a/object-store.c +++ b/object-store.c @@ -937,12 +937,15 @@ void *read_object_with_reference(struct repository *r, int has_object(struct repository *r, const struct object_id *oid, unsigned flags) { - int quick = !(flags & HAS_OBJECT_RECHECK_PACKED); - unsigned object_info_flags = OBJECT_INFO_SKIP_FETCH_OBJECT | - (quick ? OBJECT_INFO_QUICK : 0); + unsigned object_info_flags = 0; if (!startup_info->have_repository) return 0; + if (!(flags & HAS_OBJECT_RECHECK_PACKED)) + object_info_flags |= OBJECT_INFO_QUICK; + if (!(flags & HAS_OBJECT_FETCH_PROMISOR)) + object_info_flags |= OBJECT_INFO_SKIP_FETCH_OBJECT; + return oid_object_info_extended(r, oid, NULL, object_info_flags) >= 0; } diff --git a/object-store.h b/object-store.h index 9dc39a7c91e7f2..f0e111464c28e0 100644 --- a/object-store.h +++ b/object-store.h @@ -262,12 +262,16 @@ int oid_object_info_extended(struct repository *r, const struct object_id *, struct object_info *, unsigned flags); -/* Retry packed storage after checking packed and loose storage */ -#define HAS_OBJECT_RECHECK_PACKED 1 +enum { + /* Retry packed storage after checking packed and loose storage */ + HAS_OBJECT_RECHECK_PACKED = (1 << 0), + /* Allow fetching the object in case the repository has a promisor remote. */ + HAS_OBJECT_FETCH_PROMISOR = (1 << 1), +}; /* * Returns 1 if the object exists. This function will not lazily fetch objects - * in a partial clone. + * in a partial clone by default. 
*/ int has_object(struct repository *r, const struct object_id *oid, unsigned flags); From 062b914c841329a003f74e1340ea5178391274a6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 29 Apr 2025 09:52:20 +0200 Subject: [PATCH 15/18] treewide: convert users of `repo_has_object_file()` to `has_object()` As the comment of `repo_has_object_file()` and its `_with_flags()` variant tells us, these functions are considered to be deprecated in favor of `has_object()`. There are a couple of slight benefits in favor of the replacement: - The new function has a short-and-sweet name. - More explicit defaults: `has_object()` doesn't fetch missing objects via promisor remotes, and neither does it reload packfiles if an object wasn't found by default. This ensures that it becomes immediately obvious when a simple object existence check may result in expensive actions. Most importantly though, it is confusing that we have two sets of functions that ultimately do the same thing, but with different defaults. Start sunsetting `repo_has_object_file()` and its `_with_flags()` sibling by replacing all callsites with `has_object()`: - `repo_has_object_file(...)` is equivalent to `has_object(..., HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)`. - `repo_has_object_file_with_flags(..., OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT)` is equivalent to `has_object(..., 0)`. - `repo_has_object_file_with_flags(..., OBJECT_INFO_SKIP_FETCH_OBJECT)` is equivalent to `has_object(..., HAS_OBJECT_RECHECK_PACKED)`. - `repo_has_object_file_with_flags(..., OBJECT_INFO_QUICK)` is equivalent to `has_object(..., HAS_OBJECT_FETCH_PROMISOR)`. The replacements should be functionally equivalent. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 3 ++- builtin/clone.c | 4 +--- builtin/fetch.c | 15 +++++++-------- builtin/index-pack.c | 5 ++--- builtin/receive-pack.c | 4 +++- builtin/remote.c | 3 ++- builtin/show-ref.c | 3 ++- builtin/unpack-objects.c | 3 ++- bulk-checkin.c | 3 ++- cache-tree.c | 13 +++++++++---- fetch-pack.c | 7 +++---- http-push.c | 11 +++++++---- http-walker.c | 6 ++++-- list-objects.c | 3 ++- notes.c | 3 ++- object-store.c | 2 +- reflog.c | 3 ++- refs.c | 2 +- remote.c | 2 +- send-pack.c | 5 +---- shallow.c | 9 ++++++--- upload-pack.c | 3 +-- walker.c | 3 ++- 23 files changed, 65 insertions(+), 50 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 0e3f10a946700e..3914a2a3f61c61 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -169,7 +169,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name, goto cleanup; case 'e': - ret = !repo_has_object_file(the_repository, &oid); + ret = !has_object(the_repository, &oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR); goto cleanup; case 'w': diff --git a/builtin/clone.c b/builtin/clone.c index 6b1d11a3ed2001..b498b81a0434b3 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -504,9 +504,7 @@ static void write_followtags(const struct ref *refs, const char *msg) continue; if (ends_with(ref->name, "^{}")) continue; - if (!repo_has_object_file_with_flags(the_repository, &ref->old_oid, - OBJECT_INFO_QUICK | - OBJECT_INFO_SKIP_FETCH_OBJECT)) + if (!has_object(the_repository, &ref->old_oid, 0)) continue; refs_update_ref(get_main_ref_store(the_repository), msg, ref->name, &ref->old_oid, NULL, 0, diff --git a/builtin/fetch.c b/builtin/fetch.c index 95589b499485d1..aadcf49a5b4777 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -337,7 +337,6 @@ static void find_non_local_tags(const struct ref *refs, struct string_list_item *remote_ref_item; const struct ref *ref; struct refname_hash_entry *item = 
NULL; - const int quick_flags = OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT; refname_hash_init(&existing_refs); refname_hash_init(&remote_refs); @@ -367,9 +366,9 @@ static void find_non_local_tags(const struct ref *refs, */ if (ends_with(ref->name, "^{}")) { if (item && - !repo_has_object_file_with_flags(the_repository, &ref->old_oid, quick_flags) && + !has_object(the_repository, &ref->old_oid, 0) && !oidset_contains(&fetch_oids, &ref->old_oid) && - !repo_has_object_file_with_flags(the_repository, &item->oid, quick_flags) && + !has_object(the_repository, &item->oid, 0) && !oidset_contains(&fetch_oids, &item->oid)) clear_item(item); item = NULL; @@ -383,7 +382,7 @@ static void find_non_local_tags(const struct ref *refs, * fetch. */ if (item && - !repo_has_object_file_with_flags(the_repository, &item->oid, quick_flags) && + !has_object(the_repository, &item->oid, 0) && !oidset_contains(&fetch_oids, &item->oid)) clear_item(item); @@ -404,7 +403,7 @@ static void find_non_local_tags(const struct ref *refs, * checked to see if it needs fetching. */ if (item && - !repo_has_object_file_with_flags(the_repository, &item->oid, quick_flags) && + !has_object(the_repository, &item->oid, 0) && !oidset_contains(&fetch_oids, &item->oid)) clear_item(item); @@ -911,7 +910,8 @@ static int update_local_ref(struct ref *ref, struct commit *current = NULL, *updated; int fast_forward = 0; - if (!repo_has_object_file(the_repository, &ref->new_oid)) + if (!has_object(the_repository, &ref->new_oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) die(_("object %s not found"), oid_to_hex(&ref->new_oid)); if (oideq(&ref->old_oid, &ref->new_oid)) { @@ -1330,8 +1330,7 @@ static int check_exist_and_connected(struct ref *ref_map) * we need all direct targets to exist. 
*/ for (r = rm; r; r = r->next) { - if (!repo_has_object_file_with_flags(the_repository, &r->old_oid, - OBJECT_INFO_SKIP_FETCH_OBJECT)) + if (!has_object(the_repository, &r->old_oid, HAS_OBJECT_RECHECK_PACKED)) return -1; } diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f49431d626b173..147e9b8b47956a 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -892,9 +892,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, if (startup_info->have_repository) { read_lock(); - collision_test_needed = - repo_has_object_file_with_flags(the_repository, oid, - OBJECT_INFO_QUICK); + collision_test_needed = has_object(the_repository, oid, + HAS_OBJECT_FETCH_PROMISOR); read_unlock(); } diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index be314879e82908..c92e57ba188a19 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1506,7 +1506,9 @@ static const char *update(struct command *cmd, struct shallow_info *si) } } - if (!is_null_oid(new_oid) && !repo_has_object_file(the_repository, new_oid)) { + if (!is_null_oid(new_oid) && + !has_object(the_repository, new_oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { error("unpack should have generated %s, " "but I can't find it!", oid_to_hex(new_oid)); ret = "bad pack"; diff --git a/builtin/remote.c b/builtin/remote.c index b4baa34e665a8f..0d6755bcb71e3d 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -454,7 +454,8 @@ static int get_push_ref_states(const struct ref *remote_refs, info->status = PUSH_STATUS_UPTODATE; else if (is_null_oid(&ref->old_oid)) info->status = PUSH_STATUS_CREATE; - else if (repo_has_object_file(the_repository, &ref->old_oid) && + else if (has_object(the_repository, &ref->old_oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR) && ref_newer(&ref->new_oid, &ref->old_oid)) info->status = PUSH_STATUS_FASTFORWARD; else diff --git a/builtin/show-ref.c b/builtin/show-ref.c index f81209f23c3386..623a52a45f85cf 
100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -35,7 +35,8 @@ static void show_one(const struct show_one_options *opts, const char *hex; struct object_id peeled; - if (!repo_has_object_file(the_repository, oid)) + if (!has_object(the_repository, oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) die("git show-ref: bad ref %s (%s)", refname, oid_to_hex(oid)); diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 661be789f1340c..e905d5f4e1964b 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -449,7 +449,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, delta_data = get_data(delta_size); if (!delta_data) return; - if (repo_has_object_file(the_repository, &base_oid)) + if (has_object(the_repository, &base_oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) ; /* Ok we have this one */ else if (resolve_against_held(nr, &base_oid, delta_data, delta_size)) diff --git a/bulk-checkin.c b/bulk-checkin.c index c31c31b18d8a0b..678e2ecc2c29da 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -130,7 +130,8 @@ static void flush_batch_fsync(void) static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) { /* The object may already exist in the repository */ - if (repo_has_object_file(the_repository, oid)) + if (has_object(the_repository, oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return 1; /* Might want to keep the list sorted */ diff --git a/cache-tree.c b/cache-tree.c index c0e1e9ee1d4af0..fa3858e2829aa8 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -238,7 +238,9 @@ int cache_tree_fully_valid(struct cache_tree *it) int i; if (!it) return 0; - if (it->entry_count < 0 || !repo_has_object_file(the_repository, &it->oid)) + if (it->entry_count < 0 || + !has_object(the_repository, &it->oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return 0; for (i = 0; i < it->subtree_nr; i++) { if
(!cache_tree_fully_valid(it->down[i]->cache_tree)) @@ -289,7 +291,9 @@ static int update_one(struct cache_tree *it, } } - if (0 <= it->entry_count && repo_has_object_file(the_repository, &it->oid)) + if (0 <= it->entry_count && + has_object(the_repository, &it->oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return it->entry_count; /* @@ -395,7 +399,8 @@ static int update_one(struct cache_tree *it, ce_missing_ok = mode == S_IFGITLINK || missing_ok || !must_check_existence(ce); if (is_null_oid(oid) || - (!ce_missing_ok && !repo_has_object_file(the_repository, oid))) { + (!ce_missing_ok && !has_object(the_repository, oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))) { strbuf_release(&buffer); if (expected_missing) return -1; @@ -443,7 +448,7 @@ static int update_one(struct cache_tree *it, struct object_id oid; hash_object_file(the_hash_algo, buffer.buf, buffer.len, OBJ_TREE, &oid); - if (repo_has_object_file_with_flags(the_repository, &oid, OBJECT_INFO_SKIP_FETCH_OBJECT)) + if (has_object(the_repository, &oid, HAS_OBJECT_RECHECK_PACKED)) oidcpy(&it->oid, &oid); else to_invalidate = 1; diff --git a/fetch-pack.c b/fetch-pack.c index 210dc30d50f6d6..fa4231fee74c9f 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -769,9 +769,7 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, if (!commit) { struct object *o; - if (!repo_has_object_file_with_flags(the_repository, &ref->old_oid, - OBJECT_INFO_QUICK | - OBJECT_INFO_SKIP_FETCH_OBJECT)) + if (!has_object(the_repository, &ref->old_oid, 0)) continue; o = parse_object(the_repository, &ref->old_oid); if (!o || o->type != OBJ_COMMIT) @@ -1985,7 +1983,8 @@ static void update_shallow(struct fetch_pack_args *args, struct oid_array extra = OID_ARRAY_INIT; struct object_id *oid = si->shallow->oid; for (i = 0; i < si->shallow->nr; i++) - if (repo_has_object_file(the_repository, &oid[i])) + if (has_object(the_repository, &oid[i], + HAS_OBJECT_RECHECK_PACKED | 
HAS_OBJECT_FETCH_PROMISOR)) oid_array_append(&extra, &oid[i]); if (extra.nr) { setup_alternate_shallow(&shallow_lock, diff --git a/http-push.c b/http-push.c index 32e37565f4e08f..f9e67cabd4bee8 100644 --- a/http-push.c +++ b/http-push.c @@ -1446,7 +1446,9 @@ static void one_remote_ref(const char *refname) * Fetch a copy of the object if it doesn't exist locally - it * may be required for updating server info later. */ - if (repo->can_update_info_refs && !repo_has_object_file(the_repository, &ref->old_oid)) { + if (repo->can_update_info_refs && + !has_object(the_repository, &ref->old_oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { obj = lookup_unknown_object(the_repository, &ref->old_oid); fprintf(stderr, " fetch %s for %s\n", oid_to_hex(&ref->old_oid), refname); @@ -1651,14 +1653,14 @@ static int delete_remote_branch(const char *pattern, int force) return error("Remote HEAD symrefs too deep"); if (is_null_oid(&head_oid)) return error("Unable to resolve remote HEAD"); - if (!repo_has_object_file(the_repository, &head_oid)) + if (!has_object(the_repository, &head_oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return error("Remote HEAD resolves to object %s\nwhich does not exist locally, perhaps you need to fetch?", oid_to_hex(&head_oid)); /* Remote branch must resolve to a known object */ if (is_null_oid(&remote_ref->old_oid)) return error("Unable to resolve remote branch %s", remote_ref->name); - if (!repo_has_object_file(the_repository, &remote_ref->old_oid)) + if (!has_object(the_repository, &remote_ref->old_oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return error("Remote branch %s resolves to object %s\nwhich does not exist locally, perhaps you need to fetch?", remote_ref->name, oid_to_hex(&remote_ref->old_oid)); /* Remote branch must be an ancestor of remote HEAD */ @@ -1879,7 +1881,8 @@ int cmd_main(int argc, const char **argv) if (!force_all && !is_null_oid(&ref->old_oid) && !ref->force) { - if 
(!repo_has_object_file(the_repository, &ref->old_oid) || + if (!has_object(the_repository, &ref->old_oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR) || !ref_newer(&ref->peer_ref->new_oid, &ref->old_oid)) { /* diff --git a/http-walker.c b/http-walker.c index 95458e2f6384bc..463f7b119ad4ca 100644 --- a/http-walker.c +++ b/http-walker.c @@ -138,7 +138,8 @@ static int fill_active_slot(void *data UNUSED) list_for_each_safe(pos, tmp, head) { obj_req = list_entry(pos, struct object_request, node); if (obj_req->state == WAITING) { - if (repo_has_object_file(the_repository, &obj_req->oid)) + if (has_object(the_repository, &obj_req->oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) obj_req->state = COMPLETE; else { start_object_request(obj_req); @@ -496,7 +497,8 @@ static int fetch_object(struct walker *walker, const struct object_id *oid) if (!obj_req) return error("Couldn't find request for %s in the queue", hex); - if (repo_has_object_file(the_repository, &obj_req->oid)) { + if (has_object(the_repository, &obj_req->oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { if (obj_req->req) abort_http_object_request(&obj_req->req); abort_object_request(obj_req); diff --git a/list-objects.c b/list-objects.c index 1e5512e1318a2c..597114281f6596 100644 --- a/list-objects.c +++ b/list-objects.c @@ -74,7 +74,8 @@ static void process_blob(struct traversal_context *ctx, * of missing objects. 
*/ if (ctx->revs->exclude_promisor_objects && - !repo_has_object_file(the_repository, &obj->oid) && + !has_object(the_repository, &obj->oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR) && is_promisor_object(ctx->revs->repo, &obj->oid)) return; diff --git a/notes.c b/notes.c index d9645c4b5dc603..0a128f1de98050 100644 --- a/notes.c +++ b/notes.c @@ -794,7 +794,8 @@ static int prune_notes_helper(const struct object_id *object_oid, struct note_delete_list **l = (struct note_delete_list **) cb_data; struct note_delete_list *n; - if (repo_has_object_file(the_repository, object_oid)) + if (has_object(the_repository, object_oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return 0; /* nothing to do for this note */ /* failed to find object => prune this note */ diff --git a/object-store.c b/object-store.c index 0d873868a6d7cb..2db34804e8ff02 100644 --- a/object-store.c +++ b/object-store.c @@ -847,7 +847,7 @@ int pretend_object_file(struct repository *repo, char *co_buf; hash_object_file(repo->hash_algo, buf, len, type, oid); - if (repo_has_object_file_with_flags(repo, oid, OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT) || + if (has_object(repo, oid, 0) || find_cached_object(repo->objects, oid)) return 0; diff --git a/reflog.c b/reflog.c index 12f7a02e3408db..15d81ebea978d3 100644 --- a/reflog.c +++ b/reflog.c @@ -152,7 +152,8 @@ static int tree_is_complete(const struct object_id *oid) init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size); complete = 1; while (tree_entry(&desc, &entry)) { - if (!repo_has_object_file(the_repository, &entry.oid) || + if (!has_object(the_repository, &entry.oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR) || (S_ISDIR(entry.mode) && !tree_is_complete(&entry.oid))) { tree->object.flags |= INCOMPLETE; complete = 0; diff --git a/refs.c b/refs.c index 6559db378909e4..dce5c49ca2ba65 100644 --- a/refs.c +++ b/refs.c @@ -376,7 +376,7 @@ int ref_resolves_to_object(const char *refname, { if 
(flags & REF_ISBROKEN) return 0; - if (!repo_has_object_file(repo, oid)) { + if (!has_object(repo, oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { error(_("%s does not point to a valid object!"), refname); return 0; } diff --git a/remote.c b/remote.c index 9fa3614e7a3374..4099183cacdc8a 100644 --- a/remote.c +++ b/remote.c @@ -1702,7 +1702,7 @@ void set_ref_status_for_push(struct ref *remote_refs, int send_mirror, if (!reject_reason && !ref->deletion && !is_null_oid(&ref->old_oid)) { if (starts_with(ref->name, "refs/tags/")) reject_reason = REF_STATUS_REJECT_ALREADY_EXISTS; - else if (!repo_has_object_file_with_flags(the_repository, &ref->old_oid, OBJECT_INFO_SKIP_FETCH_OBJECT)) + else if (!has_object(the_repository, &ref->old_oid, HAS_OBJECT_RECHECK_PACKED)) reject_reason = REF_STATUS_REJECT_FETCH_FIRST; else if (!lookup_commit_reference_gently(the_repository, &ref->old_oid, 1) || !lookup_commit_reference_gently(the_repository, &ref->new_oid, 1)) diff --git a/send-pack.c b/send-pack.c index 5005689cb55a48..86592ce526db95 100644 --- a/send-pack.c +++ b/send-pack.c @@ -45,10 +45,7 @@ int option_parse_push_signed(const struct option *opt, static void feed_object(struct repository *r, const struct object_id *oid, FILE *fh, int negative) { - if (negative && - !repo_has_object_file_with_flags(r, oid, - OBJECT_INFO_SKIP_FETCH_OBJECT | - OBJECT_INFO_QUICK)) + if (negative && !has_object(r, oid, 0)) return; if (negative) diff --git a/shallow.c b/shallow.c index 2f82ebd6e3f5e5..faeeeb45f986e1 100644 --- a/shallow.c +++ b/shallow.c @@ -310,7 +310,8 @@ static int write_one_shallow(const struct commit_graft *graft, void *cb_data) if (graft->nr_parent != -1) return 0; if (data->flags & QUICK) { - if (!repo_has_object_file(the_repository, &graft->oid)) + if (!has_object(the_repository, &graft->oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return 0; } else if (data->flags & SEEN_ONLY) { struct commit *c = lookup_commit(the_repository, &graft->oid); 
@@ -476,7 +477,8 @@ void prepare_shallow_info(struct shallow_info *info, struct oid_array *sa) ALLOC_ARRAY(info->ours, sa->nr); ALLOC_ARRAY(info->theirs, sa->nr); for (size_t i = 0; i < sa->nr; i++) { - if (repo_has_object_file(the_repository, sa->oid + i)) { + if (has_object(the_repository, sa->oid + i, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { struct commit_graft *graft; graft = lookup_commit_graft(the_repository, &sa->oid[i]); @@ -513,7 +515,8 @@ void remove_nonexistent_theirs_shallow(struct shallow_info *info) for (i = dst = 0; i < info->nr_theirs; i++) { if (i != dst) info->theirs[dst] = info->theirs[i]; - if (repo_has_object_file(the_repository, oid + info->theirs[i])) + if (has_object(the_repository, oid + info->theirs[i], + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) dst++; } info->nr_theirs = dst; diff --git a/upload-pack.c b/upload-pack.c index 30e4630f3a1cb3..956da5b061a0e5 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -509,8 +509,7 @@ static int got_oid(struct upload_pack_data *data, { if (get_oid_hex(hex, oid)) die("git upload-pack: expected SHA1 object, got '%s'", hex); - if (!repo_has_object_file_with_flags(the_repository, oid, - OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT)) + if (!has_object(the_repository, oid, 0)) return -1; return do_got_oid(data, oid); } diff --git a/walker.c b/walker.c index 4fedc19f346e66..b470d43e54d486 100644 --- a/walker.c +++ b/walker.c @@ -150,7 +150,8 @@ static int process(struct walker *walker, struct object *obj) return 0; obj->flags |= SEEN; - if (repo_has_object_file(the_repository, &obj->oid)) { + if (has_object(the_repository, &obj->oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { /* We already have it, so we should scan it now. 
*/ obj->flags |= TO_SCAN; } From 8a9e27be8213ab90ac761d56ac36229ee52c443f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 29 Apr 2025 09:52:21 +0200 Subject: [PATCH 16/18] object-store: drop `repo_has_object_file()` In the preceding commits we have converted all users of `repo_has_object_file()` and its `_with_flags()` variant to instead use `has_object()`. Drop these functions. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-store.c | 14 -------------- object-store.h | 17 ----------------- 2 files changed, 31 deletions(-) diff --git a/object-store.c b/object-store.c index 2db34804e8ff02..2f51d0e3b037e3 100644 --- a/object-store.c +++ b/object-store.c @@ -949,20 +949,6 @@ int has_object(struct repository *r, const struct object_id *oid, return oid_object_info_extended(r, oid, NULL, object_info_flags) >= 0; } -int repo_has_object_file_with_flags(struct repository *r, - const struct object_id *oid, int flags) -{ - if (!startup_info->have_repository) - return 0; - return oid_object_info_extended(r, oid, NULL, flags) >= 0; -} - -int repo_has_object_file(struct repository *r, - const struct object_id *oid) -{ - return repo_has_object_file_with_flags(r, oid, 0); -} - void assert_oid_type(const struct object_id *oid, enum object_type expect) { enum object_type type = oid_object_info(the_repository, oid, NULL); diff --git a/object-store.h b/object-store.h index f0e111464c28e0..c2fe5a19605040 100644 --- a/object-store.h +++ b/object-store.h @@ -276,23 +276,6 @@ enum { int has_object(struct repository *r, const struct object_id *oid, unsigned flags); -/* - * These macros and functions are deprecated. If checking existence for an - * object that is likely to be missing and/or whose absence is relatively - * inconsequential (or is consequential but the caller is prepared to handle - * it), use has_object(), which has better defaults (no lazy fetch in a partial - * clone and no rechecking of packed storage). 
In the unlikely event that a - * caller needs to assert existence of an object that it fully expects to - * exist, and wants to trigger a lazy fetch in a partial clone, use - * oid_object_info_extended() with a NULL struct object_info. - * - * These functions can be removed once all callers have migrated to - * has_object() and/or oid_object_info_extended(). - */ -int repo_has_object_file(struct repository *r, const struct object_id *oid); -int repo_has_object_file_with_flags(struct repository *r, - const struct object_id *oid, int flags); - void assert_oid_type(const struct object_id *oid, enum object_type expect); /* From 03f2915541a4923c5733e505a42e77031eb9494c Mon Sep 17 00:00:00 2001 From: Niels Glodny Date: Tue, 29 Apr 2025 16:09:49 +0200 Subject: [PATCH 17/18] xdiff: disable cleanup_records heuristic with --minimal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cleanup_records function marks some lines as changed before running the actual diff algorithm. For most lines, this is a good performance optimization, but it also marks lines that are surrounded by many changed lines as changed as well. This can cause redundant changes and longer-than-necessary diffs. Whether this results in better-looking diffs is subjective. However, the --minimal flag explicitly requests the shortest possible diff. The change results in shorter diffs in about 1.3% of all diffs in Git's history. Performance wise, I have measured the impact on "git log -p -3000 --minimal > /dev/null". With this change, I get Time (mean ± σ): 2.363 s ± 0.023 s (25 runs) and without this patch I measured Time (mean ± σ): 2.362 s ± 0.035 s (25 runs). As the difference is well within the margin of error, this does not seem to have an impact on performance. 
Signed-off-by: Niels Glodny Signed-off-by: Junio C Hamano --- t/meson.build | 1 + t/t4071-diff-minimal.sh | 14 ++++++++++++++ xdiff/xprepare.c | 5 +++-- 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100755 t/t4071-diff-minimal.sh diff --git a/t/meson.build b/t/meson.build index bfb744e8863d98..8f2e9d2c5055fc 100644 --- a/t/meson.build +++ b/t/meson.build @@ -501,6 +501,7 @@ integration_tests = [ 't4068-diff-symmetric-merge-base.sh', 't4069-remerge-diff.sh', 't4070-diff-pairs.sh', + 't4071-diff-minimal.sh', 't4100-apply-stat.sh', 't4101-apply-nonl.sh', 't4102-apply-rename.sh', diff --git a/t/t4071-diff-minimal.sh b/t/t4071-diff-minimal.sh new file mode 100755 index 00000000000000..4c484dadfb0ada --- /dev/null +++ b/t/t4071-diff-minimal.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +test_description='minimal diff algorithm' + +. ./test-lib.sh + +test_expect_success 'minimal diff should not mark changes between changed lines' ' + test_write_lines x x x x >pre && + test_write_lines x x x A B C D x E F G >post && + test_expect_code 1 git diff --no-index --minimal pre post >diff && + test_grep ! ^[+-]x diff +' + +test_done diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index c84549f6c5089e..e1d4017b2ddeac 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -368,6 +368,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd xrecord_t **recs; xdlclass_t *rcrec; char *dis, *dis1, *dis2; + int need_min = !!(cf->flags & XDF_NEED_MINIMAL); if (!XDL_CALLOC_ARRAY(dis, xdf1->nrec + xdf2->nrec + 2)) return -1; @@ -379,7 +380,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { rcrec = cf->rcrecs[(*recs)->ha]; nm = rcrec ? rcrec->len2 : 0; - dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; + dis1[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 
2: 1; } if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) @@ -387,7 +388,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { rcrec = cf->rcrecs[(*recs)->ha]; nm = rcrec ? rcrec->len1 : 0; - dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; + dis2[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1; } for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; From 38af977b81bbf8ce8c0004d3f4046a823ecb30a1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 12 May 2025 14:22:59 -0700 Subject: [PATCH 18/18] The thirteenth batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.50.0.adoc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/RelNotes/2.50.0.adoc b/Documentation/RelNotes/2.50.0.adoc index 07759cf98b3cca..6794031f996034 100644 --- a/Documentation/RelNotes/2.50.0.adoc +++ b/Documentation/RelNotes/2.50.0.adoc @@ -53,6 +53,15 @@ UI, Workflows & Features * The build procedure installs bash (but not zsh) completion script. + * send-email has been updated to work better with Outlook's smtp server. + + * "git diff --minimal" used to give non-minimal output when its + optimization kicked in, which has been disabled. + + * "git index-pack --fix-thin" used to abort to prevent a cycle in + delta chains from forming in a corner case even when there is no + such cycle. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -134,6 +143,8 @@ Performance, Internal Implementation, Development Support etc. * Add an equivalent to "make hdr-check" target to meson based builds. + * Further code clean-up in the object-store layer. + Fixes since v2.49 ----------------- @@ -261,6 +272,10 @@ Fixes since v2.49 now detected and the command errors out. (merge 974f0d4664 ps/mv-contradiction-fix later to maint). 
+ * Further refinement on CI messages when an optional external + software is unavailable (e.g. due to third-party service outage). + (merge 956acbefbd jc/ci-skip-unavailable-external-software later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 227c4f33a0 ja/doc-block-delimiter-markup-fix later to maint). (merge 2bfd3b3685 ab/decorate-code-cleanup later to maint).