From 197f0d0f390ef2f1e573a57c27ed7db8df7a46fd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Sep 2025 11:16:06 +0200 Subject: [PATCH 01/35] meson: introduce a "docs" alias to compile documentation only Meson does not currently provide a target to compile documentation, only. Instead, users needs to compile the whole project, which may be way more than they really intend to do. Introduce a new "docs" alias to plug this gap. This alias can be invoked e.g. with `meson compile docs`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/howto/meson.build | 4 ++-- Documentation/meson.build | 8 ++++---- Documentation/technical/meson.build | 4 ++-- contrib/contacts/meson.build | 4 ++-- contrib/subtree/meson.build | 4 ++-- meson.build | 7 +++++++ 6 files changed, 19 insertions(+), 12 deletions(-) diff --git a/Documentation/howto/meson.build b/Documentation/howto/meson.build index 81000028c08ee6..ece20244af2ad3 100644 --- a/Documentation/howto/meson.build +++ b/Documentation/howto/meson.build @@ -29,7 +29,7 @@ howto_index = custom_target( output: 'howto-index.adoc', ) -custom_target( +doc_targets += custom_target( command: asciidoc_html_options, input: howto_index, output: 'howto-index.html', @@ -51,7 +51,7 @@ foreach howto : howto_sources capture: true, ) - custom_target( + doc_targets += custom_target( command: asciidoc_html_options, input: howto_stripped, output: fs.stem(howto_stripped.full_path()) + '.html', diff --git a/Documentation/meson.build b/Documentation/meson.build index 4404c623f006db..f0169fcf9c60f0 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -375,7 +375,7 @@ foreach manpage, category : manpages output: fs.stem(manpage) + '.xml', ) - custom_target( + doc_targets += custom_target( command: [ xmlto, '-m', '@INPUT0@', @@ -398,7 +398,7 @@ foreach manpage, category : manpages endif if get_option('docs').contains('html') - custom_target( + doc_targets += custom_target( command: asciidoc_common_options + [ '--backend=' + asciidoc_html, '--doctype=manpage', @@ -450,7 +450,7 @@ if get_option('docs').contains('html') depends: documentation_deps, ) - custom_target( + doc_targets += custom_target( command: [ xsltproc, '--xinclude', @@ -479,7 +479,7 @@ if get_option('docs').contains('html') ] foreach article : articles - custom_target( + doc_targets += custom_target( command: asciidoc_common_options + [ '--backend=' + asciidoc_html, '--out-file=@OUTPUT@', diff --git a/Documentation/technical/meson.build b/Documentation/technical/meson.build index a13aafcfbb8c75..858af811a7bcc1 100644 --- a/Documentation/technical/meson.build +++ b/Documentation/technical/meson.build @@ -46,7 +46,7 @@ api_index = custom_target( output: 'api-index.adoc', ) -custom_target( +doc_targets += custom_target( command: asciidoc_html_options, input: api_index, output: 'api-index.html', @@ -56,7 +56,7 @@ custom_target( ) foreach article : api_docs + articles - custom_target( + doc_targets += custom_target( command: asciidoc_html_options, input: article, output: fs.stem(article) + '.html', diff --git a/contrib/contacts/meson.build b/contrib/contacts/meson.build index 73d82dfe52b85f..c8fdb35ed990ee 100644 --- a/contrib/contacts/meson.build +++ b/contrib/contacts/meson.build @@ -20,7 +20,7 @@ if get_option('docs').contains('man') output: 'git-contacts.xml', ) - custom_target( + doc_targets += custom_target( command: [ xmlto, '-m', '@INPUT@', @@ -39,7 +39,7 @@ if get_option('docs').contains('man') endif if get_option('docs').contains('html') - custom_target( + doc_targets += custom_target( command: asciidoc_common_options + [ '--backend=' + asciidoc_html, '--doctype=manpage', diff --git a/contrib/subtree/meson.build b/contrib/subtree/meson.build index 98dd8e0c8eacea..46cdbcc30c9bd7 100644 --- a/contrib/subtree/meson.build +++ b/contrib/subtree/meson.build @@ -38,7 +38,7 @@ if get_option('docs').contains('man') output: 'git-subtree.xml', ) - custom_target( + doc_targets += custom_target( command: [ xmlto, '-m', '@INPUT@', @@ -57,7 +57,7 @@ if get_option('docs').contains('man') endif if get_option('docs').contains('html') - custom_target( + doc_targets += custom_target( command: asciidoc_common_options + [ '--backend=' + asciidoc_html, '--doctype=manpage', diff --git a/meson.build b/meson.build index 5dd299b4962d84..f7dd6ee30d7251 100644 --- a/meson.build +++ b/meson.build @@ -2099,11 +2099,18 @@ endif subdir('bin-wrappers') if get_option('docs') != [] + doc_targets = [] subdir('Documentation') endif subdir('contrib') +# Note that the target is intentionally configured after including the +# 'contrib' directory, as some tool there also have their own manpages. +if get_option('docs') != [] + alias_target('docs', doc_targets) +endif + exclude_from_check_headers = [ 'compat/', 'unicode-width.h', From b64579dff989f36343bdbb3e1d6481ee4a3f0876 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Sep 2025 11:16:07 +0200 Subject: [PATCH 02/35] meson: print docs backend as part of the summary Our documentation can be built with either Asciidoc or Asciidoctor as backend. When Meson is configured to build documentation, then it will automatically detect which of these tools is available and use them. It's not obvious to the user though which of these backends is used unless the user explicitly asks for one backend via `-Ddocs_backend=`. Improve the status quo by printing the docs backend as part of the "backends" summary. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 3 +++ 1 file changed, 3 insertions(+) diff --git a/meson.build b/meson.build index f7dd6ee30d7251..cfaae83488c164 100644 --- a/meson.build +++ b/meson.build @@ -2101,6 +2101,8 @@ subdir('bin-wrappers') if get_option('docs') != [] doc_targets = [] subdir('Documentation') +else + docs_backend = 'none' endif subdir('contrib') @@ -2249,6 +2251,7 @@ summary({ summary({ 'csprng': csprng_backend, + 'docs': docs_backend, 'https': https_backend, 'sha1': sha1_backend, 'sha1_unsafe': sha1_unsafe_backend, From ff4ec8ded0504cbe4fb4705ad793d862acfc63fc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Sep 2025 11:16:08 +0200 Subject: [PATCH 03/35] ci: don't compile whole project when testing docs with Meson MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our "documentation" CI jobs, unsurprisingly, performs a couple of tests on our documentation. The job knows to not only test the documentation generated by our Makefile, but also by Meson. In the latter case with Meson we end up building the whole project, including all of the binaries. This is of course quite excessive and a waste of compute cycles, as we don't care about these binaries at all. Fix this by using the new "docs" target that we introduced in the preceding commit. Reported-by: SZEDER Gábor Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ci/test-documentation.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test-documentation.sh b/ci/test-documentation.sh index 49f87f50fd7966..5e4fd8fbd7a33d 100755 --- a/ci/test-documentation.sh +++ b/ci/test-documentation.sh @@ -48,13 +48,13 @@ check_unignored_build_artifacts # Build docs with Meson and AsciiDoc meson setup build-asciidoc -Ddocs=html,man -Ddocs_backend=asciidoc -meson compile -C build-asciidoc +meson compile -C build-asciidoc docs check_docs build-asciidoc AsciiDoc rm -rf build-asciidoc # Build docs with Meson and AsciiDoctor meson setup build-asciidoctor -Ddocs=html,man -Ddocs_backend=asciidoctor -meson compile -C build-asciidoctor +meson compile -C build-asciidoctor docs check_docs build-asciidoctor Asciidoctor rm -rf build-asciidoctor From f3c1db4b2a23fac171a699b10f9328f8df52602f Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:33 -0500 Subject: [PATCH 04/35] bulk-checkin: remove ODB transaction nesting ODB transactions support being nested. Only the outermost {begin,end}_odb_transaction() start and finish a transaction. This allows internal object write codepaths to be optimized with ODB transactions without worrying about whether a transaction is already active. When {begin,end}_odb_transaction() is invoked during an active transaction, these operations are essentially treated as no-ops. This can make the interface a bit awkward to use, as calling end_odb_transaction() does not guarantee that a transaction is actually ended. Thus, in situations where a transaction needs to be explicitly flushed, flush_odb_transaction() must be used. To remove the need for an explicit transaction flush operation via flush_odb_transaction() and better clarify transaction semantics, drop the transaction nesting mechanism in favor of begin_odb_transaction() returning a NULL transaction value to signal it was a no-op, and end_odb_transaction() behaving as a no-op when a NULL transaction value is passed. This is safe for existing callers as the transaction value wired to end_odb_transaction() already comes from begin_odb_transaction() and thus continues the same no-op behavior when a transaction is already pending. With this model, passing a pending transaction to end_odb_transaction() ensures it is committed at that point in time. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 22 ++++++++++------------ bulk-checkin.h | 8 +++----- object-file.c | 2 +- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 124c49306769a5..eb6ef704c3c758 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -33,7 +33,6 @@ struct bulk_checkin_packfile { struct odb_transaction { struct object_database *odb; - int nesting; struct tmp_objdir *objdir; struct bulk_checkin_packfile packfile; }; @@ -368,12 +367,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, struct odb_transaction *begin_odb_transaction(struct object_database *odb) { - if (!odb->transaction) { - CALLOC_ARRAY(odb->transaction, 1); - odb->transaction->odb = odb; - } + if (odb->transaction) + return NULL; - odb->transaction->nesting += 1; + CALLOC_ARRAY(odb->transaction, 1); + odb->transaction->odb = odb; return odb->transaction; } @@ -389,14 +387,14 @@ void flush_odb_transaction(struct odb_transaction *transaction) void end_odb_transaction(struct odb_transaction *transaction) { - if (!transaction || transaction->nesting == 0) - BUG("Unbalanced ODB transaction nesting"); - - transaction->nesting -= 1; - - if (transaction->nesting) + if (!transaction) return; + /* + * Ensure the transaction ending matches the pending transaction. + */ + ASSERT(transaction == transaction->odb->transaction); + flush_odb_transaction(transaction); transaction->odb->transaction = NULL; free(transaction); diff --git a/bulk-checkin.h b/bulk-checkin.h index ac8887f476b496..51d0ac6134e79e 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -38,9 +38,8 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, /* * Tell the object database to optimize for adding * multiple objects. end_odb_transaction must be called - * to make new objects visible. Transactions can be nested, - * and objects are only visible after the outermost transaction - * is complete or the transaction is flushed. + * to make new objects visible. If a transaction is already + * pending, NULL is returned. */ struct odb_transaction *begin_odb_transaction(struct object_database *odb); @@ -53,8 +52,7 @@ void flush_odb_transaction(struct odb_transaction *transaction); /* * Tell the object database to make any objects from the - * current transaction visible if this is the final nested - * transaction. + * current transaction visible. */ void end_odb_transaction(struct odb_transaction *transaction); diff --git a/object-file.c b/object-file.c index bc15af42450949..5e765735495310 100644 --- a/object-file.c +++ b/object-file.c @@ -1267,7 +1267,7 @@ int index_fd(struct index_state *istate, struct object_id *oid, struct odb_transaction *transaction; transaction = begin_odb_transaction(the_repository->objects); - ret = index_blob_bulk_checkin(transaction, + ret = index_blob_bulk_checkin(the_repository->objects->transaction, oid, fd, xsize_t(st->st_size), path, flags); end_odb_transaction(transaction); From 9c61d9aded98748aae949b83babbdbd11e695f32 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:34 -0500 Subject: [PATCH 05/35] builtin/update-index: end ODB transaction when --verbose is specified With 23a3a303 (update-index: use the bulk-checkin infrastructure, 2022-04-04), object database transactions were added to git-update-index(1) to facilitate writing objects in bulk. With transactions, newly added objects are instead written to a temporary object directory and migrated to the primary object database upon transaction commit. When the --verbose option is specified, the subsequent set of objects written are explicitly flushed via flush_odb_transaction() prior to reporting the update. Flushing the object database transaction migrates pending objects to the primary object database without marking the transaction as complete. This is done so objects are immediately visible to git-update-index(1) callers using the --verbose option and that rely on parsing verbose output to know when objects are written. Due to how git-update-index(1) parses arguments, options that come after a filename are not considered during the object update. Therefore, it may not be known ahead of time whether the --verbose option is present and thus object writes are considered transactional by default until a --verbose option is parsed. Flushing a transaction after individual object writes negates the benefit of writing objects to a transaction in the first place. Furthermore, the mechanism to flush a transaction without actually committing is rather awkward. Drop the call to flush_odb_transaction() in favor of ending the transaction altogether when the --verbose flag is encountered. Subsequent object writes occur outside of a transaction and are therefore immediately visible which matches the current behavior. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/update-index.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/builtin/update-index.c b/builtin/update-index.c index 2ba2d29c959fac..d36bc557521f85 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -70,14 +70,6 @@ static void report(const char *fmt, ...) if (!verbose) return; - /* - * It is possible, though unlikely, that a caller could use the verbose - * output to synchronize with addition of objects to the object - * database. The current implementation of ODB transactions leaves - * objects invisible while a transaction is active, so flush the - * transaction here before reporting a change made by update-index. - */ - flush_odb_transaction(the_repository->objects->transaction); va_start(vp, fmt); vprintf(fmt, vp); putchar('\n'); @@ -1150,6 +1142,21 @@ int cmd_update_index(int argc, const char *path = ctx.argv[0]; char *p; + /* + * It is possible, though unlikely, that a caller could + * use the verbose output to synchronize with addition + * of objects to the object database. The current + * implementation of ODB transactions leaves objects + * invisible while a transaction is active, so end the + * transaction here early before processing the next + * update. All further updates are performed outside of + * a transaction. + */ + if (transaction && verbose) { + end_odb_transaction(transaction); + transaction = NULL; + } + setup_work_tree(); p = prefix_path(prefix, prefix_length, path); update_one(p); From ca7d93453b6c309aa1fca411e1bdaa9ca4c82199 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:35 -0500 Subject: [PATCH 06/35] bulk-checkin: drop flush_odb_transaction() Object database transactions can be explicitly flushed via flush_odb_transaction() without actually completing the transaction. This makes the provided transactional interface a bit awkward. Now that there are no longer any flush_odb_transaction() call sites, drop the function to simplify the interface and further ensure that a transaction is only finalized when end_odb_transaction() is invoked. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 12 ++---------- bulk-checkin.h | 7 ------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index eb6ef704c3c758..5de848deffe41f 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -376,15 +376,6 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb) return odb->transaction; } -void flush_odb_transaction(struct odb_transaction *transaction) -{ - if (!transaction) - return; - - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); -} - void end_odb_transaction(struct odb_transaction *transaction) { if (!transaction) @@ -395,7 +386,8 @@ void end_odb_transaction(struct odb_transaction *transaction) */ ASSERT(transaction == transaction->odb->transaction); - flush_odb_transaction(transaction); + flush_batch_fsync(transaction); + flush_bulk_checkin_packfile(transaction); transaction->odb->transaction = NULL; free(transaction); } diff --git a/bulk-checkin.h b/bulk-checkin.h index 51d0ac6134e79e..eea728f0d41e53 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -43,13 +43,6 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, */ struct odb_transaction *begin_odb_transaction(struct object_database *odb); -/* - * Make any objects that are currently part of a pending object - * database transaction visible. It is valid to call this function - * even if no transaction is active. - */ -void flush_odb_transaction(struct odb_transaction *transaction); - /* * Tell the object database to make any objects from the * current transaction visible. From 78839e9cdead363d10190a009783c2d18149cc54 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:36 -0500 Subject: [PATCH 07/35] object-file: relocate ODB transaction code The bulk-checkin subsystem provides various functions to manage ODB transactions. Apart from {begin,end}_odb_transaction(), these functions are only used by the object-file subsystem to manage aspects of a transaction implementation specific to the files object source. Relocate all the transaction code in bulk-checkin to object-file. This simplifies the exposed transaction interface by reducing it to only {begin,end}_odb_transaction(). Function and type names are adjusted in the subsequent commit to better fit the new location. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Makefile | 1 - builtin/add.c | 2 +- builtin/unpack-objects.c | 1 - builtin/update-index.c | 1 - bulk-checkin.c | 393 -------------------------------------- bulk-checkin.h | 52 ------ cache-tree.c | 1 - meson.build | 1 - object-file.c | 394 ++++++++++++++++++++++++++++++++++++++- object-file.h | 16 ++ read-cache.c | 1 - 11 files changed, 410 insertions(+), 453 deletions(-) delete mode 100644 bulk-checkin.c delete mode 100644 bulk-checkin.h diff --git a/Makefile b/Makefile index 4c95affadb5e26..d25d4255f8a7eb 100644 --- a/Makefile +++ b/Makefile @@ -974,7 +974,6 @@ LIB_OBJS += blame.o LIB_OBJS += blob.o LIB_OBJS += bloom.o LIB_OBJS += branch.o -LIB_OBJS += bulk-checkin.o LIB_OBJS += bundle-uri.o LIB_OBJS += bundle.o LIB_OBJS += cache-tree.o diff --git a/builtin/add.c b/builtin/add.c index 740c7c45817828..8294366d68a1de 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -14,13 +14,13 @@ #include "gettext.h" #include "pathspec.h" #include "run-command.h" +#include "object-file.h" #include "parse-options.h" #include "path.h" #include "preload-index.h" #include "diff.h" #include "read-cache.h" #include "revision.h" -#include "bulk-checkin.h" #include "strvec.h" #include "submodule.h" #include "add-interactive.h" diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 28124b324d2641..4596fff0dad58e 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -2,7 +2,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" -#include "bulk-checkin.h" #include "config.h" #include "environment.h" #include "gettext.h" diff --git a/builtin/update-index.c b/builtin/update-index.c index d36bc557521f85..ee01c4e423da43 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -8,7 +8,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" -#include "bulk-checkin.h" #include "config.h" #include "environment.h" #include "gettext.h" diff --git a/bulk-checkin.c b/bulk-checkin.c deleted file mode 100644 index 5de848deffe41f..00000000000000 --- a/bulk-checkin.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ - -#define USE_THE_REPOSITORY_VARIABLE - -#include "git-compat-util.h" -#include "bulk-checkin.h" -#include "environment.h" -#include "gettext.h" -#include "hex.h" -#include "lockfile.h" -#include "repository.h" -#include "csum-file.h" -#include "pack.h" -#include "strbuf.h" -#include "tmp-objdir.h" -#include "packfile.h" -#include "object-file.h" -#include "odb.h" - -struct bulk_checkin_packfile { - char *pack_tmp_name; - struct hashfile *f; - off_t offset; - struct pack_idx_option pack_idx_opts; - - struct pack_idx_entry **written; - uint32_t alloc_written; - uint32_t nr_written; -}; - -struct odb_transaction { - struct object_database *odb; - - struct tmp_objdir *objdir; - struct bulk_checkin_packfile packfile; -}; - -static void finish_tmp_packfile(struct odb_transaction *transaction, - struct strbuf *basename, - unsigned char hash[]) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - char *idx_tmp_name = NULL; - - stage_tmp_packfiles(repo, basename, state->pack_tmp_name, - state->written, state->nr_written, NULL, - &state->pack_idx_opts, hash, &idx_tmp_name); - rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); - - free(idx_tmp_name); -} - -static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - unsigned char hash[GIT_MAX_RAWSZ]; - struct strbuf packname = STRBUF_INIT; - - if (!state->f) - return; - - if (state->nr_written == 0) { - close(state->f->fd); - free_hashfile(state->f); - unlink(state->pack_tmp_name); - goto clear_exit; - } else if (state->nr_written == 1) { - finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, - CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); - } else { - int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); - fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, - state->nr_written, hash, - state->offset); - close(fd); - } - - strbuf_addf(&packname, "%s/pack/pack-%s.", - repo_get_object_directory(transaction->odb->repo), - hash_to_hex_algop(hash, repo->hash_algo)); - - finish_tmp_packfile(transaction, &packname, hash); - for (uint32_t i = 0; i < state->nr_written; i++) - free(state->written[i]); - -clear_exit: - free(state->pack_tmp_name); - free(state->written); - memset(state, 0, sizeof(*state)); - - strbuf_release(&packname); - /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(repo); -} - -/* - * Cleanup after batch-mode fsync_object_files. - */ -static void flush_batch_fsync(struct odb_transaction *transaction) -{ - struct strbuf temp_path = STRBUF_INIT; - struct tempfile *temp; - - if (!transaction->objdir) - return; - - /* - * Issue a full hardware flush against a temporary file to ensure - * that all objects are durable before any renames occur. The code in - * fsync_loose_object_bulk_checkin has already issued a writeout - * request, but it has not flushed any writeback cache in the storage - * hardware or any filesystem logs. This fsync call acts as a barrier - * to ensure that the data in each new object file is durable before - * the final name is visible. - */ - strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", - repo_get_object_directory(transaction->odb->repo)); - temp = xmks_tempfile(temp_path.buf); - fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); - delete_tempfile(&temp); - strbuf_release(&temp_path); - - /* - * Make the object files visible in the primary ODB after their data is - * fully durable. - */ - tmp_objdir_migrate(transaction->objdir); - transaction->objdir = NULL; -} - -static int already_written(struct odb_transaction *transaction, - struct object_id *oid) -{ - /* The object may already exist in the repository */ - if (odb_has_object(transaction->odb, oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) - return 1; - - /* Might want to keep the list sorted */ - for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) - if (oideq(&transaction->packfile.written[i]->oid, oid)) - return 1; - - /* This is a new object we need to keep */ - return 0; -} - -/* - * Read the contents from fd for size bytes, streaming it to the - * packfile in state while updating the hash in ctx. Signal a failure - * by returning a negative value when the resulting pack would exceed - * the pack size limit and this is not the first object in the pack, - * so that the caller can discard what we wrote from the current pack - * by truncating it and opening a new one. The caller will then call - * us again after rewinding the input fd. - * - * The already_hashed_to pointer is kept untouched by the caller to - * make sure we do not hash the same byte when we are called - * again. This way, the caller does not have to checkpoint its hash - * status before calling us just in case we ask it to call us again - * with a new pack. - */ -static int stream_blob_to_pack(struct bulk_checkin_packfile *state, - struct git_hash_ctx *ctx, off_t *already_hashed_to, - int fd, size_t size, const char *path, - unsigned flags) -{ - git_zstream s; - unsigned char ibuf[16384]; - unsigned char obuf[16384]; - unsigned hdrlen; - int status = Z_OK; - int write_object = (flags & INDEX_WRITE_OBJECT); - off_t offset = 0; - - git_deflate_init(&s, pack_compression_level); - - hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size); - s.next_out = obuf + hdrlen; - s.avail_out = sizeof(obuf) - hdrlen; - - while (status != Z_STREAM_END) { - if (size && !s.avail_in) { - size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); - ssize_t read_result = read_in_full(fd, ibuf, rsize); - if (read_result < 0) - die_errno("failed to read from '%s'", path); - if ((size_t)read_result != rsize) - die("failed to read %u bytes from '%s'", - (unsigned)rsize, path); - offset += rsize; - if (*already_hashed_to < offset) { - size_t hsize = offset - *already_hashed_to; - if (rsize < hsize) - hsize = rsize; - if (hsize) - git_hash_update(ctx, ibuf, hsize); - *already_hashed_to = offset; - } - s.next_in = ibuf; - s.avail_in = rsize; - size -= rsize; - } - - status = git_deflate(&s, size ? 0 : Z_FINISH); - - if (!s.avail_out || status == Z_STREAM_END) { - if (write_object) { - size_t written = s.next_out - obuf; - - /* would we bust the size limit? */ - if (state->nr_written && - pack_size_limit_cfg && - pack_size_limit_cfg < state->offset + written) { - git_deflate_abort(&s); - return -1; - } - - hashwrite(state->f, obuf, written); - state->offset += written; - } - s.next_out = obuf; - s.avail_out = sizeof(obuf); - } - - switch (status) { - case Z_OK: - case Z_BUF_ERROR: - case Z_STREAM_END: - continue; - default: - die("unexpected deflate failure: %d", status); - } - } - git_deflate_end(&s); - return 0; -} - -/* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct odb_transaction *transaction, - unsigned flags) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - if (!(flags & INDEX_WRITE_OBJECT) || state->f) - return; - - state->f = create_tmp_packfile(transaction->odb->repo, - &state->pack_tmp_name); - reset_pack_idx_option(&state->pack_idx_opts); - - /* Pretend we are going to write only one object */ - state->offset = write_pack_header(state->f, 1); - if (!state->offset) - die_errno("unable to write pack header"); -} - -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *result_oid, int fd, size_t size, - const char *path, unsigned flags) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - off_t seekback, already_hashed_to; - struct git_hash_ctx ctx; - unsigned char obuf[16384]; - unsigned header_len; - struct hashfile_checkpoint checkpoint; - struct pack_idx_entry *idx = NULL; - - seekback = lseek(fd, 0, SEEK_CUR); - if (seekback == (off_t) -1) - return error("cannot find the current offset"); - - header_len = format_object_header((char *)obuf, sizeof(obuf), - OBJ_BLOB, size); - transaction->odb->repo->hash_algo->init_fn(&ctx); - git_hash_update(&ctx, obuf, header_len); - - /* Note: idx is non-NULL when we are writing */ - if ((flags & INDEX_WRITE_OBJECT) != 0) { - CALLOC_ARRAY(idx, 1); - - prepare_to_stream(transaction, flags); - hashfile_checkpoint_init(state->f, &checkpoint); - } - - already_hashed_to = 0; - - while (1) { - prepare_to_stream(transaction, flags); - if (idx) { - hashfile_checkpoint(state->f, &checkpoint); - idx->offset = state->offset; - crc32_begin(state->f); - } - if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, - fd, size, path, flags)) - break; - /* - * Writing this object to the current pack will make - * it too big; we need to truncate it, start a new - * pack, and write into it. - */ - if (!idx) - BUG("should not happen"); - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(transaction); - if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) - return error("cannot seek back"); - } - git_hash_final_oid(result_oid, &ctx); - if (!idx) - return 0; - - idx->crc32 = crc32_end(state->f); - if (already_written(transaction, result_oid)) { - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - free(idx); - } else { - oidcpy(&idx->oid, result_oid); - ALLOC_GROW(state->written, - state->nr_written + 1, - state->alloc_written); - state->written[state->nr_written++] = idx; - } - return 0; -} - -void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) -{ - /* - * We lazily create the temporary object directory - * the first time an object might be added, since - * callers may not know whether any objects will be - * added at the time they call begin_odb_transaction. - */ - if (!transaction || transaction->objdir) - return; - - transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); - if (transaction->objdir) - tmp_objdir_replace_primary_odb(transaction->objdir, 0); -} - -void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, - int fd, const char *filename) -{ - /* - * If we have an active ODB transaction, we issue a call that - * cleans the filesystem page cache but avoids a hardware flush - * command. Later on we will issue a single hardware flush - * before renaming the objects to their final names as part of - * flush_batch_fsync. - */ - if (!transaction || !transaction->objdir || - git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { - if (errno == ENOSYS) - warning(_("core.fsyncMethod = batch is unsupported on this platform")); - fsync_or_die(fd, filename); - } -} - -struct odb_transaction *begin_odb_transaction(struct object_database *odb) -{ - if (odb->transaction) - return NULL; - - CALLOC_ARRAY(odb->transaction, 1); - odb->transaction->odb = odb; - - return odb->transaction; -} - -void end_odb_transaction(struct odb_transaction *transaction) -{ - if (!transaction) - return; - - /* - * Ensure the transaction ending matches the pending transaction. - */ - ASSERT(transaction == transaction->odb->transaction); - - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); - transaction->odb->transaction = NULL; - free(transaction); -} diff --git a/bulk-checkin.h b/bulk-checkin.h deleted file mode 100644 index eea728f0d41e53..00000000000000 --- a/bulk-checkin.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ -#ifndef BULK_CHECKIN_H -#define BULK_CHECKIN_H - -#include "object.h" -#include "odb.h" - -struct odb_transaction; - -void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction); -void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, - int fd, const char *filename); - -/* - * This writes the specified object to a packfile. Objects written here - * during the same transaction are written to the same packfile. The - * packfile is not flushed until the transaction is flushed. The caller - * is expected to ensure a valid transaction is setup for objects to be - * recorded to. - * - * This also bypasses the usual "convert-to-git" dance, and that is on - * purpose. We could write a streaming version of the converting - * functions and insert that before feeding the data to fast-import - * (or equivalent in-core API described above). However, that is - * somewhat complicated, as we do not know the size of the filter - * result, which we need to know beforehand when writing a git object. - * Since the primary motivation for trying to stream from the working - * tree file and to avoid mmaping it in core is to deal with large - * binary blobs, they generally do not want to get any conversion, and - * callers should avoid this code path when filters are requested. - */ -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *oid, int fd, size_t size, - const char *path, unsigned flags); - -/* - * Tell the object database to optimize for adding - * multiple objects. end_odb_transaction must be called - * to make new objects visible. If a transaction is already - * pending, NULL is returned. - */ -struct odb_transaction *begin_odb_transaction(struct object_database *odb); - -/* - * Tell the object database to make any objects from the - * current transaction visible. - */ -void end_odb_transaction(struct odb_transaction *transaction); - -#endif diff --git a/cache-tree.c b/cache-tree.c index d225554eedd920..79ddf6b72780fd 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -8,7 +8,6 @@ #include "tree.h" #include "tree-walk.h" #include "cache-tree.h" -#include "bulk-checkin.h" #include "object-file.h" #include "odb.h" #include "read-cache-ll.h" diff --git a/meson.build b/meson.build index b3dfcc04972601..fccb6d2eeca050 100644 --- a/meson.build +++ b/meson.build @@ -287,7 +287,6 @@ libgit_sources = [ 'blob.c', 'bloom.c', 'branch.c', - 'bulk-checkin.c', 'bundle-uri.c', 'bundle.c', 'cache-tree.c', diff --git a/object-file.c b/object-file.c index 5e765735495310..03f9931b832e49 100644 --- a/object-file.c +++ b/object-file.c @@ -10,7 +10,6 @@ #define USE_THE_REPOSITORY_VARIABLE #include "git-compat-util.h" -#include "bulk-checkin.h" #include "convert.h" #include "dir.h" #include "environment.h" @@ -28,6 +27,8 @@ #include "read-cache-ll.h" #include "setup.h" #include "streaming.h" +#include "tempfile.h" +#include "tmp-objdir.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -666,6 +667,93 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf, write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); } +struct bulk_checkin_packfile { + char *pack_tmp_name; + struct hashfile *f; + off_t offset; + struct pack_idx_option pack_idx_opts; + + struct pack_idx_entry **written; + uint32_t alloc_written; + uint32_t nr_written; +}; + +struct odb_transaction { + struct object_database *odb; + + struct tmp_objdir *objdir; + struct bulk_checkin_packfile packfile; +}; + +static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) +{ + /* + * We lazily create the temporary object directory + * the first time an object might be added, since + * callers may not know whether any objects will be + * added at the time they call begin_odb_transaction. + */ + if (!transaction || transaction->objdir) + return; + + transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); + if (transaction->objdir) + tmp_objdir_replace_primary_odb(transaction->objdir, 0); +} + +static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, + int fd, const char *filename) +{ + /* + * If we have an active ODB transaction, we issue a call that + * cleans the filesystem page cache but avoids a hardware flush + * command. Later on we will issue a single hardware flush + * before renaming the objects to their final names as part of + * flush_batch_fsync. + */ + if (!transaction || !transaction->objdir || + git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { + if (errno == ENOSYS) + warning(_("core.fsyncMethod = batch is unsupported on this platform")); + fsync_or_die(fd, filename); + } +} + +/* + * Cleanup after batch-mode fsync_object_files. + */ +static void flush_batch_fsync(struct odb_transaction *transaction) +{ + struct strbuf temp_path = STRBUF_INIT; + struct tempfile *temp; + + if (!transaction->objdir) + return; + + /* + * Issue a full hardware flush against a temporary file to ensure + * that all objects are durable before any renames occur. The code in + * fsync_loose_object_bulk_checkin has already issued a writeout + * request, but it has not flushed any writeback cache in the storage + * hardware or any filesystem logs. This fsync call acts as a barrier + * to ensure that the data in each new object file is durable before + * the final name is visible. + */ + strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", + repo_get_object_directory(transaction->odb->repo)); + temp = xmks_tempfile(temp_path.buf); + fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); + delete_tempfile(&temp); + strbuf_release(&temp_path); + + /* + * Make the object files visible in the primary ODB after their data is + * fully durable. + */ + tmp_objdir_migrate(transaction->objdir); + transaction->objdir = NULL; +} + /* Finalize a file on disk, and close it. */ static void close_loose_object(struct odb_source *source, int fd, const char *filename) @@ -1243,6 +1331,283 @@ static int index_core(struct index_state *istate, return ret; } +static int already_written(struct odb_transaction *transaction, + struct object_id *oid) +{ + /* The object may already exist in the repository */ + if (odb_has_object(transaction->odb, oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + return 1; + + /* Might want to keep the list sorted */ + for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) + if (oideq(&transaction->packfile.written[i]->oid, oid)) + return 1; + + /* This is a new object we need to keep */ + return 0; +} + +/* Lazily create backing packfile for the state */ +static void prepare_to_stream(struct odb_transaction *transaction, + unsigned flags) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + if (!(flags & INDEX_WRITE_OBJECT) || state->f) + return; + + state->f = create_tmp_packfile(transaction->odb->repo, + &state->pack_tmp_name); + reset_pack_idx_option(&state->pack_idx_opts); + + /* Pretend we are going to write only one object */ + state->offset = write_pack_header(state->f, 1); + if (!state->offset) + die_errno("unable to write pack header"); +} + +/* + * Read the contents from fd for size bytes, streaming it to the + * packfile in state while updating the hash in ctx. Signal a failure + * by returning a negative value when the resulting pack would exceed + * the pack size limit and this is not the first object in the pack, + * so that the caller can discard what we wrote from the current pack + * by truncating it and opening a new one. The caller will then call + * us again after rewinding the input fd. + * + * The already_hashed_to pointer is kept untouched by the caller to + * make sure we do not hash the same byte when we are called + * again. This way, the caller does not have to checkpoint its hash + * status before calling us just in case we ask it to call us again + * with a new pack. + */ +static int stream_blob_to_pack(struct bulk_checkin_packfile *state, + struct git_hash_ctx *ctx, off_t *already_hashed_to, + int fd, size_t size, const char *path, + unsigned flags) +{ + git_zstream s; + unsigned char ibuf[16384]; + unsigned char obuf[16384]; + unsigned hdrlen; + int status = Z_OK; + int write_object = (flags & INDEX_WRITE_OBJECT); + off_t offset = 0; + + git_deflate_init(&s, pack_compression_level); + + hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size); + s.next_out = obuf + hdrlen; + s.avail_out = sizeof(obuf) - hdrlen; + + while (status != Z_STREAM_END) { + if (size && !s.avail_in) { + size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); + ssize_t read_result = read_in_full(fd, ibuf, rsize); + if (read_result < 0) + die_errno("failed to read from '%s'", path); + if ((size_t)read_result != rsize) + die("failed to read %u bytes from '%s'", + (unsigned)rsize, path); + offset += rsize; + if (*already_hashed_to < offset) { + size_t hsize = offset - *already_hashed_to; + if (rsize < hsize) + hsize = rsize; + if (hsize) + git_hash_update(ctx, ibuf, hsize); + *already_hashed_to = offset; + } + s.next_in = ibuf; + s.avail_in = rsize; + size -= rsize; + } + + status = git_deflate(&s, size ? 0 : Z_FINISH); + + if (!s.avail_out || status == Z_STREAM_END) { + if (write_object) { + size_t written = s.next_out - obuf; + + /* would we bust the size limit? */ + if (state->nr_written && + pack_size_limit_cfg && + pack_size_limit_cfg < state->offset + written) { + git_deflate_abort(&s); + return -1; + } + + hashwrite(state->f, obuf, written); + state->offset += written; + } + s.next_out = obuf; + s.avail_out = sizeof(obuf); + } + + switch (status) { + case Z_OK: + case Z_BUF_ERROR: + case Z_STREAM_END: + continue; + default: + die("unexpected deflate failure: %d", status); + } + } + git_deflate_end(&s); + return 0; +} + +static void finish_tmp_packfile(struct odb_transaction *transaction, + struct strbuf *basename, + unsigned char hash[]) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; + char *idx_tmp_name = NULL; + + stage_tmp_packfiles(repo, basename, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); + + free(idx_tmp_name); +} + +static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; + unsigned char hash[GIT_MAX_RAWSZ]; + struct strbuf packname = STRBUF_INIT; + + if (!state->f) + return; + + if (state->nr_written == 0) { + close(state->f->fd); + free_hashfile(state->f); + unlink(state->pack_tmp_name); + goto clear_exit; + } else if (state->nr_written == 1) { + finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, + CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); + } else { + int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); + fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, + state->nr_written, hash, + state->offset); + close(fd); + } + + strbuf_addf(&packname, "%s/pack/pack-%s.", + repo_get_object_directory(transaction->odb->repo), + hash_to_hex_algop(hash, repo->hash_algo)); + + finish_tmp_packfile(transaction, &packname, hash); + for (uint32_t i = 0; i < state->nr_written; i++) + free(state->written[i]); + +clear_exit: + free(state->pack_tmp_name); + free(state->written); + memset(state, 0, sizeof(*state)); + + strbuf_release(&packname); + /* Make objects we just wrote available to ourselves */ + reprepare_packed_git(repo); +} + +/* + * This writes the specified object to a packfile. Objects written here + * during the same transaction are written to the same packfile. The + * packfile is not flushed until the transaction is flushed. The caller + * is expected to ensure a valid transaction is setup for objects to be + * recorded to. + * + * This also bypasses the usual "convert-to-git" dance, and that is on + * purpose. We could write a streaming version of the converting + * functions and insert that before feeding the data to fast-import + * (or equivalent in-core API described above). However, that is + * somewhat complicated, as we do not know the size of the filter + * result, which we need to know beforehand when writing a git object. + * Since the primary motivation for trying to stream from the working + * tree file and to avoid mmaping it in core is to deal with large + * binary blobs, they generally do not want to get any conversion, and + * callers should avoid this code path when filters are requested. + */ +static int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, size_t size, + const char *path, unsigned flags) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + off_t seekback, already_hashed_to; + struct git_hash_ctx ctx; + unsigned char obuf[16384]; + unsigned header_len; + struct hashfile_checkpoint checkpoint; + struct pack_idx_entry *idx = NULL; + + seekback = lseek(fd, 0, SEEK_CUR); + if (seekback == (off_t)-1) + return error("cannot find the current offset"); + + header_len = format_object_header((char *)obuf, sizeof(obuf), + OBJ_BLOB, size); + transaction->odb->repo->hash_algo->init_fn(&ctx); + git_hash_update(&ctx, obuf, header_len); + + /* Note: idx is non-NULL when we are writing */ + if ((flags & INDEX_WRITE_OBJECT) != 0) { + CALLOC_ARRAY(idx, 1); + + prepare_to_stream(transaction, flags); + hashfile_checkpoint_init(state->f, &checkpoint); + } + + already_hashed_to = 0; + + while (1) { + prepare_to_stream(transaction, flags); + if (idx) { + hashfile_checkpoint(state->f, &checkpoint); + idx->offset = state->offset; + crc32_begin(state->f); + } + if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, + fd, size, path, flags)) + break; + /* + * Writing this object to the current pack will make + * it too big; we need to truncate it, start a new + * pack, and write into it. + */ + if (!idx) + BUG("should not happen"); + hashfile_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + flush_bulk_checkin_packfile(transaction); + if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) + return error("cannot seek back"); + } + git_hash_final_oid(result_oid, &ctx); + if (!idx) + return 0; + + idx->crc32 = crc32_end(state->f); + if (already_written(transaction, result_oid)) { + hashfile_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + free(idx); + } else { + oidcpy(&idx->oid, result_oid); + ALLOC_GROW(state->written, + state->nr_written + 1, + state->alloc_written); + state->written[state->nr_written++] = idx; + } + return 0; +} + int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags) @@ -1609,3 +1974,30 @@ int read_loose_object(struct repository *repo, munmap(map, mapsize); return ret; } + +struct odb_transaction *begin_odb_transaction(struct object_database *odb) +{ + if (odb->transaction) + return NULL; + + CALLOC_ARRAY(odb->transaction, 1); + odb->transaction->odb = odb; + + return odb->transaction; +} + +void end_odb_transaction(struct odb_transaction *transaction) +{ + if (!transaction) + return; + + /* + * Ensure the transaction ending matches the pending transaction. + */ + ASSERT(transaction == transaction->odb->transaction); + + flush_batch_fsync(transaction); + flush_bulk_checkin_packfile(transaction); + transaction->odb->transaction = NULL; + free(transaction); +} diff --git a/object-file.h b/object-file.h index 15d97630d3b11b..6323d2e63c00ab 100644 --- a/object-file.h +++ b/object-file.h @@ -218,4 +218,20 @@ int read_loose_object(struct repository *repo, void **contents, struct object_info *oi); +struct odb_transaction; + +/* + * Tell the object database to optimize for adding + * multiple objects. end_odb_transaction must be called + * to make new objects visible. If a transaction is already + * pending, NULL is returned. + */ +struct odb_transaction *begin_odb_transaction(struct object_database *odb); + +/* + * Tell the object database to make any objects from the + * current transaction visible. + */ +void end_odb_transaction(struct odb_transaction *transaction); + #endif /* OBJECT_FILE_H */ diff --git a/read-cache.c b/read-cache.c index 229b8ef11c9a74..80591eecedc207 100644 --- a/read-cache.c +++ b/read-cache.c @@ -8,7 +8,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" -#include "bulk-checkin.h" #include "config.h" #include "date.h" #include "diff.h" From ed0f5f93e9f0b0b3cc1a37ee5b10b625590f08c8 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:37 -0500 Subject: [PATCH 08/35] object-file: update naming from bulk-checkin Update the names of several functions and types relocated from the bulk-checkin subsystem for better clarity. Also drop finish_tmp_packfile() as a standalone function in favor of embedding it in flush_packfile_transaction() directly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 80 +++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/object-file.c b/object-file.c index 03f9931b832e49..8103a2bf413e56 100644 --- a/object-file.c +++ b/object-file.c @@ -667,7 +667,7 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf, write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); } -struct bulk_checkin_packfile { +struct transaction_packfile { char *pack_tmp_name; struct hashfile *f; off_t offset; @@ -682,10 +682,10 @@ struct odb_transaction { struct object_database *odb; struct tmp_objdir *objdir; - struct bulk_checkin_packfile packfile; + struct transaction_packfile packfile; }; -static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) +static void prepare_loose_object_transaction(struct odb_transaction *transaction) { /* * We lazily create the temporary object directory @@ -701,7 +701,7 @@ static void prepare_loose_object_bulk_checkin(struct odb_transaction *transactio tmp_objdir_replace_primary_odb(transaction->objdir, 0); } -static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, +static void fsync_loose_object_transaction(struct odb_transaction *transaction, int fd, const char *filename) { /* @@ -722,7 +722,7 @@ static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, /* * Cleanup after batch-mode fsync_object_files. */ -static void flush_batch_fsync(struct odb_transaction *transaction) +static void flush_loose_object_transaction(struct odb_transaction *transaction) { struct strbuf temp_path = STRBUF_INIT; struct tempfile *temp; @@ -733,7 +733,7 @@ static void flush_batch_fsync(struct odb_transaction *transaction) /* * Issue a full hardware flush against a temporary file to ensure * that all objects are durable before any renames occur. The code in - * fsync_loose_object_bulk_checkin has already issued a writeout + * fsync_loose_object_transaction has already issued a writeout * request, but it has not flushed any writeback cache in the storage * hardware or any filesystem logs. This fsync call acts as a barrier * to ensure that the data in each new object file is durable before @@ -762,7 +762,7 @@ static void close_loose_object(struct odb_source *source, goto out; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename); + fsync_loose_object_transaction(source->odb->transaction, fd, filename); else if (fsync_object_files > 0) fsync_or_die(fd, filename); else @@ -940,7 +940,7 @@ static int write_loose_object(struct odb_source *source, static struct strbuf filename = STRBUF_INIT; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(source->odb->transaction); + prepare_loose_object_transaction(source->odb->transaction); odb_loose_path(source, &filename, oid); @@ -1029,7 +1029,7 @@ int stream_loose_object(struct odb_source *source, int hdrlen; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(source->odb->transaction); + prepare_loose_object_transaction(source->odb->transaction); /* Since oid is not determined, save tmp file to odb path. */ strbuf_addf(&filename, "%s/", source->path); @@ -1349,10 +1349,10 @@ static int already_written(struct odb_transaction *transaction, } /* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct odb_transaction *transaction, - unsigned flags) +static void prepare_packfile_transaction(struct odb_transaction *transaction, + unsigned flags) { - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; if (!(flags & INDEX_WRITE_OBJECT) || state->f) return; @@ -1381,7 +1381,7 @@ static void prepare_to_stream(struct odb_transaction *transaction, * status before calling us just in case we ask it to call us again * with a new pack. */ -static int stream_blob_to_pack(struct bulk_checkin_packfile *state, +static int stream_blob_to_pack(struct transaction_packfile *state, struct git_hash_ctx *ctx, off_t *already_hashed_to, int fd, size_t size, const char *path, unsigned flags) @@ -1457,28 +1457,13 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state, return 0; } -static void finish_tmp_packfile(struct odb_transaction *transaction, - struct strbuf *basename, - unsigned char hash[]) +static void flush_packfile_transaction(struct odb_transaction *transaction) { - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - char *idx_tmp_name = NULL; - - stage_tmp_packfiles(repo, basename, state->pack_tmp_name, - state->written, state->nr_written, NULL, - &state->pack_idx_opts, hash, &idx_tmp_name); - rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); - - free(idx_tmp_name); -} - -static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; struct repository *repo = transaction->odb->repo; unsigned char hash[GIT_MAX_RAWSZ]; struct strbuf packname = STRBUF_INIT; + char *idx_tmp_name = NULL; if (!state->f) return; @@ -1503,11 +1488,16 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) repo_get_object_directory(transaction->odb->repo), hash_to_hex_algop(hash, repo->hash_algo)); - finish_tmp_packfile(transaction, &packname, hash); + stage_tmp_packfiles(repo, &packname, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name); + for (uint32_t i = 0; i < state->nr_written; i++) free(state->written[i]); clear_exit: + free(idx_tmp_name); free(state->pack_tmp_name); free(state->written); memset(state, 0, sizeof(*state)); @@ -1535,11 +1525,12 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -static int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *result_oid, int fd, size_t size, - const char *path, unsigned flags) +static int index_blob_packfile_transaction(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, + size_t size, const char *path, + unsigned flags) { - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; @@ -1560,14 +1551,14 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction, if ((flags & INDEX_WRITE_OBJECT) != 0) { CALLOC_ARRAY(idx, 1); - prepare_to_stream(transaction, flags); + prepare_packfile_transaction(transaction, flags); hashfile_checkpoint_init(state->f, &checkpoint); } already_hashed_to = 0; while (1) { - prepare_to_stream(transaction, flags); + prepare_packfile_transaction(transaction, flags); if (idx) { hashfile_checkpoint(state->f, &checkpoint); idx->offset = state->offset; @@ -1585,7 +1576,7 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction, BUG("should not happen"); hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(transaction); + flush_packfile_transaction(transaction); if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) return error("cannot seek back"); } @@ -1632,9 +1623,10 @@ int index_fd(struct index_state *istate, struct object_id *oid, struct odb_transaction *transaction; transaction = begin_odb_transaction(the_repository->objects); - ret = index_blob_bulk_checkin(the_repository->objects->transaction, - oid, fd, xsize_t(st->st_size), - path, flags); + ret = index_blob_packfile_transaction(the_repository->objects->transaction, + oid, fd, + xsize_t(st->st_size), + path, flags); end_odb_transaction(transaction); } @@ -1996,8 +1988,8 @@ void end_odb_transaction(struct odb_transaction *transaction) */ ASSERT(transaction == transaction->odb->transaction); - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); + flush_loose_object_transaction(transaction); + flush_packfile_transaction(transaction); transaction->odb->transaction = NULL; free(transaction); } From ce1661f9da70ea2ffcb54f7b544410fad26e965d Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:38 -0500 Subject: [PATCH 09/35] odb: add transaction interface Transactions are managed via the {begin,end}_odb_transaction() function in the object-file subsystem and its implementation is specific to the files object source. Introduce odb_transaction_{begin,commit}() in the odb subsystem to provide an eventual object source agnostic means to manage transactions. Update call sites to instead manage transactions through the odb subsystem. Also rename {begin,end}_odb_transaction() functions to object_file_transaction_{begin,commit}() to clarify the object source it supports. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/add.c | 5 +++-- builtin/unpack-objects.c | 4 ++-- builtin/update-index.c | 7 ++++--- cache-tree.c | 4 ++-- object-file.c | 12 +++++++----- object-file.h | 6 +++--- odb.c | 10 ++++++++++ odb.h | 13 +++++++++++++ read-cache.c | 4 ++-- 9 files changed, 46 insertions(+), 19 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index 8294366d68a1de..bf312c40be9789 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -15,6 +15,7 @@ #include "pathspec.h" #include "run-command.h" #include "object-file.h" +#include "odb.h" #include "parse-options.h" #include "path.h" #include "preload-index.h" @@ -575,7 +576,7 @@ int cmd_add(int argc, string_list_clear(&only_match_skip_worktree, 0); } - transaction = begin_odb_transaction(repo->objects); + transaction = odb_transaction_begin(repo->objects); ps_matched = xcalloc(pathspec.nr, 1); if (add_renormalize) @@ -594,7 +595,7 @@ int cmd_add(int argc, if (chmod_arg && pathspec.nr) exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); finish: if (write_locked_index(repo->index, &lock_file, diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 4596fff0dad58e..ef79e43715d362 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -599,12 +599,12 @@ static void unpack_all(void) progress = start_progress(the_repository, _("Unpacking objects"), nr_objects); CALLOC_ARRAY(obj_list, nr_objects); - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); } - end_odb_transaction(transaction); + odb_transaction_commit(transaction); stop_progress(&progress); if (delta_list) diff --git a/builtin/update-index.c b/builtin/update-index.c index ee01c4e423da43..8a5907767bf297 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -18,6 +18,7 @@ #include "cache-tree.h" #include "tree-walk.h" #include "object-file.h" +#include "odb.h" #include "refs.h" #include "resolve-undo.h" #include "parse-options.h" @@ -1122,7 +1123,7 @@ int cmd_update_index(int argc, * Allow the object layer to optimize adding multiple objects in * a batch. */ - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); while (ctx.argc) { if (parseopt_state != PARSE_OPT_DONE) parseopt_state = parse_options_step(&ctx, options, @@ -1152,7 +1153,7 @@ int cmd_update_index(int argc, * a transaction. */ if (transaction && verbose) { - end_odb_transaction(transaction); + odb_transaction_commit(transaction); transaction = NULL; } @@ -1220,7 +1221,7 @@ int cmd_update_index(int argc, /* * By now we have added all of the new objects */ - end_odb_transaction(transaction); + odb_transaction_commit(transaction); if (split_index > 0) { if (repo_config_get_split_index(the_repository) == 0) diff --git a/cache-tree.c b/cache-tree.c index 79ddf6b72780fd..2aba47060e95d4 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -489,10 +489,10 @@ int cache_tree_update(struct index_state *istate, int flags) trace_performance_enter(); trace2_region_enter("cache_tree", "update", the_repository); - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, "", 0, &skip, flags); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); trace2_region_leave("cache_tree", "update", the_repository); trace_performance_leave("cache_tree_update"); if (i < 0) diff --git a/object-file.c b/object-file.c index 8103a2bf413e56..17a236d2fe121b 100644 --- a/object-file.c +++ b/object-file.c @@ -691,7 +691,7 @@ static void prepare_loose_object_transaction(struct odb_transaction *transaction * We lazily create the temporary object directory * the first time an object might be added, since * callers may not know whether any objects will be - * added at the time they call begin_odb_transaction. + * added at the time they call object_file_transaction_begin. */ if (!transaction || transaction->objdir) return; @@ -1622,12 +1622,12 @@ int index_fd(struct index_state *istate, struct object_id *oid, } else { struct odb_transaction *transaction; - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); ret = index_blob_packfile_transaction(the_repository->objects->transaction, oid, fd, xsize_t(st->st_size), path, flags); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); } close(fd); @@ -1967,8 +1967,10 @@ int read_loose_object(struct repository *repo, return ret; } -struct odb_transaction *begin_odb_transaction(struct object_database *odb) +struct odb_transaction *object_file_transaction_begin(struct odb_source *source) { + struct object_database *odb = source->odb; + if (odb->transaction) return NULL; @@ -1978,7 +1980,7 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb) return odb->transaction; } -void end_odb_transaction(struct odb_transaction *transaction) +void object_file_transaction_commit(struct odb_transaction *transaction) { if (!transaction) return; diff --git a/object-file.h b/object-file.h index 6323d2e63c00ab..3fd48dcafbf1dc 100644 --- a/object-file.h +++ b/object-file.h @@ -222,16 +222,16 @@ struct odb_transaction; /* * Tell the object database to optimize for adding - * multiple objects. end_odb_transaction must be called + * multiple objects. object_file_transaction_commit must be called * to make new objects visible. If a transaction is already * pending, NULL is returned. */ -struct odb_transaction *begin_odb_transaction(struct object_database *odb); +struct odb_transaction *object_file_transaction_begin(struct odb_source *source); /* * Tell the object database to make any objects from the * current transaction visible. */ -void end_odb_transaction(struct odb_transaction *transaction); +void object_file_transaction_commit(struct odb_transaction *transaction); #endif /* OBJECT_FILE_H */ diff --git a/odb.c b/odb.c index 2a92a018c42940..af9534bfe1cdf5 100644 --- a/odb.c +++ b/odb.c @@ -1051,3 +1051,13 @@ void odb_clear(struct object_database *o) hashmap_clear(&o->pack_map); string_list_clear(&o->submodule_source_paths, 0); } + +struct odb_transaction *odb_transaction_begin(struct object_database *odb) +{ + return object_file_transaction_begin(odb->sources); +} + +void odb_transaction_commit(struct odb_transaction *transaction) +{ + object_file_transaction_commit(transaction); +} diff --git a/odb.h b/odb.h index a89b2143909920..82093753c84ca6 100644 --- a/odb.h +++ b/odb.h @@ -185,6 +185,19 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Starts an ODB transaction. Subsequent objects are written to the transaction + * and not committed until odb_transaction_commit() is invoked on the + * transaction. If the ODB already has a pending transaction, NULL is returned. + */ +struct odb_transaction *odb_transaction_begin(struct object_database *odb); + +/* + * Commits an ODB transaction making the written objects visible. If the + * specified transaction is NULL, the function is a no-op. + */ +void odb_transaction_commit(struct odb_transaction *transaction); + /* * Find source by its object directory path. Dies in case the source couldn't * be found. diff --git a/read-cache.c b/read-cache.c index 80591eecedc207..94098a3861403c 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3972,9 +3972,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix, * This function is invoked from commands other than 'add', which * may not have their own transaction active. */ - transaction = begin_odb_transaction(repo->objects); + transaction = odb_transaction_begin(repo->objects); run_diff_files(&rev, DIFF_RACY_IS_MODIFIED); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); release_revisions(&rev); return !!data.add_errors; From 2f8fd208c36bf2e88f949d0c4059214dfcb2a717 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Wed, 17 Sep 2025 20:14:26 +0200 Subject: [PATCH 10/35] gpg-interface: refactor 'enum sign_mode' parsing The definition of 'enum sign_mode' as well as its parsing code are in "builtin/fast-export.c". This was fine because `git fast-export` was the only command with '--signed-tags=' or '--signed-commits=' options. In a following commit, we are going to add a similar option to `git fast-import`, which will be simpler, easier and cleaner if we can reuse the 'enum sign_mode' defintion and parsing code. So let's move that definition and parsing code from "builtin/fast-export.c" to "gpg-interface.{c,h}". While at it, let's fix a small indentation issue with the arguments of parse_opt_sign_mode(). Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 19 +++++-------------- gpg-interface.c | 17 +++++++++++++++++ gpg-interface.h | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index c06ee0b213502e..dc2486f9a83a9b 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -37,8 +37,6 @@ static const char *const fast_export_usage[] = { NULL }; -enum sign_mode { SIGN_ABORT, SIGN_VERBATIM, SIGN_STRIP, SIGN_WARN_VERBATIM, SIGN_WARN_STRIP }; - static int progress; static enum sign_mode signed_tag_mode = SIGN_ABORT; static enum sign_mode signed_commit_mode = SIGN_STRIP; @@ -59,23 +57,16 @@ static struct hashmap anonymized_seeds; static struct revision_sources revision_sources; static int parse_opt_sign_mode(const struct option *opt, - const char *arg, int unset) + const char *arg, int unset) { enum sign_mode *val = opt->value; + if (unset) return 0; - else if (!strcmp(arg, "abort")) - *val = SIGN_ABORT; - else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore")) - *val = SIGN_VERBATIM; - else if (!strcmp(arg, "warn-verbatim") || !strcmp(arg, "warn")) - *val = SIGN_WARN_VERBATIM; - else if (!strcmp(arg, "warn-strip")) - *val = SIGN_WARN_STRIP; - else if (!strcmp(arg, "strip")) - *val = SIGN_STRIP; - else + + if (parse_sign_mode(arg, val)) return error("Unknown %s mode: %s", opt->long_name, arg); + return 0; } diff --git a/gpg-interface.c b/gpg-interface.c index 06e7fb50603d22..2f4f0e32cb3b4f 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -1125,3 +1125,20 @@ static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature, FREE_AND_NULL(ssh_signing_key_file); return ret; } + +int parse_sign_mode(const char *arg, enum sign_mode *mode) +{ + if (!strcmp(arg, "abort")) + *mode = SIGN_ABORT; + else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore")) + *mode = SIGN_VERBATIM; + else if (!strcmp(arg, "warn-verbatim") || !strcmp(arg, "warn")) + *mode = SIGN_WARN_VERBATIM; + else if (!strcmp(arg, "warn-strip")) + *mode = SIGN_WARN_STRIP; + else if (!strcmp(arg, "strip")) + *mode = SIGN_STRIP; + else + return -1; + return 0; +} diff --git a/gpg-interface.h b/gpg-interface.h index 60ddf8bbfa3833..50487aa1483274 100644 --- a/gpg-interface.h +++ b/gpg-interface.h @@ -104,4 +104,19 @@ int check_signature(struct signature_check *sigc, void print_signature_buffer(const struct signature_check *sigc, unsigned flags); +/* Modes for --signed-tags= and --signed-commits= options. */ +enum sign_mode { + SIGN_ABORT, + SIGN_WARN_VERBATIM, + SIGN_VERBATIM, + SIGN_WARN_STRIP, + SIGN_STRIP, +}; + +/* + * Return 0 if `arg` can be parsed into an `enum sign_mode`. Return -1 + * otherwise. + */ +int parse_sign_mode(const char *arg, enum sign_mode *mode); + #endif From eaaddf57912466414bce5bf81a24d1d69caf2e51 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Wed, 17 Sep 2025 20:14:27 +0200 Subject: [PATCH 11/35] fast-import: add '--signed-commits=' option A '--signed-commits=' option is already available when using `git fast-export` to decide what should be done at export time about commit signatures. At import time though, there is no option, or other way, in `git fast-import` to decide about commit signatures. To remediate that, let's add a '--signed-commits=' option to `git fast-import` too. For now the supported s are the same as those supported by `git fast-export`. The code responsible for consuming a signature is refactored into the import_one_signature() and discard_one_signature() functions, which makes it easier to follow the logic and add new modes in the future. In the 'strip' and 'warn-strip' modes, we deliberately use discard_one_signature() to discard the signature without parsing it. This ensures that even malformed signatures, which would cause the parser to fail, can be successfully stripped from a commit. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.adoc | 5 ++ builtin/fast-import.c | 63 ++++++++++++++--- t/meson.build | 1 + t/t9305-fast-import-signatures.sh | 106 +++++++++++++++++++++++++++++ 4 files changed, 165 insertions(+), 10 deletions(-) create mode 100755 t/t9305-fast-import-signatures.sh diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc index 3144ffcdb689d5..90f242d0583d10 100644 --- a/Documentation/git-fast-import.adoc +++ b/Documentation/git-fast-import.adoc @@ -66,6 +66,11 @@ OPTIONS remote-helpers that use the `import` capability, as they are already trusted to run their own code. +--signed-commits=(verbatim|warn-verbatim|warn-strip|strip|abort):: + Specify how to handle signed commits. Behaves in the same way + as the same option in linkgit:git-fast-export[1], except that + default is 'verbatim' (instead of 'abort'). + Options for Frontends ~~~~~~~~~~~~~~~~~~~~~ diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 2c35f9345d02d7..2010e78475b32e 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -188,6 +188,8 @@ static int global_argc; static const char **global_argv; static const char *global_prefix; +static enum sign_mode signed_commit_mode = SIGN_VERBATIM; + /* Memory pools */ static struct mem_pool fi_mem_pool = { .block_alloc = 2*1024*1024 - sizeof(struct mp_block), @@ -2752,6 +2754,15 @@ static void parse_one_signature(struct signature_data *sig, const char *v) parse_data(&sig->data, 0, NULL); } +static void discard_one_signature(void) +{ + struct strbuf data = STRBUF_INIT; + + read_next_command(); + parse_data(&data, 0, NULL); + strbuf_release(&data); +} + static void add_gpgsig_to_commit(struct strbuf *commit_data, const char *header, struct signature_data *sig) @@ -2785,6 +2796,22 @@ static void store_signature(struct signature_data *stored_sig, } } +static void import_one_signature(struct signature_data *sig_sha1, + struct signature_data *sig_sha256, + const char *v) +{ + struct signature_data sig = { NULL, NULL, STRBUF_INIT }; + + parse_one_signature(&sig, v); + + if (!strcmp(sig.hash_algo, "sha1")) + store_signature(sig_sha1, &sig, "SHA-1"); + else if (!strcmp(sig.hash_algo, "sha256")) + store_signature(sig_sha256, &sig, "SHA-256"); + else + die(_("parse_one_signature() returned unknown hash algo")); +} + static void parse_new_commit(const char *arg) { static struct strbuf msg = STRBUF_INIT; @@ -2817,19 +2844,32 @@ static void parse_new_commit(const char *arg) if (!committer) die("Expected committer but didn't get one"); - /* Process signatures (up to 2: one "sha1" and one "sha256") */ while (skip_prefix(command_buf.buf, "gpgsig ", &v)) { - struct signature_data sig = { NULL, NULL, STRBUF_INIT }; - - parse_one_signature(&sig, v); + switch (signed_commit_mode) { + + /* First, modes that don't need the signature to be parsed */ + case SIGN_ABORT: + die("encountered signed commit; use " + "--signed-commits= to handle it"); + case SIGN_WARN_STRIP: + warning(_("stripping a commit signature")); + /* fallthru */ + case SIGN_STRIP: + discard_one_signature(); + break; - if (!strcmp(sig.hash_algo, "sha1")) - store_signature(&sig_sha1, &sig, "SHA-1"); - else if (!strcmp(sig.hash_algo, "sha256")) - store_signature(&sig_sha256, &sig, "SHA-256"); - else - BUG("parse_one_signature() returned unknown hash algo"); + /* Second, modes that parse the signature */ + case SIGN_WARN_VERBATIM: + warning(_("importing a commit signature verbatim")); + /* fallthru */ + case SIGN_VERBATIM: + import_one_signature(&sig_sha1, &sig_sha256, v); + break; + /* Third, BUG */ + default: + BUG("invalid signed_commit_mode value %d", signed_commit_mode); + } read_next_command(); } @@ -3501,6 +3541,9 @@ static int parse_one_option(const char *option) option_active_branches(option); } else if (skip_prefix(option, "export-pack-edges=", &option)) { option_export_pack_edges(option); + } else if (skip_prefix(option, "signed-commits=", &option)) { + if (parse_sign_mode(option, &signed_commit_mode)) + usagef(_("unknown --signed-commits mode '%s'"), option); } else if (!strcmp(option, "quiet")) { show_stats = 0; quiet = 1; diff --git a/t/meson.build b/t/meson.build index 82af229be3efbe..08ad6938e2e02f 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1032,6 +1032,7 @@ integration_tests = [ 't9302-fast-import-unpack-limit.sh', 't9303-fast-import-compression.sh', 't9304-fast-import-marks.sh', + 't9305-fast-import-signatures.sh', 't9350-fast-export.sh', 't9351-fast-export-anonymize.sh', 't9400-git-cvsserver-server.sh', diff --git a/t/t9305-fast-import-signatures.sh b/t/t9305-fast-import-signatures.sh new file mode 100755 index 00000000000000..c2b427165862d3 --- /dev/null +++ b/t/t9305-fast-import-signatures.sh @@ -0,0 +1,106 @@ +#!/bin/sh + +test_description='git fast-import --signed-commits=' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main + +. ./test-lib.sh +. "$TEST_DIRECTORY/lib-gpg.sh" + +test_expect_success 'set up unsigned initial commit and import repo' ' + test_commit first && + git init new +' + +test_expect_success GPG 'set up OpenPGP signed commit' ' + git checkout -b openpgp-signing main && + echo "Content for OpenPGP signing." >file-sign && + git add file-sign && + git commit -S -m "OpenPGP signed commit" && + OPENPGP_SIGNING=$(git rev-parse --verify openpgp-signing) +' + +test_expect_success GPG 'import OpenPGP signature with --signed-commits=verbatim' ' + git fast-export --signed-commits=verbatim openpgp-signing >output && + git -C new fast-import --quiet --signed-commits=verbatim log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) && + test $OPENPGP_SIGNING = $IMPORTED && + test_must_be_empty log +' + +test_expect_success GPGSM 'set up X.509 signed commit' ' + git checkout -b x509-signing main && + test_config gpg.format x509 && + test_config user.signingkey $GIT_COMMITTER_EMAIL && + echo "Content for X.509 signing." >file-sign && + git add file-sign && + git commit -S -m "X.509 signed commit" && + X509_SIGNING=$(git rev-parse HEAD) +' + +test_expect_success GPGSM 'import X.509 signature fails with --signed-commits=abort' ' + git fast-export --signed-commits=verbatim x509-signing >output && + test_must_fail git -C new fast-import --quiet --signed-commits=abort log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/x509-signing) && + test $X509_SIGNING = $IMPORTED && + test_grep "importing a commit signature" log +' + +test_expect_success GPGSSH 'set up SSH signed commit' ' + git checkout -b ssh-signing main && + test_config gpg.format ssh && + test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" && + echo "Content for SSH signing." >file-sign && + git add file-sign && + git commit -S -m "SSH signed commit" && + SSH_SIGNING=$(git rev-parse HEAD) +' + +test_expect_success GPGSSH 'strip SSH signature with --signed-commits=strip' ' + git fast-export --signed-commits=verbatim ssh-signing >output && + git -C new fast-import --quiet --signed-commits=strip log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/ssh-signing) && + test $SSH_SIGNING != $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep ! -E "^gpgsig" actual && + test_must_be_empty log +' + +test_expect_success GPG 'setup a commit with dual OpenPGP signatures on its SHA-1 and SHA-256 formats' ' + # Create a signed SHA-256 commit + git init --object-format=sha256 explicit-sha256 && + git -C explicit-sha256 config extensions.compatObjectFormat sha1 && + git -C explicit-sha256 checkout -b dual-signed && + test_commit -C explicit-sha256 A && + echo B >explicit-sha256/B && + git -C explicit-sha256 add B && + test_tick && + git -C explicit-sha256 commit -S -m "signed" B && + SHA256_B=$(git -C explicit-sha256 rev-parse dual-signed) && + + # Create the corresponding SHA-1 commit + SHA1_B=$(git -C explicit-sha256 rev-parse --output-object-format=sha1 dual-signed) && + + # Check that the resulting SHA-1 commit has both signatures + git -C explicit-sha256 cat-file -p $SHA1_B >out && + test_grep -E "^gpgsig " out && + test_grep -E "^gpgsig-sha256 " out +' + +test_expect_success GPG 'strip both OpenPGP signatures with --signed-commits=warn-strip' ' + git -C explicit-sha256 fast-export --signed-commits=verbatim dual-signed >output && + test_grep -E "^gpgsig sha1 openpgp" output && + test_grep -E "^gpgsig sha256 openpgp" output && + git -C new fast-import --quiet --signed-commits=warn-strip log 2>&1 && + git -C new cat-file commit refs/heads/dual-signed >actual && + test_grep ! -E "^gpgsig " actual && + test_grep ! -E "^gpgsig-sha256 " actual && + test_grep "stripping a commit signature" log >out && + test_line_count = 2 out +' + +test_done From 29fe658ffbd40e6f0343728a978c215fc1e1d11a Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Wed, 17 Sep 2025 22:24:11 +0200 Subject: [PATCH 12/35] =?UTF-8?q?Makefile:=20don=E2=80=99t=20add=20whatcha?= =?UTF-8?q?nged=20after=20it=20has=20been=20removed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 07572f220a8 (whatchanged: remove when built with WITH_BREAKING_CHANGES, 2025-05-12) set up the removal of git-whatchanged(1) when `WITH_BREAKING_CHANGES` is active. Part of that work was removing it from `commands` in `git.c`. But the Makefile still lists it as a builtin. That leaves it in the limbo of being linked but not being callable; you get the generic error about not being able to call it as a *builtin*: $ git whatchanged fatal: cannot handle whatchanged as a builtin instead of the expected: $ git whatchanged git: 'whatchanged' is not a git command. See 'git --help'. Based-on-patch-by: Jeff King Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index e11340c1ae77ba..9311da5bb1547a 100644 --- a/Makefile +++ b/Makefile @@ -883,7 +883,9 @@ BUILT_INS += git-stage$X BUILT_INS += git-status$X BUILT_INS += git-switch$X BUILT_INS += git-version$X +ifndef WITH_BREAKING_CHANGES BUILT_INS += git-whatchanged$X +endif # what 'all' will build but not install in gitexecdir OTHER_PROGRAMS += git$X From 5f31632ed7d8c2928b5cdd7a1358367415d24535 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Wed, 17 Sep 2025 22:24:12 +0200 Subject: [PATCH 13/35] git: add `deprecated` category to --list-cmds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With 145 builtin commands (according to `git --list-cmds=builtins`), users are probably not keeping on top of which ones (if any) are deprecated. Let’s expand the experimental `--list-cmds`[1] to allow users and programs to query for this information. We will also use this in an upcoming commit to implement `is_deprecated_command`. [1]: Using something which is experimental to query for deprecations is perhaps not the most ideal approach, but it is simple to implement and better than having to scan the documentation Acked-by: Patrick Steinhardt Helped-by: Patrick Steinhardt Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git.adoc | 3 ++- git.c | 26 ++++++++++++++++++-------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/Documentation/git.adoc b/Documentation/git.adoc index 743b7b00e4d751..a2f0838b168c24 100644 --- a/Documentation/git.adoc +++ b/Documentation/git.adoc @@ -219,7 +219,8 @@ If you just want to run git as if it was started in `` then use List commands by group. This is an internal/experimental option and may change or be removed in the future. Supported groups are: builtins, parseopt (builtin commands that use - parse-options), main (all commands in libexec directory), + parse-options), deprecated (deprecated builtins), + main (all commands in libexec directory), others (all other commands in `$PATH` that have git- prefix), list- (see categories in command-list.txt), nohelpers (exclude helper commands), alias and config diff --git a/git.c b/git.c index 83eac0aeab75d6..511efdf20569b0 100644 --- a/git.c +++ b/git.c @@ -28,6 +28,7 @@ #define NEED_WORK_TREE (1<<3) #define DELAY_PAGER_CONFIG (1<<4) #define NO_PARSEOPT (1<<5) /* parse-options is not used */ +#define DEPRECATED (1<<6) struct cmd_struct { const char *cmd; @@ -51,7 +52,9 @@ const char git_more_info_string[] = static int use_pager = -1; -static void list_builtins(struct string_list *list, unsigned int exclude_option); +static void list_builtins(struct string_list *list, + unsigned int include_option, + unsigned int exclude_option); static void exclude_helpers_from_list(struct string_list *list) { @@ -88,7 +91,7 @@ static int list_cmds(const char *spec) int len = sep - spec; if (match_token(spec, len, "builtins")) - list_builtins(&list, 0); + list_builtins(&list, 0, 0); else if (match_token(spec, len, "main")) list_all_main_cmds(&list); else if (match_token(spec, len, "others")) @@ -99,6 +102,8 @@ static int list_cmds(const char *spec) list_aliases(&list); else if (match_token(spec, len, "config")) list_cmds_by_config(&list); + else if (match_token(spec, len, "deprecated")) + list_builtins(&list, DEPRECATED, 0); else if (len > 5 && !strncmp(spec, "list-", 5)) { struct strbuf sb = STRBUF_INIT; @@ -322,7 +327,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) if (!strcmp(cmd, "parseopt")) { struct string_list list = STRING_LIST_INIT_DUP; - list_builtins(&list, NO_PARSEOPT); + list_builtins(&list, 0, NO_PARSEOPT); for (size_t i = 0; i < list.nr; i++) printf("%s ", list.items[i].string); string_list_clear(&list, 0); @@ -590,7 +595,7 @@ static struct cmd_struct commands[] = { { "notes", cmd_notes, RUN_SETUP }, { "pack-objects", cmd_pack_objects, RUN_SETUP }, #ifndef WITH_BREAKING_CHANGES - { "pack-redundant", cmd_pack_redundant, RUN_SETUP | NO_PARSEOPT }, + { "pack-redundant", cmd_pack_redundant, RUN_SETUP | NO_PARSEOPT | DEPRECATED }, #endif { "pack-refs", cmd_pack_refs, RUN_SETUP }, { "patch-id", cmd_patch_id, RUN_SETUP_GENTLY | NO_PARSEOPT }, @@ -647,7 +652,7 @@ static struct cmd_struct commands[] = { { "verify-tag", cmd_verify_tag, RUN_SETUP }, { "version", cmd_version }, #ifndef WITH_BREAKING_CHANGES - { "whatchanged", cmd_whatchanged, RUN_SETUP }, + { "whatchanged", cmd_whatchanged, RUN_SETUP | DEPRECATED }, #endif { "worktree", cmd_worktree, RUN_SETUP }, { "write-tree", cmd_write_tree, RUN_SETUP }, @@ -668,11 +673,16 @@ int is_builtin(const char *s) return !!get_builtin(s); } -static void list_builtins(struct string_list *out, unsigned int exclude_option) +static void list_builtins(struct string_list *out, + unsigned int include_option, + unsigned int exclude_option) { + if (include_option && exclude_option) + BUG("'include_option' and 'exclude_option' are mutually exclusive"); for (size_t i = 0; i < ARRAY_SIZE(commands); i++) { - if (exclude_option && - (commands[i].option & exclude_option)) + if (include_option && !(commands[i].option & include_option)) + continue; + if (exclude_option && (commands[i].option & exclude_option)) continue; string_list_append(out, commands[i].cmd); } From b4f9282d8db88619b2becac7f4ee2cad75a72ff9 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Wed, 17 Sep 2025 22:24:13 +0200 Subject: [PATCH 14/35] git: move seen-alias bookkeeping into handle_alias(...) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are about to complicate the command handling by allowing *deprecated* builtins to be shadowed by aliases. We need to organize the code in order to facilitate that.[1] The code in the `while(1)` speculatively adds commands to the list before finding out if it’s an alias. Let’s instead move it inside `handle_alias(...)`—where it conceptually belongs anyway—and in turn only run this logic when we have found an alias.[2] [1]: We will do that with an additional call to `handle_alias(1)` inside the loop. *Not* moving this code leaves a blind spot; we will miss alias looping crafted via deprecated builtin names [2]: Also rename the list to a more descriptive name Based-on-patch-by: Jeff King Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- git.c | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/git.c b/git.c index 511efdf20569b0..ef1e7b205aa8e8 100644 --- a/git.c +++ b/git.c @@ -365,7 +365,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) return (*argv) - orig_argv; } -static int handle_alias(struct strvec *args) +static int handle_alias(struct strvec *args, struct string_list *expanded_aliases) { int envchanged = 0, ret = 0, saved_errno = errno; int count, option_count; @@ -376,6 +376,8 @@ static int handle_alias(struct strvec *args) alias_command = args->v[0]; alias_string = alias_lookup(alias_command); if (alias_string) { + struct string_list_item *seen; + if (args->nr == 2 && !strcmp(args->v[1], "-h")) fprintf_ln(stderr, _("'%s' is aliased to '%s'"), alias_command, alias_string); @@ -423,6 +425,25 @@ static int handle_alias(struct strvec *args) if (!strcmp(alias_command, new_argv[0])) die(_("recursive alias: %s"), alias_command); + string_list_append(expanded_aliases, alias_command); + seen = unsorted_string_list_lookup(expanded_aliases, + new_argv[0]); + + if (seen) { + struct strbuf sb = STRBUF_INIT; + for (size_t i = 0; i < expanded_aliases->nr; i++) { + struct string_list_item *item = &expanded_aliases->items[i]; + + strbuf_addf(&sb, "\n %s", item->string); + if (item == seen) + strbuf_addstr(&sb, " <=="); + else if (i == expanded_aliases->nr - 1) + strbuf_addstr(&sb, " ==>"); + } + die(_("alias loop detected: expansion of '%s' does" + " not terminate:%s"), expanded_aliases->items[0].string, sb.buf); + } + trace_argv_printf(new_argv, "trace: alias expansion: %s =>", alias_command); @@ -806,8 +827,7 @@ static void execv_dashed_external(const char **argv) static int run_argv(struct strvec *args) { int done_alias = 0; - struct string_list cmd_list = STRING_LIST_INIT_DUP; - struct string_list_item *seen; + struct string_list expanded_aliases = STRING_LIST_INIT_DUP; while (1) { /* @@ -859,35 +879,17 @@ static int run_argv(struct strvec *args) /* .. then try the external ones */ execv_dashed_external(args->v); - seen = unsorted_string_list_lookup(&cmd_list, args->v[0]); - if (seen) { - struct strbuf sb = STRBUF_INIT; - for (size_t i = 0; i < cmd_list.nr; i++) { - struct string_list_item *item = &cmd_list.items[i]; - - strbuf_addf(&sb, "\n %s", item->string); - if (item == seen) - strbuf_addstr(&sb, " <=="); - else if (i == cmd_list.nr - 1) - strbuf_addstr(&sb, " ==>"); - } - die(_("alias loop detected: expansion of '%s' does" - " not terminate:%s"), cmd_list.items[0].string, sb.buf); - } - - string_list_append(&cmd_list, args->v[0]); - /* * It could be an alias -- this works around the insanity * of overriding "git log" with "git show" by having * alias.log = show */ - if (!handle_alias(args)) + if (!handle_alias(args, &expanded_aliases)) break; done_alias = 1; } - string_list_clear(&cmd_list, 0); + string_list_clear(&expanded_aliases, 0); return done_alias; } From bf68b116997a0471dfccf7dcced00eb7d8b66982 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Wed, 17 Sep 2025 22:24:14 +0200 Subject: [PATCH 15/35] git: allow alias-shadowing deprecated builtins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-whatchanged(1) is deprecated and you need to pass `--i-still-use-this` in order to force it to work as before. There are two affected users, or usages: 1. people who use the command in scripts; and 2. people who are used to using it interactively. For (1) the replacement is straightforward.[1] But people in (2) might like the name or be really used to typing it.[3] An obvious first thought is to suggest aliasing `whatchanged` to the git-log(1) equivalent.[1] But this doesn’t work and is awkward since you cannot shadow builtins via aliases. Now you are left in an uncomfortable limbo; your alias won’t work until the command is removed for good. Let’s lift this limitation by allowing *deprecated* builtins to be shadowed by aliases. The only observed demand for aliasing has been for git-whatchanged(1), not for git-pack-redundant(1). But let’s be consistent and treat all deprecated commands the same. [1]: git log --raw --no-merges With a minor caveat: you get different outputs if you happen to have empty commits (no changes)[2] [2]: https://lore.kernel.org/git/20250825085428.GA367101@coredump.intra.peff.net/ [3]: https://lore.kernel.org/git/BL3P221MB0449288C8B0FA448A227FD48833AA@BL3P221MB0449.NAMP221.PROD.OUTLOOK.COM/ Based-on-patch-by: Jeff King Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/config/alias.adoc | 3 ++- git.c | 17 ++++++++++++++ t/t0014-alias.sh | 40 +++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/Documentation/config/alias.adoc b/Documentation/config/alias.adoc index 2c5db0ad8428e8..3c8fab3a95c599 100644 --- a/Documentation/config/alias.adoc +++ b/Documentation/config/alias.adoc @@ -3,7 +3,8 @@ alias.*:: after defining `alias.last = cat-file commit HEAD`, the invocation `git last` is equivalent to `git cat-file commit HEAD`. To avoid confusion and troubles with script usage, aliases that - hide existing Git commands are ignored. Arguments are split by + hide existing Git commands are ignored except for deprecated + commands. Arguments are split by spaces, the usual shell quoting and escaping are supported. A quote pair or a backslash can be used to quote them. + diff --git a/git.c b/git.c index ef1e7b205aa8e8..8c85da84c30534 100644 --- a/git.c +++ b/git.c @@ -824,12 +824,29 @@ static void execv_dashed_external(const char **argv) exit(128); } +static int is_deprecated_command(const char *cmd) +{ + struct cmd_struct *builtin = get_builtin(cmd); + return builtin && (builtin->option & DEPRECATED); +} + static int run_argv(struct strvec *args) { int done_alias = 0; struct string_list expanded_aliases = STRING_LIST_INIT_DUP; while (1) { + /* + * Allow deprecated commands to be overridden by aliases. This + * creates a seamless path forward for people who want to keep + * using the name after it is gone, but want to skip the + * deprecation complaint in the meantime. + */ + if (is_deprecated_command(args->v[0]) && + handle_alias(args, &expanded_aliases)) { + done_alias = 1; + continue; + } /* * If we tried alias and futzed with our environment, * it no longer is safe to invoke builtins directly in diff --git a/t/t0014-alias.sh b/t/t0014-alias.sh index 854d59ec58c25a..1b196ed9d6d1c8 100755 --- a/t/t0014-alias.sh +++ b/t/t0014-alias.sh @@ -27,6 +27,20 @@ test_expect_success 'looping aliases - internal execution' ' test_grep "^fatal: alias loop detected: expansion of" output ' +test_expect_success 'looping aliases - deprecated builtins' ' + test_config alias.whatchanged pack-redundant && + test_config alias.pack-redundant whatchanged && + cat >expect <<-EOF && + ${SQ}whatchanged${SQ} is aliased to ${SQ}pack-redundant${SQ} + ${SQ}pack-redundant${SQ} is aliased to ${SQ}whatchanged${SQ} + fatal: alias loop detected: expansion of ${SQ}whatchanged${SQ} does not terminate: + whatchanged <== + pack-redundant ==> + EOF + test_must_fail git whatchanged -h 2>actual && + test_cmp expect actual +' + # This test is disabled until external loops are fixed, because would block # the test suite for a full minute. # @@ -55,4 +69,30 @@ test_expect_success 'tracing a shell alias with arguments shows trace of prepare test_cmp expect actual ' +can_alias_deprecated_builtin () { + cmd="$1" && + # some git(1) commands will fail for `-h` (the case for + # git-status as of 2025-09-07) + test_might_fail git status -h >expect && + test_file_not_empty expect && + test_might_fail git -c alias."$cmd"=status "$cmd" -h >actual && + test_cmp expect actual +} + +test_expect_success 'can alias-shadow deprecated builtins' ' + for cmd in $(git --list-cmds=deprecated) + do + can_alias_deprecated_builtin "$cmd" || return 1 + done +' + +test_expect_success 'can alias-shadow via two deprecated builtins' ' + # some git(1) commands will fail... (see above) + test_might_fail git status -h >expect && + test_file_not_empty expect && + test_might_fail git -c alias.whatchanged=pack-redundant \ + -c alias.pack-redundant=status whatchanged -h >actual && + test_cmp expect actual +' + test_done From 65d33db48e5a2c6dbf3f33d6eaa987f55dabe26a Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Wed, 17 Sep 2025 22:24:15 +0200 Subject: [PATCH 16/35] t0014: test shadowing of aliases for a sample of builtins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit added tests for shadowing deprecated builtins. Let’s make the test suite more complete by exercising a sample of the builtins and in turn test the documentation for git-config(1): To avoid confusion and troubles with script usage, aliases that hide existing Git commands are ignored except for deprecated commands. Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- t/t0014-alias.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/t/t0014-alias.sh b/t/t0014-alias.sh index 1b196ed9d6d1c8..07a53e7366ef4b 100755 --- a/t/t0014-alias.sh +++ b/t/t0014-alias.sh @@ -95,4 +95,21 @@ test_expect_success 'can alias-shadow via two deprecated builtins' ' test_cmp expect actual ' +cannot_alias_regular_builtin () { + cmd="$1" && + # some git(1) commands will fail... (see above) + test_might_fail git "$cmd" -h >expect && + test_file_not_empty expect && + test_might_fail git -c alias."$cmd"=status "$cmd" -h >actual && + test_cmp expect actual +} + +test_expect_success 'cannot alias-shadow a sample of regular builtins' ' + for cmd in grep check-ref-format interpret-trailers \ + checkout-index fast-import diagnose rev-list prune + do + cannot_alias_regular_builtin "$cmd" || return 1 + done +' + test_done From 098230f725e66fe9e346f4db995116d0aef4e0cf Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Wed, 17 Sep 2025 22:24:16 +0200 Subject: [PATCH 17/35] you-still-use-that??: help the user help themselves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Give the user a list of suggestions for what to do when they run a deprecated command. The first order of action will be to check the breaking changes document;[1] this short error message says nothing about why this command is deprecated, and in any case going into any kind of detail might overwhelm the user. Then they can find out if this has been discussed on the mailing list. Then users who e.g. are using git-whatchanged(1) can learn that this is arguably a plug-in replacement: git log --raw --no-merges Finally they are invited to send an email to the mailing list. Also drop the “please add” part in favor of just using the “refusing” die-message; these two would have been right after each other in this new version. Also drop “Thanks” since it now would require a new paragraph. [1]: www.git-scm.com has a disclaimer for these internal documents that says that “This information is specific to the Git project”. That’s misleading in this particular case. But users are unlikely to get discouraged from reading about why they (or their programs) cannot run a command any more; it clearly concerns them. Helped-by: Eric Sunshine Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- usage.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/usage.c b/usage.c index 81913236a4a2ab..35dc57eb07ef8e 100644 --- a/usage.c +++ b/usage.c @@ -7,6 +7,7 @@ #include "git-compat-util.h" #include "gettext.h" #include "trace2.h" +#include "strbuf.h" static void vfreportf(FILE *f, const char *prefix, const char *err, va_list params) { @@ -377,12 +378,24 @@ void bug_fl(const char *file, int line, const char *fmt, ...) NORETURN void you_still_use_that(const char *command_name) { + struct strbuf percent_encoded = STRBUF_INIT; + strbuf_add_percentencode(&percent_encoded, + command_name, + STRBUF_ENCODE_SLASH); + fprintf(stderr, _("'%s' is nominated for removal.\n" - "If you still use this command, please add an extra\n" - "option, '--i-still-use-this', on the command line\n" - "and let us know you still use it by sending an e-mail\n" - "to . Thanks.\n"), - command_name); + "If you still use this command, here's what you can do:\n" + "\n" + "- read https://git-scm.com/docs/BreakingChanges.html\n" + "- check if anyone has discussed this on the mailing\n" + " list and if they came up with something that can\n" + " help you: https://lore.kernel.org/git/?q=%s\n" + "- send an email to to let us\n" + " know that you still use this command and were unable\n" + " to determine a suitable replacement\n" + "\n"), + command_name, percent_encoded.buf); + strbuf_release(&percent_encoded); die(_("refusing to run without --i-still-use-this")); } From 5a31252702da61ddabc68f3f8ac7a2cffc19a542 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Wed, 17 Sep 2025 22:24:17 +0200 Subject: [PATCH 18/35] whatchanged: hint about git-log(1) and aliasing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There have been quite a few `--i-still-use-this` user reports since Git 2.51.0 was released.[1][2] And it doesn’t seem like they are reading the man page about the git-log(1) equivalent. Tell them what options to plug into git-log(1), either as a replacement command or as an alias.[3] That template produces almost the same output[4] and is arguably a plug-in replacement. Concretely, add an optional `hint` argument so that we can use it right after the initial error line. Also mention the same concrete options in the documentation while we’re at it. [1]: E.g., • https://lore.kernel.org/git/e1a69dea-bcb6-45fc-83d3-9e50d32c410b@5y5.one/ • https://lore.kernel.org/git/1011073f-9930-4360-a42f-71eb7421fe3f@chrispalmer.uk/#t • https://lore.kernel.org/git/9fcbfcc4-79f9-421f-b9a4-dc455f7db485@acm.org/#t • https://lore.kernel.org/git/83241BDE-1E0D-489A-9181-C608E9FCC17B@gmail.com/ [2]: The error message on 2.51.0 does tell them to report it, unconditionally [3]: We allow aliasing deprecated builtins now for people who are very used to the command name or just like it a lot [4]: You only get different outputs if you happen to have empty commits (no changes)[4] [5]: https://lore.kernel.org/git/20250825085428.GA367101@coredump.intra.peff.net/ Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-whatchanged.adoc | 6 +++++- builtin/log.c | 8 +++++++- builtin/pack-redundant.c | 2 +- git-compat-util.h | 2 +- usage.c | 14 ++++++++++---- 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Documentation/git-whatchanged.adoc b/Documentation/git-whatchanged.adoc index d21484026fe805..e71d2aa2d27ea6 100644 --- a/Documentation/git-whatchanged.adoc +++ b/Documentation/git-whatchanged.adoc @@ -24,7 +24,11 @@ Shows commit logs and diff output each commit introduces. New users are encouraged to use linkgit:git-log[1] instead. The `whatchanged` command is essentially the same as linkgit:git-log[1] -but defaults to showing the raw format diff output and skipping merges. +but defaults to showing the raw format diff output and skipping merges: + +---- +git log --raw --no-merges +---- The command is primarily kept for historical reasons; fingers of many people who learned Git long before `git log` was invented by diff --git a/builtin/log.c b/builtin/log.c index c2f8bbf86301a9..1d1e6e9130a7ed 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -543,7 +543,13 @@ int cmd_whatchanged(int argc, cmd_log_init(argc, argv, prefix, &rev, &opt, &cfg); if (!cfg.i_still_use_this) - you_still_use_that("git whatchanged"); + you_still_use_that("git whatchanged", + _("\n" + "hint: You can replace 'git whatchanged ' with:\n" + "hint:\tgit log --raw --no-merges\n" + "hint: Or make an alias:\n" + "hint:\tgit config set --global alias.whatchanged 'log --raw --no-merges'\n" + "\n")); if (!rev.diffopt.output_format) rev.diffopt.output_format = DIFF_FORMAT_RAW; diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index fe81c293e3af6f..5d5ae4afa28f50 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -626,7 +626,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s } if (!i_still_use_this) - you_still_use_that("git pack-redundant"); + you_still_use_that("git pack-redundant", NULL); if (load_all_packs) load_all(); diff --git a/git-compat-util.h b/git-compat-util.h index 9408f463e31b51..398e0fac4fab60 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -460,7 +460,7 @@ void warning_errno(const char *err, ...) __attribute__((format (printf, 1, 2))); void show_usage_if_asked(int ac, const char **av, const char *err); -NORETURN void you_still_use_that(const char *command_name); +NORETURN void you_still_use_that(const char *command_name, const char *hint); #ifndef NO_OPENSSL #ifdef APPLE_COMMON_CRYPTO diff --git a/usage.c b/usage.c index 35dc57eb07ef8e..7545a616453449 100644 --- a/usage.c +++ b/usage.c @@ -376,7 +376,8 @@ void bug_fl(const char *file, int line, const char *fmt, ...) va_end(ap); } -NORETURN void you_still_use_that(const char *command_name) + +NORETURN void you_still_use_that(const char *command_name, const char *hint) { struct strbuf percent_encoded = STRBUF_INIT; strbuf_add_percentencode(&percent_encoded, @@ -384,8 +385,13 @@ NORETURN void you_still_use_that(const char *command_name) STRBUF_ENCODE_SLASH); fprintf(stderr, - _("'%s' is nominated for removal.\n" - "If you still use this command, here's what you can do:\n" + _("'%s' is nominated for removal.\n"), command_name); + + if (hint) + fputs(hint, stderr); + + fprintf(stderr, + _("If you still use this command, here's what you can do:\n" "\n" "- read https://git-scm.com/docs/BreakingChanges.html\n" "- check if anyone has discussed this on the mailing\n" @@ -395,7 +401,7 @@ NORETURN void you_still_use_that(const char *command_name) " know that you still use this command and were unable\n" " to determine a suitable replacement\n" "\n"), - command_name, percent_encoded.buf); + percent_encoded.buf); strbuf_release(&percent_encoded); die(_("refusing to run without --i-still-use-this")); } From a9235f6fa7bcdff08238c33ce5f87135119ab03b Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Wed, 17 Sep 2025 22:24:18 +0200 Subject: [PATCH 19/35] whatchanged: remove not-even-shorter clause MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The closest equivalent is `git log --raw --no-merges`. Also change to “defaults” (implicit plural). Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-whatchanged.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/git-whatchanged.adoc b/Documentation/git-whatchanged.adoc index e71d2aa2d27ea6..436e219b7d0692 100644 --- a/Documentation/git-whatchanged.adoc +++ b/Documentation/git-whatchanged.adoc @@ -15,7 +15,7 @@ WARNING ------- `git whatchanged` has been deprecated and is scheduled for removal in a future version of Git, as it is merely `git log` with different -default; `whatchanged` is not even shorter to type than `log --raw`. +defaults. DESCRIPTION ----------- From 54a60e5b38578dea9303c282589b3dac8a83ff75 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Wed, 17 Sep 2025 22:24:19 +0200 Subject: [PATCH 20/35] BreakingChanges: remove claim about whatchanged reports This was written in e836757e14b (whatschanged: list it in BreakingChanges document, 2025-05-12) which was on the same topic that added the `--i-still-use-this` requirement.[1] Maybe it was a work-in-progress comment/status. [1]: jc/you-still-use-whatchanged Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/BreakingChanges.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/BreakingChanges.adoc b/Documentation/BreakingChanges.adoc index f8d2eba061c82a..c4985163c3cd5d 100644 --- a/Documentation/BreakingChanges.adoc +++ b/Documentation/BreakingChanges.adoc @@ -235,7 +235,7 @@ These features will be removed. equivalent `git log --raw`. We have nominated the command for removal, have changed the command to refuse to work unless the `--i-still-use-this` option is given, and asked the users to report - when they do so. So far there hasn't been a single complaint. + when they do so. + The command will be removed. From 8dfe077fb68eb952464ce59deaa4dfdd52891457 Mon Sep 17 00:00:00 2001 From: Meet Soni Date: Fri, 19 Sep 2025 13:56:39 +0530 Subject: [PATCH 21/35] refs: add a generic 'optimize' API The existing `pack-refs` API is conceptually tied to the 'files' backend, but its behavior is generic (e.g., it triggers compaction for reftable). This naming is confusing. Introduce a new generic refs_optimize() API that dispatches to a backend-specific implementation via a new 'optimize' vtable method. This lays the architectural groundwork for different reference backends (like 'files' and 'reftable') to provide their own storage optimization logic, which will be called from a single, generic entry point. Mentored-by: Patrick Steinhardt Mentored-by: shejialuo Signed-off-by: Meet Soni Signed-off-by: Junio C Hamano --- refs.c | 5 +++++ refs.h | 6 ++++++ refs/refs-internal.h | 3 +++ 3 files changed, 14 insertions(+) diff --git a/refs.c b/refs.c index 4ff55cf24f68ee..191b95b4a30131 100644 --- a/refs.c +++ b/refs.c @@ -2282,6 +2282,11 @@ int refs_pack_refs(struct ref_store *refs, struct pack_refs_opts *opts) return refs->be->pack_refs(refs, opts); } +int refs_optimize(struct ref_store *refs, struct pack_refs_opts *opts) +{ + return refs->be->optimize(refs, opts); +} + int peel_iterated_oid(struct repository *r, const struct object_id *base, struct object_id *peeled) { if (current_ref_iter && diff --git a/refs.h b/refs.h index f29e486e332f6d..d28c4ef0afd080 100644 --- a/refs.h +++ b/refs.h @@ -480,6 +480,12 @@ struct pack_refs_opts { */ int refs_pack_refs(struct ref_store *refs, struct pack_refs_opts *opts); +/* + * Optimize the ref store. The exact behavior is up to the backend. + * For the files backend, this is equivalent to packing refs. + */ +int refs_optimize(struct ref_store *refs, struct pack_refs_opts *opts); + /* * Setup reflog before using. Fill in err and return -1 on failure. */ diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 54c2079c1264e8..4ef3bd75c6ae55 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -447,6 +447,8 @@ typedef int ref_transaction_commit_fn(struct ref_store *refs, typedef int pack_refs_fn(struct ref_store *ref_store, struct pack_refs_opts *opts); +typedef int optimize_fn(struct ref_store *ref_store, + struct pack_refs_opts *opts); typedef int rename_ref_fn(struct ref_store *ref_store, const char *oldref, const char *newref, const char *logmsg); @@ -572,6 +574,7 @@ struct ref_storage_be { ref_transaction_abort_fn *transaction_abort; pack_refs_fn *pack_refs; + optimize_fn *optimize; rename_ref_fn *rename_ref; copy_ref_fn *copy_ref; From 1fd6067181703e9e65f602e6da27b9b1d8b783a2 Mon Sep 17 00:00:00 2001 From: Meet Soni Date: Fri, 19 Sep 2025 13:56:40 +0530 Subject: [PATCH 22/35] files-backend: implement 'optimize' action With the generic `refs_optimize()` API now in place, provide the first implementation for the 'files' reference backend. This makes the new API functional for existing repositories and serves as the foundation for migrating user-facing commands to the new architecture. The implementation simply calls the existing `files_pack_refs()` function, as 'packing' is the method used to optimize the files-based reference store. Wire up the new `files_optimize()` function to the `optimize` slot in the files backend's virtual table. Mentored-by: Patrick Steinhardt Mentored-by: shejialuo Signed-off-by: Meet Soni Signed-off-by: Junio C Hamano --- refs/files-backend.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/refs/files-backend.c b/refs/files-backend.c index dfc8e9bc5055ab..1428d3a6f1b523 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1473,6 +1473,15 @@ static int files_pack_refs(struct ref_store *ref_store, return 0; } +static int files_optimize(struct ref_store *ref_store, struct pack_refs_opts *opts) +{ + /* + * For the "files" backend, "optimizing" is the same as "packing". + * So, we just call the existing worker function for packing. + */ + return files_pack_refs(ref_store, opts); +} + /* * People using contrib's git-new-workdir have .git/logs/refs -> * /some/other/path/.git/logs/refs, and that may live on another device. @@ -3909,6 +3918,7 @@ struct ref_storage_be refs_be_files = { .transaction_abort = files_transaction_abort, .pack_refs = files_pack_refs, + .optimize = files_optimize, .rename_ref = files_rename_ref, .copy_ref = files_copy_ref, From da0849a71e08ad072700b7cd1a0cb8b6fb89c50a Mon Sep 17 00:00:00 2001 From: Meet Soni Date: Fri, 19 Sep 2025 13:56:41 +0530 Subject: [PATCH 23/35] reftable-backend: implement 'optimize' action To make the new generic `optimize` API fully functional, provide an implementation for the 'reftable' reference backend. For the reftable backend, the 'optimize' action is to compact its tables. The existing `reftable_be_pack_refs()` function already provides this logic, so the new `reftable_be_optimize()` function simply calls it. Wire up the new function to the `optimize` slot in the reftable backend's virtual table. Mentored-by: Patrick Steinhardt Mentored-by: shejialuo Signed-off-by: Meet Soni Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 570463da4173c7..5dff1e08e5cea8 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1721,6 +1721,12 @@ static int reftable_be_pack_refs(struct ref_store *ref_store, return ret; } +static int reftable_be_optimize(struct ref_store *ref_store, + struct pack_refs_opts *opts) +{ + return reftable_be_pack_refs(ref_store, opts); +} + struct write_create_symref_arg { struct reftable_ref_store *refs; struct reftable_stack *stack; @@ -2702,6 +2708,7 @@ struct ref_storage_be refs_be_reftable = { .transaction_abort = reftable_be_transaction_abort, .pack_refs = reftable_be_pack_refs, + .optimize = reftable_be_optimize, .rename_ref = reftable_be_rename_ref, .copy_ref = reftable_be_copy_ref, From 0bef41319c889e6409ea4c1369747a70cbae7c1f Mon Sep 17 00:00:00 2001 From: Meet Soni Date: Fri, 19 Sep 2025 13:56:42 +0530 Subject: [PATCH 24/35] builtin/pack-refs: convert to use the generic refs_optimize() API The `git pack-refs` command behaves generically, triggering a pack for the 'files' backend and a compaction for the 'reftable' backend. However, the name of the command and its corresponding API is conceptually tied to the 'files' backend implementation. To create a cleaner, more generic interface, refactor `git pack-refs` to use the new `refs_optimize()` API. "Optimize" is a better semantic term for this generic action. This change allows `git pack-refs` to act as a backend-agnostic frontend for reference optimization, and paves the way for the new `git refs optimize` command to do the same. Mentored-by: Patrick Steinhardt Mentored-by: shejialuo Signed-off-by: Meet Soni Signed-off-by: Junio C Hamano --- builtin/pack-refs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/pack-refs.c b/builtin/pack-refs.c index 5e28d0f9e80996..dfcf6645244a66 100644 --- a/builtin/pack-refs.c +++ b/builtin/pack-refs.c @@ -51,7 +51,7 @@ int cmd_pack_refs(int argc, if (!pack_refs_opts.includes->nr) string_list_append(pack_refs_opts.includes, "refs/tags/*"); - ret = refs_pack_refs(get_main_ref_store(repo), &pack_refs_opts); + ret = refs_optimize(get_main_ref_store(repo), &pack_refs_opts); clear_ref_exclusions(&excludes); string_list_clear(&included_refs, 0); From 0d4ec339227d04bcba89390bdef22d4dce30d271 Mon Sep 17 00:00:00 2001 From: Meet Soni Date: Fri, 19 Sep 2025 13:56:43 +0530 Subject: [PATCH 25/35] builtin/pack-refs: factor out core logic into a shared library The implementation of `git pack-refs` is monolithic within `cmd_pack_refs()`, making it impossible to share its logic with other commands. To enable code reuse for the upcoming `git refs optimize` subcommand, refactor the core logic into a shared helper function. Split the original `builtin/pack-refs.c` file into two parts: - A new shared library file, `pack-refs.c`, which contains the core option parsing and packing logic in a new `pack_refs_core()` helper function. - The original `builtin/pack-refs.c`, which is now a thin wrapper responsible only for defining the `git pack-refs` command and calling the shared helper. A new `pack-refs.h` header is also introduced to define the public interface for this shared logic. Mentored-by: Patrick Steinhardt Mentored-by: shejialuo Signed-off-by: Meet Soni Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/pack-refs.c | 54 ++++--------------------------------------- meson.build | 1 + pack-refs.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ pack-refs.h | 23 +++++++++++++++++++ 5 files changed, 86 insertions(+), 49 deletions(-) create mode 100644 pack-refs.c create mode 100644 pack-refs.h diff --git a/Makefile b/Makefile index 555b7f4dc3c0e1..f51297ffc3fda5 100644 --- a/Makefile +++ b/Makefile @@ -1094,6 +1094,7 @@ LIB_OBJS += pack-bitmap.o LIB_OBJS += pack-check.o LIB_OBJS += pack-mtimes.o LIB_OBJS += pack-objects.o +LIB_OBJS += pack-refs.o LIB_OBJS += pack-revindex.o LIB_OBJS += pack-write.o LIB_OBJS += packfile.o diff --git a/builtin/pack-refs.c b/builtin/pack-refs.c index dfcf6645244a66..3446b84cdae7fa 100644 --- a/builtin/pack-refs.c +++ b/builtin/pack-refs.c @@ -1,60 +1,16 @@ #include "builtin.h" -#include "config.h" -#include "environment.h" #include "gettext.h" -#include "parse-options.h" -#include "refs.h" -#include "revision.h" - -static char const * const pack_refs_usage[] = { - N_("git pack-refs [--all] [--no-prune] [--auto] [--include ] [--exclude ]"), - NULL -}; +#include "pack-refs.h" int cmd_pack_refs(int argc, const char **argv, const char *prefix, struct repository *repo) { - struct ref_exclusions excludes = REF_EXCLUSIONS_INIT; - struct string_list included_refs = STRING_LIST_INIT_NODUP; - struct pack_refs_opts pack_refs_opts = { - .exclusions = &excludes, - .includes = &included_refs, - .flags = PACK_REFS_PRUNE, - }; - struct string_list option_excluded_refs = STRING_LIST_INIT_NODUP; - struct string_list_item *item; - int pack_all = 0; - int ret; - - struct option opts[] = { - OPT_BOOL(0, "all", &pack_all, N_("pack everything")), - OPT_BIT(0, "prune", &pack_refs_opts.flags, N_("prune loose refs (default)"), PACK_REFS_PRUNE), - OPT_BIT(0, "auto", &pack_refs_opts.flags, N_("auto-pack refs as needed"), PACK_REFS_AUTO), - OPT_STRING_LIST(0, "include", pack_refs_opts.includes, N_("pattern"), - N_("references to include")), - OPT_STRING_LIST(0, "exclude", &option_excluded_refs, N_("pattern"), - N_("references to exclude")), - OPT_END(), + static char const * const pack_refs_usage[] = { + N_("git pack-refs " PACK_REFS_OPTS), + NULL }; - repo_config(repo, git_default_config, NULL); - if (parse_options(argc, argv, prefix, opts, pack_refs_usage, 0)) - usage_with_options(pack_refs_usage, opts); - - for_each_string_list_item(item, &option_excluded_refs) - add_ref_exclusion(pack_refs_opts.exclusions, item->string); - - if (pack_all) - string_list_append(pack_refs_opts.includes, "*"); - - if (!pack_refs_opts.includes->nr) - string_list_append(pack_refs_opts.includes, "refs/tags/*"); - - ret = refs_optimize(get_main_ref_store(repo), &pack_refs_opts); - clear_ref_exclusions(&excludes); - string_list_clear(&included_refs, 0); - string_list_clear(&option_excluded_refs, 0); - return ret; + return pack_refs_core(argc, argv, prefix, repo, pack_refs_usage); } diff --git a/meson.build b/meson.build index e8ec0eca1650a5..cedaadad2e884a 100644 --- a/meson.build +++ b/meson.build @@ -407,6 +407,7 @@ libgit_sources = [ 'pack-check.c', 'pack-mtimes.c', 'pack-objects.c', + 'pack-refs.c', 'pack-revindex.c', 'pack-write.c', 'packfile.c', diff --git a/pack-refs.c b/pack-refs.c new file mode 100644 index 00000000000000..1a5e07d8b888ab --- /dev/null +++ b/pack-refs.c @@ -0,0 +1,56 @@ +#include "builtin.h" +#include "config.h" +#include "environment.h" +#include "pack-refs.h" +#include "parse-options.h" +#include "refs.h" +#include "revision.h" + +int pack_refs_core(int argc, + const char **argv, + const char *prefix, + struct repository *repo, + const char * const *usage_opts) +{ + struct ref_exclusions excludes = REF_EXCLUSIONS_INIT; + struct string_list included_refs = STRING_LIST_INIT_NODUP; + struct pack_refs_opts pack_refs_opts = { + .exclusions = &excludes, + .includes = &included_refs, + .flags = PACK_REFS_PRUNE, + }; + struct string_list option_excluded_refs = STRING_LIST_INIT_NODUP; + struct string_list_item *item; + int pack_all = 0; + int ret; + + struct option opts[] = { + OPT_BOOL(0, "all", &pack_all, N_("pack everything")), + OPT_BIT(0, "prune", &pack_refs_opts.flags, N_("prune loose refs (default)"), PACK_REFS_PRUNE), + OPT_BIT(0, "auto", &pack_refs_opts.flags, N_("auto-pack refs as needed"), PACK_REFS_AUTO), + OPT_STRING_LIST(0, "include", pack_refs_opts.includes, N_("pattern"), + N_("references to include")), + OPT_STRING_LIST(0, "exclude", &option_excluded_refs, N_("pattern"), + N_("references to exclude")), + OPT_END(), + }; + repo_config(repo, git_default_config, NULL); + if (parse_options(argc, argv, prefix, opts, usage_opts, 0)) + usage_with_options(usage_opts, opts); + + for_each_string_list_item(item, &option_excluded_refs) + add_ref_exclusion(pack_refs_opts.exclusions, item->string); + + if (pack_all) + string_list_append(pack_refs_opts.includes, "*"); + + if (!pack_refs_opts.includes->nr) + string_list_append(pack_refs_opts.includes, "refs/tags/*"); + + ret = refs_optimize(get_main_ref_store(repo), &pack_refs_opts); + + clear_ref_exclusions(&excludes); + string_list_clear(&included_refs, 0); + string_list_clear(&option_excluded_refs, 0); + return ret; +} diff --git a/pack-refs.h b/pack-refs.h new file mode 100644 index 00000000000000..5de27e7da847b1 --- /dev/null +++ b/pack-refs.h @@ -0,0 +1,23 @@ +#ifndef PACK_REFS_H +#define PACK_REFS_H + +struct repository; + +/* + * Shared usage string for options common to git-pack-refs(1) + * and git-refs-optimize(1). The command-specific part (e.g., "git refs optimize ") + * must be prepended by the caller. + */ +#define PACK_REFS_OPTS \ + "[--all] [--no-prune] [--auto] [--include ] [--exclude ]" + +/* + * The core logic for pack-refs and its clones. + */ +int pack_refs_core(int argc, + const char **argv, + const char *prefix, + struct repository *repo, + const char * const *usage_opts); + +#endif /* PACK_REFS_H */ From 93efe34f5a9a6ef705e6f55d46852717ce242340 Mon Sep 17 00:00:00 2001 From: Meet Soni Date: Fri, 19 Sep 2025 13:56:44 +0530 Subject: [PATCH 26/35] doc: pack-refs: factor out common options In preparation for adding documentation for `git refs optimize`, factor out the common options from the `git-pack-refs` man page into a shareable file `pack-refs-options.adoc` and update `git-pack-refs.adoc` to use an `include::` macro. This change is a pure refactoring and results in no change to the final rendered documentation for `pack-refs`. Mentored-by: Patrick Steinhardt Mentored-by: shejialuo Signed-off-by: Meet Soni Signed-off-by: Junio C Hamano --- Documentation/git-pack-refs.adoc | 53 +--------------------------- Documentation/pack-refs-options.adoc | 52 +++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 52 deletions(-) create mode 100644 Documentation/pack-refs-options.adoc diff --git a/Documentation/git-pack-refs.adoc b/Documentation/git-pack-refs.adoc index 42b90051e695a5..fde9f2f294e815 100644 --- a/Documentation/git-pack-refs.adoc +++ b/Documentation/git-pack-refs.adoc @@ -45,58 +45,7 @@ unpacked. OPTIONS ------- ---all:: - -The command by default packs all tags and refs that are already -packed, and leaves other refs -alone. This is because branches are expected to be actively -developed and packing their tips does not help performance. -This option causes all refs to be packed as well, with the exception -of hidden refs, broken refs, and symbolic refs. Useful for a repository -with many branches of historical interests. - ---no-prune:: - -The command usually removes loose refs under `$GIT_DIR/refs` -hierarchy after packing them. This option tells it not to. - ---auto:: - -Pack refs as needed depending on the current state of the ref database. The -behavior depends on the ref format used by the repository and may change in the -future. -+ - - "files": Loose references are packed into the `packed-refs` file - based on the ratio of loose references to the size of the - `packed-refs` file. The bigger the `packed-refs` file, the more loose - references need to exist before we repack. -+ - - "reftable": Tables are compacted such that they form a geometric - sequence. For two tables N and N+1, where N+1 is newer, this - maintains the property that N is at least twice as big as N+1. Only - tables that violate this property are compacted. - ---include :: - -Pack refs based on a `glob(7)` pattern. Repetitions of this option -accumulate inclusion patterns. If a ref is both included in `--include` and -`--exclude`, `--exclude` takes precedence. Using `--include` will preclude all -tags from being included by default. Symbolic refs and broken refs will never -be packed. When used with `--all`, it will be a noop. Use `--no-include` to clear -and reset the list of patterns. - ---exclude :: - -Do not pack refs matching the given `glob(7)` pattern. Repetitions of this option -accumulate exclusion patterns. Use `--no-exclude` to clear and reset the list of -patterns. If a ref is already packed, including it with `--exclude` will not -unpack it. -+ -When used with `--all`, pack only loose refs which do not match any of -the provided `--exclude` patterns. -+ -When used with `--include`, refs provided to `--include`, minus refs that are -provided to `--exclude` will be packed. +include::pack-refs-options.adoc[] BUGS diff --git a/Documentation/pack-refs-options.adoc b/Documentation/pack-refs-options.adoc new file mode 100644 index 00000000000000..0b11282941bb02 --- /dev/null +++ b/Documentation/pack-refs-options.adoc @@ -0,0 +1,52 @@ +--all:: + +The command by default packs all tags and refs that are already +packed, and leaves other refs +alone. This is because branches are expected to be actively +developed and packing their tips does not help performance. +This option causes all refs to be packed as well, with the exception +of hidden refs, broken refs, and symbolic refs. Useful for a repository +with many branches of historical interests. + +--no-prune:: + +The command usually removes loose refs under `$GIT_DIR/refs` +hierarchy after packing them. This option tells it not to. + +--auto:: + +Pack refs as needed depending on the current state of the ref database. The +behavior depends on the ref format used by the repository and may change in the +future. ++ + - "files": Loose references are packed into the `packed-refs` file + based on the ratio of loose references to the size of the + `packed-refs` file. The bigger the `packed-refs` file, the more loose + references need to exist before we repack. ++ + - "reftable": Tables are compacted such that they form a geometric + sequence. For two tables N and N+1, where N+1 is newer, this + maintains the property that N is at least twice as big as N+1. Only + tables that violate this property are compacted. + +--include :: + +Pack refs based on a `glob(7)` pattern. Repetitions of this option +accumulate inclusion patterns. If a ref is both included in `--include` and +`--exclude`, `--exclude` takes precedence. Using `--include` will preclude all +tags from being included by default. Symbolic refs and broken refs will never +be packed. When used with `--all`, it will be a noop. Use `--no-include` to clear +and reset the list of patterns. + +--exclude :: + +Do not pack refs matching the given `glob(7)` pattern. Repetitions of this option +accumulate exclusion patterns. Use `--no-exclude` to clear and reset the list of +patterns. If a ref is already packed, including it with `--exclude` will not +unpack it. ++ +When used with `--all`, pack only loose refs which do not match any of +the provided `--exclude` patterns. ++ +When used with `--include`, refs provided to `--include`, minus refs that are +provided to `--exclude` will be packed. From ecc70a48a5ea5e568b1cbdd111f7ddba62dbe4d6 Mon Sep 17 00:00:00 2001 From: Meet Soni Date: Fri, 19 Sep 2025 13:56:45 +0530 Subject: [PATCH 27/35] builtin/refs: add optimize subcommand As part of the ongoing effort to consolidate reference handling, introduce a new `optimize` subcommand. This command provides the same functionality and exit-code behavior as `git pack-refs`, serving as its modern replacement. Implement `cmd_refs_optimize` by having it call the `pack_refs_core()` helper function. This helper was factored out of the original `cmd_pack_refs` in a preceding commit, allowing both commands to share the same core logic as independent peers. Add documentation for the new command. The man page leverages the shared options file, created in a previous commit, by using the AsciiDoc `include::` macro to ensure consistency with git-pack-refs(1). Mentored-by: Patrick Steinhardt Mentored-by: shejialuo Signed-off-by: Meet Soni Signed-off-by: Junio C Hamano --- Documentation/git-refs.adoc | 10 ++++++++++ builtin/refs.c | 17 +++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/Documentation/git-refs.adoc b/Documentation/git-refs.adoc index d462953fb5ee3a..e233f21eeb528a 100644 --- a/Documentation/git-refs.adoc +++ b/Documentation/git-refs.adoc @@ -18,6 +18,7 @@ git refs list [--count=] [--shell|--perl|--python|--tcl] [--contains[=]] [--no-contains[=]] [(--exclude=)...] [--start-after=] [ --stdin | (...)] +git refs optimize [--all] [--no-prune] [--auto] [--include ] [--exclude ] DESCRIPTION ----------- @@ -38,6 +39,11 @@ list:: formatting, and sorting. This subcommand is an alias for linkgit:git-for-each-ref[1] and offers identical functionality. +optimize:: + Optimizes references to improve repository performance and reduce disk + usage. This subcommand is an alias for linkgit:git-pack-refs[1] and + offers identical functionality. + OPTIONS ------- @@ -73,6 +79,10 @@ The following options are specific to 'git refs list': include::for-each-ref-options.adoc[] +The following options are specific to 'git refs optimize': + +include::pack-refs-options.adoc[] + KNOWN LIMITATIONS ----------------- diff --git a/builtin/refs.c b/builtin/refs.c index 76224feba4d55a..785f476e4b9f9a 100644 --- a/builtin/refs.c +++ b/builtin/refs.c @@ -2,6 +2,7 @@ #include "builtin.h" #include "config.h" #include "fsck.h" +#include "pack-refs.h" #include "parse-options.h" #include "refs.h" #include "strbuf.h" @@ -14,6 +15,9 @@ #define REFS_VERIFY_USAGE \ N_("git refs verify [--strict] [--verbose]") +#define REFS_OPTIMIZE_USAGE \ + N_("git refs optimize " PACK_REFS_OPTS) + static int cmd_refs_migrate(int argc, const char **argv, const char *prefix, struct repository *repo UNUSED) { @@ -113,6 +117,17 @@ static int cmd_refs_list(int argc, const char **argv, const char *prefix, return for_each_ref_core(argc, argv, prefix, repo, refs_list_usage); } +static int cmd_refs_optimize(int argc, const char **argv, const char *prefix, + struct repository *repo) +{ + static char const * const refs_optimize_usage[] = { + REFS_OPTIMIZE_USAGE, + NULL + }; + + return pack_refs_core(argc, argv, prefix, repo, refs_optimize_usage); +} + int cmd_refs(int argc, const char **argv, const char *prefix, @@ -122,6 +137,7 @@ int cmd_refs(int argc, REFS_MIGRATE_USAGE, REFS_VERIFY_USAGE, "git refs list " COMMON_USAGE_FOR_EACH_REF, + REFS_OPTIMIZE_USAGE, NULL, }; parse_opt_subcommand_fn *fn = NULL; @@ -129,6 +145,7 @@ int cmd_refs(int argc, OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate), OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify), OPT_SUBCOMMAND("list", &fn, cmd_refs_list), + OPT_SUBCOMMAND("optimize", &fn, cmd_refs_optimize), OPT_END(), }; From ac0bad0af488aa25ffb2363f79b7e5728fd0cf97 Mon Sep 17 00:00:00 2001 From: Meet Soni Date: Fri, 19 Sep 2025 13:56:46 +0530 Subject: [PATCH 28/35] t0601: refactor tests to be shareable In preparation for adding tests for the new `git refs optimize` command, refactor the existing t0601 test suite to make its logic shareable. Move the core test logic from `t0601-reffiles-pack-refs.sh` into a new `pack-refs-tests.sh` file. Inside this new script, replace hardcoded calls to "pack-refs" with the `$pack_refs` variable. The original `t0601-reffiles-pack-refs.sh` script now becomes a simple "driver". It is responsible for setting the default value of the variable and then sourcing the test library. This new structure follows the established pattern used for sharing tests between `git-for-each-ref` and `git-refs list` and prepares the test suite for the `refs optimize` tests to be added in a subsequent commit. Mentored-by: Patrick Steinhardt Mentored-by: shejialuo Signed-off-by: Meet Soni Signed-off-by: Junio C Hamano --- t/pack-refs-tests.sh | 431 ++++++++++++++++++++++++++++++++++ t/t0601-reffiles-pack-refs.sh | 430 +-------------------------------- 2 files changed, 432 insertions(+), 429 deletions(-) create mode 100644 t/pack-refs-tests.sh diff --git a/t/pack-refs-tests.sh b/t/pack-refs-tests.sh new file mode 100644 index 00000000000000..3dbcc01718e157 --- /dev/null +++ b/t/pack-refs-tests.sh @@ -0,0 +1,431 @@ +pack_refs=${pack_refs:-pack-refs} + +test_expect_success 'enable reflogs' ' + git config core.logallrefupdates true +' + +test_expect_success 'prepare a trivial repository' ' + echo Hello > A && + git update-index --add A && + git commit -m "Initial commit." && + HEAD=$(git rev-parse --verify HEAD) +' + +test_expect_success '${pack_refs} --prune --all' ' + test_path_is_missing .git/packed-refs && + git ${pack_refs} --no-prune --all && + test_path_is_file .git/packed-refs && + N=$(find .git/refs -type f | wc -l) && + test "$N" != 0 && + + git ${pack_refs} --prune --all && + test_path_is_file .git/packed-refs && + N=$(find .git/refs -type f) && + test -z "$N" +' + +SHA1= + +test_expect_success 'see if git show-ref works as expected' ' + git branch a && + SHA1=$(cat .git/refs/heads/a) && + echo "$SHA1 refs/heads/a" >expect && + git show-ref a >result && + test_cmp expect result +' + +test_expect_success 'see if a branch still exists when packed' ' + git branch b && + git ${pack_refs} --all && + rm -f .git/refs/heads/b && + echo "$SHA1 refs/heads/b" >expect && + git show-ref b >result && + test_cmp expect result +' + +test_expect_success 'git branch c/d should barf if branch c exists' ' + git branch c && + git ${pack_refs} --all && + rm -f .git/refs/heads/c && + test_must_fail git branch c/d +' + +test_expect_success 'see if a branch still exists after git ${pack_refs} --prune' ' + git branch e && + git ${pack_refs} --all --prune && + echo "$SHA1 refs/heads/e" >expect && + git show-ref e >result && + test_cmp expect result +' + +test_expect_success 'see if git ${pack_refs} --prune remove ref files' ' + git branch f && + git ${pack_refs} --all --prune && + ! test -f .git/refs/heads/f +' + +test_expect_success 'see if git ${pack_refs} --prune removes empty dirs' ' + git branch r/s/t && + git ${pack_refs} --all --prune && + ! test -e .git/refs/heads/r +' + +test_expect_success 'git branch g should work when git branch g/h has been deleted' ' + git branch g/h && + git ${pack_refs} --all --prune && + git branch -d g/h && + git branch g && + git ${pack_refs} --all && + git branch -d g +' + +test_expect_success 'git branch i/j/k should barf if branch i exists' ' + git branch i && + git ${pack_refs} --all --prune && + test_must_fail git branch i/j/k +' + +test_expect_success 'test git branch k after branch k/l/m and k/lm have been deleted' ' + git branch k/l && + git branch k/lm && + git branch -d k/l && + git branch k/l/m && + git branch -d k/l/m && + git branch -d k/lm && + git branch k +' + +test_expect_success 'test git branch n after some branch deletion and pruning' ' + git branch n/o && + git branch n/op && + git branch -d n/o && + git branch n/o/p && + git branch -d n/op && + git ${pack_refs} --all --prune && + git branch -d n/o/p && + git branch n +' + +test_expect_success 'test excluded refs are not packed' ' + git branch dont_pack1 && + git branch dont_pack2 && + git branch pack_this && + git ${pack_refs} --all --exclude "refs/heads/dont_pack*" && + test -f .git/refs/heads/dont_pack1 && + test -f .git/refs/heads/dont_pack2 && + ! test -f .git/refs/heads/pack_this' + +test_expect_success 'test --no-exclude refs clears excluded refs' ' + git branch dont_pack3 && + git branch dont_pack4 && + git ${pack_refs} --all --exclude "refs/heads/dont_pack*" --no-exclude && + ! test -f .git/refs/heads/dont_pack3 && + ! test -f .git/refs/heads/dont_pack4' + +test_expect_success 'test only included refs are packed' ' + git branch pack_this1 && + git branch pack_this2 && + git tag dont_pack5 && + git ${pack_refs} --include "refs/heads/pack_this*" && + test -f .git/refs/tags/dont_pack5 && + ! test -f .git/refs/heads/pack_this1 && + ! test -f .git/refs/heads/pack_this2' + +test_expect_success 'test --no-include refs clears included refs' ' + git branch pack1 && + git branch pack2 && + git ${pack_refs} --include "refs/heads/pack*" --no-include && + test -f .git/refs/heads/pack1 && + test -f .git/refs/heads/pack2' + +test_expect_success 'test --exclude takes precedence over --include' ' + git branch dont_pack5 && + git ${pack_refs} --include "refs/heads/pack*" --exclude "refs/heads/pack*" && + test -f .git/refs/heads/dont_pack5' + +test_expect_success 'see if up-to-date packed refs are preserved' ' + git branch q && + git ${pack_refs} --all --prune && + git update-ref refs/heads/q refs/heads/q && + ! test -f .git/refs/heads/q +' + +test_expect_success 'pack, prune and repack' ' + git tag foo && + git ${pack_refs} --all --prune && + git show-ref >all-of-them && + git ${pack_refs} && + git show-ref >again && + test_cmp all-of-them again +' + +test_expect_success 'explicit ${pack_refs} with dangling packed reference' ' + git commit --allow-empty -m "soon to be garbage-collected" && + git ${pack_refs} --all && + git reset --hard HEAD^ && + git reflog expire --expire=all --all && + git prune --expire=all && + git ${pack_refs} --all 2>result && + test_must_be_empty result +' + +test_expect_success 'delete ref with dangling packed version' ' + git checkout -b lamb && + git commit --allow-empty -m "future garbage" && + git ${pack_refs} --all && + git reset --hard HEAD^ && + git checkout main && + git reflog expire --expire=all --all && + git prune --expire=all && + git branch -d lamb 2>result && + test_must_be_empty result +' + +test_expect_success 'delete ref while another dangling packed ref' ' + git branch lamb && + git commit --allow-empty -m "future garbage" && + git ${pack_refs} --all && + git reset --hard HEAD^ && + git reflog expire --expire=all --all && + git prune --expire=all && + git branch -d lamb 2>result && + test_must_be_empty result +' + +test_expect_success 'pack ref directly below refs/' ' + git update-ref refs/top HEAD && + git ${pack_refs} --all --prune && + grep refs/top .git/packed-refs && + test_path_is_missing .git/refs/top +' + +test_expect_success 'do not pack ref in refs/bisect' ' + git update-ref refs/bisect/local HEAD && + git ${pack_refs} --all --prune && + ! grep refs/bisect/local .git/packed-refs >/dev/null && + test_path_is_file .git/refs/bisect/local +' + +test_expect_success 'disable reflogs' ' + git config core.logallrefupdates false && + rm -rf .git/logs +' + +test_expect_success 'create packed foo/bar/baz branch' ' + git branch foo/bar/baz && + git ${pack_refs} --all --prune && + test_path_is_missing .git/refs/heads/foo/bar/baz && + test_must_fail git reflog exists refs/heads/foo/bar/baz +' + +test_expect_success 'notice d/f conflict with existing directory' ' + test_must_fail git branch foo && + test_must_fail git branch foo/bar +' + +test_expect_success 'existing directory reports concrete ref' ' + test_must_fail git branch foo 2>stderr && + test_grep refs/heads/foo/bar/baz stderr +' + +test_expect_success 'notice d/f conflict with existing ref' ' + test_must_fail git branch foo/bar/baz/extra && + test_must_fail git branch foo/bar/baz/lots/of/extra/components +' + +test_expect_success 'reject packed-refs with unterminated line' ' + cp .git/packed-refs .git/packed-refs.bak && + test_when_finished "mv .git/packed-refs.bak .git/packed-refs" && + printf "%s" "$HEAD refs/zzzzz" >>.git/packed-refs && + echo "fatal: unterminated line in .git/packed-refs: $HEAD refs/zzzzz" >expected_err && + test_must_fail git for-each-ref >out 2>err && + test_cmp expected_err err +' + +test_expect_success 'reject packed-refs containing junk' ' + cp .git/packed-refs .git/packed-refs.bak && + test_when_finished "mv .git/packed-refs.bak .git/packed-refs" && + printf "%s\n" "bogus content" >>.git/packed-refs && + echo "fatal: unexpected line in .git/packed-refs: bogus content" >expected_err && + test_must_fail git for-each-ref >out 2>err && + test_cmp expected_err err +' + +test_expect_success 'reject packed-refs with a short SHA-1' ' + cp .git/packed-refs .git/packed-refs.bak && + test_when_finished "mv .git/packed-refs.bak .git/packed-refs" && + printf "%.7s %s\n" $HEAD refs/zzzzz >>.git/packed-refs && + printf "fatal: unexpected line in .git/packed-refs: %.7s %s\n" $HEAD refs/zzzzz >expected_err && + test_must_fail git for-each-ref >out 2>err && + test_cmp expected_err err +' + +test_expect_success 'timeout if packed-refs.lock exists' ' + LOCK=.git/packed-refs.lock && + >"$LOCK" && + test_when_finished "rm -f $LOCK" && + test_must_fail git ${pack_refs} --all --prune +' + +test_expect_success 'retry acquiring packed-refs.lock' ' + LOCK=.git/packed-refs.lock && + >"$LOCK" && + test_when_finished "wait && rm -f $LOCK" && + { + ( sleep 1 && rm -f $LOCK ) & + } && + git -c core.packedrefstimeout=3000 ${pack_refs} --all --prune +' + +test_expect_success SYMLINKS 'pack symlinked packed-refs' ' + # First make sure that symlinking works when reading: + git update-ref refs/heads/lossy refs/heads/main && + git for-each-ref >all-refs-before && + mv .git/packed-refs .git/my-deviant-packed-refs && + ln -s my-deviant-packed-refs .git/packed-refs && + git for-each-ref >all-refs-linked && + test_cmp all-refs-before all-refs-linked && + git ${pack_refs} --all --prune && + git for-each-ref >all-refs-packed && + test_cmp all-refs-before all-refs-packed && + test -h .git/packed-refs && + test "$(test_readlink .git/packed-refs)" = "my-deviant-packed-refs" +' + +# The 'packed-refs' file is stored directly in .git/. This means it is global +# to the repository, and can only contain refs that are shared across all +# worktrees. +test_expect_success 'refs/worktree must not be packed' ' + test_commit initial && + test_commit wt1 && + test_commit wt2 && + git worktree add wt1 wt1 && + git worktree add wt2 wt2 && + git checkout initial && + git update-ref refs/worktree/foo HEAD && + git -C wt1 update-ref refs/worktree/foo HEAD && + git -C wt2 update-ref refs/worktree/foo HEAD && + git ${pack_refs} --all && + test_path_is_missing .git/refs/tags/wt1 && + test_path_is_file .git/refs/worktree/foo && + test_path_is_file .git/worktrees/wt1/refs/worktree/foo && + test_path_is_file .git/worktrees/wt2/refs/worktree/foo +' + +# we do not want to count on running ${pack_refs} to +# actually pack it, as it is perfectly reasonable to +# skip processing a broken ref +test_expect_success 'create packed-refs file with broken ref' ' + test_tick && git commit --allow-empty -m one && + recoverable=$(git rev-parse HEAD) && + test_tick && git commit --allow-empty -m two && + missing=$(git rev-parse HEAD) && + rm -f .git/refs/heads/main && + cat >.git/packed-refs <<-EOF && + $missing refs/heads/main + $recoverable refs/heads/other + EOF + echo $missing >expect && + git rev-parse refs/heads/main >actual && + test_cmp expect actual +' + +test_expect_success '${pack_refs} does not silently delete broken packed ref' ' + git ${pack_refs} --all --prune && + git rev-parse refs/heads/main >actual && + test_cmp expect actual +' + +test_expect_success '${pack_refs} does not drop broken refs during deletion' ' + git update-ref -d refs/heads/other && + git rev-parse refs/heads/main >actual && + test_cmp expect actual +' + +for command in "git ${pack_refs} --all --auto" "git maintenance run --task=${pack_refs} --auto" +do + test_expect_success "$command does not repack below 16 refs without packed-refs" ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config set maintenance.auto false && + git commit --allow-empty --message "initial" && + + # Create 14 additional references, which brings us to + # 15 together with the default branch. + printf "create refs/heads/loose-%d HEAD\n" $(test_seq 14) >stdin && + git update-ref --stdin stdin && + git update-ref --stdin stdin && + git update-ref --stdin stdin && + git update-ref --stdin stdin && + git update-ref --stdin A && - git update-index --add A && - git commit -m "Initial commit." && - HEAD=$(git rev-parse --verify HEAD) -' - -test_expect_success 'pack-refs --prune --all' ' - test_path_is_missing .git/packed-refs && - git pack-refs --no-prune --all && - test_path_is_file .git/packed-refs && - N=$(find .git/refs -type f | wc -l) && - test "$N" != 0 && - - git pack-refs --prune --all && - test_path_is_file .git/packed-refs && - N=$(find .git/refs -type f) && - test -z "$N" -' - -SHA1= - -test_expect_success 'see if git show-ref works as expected' ' - git branch a && - SHA1=$(cat .git/refs/heads/a) && - echo "$SHA1 refs/heads/a" >expect && - git show-ref a >result && - test_cmp expect result -' - -test_expect_success 'see if a branch still exists when packed' ' - git branch b && - git pack-refs --all && - rm -f .git/refs/heads/b && - echo "$SHA1 refs/heads/b" >expect && - git show-ref b >result && - test_cmp expect result -' - -test_expect_success 'git branch c/d should barf if branch c exists' ' - git branch c && - git pack-refs --all && - rm -f .git/refs/heads/c && - test_must_fail git branch c/d -' - -test_expect_success 'see if a branch still exists after git pack-refs --prune' ' - git branch e && - git pack-refs --all --prune && - echo "$SHA1 refs/heads/e" >expect && - git show-ref e >result && - test_cmp expect result -' - -test_expect_success 'see if git pack-refs --prune remove ref files' ' - git branch f && - git pack-refs --all --prune && - ! test -f .git/refs/heads/f -' - -test_expect_success 'see if git pack-refs --prune removes empty dirs' ' - git branch r/s/t && - git pack-refs --all --prune && - ! test -e .git/refs/heads/r -' - -test_expect_success 'git branch g should work when git branch g/h has been deleted' ' - git branch g/h && - git pack-refs --all --prune && - git branch -d g/h && - git branch g && - git pack-refs --all && - git branch -d g -' - -test_expect_success 'git branch i/j/k should barf if branch i exists' ' - git branch i && - git pack-refs --all --prune && - test_must_fail git branch i/j/k -' - -test_expect_success 'test git branch k after branch k/l/m and k/lm have been deleted' ' - git branch k/l && - git branch k/lm && - git branch -d k/l && - git branch k/l/m && - git branch -d k/l/m && - git branch -d k/lm && - git branch k -' - -test_expect_success 'test git branch n after some branch deletion and pruning' ' - git branch n/o && - git branch n/op && - git branch -d n/o && - git branch n/o/p && - git branch -d n/op && - git pack-refs --all --prune && - git branch -d n/o/p && - git branch n -' - -test_expect_success 'test excluded refs are not packed' ' - git branch dont_pack1 && - git branch dont_pack2 && - git branch pack_this && - git pack-refs --all --exclude "refs/heads/dont_pack*" && - test -f .git/refs/heads/dont_pack1 && - test -f .git/refs/heads/dont_pack2 && - ! test -f .git/refs/heads/pack_this' - -test_expect_success 'test --no-exclude refs clears excluded refs' ' - git branch dont_pack3 && - git branch dont_pack4 && - git pack-refs --all --exclude "refs/heads/dont_pack*" --no-exclude && - ! test -f .git/refs/heads/dont_pack3 && - ! test -f .git/refs/heads/dont_pack4' - -test_expect_success 'test only included refs are packed' ' - git branch pack_this1 && - git branch pack_this2 && - git tag dont_pack5 && - git pack-refs --include "refs/heads/pack_this*" && - test -f .git/refs/tags/dont_pack5 && - ! test -f .git/refs/heads/pack_this1 && - ! test -f .git/refs/heads/pack_this2' - -test_expect_success 'test --no-include refs clears included refs' ' - git branch pack1 && - git branch pack2 && - git pack-refs --include "refs/heads/pack*" --no-include && - test -f .git/refs/heads/pack1 && - test -f .git/refs/heads/pack2' - -test_expect_success 'test --exclude takes precedence over --include' ' - git branch dont_pack5 && - git pack-refs --include "refs/heads/pack*" --exclude "refs/heads/pack*" && - test -f .git/refs/heads/dont_pack5' - -test_expect_success 'see if up-to-date packed refs are preserved' ' - git branch q && - git pack-refs --all --prune && - git update-ref refs/heads/q refs/heads/q && - ! test -f .git/refs/heads/q -' - -test_expect_success 'pack, prune and repack' ' - git tag foo && - git pack-refs --all --prune && - git show-ref >all-of-them && - git pack-refs && - git show-ref >again && - test_cmp all-of-them again -' - -test_expect_success 'explicit pack-refs with dangling packed reference' ' - git commit --allow-empty -m "soon to be garbage-collected" && - git pack-refs --all && - git reset --hard HEAD^ && - git reflog expire --expire=all --all && - git prune --expire=all && - git pack-refs --all 2>result && - test_must_be_empty result -' - -test_expect_success 'delete ref with dangling packed version' ' - git checkout -b lamb && - git commit --allow-empty -m "future garbage" && - git pack-refs --all && - git reset --hard HEAD^ && - git checkout main && - git reflog expire --expire=all --all && - git prune --expire=all && - git branch -d lamb 2>result && - test_must_be_empty result -' - -test_expect_success 'delete ref while another dangling packed ref' ' - git branch lamb && - git commit --allow-empty -m "future garbage" && - git pack-refs --all && - git reset --hard HEAD^ && - git reflog expire --expire=all --all && - git prune --expire=all && - git branch -d lamb 2>result && - test_must_be_empty result -' - -test_expect_success 'pack ref directly below refs/' ' - git update-ref refs/top HEAD && - git pack-refs --all --prune && - grep refs/top .git/packed-refs && - test_path_is_missing .git/refs/top -' - -test_expect_success 'do not pack ref in refs/bisect' ' - git update-ref refs/bisect/local HEAD && - git pack-refs --all --prune && - ! grep refs/bisect/local .git/packed-refs >/dev/null && - test_path_is_file .git/refs/bisect/local -' - -test_expect_success 'disable reflogs' ' - git config core.logallrefupdates false && - rm -rf .git/logs -' - -test_expect_success 'create packed foo/bar/baz branch' ' - git branch foo/bar/baz && - git pack-refs --all --prune && - test_path_is_missing .git/refs/heads/foo/bar/baz && - test_must_fail git reflog exists refs/heads/foo/bar/baz -' - -test_expect_success 'notice d/f conflict with existing directory' ' - test_must_fail git branch foo && - test_must_fail git branch foo/bar -' - -test_expect_success 'existing directory reports concrete ref' ' - test_must_fail git branch foo 2>stderr && - test_grep refs/heads/foo/bar/baz stderr -' - -test_expect_success 'notice d/f conflict with existing ref' ' - test_must_fail git branch foo/bar/baz/extra && - test_must_fail git branch foo/bar/baz/lots/of/extra/components -' - -test_expect_success 'reject packed-refs with unterminated line' ' - cp .git/packed-refs .git/packed-refs.bak && - test_when_finished "mv .git/packed-refs.bak .git/packed-refs" && - printf "%s" "$HEAD refs/zzzzz" >>.git/packed-refs && - echo "fatal: unterminated line in .git/packed-refs: $HEAD refs/zzzzz" >expected_err && - test_must_fail git for-each-ref >out 2>err && - test_cmp expected_err err -' - -test_expect_success 'reject packed-refs containing junk' ' - cp .git/packed-refs .git/packed-refs.bak && - test_when_finished "mv .git/packed-refs.bak .git/packed-refs" && - printf "%s\n" "bogus content" >>.git/packed-refs && - echo "fatal: unexpected line in .git/packed-refs: bogus content" >expected_err && - test_must_fail git for-each-ref >out 2>err && - test_cmp expected_err err -' - -test_expect_success 'reject packed-refs with a short SHA-1' ' - cp .git/packed-refs .git/packed-refs.bak && - test_when_finished "mv .git/packed-refs.bak .git/packed-refs" && - printf "%.7s %s\n" $HEAD refs/zzzzz >>.git/packed-refs && - printf "fatal: unexpected line in .git/packed-refs: %.7s %s\n" $HEAD refs/zzzzz >expected_err && - test_must_fail git for-each-ref >out 2>err && - test_cmp expected_err err -' - -test_expect_success 'timeout if packed-refs.lock exists' ' - LOCK=.git/packed-refs.lock && - >"$LOCK" && - test_when_finished "rm -f $LOCK" && - test_must_fail git pack-refs --all --prune -' - -test_expect_success 'retry acquiring packed-refs.lock' ' - LOCK=.git/packed-refs.lock && - >"$LOCK" && - test_when_finished "wait && rm -f $LOCK" && - { - ( sleep 1 && rm -f $LOCK ) & - } && - git -c core.packedrefstimeout=3000 pack-refs --all --prune -' - -test_expect_success SYMLINKS 'pack symlinked packed-refs' ' - # First make sure that symlinking works when reading: - git update-ref refs/heads/lossy refs/heads/main && - git for-each-ref >all-refs-before && - mv .git/packed-refs .git/my-deviant-packed-refs && - ln -s my-deviant-packed-refs .git/packed-refs && - git for-each-ref >all-refs-linked && - test_cmp all-refs-before all-refs-linked && - git pack-refs --all --prune && - git for-each-ref >all-refs-packed && - test_cmp all-refs-before all-refs-packed && - test -h .git/packed-refs && - test "$(test_readlink .git/packed-refs)" = "my-deviant-packed-refs" -' - -# The 'packed-refs' file is stored directly in .git/. This means it is global -# to the repository, and can only contain refs that are shared across all -# worktrees. -test_expect_success 'refs/worktree must not be packed' ' - test_commit initial && - test_commit wt1 && - test_commit wt2 && - git worktree add wt1 wt1 && - git worktree add wt2 wt2 && - git checkout initial && - git update-ref refs/worktree/foo HEAD && - git -C wt1 update-ref refs/worktree/foo HEAD && - git -C wt2 update-ref refs/worktree/foo HEAD && - git pack-refs --all && - test_path_is_missing .git/refs/tags/wt1 && - test_path_is_file .git/refs/worktree/foo && - test_path_is_file .git/worktrees/wt1/refs/worktree/foo && - test_path_is_file .git/worktrees/wt2/refs/worktree/foo -' - -# we do not want to count on running pack-refs to -# actually pack it, as it is perfectly reasonable to -# skip processing a broken ref -test_expect_success 'create packed-refs file with broken ref' ' - test_tick && git commit --allow-empty -m one && - recoverable=$(git rev-parse HEAD) && - test_tick && git commit --allow-empty -m two && - missing=$(git rev-parse HEAD) && - rm -f .git/refs/heads/main && - cat >.git/packed-refs <<-EOF && - $missing refs/heads/main - $recoverable refs/heads/other - EOF - echo $missing >expect && - git rev-parse refs/heads/main >actual && - test_cmp expect actual -' - -test_expect_success 'pack-refs does not silently delete broken packed ref' ' - git pack-refs --all --prune && - git rev-parse refs/heads/main >actual && - test_cmp expect actual -' - -test_expect_success 'pack-refs does not drop broken refs during deletion' ' - git update-ref -d refs/heads/other && - git rev-parse refs/heads/main >actual && - test_cmp expect actual -' - -for command in "git pack-refs --all --auto" "git maintenance run --task=pack-refs --auto" -do - test_expect_success "$command does not repack below 16 refs without packed-refs" ' - test_when_finished "rm -rf repo" && - git init repo && - ( - cd repo && - git config set maintenance.auto false && - git commit --allow-empty --message "initial" && - - # Create 14 additional references, which brings us to - # 15 together with the default branch. - printf "create refs/heads/loose-%d HEAD\n" $(test_seq 14) >stdin && - git update-ref --stdin stdin && - git update-ref --stdin stdin && - git update-ref --stdin stdin && - git update-ref --stdin stdin && - git update-ref --stdin Date: Fri, 19 Sep 2025 13:56:47 +0530 Subject: [PATCH 29/35] t: add test for git refs optimize subcommand Add a test script, `t/t1463-refs-optimize.sh`, for the new `git refs optimize` command. This script acts as a simple driver, leveraging the shared test library created in the preceding commit. It works by overriding the `$pack_refs` variable to "refs optimize" and then sourcing the shared library (`t/pack-refs-tests.sh`). This approach ensures that `git refs optimize` is tested against the entire comprehensive test suite of `git pack-refs`, verifying that it acts as a compatible drop-in replacement. Mentored-by: Patrick Steinhardt Mentored-by: shejialuo Signed-off-by: Meet Soni Signed-off-by: Junio C Hamano --- t/meson.build | 3 ++- t/t1463-refs-optimize.sh | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100755 t/t1463-refs-optimize.sh diff --git a/t/meson.build b/t/meson.build index baeeba2ce652d1..92327aabdfb44b 100644 --- a/t/meson.build +++ b/t/meson.build @@ -211,6 +211,7 @@ integration_tests = [ 't1451-fsck-buffer.sh', 't1460-refs-migrate.sh', 't1461-refs-list.sh', + 't1463-refs-optimize.sh', 't1500-rev-parse.sh', 't1501-work-tree.sh', 't1502-rev-parse-parseopt.sh', @@ -1219,4 +1220,4 @@ if perl.found() and time.found() timeout: 0, ) endforeach -endif \ No newline at end of file +endif diff --git a/t/t1463-refs-optimize.sh b/t/t1463-refs-optimize.sh new file mode 100755 index 00000000000000..c11c905d795d26 --- /dev/null +++ b/t/t1463-refs-optimize.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +test_description='git refs optimize should not change the branch semantic + +This test runs git refs optimize and git show-ref and checks that the branch +semantic is still the same. +' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME +GIT_TEST_DEFAULT_REF_FORMAT=files +export GIT_TEST_DEFAULT_REF_FORMAT + +. ./test-lib.sh + +pack_refs='refs optimize' +. "$TEST_DIRECTORY"/pack-refs-tests.sh From f1371a3c9511361d3aedf37f833981113a3b19d8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 22 Sep 2025 15:06:18 +0200 Subject: [PATCH 30/35] t1300: write test expectations in the test's body There are a bunch of tests in t1300 where we write the test expectation handed over to `test_cmp ()` outside of the test body. This does not match our modern test style, and there isn't really a reason why this would need to happen outside of the test bodies. Convert those to instead do so as part of the test itself. While at it, normalize these tests to use `<<\EOF` for those that don't use variable expansion and `<<-EOF` for those that aren't sensitive to indentation. Note that there are two exceptions that we leave as-is for now since they are reused across tests. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t1300-config.sh | 291 ++++++++++++++++++++-------------------------- 1 file changed, 128 insertions(+), 163 deletions(-) diff --git a/t/t1300-config.sh b/t/t1300-config.sh index f856821839247e..538f2c9b8a0d7c 100755 --- a/t/t1300-config.sh +++ b/t/t1300-config.sh @@ -134,38 +134,39 @@ test_expect_success 'clear default config' ' rm -f .git/config ' -cat > expect << EOF +test_expect_success 'initial' ' + cat >expect <<\EOF && [section] penguin = little blue EOF -test_expect_success 'initial' ' git config ${mode_set} section.penguin "little blue" && test_cmp expect .git/config ' -cat > expect << EOF +test_expect_success 'mixed case' ' + cat >expect <<\EOF && [section] penguin = little blue Movie = BadPhysics EOF -test_expect_success 'mixed case' ' git config ${mode_set} Section.Movie BadPhysics && test_cmp expect .git/config ' -cat > expect << EOF +test_expect_success 'similar section' ' + cat >expect <<\EOF && [section] penguin = little blue Movie = BadPhysics [Sections] WhatEver = Second EOF -test_expect_success 'similar section' ' git config ${mode_set} Sections.WhatEver Second && test_cmp expect .git/config ' -cat > expect << EOF +test_expect_success 'uppercase section' ' + cat >expect <<\EOF && [section] penguin = little blue Movie = BadPhysics @@ -173,7 +174,6 @@ cat > expect << EOF [Sections] WhatEver = Second EOF -test_expect_success 'uppercase section' ' git config ${mode_set} SECTION.UPPERCASE true && test_cmp expect .git/config ' @@ -186,7 +186,8 @@ test_expect_success 'replace with non-match (actually matching)' ' git config section.penguin "very blue" !kingpin ' -cat > expect << EOF +test_expect_success 'append comments' ' + cat >expect <<\EOF && [section] Movie = BadPhysics UPPERCASE = true @@ -198,8 +199,6 @@ cat > expect << EOF [Sections] WhatEver = Second EOF - -test_expect_success 'append comments' ' git config --replace-all --comment="Pygoscelis papua" section.penguin gentoo && git config ${mode_set} --comment="find fish" section.disposition peckish && git config ${mode_set} --comment="#abc" section.foo bar && @@ -265,14 +264,15 @@ test_expect_success 'unset with cont. lines' ' git config ${mode_unset} beta.baz ' -cat > expect <<\EOF -[alpha] -bar = foo -[beta] -foo = bar -EOF - -test_expect_success 'unset with cont. lines is correct' 'test_cmp expect .git/config' +test_expect_success 'unset with cont. lines is correct' ' + cat >expect <<-\EOF && + [alpha] + bar = foo + [beta] + foo = bar + EOF + test_cmp expect .git/config +' cat > .git/config << EOF [beta] ; silly comment # another comment @@ -292,16 +292,15 @@ test_expect_success 'multiple unset' ' git config ${mode_unset_all} beta.haha ' -cat > expect << EOF +test_expect_success 'multiple unset is correct' ' + cat >expect < expect << EOF +test_expect_success 'all replaced' ' + cat >expect < expect << EOF +test_expect_success 'really mean test' ' + cat >expect < expect << EOF +test_expect_success 'really really mean test' ' + cat >expect < expect << EOF +test_expect_success 'unset' ' + cat >expect < expect << EOF +test_expect_success 'multivar' ' + cat >expect < expect << EOF +test_expect_success 'multivar replace' ' + cat >expect < expect << EOF +test_expect_success 'multivar unset' ' + cat >expect < expect << EOF +test_expect_success 'hierarchical section value' ' + cat >expect < expect << EOF -beta.noindent=sillyValue -nextsection.nonewline=wow2 for me -123456.a123=987 -version.1.2.3eX.alpha=beta -EOF - test_expect_success 'working --list' ' + cat >expect <<-\EOF && + beta.noindent=sillyValue + nextsection.nonewline=wow2 for me + 123456.a123=987 + version.1.2.3eX.alpha=beta + EOF git config ${mode_prefix}list > output && test_cmp expect output ' @@ -500,44 +495,40 @@ test_expect_success '--list without repo produces empty output' ' test_must_be_empty output ' -cat > expect << EOF -beta.noindent -nextsection.nonewline -123456.a123 -version.1.2.3eX.alpha -EOF - test_expect_success '--name-only --list' ' + cat >expect <<-\EOF && + beta.noindent + nextsection.nonewline + 123456.a123 + version.1.2.3eX.alpha + EOF git config ${mode_prefix}list --name-only >output && test_cmp expect output ' -cat > expect << EOF -beta.noindent sillyValue -nextsection.nonewline wow2 for me -EOF - test_expect_success '--get-regexp' ' + cat >expect <<-\EOF && + beta.noindent sillyValue + nextsection.nonewline wow2 for me + EOF git config ${mode_get_regexp} in >output && test_cmp expect output ' -cat > expect << EOF -beta.noindent -nextsection.nonewline -EOF - test_expect_success '--name-only --get-regexp' ' + cat >expect <<-\EOF && + beta.noindent + nextsection.nonewline + EOF git config ${mode_get_regexp} --name-only in >output && test_cmp expect output ' -cat > expect << EOF -wow2 for me -wow4 for you -EOF - test_expect_success '--add' ' + cat >expect <<-\EOF && + wow2 for me + wow4 for you + EOF git config --add nextsection.nonewline "wow4 for you" && git config ${mode_get_all} nextsection.nonewline > output && test_cmp expect output @@ -558,37 +549,32 @@ test_expect_success 'get variable with empty value' ' git config --get emptyvalue.variable ^$ ' -echo novalue.variable > expect - test_expect_success 'get-regexp variable with no value' ' + echo novalue.variable >expect && git config ${mode_get_regexp} novalue > output && test_cmp expect output ' -echo 'novalue.variable true' > expect - test_expect_success 'get-regexp --bool variable with no value' ' + echo "novalue.variable true" >expect && git config ${mode_get_regexp} --bool novalue > output && test_cmp expect output ' -echo 'emptyvalue.variable ' > expect - test_expect_success 'get-regexp variable with empty value' ' + echo "emptyvalue.variable " >expect && git config ${mode_get_regexp} emptyvalue > output && test_cmp expect output ' -echo true > expect - test_expect_success 'get bool variable with no value' ' + echo true >expect && git config --bool novalue.variable > output && test_cmp expect output ' -echo false > expect - test_expect_success 'get bool variable with empty value' ' + echo false >expect && git config --bool emptyvalue.variable > output && test_cmp expect output ' @@ -604,19 +590,19 @@ cat > .git/config << EOF c = d EOF -cat > expect << EOF +test_expect_success 'new section is partial match of another' ' + cat >expect <<\EOF && [a.b] c = d [a] x = y EOF - -test_expect_success 'new section is partial match of another' ' git config a.x y && test_cmp expect .git/config ' -cat > expect << EOF +test_expect_success 'new variable inserts into proper section' ' + cat >expect <<\EOF && [a.b] c = d [a] @@ -625,8 +611,6 @@ cat > expect << EOF [b] x = y EOF - -test_expect_success 'new variable inserts into proper section' ' git config b.x y && git config a.b c && test_cmp expect .git/config @@ -642,11 +626,10 @@ cat > other-config << EOF bahn = strasse EOF -cat > expect << EOF -ein.bahn=strasse -EOF - test_expect_success 'alternative GIT_CONFIG' ' + cat >expect <<-\EOF && + ein.bahn=strasse + EOF GIT_CONFIG=other-config git config ${mode_prefix}list >output && test_cmp expect output ' @@ -675,14 +658,13 @@ test_expect_success 'refer config from subdirectory' ' test_cmp_config -C x strasse --file=../other-config --get ein.bahn ' -cat > expect << EOF +test_expect_success '--set in alternative file' ' + cat >expect <<\EOF && [ein] bahn = strasse [anwohner] park = ausweis EOF - -test_expect_success '--set in alternative file' ' git config --file=other-config anwohner.park ausweis && test_cmp expect other-config ' @@ -730,7 +712,8 @@ test_expect_success 'rename another section' ' git config ${mode_prefix}rename-section branch."1 234 blabl/a" branch.drei ' -cat > expect << EOF +test_expect_success 'rename succeeded' ' + cat >expect <<\EOF && # Hallo #Bello [branch "zwei"] @@ -740,8 +723,6 @@ cat > expect << EOF [branch "drei"] weird EOF - -test_expect_success 'rename succeeded' ' test_cmp expect .git/config ' @@ -753,7 +734,8 @@ test_expect_success 'rename a section with a var on the same line' ' git config ${mode_prefix}rename-section branch.vier branch.zwei ' -cat > expect << EOF +test_expect_success 'rename succeeded' ' + cat >expect <<\EOF && # Hallo #Bello [branch "zwei"] @@ -765,8 +747,6 @@ weird [branch "zwei"] z = 1 EOF - -test_expect_success 'rename succeeded' ' test_cmp expect .git/config ' @@ -816,32 +796,29 @@ test_expect_success 'remove section' ' git config ${mode_prefix}remove-section branch.zwei ' -cat > expect << EOF +test_expect_success 'section was removed properly' ' + cat >expect <<\EOF && # Hallo #Bello [branch "drei"] weird EOF - -test_expect_success 'section was removed properly' ' test_cmp expect .git/config ' -cat > expect << EOF +test_expect_success 'section ending' ' + cat >expect <<\EOF && [gitcvs] enabled = true dbname = %Ggitcvs2.%a.%m.sqlite [gitcvs "ext"] dbname = %Ggitcvs1.%a.%m.sqlite EOF - -test_expect_success 'section ending' ' rm -f .git/config && git config ${mode_set} gitcvs.enabled true && git config ${mode_set} gitcvs.ext.dbname %Ggitcvs1.%a.%m.sqlite && git config ${mode_set} gitcvs.dbname %Ggitcvs2.%a.%m.sqlite && test_cmp expect .git/config - ' test_expect_success numbers ' @@ -885,19 +862,17 @@ test_expect_success 'invalid stdin config' ' test_grep "bad config line 1 in standard input" output ' -cat > expect << EOF -true -false -true -false -true -false -true -false -EOF - test_expect_success bool ' - + cat >expect <<-\EOF && + true + false + true + false + true + false + true + false + EOF git config ${mode_set} bool.true1 01 && git config ${mode_set} bool.true2 -1 && git config ${mode_set} bool.true3 YeS && @@ -923,7 +898,8 @@ test_expect_success 'invalid bool (set)' ' test_must_fail git config --bool bool.nobool foobar' -cat > expect <<\EOF +test_expect_success 'set --bool' ' + cat >expect <<\EOF && [bool] true1 = true true2 = true @@ -934,9 +910,6 @@ cat > expect <<\EOF false3 = false false4 = false EOF - -test_expect_success 'set --bool' ' - rm -f .git/config && git config --bool bool.true1 01 && git config --bool bool.true2 -1 && @@ -948,15 +921,13 @@ test_expect_success 'set --bool' ' git config --bool bool.false4 FALSE && test_cmp expect .git/config' -cat > expect <<\EOF +test_expect_success 'set --int' ' + cat >expect <<\EOF && [int] val1 = 1 val2 = -1 val3 = 5242880 EOF - -test_expect_success 'set --int' ' - rm -f .git/config && git config --int int.val1 01 && git config --int int.val2 -1 && @@ -994,7 +965,8 @@ test_expect_success 'get --bool-or-int' ' test_cmp expect actual ' -cat >expect <<\EOF +test_expect_success 'set --bool-or-int' ' + cat >expect <<\EOF && [bool] true1 = true false1 = false @@ -1005,8 +977,6 @@ cat >expect <<\EOF int2 = 1 int3 = -1 EOF - -test_expect_success 'set --bool-or-int' ' rm -f .git/config && git config --bool-or-int bool.true1 true && git config --bool-or-int bool.false1 false && @@ -1018,14 +988,13 @@ test_expect_success 'set --bool-or-int' ' test_cmp expect .git/config ' -cat >expect <<\EOF +test_expect_success !MINGW 'set --path' ' + cat >expect <<\EOF && [path] home = ~/ normal = /dev/null trailingtilde = foo~ EOF - -test_expect_success !MINGW 'set --path' ' rm -f .git/config && git config --path path.home "~/" && git config --path path.normal "/dev/null" && @@ -1037,25 +1006,23 @@ then test_set_prereq HOMEVAR fi -cat >expect <expect <<-EOF && + $HOME/ + /dev/null + foo~ + EOF git config --get --path path.home > result && git config --get --path path.normal >> result && git config --get --path path.trailingtilde >> result && test_cmp expect result ' -cat >expect <<\EOF -/dev/null -foo~ -EOF - test_expect_success !MINGW 'get --path copes with unset $HOME' ' + cat >expect <<-\EOF && + /dev/null + foo~ + EOF ( sane_unset HOME && test_must_fail git config --get --path path.home \ @@ -1112,12 +1079,11 @@ test_expect_success 'get --type=color' ' test_cmp expect actual ' -cat >expect << EOF +test_expect_success 'set --type=color' ' + cat >expect <<\EOF && [foo] color = red EOF - -test_expect_success 'set --type=color' ' rm .git/config && git config --type=color foo.color "red" && test_cmp expect .git/config @@ -1133,14 +1099,14 @@ test_expect_success 'set --type=color barfs on non-color' ' test_grep "cannot parse color" error ' -cat > expect << EOF +test_expect_success 'quoting' ' + cat >expect <<\EOF && [quote] leading = " test" ending = "test " semicolon = "test;test" hash = "test#test" EOF -test_expect_success 'quoting' ' rm -f .git/config && git config ${mode_set} quote.leading " test" && git config ${mode_set} quote.ending "test " && @@ -1166,13 +1132,12 @@ inued inued" EOF -cat > expect <<\EOF -section.continued=continued -section.noncont=not continued -section.quotecont=cont;inued -EOF - test_expect_success 'value continued on next line' ' + cat >expect <<-\EOF && + section.continued=continued + section.noncont=not continued + section.quotecont=cont;inued + EOF git config ${mode_prefix}list > result && test_cmp expect result ' From 7f89ad8c8c805b3b062d73d89c0763462a930e92 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 22 Sep 2025 15:06:19 +0200 Subject: [PATCH 31/35] t1300: small style fixups We have a couple of small style violations in t1300: - An empty newline at the start of the test body. - The test command is sometimes on the same line as the test name. - The closing single-quote is sometimes on the same line as the last command of the test. Fix these. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t1300-config.sh | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/t/t1300-config.sh b/t/t1300-config.sh index 538f2c9b8a0d7c..6d1015acfd822a 100755 --- a/t/t1300-config.sh +++ b/t/t1300-config.sh @@ -213,7 +213,9 @@ test_expect_success 'Prohibited LF in comment' ' test_must_fail git config ${mode_set} --comment="a${LF}b" section.k v ' -test_expect_success 'non-match result' 'test_cmp expect .git/config' +test_expect_success 'non-match result' ' + test_cmp expect .git/config +' test_expect_success 'find mixed-case key by canonical name' ' test_cmp_config Second sections.whatever @@ -455,9 +457,13 @@ EOF test_cmp expect .git/config ' -test_expect_success 'invalid key' 'test_must_fail git config inval.2key blabla' +test_expect_success 'invalid key' ' + test_must_fail git config inval.2key blabla +' -test_expect_success 'correct key' 'git config 123456.a123 987' +test_expect_success 'correct key' ' + git config 123456.a123 987 +' test_expect_success 'hierarchical section' ' git config Version.1.2.3eX.Alpha beta @@ -490,6 +496,7 @@ test_expect_success 'working --list' ' git config ${mode_prefix}list > output && test_cmp expect output ' + test_expect_success '--list without repo produces empty output' ' git --git-dir=nonexistent config ${mode_prefix}list >output && test_must_be_empty output @@ -887,16 +894,17 @@ test_expect_success bool ' git config --bool --get bool.true$i >>result && git config --bool --get bool.false$i >>result || return 1 done && - test_cmp expect result' + test_cmp expect result +' test_expect_success 'invalid bool (--get)' ' - git config ${mode_set} bool.nobool foobar && - test_must_fail git config --bool --get bool.nobool' + test_must_fail git config --bool --get bool.nobool +' test_expect_success 'invalid bool (set)' ' - - test_must_fail git config --bool bool.nobool foobar' + test_must_fail git config --bool bool.nobool foobar +' test_expect_success 'set --bool' ' cat >expect <<\EOF && @@ -999,7 +1007,8 @@ EOF git config --path path.home "~/" && git config --path path.normal "/dev/null" && git config --path path.trailingtilde "foo~" && - test_cmp expect .git/config' + test_cmp expect .git/config +' if test_have_prereq !MINGW && test "${HOME+set}" then @@ -1117,10 +1126,13 @@ EOF test_expect_success 'key with newline' ' test_must_fail git config ${mode_get} "key.with -newline" 123' +newline" 123 +' -test_expect_success 'value with newline' 'git config ${mode_set} key.sub value.with\\\ -newline' +test_expect_success 'value with newline' ' + git config ${mode_set} key.sub value.with\\\ +newline +' cat > .git/config <<\EOF [section] @@ -1330,7 +1342,6 @@ test_expect_success 'multiple git -c appends config' ' ' test_expect_success 'last one wins: two level vars' ' - # sec.var and sec.VAR are the same variable, as the first # and the last level of a configuration variable name is # case insensitive. @@ -1349,7 +1360,6 @@ test_expect_success 'last one wins: two level vars' ' ' test_expect_success 'last one wins: three level vars' ' - # v.a.r and v.A.r are not the same variable, as the middle # level of a three-level configuration variable name is # case sensitive. From 6e6ed3eaba315ceab0e0e9256474caac8520a819 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 22 Sep 2025 15:06:20 +0200 Subject: [PATCH 32/35] builtin/config: do not die in `get_color()` When trying to parse an invalid color via `get_color()` we die. We're about to introduce another caller in a subsequent commit though that has its own error handling, so dying is a bit drastic there. Furthermore, the only caller that we already have right now already knows to handle errors in other branches that don't call `get_color()`. Convert the function to instead return an error code to improve its flexibility. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/config.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/builtin/config.c b/builtin/config.c index f70d6354772259..65e79c76735e44 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -547,24 +547,31 @@ static int git_get_color_config(const char *var, const char *value, return 0; } -static void get_color(const struct config_location_options *opts, +static int get_color(const struct config_location_options *opts, const char *var, const char *def_color) { struct get_color_config_data data = { .get_color_slot = var, .parsed_color[0] = '\0', }; + int ret; config_with_options(git_get_color_config, &data, &opts->source, the_repository, &opts->options); if (!data.get_color_found && def_color) { - if (color_parse(def_color, data.parsed_color) < 0) - die(_("unable to parse default color value")); + if (color_parse(def_color, data.parsed_color) < 0) { + ret = error(_("unable to parse default color value")); + goto out; + } } + ret = 0; + +out: fputs(data.parsed_color, stdout); + return ret; } struct get_colorbool_config_data { @@ -1390,7 +1397,7 @@ static int cmd_config_actions(int argc, const char **argv, const char *prefix) } else if (actions == ACTION_GET_COLOR) { check_argc(argc, 1, 2); - get_color(&location_opts, argv[0], argv[1]); + ret = get_color(&location_opts, argv[0], argv[1]); } else if (actions == ACTION_GET_COLORBOOL) { check_argc(argc, 1, 2); From 54b24b108055d9ba4850706a8ed8ee53edf08e37 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 22 Sep 2025 15:06:21 +0200 Subject: [PATCH 33/35] builtin/config: special-case retrieving colors without a key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our documentation for git-config(1) has a section where it explains how to parse and use colors as Git would configure them. In order to get the ANSI color escape sequence to reset the colors to normal we recommend the following command: $ git config get --type=color --default="reset" "" This command is not supposed to parse any configuration keys. Instead, it is expected to parse the "reset" default value and turn it into a proper ANSI color escape sequence. It was reported though [1] that this command doesn't work: $ git config get --type=color --default="reset" "" error: key does not contain a section: This error was introduced in 4e51389000 (builtin/config: introduce "get" subcommand, 2024-05-06), where we introduced the "get" subcommand to retrieve configuration values. The preimage of that commit used `git config --get-color "" "reset"` instead, which still works. This use case is really quite specific to parsing colors, as it wouldn't make sense to give git-config(1) a default value and an empty config key only to return that default value unmodified. But with `--type=color` we don't return the value directly; we instead parse the value into an ANSI escape sequence. As such, we can easily special-case this one use case: - If the provided config key is empty; - the user is asking for a color code; and - the user has provided a default value, then we call `get_color()` directly. Do so to make the documented command work as expected. [1]: Reported-by: SZEDER Gábor Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/config.c | 2 ++ t/t1300-config.sh | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/builtin/config.c b/builtin/config.c index 65e79c76735e44..cfd89a4186bdd1 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -923,6 +923,8 @@ static int cmd_config_get(int argc, const char **argv, const char *prefix, if (url) ret = get_urlmatch(&location_opts, &display_opts, argv[0], url); + else if (display_opts.type == TYPE_COLOR && !strlen(argv[0]) && display_opts.default_value) + ret = get_color(&location_opts, "", display_opts.default_value); else ret = get_value(&location_opts, &display_opts, argv[0], value_pattern, get_value_flags, flags); diff --git a/t/t1300-config.sh b/t/t1300-config.sh index 6d1015acfd822a..3cf5d17abab0bc 100755 --- a/t/t1300-config.sh +++ b/t/t1300-config.sh @@ -1083,11 +1083,22 @@ test_expect_success 'get --type=color' ' rm .git/config && git config ${mode_set} foo.color "red" && git config --get --type=color foo.color >actual.raw && + git config get --type=color foo.color >actual-subcommand.raw && + test_cmp actual.raw actual-subcommand.raw && test_decode_color actual && echo "" >expect && test_cmp expect actual ' +test_expect_success 'get --type=color with default value only' ' + git config --get-color "" "red" >actual.raw && + test_decode_color actual && + echo "" >expect && + test_cmp expect actual && + git config get --type=color --default="red" "" >actual-subcommand.raw && + test_cmp actual.raw actual-subcommand.raw +' + test_expect_success 'set --type=color' ' cat >expect <<\EOF && [foo] From e4dabf4fd62470f03b5aa3f1ad615cd7121cb5c5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 22 Sep 2025 15:06:22 +0200 Subject: [PATCH 34/35] builtin/config: do not spawn pager when printing color codes With `git config get --type=color` the user asks us to parse a specific configuration key and turn the value into an ANSI color escape sequence. The printed string can then for example be used as part of shell scripts to reuse the same colors as Git. Right now though we set up the auto-pager, which means that the string may be written to the pager instead of directly to the terminal. This behaviour is problematic for two reasons: - Color codes are meant for direct terminal output; writing them into a pager does not seem like a sensible thing to do without additional text. - It is inconsistent with `git config --get-color`, which never uses a pager, despite the fact that we claim `git config get --type=color` to be a drop-in replacement in git-config(1). Fix this by disabling the pager when outputting color sequences. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/config.c | 3 ++- t/t1300-config.sh | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/builtin/config.c b/builtin/config.c index cfd89a4186bdd1..c8776ad6519178 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -919,7 +919,8 @@ static int cmd_config_get(int argc, const char **argv, const char *prefix, location_options_init(&location_opts, prefix); display_options_init(&display_opts); - setup_auto_pager("config", 1); + if (display_opts.type != TYPE_COLOR) + setup_auto_pager("config", 1); if (url) ret = get_urlmatch(&location_opts, &display_opts, argv[0], url); diff --git a/t/t1300-config.sh b/t/t1300-config.sh index 3cf5d17abab0bc..358d6363796f48 100755 --- a/t/t1300-config.sh +++ b/t/t1300-config.sh @@ -9,6 +9,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-terminal.sh for mode in legacy subcommands do @@ -1099,6 +1100,14 @@ test_expect_success 'get --type=color with default value only' ' test_cmp actual.raw actual-subcommand.raw ' +test_expect_success TTY 'get --type=color does not use a pager' ' + test_config core.pager "echo foobar" && + test_terminal git config get --type=color --default="red" "" >actual.raw && + test_decode_color actual && + echo "" >expect && + test_cmp expect actual +' + test_expect_success 'set --type=color' ' cat >expect <<\EOF && [foo] From 5099f64a82ccc80f3c6567589bfeb5e9a1b9fd6b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 2 Oct 2025 12:23:32 -0700 Subject: [PATCH 35/35] The fourteenth batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.52.0.adoc | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Documentation/RelNotes/2.52.0.adoc b/Documentation/RelNotes/2.52.0.adoc index 1e5281188fd8c4..b106483f427f20 100644 --- a/Documentation/RelNotes/2.52.0.adoc +++ b/Documentation/RelNotes/2.52.0.adoc @@ -44,6 +44,9 @@ UI, Workflows & Features * The stash.index configuration variable can be set to make "git stash pop/apply" pretend that it was invoked with "--index". + * "git fast-import" learned that "--signed-commits=" option that + corresponds to that of "git fast-export". + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -83,6 +86,10 @@ Performance, Internal Implementation, Development Support etc. singleton variable, which has been updated to pass an instance throughout the callchain. + * The work to build on the bulk-checkin infrastructure to create many + objects at once in a transaction and to abstract it into the + generic object layer continues. + * CodingGuidelines now spells out how bitfields are to be written. * Adjust to the way newer versions of cURL selectivel enables tracing @@ -102,6 +109,10 @@ Performance, Internal Implementation, Development Support etc. while the code has been cleaned up to prevent similar bugs in the future. + * The build procedure based on meson learned a target to only build + documentation, similar to "make doc". + (merge ff4ec8ded0 ps/meson-build-docs later to maint). + Fixes since v2.51 ----------------- @@ -259,6 +270,16 @@ including security updates, are included in this release. * "git last-modified" operating in non-recursive mode used to trigger a BUG(), which has been corrected. + * The use of "git config get" command to learn how ANSI color + sequence is for a particular type, e.g., "git config get + --type=color --default=reset no.such.thing", isn't very ergonomic. + (merge e4dabf4fd6 ps/config-get-color-fixes later to maint). + + * The "do you still use it?" message given by a command that is + deeply deprecated and allow us to suggest alternatives has been + updated. + (merge 54a60e5b38 kh/you-still-use-whatchanged-fix later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 823d537fa7 kh/doc-git-log-markup-fix later to maint). (merge cf7efa4f33 rj/t6137-cygwin-fix later to maint).