From 98518304c5761ba04cefb6d73c5698db7e46d1c2 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:34:57 -0500 Subject: [PATCH 01/17] bulk-checkin: introduce object database transaction structure Object database transaction state is stored across several global variables in the bulk-checkin subsystem. Consolidate this state into a single `struct odb_transaction` global. In a subsequent commit, the transactional interfaces will be updated to wire this structure instead of relying on a global variable. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index b2809ab0398136..82a73da79e8f0e 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -19,11 +19,7 @@ #include "object-file.h" #include "odb.h" -static int odb_transaction_nesting; - -static struct tmp_objdir *bulk_fsync_objdir; - -static struct bulk_checkin_packfile { +struct bulk_checkin_packfile { char *pack_tmp_name; struct hashfile *f; off_t offset; @@ -32,7 +28,13 @@ static struct bulk_checkin_packfile { struct pack_idx_entry **written; uint32_t alloc_written; uint32_t nr_written; -} bulk_checkin_packfile; +}; + +static struct odb_transaction { + int nesting; + struct tmp_objdir *objdir; + struct bulk_checkin_packfile packfile; +} transaction; static void finish_tmp_packfile(struct strbuf *basename, const char *pack_tmp_name, @@ -101,7 +103,7 @@ static void flush_batch_fsync(void) struct strbuf temp_path = STRBUF_INIT; struct tempfile *temp; - if (!bulk_fsync_objdir) + if (!transaction.objdir) return; /* @@ -123,8 +125,8 @@ static void flush_batch_fsync(void) * Make the object files visible in the primary ODB after their data is * fully durable. */ - tmp_objdir_migrate(bulk_fsync_objdir); - bulk_fsync_objdir = NULL; + tmp_objdir_migrate(transaction.objdir); + transaction.objdir = NULL; } static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) @@ -331,12 +333,12 @@ void prepare_loose_object_bulk_checkin(void) * callers may not know whether any objects will be * added at the time they call begin_odb_transaction. */ - if (!odb_transaction_nesting || bulk_fsync_objdir) + if (!transaction.nesting || transaction.objdir) return; - bulk_fsync_objdir = tmp_objdir_create(the_repository, "bulk-fsync"); - if (bulk_fsync_objdir) - tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0); + transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync"); + if (transaction.objdir) + tmp_objdir_replace_primary_odb(transaction.objdir, 0); } void fsync_loose_object_bulk_checkin(int fd, const char *filename) @@ -348,7 +350,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename) * before renaming the objects to their final names as part of * flush_batch_fsync. 
*/ - if (!bulk_fsync_objdir || + if (!transaction.objdir || git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { if (errno == ENOSYS) warning(_("core.fsyncMethod = batch is unsupported on this platform")); @@ -360,31 +362,31 @@ int index_blob_bulk_checkin(struct object_id *oid, int fd, size_t size, const char *path, unsigned flags) { - int status = deflate_blob_to_pack(&bulk_checkin_packfile, oid, fd, size, + int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size, path, flags); - if (!odb_transaction_nesting) - flush_bulk_checkin_packfile(&bulk_checkin_packfile); + if (!transaction.nesting) + flush_bulk_checkin_packfile(&transaction.packfile); return status; } void begin_odb_transaction(void) { - odb_transaction_nesting += 1; + transaction.nesting += 1; } void flush_odb_transaction(void) { flush_batch_fsync(); - flush_bulk_checkin_packfile(&bulk_checkin_packfile); + flush_bulk_checkin_packfile(&transaction.packfile); } void end_odb_transaction(void) { - odb_transaction_nesting -= 1; - if (odb_transaction_nesting < 0) + transaction.nesting -= 1; + if (transaction.nesting < 0) BUG("Unbalanced ODB transaction nesting"); - if (odb_transaction_nesting) + if (transaction.nesting) return; flush_odb_transaction(); From b3361447256bb92a1dbdda910a33cfb1d6fc8f88 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:34:58 -0500 Subject: [PATCH 02/17] bulk-checkin: remove global transaction state Object database transactions in the bulk-checkin subsystem rely on global state to track transaction status. Stop relying on global state and instead store the transaction in the `struct object_database`. Functions that operate on transactions are updated to now wire transaction state. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/add.c | 5 ++- builtin/unpack-objects.c | 5 ++- builtin/update-index.c | 7 ++-- bulk-checkin.c | 82 ++++++++++++++++++++++++++-------------- bulk-checkin.h | 18 +++++---- cache-tree.c | 5 ++- object-file.c | 11 +++--- odb.h | 8 ++++ read-cache.c | 5 ++- 9 files changed, 94 insertions(+), 52 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index 0235854f8099c4..740c7c45817828 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -389,6 +389,7 @@ int cmd_add(int argc, char *seen = NULL; char *ps_matched = NULL; struct lock_file lock_file = LOCK_INIT; + struct odb_transaction *transaction; repo_config(repo, add_config, NULL); @@ -574,7 +575,7 @@ int cmd_add(int argc, string_list_clear(&only_match_skip_worktree, 0); } - begin_odb_transaction(); + transaction = begin_odb_transaction(repo->objects); ps_matched = xcalloc(pathspec.nr, 1); if (add_renormalize) @@ -593,7 +594,7 @@ int cmd_add(int argc, if (chmod_arg && pathspec.nr) exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only); - end_odb_transaction(); + end_odb_transaction(transaction); finish: if (write_locked_index(repo->index, &lock_file, diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 7ae7c82b6c05a6..28124b324d2641 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -584,6 +584,7 @@ static void unpack_all(void) { int i; unsigned char *hdr = fill(sizeof(struct pack_header)); + struct odb_transaction *transaction; if (get_be32(hdr) != PACK_SIGNATURE) die("bad pack file"); @@ -599,12 +600,12 @@ static void unpack_all(void) progress = start_progress(the_repository, _("Unpacking objects"), nr_objects); CALLOC_ARRAY(obj_list, nr_objects); - begin_odb_transaction(); + transaction = begin_odb_transaction(the_repository->objects); 
for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); } - end_odb_transaction(); + end_odb_transaction(transaction); stop_progress(&progress); if (delta_list) diff --git a/builtin/update-index.c b/builtin/update-index.c index 2380f3ccd68c8c..2ba2d29c959fac 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -77,7 +77,7 @@ static void report(const char *fmt, ...) * objects invisible while a transaction is active, so flush the * transaction here before reporting a change made by update-index. */ - flush_odb_transaction(); + flush_odb_transaction(the_repository->objects->transaction); va_start(vp, fmt); vprintf(fmt, vp); putchar('\n'); @@ -940,6 +940,7 @@ int cmd_update_index(int argc, strbuf_getline_fn getline_fn; int parseopt_state = PARSE_OPT_UNKNOWN; struct repository *r = the_repository; + struct odb_transaction *transaction; struct option options[] = { OPT_BIT('q', NULL, &refresh_args.flags, N_("continue refresh even when index needs update"), @@ -1130,7 +1131,7 @@ int cmd_update_index(int argc, * Allow the object layer to optimize adding multiple objects in * a batch. */ - begin_odb_transaction(); + transaction = begin_odb_transaction(the_repository->objects); while (ctx.argc) { if (parseopt_state != PARSE_OPT_DONE) parseopt_state = parse_options_step(&ctx, options, @@ -1213,7 +1214,7 @@ int cmd_update_index(int argc, /* * By now we have added all of the new objects */ - end_odb_transaction(); + end_odb_transaction(transaction); if (split_index > 0) { if (repo_config_get_split_index(the_repository) == 0) diff --git a/bulk-checkin.c b/bulk-checkin.c index 82a73da79e8f0e..53a20a2d92fd77 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -30,11 +30,13 @@ struct bulk_checkin_packfile { uint32_t nr_written; }; -static struct odb_transaction { +struct odb_transaction { + struct object_database *odb; + int nesting; struct tmp_objdir *objdir; struct bulk_checkin_packfile packfile; -} transaction; +}; static void finish_tmp_packfile(struct strbuf *basename, const char *pack_tmp_name, @@ -98,12 +100,12 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) /* * Cleanup after batch-mode fsync_object_files. */ -static void flush_batch_fsync(void) +static void flush_batch_fsync(struct odb_transaction *transaction) { struct strbuf temp_path = STRBUF_INIT; struct tempfile *temp; - if (!transaction.objdir) + if (!transaction->objdir) return; /* @@ -125,8 +127,8 @@ static void flush_batch_fsync(void) * Make the object files visible in the primary ODB after their data is * fully durable. */ - tmp_objdir_migrate(transaction.objdir); - transaction.objdir = NULL; + tmp_objdir_migrate(transaction->objdir); + transaction->objdir = NULL; } static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) @@ -325,7 +327,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, return 0; } -void prepare_loose_object_bulk_checkin(void) +void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) { /* * We lazily create the temporary object directory @@ -333,15 +335,16 @@ void prepare_loose_object_bulk_checkin(void) * callers may not know whether any objects will be * added at the time they call begin_odb_transaction. 
*/ - if (!transaction.nesting || transaction.objdir) + if (!transaction || transaction->objdir) return; - transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync"); - if (transaction.objdir) - tmp_objdir_replace_primary_odb(transaction.objdir, 0); + transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync"); + if (transaction->objdir) + tmp_objdir_replace_primary_odb(transaction->objdir, 0); } -void fsync_loose_object_bulk_checkin(int fd, const char *filename) +void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, + int fd, const char *filename) { /* * If we have an active ODB transaction, we issue a call that @@ -350,7 +353,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename) * before renaming the objects to their final names as part of * flush_batch_fsync. */ - if (!transaction.objdir || + if (!transaction || !transaction->objdir || git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { if (errno == ENOSYS) warning(_("core.fsyncMethod = batch is unsupported on this platform")); @@ -358,36 +361,57 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename) } } -int index_blob_bulk_checkin(struct object_id *oid, - int fd, size_t size, +int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *oid, int fd, size_t size, const char *path, unsigned flags) { - int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size, - path, flags); - if (!transaction.nesting) - flush_bulk_checkin_packfile(&transaction.packfile); + int status; + + if (transaction) { + status = deflate_blob_to_pack(&transaction->packfile, oid, fd, + size, path, flags); + } else { + struct bulk_checkin_packfile state = { 0 }; + + status = deflate_blob_to_pack(&state, oid, fd, size, path, flags); + flush_bulk_checkin_packfile(&state); + } + return status; } -void begin_odb_transaction(void) +struct odb_transaction *begin_odb_transaction(struct object_database *odb) { - transaction.nesting += 1; + if (!odb->transaction) { + CALLOC_ARRAY(odb->transaction, 1); + odb->transaction->odb = odb; + } + + odb->transaction->nesting += 1; + + return odb->transaction; } -void flush_odb_transaction(void) +void flush_odb_transaction(struct odb_transaction *transaction) { - flush_batch_fsync(); - flush_bulk_checkin_packfile(&transaction.packfile); + if (!transaction) + return; + + flush_batch_fsync(transaction); + flush_bulk_checkin_packfile(&transaction->packfile); } -void end_odb_transaction(void) +void end_odb_transaction(struct odb_transaction *transaction) { - transaction.nesting -= 1; - if (transaction.nesting < 0) + if (!transaction || transaction->nesting == 0) BUG("Unbalanced ODB transaction nesting"); - if (transaction.nesting) + transaction->nesting -= 1; + + if (transaction->nesting) return; - flush_odb_transaction(); + flush_odb_transaction(transaction); + transaction->odb->transaction = NULL; + free(transaction); } diff --git a/bulk-checkin.h b/bulk-checkin.h index 7246ea58dcf348..16254ce6a704f6 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -5,9 +5,13 @@ #define BULK_CHECKIN_H #include "object.h" +#include "odb.h" -void prepare_loose_object_bulk_checkin(void); -void fsync_loose_object_bulk_checkin(int fd, const char *filename); +struct odb_transaction; + +void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction); +void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, + int fd, const char *filename); /* * This creates one packfile per large blob unless bulk-checkin @@ -24,8 +28,8 @@ void 
fsync_loose_object_bulk_checkin(int fd, const char *filename); * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -int index_blob_bulk_checkin(struct object_id *oid, - int fd, size_t size, +int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *oid, int fd, size_t size, const char *path, unsigned flags); /* @@ -35,20 +39,20 @@ int index_blob_bulk_checkin(struct object_id *oid, * and objects are only visible after the outermost transaction * is complete or the transaction is flushed. */ -void begin_odb_transaction(void); +struct odb_transaction *begin_odb_transaction(struct object_database *odb); /* * Make any objects that are currently part of a pending object * database transaction visible. It is valid to call this function * even if no transaction is active. */ -void flush_odb_transaction(void); +void flush_odb_transaction(struct odb_transaction *transaction); /* * Tell the object database to make any objects from the * current transaction visible if this is the final nested * transaction. */ -void end_odb_transaction(void); +void end_odb_transaction(struct odb_transaction *transaction); #endif diff --git a/cache-tree.c b/cache-tree.c index 66ef2becbe01a4..d225554eedd920 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -474,6 +474,7 @@ static int update_one(struct cache_tree *it, int cache_tree_update(struct index_state *istate, int flags) { + struct odb_transaction *transaction; int skip, i; i = verify_cache(istate, flags); @@ -489,10 +490,10 @@ int cache_tree_update(struct index_state *istate, int flags) trace_performance_enter(); trace2_region_enter("cache_tree", "update", the_repository); - begin_odb_transaction(); + transaction = begin_odb_transaction(the_repository->objects); i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, "", 0, &skip, flags); - end_odb_transaction(); + end_odb_transaction(transaction); trace2_region_leave("cache_tree", "update", the_repository); trace_performance_leave("cache_tree_update"); if (i < 0) diff --git a/object-file.c b/object-file.c index 2bc36ab3ee8cbf..1740aa2b2e35f3 100644 --- a/object-file.c +++ b/object-file.c @@ -674,7 +674,7 @@ static void close_loose_object(struct odb_source *source, goto out; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - fsync_loose_object_bulk_checkin(fd, filename); + fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename); else if (fsync_object_files > 0) fsync_or_die(fd, filename); else @@ -852,7 +852,7 @@ static int write_loose_object(struct odb_source *source, static struct strbuf filename = STRBUF_INIT; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(); + prepare_loose_object_bulk_checkin(source->odb->transaction); odb_loose_path(source, &filename, oid); @@ -941,7 +941,7 @@ int stream_loose_object(struct odb_source *source, int hdrlen; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(); + prepare_loose_object_bulk_checkin(source->odb->transaction); /* Since oid is not determined, save tmp file to odb path. 
*/ strbuf_addf(&filename, "%s/", source->path); @@ -1263,8 +1263,9 @@ int index_fd(struct index_state *istate, struct object_id *oid, ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); else - ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path, - flags); + ret = index_blob_bulk_checkin(the_repository->objects->transaction, + oid, fd, xsize_t(st->st_size), + path, flags); close(fd); return ret; } diff --git a/odb.h b/odb.h index 3dfc66d75a3d20..a89b2143909920 100644 --- a/odb.h +++ b/odb.h @@ -84,6 +84,7 @@ struct odb_source { struct packed_git; struct cached_object_entry; +struct odb_transaction; /* * The object database encapsulates access to objects in a repository. It @@ -94,6 +95,13 @@ struct object_database { /* Repository that owns this database. */ struct repository *repo; + /* + * State of the current object database transaction. Only one + * transaction may be pending at a time. Is NULL when no transaction is + * configured. + */ + struct odb_transaction *transaction; + /* * Set of all object directories; the main directory is first (and * cannot be NULL after initialization). Subsequent directories are diff --git a/read-cache.c b/read-cache.c index 06ad74db2286ae..229b8ef11c9a74 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3947,6 +3947,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix, const struct pathspec *pathspec, char *ps_matched, int include_sparse, int flags) { + struct odb_transaction *transaction; struct update_callback_data data; struct rev_info rev; @@ -3972,9 +3973,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix, * This function is invoked from commands other than 'add', which * may not have their own transaction active. */ - begin_odb_transaction(); + transaction = begin_odb_transaction(repo->objects); run_diff_files(&rev, DIFF_RACY_IS_MODIFIED); - end_odb_transaction(); + end_odb_transaction(transaction); release_revisions(&rev); return !!data.add_errors; From aa4d81b53311fcdf099400beebad99c14be4b561 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:34:59 -0500 Subject: [PATCH 03/17] bulk-checkin: require transaction for index_blob_bulk_checkin() The bulk-checkin subsystem provides a mechanism to write blobs directly to a packfile via `index_blob_bulk_checkin()`. If there is an ongoing transaction when invoked, objects written via this function are stored in the same packfile. The packfile is not flushed until the transaction itself is flushed. If there is no transaction, the single object is written to a packfile and immediately flushed. This complicates `index_blob_bulk_checkin()` as it cannot reliably use the provided transaction to get the associated repository. Update `index_blob_bulk_checkin()` to assume that a valid transaction is always provided. Callers are now expected to ensure a transaction is set up beforehand. With this simplification, `deflate_blob_to_pack()` is no longer needed as a standalone internal function and is combined with `index_blob_bulk_checkin()`. The single call site in `object-file.c:index_fd()` is updated accordingly. Due to how `{begin,end}_odb_transaction()` handles nested transactions, a new transaction is only created and committed if there is not already an ongoing transaction.
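For illustration, the caller pattern under this rule looks like the sketch below. The helper name `index_blob_in_own_transaction()` is made up for the example and the argument list is simplified; the real conversion is the `index_fd()` hunk in this patch.

    static int index_blob_in_own_transaction(struct object_id *oid, int fd,
                                             size_t size, const char *path,
                                             unsigned flags)
    {
            struct odb_transaction *transaction;
            int ret;

            /*
             * Joins an already-active transaction if one exists; otherwise a
             * fresh transaction is created here and flushed by the matching
             * end_odb_transaction() call.
             */
            transaction = begin_odb_transaction(the_repository->objects);
            ret = index_blob_bulk_checkin(transaction, oid, fd, size,
                                          path, flags);
            end_odb_transaction(transaction);

            return ret;
    }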
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 27 ++++----------------------- bulk-checkin.h | 7 +++++-- object-file.c | 21 ++++++++++++++------- 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 53a20a2d92fd77..542d8125a863e2 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -254,11 +254,11 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state, die_errno("unable to write pack header"); } -static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, - struct object_id *result_oid, - int fd, size_t size, - const char *path, unsigned flags) +int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, size_t size, + const char *path, unsigned flags) { + struct bulk_checkin_packfile *state = &transaction->packfile; off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; @@ -361,25 +361,6 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, } } -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *oid, int fd, size_t size, - const char *path, unsigned flags) -{ - int status; - - if (transaction) { - status = deflate_blob_to_pack(&transaction->packfile, oid, fd, - size, path, flags); - } else { - struct bulk_checkin_packfile state = { 0 }; - - status = deflate_blob_to_pack(&state, oid, fd, size, path, flags); - flush_bulk_checkin_packfile(&state); - } - - return status; -} - struct odb_transaction *begin_odb_transaction(struct object_database *odb) { if (!odb->transaction) { diff --git a/bulk-checkin.h b/bulk-checkin.h index 16254ce6a704f6..ac8887f476b496 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -14,8 +14,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, int fd, const char *filename); /* - * This creates one packfile per large blob unless bulk-checkin - * machinery is "plugged". + * This writes the specified object to a packfile. Objects written here + * during the same transaction are written to the same packfile. The + * packfile is not flushed until the transaction is flushed. The caller + * is expected to ensure a valid transaction is setup for objects to be + * recorded to. * * This also bypasses the usual "convert-to-git" dance, and that is on * purpose. We could write a streaming version of the converting diff --git a/object-file.c b/object-file.c index 1740aa2b2e35f3..bc15af42450949 100644 --- a/object-file.c +++ b/object-file.c @@ -1253,19 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid, * Call xsize_t() only when needed to avoid potentially unnecessary * die() for large files. 
*/ - if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) + if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) { ret = index_stream_convert_blob(istate, oid, fd, path, flags); - else if (!S_ISREG(st->st_mode)) + } else if (!S_ISREG(st->st_mode)) { ret = index_pipe(istate, oid, fd, type, path, flags); - else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) || - type != OBJ_BLOB || - (path && would_convert_to_git(istate, path)) + } else if ((st->st_size >= 0 && + (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) || + type != OBJ_BLOB || + (path && would_convert_to_git(istate, path))) { ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); - else - ret = index_blob_bulk_checkin(the_repository->objects->transaction, + } else { + struct odb_transaction *transaction; + + transaction = begin_odb_transaction(the_repository->objects); + ret = index_blob_bulk_checkin(transaction, oid, fd, xsize_t(st->st_size), path, flags); + end_odb_transaction(transaction); + } + close(fd); return ret; } From ddc0b56ad77d7c86145a6a1774f05f9d11bf2337 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:35:00 -0500 Subject: [PATCH 04/17] bulk-checkin: use repository variable from transaction The bulk-checkin subsystem depends on `the_repository`. Adapt functions and call sites to access the repository through `struct odb_transaction` instead. The `USE_THE_REPOSITORY_VARIABLE` is still required as the `pack_compression_level` and `pack_size_limit_cfg` globals are still used. Also adapt functions using packfile state to instead access it through the transaction. This makes some function parameters redundant, so they go away.
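Put differently, helpers that used to reach for `the_repository` can derive it from the transaction they are handed. A minimal sketch of the shape (`example_flush()` is a made-up name; the concrete conversions are in the diff below):

    static void example_flush(struct odb_transaction *transaction)
    {
            /*
             * The owning object database and its repository are reachable
             * from the transaction, so no global state is needed here.
             */
            struct object_database *odb = transaction->odb;
            struct repository *repo = odb->repo;

            /* e.g. what flush_bulk_checkin_packfile() does at the end: */
            reprepare_packed_git(repo);
    }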
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 67 +++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 542d8125a863e2..124c49306769a5 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -38,25 +38,26 @@ struct odb_transaction { struct bulk_checkin_packfile packfile; }; -static void finish_tmp_packfile(struct strbuf *basename, - const char *pack_tmp_name, - struct pack_idx_entry **written_list, - uint32_t nr_written, - struct pack_idx_option *pack_idx_opts, +static void finish_tmp_packfile(struct odb_transaction *transaction, + struct strbuf *basename, unsigned char hash[]) { + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; char *idx_tmp_name = NULL; - stage_tmp_packfiles(the_repository, basename, pack_tmp_name, - written_list, nr_written, NULL, pack_idx_opts, hash, - &idx_tmp_name); - rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name); + stage_tmp_packfiles(repo, basename, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); free(idx_tmp_name); } -static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) +static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) { + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; unsigned char hash[GIT_MAX_RAWSZ]; struct strbuf packname = STRBUF_INIT; @@ -73,17 +74,17 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); } else { int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); - fixup_pack_header_footer(the_hash_algo, fd, hash, state->pack_tmp_name, + fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, state->nr_written, hash, state->offset); close(fd); } - strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository), - hash_to_hex(hash)); - finish_tmp_packfile(&packname, state->pack_tmp_name, - state->written, state->nr_written, - &state->pack_idx_opts, hash); + strbuf_addf(&packname, "%s/pack/pack-%s.", + repo_get_object_directory(transaction->odb->repo), + hash_to_hex_algop(hash, repo->hash_algo)); + + finish_tmp_packfile(transaction, &packname, hash); for (uint32_t i = 0; i < state->nr_written; i++) free(state->written[i]); @@ -94,7 +95,7 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) strbuf_release(&packname); /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(the_repository); + reprepare_packed_git(repo); } /* @@ -117,7 +118,8 @@ static void flush_batch_fsync(struct odb_transaction *transaction) * to ensure that the data in each new object file is durable before * the final name is visible. 
*/ - strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository)); + strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", + repo_get_object_directory(transaction->odb->repo)); temp = xmks_tempfile(temp_path.buf); fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); delete_tempfile(&temp); @@ -131,16 +133,17 @@ static void flush_batch_fsync(struct odb_transaction *transaction) transaction->objdir = NULL; } -static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) +static int already_written(struct odb_transaction *transaction, + struct object_id *oid) { /* The object may already exist in the repository */ - if (odb_has_object(the_repository->objects, oid, + if (odb_has_object(transaction->odb, oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return 1; /* Might want to keep the list sorted */ - for (uint32_t i = 0; i < state->nr_written; i++) - if (oideq(&state->written[i]->oid, oid)) + for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) + if (oideq(&transaction->packfile.written[i]->oid, oid)) return 1; /* This is a new object we need to keep */ @@ -239,13 +242,15 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state, } /* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct bulk_checkin_packfile *state, +static void prepare_to_stream(struct odb_transaction *transaction, unsigned flags) { + struct bulk_checkin_packfile *state = &transaction->packfile; if (!(flags & INDEX_WRITE_OBJECT) || state->f) return; - state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name); + state->f = create_tmp_packfile(transaction->odb->repo, + &state->pack_tmp_name); reset_pack_idx_option(&state->pack_idx_opts); /* Pretend we are going to write only one object */ @@ -272,21 +277,21 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, header_len = format_object_header((char *)obuf, sizeof(obuf), OBJ_BLOB, size); - the_hash_algo->init_fn(&ctx); + transaction->odb->repo->hash_algo->init_fn(&ctx); git_hash_update(&ctx, obuf, header_len); /* Note: idx is non-NULL when we are writing */ if ((flags & INDEX_WRITE_OBJECT) != 0) { CALLOC_ARRAY(idx, 1); - prepare_to_stream(state, flags); + prepare_to_stream(transaction, flags); hashfile_checkpoint_init(state->f, &checkpoint); } already_hashed_to = 0; while (1) { - prepare_to_stream(state, flags); + prepare_to_stream(transaction, flags); if (idx) { hashfile_checkpoint(state->f, &checkpoint); idx->offset = state->offset; @@ -304,7 +309,7 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, BUG("should not happen"); hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(state); + flush_bulk_checkin_packfile(transaction); if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) return error("cannot seek back"); } @@ -313,7 +318,7 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, return 0; idx->crc32 = crc32_end(state->f); - if (already_written(state, result_oid)) { + if (already_written(transaction, result_oid)) { hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; free(idx); @@ -338,7 +343,7 @@ void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) if (!transaction || transaction->objdir) return; - transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync"); + transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); if (transaction->objdir) 
tmp_objdir_replace_primary_odb(transaction->objdir, 0); } @@ -379,7 +384,7 @@ void flush_odb_transaction(struct odb_transaction *transaction) return; flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(&transaction->packfile); + flush_bulk_checkin_packfile(transaction); } void end_odb_transaction(struct odb_transaction *transaction) From bf5c224537532eb61b2c6d88162c9dffd8bcfd96 Mon Sep 17 00:00:00 2001 From: Mikhail Malinouski Date: Wed, 3 Sep 2025 14:50:40 +0000 Subject: [PATCH 05/17] docs: fix typo in worktree.adoc 'extension' The documentation incorrectly referred to the extension without an 's'. This fixes the typo for clarity. Signed-off-by: Mikhail Malinouski Signed-off-by: Junio C Hamano --- Documentation/config/worktree.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/config/worktree.adoc b/Documentation/config/worktree.adoc index 5e35c7d018aecd..9e3f84f748c4aa 100644 --- a/Documentation/config/worktree.adoc +++ b/Documentation/config/worktree.adoc @@ -15,5 +15,5 @@ worktree.useRelativePaths:: different locations or environments. Defaults to "false". + Note that setting `worktree.useRelativePaths` to "true" implies enabling the -`extension.relativeWorktrees` config (see linkgit:git-config[1]), +`extensions.relativeWorktrees` config (see linkgit:git-config[1]), thus making it incompatible with older versions of Git. From a92f5ca0d5c1b27f70a519efba967d613fd48a7a Mon Sep 17 00:00:00 2001 From: Lucas Seiki Oshiro Date: Thu, 4 Sep 2025 10:40:16 -0300 Subject: [PATCH 06/17] repo: add the flag -z as an alias for --format=nul Other Git commands that have nul-terminated output (e.g. git-config, git-status, git-ls-files) have a flag `-z` for using the null character as the record separator. Add the `-z` flag to git-repo-info as an alias for `--format=nul`, making it consistent with the behavior of the other commands. Mentored-by: Karthik Nayak Mentored-by: Patrick Steinhardt Signed-off-by: Lucas Seiki Oshiro Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 6 ++++-- builtin/repo.c | 38 +++++++++++++++++++++++++------------ t/t1900-repo.sh | 12 ++++++++++++ 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 2870828d936192..6f5ee882157c16 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -8,7 +8,7 @@ git-repo - Retrieve information about the repository SYNOPSIS -------- [synopsis] -git repo info [--format=(keyvalue|nul)] [...] +git repo info [--format=(keyvalue|nul)] [-z] [...] DESCRIPTION ----------- @@ -18,7 +18,7 @@ THIS COMMAND IS EXPERIMENTAL. THE BEHAVIOR MAY CHANGE. COMMANDS -------- -`info [--format=(keyvalue|nul)] [...]`:: +`info [--format=(keyvalue|nul)] [-z] [...]`:: Retrieve metadata-related information about the current repository. Only the requested data will be returned based on their keys (see "INFO KEYS" section below). @@ -40,6 +40,8 @@ supported: between the key and the value and using a NUL character after each value. This format is better suited for being parsed by another applications than `keyvalue`. Unlike in the `keyvalue` format, the values are never quoted. ++ +`-z` is an alias for `--format=nul`. 
INFO KEYS --------- diff --git a/builtin/repo.c b/builtin/repo.c index 8c6e7f42aba107..dc9a2674694667 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -9,7 +9,7 @@ #include "shallow.h" static const char *const repo_usage[] = { - "git repo info [--format=(keyvalue|nul)] [...]", + "git repo info [--format=(keyvalue|nul)] [-z] [...]", NULL }; @@ -112,26 +112,40 @@ static int print_fields(int argc, const char **argv, return ret; } +static int parse_format_cb(const struct option *opt, + const char *arg, int unset UNUSED) +{ + enum output_format *format = opt->value; + + if (opt->short_name == 'z') + *format = FORMAT_NUL_TERMINATED; + else if (!strcmp(arg, "nul")) + *format = FORMAT_NUL_TERMINATED; + else if (!strcmp(arg, "keyvalue")) + *format = FORMAT_KEYVALUE; + else + die(_("invalid format '%s'"), arg); + + return 0; +} + static int repo_info(int argc, const char **argv, const char *prefix, struct repository *repo) { - const char *format_str = "keyvalue"; - enum output_format format; + enum output_format format = FORMAT_KEYVALUE; struct option options[] = { - OPT_STRING(0, "format", &format_str, N_("format"), - N_("output format")), + OPT_CALLBACK_F(0, "format", &format, N_("format"), + N_("output format"), + PARSE_OPT_NONEG, parse_format_cb), + OPT_CALLBACK_F('z', NULL, &format, NULL, + N_("synonym for --format=nul"), + PARSE_OPT_NONEG | PARSE_OPT_NOARG, + parse_format_cb), OPT_END() }; argc = parse_options(argc, argv, prefix, options, repo_usage, 0); - if (!strcmp(format_str, "keyvalue")) - format = FORMAT_KEYVALUE; - else if (!strcmp(format_str, "nul")) - format = FORMAT_NUL_TERMINATED; - else - die(_("invalid format '%s'"), format_str); - return print_fields(argc, argv, repo, format); } diff --git a/t/t1900-repo.sh b/t/t1900-repo.sh index a69c715357e26b..ddf788d5a26318 100755 --- a/t/t1900-repo.sh +++ b/t/t1900-repo.sh @@ -92,4 +92,16 @@ test_expect_success 'git-repo-info aborts when requesting an invalid format' ' test_cmp expect actual ' +test_expect_success '-z uses nul-terminated format' ' + printf "layout.bare\nfalse\0layout.shallow\nfalse\0" >expected && + git repo info -z layout.bare layout.shallow >actual && + test_cmp expected actual +' + +test_expect_success 'git repo info uses the last requested format' ' + echo "layout.bare=false" >expected && + git repo info --format=nul -z --format=keyvalue layout.bare >actual && + test_cmp expected actual +' + test_done From c2e3713334aa822683f046bbad7905ec8510d38b Mon Sep 17 00:00:00 2001 From: Lucas Seiki Oshiro Date: Thu, 4 Sep 2025 10:40:17 -0300 Subject: [PATCH 07/17] repo: add the field objects.format The flag `--show-object-format` from git-rev-parse is used for retrieving the object storage format. This way, it is used for querying repository metadata, fitting in the purpose of git-repo-info. Add a new field `objects.format` to the git-repo-info subcommand containing that information. Mentored-by: Karthik Nayak Mentored-by: Patrick Steinhardt Signed-off-by: Lucas Seiki Oshiro Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 3 +++ builtin/repo.c | 7 +++++++ t/t1900-repo.sh | 6 ++++++ 3 files changed, 16 insertions(+) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 6f5ee882157c16..209afd1b6152be 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -55,6 +55,9 @@ values that they return: `layout.shallow`:: `true` if this is a shallow repository, otherwise `false`. +`object.format`:: + The object format (hash algorithm) used in the repository. 
+ `references.format`:: The reference storage format. The valid values are: + diff --git a/builtin/repo.c b/builtin/repo.c index dc9a2674694667..bbb0966f2d2284 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -38,6 +38,12 @@ static int get_layout_shallow(struct repository *repo, struct strbuf *buf) return 0; } +static int get_object_format(struct repository *repo, struct strbuf *buf) +{ + strbuf_addstr(buf, repo->hash_algo->name); + return 0; +} + static int get_references_format(struct repository *repo, struct strbuf *buf) { strbuf_addstr(buf, @@ -49,6 +55,7 @@ static int get_references_format(struct repository *repo, struct strbuf *buf) static const struct field repo_info_fields[] = { { "layout.bare", get_layout_bare }, { "layout.shallow", get_layout_shallow }, + { "object.format", get_object_format }, { "references.format", get_references_format }, }; diff --git a/t/t1900-repo.sh b/t/t1900-repo.sh index ddf788d5a26318..2beba67889af25 100755 --- a/t/t1900-repo.sh +++ b/t/t1900-repo.sh @@ -63,6 +63,12 @@ test_expect_success 'setup remote' ' test_repo_info 'shallow repository = true is retrieved correctly' \ 'git clone --depth 1 "file://$PWD/remote"' 'shallow' 'layout.shallow' 'true' +test_repo_info 'object.format = sha1 is retrieved correctly' \ + 'git init --object-format=sha1' 'sha1' 'object.format' 'sha1' + +test_repo_info 'object.format = sha256 is retrieved correctly' \ + 'git init --object-format=sha256' 'sha256' 'object.format' 'sha256' + test_expect_success 'values returned in order requested' ' cat >expect <<-\EOF && layout.bare=false From 069c15d256ed5308d09a7a16d64c8af2416bed58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 4 Sep 2025 19:58:25 +0200 Subject: [PATCH 08/17] object-name: declare pointer type of extend_abbrev_len()'s 2nd parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose the expected type of the second parameter of extend_abbrev_len() instead of casting a void pointer internally. Just a single caller passes in a void pointer, the rest pass the correct type. Let the compiler help keeping it that way. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- object-name.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/object-name.c b/object-name.c index 88d1313028cf0b..6c6c38d3ccaad1 100644 --- a/object-name.c +++ b/object-name.c @@ -691,10 +691,9 @@ static inline char get_hex_char_from_oid(const struct object_id *oid, return hex[oid->hash[pos >> 1] & 0xf]; } -static int extend_abbrev_len(const struct object_id *oid, void *cb_data) +static int extend_abbrev_len(const struct object_id *oid, + struct min_abbrev_data *mad) { - struct min_abbrev_data *mad = cb_data; - unsigned int i = mad->init_len; while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i)) i++; From c9388d9012a71c6ef83ec94bd72eeb93f53caee4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 5 Sep 2025 19:26:13 +0000 Subject: [PATCH 09/17] midx-write: only load initialized packs The fill_packs_from_midx() method was refactored in fcb2205b77 (midx: implement support for writing incremental MIDX chains, 2024-08-06) to allow for preferred packfiles and incremental multi-pack-indexes. However, this led to some conditions that can cause improperly initialized memory in the context's list of packfiles. The conditions caring about the preferred pack name or the incremental flag are currently necessary to load a packfile. 
But the context is still being populated with pack_info structs based on the packfile array for the existing multi-pack-index even if prepare_midx_pack() isn't called. Add a new test that breaks under --stress when compiled with SANITIZE=address. The chosen number of 100 packfiles was selected to get the --stress output to fail about 50% of the time, while 50 packfiles could not get a failure in most --stress runs. The test case is marked as EXPENSIVE not only because of the number of packfiles it creates, but because some CI environments were reporting errors during the test that I could not reproduce, specifically around being unable to open the packfiles or their pack-indexes. When it fails under SANITIZE=address, it provides the following error: AddressSanitizer:DEADLYSIGNAL ================================================================= ==3263517==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000027 ==3263517==The signal is caused by a READ memory access. ==3263517==Hint: address points to the zero page. #0 0x562d5d82d1fb in close_pack_windows packfile.c:299 #1 0x562d5d82d3ab in close_pack packfile.c:354 #2 0x562d5d7bfdb4 in write_midx_internal midx-write.c:1490 #3 0x562d5d7c7aec in midx_repack midx-write.c:1795 #4 0x562d5d46fff6 in cmd_multi_pack_index builtin/multi-pack-index.c:305 ... This failure stack trace is disconnected from the real fix because the bad pointers are accessed later when closing the packfiles from the context. There are a few different aspects to this fix that are worth noting: 1. We return to the previous behavior of fill_packs_from_midx to not rely on the incremental flag or existence of a preferred pack. 2. The behavior to scan all layers of an incremental midx is kept, so this is not a full revert of the change. 3. We skip allocating more room in the pack_info array if the pack fails prepare_midx_pack(). 4. The method has always returned 0 for success and 1 for failure, but the condition checking for error added a check for a negative result for failure, so that is now updated. 5. The call to open_pack_index() is removed, but this is needed later in the case of a preferred pack. That call is moved to immediately before its result is needed (checking for the object count). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx-write.c | 46 +++++++++++++++---------------------- t/t5319-multi-pack-index.sh | 17 ++++++++++++++ 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/midx-write.c b/midx-write.c index ba4a94950a8314..8203b50da1e290 100644 --- a/midx-write.c +++ b/midx-write.c @@ -938,8 +938,7 @@ static struct multi_pack_index *lookup_multi_pack_index(struct repository *r, return result; } -static int fill_packs_from_midx(struct write_midx_context *ctx, - const char *preferred_pack_name, uint32_t flags) +static int fill_packs_from_midx(struct write_midx_context *ctx) { struct multi_pack_index *m; @@ -947,30 +946,11 @@ static int fill_packs_from_midx(struct write_midx_context *ctx, uint32_t i; for (i = 0; i < m->num_packs; i++) { - ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); - - /* - * If generating a reverse index, need to have - * packed_git's loaded to compare their - * mtimes and object count. - * - * If a preferred pack is specified, need to - * have packed_git's loaded to ensure the chosen - * preferred pack has a non-zero object count. 
- */ - if (flags & MIDX_WRITE_REV_INDEX || - preferred_pack_name) { - if (prepare_midx_pack(ctx->repo, m, - m->num_packs_in_base + i)) { - error(_("could not load pack")); - return 1; - } - - if (open_pack_index(m->packs[i])) - die(_("could not open index for %s"), - m->packs[i]->pack_name); - } + if (prepare_midx_pack(ctx->repo, m, + m->num_packs_in_base + i)) + return error(_("could not load pack")); + ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); fill_pack_info(&ctx->info[ctx->nr++], m->packs[i], m->pack_names[i], m->num_packs_in_base + i); @@ -1141,8 +1121,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, ctx.num_multi_pack_indexes_before++; m = m->base_midx; } - } else if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name, - flags) < 0) { + } else if (ctx.m && fill_packs_from_midx(&ctx)) { goto cleanup; } @@ -1204,6 +1183,13 @@ static int write_midx_internal(struct repository *r, const char *object_dir, struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p; ctx.preferred_pack_idx = 0; + /* + * Attempt opening the pack index to populate num_objects. + * Ignore failiures as they can be expected and are not + * fatal during this selection time. + */ + open_pack_index(oldest); + if (packs_to_drop && packs_to_drop->nr) BUG("cannot write a MIDX bitmap during expiration"); @@ -1218,6 +1204,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (!oldest->num_objects || p->mtime < oldest->mtime) { oldest = p; + open_pack_index(oldest); ctx.preferred_pack_idx = i; } } @@ -1241,6 +1228,11 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (ctx.preferred_pack_idx > -1) { struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p; + + if (open_pack_index(preferred)) + die(_("failed to open preferred pack %s"), + ctx.info[ctx.preferred_pack_idx].pack_name); + if (!preferred->num_objects) { error(_("cannot select preferred pack %s with no objects"), preferred->pack_name); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index bd75dea9501ed7..49705c62a25202 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -989,6 +989,23 @@ test_expect_success 'repack --batch-size=0 repacks everything' ' ) ' +test_expect_success EXPENSIVE 'repack/expire with many packs' ' + cp -r dup many && + ( + cd many && + + for i in $(test_seq 1 100) + do + test_commit extra$i && + git maintenance run --task=loose-objects || return 1 + done && + + git multi-pack-index write && + git multi-pack-index repack && + git multi-pack-index expire + ) +' + test_expect_success 'repack --batch-size= repacks everything' ' ( cd dup2 && From 3a45c7beb0f66bad122a1c319c71add5533e1f00 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 5 Sep 2025 19:26:14 +0000 Subject: [PATCH 10/17] midx-write: put failing response value back This instance of setting the result to 1 before going to cleanup was accidentally removed in fcb2205b77 (midx: implement support for writing incremental MIDX chains, 2024-08-06). Build upon a test that already deletes a packfile to verify that this error propagates to full command failure. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx-write.c | 1 + t/t5319-multi-pack-index.sh | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/midx-write.c b/midx-write.c index 8203b50da1e290..4c8af23861afcd 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1122,6 +1122,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, m = m->base_midx; } } else if (ctx.m && fill_packs_from_midx(&ctx)) { + result = 1; goto cleanup; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 49705c62a25202..2c22fdb9317d38 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1100,7 +1100,10 @@ test_expect_success 'load reverse index when missing .idx, .pack' ' mv $idx.bak $idx && mv $pack $pack.bak && - git cat-file --batch-check="%(objectsize:disk)" err && + test_grep "could not load pack" err ) ' From 9c2262d65dee8c1e3656b01f7db0660181902d2a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 5 Sep 2025 19:26:15 +0000 Subject: [PATCH 11/17] midx-write: use cleanup when incremental midx fails The incremental mode of writing a multi-pack-index has a few extra conditions that could lead to failure, but these are currently short-circuiting with 'return -1' instead of setting the method's 'result' variable and going to the cleanup tag. Replace these returns with gotos to avoid memory issues when exiting early due to error conditions. Unfortunately, these error conditions are difficult to reproduce with test cases, which is perhaps one reason why the memory loss was not caught by existing test cases in memory tracking modes. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx-write.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/midx-write.c b/midx-write.c index 4c8af23861afcd..e8864386d8d84e 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1345,13 +1345,15 @@ static int write_midx_internal(struct repository *r, const char *object_dir, incr = mks_tempfile_m(midx_name.buf, 0444); if (!incr) { error(_("unable to create temporary MIDX layer")); - return -1; + result = -1; + goto cleanup; } if (adjust_shared_perm(r, get_tempfile_path(incr))) { error(_("unable to adjust shared permissions for '%s'"), get_tempfile_path(incr)); - return -1; + result = -1; + goto cleanup; } f = hashfd(r->hash_algo, get_tempfile_fd(incr), @@ -1451,18 +1453,22 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (!chainf) { error_errno(_("unable to open multi-pack-index chain file")); - return -1; + result = -1; + goto cleanup; } - if (link_midx_to_chain(ctx.base_midx) < 0) - return -1; + if (link_midx_to_chain(ctx.base_midx) < 0) { + result = -1; + goto cleanup; + } get_split_midx_filename_ext(r->hash_algo, &final_midx_name, object_dir, midx_hash, MIDX_EXT_MIDX); if (rename_tempfile(&incr, final_midx_name.buf) < 0) { error_errno(_("unable to rename new multi-pack-index layer")); - return -1; + result = -1; + goto cleanup; } strbuf_release(&final_midx_name); From 68383ac9d4c90c9b3c7591aa0118d72489599b11 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 5 Sep 2025 19:26:16 +0000 Subject: [PATCH 12/17] midx-write: use uint32_t for preferred_pack_idx midx-write.c has the DISABLE_SIGN_COMPARE_WARNINGS macro defined for a few reasons, but the biggest one is the use of a signed preferred_pack_idx member inside the write_midx_context struct.
The code currently uses -1 to indicate an unset preferred pack but pack int ids are normally handled as uint32_t. There are also a few loops that search for the preferred pack by name and those iterators will need updates to uint32_t in the next change. For now, replace the use of -1 with a 'NO_PREFERRED_PACK' macro and an equality check. The macro stores the max value of a uint32_t, so we cannot store a preferred pack that appears last in a list of 2^32 total packs, but that's expected to be unreasonable already. Furthermore, with this change we end up extending the range from 2^31 possible packs to 2^32-1. There are some careful things to worry about with initializing the preferred pack in the struct and using that value when searching for a preferred pack that was already incorrect but accidentally working when the index was initialized to zero. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx-write.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/midx-write.c b/midx-write.c index e8864386d8d84e..fd1803c3ec72d8 100644 --- a/midx-write.c +++ b/midx-write.c @@ -24,6 +24,7 @@ #define BITMAP_POS_UNKNOWN (~((uint32_t)0)) #define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) +#define NO_PREFERRED_PACK (~((uint32_t)0)) extern int midx_checksum_valid(struct multi_pack_index *m); extern void clear_midx_files_ext(const char *object_dir, const char *ext, @@ -104,7 +105,7 @@ struct write_midx_context { unsigned large_offsets_needed:1; uint32_t num_large_offsets; - int preferred_pack_idx; + uint32_t preferred_pack_idx; int incremental; uint32_t num_multi_pack_indexes_before; @@ -260,7 +261,7 @@ static void midx_fanout_sort(struct midx_fanout *fanout) static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout, struct multi_pack_index *m, uint32_t cur_fanout, - int preferred_pack) + uint32_t preferred_pack) { uint32_t start = m->num_objects_in_base, end; uint32_t cur_object; @@ -274,7 +275,7 @@ static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout, end = m->num_objects_in_base + ntohl(m->chunk_oid_fanout[cur_fanout]); for (cur_object = start; cur_object < end; cur_object++) { - if ((preferred_pack > -1) && + if ((preferred_pack != NO_PREFERRED_PACK) && (preferred_pack == nth_midxed_pack_int_id(m, cur_object))) { /* * Objects from preferred packs are added @@ -364,7 +365,8 @@ static void compute_sorted_entries(struct write_midx_context *ctx, preferred, cur_fanout); } - if (-1 < ctx->preferred_pack_idx && ctx->preferred_pack_idx < start_pack) + if (ctx->preferred_pack_idx != NO_PREFERRED_PACK && + ctx->preferred_pack_idx < start_pack) midx_fanout_add_pack_fanout(&fanout, ctx->info, ctx->preferred_pack_idx, 1, cur_fanout); @@ -1058,7 +1060,9 @@ static int write_midx_internal(struct repository *r, const char *object_dir, struct hashfile *f = NULL; struct lock_file lk; struct tempfile *incr; - struct write_midx_context ctx = { 0 }; + struct write_midx_context ctx = { + .preferred_pack_idx = NO_PREFERRED_PACK, + }; int bitmapped_packs_concat_len = 0; int pack_name_concat_len = 0; int dropped_packs = 0; @@ -1166,7 +1170,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, goto cleanup; /* nothing to do */ if (preferred_pack_name) { - ctx.preferred_pack_idx = -1; + ctx.preferred_pack_idx = NO_PREFERRED_PACK; for (i = 0; i < ctx.nr; i++) { if (!cmp_idx_or_pack_name(preferred_pack_name, @@ -1176,12 +1180,12 @@ static int 
write_midx_internal(struct repository *r, const char *object_dir, } } - if (ctx.preferred_pack_idx == -1) + if (ctx.preferred_pack_idx == NO_PREFERRED_PACK) warning(_("unknown preferred pack: '%s'"), preferred_pack_name); } else if (ctx.nr && (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { - struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p; + struct packed_git *oldest = ctx.info[0].p; ctx.preferred_pack_idx = 0; /* @@ -1217,17 +1221,17 @@ static int write_midx_internal(struct repository *r, const char *object_dir, * objects to resolve, so the preferred value doesn't * matter. */ - ctx.preferred_pack_idx = -1; + ctx.preferred_pack_idx = NO_PREFERRED_PACK; } } else { /* * otherwise don't mark any pack as preferred to avoid * interfering with expiration logic below */ - ctx.preferred_pack_idx = -1; + ctx.preferred_pack_idx = NO_PREFERRED_PACK; } - if (ctx.preferred_pack_idx > -1) { + if (ctx.preferred_pack_idx != NO_PREFERRED_PACK) { struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p; if (open_pack_index(preferred)) From 1f2bc6be1dbbda5bc40a23bd58a60bbee9de7401 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 5 Sep 2025 19:26:17 +0000 Subject: [PATCH 13/17] midx-write: reenable signed comparison errors Remove the remaining signed comparison warnings in midx-write.c so that they can be enforced as errors in the future. After the previous change, the remaining errors are due to iterator variables named 'i'. The strategy here involves defining the variable within the for loop syntax to make sure we use the appropriate bitness for the loop sentinel. This matters in at least one method where the variable was compared to uint32_t in some loops and size_t in others. While adjusting these loops, there were some where the loop boundary was checking against a uint32_t value _plus one_. These were replaced with non-strict comparisons, but also the value is checked to not be UINT32_MAX. Since the value is the number of incremental multi-pack- indexes, this is not a meaningful restriction. The new die() is about defensive programming more than it being realistically possible. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx-write.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/midx-write.c b/midx-write.c index fd1803c3ec72d8..9b386911612649 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1,5 +1,3 @@ -#define DISABLE_SIGN_COMPARE_WARNINGS - #include "git-compat-util.h" #include "abspath.h" #include "config.h" @@ -845,7 +843,7 @@ static int write_midx_bitmap(struct write_midx_context *ctx, uint32_t commits_nr, unsigned flags) { - int ret, i; + int ret; uint16_t options = 0; struct bitmap_writer writer; struct pack_idx_entry **index; @@ -873,7 +871,7 @@ static int write_midx_bitmap(struct write_midx_context *ctx, * this order). */ ALLOC_ARRAY(index, pdata->nr_objects); - for (i = 0; i < pdata->nr_objects; i++) + for (uint32_t i = 0; i < pdata->nr_objects; i++) index[i] = &pdata->objects[i].idx; bitmap_writer_init(&writer, ctx->repo, pdata, @@ -894,7 +892,7 @@ static int write_midx_bitmap(struct write_midx_context *ctx, * happens between bitmap_writer_build_type_index() and * bitmap_writer_finish(). 
*/ - for (i = 0; i < pdata->nr_objects; i++) + for (uint32_t i = 0; i < pdata->nr_objects; i++) index[ctx->pack_order[i]] = &pdata->objects[i].idx; bitmap_writer_select_commits(&writer, commits, commits_nr); @@ -1056,7 +1054,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, { struct strbuf midx_name = STRBUF_INIT; unsigned char midx_hash[GIT_MAX_RAWSZ]; - uint32_t i, start_pack; + uint32_t start_pack; struct hashfile *f = NULL; struct lock_file lk; struct tempfile *incr; @@ -1172,7 +1170,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (preferred_pack_name) { ctx.preferred_pack_idx = NO_PREFERRED_PACK; - for (i = 0; i < ctx.nr; i++) { + for (size_t i = 0; i < ctx.nr; i++) { if (!cmp_idx_or_pack_name(preferred_pack_name, ctx.info[i].pack_name)) { ctx.preferred_pack_idx = i; @@ -1204,7 +1202,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, * pack-order has all of its objects selected from that pack * (and not another pack containing a duplicate) */ - for (i = 1; i < ctx.nr; i++) { + for (size_t i = 1; i < ctx.nr; i++) { struct packed_git *p = ctx.info[i].p; if (!oldest->num_objects || p->mtime < oldest->mtime) { @@ -1249,7 +1247,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, compute_sorted_entries(&ctx, start_pack); ctx.large_offsets_needed = 0; - for (i = 0; i < ctx.entries_nr; i++) { + for (size_t i = 0; i < ctx.entries_nr; i++) { if (ctx.entries[i].offset > 0x7fffffff) ctx.num_large_offsets++; if (ctx.entries[i].offset > 0xffffffff) @@ -1259,10 +1257,10 @@ static int write_midx_internal(struct repository *r, const char *object_dir, QSORT(ctx.info, ctx.nr, pack_info_compare); if (packs_to_drop && packs_to_drop->nr) { - int drop_index = 0; + size_t drop_index = 0; int missing_drops = 0; - for (i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) { + for (size_t i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) { int cmp = strcmp(ctx.info[i].pack_name, packs_to_drop->items[drop_index].string); @@ -1293,7 +1291,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, * pack_perm[old_id] = new_id */ ALLOC_ARRAY(ctx.pack_perm, ctx.nr); - for (i = 0; i < ctx.nr; i++) { + for (size_t i = 0; i < ctx.nr; i++) { if (ctx.info[i].expired) { dropped_packs++; ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED; @@ -1302,7 +1300,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, } } - for (i = 0; i < ctx.nr; i++) { + for (size_t i = 0; i < ctx.nr; i++) { if (ctx.info[i].expired) continue; pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1; @@ -1448,6 +1446,9 @@ static int write_midx_internal(struct repository *r, const char *object_dir, * have been freed in the previous if block. 
*/ + if (ctx.num_multi_pack_indexes_before == UINT32_MAX) + die(_("too many multi-pack-indexes")); + CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1); if (ctx.incremental) { @@ -1480,7 +1481,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, keep_hashes[ctx.num_multi_pack_indexes_before] = xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); - for (i = 0; i < ctx.num_multi_pack_indexes_before; i++) { + for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; keep_hashes[j] = xstrdup(hash_to_hex_algop(get_midx_checksum(m), @@ -1488,7 +1489,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, m = m->base_midx; } - for (i = 0; i < ctx.num_multi_pack_indexes_before + 1; i++) + for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); } else { keep_hashes[ctx.num_multi_pack_indexes_before] = @@ -1506,7 +1507,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, ctx.incremental); cleanup: - for (i = 0; i < ctx.nr; i++) { + for (size_t i = 0; i < ctx.nr; i++) { if (ctx.info[i].p) { close_pack(ctx.info[i].p); free(ctx.info[i].p); @@ -1519,7 +1520,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, free(ctx.pack_perm); free(ctx.pack_order); if (keep_hashes) { - for (i = 0; i < ctx.num_multi_pack_indexes_before + 1; i++) + for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) free((char *)keep_hashes[i]); free(keep_hashes); } From c25651aefdf583ebb2dbb88437337947372de8f3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 5 Sep 2025 19:26:18 +0000 Subject: [PATCH 14/17] midx-write: simplify error cases The write_midx_internal() method uses gotos to jump to a cleanup section to clear memory before returning 'result'. Since these jumps are more common for error conditions, initialize 'result' to -1 and then only set it to 0 before returning with success. There are a couple places where we return with success via a jump. This has the added benefit that the method now returns -1 on error instead of an inconsistent 1 or -1. 
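To illustrate, the resulting control flow follows the common pattern
sketched below (a minimal example, not the actual write_midx_internal()
code; the helper names are made up):

    #include <stdlib.h>

    /* Hypothetical stand-ins for the real processing stages. */
    extern int prepare_packs(void);
    extern int write_chunks(void);

    static int write_something(void)
    {
            int result = -1;    /* assume failure until the very end */
            char *buf = malloc(64);

            if (!buf || prepare_packs() < 0)
                    goto cleanup;   /* error paths no longer assign 'result' */
            if (write_chunks() < 0)
                    goto cleanup;

            result = 0;         /* success is recorded in exactly one place */
    cleanup:
            free(buf);
            return result;
    }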
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx-write.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/midx-write.c b/midx-write.c index 9b386911612649..b7824d21fc1854 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1064,7 +1064,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, int bitmapped_packs_concat_len = 0; int pack_name_concat_len = 0; int dropped_packs = 0; - int result = 0; + int result = -1; const char **keep_hashes = NULL; struct chunkfile *cf; @@ -1117,14 +1117,12 @@ static int write_midx_internal(struct repository *r, const char *object_dir, error(_("could not load reverse index for MIDX %s"), hash_to_hex_algop(get_midx_checksum(m), m->repo->hash_algo)); - result = 1; goto cleanup; } ctx.num_multi_pack_indexes_before++; m = m->base_midx; } } else if (ctx.m && fill_packs_from_midx(&ctx)) { - result = 1; goto cleanup; } @@ -1160,12 +1158,16 @@ static int write_midx_internal(struct repository *r, const char *object_dir, */ if (!want_bitmap) clear_midx_files_ext(object_dir, "bitmap", NULL); + + result = 0; goto cleanup; } } - if (ctx.incremental && !ctx.nr) + if (ctx.incremental && !ctx.nr) { + result = 0; goto cleanup; /* nothing to do */ + } if (preferred_pack_name) { ctx.preferred_pack_idx = NO_PREFERRED_PACK; @@ -1239,7 +1241,6 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (!preferred->num_objects) { error(_("cannot select preferred pack %s with no objects"), preferred->pack_name); - result = 1; goto cleanup; } } @@ -1278,10 +1279,8 @@ static int write_midx_internal(struct repository *r, const char *object_dir, } } - if (missing_drops) { - result = 1; + if (missing_drops) goto cleanup; - } } /* @@ -1327,7 +1326,6 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (ctx.nr - dropped_packs == 0) { error(_("no pack files to index.")); - result = 1; goto cleanup; } @@ -1347,14 +1345,12 @@ static int write_midx_internal(struct repository *r, const char *object_dir, incr = mks_tempfile_m(midx_name.buf, 0444); if (!incr) { error(_("unable to create temporary MIDX layer")); - result = -1; goto cleanup; } if (adjust_shared_perm(r, get_tempfile_path(incr))) { error(_("unable to adjust shared permissions for '%s'"), get_tempfile_path(incr)); - result = -1; goto cleanup; } @@ -1432,7 +1428,6 @@ static int write_midx_internal(struct repository *r, const char *object_dir, midx_hash, &pdata, commits, commits_nr, flags) < 0) { error(_("could not write multi-pack bitmap")); - result = 1; clear_packing_data(&pdata); free(commits); goto cleanup; @@ -1458,21 +1453,17 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (!chainf) { error_errno(_("unable to open multi-pack-index chain file")); - result = -1; goto cleanup; } - if (link_midx_to_chain(ctx.base_midx) < 0) { - result = -1; + if (link_midx_to_chain(ctx.base_midx) < 0) goto cleanup; - } get_split_midx_filename_ext(r->hash_algo, &final_midx_name, object_dir, midx_hash, MIDX_EXT_MIDX); if (rename_tempfile(&incr, final_midx_name.buf) < 0) { error_errno(_("unable to rename new multi-pack-index layer")); - result = -1; goto cleanup; } @@ -1505,6 +1496,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, clear_midx_files(r, object_dir, keep_hashes, ctx.num_multi_pack_indexes_before + 1, ctx.incremental); + result = 0; cleanup: for (size_t i = 0; i < ctx.nr; i++) { From 7a57fb1a597eb2fa281c00cac94863ad0d7d7f6c Mon Sep 17 
00:00:00 2001 From: Patrick Steinhardt Date: Fri, 5 Sep 2025 08:18:01 +0200 Subject: [PATCH 15/17] t5530: modernize tests Refactor tests to follow modern best practices: - Merge together tests that set up and verify a single use case. - Drop empty newlines at the beginning and end of test bodies. - Don't change directories in the main test body. - Remove an unused `D` variable. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t5530-upload-pack-error.sh | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/t/t5530-upload-pack-error.sh b/t/t5530-upload-pack-error.sh index 558eedf25a4c9b..8e505786f1b76b 100755 --- a/t/t5530-upload-pack-error.sh +++ b/t/t5530-upload-pack-error.sh @@ -4,8 +4,6 @@ test_description='errors in upload-pack' . ./test-lib.sh -D=$(pwd) - corrupt_repo () { object_sha1=$(git rev-parse "$1") && ob=$(expr "$object_sha1" : "\(..\)") && @@ -21,11 +19,7 @@ test_expect_success 'setup and corrupt repository' ' test_tick && echo changed >file && git commit -a -m changed && - corrupt_repo HEAD:file - -' - -test_expect_success 'fsck fails' ' + corrupt_repo HEAD:file && test_must_fail git fsck ' @@ -40,17 +34,12 @@ test_expect_success 'upload-pack fails due to error in pack-objects packing' ' ' test_expect_success 'corrupt repo differently' ' - git hash-object -w file && - corrupt_repo HEAD^^{tree} - -' - -test_expect_success 'fsck fails' ' + corrupt_repo HEAD^^{tree} && test_must_fail git fsck ' -test_expect_success 'upload-pack fails due to error in rev-list' ' +test_expect_success 'upload-pack fails due to error in rev-list' ' printf "%04xwant %s\n%04xshallow %s00000009done\n0000" \ $(($hexsz + 10)) $(git rev-parse HEAD) \ $(($hexsz + 12)) $(git rev-parse HEAD^) >input && @@ -59,7 +48,6 @@ test_expect_success 'upload-pack fails due to error in rev-list' ' ' test_expect_success 'upload-pack fails due to bad want (no object)' ' - printf "%04xwant %s multi_ack_detailed\n00000009done\n0000" \ $(($hexsz + 29)) $(test_oid deadbeef) >input && test_must_fail git upload-pack . output 2>output.err && @@ -69,7 +57,6 @@ test_expect_success 'upload-pack fails due to bad want (no object)' ' ' test_expect_success 'upload-pack fails due to bad want (not tip)' ' - oid=$(echo an object we have | git hash-object -w --stdin) && printf "%04xwant %s multi_ack_detailed\n00000009done\n0000" \ $(($hexsz + 29)) "$oid" >input && @@ -80,7 +67,6 @@ test_expect_success 'upload-pack fails due to bad want (not tip)' ' ' test_expect_success 'upload-pack fails due to error in pack-objects enumeration' ' - printf "%04xwant %s\n00000009done\n0000" \ $((hexsz + 10)) $(git rev-parse HEAD) >input && test_must_fail git upload-pack . /dev/null 2>output.err && @@ -105,18 +91,9 @@ test_expect_success 'upload-pack tolerates EOF just after stateless client wants test_cmp expect actual ' -test_expect_success 'create empty repository' ' - - mkdir foo && - cd foo && - git init - -' - test_expect_success 'fetch fails' ' - - test_must_fail git fetch .. main - + git init foo && + test_must_fail git -C foo fetch .. main ' test_done From 88a2dc68c8c9a2cf04c6d6d52e4ac3b26788e273 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 5 Sep 2025 08:18:02 +0200 Subject: [PATCH 16/17] upload-pack: don't ACK non-commits repeatedly in protocol v2 When a client performs a fetch or clone they can optionally send "have" lines to tell the server which objects they already have available locally. 
These object IDs are stored by the server in an object array so that it
can remember any objects it doesn't have to include in the pack sent to
the client.

While there isn't any reason to do so, clients are free to send the same
"have" line repeatedly. git-upload-pack(1) already knows to handle this
well: every commit it has seen via a "have" line gets marked with the
`THEY_HAVE` flag, and if such a commit is seen repeatedly we know not to
process it another time. This also has the effect that we only store the
object ID once in the `have_obj` array.

There is an edge case though: if the client sends an object ID that does
not refer to a commit we neither store nor check the `THEY_HAVE` flag.
This means that we repeatedly store the same object ID in our `have_obj`
array, with two consequences:

  - In protocol v2 we deduplicate ACKs for commits, but not for any other
    objects, as we send ACKs for every object ID in the `have_obj` array.

  - The `have_obj` array can grow in size indefinitely with both
    protocols.

The potentially more serious issue is the second one, as we basically
have a way for an adversary to allocate arbitrarily large buffers now.
Ultimately, this doesn't seem to be all that serious though: on my
machine, the growth of that array is at around 4MB/s, and after roughly
five minutes I was only at 1GB RSS. So this is concerning, but only
mildly so.

Fix this bug by storing the `THEY_HAVE` flag independent of the object
type so that we don't store duplicate object IDs in `have_obj` anymore.

Signed-off-by: Patrick Steinhardt
Signed-off-by: Junio C Hamano
---
 t/t5530-upload-pack-error.sh | 39 ++++++++++++++++++++++++++++++++++++
 upload-pack.c                | 19 +++++++++---------
 2 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/t/t5530-upload-pack-error.sh b/t/t5530-upload-pack-error.sh
index 8e505786f1b76b..d40292cfb7b48f 100755
--- a/t/t5530-upload-pack-error.sh
+++ b/t/t5530-upload-pack-error.sh
@@ -96,4 +96,43 @@ test_expect_success 'fetch fails' '
 	test_must_fail git -C foo fetch .. main
 '
 
+test_expect_success 'upload-pack ACKs repeated non-commit objects repeatedly (protocol v0)' '
+	commit_id=$(git rev-parse HEAD) &&
+	tree_id=$(git rev-parse HEAD^{tree}) &&
+	test-tool pkt-line pack >request <<-EOF &&
+	want $commit_id
+	0000
+	have $tree_id
+	have $tree_id
+	0000
+	EOF
+	git upload-pack --stateless-rpc . <request >actual &&
+	depacketize <actual >actual.raw &&
+	grep ^ACK actual.raw >actual.acks &&
+	cat >expect <<-EOF &&
+	ACK $tree_id
+	ACK $tree_id
+	EOF
+	test_cmp expect actual.acks
+'
+
+test_expect_success 'upload-pack ACKs repeated non-commit objects once only (protocol v2)' '
+	commit_id=$(git rev-parse HEAD) &&
+	tree_id=$(git rev-parse HEAD^{tree}) &&
+	test-tool pkt-line pack >request <<-EOF &&
+	command=fetch
+	object-format=$(test_oid algo)
+	0001
+	want $commit_id
+	have $tree_id
+	have $tree_id
+	0000
+	EOF
+	GIT_PROTOCOL=version=2 git upload-pack . <request >
actual && + depacketize actual.raw && + grep ^ACK actual.raw >actual.acks && + echo "ACK $tree_id" >expect && + test_cmp expect actual.acks +' + test_done diff --git a/upload-pack.c b/upload-pack.c index 26f29b85b551c1..037e10e8fa5f24 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -476,20 +476,17 @@ static void create_pack_file(struct upload_pack_data *pack_data, static int do_got_oid(struct upload_pack_data *data, const struct object_id *oid) { - int we_knew_they_have = 0; struct object *o = parse_object_with_flags(the_repository, oid, PARSE_OBJECT_SKIP_HASH_CHECK | PARSE_OBJECT_DISCARD_TREE); if (!o) die("oops (%s)", oid_to_hex(oid)); + if (o->type == OBJ_COMMIT) { struct commit_list *parents; struct commit *commit = (struct commit *)o; - if (o->flags & THEY_HAVE) - we_knew_they_have = 1; - else - o->flags |= THEY_HAVE; + if (!data->oldest_have || (commit->date < data->oldest_have)) data->oldest_have = commit->date; for (parents = commit->parents; @@ -497,11 +494,13 @@ static int do_got_oid(struct upload_pack_data *data, const struct object_id *oid parents = parents->next) parents->item->object.flags |= THEY_HAVE; } - if (!we_knew_they_have) { - add_object_array(o, NULL, &data->have_obj); - return 1; - } - return 0; + + if (o->flags & THEY_HAVE) + return 0; + o->flags |= THEY_HAVE; + + add_object_array(o, NULL, &data->have_obj); + return 1; } static int got_oid(struct upload_pack_data *data, From a483264b01b977f3e65a4419103c21e6af7412a2 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 15 Sep 2025 08:51:09 -0700 Subject: [PATCH 17/17] The ninth batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.52.0.adoc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Documentation/RelNotes/2.52.0.adoc b/Documentation/RelNotes/2.52.0.adoc index 6f8579bdeed781..959c8afe15f8a8 100644 --- a/Documentation/RelNotes/2.52.0.adoc +++ b/Documentation/RelNotes/2.52.0.adoc @@ -20,6 +20,10 @@ UI, Workflows & Features * "git refs exists" that works like "git show-ref --exists" has been added. + * "repo info" learns a short-hand option "-z" that is the same as + "--format=nul", and learns to report the objects format used in the + repository. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -55,6 +59,10 @@ Performance, Internal Implementation, Development Support etc. which items are still on the queue (an unacceptable alternative is to reserve one object flag bits). + * The bulk-checkin code used to depend on a file-scope static + singleton variable, which has been updated to pass an instance + throughout the callchain. + Fixes since v2.51 ----------------- @@ -164,6 +172,11 @@ including security updates, are included in this release. which has been disabled in Gitlab CI. (merge 608cf5b793 ps/gitlab-ci-disable-windows-monitoring later to maint). + * A broken or malicious "git fetch" can say that it has the same + object for many many times, and the upload-pack serving it can + exhaust memory storing them redundantly, which has been corrected. + (merge 88a2dc68c8 ps/upload-pack-oom-protection later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 823d537fa7 kh/doc-git-log-markup-fix later to maint). (merge cf7efa4f33 rj/t6137-cygwin-fix later to maint). @@ -182,3 +195,6 @@ including security updates, are included in this release. (merge 2f4bf83ffc km/alias-doc-markup-fix later to maint). (merge b0d97aac19 kh/doc-markup-fixes later to maint). 
(merge f9a6705d9a tc/t0450-harden later to maint). + (merge c25651aefd ds/midx-write-fixes later to maint). + (merge 069c15d256 rs/object-name-extend-abbrev-len-update later to maint). + (merge bf5c224537 mm/worktree-doc-typofix later to maint).
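The gist of the upload-pack fix merged above can be sketched as follows
(a minimal example, not the actual do_got_oid() code; the flag value and
the remember_have() helper are simplified stand-ins):

    /* Simplified object representation; Git's real struct differs. */
    struct object {
            unsigned flags;
    };

    #define THEY_HAVE (1u << 0)   /* simplified flag bit for the sketch */

    /* Stand-in for add_object_array(o, NULL, &data->have_obj). */
    extern void remember_have(struct object *o);

    /* Returns 1 the first time an object is reported, 0 for repeats. */
    static int record_have(struct object *o)
    {
            if (o->flags & THEY_HAVE)
                    return 0;        /* seen before: do not grow have_obj again */
            o->flags |= THEY_HAVE;   /* mark regardless of object type */
            remember_have(o);
            return 1;
    }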