From e1d062e8ba0b72f49e9ef9713cc7011c330baab8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 10 Sep 2025 15:12:17 +0200 Subject: [PATCH 01/24] odb: drop deprecated wrapper functions In the Git 2.51 release cycle we've refactored the object database layer to access objects via `struct object_database` directly. To make the transition a bit easier we have retained some of the old-style functions in case those were widely used. Now that Git 2.51 has been released it's time to clean up though and drop these old wrappers. Do so and adapt the small number of newly added users to use the new functions instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 6 +++--- odb.h | 33 --------------------------------- t/helper/test-pack-deltas.c | 10 ++++------ 3 files changed, 7 insertions(+), 42 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 53a225625039ea..ff6900b6545a24 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3774,7 +3774,7 @@ static void show_object_pack_hint(struct object *object, const char *name, enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data; if (mode == STDIN_PACKS_MODE_FOLLOW) { if (object->type == OBJ_BLOB && - !has_object(the_repository, &object->oid, 0)) + !odb_has_object(the_repository->objects, &object->oid, 0)) return; add_object_entry(&object->oid, object->type, name, 0); } else { @@ -4591,8 +4591,8 @@ static int add_objects_by_path(const char *path, /* Skip objects that do not exist locally. */ if ((exclude_promisor_objects || arg_missing_action != MA_ERROR) && - oid_object_info_extended(the_repository, oid, &oi, - OBJECT_INFO_FOR_PREFETCH) < 0) + odb_read_object_info_extended(the_repository->objects, oid, &oi, + OBJECT_INFO_FOR_PREFETCH) < 0) continue; exclude = is_oid_uninteresting(the_repository, oid); diff --git a/odb.h b/odb.h index 3dfc66d75a3d20..e8b9dff9480ded 100644 --- a/odb.h +++ b/odb.h @@ -475,37 +475,4 @@ static inline int odb_write_object(struct object_database *odb, return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0); } -/* Compatibility wrappers, to be removed once Git 2.51 has been released. */ -#include "repository.h" - -static inline int oid_object_info_extended(struct repository *r, - const struct object_id *oid, - struct object_info *oi, - unsigned flags) -{ - return odb_read_object_info_extended(r->objects, oid, oi, flags); -} - -static inline int oid_object_info(struct repository *r, - const struct object_id *oid, - unsigned long *sizep) -{ - return odb_read_object_info(r->objects, oid, sizep); -} - -static inline void *repo_read_object_file(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size) -{ - return odb_read_object(r->objects, oid, type, size); -} - -static inline int has_object(struct repository *r, - const struct object_id *oid, - unsigned flags) -{ - return odb_has_object(r->objects, oid, flags); -} - #endif /* ODB_H */ diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c index 4caa024b1ebe73..4981401eaa6664 100644 --- a/t/helper/test-pack-deltas.c +++ b/t/helper/test-pack-deltas.c @@ -51,16 +51,14 @@ static void write_ref_delta(struct hashfile *f, unsigned long size, base_size, delta_size, compressed_size, hdrlen; enum object_type type; void *base_buf, *delta_buf; - void *buf = repo_read_object_file(the_repository, - oid, &type, - &size); + void *buf = odb_read_object(the_repository->objects, + oid, &type, &size); if (!buf) die("unable to read %s", oid_to_hex(oid)); - base_buf = repo_read_object_file(the_repository, - base, &type, - &base_size); + base_buf = odb_read_object(the_repository->objects, + base, &type, &base_size); if (!base_buf) die("unable to read %s", oid_to_hex(base)); From b7983adb5180c62586753754ae22a24ce8f7a04c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:00 +0200 Subject: [PATCH 02/24] packfile: introduce a new `struct packfile_store` Information about an object database's packfiles is currently distributed across two different structures: - `struct packed_git` contains the `next` pointer as well as the `mru_head`, both of which serve to store the list of packfiles. - `struct object_database` contains several fields that relate to the packfiles. So we don't really have a central data structure that tracks our packfiles, and consequently responsibilities aren't always clear cut. A consequence for the upcoming pluggable object databases is that this makes it very hard to move management of packfiles from the object database level down into the object database source. Introduce a new `struct packfile_store` which is about to become the single source of truth for managing packfiles. Right now this data structure doesn't yet contain anything, but in subsequent patches we will move all data structures that relate to packfiles and that are currently contained in `struct object_database` into this new home. Note that this is only a first step: most importantly, we won't (yet) move the `struct packed_git::next` pointer around. This will happen in a subsequent patch series though so that `struct packed_git` will really only host information about the specific packfile it represents. Further note that the new structure still sits at the wrong level at the end of this patch series: as mentioned, it should eventually sit at the level of the object database source, not at the object database level. But introducing the packfile store now already makes it way easier to eventually push down the now-selfcontained data structure by one level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 1 + odb.h | 3 ++- packfile.c | 13 +++++++++++++ packfile.h | 18 ++++++++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/odb.c b/odb.c index 75c443fe665be5..a2289ea97df39b 100644 --- a/odb.c +++ b/odb.c @@ -996,6 +996,7 @@ struct object_database *odb_new(struct repository *repo) memset(o, 0, sizeof(*o)); o->repo = repo; + o->packfiles = packfile_store_new(o); INIT_LIST_HEAD(&o->packed_git_mru); hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0); pthread_mutex_init(&o->replace_mutex, NULL); diff --git a/odb.h b/odb.h index 51fe8a5a929f16..33034eaf2fea8a 100644 --- a/odb.h +++ b/odb.h @@ -91,6 +91,7 @@ struct odb_source { }; struct packed_git; +struct packfile_store; struct cached_object_entry; /* @@ -136,7 +137,7 @@ struct object_database { * * should only be accessed directly by packfile.c */ - + struct packfile_store *packfiles; struct packed_git *packed_git; /* A most-recently-used ordered version of the packed_git list. */ struct list_head packed_git_mru; diff --git a/packfile.c b/packfile.c index acb680966dacf9..130d3e25073118 100644 --- a/packfile.c +++ b/packfile.c @@ -2332,3 +2332,16 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l *len = hdr - out; return 0; } + +struct packfile_store *packfile_store_new(struct object_database *odb) +{ + struct packfile_store *store; + CALLOC_ARRAY(store, 1); + store->odb = odb; + return store; +} + +void packfile_store_free(struct packfile_store *store) +{ + free(store); +} diff --git a/packfile.h b/packfile.h index f16753f2a9bb4c..8d31fd619ad249 100644 --- a/packfile.h +++ b/packfile.h @@ -52,6 +52,24 @@ struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ }; +/* + * A store that manages packfiles for a given object database. + */ +struct packfile_store { + struct object_database *odb; +}; + +/* + * Allocate and initialize a new empty packfile store for the given object + * database. + */ +struct packfile_store *packfile_store_new(struct object_database *odb); + +/* + * Free the packfile store and all its associated state. + */ +void packfile_store_free(struct packfile_store *store); + static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, const struct hashmap_entry *entry, const struct hashmap_entry *entry2, From 535b7a667a94d5882add829e30e20b6dfa076640 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:01 +0200 Subject: [PATCH 03/24] odb: move list of packfiles into `struct packfile_store` The object database tracks the list of packfiles it currently knows about. With the introduction of the `struct packfile_store` we have a better place to host this list though. Move the list accordingly. Extract the logic from `odb_clear()` that knows to close all such packfiles and move it into the new subsystem, as well. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 12 ++---------- odb.h | 1 - packfile.c | 42 +++++++++++++++++++++++++----------------- packfile.h | 15 ++++++++++++++- 4 files changed, 41 insertions(+), 29 deletions(-) diff --git a/odb.c b/odb.c index a2289ea97df39b..7201d01406db6f 100644 --- a/odb.c +++ b/odb.c @@ -1038,16 +1038,8 @@ void odb_clear(struct object_database *o) INIT_LIST_HEAD(&o->packed_git_mru); close_object_store(o); - - /* - * `close_object_store()` only closes the packfiles, but doesn't free - * them. We thus have to do this manually. - */ - for (struct packed_git *p = o->packed_git, *next; p; p = next) { - next = p->next; - free(p); - } - o->packed_git = NULL; + packfile_store_free(o->packfiles); + o->packfiles = NULL; hashmap_clear(&o->pack_map); string_list_clear(&o->submodule_source_paths, 0); diff --git a/odb.h b/odb.h index 33034eaf2fea8a..22a170b434c929 100644 --- a/odb.h +++ b/odb.h @@ -138,7 +138,6 @@ struct object_database { * should only be accessed directly by packfile.c */ struct packfile_store *packfiles; - struct packed_git *packed_git; /* A most-recently-used ordered version of the packed_git list. */ struct list_head packed_git_mru; diff --git a/packfile.c b/packfile.c index 130d3e25073118..36bc240107b57a 100644 --- a/packfile.c +++ b/packfile.c @@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current) if (current) scan_windows(current, &lru_p, &lru_w, &lru_l); - for (p = current->repo->objects->packed_git; p; p = p->next) + for (p = current->repo->objects->packfiles->packs; p; p = p->next) scan_windows(p, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); @@ -362,13 +362,8 @@ void close_pack(struct packed_git *p) void close_object_store(struct object_database *o) { struct odb_source *source; - struct packed_git *p; - for (p = o->packed_git; p; p = p->next) - if (p->do_not_close) - BUG("want to close pack marked 'do-not-close'"); - else - close_pack(p); + packfile_store_close(o->packfiles); for (source = o->sources; source; source = source->next) { if (source->midx) @@ -468,7 +463,7 @@ static int close_one_pack(struct repository *r) struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (p = r->objects->packed_git; p; p = p->next) { + for (p = r->objects->packfiles->packs; p; p = p->next) { if (p->pack_fd == -1) continue; find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); @@ -789,8 +784,8 @@ void install_packed_git(struct repository *r, struct packed_git *pack) if (pack->pack_fd != -1) pack_open_fds++; - pack->next = r->objects->packed_git; - r->objects->packed_git = pack; + pack->next = r->objects->packfiles->packs; + r->objects->packfiles->packs = pack; hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); hashmap_add(&r->objects->pack_map, &pack->packmap_ent); @@ -974,7 +969,7 @@ unsigned long repo_approximate_object_count(struct repository *r) count += m->num_objects; } - for (p = r->objects->packed_git; p; p = p->next) { + for (p = r->objects->packfiles->packs; p; p = p->next) { if (open_pack_index(p)) continue; count += p->num_objects; @@ -1015,7 +1010,7 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) static void rearrange_packed_git(struct repository *r) { - sort_packs(&r->objects->packed_git, sort_pack); + sort_packs(&r->objects->packfiles->packs, sort_pack); } static void prepare_packed_git_mru(struct repository *r) @@ -1024,7 +1019,7 @@ static void prepare_packed_git_mru(struct repository *r) INIT_LIST_HEAD(&r->objects->packed_git_mru); - for (p = r->objects->packed_git; p; p = p->next) + for (p = r->objects->packfiles->packs; p; p = p->next) list_add_tail(&p->mru, &r->objects->packed_git_mru); } @@ -1073,7 +1068,7 @@ void reprepare_packed_git(struct repository *r) struct packed_git *get_packed_git(struct repository *r) { prepare_packed_git(r); - return r->objects->packed_git; + return r->objects->packfiles->packs; } struct multi_pack_index *get_multi_pack_index(struct odb_source *source) @@ -1094,7 +1089,7 @@ struct packed_git *get_all_packs(struct repository *r) prepare_midx_pack(m, i); } - return r->objects->packed_git; + return r->objects->packfiles->packs; } struct list_head *get_packed_git_mru(struct repository *r) @@ -1219,7 +1214,7 @@ const struct packed_git *has_packed_and_bad(struct repository *r, { struct packed_git *p; - for (p = r->objects->packed_git; p; p = p->next) + for (p = r->objects->packfiles->packs; p; p = p->next) if (oidset_contains(&p->bad_objects, oid)) return p; return NULL; @@ -2080,7 +2075,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packed_git) + if (!r->objects->packfiles->packs) return 0; list_for_each(pos, &r->objects->packed_git_mru) { @@ -2343,5 +2338,18 @@ struct packfile_store *packfile_store_new(struct object_database *odb) void packfile_store_free(struct packfile_store *store) { + for (struct packed_git *p = store->packs, *next; p; p = next) { + next = p->next; + free(p); + } free(store); } + +void packfile_store_close(struct packfile_store *store) +{ + for (struct packed_git *p = store->packs; p; p = p->next) { + if (p->do_not_close) + BUG("want to close pack marked 'do-not-close'"); + close_pack(p); + } +} diff --git a/packfile.h b/packfile.h index 8d31fd619ad249..d7ac8d24b435b2 100644 --- a/packfile.h +++ b/packfile.h @@ -57,6 +57,12 @@ struct packed_git { */ struct packfile_store { struct object_database *odb; + + /* + * The list of packfiles in the order in which they are being added to + * the store. + */ + struct packed_git *packs; }; /* @@ -66,10 +72,17 @@ struct packfile_store { struct packfile_store *packfile_store_new(struct object_database *odb); /* - * Free the packfile store and all its associated state. + * Free the packfile store and all its associated state. All packfiles + * tracked by the store will be closed. */ void packfile_store_free(struct packfile_store *store); +/* + * Close all packfiles associated with this store. The packfiles won't be + * free'd, so they can be re-opened at a later point in time. + */ +void packfile_store_close(struct packfile_store *store); + static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, const struct hashmap_entry *entry, const struct hashmap_entry *entry2, From 3421cb56a8b37425f2a47695adfa4a29a06a9d2e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:02 +0200 Subject: [PATCH 04/24] odb: move initialization bit into `struct packfile_store` The object database knows to skip re-initializing the list of packfiles in case it's already been initialized. Whether or not that is the case is tracked via a separate `initialized` bit that is stored in the object database. With the introduction of the `struct packfile_store` we have a better place to host this bit though. Move it accordingly. While at it, convert the field into a boolean now that we're allowed to use them in our code base. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.h | 6 ------ packfile.c | 6 +++--- packfile.h | 6 ++++++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/odb.h b/odb.h index 22a170b434c929..bf1b4d4677317c 100644 --- a/odb.h +++ b/odb.h @@ -169,12 +169,6 @@ struct object_database { unsigned long approximate_object_count; unsigned approximate_object_count_valid : 1; - /* - * Whether packed_git has already been populated with this repository's - * packs. - */ - unsigned packed_git_initialized : 1; - /* * Submodule source paths that will be added as additional sources to * allow lookup of submodule objects via the main object database. diff --git a/packfile.c b/packfile.c index 36bc240107b57a..f37557eac5409f 100644 --- a/packfile.c +++ b/packfile.c @@ -1027,7 +1027,7 @@ static void prepare_packed_git(struct repository *r) { struct odb_source *source; - if (r->objects->packed_git_initialized) + if (r->objects->packfiles->initialized) return; odb_prepare_alternates(r->objects); @@ -1038,7 +1038,7 @@ static void prepare_packed_git(struct repository *r) rearrange_packed_git(r); prepare_packed_git_mru(r); - r->objects->packed_git_initialized = 1; + r->objects->packfiles->initialized = true; } void reprepare_packed_git(struct repository *r) @@ -1060,7 +1060,7 @@ void reprepare_packed_git(struct repository *r) odb_clear_loose_cache(source); r->objects->approximate_object_count_valid = 0; - r->objects->packed_git_initialized = 0; + r->objects->packfiles->initialized = false; prepare_packed_git(r); obj_read_unlock(); } diff --git a/packfile.h b/packfile.h index d7ac8d24b435b2..cf81091175f8cd 100644 --- a/packfile.h +++ b/packfile.h @@ -63,6 +63,12 @@ struct packfile_store { * the store. */ struct packed_git *packs; + + /* + * Whether packfiles have already been populated with this store's + * packs. + */ + bool initialized; }; /* From 14aaf5c9d889a4988ffc64b39fe38bd19b930a50 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:03 +0200 Subject: [PATCH 05/24] odb: move packfile map into `struct packfile_store` The object database tracks a map of packfiles by their respective paths, which is used to figure out whether a given packfile has already been loaded. With the introduction of the `struct packfile_store` we have a better place to host this list though. Move the map accordingly. `pack_map_entry_cmp()` isn't used anywhere but in "packfile.c" anymore after this change, so we convert it to a static function, as well. Note that we also drop the `inline` hint: the function is used as a callback function exclusively, and callbacks cannot be inlined. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 2 +- odb.c | 2 -- odb.h | 8 +------- packfile.c | 20 ++++++++++++++++++-- packfile.h | 20 ++++++-------------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/midx.c b/midx.c index 7726c13d7e7bc0..e96970efbfbb79 100644 --- a/midx.c +++ b/midx.c @@ -460,7 +460,7 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addbuf(&key, &pack_name); strbuf_strip_suffix(&key, ".idx"); strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&r->objects->pack_map, + p = hashmap_get_entry_from_hash(&r->objects->packfiles->map, strhash(key.buf), key.buf, struct packed_git, packmap_ent); if (!p) { diff --git a/odb.c b/odb.c index 7201d01406db6f..737d98c91191af 100644 --- a/odb.c +++ b/odb.c @@ -998,7 +998,6 @@ struct object_database *odb_new(struct repository *repo) o->repo = repo; o->packfiles = packfile_store_new(o); INIT_LIST_HEAD(&o->packed_git_mru); - hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); return o; @@ -1041,6 +1040,5 @@ void odb_clear(struct object_database *o) packfile_store_free(o->packfiles); o->packfiles = NULL; - hashmap_clear(&o->pack_map); string_list_clear(&o->submodule_source_paths, 0); } diff --git a/odb.h b/odb.h index bf1b4d4677317c..b79e7280c149cb 100644 --- a/odb.h +++ b/odb.h @@ -135,7 +135,7 @@ struct object_database { /* * private data * - * should only be accessed directly by packfile.c + * Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; /* A most-recently-used ordered version of the packed_git list. */ @@ -155,12 +155,6 @@ struct object_database { struct cached_object_entry *cached_objects; size_t cached_object_nr, cached_object_alloc; - /* - * A map of packfiles to packed_git structs for tracking which - * packs have been loaded already. - */ - struct hashmap pack_map; - /* * A fast, rough count of the number of objects in the repository. * These two fields are not meant for direct access. Use diff --git a/packfile.c b/packfile.c index f37557eac5409f..17e0b8ab27ece6 100644 --- a/packfile.c +++ b/packfile.c @@ -788,7 +788,7 @@ void install_packed_git(struct repository *r, struct packed_git *pack) r->objects->packfiles->packs = pack; hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&r->objects->pack_map, &pack->packmap_ent); + hashmap_add(&r->objects->packfiles->map, &pack->packmap_ent); } void (*report_garbage)(unsigned seen_bits, const char *path); @@ -901,7 +901,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, hashmap_entry_init(&hent, hash); /* Don't reopen a pack we already have. */ - if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) { + if (!hashmap_get(&data->r->objects->packfiles->map, &hent, pack_name)) { p = add_packed_git(data->r, full_name, full_name_len, data->local); if (p) install_packed_git(data->r, p); @@ -2328,11 +2328,26 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } +static int pack_map_entry_cmp(const void *cmp_data UNUSED, + const struct hashmap_entry *entry, + const struct hashmap_entry *entry2, + const void *keydata) +{ + const char *key = keydata; + const struct packed_git *pg1, *pg2; + + pg1 = container_of(entry, const struct packed_git, packmap_ent); + pg2 = container_of(entry2, const struct packed_git, packmap_ent); + + return strcmp(pg1->pack_name, key ? key : pg2->pack_name); +} + struct packfile_store *packfile_store_new(struct object_database *odb) { struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; + hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); return store; } @@ -2342,6 +2357,7 @@ void packfile_store_free(struct packfile_store *store) next = p->next; free(p); } + hashmap_clear(&store->map); free(store); } diff --git a/packfile.h b/packfile.h index cf81091175f8cd..9bbef511647729 100644 --- a/packfile.h +++ b/packfile.h @@ -64,6 +64,12 @@ struct packfile_store { */ struct packed_git *packs; + /* + * A map of packfile names to packed_git structs for tracking which + * packs have been loaded already. + */ + struct hashmap map; + /* * Whether packfiles have already been populated with this store's * packs. @@ -89,20 +95,6 @@ void packfile_store_free(struct packfile_store *store); */ void packfile_store_close(struct packfile_store *store); -static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, - const struct hashmap_entry *entry, - const struct hashmap_entry *entry2, - const void *keydata) -{ - const char *key = keydata; - const struct packed_git *pg1, *pg2; - - pg1 = container_of(entry, const struct packed_git, packmap_ent); - pg2 = container_of(entry2, const struct packed_git, packmap_ent); - - return strcmp(pg1->pack_name, key ? key : pg2->pack_name); -} - struct pack_window { struct pack_window *next; unsigned char *base; From fe835b0ca0ba4d6968cd2d1f824c178547934792 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:04 +0200 Subject: [PATCH 06/24] odb: move MRU list of packfiles into `struct packfile_store` The object database tracks the list of packfiles in most-recently-used order, which is mostly used to favor reading from packfiles that contain most of the objects that we're currently accessing. With the introduction of the `struct packfile_store` we have a better place to host this list though. Move the list accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 2 +- odb.c | 2 -- odb.h | 4 ---- packfile.c | 11 ++++++----- packfile.h | 3 +++ 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/midx.c b/midx.c index e96970efbfbb79..91c7b3917d6b80 100644 --- a/midx.c +++ b/midx.c @@ -468,7 +468,7 @@ int prepare_midx_pack(struct multi_pack_index *m, m->source->local); if (p) { install_packed_git(r, p); - list_add_tail(&p->mru, &r->objects->packed_git_mru); + list_add_tail(&p->mru, &r->objects->packfiles->mru); } } diff --git a/odb.c b/odb.c index 737d98c91191af..32e982bf0b98cb 100644 --- a/odb.c +++ b/odb.c @@ -997,7 +997,6 @@ struct object_database *odb_new(struct repository *repo) memset(o, 0, sizeof(*o)); o->repo = repo; o->packfiles = packfile_store_new(o); - INIT_LIST_HEAD(&o->packed_git_mru); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); return o; @@ -1035,7 +1034,6 @@ void odb_clear(struct object_database *o) free((char *) o->cached_objects[i].value.buf); FREE_AND_NULL(o->cached_objects); - INIT_LIST_HEAD(&o->packed_git_mru); close_object_store(o); packfile_store_free(o->packfiles); o->packfiles = NULL; diff --git a/odb.h b/odb.h index b79e7280c149cb..3044b6a661369e 100644 --- a/odb.h +++ b/odb.h @@ -3,7 +3,6 @@ #include "hashmap.h" #include "object.h" -#include "list.h" #include "oidset.h" #include "oidmap.h" #include "string-list.h" @@ -138,9 +137,6 @@ struct object_database { * Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; - /* A most-recently-used ordered version of the packed_git list. */ - struct list_head packed_git_mru; - struct { struct packed_git **packs; unsigned flags; diff --git a/packfile.c b/packfile.c index 17e0b8ab27ece6..861d7ffd6fa9db 100644 --- a/packfile.c +++ b/packfile.c @@ -1017,10 +1017,10 @@ static void prepare_packed_git_mru(struct repository *r) { struct packed_git *p; - INIT_LIST_HEAD(&r->objects->packed_git_mru); + INIT_LIST_HEAD(&r->objects->packfiles->mru); for (p = r->objects->packfiles->packs; p; p = p->next) - list_add_tail(&p->mru, &r->objects->packed_git_mru); + list_add_tail(&p->mru, &r->objects->packfiles->mru); } static void prepare_packed_git(struct repository *r) @@ -1095,7 +1095,7 @@ struct packed_git *get_all_packs(struct repository *r) struct list_head *get_packed_git_mru(struct repository *r) { prepare_packed_git(r); - return &r->objects->packed_git_mru; + return &r->objects->packfiles->mru; } unsigned long unpack_object_header_buffer(const unsigned char *buf, @@ -2078,10 +2078,10 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs) return 0; - list_for_each(pos, &r->objects->packed_git_mru) { + list_for_each(pos, &r->objects->packfiles->mru) { struct packed_git *p = list_entry(pos, struct packed_git, mru); if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - list_move(&p->mru, &r->objects->packed_git_mru); + list_move(&p->mru, &r->objects->packfiles->mru); return 1; } } @@ -2347,6 +2347,7 @@ struct packfile_store *packfile_store_new(struct object_database *odb) struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; + INIT_LIST_HEAD(&store->mru); hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); return store; } diff --git a/packfile.h b/packfile.h index 9bbef511647729..d48d46cc1bdce7 100644 --- a/packfile.h +++ b/packfile.h @@ -64,6 +64,9 @@ struct packfile_store { */ struct packed_git *packs; + /* A most-recently-used ordered version of the packs list. */ + struct list_head mru; + /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. From bd1a521de869dc9b26ca88efc5eae022222918c1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:05 +0200 Subject: [PATCH 07/24] odb: move kept cache into `struct packfile_store` The object database tracks a cache of "kept" packfiles, which is used by git-pack-objects(1) to handle cruft objects. With the introduction of the `struct packfile_store` we have a better place to host this cache though. Move the cache accordingly. This moves the last bit of packfile-related state from the object database into the packfile store. Adapt the comment for the `packfiles` pointer in `struct object_database` to reflect this. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.h | 10 +--------- packfile.c | 16 ++++++++-------- packfile.h | 14 ++++++++++++++ 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/odb.h b/odb.h index 3044b6a661369e..9dd7bb6bc3e5f8 100644 --- a/odb.h +++ b/odb.h @@ -131,16 +131,8 @@ struct object_database { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ - /* - * private data - * - * Should only be accessed directly by packfile.c and midx.c. - */ + /* Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; - struct { - struct packed_git **packs; - unsigned flags; - } kept_pack_cache; /* * This is meant to hold a *small* number of objects that you would diff --git a/packfile.c b/packfile.c index 861d7ffd6fa9db..95a78f267f1f30 100644 --- a/packfile.c +++ b/packfile.c @@ -2091,19 +2091,19 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa static void maybe_invalidate_kept_pack_cache(struct repository *r, unsigned flags) { - if (!r->objects->kept_pack_cache.packs) + if (!r->objects->packfiles->kept_cache.packs) return; - if (r->objects->kept_pack_cache.flags == flags) + if (r->objects->packfiles->kept_cache.flags == flags) return; - FREE_AND_NULL(r->objects->kept_pack_cache.packs); - r->objects->kept_pack_cache.flags = 0; + FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); + r->objects->packfiles->kept_cache.flags = 0; } struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) { maybe_invalidate_kept_pack_cache(r, flags); - if (!r->objects->kept_pack_cache.packs) { + if (!r->objects->packfiles->kept_cache.packs) { struct packed_git **packs = NULL; size_t nr = 0, alloc = 0; struct packed_git *p; @@ -2126,11 +2126,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) ALLOC_GROW(packs, nr + 1, alloc); packs[nr] = NULL; - r->objects->kept_pack_cache.packs = packs; - r->objects->kept_pack_cache.flags = flags; + r->objects->packfiles->kept_cache.packs = packs; + r->objects->packfiles->kept_cache.flags = flags; } - return r->objects->kept_pack_cache.packs; + return r->objects->packfiles->kept_cache.packs; } int find_kept_pack_entry(struct repository *r, diff --git a/packfile.h b/packfile.h index d48d46cc1bdce7..bf66211986e436 100644 --- a/packfile.h +++ b/packfile.h @@ -64,6 +64,20 @@ struct packfile_store { */ struct packed_git *packs; + /* + * Cache of packfiles which are marked as "kept", either because there + * is an on-disk ".keep" file or because they are marked as "kept" in + * memory. + * + * Should not be accessed directly, but via `kept_pack_cache()`. The + * list of packs gets invalidated when the stored flags and the flags + * passed to `kept_pack_cache()` mismatch. + */ + struct { + struct packed_git **packs; + unsigned flags; + } kept_cache; + /* A most-recently-used ordered version of the packs list. */ struct list_head mru; From 995ee880277144207b2cb45069218aa972fb350b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:06 +0200 Subject: [PATCH 08/24] packfile: reorder functions to avoid function declaration Reorder functions so that we can avoid a forward declaration of `prepare_packed_git()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 67 +++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/packfile.c b/packfile.c index 95a78f267f1f30..5588a7ad6dfde7 100644 --- a/packfile.c +++ b/packfile.c @@ -946,40 +946,6 @@ static void prepare_packed_git_one(struct odb_source *source) string_list_clear(data.garbage, 0); } -static void prepare_packed_git(struct repository *r); -/* - * Give a fast, rough count of the number of objects in the repository. This - * ignores loose objects completely. If you have a lot of them, then either - * you should repack because your performance will be awful, or they are - * all unreachable objects about to be pruned, in which case they're not really - * interesting as a measure of repo size in the first place. - */ -unsigned long repo_approximate_object_count(struct repository *r) -{ - if (!r->objects->approximate_object_count_valid) { - struct odb_source *source; - unsigned long count = 0; - struct packed_git *p; - - prepare_packed_git(r); - - for (source = r->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - count += m->num_objects; - } - - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (open_pack_index(p)) - continue; - count += p->num_objects; - } - r->objects->approximate_object_count = count; - r->objects->approximate_object_count_valid = 1; - } - return r->objects->approximate_object_count; -} - DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next); static int sort_pack(const struct packed_git *a, const struct packed_git *b) @@ -1098,6 +1064,39 @@ struct list_head *get_packed_git_mru(struct repository *r) return &r->objects->packfiles->mru; } +/* + * Give a fast, rough count of the number of objects in the repository. This + * ignores loose objects completely. If you have a lot of them, then either + * you should repack because your performance will be awful, or they are + * all unreachable objects about to be pruned, in which case they're not really + * interesting as a measure of repo size in the first place. + */ +unsigned long repo_approximate_object_count(struct repository *r) +{ + if (!r->objects->approximate_object_count_valid) { + struct odb_source *source; + unsigned long count = 0; + struct packed_git *p; + + prepare_packed_git(r); + + for (source = r->objects->sources; source; source = source->next) { + struct multi_pack_index *m = get_multi_pack_index(source); + if (m) + count += m->num_objects; + } + + for (p = r->objects->packfiles->packs; p; p = p->next) { + if (open_pack_index(p)) + continue; + count += p->num_objects; + } + r->objects->approximate_object_count = count; + r->objects->approximate_object_count_valid = 1; + } + return r->objects->approximate_object_count; +} + unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) { From c36ecc0685a75f913fe4871766715221c71f4b09 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:07 +0200 Subject: [PATCH 09/24] packfile: refactor `prepare_packed_git()` to work on packfile store The `prepare_packed_git()` function and its friends are responsible for loading packfiles as well as the multi-pack index for a given object database. Refactor these functions to accept a packfile store instead of a repository to clarify their scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/packfile.c b/packfile.c index 5588a7ad6dfde7..095c85919b6c7c 100644 --- a/packfile.c +++ b/packfile.c @@ -974,37 +974,32 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) return -1; } -static void rearrange_packed_git(struct repository *r) -{ - sort_packs(&r->objects->packfiles->packs, sort_pack); -} - -static void prepare_packed_git_mru(struct repository *r) +static void packfile_store_prepare_mru(struct packfile_store *store) { struct packed_git *p; - INIT_LIST_HEAD(&r->objects->packfiles->mru); + INIT_LIST_HEAD(&store->mru); - for (p = r->objects->packfiles->packs; p; p = p->next) - list_add_tail(&p->mru, &r->objects->packfiles->mru); + for (p = store->packs; p; p = p->next) + list_add_tail(&p->mru, &store->mru); } -static void prepare_packed_git(struct repository *r) +static void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; - if (r->objects->packfiles->initialized) + if (store->initialized) return; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { + odb_prepare_alternates(store->odb); + for (source = store->odb->sources; source; source = source->next) { prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } - rearrange_packed_git(r); + sort_packs(&store->packs, sort_pack); - prepare_packed_git_mru(r); - r->objects->packfiles->initialized = true; + packfile_store_prepare_mru(store); + store->initialized = true; } void reprepare_packed_git(struct repository *r) @@ -1027,25 +1022,25 @@ void reprepare_packed_git(struct repository *r) r->objects->approximate_object_count_valid = 0; r->objects->packfiles->initialized = false; - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); obj_read_unlock(); } struct packed_git *get_packed_git(struct repository *r) { - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); return r->objects->packfiles->packs; } struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { - prepare_packed_git(source->odb->repo); + packfile_store_prepare(source->odb->packfiles); return source->midx; } struct packed_git *get_all_packs(struct repository *r) { - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); for (struct odb_source *source = r->objects->sources; source; source = source->next) { struct multi_pack_index *m = source->midx; @@ -1060,7 +1055,7 @@ struct packed_git *get_all_packs(struct repository *r) struct list_head *get_packed_git_mru(struct repository *r) { - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); return &r->objects->packfiles->mru; } @@ -1078,7 +1073,7 @@ unsigned long repo_approximate_object_count(struct repository *r) unsigned long count = 0; struct packed_git *p; - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); for (source = r->objects->sources; source; source = source->next) { struct multi_pack_index *m = get_multi_pack_index(source); @@ -2068,7 +2063,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa { struct list_head *pos; - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); for (struct odb_source *source = r->objects->sources; source; source = source->next) if (source->midx && fill_midx_entry(source->midx, oid, e)) From 78237ea53d6546aeab7adb2c7547a1177311ccde Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:08 +0200 Subject: [PATCH 10/24] packfile: split up responsibilities of `reprepare_packed_git()` In `reprepare_packed_git()` we perform a couple of operations: - We reload alternate object directories. - We clear the loose object cache. - We reprepare packfiles. While the logic is hosted in "packfile.c", it clearly reaches into other subsystems that aren't related to packfiles. Split up the responsibility and introduce `odb_reprepare()` which now becomes responsible for repreparing the whole object database. The existing `reprepare_packed_git()` function is refactored accordingly and only cares about reloading the packfile store now. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/backfill.c | 2 +- builtin/gc.c | 4 ++-- builtin/receive-pack.c | 2 +- builtin/repack.c | 2 +- bulk-checkin.c | 2 +- connected.c | 2 +- fetch-pack.c | 4 ++-- object-name.c | 2 +- odb.c | 27 ++++++++++++++++++++++++++- odb.h | 6 ++++++ packfile.c | 26 ++++---------------------- packfile.h | 9 ++++++++- transport-helper.c | 2 +- 13 files changed, 55 insertions(+), 35 deletions(-) diff --git a/builtin/backfill.c b/builtin/backfill.c index 80056abe4730ae..e80fc1b694df61 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -53,7 +53,7 @@ static void download_batch(struct backfill_context *ctx) * We likely have a new packfile. Add it to the packed list to * avoid possible duplicate downloads of the same objects. */ - reprepare_packed_git(ctx->repo); + odb_reprepare(ctx->repo->objects); } static int fill_missing_blobs(const char *path UNUSED, diff --git a/builtin/gc.c b/builtin/gc.c index 03ae4926b20982..aeca06a08bec5d 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1042,7 +1042,7 @@ int cmd_gc(int argc, die(FAILED_RUN, "rerere"); report_garbage = report_pack_garbage; - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (pack_garbage.nr > 0) { close_object_store(the_repository->objects); clean_pack_garbage(); @@ -1491,7 +1491,7 @@ static off_t get_auto_pack_size(void) struct packed_git *p; struct repository *r = the_repository; - reprepare_packed_git(r); + odb_reprepare(r->objects); for (p = get_all_packs(r); p; p = p->next) { if (p->pack_size > max_size) { second_largest_size = max_size; diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 1113137a6f0b3f..c9288a9c7e382b 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2389,7 +2389,7 @@ static const char *unpack(int err_fd, struct shallow_info *si) status = finish_command(&child); if (status) return "index-pack abnormal exit"; - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); } return NULL; } diff --git a/builtin/repack.c b/builtin/repack.c index c490a51e9192da..5ff27fc8e29a9b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1685,7 +1685,7 @@ int cmd_repack(int argc, goto cleanup; } - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (delete_redundant) { int opts = 0; diff --git a/bulk-checkin.c b/bulk-checkin.c index b2809ab0398136..f65439a748a4ec 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -90,7 +90,7 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) strbuf_release(&packname); /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); } /* diff --git a/connected.c b/connected.c index 18c13245d8e40c..d6e9682fd93ce9 100644 --- a/connected.c +++ b/connected.c @@ -72,7 +72,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data, * Before checking for promisor packs, be sure we have the * latest pack-files loaded into memory. */ - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); do { struct packed_git *p; diff --git a/fetch-pack.c b/fetch-pack.c index 6ed566295189dc..fe7a84bf2f97fa 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1983,7 +1983,7 @@ static void update_shallow(struct fetch_pack_args *args, * remote is shallow, but this is a clone, there are * no objects in repo to worry about. Accept any * shallow points that exist in the pack (iow in repo - * after get_pack() and reprepare_packed_git()) + * after get_pack() and odb_reprepare()) */ struct oid_array extra = OID_ARRAY_INIT; struct object_id *oid = si->shallow->oid; @@ -2108,7 +2108,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought, &si, pack_lockfiles); } - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (!args->cloning && args->deepen) { struct check_connected_options opt = CHECK_CONNECTED_INIT; diff --git a/object-name.c b/object-name.c index 732056ff5e305b..df9e0c5f020a91 100644 --- a/object-name.c +++ b/object-name.c @@ -596,7 +596,7 @@ static enum get_oid_result get_short_oid(struct repository *r, * or migrated from loose to packed. */ if (status == MISSING_OBJECT) { - reprepare_packed_git(r); + odb_reprepare(r->objects); find_short_object_filename(&ds); find_short_packed_object(&ds); status = finish_object_disambiguation(&ds, oid); diff --git a/odb.c b/odb.c index 32e982bf0b98cb..65a6cc67b61ccf 100644 --- a/odb.c +++ b/odb.c @@ -694,7 +694,7 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { - reprepare_packed_git(odb->repo); + odb_reprepare(odb->repo->objects); if (find_pack_entry(odb->repo, real, &e)) break; } @@ -1040,3 +1040,28 @@ void odb_clear(struct object_database *o) string_list_clear(&o->submodule_source_paths, 0); } + +void odb_reprepare(struct object_database *o) +{ + struct odb_source *source; + + obj_read_lock(); + + /* + * Reprepare alt odbs, in case the alternates file was modified + * during the course of this process. This only _adds_ odbs to + * the linked list, so existing odbs will continue to exist for + * the lifetime of the process. + */ + o->loaded_alternates = 0; + odb_prepare_alternates(o); + + for (source = o->sources; source; source = source->next) + odb_clear_loose_cache(source); + + o->approximate_object_count_valid = 0; + + packfile_store_reprepare(o->packfiles); + + obj_read_unlock(); +} diff --git a/odb.h b/odb.h index 9dd7bb6bc3e5f8..ab39e3605d5419 100644 --- a/odb.h +++ b/odb.h @@ -161,6 +161,12 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Clear caches, reload alternates and then reload object sources so that new + * objects may become accessible. + */ +void odb_reprepare(struct object_database *o); + /* * Find source by its object directory path. Returns a `NULL` pointer in case * the source could not be found. diff --git a/packfile.c b/packfile.c index 095c85919b6c7c..950b98aac51c3f 100644 --- a/packfile.c +++ b/packfile.c @@ -1002,28 +1002,10 @@ static void packfile_store_prepare(struct packfile_store *store) store->initialized = true; } -void reprepare_packed_git(struct repository *r) +void packfile_store_reprepare(struct packfile_store *store) { - struct odb_source *source; - - obj_read_lock(); - - /* - * Reprepare alt odbs, in case the alternates file was modified - * during the course of this process. This only _adds_ odbs to - * the linked list, so existing odbs will continue to exist for - * the lifetime of the process. - */ - r->objects->loaded_alternates = 0; - odb_prepare_alternates(r->objects); - - for (source = r->objects->sources; source; source = source->next) - odb_clear_loose_cache(source); - - r->objects->approximate_object_count_valid = 0; - r->objects->packfiles->initialized = false; - packfile_store_prepare(r->objects->packfiles); - obj_read_unlock(); + store->initialized = false; + packfile_store_prepare(store); } struct packed_git *get_packed_git(struct repository *r) @@ -1144,7 +1126,7 @@ unsigned long get_size_from_delta(struct packed_git *p, * * Other worrying sections could be the call to close_pack_fd(), * which can close packs even with in-use windows, and to - * reprepare_packed_git(). Regarding the former, mmap doc says: + * odb_reprepare(). Regarding the former, mmap doc says: * "closing the file descriptor does not unmap the region". And * for the latter, it won't re-open already available packs. */ diff --git a/packfile.h b/packfile.h index bf66211986e436..a85ff607febe54 100644 --- a/packfile.h +++ b/packfile.h @@ -112,6 +112,14 @@ void packfile_store_free(struct packfile_store *store); */ void packfile_store_close(struct packfile_store *store); +/* + * Clear the packfile caches and try to look up any new packfiles that have + * appeared since last preparing the packfiles store. + * + * This function must be called under the `odb_read_lock()`. + */ +void packfile_store_reprepare(struct packfile_store *store); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -188,7 +196,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -void reprepare_packed_git(struct repository *r); void install_packed_git(struct repository *r, struct packed_git *pack); struct packed_git *get_packed_git(struct repository *r); diff --git a/transport-helper.c b/transport-helper.c index 0789e5bca53282..4d95d84f9e4d05 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -450,7 +450,7 @@ static int fetch_with_fetch(struct transport *transport, } strbuf_release(&buf); - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); return 0; } From f6f236d926915411eca28cb1c47619fdacf6eafb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:09 +0200 Subject: [PATCH 11/24] packfile: refactor `install_packed_git()` to work on packfile store The `install_packed_git()` functions adds a packfile to a specific object store. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 2 +- builtin/index-pack.c | 2 +- http.c | 2 +- http.h | 2 +- midx.c | 2 +- packfile.c | 11 ++++++----- packfile.h | 9 +++++++-- 7 files changed, 18 insertions(+), 12 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 2c35f9345d02d7..e9d82b31c390bf 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -901,7 +901,7 @@ static void end_packfile(void) if (!new_p) die("core git rejected index %s", idx_name); all_packs[pack_id] = new_p; - install_packed_git(the_repository, new_p); + packfile_store_add_pack(the_repository->objects->packfiles, new_p); free(idx_name); /* Print the boundary */ diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f91c301bba9fbd..ed490dfad47859 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1645,7 +1645,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, p = add_packed_git(the_repository, final_index_name, strlen(final_index_name), 0); if (p) - install_packed_git(the_repository, p); + packfile_store_add_pack(the_repository->objects->packfiles, p); } if (!from_stdin) { diff --git a/http.c b/http.c index 98853d64834f99..af2120b64c711c 100644 --- a/http.c +++ b/http.c @@ -2541,7 +2541,7 @@ void http_install_packfile(struct packed_git *p, lst = &((*lst)->next); *lst = (*lst)->next; - install_packed_git(the_repository, p); + packfile_store_add_pack(the_repository->objects->packfiles, p); } struct http_pack_request *new_http_pack_request( diff --git a/http.h b/http.h index 36202139f451ff..e5a5380c6c384e 100644 --- a/http.h +++ b/http.h @@ -210,7 +210,7 @@ int finish_http_pack_request(struct http_pack_request *preq); void release_http_pack_request(struct http_pack_request *preq); /* - * Remove p from the given list, and invoke install_packed_git() on it. + * Remove p from the given list, and invoke packfile_store_add_pack() on it. * * This is a convenience function for users that have obtained a list of packs * from http_get_info_packs() and have chosen a specific pack to fetch. diff --git a/midx.c b/midx.c index 91c7b3917d6b80..69c44be71c5dc7 100644 --- a/midx.c +++ b/midx.c @@ -467,7 +467,7 @@ int prepare_midx_pack(struct multi_pack_index *m, p = add_packed_git(r, pack_name.buf, pack_name.len, m->source->local); if (p) { - install_packed_git(r, p); + packfile_store_add_pack(r->objects->packfiles, p); list_add_tail(&p->mru, &r->objects->packfiles->mru); } } diff --git a/packfile.c b/packfile.c index 950b98aac51c3f..af806aba093518 100644 --- a/packfile.c +++ b/packfile.c @@ -779,16 +779,17 @@ struct packed_git *add_packed_git(struct repository *r, const char *path, return p; } -void install_packed_git(struct repository *r, struct packed_git *pack) +void packfile_store_add_pack(struct packfile_store *store, + struct packed_git *pack) { if (pack->pack_fd != -1) pack_open_fds++; - pack->next = r->objects->packfiles->packs; - r->objects->packfiles->packs = pack; + pack->next = store->packs; + store->packs = pack; hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&r->objects->packfiles->map, &pack->packmap_ent); + hashmap_add(&store->map, &pack->packmap_ent); } void (*report_garbage)(unsigned seen_bits, const char *path); @@ -904,7 +905,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (!hashmap_get(&data->r->objects->packfiles->map, &hent, pack_name)) { p = add_packed_git(data->r, full_name, full_name_len, data->local); if (p) - install_packed_git(data->r, p); + packfile_store_add_pack(data->r->objects->packfiles, p); } free(pack_name); } diff --git a/packfile.h b/packfile.h index a85ff607febe54..ba4b0cef9cb0e7 100644 --- a/packfile.h +++ b/packfile.h @@ -120,6 +120,13 @@ void packfile_store_close(struct packfile_store *store); */ void packfile_store_reprepare(struct packfile_store *store); +/* + * Add the pack to the store so that contained objects become accessible via + * the store. This moves ownership into the store. + */ +void packfile_store_add_pack(struct packfile_store *store, + struct packed_git *pack); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -196,8 +203,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -void install_packed_git(struct repository *r, struct packed_git *pack); - struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); struct multi_pack_index *get_multi_pack_index(struct odb_source *source); From d67530f6bbe56f1951b8fd2fcdaae255bf552e2d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:10 +0200 Subject: [PATCH 12/24] packfile: introduce function to load and add packfiles We have a recurring pattern where we essentially perform an upsert of a packfile in case it isn't yet known by the packfile store. The logic to do so is non-trivial as we have to reconstruct the packfile's key, check the map of packfiles, then create the new packfile and finally add it to the store. Introduce a new function that does this dance for us. Refactor callsites to use it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 4 ++-- builtin/index-pack.c | 10 +++------- midx.c | 23 ++++------------------ packfile.c | 44 ++++++++++++++++++++++++++++++------------- packfile.h | 8 ++++++++ 5 files changed, 48 insertions(+), 41 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index e9d82b31c390bf..a26e79689d55b8 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -897,11 +897,11 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. */ - new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1); + new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles, + idx_name, 1); if (!new_p) die("core git rejected index %s", idx_name); all_packs[pack_id] = new_p; - packfile_store_add_pack(the_repository->objects->packfiles, new_p); free(idx_name); /* Print the boundary */ diff --git a/builtin/index-pack.c b/builtin/index-pack.c index ed490dfad47859..2b78ba7fe4d14a 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1640,13 +1640,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name, rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, hash, "idx", 1); - if (do_fsck_object) { - struct packed_git *p; - p = add_packed_git(the_repository, final_index_name, - strlen(final_index_name), 0); - if (p) - packfile_store_add_pack(the_repository->objects->packfiles, p); - } + if (do_fsck_object) + packfile_store_load_pack(the_repository->objects->packfiles, + final_index_name, 0); if (!from_stdin) { printf("%s\n", hash_to_hex(hash)); diff --git a/midx.c b/midx.c index 69c44be71c5dc7..3faeaf2f8faed1 100644 --- a/midx.c +++ b/midx.c @@ -443,7 +443,6 @@ int prepare_midx_pack(struct multi_pack_index *m, { struct repository *r = m->source->odb->repo; struct strbuf pack_name = STRBUF_INIT; - struct strbuf key = STRBUF_INIT; struct packed_git *p; pack_int_id = midx_for_pack(&m, pack_int_id); @@ -455,25 +454,11 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); - - /* pack_map holds the ".pack" name, but we have the .idx */ - strbuf_addbuf(&key, &pack_name); - strbuf_strip_suffix(&key, ".idx"); - strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&r->objects->packfiles->map, - strhash(key.buf), key.buf, - struct packed_git, packmap_ent); - if (!p) { - p = add_packed_git(r, pack_name.buf, pack_name.len, - m->source->local); - if (p) { - packfile_store_add_pack(r->objects->packfiles, p); - list_add_tail(&p->mru, &r->objects->packfiles->mru); - } - } - + p = packfile_store_load_pack(r->objects->packfiles, + pack_name.buf, m->source->local); + if (p) + list_add_tail(&p->mru, &r->objects->packfiles->mru); strbuf_release(&pack_name); - strbuf_release(&key); if (!p) { m->packs[pack_int_id] = MIDX_PACK_ERROR; diff --git a/packfile.c b/packfile.c index af806aba093518..9224ca424c187c 100644 --- a/packfile.c +++ b/packfile.c @@ -792,6 +792,33 @@ void packfile_store_add_pack(struct packfile_store *store, hashmap_add(&store->map, &pack->packmap_ent); } +struct packed_git *packfile_store_load_pack(struct packfile_store *store, + const char *idx_path, int local) +{ + struct strbuf key = STRBUF_INIT; + struct packed_git *p; + + /* + * We're being called with the path to the index file, but `pack_map` + * holds the path to the packfile itself. + */ + strbuf_addstr(&key, idx_path); + strbuf_strip_suffix(&key, ".idx"); + strbuf_addstr(&key, ".pack"); + + p = hashmap_get_entry_from_hash(&store->map, strhash(key.buf), key.buf, + struct packed_git, packmap_ent); + if (!p) { + p = add_packed_git(store->odb->repo, idx_path, + strlen(idx_path), local); + if (p) + packfile_store_add_pack(store, p); + } + + strbuf_release(&key); + return p; +} + void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, @@ -891,23 +918,14 @@ static void prepare_pack(const char *full_name, size_t full_name_len, const char *file_name, void *_data) { struct prepare_pack_data *data = (struct prepare_pack_data *)_data; - struct packed_git *p; size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && !(data->m && midx_contains_pack(data->m, file_name))) { - struct hashmap_entry hent; - char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name); - unsigned int hash = strhash(pack_name); - hashmap_entry_init(&hent, hash); - - /* Don't reopen a pack we already have. */ - if (!hashmap_get(&data->r->objects->packfiles->map, &hent, pack_name)) { - p = add_packed_git(data->r, full_name, full_name_len, data->local); - if (p) - packfile_store_add_pack(data->r->objects->packfiles, p); - } - free(pack_name); + char *trimmed_path = xstrndup(full_name, full_name_len); + packfile_store_load_pack(data->r->objects->packfiles, + trimmed_path, data->local); + free(trimmed_path); } if (!report_garbage) diff --git a/packfile.h b/packfile.h index ba4b0cef9cb0e7..fcefcbbef65f0b 100644 --- a/packfile.h +++ b/packfile.h @@ -127,6 +127,14 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Open the packfile and add it to the store if it isn't yet known. Returns + * either the newly opened packfile or the preexisting packfile. Returns a + * `NULL` pointer in case the packfile could not be opened. + */ +struct packed_git *packfile_store_load_pack(struct packfile_store *store, + const char *idx_path, int local); + struct pack_window { struct pack_window *next; unsigned char *base; From ab8aff4a6b2a1d5aa79deeb64bdeecc0234b4ddf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:11 +0200 Subject: [PATCH 13/24] packfile: move `get_multi_pack_index()` into "midx.c" The `get_multi_pack_index()` function is declared and implemented in the packfile subsystem, even though it really belongs into the multi-pack index subsystem. The reason for this is likely that it needs to call `packfile_store_prepare()`, which is not exposed by the packfile system. In a subsequent commit we're about to add another caller outside of the packfile system though, so we'll have to expose the function anyway. Do so now already and move `get_multi_pack_index()` into the MIDX subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 6 ++++++ midx.h | 1 + packfile.c | 8 +------- packfile.h | 10 +++++++++- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/midx.c b/midx.c index 3faeaf2f8faed1..1d6269f957e781 100644 --- a/midx.c +++ b/midx.c @@ -93,6 +93,12 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, return 0; } +struct multi_pack_index *get_multi_pack_index(struct odb_source *source) +{ + packfile_store_prepare(source->odb->packfiles); + return source->midx; +} + static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, const char *midx_name) { diff --git a/midx.h b/midx.h index e241d2d6900bc3..6e54d73503d560 100644 --- a/midx.h +++ b/midx.h @@ -94,6 +94,7 @@ void get_midx_chain_filename(struct odb_source *source, struct strbuf *out); void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf, const unsigned char *hash, const char *ext); +struct multi_pack_index *get_multi_pack_index(struct odb_source *source); struct multi_pack_index *load_multi_pack_index(struct odb_source *source); int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id); struct packed_git *nth_midxed_pack(struct multi_pack_index *m, diff --git a/packfile.c b/packfile.c index 9224ca424c187c..7a9193e5ef4664 100644 --- a/packfile.c +++ b/packfile.c @@ -1003,7 +1003,7 @@ static void packfile_store_prepare_mru(struct packfile_store *store) list_add_tail(&p->mru, &store->mru); } -static void packfile_store_prepare(struct packfile_store *store) +void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; @@ -1033,12 +1033,6 @@ struct packed_git *get_packed_git(struct repository *r) return r->objects->packfiles->packs; } -struct multi_pack_index *get_multi_pack_index(struct odb_source *source) -{ - packfile_store_prepare(source->odb->packfiles); - return source->midx; -} - struct packed_git *get_all_packs(struct repository *r) { packfile_store_prepare(r->objects->packfiles); diff --git a/packfile.h b/packfile.h index fcefcbbef65f0b..a9e561ac394863 100644 --- a/packfile.h +++ b/packfile.h @@ -112,6 +112,15 @@ void packfile_store_free(struct packfile_store *store); */ void packfile_store_close(struct packfile_store *store); +/* + * Prepare the packfile store by loading packfiles and multi-pack indices for + * all alternates. This becomes a no-op if the store is already prepared. + * + * It shouldn't typically be necessary to call this function directly, as + * functions that access the store know to prepare it. + */ +void packfile_store_prepare(struct packfile_store *store); + /* * Clear the packfile caches and try to look up any new packfiles that have * appeared since last preparing the packfiles store. @@ -213,7 +222,6 @@ extern void (*report_garbage)(unsigned seen_bits, const char *path); struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); -struct multi_pack_index *get_multi_pack_index(struct odb_source *source); struct packed_git *get_all_packs(struct repository *r); /* From 751808b2a18acba76b824aed4d8b7442bd7f5fca Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:12 +0200 Subject: [PATCH 14/24] packfile: refactor `get_packed_git()` to work on packfile store The `get_packed_git()` function prepares the packfile store and then returns its packfiles. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 2 +- builtin/grep.c | 2 +- object-name.c | 4 ++-- packfile.c | 6 +++--- packfile.h | 7 ++++++- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index aeca06a08bec5d..ec6735a540ad1b 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1423,7 +1423,7 @@ static int incremental_repack_auto_condition(struct gc_config *cfg UNUSED) if (incremental_repack_auto_limit < 0) return 1; - for (p = get_packed_git(the_repository); + for (p = packfile_store_get_packs(the_repository->objects->packfiles); count < incremental_repack_auto_limit && p; p = p->next) { if (!p->multi_pack_index) diff --git a/builtin/grep.c b/builtin/grep.c index 5df653733371d8..63a4959568fa1c 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1214,7 +1214,7 @@ int cmd_grep(int argc, if (recurse_submodules) repo_read_gitmodules(the_repository, 1); if (startup_info->have_repository) - (void)get_packed_git(the_repository); + (void)packfile_store_get_packs(the_repository->objects->packfiles); start_threads(&opt); } else { diff --git a/object-name.c b/object-name.c index df9e0c5f020a91..53356819a3def2 100644 --- a/object-name.c +++ b/object-name.c @@ -213,7 +213,7 @@ static void find_short_packed_object(struct disambiguate_state *ds) unique_in_midx(m, ds); } - for (p = get_packed_git(ds->repo); p && !ds->ambiguous; + for (p = packfile_store_get_packs(ds->repo->objects->packfiles); p && !ds->ambiguous; p = p->next) unique_in_pack(p, ds); } @@ -806,7 +806,7 @@ static void find_abbrev_len_packed(struct min_abbrev_data *mad) find_abbrev_len_for_midx(m, mad); } - for (p = get_packed_git(mad->repo); p; p = p->next) + for (p = packfile_store_get_packs(mad->repo->objects->packfiles); p; p = p->next) find_abbrev_len_for_pack(p, mad); } diff --git a/packfile.c b/packfile.c index 7a9193e5ef4664..b37f43afb587a5 100644 --- a/packfile.c +++ b/packfile.c @@ -1027,10 +1027,10 @@ void packfile_store_reprepare(struct packfile_store *store) packfile_store_prepare(store); } -struct packed_git *get_packed_git(struct repository *r) +struct packed_git *packfile_store_get_packs(struct packfile_store *store) { - packfile_store_prepare(r->objects->packfiles); - return r->objects->packfiles->packs; + packfile_store_prepare(store); + return store->packs; } struct packed_git *get_all_packs(struct repository *r) diff --git a/packfile.h b/packfile.h index a9e561ac394863..0b691ded7ef12a 100644 --- a/packfile.h +++ b/packfile.h @@ -136,6 +136,12 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Get packs managed by the given store. Does not load the MIDX or any packs + * referenced by it. + */ +struct packed_git *packfile_store_get_packs(struct packfile_store *store); + /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a @@ -220,7 +226,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); struct packed_git *get_all_packs(struct repository *r); From d2779beb36ff64eb062103db14006f7ae6da5f37 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:13 +0200 Subject: [PATCH 15/24] packfile: refactor `get_all_packs()` to work on packfile store The `get_all_packs()` function prepares the packfile store and then returns its packfiles. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 3 ++- builtin/count-objects.c | 3 ++- builtin/fast-import.c | 6 ++++-- builtin/fsck.c | 11 +++++++---- builtin/gc.c | 8 +++++--- builtin/pack-objects.c | 28 +++++++++++++++++++--------- builtin/pack-redundant.c | 6 ++++-- builtin/repack.c | 9 ++++++--- connected.c | 3 ++- http-backend.c | 5 +++-- http.c | 3 ++- pack-bitmap.c | 4 ++-- pack-objects.c | 3 ++- packfile.c | 12 ++++++------ packfile.h | 7 ++++++- server-info.c | 3 ++- t/helper/test-find-pack.c | 2 +- t/helper/test-pack-mtimes.c | 2 +- 18 files changed, 76 insertions(+), 42 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index fce0b06451c5cd..ee6715fa523ce6 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -852,9 +852,10 @@ static void batch_each_object(struct batch_options *opt, if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *pack; - for (pack = get_all_packs(the_repository); pack; pack = pack->next) { + for (pack = packfile_store_get_all_packs(packs); pack; pack = pack->next) { if (bitmap_index_contains_pack(bitmap, pack) || open_pack_index(pack)) continue; diff --git a/builtin/count-objects.c b/builtin/count-objects.c index a61d3b46aac627..f2f407c2a78183 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -122,6 +122,7 @@ int cmd_count_objects(int argc, count_loose, count_cruft, NULL, NULL); if (verbose) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; unsigned long num_pack = 0; off_t size_pack = 0; @@ -129,7 +130,7 @@ int cmd_count_objects(int argc, struct strbuf pack_buf = STRBUF_INIT; struct strbuf garbage_buf = STRBUF_INIT; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local) continue; if (open_pack_index(p)) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index a26e79689d55b8..b1d5549815ac66 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -952,6 +952,7 @@ static int store_object( struct object_id *oidout, uintmax_t mark) { + struct packfile_store *packs = the_repository->objects->packfiles; void *out, *delta; struct object_entry *e; unsigned char hdr[96]; @@ -975,7 +976,7 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (find_oid_pack(&oid, get_all_packs(the_repository))) { + } else if (find_oid_pack(&oid, packfile_store_get_all_packs(packs))) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1092,6 +1093,7 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint) static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) { + struct packfile_store *packs = the_repository->objects->packfiles; size_t in_sz = 64 * 1024, out_sz = 64 * 1024; unsigned char *in_buf = xmalloc(in_sz); unsigned char *out_buf = xmalloc(out_sz); @@ -1175,7 +1177,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - } else if (find_oid_pack(&oid, get_all_packs(the_repository))) { + } else if (find_oid_pack(&oid, packfile_store_get_all_packs(packs))) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ diff --git a/builtin/fsck.c b/builtin/fsck.c index d2eb9d4fbe922b..8ee95e0d67cf37 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -867,19 +867,20 @@ static int mark_packed_for_connectivity(const struct object_id *oid, static int check_pack_rev_indexes(struct repository *r, int show_progress) { + struct packfile_store *packs = r->objects->packfiles; struct progress *progress = NULL; uint32_t pack_count = 0; int res = 0; if (show_progress) { - for (struct packed_git *p = get_all_packs(r); p; p = p->next) + for (struct packed_git *p = packfile_store_get_all_packs(packs); p; p = p->next) pack_count++; progress = start_delayed_progress(the_repository, "Verifying reverse pack-indexes", pack_count); pack_count = 0; } - for (struct packed_git *p = get_all_packs(r); p; p = p->next) { + for (struct packed_git *p = packfile_store_get_all_packs(packs); p; p = p->next) { int load_error = load_pack_revindex_from_disk(p); if (load_error < 0) { @@ -999,6 +1000,8 @@ int cmd_fsck(int argc, for_each_packed_object(the_repository, mark_packed_for_connectivity, NULL, 0); } else { + struct packfile_store *packs = the_repository->objects->packfiles; + odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) fsck_source(source); @@ -1009,7 +1012,7 @@ int cmd_fsck(int argc, struct progress *progress = NULL; if (show_progress) { - for (p = get_all_packs(the_repository); p; + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (open_pack_index(p)) continue; @@ -1019,7 +1022,7 @@ int cmd_fsck(int argc, progress = start_progress(the_repository, _("Checking objects"), total); } - for (p = get_all_packs(the_repository); p; + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { /* verify gives error messages itself */ if (verify_pack(the_repository, diff --git a/builtin/gc.c b/builtin/gc.c index ec6735a540ad1b..e19e13d9788076 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -487,9 +487,10 @@ static int too_many_loose_objects(struct gc_config *cfg) static struct packed_git *find_base_packs(struct string_list *packs, unsigned long limit) { + struct packfile_store *packfiles = the_repository->objects->packfiles; struct packed_git *p, *base = NULL; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packfiles); p; p = p->next) { if (!p->pack_local || p->is_cruft) continue; if (limit) { @@ -508,13 +509,14 @@ static struct packed_git *find_base_packs(struct string_list *packs, static int too_many_packs(struct gc_config *cfg) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; int cnt; if (cfg->gc_auto_pack_limit <= 0) return 0; - for (cnt = 0, p = get_all_packs(the_repository); p; p = p->next) { + for (cnt = 0, p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local) continue; if (p->pack_keep) @@ -1492,7 +1494,7 @@ static off_t get_auto_pack_size(void) struct repository *r = the_repository; odb_reprepare(r->objects); - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { if (p->pack_size > max_size) { second_largest_size = max_size; max_size = p->pack_size; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1494afcf3dffe6..de351b757ae446 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3831,6 +3831,7 @@ static int pack_mtime_cmp(const void *_a, const void *_b) static void read_packs_list_from_stdin(struct rev_info *revs) { + struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct string_list include_packs = STRING_LIST_INIT_DUP; struct string_list exclude_packs = STRING_LIST_INIT_DUP; @@ -3855,7 +3856,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) string_list_sort(&exclude_packs); string_list_remove_duplicates(&exclude_packs, 0); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { const char *pack_name = pack_basename(p); if ((item = string_list_lookup(&include_packs, pack_name))) @@ -4076,6 +4077,7 @@ static void enumerate_cruft_objects(void) static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct rev_info revs; int ret; @@ -4105,7 +4107,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs * Re-mark only the fresh packs as kept so that objects in * unknown packs do not halt the reachability traversal early. */ - for (p = get_all_packs(the_repository); p; p = p->next) + for (p = packfile_store_get_all_packs(packs); p; p = p->next) p->pack_keep_in_core = 0; mark_pack_kept_in_core(fresh_packs, 1); @@ -4122,6 +4124,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs static void read_cruft_objects(void) { + struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct string_list discard_packs = STRING_LIST_INIT_DUP; struct string_list fresh_packs = STRING_LIST_INIT_DUP; @@ -4142,7 +4145,7 @@ static void read_cruft_objects(void) string_list_sort(&discard_packs); string_list_sort(&fresh_packs); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { const char *pack_name = pack_basename(p); struct string_list_item *item; @@ -4390,11 +4393,12 @@ static void add_unreachable_loose_objects(struct rev_info *revs) static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) { + struct packfile_store *packs = the_repository->objects->packfiles; static struct packed_git *last_found = (void *)1; struct packed_git *p; p = (last_found != (void *)1) ? last_found : - get_all_packs(the_repository); + packfile_store_get_all_packs(packs); while (p) { if ((!p->pack_local || p->pack_keep || @@ -4404,7 +4408,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) return 1; } if (p == last_found) - p = get_all_packs(the_repository); + p = packfile_store_get_all_packs(packs); else p = p->next; if (p == last_found) @@ -4436,12 +4440,13 @@ static int loosened_object_can_be_discarded(const struct object_id *oid, static void loosen_unused_packed_objects(void) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; uint32_t i; uint32_t loosened_objects_nr = 0; struct object_id oid; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; @@ -4742,12 +4747,13 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) static void add_extra_kept_packs(const struct string_list *names) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; if (!names->nr) return; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { const char *name = basename(p->pack_name); int i; @@ -5185,8 +5191,10 @@ int cmd_pack_objects(int argc, add_extra_kept_packs(&keep_pack_list); if (ignore_packed_keep_on_disk) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) if (p->pack_local && p->pack_keep) break; if (!p) /* no keep-able packs found */ @@ -5198,8 +5206,10 @@ int cmd_pack_objects(int argc, * want to unset "local" based on looking at packs, as * it also covers non-local objects */ + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) { + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local) { have_non_local_packs = 1; break; diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index fe81c293e3af6f..dd28171f0a179a 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -566,7 +566,8 @@ static struct pack_list * add_pack(struct packed_git *p) static struct pack_list * add_pack_file(const char *filename) { - struct packed_git *p = get_all_packs(the_repository); + struct packfile_store *packs = the_repository->objects->packfiles; + struct packed_git *p = packfile_store_get_all_packs(packs); if (strlen(filename) < 40) die("Bad pack filename: %s", filename); @@ -581,7 +582,8 @@ static struct pack_list * add_pack_file(const char *filename) static void load_all(void) { - struct packed_git *p = get_all_packs(the_repository); + struct packfile_store *packs = the_repository->objects->packfiles; + struct packed_git *p = packfile_store_get_all_packs(packs); while (p) { add_pack(p); diff --git a/builtin/repack.c b/builtin/repack.c index 5ff27fc8e29a9b..e8730808c535a9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -265,10 +265,11 @@ static void existing_packs_release(struct existing_packs *existing) static void collect_pack_filenames(struct existing_packs *existing, const struct string_list *extra_keep) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { int i; const char *base; @@ -497,10 +498,11 @@ static void init_pack_geometry(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (args->local && !p->pack_local) /* * When asked to only repack local packfiles we skip @@ -1137,11 +1139,12 @@ static int write_filtered_pack(const struct pack_objects_args *args, static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; size_t i; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!(p->is_cruft && p->pack_local)) continue; diff --git a/connected.c b/connected.c index d6e9682fd93ce9..b288a18b17c33f 100644 --- a/connected.c +++ b/connected.c @@ -74,9 +74,10 @@ int check_connected(oid_iterate_fn fn, void *cb_data, */ odb_reprepare(the_repository->objects); do { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_promisor) continue; if (find_pack_entry_one(oid, p)) diff --git a/http-backend.c b/http-backend.c index d5dfe762bb5178..9084058f1e9f13 100644 --- a/http-backend.c +++ b/http-backend.c @@ -603,18 +603,19 @@ static void get_head(struct strbuf *hdr, char *arg UNUSED) static void get_info_packs(struct strbuf *hdr, char *arg UNUSED) { size_t objdirlen = strlen(repo_get_object_directory(the_repository)); + struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct packed_git *p; size_t cnt = 0; select_getanyfile(hdr); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (p->pack_local) cnt++; } strbuf_grow(&buf, cnt * 53 + 2); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (p->pack_local) strbuf_addf(&buf, "P %s\n", p->pack_name + objdirlen + 6); } diff --git a/http.c b/http.c index af2120b64c711c..077e879de9ebab 100644 --- a/http.c +++ b/http.c @@ -2408,6 +2408,7 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url) static int fetch_and_setup_pack_index(struct packed_git **packs_head, unsigned char *sha1, const char *base_url) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *new_pack, *p; char *tmp_idx = NULL; int ret; @@ -2416,7 +2417,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, * If we already have the pack locally, no need to fetch its index or * even add it to list; we already have all of its objects. */ - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (hasheq(p->hash, sha1, the_repository->hash_algo)) return 0; } diff --git a/pack-bitmap.c b/pack-bitmap.c index 058bdb5d7ded0b..ac71035d7715f7 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -664,7 +664,7 @@ static int open_pack_bitmap(struct repository *r, struct packed_git *p; int ret = -1; - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { if (open_pack_bitmap_1(bitmap_git, p) == 0) { ret = 0; /* @@ -3362,7 +3362,7 @@ int verify_bitmap_files(struct repository *r) free(midx_bitmap_name); } - for (struct packed_git *p = get_all_packs(r); + for (struct packed_git *p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { char *pack_bitmap_name = pack_bitmap_filename(p); res |= verify_bitmap_file(r->hash_algo, pack_bitmap_name); diff --git a/pack-objects.c b/pack-objects.c index a9d9855063aea8..d8eb679735484a 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -86,6 +86,7 @@ struct object_entry *packlist_find(struct packing_data *pdata, static void prepare_in_pack_by_idx(struct packing_data *pdata) { + struct packfile_store *packs = pdata->repo->objects->packfiles; struct packed_git **mapping, *p; int cnt = 0, nr = 1U << OE_IN_PACK_BITS; @@ -95,7 +96,7 @@ static void prepare_in_pack_by_idx(struct packing_data *pdata) * (i.e. in_pack_idx also zero) should return NULL. */ mapping[cnt++] = NULL; - for (p = get_all_packs(pdata->repo); p; p = p->next, cnt++) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next, cnt++) { if (cnt == nr) { free(mapping); return; diff --git a/packfile.c b/packfile.c index b37f43afb587a5..cd5431b6aa1880 100644 --- a/packfile.c +++ b/packfile.c @@ -1033,11 +1033,11 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store) return store->packs; } -struct packed_git *get_all_packs(struct repository *r) +struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) { - packfile_store_prepare(r->objects->packfiles); + packfile_store_prepare(store); - for (struct odb_source *source = r->objects->sources; source; source = source->next) { + for (struct odb_source *source = store->odb->sources; source; source = source->next) { struct multi_pack_index *m = source->midx; if (!m) continue; @@ -1045,7 +1045,7 @@ struct packed_git *get_all_packs(struct repository *r) prepare_midx_pack(m, i); } - return r->objects->packfiles->packs; + return store->packs; } struct list_head *get_packed_git_mru(struct repository *r) @@ -2105,7 +2105,7 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not kept, but the midx returns only * the non-kept version. */ - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { ALLOC_GROW(packs, nr + 1, alloc); @@ -2202,7 +2202,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; - for (p = get_all_packs(repo); p; p = p->next) { + for (p = packfile_store_get_all_packs(repo->objects->packfiles); p; p = p->next) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && diff --git a/packfile.h b/packfile.h index 0b691ded7ef12a..1afb9cd6641696 100644 --- a/packfile.h +++ b/packfile.h @@ -142,6 +142,12 @@ void packfile_store_add_pack(struct packfile_store *store, */ struct packed_git *packfile_store_get_packs(struct packfile_store *store); +/* + * Get all packs managed by the given store, including packfiles that are + * referenced by multi-pack indices. + */ +struct packed_git *packfile_store_get_all_packs(struct packfile_store *store); + /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a @@ -227,7 +233,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, extern void (*report_garbage)(unsigned seen_bits, const char *path); struct list_head *get_packed_git_mru(struct repository *r); -struct packed_git *get_all_packs(struct repository *r); /* * Give a rough count of objects in the repository. This sacrifices accuracy diff --git a/server-info.c b/server-info.c index 9bb30d9ab71d22..1d33de821e9f5e 100644 --- a/server-info.c +++ b/server-info.c @@ -287,12 +287,13 @@ static int compare_info(const void *a_, const void *b_) static void init_pack_info(struct repository *r, const char *infofile, int force) { + struct packfile_store *packs = r->objects->packfiles; struct packed_git *p; int stale; int i; size_t alloc = 0; - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { /* we ignore things on alternate path since they are * not available to the pullers in general. */ diff --git a/t/helper/test-find-pack.c b/t/helper/test-find-pack.c index 611a13a32610d2..e001dc3066db70 100644 --- a/t/helper/test-find-pack.c +++ b/t/helper/test-find-pack.c @@ -39,7 +39,7 @@ int cmd__find_pack(int argc, const char **argv) if (repo_get_oid(the_repository, argv[0], &oid)) die("cannot parse %s as an object name", argv[0]); - for (p = get_all_packs(the_repository); p; p = p->next) + for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); p; p = p->next) if (find_pack_entry_one(&oid, p)) { printf("%s\n", p->pack_name); actual_count++; diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c index d51aaa3dc40d12..7c428c16011a23 100644 --- a/t/helper/test-pack-mtimes.c +++ b/t/helper/test-pack-mtimes.c @@ -37,7 +37,7 @@ int cmd__pack_mtimes(int argc, const char **argv) if (argc != 2) usage(pack_mtimes_usage); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); p; p = p->next) { strbuf_addstr(&buf, basename(p->pack_name)); strbuf_strip_suffix(&buf, ".pack"); strbuf_addstr(&buf, ".mtimes"); From dd52a29b78d80e425be660f3b443a42e0374a7d1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:14 +0200 Subject: [PATCH 16/24] packfile: refactor `get_packed_git_mru()` to work on packfile store The `get_packed_git_mru()` function prepares the packfile store and then returns its packfiles in most-recently-used order. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- packfile.c | 6 +++--- packfile.h | 7 +++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index de351b757ae446..61bbbdfb83feeb 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1748,12 +1748,12 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - list_for_each(pos, get_packed_git_mru(the_repository)) { + list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { struct packed_git *p = list_entry(pos, struct packed_git, mru); want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) list_move(&p->mru, - get_packed_git_mru(the_repository)); + packfile_store_get_packs_mru(the_repository->objects->packfiles)); if (want != -1) return want; } diff --git a/packfile.c b/packfile.c index cd5431b6aa1880..5a7caec2925977 100644 --- a/packfile.c +++ b/packfile.c @@ -1048,10 +1048,10 @@ struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) return store->packs; } -struct list_head *get_packed_git_mru(struct repository *r) +struct list_head *packfile_store_get_packs_mru(struct packfile_store *store) { - packfile_store_prepare(r->objects->packfiles); - return &r->objects->packfiles->mru; + packfile_store_prepare(store); + return &store->mru; } /* diff --git a/packfile.h b/packfile.h index 1afb9cd6641696..e7a5792b6cf691 100644 --- a/packfile.h +++ b/packfile.h @@ -148,6 +148,11 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store); */ struct packed_git *packfile_store_get_all_packs(struct packfile_store *store); +/* + * Get all packs in most-recently-used order. + */ +struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); + /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a @@ -232,8 +237,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -struct list_head *get_packed_git_mru(struct repository *r); - /* * Give a rough count of objects in the repository. This sacrifices accuracy * for speed. From cc1cc31e2a46e33941840bbb2026fff2d0532b2b Mon Sep 17 00:00:00 2001 From: Julia Evans Date: Tue, 23 Sep 2025 18:10:48 +0000 Subject: [PATCH 17/24] doc: git-push: create PUSH RULES section Right now the rules for when a `git push` is allowed are buried at the bottom of the description of ``. Put them in their own section so that we can reference them from `--force` and give some context for why they exist. Having the "PUSH RULES" section also lets us be a little bit more specific with the rule in `--force`: we can just focus on the rule for pushing for a branch (which is likely the one that's most relevant) and leave the details about what happens when you push to a tag or a ref that isn't a branch to the later section. Signed-off-by: Julia Evans Signed-off-by: Junio C Hamano --- Documentation/git-push.adoc | 94 ++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 49 deletions(-) diff --git a/Documentation/git-push.adoc b/Documentation/git-push.adoc index 5f5408e2c01d26..cf506ab8b491ae 100644 --- a/Documentation/git-push.adoc +++ b/Documentation/git-push.adoc @@ -91,48 +91,6 @@ is ambiguous. configuration (see linkgit:git-config[1]) suggest what refs/ namespace you may have wanted to push to. --- -+ -The object referenced by is used to update the reference -on the remote side. Whether this is allowed depends on where in -`refs/*` the reference lives as described in detail below, in -those sections "update" means any modifications except deletes, which -as noted after the next few sections are treated differently. -+ -The `refs/heads/*` namespace will only accept commit objects, and -updates only if they can be fast-forwarded. -+ -The `refs/tags/*` namespace will accept any kind of object (as -commits, trees and blobs can be tagged), and any updates to them will -be rejected. -+ -It's possible to push any type of object to any namespace outside of -`refs/{tags,heads}/*`. In the case of tags and commits, these will be -treated as if they were the commits inside `refs/heads/*` for the -purposes of whether the update is allowed. -+ -I.e. a fast-forward of commits and tags outside `refs/{tags,heads}/*` -is allowed, even in cases where what's being fast-forwarded is not a -commit, but a tag object which happens to point to a new commit which -is a fast-forward of the commit the last tag (or commit) it's -replacing. Replacing a tag with an entirely different tag is also -allowed, if it points to the same commit, as well as pushing a peeled -tag, i.e. pushing the commit that existing tag object points to, or a -new tag object which an existing commit points to. -+ -Tree and blob objects outside of `refs/{tags,heads}/*` will be treated -the same way as if they were inside `refs/tags/*`, any update of them -will be rejected. -+ -All of the rules described above about what's not allowed as an update -can be overridden by adding an the optional leading `+` to a refspec -(or using `--force` command line option). The only exception to this -is that no amount of forcing will make the `refs/heads/*` namespace -accept a non-commit object. Hooks and configuration can also override -or amend these rules, see e.g. `receive.denyNonFastForwards` in -linkgit:git-config[1] and `pre-receive` and `update` in -linkgit:githooks[5]. -+ Pushing an empty allows you to delete the ref from the remote repository. Deletions are always accepted without a leading `+` in the refspec (or `--force`), except when forbidden by configuration @@ -145,6 +103,7 @@ the local side, the remote side is updated if a branch of the same name already exists on the remote side. + `tag ` means the same as `refs/tags/:refs/tags/`. +Not all updates are allowed: see PUSH RULES below for the details. --all:: --branches:: @@ -335,14 +294,12 @@ allowing a forced update. -f:: --force:: - Usually, the command refuses to update a remote ref that is - not an ancestor of the local ref used to overwrite it. - Also, when `--force-with-lease` option is used, the command refuses - to update a remote ref whose current value does not match - what is expected. + Usually, `git push` will refuse to update a branch that is not an + ancestor of the commit being pushed. + -This flag disables these checks, and can cause the remote repository -to lose commits; use it with care. +This flag disables that check, the other safety checks in PUSH RULES +below, and the checks in --force-with-lease. It can cause the remote +repository to lose commits; use it with care. + Note that `--force` applies to all the refs that are pushed, hence using it with `push.default` set to `matching` or with multiple push @@ -514,6 +471,45 @@ reason:: refs, no explanation is needed. For a failed ref, the reason for failure is described. +PUSH RULES +---------- + +As a safety feature, the `git push` command only allows certain kinds of +updates to prevent you from accidentally losing data on the remote. + +Because branches and tags are intended to be used differently, the +safety rules for pushing to a branch are different from the rules +for pushing to a tag. In the following rules "update" means any +modifications except deletions and creations. Deletions and creations +are always allowed, except when forbidden by configuration or hooks. + +1. If the push destination is a **branch** (`refs/heads/*`): only + fast-forward updates are allowed, which means the destination must be + an ancestor of the source commit. The source must be a commit. +2. If the push destination is a **tag** (`refs/tags/*`): all updates will + be rejected. The source can be any object. +3. If the push destination is not a branch or tag: + * If the source is a tree or blob object, any updates will be rejected + * If the source is a tag or commit object, any fast-forward update + is allowed, even in cases where what's being fast-forwarded is not a + commit, but a tag object which happens to point to a new commit which + is a fast-forward of the commit the last tag (or commit) it's + replacing. Replacing a tag with an entirely different tag is also + allowed, if it points to the same commit, as well as pushing a peeled + tag, i.e. pushing the commit that existing tag object points to, or a + new tag object which an existing commit points to. + +You can override these rules by passing `--force` or by adding the +optional leading `+` to a refspec. The only exceptions are that no +amount of forcing will make a branch accept a non-commit object, +and forcing won't make the remote repository accept a push that it's +configured to deny. + +Hooks and configuration can also override or amend these rules, +see e.g. `receive.denyNonFastForwards` and `receive.denyDeletes` +in linkgit:git-config[1] and `pre-receive` and `update` in +linkgit:githooks[5]. + NOTE ABOUT FAST-FORWARDS ------------------------ From 657586a5a6ddfa1d6c732b8b0f4670d198a4be02 Mon Sep 17 00:00:00 2001 From: Julia Evans Date: Tue, 23 Sep 2025 18:10:49 +0000 Subject: [PATCH 18/24] doc: git-push: rewrite refspec specification From user feedback, there was a request for examples, as well as a comment that one person found "If git push [] without any argument is set to update some ref at the destination with with remote..push configuration variable..." impossible to understand. To make the section easier to navigate, create a list of every possible refspec form, with examples for each form as well as 2 forms which were previously missing (patterns and negative refspecs). Made a few changes to use more familiar language, but ultimately refspecs are a pretty advanced feature so I've mostly left the terminology alone. Signed-off-by: Julia Evans Signed-off-by: Junio C Hamano --- Documentation/git-push.adoc | 105 ++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 47 deletions(-) diff --git a/Documentation/git-push.adoc b/Documentation/git-push.adoc index cf506ab8b491ae..cc5cadcdfc0d02 100644 --- a/Documentation/git-push.adoc +++ b/Documentation/git-push.adoc @@ -55,54 +55,65 @@ OPTIONS[[OPTIONS]] ...:: Specify what destination ref to update with what source object. - The format of a parameter is an optional plus - `+`, followed by the source object , followed - by a colon `:`, followed by the destination ref . -+ -The is often the name of the branch you would want to push, but -it can be any arbitrary "SHA-1 expression", such as `master~4` or -`HEAD` (see linkgit:gitrevisions[7]). -+ -The tells which ref on the remote side is updated with this -push. Arbitrary expressions cannot be used here, an actual ref must -be named. -If `git push []` without any `` argument is set to -update some ref at the destination with `` with -`remote..push` configuration variable, `:` part can -be omitted--such a push will update a ref that `` normally updates -without any `` on the command line. Otherwise, missing -`:` means to update the same ref as the ``. -+ -If doesn't start with `refs/` (e.g. `refs/heads/master`) we will -try to infer where in `refs/*` on the destination it -belongs based on the type of being pushed and whether -is ambiguous. + --- -* If unambiguously refers to a ref on the remote, - then push to that ref. - -* If resolves to a ref starting with refs/heads/ or refs/tags/, - then prepend that to . - -* Other ambiguity resolutions might be added in the future, but for - now any other cases will error out with an error indicating what we - tried, and depending on the `advice.pushUnqualifiedRefname` - configuration (see linkgit:git-config[1]) suggest what refs/ - namespace you may have wanted to push to. - -Pushing an empty allows you to delete the ref from the -remote repository. Deletions are always accepted without a leading `+` -in the refspec (or `--force`), except when forbidden by configuration -or hooks. See `receive.denyDeletes` in linkgit:git-config[1] and -`pre-receive` and `update` in linkgit:githooks[5]. -+ -The special refspec `:` (or `+:` to allow non-fast-forward updates) -directs Git to push "matching" branches: for every branch that exists on -the local side, the remote side is updated if a branch of the same name -already exists on the remote side. -+ -`tag ` means the same as `refs/tags/:refs/tags/`. +The format for a refspec is [+][:], for example `main`, +`main:other`, or `HEAD^:refs/heads/main`. ++ +The `` is often the name of the local branch to push, but it can be +any arbitrary "SHA-1 expression" (see linkgit:gitrevisions[7]). ++ +The `` determines what ref to update on the remote side. It must be the +name of a branch, tag, or other ref, not an arbitrary expression. ++ +The `+` is optional and does the same thing as `--force`. ++ +You can write a refspec using the fully expanded form (for +example `refs/heads/main:refs/heads/main`) which specifies the exact source +and destination, or with a shorter form (for example `main` or +`main:other`). Here are the rules for how refspecs are expanded, +as well as various other special refspec forms: ++ + * `` without a `:` means to update the same ref as the + ``, unless the `remote..push` configuration specifies a + different . For example, if `main` is a branch, then the refspec + `main` expands to `main:refs/heads/main`. + * If `` unambiguously refers to a ref on the remote, + then expand it to that ref. For example, if `v1.0` is a tag on the + remote, then `HEAD:v1.0` expands to `HEAD:refs/tags/v1.0`. + * If `` resolves to a ref starting with `refs/heads/` or `refs/tags/`, + then prepend that to . For example, if `main` is a branch, then + `main:other` expands to `main:refs/heads/other` + * The special refspec `:` (or `+:` to allow non-fast-forward updates) + directs Git to push "matching" branches: for every branch that exists on + the local side, the remote side is updated if a branch of the same name + already exists on the remote side. + * may contain a * to indicate a simple pattern match. + This works like a glob that matches any ref matching the pattern. + There must be only one * in both the `` and ``. + It will map refs to the destination by replacing the * with the + contents matched from the source. For example, `refs/heads/*:refs/heads/*` + will push all branches. + * A refspec starting with `^` is a negative refspec. + This specifies refs to exclude. A ref will be considered to + match if it matches at least one positive refspec, and does not + match any negative refspec. Negative refspecs can be pattern refspecs. + They must only contain a ``. + Fully spelled out hex object names are also not supported. + For example, `git push origin 'refs/heads/*' '^refs/heads/dev-*'` + will push all branches except for those starting with `dev-` + * If `` is empty, it deletes the `` ref from the remote + repository. For example, `git push origin :dev` will + delete the `dev` branch. + * `tag ` expands to `refs/tags/:refs/tags/`. + This is technically a special syntax for `git push` and not a refspec, + since in `git push origin tag v1.0` the arguments `tag` and `v1.0` + are separate. + * If the refspec can't be expanded unambiguously, error out + with an error indicating what was tried, and depending + on the `advice.pushUnqualifiedRefname` configuration (see + linkgit:git-config[1]) suggest what refs/ namespace you may have + wanted to push to. + Not all updates are allowed: see PUSH RULES below for the details. --all:: From e4efcd70604f2a294e020ec2e1bc38f01892cd19 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 26 Sep 2025 10:32:50 +0000 Subject: [PATCH 19/24] http: offer to cast `size_t` to `curl_off_t` safely This commit moves the `xcurl_off_t()` function, which validates that a given value fits within the `curl_off_t` data type and then casts it, to a more central place so that it can be used outside of `remote-curl.c`, too. At the same time, this function is renamed to conform better with the naming convention of the helper functions that safely cast from one data type to another which has been well established in `git-compat-util.h`. With this move, `gettext.h` must be `#include`d in `http.h` to allow the error message to remain translatable. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- http.h | 10 ++++++++++ remote-curl.c | 14 +++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/http.h b/http.h index 36202139f451ff..0a36dbd2949b56 100644 --- a/http.h +++ b/http.h @@ -8,6 +8,7 @@ struct packed_git; #include #include +#include "gettext.h" #include "strbuf.h" #include "remote.h" @@ -95,6 +96,15 @@ static inline int missing__target(int code, int result) #define missing_target(a) missing__target((a)->http_code, (a)->curl_result) +static inline curl_off_t cast_size_t_to_curl_off_t(size_t a) +{ + uintmax_t size = a; + if (size > maximum_signed_value_of_type(curl_off_t)) + die(_("number too large to represent as curl_off_t " + "on this platform: %"PRIuMAX), (uintmax_t)a); + return (curl_off_t)a; +} + /* * Normalize curl results to handle CURL_FAILONERROR (or lack thereof). Failing * http codes have their "result" converted to CURLE_HTTP_RETURNED_ERROR, and diff --git a/remote-curl.c b/remote-curl.c index b8bc3a80cf4142..1bede365b9db7d 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -894,14 +894,6 @@ static int probe_rpc(struct rpc_state *rpc, struct slot_results *results) return err; } -static curl_off_t xcurl_off_t(size_t len) -{ - uintmax_t size = len; - if (size > maximum_signed_value_of_type(curl_off_t)) - die(_("cannot handle pushes this big")); - return (curl_off_t)size; -} - /* * If flush_received is true, do not attempt to read any more; just use what's * in rpc->buf. @@ -999,7 +991,7 @@ static int post_rpc(struct rpc_state *rpc, int stateless_connect, int flush_rece * and we just need to send it. */ curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, gzip_body); - curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE_LARGE, xcurl_off_t(gzip_size)); + curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE_LARGE, cast_size_t_to_curl_off_t(gzip_size)); } else if (use_gzip && 1024 < rpc->len) { /* The client backend isn't giving us compressed data so @@ -1030,7 +1022,7 @@ static int post_rpc(struct rpc_state *rpc, int stateless_connect, int flush_rece headers = curl_slist_append(headers, "Content-Encoding: gzip"); curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, gzip_body); - curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE_LARGE, xcurl_off_t(gzip_size)); + curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE_LARGE, cast_size_t_to_curl_off_t(gzip_size)); if (options.verbosity > 1) { fprintf(stderr, "POST %s (gzip %lu to %lu bytes)\n", @@ -1043,7 +1035,7 @@ static int post_rpc(struct rpc_state *rpc, int stateless_connect, int flush_rece * more normal Content-Length approach. */ curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, rpc->buf); - curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE_LARGE, xcurl_off_t(rpc->len)); + curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE_LARGE, cast_size_t_to_curl_off_t(rpc->len)); if (options.verbosity > 1) { fprintf(stderr, "POST %s (%lu bytes)\n", rpc->service_name, (unsigned long)rpc->len); From 580cf0f2f6d38221fc4c5f17155c311915301a5c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 26 Sep 2025 10:32:51 +0000 Subject: [PATCH 20/24] imap-send: be more careful when casting to `curl_off_t` When casting a `size_t` to `curl_off_t`, there is a currently uncommon chance that the value can be cut off (`curl_off_t` is expected to be a signed 64-bit data type). Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- imap-send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap-send.c b/imap-send.c index 2e812f5a6e9e10..29dc86ff272ccc 100644 --- a/imap-send.c +++ b/imap-send.c @@ -1475,7 +1475,7 @@ static int curl_append_msgs_to_imap(struct imap_server_conf *server, lf_to_crlf(&msgbuf.buf); curl_easy_setopt(curl, CURLOPT_INFILESIZE_LARGE, - (curl_off_t)(msgbuf.buf.len-prev_len)); + cast_size_t_to_curl_off_t(msgbuf.buf.len-prev_len)); res = curl_easy_perform(curl); From ecc5749578cea0d1c6072c806649bf1076c7b4c3 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 26 Sep 2025 10:32:52 +0000 Subject: [PATCH 21/24] http-push: avoid new compile error With the recent update in Git for Windows/ARM64 as of https://github.com/git-for-windows/git-sdk-arm64/commit/21b288e16358 cURL was updated from v8.15.0 to v8.16.0, and the LLVM-based builds (but strangely not the GCC-based builds) continuously greet me thusly: http-push.c:211:2: error: call to '_curl_easy_setopt_err_long' declared with 'warning' attribute: curl_easy_setopt expects a long argument [-Werror,-Wattribute-warning] CC builtin/apply.o 211 | curl_easy_setopt(curl, CURLOPT_INFILESIZE, buffer->buf.len); | ^ C:/a/git-sdk-arm64/git-sdk-arm64/minimal-sdk/clangarm64/include/curl/typecheck-gcc.h:50:15: note: expanded from macro 'curl_easy_setopt' 50 | _curl_easy_setopt_err_long(); \ | ^ 1 error generated. make: *** [Makefile:2877: http-push.o] Error 1 The easiest way to shut up that compile error (which is legitimate, seeing as the `CURLOPT_INFILESIZE` options expects a `long` parameter, but `buffer->buf.len` refers to the `size_t` attribute of a `strbuf`) would be to simply cast the parameter to a `long`. However, there is a much better solution: To use the `CURLOPT_INFILESIZE_LARGE` option instead, which was added in cURL v7.11.0 (see https://curl.se/ch/7.11.0.html) and which Git _already_ uses in `curl_append_msgs_to_imap()`. This fix was the motivation for renaming `xcurl_off_t()` to `cast_size_t_to_curl_off_t()` and making it available more broadly, which is the reason why it is used here, too. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- http-push.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/http-push.c b/http-push.c index f5a92529a8406b..c06b94d5dbd862 100644 --- a/http-push.c +++ b/http-push.c @@ -208,7 +208,8 @@ static void curl_setup_http(CURL *curl, const char *url, curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L); curl_easy_setopt(curl, CURLOPT_URL, url); curl_easy_setopt(curl, CURLOPT_INFILE, buffer); - curl_easy_setopt(curl, CURLOPT_INFILESIZE, buffer->buf.len); + curl_easy_setopt(curl, CURLOPT_INFILESIZE_LARGE, + cast_size_t_to_curl_off_t(buffer->buf.len)); curl_easy_setopt(curl, CURLOPT_READFUNCTION, fread_buffer); curl_easy_setopt(curl, CURLOPT_SEEKFUNCTION, seek_buffer); curl_easy_setopt(curl, CURLOPT_SEEKDATA, buffer); From 3721541d35a0112e143c74e11a23cd329600f40e Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Sat, 27 Sep 2025 09:50:45 -0500 Subject: [PATCH 22/24] clang-format: exclude control macros from SpaceBeforeParens The formatter currently suggests adding a space between a control macro and parentheses. In the Git project, this is not typically expected. Set `SpaceBeforeParens` to `ControlStatementsExceptControlMacros` accordingly. Helped-by: Karthik Nayak Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- .clang-format | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-format b/.clang-format index dcfd0aad60c31e..86b4fe33e5cd98 100644 --- a/.clang-format +++ b/.clang-format @@ -149,7 +149,7 @@ SpaceBeforeCaseColon: false # f(); # } # } -SpaceBeforeParens: ControlStatements +SpaceBeforeParens: ControlStatementsExceptControlMacros # Don't insert spaces inside empty '()' SpaceInEmptyParentheses: false From 15eff6b7d733b46107eecabb958d28fb74fb7fda Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 7 Oct 2025 10:11:44 -0700 Subject: [PATCH 23/24] mailmap: change primary address for Jonathan Tan Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index afa21abbaaffcd..7b3198171fad1e 100644 --- a/.mailmap +++ b/.mailmap @@ -126,6 +126,7 @@ Jon Loeliger Jon Seymour Jonathan Nieder Jonathan del Strother +Jonathan Tan Josh Triplett Josh Triplett Julian Phillips From 79cf913ea9321f774da29b2330b5781d5ff420ef Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 7 Oct 2025 12:24:01 -0700 Subject: [PATCH 24/24] The fifteenth batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.52.0.adoc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/RelNotes/2.52.0.adoc b/Documentation/RelNotes/2.52.0.adoc index b106483f427f20..ee7ea2e43379e7 100644 --- a/Documentation/RelNotes/2.52.0.adoc +++ b/Documentation/RelNotes/2.52.0.adoc @@ -280,6 +280,15 @@ including security updates, are included in this release. updated. (merge 54a60e5b38 kh/you-still-use-whatchanged-fix later to maint). + * Clang-format update to let our control macros formatted the way we + had them traditionally, e.g., "for_each_string_list_item()" without + space before the parentheses. + (merge 3721541d35 jt/clang-format-foreach-wo-space-before-parenthesis later to maint). + + * A few places where an size_t value was cast to curl_off_t without + checking has been updated to use the existing helper function. + (merge ecc5749578 js/curl-off-t-fixes later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 823d537fa7 kh/doc-git-log-markup-fix later to maint). (merge cf7efa4f33 rj/t6137-cygwin-fix later to maint). @@ -305,3 +314,4 @@ including security updates, are included in this release. (merge ac7096723b jc/doc-includeif-hasconfig-remote-url-fix later to maint). (merge fafc9b08b8 ag/doc-sendmail-gmail-example-update later to maint). (merge a66fc22bf9 rs/get-oid-with-flags-cleanup later to maint). + (merge e1d062e8ba ps/odb-clean-stale-wrappers later to maint).