From 80e7f52299619575cb48522a4ca40427e2231dc6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:27 +0200 Subject: [PATCH 01/22] object-file: fix -Wsign-compare warnings There are some trivial -Wsign-compare warnings in "object-file.c". Fix them and drop the preprocessor define that disables those warnings. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/object-file.c b/object-file.c index 3d674d1093e449..987cf289420f6d 100644 --- a/object-file.c +++ b/object-file.c @@ -8,7 +8,6 @@ */ #define USE_THE_REPOSITORY_VARIABLE -#define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" #include "bulk-checkin.h" @@ -44,8 +43,7 @@ static int get_conv_flags(unsigned flags) static void fill_loose_path(struct strbuf *buf, const struct object_id *oid) { - int i; - for (i = 0; i < the_hash_algo->rawsz; i++) { + for (size_t i = 0; i < the_hash_algo->rawsz; i++) { static char hex[] = "0123456789abcdef"; unsigned int val = oid->hash[i]; strbuf_addch(buf, hex[val >> 4]); @@ -327,9 +325,8 @@ static void *unpack_loose_rest(git_zstream *stream, void *buffer, unsigned long size, const struct object_id *oid) { - int bytes = strlen(buffer) + 1; + size_t bytes = strlen(buffer) + 1, n; unsigned char *buf = xmallocz(size); - unsigned long n; int status = Z_OK; n = stream->total_out - bytes; @@ -596,7 +593,7 @@ static int check_collision(const char *source, const char *dest) goto out; } - if (sz_a < sizeof(buf_source)) + if ((size_t) sz_a < sizeof(buf_source)) break; } @@ -1240,7 +1237,7 @@ static int index_core(struct index_state *istate, if (read_result < 0) ret = error_errno(_("read error while indexing %s"), path ? path : ""); - else if (read_result != size) + else if ((size_t) read_result != size) ret = error(_("short read while indexing %s"), path ? path : ""); else @@ -1268,7 +1265,7 @@ int index_fd(struct index_state *istate, struct object_id *oid, ret = index_stream_convert_blob(istate, oid, fd, path, flags); else if (!S_ISREG(st->st_mode)) ret = index_pipe(istate, oid, fd, type, path, flags); - else if (st->st_size <= repo_settings_get_big_file_threshold(the_repository) || + else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(the_repository)) || type != OBJ_BLOB || (path && would_convert_to_git(istate, path))) ret = index_core(istate, oid, fd, xsize_t(st->st_size), @@ -1472,7 +1469,7 @@ struct oidtree *odb_loose_cache(struct odb_source *source, uint32_t *bitmap; if (subdir_nr < 0 || - subdir_nr >= bitsizeof(source->loose_objects_subdir_seen)) + (size_t) subdir_nr >= bitsizeof(source->loose_objects_subdir_seen)) BUG("subdir_nr out of range"); bitmap = &source->loose_objects_subdir_seen[word_index]; From 18323f5b485f5c484622e18c6bfd167fdf5ca101 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:28 +0200 Subject: [PATCH 02/22] object-file: stop using `the_hash_algo` There are a couple of users of the `the_hash_algo` macro, which implicitly depends on `the_repository`. Adapt these callers to not do so anymore, either by deriving it from already-available context or by using `the_repository->hash_algo`. The latter variant doesn't yet help to remove the global dependency, but such users will be adapted in the following commits to not use `the_repository` anymore. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 40 ++++++++++++++++++++++++---------------- object-file.h | 1 + 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/object-file.c b/object-file.c index 987cf289420f6d..bc395febc9d345 100644 --- a/object-file.c +++ b/object-file.c @@ -25,6 +25,7 @@ #include "pack.h" #include "packfile.h" #include "path.h" +#include "read-cache-ll.h" #include "setup.h" #include "streaming.h" @@ -41,9 +42,11 @@ static int get_conv_flags(unsigned flags) return 0; } -static void fill_loose_path(struct strbuf *buf, const struct object_id *oid) +static void fill_loose_path(struct strbuf *buf, + const struct object_id *oid, + const struct git_hash_algo *algop) { - for (size_t i = 0; i < the_hash_algo->rawsz; i++) { + for (size_t i = 0; i < algop->rawsz; i++) { static char hex[] = "0123456789abcdef"; unsigned int val = oid->hash[i]; strbuf_addch(buf, hex[val >> 4]); @@ -60,7 +63,7 @@ const char *odb_loose_path(struct odb_source *source, strbuf_reset(buf); strbuf_addstr(buf, source->path); strbuf_addch(buf, '/'); - fill_loose_path(buf, oid); + fill_loose_path(buf, oid, source->odb->repo->hash_algo); return buf->buf; } @@ -1165,7 +1168,7 @@ static int index_mem(struct index_state *istate, opts.strict = 1; opts.error_func = hash_format_check_report; - if (fsck_buffer(null_oid(the_hash_algo), type, buf, size, &opts)) + if (fsck_buffer(null_oid(istate->repo->hash_algo), type, buf, size, &opts)) die(_("refusing to create malformed object")); fsck_finish(&opts); } @@ -1173,7 +1176,7 @@ static int index_mem(struct index_state *istate, if (write_object) ret = write_object_file(buf, size, type, oid); else - hash_object_file(the_hash_algo, buf, size, type, oid); + hash_object_file(istate->repo->hash_algo, buf, size, type, oid); strbuf_release(&nbuf); return ret; @@ -1199,7 +1202,7 @@ static int index_stream_convert_blob(struct index_state *istate, ret = write_object_file(sbuf.buf, sbuf.len, OBJ_BLOB, oid); else - hash_object_file(the_hash_algo, sbuf.buf, sbuf.len, OBJ_BLOB, + hash_object_file(istate->repo->hash_algo, sbuf.buf, sbuf.len, OBJ_BLOB, oid); strbuf_release(&sbuf); return ret; @@ -1297,7 +1300,7 @@ int index_path(struct index_state *istate, struct object_id *oid, if (strbuf_readlink(&sb, path, st->st_size)) return error_errno("readlink(\"%s\")", path); if (!(flags & INDEX_WRITE_OBJECT)) - hash_object_file(the_hash_algo, sb.buf, sb.len, + hash_object_file(istate->repo->hash_algo, sb.buf, sb.len, OBJ_BLOB, oid); else if (write_object_file(sb.buf, sb.len, OBJ_BLOB, oid)) rc = error(_("%s: failed to insert into database"), path); @@ -1328,6 +1331,7 @@ int read_pack_header(int fd, struct pack_header *header) int for_each_file_in_obj_subdir(unsigned int subdir_nr, struct strbuf *path, + const struct git_hash_algo *algop, each_loose_object_fn obj_cb, each_loose_cruft_fn cruft_cb, each_loose_subdir_fn subdir_cb, @@ -1364,12 +1368,12 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr, namelen = strlen(de->d_name); strbuf_setlen(path, baselen); strbuf_add(path, de->d_name, namelen); - if (namelen == the_hash_algo->hexsz - 2 && + if (namelen == algop->hexsz - 2 && !hex_to_bytes(oid.hash + 1, de->d_name, - the_hash_algo->rawsz - 1)) { - oid_set_algo(&oid, the_hash_algo); - memset(oid.hash + the_hash_algo->rawsz, 0, - GIT_MAX_RAWSZ - the_hash_algo->rawsz); + algop->rawsz - 1)) { + oid_set_algo(&oid, algop); + memset(oid.hash + algop->rawsz, 0, + GIT_MAX_RAWSZ - algop->rawsz); if (obj_cb) { r = obj_cb(&oid, path->buf, data); if (r) @@ -1405,7 +1409,8 @@ int for_each_loose_file_in_objdir_buf(struct strbuf *path, int i; for (i = 0; i < 256; i++) { - r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb, + r = for_each_file_in_obj_subdir(i, path, the_repository->hash_algo, + obj_cb, cruft_cb, subdir_cb, data); if (r) break; @@ -1481,6 +1486,7 @@ struct oidtree *odb_loose_cache(struct odb_source *source, } strbuf_addstr(&buf, source->path); for_each_file_in_obj_subdir(subdir_nr, &buf, + source->odb->repo->hash_algo, append_loose_object, NULL, NULL, source->loose_objects_cache); @@ -1501,7 +1507,8 @@ static int check_stream_oid(git_zstream *stream, const char *hdr, unsigned long size, const char *path, - const struct object_id *expected_oid) + const struct object_id *expected_oid, + const struct git_hash_algo *algop) { struct git_hash_ctx c; struct object_id real_oid; @@ -1509,7 +1516,7 @@ static int check_stream_oid(git_zstream *stream, unsigned long total_read; int status = Z_OK; - the_hash_algo->init_fn(&c); + algop->init_fn(&c); git_hash_update(&c, hdr, stream->total_out); /* @@ -1594,7 +1601,8 @@ int read_loose_object(const char *path, if (*oi->typep == OBJ_BLOB && *size > repo_settings_get_big_file_threshold(the_repository)) { - if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0) + if (check_stream_oid(&stream, hdr, *size, path, expected_oid, + the_repository->hash_algo) < 0) goto out_inflate; } else { *contents = unpack_loose_rest(&stream, hdr, *size, expected_oid); diff --git a/object-file.h b/object-file.h index 67b4ffc480833b..222ff2871a186c 100644 --- a/object-file.h +++ b/object-file.h @@ -89,6 +89,7 @@ typedef int each_loose_subdir_fn(unsigned int nr, void *data); int for_each_file_in_obj_subdir(unsigned int subdir_nr, struct strbuf *path, + const struct git_hash_algo *algo, each_loose_object_fn obj_cb, each_loose_cruft_fn cruft_cb, each_loose_subdir_fn subdir_cb, From 931e8c9f5226d855922ab5b0ba04bafe4aa7382f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:29 +0200 Subject: [PATCH 03/22] object-file: get rid of `the_repository` in `has_loose_object()` We implicitly depend on `the_repository` in `has_loose_object()`. Refactor the function to accept an `odb_source` as input that should be checked for such a loose object. This refactoring changes semantics of the function to not check the whole object database for such a loose object anymore, but instead we now only check that single source. Existing callers thus need to loop through all sources manually now. While this change may seem illogical at first, whether or not an object exists in a specific format should be answered by the source using that format. As such, we can eventually convert this into a generic function `odb_source_has_object()` that simply checks whether a given object exists in an object source. And as we will know about the format that any given source uses it allows us to derive whether the object exists in a given format. This change also makes `has_loose_object_nonlocal()` obsolete. The only caller of this function is adapted so that it skips the primary object source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 24 ++++++++++++++++++++---- object-file.c | 16 +++++++--------- object-file.h | 7 +++---- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5781dec9808273..a44f0ce1c785ed 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1703,8 +1703,16 @@ static int want_object_in_pack_mtime(const struct object_id *oid, struct list_head *pos; struct multi_pack_index *m; - if (!exclude && local && has_loose_object_nonlocal(oid)) - return 0; + if (!exclude && local) { + /* + * Note that we start iterating at `sources->next` so that we + * skip the local object source. + */ + struct odb_source *source = the_repository->objects->sources->next; + for (; source; source = source->next) + if (has_loose_object(source, oid)) + return 0; + } /* * If we already know the pack object lives in, start checks from that @@ -3928,7 +3936,14 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type } else { if (!want_object_in_pack_mtime(oid, 0, &pack, &offset, mtime)) return; - if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) { + if (!pack && type == OBJ_BLOB) { + struct odb_source *source = the_repository->objects->sources; + int found = 0; + + for (; !found && source; source = source->next) + if (has_loose_object(source, oid)) + found = 1; + /* * If a traversed tree has a missing blob then we want * to avoid adding that missing object to our pack. @@ -3942,7 +3957,8 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type * limited to "ensure non-tip blobs which don't exist in * packs do exist via loose objects". Confused? */ - return; + if (!found) + return; } entry = create_object_entry(oid, type, pack_name_hash_fn(name), diff --git a/object-file.c b/object-file.c index bc395febc9d345..7aecaa3d2a034d 100644 --- a/object-file.c +++ b/object-file.c @@ -121,14 +121,10 @@ static int check_and_freshen(const struct object_id *oid, int freshen) check_and_freshen_nonlocal(oid, freshen); } -int has_loose_object_nonlocal(const struct object_id *oid) +int has_loose_object(struct odb_source *source, + const struct object_id *oid) { - return check_and_freshen_nonlocal(oid, 0); -} - -int has_loose_object(const struct object_id *oid) -{ - return check_and_freshen(oid, 0); + return check_and_freshen_odb(source, oid, 0); } int format_object_header(char *str, size_t size, enum object_type type, @@ -1103,8 +1099,10 @@ int force_object_loose(const struct object_id *oid, time_t mtime) int hdrlen; int ret; - if (has_loose_object(oid)) - return 0; + for (struct odb_source *source = repo->objects->sources; source; source = source->next) + if (has_loose_object(source, oid)) + return 0; + oi.typep = &type; oi.sizep = &len; oi.contentp = &buf; diff --git a/object-file.h b/object-file.h index 222ff2871a186c..5b63a05ab515ed 100644 --- a/object-file.h +++ b/object-file.h @@ -45,13 +45,12 @@ const char *odb_loose_path(struct odb_source *source, const struct object_id *oid); /* - * Return true iff an alternate object database has a loose object + * Return true iff an object database source has a loose object * with the specified name. This function does not respect replace * references. */ -int has_loose_object_nonlocal(const struct object_id *); - -int has_loose_object(const struct object_id *); +int has_loose_object(struct odb_source *source, + const struct object_id *oid); void *map_loose_object(struct repository *r, const struct object_id *oid, unsigned long *size); From f6638bf55d0c611b86873cc1c88b0dac3bc653e2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:30 +0200 Subject: [PATCH 04/22] object-file: inline `check_and_freshen()` functions The `check_and_freshen()` functions are only used by a single caller now. Inline them into `freshen_loose_object()`. While at it, rename `check_and_freshen_odb()` to `_source()` to reflect that it works on a single object source instead of on the whole database. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/object-file.c b/object-file.c index 7aecaa3d2a034d..9e17e608f78960 100644 --- a/object-file.c +++ b/object-file.c @@ -89,42 +89,19 @@ int check_and_freshen_file(const char *fn, int freshen) return 1; } -static int check_and_freshen_odb(struct odb_source *source, - const struct object_id *oid, - int freshen) +static int check_and_freshen_source(struct odb_source *source, + const struct object_id *oid, + int freshen) { static struct strbuf path = STRBUF_INIT; odb_loose_path(source, &path, oid); return check_and_freshen_file(path.buf, freshen); } -static int check_and_freshen_local(const struct object_id *oid, int freshen) -{ - return check_and_freshen_odb(the_repository->objects->sources, oid, freshen); -} - -static int check_and_freshen_nonlocal(const struct object_id *oid, int freshen) -{ - struct odb_source *source; - - odb_prepare_alternates(the_repository->objects); - for (source = the_repository->objects->sources->next; source; source = source->next) { - if (check_and_freshen_odb(source, oid, freshen)) - return 1; - } - return 0; -} - -static int check_and_freshen(const struct object_id *oid, int freshen) -{ - return check_and_freshen_local(oid, freshen) || - check_and_freshen_nonlocal(oid, freshen); -} - int has_loose_object(struct odb_source *source, const struct object_id *oid) { - return check_and_freshen_odb(source, oid, 0); + return check_and_freshen_source(source, oid, 0); } int format_object_header(char *str, size_t size, enum object_type type, @@ -918,7 +895,15 @@ static int write_loose_object(const struct object_id *oid, char *hdr, static int freshen_loose_object(const struct object_id *oid) { - return check_and_freshen(oid, 1); + struct odb_source *source; + + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) { + if (check_and_freshen_source(source, oid, 1)) + return 1; + } + + return 0; } static int freshen_packed_object(const struct object_id *oid) From 1031f57695ed02da06d1af5ac945bb243262fa09 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:31 +0200 Subject: [PATCH 05/22] object-file: get rid of `the_repository` when freshening objects We implicitly depend on `the_repository` when freshening either loose or packed objects. Refactor these functions to instead accept an object database as input so that we can get rid of the global dependency. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/object-file.c b/object-file.c index 9e17e608f78960..3453989b7e321e 100644 --- a/object-file.c +++ b/object-file.c @@ -893,23 +893,21 @@ static int write_loose_object(const struct object_id *oid, char *hdr, FOF_SKIP_COLLISION_CHECK); } -static int freshen_loose_object(const struct object_id *oid) +static int freshen_loose_object(struct object_database *odb, + const struct object_id *oid) { - struct odb_source *source; - - odb_prepare_alternates(the_repository->objects); - for (source = the_repository->objects->sources; source; source = source->next) { + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) if (check_and_freshen_source(source, oid, 1)) return 1; - } - return 0; } -static int freshen_packed_object(const struct object_id *oid) +static int freshen_packed_object(struct object_database *odb, + const struct object_id *oid) { struct pack_entry e; - if (!find_pack_entry(the_repository, oid, &e)) + if (!find_pack_entry(odb->repo, oid, &e)) return 0; if (e.p->is_cruft) return 0; @@ -999,7 +997,8 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, die(_("deflateEnd on stream object failed (%d)"), ret); close_loose_object(fd, tmp_file.buf); - if (freshen_packed_object(oid) || freshen_loose_object(oid)) { + if (freshen_packed_object(the_repository->objects, oid) || + freshen_loose_object(the_repository->objects, oid)) { unlink_or_warn(tmp_file.buf); goto cleanup; } @@ -1062,7 +1061,8 @@ int write_object_file_flags(const void *buf, unsigned long len, * it out into .git/objects/??/?{38} file. */ write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(repo->objects, oid) || + freshen_loose_object(repo->objects, oid)) return 0; if (write_loose_object(oid, hdr, hdrlen, buf, len, 0, flags)) return -1; From 1efe0aeaa2e10766abe9bf05e2e1a014251ba4e2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:32 +0200 Subject: [PATCH 06/22] object-file: get rid of `the_repository` in `loose_object_info()` While `loose_object_info()` already accepts a repository as parameter we still have one callsite in there where we use `the_repository` to figure out the hash algorithm. Use the passed-in repository instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index 3453989b7e321e..800eeae85af1ed 100644 --- a/object-file.c +++ b/object-file.c @@ -421,7 +421,7 @@ int loose_object_info(struct repository *r, enum object_type type_scratch; if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, the_repository->hash_algo); + oidclr(oi->delta_base_oid, r->hash_algo); /* * If we don't care about type or size, then we don't From cbb388f3e53660c88220c40a8dddb976672ae03d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:33 +0200 Subject: [PATCH 07/22] object-file: get rid of `the_repository` in `finalize_object_file()` We implicitly depend on `the_repository` when moving an object file into place in `finalize_object_file()`. Get rid of this global dependency by passing in a repository. Note that one might be pressed to inject an object database instead of a repository. But the function doesn't really care about the ODB at all. All it does is to move a file into place while checking whether there is any collision. As such, the functionality it provides is independent of the object database and only needs the repository as parameter so that it can adjust permissions of the file we are about to finalize. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 4 ++-- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 2 +- bulk-checkin.c | 2 +- http.c | 4 ++-- midx-write.c | 2 +- object-file.c | 14 ++++++++------ object-file.h | 6 ++++-- pack-write.c | 16 +++++++++------- pack.h | 3 ++- tmp-objdir.c | 2 +- 11 files changed, 32 insertions(+), 25 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index b1389c59211a48..89f57898b15da6 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -821,11 +821,11 @@ static char *keep_pack(const char *curr_index_name) die_errno("failed to write keep file"); odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack"); - if (finalize_object_file(pack_data->pack_name, name.buf)) + if (finalize_object_file(pack_data->repo, pack_data->pack_name, name.buf)) die("cannot store pack file"); odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx"); - if (finalize_object_file(curr_index_name, name.buf)) + if (finalize_object_file(pack_data->repo, curr_index_name, name.buf)) die("cannot store index file"); free((void *)curr_index_name); return strbuf_detach(&name, NULL); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 19c67a8534414a..dabeb825a6c3fd 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1598,7 +1598,7 @@ static void rename_tmp_packfile(const char **final_name, if (!*final_name || strcmp(*final_name, curr_name)) { if (!*final_name) *final_name = odb_pack_name(the_repository, name, hash, ext); - if (finalize_object_file(curr_name, *final_name)) + if (finalize_object_file(the_repository, curr_name, *final_name)) die(_("unable to rename temporary '*.%s' file to '%s'"), ext, *final_name); } else if (make_read_only_if_same) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index a44f0ce1c785ed..e8e85d8278bba6 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1449,7 +1449,7 @@ static void write_pack_file(void) strbuf_setlen(&tmpname, tmpname_len); } - rename_tmp_packfile_idx(&tmpname, &idx_tmp_name); + rename_tmp_packfile_idx(the_repository, &tmpname, &idx_tmp_name); free(idx_tmp_name); strbuf_release(&tmpname); diff --git a/bulk-checkin.c b/bulk-checkin.c index 16df86c0ba89db..b2809ab0398136 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -46,7 +46,7 @@ static void finish_tmp_packfile(struct strbuf *basename, stage_tmp_packfiles(the_repository, basename, pack_tmp_name, written_list, nr_written, NULL, pack_idx_opts, hash, &idx_tmp_name); - rename_tmp_packfile_idx(basename, &idx_tmp_name); + rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name); free(idx_tmp_name); } diff --git a/http.c b/http.c index 9b62f627dc576e..7cc797116bbc17 100644 --- a/http.c +++ b/http.c @@ -2331,7 +2331,7 @@ int http_get_file(const char *url, const char *filename, ret = http_request_reauth(url, result, HTTP_REQUEST_FILE, options); fclose(result); - if (ret == HTTP_OK && finalize_object_file(tmpfile.buf, filename)) + if (ret == HTTP_OK && finalize_object_file(the_repository, tmpfile.buf, filename)) ret = HTTP_ERROR; cleanup: strbuf_release(&tmpfile); @@ -2815,7 +2815,7 @@ int finish_http_object_request(struct http_object_request *freq) return -1; } odb_loose_path(the_repository->objects->sources, &filename, &freq->oid); - freq->rename = finalize_object_file(freq->tmpfile.buf, filename.buf); + freq->rename = finalize_object_file(the_repository, freq->tmpfile.buf, filename.buf); strbuf_release(&filename); return freq->rename; diff --git a/midx-write.c b/midx-write.c index f2cfb85476ed78..effacade2d3c00 100644 --- a/midx-write.c +++ b/midx-write.c @@ -667,7 +667,7 @@ static void write_midx_reverse_index(struct write_midx_context *ctx, tmp_file = write_rev_file_order(ctx->repo, NULL, ctx->pack_order, ctx->entries_nr, midx_hash, WRITE_REV); - if (finalize_object_file(tmp_file, buf.buf)) + if (finalize_object_file(ctx->repo, tmp_file, buf.buf)) die(_("cannot store reverse index file")); strbuf_release(&buf); diff --git a/object-file.c b/object-file.c index 800eeae85af1ed..6a7049a9e9819c 100644 --- a/object-file.c +++ b/object-file.c @@ -584,12 +584,14 @@ static int check_collision(const char *source, const char *dest) /* * Move the just written object into its final resting place. */ -int finalize_object_file(const char *tmpfile, const char *filename) +int finalize_object_file(struct repository *repo, + const char *tmpfile, const char *filename) { - return finalize_object_file_flags(tmpfile, filename, 0); + return finalize_object_file_flags(repo, tmpfile, filename, 0); } -int finalize_object_file_flags(const char *tmpfile, const char *filename, +int finalize_object_file_flags(struct repository *repo, + const char *tmpfile, const char *filename, enum finalize_object_file_flags flags) { unsigned retries = 0; @@ -649,7 +651,7 @@ int finalize_object_file_flags(const char *tmpfile, const char *filename, } out: - if (adjust_shared_perm(the_repository, filename)) + if (adjust_shared_perm(repo, filename)) return error(_("unable to set permission to '%s'"), filename); return 0; } @@ -889,7 +891,7 @@ static int write_loose_object(const struct object_id *oid, char *hdr, warning_errno(_("failed utime() on %s"), tmp_file.buf); } - return finalize_object_file_flags(tmp_file.buf, filename.buf, + return finalize_object_file_flags(the_repository, tmp_file.buf, filename.buf, FOF_SKIP_COLLISION_CHECK); } @@ -1020,7 +1022,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, strbuf_release(&dir); } - err = finalize_object_file_flags(tmp_file.buf, filename.buf, + err = finalize_object_file_flags(the_repository, tmp_file.buf, filename.buf, FOF_SKIP_COLLISION_CHECK); if (!err && compat) err = repo_add_loose_object_map(the_repository, oid, &compat_oid); diff --git a/object-file.h b/object-file.h index 5b63a05ab515ed..370139e0762f1b 100644 --- a/object-file.h +++ b/object-file.h @@ -218,8 +218,10 @@ enum finalize_object_file_flags { FOF_SKIP_COLLISION_CHECK = 1, }; -int finalize_object_file(const char *tmpfile, const char *filename); -int finalize_object_file_flags(const char *tmpfile, const char *filename, +int finalize_object_file(struct repository *repo, + const char *tmpfile, const char *filename); +int finalize_object_file_flags(struct repository *repo, + const char *tmpfile, const char *filename, enum finalize_object_file_flags flags); void hash_object_file(const struct git_hash_algo *algo, const void *buf, diff --git a/pack-write.c b/pack-write.c index eccdc798e368c3..83eaf88541eefb 100644 --- a/pack-write.c +++ b/pack-write.c @@ -538,22 +538,24 @@ struct hashfile *create_tmp_packfile(struct repository *repo, return hashfd(repo->hash_algo, fd, *pack_tmp_name); } -static void rename_tmp_packfile(struct strbuf *name_prefix, const char *source, +static void rename_tmp_packfile(struct repository *repo, + struct strbuf *name_prefix, const char *source, const char *ext) { size_t name_prefix_len = name_prefix->len; strbuf_addstr(name_prefix, ext); - if (finalize_object_file(source, name_prefix->buf)) + if (finalize_object_file(repo, source, name_prefix->buf)) die("unable to rename temporary file to '%s'", name_prefix->buf); strbuf_setlen(name_prefix, name_prefix_len); } -void rename_tmp_packfile_idx(struct strbuf *name_buffer, +void rename_tmp_packfile_idx(struct repository *repo, + struct strbuf *name_buffer, char **idx_tmp_name) { - rename_tmp_packfile(name_buffer, *idx_tmp_name, "idx"); + rename_tmp_packfile(repo, name_buffer, *idx_tmp_name, "idx"); } void stage_tmp_packfiles(struct repository *repo, @@ -586,11 +588,11 @@ void stage_tmp_packfiles(struct repository *repo, hash); } - rename_tmp_packfile(name_buffer, pack_tmp_name, "pack"); + rename_tmp_packfile(repo, name_buffer, pack_tmp_name, "pack"); if (rev_tmp_name) - rename_tmp_packfile(name_buffer, rev_tmp_name, "rev"); + rename_tmp_packfile(repo, name_buffer, rev_tmp_name, "rev"); if (mtimes_tmp_name) - rename_tmp_packfile(name_buffer, mtimes_tmp_name, "mtimes"); + rename_tmp_packfile(repo, name_buffer, mtimes_tmp_name, "mtimes"); free(rev_tmp_name); free(mtimes_tmp_name); diff --git a/pack.h b/pack.h index 5d4393eaffef04..ec76472e49b052 100644 --- a/pack.h +++ b/pack.h @@ -145,7 +145,8 @@ void stage_tmp_packfiles(struct repository *repo, struct pack_idx_option *pack_idx_opts, unsigned char hash[], char **idx_tmp_name); -void rename_tmp_packfile_idx(struct strbuf *basename, +void rename_tmp_packfile_idx(struct repository *repo, + struct strbuf *basename, char **idx_tmp_name); #endif diff --git a/tmp-objdir.c b/tmp-objdir.c index ae01eae9c415d3..9f5a1788cd7c48 100644 --- a/tmp-objdir.c +++ b/tmp-objdir.c @@ -227,7 +227,7 @@ static int migrate_one(struct tmp_objdir *t, return -1; return migrate_paths(t, src, dst, flags); } - return finalize_object_file_flags(src->buf, dst->buf, flags); + return finalize_object_file_flags(t->repo, src->buf, dst->buf, flags); } static int is_loose_object_shard(const char *name) From 0f9b18935745fcdebcad613a6e3e52db5b29b1d4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:34 +0200 Subject: [PATCH 08/22] loose: write loose objects map via their source When a repository is configured to have a compatibility hash algorithm we keep track of object ID mappings for loose objects via the loose object map. This map simply maps an object ID of the actual hash to the object ID of the compatibility hash. This loose object map is an inherent property of the loose files backend and thus of one specific object source. Refactor the interfaces to reflect this by requiring a `struct odb_source` as input instead of a repository. This prepares for subsequent commits where we will refactor writing of loose objects to work on a `struct odb_source`, as well. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 16 +++++++++------- loose.h | 4 +++- object-file.c | 6 +++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/loose.c b/loose.c index 519f5db79352a1..e8ea6e7e24ba31 100644 --- a/loose.c +++ b/loose.c @@ -166,7 +166,8 @@ int repo_write_loose_object_map(struct repository *repo) return -1; } -static int write_one_object(struct repository *repo, const struct object_id *oid, +static int write_one_object(struct odb_source *source, + const struct object_id *oid, const struct object_id *compat_oid) { struct lock_file lock; @@ -174,7 +175,7 @@ static int write_one_object(struct repository *repo, const struct object_id *oid struct stat st; struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; - repo_common_path_replace(repo, &path, "objects/loose-object-idx"); + strbuf_addf(&path, "%s/loose-object-idx", source->path); hold_lock_file_for_update_timeout(&lock, path.buf, LOCK_DIE_ON_ERROR, -1); fd = open(path.buf, O_WRONLY | O_CREAT | O_APPEND, 0666); @@ -190,7 +191,7 @@ static int write_one_object(struct repository *repo, const struct object_id *oid goto errout; if (close(fd)) goto errout; - adjust_shared_perm(repo, path.buf); + adjust_shared_perm(source->odb->repo, path.buf); rollback_lock_file(&lock); strbuf_release(&buf); strbuf_release(&path); @@ -204,17 +205,18 @@ static int write_one_object(struct repository *repo, const struct object_id *oid return -1; } -int repo_add_loose_object_map(struct repository *repo, const struct object_id *oid, +int repo_add_loose_object_map(struct odb_source *source, + const struct object_id *oid, const struct object_id *compat_oid) { int inserted = 0; - if (!should_use_loose_object_map(repo)) + if (!should_use_loose_object_map(source->odb->repo)) return 0; - inserted = insert_loose_map(repo->objects->sources, oid, compat_oid); + inserted = insert_loose_map(source, oid, compat_oid); if (inserted) - return write_one_object(repo, oid, compat_oid); + return write_one_object(source, oid, compat_oid); return 0; } diff --git a/loose.h b/loose.h index 28512306e5fec7..6af1702973c058 100644 --- a/loose.h +++ b/loose.h @@ -4,6 +4,7 @@ #include "khash.h" struct repository; +struct odb_source; struct loose_object_map { kh_oid_map_t *to_compat; @@ -16,7 +17,8 @@ int repo_loose_object_map_oid(struct repository *repo, const struct object_id *src, const struct git_hash_algo *dest_algo, struct object_id *dest); -int repo_add_loose_object_map(struct repository *repo, const struct object_id *oid, +int repo_add_loose_object_map(struct odb_source *source, + const struct object_id *oid, const struct object_id *compat_oid); int repo_read_loose_object_map(struct repository *repo); int repo_write_loose_object_map(struct repository *repo); diff --git a/object-file.c b/object-file.c index 6a7049a9e9819c..a9248760a26822 100644 --- a/object-file.c +++ b/object-file.c @@ -1025,7 +1025,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, err = finalize_object_file_flags(the_repository, tmp_file.buf, filename.buf, FOF_SKIP_COLLISION_CHECK); if (!err && compat) - err = repo_add_loose_object_map(the_repository, oid, &compat_oid); + err = repo_add_loose_object_map(the_repository->objects->sources, oid, &compat_oid); cleanup: strbuf_release(&tmp_file); strbuf_release(&filename); @@ -1069,7 +1069,7 @@ int write_object_file_flags(const void *buf, unsigned long len, if (write_loose_object(oid, hdr, hdrlen, buf, len, 0, flags)) return -1; if (compat) - return repo_add_loose_object_map(repo, oid, &compat_oid); + return repo_add_loose_object_map(repo->objects->sources, oid, &compat_oid); return 0; } @@ -1103,7 +1103,7 @@ int force_object_loose(const struct object_id *oid, time_t mtime) hdrlen = format_object_header(hdr, sizeof(hdr), type, len); ret = write_loose_object(oid, hdr, hdrlen, buf, len, mtime, 0); if (!ret && compat) - ret = repo_add_loose_object_map(the_repository, oid, &compat_oid); + ret = repo_add_loose_object_map(the_repository->objects->sources, oid, &compat_oid); free(buf); return ret; From ab1c6e1d12e869cbca3ea7f8a4e767e45fd14c49 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:35 +0200 Subject: [PATCH 09/22] odb: introduce `odb_write_object()` We do not have a backend-agnostic way to write objects into an object database. While there is `write_object_file()`, this function is rather specific to the loose object format. Introduce `odb_write_object()` to plug this gap. For now, this function is a simple wrapper around `write_object_file()` and doesn't even use the passed-in object database yet. This will change in subsequent commits, where `write_object_file()` is converted so that it works on top of an `odb_source`. `odb_write_object()` will then become responsible for deciding which source an object shall be written to. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- apply.c | 11 +++++++---- builtin/checkout.c | 2 +- builtin/merge-file.c | 3 ++- builtin/mktag.c | 2 +- builtin/mktree.c | 2 +- builtin/notes.c | 3 ++- builtin/receive-pack.c | 4 ++-- builtin/replace.c | 3 ++- builtin/tag.c | 4 ++-- builtin/unpack-objects.c | 12 ++++++------ cache-tree.c | 5 ++--- commit.c | 4 ++-- match-trees.c | 2 +- merge-ort.c | 7 ++++--- notes-cache.c | 3 ++- notes.c | 12 ++++++++---- object-file.c | 18 +++++++++--------- object-file.h | 26 +++----------------------- odb.c | 10 ++++++++++ odb.h | 38 ++++++++++++++++++++++++++++++++++++++ read-cache.c | 2 +- 21 files changed, 106 insertions(+), 67 deletions(-) diff --git a/apply.c b/apply.c index a6836692d0ce08..ffb9d9f76d62d7 100644 --- a/apply.c +++ b/apply.c @@ -3621,7 +3621,7 @@ static int try_threeway(struct apply_state *state, /* Preimage the patch was prepared for */ if (patch->is_new) - write_object_file("", 0, OBJ_BLOB, &pre_oid); + odb_write_object(the_repository->objects, "", 0, OBJ_BLOB, &pre_oid); else if (repo_get_oid(the_repository, patch->old_oid_prefix, &pre_oid) || read_blob_object(&buf, &pre_oid, patch->old_mode)) return error(_("repository lacks the necessary blob to perform 3-way merge.")); @@ -3637,7 +3637,8 @@ static int try_threeway(struct apply_state *state, return -1; } /* post_oid is theirs */ - write_object_file(tmp_image.buf.buf, tmp_image.buf.len, OBJ_BLOB, &post_oid); + odb_write_object(the_repository->objects, tmp_image.buf.buf, + tmp_image.buf.len, OBJ_BLOB, &post_oid); image_clear(&tmp_image); /* our_oid is ours */ @@ -3650,7 +3651,8 @@ static int try_threeway(struct apply_state *state, return error(_("cannot read the current contents of '%s'"), patch->old_name); } - write_object_file(tmp_image.buf.buf, tmp_image.buf.len, OBJ_BLOB, &our_oid); + odb_write_object(the_repository->objects, tmp_image.buf.buf, + tmp_image.buf.len, OBJ_BLOB, &our_oid); image_clear(&tmp_image); /* in-core three-way merge between post and our using pre as base */ @@ -4360,7 +4362,8 @@ static int add_index_file(struct apply_state *state, } fill_stat_cache_info(state->repo->index, ce, &st); } - if (write_object_file(buf, size, OBJ_BLOB, &ce->oid) < 0) { + if (odb_write_object(the_repository->objects, buf, size, + OBJ_BLOB, &ce->oid) < 0) { discard_cache_entry(ce); return error(_("unable to create backing store " "for newly created file %s"), path); diff --git a/builtin/checkout.c b/builtin/checkout.c index 0a90b86a72913f..f95eb64ffb3d83 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -320,7 +320,7 @@ static int checkout_merged(int pos, const struct checkout *state, * (it also writes the merge result to the object database even * when it may contain conflicts). */ - if (write_object_file(result_buf.ptr, result_buf.size, OBJ_BLOB, &oid)) + if (odb_write_object(the_repository->objects, result_buf.ptr, result_buf.size, OBJ_BLOB, &oid)) die(_("Unable to add merge result for '%s'"), path); free(result_buf.ptr); ce = make_transient_cache_entry(mode, &oid, path, 2, ce_mem_pool); diff --git a/builtin/merge-file.c b/builtin/merge-file.c index 9464f2756299ec..b8b25a14e6dc5e 100644 --- a/builtin/merge-file.c +++ b/builtin/merge-file.c @@ -155,7 +155,8 @@ int cmd_merge_file(int argc, if (object_id && !to_stdout) { struct object_id oid; if (result.size) { - if (write_object_file(result.ptr, result.size, OBJ_BLOB, &oid) < 0) + if (odb_write_object(the_repository->objects, result.ptr, + result.size, OBJ_BLOB, &oid) < 0) ret = error(_("Could not write object file")); } else { oidcpy(&oid, the_hash_algo->empty_blob); diff --git a/builtin/mktag.c b/builtin/mktag.c index 27e649736cf48c..12552bbb217d1f 100644 --- a/builtin/mktag.c +++ b/builtin/mktag.c @@ -106,7 +106,7 @@ int cmd_mktag(int argc, if (verify_object_in_tag(&tagged_oid, &tagged_type) < 0) die(_("tag on stdin did not refer to a valid object")); - if (write_object_file(buf.buf, buf.len, OBJ_TAG, &result) < 0) + if (odb_write_object(the_repository->objects, buf.buf, buf.len, OBJ_TAG, &result) < 0) die(_("unable to write tag file")); strbuf_release(&buf); diff --git a/builtin/mktree.c b/builtin/mktree.c index 81df7f6099fa4e..12772303f504f3 100644 --- a/builtin/mktree.c +++ b/builtin/mktree.c @@ -63,7 +63,7 @@ static void write_tree(struct object_id *oid) strbuf_add(&buf, ent->oid.hash, the_hash_algo->rawsz); } - write_object_file(buf.buf, buf.len, OBJ_TREE, oid); + odb_write_object(the_repository->objects, buf.buf, buf.len, OBJ_TREE, oid); strbuf_release(&buf); } diff --git a/builtin/notes.c b/builtin/notes.c index a9529b1696ae14..a3580b4aa3dfa1 100644 --- a/builtin/notes.c +++ b/builtin/notes.c @@ -229,7 +229,8 @@ static void prepare_note_data(const struct object_id *object, struct note_data * static void write_note_data(struct note_data *d, struct object_id *oid) { - if (write_object_file(d->buf.buf, d->buf.len, OBJ_BLOB, oid)) { + if (odb_write_object(the_repository->objects, d->buf.buf, + d->buf.len, OBJ_BLOB, oid)) { int status = die_message(_("unable to write note object")); if (d->edit_path) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index dd1d1446e756ba..bd9baf81e566ad 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -760,8 +760,8 @@ static void prepare_push_cert_sha1(struct child_process *proc) int bogs /* beginning_of_gpg_sig */; already_done = 1; - if (write_object_file(push_cert.buf, push_cert.len, OBJ_BLOB, - &push_cert_oid)) + if (odb_write_object(the_repository->objects, push_cert.buf, + push_cert.len, OBJ_BLOB, &push_cert_oid)) oidclr(&push_cert_oid, the_repository->hash_algo); memset(&sigcheck, '\0', sizeof(sigcheck)); diff --git a/builtin/replace.c b/builtin/replace.c index 5ff2ab723cbd58..7c46d05ec15b3b 100644 --- a/builtin/replace.c +++ b/builtin/replace.c @@ -488,7 +488,8 @@ static int create_graft(int argc, const char **argv, int force, int gentle) return -1; } - if (write_object_file(buf.buf, buf.len, OBJ_COMMIT, &new_oid)) { + if (odb_write_object(the_repository->objects, buf.buf, + buf.len, OBJ_COMMIT, &new_oid)) { strbuf_release(&buf); return error(_("could not write replacement commit for: '%s'"), old_ref); diff --git a/builtin/tag.c b/builtin/tag.c index 46cbf892e34224..8fbe9e7be04a68 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -271,8 +271,8 @@ static int build_tag_object(struct strbuf *buf, int sign, struct object_id *resu struct object_id *compat_oid = NULL, compat_oid_buf; if (sign && do_sign(buf, &compat_oid, &compat_oid_buf) < 0) return error(_("unable to sign the tag")); - if (write_object_file_flags(buf->buf, buf->len, OBJ_TAG, result, - compat_oid, 0) < 0) + if (odb_write_object_ext(the_repository->objects, buf->buf, + buf->len, OBJ_TAG, result, compat_oid, 0) < 0) return error(_("unable to write tag file")); return 0; } diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index a69d59eb50c902..1a4fbef36f898a 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -204,8 +204,8 @@ static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf) { struct object_id oid; - if (write_object_file(obj_buf->buffer, obj_buf->size, - obj->type, &oid) < 0) + if (odb_write_object(the_repository->objects, obj_buf->buffer, obj_buf->size, + obj->type, &oid) < 0) die("failed to write object %s", oid_to_hex(&obj->oid)); obj->flags |= FLAG_WRITTEN; } @@ -272,16 +272,16 @@ static void write_object(unsigned nr, enum object_type type, void *buf, unsigned long size) { if (!strict) { - if (write_object_file(buf, size, type, - &obj_list[nr].oid) < 0) + if (odb_write_object(the_repository->objects, buf, size, type, + &obj_list[nr].oid) < 0) die("failed to write object"); added_object(nr, type, buf, size); free(buf); obj_list[nr].obj = NULL; } else if (type == OBJ_BLOB) { struct blob *blob; - if (write_object_file(buf, size, type, - &obj_list[nr].oid) < 0) + if (odb_write_object(the_repository->objects, buf, size, type, + &obj_list[nr].oid) < 0) die("failed to write object"); added_object(nr, type, buf, size); free(buf); diff --git a/cache-tree.c b/cache-tree.c index a4bc14ad15c8a0..66ef2becbe01a4 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -456,9 +456,8 @@ static int update_one(struct cache_tree *it, } else if (dryrun) { hash_object_file(the_hash_algo, buffer.buf, buffer.len, OBJ_TREE, &it->oid); - } else if (write_object_file_flags(buffer.buf, buffer.len, OBJ_TREE, - &it->oid, NULL, flags & WRITE_TREE_SILENT - ? WRITE_OBJECT_FILE_SILENT : 0)) { + } else if (odb_write_object_ext(the_repository->objects, buffer.buf, buffer.len, OBJ_TREE, + &it->oid, NULL, flags & WRITE_TREE_SILENT ? WRITE_OBJECT_SILENT : 0)) { strbuf_release(&buffer); return -1; } diff --git a/commit.c b/commit.c index 15115125c3612c..bcc9aea55f65e9 100644 --- a/commit.c +++ b/commit.c @@ -1797,8 +1797,8 @@ int commit_tree_extended(const char *msg, size_t msg_len, compat_oid = &compat_oid_buf; } - result = write_object_file_flags(buffer.buf, buffer.len, OBJ_COMMIT, - ret, compat_oid, 0); + result = odb_write_object_ext(the_repository->objects, buffer.buf, buffer.len, + OBJ_COMMIT, ret, compat_oid, 0); out: free(parent_buf); strbuf_release(&buffer); diff --git a/match-trees.c b/match-trees.c index 5a8a5c39b04ab9..4216933d06b163 100644 --- a/match-trees.c +++ b/match-trees.c @@ -246,7 +246,7 @@ static int splice_tree(struct repository *r, rewrite_with = oid2; } hashcpy(rewrite_here, rewrite_with->hash, r->hash_algo); - status = write_object_file(buf, sz, OBJ_TREE, result); + status = odb_write_object(r->objects, buf, sz, OBJ_TREE, result); free(buf); return status; } diff --git a/merge-ort.c b/merge-ort.c index 473ff61e36e9f7..535ef3efc6fb33 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2216,8 +2216,8 @@ static int handle_content_merge(struct merge_options *opt, } if (!ret && record_object && - write_object_file(result_buf.ptr, result_buf.size, - OBJ_BLOB, &result->oid)) { + odb_write_object(the_repository->objects, result_buf.ptr, result_buf.size, + OBJ_BLOB, &result->oid)) { path_msg(opt, ERROR_OBJECT_WRITE_FAILED, 0, pathnames[0], pathnames[1], pathnames[2], NULL, _("error: unable to add %s to database"), path); @@ -3772,7 +3772,8 @@ static int write_tree(struct object_id *result_oid, } /* Write this object file out, and record in result_oid */ - if (write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid)) + if (odb_write_object(the_repository->objects, buf.buf, + buf.len, OBJ_TREE, result_oid)) ret = -1; strbuf_release(&buf); return ret; diff --git a/notes-cache.c b/notes-cache.c index dd56feed6e8cfe..bf5bb1f6c13a13 100644 --- a/notes-cache.c +++ b/notes-cache.c @@ -98,7 +98,8 @@ int notes_cache_put(struct notes_cache *c, struct object_id *key_oid, { struct object_id value_oid; - if (write_object_file(data, size, OBJ_BLOB, &value_oid) < 0) + if (odb_write_object(the_repository->objects, data, + size, OBJ_BLOB, &value_oid) < 0) return -1; return add_note(&c->tree, key_oid, &value_oid, NULL); } diff --git a/notes.c b/notes.c index 97b995f3f2da6f..7596c0df9a12b4 100644 --- a/notes.c +++ b/notes.c @@ -682,7 +682,8 @@ static int tree_write_stack_finish_subtree(struct tree_write_stack *tws) ret = tree_write_stack_finish_subtree(n); if (ret) return ret; - ret = write_object_file(n->buf.buf, n->buf.len, OBJ_TREE, &s); + ret = odb_write_object(the_repository->objects, n->buf.buf, + n->buf.len, OBJ_TREE, &s); if (ret) return ret; strbuf_release(&n->buf); @@ -847,7 +848,8 @@ int combine_notes_concatenate(struct object_id *cur_oid, free(new_msg); /* create a new blob object from buf */ - ret = write_object_file(buf, buf_len, OBJ_BLOB, cur_oid); + ret = odb_write_object(the_repository->objects, buf, + buf_len, OBJ_BLOB, cur_oid); free(buf); return ret; } @@ -927,7 +929,8 @@ int combine_notes_cat_sort_uniq(struct object_id *cur_oid, string_list_join_lines_helper, &buf)) goto out; - ret = write_object_file(buf.buf, buf.len, OBJ_BLOB, cur_oid); + ret = odb_write_object(the_repository->objects, buf.buf, + buf.len, OBJ_BLOB, cur_oid); out: strbuf_release(&buf); @@ -1215,7 +1218,8 @@ int write_notes_tree(struct notes_tree *t, struct object_id *result) ret = for_each_note(t, flags, write_each_note, &cb_data) || write_each_non_note_until(NULL, &cb_data) || tree_write_stack_finish_subtree(&root) || - write_object_file(root.buf.buf, root.buf.len, OBJ_TREE, result); + odb_write_object(the_repository->objects, root.buf.buf, + root.buf.len, OBJ_TREE, result); strbuf_release(&root.buf); return ret; } diff --git a/object-file.c b/object-file.c index a9248760a26822..84ece01337ec1f 100644 --- a/object-file.c +++ b/object-file.c @@ -755,7 +755,7 @@ static int start_loose_object_common(struct strbuf *tmp_file, fd = create_tmpfile(tmp_file, filename); if (fd < 0) { - if (flags & WRITE_OBJECT_FILE_SILENT) + if (flags & WRITE_OBJECT_SILENT) return -1; else if (errno == EACCES) return error(_("insufficient permission for adding " @@ -887,7 +887,7 @@ static int write_loose_object(const struct object_id *oid, char *hdr, utb.actime = mtime; utb.modtime = mtime; if (utime(tmp_file.buf, &utb) < 0 && - !(flags & WRITE_OBJECT_FILE_SILENT)) + !(flags & WRITE_OBJECT_SILENT)) warning_errno(_("failed utime() on %s"), tmp_file.buf); } @@ -1032,9 +1032,9 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, return err; } -int write_object_file_flags(const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags) +int write_object_file(const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags) { struct repository *repo = the_repository; const struct git_hash_algo *algo = repo->hash_algo; @@ -1159,7 +1159,7 @@ static int index_mem(struct index_state *istate, } if (write_object) - ret = write_object_file(buf, size, type, oid); + ret = odb_write_object(istate->repo->objects, buf, size, type, oid); else hash_object_file(istate->repo->hash_algo, buf, size, type, oid); @@ -1184,8 +1184,8 @@ static int index_stream_convert_blob(struct index_state *istate, get_conv_flags(flags)); if (write_object) - ret = write_object_file(sbuf.buf, sbuf.len, OBJ_BLOB, - oid); + ret = odb_write_object(istate->repo->objects, sbuf.buf, sbuf.len, OBJ_BLOB, + oid); else hash_object_file(istate->repo->hash_algo, sbuf.buf, sbuf.len, OBJ_BLOB, oid); @@ -1287,7 +1287,7 @@ int index_path(struct index_state *istate, struct object_id *oid, if (!(flags & INDEX_WRITE_OBJECT)) hash_object_file(istate->repo->hash_algo, sb.buf, sb.len, OBJ_BLOB, oid); - else if (write_object_file(sb.buf, sb.len, OBJ_BLOB, oid)) + else if (odb_write_object(the_repository->objects, sb.buf, sb.len, OBJ_BLOB, oid)) rc = error(_("%s: failed to insert into database"), path); strbuf_release(&sb); break; diff --git a/object-file.h b/object-file.h index 370139e0762f1b..8ee24b7d8f307d 100644 --- a/object-file.h +++ b/object-file.h @@ -157,29 +157,9 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, struct object_info; int parse_loose_header(const char *hdr, struct object_info *oi); -enum { - /* - * By default, `write_object_file()` does not actually write - * anything into the object store, but only computes the object ID. - * This flag changes that so that the object will be written as a loose - * object and persisted. - */ - WRITE_OBJECT_FILE_PERSIST = (1 << 0), - - /* - * Do not print an error in case something gose wrong. - */ - WRITE_OBJECT_FILE_SILENT = (1 << 1), -}; - -int write_object_file_flags(const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags); -static inline int write_object_file(const void *buf, unsigned long len, - enum object_type type, struct object_id *oid) -{ - return write_object_file_flags(buf, len, type, oid, NULL, 0); -} +int write_object_file(const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags); struct input_stream { const void *(*read)(struct input_stream *, unsigned long *len); diff --git a/odb.c b/odb.c index 1f48a0448e398a..519df2fa497863 100644 --- a/odb.c +++ b/odb.c @@ -980,6 +980,16 @@ void odb_assert_oid_type(struct object_database *odb, type_name(expect)); } +int odb_write_object_ext(struct object_database *odb UNUSED, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid, + unsigned flags) +{ + return write_object_file(buf, len, type, oid, compat_oid, flags); +} + struct object_database *odb_new(struct repository *repo) { struct object_database *o = xmalloc(sizeof(*o)); diff --git a/odb.h b/odb.h index e922f256802a0c..03422068888ad8 100644 --- a/odb.h +++ b/odb.h @@ -437,6 +437,44 @@ enum for_each_object_flags { FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; +enum { + /* + * By default, `odb_write_object()` does not actually write anything + * into the object store, but only computes the object ID. This flag + * changes that so that the object will be written as a loose object + * and persisted. + */ + WRITE_OBJECT_PERSIST = (1 << 0), + + /* + * Do not print an error in case something goes wrong. + */ + WRITE_OBJECT_SILENT = (1 << 1), +}; + +/* + * Write an object into the object database. The object is being written into + * the local alternate of the repository. If provided, the converted object ID + * as well as the compatibility object ID are written to the respective + * pointers. + * + * Returns 0 on success, a negative error code otherwise. + */ +int odb_write_object_ext(struct object_database *odb, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid, + unsigned flags); + +static inline int odb_write_object(struct object_database *odb, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid) +{ + return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0); +} + /* Compatibility wrappers, to be removed once Git 2.51 has been released. */ #include "repository.h" diff --git a/read-cache.c b/read-cache.c index 531d87e790529f..be17ca7f586897 100644 --- a/read-cache.c +++ b/read-cache.c @@ -690,7 +690,7 @@ static struct cache_entry *create_alias_ce(struct index_state *istate, void set_object_name_for_intent_to_add_entry(struct cache_entry *ce) { struct object_id oid; - if (write_object_file("", 0, OBJ_BLOB, &oid)) + if (odb_write_object(the_repository->objects, "", 0, OBJ_BLOB, &oid)) die(_("cannot create an empty blob in the object database")); oidcpy(&ce->oid, &oid); } From e7e952f5c27bbca3d98bbcea6d20cd5b63d7d8e5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:36 +0200 Subject: [PATCH 10/22] object-file: get rid of `the_repository` when writing objects The logic that writes loose objects still relies on `the_repository` to decide where exactly the object shall be written to. Refactor it so that the logic instead operates on a `struct odb_source` so that we can get rid of this global dependency. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/unpack-objects.c | 3 +- object-file.c | 96 +++++++++++++++++++++------------------- object-file.h | 6 ++- odb.c | 4 +- 4 files changed, 58 insertions(+), 51 deletions(-) diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 1a4fbef36f898a..1d405d156e4a9c 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -403,7 +403,8 @@ static void stream_blob(unsigned long size, unsigned nr) data.zstream = &zstream; git_inflate_init(&zstream); - if (stream_loose_object(&in_stream, size, &info->oid)) + if (stream_loose_object(the_repository->objects->sources, + &in_stream, size, &info->oid)) die(_("failed to write object in stream")); if (data.status != Z_STREAM_END) diff --git a/object-file.c b/object-file.c index 84ece01337ec1f..fc061c37bb5a47 100644 --- a/object-file.c +++ b/object-file.c @@ -667,9 +667,10 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf, } /* Finalize a file on disk, and close it. */ -static void close_loose_object(int fd, const char *filename) +static void close_loose_object(struct odb_source *source, + int fd, const char *filename) { - if (the_repository->objects->sources->will_destroy) + if (source->will_destroy) goto out; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) @@ -701,7 +702,8 @@ static inline int directory_size(const char *filename) * We want to avoid cross-directory filename renames, because those * can have problems on various filesystems (FAT, NFS, Coda). */ -static int create_tmpfile(struct strbuf *tmp, const char *filename) +static int create_tmpfile(struct repository *repo, + struct strbuf *tmp, const char *filename) { int fd, dirlen = directory_size(filename); @@ -720,7 +722,7 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename) strbuf_add(tmp, filename, dirlen - 1); if (mkdir(tmp->buf, 0777) && errno != EEXIST) return -1; - if (adjust_shared_perm(the_repository, tmp->buf)) + if (adjust_shared_perm(repo, tmp->buf)) return -1; /* Try again */ @@ -741,26 +743,26 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename) * Returns a "fd", which should later be provided to * end_loose_object_common(). */ -static int start_loose_object_common(struct strbuf *tmp_file, +static int start_loose_object_common(struct odb_source *source, + struct strbuf *tmp_file, const char *filename, unsigned flags, git_zstream *stream, unsigned char *buf, size_t buflen, struct git_hash_ctx *c, struct git_hash_ctx *compat_c, char *hdr, int hdrlen) { - struct repository *repo = the_repository; - const struct git_hash_algo *algo = repo->hash_algo; - const struct git_hash_algo *compat = repo->compat_hash_algo; + const struct git_hash_algo *algo = source->odb->repo->hash_algo; + const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; int fd; - fd = create_tmpfile(tmp_file, filename); + fd = create_tmpfile(source->odb->repo, tmp_file, filename); if (fd < 0) { if (flags & WRITE_OBJECT_SILENT) return -1; else if (errno == EACCES) return error(_("insufficient permission for adding " "an object to repository database %s"), - repo_get_object_directory(the_repository)); + source->path); else return error_errno( _("unable to create temporary file")); @@ -790,14 +792,14 @@ static int start_loose_object_common(struct strbuf *tmp_file, * Common steps for the inner git_deflate() loop for writing loose * objects. Returns what git_deflate() returns. */ -static int write_loose_object_common(struct git_hash_ctx *c, struct git_hash_ctx *compat_c, +static int write_loose_object_common(struct odb_source *source, + struct git_hash_ctx *c, struct git_hash_ctx *compat_c, git_zstream *stream, const int flush, unsigned char *in0, const int fd, unsigned char *compressed, const size_t compressed_len) { - struct repository *repo = the_repository; - const struct git_hash_algo *compat = repo->compat_hash_algo; + const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; int ret; ret = git_deflate(stream, flush ? Z_FINISH : 0); @@ -818,12 +820,12 @@ static int write_loose_object_common(struct git_hash_ctx *c, struct git_hash_ctx * - End the compression of zlib stream. * - Get the calculated oid to "oid". */ -static int end_loose_object_common(struct git_hash_ctx *c, struct git_hash_ctx *compat_c, +static int end_loose_object_common(struct odb_source *source, + struct git_hash_ctx *c, struct git_hash_ctx *compat_c, git_zstream *stream, struct object_id *oid, struct object_id *compat_oid) { - struct repository *repo = the_repository; - const struct git_hash_algo *compat = repo->compat_hash_algo; + const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; int ret; ret = git_deflate_end_gently(stream); @@ -836,7 +838,8 @@ static int end_loose_object_common(struct git_hash_ctx *c, struct git_hash_ctx * return Z_OK; } -static int write_loose_object(const struct object_id *oid, char *hdr, +static int write_loose_object(struct odb_source *source, + const struct object_id *oid, char *hdr, int hdrlen, const void *buf, unsigned long len, time_t mtime, unsigned flags) { @@ -851,9 +854,9 @@ static int write_loose_object(const struct object_id *oid, char *hdr, if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) prepare_loose_object_bulk_checkin(); - odb_loose_path(the_repository->objects->sources, &filename, oid); + odb_loose_path(source, &filename, oid); - fd = start_loose_object_common(&tmp_file, filename.buf, flags, + fd = start_loose_object_common(source, &tmp_file, filename.buf, flags, &stream, compressed, sizeof(compressed), &c, NULL, hdr, hdrlen); if (fd < 0) @@ -865,14 +868,14 @@ static int write_loose_object(const struct object_id *oid, char *hdr, do { unsigned char *in0 = stream.next_in; - ret = write_loose_object_common(&c, NULL, &stream, 1, in0, fd, + ret = write_loose_object_common(source, &c, NULL, &stream, 1, in0, fd, compressed, sizeof(compressed)); } while (ret == Z_OK); if (ret != Z_STREAM_END) die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid), ret); - ret = end_loose_object_common(&c, NULL, &stream, ¶no_oid, NULL); + ret = end_loose_object_common(source, &c, NULL, &stream, ¶no_oid, NULL); if (ret != Z_OK) die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid), ret); @@ -880,7 +883,7 @@ static int write_loose_object(const struct object_id *oid, char *hdr, die(_("confused by unstable object source data for %s"), oid_to_hex(oid)); - close_loose_object(fd, tmp_file.buf); + close_loose_object(source, fd, tmp_file.buf); if (mtime) { struct utimbuf utb; @@ -891,7 +894,7 @@ static int write_loose_object(const struct object_id *oid, char *hdr, warning_errno(_("failed utime() on %s"), tmp_file.buf); } - return finalize_object_file_flags(the_repository, tmp_file.buf, filename.buf, + return finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf, FOF_SKIP_COLLISION_CHECK); } @@ -921,10 +924,11 @@ static int freshen_packed_object(struct object_database *odb, return 1; } -int stream_loose_object(struct input_stream *in_stream, size_t len, +int stream_loose_object(struct odb_source *source, + struct input_stream *in_stream, size_t len, struct object_id *oid) { - const struct git_hash_algo *compat = the_repository->compat_hash_algo; + const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; struct object_id compat_oid; int fd, ret, err = 0, flush = 0; unsigned char compressed[4096]; @@ -940,7 +944,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, prepare_loose_object_bulk_checkin(); /* Since oid is not determined, save tmp file to odb path. */ - strbuf_addf(&filename, "%s/", repo_get_object_directory(the_repository)); + strbuf_addf(&filename, "%s/", source->path); hdrlen = format_object_header(hdr, sizeof(hdr), OBJ_BLOB, len); /* @@ -951,7 +955,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, * - Setup zlib stream for compression. * - Start to feed header to zlib stream. */ - fd = start_loose_object_common(&tmp_file, filename.buf, 0, + fd = start_loose_object_common(source, &tmp_file, filename.buf, 0, &stream, compressed, sizeof(compressed), &c, &compat_c, hdr, hdrlen); if (fd < 0) { @@ -971,7 +975,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, if (in_stream->is_finished) flush = 1; } - ret = write_loose_object_common(&c, &compat_c, &stream, flush, in0, fd, + ret = write_loose_object_common(source, &c, &compat_c, &stream, flush, in0, fd, compressed, sizeof(compressed)); /* * Unlike write_loose_object(), we do not have the entire @@ -994,18 +998,18 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, */ if (ret != Z_STREAM_END) die(_("unable to stream deflate new object (%d)"), ret); - ret = end_loose_object_common(&c, &compat_c, &stream, oid, &compat_oid); + ret = end_loose_object_common(source, &c, &compat_c, &stream, oid, &compat_oid); if (ret != Z_OK) die(_("deflateEnd on stream object failed (%d)"), ret); - close_loose_object(fd, tmp_file.buf); + close_loose_object(source, fd, tmp_file.buf); - if (freshen_packed_object(the_repository->objects, oid) || - freshen_loose_object(the_repository->objects, oid)) { + if (freshen_packed_object(source->odb, oid) || + freshen_loose_object(source->odb, oid)) { unlink_or_warn(tmp_file.buf); goto cleanup; } - odb_loose_path(the_repository->objects->sources, &filename, oid); + odb_loose_path(source, &filename, oid); /* We finally know the object path, and create the missing dir. */ dirlen = directory_size(filename.buf); @@ -1013,7 +1017,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, struct strbuf dir = STRBUF_INIT; strbuf_add(&dir, filename.buf, dirlen); - if (safe_create_dir_in_gitdir(the_repository, dir.buf) && + if (safe_create_dir_in_gitdir(source->odb->repo, dir.buf) && errno != EEXIST) { err = error_errno(_("unable to create directory %s"), dir.buf); strbuf_release(&dir); @@ -1022,23 +1026,23 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, strbuf_release(&dir); } - err = finalize_object_file_flags(the_repository, tmp_file.buf, filename.buf, + err = finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf, FOF_SKIP_COLLISION_CHECK); if (!err && compat) - err = repo_add_loose_object_map(the_repository->objects->sources, oid, &compat_oid); + err = repo_add_loose_object_map(source, oid, &compat_oid); cleanup: strbuf_release(&tmp_file); strbuf_release(&filename); return err; } -int write_object_file(const void *buf, unsigned long len, +int write_object_file(struct odb_source *source, + const void *buf, unsigned long len, enum object_type type, struct object_id *oid, struct object_id *compat_oid_in, unsigned flags) { - struct repository *repo = the_repository; - const struct git_hash_algo *algo = repo->hash_algo; - const struct git_hash_algo *compat = repo->compat_hash_algo; + const struct git_hash_algo *algo = source->odb->repo->hash_algo; + const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; struct object_id compat_oid; char hdr[MAX_HEADER_LEN]; int hdrlen = sizeof(hdr); @@ -1051,7 +1055,7 @@ int write_object_file(const void *buf, unsigned long len, hash_object_file(compat, buf, len, type, &compat_oid); else { struct strbuf converted = STRBUF_INIT; - convert_object_file(the_repository, &converted, algo, compat, + convert_object_file(source->odb->repo, &converted, algo, compat, buf, len, type, 0); hash_object_file(compat, converted.buf, converted.len, type, &compat_oid); @@ -1063,13 +1067,13 @@ int write_object_file(const void *buf, unsigned long len, * it out into .git/objects/??/?{38} file. */ write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(repo->objects, oid) || - freshen_loose_object(repo->objects, oid)) + if (freshen_packed_object(source->odb, oid) || + freshen_loose_object(source->odb, oid)) return 0; - if (write_loose_object(oid, hdr, hdrlen, buf, len, 0, flags)) + if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags)) return -1; if (compat) - return repo_add_loose_object_map(repo->objects->sources, oid, &compat_oid); + return repo_add_loose_object_map(source, oid, &compat_oid); return 0; } @@ -1101,7 +1105,7 @@ int force_object_loose(const struct object_id *oid, time_t mtime) oid_to_hex(oid), compat->name); } hdrlen = format_object_header(hdr, sizeof(hdr), type, len); - ret = write_loose_object(oid, hdr, hdrlen, buf, len, mtime, 0); + ret = write_loose_object(repo->objects->sources, oid, hdr, hdrlen, buf, len, mtime, 0); if (!ret && compat) ret = repo_add_loose_object_map(the_repository->objects->sources, oid, &compat_oid); free(buf); diff --git a/object-file.h b/object-file.h index 8ee24b7d8f307d..622e2b2bb7dbf0 100644 --- a/object-file.h +++ b/object-file.h @@ -157,7 +157,8 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, struct object_info; int parse_loose_header(const char *hdr, struct object_info *oi); -int write_object_file(const void *buf, unsigned long len, +int write_object_file(struct odb_source *source, + const void *buf, unsigned long len, enum object_type type, struct object_id *oid, struct object_id *compat_oid_in, unsigned flags); @@ -167,7 +168,8 @@ struct input_stream { int is_finished; }; -int stream_loose_object(struct input_stream *in_stream, size_t len, +int stream_loose_object(struct odb_source *source, + struct input_stream *in_stream, size_t len, struct object_id *oid); int force_object_loose(const struct object_id *oid, time_t mtime); diff --git a/odb.c b/odb.c index 519df2fa497863..2a92a018c42940 100644 --- a/odb.c +++ b/odb.c @@ -980,14 +980,14 @@ void odb_assert_oid_type(struct object_database *odb, type_name(expect)); } -int odb_write_object_ext(struct object_database *odb UNUSED, +int odb_write_object_ext(struct object_database *odb, const void *buf, unsigned long len, enum object_type type, struct object_id *oid, struct object_id *compat_oid, unsigned flags) { - return write_object_file(buf, len, type, oid, compat_oid, flags); + return write_object_file(odb->sources, buf, len, type, oid, compat_oid, flags); } struct object_database *odb_new(struct repository *repo) From f2c40e51b235b3814475d3988782ae5dfcae8752 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:37 +0200 Subject: [PATCH 11/22] object-file: inline `for_each_loose_file_in_objdir_buf()` The function `for_each_loose_file_in_objdir_buf()` is declared in our headers, but it is not used anywhere else than in the corresponding code file itself. Drop the declaration and inline the function into its only caller. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 31 ++++++++----------------------- object-file.h | 5 ----- 2 files changed, 8 insertions(+), 28 deletions(-) diff --git a/object-file.c b/object-file.c index fc061c37bb5a47..5a936f171482fe 100644 --- a/object-file.c +++ b/object-file.c @@ -1388,26 +1388,6 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr, return r; } -int for_each_loose_file_in_objdir_buf(struct strbuf *path, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data) -{ - int r = 0; - int i; - - for (i = 0; i < 256; i++) { - r = for_each_file_in_obj_subdir(i, path, the_repository->hash_algo, - obj_cb, cruft_cb, - subdir_cb, data); - if (r) - break; - } - - return r; -} - int for_each_loose_file_in_objdir(const char *path, each_loose_object_fn obj_cb, each_loose_cruft_fn cruft_cb, @@ -1418,10 +1398,15 @@ int for_each_loose_file_in_objdir(const char *path, int r; strbuf_addstr(&buf, path); - r = for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb, - subdir_cb, data); - strbuf_release(&buf); + for (int i = 0; i < 256; i++) { + r = for_each_file_in_obj_subdir(i, &buf, the_repository->hash_algo, + obj_cb, cruft_cb, + subdir_cb, data); + if (r) + break; + } + strbuf_release(&buf); return r; } diff --git a/object-file.h b/object-file.h index 622e2b2bb7dbf0..eca323f97362dc 100644 --- a/object-file.h +++ b/object-file.h @@ -98,11 +98,6 @@ int for_each_loose_file_in_objdir(const char *path, each_loose_cruft_fn cruft_cb, each_loose_subdir_fn subdir_cb, void *data); -int for_each_loose_file_in_objdir_buf(struct strbuf *path, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data); /* * Iterate over all accessible loose objects without respect to From 83439299f1326c7471f5c7595c96d14b65a71879 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:38 +0200 Subject: [PATCH 12/22] object-file: remove declaration for `for_each_file_in_obj_subdir()` The function `for_each_file_in_obj_subdir()` is declared in our headers, but it is not used anywhere else than in the corresponding code file itself. Drop the declaration and mark the function as file-local. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 14 +++++++------- object-file.h | 7 ------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/object-file.c b/object-file.c index 5a936f171482fe..bd93f17dcfe6f6 100644 --- a/object-file.c +++ b/object-file.c @@ -1318,13 +1318,13 @@ int read_pack_header(int fd, struct pack_header *header) return 0; } -int for_each_file_in_obj_subdir(unsigned int subdir_nr, - struct strbuf *path, - const struct git_hash_algo *algop, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data) +static int for_each_file_in_obj_subdir(unsigned int subdir_nr, + struct strbuf *path, + const struct git_hash_algo *algop, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) { size_t origlen, baselen; DIR *dir; diff --git a/object-file.h b/object-file.h index eca323f97362dc..d52b335e85ba79 100644 --- a/object-file.h +++ b/object-file.h @@ -86,13 +86,6 @@ typedef int each_loose_cruft_fn(const char *basename, typedef int each_loose_subdir_fn(unsigned int nr, const char *path, void *data); -int for_each_file_in_obj_subdir(unsigned int subdir_nr, - struct strbuf *path, - const struct git_hash_algo *algo, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data); int for_each_loose_file_in_objdir(const char *path, each_loose_object_fn obj_cb, each_loose_cruft_fn cruft_cb, From d81712ce65f7ee59ce88c8b74f09b6e6456a6f3c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:39 +0200 Subject: [PATCH 13/22] object-file: get rid of `the_repository` in loose object iterators The iterators for loose objects still rely on `the_repository`. Refactor them: - `for_each_loose_file_in_objdir()` is refactored so that the caller is now expected to pass an `odb_source` as parameter instead of the path to that source. Furthermore, it is renamed accordingly to `for_each_loose_file_in_source()`. - `for_each_loose_object()` is refactored to take in an object database now and calls the above function in a loop. This allows us to get rid of the global dependency. Adjust callers accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- builtin/count-objects.c | 2 +- builtin/fsck.c | 14 ++++++++------ builtin/gc.c | 10 ++++------ builtin/pack-objects.c | 5 ++--- builtin/prune.c | 2 +- object-file.c | 18 +++++++++--------- object-file.h | 5 +++-- prune-packed.c | 2 +- reachable.c | 2 +- 10 files changed, 31 insertions(+), 31 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 2492a0b6f39ac3..aa1498aa60f2bb 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -848,7 +848,7 @@ static void batch_each_object(struct batch_options *opt, }; struct bitmap_index *bitmap = prepare_bitmap_git(the_repository); - for_each_loose_object(batch_one_object_loose, &payload, 0); + for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0); if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { diff --git a/builtin/count-objects.c b/builtin/count-objects.c index f687647931e0e2..e70a01c628e55f 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -117,7 +117,7 @@ int cmd_count_objects(int argc, report_linked_checkout_garbage(the_repository); } - for_each_loose_file_in_objdir(repo_get_object_directory(the_repository), + for_each_loose_file_in_source(the_repository->objects->sources, count_loose, count_cruft, NULL, NULL); if (verbose) { diff --git a/builtin/fsck.c b/builtin/fsck.c index 0084cf7400bd46..f0854ce5d847d3 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -393,7 +393,8 @@ static void check_connectivity(void) * and ignore any that weren't present in our earlier * traversal. */ - for_each_loose_object(mark_loose_unreachable_referents, NULL, 0); + for_each_loose_object(the_repository->objects, + mark_loose_unreachable_referents, NULL, 0); for_each_packed_object(the_repository, mark_packed_unreachable_referents, NULL, @@ -687,7 +688,7 @@ static int fsck_subdir(unsigned int nr, const char *path UNUSED, void *data) return 0; } -static void fsck_object_dir(const char *path) +static void fsck_source(struct odb_source *source) { struct progress *progress = NULL; struct for_each_loose_cb cb_data = { @@ -701,8 +702,8 @@ static void fsck_object_dir(const char *path) progress = start_progress(the_repository, _("Checking object directories"), 256); - for_each_loose_file_in_objdir(path, fsck_loose, fsck_cruft, fsck_subdir, - &cb_data); + for_each_loose_file_in_source(source, fsck_loose, + fsck_cruft, fsck_subdir, &cb_data); display_progress(progress, 256); stop_progress(&progress); } @@ -994,13 +995,14 @@ int cmd_fsck(int argc, fsck_refs(the_repository); if (connectivity_only) { - for_each_loose_object(mark_loose_for_connectivity, NULL, 0); + for_each_loose_object(the_repository->objects, + mark_loose_for_connectivity, NULL, 0); for_each_packed_object(the_repository, mark_packed_for_connectivity, NULL, 0); } else { odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) - fsck_object_dir(source->path); + fsck_source(source); if (check_full) { struct packed_git *p; diff --git a/builtin/gc.c b/builtin/gc.c index 21bd44e1645eac..6eefefc63d20ad 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1301,7 +1301,7 @@ static int loose_object_auto_condition(struct gc_config *cfg UNUSED) if (loose_object_auto_limit < 0) return 1; - return for_each_loose_file_in_objdir(the_repository->objects->sources->path, + return for_each_loose_file_in_source(the_repository->objects->sources, loose_object_count, NULL, NULL, &count); } @@ -1336,7 +1336,7 @@ static int pack_loose(struct maintenance_run_opts *opts) * Do not start pack-objects process * if there are no loose objects. */ - if (!for_each_loose_file_in_objdir(r->objects->sources->path, + if (!for_each_loose_file_in_source(r->objects->sources, bail_on_loose, NULL, NULL, NULL)) return 0; @@ -1376,11 +1376,9 @@ static int pack_loose(struct maintenance_run_opts *opts) else if (data.batch_size > 0) data.batch_size--; /* Decrease for equality on limit. */ - for_each_loose_file_in_objdir(r->objects->sources->path, + for_each_loose_file_in_source(r->objects->sources, write_loose_object_to_stdin, - NULL, - NULL, - &data); + NULL, NULL, &data); fclose(data.in); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index e8e85d8278bba6..9e85293730be1a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4342,9 +4342,8 @@ static int add_loose_object(const struct object_id *oid, const char *path, */ static void add_unreachable_loose_objects(void) { - for_each_loose_file_in_objdir(repo_get_object_directory(the_repository), - add_loose_object, - NULL, NULL, NULL); + for_each_loose_file_in_source(the_repository->objects->sources, + add_loose_object, NULL, NULL, NULL); } static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) diff --git a/builtin/prune.c b/builtin/prune.c index 339017c7ccf37c..bf5d3bb152cf0e 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -200,7 +200,7 @@ int cmd_prune(int argc, revs.exclude_promisor_objects = 1; } - for_each_loose_file_in_objdir(repo_get_object_directory(the_repository), + for_each_loose_file_in_source(the_repository->objects->sources, prune_object, prune_cruft, prune_subdir, &revs); prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0); diff --git a/object-file.c b/object-file.c index bd93f17dcfe6f6..b894379d22cc6f 100644 --- a/object-file.c +++ b/object-file.c @@ -1388,7 +1388,7 @@ static int for_each_file_in_obj_subdir(unsigned int subdir_nr, return r; } -int for_each_loose_file_in_objdir(const char *path, +int for_each_loose_file_in_source(struct odb_source *source, each_loose_object_fn obj_cb, each_loose_cruft_fn cruft_cb, each_loose_subdir_fn subdir_cb, @@ -1397,11 +1397,10 @@ int for_each_loose_file_in_objdir(const char *path, struct strbuf buf = STRBUF_INIT; int r; - strbuf_addstr(&buf, path); + strbuf_addstr(&buf, source->path); for (int i = 0; i < 256; i++) { - r = for_each_file_in_obj_subdir(i, &buf, the_repository->hash_algo, - obj_cb, cruft_cb, - subdir_cb, data); + r = for_each_file_in_obj_subdir(i, &buf, source->odb->repo->hash_algo, + obj_cb, cruft_cb, subdir_cb, data); if (r) break; } @@ -1410,14 +1409,15 @@ int for_each_loose_file_in_objdir(const char *path, return r; } -int for_each_loose_object(each_loose_object_fn cb, void *data, +int for_each_loose_object(struct object_database *odb, + each_loose_object_fn cb, void *data, enum for_each_object_flags flags) { struct odb_source *source; - odb_prepare_alternates(the_repository->objects); - for (source = the_repository->objects->sources; source; source = source->next) { - int r = for_each_loose_file_in_objdir(source->path, cb, NULL, + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) { + int r = for_each_loose_file_in_source(source, cb, NULL, NULL, data); if (r) return r; diff --git a/object-file.h b/object-file.h index d52b335e85ba79..1b1ab95423d55c 100644 --- a/object-file.h +++ b/object-file.h @@ -86,7 +86,7 @@ typedef int each_loose_cruft_fn(const char *basename, typedef int each_loose_subdir_fn(unsigned int nr, const char *path, void *data); -int for_each_loose_file_in_objdir(const char *path, +int for_each_loose_file_in_source(struct odb_source *source, each_loose_object_fn obj_cb, each_loose_cruft_fn cruft_cb, each_loose_subdir_fn subdir_cb, @@ -99,7 +99,8 @@ int for_each_loose_file_in_objdir(const char *path, * * Any flags specific to packs are ignored. */ -int for_each_loose_object(each_loose_object_fn, void *, +int for_each_loose_object(struct object_database *odb, + each_loose_object_fn, void *, enum for_each_object_flags flags); diff --git a/prune-packed.c b/prune-packed.c index 92fb4fbb0ed3d1..d49dc11957c7a6 100644 --- a/prune-packed.c +++ b/prune-packed.c @@ -40,7 +40,7 @@ void prune_packed_objects(int opts) progress = start_delayed_progress(the_repository, _("Removing duplicate objects"), 256); - for_each_loose_file_in_objdir(repo_get_object_directory(the_repository), + for_each_loose_file_in_source(the_repository->objects->sources, prune_object, NULL, prune_subdir, &opts); /* Ensure we show 100% before finishing progress */ diff --git a/reachable.c b/reachable.c index e984b68a0c41e1..5706ccaede3320 100644 --- a/reachable.c +++ b/reachable.c @@ -319,7 +319,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, oidset_init(&data.extra_recent_oids, 0); data.extra_recent_oids_loaded = 0; - r = for_each_loose_object(add_recent_loose, &data, + r = for_each_loose_object(the_repository->objects, add_recent_loose, &data, FOR_EACH_OBJECT_LOCAL_ONLY); if (r) goto done; From 0df005353aca4e490478a4e8c2d090728599868e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:40 +0200 Subject: [PATCH 14/22] object-file: get rid of `the_repository` in `read_loose_object()` The function `read_loose_object()` takes a path to an object file and tries to parse it. As such, the function does not depend on any specific object database but instead acts as an ODB-independent way to read a specific file. As such, all it needs as input is a repository so that we can derive repo settings and the hash algorithm. That repository isn't passed in as a parameter though, as we implicitly depend on the global `the_repository`. Refactor the function so that we pass in the repository as a parameter. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 2 +- object-file.c | 9 +++++---- object-file.h | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index f0854ce5d847d3..e9112d884f021c 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -633,7 +633,7 @@ static int fsck_loose(const struct object_id *oid, const char *path, oi.sizep = &size; oi.typep = &type; - if (read_loose_object(path, oid, &real_oid, &contents, &oi) < 0) { + if (read_loose_object(the_repository, path, oid, &real_oid, &contents, &oi) < 0) { if (contents && !oideq(&real_oid, oid)) err = error(_("%s: hash-path mismatch, found at: %s"), oid_to_hex(&real_oid), path); diff --git a/object-file.c b/object-file.c index b894379d22cc6f..f7c07acadc91ab 100644 --- a/object-file.c +++ b/object-file.c @@ -1535,7 +1535,8 @@ static int check_stream_oid(git_zstream *stream, return 0; } -int read_loose_object(const char *path, +int read_loose_object(struct repository *repo, + const char *path, const struct object_id *expected_oid, struct object_id *real_oid, void **contents, @@ -1574,9 +1575,9 @@ int read_loose_object(const char *path, } if (*oi->typep == OBJ_BLOB && - *size > repo_settings_get_big_file_threshold(the_repository)) { + *size > repo_settings_get_big_file_threshold(repo)) { if (check_stream_oid(&stream, hdr, *size, path, expected_oid, - the_repository->hash_algo) < 0) + repo->hash_algo) < 0) goto out_inflate; } else { *contents = unpack_loose_rest(&stream, hdr, *size, expected_oid); @@ -1584,7 +1585,7 @@ int read_loose_object(const char *path, error(_("unable to unpack contents of %s"), path); goto out_inflate; } - hash_object_file(the_repository->hash_algo, + hash_object_file(repo->hash_algo, *contents, *size, *oi->typep, real_oid); if (!oideq(expected_oid, real_oid)) diff --git a/object-file.h b/object-file.h index 1b1ab95423d55c..52f7979267d08c 100644 --- a/object-file.h +++ b/object-file.h @@ -210,7 +210,8 @@ int check_and_freshen_file(const char *fn, int freshen); * * Returns 0 on success, negative on error (details may be written to stderr). */ -int read_loose_object(const char *path, +int read_loose_object(struct repository *repo, + const char *path, const struct object_id *expected_oid, struct object_id *real_oid, void **contents, From c2b5d1490a4b6b8b1a50b9ef82ec204811d7ccf1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:41 +0200 Subject: [PATCH 15/22] object-file: get rid of `the_repository` in `force_object_loose()` The function `force_object_loose()` forces an object to become a loose object in case it only exists in its packed form. To do so it implicitly relies on `the_repository`. Refactor the function by passing a `struct odb_source` as parameter. While the check whether any such loose object exists already acts on the whole object database, writing the loose object happens in one specific source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 3 ++- object-file.c | 18 +++++++++--------- object-file.h | 3 ++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 9e85293730be1a..7ff79d6b376852 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4411,7 +4411,8 @@ static void loosen_unused_packed_objects(void) if (!packlist_find(&to_pack, &oid) && !has_sha1_pack_kept_or_nonlocal(&oid) && !loosened_object_can_be_discarded(&oid, p->mtime)) { - if (force_object_loose(&oid, p->mtime)) + if (force_object_loose(the_repository->objects->sources, + &oid, p->mtime)) die(_("unable to force loose object")); loosened_objects_nr++; } diff --git a/object-file.c b/object-file.c index f7c07acadc91ab..e9152d9e04c26b 100644 --- a/object-file.c +++ b/object-file.c @@ -1077,10 +1077,10 @@ int write_object_file(struct odb_source *source, return 0; } -int force_object_loose(const struct object_id *oid, time_t mtime) +int force_object_loose(struct odb_source *source, + const struct object_id *oid, time_t mtime) { - struct repository *repo = the_repository; - const struct git_hash_algo *compat = repo->compat_hash_algo; + const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; void *buf; unsigned long len; struct object_info oi = OBJECT_INFO_INIT; @@ -1090,24 +1090,24 @@ int force_object_loose(const struct object_id *oid, time_t mtime) int hdrlen; int ret; - for (struct odb_source *source = repo->objects->sources; source; source = source->next) - if (has_loose_object(source, oid)) + for (struct odb_source *s = source->odb->sources; s; s = s->next) + if (has_loose_object(s, oid)) return 0; oi.typep = &type; oi.sizep = &len; oi.contentp = &buf; - if (odb_read_object_info_extended(the_repository->objects, oid, &oi, 0)) + if (odb_read_object_info_extended(source->odb, oid, &oi, 0)) return error(_("cannot read object for %s"), oid_to_hex(oid)); if (compat) { - if (repo_oid_to_algop(repo, oid, compat, &compat_oid)) + if (repo_oid_to_algop(source->odb->repo, oid, compat, &compat_oid)) return error(_("cannot map object %s to %s"), oid_to_hex(oid), compat->name); } hdrlen = format_object_header(hdr, sizeof(hdr), type, len); - ret = write_loose_object(repo->objects->sources, oid, hdr, hdrlen, buf, len, mtime, 0); + ret = write_loose_object(source, oid, hdr, hdrlen, buf, len, mtime, 0); if (!ret && compat) - ret = repo_add_loose_object_map(the_repository->objects->sources, oid, &compat_oid); + ret = repo_add_loose_object_map(source, oid, &compat_oid); free(buf); return ret; diff --git a/object-file.h b/object-file.h index 52f7979267d08c..15d97630d3b11b 100644 --- a/object-file.h +++ b/object-file.h @@ -161,7 +161,8 @@ int stream_loose_object(struct odb_source *source, struct input_stream *in_stream, size_t len, struct object_id *oid); -int force_object_loose(const struct object_id *oid, time_t mtime); +int force_object_loose(struct odb_source *source, + const struct object_id *oid, time_t mtime); /** * With in-core object data in "buf", rehash it to make sure the From 5f2e994e34ab83c90c1c5f3b31c1573b37cb137b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 17 Jul 2025 06:56:42 +0200 Subject: [PATCH 16/22] object-file: get rid of `the_repository` in index-related functions Both `index_fd()` and `index_path()` still use `the_repository` even though they have a repository available via `struct index_state`. Adapt them so that they use the index' repository instead to get rid of this global dependency. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/object-file.c b/object-file.c index e9152d9e04c26b..2bc36ab3ee8cbf 100644 --- a/object-file.c +++ b/object-file.c @@ -1257,7 +1257,7 @@ int index_fd(struct index_state *istate, struct object_id *oid, ret = index_stream_convert_blob(istate, oid, fd, path, flags); else if (!S_ISREG(st->st_mode)) ret = index_pipe(istate, oid, fd, type, path, flags); - else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(the_repository)) || + else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) || type != OBJ_BLOB || (path && would_convert_to_git(istate, path))) ret = index_core(istate, oid, fd, xsize_t(st->st_size), @@ -1291,12 +1291,12 @@ int index_path(struct index_state *istate, struct object_id *oid, if (!(flags & INDEX_WRITE_OBJECT)) hash_object_file(istate->repo->hash_algo, sb.buf, sb.len, OBJ_BLOB, oid); - else if (odb_write_object(the_repository->objects, sb.buf, sb.len, OBJ_BLOB, oid)) + else if (odb_write_object(istate->repo->objects, sb.buf, sb.len, OBJ_BLOB, oid)) rc = error(_("%s: failed to insert into database"), path); strbuf_release(&sb); break; case S_IFDIR: - return repo_resolve_gitlink_ref(the_repository, path, "HEAD", oid); + return repo_resolve_gitlink_ref(istate->repo, path, "HEAD", oid); default: return error(_("%s: unsupported file type"), path); } From 1fa06ceddf1ea01bd85e277471ba79330666f037 Mon Sep 17 00:00:00 2001 From: K Jayatheerth Date: Thu, 24 Jul 2025 20:54:17 +0530 Subject: [PATCH 17/22] submodule: prevent overwriting .gitmodules on path reuse Adding a submodule at a path that previously hosted another submodule (e.g., 'child') reuses the submodule name derived from the path. If the original submodule was only moved (e.g., to 'child_old') and not renamed, this silently overwrites its configuration in .gitmodules. This behavior loses user configuration and causes confusion when the original submodule is expected to remain intact. It assumes that the path-derived name is always safe to reuse, even though the name might still be in use elsewhere in the repository. Teach module_add() to check if the computed submodule name already exists in the repository's submodule config, and if so, refuse the operation unless the user explicitly renames the submodule or uses the --force option, which will automatically generate a unique name by appending a number (e.g., child1). Signed-off-by: K Jayatheerth Signed-off-by: Junio C Hamano --- Documentation/git-submodule.adoc | 7 +++++++ builtin/submodule--helper.c | 27 +++++++++++++++++++++++++++ t/t7400-submodule-basic.sh | 22 ++++++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/Documentation/git-submodule.adoc b/Documentation/git-submodule.adoc index 87d8e0f0c563b7..503c84a200c929 100644 --- a/Documentation/git-submodule.adoc +++ b/Documentation/git-submodule.adoc @@ -307,6 +307,13 @@ OPTIONS --force:: This option is only valid for add, deinit and update commands. When running add, allow adding an otherwise ignored submodule path. + This option is also used to bypass a check that the submodule's name + is not already in use. By default, 'git submodule add' will fail if + the proposed name (which is derived from the path) is already registered + for another submodule in the repository. Using '--force' allows the command + to proceed by automatically generating a unique name by appending a number + to the conflicting name (e.g., if a submodule named 'child' exists, it will + try 'child1', and so on). When running deinit the submodule working trees will be removed even if they contain local changes. When running update (only effective with the checkout procedure), diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 53da2116ddf576..5f85b169c539ac 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -3444,6 +3444,10 @@ static int module_add(int argc, const char **argv, const char *prefix, struct add_data add_data = ADD_DATA_INIT; const char *ref_storage_format = NULL; char *to_free = NULL; + const struct submodule *existing; + struct strbuf buf = STRBUF_INIT; + int i; + char *sm_name_to_free = NULL; struct option options[] = { OPT_STRING('b', "branch", &add_data.branch, N_("branch"), N_("branch of repository to add as submodule")), @@ -3546,6 +3550,28 @@ static int module_add(int argc, const char **argv, const char *prefix, if(!add_data.sm_name) add_data.sm_name = add_data.sm_path; + existing = submodule_from_name(the_repository, + null_oid(the_hash_algo), + add_data.sm_name); + + if (existing && strcmp(existing->path, add_data.sm_path)) { + if (!force) { + die(_("submodule name '%s' already used for path '%s'"), + add_data.sm_name, existing->path); + } + /* --force: build until unique */ + for (i = 1; ; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "%s%d", add_data.sm_name, i); + if (!submodule_from_name(the_repository, + null_oid(the_hash_algo), + buf.buf)) { + break; + } + } + add_data.sm_name = sm_name_to_free = strbuf_detach(&buf, NULL); + } + if (check_submodule_name(add_data.sm_name)) die(_("'%s' is not a valid submodule name"), add_data.sm_name); @@ -3561,6 +3587,7 @@ static int module_add(int argc, const char **argv, const char *prefix, ret = 0; cleanup: + free(sm_name_to_free); free(add_data.sm_path); free(to_free); strbuf_release(&sb); diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index d6a501d4535417..6812df308107cb 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -1482,4 +1482,26 @@ test_expect_success '`submodule init` and `init.templateDir`' ' ) ' +test_expect_success 'submodule add fails when name is reused' ' + git init test-submodule && + ( + cd test-submodule && + git commit --allow-empty -m init && + + git init ../child-origin && + git -C ../child-origin commit --allow-empty -m init && + + git submodule add ../child-origin child && + git commit -m "Add submodule child" && + + git mv child child_old && + git commit -m "Move child to child_old" && + + # Now adding a *new* repo at the old name must fail + git init ../child2-origin && + git -C ../child2-origin commit --allow-empty -m init && + test_must_fail git submodule add ../child2-origin child + ) +' + test_done From bb10dcf5730356b9ef70d40eca2335e9d406954a Mon Sep 17 00:00:00 2001 From: K Jayatheerth Date: Thu, 24 Jul 2025 20:54:18 +0530 Subject: [PATCH 18/22] submodule: skip redundant active entries when pattern covers path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit configure_added_submodule always writes an explicit submodule..active entry, even when the new path is already matched by submodule.active patterns. This leads to unnecessary and cluttered configuration. change the logic to centralize wildmatch-based pattern lookup, in configure_added_submodule. Wrap the active-entry write in a conditional that only fires when that helper reports no existing pattern covers the submodule’s path. Signed-off-by: K Jayatheerth Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 25 +++++++++++++++++++------ t/t7413-submodule-is-active.sh | 15 +++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 5f85b169c539ac..ca6f6fe1d13b60 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -32,6 +32,8 @@ #include "advice.h" #include "branch.h" #include "list-objects-filter-options.h" +#include "wildmatch.h" +#include "strbuf.h" #define OPT_QUIET (1 << 0) #define OPT_CACHED (1 << 1) @@ -3329,6 +3331,9 @@ static void configure_added_submodule(struct add_data *add_data) struct child_process add_submod = CHILD_PROCESS_INIT; struct child_process add_gitmodules = CHILD_PROCESS_INIT; + const struct string_list *values; + size_t i; + int matched = 0; key = xstrfmt("submodule.%s.url", add_data->sm_name); git_config_set_gently(key, add_data->realrepo); free(key); @@ -3370,20 +3375,28 @@ static void configure_added_submodule(struct add_data *add_data) * is_submodule_active(), since that function needs to find * out the value of "submodule.active" again anyway. */ - if (!git_config_get("submodule.active")) { + if (git_config_get("submodule.active") || /* key absent */ + git_config_get_string_multi("submodule.active", &values)) { /* * If the submodule being added isn't already covered by the * current configured pathspec, set the submodule's active flag */ - if (!is_submodule_active(the_repository, add_data->sm_path)) { + key = xstrfmt("submodule.%s.active", add_data->sm_name); + git_config_set_gently(key, "true"); + free(key); + } else { + for (i = 0; i < values->nr; i++) { + const char *pat = values->items[i].string; + if (!wildmatch(pat, add_data->sm_path, 0)) { /* match found */ + matched = 1; + break; + } + } + if (!matched) { /* no pattern matched -> force-enable */ key = xstrfmt("submodule.%s.active", add_data->sm_name); git_config_set_gently(key, "true"); free(key); } - } else { - key = xstrfmt("submodule.%s.active", add_data->sm_name); - git_config_set_gently(key, "true"); - free(key); } } diff --git a/t/t7413-submodule-is-active.sh b/t/t7413-submodule-is-active.sh index 9509dc18fde909..6fd3b870dec777 100755 --- a/t/t7413-submodule-is-active.sh +++ b/t/t7413-submodule-is-active.sh @@ -124,4 +124,19 @@ test_expect_success 'is-active, submodule.active and submodule add' ' git -C super2 config --get submodule.mod.active ' +test_expect_success 'submodule add skips redundant active entry' ' + git init repo && + ( + cd repo && + git config submodule.active "lib/*" && + git commit --allow-empty -m init && + + git init ../lib-origin && + git -C ../lib-origin commit --allow-empty -m init && + + git submodule add ../lib-origin lib/foo && + test_must_fail git config --get submodule.lib/foo.active + ) +' + test_done From 9305027adef9b8e8de8b2bee11dd442c7e579490 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 24 Jul 2025 13:44:24 -0700 Subject: [PATCH 19/22] fixup! submodule: prevent overwriting .gitmodules on path reuse --- builtin/submodule--helper.c | 5 ++--- t/t7400-submodule-basic.sh | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index ca6f6fe1d13b60..08a808e5c42819 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -3459,7 +3459,6 @@ static int module_add(int argc, const char **argv, const char *prefix, char *to_free = NULL; const struct submodule *existing; struct strbuf buf = STRBUF_INIT; - int i; char *sm_name_to_free = NULL; struct option options[] = { OPT_STRING('b', "branch", &add_data.branch, N_("branch"), @@ -3570,10 +3569,10 @@ static int module_add(int argc, const char **argv, const char *prefix, if (existing && strcmp(existing->path, add_data.sm_path)) { if (!force) { die(_("submodule name '%s' already used for path '%s'"), - add_data.sm_name, existing->path); + add_data.sm_name, existing->path); } /* --force: build until unique */ - for (i = 1; ; i++) { + for (int i = 1; ; i++) { strbuf_reset(&buf); strbuf_addf(&buf, "%s%d", add_data.sm_name, i); if (!submodule_from_name(the_repository, diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index 6812df308107cb..fd3e7e355e4ffc 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -1500,7 +1500,8 @@ test_expect_success 'submodule add fails when name is reused' ' # Now adding a *new* repo at the old name must fail git init ../child2-origin && git -C ../child2-origin commit --allow-empty -m init && - test_must_fail git submodule add ../child2-origin child + test_must_fail git submodule add ../child2-origin child 2>err && + test_grep "already used for" err ) ' From 5ed8c5b465aaeae967875d043187668bdc0dfe54 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 24 Jul 2025 15:24:23 -0700 Subject: [PATCH 20/22] fixup! submodule: skip redundant active entries when pattern covers path --- builtin/submodule--helper.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 08a808e5c42819..10cd65e3435220 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -3330,10 +3330,9 @@ static void configure_added_submodule(struct add_data *add_data) char *key; struct child_process add_submod = CHILD_PROCESS_INIT; struct child_process add_gitmodules = CHILD_PROCESS_INIT; - const struct string_list *values; - size_t i; int matched = 0; + key = xstrfmt("submodule.%s.url", add_data->sm_name); git_config_set_gently(key, add_data->realrepo); free(key); @@ -3385,7 +3384,7 @@ static void configure_added_submodule(struct add_data *add_data) git_config_set_gently(key, "true"); free(key); } else { - for (i = 0; i < values->nr; i++) { + for (size_t i = 0; i < values->nr; i++) { const char *pat = values->items[i].string; if (!wildmatch(pat, add_data->sm_path, 0)) { /* match found */ matched = 1; From 161e895e42f4fad83828c1c954adb9375f4b0092 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 25 Jul 2025 20:41:24 +0200 Subject: [PATCH 21/22] git: show alias info only with lone -h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builtin commands show usage information on stdout if called with -h as their only option, usage.c::show_usage_if_asked() makes sure of that. Aliases show alias information on stderr if called with -h as the first option since a9a60b94cc (git.c: handle_alias: prepend alias info when first argument is -h, 2018-10-09). This is surprising when using aliases for commands that take -h as a normal argument among others, like git grep. Tighten the condition and show the alias information only if -h is the only option given, to be consistent with builtins. It's probably still is a good idea to write to stderr, as an alias command doesn't have to be a builtin and could instead produce output with just -h that might be spoiled by an extra alias info line. Reported-by: Kevin Brodsky Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- git.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git.c b/git.c index 07a5fe39fb69f0..83eac0aeab75d6 100644 --- a/git.c +++ b/git.c @@ -371,7 +371,7 @@ static int handle_alias(struct strvec *args) alias_command = args->v[0]; alias_string = alias_lookup(alias_command); if (alias_string) { - if (args->nr > 1 && !strcmp(args->v[1], "-h")) + if (args->nr == 2 && !strcmp(args->v[1], "-h")) fprintf_ln(stderr, _("'%s' is aliased to '%s'"), alias_command, alias_string); if (alias_string[0] == '!') { From 64cbe5e2e8a7b0f92c780b210e602496bd5cad0f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 5 Aug 2025 11:53:34 -0700 Subject: [PATCH 22/22] A bit more after -rc0 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.51.0.adoc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/RelNotes/2.51.0.adoc b/Documentation/RelNotes/2.51.0.adoc index a89d459d2e2db0..f8adc2c5cf3b61 100644 --- a/Documentation/RelNotes/2.51.0.adoc +++ b/Documentation/RelNotes/2.51.0.adoc @@ -67,6 +67,11 @@ UI, Workflows & Features * "git switch" and "git restore" are declared to be no longer experimental. + * "git -c alias.foo=bar foo -h baz" reported "'foo' is aliased to + 'bar'" and then went on to run "git foo -h baz", which was + unexpected. Tighten the rule so that alias expansion is reported + only when "-h" is the sole option. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -136,6 +141,9 @@ Performance, Internal Implementation, Development Support etc. support a single object source that belongs to one repository. A midx does span mulitple "object sources". + * Reduce implicit assumption and dependence on the_repository in the + object-file subsystem. + Fixes since v2.50 ----------------- @@ -284,6 +292,11 @@ including security updates, are included in this release. and also they learn to honor the -U command-line option. (merge 2b3ae04011 lm/add-p-context later to maint). + * The case where a new submodule takes a path where used to be a + completely different subproject is now dealt a bit better than + before. + (merge 5ed8c5b465 kj/renamed-submodule later to maint). + * Other code cleanup, docfix, build fix, etc. (merge b257adb571 lo/my-first-ow-doc-update later to maint). (merge 8b34b6a220 ly/sequencer-update-squash-is-fixup-only later to maint).