From 064468e89919e9cf16d2afa59de33a242a4d71ab Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Sep 2025 10:30:05 +0000 Subject: [PATCH 01/20] sparse-checkout: remove use of the_repository The logic for the 'git sparse-checkout' builtin uses the_repository all over the place, despite some use of a repository struct in different method parameters. Complete this removal of the_repository by using 'repo' when possible. In one place, there was already a local variable 'r' that was set to the_repository, so move that to a method parameter. We cannot remove the USE_THE_REPOSITORY_VARIABLE declaration as we are still using global constants for the state of the sparse-checkout. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 117 ++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 55 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 8c333b3e2e145b..06de61bd9d0d33 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -204,12 +204,12 @@ static void clean_tracked_sparse_directories(struct repository *r) ensure_full_index(r->index); } -static int update_working_directory(struct pattern_list *pl) +static int update_working_directory(struct repository *r, + struct pattern_list *pl) { enum update_sparsity_result result; struct unpack_trees_options o; struct lock_file lock_file = LOCK_INIT; - struct repository *r = the_repository; struct pattern_list *old_pl; /* If no branch has been checked out, there are no updates to make. */ @@ -327,7 +327,8 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl) string_list_clear(&sl, 0); } -static int write_patterns_and_update(struct pattern_list *pl) +static int write_patterns_and_update(struct repository *repo, + struct pattern_list *pl) { char *sparse_filename; FILE *fp; @@ -336,15 +337,15 @@ static int write_patterns_and_update(struct pattern_list *pl) sparse_filename = get_sparse_checkout_filename(); - if (safe_create_leading_directories(the_repository, sparse_filename)) + if (safe_create_leading_directories(repo, sparse_filename)) die(_("failed to create directory for sparse-checkout file")); hold_lock_file_for_update(&lk, sparse_filename, LOCK_DIE_ON_ERROR); - result = update_working_directory(pl); + result = update_working_directory(repo, pl); if (result) { rollback_lock_file(&lk); - update_working_directory(NULL); + update_working_directory(repo, NULL); goto out; } @@ -372,25 +373,26 @@ enum sparse_checkout_mode { MODE_CONE_PATTERNS = 2, }; -static int set_config(enum sparse_checkout_mode mode) +static int set_config(struct repository *repo, + enum sparse_checkout_mode mode) { /* Update to use worktree config, if not already. */ - if (init_worktree_config(the_repository)) { + if (init_worktree_config(repo)) { error(_("failed to initialize worktree config")); return 1; } - if (repo_config_set_worktree_gently(the_repository, + if (repo_config_set_worktree_gently(repo, "core.sparseCheckout", mode ? "true" : "false") || - repo_config_set_worktree_gently(the_repository, + repo_config_set_worktree_gently(repo, "core.sparseCheckoutCone", mode == MODE_CONE_PATTERNS ? "true" : "false")) return 1; if (mode == MODE_NO_PATTERNS) - return set_sparse_index_config(the_repository, 0); + return set_sparse_index_config(repo, 0); return 0; } @@ -410,7 +412,7 @@ static enum sparse_checkout_mode update_cone_mode(int *cone_mode) { return MODE_ALL_PATTERNS; } -static int update_modes(int *cone_mode, int *sparse_index) +static int update_modes(struct repository *repo, int *cone_mode, int *sparse_index) { int mode, record_mode; @@ -418,20 +420,20 @@ static int update_modes(int *cone_mode, int *sparse_index) record_mode = (*cone_mode != -1) || !core_apply_sparse_checkout; mode = update_cone_mode(cone_mode); - if (record_mode && set_config(mode)) + if (record_mode && set_config(repo, mode)) return 1; /* Set sparse-index/non-sparse-index mode if specified */ if (*sparse_index >= 0) { - if (set_sparse_index_config(the_repository, *sparse_index) < 0) + if (set_sparse_index_config(repo, *sparse_index) < 0) die(_("failed to modify sparse-index config")); /* force an index rewrite */ - repo_read_index(the_repository); - the_repository->index->updated_workdir = 1; + repo_read_index(repo); + repo->index->updated_workdir = 1; if (!*sparse_index) - ensure_full_index(the_repository->index); + ensure_full_index(repo->index); } return 0; @@ -448,7 +450,7 @@ static struct sparse_checkout_init_opts { } init_opts; static int sparse_checkout_init(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct pattern_list pl; char *sparse_filename; @@ -464,7 +466,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix, }; setup_work_tree(); - repo_read_index(the_repository); + repo_read_index(repo); init_opts.cone_mode = -1; init_opts.sparse_index = -1; @@ -473,7 +475,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix, builtin_sparse_checkout_init_options, builtin_sparse_checkout_init_usage, 0); - if (update_modes(&init_opts.cone_mode, &init_opts.sparse_index)) + if (update_modes(repo, &init_opts.cone_mode, &init_opts.sparse_index)) return 1; memset(&pl, 0, sizeof(pl)); @@ -485,14 +487,14 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix, if (res >= 0) { free(sparse_filename); clear_pattern_list(&pl); - return update_working_directory(NULL); + return update_working_directory(repo, NULL); } - if (repo_get_oid(the_repository, "HEAD", &oid)) { + if (repo_get_oid(repo, "HEAD", &oid)) { FILE *fp; /* assume we are in a fresh repo, but update the sparse-checkout file */ - if (safe_create_leading_directories(the_repository, sparse_filename)) + if (safe_create_leading_directories(repo, sparse_filename)) die(_("unable to create leading directories of %s"), sparse_filename); fp = xfopen(sparse_filename, "w"); @@ -511,7 +513,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix, add_pattern("!/*/", empty_base, 0, &pl, 0); pl.use_cone_patterns = init_opts.cone_mode; - return write_patterns_and_update(&pl); + return write_patterns_and_update(repo, &pl); } static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *path) @@ -674,7 +676,8 @@ static void add_patterns_literal(int argc, const char **argv, add_patterns_from_input(pl, argc, argv, use_stdin ? stdin : NULL); } -static int modify_pattern_list(struct strvec *args, int use_stdin, +static int modify_pattern_list(struct repository *repo, + struct strvec *args, int use_stdin, enum modify_type m) { int result; @@ -696,22 +699,23 @@ static int modify_pattern_list(struct strvec *args, int use_stdin, } if (!core_apply_sparse_checkout) { - set_config(MODE_ALL_PATTERNS); + set_config(repo, MODE_ALL_PATTERNS); core_apply_sparse_checkout = 1; changed_config = 1; } - result = write_patterns_and_update(pl); + result = write_patterns_and_update(repo, pl); if (result && changed_config) - set_config(MODE_NO_PATTERNS); + set_config(repo, MODE_NO_PATTERNS); clear_pattern_list(pl); free(pl); return result; } -static void sanitize_paths(struct strvec *args, +static void sanitize_paths(struct repository *repo, + struct strvec *args, const char *prefix, int skip_checks) { int i; @@ -752,7 +756,7 @@ static void sanitize_paths(struct strvec *args, for (i = 0; i < args->nr; i++) { struct cache_entry *ce; - struct index_state *index = the_repository->index; + struct index_state *index = repo->index; int pos = index_name_pos(index, args->v[i], strlen(args->v[i])); if (pos < 0) @@ -779,7 +783,7 @@ static struct sparse_checkout_add_opts { } add_opts; static int sparse_checkout_add(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { static struct option builtin_sparse_checkout_add_options[] = { OPT_BOOL_F(0, "skip-checks", &add_opts.skip_checks, @@ -796,7 +800,7 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix, if (!core_apply_sparse_checkout) die(_("no sparse-checkout to add to")); - repo_read_index(the_repository); + repo_read_index(repo); argc = parse_options(argc, argv, prefix, builtin_sparse_checkout_add_options, @@ -804,9 +808,9 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix, for (int i = 0; i < argc; i++) strvec_push(&patterns, argv[i]); - sanitize_paths(&patterns, prefix, add_opts.skip_checks); + sanitize_paths(repo, &patterns, prefix, add_opts.skip_checks); - ret = modify_pattern_list(&patterns, add_opts.use_stdin, ADD); + ret = modify_pattern_list(repo, &patterns, add_opts.use_stdin, ADD); strvec_clear(&patterns); return ret; @@ -825,7 +829,7 @@ static struct sparse_checkout_set_opts { } set_opts; static int sparse_checkout_set(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { int default_patterns_nr = 2; const char *default_patterns[] = {"/*", "!/*/", NULL}; @@ -847,7 +851,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix, int ret; setup_work_tree(); - repo_read_index(the_repository); + repo_read_index(repo); set_opts.cone_mode = -1; set_opts.sparse_index = -1; @@ -856,7 +860,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix, builtin_sparse_checkout_set_options, builtin_sparse_checkout_set_usage, 0); - if (update_modes(&set_opts.cone_mode, &set_opts.sparse_index)) + if (update_modes(repo, &set_opts.cone_mode, &set_opts.sparse_index)) return 1; /* @@ -870,10 +874,10 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix, } else { for (int i = 0; i < argc; i++) strvec_push(&patterns, argv[i]); - sanitize_paths(&patterns, prefix, set_opts.skip_checks); + sanitize_paths(repo, &patterns, prefix, set_opts.skip_checks); } - ret = modify_pattern_list(&patterns, set_opts.use_stdin, REPLACE); + ret = modify_pattern_list(repo, &patterns, set_opts.use_stdin, REPLACE); strvec_clear(&patterns); return ret; @@ -891,7 +895,7 @@ static struct sparse_checkout_reapply_opts { static int sparse_checkout_reapply(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { static struct option builtin_sparse_checkout_reapply_options[] = { OPT_BOOL(0, "cone", &reapply_opts.cone_mode, @@ -912,12 +916,12 @@ static int sparse_checkout_reapply(int argc, const char **argv, builtin_sparse_checkout_reapply_options, builtin_sparse_checkout_reapply_usage, 0); - repo_read_index(the_repository); + repo_read_index(repo); - if (update_modes(&reapply_opts.cone_mode, &reapply_opts.sparse_index)) + if (update_modes(repo, &reapply_opts.cone_mode, &reapply_opts.sparse_index)) return 1; - return update_working_directory(NULL); + return update_working_directory(repo, NULL); } static char const * const builtin_sparse_checkout_disable_usage[] = { @@ -927,7 +931,7 @@ static char const * const builtin_sparse_checkout_disable_usage[] = { static int sparse_checkout_disable(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { static struct option builtin_sparse_checkout_disable_options[] = { OPT_END(), @@ -955,7 +959,7 @@ static int sparse_checkout_disable(int argc, const char **argv, * are expecting to do that when disabling sparse-checkout. */ give_advice_on_expansion = 0; - repo_read_index(the_repository); + repo_read_index(repo); memset(&pl, 0, sizeof(pl)); hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0); @@ -966,13 +970,13 @@ static int sparse_checkout_disable(int argc, const char **argv, add_pattern("/*", empty_base, 0, &pl, 0); prepare_repo_settings(the_repository); - the_repository->settings.sparse_index = 0; + repo->settings.sparse_index = 0; - if (update_working_directory(&pl)) + if (update_working_directory(repo, &pl)) die(_("error while refreshing working directory")); clear_pattern_list(&pl); - return set_config(MODE_NO_PATTERNS); + return set_config(repo, MODE_NO_PATTERNS); } static char const * const builtin_sparse_checkout_check_rules_usage[] = { @@ -987,14 +991,17 @@ static struct sparse_checkout_check_rules_opts { char *rules_file; } check_rules_opts; -static int check_rules(struct pattern_list *pl, int null_terminated) { +static int check_rules(struct repository *repo, + struct pattern_list *pl, + int null_terminated) +{ struct strbuf line = STRBUF_INIT; struct strbuf unquoted = STRBUF_INIT; char *path; int line_terminator = null_terminated ? 0 : '\n'; strbuf_getline_fn getline_fn = null_terminated ? strbuf_getline_nul : strbuf_getline; - the_repository->index->sparse_checkout_patterns = pl; + repo->index->sparse_checkout_patterns = pl; while (!getline_fn(&line, stdin)) { path = line.buf; if (!null_terminated && line.buf[0] == '"') { @@ -1006,7 +1013,7 @@ static int check_rules(struct pattern_list *pl, int null_terminated) { path = unquoted.buf; } - if (path_in_sparse_checkout(path, the_repository->index)) + if (path_in_sparse_checkout(path, repo->index)) write_name_quoted(path, stdout, line_terminator); } strbuf_release(&line); @@ -1016,7 +1023,7 @@ static int check_rules(struct pattern_list *pl, int null_terminated) { } static int sparse_checkout_check_rules(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { static struct option builtin_sparse_checkout_check_rules_options[] = { OPT_BOOL('z', NULL, &check_rules_opts.null_termination, @@ -1055,7 +1062,7 @@ static int sparse_checkout_check_rules(int argc, const char **argv, const char * free(sparse_filename); } - ret = check_rules(&pl, check_rules_opts.null_termination); + ret = check_rules(repo, &pl, check_rules_opts.null_termination); clear_pattern_list(&pl); free(check_rules_opts.rules_file); return ret; @@ -1084,8 +1091,8 @@ int cmd_sparse_checkout(int argc, repo_config(the_repository, git_default_config, NULL); - prepare_repo_settings(the_repository); - the_repository->settings.command_requires_full_index = 0; + prepare_repo_settings(repo); + repo->settings.command_requires_full_index = 0; return fn(argc, argv, prefix, repo); } From 2520efd3bc8c81cb4bd8f832b241c3b2b8c0630f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Sep 2025 10:30:06 +0000 Subject: [PATCH 02/20] sparse-checkout: add basics of 'clean' command When users change their sparse-checkout definitions to add new directories and remove old ones, there may be a few reasons why directories no longer in scope remain (ignored or excluded files still exist, Windows handles are still open, etc.). When these files still exist, the sparse index feature notices that a tracked, but sparse, directory still exists on disk and thus the index expands. This causes a performance hit _and_ the advice printed isn't very helpful. Using 'git clean' isn't enough (generally '-dfx' may be needed) but also this may not be sufficient. Add a new subcommand to 'git sparse-checkout' that removes these tracked-but-sparse directories. The implementation details provide a clear definition of what is happening, but it is difficult to describe this without including the internal implementation details. The core operation converts the index to a sparse index (in memory if not already on disk) and then deletes any directories in the worktree that correspond with a sparse directory entry in that sparse index. In the most common case, this means that a file will be removed if it is contained within a directory that is both tracked and outside of the sparse-checkout definition. However, there can be exceptions depending on the current state of the index: * If the worktree has a modification to a tracked, sparse file, then that file's parent directories will be expanded instead of represented as sparse directories. Siblings of those parent directories may be considered sparse. * If the user staged a sparse file with "git add --sparse", then that file loses the SKIP_WORKTREE bit until the sparse-checkout is reapplied. Until then, that file's parent directories are not represented as sparse directory entries and thus will not be removed. Siblings of those parent directories may be considered sparse. (There may be other reasons why the SKIP_WORKTREE bit was removed for a file and this impact on the sparse directories will apply to those as well.) * If the user has a merge conflict outside of the sparse-checkout definition, then those conflict entries prevent the parent directories from being represented as sparse directory entries and thus are not removed. * The cases above present reasons why certain _file conditions_ will impact which _directories_ are considered sparse. The list of tracked directories that are outside of the sparse-checkout definition but not represented as a sparse directory further reduces the list of files that will be removed. For these complicated reasons, the documentation details a potential list of files that will be "considered for removal" instead of defining the list concretely. The special cases can be handled by resolving conflicts, committing staged changes, and running 'git sparse-checkout reapply' to update the SKIP_WORKTREE bits as expected by the sparse-checkout definition. It is important to make clear that this operation will remove ignored and excluded files which would normally be ignored even by 'git clean -f' unless the '-x' or '-X' option is provided. This is the most extreme method for doing this, but it works when the sparse-checkout is in cone mode and is expected to rescope based on directories, not files. The current implementation always deletes these sparse directories without warning. This is unacceptable for a released version, but those features will be added in changes coming immediately after this one. Note that this will not remove an untracked directory (or any of its contents) if its parent is a tracked directory within the sparse-checkout definition. This is required to prevent removing data created by tools that perform caching operations for editors or build tools. Thus, 'git sparse-checkout clean' is both more aggressive and more careful than 'git clean -fx': * It is more aggressive because it will remove _tracked_ files within the sparse directories. * It is less aggressive because it will leave _untracked_ files that are not contained in sparse directories. These special cases will be handled more explicitly in a future change that expands tests for the 'git sparse-checkout clean' command. We handle some of the modified, staged, and committed states including some impact on 'git status' after cleaning. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.adoc | 19 ++++- builtin/sparse-checkout.c | 64 ++++++++++++++- t/t1091-sparse-checkout-builtin.sh | 103 +++++++++++++++++++++++++ 3 files changed, 184 insertions(+), 2 deletions(-) diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc index 529a8edd9c1ed8..baaebce746d2a1 100644 --- a/Documentation/git-sparse-checkout.adoc +++ b/Documentation/git-sparse-checkout.adoc @@ -9,7 +9,7 @@ git-sparse-checkout - Reduce your working tree to a subset of tracked files SYNOPSIS -------- [verse] -'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules) [] +'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules | clean) [] DESCRIPTION @@ -111,6 +111,23 @@ flags, with the same meaning as the flags from the `set` command, in order to change which sparsity mode you are using without needing to also respecify all sparsity paths. +'clean':: + Opportunistically remove files outside of the sparse-checkout + definition. This command requires cone mode to use recursive + directory matches to determine which files should be removed. A + file is considered for removal if it is contained within a tracked + directory that is outside of the sparse-checkout definition. ++ +Some special cases, such as merge conflicts or modified files outside of +the sparse-checkout definition could lead to keeping files that would +otherwise be removed. Resolve conflicts, stage modifications, and use +`git sparse-checkout reapply` in conjunction with `git sparse-checkout +clean` to resolve these cases. ++ +This command can be used to be sure the sparse index works efficiently, +though it does not require enabling the sparse index feature via the +`index.sparse=true` configuration. + 'disable':: Disable the `core.sparseCheckout` config setting, and restore the working directory to include all files. diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 06de61bd9d0d33..f7caa28f3f00aa 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -2,6 +2,7 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" +#include "abspath.h" #include "config.h" #include "dir.h" #include "environment.h" @@ -23,7 +24,7 @@ static const char *empty_base = ""; static char const * const builtin_sparse_checkout_usage[] = { - N_("git sparse-checkout (init | list | set | add | reapply | disable | check-rules) []"), + N_("git sparse-checkout (init | list | set | add | reapply | disable | check-rules | clean) []"), NULL }; @@ -924,6 +925,66 @@ static int sparse_checkout_reapply(int argc, const char **argv, return update_working_directory(repo, NULL); } +static char const * const builtin_sparse_checkout_clean_usage[] = { + "git sparse-checkout clean [-n|--dry-run]", + NULL +}; + +static const char *msg_remove = N_("Removing %s\n"); + +static int sparse_checkout_clean(int argc, const char **argv, + const char *prefix, + struct repository *repo) +{ + struct strbuf full_path = STRBUF_INIT; + const char *msg = msg_remove; + size_t worktree_len; + + struct option builtin_sparse_checkout_clean_options[] = { + OPT_END(), + }; + + setup_work_tree(); + if (!core_apply_sparse_checkout) + die(_("must be in a sparse-checkout to clean directories")); + if (!core_sparse_checkout_cone) + die(_("must be in a cone-mode sparse-checkout to clean directories")); + + argc = parse_options(argc, argv, prefix, + builtin_sparse_checkout_clean_options, + builtin_sparse_checkout_clean_usage, 0); + + if (repo_read_index(repo) < 0) + die(_("failed to read index")); + + if (convert_to_sparse(repo->index, SPARSE_INDEX_MEMORY_ONLY) || + repo->index->sparse_index == INDEX_EXPANDED) + die(_("failed to convert index to a sparse index; resolve merge conflicts and try again")); + + strbuf_addstr(&full_path, repo->worktree); + strbuf_addch(&full_path, '/'); + worktree_len = full_path.len; + + for (size_t i = 0; i < repo->index->cache_nr; i++) { + struct cache_entry *ce = repo->index->cache[i]; + if (!S_ISSPARSEDIR(ce->ce_mode)) + continue; + strbuf_setlen(&full_path, worktree_len); + strbuf_add(&full_path, ce->name, ce->ce_namelen); + + if (!is_directory(full_path.buf)) + continue; + + printf(msg, ce->name); + + if (remove_dir_recursively(&full_path, 0)) + warning_errno(_("failed to remove '%s'"), ce->name); + } + + strbuf_release(&full_path); + return 0; +} + static char const * const builtin_sparse_checkout_disable_usage[] = { "git sparse-checkout disable", NULL @@ -1080,6 +1141,7 @@ int cmd_sparse_checkout(int argc, OPT_SUBCOMMAND("set", &fn, sparse_checkout_set), OPT_SUBCOMMAND("add", &fn, sparse_checkout_add), OPT_SUBCOMMAND("reapply", &fn, sparse_checkout_reapply), + OPT_SUBCOMMAND("clean", &fn, sparse_checkout_clean), OPT_SUBCOMMAND("disable", &fn, sparse_checkout_disable), OPT_SUBCOMMAND("check-rules", &fn, sparse_checkout_check_rules), OPT_END(), diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index ab3a105ffff253..bdb7b21e327b2a 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -1050,5 +1050,108 @@ test_expect_success 'check-rules null termination' ' test_cmp expect actual ' +test_expect_success 'clean' ' + git -C repo sparse-checkout set --cone deep/deeper1 && + git -C repo sparse-checkout reapply && + mkdir repo/deep/deeper2 repo/folder1 && + + # Add untracked files + touch repo/deep/deeper2/file && + touch repo/folder1/file && + + cat >expect <<-\EOF && + Removing deep/deeper2/ + Removing folder1/ + EOF + + git -C repo sparse-checkout clean >out && + test_cmp expect out && + + test_path_is_missing repo/deep/deeper2 && + test_path_is_missing repo/folder1 +' + +test_expect_success 'clean with sparse file states' ' + test_when_finished git reset --hard && + git -C repo sparse-checkout set --cone deep/deeper1 && + mkdir repo/folder2 && + + # create an untracked file and a modified file + touch repo/folder2/file && + echo dirty >repo/folder2/a && + + # First clean/reapply pass will do nothing. + git -C repo sparse-checkout clean >out && + test_must_be_empty out && + test_path_exists repo/folder2/a && + test_path_exists repo/folder2/file && + + git -C repo sparse-checkout reapply 2>err && + test_grep folder2 err && + test_path_exists repo/folder2/a && + test_path_exists repo/folder2/file && + + # Now, stage the change to the tracked file. + git -C repo add --sparse folder2/a && + + # Clean will continue not doing anything. + git -C repo sparse-checkout clean >out && + test_line_count = 0 out && + test_path_exists repo/folder2/a && + test_path_exists repo/folder2/file && + + # But we can reapply to remove the staged change. + git -C repo sparse-checkout reapply 2>err && + test_grep folder2 err && + test_path_is_missing repo/folder2/a && + test_path_exists repo/folder2/file && + + # We can clean now. + cat >expect <<-\EOF && + Removing folder2/ + EOF + git -C repo sparse-checkout clean >out && + test_cmp expect out && + test_path_is_missing repo/folder2 && + + # At the moment, the file is staged. + cat >expect <<-\EOF && + M folder2/a + EOF + + git -C repo status -s >out && + test_cmp expect out && + + # Reapply persists the modified state. + git -C repo sparse-checkout reapply && + cat >expect <<-\EOF && + M folder2/a + EOF + git -C repo status -s >out && + test_cmp expect out && + + # Committing the change leads to resolved status. + git -C repo commit -m "modified" && + git -C repo status -s >out && + test_must_be_empty out && + + # Repeat, but this time commit before reapplying. + mkdir repo/folder2/ && + echo dirtier >repo/folder2/a && + git -C repo add --sparse folder2/a && + git -C repo sparse-checkout clean >out && + test_must_be_empty out && + test_path_exists repo/folder2/a && + + # Committing without reapplying makes it look like a deletion + # due to no skip-worktree bit. + git -C repo commit -m "dirtier" && + git -C repo status -s >out && + test_must_be_empty out && + + git -C repo sparse-checkout reapply && + git -C repo status -s >out && + test_must_be_empty out +' test_done From a8077c19131a86fa123c5be2c8b735acdb5dacf4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Sep 2025 10:30:07 +0000 Subject: [PATCH 03/20] sparse-checkout: match some 'clean' behavior The 'git sparse-checkout clean' subcommand is somewhat similar to 'git clean' in that it will delete files that should not be in the worktree. The big difference is that it focuses on the directories that should not be in the worktree due to cone-mode sparse-checkout. It also does not discriminate in the kinds of files and focuses on deleting entire directories. However, there are some restrictions that would be good to bring over from 'git clean', specifically how it refuses to do anything without the '-f'/'--force' or '-n'/'--dry-run' arguments. The 'clean.requireForce' config can be set to 'false' to imply '--force'. Add this behavior to avoid accidental deletion of files that cannot be recovered from Git. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.adoc | 9 +++++ builtin/sparse-checkout.c | 15 ++++++- t/t1091-sparse-checkout-builtin.sh | 54 +++++++++++++++++++++++++- 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc index baaebce746d2a1..42050ff5b50cf2 100644 --- a/Documentation/git-sparse-checkout.adoc +++ b/Documentation/git-sparse-checkout.adoc @@ -127,6 +127,15 @@ clean` to resolve these cases. This command can be used to be sure the sparse index works efficiently, though it does not require enabling the sparse index feature via the `index.sparse=true` configuration. ++ +To prevent accidental deletion of worktree files, the `clean` subcommand +will not delete any files without the `-f` or `--force` option, unless +the `clean.requireForce` config option is set to `false`. ++ +The `--dry-run` option will list the directories that would be removed +without deleting them. Running in this mode can be helpful to predict the +behavior of the clean comand or to determine which kinds of files are left +in the sparse directories. 'disable':: Disable the `core.sparseCheckout` config setting, and restore the diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index f7caa28f3f00aa..d777b64960668d 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -931,6 +931,7 @@ static char const * const builtin_sparse_checkout_clean_usage[] = { }; static const char *msg_remove = N_("Removing %s\n"); +static const char *msg_would_remove = N_("Would remove %s\n"); static int sparse_checkout_clean(int argc, const char **argv, const char *prefix, @@ -939,8 +940,12 @@ static int sparse_checkout_clean(int argc, const char **argv, struct strbuf full_path = STRBUF_INIT; const char *msg = msg_remove; size_t worktree_len; + int force = 0, dry_run = 0; + int require_force = 1; struct option builtin_sparse_checkout_clean_options[] = { + OPT__DRY_RUN(&dry_run, N_("dry run")), + OPT__FORCE(&force, N_("force"), PARSE_OPT_NOCOMPLETE), OPT_END(), }; @@ -954,6 +959,13 @@ static int sparse_checkout_clean(int argc, const char **argv, builtin_sparse_checkout_clean_options, builtin_sparse_checkout_clean_usage, 0); + repo_config_get_bool(repo, "clean.requireforce", &require_force); + if (require_force && !force && !dry_run) + die(_("for safety, refusing to clean without one of --force or --dry-run")); + + if (dry_run) + msg = msg_would_remove; + if (repo_read_index(repo) < 0) die(_("failed to read index")); @@ -977,7 +989,8 @@ static int sparse_checkout_clean(int argc, const char **argv, printf(msg, ce->name); - if (remove_dir_recursively(&full_path, 0)) + if (dry_run <= 0 && + remove_dir_recursively(&full_path, 0)) warning_errno(_("failed to remove '%s'"), ce->name); } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index bdb7b21e327b2a..e6b768a8da959a 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -1059,12 +1059,29 @@ test_expect_success 'clean' ' touch repo/deep/deeper2/file && touch repo/folder1/file && + test_must_fail git -C repo sparse-checkout clean 2>err && + grep "refusing to clean" err && + + git -C repo config clean.requireForce true && + test_must_fail git -C repo sparse-checkout clean 2>err && + grep "refusing to clean" err && + + cat >expect <<-\EOF && + Would remove deep/deeper2/ + Would remove folder1/ + EOF + + git -C repo sparse-checkout clean --dry-run >out && + test_cmp expect out && + test_path_exists repo/deep/deeper2 && + test_path_exists repo/folder1 && + cat >expect <<-\EOF && Removing deep/deeper2/ Removing folder1/ EOF - git -C repo sparse-checkout clean >out && + git -C repo sparse-checkout clean -f >out && test_cmp expect out && test_path_is_missing repo/deep/deeper2 && @@ -1076,6 +1093,10 @@ test_expect_success 'clean with sparse file states' ' git -C repo sparse-checkout set --cone deep/deeper1 && mkdir repo/folder2 && + # The previous test case checked the -f option, so + # test the config option in this one. + git -C repo config clean.requireForce false && + # create an untracked file and a modified file touch repo/folder2/file && echo dirty >repo/folder2/a && @@ -1154,4 +1175,35 @@ test_expect_success 'clean with sparse file states' ' test_must_be_empty out ' +test_expect_success 'clean with merge conflict status' ' + git clone repo clean-merge && + + echo dirty >clean-merge/deep/deeper2/a && + touch clean-merge/folder2/extra && + + cat >input <<-EOF && + 0 $ZERO_OID folder1/a + 100644 $(git -C clean-merge rev-parse HEAD:folder1/a) 1 folder1/a + EOF + git -C clean-merge update-index --index-info err && + grep "failed to convert index to a sparse index" err && + + mkdir -p clean-merge/folder1/ && + echo merged >clean-merge/folder1/a && + git -C clean-merge add --sparse folder1/a && + + # deletes folder2/ but leaves staged change in folder1 + # and dirty change in deep/deeper2/ + cat >expect <<-\EOF && + Removing folder2/ + EOF + + git -C clean-merge sparse-checkout clean -f >out && + test_cmp expect out +' + test_done From 1588e836bb956d14e6cb38e35933ed2749c023b4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Sep 2025 10:30:08 +0000 Subject: [PATCH 04/20] dir: add generic "walk all files" helper There is sometimes a need to visit every file within a directory, recursively. The main example is remove_dir_recursively(), though it has some extra flags that make it want to iterate over paths in a custom way. There is also the fill_directory() approach but that involves an index and a pathspec. This change adds a new for_each_file_in_dir() method that will be helpful in the next change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 28 ++++++++++++++++++++++++++++ dir.h | 14 ++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/dir.c b/dir.c index dfb4d40103fb4a..7731f7b9d305b7 100644 --- a/dir.c +++ b/dir.c @@ -30,6 +30,7 @@ #include "read-cache-ll.h" #include "setup.h" #include "sparse-index.h" +#include "strbuf.h" #include "submodule-config.h" #include "symlinks.h" #include "trace2.h" @@ -87,6 +88,33 @@ struct dirent *readdir_skip_dot_and_dotdot(DIR *dirp) return e; } +int for_each_file_in_dir(struct strbuf *path, file_iterator fn, const void *data) +{ + struct dirent *e; + int res = 0; + size_t baselen = path->len; + DIR *dir = opendir(path->buf); + + if (!dir) + return 0; + + while (!res && (e = readdir_skip_dot_and_dotdot(dir)) != NULL) { + unsigned char dtype = get_dtype(e, path, 0); + strbuf_setlen(path, baselen); + strbuf_addstr(path, e->d_name); + + if (dtype == DT_REG) { + res = fn(path->buf, data); + } else if (dtype == DT_DIR) { + strbuf_addch(path, '/'); + res = for_each_file_in_dir(path, fn, data); + } + } + + closedir(dir); + return res; +} + int count_slashes(const char *s) { int cnt = 0; diff --git a/dir.h b/dir.h index fc9be7b427a134..20d4a078d61ef8 100644 --- a/dir.h +++ b/dir.h @@ -536,6 +536,20 @@ int get_sparse_checkout_patterns(struct pattern_list *pl); */ int remove_dir_recursively(struct strbuf *path, int flag); +/* + * This function pointer type is called on each file discovered in + * for_each_file_in_dir. The iteration stops if this method returns + * non-zero. + */ +typedef int (*file_iterator)(const char *path, const void *data); + +struct strbuf; +/* + * Given a directory path, recursively visit each file within, including + * within subdirectories. + */ +int for_each_file_in_dir(struct strbuf *path, file_iterator fn, const void *data); + /* * Tries to remove the path, along with leading empty directories so long as * those empty directories are not startup_info->original_cwd. Ignores From 5b5a7f5ebd2e2701ac6fa522866f22b885147c01 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Sep 2025 10:30:09 +0000 Subject: [PATCH 05/20] sparse-checkout: add --verbose option to 'clean' The 'git sparse-checkout clean' subcommand is focused on directories, deleting any tracked sparse directories to clean up the worktree and make the sparse index feature work optimally. However, this directory-focused approach can leave users wondering why those directories exist at all. In my experience, these files are left over due to ignore or exclude patterns, Windows file handles, or possibly merge conflict resolutions. Add a new '--verbose' option for users to see all the files that are being deleted (with '--force') or would be deleted (with '--dry-run'). Based on usage, users may request further context on this list of files for states such as tracked/untracked, unstaged/staged/conflicted, etc. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.adoc | 5 +++++ builtin/sparse-checkout.c | 26 ++++++++++++++++++++++++-- t/t1091-sparse-checkout-builtin.sh | 14 +++++++++++--- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc index 42050ff5b50cf2..113728a0e7c01d 100644 --- a/Documentation/git-sparse-checkout.adoc +++ b/Documentation/git-sparse-checkout.adoc @@ -136,6 +136,11 @@ The `--dry-run` option will list the directories that would be removed without deleting them. Running in this mode can be helpful to predict the behavior of the clean comand or to determine which kinds of files are left in the sparse directories. ++ +The `--verbose` option will list every file within the directories that +are considered for removal. This option is helpful to determine if those +files are actually important or perhaps to explain why the directory is +still present despite the current sparse-checkout. 'disable':: Disable the `core.sparseCheckout` config setting, and restore the diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index d777b64960668d..15d51e60a86533 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -930,6 +930,24 @@ static char const * const builtin_sparse_checkout_clean_usage[] = { NULL }; +static int list_file_iterator(const char *path, const void *data) +{ + const char *msg = data; + + printf(msg, path); + return 0; +} + +static void list_every_file_in_dir(const char *msg, + const char *directory) +{ + struct strbuf path = STRBUF_INIT; + + strbuf_addstr(&path, directory); + for_each_file_in_dir(&path, list_file_iterator, msg); + strbuf_release(&path); +} + static const char *msg_remove = N_("Removing %s\n"); static const char *msg_would_remove = N_("Would remove %s\n"); @@ -940,12 +958,13 @@ static int sparse_checkout_clean(int argc, const char **argv, struct strbuf full_path = STRBUF_INIT; const char *msg = msg_remove; size_t worktree_len; - int force = 0, dry_run = 0; + int force = 0, dry_run = 0, verbose = 0; int require_force = 1; struct option builtin_sparse_checkout_clean_options[] = { OPT__DRY_RUN(&dry_run, N_("dry run")), OPT__FORCE(&force, N_("force"), PARSE_OPT_NOCOMPLETE), + OPT__VERBOSE(&verbose, N_("report each affected file, not just directories")), OPT_END(), }; @@ -987,7 +1006,10 @@ static int sparse_checkout_clean(int argc, const char **argv, if (!is_directory(full_path.buf)) continue; - printf(msg, ce->name); + if (verbose) + list_every_file_in_dir(msg, ce->name); + else + printf(msg, ce->name); if (dry_run <= 0 && remove_dir_recursively(&full_path, 0)) diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index e6b768a8da959a..7b15fa669c4662 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -1053,11 +1053,11 @@ test_expect_success 'check-rules null termination' ' test_expect_success 'clean' ' git -C repo sparse-checkout set --cone deep/deeper1 && git -C repo sparse-checkout reapply && - mkdir repo/deep/deeper2 repo/folder1 && + mkdir -p repo/deep/deeper2 repo/folder1/extra/inside && # Add untracked files touch repo/deep/deeper2/file && - touch repo/folder1/file && + touch repo/folder1/extra/inside/file && test_must_fail git -C repo sparse-checkout clean 2>err && grep "refusing to clean" err && @@ -1074,7 +1074,15 @@ test_expect_success 'clean' ' git -C repo sparse-checkout clean --dry-run >out && test_cmp expect out && test_path_exists repo/deep/deeper2 && - test_path_exists repo/folder1 && + test_path_exists repo/folder1/extra/inside/file && + + cat >expect <<-\EOF && + Would remove deep/deeper2/file + Would remove folder1/extra/inside/file + EOF + + git -C repo sparse-checkout clean --dry-run --verbose >out && + test_cmp expect out && cat >expect <<-\EOF && Removing deep/deeper2/ From 66c11bd46a29f8f91297eaf6157be912bd0bf12a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Sep 2025 10:30:10 +0000 Subject: [PATCH 06/20] sparse-index: point users to new 'clean' action In my experience, the most-common reason that the sparse index must expand to a full one is because there is some leftover file in a tracked directory that is now outside of the sparse-checkout. The new 'git sparse-checkout clean' command will find and delete these directories, so point users to it when they hit the sparse index expansion advice. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- sparse-index.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sparse-index.c b/sparse-index.c index 5634abafaa07ed..5d14795063b578 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -32,7 +32,8 @@ int give_advice_on_expansion = 1; "Your working directory likely has contents that are outside of\n" \ "your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \ "see your sparse-checkout definition and compare it to your working\n" \ - "directory contents. Running 'git clean' may assist in this cleanup." + "directory contents. Running 'git sparse-checkout clean' may assist\n" \ + "in this cleanup." struct modify_index_context { struct index_state *write; From 592d2a93af8a0af047d2212004ca474855096d5f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Sep 2025 10:30:11 +0000 Subject: [PATCH 07/20] t: expand tests around sparse merges and clean With the current implementation of 'git sparse-checkout clean', we notice that a file that was in a conflicted state does not get cleaned up because of some internal details around the SKIP_WORKTREE bit. This test is documenting the current behavior before we update it in the following change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1091-sparse-checkout-builtin.sh | 56 ++++++++++++++++++------------ 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 7b15fa669c4662..b2da4feaeff9ec 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -1183,35 +1183,47 @@ test_expect_success 'clean with sparse file states' ' test_must_be_empty out ' -test_expect_success 'clean with merge conflict status' ' - git clone repo clean-merge && +test_expect_success 'sparse-checkout operations with merge conflicts' ' + git clone repo merge && - echo dirty >clean-merge/deep/deeper2/a && - touch clean-merge/folder2/extra && + ( + cd merge && + mkdir -p folder1/even/more/dirs && + echo base >folder1/even/more/dirs/file && + git add folder1 && + git commit -m "base" && - cat >input <<-EOF && - 0 $ZERO_OID folder1/a - 100644 $(git -C clean-merge rev-parse HEAD:folder1/a) 1 folder1/a - EOF - git -C clean-merge update-index --index-info folder1/even/more/dirs/file && + git commit -a -m "right" && - git -C clean-merge sparse-checkout set deep/deeper1 && + git checkout -b left HEAD~1 && + echo left >folder1/even/more/dirs/file && + git commit -a -m "left" && - test_must_fail git -C clean-merge sparse-checkout clean -f 2>err && - grep "failed to convert index to a sparse index" err && + git checkout -b merge && + git sparse-checkout set deep/deeper1 && - mkdir -p clean-merge/folder1/ && - echo merged >clean-merge/folder1/a && - git -C clean-merge add --sparse folder1/a && + test_must_fail git merge -m "will-conflict" right && - # deletes folder2/ but leaves staged change in folder1 - # and dirty change in deep/deeper2/ - cat >expect <<-\EOF && - Removing folder2/ - EOF + test_must_fail git sparse-checkout clean -f 2>err && + grep "failed to convert index to a sparse index" err && - git -C clean-merge sparse-checkout clean -f >out && - test_cmp expect out + echo merged >folder1/even/more/dirs/file && + git add --sparse folder1 && + git merge --continue && + + test_path_exists folder1/even/more/dirs/file && + + # clean does not remove the file, because the + # SKIP_WORKTREE bit was not cleared by the merge command. + git sparse-checkout clean -f >out && + test_line_count = 0 out && + test_path_exists folder1/even/more/dirs/file && + + git sparse-checkout reapply && + test_path_is_missing folder1 + ) ' test_done From db674095c025870249c5e283ee9cacefad6a8fa9 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 13 Oct 2025 10:48:53 +0200 Subject: [PATCH 08/20] doc: git-tag: stop focusing on GPG signed tags It looks like the documentation of `git tag` is focused a bit too much on GPG signed tags. This starts with the "NAME" section where the command is described with: "Create, list, delete or verify a tag object signed with GPG" while for example `git branch` is described with simply: "List, create, or delete branches" This could give the false impression that `git tag` only works with tag objects, not with lightweight tags, and that tag objects are always GPG signed. In the "DESCRIPTION" section, it looks like only "GnuPG signed tag objects" can be created by the `-s` and `-u ` options, and it seems `gpg.program` can only specify a "custom GnuPG binary". This goes on in the "OPTIONS" section too, especially about the `-s` and `-u ` options. The "CONFIGURATION" section also doesn't talk about how to configure the command to work with X.509 and SSH signatures. Let's rework all that to make sure users have a more accurate and balanced view of what the command can do. Helped-by: Patrick Steinhardt Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/git-tag.adoc | 48 ++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/Documentation/git-tag.adoc b/Documentation/git-tag.adoc index a4b1c0ec05a6c9..28d6fe4e1a63ac 100644 --- a/Documentation/git-tag.adoc +++ b/Documentation/git-tag.adoc @@ -3,7 +3,7 @@ git-tag(1) NAME ---- -git-tag - Create, list, delete or verify a tag object signed with GPG +git-tag - Create, list, delete or verify tags SYNOPSIS @@ -38,15 +38,17 @@ and `-a`, `-s`, and `-u ` are absent, `-a` is implied. Otherwise, a tag reference that points directly at the given object (i.e., a lightweight tag) is created. -A GnuPG signed tag object will be created when `-s` or `-u -` is used. When `-u ` is not used, the -committer identity for the current user is used to find the -GnuPG key for signing. The configuration variable `gpg.program` -is used to specify custom GnuPG binary. +A cryptographically signed tag object will be created when `-s` or +`-u ` is used. The signing backend (GPG, X.509, SSH, etc.) is +controlled by the `gpg.format` configuration variable, defaulting to +OpenPGP. When `-u ` is not used, the committer identity for +the current user is used to find the key for signing. The +configuration variable `gpg.program` is used to specify a custom +signing binary. Tag objects (created with `-a`, `-s`, or `-u`) are called "annotated" tags; they contain a creation date, the tagger name and e-mail, a -tagging message, and an optional GnuPG signature. Whereas a +tagging message, and an optional cryptographic signature. Whereas a "lightweight" tag is simply a name for an object (usually a commit object). @@ -64,10 +66,12 @@ OPTIONS -s:: --sign:: - Make a GPG-signed tag, using the default e-mail address's key. - The default behavior of tag GPG-signing is controlled by `tag.gpgSign` - configuration variable if it exists, or disabled otherwise. - See linkgit:git-config[1]. + Make a cryptographically signed tag, using the default signing + key. The signing backend used depends on the `gpg.format` + configuration variable. The default key is determined by the + backend. For GPG, it's based on the committer's email address, + while for SSH it may be a specific key file or agent + identity. See linkgit:git-config[1]. --no-sign:: Override `tag.gpgSign` configuration variable that is @@ -75,7 +79,10 @@ OPTIONS -u :: --local-user=:: - Make a GPG-signed tag, using the given key. + Make a cryptographically signed tag using the given key. The + format of the and the backend used depend on the + `gpg.format` configuration variable. See + linkgit:git-config[1]. -f:: --force:: @@ -87,7 +94,7 @@ OPTIONS -v:: --verify:: - Verify the GPG signature of the given tag names. + Verify the cryptographic signature of the given tags. -n:: specifies how many lines from the annotation, if any, @@ -236,12 +243,23 @@ it in the repository configuration as follows: ------------------------------------- [user] - signingKey = + signingKey = ------------------------------------- +The signing backend can be chosen via the `gpg.format` configuration +variable, which defaults to `openpgp`. See linkgit:git-config[1] +for a list of other supported formats. + +The path to the program used for each signing backend can be specified +with the `gpg..program` configuration variable. For the +`openpgp` backend, `gpg.program` can be used as a synonym for +`gpg.openpgp.program`. See linkgit:git-config[1] for details. + `pager.tag` is only respected when listing tags, i.e., when `-l` is used or implied. The default is to use a pager. -See linkgit:git-config[1]. + +See linkgit:git-config[1] for more details and other configuration +variables. DISCUSSION ---------- From e204a167757d2c3e4914df60bad5cf78b0e6a9bb Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 13 Oct 2025 10:48:54 +0200 Subject: [PATCH 09/20] lib-gpg: allow tests with GPGSM or GPGSSH prereq first When the 'GPG' prereq is lazily tested, `mkdir "$GNUPGHOME"` could fail if the "$GNUPGHOME" directory already exists. This can happen if the 'GPGSM' or the 'GPGSSH' prereq has been lazily tested before as they already create "$GNUPGHOME". To allow the GPGSM or the GPGSSH prereq to appear before the GPG prereq in some test scripts, let's refactor the creation and setup of the "$GNUPGHOME"` directory in a new prepare_gnupghome() function that uses `mkdir -p "$GNUPGHOME"`. This will be useful in a following commit. Unfortunately the new prepare_gnupghome() function cannot be used when lazily testing the GPG2 prereq, because that would expose existing, hidden bugs in "t1016-compatObjectFormat.sh", so let's just document that with a NEEDSWORK comment. Helped-by: Todd Zullinger Helped-by: Collin Funk Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- t/lib-gpg.sh | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/t/lib-gpg.sh b/t/lib-gpg.sh index 937b876bd05281..b99ae39a06b683 100644 --- a/t/lib-gpg.sh +++ b/t/lib-gpg.sh @@ -9,6 +9,16 @@ GNUPGHOME="$(pwd)/gpghome" export GNUPGHOME +# All the "test_lazy_prereq GPG*" below should use +# `prepare_gnupghome()` either directly or through a call to +# `test_have_prereq GPG*`. That's because `gpg` and `gpgsm` +# only create the directory specified using "$GNUPGHOME" or +# `--homedir` if it's the default (usually "~/.gnupg"). +prepare_gnupghome() { + mkdir -p "$GNUPGHOME" && + chmod 0700 "$GNUPGHOME" +} + test_lazy_prereq GPG ' gpg_version=$(gpg --version 2>&1) test $? != 127 || exit 1 @@ -38,8 +48,7 @@ test_lazy_prereq GPG ' # To export ownertrust: # gpg --homedir /tmp/gpghome --export-ownertrust \ # > lib-gpg/ownertrust - mkdir "$GNUPGHOME" && - chmod 0700 "$GNUPGHOME" && + prepare_gnupghome && (gpgconf --kill all || : ) && gpg --homedir "${GNUPGHOME}" --import \ "$TEST_DIRECTORY"/lib-gpg/keyring.gpg && @@ -63,6 +72,14 @@ test_lazy_prereq GPG2 ' ;; *) (gpgconf --kill all || : ) && + + # NEEDSWORK: prepare_gnupghome() should definitely be + # called here, but it looks like it exposes a + # pre-existing, hidden bug by allowing some tests in + # t1016-compatObjectFormat.sh to run instead of being + # skipped. See: + # https://lore.kernel.org/git/ZoV8b2RvYxLOotSJ@teonanacatl.net/ + gpg --homedir "${GNUPGHOME}" --import \ "$TEST_DIRECTORY"/lib-gpg/keyring.gpg && gpg --homedir "${GNUPGHOME}" --import-ownertrust \ @@ -132,8 +149,7 @@ test_lazy_prereq GPGSSH ' test $? = 0 || exit 1; # Setup some keys and an allowed signers file - mkdir -p "${GNUPGHOME}" && - chmod 0700 "${GNUPGHOME}" && + prepare_gnupghome && (setfacl -k "${GNUPGHOME}" 2>/dev/null || true) && ssh-keygen -t ed25519 -N "" -C "git ed25519 key" -f "${GPGSSH_KEY_PRIMARY}" >/dev/null && ssh-keygen -t rsa -b 2048 -N "" -C "git rsa2048 key" -f "${GPGSSH_KEY_SECONDARY}" >/dev/null && From 132e5666ce785dc47e5d09a9271ee8d2828d6a66 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 13 Oct 2025 10:48:55 +0200 Subject: [PATCH 10/20] t9350: properly count annotated tags In "t9350-fast-export.sh", these existing tests: - 'fast-export | fast-import when main is tagged' - 'cope with tagger-less tags' are checking the number of annotated tags in the test repo by comparing it with some hardcoded values. This could be an issue if some new tests that have some prerequisites add new annotated tags to the repo before these existing tests. When the prerequisites would be satisfied, the number of annotated tags would be different from when some prerequisites would not be satisfied. As we are going to add new tests that add new annotated tags in a following commit, let's properly count the number of annotated tag in the repo by incrementing a counter each time a new annotated tag is added, and then by comparing the number of annotated tags to the value of the counter when checking the number of annotated tags. This is a bit ugly, but it makes it explicit that some tests are interdependent. Alternative solutions, like moving the new tests to the end of the script, were considered, but were rejected because they would instead hide the technical debt and could confuse developers in the future. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- t/t9350-fast-export.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh index 8f85c69d62f17a..21ff26939c6885 100755 --- a/t/t9350-fast-export.sh +++ b/t/t9350-fast-export.sh @@ -35,6 +35,7 @@ test_expect_success 'setup' ' git commit -m sitzt file2 && test_tick && git tag -a -m valentin muss && + ANNOTATED_TAG_COUNT=1 && git merge -s ours main ' @@ -229,7 +230,8 @@ EOF test_expect_success 'set up faked signed tag' ' - git fast-import output && - test $(grep -c "^tag " output) = 3 + test $(grep -c "^tag " output) = $ANNOTATED_TAG_COUNT ' @@ -506,12 +509,13 @@ test_expect_success 'cope with tagger-less tags' ' TAG=$(git hash-object --literally -t tag -w tag-content) && git update-ref refs/tags/sonnenschein $TAG && + ANNOTATED_TAG_COUNT=$((ANNOTATED_TAG_COUNT + 1)) && git fast-export -C -C --signed-tags=strip --all > output && - test $(grep -c "^tag " output) = 4 && + test $(grep -c "^tag " output) = $ANNOTATED_TAG_COUNT && ! grep "Unspecified Tagger" output && git fast-export -C -C --signed-tags=strip --all \ --fake-missing-tagger > output && - test $(grep -c "^tag " output) = 4 && + test $(grep -c "^tag " output) = $ANNOTATED_TAG_COUNT && grep "Unspecified Tagger" output ' From 31f375c31c645f35b83427045cfef719f2e4301b Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 13 Oct 2025 10:48:56 +0200 Subject: [PATCH 11/20] fast-export: handle all kinds of tag signatures Currently the handle_tag() function in "builtin/fast-export.c" searches only for "\n-----BEGIN PGP SIGNATURE-----\n" in the tag message to find a tag signature. This doesn't handle all kinds of OpenPGP signatures as some can start with "-----BEGIN PGP MESSAGE-----" too, and this doesn't handle SSH and X.509 signatures either as they use "-----BEGIN SSH SIGNATURE-----" and "-----BEGIN SIGNED MESSAGE-----" respectively. To handle all these kinds of tag signatures supported by Git, let's use the parse_signed_buffer() function to properly find signatures in tag messages. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 7 +++---- t/t9350-fast-export.sh | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index dc2486f9a83a9b..7adbc55f0dccb1 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -931,9 +931,8 @@ static void handle_tag(const char *name, struct tag *tag) /* handle signed tags */ if (message) { - const char *signature = strstr(message, - "\n-----BEGIN PGP SIGNATURE-----\n"); - if (signature) + size_t sig_offset = parse_signed_buffer(message, message_size); + if (sig_offset < message_size) switch (signed_tag_mode) { case SIGN_ABORT: die("encountered signed tag %s; use " @@ -950,7 +949,7 @@ static void handle_tag(const char *name, struct tag *tag) oid_to_hex(&tag->object.oid)); /* fallthru */ case SIGN_STRIP: - message_size = signature + 1 - message; + message_size = sig_offset; break; } } diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh index 21ff26939c6885..3d153a4805bbfc 100755 --- a/t/t9350-fast-export.sh +++ b/t/t9350-fast-export.sh @@ -279,6 +279,42 @@ test_expect_success 'signed-tags=warn-strip' ' test -s err ' +test_expect_success GPGSM 'setup X.509 signed tag' ' + test_config gpg.format x509 && + test_config user.signingkey $GIT_COMMITTER_EMAIL && + + git tag -s -m "X.509 signed tag" x509-signed $(git rev-parse HEAD) && + ANNOTATED_TAG_COUNT=$((ANNOTATED_TAG_COUNT + 1)) +' + +test_expect_success GPGSM 'signed-tags=verbatim with X.509' ' + git fast-export --signed-tags=verbatim x509-signed > output && + test_grep "SIGNED MESSAGE" output +' + +test_expect_success GPGSM 'signed-tags=strip with X.509' ' + git fast-export --signed-tags=strip x509-signed > output && + test_grep ! "SIGNED MESSAGE" output +' + +test_expect_success GPGSSH 'setup SSH signed tag' ' + test_config gpg.format ssh && + test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" && + + git tag -s -m "SSH signed tag" ssh-signed $(git rev-parse HEAD) && + ANNOTATED_TAG_COUNT=$((ANNOTATED_TAG_COUNT + 1)) +' + +test_expect_success GPGSSH 'signed-tags=verbatim with SSH' ' + git fast-export --signed-tags=verbatim ssh-signed > output && + test_grep "SSH SIGNATURE" output +' + +test_expect_success GPGSSH 'signed-tags=strip with SSH' ' + git fast-export --signed-tags=strip ssh-signed > output && + test_grep ! "SSH SIGNATURE" output +' + test_expect_success GPG 'set up signed commit' ' # Generate a commit with both "gpgsig" and "encoding" set, so From d8ce08aa13b4dc6c4713ff9dc0b2ffacd5873d06 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 13 Oct 2025 10:48:57 +0200 Subject: [PATCH 12/20] fast-import: add '--signed-tags=' option Recently, eaaddf5791 (fast-import: add '--signed-commits=' option, 2025-09-17) added support for controlling how signed commits are handled by `git fast-import`, but there is no option yet to decide about signed tags. To remediate that, let's add a '--signed-tags=' option to `git fast-import` too. With this, both `git fast-export` and `git fast-import` have both a '--signed-tags=' and a '--signed-commits=' supporting the same s. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.adoc | 5 ++ builtin/fast-import.c | 43 ++++++++++++++++ t/meson.build | 1 + t/t9306-fast-import-signed-tags.sh | 80 ++++++++++++++++++++++++++++++ 4 files changed, 129 insertions(+) create mode 100755 t/t9306-fast-import-signed-tags.sh diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc index 85ed7a727038bb..b74179a6c891d5 100644 --- a/Documentation/git-fast-import.adoc +++ b/Documentation/git-fast-import.adoc @@ -66,6 +66,11 @@ fast-import stream! This option is enabled automatically for remote-helpers that use the `import` capability, as they are already trusted to run their own code. +--signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort):: + Specify how to handle signed tags. Behaves in the same way + as the same option in linkgit:git-fast-export[1], except that + default is 'verbatim' (instead of 'abort'). + --signed-commits=(verbatim|warn-verbatim|warn-strip|strip|abort):: Specify how to handle signed commits. Behaves in the same way as the same option in linkgit:git-fast-export[1], except that diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 2010e78475b32e..60d6faa46550e5 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -188,6 +188,7 @@ static int global_argc; static const char **global_argv; static const char *global_prefix; +static enum sign_mode signed_tag_mode = SIGN_VERBATIM; static enum sign_mode signed_commit_mode = SIGN_VERBATIM; /* Memory pools */ @@ -2961,6 +2962,43 @@ static void parse_new_commit(const char *arg) b->last_commit = object_count_by_type[OBJ_COMMIT]; } +static void handle_tag_signature(struct strbuf *msg, const char *name) +{ + size_t sig_offset = parse_signed_buffer(msg->buf, msg->len); + + /* If there is no signature, there is nothing to do. */ + if (sig_offset >= msg->len) + return; + + switch (signed_tag_mode) { + + /* First, modes that don't change anything */ + case SIGN_ABORT: + die(_("encountered signed tag; use " + "--signed-tags= to handle it")); + case SIGN_WARN_VERBATIM: + warning(_("importing a tag signature verbatim for tag '%s'"), name); + /* fallthru */ + case SIGN_VERBATIM: + /* Nothing to do, the signature will be put into the imported tag. */ + break; + + /* Second, modes that remove the signature */ + case SIGN_WARN_STRIP: + warning(_("stripping a tag signature for tag '%s'"), name); + /* fallthru */ + case SIGN_STRIP: + /* Truncate the buffer to remove the signature */ + strbuf_setlen(msg, sig_offset); + break; + + /* Third, BUG */ + default: + BUG("invalid signed_tag_mode value %d from tag '%s'", + signed_tag_mode, name); + } +} + static void parse_new_tag(const char *arg) { static struct strbuf msg = STRBUF_INIT; @@ -3024,6 +3062,8 @@ static void parse_new_tag(const char *arg) /* tag payload/message */ parse_data(&msg, 0, NULL); + handle_tag_signature(&msg, t->name); + /* build the tag object */ strbuf_reset(&new_data); @@ -3544,6 +3584,9 @@ static int parse_one_option(const char *option) } else if (skip_prefix(option, "signed-commits=", &option)) { if (parse_sign_mode(option, &signed_commit_mode)) usagef(_("unknown --signed-commits mode '%s'"), option); + } else if (skip_prefix(option, "signed-tags=", &option)) { + if (parse_sign_mode(option, &signed_tag_mode)) + usagef(_("unknown --signed-tags mode '%s'"), option); } else if (!strcmp(option, "quiet")) { show_stats = 0; quiet = 1; diff --git a/t/meson.build b/t/meson.build index 11376b9e256dd6..cb8c2b4b30b06a 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1036,6 +1036,7 @@ integration_tests = [ 't9303-fast-import-compression.sh', 't9304-fast-import-marks.sh', 't9305-fast-import-signatures.sh', + 't9306-fast-import-signed-tags.sh', 't9350-fast-export.sh', 't9351-fast-export-anonymize.sh', 't9400-git-cvsserver-server.sh', diff --git a/t/t9306-fast-import-signed-tags.sh b/t/t9306-fast-import-signed-tags.sh new file mode 100755 index 00000000000000..363619e7d1abf1 --- /dev/null +++ b/t/t9306-fast-import-signed-tags.sh @@ -0,0 +1,80 @@ +#!/bin/sh + +test_description='git fast-import --signed-tags=' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main + +. ./test-lib.sh +. "$TEST_DIRECTORY/lib-gpg.sh" + +test_expect_success 'set up unsigned initial commit and import repo' ' + test_commit first && + git init new +' + +test_expect_success 'import no signed tag with --signed-tags=abort' ' + git fast-export --signed-tags=verbatim >output && + git -C new fast-import --quiet --signed-tags=abort output +' + +test_expect_success GPG 'import OpenPGP signed tag with --signed-tags=abort' ' + test_must_fail git -C new fast-import --quiet --signed-tags=abort log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/tags/openpgp-signed) && + test $OPENPGP_SIGNED = $IMPORTED && + test_must_be_empty log +' + +test_expect_success GPGSM 'setup X.509 signed tag' ' + test_config gpg.format x509 && + test_config user.signingkey $GIT_COMMITTER_EMAIL && + + git tag -s -m "X.509 signed tag" x509-signed first && + X509_SIGNED=$(git rev-parse --verify refs/tags/x509-signed) && + git fast-export --signed-tags=verbatim x509-signed >output +' + +test_expect_success GPGSM 'import X.509 signed tag with --signed-tags=warn-strip' ' + git -C new fast-import --quiet --signed-tags=warn-strip log 2>&1 && + test_grep "stripping a tag signature for tag '\''x509-signed'\''" log && + IMPORTED=$(git -C new rev-parse --verify refs/tags/x509-signed) && + test $X509_SIGNED != $IMPORTED && + git -C new cat-file -p x509-signed >out && + test_grep ! "SIGNED MESSAGE" out +' + +test_expect_success GPGSSH 'setup SSH signed tag' ' + test_config gpg.format ssh && + test_config user.signingkey "${GPGSSH_KEY_PRIMARY}" && + + git tag -s -m "SSH signed tag" ssh-signed first && + SSH_SIGNED=$(git rev-parse --verify refs/tags/ssh-signed) && + git fast-export --signed-tags=verbatim ssh-signed >output +' + +test_expect_success GPGSSH 'import SSH signed tag with --signed-tags=warn-verbatim' ' + git -C new fast-import --quiet --signed-tags=warn-verbatim log 2>&1 && + test_grep "importing a tag signature verbatim for tag '\''ssh-signed'\''" log && + IMPORTED=$(git -C new rev-parse --verify refs/tags/ssh-signed) && + test $SSH_SIGNED = $IMPORTED +' + +test_expect_success GPGSSH 'import SSH signed tag with --signed-tags=strip' ' + git -C new fast-import --quiet --signed-tags=strip log 2>&1 && + test_must_be_empty log && + IMPORTED=$(git -C new rev-parse --verify refs/tags/ssh-signed) && + test $SSH_SIGNED != $IMPORTED && + git -C new cat-file -p ssh-signed >out && + test_grep ! "SSH SIGNATURE" out +' + +test_done From 0de14fe3f3c821fe6dcaf3f86cdfaea427f5ca70 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 15 Oct 2025 08:04:05 +0200 Subject: [PATCH 13/20] ci: deduplicate calls to `apt-get update` When installing dependencies we first check for the distribution that is in use and then we check for the specific job. In the first step we already install all dependencies required to build and test Git, whereas the second step installs a couple of additional dependencies that are only required to perform job-specific tasks. In both steps we use `apt-get update` to update our repository sources. This is unnecessary though: all platforms that use Aptitude would have already executed this command in the distro-specific step anyway. Drop the redundant calls. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ci/install-dependencies.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index 0d3aa496fc3a24..645d03525044c9 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -120,21 +120,17 @@ esac case "$jobname" in ClangFormat) - sudo apt-get -q update sudo apt-get -q -y install clang-format ;; StaticAnalysis) - sudo apt-get -q update sudo apt-get -q -y install coccinelle libcurl4-openssl-dev libssl-dev \ libexpat-dev gettext make ;; sparse) - sudo apt-get -q update -q sudo apt-get -q -y install libssl-dev libcurl4-openssl-dev \ libexpat-dev gettext zlib1g-dev sparse ;; Documentation) - sudo apt-get -q update sudo apt-get -q -y install asciidoc xmlto docbook-xsl-ns make test -n "$ALREADY_HAVE_ASCIIDOCTOR" || From e75cd059001ab49bd92040418660bcdfc7981c84 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 15 Oct 2025 08:04:06 +0200 Subject: [PATCH 14/20] ci: check formatting of our Rust code Introduce a CI check that verifies that our Rust code is well-formatted. This check uses `cargo fmt`, which is a wrapper around rustfmt(1) that executes formatting for all Rust source files. rustfmt(1) itself is the de-facto standard for formatting code in the Rust ecosystem. The rustfmt(1) tool allows to tweak the final format in theory. In practice though, the Rust ecosystem has aligned on style "editions". These editions only exist to ensure that any potential changes to the style don't cause reformats to existing code bases. Other than that, most Rust projects out there accept this default style of a specific edition. Let's do the same and use that default style. It may not be anyone's favorite, but it is consistent and by making it part of our CI we also enforce it right from the start. Note that we don't have to pick a specific style edition here, as the edition is automatically derived from the edition we have specified in our "Cargo.toml" file. The implemented script looks somewhat weird as we perfom manual error handling instead of using something like `set -e`. The intent here is that subsequent commits will add more checks, and we want to execute all of these checks regardless of whether or not a previous check failed. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- .github/workflows/main.yml | 15 +++++++++++++++ .gitlab-ci.yml | 11 +++++++++++ ci/install-dependencies.sh | 5 +++++ ci/run-rust-checks.sh | 12 ++++++++++++ 4 files changed, 43 insertions(+) create mode 100755 ci/run-rust-checks.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 393ea4d1ccf784..9e36b5c5e3e360 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -458,6 +458,21 @@ jobs: - run: ci/install-dependencies.sh - run: ci/run-static-analysis.sh - run: ci/check-directional-formatting.bash + rust-analysis: + needs: ci-config + if: needs.ci-config.outputs.enabled == 'yes' + env: + jobname: RustAnalysis + CI_JOB_IMAGE: ubuntu:rolling + runs-on: ubuntu-latest + container: ubuntu:rolling + concurrency: + group: rust-analysis-${{ github.ref }} + cancel-in-progress: ${{ needs.ci-config.outputs.skip_concurrent == 'yes' }} + steps: + - uses: actions/checkout@v4 + - run: ci/install-dependencies.sh + - run: ci/run-rust-checks.sh sparse: needs: ci-config if: needs.ci-config.outputs.enabled == 'yes' diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f7d57d1ee96528..a47d839e39abca 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -212,6 +212,17 @@ static-analysis: - ./ci/run-static-analysis.sh - ./ci/check-directional-formatting.bash +rust-analysis: + image: ubuntu:rolling + stage: analyze + needs: [ ] + variables: + jobname: RustAnalysis + before_script: + - ./ci/install-dependencies.sh + script: + - ./ci/run-rust-checks.sh + check-whitespace: image: ubuntu:latest stage: analyze diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index 645d03525044c9..a24b07edff83ac 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -126,6 +126,11 @@ StaticAnalysis) sudo apt-get -q -y install coccinelle libcurl4-openssl-dev libssl-dev \ libexpat-dev gettext make ;; +RustAnalysis) + sudo apt-get -q -y install rustup + rustup default stable + rustup component add rustfmt + ;; sparse) sudo apt-get -q -y install libssl-dev libcurl4-openssl-dev \ libexpat-dev gettext zlib1g-dev sparse diff --git a/ci/run-rust-checks.sh b/ci/run-rust-checks.sh new file mode 100755 index 00000000000000..082eb52f11199b --- /dev/null +++ b/ci/run-rust-checks.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +. ${0%/*}/lib.sh + +set +x + +if ! group "Check Rust formatting" cargo fmt --all --check +then + RET=1 +fi + +exit $RET From 03f3900fb27099366771f54cec6acd558493a4db Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 15 Oct 2025 08:04:07 +0200 Subject: [PATCH 15/20] rust/varint: add safety comments The `decode_varint()` and `encode_varint()` functions in our Rust crate are reimplementations of the respective C functions. As such, we are naturally forced to use the same interface in both Rust and C, which makes use of raw pointers. The consequence is that the code needs to be marked as unsafe in Rust. It is common practice in Rust to provide safety documentation for every block that is marked as unsafe. This common practice is also enforced by Clippy, Rust's static analyser. We don't have Clippy wired up yet, and we could of course just disable this check. But we're about to wire it up, and it is reasonable to always enforce documentation for unsafe blocks. Add such safety comments to already squelch those warnings now. While at it, also document the functions' behaviour. Helped-by: "brian m. carlson" Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- src/varint.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/varint.rs b/src/varint.rs index 6e610bdd8e0794..06492dfc5eaeef 100644 --- a/src/varint.rs +++ b/src/varint.rs @@ -1,3 +1,10 @@ +/// Decode the variable-length integer stored in `bufp` and return the decoded value. +/// +/// Returns 0 in case the decoded integer would overflow u64::MAX. +/// +/// # Safety +/// +/// The buffer must be NUL-terminated to ensure safety. #[no_mangle] pub unsafe extern "C" fn decode_varint(bufp: *mut *const u8) -> u64 { let mut buf = *bufp; @@ -22,6 +29,14 @@ pub unsafe extern "C" fn decode_varint(bufp: *mut *const u8) -> u64 { val } +/// Encode `value` into `buf` as a variable-length integer unless `buf` is null. +/// +/// Returns the number of bytes written, or, if `buf` is null, the number of bytes that would be +/// written to encode the integer. +/// +/// # Safety +/// +/// `buf` must either be null or point to at least 16 bytes of memory. #[no_mangle] pub unsafe extern "C" fn encode_varint(value: u64, buf: *mut u8) -> u8 { let mut varint: [u8; 16] = [0; 16]; From 4b44c46432744a4975432eabba16ad60cb39e089 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 15 Oct 2025 08:04:08 +0200 Subject: [PATCH 16/20] ci: check for common Rust mistakes via Clippy Introduce a CI check that uses Clippy to perform checks for common mistakes and suggested code improvements. Clippy is the official static analyser of the Rust project and thus the de-facto standard. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ci/install-dependencies.sh | 2 +- ci/run-rust-checks.sh | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index a24b07edff83ac..dcd22ddd95c75e 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -129,7 +129,7 @@ StaticAnalysis) RustAnalysis) sudo apt-get -q -y install rustup rustup default stable - rustup component add rustfmt + rustup component add clippy rustfmt ;; sparse) sudo apt-get -q -y install libssl-dev libcurl4-openssl-dev \ diff --git a/ci/run-rust-checks.sh b/ci/run-rust-checks.sh index 082eb52f11199b..fb5ea8991b8af4 100755 --- a/ci/run-rust-checks.sh +++ b/ci/run-rust-checks.sh @@ -9,4 +9,9 @@ then RET=1 fi +if ! group "Check for common Rust mistakes" cargo clippy --all-targets --all-features -- -Dwarnings +then + RET=1 +fi + exit $RET From 1b43384f41d8303324e8e6717dcf109e8846c214 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 15 Oct 2025 08:04:09 +0200 Subject: [PATCH 17/20] ci: verify minimum supported Rust version In the current state of our Rust code base we don't really have any requirements for the minimum supported Rust version yet, as we don't use any features introduced by a recent version of Rust. Consequently, we have decided that we want to aim for a rather old version and edition of Rust, where the hope is that using an old version will make alternatives like gccrs viable earlier for compiling Git. But while we specify the Rust edition, we don't yet specify a Rust version. And even if we did, the Rust version would only be enforced for our own code, but not for any of our dependencies. We don't yet have any dependencies at the current point in time. But let's add some safeguards by specifying the minimum supported Rust version and using cargo-msrv(1) to verify that this version can be satisfied for all of our dependencies. Note that we fix the version of cargo-msrv(1) at v0.18.1. This is the latest release supported by Ubuntu's Rust version. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Cargo.toml | 1 + ci/install-dependencies.sh | 8 ++++++++ ci/run-rust-checks.sh | 5 +++++ 3 files changed, 14 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 45c9b34981abb3..2f51bf5d5ff5f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ name = "gitcore" version = "0.1.0" edition = "2018" +rust-version = "1.49.0" [lib] crate-type = ["staticlib"] diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index dcd22ddd95c75e..29e558bb9ccd7e 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -10,6 +10,8 @@ begin_group "Install dependencies" P4WHENCE=https://cdist2.perforce.com/perforce/r23.2 LFSWHENCE=https://github.com/github/git-lfs/releases/download/v$LINUX_GIT_LFS_VERSION JGITWHENCE=https://repo1.maven.org/maven2/org/eclipse/jgit/org.eclipse.jgit.pgm/6.8.0.202311291450-r/org.eclipse.jgit.pgm-6.8.0.202311291450-r.sh +CARGO_MSRV_VERSION=0.18.4 +CARGO_MSRV_WHENCE=https://github.com/foresterre/cargo-msrv/releases/download/v$CARGO_MSRV_VERSION/cargo-msrv-x86_64-unknown-linux-musl-v$CARGO_MSRV_VERSION.tgz # Make sudo a no-op and execute the command directly when running as root. # While using sudo would be fine on most platforms when we are root already, @@ -130,6 +132,12 @@ RustAnalysis) sudo apt-get -q -y install rustup rustup default stable rustup component add clippy rustfmt + + wget -q "$CARGO_MSRV_WHENCE" -O "cargo-msvc.tgz" + sudo mkdir -p "$CUSTOM_PATH" + sudo tar -xf "cargo-msvc.tgz" --strip-components=1 \ + --directory "$CUSTOM_PATH" --wildcards "*/cargo-msrv" + sudo chmod a+x "$CUSTOM_PATH/cargo-msrv" ;; sparse) sudo apt-get -q -y install libssl-dev libcurl4-openssl-dev \ diff --git a/ci/run-rust-checks.sh b/ci/run-rust-checks.sh index fb5ea8991b8af4..b5ad9e8dc6f71f 100755 --- a/ci/run-rust-checks.sh +++ b/ci/run-rust-checks.sh @@ -14,4 +14,9 @@ then RET=1 fi +if ! group "Check for minimum required Rust version" cargo msrv verify +then + RET=1 +fi + exit $RET From e509b5b8be0f17467dcc75130f941d84a09d96a3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 15 Oct 2025 08:04:10 +0200 Subject: [PATCH 18/20] rust: support for Windows The initial patch series that introduced Rust into the core of Git only cared about macOS and Linux. This specifically leaves out Windows, which indeed fails to build right now due to two issues: - The Rust runtime requires `GetUserProfileDirectoryW()`, but we don't link against "userenv.dll". - The path of the Rust library built on Windows is different than on most other systems systems. Fix both of these issues to support Windows. Note that this commit fixes the Meson-based job in GitHub's CI. Meson auto-detects the availability of Rust, and as the Windows runner has Rust installed by default it already enabled Rust support there. But due to the above issues that job fails consistently. Install Rust on GitLab CI, as well, to improve test coverage there. Based-on-patch-by: Johannes Schindelin Based-on-patch-by: Ezekiel Newren Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- .gitlab-ci.yml | 2 +- Makefile | 14 ++++++++++++-- meson.build | 4 ++++ src/cargo-meson.sh | 11 +++++++++-- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a47d839e39abca..b419a84e2cc660 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -161,7 +161,7 @@ test:mingw64: - saas-windows-medium-amd64 before_script: - *windows_before_script - - choco install -y git meson ninja + - choco install -y git meson ninja rust-ms - Import-Module $env:ChocolateyInstall\helpers\chocolateyProfile.psm1 - refreshenv diff --git a/Makefile b/Makefile index 7ea149598d8ed8..366fd173e70c14 100644 --- a/Makefile +++ b/Makefile @@ -929,10 +929,17 @@ TEST_SHELL_PATH = $(SHELL_PATH) LIB_FILE = libgit.a XDIFF_LIB = xdiff/lib.a REFTABLE_LIB = reftable/libreftable.a + ifdef DEBUG -RUST_LIB = target/debug/libgitcore.a +RUST_TARGET_DIR = target/debug else -RUST_LIB = target/release/libgitcore.a +RUST_TARGET_DIR = target/release +endif + +ifeq ($(uname_S),Windows) +RUST_LIB = $(RUST_TARGET_DIR)/gitcore.lib +else +RUST_LIB = $(RUST_TARGET_DIR)/libgitcore.a endif # xdiff and reftable libs may in turn depend on what is in libgit.a @@ -1538,6 +1545,9 @@ ALL_LDFLAGS = $(LDFLAGS) $(LDFLAGS_APPEND) ifdef WITH_RUST BASIC_CFLAGS += -DWITH_RUST GITLIBS += $(RUST_LIB) +ifeq ($(uname_S),Windows) +EXTLIBS += -luserenv +endif endif ifdef SANITIZE diff --git a/meson.build b/meson.build index ec55d6a5fdfae2..a9c865b2afe50d 100644 --- a/meson.build +++ b/meson.build @@ -1707,6 +1707,10 @@ rust_option = get_option('rust').disable_auto_if(not cargo.found()) if rust_option.allowed() subdir('src') libgit_c_args += '-DWITH_RUST' + + if host_machine.system() == 'windows' + libgit_dependencies += compiler.find_library('userenv') + endif else libgit_sources += [ 'varint.c', diff --git a/src/cargo-meson.sh b/src/cargo-meson.sh index 99400986d93509..3998db04354864 100755 --- a/src/cargo-meson.sh +++ b/src/cargo-meson.sh @@ -26,7 +26,14 @@ then exit $RET fi -if ! cmp "$BUILD_DIR/$BUILD_TYPE/libgitcore.a" "$BUILD_DIR/libgitcore.a" >/dev/null 2>&1 +case "$(cargo -vV | sed -s 's/^host: \(.*\)$/\1/')" in + *-windows-*) + LIBNAME=gitcore.lib;; + *) + LIBNAME=libgitcore.a;; +esac + +if ! cmp "$BUILD_DIR/$BUILD_TYPE/$LIBNAME" "$BUILD_DIR/libgitcore.a" >/dev/null 2>&1 then - cp "$BUILD_DIR/$BUILD_TYPE/libgitcore.a" "$BUILD_DIR/libgitcore.a" + cp "$BUILD_DIR/$BUILD_TYPE/$LIBNAME" "$BUILD_DIR/libgitcore.a" fi From c32aa72466ec9da5762ef56f70ec10b42cab65da Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Oct 2025 10:24:04 -0400 Subject: [PATCH 19/20] sparse-index: improve advice message instructions When an on-disk sparse index is expanded to a full one, it could be due to some worktree state that requires looking at file entries hidden within sparse tree entries. This can be avoided if the worktree is cleaned up and some other issues related to the index state are resolved. Expand the advice message to include all of these cases, since 'git sparse-checkout clean' is not currently capable of handling all cases. In the future, we may improve the behavior of 'git sparse-checkout clean' to handle all of the cases. Helped-by: Elijah Newren Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- sparse-index.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sparse-index.c b/sparse-index.c index 5d14795063b578..76f90da5f5f41e 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -32,8 +32,9 @@ int give_advice_on_expansion = 1; "Your working directory likely has contents that are outside of\n" \ "your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \ "see your sparse-checkout definition and compare it to your working\n" \ - "directory contents. Running 'git sparse-checkout clean' may assist\n" \ - "in this cleanup." + "directory contents. Cleaning up any merge conflicts or staged\n" \ + "changes before running 'git sparse-checkout clean' or 'git\n" \ + "sparse-checkout reapply' may assist in this cleanup." struct modify_index_context { struct index_state *write; From 57da342c786f59eaeb436c18635cc1c7597733d9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 28 Oct 2025 10:28:54 -0700 Subject: [PATCH 20/20] The 25th batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.52.0.adoc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/RelNotes/2.52.0.adoc b/Documentation/RelNotes/2.52.0.adoc index 61211dbe82db10..bfde4bda3b8a08 100644 --- a/Documentation/RelNotes/2.52.0.adoc +++ b/Documentation/RelNotes/2.52.0.adoc @@ -57,6 +57,13 @@ UI, Workflows & Features * Show 'P'ipe command in "git add -p". + * "git sparse-checkout" subcommand learned a new "clean" action to + prune otherwise unused working-tree files that are outside the + areas of interest. + + * "git fast-import" is taught to handle signed tags, just like it + recently learned to handle signed commits, in different ways. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -137,6 +144,8 @@ Performance, Internal Implementation, Development Support etc. * Build procedure for a few credential helpers (in contrib/) have been updated. + * CI improvements to handle the recent Rust integration better. + Fixes since v2.51 -----------------