diff --git a/.gitignore b/.gitignore index 1803023427af81..802ce70e4830f8 100644 --- a/.gitignore +++ b/.gitignore @@ -87,6 +87,7 @@ /git-init-db /git-interpret-trailers /git-instaweb +/git-last-modified /git-log /git-ls-files /git-ls-remote diff --git a/Documentation/RelNotes/2.52.0.adoc b/Documentation/RelNotes/2.52.0.adoc index fa72515358ec09..60660314911cc1 100644 --- a/Documentation/RelNotes/2.52.0.adoc +++ b/Documentation/RelNotes/2.52.0.adoc @@ -14,6 +14,9 @@ UI, Workflows & Features * A new subcommand "git repo" gives users a way to grab various repository characteristics. + * A new command "git last-modified" has been added to show the closest + ancestor commit that touched each path. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -40,6 +43,10 @@ Performance, Internal Implementation, Development Support etc. * Discord has been added to the first contribution documentation as another way to ask for help. + * Inspired by Ezekiel's recent effort to showcase Rust interface, the + hash function implementation used to hash lines has been updated + to the one used for ELF symbol lookup by Glibc. + Fixes since v2.51 ----------------- @@ -130,6 +137,20 @@ including security updates, are included in this release. instead of `gitgitgadget/git`. (merge 37001cdbc4 ds/doc-ggg-pr-fork-clarify later to maint). + * Makefile tried to run multiple "cargo build" which would not work + very well; serialize their execution to work around it. + (merge 0eeacde50e da/cargo-serialize later to maint). + + * "git repack --path-walk" lost objects in some corner cases, which + has been corrected. + (merge 93afe9b060 ds/path-walk-repack-fix later to maint). + + * "git ls-files <pathspec>..." should not necessarily have to expand + the index fully if a sparsified directory is excluded by the + pathspec; the code is taught to expand the index on demand to avoid + this.
+ (merge 681f26bccc ds/ls-files-lazy-unsparse later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 823d537fa7 kh/doc-git-log-markup-fix later to maint). (merge cf7efa4f33 rj/t6137-cygwin-fix later to maint). diff --git a/Documentation/git-last-modified.adoc b/Documentation/git-last-modified.adoc new file mode 100644 index 00000000000000..602843e09598a5 --- /dev/null +++ b/Documentation/git-last-modified.adoc @@ -0,0 +1,54 @@ +git-last-modified(1) +==================== + +NAME +---- +git-last-modified - EXPERIMENTAL: Show when files were last modified + + +SYNOPSIS +-------- +[synopsis] +git last-modified [--recursive] [--show-trees] [<revision-range>] [[--] <path>...] + +DESCRIPTION +----------- + +Shows which commit last modified each of the relevant files and subdirectories. +A commit renaming a path, or changing its mode, is also taken into account. + +THIS COMMAND IS EXPERIMENTAL. THE BEHAVIOR MAY CHANGE. + +OPTIONS +------- + +`-r`:: +`--recursive`:: + Instead of showing tree entries, step into subtrees and show all entries + inside them recursively. + +`-t`:: +`--show-trees`:: + Show tree entries even when recursing into them. It has no effect + without `--recursive`. + +`<revision-range>`:: + Only traverse commits in the specified revision range. When no + `<revision-range>` is specified, it defaults to `HEAD` (i.e. the whole + history leading to the current commit). For a complete list of ways to + spell `<revision-range>`, see the 'Specifying Ranges' section of + linkgit:gitrevisions[7]. + +`[--] <path>...`:: + For each _<path>_ given, the commit which last modified it is returned. + Without an optional path parameter, all files and subdirectories + in the path traversal are included in the output. + +SEE ALSO +-------- +linkgit:git-blame[1], +linkgit:git-log[1].
+ +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/meson.build b/Documentation/meson.build index 41f43e033699f8..e34965c5b0e236 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -74,6 +74,7 @@ manpages = { 'git-init.adoc' : 1, 'git-instaweb.adoc' : 1, 'git-interpret-trailers.adoc' : 1, + 'git-last-modified.adoc' : 1, 'git-log.adoc' : 1, 'git-ls-files.adoc' : 1, 'git-ls-remote.adoc' : 1, diff --git a/Makefile b/Makefile index 555b7f4dc3c0e1..4c95affadb5e26 100644 --- a/Makefile +++ b/Makefile @@ -1265,6 +1265,7 @@ BUILTIN_OBJS += builtin/hook.o BUILTIN_OBJS += builtin/index-pack.o BUILTIN_OBJS += builtin/init-db.o BUILTIN_OBJS += builtin/interpret-trailers.o +BUILTIN_OBJS += builtin/last-modified.o BUILTIN_OBJS += builtin/log.o BUILTIN_OBJS += builtin/ls-files.o BUILTIN_OBJS += builtin/ls-remote.o @@ -3945,13 +3946,12 @@ unit-tests: $(UNIT_TEST_PROGS) $(CLAR_TEST_PROG) t/helper/test-tool$X $(MAKE) -C t/ unit-tests .PHONY: libgit-sys libgit-rs -libgit-sys libgit-rs: - $(QUIET)(\ - cd contrib/$@ && \ - cargo build \ - ) +libgit-sys: + $(QUIET)cargo build --manifest-path contrib/libgit-sys/Cargo.toml +libgit-rs: libgit-sys + $(QUIET)cargo build --manifest-path contrib/libgit-rs/Cargo.toml ifdef INCLUDE_LIBGIT_RS -all:: libgit-sys libgit-rs +all:: libgit-rs endif LIBGIT_PUB_OBJS += contrib/libgit-sys/public_symbol_export.o diff --git a/builtin.h b/builtin.h index e6458e6fb9a21a..1b35565fbd9a3c 100644 --- a/builtin.h +++ b/builtin.h @@ -176,6 +176,7 @@ int cmd_hook(int argc, const char **argv, const char *prefix, struct repository int cmd_index_pack(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_init_db(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_interpret_trailers(int argc, const char **argv, const char *prefix, struct repository *repo); +int cmd_last_modified(int argc, const char **argv, const char *prefix, struct repository *repo); int 
cmd_log_reflog(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_log(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_ls_files(int argc, const char **argv, const char *prefix, struct repository *repo); diff --git a/builtin/last-modified.c b/builtin/last-modified.c new file mode 100644 index 00000000000000..886ba12cb5f1a3 --- /dev/null +++ b/builtin/last-modified.c @@ -0,0 +1,326 @@ +#include "git-compat-util.h" +#include "bloom.h" +#include "builtin.h" +#include "commit-graph.h" +#include "commit.h" +#include "config.h" +#include "environment.h" +#include "diff.h" +#include "diffcore.h" +#include "environment.h" +#include "hashmap.h" +#include "hex.h" +#include "log-tree.h" +#include "object-name.h" +#include "object.h" +#include "parse-options.h" +#include "quote.h" +#include "repository.h" +#include "revision.h" + +struct last_modified_entry { + struct hashmap_entry hashent; + struct object_id oid; + struct bloom_key key; + const char path[FLEX_ARRAY]; +}; + +static int last_modified_entry_hashcmp(const void *unused UNUSED, + const struct hashmap_entry *hent1, + const struct hashmap_entry *hent2, + const void *path) +{ + const struct last_modified_entry *ent1 = + container_of(hent1, const struct last_modified_entry, hashent); + const struct last_modified_entry *ent2 = + container_of(hent2, const struct last_modified_entry, hashent); + return strcmp(ent1->path, path ? 
path : ent2->path); +} + +struct last_modified { + struct hashmap paths; + struct rev_info rev; + bool recursive; + bool show_trees; +}; + +static void last_modified_release(struct last_modified *lm) +{ + struct hashmap_iter iter; + struct last_modified_entry *ent; + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) + bloom_key_clear(&ent->key); + + hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); + release_revisions(&lm->rev); +} + +struct last_modified_callback_data { + struct last_modified *lm; + struct commit *commit; +}; + +static void add_path_from_diff(struct diff_queue_struct *q, + struct diff_options *opt UNUSED, void *data) +{ + struct last_modified *lm = data; + + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + struct last_modified_entry *ent; + const char *path = p->two->path; + + FLEX_ALLOC_STR(ent, path, path); + oidcpy(&ent->oid, &p->two->oid); + if (lm->rev.bloom_filter_settings) + bloom_key_fill(&ent->key, path, strlen(path), + lm->rev.bloom_filter_settings); + hashmap_entry_init(&ent->hashent, strhash(ent->path)); + hashmap_add(&lm->paths, &ent->hashent); + } +} + +static int populate_paths_from_revs(struct last_modified *lm) +{ + int num_interesting = 0; + struct diff_options diffopt; + + /* + * Create a copy of `struct diff_options`. In this copy a callback is + * set that when called adds entries to `paths` in `struct last_modified`. + * This copy is used to diff the tree of the target revision against an + * empty tree. This results in all paths in the target revision being + * listed. After `paths` is populated, we don't need this copy no more. 
+ */ + memcpy(&diffopt, &lm->rev.diffopt, sizeof(diffopt)); + copy_pathspec(&diffopt.pathspec, &lm->rev.diffopt.pathspec); + diffopt.output_format = DIFF_FORMAT_CALLBACK; + diffopt.format_callback = add_path_from_diff; + diffopt.format_callback_data = lm; + + for (size_t i = 0; i < lm->rev.pending.nr; i++) { + struct object_array_entry *obj = lm->rev.pending.objects + i; + + if (obj->item->flags & UNINTERESTING) + continue; + + if (num_interesting++) + return error(_("last-modified can only operate on one tree at a time")); + + diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, + &obj->item->oid, "", &diffopt); + diff_flush(&diffopt); + } + clear_pathspec(&diffopt.pathspec); + + return 0; +} + +static void last_modified_emit(struct last_modified *lm, + const char *path, const struct commit *commit) + +{ + if (commit->object.flags & BOUNDARY) + putchar('^'); + printf("%s\t", oid_to_hex(&commit->object.oid)); + + if (lm->rev.diffopt.line_termination) + write_name_quoted(path, stdout, '\n'); + else + printf("%s%c", path, '\0'); +} + +static void mark_path(const char *path, const struct object_id *oid, + struct last_modified_callback_data *data) +{ + struct last_modified_entry *ent; + + /* Is it even a path that we are interested in? */ + ent = hashmap_get_entry_from_hash(&data->lm->paths, strhash(path), path, + struct last_modified_entry, hashent); + if (!ent) + return; + + /* + * Is it arriving at a version of interest, or is it from a side branch + * which did not contribute to the final state? 
+ */ + if (!oideq(oid, &ent->oid)) + return; + + last_modified_emit(data->lm, path, data->commit); + + hashmap_remove(&data->lm->paths, &ent->hashent, path); + bloom_key_clear(&ent->key); + free(ent); +} + +static void last_modified_diff(struct diff_queue_struct *q, + struct diff_options *opt UNUSED, void *cbdata) +{ + struct last_modified_callback_data *data = cbdata; + + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + switch (p->status) { + case DIFF_STATUS_DELETED: + /* + * There's no point in feeding a deletion, as it could + * not have resulted in our current state, which + * actually has the file. + */ + break; + + default: + /* + * Otherwise, we care only that we somehow arrived at + * a final oid state. Note that this covers some + * potentially controversial areas, including: + * + * 1. A rename or copy will be found, as it is the + * first time the content has arrived at the given + * path. + * + * 2. Even a non-content modification like a mode or + * type change will trigger it. + * + * We take the inclusive approach for now, and find + * anything which impacts the path. Options to tweak + * the behavior (e.g., to "--follow" the content across + * renames) can come later. 
+ */ + mark_path(p->two->path, &p->two->oid, data); + break; + } + } +} + +static bool maybe_changed_path(struct last_modified *lm, struct commit *origin) +{ + struct bloom_filter *filter; + struct last_modified_entry *ent; + struct hashmap_iter iter; + + if (!lm->rev.bloom_filter_settings) + return true; + + if (commit_graph_generation(origin) == GENERATION_NUMBER_INFINITY) + return true; + + filter = get_bloom_filter(lm->rev.repo, origin); + if (!filter) + return true; + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + if (bloom_filter_contains(filter, &ent->key, + lm->rev.bloom_filter_settings)) + return true; + } + return false; +} + +static int last_modified_run(struct last_modified *lm) +{ + struct last_modified_callback_data data = { .lm = lm }; + + lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK; + lm->rev.diffopt.format_callback = last_modified_diff; + lm->rev.diffopt.format_callback_data = &data; + + prepare_revision_walk(&lm->rev); + + while (hashmap_get_size(&lm->paths)) { + data.commit = get_revision(&lm->rev); + if (!data.commit) + BUG("paths remaining beyond boundary in last-modified"); + + if (data.commit->object.flags & BOUNDARY) { + diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, + &data.commit->object.oid, "", + &lm->rev.diffopt); + diff_flush(&lm->rev.diffopt); + + break; + } + + if (!maybe_changed_path(lm, data.commit)) + continue; + + log_tree_commit(&lm->rev, data.commit); + } + + return 0; +} + +static int last_modified_init(struct last_modified *lm, struct repository *r, + const char *prefix, int argc, const char **argv) +{ + hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0); + + repo_init_revisions(r, &lm->rev, prefix); + lm->rev.def = "HEAD"; + lm->rev.combine_merges = 1; + lm->rev.show_root_diff = 1; + lm->rev.boundary = 1; + lm->rev.no_commit_id = 1; + lm->rev.diff = 1; + lm->rev.diffopt.flags.recursive = lm->recursive; + lm->rev.diffopt.flags.tree_in_recursive = lm->show_trees; + + argc = 
setup_revisions(argc, argv, &lm->rev, NULL); + if (argc > 1) { + error(_("unknown last-modified argument: %s"), argv[1]); + return argc; + } + + lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo); + + if (populate_paths_from_revs(lm) < 0) + return error(_("unable to setup last-modified")); + + return 0; +} + +int cmd_last_modified(int argc, const char **argv, const char *prefix, + struct repository *repo) +{ + int ret; + struct last_modified lm = { 0 }; + + const char * const last_modified_usage[] = { + N_("git last-modified [--recursive] [--show-trees] " + "[] [[--] ...]"), + NULL + }; + + struct option last_modified_options[] = { + OPT_BOOL('r', "recursive", &lm.recursive, + N_("recurse into subtrees")), + OPT_BOOL('t', "show-trees", &lm.show_trees, + N_("show tree entries when recursing into subtrees")), + OPT_END() + }; + + argc = parse_options(argc, argv, prefix, last_modified_options, + last_modified_usage, + PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); + + repo_config(repo, git_default_config, NULL); + + ret = last_modified_init(&lm, repo, prefix, argc, argv); + if (ret > 0) + usage_with_options(last_modified_usage, + last_modified_options); + if (ret) + goto out; + + ret = last_modified_run(&lm); + if (ret) + goto out; + +out: + last_modified_release(&lm); + + return ret; +} diff --git a/builtin/ls-files.c b/builtin/ls-files.c index c06a6f33e41c56..b148607f7a1468 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -414,14 +414,21 @@ static void show_files(struct repository *repo, struct dir_struct *dir) if (!(show_cached || show_stage || show_deleted || show_modified)) return; - if (!show_sparse_dirs) - ensure_full_index(repo->index); - for (i = 0; i < repo->index->cache_nr; i++) { const struct cache_entry *ce = repo->index->cache[i]; struct stat st; int stat_err; + if (S_ISSPARSEDIR(ce->ce_mode) && !show_sparse_dirs) { + /* + * This is the first time we've hit a sparse dir, + * so expansion will leave the first 'i' 
entries + * alone. + */ + ensure_full_index(repo->index); + ce = repo->index->cache[i]; + } + construct_fullname(&fullname, repo, ce); if ((dir->flags & DIR_SHOW_IGNORED) && diff --git a/command-list.txt b/command-list.txt index 1b0bdee00dd4f1..accd3d0c4b5524 100644 --- a/command-list.txt +++ b/command-list.txt @@ -124,6 +124,7 @@ git-index-pack plumbingmanipulators git-init mainporcelain init git-instaweb ancillaryinterrogators complete git-interpret-trailers purehelpers +git-last-modified plumbinginterrogators git-log mainporcelain info git-ls-files plumbinginterrogators git-ls-remote plumbinginterrogators diff --git a/commit-graph.c b/commit-graph.c index 3cd9e73e2aa092..2f20f66cfdd276 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -812,7 +812,12 @@ int corrected_commit_dates_enabled(struct repository *r) struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r) { - struct commit_graph *g = r->objects->commit_graph; + struct commit_graph *g; + + if (!prepare_commit_graph(r)) + return NULL; + + g = r->objects->commit_graph; while (g) { if (g->bloom_filter_settings) return g->bloom_filter_settings; diff --git a/git.c b/git.c index 5dc210b7b4a28b..d020eef021cbea 100644 --- a/git.c +++ b/git.c @@ -565,6 +565,7 @@ static struct cmd_struct commands[] = { { "init", cmd_init_db }, { "init-db", cmd_init_db }, { "interpret-trailers", cmd_interpret_trailers, RUN_SETUP_GENTLY }, + { "last-modified", cmd_last_modified, RUN_SETUP }, { "log", cmd_log, RUN_SETUP }, { "ls-files", cmd_ls_files, RUN_SETUP }, { "ls-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, diff --git a/meson.build b/meson.build index e8ec0eca1650a5..b3dfcc04972601 100644 --- a/meson.build +++ b/meson.build @@ -607,6 +607,7 @@ builtin_sources = [ 'builtin/index-pack.c', 'builtin/init-db.c', 'builtin/interpret-trailers.c', + 'builtin/last-modified.c', 'builtin/log.c', 'builtin/ls-files.c', 'builtin/ls-remote.c', diff --git a/path-walk.c b/path-walk.c index 2d4ddbadd50f78..f1ceed99e94ca9 
100644 --- a/path-walk.c +++ b/path-walk.c @@ -105,6 +105,24 @@ static void push_to_stack(struct path_walk_context *ctx, prio_queue_put(&ctx->path_stack, xstrdup(path)); } +static void add_path_to_list(struct path_walk_context *ctx, + const char *path, + enum object_type type, + struct object_id *oid, + int interesting) +{ + struct type_and_oid_list *list = strmap_get(&ctx->paths_to_lists, path); + + if (!list) { + CALLOC_ARRAY(list, 1); + list->type = type; + strmap_put(&ctx->paths_to_lists, path, list); + } + + list->maybe_interesting |= interesting; + oid_array_append(&list->oids, oid); +} + static int add_tree_entries(struct path_walk_context *ctx, const char *base_path, struct object_id *oid) @@ -129,7 +147,6 @@ static int add_tree_entries(struct path_walk_context *ctx, init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size); while (tree_entry(&desc, &entry)) { - struct type_and_oid_list *list; struct object *o; /* Not actually true, but we will ignore submodules later. */ enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB; @@ -190,17 +207,10 @@ static int add_tree_entries(struct path_walk_context *ctx, continue; } - if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) { - CALLOC_ARRAY(list, 1); - list->type = type; - strmap_put(&ctx->paths_to_lists, path.buf, list); - } - push_to_stack(ctx, path.buf); - - if (!(o->flags & UNINTERESTING)) - list->maybe_interesting = 1; + add_path_to_list(ctx, path.buf, type, &entry.oid, + !(o->flags & UNINTERESTING)); - oid_array_append(&list->oids, &entry.oid); + push_to_stack(ctx, path.buf); } free_tree_buffer(tree); @@ -377,15 +387,9 @@ static int setup_pending_objects(struct path_walk_info *info, if (!info->trees) continue; if (pending->path) { - struct type_and_oid_list *list; char *path = *pending->path ? 
xstrfmt("%s/", pending->path) : xstrdup(""); - if (!(list = strmap_get(&ctx->paths_to_lists, path))) { - CALLOC_ARRAY(list, 1); - list->type = OBJ_TREE; - strmap_put(&ctx->paths_to_lists, path, list); - } - oid_array_append(&list->oids, &obj->oid); + add_path_to_list(ctx, path, OBJ_TREE, &obj->oid, 1); free(path); } else { /* assume a root tree, such as a lightweight tag. */ @@ -396,19 +400,10 @@ static int setup_pending_objects(struct path_walk_info *info, case OBJ_BLOB: if (!info->blobs) continue; - if (pending->path) { - struct type_and_oid_list *list; - char *path = pending->path; - if (!(list = strmap_get(&ctx->paths_to_lists, path))) { - CALLOC_ARRAY(list, 1); - list->type = OBJ_BLOB; - strmap_put(&ctx->paths_to_lists, path, list); - } - oid_array_append(&list->oids, &obj->oid); - } else { - /* assume a root tree, such as a lightweight tag. */ + if (pending->path) + add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1); + else oid_array_append(&tagged_blobs->oids, &obj->oid); - } break; case OBJ_COMMIT: diff --git a/t/Makefile b/t/Makefile index 757674e727348c..ab8a5b54aa6ce0 100644 --- a/t/Makefile +++ b/t/Makefile @@ -189,15 +189,9 @@ perf: .PHONY: libgit-sys-test libgit-rs-test libgit-sys-test: - $(QUIET)(\ - cd ../contrib/libgit-sys && \ - cargo test \ - ) -libgit-rs-test: - $(QUIET)(\ - cd ../contrib/libgit-rs && \ - cargo test \ - ) + $(QUIET)cargo test --manifest-path ../contrib/libgit-sys/Cargo.toml +libgit-rs-test: libgit-sys-test + $(QUIET)cargo test --manifest-path ../contrib/libgit-rs/Cargo.toml ifdef INCLUDE_LIBGIT_RS -all:: libgit-sys-test libgit-rs-test +all:: libgit-rs-test endif diff --git a/t/meson.build b/t/meson.build index baeeba2ce652d1..82af229be3efbe 100644 --- a/t/meson.build +++ b/t/meson.build @@ -951,6 +951,7 @@ integration_tests = [ 't8012-blame-colors.sh', 't8013-blame-ignore-revs.sh', 't8014-blame-ignore-fuzzy.sh', + 't8020-last-modified.sh', 't9001-send-email.sh', 't9002-column.sh', 't9003-help-autocorrect.sh', @@ 
-1144,6 +1145,7 @@ benchmarks = [ 'perf/p7820-grep-engines.sh', 'perf/p7821-grep-engines-fixed.sh', 'perf/p7822-grep-perl-character.sh', + 'perf/p8020-last-modified.sh', 'perf/p9210-scalar.sh', 'perf/p9300-fast-import-export.sh', ] diff --git a/t/perf/p8020-last-modified.sh b/t/perf/p8020-last-modified.sh new file mode 100755 index 00000000000000..cb1f98d3db9f4e --- /dev/null +++ b/t/perf/p8020-last-modified.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +test_description='last-modified perf tests' +. ./perf-lib.sh + +test_perf_default_repo + +test_perf 'top-level last-modified' ' + git last-modified HEAD +' + +test_perf 'top-level recursive last-modified' ' + git last-modified -r HEAD +' + +test_perf 'subdir last-modified' ' + git ls-tree -d HEAD >subtrees && + path="$(head -n 1 subtrees | cut -f2)" && + git last-modified -r HEAD -- "$path" +' + +test_done diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index d8101139b40aa0..b0f691c151a7d0 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -1506,6 +1506,8 @@ test_expect_success 'sparse-index is not expanded' ' ensure_not_expanded reset --hard && ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1 && + ensure_not_expanded ls-files deep/deeper1 && + echo >>sparse-index/README.md && ensure_not_expanded add -A && echo >>sparse-index/extra.txt && @@ -1607,6 +1609,17 @@ test_expect_success 'describe tested on all' ' test_all_match git describe --dirty ' +test_expect_success 'ls-files filtering and expansion' ' + init_repos && + + # This filtering will hit a sparse directory midway + # through the iteration. + test_all_match git ls-files deep && + + # This pathspec will filter the index to only a sparse + # directory. 
+ test_all_match git ls-files folder1 +' test_expect_success 'sparse-index is not expanded: describe' ' init_repos && diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index 611755cc139b96..73b78bdd887d80 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -838,4 +838,67 @@ test_expect_success '-n overrides repack.updateServerInfo=true' ' test_server_info_missing ' +test_expect_success 'pending objects are repacked appropriately' ' + test_when_finished rm -rf pending && + git init pending && + + ( + cd pending && + + # Commit file, a/b/c and never change them. + mkdir -p a/b && + echo singleton >file && + echo stuff >a/b/c && + echo more >a/d && + git add file a && + git commit -m "single blobs" && + + # Files a/d and a/e will not be singletons. + echo d >a/d && + echo e >a/e && + git add a && + git commit -m "more blobs" && + + # This use of a sparse index helps to force + # test that the cache-tree is walked, too. + git sparse-checkout set --sparse-index a x && + + # Create staged changes: + # * a/e now has multiple versions. + # * a/i now has only one version. + echo f >a/d && + echo h >a/e && + echo i >a/i && + git add a && + + # Stage and unstage a change to make use of + # resolve-undo cache and how that impacts fsck. + mkdir x && + echo y >x/y && + git add x && + xy=$(git rev-parse :x/y) && + git rm --cached x/y && + + # The blob for x/y must persist through repacks, + # but fsck currently ignores the REUC extension + # for finding links to the blob. + cat >expect <<-EOF && + dangling blob $xy + EOF + + # Bring the loose objects into a packfile to avoid + # leftovers in next test. Without this, the loose + # objects persist and the test succeeds for other + # reasons. + git repack -adf && + git fsck >out && + test_cmp expect out && + + # Test path walk version with pack.useSparse. 
+ git -c pack.useSparse=true repack -adf --path-walk && + git fsck >out && + test_cmp expect out + ) +' + test_done diff --git a/t/t8020-last-modified.sh b/t/t8020-last-modified.sh new file mode 100755 index 00000000000000..5eb4cef0359212 --- /dev/null +++ b/t/t8020-last-modified.sh @@ -0,0 +1,210 @@ +#!/bin/sh + +test_description='last-modified tests' + +. ./test-lib.sh + +test_expect_success 'setup' ' + test_commit 1 file && + mkdir a && + test_commit 2 a/file && + mkdir a/b && + test_commit 3 a/b/file +' + +test_expect_success 'cannot run last-modified on two trees' ' + test_must_fail git last-modified HEAD HEAD~1 +' + +check_last_modified() { + local indir= && + while test $# != 0 + do + case "$1" in + -C) + indir="$2" + shift + ;; + *) + break + ;; + esac && + shift + done && + + cat >expect && + test_when_finished "rm -f tmp.*" && + git ${indir:+-C "$indir"} last-modified "$@" >tmp.1 && + git name-rev --annotate-stdin --name-only --tags \ + tmp.2 && + tr '\t' ' ' actual && + test_cmp expect actual +} + +test_expect_success 'last-modified non-recursive' ' + check_last_modified <<-\EOF + 3 a + 1 file + EOF +' + +test_expect_success 'last-modified recursive' ' + check_last_modified -r <<-\EOF + 3 a/b/file + 2 a/file + 1 file + EOF +' + +test_expect_success 'last-modified recursive with show-trees' ' + check_last_modified -r -t <<-\EOF + 3 a + 3 a/b + 3 a/b/file + 2 a/file + 1 file + EOF +' + +test_expect_success 'last-modified non-recursive with show-trees' ' + check_last_modified -t <<-\EOF + 3 a + 1 file + EOF +' + +test_expect_success 'last-modified subdir' ' + check_last_modified a <<-\EOF + 3 a + EOF +' + +test_expect_success 'last-modified subdir recursive' ' + check_last_modified -r a <<-\EOF + 3 a/b/file + 2 a/file + EOF +' + +test_expect_success 'last-modified from non-HEAD commit' ' + check_last_modified HEAD^ <<-\EOF + 2 a + 1 file + EOF +' + +test_expect_success 'last-modified from subdir defaults to root' ' + check_last_modified -C a <<-\EOF + 3 a + 
1 file + EOF +' + +test_expect_success 'last-modified from subdir uses relative pathspecs' ' + check_last_modified -C a -r b <<-\EOF + 3 a/b/file + EOF +' + +test_expect_success 'limit last-modified traversal by count' ' + check_last_modified -1 <<-\EOF + 3 a + ^2 file + EOF +' + +test_expect_success 'limit last-modified traversal by commit' ' + check_last_modified HEAD~2..HEAD <<-\EOF + 3 a + ^1 file + EOF +' + +test_expect_success 'only last-modified files in the current tree' ' + git rm -rf a && + git commit -m "remove a" && + check_last_modified <<-\EOF + 1 file + EOF +' + +test_expect_success 'cross merge boundaries in blaming' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit m1 && + git checkout HEAD^ && + git rm -rf . && + test_commit m2 && + git merge m1 && + check_last_modified <<-\EOF + m2 m2.t + m1 m1.t + EOF +' + +test_expect_success 'last-modified merge for resolved conflicts' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit c1 conflict && + git checkout HEAD^ && + git rm -rf . && + test_commit c2 conflict && + test_must_fail git merge c1 && + test_commit resolved conflict && + check_last_modified conflict <<-\EOF + resolved conflict + EOF +' + + +# Consider `file` with this content through history: +# +# A---B---B-------B---B +# \ / +# C---D +test_expect_success 'last-modified merge ignores content from branch' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit a1 file A && + test_commit a2 file B && + test_commit a3 file C && + test_commit a4 file D && + git checkout a2 && + git merge --no-commit --no-ff a4 && + git checkout a2 -- file && + git merge --continue && + check_last_modified <<-\EOF + a2 file + EOF +' + +# Consider `file` with this content through history: +# +# A---B---B---C---D---B---B +# \ / +# B-------B +test_expect_success 'last-modified merge undoes changes' ' + git checkout HEAD^0 && + git rm -rf . 
&& + test_commit b1 file A && + test_commit b2 file B && + test_commit b3 file C && + test_commit b4 file D && + git checkout b2 && + test_commit b5 file2 2 && + git checkout b4 && + git merge --no-commit --no-ff b5 && + git checkout b2 -- file && + git merge --continue && + check_last_modified <<-\EOF + b5 file2 + b2 file + EOF +' + +test_expect_success 'last-modified complains about unknown arguments' ' + test_must_fail git last-modified --foo 2>err && + grep "unknown last-modified argument: --foo" err +' + +test_done diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 444a108f87c0b6..78d1cf74b1cc48 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -249,7 +249,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags) return 1; } -static unsigned long xdl_hash_record_with_whitespace(char const **data, +unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags) { unsigned long ha = 5381; char const *ptr = *data; @@ -294,19 +294,67 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data, return ha; } -unsigned long xdl_hash_record(char const **data, char const *top, long flags) { - unsigned long ha = 5381; +/* + * Compiler reassociation barrier: pretend to modify X and Y to disallow + * changing evaluation order with respect to following uses of X and Y. + */ +#ifdef __GNUC__ +#define REASSOC_FENCE(x, y) __asm__("" : "+r"(x), "+r"(y)) +#else +#define REASSOC_FENCE(x, y) +#endif + +unsigned long xdl_hash_record_verbatim(char const **data, char const *top) { + unsigned long ha = 5381, c0, c1; char const *ptr = *data; - - if (flags & XDF_WHITESPACE_FLAGS) - return xdl_hash_record_with_whitespace(data, top, flags); - +#if 0 + /* + * The baseline form of the optimized loop below. This is the djb2 + * hash (the above function uses a variant with XOR instead of ADD). 
+ */ for (; ptr < top && *ptr != '\n'; ptr++) { ha += (ha << 5); - ha ^= (unsigned long) *ptr; + ha += (unsigned long) *ptr; } *data = ptr < top ? ptr + 1: ptr; - +#else + /* Process two characters per iteration. */ + if (top - ptr >= 2) do { + if ((c0 = ptr[0]) == '\n') { + *data = ptr + 1; + return ha; + } + if ((c1 = ptr[1]) == '\n') { + *data = ptr + 2; + c0 += ha; + REASSOC_FENCE(c0, ha); + ha = ha * 32 + c0; + return ha; + } + /* + * Combine characters C0 and C1 into the hash HA. We have + * HA = (HA * 33 + C0) * 33 + C1, and we want to ensure + * that dependency chain over HA is just one multiplication + * and one addition, i.e. we want to evaluate this as + * HA = HA * 33 * 33 + (C0 * 33 + C1), and likewise prefer + * (C0 * 32 + (C0 + C1)) for the expression in parenthesis. + */ + ha *= 33 * 33; + c1 += c0; + REASSOC_FENCE(c1, c0); + c1 += c0 * 32; + REASSOC_FENCE(c1, ha); + ha += c1; + + ptr += 2; + } while (ptr < top - 1); + *data = top; + if (ptr < top && (c0 = ptr[0]) != '\n') { + c0 += ha; + REASSOC_FENCE(c0, ha); + ha = ha * 32 + c0; + } +#endif return ha; } diff --git a/xdiff/xutils.h b/xdiff/xutils.h index fd0bba94e8b4d2..13f68310472a69 100644 --- a/xdiff/xutils.h +++ b/xdiff/xutils.h @@ -34,7 +34,15 @@ void *xdl_cha_alloc(chastore_t *cha); long xdl_guess_lines(mmfile_t *mf, long sample); int xdl_blankline(const char *line, long size, long flags); int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags); -unsigned long xdl_hash_record(char const **data, char const *top, long flags); +unsigned long xdl_hash_record_verbatim(char const **data, char const *top); +unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags); +static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags) +{ + if (flags & XDF_WHITESPACE_FLAGS) + return xdl_hash_record_with_whitespace(data, top, flags); + else + return xdl_hash_record_verbatim(data, top); +} unsigned int 
xdl_hashbits(unsigned int size); int xdl_num_out(char *out, long val); int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,