Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 235 additions & 15 deletions builtin/last-modified.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,32 @@
#include "bloom.h"
#include "builtin.h"
#include "commit-graph.h"
#include "commit-slab.h"
#include "commit.h"
#include "config.h"
#include "environment.h"
#include "diff.h"
#include "diffcore.h"
#include "environment.h"
#include "ewah/ewok.h"
#include "hashmap.h"
#include "hex.h"
#include "log-tree.h"
#include "object-name.h"
#include "object.h"
#include "parse-options.h"
#include "prio-queue.h"
#include "quote.h"
#include "repository.h"
#include "revision.h"

/* Remember to update object flag allocation in object.h */
#define PARENT1 (1u<<16) /* used instead of SEEN */
#define PARENT2 (1u<<17) /* used instead of BOTTOM, BOUNDARY */

/*
 * One tracked path. Entries live in the `paths` hashmap of
 * `struct last_modified` and are looked up by their path string.
 */
struct last_modified_entry {
/* Linkage for membership in the `paths` hashmap. */
struct hashmap_entry hashent;
/* Object id compared against the diff's oid in mark_path(). */
struct object_id oid;
/* Bloom key for this path, tested against a commit's changed-path filter. */
struct bloom_key key;
/* Index of this path in `lm->all_paths`; also its bit position in the bitmaps. */
size_t diff_idx;
/* NUL-terminated path, stored inline at the end of the entry. */
const char path[FLEX_ARRAY];
};

Expand All @@ -37,13 +43,45 @@ static int last_modified_entry_hashcmp(const void *unused UNUSED,
return strcmp(ent1->path, path ? path : ent2->path);
}

/*
 * Hold a bitmap for each commit we're working with. In the bitmap, each bit
 * represents a path in `lm->all_paths`. An active bit indicates the path still
 * needs to be associated with a commit.
 */
define_commit_slab(active_paths_for_commit, struct bitmap *);

/*
 * State shared by the functions that make up one last-modified run.
 */
struct last_modified {
/* Tracked paths: `struct last_modified_entry` items looked up by path. */
struct hashmap paths;
/* Revision-walk state; also supplies the repository and diff options used here. */
struct rev_info rev;
/* NOTE(review): presumably set by the -r / -t options; option parsing is not visible here. */
bool recursive;
bool show_trees;

/*
 * Path strings indexed by `last_modified_entry.diff_idx`. The strings
 * point into the entries of `paths`; only the array itself is allocated
 * and freed separately.
 */
const char **all_paths;
/* Number of entries in `all_paths`. */
size_t all_paths_nr;
/* Per-commit bitmaps with one bit per `all_paths` index. */
struct active_paths_for_commit active_paths;

/* 'scratch' to avoid allocating a bitmap every process_parent() */
struct bitmap *scratch;
};

/*
 * Return the active-path bitmap stored for commit 'c', creating it on
 * first use. A new bitmap is sized to hold one bit for every entry of
 * lm->all_paths.
 */
static struct bitmap *active_paths_for(struct last_modified *lm, struct commit *c)
{
	struct bitmap **slot = active_paths_for_commit_at(&lm->active_paths, c);
	struct bitmap *paths = *slot;

	if (paths)
		return paths;

	paths = bitmap_word_alloc(lm->all_paths_nr / BITS_IN_EWORD + 1);
	*slot = paths;
	return paths;
}

/*
 * Release the active-path bitmap stored for commit 'c', if there is one,
 * and reset the slot so a later lookup starts from scratch.
 */
static void active_paths_free(struct last_modified *lm, struct commit *c)
{
	struct bitmap **slot = active_paths_for_commit_at(&lm->active_paths, c);

	if (!*slot)
		return;

	bitmap_free(*slot);
	*slot = NULL;
}

static void last_modified_release(struct last_modified *lm)
{
struct hashmap_iter iter;
Expand All @@ -54,6 +92,8 @@ static void last_modified_release(struct last_modified *lm)

hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent);
release_revisions(&lm->rev);

free(lm->all_paths);
}

struct last_modified_callback_data {
Expand Down Expand Up @@ -146,7 +186,7 @@ static void mark_path(const char *path, const struct object_id *oid,
* Is it arriving at a version of interest, or is it from a side branch
* which did not contribute to the final state?
*/
if (!oideq(oid, &ent->oid))
if (oid && !oideq(oid, &ent->oid))
return;

last_modified_emit(data->lm, path, data->commit);
Expand Down Expand Up @@ -196,7 +236,17 @@ static void last_modified_diff(struct diff_queue_struct *q,
}
}

static bool maybe_changed_path(struct last_modified *lm, struct commit *origin)
/*
 * Hand the path at bit 'pos' over from a commit to one of its parents:
 * the bit is cleared in the commit's bitmap 'c' and then set in the
 * parent's bitmap 'p'.
 */
static void pass_to_parent(struct bitmap *c, struct bitmap *p, size_t pos)
{
	bitmap_unset(c, pos);
	bitmap_set(p, pos);
}

static bool maybe_changed_path(struct last_modified *lm,
struct commit *origin,
struct bitmap *active)
{
struct bloom_filter *filter;
struct last_modified_entry *ent;
Expand All @@ -213,49 +263,212 @@ static bool maybe_changed_path(struct last_modified *lm, struct commit *origin)
return true;

hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) {
if (active && !bitmap_get(active, ent->diff_idx))
continue;

if (bloom_filter_contains(filter, &ent->key,
lm->rev.bloom_filter_settings))
return true;
}
return false;
}

/*
 * Handle one parent of commit 'c'.
 *
 * Diffs 'parent' against 'c' (for the first parent only when
 * maybe_changed_path() reports that the diff may be worthwhile), passes
 * every path that is active on 'c' and does not show up in that diff to
 * the parent's bitmap, and queues the parent if it now has active paths.
 *
 *   lm       - run state; lm->scratch is used as temporary storage and is
 *              completely zeroed again before returning
 *   queue    - queue of commits that still have to be visited
 *   c        - the commit currently being processed
 *   active_c - active-path bitmap of 'c'; bits handed to the parent are
 *              cleared from it
 *   parent   - the parent of 'c' to compare against
 *   parent_i - zero-based position of 'parent' in c's parent list
 */
static void process_parent(struct last_modified *lm,
			   struct prio_queue *queue,
			   struct commit *c, struct bitmap *active_c,
			   struct commit *parent, int parent_i)
{
	struct bitmap *active_p;

	repo_parse_commit(lm->rev.repo, parent);
	active_p = active_paths_for(lm, parent);

	/*
	 * The first time entering this function for this commit (i.e. first parent)
	 * see if Bloom filters will tell us it's worth to do the diff.
	 */
	if (parent_i || maybe_changed_path(lm, c, active_c)) {
		diff_tree_oid(&parent->object.oid,
			      &c->object.oid, "", &lm->rev.diffopt);
		diffcore_std(&lm->rev.diffopt);
	}

	/*
	 * Test each path for TREESAME-ness against the parent. If a path is
	 * TREESAME, pass it on to this parent.
	 *
	 * First, collect all paths that are *not* TREESAME in 'scratch'.
	 * Then, pass paths that *are* TREESAME and active to the parent.
	 */
	for (int i = 0; i < diff_queued_diff.nr; i++) {
		struct diff_filepair *fp = diff_queued_diff.queue[i];
		const char *path = fp->two->path;
		struct last_modified_entry *ent =
			hashmap_get_entry_from_hash(&lm->paths, strhash(path), path,
						    struct last_modified_entry, hashent);
		if (ent) {
			size_t k = ent->diff_idx;
			if (bitmap_get(active_c, k))
				bitmap_set(lm->scratch, k);
		}
	}
	for (size_t i = 0; i < lm->all_paths_nr; i++) {
		if (bitmap_get(active_c, i) && !bitmap_get(lm->scratch, i))
			pass_to_parent(active_c, active_p, i);
	}

	/*
	 * If parent has any active paths, put it on the queue (if not already).
	 */
	if (!bitmap_is_empty(active_p) && !(parent->object.flags & PARENT1)) {
		parent->object.flags |= PARENT1;
		prio_queue_put(queue, parent);
	}
	/* A parent that is not queued will never be visited; drop its bitmap. */
	if (!(parent->object.flags & PARENT1))
		active_paths_free(lm, parent);

	/*
	 * Reset 'scratch' for the next call. 'word_alloc' counts words, not
	 * bytes, so it has to be scaled by the word size; passing it to
	 * memset() unscaled would clear only a fraction of the bitmap and
	 * could leave stale "not TREESAME" bits behind.
	 */
	memset(lm->scratch->words, 0x0,
	       lm->scratch->word_alloc * sizeof(*lm->scratch->words));
	diff_queue_clear(&diff_queued_diff);
}

static int last_modified_run(struct last_modified *lm)
{
int max_count, queue_popped = 0;
struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
struct prio_queue not_queue = { compare_commits_by_gen_then_commit_date };
struct commit_list *list;
struct last_modified_callback_data data = { .lm = lm };

lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK;
lm->rev.diffopt.format_callback = last_modified_diff;
lm->rev.diffopt.format_callback_data = &data;
lm->rev.no_walk = 1;

prepare_revision_walk(&lm->rev);

while (hashmap_get_size(&lm->paths)) {
data.commit = get_revision(&lm->rev);
if (!data.commit)
BUG("paths remaining beyond boundary in last-modified");
max_count = lm->rev.max_count;

init_active_paths_for_commit(&lm->active_paths);
lm->scratch = bitmap_word_alloc(lm->all_paths_nr);

/*
* lm->rev.commits holds the set of boundary commits for our walk.
*
* Loop through each such commit, and place it in the appropriate queue.
*/
for (list = lm->rev.commits; list; list = list->next) {
struct commit *c = list->item;

if (c->object.flags & BOTTOM) {
prio_queue_put(&not_queue, c);
c->object.flags |= PARENT2;
} else if (!(c->object.flags & PARENT1)) {
/*
* If the commit is a starting point (and hasn't been
* seen yet), then initialize the set of interesting
* paths, too.
*/
struct bitmap *active;

prio_queue_put(&queue, c);
c->object.flags |= PARENT1;

active = active_paths_for(lm, c);
for (size_t i = 0; i < lm->all_paths_nr; i++)
bitmap_set(active, i);
}
}

if (data.commit->object.flags & BOUNDARY) {
while (queue.nr) {
int parent_i;
struct commit_list *p;
struct commit *c = prio_queue_get(&queue);
struct bitmap *active_c = active_paths_for(lm, c);

if ((0 <= max_count && max_count < ++queue_popped) ||
(c->object.flags & PARENT2)) {
/*
* Either a boundary commit, or we have already seen too
* many others. Either way, stop here.
*/
c->object.flags |= PARENT2 | BOUNDARY;
data.commit = c;
diff_tree_oid(lm->rev.repo->hash_algo->empty_tree,
&data.commit->object.oid, "",
&lm->rev.diffopt);
&c->object.oid,
"", &lm->rev.diffopt);
diff_flush(&lm->rev.diffopt);
goto cleanup;
}

break;
/*
* Otherwise, make sure that 'c' isn't reachable from anything
* in the '--not' queue.
*/
repo_parse_commit(lm->rev.repo, c);

while (not_queue.nr) {
struct commit_list *np;
struct commit *n = prio_queue_get(&not_queue);

repo_parse_commit(lm->rev.repo, n);

for (np = n->parents; np; np = np->next) {
if (!(np->item->object.flags & PARENT2)) {
prio_queue_put(&not_queue, np->item);
np->item->object.flags |= PARENT2;
}
}

if (commit_graph_generation(n) < commit_graph_generation(c))
break;
}

if (!maybe_changed_path(lm, data.commit))
continue;
/*
* Look at each parent and pass on each path that's TREESAME
* with that parent. Stop early when no active paths remain.
*/
for (p = c->parents, parent_i = 0; p; p = p->next, parent_i++) {
process_parent(lm, &queue,
c, active_c,
p->item, parent_i);

if (bitmap_is_empty(active_c))
break;
}

/*
* Paths that remain active, or not TREESAME with any parent,
* were changed by 'c'.
*/
if (!bitmap_is_empty(active_c)) {
data.commit = c;
for (size_t i = 0; i < lm->all_paths_nr; i++) {
if (bitmap_get(active_c, i))
mark_path(lm->all_paths[i], NULL, &data);
}
}

log_tree_commit(&lm->rev, data.commit);
cleanup:
active_paths_free(lm, c);
}

if (hashmap_get_size(&lm->paths))
BUG("paths remaining beyond boundary in last-modified");

clear_prio_queue(&not_queue);
clear_prio_queue(&queue);
clear_active_paths_for_commit(&lm->active_paths);
bitmap_free(lm->scratch);

return 0;
}

static int last_modified_init(struct last_modified *lm, struct repository *r,
const char *prefix, int argc, const char **argv)
{
struct hashmap_iter iter;
struct last_modified_entry *ent;

hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0);

repo_init_revisions(r, &lm->rev, prefix);
Expand All @@ -280,6 +493,13 @@ static int last_modified_init(struct last_modified *lm, struct repository *r,
if (populate_paths_from_revs(lm) < 0)
return error(_("unable to setup last-modified"));

CALLOC_ARRAY(lm->all_paths, hashmap_get_size(&lm->paths));
lm->all_paths_nr = 0;
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) {
ent->diff_idx = lm->all_paths_nr++;
lm->all_paths[ent->diff_idx] = ent->path;
}

return 0;
}

Expand Down
1 change: 1 addition & 0 deletions object.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ void object_array_init(struct object_array *array);
* http-push.c: 11-----14
* commit-graph.c: 15
* commit-reach.c: 16-----19
* builtin/last-modified.c: 1617
* sha1-name.c: 20
* list-objects-filter.c: 21
* bloom.c: 2122
Expand Down
2 changes: 1 addition & 1 deletion t/t8020-last-modified.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ test_expect_success 'last-modified recursive' '

test_expect_success 'last-modified recursive with show-trees' '
check_last_modified -r -t <<-\EOF
3 a
3 a/b
3 a/b/file
3 a
2 a/file
1 file
EOF
Expand Down