diff --git a/Documentation/RelNotes/2.51.0.adoc b/Documentation/RelNotes/2.51.0.adoc index 33ae8f9b88cb53..ed968c5342db2c 100644 --- a/Documentation/RelNotes/2.51.0.adoc +++ b/Documentation/RelNotes/2.51.0.adoc @@ -47,6 +47,12 @@ UI, Workflows & Features service names (like smtp) in addition to the numeric port numbers (like 25). + * Lift the limitation to use changed-path filter in "git log" so that + it can be used for a pathspec with multiple literal paths. + + * Clean up the way how signature on commit objects are exported to + and imported from fast-import stream. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -195,6 +201,13 @@ including security updates, are included in this release. expansion. (merge 7d275cd5c0 jb/gpg-program-variable-is-a-pathname later to maint). + * Our header file relied on that the system-supplied + header is not later included, which would override our + macro definitions, but "amazon linux" broke this assumption. Fix + this by preemptively including near the beginning of + ourselves. + (merge 9d3b33125f ps/sane-ctype-workaround later to maint). + * Other code cleanup, docfix, build fix, etc. (merge b257adb571 lo/my-first-ow-doc-update later to maint). (merge 8b34b6a220 ly/sequencer-update-squash-is-fixup-only later to maint). diff --git a/Documentation/git-fast-export.adoc b/Documentation/git-fast-export.adoc index 43bbb4f63ca6bf..297b57bb2efdc2 100644 --- a/Documentation/git-fast-export.adoc +++ b/Documentation/git-fast-export.adoc @@ -50,6 +50,23 @@ resulting tag will have an invalid signature. is the same as how earlier versions of this command without this option behaved. + +When exported, a signature starts with: ++ +gpgsig ++ +where is the Git object hash so either "sha1" or +"sha256", and is the signature type, so "openpgp", +"x509", "ssh" or "unknown". ++ +For example, an OpenPGP signature on a SHA-1 commit starts with +`gpgsig sha1 openpgp`, while an SSH signature on a SHA-256 commit +starts with `gpgsig sha256 ssh`. ++ +While all the signatures of a commit are exported, an importer may +choose to accept only some of them. For example +linkgit:git-fast-import[1] currently stores at most one signature per +Git hash algorithm in each commit. ++ NOTE: This is highly experimental and the format of the data stream may change in the future without compatibility guarantees. diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc index 250d8666521781..d2327842003879 100644 --- a/Documentation/git-fast-import.adoc +++ b/Documentation/git-fast-import.adoc @@ -445,7 +445,7 @@ one). original-oid? ('author' (SP )? SP LT GT SP LF)? 'committer' (SP )? SP LT GT SP LF - ('gpgsig' SP LF data)? + ('gpgsig' SP SP LF data)? ('encoding' SP LF)? data ('from' SP LF)? @@ -518,13 +518,39 @@ their syntax. ^^^^^^^^ The optional `gpgsig` command is used to include a PGP/GPG signature -that signs the commit data. +or other cryptographic signature that signs the commit data. -Here specifies which hashing algorithm is used for this -signature, either `sha1` or `sha256`. +.... + 'gpgsig' SP SP LF data +.... + +The `gpgsig` command takes two arguments: + +* `` specifies which Git object format this signature + applies to, either `sha1` or `sha256`. This allows to know which + representation of the commit was signed (the SHA-1 or the SHA-256 + version) which helps with both signature verification and + interoperability between repos with different hash functions. + +* `` specifies the type of signature, such as + `openpgp`, `x509`, `ssh`, or `unknown`. This is a convenience for + tools that process the stream, so they don't have to parse the ASCII + armor to identify the signature type. + +A commit may have at most one signature for the SHA-1 object format +(stored in the "gpgsig" header) and one for the SHA-256 object format +(stored in the "gpgsig-sha256" header). + +See below for a detailed description of the `data` command which +contains the raw signature data. + +Signatures are not yet checked in the current implementation +though. (Already setting the `extensions.compatObjectFormat` +configuration option might help with verifying both SHA-1 and SHA-256 +object format signatures when it will be implemented.) -NOTE: This is highly experimental and the format of the data stream may -change in the future without compatibility guarantees. +NOTE: This is highly experimental and the format of the `gpgsig` +command may change in the future without compatibility guarantees. `encoding` ^^^^^^^^^^ diff --git a/blame.c b/blame.c index dce5c8d855c438..f1c0670144b67c 100644 --- a/blame.c +++ b/blame.c @@ -1311,7 +1311,7 @@ static void add_bloom_key(struct blame_bloom_data *bd, } bd->keys[bd->nr] = xmalloc(sizeof(struct bloom_key)); - fill_bloom_key(path, strlen(path), bd->keys[bd->nr], bd->settings); + bloom_key_fill(bd->keys[bd->nr], path, strlen(path), bd->settings); bd->nr++; } diff --git a/bloom.c b/bloom.c index 0c8d2cebf9c4ed..b86015f6d1babb 100644 --- a/bloom.c +++ b/bloom.c @@ -107,7 +107,7 @@ int load_bloom_filter_from_graph(struct commit_graph *g, * Not considered to be cryptographically secure. * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm */ -uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len) +static uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len) { const uint32_t c1 = 0xcc9e2d51; const uint32_t c2 = 0x1b873593; @@ -221,9 +221,7 @@ static uint32_t murmur3_seeded_v1(uint32_t seed, const char *data, size_t len) return seed; } -void fill_bloom_key(const char *data, - size_t len, - struct bloom_key *key, +void bloom_key_fill(struct bloom_key *key, const char *data, size_t len, const struct bloom_filter_settings *settings) { int i; @@ -243,7 +241,7 @@ void fill_bloom_key(const char *data, key->hashes[i] = hash0 + i * hash1; } -void clear_bloom_key(struct bloom_key *key) +void bloom_key_clear(struct bloom_key *key) { FREE_AND_NULL(key->hashes); } @@ -280,6 +278,55 @@ void deinit_bloom_filters(void) deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter); } +struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len, + const struct bloom_filter_settings *settings) +{ + struct bloom_keyvec *vec; + const char *p; + size_t sz; + size_t nr = 1; + + p = path; + while (*p) { + /* + * At this point, the path is normalized to use Unix-style + * path separators. This is required due to how the + * changed-path Bloom filters store the paths. + */ + if (*p == '/') + nr++; + p++; + } + + sz = sizeof(struct bloom_keyvec); + sz += nr * sizeof(struct bloom_key); + vec = (struct bloom_keyvec *)xcalloc(1, sz); + if (!vec) + return NULL; + vec->count = nr; + + bloom_key_fill(&vec->key[0], path, len, settings); + nr = 1; + p = path + len - 1; + while (p > path) { + if (*p == '/') { + bloom_key_fill(&vec->key[nr++], path, p - path, settings); + } + p--; + } + assert(nr == vec->count); + return vec; +} + +void bloom_keyvec_free(struct bloom_keyvec *vec) +{ + if (!vec) + return; + for (size_t nr = 0; nr < vec->count; nr++) + bloom_key_clear(&vec->key[nr]); + free(vec); +} + static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED, const struct hashmap_entry *eptr, const struct hashmap_entry *entry_or_key, @@ -500,9 +547,9 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, hashmap_for_each_entry(&pathmap, &iter, e, entry) { struct bloom_key key; - fill_bloom_key(e->path, strlen(e->path), &key, settings); + bloom_key_fill(&key, e->path, strlen(e->path), settings); add_key_to_filter(&key, filter, settings); - clear_bloom_key(&key); + bloom_key_clear(&key); } cleanup: @@ -540,3 +587,26 @@ int bloom_filter_contains(const struct bloom_filter *filter, return 1; } + +int bloom_filter_contains_vec(const struct bloom_filter *filter, + const struct bloom_keyvec *vec, + const struct bloom_filter_settings *settings) +{ + int ret = 1; + + for (size_t nr = 0; ret > 0 && nr < vec->count; nr++) + ret = bloom_filter_contains(filter, &vec->key[nr], settings); + + return ret; +} + +uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len, + int version) +{ + assert(version == 1 || version == 2); + + if (version == 2) + return murmur3_seeded_v2(seed, data, len); + else + return murmur3_seeded_v1(seed, data, len); +} diff --git a/bloom.h b/bloom.h index 6e46489a20129a..92ab2100d390e7 100644 --- a/bloom.h +++ b/bloom.h @@ -74,24 +74,40 @@ struct bloom_key { uint32_t *hashes; }; +/* + * A bloom_keyvec is a vector of bloom_keys, which + * can be used to store multiple keys for a single + * pathspec item. + */ +struct bloom_keyvec { + size_t count; + struct bloom_key key[FLEX_ARRAY]; +}; + int load_bloom_filter_from_graph(struct commit_graph *g, struct bloom_filter *filter, uint32_t graph_pos); +void bloom_key_fill(struct bloom_key *key, const char *data, size_t len, + const struct bloom_filter_settings *settings); +void bloom_key_clear(struct bloom_key *key); + /* - * Calculate the murmur3 32-bit hash value for the given data - * using the given seed. - * Produces a uniformly distributed hash value. - * Not considered to be cryptographically secure. - * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm + * bloom_keyvec_new - Allocate and populate a bloom_keyvec with keys for the + * given path. + * + * This function splits the input path by '/' and generates a bloom key for each + * prefix, in reverse order of specificity. For example, given the input + * "a/b/c", it will generate bloom keys for: + * - "a/b/c" + * - "a/b" + * - "a" + * + * The resulting keys are stored in a newly allocated bloom_keyvec. */ -uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len); - -void fill_bloom_key(const char *data, - size_t len, - struct bloom_key *key, - const struct bloom_filter_settings *settings); -void clear_bloom_key(struct bloom_key *key); +struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len, + const struct bloom_filter_settings *settings); +void bloom_keyvec_free(struct bloom_keyvec *vec); void add_key_to_filter(const struct bloom_key *key, struct bloom_filter *filter, @@ -137,4 +153,18 @@ int bloom_filter_contains(const struct bloom_filter *filter, const struct bloom_key *key, const struct bloom_filter_settings *settings); +/* + * bloom_filter_contains_vec - Check if all keys in a key vector are in the + * Bloom filter. + * + * Returns 1 if **all** keys in the vector are present in the filter, + * 0 if **any** key is not present. + */ +int bloom_filter_contains_vec(const struct bloom_filter *filter, + const struct bloom_keyvec *v, + const struct bloom_filter_settings *settings); + +uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len, + int version); + #endif diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 6a3a17a8cd9132..f4169dc5f3a3ca 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -29,6 +29,7 @@ #include "quote.h" #include "remote.h" #include "blob.h" +#include "gpg-interface.h" static const char *const fast_export_usage[] = { N_("git fast-export []"), @@ -652,6 +653,38 @@ static const char *find_commit_multiline_header(const char *msg, return strbuf_detach(&val, NULL); } +static void print_signature(const char *signature, const char *object_hash) +{ + if (!signature) + return; + + printf("gpgsig %s %s\ndata %u\n%s\n", + object_hash, + get_signature_format(signature), + (unsigned)strlen(signature), + signature); +} + +static const char *append_signatures_for_header(struct string_list *signatures, + const char *pos, + const char *header, + const char *object_hash) +{ + const char *signature; + const char *start = pos; + const char *end = pos; + + while ((signature = find_commit_multiline_header(start + 1, + header, + &end))) { + string_list_append(signatures, signature)->util = (void *)object_hash; + free((char *)signature); + start = end; + } + + return end; +} + static void handle_commit(struct commit *commit, struct rev_info *rev, struct string_list *paths_of_changed_objects) { @@ -660,7 +693,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, const char *author, *author_end, *committer, *committer_end; const char *encoding = NULL; size_t encoding_len; - const char *signature_alg = NULL, *signature = NULL; + struct string_list signatures = STRING_LIST_INIT_DUP; const char *message; char *reencoded = NULL; struct commit_list *p; @@ -700,10 +733,11 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, } if (*commit_buffer_cursor == '\n') { - if ((signature = find_commit_multiline_header(commit_buffer_cursor + 1, "gpgsig", &commit_buffer_cursor))) - signature_alg = "sha1"; - else if ((signature = find_commit_multiline_header(commit_buffer_cursor + 1, "gpgsig-sha256", &commit_buffer_cursor))) - signature_alg = "sha256"; + const char *after_sha1 = append_signatures_for_header(&signatures, commit_buffer_cursor, + "gpgsig", "sha1"); + const char *after_sha256 = append_signatures_for_header(&signatures, commit_buffer_cursor, + "gpgsig-sha256", "sha256"); + commit_buffer_cursor = (after_sha1 > after_sha256) ? after_sha1 : after_sha256; } message = strstr(commit_buffer_cursor, "\n\n"); @@ -769,30 +803,30 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, printf("%.*s\n%.*s\n", (int)(author_end - author), author, (int)(committer_end - committer), committer); - if (signature) { + if (signatures.nr) { switch (signed_commit_mode) { case SIGN_ABORT: die("encountered signed commit %s; use " "--signed-commits= to handle it", oid_to_hex(&commit->object.oid)); case SIGN_WARN_VERBATIM: - warning("exporting signed commit %s", - oid_to_hex(&commit->object.oid)); + warning("exporting %"PRIuMAX" signature(s) for commit %s", + (uintmax_t)signatures.nr, oid_to_hex(&commit->object.oid)); /* fallthru */ case SIGN_VERBATIM: - printf("gpgsig %s\ndata %u\n%s", - signature_alg, - (unsigned)strlen(signature), - signature); + for (size_t i = 0; i < signatures.nr; i++) { + struct string_list_item *item = &signatures.items[i]; + print_signature(item->string, item->util); + } break; case SIGN_WARN_STRIP: - warning("stripping signature from commit %s", + warning("stripping signature(s) from commit %s", oid_to_hex(&commit->object.oid)); /* fallthru */ case SIGN_STRIP: break; } - free((char *)signature); + string_list_clear(&signatures, 0); } if (!reencoded && encoding) printf("encoding %.*s\n", (int)encoding_len, encoding); diff --git a/builtin/fast-import.c b/builtin/fast-import.c index b1389c59211a48..6e7d0c3449df4e 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -29,6 +29,7 @@ #include "commit-reach.h" #include "khash.h" #include "date.h" +#include "gpg-interface.h" #define PACK_ID_BITS 16 #define MAX_PACK_ID ((1<hash_algo is freed */ + char *space = strchr(args, ' '); + + if (!space) + die("Expected gpgsig format: 'gpgsig ', " + "got 'gpgsig %s'", args); + *space = '\0'; + + sig->hash_algo = args; + sig->sig_format = space + 1; + + /* Validate hash algorithm */ + if (strcmp(sig->hash_algo, "sha1") && + strcmp(sig->hash_algo, "sha256")) + die("Unknown git hash algorithm in gpgsig: '%s'", sig->hash_algo); + + /* Validate signature format */ + if (!valid_signature_format(sig->sig_format)) + die("Invalid signature format in gpgsig: '%s'", sig->sig_format); + if (!strcmp(sig->sig_format, "unknown")) + warning("'unknown' signature format in gpgsig"); + + /* Read signature data */ + read_next_command(); + parse_data(&sig->data, 0, NULL); +} + +static void add_gpgsig_to_commit(struct strbuf *commit_data, + const char *header, + struct signature_data *sig) +{ + struct string_list siglines = STRING_LIST_INIT_NODUP; + + if (!sig->hash_algo) + return; + + strbuf_addstr(commit_data, header); + string_list_split_in_place(&siglines, sig->data.buf, "\n", -1); + strbuf_add_separated_string_list(commit_data, "\n ", &siglines); + strbuf_addch(commit_data, '\n'); + string_list_clear(&siglines, 1); + strbuf_release(&sig->data); + free(sig->hash_algo); +} + +static void store_signature(struct signature_data *stored_sig, + struct signature_data *new_sig, + const char *hash_type) +{ + if (stored_sig->hash_algo) { + warning("multiple %s signatures found, " + "ignoring additional signature", + hash_type); + strbuf_release(&new_sig->data); + free(new_sig->hash_algo); + } else { + *stored_sig = *new_sig; + } +} + static void parse_new_commit(const char *arg) { - static struct strbuf sig = STRBUF_INIT; static struct strbuf msg = STRBUF_INIT; - struct string_list siglines = STRING_LIST_INIT_NODUP; + struct signature_data sig_sha1 = { NULL, NULL, STRBUF_INIT }; + struct signature_data sig_sha256 = { NULL, NULL, STRBUF_INIT }; struct branch *b; char *author = NULL; char *committer = NULL; - char *sig_alg = NULL; char *encoding = NULL; struct hash_list *merge_list = NULL; unsigned int merge_count; @@ -2748,13 +2816,23 @@ static void parse_new_commit(const char *arg) } if (!committer) die("Expected committer but didn't get one"); - if (skip_prefix(command_buf.buf, "gpgsig ", &v)) { - sig_alg = xstrdup(v); - read_next_command(); - parse_data(&sig, 0, NULL); + + /* Process signatures (up to 2: one "sha1" and one "sha256") */ + while (skip_prefix(command_buf.buf, "gpgsig ", &v)) { + struct signature_data sig = { NULL, NULL, STRBUF_INIT }; + + parse_one_signature(&sig, v); + + if (!strcmp(sig.hash_algo, "sha1")) + store_signature(&sig_sha1, &sig, "SHA-1"); + else if (!strcmp(sig.hash_algo, "sha256")) + store_signature(&sig_sha256, &sig, "SHA-256"); + else + BUG("parse_one_signature() returned unknown hash algo"); + read_next_command(); - } else - strbuf_setlen(&sig, 0); + } + if (skip_prefix(command_buf.buf, "encoding ", &v)) { encoding = xstrdup(v); read_next_command(); @@ -2828,23 +2906,14 @@ static void parse_new_commit(const char *arg) strbuf_addf(&new_data, "encoding %s\n", encoding); - if (sig_alg) { - if (!strcmp(sig_alg, "sha1")) - strbuf_addstr(&new_data, "gpgsig "); - else if (!strcmp(sig_alg, "sha256")) - strbuf_addstr(&new_data, "gpgsig-sha256 "); - else - die("Expected gpgsig algorithm sha1 or sha256, got %s", sig_alg); - string_list_split_in_place(&siglines, sig.buf, "\n", -1); - strbuf_add_separated_string_list(&new_data, "\n ", &siglines); - strbuf_addch(&new_data, '\n'); - } + + add_gpgsig_to_commit(&new_data, "gpgsig ", &sig_sha1); + add_gpgsig_to_commit(&new_data, "gpgsig-sha256 ", &sig_sha256); + strbuf_addch(&new_data, '\n'); strbuf_addbuf(&new_data, &msg); - string_list_clear(&siglines, 1); free(author); free(committer); - free(sig_alg); free(encoding); if (!store_object(OBJ_COMMIT, &new_data, NULL, &b->oid, next_mark)) diff --git a/gpg-interface.c b/gpg-interface.c index 12e1d5910e94bd..d26c7135b08283 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -144,6 +144,18 @@ static struct gpg_format *get_format_by_sig(const char *sig) return NULL; } +const char *get_signature_format(const char *buf) +{ + struct gpg_format *format = get_format_by_sig(buf); + return format ? format->name : "unknown"; +} + +int valid_signature_format(const char *format) +{ + return (!!get_format_by_name(format) || + !strcmp(format, "unknown")); +} + void signature_check_clear(struct signature_check *sigc) { FREE_AND_NULL(sigc->payload); diff --git a/gpg-interface.h b/gpg-interface.h index e09f12e8d04d92..60ddf8bbfa3833 100644 --- a/gpg-interface.h +++ b/gpg-interface.h @@ -47,6 +47,18 @@ struct signature_check { void signature_check_clear(struct signature_check *sigc); +/* + * Return the format of the signature (like "openpgp", "x509", "ssh" + * or "unknown"). + */ +const char *get_signature_format(const char *buf); + +/* + * Is the signature format valid (like "openpgp", "x509", "ssh" or + * "unknown") + */ +int valid_signature_format(const char *format); + /* * Look at a GPG signed tag object. If such a signature exists, store it in * signature and the signed content in payload. Return 1 if a signature was diff --git a/line-log.c b/line-log.c index 628e3fe3ae18dc..07f2154e84b843 100644 --- a/line-log.c +++ b/line-log.c @@ -1172,12 +1172,13 @@ static int bloom_filter_check(struct rev_info *rev, return 0; while (!result && range) { - fill_bloom_key(range->path, strlen(range->path), &key, rev->bloom_filter_settings); + bloom_key_fill(&key, range->path, strlen(range->path), + rev->bloom_filter_settings); if (bloom_filter_contains(filter, &key, rev->bloom_filter_settings)) result = 1; - clear_bloom_key(&key); + bloom_key_clear(&key); range = range->next; } diff --git a/revision.c b/revision.c index 0ca6a297a6b09c..212ca0de2768b8 100644 --- a/revision.c +++ b/revision.c @@ -675,24 +675,48 @@ static int forbid_bloom_filters(struct pathspec *spec) { if (spec->has_wildcard) return 1; - if (spec->nr > 1) - return 1; if (spec->magic & ~PATHSPEC_LITERAL) return 1; - if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL)) - return 1; + for (size_t nr = 0; nr < spec->nr; nr++) + if (spec->items[nr].magic & ~PATHSPEC_LITERAL) + return 1; return 0; } -static void prepare_to_use_bloom_filter(struct rev_info *revs) +static void release_revisions_bloom_keyvecs(struct rev_info *revs); + +static int convert_pathspec_to_bloom_keyvec(struct bloom_keyvec **out, + const struct pathspec_item *pi, + const struct bloom_filter_settings *settings) { - struct pathspec_item *pi; char *path_alloc = NULL; - const char *path, *p; + const char *path; size_t len; - int path_component_nr = 1; + int res = 0; + + /* remove single trailing slash from path, if needed */ + if (pi->len > 0 && pi->match[pi->len - 1] == '/') { + path_alloc = xmemdupz(pi->match, pi->len - 1); + path = path_alloc; + } else + path = pi->match; + len = strlen(path); + if (!len) { + res = -1; + goto cleanup; + } + + *out = bloom_keyvec_new(path, len, settings); + +cleanup: + free(path_alloc); + return res; +} + +static void prepare_to_use_bloom_filter(struct rev_info *revs) +{ if (!revs->commits) return; @@ -708,48 +732,14 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) if (!revs->pruning.pathspec.nr) return; - pi = &revs->pruning.pathspec.items[0]; - - /* remove single trailing slash from path, if needed */ - if (pi->len > 0 && pi->match[pi->len - 1] == '/') { - path_alloc = xmemdupz(pi->match, pi->len - 1); - path = path_alloc; - } else - path = pi->match; - - len = strlen(path); - if (!len) { - revs->bloom_filter_settings = NULL; - free(path_alloc); - return; - } - - p = path; - while (*p) { - /* - * At this point, the path is normalized to use Unix-style - * path separators. This is required due to how the - * changed-path Bloom filters store the paths. - */ - if (*p == '/') - path_component_nr++; - p++; - } + revs->bloom_keyvecs_nr = revs->pruning.pathspec.nr; + CALLOC_ARRAY(revs->bloom_keyvecs, revs->bloom_keyvecs_nr); - revs->bloom_keys_nr = path_component_nr; - ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr); - - fill_bloom_key(path, len, &revs->bloom_keys[0], - revs->bloom_filter_settings); - path_component_nr = 1; - - p = path + len - 1; - while (p > path) { - if (*p == '/') - fill_bloom_key(path, p - path, - &revs->bloom_keys[path_component_nr++], - revs->bloom_filter_settings); - p--; + for (int i = 0; i < revs->pruning.pathspec.nr; i++) { + if (convert_pathspec_to_bloom_keyvec(&revs->bloom_keyvecs[i], + &revs->pruning.pathspec.items[i], + revs->bloom_filter_settings)) + goto fail; } if (trace2_is_enabled() && !bloom_filter_atexit_registered) { @@ -757,14 +747,18 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) bloom_filter_atexit_registered = 1; } - free(path_alloc); + return; + +fail: + revs->bloom_filter_settings = NULL; + release_revisions_bloom_keyvecs(revs); } static int check_maybe_different_in_bloom_filter(struct rev_info *revs, struct commit *commit) { struct bloom_filter *filter; - int result = 1, j; + int result = 0; if (!revs->repo->objects->commit_graph) return -1; @@ -779,10 +773,10 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, return -1; } - for (j = 0; result && j < revs->bloom_keys_nr; j++) { - result = bloom_filter_contains(filter, - &revs->bloom_keys[j], - revs->bloom_filter_settings); + for (size_t nr = 0; !result && nr < revs->bloom_keyvecs_nr; nr++) { + result = bloom_filter_contains_vec(filter, + revs->bloom_keyvecs[nr], + revs->bloom_filter_settings); } if (result) @@ -823,7 +817,7 @@ static int rev_compare_tree(struct rev_info *revs, return REV_TREE_SAME; } - if (revs->bloom_keys_nr && !nth_parent) { + if (revs->bloom_keyvecs_nr && !nth_parent) { bloom_ret = check_maybe_different_in_bloom_filter(revs, commit); if (bloom_ret == 0) @@ -850,7 +844,7 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit, if (!t1) return 0; - if (!nth_parent && revs->bloom_keys_nr) { + if (!nth_parent && revs->bloom_keyvecs_nr) { bloom_ret = check_maybe_different_in_bloom_filter(revs, commit); if (!bloom_ret) return 1; @@ -3202,6 +3196,14 @@ static void release_revisions_mailmap(struct string_list *mailmap) static void release_revisions_topo_walk_info(struct topo_walk_info *info); +static void release_revisions_bloom_keyvecs(struct rev_info *revs) +{ + for (size_t nr = 0; nr < revs->bloom_keyvecs_nr; nr++) + bloom_keyvec_free(revs->bloom_keyvecs[nr]); + FREE_AND_NULL(revs->bloom_keyvecs); + revs->bloom_keyvecs_nr = 0; +} + static void free_void_commit_list(void *list) { free_commit_list(list); @@ -3230,11 +3232,7 @@ void release_revisions(struct rev_info *revs) clear_decoration(&revs->treesame, free); line_log_free(revs); oidset_clear(&revs->missing_commits); - - for (int i = 0; i < revs->bloom_keys_nr; i++) - clear_bloom_key(&revs->bloom_keys[i]); - FREE_AND_NULL(revs->bloom_keys); - revs->bloom_keys_nr = 0; + release_revisions_bloom_keyvecs(revs); } static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child) diff --git a/revision.h b/revision.h index 6d369cdad6a473..ac843f58d0cf81 100644 --- a/revision.h +++ b/revision.h @@ -62,7 +62,7 @@ struct repository; struct rev_info; struct string_list; struct saved_parents; -struct bloom_key; +struct bloom_keyvec; struct bloom_filter_settings; struct option; struct parse_opt_ctx_t; @@ -360,8 +360,8 @@ struct rev_info { /* Commit graph bloom filter fields */ /* The bloom filter key(s) for the pathspec */ - struct bloom_key *bloom_keys; - int bloom_keys_nr; + struct bloom_keyvec **bloom_keyvecs; + int bloom_keyvecs_nr; /* * The bloom filter settings used to generate the key. diff --git a/sane-ctype.h b/sane-ctype.h index cbea1b299b79a5..4f476c4381680b 100644 --- a/sane-ctype.h +++ b/sane-ctype.h @@ -1,6 +1,15 @@ #ifndef SANE_CTYPE_H #define SANE_CTYPE_H +/* + * Explicitly include so that its header guards kick in from here on. + * This ensures that the file won't get included after "sane-ctype.h", as that + * would otherwise lead to a compiler error because the function declarations + * for `int isascii(int c)` et al would be mangled by our macros with the same + * name. + */ +#include + /* Sane ctype - no locale, and works with signed chars */ #undef isascii #undef isspace diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c index 9aa2c5a5926d47..3283544bd33db6 100644 --- a/t/helper/test-bloom.c +++ b/t/helper/test-bloom.c @@ -12,13 +12,13 @@ static struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS; static void add_string_to_filter(const char *data, struct bloom_filter *filter) { struct bloom_key key; - fill_bloom_key(data, strlen(data), &key, &settings); + bloom_key_fill(&key, data, strlen(data), &settings); printf("Hashes:"); for (size_t i = 0; i < settings.num_hashes; i++) printf("0x%08x|", key.hashes[i]); printf("\n"); add_key_to_filter(&key, filter, &settings); - clear_bloom_key(&key); + bloom_key_clear(&key); } static void print_bloom_filter(struct bloom_filter *filter) { @@ -61,13 +61,13 @@ int cmd__bloom(int argc, const char **argv) uint32_t hashed; if (argc < 3) usage(bloom_usage); - hashed = murmur3_seeded_v2(0, argv[2], strlen(argv[2])); + hashed = test_bloom_murmur3_seeded(0, argv[2], strlen(argv[2]), 2); printf("Murmur3 Hash with seed=0:0x%08x\n", hashed); } if (!strcmp(argv[1], "get_murmur3_seven_highbit")) { uint32_t hashed; - hashed = murmur3_seeded_v2(0, "\x99\xaa\xbb\xcc\xdd\xee\xff", 7); + hashed = test_bloom_murmur3_seeded(0, "\x99\xaa\xbb\xcc\xdd\xee\xff", 7, 2); printf("Murmur3 Hash with seed=0:0x%08x\n", hashed); } diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index 8910d53cac1146..639868ac562f9e 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -66,8 +66,9 @@ sane_unset GIT_TRACE2_CONFIG_PARAMS setup () { rm -f "$TRASH_DIRECTORY/trace.perf" && - git -c core.commitGraph=false log --pretty="format:%s" $1 >log_wo_bloom && - GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.perf" git -c core.commitGraph=true log --pretty="format:%s" $1 >log_w_bloom + eval git -c core.commitGraph=false log --pretty="format:%s" "$1" >log_wo_bloom && + eval "GIT_TRACE2_PERF=\"$TRASH_DIRECTORY/trace.perf\"" \ + git -c core.commitGraph=true log --pretty="format:%s" "$1" >log_w_bloom } test_bloom_filters_used () { @@ -138,10 +139,6 @@ test_expect_success 'git log with --walk-reflogs does not use Bloom filters' ' test_bloom_filters_not_used "--walk-reflogs -- A" ' -test_expect_success 'git log -- multiple path specs does not use Bloom filters' ' - test_bloom_filters_not_used "-- file4 A/file1" -' - test_expect_success 'git log -- "." pathspec at root does not use Bloom filters' ' test_bloom_filters_not_used "-- ." ' @@ -151,9 +148,17 @@ test_expect_success 'git log with wildcard that resolves to a single path uses B test_bloom_filters_used "-- *renamed" ' -test_expect_success 'git log with wildcard that resolves to a multiple paths does not uses Bloom filters' ' - test_bloom_filters_not_used "-- *" && - test_bloom_filters_not_used "-- file*" +test_expect_success 'git log with multiple literal paths uses Bloom filter' ' + test_bloom_filters_used "-- file4 A/file1" && + test_bloom_filters_used "-- *" && + test_bloom_filters_used "-- file*" +' + +test_expect_success 'git log with path contains a wildcard does not use Bloom filter' ' + test_bloom_filters_not_used "-- file\*" && + test_bloom_filters_not_used "-- A/\* file4" && + test_bloom_filters_not_used "-- file4 A/\*" && + test_bloom_filters_not_used "-- * A/\*" ' test_expect_success 'setup - add commit-graph to the chain without Bloom filters' ' diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh index 76619765fcbc98..46700dbc401734 100755 --- a/t/t9350-fast-export.sh +++ b/t/t9350-fast-export.sh @@ -314,7 +314,7 @@ test_expect_success GPG 'signed-commits=abort' ' test_expect_success GPG 'signed-commits=verbatim' ' git fast-export --signed-commits=verbatim --reencode=no commit-signing >output && - grep "^gpgsig sha" output && + test_grep -E "^gpgsig $GIT_DEFAULT_HASH openpgp" output && grep "encoding ISO-8859-1" output && ( cd new && @@ -328,7 +328,7 @@ test_expect_success GPG 'signed-commits=verbatim' ' test_expect_success GPG 'signed-commits=warn-verbatim' ' git fast-export --signed-commits=warn-verbatim --reencode=no commit-signing >output 2>err && - grep "^gpgsig sha" output && + test_grep -E "^gpgsig $GIT_DEFAULT_HASH openpgp" output && grep "encoding ISO-8859-1" output && test -s err && ( @@ -369,6 +369,62 @@ test_expect_success GPG 'signed-commits=warn-strip' ' ' +test_expect_success GPGSM 'setup X.509 signed commit' ' + + git checkout -b x509-signing main && + test_config gpg.format x509 && + test_config user.signingkey $GIT_COMMITTER_EMAIL && + echo "X.509 content" >file && + git add file && + git commit -S -m "X.509 signed commit" && + X509_COMMIT=$(git rev-parse HEAD) && + git checkout main + +' + +test_expect_success GPGSM 'round-trip X.509 signed commit' ' + + git fast-export --signed-commits=verbatim x509-signing >output && + test_grep -E "^gpgsig $GIT_DEFAULT_HASH x509" output && + ( + cd new && + git fast-import && + git cat-file commit refs/heads/x509-signing >actual && + grep "^gpgsig" actual && + IMPORTED=$(git rev-parse refs/heads/x509-signing) && + test $X509_COMMIT = $IMPORTED + ) file && + git add file && + git commit -S -m "SSH signed commit" && + SSH_COMMIT=$(git rev-parse HEAD) && + git checkout main + +' + +test_expect_success GPGSSH 'round-trip SSH signed commit' ' + + git fast-export --signed-commits=verbatim ssh-signing >output && + test_grep -E "^gpgsig $GIT_DEFAULT_HASH ssh" output && + ( + cd new && + git fast-import && + git cat-file commit refs/heads/ssh-signing >actual && + grep "^gpgsig" actual && + IMPORTED=$(git rev-parse refs/heads/ssh-signing) && + test $SSH_COMMIT = $IMPORTED + ) explicit-sha256/B && + git -C explicit-sha256 add B && + test_tick && + git -C explicit-sha256 commit -S -m "signed" B && + SHA256_B=$(git -C explicit-sha256 rev-parse dual-signed) && + + # Create the corresponding SHA-1 commit + SHA1_B=$(git -C explicit-sha256 rev-parse --output-object-format=sha1 dual-signed) && + + # Check that the resulting SHA-1 commit has both signatures + echo $SHA1_B | git -C explicit-sha256 cat-file --batch >out && + test_grep -E "^gpgsig " out && + test_grep -E "^gpgsig-sha256 " out +' + +test_expect_success GPG 'export and import of doubly signed commit' ' + git -C explicit-sha256 fast-export --signed-commits=verbatim dual-signed >output && + test_grep -E "^gpgsig sha1 openpgp" output && + test_grep -E "^gpgsig sha256 openpgp" output && + + ( + cd new && + git fast-import && + git cat-file commit refs/heads/dual-signed >actual && + test_grep -E "^gpgsig " actual && + test_grep -E "^gpgsig-sha256 " actual && + IMPORTED=$(git rev-parse refs/heads/dual-signed) && + if test "$GIT_DEFAULT_HASH" = "sha1" + then + test $SHA1_B = $IMPORTED + else + test $SHA256_B = $IMPORTED + fi + )