From 4f9c8d896397a1748132060d3465e8573c861633 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:17 -0700 Subject: [PATCH 01/54] string-list: report programming error with BUG Passing a string list that has .strdup_strings bit unset to string_list_split(), or one that has .strdup_strings bit set to string_list_split_in_place(), is a programmer error. Do not use die() to abort the execution. Use BUG() instead. As a developer-facing message, the message string itself should be a lot more concise, but let's keep the original one for now. Signed-off-by: Junio C Hamano --- string-list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/string-list.c b/string-list.c index 53faaa84207bf9..0cb920e9b0d520 100644 --- a/string-list.c +++ b/string-list.c @@ -283,7 +283,7 @@ int string_list_split(struct string_list *list, const char *string, const char *p = string, *end; if (!list->strdup_strings) - die("internal error in string_list_split(): " + BUG("internal error in string_list_split(): " "list->strdup_strings must be set"); for (;;) { count++; @@ -309,7 +309,7 @@ int string_list_split_in_place(struct string_list *list, char *string, char *p = string, *end; if (list->strdup_strings) - die("internal error in string_list_split_in_place(): " + BUG("internal error in string_list_split_in_place(): " "list->strdup_strings must not be set"); for (;;) { count++; From 9f6dfe43c8a55b833ae16486bcafe29b543461f9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:18 -0700 Subject: [PATCH 02/54] string-list: align string_list_split() with its _in_place() counterpart The string_list_split_in_place() function was updated by 52acddf3 (string-list: multi-delimiter `string_list_split_in_place()`, 2023-04-24) to take more than one delimiter characters, hoping that we can later use it to replace our uses of strtok(). We however did not make a matching change to the string_list_split() function, which is very similar. 
Before giving both functions more features in future commits, allow string_list_split() to also take more than one delimiter character to make them closer to each other. Signed-off-by: Junio C Hamano --- builtin/blame.c | 2 +- builtin/merge.c | 2 +- builtin/var.c | 2 +- connect.c | 2 +- diff.c | 2 +- fetch-pack.c | 2 +- notes.c | 2 +- parse-options.c | 2 +- pathspec.c | 2 +- protocol.c | 2 +- ref-filter.c | 4 ++-- setup.c | 3 ++- string-list.c | 4 ++-- string-list.h | 16 ++++++++-------- t/helper/test-path-utils.c | 3 ++- t/helper/test-ref-store.c | 2 +- t/unit-tests/u-string-list.c | 16 ++++++++-------- transport.c | 2 +- upload-pack.c | 2 +- 19 files changed, 37 insertions(+), 35 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index 91586e6852b09e..70a64604018e99 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -420,7 +420,7 @@ static void parse_color_fields(const char *s) colorfield_nr = 0; /* Ideally this would be stripped and split at the same time? */ - string_list_split(&l, s, ',', -1); + string_list_split(&l, s, ",", -1); ALLOC_GROW(colorfield, colorfield_nr + 1, colorfield_alloc); for_each_string_list_item(item, &l) { diff --git a/builtin/merge.c b/builtin/merge.c index 18b22c0a26d633..893f8950bfc057 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -875,7 +875,7 @@ static void add_strategies(const char *string, unsigned attr) if (string) { struct string_list list = STRING_LIST_INIT_DUP; struct string_list_item *item; - string_list_split(&list, string, ' ', -1); + string_list_split(&list, string, " ", -1); for_each_string_list_item(item, &list) append_strategy(get_strategy(item->string)); string_list_clear(&list, 0); diff --git a/builtin/var.c b/builtin/var.c index ada642a9fe5257..4ae7af0eff96f7 100644 --- a/builtin/var.c +++ b/builtin/var.c @@ -181,7 +181,7 @@ static void list_vars(void) if (ptr->multivalued && *val) { struct string_list list = STRING_LIST_INIT_DUP; - string_list_split(&list, val, '\n', -1); + 
string_list_split(&list, val, "\n", -1); for (size_t i = 0; i < list.nr; i++) printf("%s=%s\n", ptr->name, list.items[i].string); string_list_clear(&list, 0); diff --git a/connect.c b/connect.c index e77287f426cdfd..867b12bde5a412 100644 --- a/connect.c +++ b/connect.c @@ -407,7 +407,7 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list, * name. Subsequent fields (symref-target and peeled) are optional and * don't have a particular order. */ - if (string_list_split(&line_sections, line, ' ', -1) < 2) { + if (string_list_split(&line_sections, line, " ", -1) < 2) { ret = 0; goto out; } diff --git a/diff.c b/diff.c index dca87e164fb615..a81949a4220655 100644 --- a/diff.c +++ b/diff.c @@ -327,7 +327,7 @@ static unsigned parse_color_moved_ws(const char *arg) struct string_list l = STRING_LIST_INIT_DUP; struct string_list_item *i; - string_list_split(&l, arg, ',', -1); + string_list_split(&l, arg, ",", -1); for_each_string_list_item(i, &l) { struct strbuf sb = STRBUF_INIT; diff --git a/fetch-pack.c b/fetch-pack.c index c1be9b76eb6373..98662706968dba 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1914,7 +1914,7 @@ static void fetch_pack_config(void) char *str; if (!git_config_get_string("fetch.uriprotocols", &str) && str) { - string_list_split(&uri_protocols, str, ',', -1); + string_list_split(&uri_protocols, str, ",", -1); free(str); } } diff --git a/notes.c b/notes.c index 97b995f3f2da6f..6afcf088b97485 100644 --- a/notes.c +++ b/notes.c @@ -892,7 +892,7 @@ static int string_list_add_note_lines(struct string_list *list, * later, along with any empty strings that came from empty * lines within the file. 
*/ - string_list_split(list, data, '\n', -1); + string_list_split(list, data, "\n", -1); free(data); return 0; } diff --git a/parse-options.c b/parse-options.c index 5224203ffe7bf8..9e7cb7519276c0 100644 --- a/parse-options.c +++ b/parse-options.c @@ -1338,7 +1338,7 @@ static enum parse_opt_result usage_with_options_internal(struct parse_opt_ctx_t if (!saw_empty_line && !*str) saw_empty_line = 1; - string_list_split(&list, str, '\n', -1); + string_list_split(&list, str, "\n", -1); for (j = 0; j < list.nr; j++) { const char *line = list.items[j].string; diff --git a/pathspec.c b/pathspec.c index a3ddd701c740c9..de325f7ef99df6 100644 --- a/pathspec.c +++ b/pathspec.c @@ -201,7 +201,7 @@ static void parse_pathspec_attr_match(struct pathspec_item *item, const char *va if (!value || !*value) die(_("attr spec must not be empty")); - string_list_split(&list, value, ' ', -1); + string_list_split(&list, value, " ", -1); string_list_remove_empty_items(&list, 0); item->attr_check = attr_check_alloc(); diff --git a/protocol.c b/protocol.c index bae7226ff4074f..54b9f49c01b599 100644 --- a/protocol.c +++ b/protocol.c @@ -61,7 +61,7 @@ enum protocol_version determine_protocol_version_server(void) if (git_protocol) { struct string_list list = STRING_LIST_INIT_DUP; const struct string_list_item *item; - string_list_split(&list, git_protocol, ':', -1); + string_list_split(&list, git_protocol, ":", -1); for_each_string_list_item(item, &list) { const char *value; diff --git a/ref-filter.c b/ref-filter.c index f9f2c512a8c6e0..4edfb9c83b2393 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -435,7 +435,7 @@ static int remote_ref_atom_parser(struct ref_format *format UNUSED, } atom->u.remote_ref.nobracket = 0; - string_list_split(&params, arg, ',', -1); + string_list_split(&params, arg, ",", -1); for (i = 0; i < params.nr; i++) { const char *s = params.items[i].string; @@ -831,7 +831,7 @@ static int align_atom_parser(struct ref_format *format UNUSED, align->position = ALIGN_LEFT; - 
string_list_split(&params, arg, ',', -1); + string_list_split(&params, arg, ",", -1); for (i = 0; i < params.nr; i++) { const char *s = params.items[i].string; int position; diff --git a/setup.c b/setup.c index 6f52dab64cacb6..b9f5eb8b51e2de 100644 --- a/setup.c +++ b/setup.c @@ -1460,8 +1460,9 @@ static enum discovery_result setup_git_directory_gently_1(struct strbuf *dir, if (env_ceiling_dirs) { int empty_entry_found = 0; + static const char path_sep[] = { PATH_SEP, '\0' }; - string_list_split(&ceiling_dirs, env_ceiling_dirs, PATH_SEP, -1); + string_list_split(&ceiling_dirs, env_ceiling_dirs, path_sep, -1); filter_string_list(&ceiling_dirs, 0, canonicalize_ceiling_entry, &empty_entry_found); ceil_offset = longest_ancestor_length(dir->buf, &ceiling_dirs); diff --git a/string-list.c b/string-list.c index 0cb920e9b0d520..2284a009cba6a2 100644 --- a/string-list.c +++ b/string-list.c @@ -277,7 +277,7 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ } int string_list_split(struct string_list *list, const char *string, - int delim, int maxsplit) + const char *delim, int maxsplit) { int count = 0; const char *p = string, *end; @@ -291,7 +291,7 @@ int string_list_split(struct string_list *list, const char *string, string_list_append(list, p); return count; } - end = strchr(p, delim); + end = strpbrk(p, delim); if (end) { string_list_append_nodup(list, xmemdupz(p, end - p)); p = end + 1; diff --git a/string-list.h b/string-list.h index 122b3186419880..6c8650efde0dfb 100644 --- a/string-list.h +++ b/string-list.h @@ -254,7 +254,7 @@ struct string_list_item *unsorted_string_list_lookup(struct string_list *list, void unsorted_string_list_delete_item(struct string_list *list, int i, int free_util); /** - * Split string into substrings on character `delim` and append the + * Split string into substrings on characters in `delim` and append the * substrings to `list`. The input string is not modified. 
* list->strdup_strings must be set, as new memory needs to be * allocated to hold the substrings. If maxsplit is non-negative, @@ -262,15 +262,15 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ * appended to list. * * Examples: - * string_list_split(l, "foo:bar:baz", ':', -1) -> ["foo", "bar", "baz"] - * string_list_split(l, "foo:bar:baz", ':', 0) -> ["foo:bar:baz"] - * string_list_split(l, "foo:bar:baz", ':', 1) -> ["foo", "bar:baz"] - * string_list_split(l, "foo:bar:", ':', -1) -> ["foo", "bar", ""] - * string_list_split(l, "", ':', -1) -> [""] - * string_list_split(l, ":", ':', -1) -> ["", ""] + * string_list_split(l, "foo:bar:baz", ":", -1) -> ["foo", "bar", "baz"] + * string_list_split(l, "foo:bar:baz", ":", 0) -> ["foo:bar:baz"] + * string_list_split(l, "foo:bar:baz", ":", 1) -> ["foo", "bar:baz"] + * string_list_split(l, "foo:bar:", ":", -1) -> ["foo", "bar", ""] + * string_list_split(l, "", ":", -1) -> [""] + * string_list_split(l, ":", ":", -1) -> ["", ""] */ int string_list_split(struct string_list *list, const char *string, - int delim, int maxsplit); + const char *delim, int maxsplit); /* * Like string_list_split(), except that string is split in-place: the diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 086238c826aadb..f5f33751da620d 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -348,6 +348,7 @@ int cmd__path_utils(int argc, const char **argv) if (argc == 4 && !strcmp(argv[1], "longest_ancestor_length")) { int len; struct string_list ceiling_dirs = STRING_LIST_INIT_DUP; + const char path_sep[] = { PATH_SEP, '\0' }; char *path = xstrdup(argv[2]); /* @@ -362,7 +363,7 @@ int cmd__path_utils(int argc, const char **argv) */ if (normalize_path_copy(path, path)) die("Path \"%s\" could not be normalized", argv[2]); - string_list_split(&ceiling_dirs, argv[3], PATH_SEP, -1); + string_list_split(&ceiling_dirs, argv[3], path_sep, -1); filter_string_list(&ceiling_dirs, 0, 
normalize_ceiling_entry, NULL); len = longest_ancestor_length(path, &ceiling_dirs); diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index 8d9a271845c4b6..aa1cb9b4acfb2a 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -29,7 +29,7 @@ static unsigned int parse_flags(const char *str, struct flag_definition *defs) if (!strcmp(str, "0")) return 0; - string_list_split(&masks, str, ',', 64); + string_list_split(&masks, str, ",", 64); for (size_t i = 0; i < masks.nr; i++) { const char *name = masks.items[i].string; struct flag_definition *def = defs; diff --git a/t/unit-tests/u-string-list.c b/t/unit-tests/u-string-list.c index d4ba5f9fa52aa2..150a5f505f5bee 100644 --- a/t/unit-tests/u-string-list.c +++ b/t/unit-tests/u-string-list.c @@ -43,7 +43,7 @@ static void t_string_list_equal(struct string_list *list, expected_strings->items[i].string); } -static void t_string_list_split(const char *data, int delim, int maxsplit, ...) +static void t_string_list_split(const char *data, const char *delim, int maxsplit, ...) { struct string_list expected_strings = STRING_LIST_INIT_DUP; struct string_list list = STRING_LIST_INIT_DUP; @@ -65,13 +65,13 @@ static void t_string_list_split(const char *data, int delim, int maxsplit, ...) 
void test_string_list__split(void) { - t_string_list_split("foo:bar:baz", ':', -1, "foo", "bar", "baz", NULL); - t_string_list_split("foo:bar:baz", ':', 0, "foo:bar:baz", NULL); - t_string_list_split("foo:bar:baz", ':', 1, "foo", "bar:baz", NULL); - t_string_list_split("foo:bar:baz", ':', 2, "foo", "bar", "baz", NULL); - t_string_list_split("foo:bar:", ':', -1, "foo", "bar", "", NULL); - t_string_list_split("", ':', -1, "", NULL); - t_string_list_split(":", ':', -1, "", "", NULL); + t_string_list_split("foo:bar:baz", ":", -1, "foo", "bar", "baz", NULL); + t_string_list_split("foo:bar:baz", ":", 0, "foo:bar:baz", NULL); + t_string_list_split("foo:bar:baz", ":", 1, "foo", "bar:baz", NULL); + t_string_list_split("foo:bar:baz", ":", 2, "foo", "bar", "baz", NULL); + t_string_list_split("foo:bar:", ":", -1, "foo", "bar", "", NULL); + t_string_list_split("", ":", -1, "", NULL); + t_string_list_split(":", ":", -1, "", "", NULL); } static void t_string_list_split_in_place(const char *data, const char *delim, diff --git a/transport.c b/transport.c index c123ac1e38b815..76487b54530098 100644 --- a/transport.c +++ b/transport.c @@ -1042,7 +1042,7 @@ static const struct string_list *protocol_allow_list(void) if (enabled < 0) { const char *v = getenv("GIT_ALLOW_PROTOCOL"); if (v) { - string_list_split(&allowed, v, ':', -1); + string_list_split(&allowed, v, ":", -1); string_list_sort(&allowed); enabled = 1; } else { diff --git a/upload-pack.c b/upload-pack.c index 4f26f6afc77106..91fcdcad9b5b48 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1685,7 +1685,7 @@ static void process_args(struct packet_reader *request, if (data->uri_protocols.nr) send_err_and_die(data, "multiple packfile-uris lines forbidden"); - string_list_split(&data->uri_protocols, p, ',', -1); + string_list_split(&data->uri_protocols, p, ",", -1); continue; } From 527535fcdd2d9dec56877435f609852d0f2bf163 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:19 -0700 Subject: [PATCH 03/54] 
string-list: unify string_list_split* functions Thanks to the previous step, the only difference between these two related functions is that string_list_split() works on a string without modifying its contents (i.e. taking "const char *") and the resulting pieces of strings are their own copies in a string list, while string_list_split_in_place() works on a mutable string and the resulting pieces of strings come from the original string. Consolidate their implementations into a single helper function, and make them a thin wrapper around it. We can later add an extra flags parameter to extend both of these functions by updating only the internal helper function. Signed-off-by: Junio C Hamano --- string-list.c | 96 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 40 deletions(-) diff --git a/string-list.c b/string-list.c index 2284a009cba6a2..65b6ceb2591280 100644 --- a/string-list.c +++ b/string-list.c @@ -276,55 +276,71 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ list->nr--; } -int string_list_split(struct string_list *list, const char *string, - const char *delim, int maxsplit) +/* + * append a substring [p..end] to list; return number of things it + * appended to the list. + */ +static int append_one(struct string_list *list, + const char *p, const char *end, + int in_place) +{ + if (!end) + end = p + strlen(p); + + if (in_place) { + *((char *)end) = '\0'; + string_list_append(list, p); + } else { + string_list_append_nodup(list, xmemdupz(p, end - p)); + } + return 1; +} + +/* + * Unfortunately this cannot become a public interface, as _in_place() + * wants to have "const char *string" while the other variant wants to + * have "char *string" for type safety. 
+ * + * This accepts "const char *string" to allow both wrappers to use it; + * it internally casts away the constness when in_place is true by + * taking advantage of strpbrk() that takes a "const char *" arg and + * returns "char *" pointer into that const string. Yucky but works ;-). + */ +static int split_string(struct string_list *list, const char *string, const char *delim, + int maxsplit, int in_place) { int count = 0; - const char *p = string, *end; + const char *p = string; + + if (in_place && list->strdup_strings) + BUG("string_list_split_in_place() called with strdup_strings"); + else if (!in_place && !list->strdup_strings) + BUG("string_list_split() called without strdup_strings"); - if (!list->strdup_strings) - BUG("internal error in string_list_split(): " - "list->strdup_strings must be set"); for (;;) { - count++; - if (maxsplit >= 0 && count > maxsplit) { - string_list_append(list, p); - return count; - } - end = strpbrk(p, delim); - if (end) { - string_list_append_nodup(list, xmemdupz(p, end - p)); - p = end + 1; - } else { - string_list_append(list, p); + char *end; + + if (0 <= maxsplit && maxsplit <= count) + end = NULL; + else + end = strpbrk(p, delim); + + count += append_one(list, p, end, in_place); + + if (!end) return count; - } + p = end + 1; } } +int string_list_split(struct string_list *list, const char *string, + const char *delim, int maxsplit) +{ + return split_string(list, string, delim, maxsplit, 0); +} + int string_list_split_in_place(struct string_list *list, char *string, const char *delim, int maxsplit) { - int count = 0; - char *p = string, *end; - - if (list->strdup_strings) - BUG("internal error in string_list_split_in_place(): " - "list->strdup_strings must not be set"); - for (;;) { - count++; - if (maxsplit >= 0 && count > maxsplit) { - string_list_append(list, p); - return count; - } - end = strpbrk(p, delim); - if (end) { - *end = '\0'; - string_list_append(list, p); - p = end + 1; - } else { - string_list_append(list, 
p); - return count; - } - } + return split_string(list, string, delim, maxsplit, 1); } From 576454974165d51b7e39c0608cde1c84978f1a8a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:20 -0700 Subject: [PATCH 04/54] string-list: optionally trim string pieces split by string_list_split*() Teach the unified split_string() to take an optional "flags" word, and define the first flag STRING_LIST_SPLIT_TRIM to cause the split pieces to be trimmed before they are placed in the string list. Signed-off-by: Junio C Hamano --- string-list.c | 35 +++++++++++++++++--- string-list.h | 15 +++++++++ t/unit-tests/u-string-list.c | 64 ++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 5 deletions(-) diff --git a/string-list.c b/string-list.c index 65b6ceb2591280..86a309f8fbd25e 100644 --- a/string-list.c +++ b/string-list.c @@ -282,11 +282,18 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ */ static int append_one(struct string_list *list, const char *p, const char *end, - int in_place) + int in_place, unsigned flags) { if (!end) end = p + strlen(p); + if ((flags & STRING_LIST_SPLIT_TRIM)) { + /* rtrim */ + for (; p < end; end--) + if (!isspace(end[-1])) + break; + } + if (in_place) { *((char *)end) = '\0'; string_list_append(list, p); @@ -307,7 +314,7 @@ static int append_one(struct string_list *list, * returns "char *" pointer into that const string. Yucky but works ;-). 
*/ static int split_string(struct string_list *list, const char *string, const char *delim, - int maxsplit, int in_place) + int maxsplit, int in_place, unsigned flags) { int count = 0; const char *p = string; @@ -320,12 +327,18 @@ static int split_string(struct string_list *list, const char *string, const char for (;;) { char *end; + if (flags & STRING_LIST_SPLIT_TRIM) { + /* ltrim */ + while (*p && isspace(*p)) + p++; + } + if (0 <= maxsplit && maxsplit <= count) end = NULL; else end = strpbrk(p, delim); - count += append_one(list, p, end, in_place); + count += append_one(list, p, end, in_place, flags); if (!end) return count; @@ -336,11 +349,23 @@ static int split_string(struct string_list *list, const char *string, const char int string_list_split(struct string_list *list, const char *string, const char *delim, int maxsplit) { - return split_string(list, string, delim, maxsplit, 0); + return split_string(list, string, delim, maxsplit, 0, 0); } int string_list_split_in_place(struct string_list *list, char *string, const char *delim, int maxsplit) { - return split_string(list, string, delim, maxsplit, 1); + return split_string(list, string, delim, maxsplit, 1, 0); +} + +int string_list_split_f(struct string_list *list, const char *string, + const char *delim, int maxsplit, unsigned flags) +{ + return split_string(list, string, delim, maxsplit, 0, flags); +} + +int string_list_split_in_place_f(struct string_list *list, char *string, + const char *delim, int maxsplit, unsigned flags) +{ + return split_string(list, string, delim, maxsplit, 1, flags); } diff --git a/string-list.h b/string-list.h index 6c8650efde0dfb..40e148712dacca 100644 --- a/string-list.h +++ b/string-list.h @@ -281,4 +281,19 @@ int string_list_split(struct string_list *list, const char *string, */ int string_list_split_in_place(struct string_list *list, char *string, const char *delim, int maxsplit); + +/* Flag bits for split_f and split_in_place_f functions */ +enum { + /* + * trim whitespaces 
around resulting string piece before adding + * it to the list + */ + STRING_LIST_SPLIT_TRIM = (1 << 0), +}; + +int string_list_split_f(struct string_list *, const char *string, + const char *delim, int maxsplit, unsigned flags); + +int string_list_split_in_place_f(struct string_list *, char *string, + const char *delim, int maxsplit, unsigned flags); #endif /* STRING_LIST_H */ diff --git a/t/unit-tests/u-string-list.c b/t/unit-tests/u-string-list.c index 150a5f505f5bee..daa9307e45ea41 100644 --- a/t/unit-tests/u-string-list.c +++ b/t/unit-tests/u-string-list.c @@ -63,6 +63,70 @@ static void t_string_list_split(const char *data, const char *delim, int maxspli string_list_clear(&list, 0); } +static void t_string_list_split_f(const char *data, const char *delim, + int maxsplit, unsigned flags, ...) +{ + struct string_list expected_strings = STRING_LIST_INIT_DUP; + struct string_list list = STRING_LIST_INIT_DUP; + va_list ap; + int len; + + va_start(ap, flags); + t_vcreate_string_list_dup(&expected_strings, 0, ap); + va_end(ap); + + string_list_clear(&list, 0); + len = string_list_split_f(&list, data, delim, maxsplit, flags); + cl_assert_equal_i(len, expected_strings.nr); + t_string_list_equal(&list, &expected_strings); + + string_list_clear(&expected_strings, 0); + string_list_clear(&list, 0); +} + +void test_string_list__split_f(void) +{ + t_string_list_split_f("::foo:bar:baz:", ":", -1, 0, + "", "", "foo", "bar", "baz", "", NULL); + t_string_list_split_f(" foo:bar : baz", ":", -1, STRING_LIST_SPLIT_TRIM, + "foo", "bar", "baz", NULL); + t_string_list_split_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM, + "a", "b c", NULL); +} + +static void t_string_list_split_in_place_f(const char *data_, const char *delim, + int maxsplit, unsigned flags, ...) 
+{ + struct string_list expected_strings = STRING_LIST_INIT_DUP; + struct string_list list = STRING_LIST_INIT_NODUP; + char *data = xstrdup(data_); + va_list ap; + int len; + + va_start(ap, flags); + t_vcreate_string_list_dup(&expected_strings, 0, ap); + va_end(ap); + + string_list_clear(&list, 0); + len = string_list_split_in_place_f(&list, data, delim, maxsplit, flags); + cl_assert_equal_i(len, expected_strings.nr); + t_string_list_equal(&list, &expected_strings); + + free(data); + string_list_clear(&expected_strings, 0); + string_list_clear(&list, 0); +} + +void test_string_list__split_in_place_f(void) +{ + t_string_list_split_in_place_f("::foo:bar:baz:", ":", -1, 0, + "", "", "foo", "bar", "baz", "", NULL); + t_string_list_split_in_place_f(" foo:bar : baz", ":", -1, STRING_LIST_SPLIT_TRIM, + "foo", "bar", "baz", NULL); + t_string_list_split_in_place_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM, + "a", "b c", NULL); +} + void test_string_list__split(void) { t_string_list_split("foo:bar:baz", ":", -1, "foo", "bar", "baz", NULL); From f3a303aef017ad6e53fa44643d832a1fa0de0d91 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:21 -0700 Subject: [PATCH 05/54] diff: simplify parsing of diff.colormovedws The code to parse this configuration variable, whose value is a comma-separated list of known tokens like "ignore-space-change" and "ignore-all-space", uses string_list_split() to split the value into pieces, and then places each piece of string in a strbuf to trim, before comparing the result with the list of known tokens. Thanks to the previous steps, now string_list_split() can trim the resulting pieces before it places them in the string list. Use it to simplify the code. 
Signed-off-by: Junio C Hamano --- diff.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/diff.c b/diff.c index a81949a4220655..70666ad2cd1ca6 100644 --- a/diff.c +++ b/diff.c @@ -327,29 +327,23 @@ static unsigned parse_color_moved_ws(const char *arg) struct string_list l = STRING_LIST_INIT_DUP; struct string_list_item *i; - string_list_split(&l, arg, ",", -1); + string_list_split_f(&l, arg, ",", -1, STRING_LIST_SPLIT_TRIM); for_each_string_list_item(i, &l) { - struct strbuf sb = STRBUF_INIT; - strbuf_addstr(&sb, i->string); - strbuf_trim(&sb); - - if (!strcmp(sb.buf, "no")) + if (!strcmp(i->string, "no")) ret = 0; - else if (!strcmp(sb.buf, "ignore-space-change")) + else if (!strcmp(i->string, "ignore-space-change")) ret |= XDF_IGNORE_WHITESPACE_CHANGE; - else if (!strcmp(sb.buf, "ignore-space-at-eol")) + else if (!strcmp(i->string, "ignore-space-at-eol")) ret |= XDF_IGNORE_WHITESPACE_AT_EOL; - else if (!strcmp(sb.buf, "ignore-all-space")) + else if (!strcmp(i->string, "ignore-all-space")) ret |= XDF_IGNORE_WHITESPACE; - else if (!strcmp(sb.buf, "allow-indentation-change")) + else if (!strcmp(i->string, "allow-indentation-change")) ret |= COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE; else { ret |= COLOR_MOVED_WS_ERROR; - error(_("unknown color-moved-ws mode '%s', possible values are 'ignore-space-change', 'ignore-space-at-eol', 'ignore-all-space', 'allow-indentation-change'"), sb.buf); + error(_("unknown color-moved-ws mode '%s', possible values are 'ignore-space-change', 'ignore-space-at-eol', 'ignore-all-space', 'allow-indentation-change'"), i->string); } - - strbuf_release(&sb); } if ((ret & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) && From 27531efa41cfa882473513dd93e696a16f6eb87b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:22 -0700 Subject: [PATCH 06/54] string-list: optionally omit empty string pieces in string_list_split*() Teach the unified split_string() machinery a new flag bit, 
STRING_LIST_SPLIT_NONEMPTY, to cause empty split pieces to be omitted from the resulting string list. Signed-off-by: Junio C Hamano --- string-list.c | 3 +++ string-list.h | 2 ++ t/unit-tests/u-string-list.c | 15 +++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/string-list.c b/string-list.c index 86a309f8fbd25e..343cf1ca90d2ac 100644 --- a/string-list.c +++ b/string-list.c @@ -294,6 +294,9 @@ static int append_one(struct string_list *list, break; } + if ((flags & STRING_LIST_SPLIT_NONEMPTY) && (end <= p)) + return 0; + if (in_place) { *((char *)end) = '\0'; string_list_append(list, p); diff --git a/string-list.h b/string-list.h index 40e148712dacca..2b438c7733d869 100644 --- a/string-list.h +++ b/string-list.h @@ -289,6 +289,8 @@ enum { * it to the list */ STRING_LIST_SPLIT_TRIM = (1 << 0), + /* omit adding empty string piece to the resulting list */ + STRING_LIST_SPLIT_NONEMPTY = (1 << 1), }; int string_list_split_f(struct string_list *, const char *string, diff --git a/t/unit-tests/u-string-list.c b/t/unit-tests/u-string-list.c index daa9307e45ea41..a2457d7b1ec8fa 100644 --- a/t/unit-tests/u-string-list.c +++ b/t/unit-tests/u-string-list.c @@ -92,6 +92,13 @@ void test_string_list__split_f(void) "foo", "bar", "baz", NULL); t_string_list_split_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM, "a", "b c", NULL); + t_string_list_split_f("::foo::bar:baz:", ":", -1, STRING_LIST_SPLIT_NONEMPTY, + "foo", "bar", "baz", NULL); + t_string_list_split_f("foo:baz", ":", -1, STRING_LIST_SPLIT_NONEMPTY, + "foo", "baz", NULL); + t_string_list_split_f("foo :: : baz", ":", -1, + STRING_LIST_SPLIT_NONEMPTY | STRING_LIST_SPLIT_TRIM, + "foo", "baz", NULL); } static void t_string_list_split_in_place_f(const char *data_, const char *delim, @@ -125,6 +132,14 @@ void test_string_list__split_in_place_f(void) "foo", "bar", "baz", NULL); t_string_list_split_in_place_f(" a b c ", " ", 1, STRING_LIST_SPLIT_TRIM, "a", "b c", NULL); + t_string_list_split_in_place_f("::foo::bar:baz:", 
":", -1, + STRING_LIST_SPLIT_NONEMPTY, + "foo", "bar", "baz", NULL); + t_string_list_split_in_place_f("foo:baz", ":", -1, STRING_LIST_SPLIT_NONEMPTY, + "foo", "baz", NULL); + t_string_list_split_in_place_f("foo :: : baz", ":", -1, + STRING_LIST_SPLIT_NONEMPTY | STRING_LIST_SPLIT_TRIM, + "foo", "baz", NULL); } void test_string_list__split(void) From 2ab2aac73d234ae75096e2186b07cc14c57d2586 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Aug 2025 15:04:23 -0700 Subject: [PATCH 07/54] string-list: split-then-remove-empty can be done while splitting Thanks to the new STRING_LIST_SPLIT_NONEMPTY flag, a common pattern to split a string into a string list and then remove empty items in the resulting list is no longer needed. Instead, just tell the string_list_split*() to omit empty ones while splitting. Signed-off-by: Junio C Hamano --- notes.c | 4 ++-- pathspec.c | 3 +-- t/helper/test-hashmap.c | 4 ++-- t/helper/test-json-writer.c | 4 ++-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/notes.c b/notes.c index 6afcf088b97485..3603c4a42bdc44 100644 --- a/notes.c +++ b/notes.c @@ -970,8 +970,8 @@ void string_list_add_refs_from_colon_sep(struct string_list *list, char *globs_copy = xstrdup(globs); int i; - string_list_split_in_place(&split, globs_copy, ":", -1); - string_list_remove_empty_items(&split, 0); + string_list_split_in_place_f(&split, globs_copy, ":", -1, + STRING_LIST_SPLIT_NONEMPTY); for (i = 0; i < split.nr; i++) string_list_add_refs_by_glob(list, split.items[i].string); diff --git a/pathspec.c b/pathspec.c index de325f7ef99df6..5993c4afa0eb37 100644 --- a/pathspec.c +++ b/pathspec.c @@ -201,8 +201,7 @@ static void parse_pathspec_attr_match(struct pathspec_item *item, const char *va if (!value || !*value) die(_("attr spec must not be empty")); - string_list_split(&list, value, " ", -1); - string_list_remove_empty_items(&list, 0); + string_list_split_f(&list, value, " ", -1, STRING_LIST_SPLIT_NONEMPTY); item->attr_check = 
attr_check_alloc(); CALLOC_ARRAY(item->attr_match, list.nr); diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c index 7782ae585e6471..e4dc02bd7a0ba3 100644 --- a/t/helper/test-hashmap.c +++ b/t/helper/test-hashmap.c @@ -149,8 +149,8 @@ int cmd__hashmap(int argc UNUSED, const char **argv UNUSED) /* break line into command and up to two parameters */ string_list_setlen(&parts, 0); - string_list_split_in_place(&parts, line.buf, DELIM, 2); - string_list_remove_empty_items(&parts, 0); + string_list_split_in_place_f(&parts, line.buf, DELIM, 2, + STRING_LIST_SPLIT_NONEMPTY); /* ignore empty lines */ if (!parts.nr) diff --git a/t/helper/test-json-writer.c b/t/helper/test-json-writer.c index a288069b04cb3b..f8316a7d29cdd5 100644 --- a/t/helper/test-json-writer.c +++ b/t/helper/test-json-writer.c @@ -492,8 +492,8 @@ static int scripted(void) /* break line into command and zero or more tokens */ string_list_setlen(&parts, 0); - string_list_split_in_place(&parts, line, " ", -1); - string_list_remove_empty_items(&parts, 0); + string_list_split_in_place_f(&parts, line, " ", -1, + STRING_LIST_SPLIT_NONEMPTY); /* ignore empty lines */ if (!parts.nr || !*parts.items[0].string) From 2efe707054d184565f081f9d882940381b2645ca Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:23 -0700 Subject: [PATCH 08/54] wt-status: avoid strbuf_split*() strbuf is a very good data structure to work with string data without having to worry about running past the end of the string, but strbuf_split() is a wrong API and an array of strbuf that the function produces is a wrong thing to use in general. You do not edit these N strings split out of a single strbuf simultaneously. Often it is much better off to split a string into string_list and work with the resulting strings. 
wt-status.c:abbrev_oid_in_line() takes one line of rebase todo list (like "pick e813a0200a7121b97fec535f0d0b460b0a33356c title"), and for instructions that have an object name as the second token on the line, replaces the object name with its unique abbreviation. After splitting these tokens out of a single line, no simultaneous edit on any of these pieces of string that takes advantage of strbuf API takes place. The final string is composed with strbuf API, but these split pieces are merely used as pieces of strings and there is no need for them to be stored in individual strbuf. Instead, split the line into a string_list, and compose the final string using these pieces. Signed-off-by: Junio C Hamano --- wt-status.c | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/wt-status.c b/wt-status.c index 454601afa15a95..a34dc144ee3616 100644 --- a/wt-status.c +++ b/wt-status.c @@ -1351,8 +1351,8 @@ static int split_commit_in_progress(struct wt_status *s) */ static void abbrev_oid_in_line(struct strbuf *line) { - struct strbuf **split; - int i; + struct string_list split = STRING_LIST_INIT_DUP; + struct object_id oid; if (starts_with(line->buf, "exec ") || starts_with(line->buf, "x ") || @@ -1360,26 +1360,15 @@ static void abbrev_oid_in_line(struct strbuf *line) starts_with(line->buf, "l ")) return; - split = strbuf_split_max(line, ' ', 3); - if (split[0] && split[1]) { - struct object_id oid; - - /* - * strbuf_split_max left a space. Trim it and re-add - * it after abbreviation.
- */ - strbuf_trim(split[1]); - if (!repo_get_oid(the_repository, split[1]->buf, &oid)) { - strbuf_reset(split[1]); - strbuf_add_unique_abbrev(split[1], &oid, - DEFAULT_ABBREV); - strbuf_addch(split[1], ' '); - strbuf_reset(line); - for (i = 0; split[i]; i++) - strbuf_addbuf(line, split[i]); - } + if ((2 <= string_list_split(&split, line->buf, " ", 2)) && + !repo_get_oid(the_repository, split.items[1].string, &oid)) { + strbuf_reset(line); + strbuf_addf(line, "%s ", split.items[0].string); + strbuf_add_unique_abbrev(line, &oid, DEFAULT_ABBREV); + for (size_t i = 2; i < split.nr; i++) + strbuf_addf(line, " %s", split.items[i].string); } - strbuf_list_free(split); + string_list_clear(&split, 0); } static int read_rebase_todolist(const char *fname, struct string_list *lines) From 899ff9c1755a84925704c18250fb7ac1afb302c0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:24 -0700 Subject: [PATCH 09/54] clean: do not pass strbuf by value When you pass a structure by value, the callee can modify the contents of the structure that was passed in without having to worry about changing the structure the caller has. Passing structure by value sometimes (but not very often) can be a valid way to give callee a temporary variable it can freely modify. But not a structure with members that are pointers, like a strbuf. builtin/clean.c:list_and_choose() reads a line interactively from the user, and passes the line (in a strbuf) to parse_choice() by value, which then munges by replacing ',' with ' ' (to accept both comma and space separated list of choices). But because the strbuf passed by value still shares the underlying character array buf[], this ends up munging the caller's strbuf contents. This is a catastrophe waiting to happen. If the callee causes the strbuf to be reallocated, the buf[] the caller has will become dangling, and when the caller does strbuf_release(), it would result in double-free. 
Stop calling the function with misleading call-by-value with strbuf. Signed-off-by: Junio C Hamano --- builtin/clean.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/builtin/clean.c b/builtin/clean.c index 053c94fc6bd12a..224551537e3b67 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -477,7 +477,7 @@ static int find_unique(const char *choice, struct menu_stuff *menu_stuff) */ static int parse_choice(struct menu_stuff *menu_stuff, int is_single, - struct strbuf input, + struct strbuf *input, int **chosen) { struct strbuf **choice_list, **ptr; @@ -485,14 +485,14 @@ static int parse_choice(struct menu_stuff *menu_stuff, int i; if (is_single) { - choice_list = strbuf_split_max(&input, '\n', 0); + choice_list = strbuf_split_max(input, '\n', 0); } else { - char *p = input.buf; + char *p = input->buf; do { if (*p == ',') *p = ' '; } while (*p++); - choice_list = strbuf_split_max(&input, ' ', 0); + choice_list = strbuf_split_max(input, ' ', 0); } for (ptr = choice_list; *ptr; ptr++) { @@ -630,7 +630,7 @@ static int *list_and_choose(struct menu_opts *opts, struct menu_stuff *stuff) nr = parse_choice(stuff, opts->flags & MENU_OPTS_SINGLETON, - choice, + &choice, &chosen); if (opts->flags & MENU_OPTS_SINGLETON) { From 7a4acc360782c9eb0e53f51a5cf3147fa88f973e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:25 -0700 Subject: [PATCH 10/54] clean: do not use strbuf_split*() [part 1] builtin/clean.c:parse_choice() is fed a single line of input, which is space or comma separated list of tokens, and a list of menu items. It parses the tokens into number ranges (e.g. 1-3 that means the first three items) or string prefix (e.g. 's' to choose the menu item "(s)elect") that specify the elements in the menu item list, and tells the caller which ones are chosen. For parsing the input string, it uses strbuf_split() to split it into bunch of strbufs. Instead use string_list_split_in_place(), for a few reasons. 
* strbuf_split() is a bad API function to use, that yields an array of strbuf that is a bad data structure to use in general. * string_list_split_in_place() allows you to split with "comma or space"; the current code has to preprocess the input string to replace comma with space because strbuf_split() does not allow this. Signed-off-by: Junio C Hamano --- builtin/clean.c | 50 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/builtin/clean.c b/builtin/clean.c index 224551537e3b67..708cd9344ca905 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -480,40 +480,36 @@ static int parse_choice(struct menu_stuff *menu_stuff, struct strbuf *input, int **chosen) { - struct strbuf **choice_list, **ptr; + struct string_list choice = STRING_LIST_INIT_NODUP; + struct string_list_item *item; int nr = 0; int i; - if (is_single) { - choice_list = strbuf_split_max(input, '\n', 0); - } else { - char *p = input->buf; - do { - if (*p == ',') - *p = ' '; - } while (*p++); - choice_list = strbuf_split_max(input, ' ', 0); - } + string_list_split_in_place_f(&choice, input->buf, + is_single ? 
"\n" : ", ", -1, + STRING_LIST_SPLIT_TRIM); - for (ptr = choice_list; *ptr; ptr++) { - char *p; - int choose = 1; + for_each_string_list_item(item, &choice) { + const char *string; + int choose; int bottom = 0, top = 0; int is_range, is_number; - strbuf_trim(*ptr); - if (!(*ptr)->len) + string = item->string; + if (!*string) continue; /* Input that begins with '-'; unchoose */ - if (*(*ptr)->buf == '-') { + if (string[0] == '-') { choose = 0; - strbuf_remove((*ptr), 0, 1); + string++; + } else { + choose = 1; } is_range = 0; is_number = 1; - for (p = (*ptr)->buf; *p; p++) { + for (const char *p = string; *p; p++) { if ('-' == *p) { if (!is_range) { is_range = 1; @@ -531,27 +527,27 @@ static int parse_choice(struct menu_stuff *menu_stuff, } if (is_number) { - bottom = atoi((*ptr)->buf); + bottom = atoi(string); top = bottom; } else if (is_range) { - bottom = atoi((*ptr)->buf); + bottom = atoi(string); /* a range can be specified like 5-7 or 5- */ - if (!*(strchr((*ptr)->buf, '-') + 1)) + if (!*(strchr(string, '-') + 1)) top = menu_stuff->nr; else - top = atoi(strchr((*ptr)->buf, '-') + 1); - } else if (!strcmp((*ptr)->buf, "*")) { + top = atoi(strchr(string, '-') + 1); + } else if (!strcmp(string, "*")) { bottom = 1; top = menu_stuff->nr; } else { - bottom = find_unique((*ptr)->buf, menu_stuff); + bottom = find_unique(string, menu_stuff); top = bottom; } if (top <= 0 || bottom <= 0 || top > menu_stuff->nr || bottom > top || (is_single && bottom != top)) { clean_print_color(CLEAN_COLOR_ERROR); - printf(_("Huh (%s)?\n"), (*ptr)->buf); + printf(_("Huh (%s)?\n"), string); clean_print_color(CLEAN_COLOR_RESET); continue; } @@ -560,7 +556,7 @@ static int parse_choice(struct menu_stuff *menu_stuff, (*chosen)[i-1] = choose; } - strbuf_list_free(choice_list); + string_list_clear(&choice, 0); for (i = 0; i < menu_stuff->nr; i++) nr += (*chosen)[i]; From 4985f72ea5133441c2e9ba808bdea861a2d9f042 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 2 Aug 2025 22:42:29 -0700 
Subject: [PATCH 11/54] clean: do not pass the whole structure when it is not necessary The callee parse_choice() only needs to access a NUL-terminated string; instead of insisting on taking a pointer to a strbuf, just take a pointer to a character array. Signed-off-by: Junio C Hamano --- builtin/clean.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/clean.c b/builtin/clean.c index 708cd9344ca905..9bb920e7fdc61a 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -477,7 +477,7 @@ static int find_unique(const char *choice, struct menu_stuff *menu_stuff) */ static int parse_choice(struct menu_stuff *menu_stuff, int is_single, - struct strbuf *input, + char *input, int **chosen) { struct string_list choice = STRING_LIST_INIT_NODUP; @@ -485,7 +485,7 @@ static int parse_choice(struct menu_stuff *menu_stuff, int nr = 0; int i; - string_list_split_in_place_f(&choice, input->buf, + string_list_split_in_place_f(&choice, input, is_single ? "\n" : ", ", -1, STRING_LIST_SPLIT_TRIM); @@ -626,7 +626,7 @@ static int *list_and_choose(struct menu_opts *opts, struct menu_stuff *stuff) nr = parse_choice(stuff, opts->flags & MENU_OPTS_SINGLETON, - &choice, + choice.buf, &chosen); if (opts->flags & MENU_OPTS_SINGLETON) { From 4f60672f6f7cbc61fb704c993c54187860f1e9c8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:26 -0700 Subject: [PATCH 12/54] clean: do not use strbuf_split*() [part 2] builtin/clean.c:filter_by_patterns_cmd() interactively reads a line that has exclude patterns from the user and splits the line into a list of patterns. It uses strbuf_split() so that each split piece can then be trimmed. There is no need to use strbuf anymore, thanks to the recent enhancement to string_list_split*() family that allows us to trim the pieces split into a string_list.
Signed-off-by: Junio C Hamano --- builtin/clean.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/builtin/clean.c b/builtin/clean.c index 9bb920e7fdc61a..38780edc395f0c 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -674,12 +674,13 @@ static int filter_by_patterns_cmd(void) { struct dir_struct dir = DIR_INIT; struct strbuf confirm = STRBUF_INIT; - struct strbuf **ignore_list; - struct string_list_item *item; struct pattern_list *pl; int changed = -1, i; for (;;) { + struct string_list ignore_list = STRING_LIST_INIT_NODUP; + struct string_list_item *item; + if (!del_list.nr) break; @@ -697,14 +698,15 @@ static int filter_by_patterns_cmd(void) break; pl = add_pattern_list(&dir, EXC_CMDL, "manual exclude"); - ignore_list = strbuf_split_max(&confirm, ' ', 0); - for (i = 0; ignore_list[i]; i++) { - strbuf_trim(ignore_list[i]); - if (!ignore_list[i]->len) - continue; + string_list_split_in_place_f(&ignore_list, confirm.buf, " ", -1, + STRING_LIST_SPLIT_TRIM); - add_pattern(ignore_list[i]->buf, "", 0, pl, -(i+1)); + for (i = 0; i < ignore_list.nr; i++) { + item = &ignore_list.items[i]; + if (!*item->string) + continue; + add_pattern(item->string, "", 0, pl, -(i+1)); } changed = 0; @@ -725,7 +727,7 @@ static int filter_by_patterns_cmd(void) clean_print_color(CLEAN_COLOR_RESET); } - strbuf_list_free(ignore_list); + string_list_clear(&ignore_list, 0); dir_clear(&dir); } From d33091220dadedfcb874d179fe164f507d5f09b2 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:27 -0700 Subject: [PATCH 13/54] merge-tree: do not use strbuf_split*() When reading merge instructions from the standard input, the program reads from the standard input, splits the line into tokens at whitespace, and trims each of them before using. We no longer need to use strbuf just for trimming, as string_list_split*() family can trim while splitting a string. 
Signed-off-by: Junio C Hamano --- builtin/merge-tree.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index cf8b06cadc7d50..70235856d7aae1 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -618,32 +618,34 @@ int cmd_merge_tree(int argc, "--merge-base", "--stdin"); line_termination = '\0'; while (strbuf_getline_lf(&buf, stdin) != EOF) { - struct strbuf **split; + struct string_list split = STRING_LIST_INIT_NODUP; const char *input_merge_base = NULL; - split = strbuf_split(&buf, ' '); - if (!split[0] || !split[1]) + string_list_split_in_place_f(&split, buf.buf, " ", -1, + STRING_LIST_SPLIT_TRIM); + + if (split.nr < 2) die(_("malformed input line: '%s'."), buf.buf); - strbuf_rtrim(split[0]); - strbuf_rtrim(split[1]); /* parse the merge-base */ - if (!strcmp(split[1]->buf, "--")) { - input_merge_base = split[0]->buf; + if (!strcmp(split.items[1].string, "--")) { + input_merge_base = split.items[0].string; } - if (input_merge_base && split[2] && split[3] && !split[4]) { - strbuf_rtrim(split[2]); - strbuf_rtrim(split[3]); - real_merge(&o, input_merge_base, split[2]->buf, split[3]->buf, prefix); - } else if (!input_merge_base && !split[2]) { - real_merge(&o, NULL, split[0]->buf, split[1]->buf, prefix); + if (input_merge_base && split.nr == 4) { + real_merge(&o, input_merge_base, + split.items[2].string, split.items[3].string, + prefix); + } else if (!input_merge_base && split.nr == 2) { + real_merge(&o, NULL, + split.items[0].string, split.items[1].string, + prefix); } else { die(_("malformed input line: '%s'."), buf.buf); } maybe_flush_or_die(stdout, "stdout"); - strbuf_list_free(split); + string_list_clear(&split, 0); } strbuf_release(&buf); From 566e91049558cf9837e2f760877437b929fbb232 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:28 -0700 Subject: [PATCH 14/54] notes: do not use strbuf_split*() When reading copy instructions from 
the standard input, the program reads a line, splits it into tokens at whitespace, and trims each of the tokens before using. We no longer need to use strbuf just to be able to trim, as string_list_split*() family now can trim while splitting a string. Retire the use of strbuf_split() from this code path. Note that this loop is a bit sloppy in that it ensures at least there are two tokens on each line, but ignores if there are extra tokens on the line. Tightening it is outside the scope of this series. Signed-off-by: Junio C Hamano --- builtin/notes.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/builtin/notes.c b/builtin/notes.c index a9529b1696ae14..4fb36a743cd376 100644 --- a/builtin/notes.c +++ b/builtin/notes.c @@ -375,18 +375,19 @@ static int notes_copy_from_stdin(int force, const char *rewrite_cmd) while (strbuf_getline_lf(&buf, stdin) != EOF) { struct object_id from_obj, to_obj; - struct strbuf **split; + struct string_list split = STRING_LIST_INIT_NODUP; int err; - split = strbuf_split(&buf, ' '); - if (!split[0] || !split[1]) + string_list_split_in_place_f(&split, buf.buf, " ", -1, + STRING_LIST_SPLIT_TRIM); + if (split.nr < 2) die(_("malformed input line: '%s'."), buf.buf); - strbuf_rtrim(split[0]); - strbuf_rtrim(split[1]); - if (repo_get_oid(the_repository, split[0]->buf, &from_obj)) - die(_("failed to resolve '%s' as a valid ref."), split[0]->buf); - if (repo_get_oid(the_repository, split[1]->buf, &to_obj)) - die(_("failed to resolve '%s' as a valid ref."), split[1]->buf); + if (repo_get_oid(the_repository, split.items[0].string, &from_obj)) + die(_("failed to resolve '%s' as a valid ref."), + split.items[0].string); + if (repo_get_oid(the_repository, split.items[1].string, &to_obj)) + die(_("failed to resolve '%s' as a valid ref."), + split.items[1].string); if (rewrite_cmd) err = copy_note_for_rewrite(c, &from_obj, &to_obj); @@ -396,11 +397,11 @@ static int notes_copy_from_stdin(int force, const char 
*rewrite_cmd) if (err) { error(_("failed to copy notes from '%s' to '%s'"), - split[0]->buf, split[1]->buf); + split.items[0].string, split.items[1].string); ret = 1; } - strbuf_list_free(split); + string_list_clear(&split, 0); } if (!rewrite_cmd) { From dcecac2580ef871186fdc4e9efc87815a4ce4c66 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:29 -0700 Subject: [PATCH 15/54] config: do not use strbuf_split() When parsing an old-style GIT_CONFIG_PARAMETERS environment variable, the code parses key=value pairs by splitting them at '=' into an array of strbuf's. As strbuf_split() leaves the delimiter at the end of the split piece, the code has to manually trim it. If we split with string_list_split(), that becomes unnecessary. Retire the use of strbuf_split() from this code path. Note that the max parameter of string_list_split() is of an ergonomically iffy design---it specifies the maximum number of times the function is allowed to split, which means that in order to split a text into up to 2 pieces, you have to pass 1, not 2. Signed-off-by: Junio C Hamano --- config.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/config.c b/config.c index 8a2d0b7916442f..1769f15ee31862 100644 --- a/config.c +++ b/config.c @@ -638,31 +638,28 @@ int git_config_parse_parameter(const char *text, config_fn_t fn, void *data) { const char *value; - struct strbuf **pair; + struct string_list pair = STRING_LIST_INIT_DUP; int ret; struct key_value_info kvi = KVI_INIT; kvi_from_param(&kvi); - pair = strbuf_split_str(text, '=', 2); - if (!pair[0]) + string_list_split(&pair, text, "=", 1); + if (!pair.nr) return error(_("bogus config parameter: %s"), text); - if (pair[0]->len && pair[0]->buf[pair[0]->len - 1] == '=') { - strbuf_setlen(pair[0], pair[0]->len - 1); - value = pair[1] ? 
pair[1]->buf : ""; - } else { + if (pair.nr == 1) value = NULL; - } + else + value = pair.items[1].string; - strbuf_trim(pair[0]); - if (!pair[0]->len) { - strbuf_list_free(pair); + if (!*pair.items[0].string) { + string_list_clear(&pair, 0); return error(_("bogus config parameter: %s"), text); } - ret = config_parse_pair(pair[0]->buf, value, &kvi, fn, data); - strbuf_list_free(pair); + ret = config_parse_pair(pair.items[0].string, value, &kvi, fn, data); + string_list_clear(&pair, 0); return ret; } From b894d4481f4068a84323dfc7048f007b3df5234d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:30 -0700 Subject: [PATCH 16/54] environment: do not use strbuf_split*() environment.c:get_git_namespace() learns the raw namespace from an environment variable, splits it at "/", and appends them after "refs/namespaces/"; the reason why it splits first is so that an empty string resulting from double slashes can be omitted. The split pieces do not need to be edited in any way, so an array of strbufs is a wrong data structure to use. Instead split into a string list and use the pieces from there. 
Signed-off-by: Junio C Hamano --- environment.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/environment.c b/environment.c index 7c2480b22e5991..ab3ed08433d2c0 100644 --- a/environment.c +++ b/environment.c @@ -163,10 +163,10 @@ int have_git_dir(void) const char *get_git_namespace(void) { static const char *namespace; - struct strbuf buf = STRBUF_INIT; - struct strbuf **components, **c; const char *raw_namespace; + struct string_list components = STRING_LIST_INIT_DUP; + struct string_list_item *item; if (namespace) return namespace; @@ -178,12 +178,17 @@ const char *get_git_namespace(void) } strbuf_addstr(&buf, raw_namespace); - components = strbuf_split(&buf, '/'); + + string_list_split(&components, buf.buf, "/", -1); strbuf_reset(&buf); - for (c = components; *c; c++) - if (strcmp((*c)->buf, "/") != 0) - strbuf_addf(&buf, "refs/namespaces/%s", (*c)->buf); - strbuf_list_free(components); + + for_each_string_list_item(item, &components) { + if (item->string[0]) + strbuf_addf(&buf, "refs/namespaces/%s/", item->string); + } + string_list_clear(&components, 0); + + strbuf_trim_trailing_dir_sep(&buf); if (check_refname_format(buf.buf, 0)) die(_("bad git namespace path \"%s\""), raw_namespace); strbuf_addch(&buf, '/'); From d6fd08bd760711d51b98f9ad98c3cd94d90d2618 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:31 -0700 Subject: [PATCH 17/54] sub-process: do not use strbuf_split*() The code to read status from subprocess reads one packet line and tries to find "status=" followed by a value. It is way overkill to split the line into an array of two strbufs to extract that value.
Signed-off-by: Junio C Hamano --- sub-process.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/sub-process.c b/sub-process.c index 1daf5a975254b9..83bf0a0e82e56d 100644 --- a/sub-process.c +++ b/sub-process.c @@ -30,23 +30,20 @@ struct subprocess_entry *subprocess_find_entry(struct hashmap *hashmap, const ch int subprocess_read_status(int fd, struct strbuf *status) { - struct strbuf **pair; - char *line; int len; for (;;) { + char *line; + const char *value; + len = packet_read_line_gently(fd, NULL, &line); if ((len < 0) || !line) break; - pair = strbuf_split_str(line, '=', 2); - if (pair[0] && pair[0]->len && pair[1]) { + if (skip_prefix(line, "status=", &value)) { /* the last "status=" line wins */ - if (!strcmp(pair[0]->buf, "status=")) { - strbuf_reset(status); - strbuf_addbuf(status, pair[1]); - } + strbuf_reset(status); + strbuf_addstr(status, value); } - strbuf_list_free(pair); } return (len < 0) ? len : 0; From cb8e82a6414653d5dbda81eedb8ca0cd9ce34c68 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:32 -0700 Subject: [PATCH 18/54] trace2: trim_trailing_newline followed by trim is a no-op strbuf_trim_trailing_newline() removes a LF or a CRLF from the tail of a string. If the code plans to call strbuf_trim() immediately after doing so, the code is better off skipping the EOL trimming in the first place. After all, LF/CRLF at the end is a mere special case of whitespaces at the end of the string, which will be removed by strbuf_rtrim() anyway. 
Signed-off-by: Junio C Hamano --- trace2/tr2_cfg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/trace2/tr2_cfg.c b/trace2/tr2_cfg.c index 22a99a0682a498..2b7cfcd10c70ea 100644 --- a/trace2/tr2_cfg.c +++ b/trace2/tr2_cfg.c @@ -39,7 +39,6 @@ static int tr2_cfg_load_patterns(void) if (buf->len && buf->buf[buf->len - 1] == ',') strbuf_setlen(buf, buf->len - 1); - strbuf_trim_trailing_newline(*s); strbuf_trim(*s); } @@ -78,7 +77,6 @@ static int tr2_load_env_vars(void) if (buf->len && buf->buf[buf->len - 1] == ',') strbuf_setlen(buf, buf->len - 1); - strbuf_trim_trailing_newline(*s); strbuf_trim(*s); } From 838fe56920684bf0ab734f7ddf2bad69cb5f5d45 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 31 Jul 2025 15:54:33 -0700 Subject: [PATCH 19/54] trace2: do not use strbuf_split*() tr2_cfg_load_patterns() and tr2_load_env_vars() are functions with a very similar structure: each reads an environment variable, splits its value at the ',' boundaries, and trims the resulting string pieces into an array of strbufs. But the code paths that later use these strbufs take no advantage of the strbuf-ness of the result (they do not benefit from the pointer-plus-length representation to avoid having to run strlen(), for example). Simplify the code by teaching these functions to split into a string list instead; even the trimming comes for free ;-).
Signed-off-by: Junio C Hamano --- trace2/tr2_cfg.c | 78 +++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/trace2/tr2_cfg.c b/trace2/tr2_cfg.c index 2b7cfcd10c70ea..bbcfeda60af4de 100644 --- a/trace2/tr2_cfg.c +++ b/trace2/tr2_cfg.c @@ -8,87 +8,65 @@ #include "trace2/tr2_sysenv.h" #include "wildmatch.h" -static struct strbuf **tr2_cfg_patterns; -static int tr2_cfg_count_patterns; +static struct string_list tr2_cfg_patterns = STRING_LIST_INIT_DUP; static int tr2_cfg_loaded; -static struct strbuf **tr2_cfg_env_vars; -static int tr2_cfg_env_vars_count; +static struct string_list tr2_cfg_env_vars = STRING_LIST_INIT_DUP; static int tr2_cfg_env_vars_loaded; /* * Parse a string containing a comma-delimited list of config keys - * or wildcard patterns into a list of strbufs. + * or wildcard patterns into a string list. */ -static int tr2_cfg_load_patterns(void) +static size_t tr2_cfg_load_patterns(void) { - struct strbuf **s; const char *envvar; if (tr2_cfg_loaded) - return tr2_cfg_count_patterns; + return tr2_cfg_patterns.nr; tr2_cfg_loaded = 1; envvar = tr2_sysenv_get(TR2_SYSENV_CFG_PARAM); if (!envvar || !*envvar) - return tr2_cfg_count_patterns; + return tr2_cfg_patterns.nr; - tr2_cfg_patterns = strbuf_split_buf(envvar, strlen(envvar), ',', -1); - for (s = tr2_cfg_patterns; *s; s++) { - struct strbuf *buf = *s; - - if (buf->len && buf->buf[buf->len - 1] == ',') - strbuf_setlen(buf, buf->len - 1); - strbuf_trim(*s); - } - - tr2_cfg_count_patterns = s - tr2_cfg_patterns; - return tr2_cfg_count_patterns; + string_list_split_f(&tr2_cfg_patterns, envvar, ",", -1, + STRING_LIST_SPLIT_TRIM); + return tr2_cfg_patterns.nr; } void tr2_cfg_free_patterns(void) { - if (tr2_cfg_patterns) - strbuf_list_free(tr2_cfg_patterns); - tr2_cfg_count_patterns = 0; + if (tr2_cfg_patterns.nr) + string_list_clear(&tr2_cfg_patterns, 0); tr2_cfg_loaded = 0; } /* * Parse a string containing a comma-delimited list of environment variable - 
* names into a list of strbufs. + * names into a string list. */ -static int tr2_load_env_vars(void) +static size_t tr2_load_env_vars(void) { - struct strbuf **s; const char *varlist; if (tr2_cfg_env_vars_loaded) - return tr2_cfg_env_vars_count; + return tr2_cfg_env_vars.nr; tr2_cfg_env_vars_loaded = 1; varlist = tr2_sysenv_get(TR2_SYSENV_ENV_VARS); if (!varlist || !*varlist) - return tr2_cfg_env_vars_count; - - tr2_cfg_env_vars = strbuf_split_buf(varlist, strlen(varlist), ',', -1); - for (s = tr2_cfg_env_vars; *s; s++) { - struct strbuf *buf = *s; - - if (buf->len && buf->buf[buf->len - 1] == ',') - strbuf_setlen(buf, buf->len - 1); - strbuf_trim(*s); - } + return tr2_cfg_env_vars.nr; - tr2_cfg_env_vars_count = s - tr2_cfg_env_vars; - return tr2_cfg_env_vars_count; + string_list_split_f(&tr2_cfg_env_vars, varlist, ",", -1, + STRING_LIST_SPLIT_TRIM); + return tr2_cfg_env_vars.nr; } void tr2_cfg_free_env_vars(void) { - if (tr2_cfg_env_vars) - strbuf_list_free(tr2_cfg_env_vars); - tr2_cfg_env_vars_count = 0; + if (tr2_cfg_env_vars.nr) + string_list_clear(&tr2_cfg_env_vars, 0); tr2_cfg_env_vars_loaded = 0; } @@ -103,12 +81,11 @@ struct tr2_cfg_data { static int tr2_cfg_cb(const char *key, const char *value, const struct config_context *ctx, void *d) { - struct strbuf **s; + struct string_list_item *item; struct tr2_cfg_data *data = (struct tr2_cfg_data *)d; - for (s = tr2_cfg_patterns; *s; s++) { - struct strbuf *buf = *s; - int wm = wildmatch(buf->buf, key, WM_CASEFOLD); + for_each_string_list_item(item, &tr2_cfg_patterns) { + int wm = wildmatch(item->string, key, WM_CASEFOLD); if (wm == WM_MATCH) { trace2_def_param_fl(data->file, data->line, key, value, ctx->kvi); @@ -130,17 +107,16 @@ void tr2_cfg_list_config_fl(const char *file, int line) void tr2_list_env_vars_fl(const char *file, int line) { struct key_value_info kvi = KVI_INIT; - struct strbuf **s; + struct string_list_item *item; kvi_from_param(&kvi); if (tr2_load_env_vars() <= 0) return; - for (s = 
tr2_cfg_env_vars; *s; s++) { - struct strbuf *buf = *s; - const char *val = getenv(buf->buf); + for_each_string_list_item(item, &tr2_cfg_env_vars) { + const char *val = getenv(item->string); if (val && *val) - trace2_def_param_fl(file, line, buf->buf, val, &kvi); + trace2_def_param_fl(file, line, item->string, val, &kvi); } } From 66e2adb8f6fe97bb480d96205fb3473b8c1fe4df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 3 Aug 2025 13:38:29 +0200 Subject: [PATCH 20/54] describe: use prio_queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the use of a list-based priority queue whose order is maintained by commit_list_insert_by_date() with a prio_queue. This avoids quadratic worst-case complexity. And in the somewhat contrived example of describing the 4751 commits from v2.41.0 to v2.47.0 in one go (to get a sizable chunk of describe work with minimal ref loading overhead) it's significantly faster: Benchmark 1: ./git_2.50.1 describe $(git rev-list v2.41.0..v2.47.0) Time (mean ± σ): 1.558 s ± 0.002 s [User: 1.492 s, System: 0.051 s] Range (min … max): 1.557 s … 1.562 s 10 runs Benchmark 2: ./git describe $(git rev-list v2.41.0..v2.47.0) Time (mean ± σ): 1.209 s ± 0.006 s [User: 1.143 s, System: 0.051 s] Range (min … max): 1.201 s … 1.219 s 10 runs Summary ./git describe $(git rev-list v2.41.0..v2.47.0) ran 1.29 ± 0.01 times faster than ./git_2.50.1 describe $(git rev-list v2.41.0..v2.47.0) Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/describe.c | 51 ++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index fbf305d7624487..80722ae0c0421b 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -23,6 +23,7 @@ #include "list-objects.h" #include "commit-slab.h" #include "wildmatch.h" +#include "prio-queue.h" #define MAX_TAGS (FLAG_BITS - 1) #define DEFAULT_CANDIDATES 10 @@
-249,24 +250,26 @@ static int compare_pt(const void *a_, const void *b_) return 0; } -static unsigned long finish_depth_computation( - struct commit_list **list, - struct possible_tag *best) +static bool all_have_flag(const struct prio_queue *queue, unsigned flag) +{ + for (size_t i = 0; i < queue->nr; i++) { + struct commit *commit = queue->array[i].data; + if (!(commit->object.flags & flag)) + return false; + } + return true; +} + +static unsigned long finish_depth_computation(struct prio_queue *queue, + struct possible_tag *best) { unsigned long seen_commits = 0; - while (*list) { - struct commit *c = pop_commit(list); + while (queue->nr) { + struct commit *c = prio_queue_get(queue); struct commit_list *parents = c->parents; seen_commits++; if (c->object.flags & best->flag_within) { - struct commit_list *a = *list; - while (a) { - struct commit *i = a->item; - if (!(i->object.flags & best->flag_within)) - break; - a = a->next; - } - if (!a) + if (all_have_flag(queue, best->flag_within)) break; } else best->depth++; @@ -274,7 +277,7 @@ static unsigned long finish_depth_computation( struct commit *p = parents->item; repo_parse_commit(the_repository, p); if (!(p->object.flags & SEEN)) - commit_list_insert_by_date(p, list); + prio_queue_put(queue, p); p->object.flags |= c->object.flags; parents = parents->next; } @@ -316,7 +319,7 @@ static void append_suffix(int depth, const struct object_id *oid, struct strbuf static void describe_commit(struct object_id *oid, struct strbuf *dst) { struct commit *cmit, *gave_up_on = NULL; - struct commit_list *list; + struct prio_queue queue = { compare_commits_by_commit_date }; struct commit_name *n; struct possible_tag all_matches[MAX_TAGS]; unsigned int match_cnt = 0, annotated_cnt = 0, cur_match; @@ -359,11 +362,10 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) have_util = 1; } - list = NULL; cmit->object.flags = SEEN; - commit_list_insert(cmit, &list); - while (list) { - struct commit *c = 
pop_commit(&list); + prio_queue_put(&queue, cmit); + while (queue.nr) { + struct commit *c = prio_queue_get(&queue); struct commit_list *parents = c->parents; struct commit_name **slot; @@ -397,7 +399,7 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) t->depth++; } /* Stop if last remaining path already covered by best candidate(s) */ - if (annotated_cnt && !list) { + if (annotated_cnt && !queue.nr) { int best_depth = INT_MAX; unsigned best_within = 0; for (cur_match = 0; cur_match < match_cnt; cur_match++) { @@ -420,7 +422,7 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) struct commit *p = parents->item; repo_parse_commit(the_repository, p); if (!(p->object.flags & SEEN)) - commit_list_insert_by_date(p, &list); + prio_queue_put(&queue, p); p->object.flags |= c->object.flags; parents = parents->next; @@ -435,6 +437,7 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) strbuf_add_unique_abbrev(dst, cmit_oid, abbrev); if (suffix) strbuf_addstr(dst, suffix); + clear_prio_queue(&queue); return; } if (unannotated_cnt) @@ -450,11 +453,11 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) QSORT(all_matches, match_cnt, compare_pt); if (gave_up_on) { - commit_list_insert_by_date(gave_up_on, &list); + prio_queue_put(&queue, gave_up_on); seen_commits--; } - seen_commits += finish_depth_computation(&list, &all_matches[0]); - free_commit_list(list); + seen_commits += finish_depth_computation(&queue, &all_matches[0]); + clear_prio_queue(&queue); if (debug) { static int label_width = -1; From 08bb69d70f55cb6b44cdc6aefa7bc1d9cf4eb3f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 3 Aug 2025 13:49:11 +0200 Subject: [PATCH 21/54] describe: use prio_queue_replace() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimize the sequence get+put to peek+replace to avoid one unnecessary heap rebalance. 
Do that by tracking partial get operations in a prio_queue wrapper, struct lazy_queue, and using wrapper functions that turn get into peek and put into replace as needed. This is simpler than tracking the state explicitly in the calling code. We get a nice speedup on top of the previous patch's conversion to prio_queue: Benchmark 1: ./git_2.50.1 describe $(git rev-list v2.41.0..v2.47.0) Time (mean ± σ): 1.559 s ± 0.002 s [User: 1.493 s, System: 0.051 s] Range (min … max): 1.556 s … 1.563 s 10 runs Benchmark 2: ./git_describe_pq describe $(git rev-list v2.41.0..v2.47.0) Time (mean ± σ): 1.204 s ± 0.001 s [User: 1.138 s, System: 0.051 s] Range (min … max): 1.202 s … 1.205 s 10 runs Benchmark 3: ./git describe $(git rev-list v2.41.0..v2.47.0) Time (mean ± σ): 850.9 ms ± 1.6 ms [User: 786.6 ms, System: 49.8 ms] Range (min … max): 849.1 ms … 854.1 ms 10 runs Summary ./git describe $(git rev-list v2.41.0..v2.47.0) ran 1.41 ± 0.00 times faster than ./git_describe_pq describe $(git rev-list v2.41.0..v2.47.0) 1.83 ± 0.00 times faster than ./git_2.50.1 describe $(git rev-list v2.41.0..v2.47.0) Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/describe.c | 68 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index 80722ae0c0421b..c18e4b3e4b714f 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -250,22 +250,58 @@ static int compare_pt(const void *a_, const void *b_) return 0; } -static bool all_have_flag(const struct prio_queue *queue, unsigned flag) +struct lazy_queue { + struct prio_queue queue; + bool get_pending; +}; + +#define LAZY_QUEUE_INIT { { compare_commits_by_commit_date }, false } + +static void *lazy_queue_get(struct lazy_queue *queue) +{ + if (queue->get_pending) + prio_queue_get(&queue->queue); + else + queue->get_pending = true; + return prio_queue_peek(&queue->queue); +} + +static void lazy_queue_put(struct lazy_queue *queue, void 
*thing) +{ + if (queue->get_pending) + prio_queue_replace(&queue->queue, thing); + else + prio_queue_put(&queue->queue, thing); + queue->get_pending = false; +} + +static bool lazy_queue_empty(const struct lazy_queue *queue) +{ + return queue->queue.nr == (queue->get_pending ? 1 : 0); +} + +static void lazy_queue_clear(struct lazy_queue *queue) +{ + clear_prio_queue(&queue->queue); + queue->get_pending = false; +} + +static bool all_have_flag(const struct lazy_queue *queue, unsigned flag) { - for (size_t i = 0; i < queue->nr; i++) { - struct commit *commit = queue->array[i].data; + for (size_t i = queue->get_pending ? 1 : 0; i < queue->queue.nr; i++) { + struct commit *commit = queue->queue.array[i].data; if (!(commit->object.flags & flag)) return false; } return true; } -static unsigned long finish_depth_computation(struct prio_queue *queue, +static unsigned long finish_depth_computation(struct lazy_queue *queue, struct possible_tag *best) { unsigned long seen_commits = 0; - while (queue->nr) { - struct commit *c = prio_queue_get(queue); + while (!lazy_queue_empty(queue)) { + struct commit *c = lazy_queue_get(queue); struct commit_list *parents = c->parents; seen_commits++; if (c->object.flags & best->flag_within) { @@ -277,7 +313,7 @@ static unsigned long finish_depth_computation(struct prio_queue *queue, struct commit *p = parents->item; repo_parse_commit(the_repository, p); if (!(p->object.flags & SEEN)) - prio_queue_put(queue, p); + lazy_queue_put(queue, p); p->object.flags |= c->object.flags; parents = parents->next; } @@ -319,7 +355,7 @@ static void append_suffix(int depth, const struct object_id *oid, struct strbuf static void describe_commit(struct object_id *oid, struct strbuf *dst) { struct commit *cmit, *gave_up_on = NULL; - struct prio_queue queue = { compare_commits_by_commit_date }; + struct lazy_queue queue = LAZY_QUEUE_INIT; struct commit_name *n; struct possible_tag all_matches[MAX_TAGS]; unsigned int match_cnt = 0, annotated_cnt = 0, cur_match; 
@@ -363,9 +399,9 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) } cmit->object.flags = SEEN; - prio_queue_put(&queue, cmit); - while (queue.nr) { - struct commit *c = prio_queue_get(&queue); + lazy_queue_put(&queue, cmit); + while (!lazy_queue_empty(&queue)) { + struct commit *c = lazy_queue_get(&queue); struct commit_list *parents = c->parents; struct commit_name **slot; @@ -399,7 +435,7 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) t->depth++; } /* Stop if last remaining path already covered by best candidate(s) */ - if (annotated_cnt && !queue.nr) { + if (annotated_cnt && lazy_queue_empty(&queue)) { int best_depth = INT_MAX; unsigned best_within = 0; for (cur_match = 0; cur_match < match_cnt; cur_match++) { @@ -422,7 +458,7 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) struct commit *p = parents->item; repo_parse_commit(the_repository, p); if (!(p->object.flags & SEEN)) - prio_queue_put(&queue, p); + lazy_queue_put(&queue, p); p->object.flags |= c->object.flags; parents = parents->next; @@ -437,7 +473,7 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) strbuf_add_unique_abbrev(dst, cmit_oid, abbrev); if (suffix) strbuf_addstr(dst, suffix); - clear_prio_queue(&queue); + lazy_queue_clear(&queue); return; } if (unannotated_cnt) @@ -453,11 +489,11 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) QSORT(all_matches, match_cnt, compare_pt); if (gave_up_on) { - prio_queue_put(&queue, gave_up_on); + lazy_queue_put(&queue, gave_up_on); seen_commits--; } seen_commits += finish_depth_computation(&queue, &all_matches[0]); - clear_prio_queue(&queue); + lazy_queue_clear(&queue); if (debug) { static int label_width = -1; From e9493c55af074fb7179922fafa61104332c05cc9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 07:54:12 +0200 Subject: [PATCH 22/54] Documentation/git-reflog: convert to use synopsis type With 974cdca345c 
(doc: introduce a synopsis typesetting, 2024-09-24) we have introduced a new synopsis type that simplifies the rules for typesetting a command's synopsis. Convert the git-reflog(1) documentation to use it. While at it, convert the list of options to use backticks. This is done to appease an upcoming new linter that mandates the use of backticks when using the synopsis type. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/git-reflog.adoc | 38 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/Documentation/git-reflog.adoc b/Documentation/git-reflog.adoc index 412f06b8fec57a..0d6601fdeadcd0 100644 --- a/Documentation/git-reflog.adoc +++ b/Documentation/git-reflog.adoc @@ -8,16 +8,16 @@ git-reflog - Manage reflog information SYNOPSIS -------- -[verse] -'git reflog' [show] [<log-options>] [<ref>] -'git reflog list' -'git reflog expire' [--expire=