From 04949538203ba7aeb298a607280715d4f26dbe0a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:46 -0800 Subject: [PATCH 001/185] sideband: mask control characters The output of `git clone` is a vital component for understanding what has happened when things go wrong. However, these logs are partially under the control of the remote server (via the "sideband", which typically contains what the remote `git pack-objects` process sends to `stderr`), and is currently not sanitized by Git. This makes Git susceptible to ANSI escape sequence injection (see CWE-150, https://cwe.mitre.org/data/definitions/150.html), which allows attackers to corrupt terminal state, to hide information, and even to insert characters into the input buffer (i.e. as if the user had typed those characters). To plug this vulnerability, disallow any control character in the sideband, replacing them instead with the common `^` (e.g. `^[` for `\x1b`, `^A` for `\x01`). There is likely a need for more fine-grained controls instead of using a "heavy hammer" like this, which will be introduced subsequently. Helped-by: Phillip Wood Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- sideband.c | 17 +++++++++++++++-- t/t5409-colorize-remote-messages.sh | 12 ++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/sideband.c b/sideband.c index ea7c25211ef7e1..c1bbadccac682b 100644 --- a/sideband.c +++ b/sideband.c @@ -66,6 +66,19 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref list_config_item(list, prefix, keywords[i].keyword); } +static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) +{ + strbuf_grow(dest, n); + for (; n && *src; src++, n--) { + if (!iscntrl(*src) || *src == '\t' || *src == '\n') { + strbuf_addch(dest, *src); + } else { + strbuf_addch(dest, '^'); + strbuf_addch(dest, *src == 0x7f ? '?' : 0x40 + *src); + } + } +} + /* * Optionally highlight one keyword in remote output if it appears at the start * of the line. This should be called for a single line only, which is @@ -81,7 +94,7 @@ static void maybe_colorize_sideband(struct strbuf *dest, const char *src, int n) int i; if (!want_color_stderr(use_sideband_colors())) { - strbuf_add(dest, src, n); + strbuf_add_sanitized(dest, src, n); return; } @@ -114,7 +127,7 @@ static void maybe_colorize_sideband(struct strbuf *dest, const char *src, int n) } } - strbuf_add(dest, src, n); + strbuf_add_sanitized(dest, src, n); } diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index fa5de4500a4f50..aa5b57057148e0 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -98,4 +98,16 @@ test_expect_success 'fallback to color.ui' ' grep "error: error" decoded ' +test_expect_success 'disallow (color) control sequences in sideband' ' + write_script .git/color-me-surprised <<-\EOF && + printf "error: Have you \\033[31mread\\033[m this?\\n" >&2 + exec "$@" + EOF + test_config_global uploadPack.packObjectsHook ./color-me-surprised && + test_commit need-at-least-one-commit && + git clone --no-local . throw-away 2>stderr && + test_decode_color decoded && + test_grep ! RED decoded +' + test_done From 9ed1625a581a35d7ec2d851258cf4c7fc08c1ed7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:47 -0800 Subject: [PATCH 002/185] sideband: introduce an "escape hatch" to allow control characters The preceding commit fixed the vulnerability whereas sideband messages (that are under the control of the remote server) could contain ANSI escape sequences that would be sent to the terminal verbatim. However, this fix may not be desirable under all circumstances, e.g. when remote servers deliberately add coloring to their messages to increase their urgency. To help with those use cases, give users a way to opt-out of the protections: `sideband.allowControlCharacters`. Suggested-by: brian m. carlson Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config.adoc | 2 ++ Documentation/config/sideband.adoc | 5 +++++ sideband.c | 10 ++++++++++ t/t5409-colorize-remote-messages.sh | 8 +++++++- 4 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 Documentation/config/sideband.adoc diff --git a/Documentation/config.adoc b/Documentation/config.adoc index 62eebe7c54501c..dcea3c0c15e2a9 100644 --- a/Documentation/config.adoc +++ b/Documentation/config.adoc @@ -523,6 +523,8 @@ include::config/sequencer.adoc[] include::config/showbranch.adoc[] +include::config/sideband.adoc[] + include::config/sparse.adoc[] include::config/splitindex.adoc[] diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc new file mode 100644 index 00000000000000..3fb5045cd79581 --- /dev/null +++ b/Documentation/config/sideband.adoc @@ -0,0 +1,5 @@ +sideband.allowControlCharacters:: + By default, control characters that are delivered via the sideband + are masked, to prevent potentially unwanted ANSI escape sequences + from being sent to the terminal. Use this config setting to override + this behavior. diff --git a/sideband.c b/sideband.c index c1bbadccac682b..682f1cbbedb9b8 100644 --- a/sideband.c +++ b/sideband.c @@ -26,6 +26,8 @@ static struct keyword_entry keywords[] = { { "error", GIT_COLOR_BOLD_RED }, }; +static int allow_control_characters; + /* Returns a color setting (GIT_COLOR_NEVER, etc). */ static enum git_colorbool use_sideband_colors(void) { @@ -39,6 +41,9 @@ static enum git_colorbool use_sideband_colors(void) if (use_sideband_colors_cached != GIT_COLOR_UNKNOWN) return use_sideband_colors_cached; + repo_config_get_bool(the_repository, "sideband.allowcontrolcharacters", + &allow_control_characters); + if (!repo_config_get_string_tmp(the_repository, key, &value)) use_sideband_colors_cached = git_config_colorbool(key, value); else if (!repo_config_get_string_tmp(the_repository, "color.ui", &value)) @@ -68,6 +73,11 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) { + if (allow_control_characters) { + strbuf_add(dest, src, n); + return; + } + strbuf_grow(dest, n); for (; n && *src; src++, n--) { if (!iscntrl(*src) || *src == '\t' || *src == '\n') { diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index aa5b57057148e0..9caee9a07f1556 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -105,9 +105,15 @@ test_expect_success 'disallow (color) control sequences in sideband' ' EOF test_config_global uploadPack.packObjectsHook ./color-me-surprised && test_commit need-at-least-one-commit && + git clone --no-local . throw-away 2>stderr && test_decode_color decoded && - test_grep ! RED decoded + test_grep ! RED decoded && + + rm -rf throw-away && + git -c sideband.allowControlCharacters clone --no-local . throw-away 2>stderr && + test_decode_color decoded && + test_grep RED decoded ' test_done From 12f0fda905b4af3a15c125f96808e49ddbe39742 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:48 -0800 Subject: [PATCH 003/185] sideband: do allow ANSI color sequences by default The preceding two commits introduced special handling of the sideband channel to neutralize ANSI escape sequences before sending the payload to the terminal, and `sideband.allowControlCharacters` to override that behavior. However, as reported by brian m. carlson, some `pre-receive` hooks that are actively used in practice want to color their messages and therefore rely on the fact that Git passes them through to the terminal, even though they have no way to determine whether the receiving side can actually handle Escape sequences (think e.g. about the practice recommended by Git that third-party applications wishing to use Git functionality parse the output of Git commands). In contrast to other ANSI escape sequences, it is highly unlikely that coloring sequences can be essential tools in attack vectors that mislead Git users e.g. by hiding crucial information. Therefore we can have both: Continue to allow ANSI coloring sequences to be passed to the terminal by default, and neutralize all other ANSI Escape sequences. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 18 ++++++-- sideband.c | 66 +++++++++++++++++++++++++++-- t/t5409-colorize-remote-messages.sh | 16 ++++++- 3 files changed, 91 insertions(+), 9 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 3fb5045cd79581..b55c73726fe2c7 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -1,5 +1,17 @@ sideband.allowControlCharacters:: By default, control characters that are delivered via the sideband - are masked, to prevent potentially unwanted ANSI escape sequences - from being sent to the terminal. Use this config setting to override - this behavior. + are masked, except ANSI color sequences. This prevents potentially + unwanted ANSI escape sequences from being sent to the terminal. Use + this config setting to override this behavior: ++ +-- + `default`:: + `color`:: + Allow ANSI color sequences, line feeds and horizontal tabs, + but mask all other control characters. This is the default. + `false`:: + Mask all control characters other than line feeds and + horizontal tabs. + `true`:: + Allow all control characters to be sent to the terminal. +-- diff --git a/sideband.c b/sideband.c index 682f1cbbedb9b8..eeba6fa2ca8dd6 100644 --- a/sideband.c +++ b/sideband.c @@ -26,7 +26,12 @@ static struct keyword_entry keywords[] = { { "error", GIT_COLOR_BOLD_RED }, }; -static int allow_control_characters; +static enum { + ALLOW_NO_CONTROL_CHARACTERS = 0, + ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, + ALLOW_ALL_CONTROL_CHARACTERS = 1<<1, +} allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; /* Returns a color setting (GIT_COLOR_NEVER, etc). */ static enum git_colorbool use_sideband_colors(void) @@ -41,8 +46,26 @@ static enum git_colorbool use_sideband_colors(void) if (use_sideband_colors_cached != GIT_COLOR_UNKNOWN) return use_sideband_colors_cached; - repo_config_get_bool(the_repository, "sideband.allowcontrolcharacters", - &allow_control_characters); + switch (repo_config_get_maybe_bool(the_repository, "sideband.allowcontrolcharacters", &i)) { + case 0: /* Boolean value */ + allow_control_characters = i ? ALLOW_ALL_CONTROL_CHARACTERS : + ALLOW_NO_CONTROL_CHARACTERS; + break; + case -1: /* non-Boolean value */ + if (repo_config_get_string_tmp(the_repository, "sideband.allowcontrolcharacters", + &value)) + ; /* huh? `get_maybe_bool()` returned -1 */ + else if (!strcmp(value, "default")) + allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; + else if (!strcmp(value, "color")) + allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; + else + warning(_("unrecognized value for `sideband." + "allowControlCharacters`: '%s'"), value); + break; + default: + break; /* not configured */ + } if (!repo_config_get_string_tmp(the_repository, key, &value)) use_sideband_colors_cached = git_config_colorbool(key, value); @@ -71,9 +94,41 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref list_config_item(list, prefix, keywords[i].keyword); } +static int handle_ansi_color_sequence(struct strbuf *dest, const char *src, int n) +{ + int i; + + /* + * Valid ANSI color sequences are of the form + * + * ESC [ [ [; ]*] m + * + * These are part of the Select Graphic Rendition sequences which + * contain more than just color sequences, for more details see + * https://en.wikipedia.org/wiki/ANSI_escape_code#SGR. + */ + + if (allow_control_characters != ALLOW_ANSI_COLOR_SEQUENCES || + n < 3 || src[0] != '\x1b' || src[1] != '[') + return 0; + + for (i = 2; i < n; i++) { + if (src[i] == 'm') { + strbuf_add(dest, src, i + 1); + return i; + } + if (!isdigit(src[i]) && src[i] != ';') + break; + } + + return 0; +} + static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) { - if (allow_control_characters) { + int i; + + if (allow_control_characters == ALLOW_ALL_CONTROL_CHARACTERS) { strbuf_add(dest, src, n); return; } @@ -82,6 +137,9 @@ static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) for (; n && *src; src++, n--) { if (!iscntrl(*src) || *src == '\t' || *src == '\n') { strbuf_addch(dest, *src); + } else if ((i = handle_ansi_color_sequence(dest, src, n))) { + src += i; + n -= i; } else { strbuf_addch(dest, '^'); strbuf_addch(dest, *src == 0x7f ? '?' : 0x40 + *src); diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index 9caee9a07f1556..e5092d3b426cd3 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -100,7 +100,7 @@ test_expect_success 'fallback to color.ui' ' test_expect_success 'disallow (color) control sequences in sideband' ' write_script .git/color-me-surprised <<-\EOF && - printf "error: Have you \\033[31mread\\033[m this?\\n" >&2 + printf "error: Have you \\033[31mread\\033[m this?\\a\\n" >&2 exec "$@" EOF test_config_global uploadPack.packObjectsHook ./color-me-surprised && @@ -108,12 +108,24 @@ test_expect_success 'disallow (color) control sequences in sideband' ' git clone --no-local . throw-away 2>stderr && test_decode_color decoded && + test_grep RED decoded && + test_grep "\\^G" stderr && + tr -dc "\\007" actual && + test_must_be_empty actual && + + rm -rf throw-away && + git -c sideband.allowControlCharacters=false \ + clone --no-local . throw-away 2>stderr && + test_decode_color decoded && test_grep ! RED decoded && + test_grep "\\^G" stderr && rm -rf throw-away && git -c sideband.allowControlCharacters clone --no-local . throw-away 2>stderr && test_decode_color decoded && - test_grep RED decoded + test_grep RED decoded && + tr -dc "\\007" actual && + test_file_not_empty actual ' test_done From 128914438a0d2d55ae34314a0881f55a797024d5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:49 -0800 Subject: [PATCH 004/185] sideband: add options to allow more control sequences to be passed through Even though control sequences that erase characters are quite juicy for attack scenarios, where attackers are eager to hide traces of suspicious activities, during the review of the side band sanitizing patch series concerns were raised that there might be some legimitate scenarios where Git server's `pre-receive` hooks use those sequences in a benign way. Control sequences to move the cursor can likewise be used to hide tracks by overwriting characters, and have been equally pointed out as having legitimate users. Let's add options to let users opt into passing through those ANSI Escape sequences: `sideband.allowControlCharacters` now supports also `cursor` and `erase`, and it parses the value as a comma-separated list. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 9 ++- sideband.c | 91 ++++++++++++++++++++++++----- t/t5409-colorize-remote-messages.sh | 38 ++++++++++++ 3 files changed, 123 insertions(+), 15 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index b55c73726fe2c7..2bf04262840b02 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -2,13 +2,20 @@ sideband.allowControlCharacters:: By default, control characters that are delivered via the sideband are masked, except ANSI color sequences. This prevents potentially unwanted ANSI escape sequences from being sent to the terminal. Use - this config setting to override this behavior: + this config setting to override this behavior (the value can be + a comma-separated list of the following keywords): + -- `default`:: `color`:: Allow ANSI color sequences, line feeds and horizontal tabs, but mask all other control characters. This is the default. + `cursor:`: + Allow control sequences that move the cursor. This is + disabled by default. + `erase`:: + Allow control sequences that erase charactrs. This is + disabled by default. `false`:: Mask all control characters other than line feeds and horizontal tabs. diff --git a/sideband.c b/sideband.c index eeba6fa2ca8dd6..0b420ca3193888 100644 --- a/sideband.c +++ b/sideband.c @@ -29,9 +29,43 @@ static struct keyword_entry keywords[] = { static enum { ALLOW_NO_CONTROL_CHARACTERS = 0, ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, + ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, + ALLOW_ANSI_ERASE = 1<<2, ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, - ALLOW_ALL_CONTROL_CHARACTERS = 1<<1, -} allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; + ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, +} allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; + +static inline int skip_prefix_in_csv(const char *value, const char *prefix, + const char **out) +{ + if (!skip_prefix(value, prefix, &value) || + (*value && *value != ',')) + return 0; + *out = value + !!*value; + return 1; +} + +static void parse_allow_control_characters(const char *value) +{ + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; + while (*value) { + if (skip_prefix_in_csv(value, "default", &value)) + allow_control_characters |= ALLOW_DEFAULT_ANSI_SEQUENCES; + else if (skip_prefix_in_csv(value, "color", &value)) + allow_control_characters |= ALLOW_ANSI_COLOR_SEQUENCES; + else if (skip_prefix_in_csv(value, "cursor", &value)) + allow_control_characters |= ALLOW_ANSI_CURSOR_MOVEMENTS; + else if (skip_prefix_in_csv(value, "erase", &value)) + allow_control_characters |= ALLOW_ANSI_ERASE; + else if (skip_prefix_in_csv(value, "true", &value)) + allow_control_characters = ALLOW_ALL_CONTROL_CHARACTERS; + else if (skip_prefix_in_csv(value, "false", &value)) + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; + else + warning(_("unrecognized value for `sideband." + "allowControlCharacters`: '%s'"), value); + } +} /* Returns a color setting (GIT_COLOR_NEVER, etc). */ static enum git_colorbool use_sideband_colors(void) @@ -55,13 +89,8 @@ static enum git_colorbool use_sideband_colors(void) if (repo_config_get_string_tmp(the_repository, "sideband.allowcontrolcharacters", &value)) ; /* huh? `get_maybe_bool()` returned -1 */ - else if (!strcmp(value, "default")) - allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; - else if (!strcmp(value, "color")) - allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; else - warning(_("unrecognized value for `sideband." - "allowControlCharacters`: '%s'"), value); + parse_allow_control_characters(value); break; default: break; /* not configured */ @@ -94,7 +123,7 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref list_config_item(list, prefix, keywords[i].keyword); } -static int handle_ansi_color_sequence(struct strbuf *dest, const char *src, int n) +static int handle_ansi_sequence(struct strbuf *dest, const char *src, int n) { int i; @@ -106,14 +135,47 @@ static int handle_ansi_color_sequence(struct strbuf *dest, const char *src, int * These are part of the Select Graphic Rendition sequences which * contain more than just color sequences, for more details see * https://en.wikipedia.org/wiki/ANSI_escape_code#SGR. + * + * The cursor movement sequences are: + * + * ESC [ n A - Cursor up n lines (CUU) + * ESC [ n B - Cursor down n lines (CUD) + * ESC [ n C - Cursor forward n columns (CUF) + * ESC [ n D - Cursor back n columns (CUB) + * ESC [ n E - Cursor next line, beginning (CNL) + * ESC [ n F - Cursor previous line, beginning (CPL) + * ESC [ n G - Cursor to column n (CHA) + * ESC [ n ; m H - Cursor position (row n, col m) (CUP) + * ESC [ n ; m f - Same as H (HVP) + * + * The sequences to erase characters are: + * + * + * ESC [ 0 J - Clear from cursor to end of screen (ED) + * ESC [ 1 J - Clear from cursor to beginning of screen (ED) + * ESC [ 2 J - Clear entire screen (ED) + * ESC [ 3 J - Clear entire screen + scrollback (ED) - xterm extension + * ESC [ 0 K - Clear from cursor to end of line (EL) + * ESC [ 1 K - Clear from cursor to beginning of line (EL) + * ESC [ 2 K - Clear entire line (EL) + * ESC [ n M - Delete n lines (DL) + * ESC [ n P - Delete n characters (DCH) + * ESC [ n X - Erase n characters (ECH) + * + * For a comprehensive list of common ANSI Escape sequences, see + * https://www.xfree86.org/current/ctlseqs.html */ - if (allow_control_characters != ALLOW_ANSI_COLOR_SEQUENCES || - n < 3 || src[0] != '\x1b' || src[1] != '[') + if (n < 3 || src[0] != '\x1b' || src[1] != '[') return 0; for (i = 2; i < n; i++) { - if (src[i] == 'm') { + if (((allow_control_characters & ALLOW_ANSI_COLOR_SEQUENCES) && + src[i] == 'm') || + ((allow_control_characters & ALLOW_ANSI_CURSOR_MOVEMENTS) && + strchr("ABCDEFGHf", src[i])) || + ((allow_control_characters & ALLOW_ANSI_ERASE) && + strchr("JKMPX", src[i]))) { strbuf_add(dest, src, i + 1); return i; } @@ -128,7 +190,7 @@ static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) { int i; - if (allow_control_characters == ALLOW_ALL_CONTROL_CHARACTERS) { + if ((allow_control_characters & ALLOW_ALL_CONTROL_CHARACTERS)) { strbuf_add(dest, src, n); return; } @@ -137,7 +199,8 @@ static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) for (; n && *src; src++, n--) { if (!iscntrl(*src) || *src == '\t' || *src == '\n') { strbuf_addch(dest, *src); - } else if ((i = handle_ansi_color_sequence(dest, src, n))) { + } else if (allow_control_characters != ALLOW_NO_CONTROL_CHARACTERS && + (i = handle_ansi_sequence(dest, src, n))) { src += i; n -= i; } else { diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index e5092d3b426cd3..896e790bf955cd 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -128,4 +128,42 @@ test_expect_success 'disallow (color) control sequences in sideband' ' test_file_not_empty actual ' +test_decode_csi() { + awk '{ + while (match($0, /\033/) != 0) { + printf "%sCSI ", substr($0, 1, RSTART-1); + $0 = substr($0, RSTART + RLENGTH, length($0) - RSTART - RLENGTH + 1); + } + print + }' +} + +test_expect_success 'control sequences in sideband allowed by default' ' + write_script .git/color-me-surprised <<-\EOF && + printf "error: \\033[31mcolor\\033[m\\033[Goverwrite\\033[Gerase\\033[K\\033?25l\\n" >&2 + exec "$@" + EOF + test_config_global uploadPack.packObjectsHook ./color-me-surprised && + test_commit need-at-least-one-commit-at-least && + + rm -rf throw-away && + git clone --no-local . throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep ! "CSI \\[K" decoded && + test_grep ! "CSI \\[G" decoded && + test_grep "\\^\\[?25l" decoded && + + rm -rf throw-away && + git -c sideband.allowControlCharacters=erase,cursor,color \ + clone --no-local . throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep "RED" decoded && + test_grep "CSI \\[K" decoded && + test_grep "CSI \\[G" decoded && + test_grep ! "\\^\\[\\[K" decoded && + test_grep ! "\\^\\[\\[G" decoded +' + test_done From 602c83f0efed46c2e86a36273673bf8776ded04e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:50 -0800 Subject: [PATCH 005/185] sideband: offer to configure sanitizing on a per-URL basis The main objection against sanitizing the sideband that was raised during the review of the sideband sanitizing patches, first on the git-security mailing list, then on the public mailing list, was that there are some setups where server-side `pre-receive` hooks want to error out, giving colorful messages to the users on the client side (if they are not redirecting the output into a file, that is). To avoid breaking such setups, the default chosen by the sideband sanitizing patches is to pass through ANSI color sequences. Still, there might be some use case out there where that is not enough. Therefore the `sideband.allowControlCharacters` config setting allows for configuring levels of sanitizing. As Junio Hamano pointed out, to keep users safe by default, we need to be able to scope this to some servers because while a user may trust their company's Git server, the same might not apply to other Git servers. To allow for this, let's imitate the way `http..*` offers to scope config settings to certain URLs, by letting users override the `sideband.allowControlCharacters` setting via `sideband..allowControlCharacters`. Suggested-by: Junio Hamano Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 4 ++ sideband.c | 81 ++++++++++++++++++++--------- sideband.h | 14 +++++ t/t5409-colorize-remote-messages.sh | 24 +++++++++ transport.c | 3 ++ 5 files changed, 102 insertions(+), 24 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 2bf04262840b02..32088bbf2f0a40 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -22,3 +22,7 @@ sideband.allowControlCharacters:: `true`:: Allow all control characters to be sent to the terminal. -- + +sideband..*:: + Apply the `sideband.*` option selectively to specific URLs. The + same URL matching logic applies as for `http..*` settings. diff --git a/sideband.c b/sideband.c index 0b420ca3193888..a90db9e2880cba 100644 --- a/sideband.c +++ b/sideband.c @@ -10,6 +10,7 @@ #include "help.h" #include "pkt-line.h" #include "write-or-die.h" +#include "urlmatch.h" struct keyword_entry { /* @@ -27,13 +28,14 @@ static struct keyword_entry keywords[] = { }; static enum { - ALLOW_NO_CONTROL_CHARACTERS = 0, - ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, - ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, - ALLOW_ANSI_ERASE = 1<<2, - ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, - ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, -} allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; + ALLOW_CONTROL_SEQUENCES_UNSET = -1, + ALLOW_NO_CONTROL_CHARACTERS = 0, + ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, + ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, + ALLOW_ANSI_ERASE = 1<<2, + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, + ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, +} allow_control_characters = ALLOW_CONTROL_SEQUENCES_UNSET; static inline int skip_prefix_in_csv(const char *value, const char *prefix, const char **out) @@ -45,8 +47,19 @@ static inline int skip_prefix_in_csv(const char *value, const char *prefix, return 1; } -static void parse_allow_control_characters(const char *value) +int sideband_allow_control_characters_config(const char *var, const char *value) { + switch (git_parse_maybe_bool(value)) { + case 0: + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; + return 0; + case 1: + allow_control_characters = ALLOW_ALL_CONTROL_CHARACTERS; + return 0; + default: + break; + } + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; while (*value) { if (skip_prefix_in_csv(value, "default", &value)) @@ -62,9 +75,37 @@ static void parse_allow_control_characters(const char *value) else if (skip_prefix_in_csv(value, "false", &value)) allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; else - warning(_("unrecognized value for `sideband." - "allowControlCharacters`: '%s'"), value); + warning(_("unrecognized value for '%s': '%s'"), var, value); } + return 0; +} + +static int sideband_config_callback(const char *var, const char *value, + const struct config_context *ctx UNUSED, + void *data UNUSED) +{ + if (!strcmp(var, "sideband.allowcontrolcharacters")) + return sideband_allow_control_characters_config(var, value); + + return 0; +} + +void sideband_apply_url_config(const char *url) +{ + struct urlmatch_config config = URLMATCH_CONFIG_INIT; + char *normalized_url; + + if (!url) + BUG("must not call sideband_apply_url_config(NULL)"); + + config.section = "sideband"; + config.collect_fn = sideband_config_callback; + + normalized_url = url_normalize(url, &config.url); + repo_config(the_repository, urlmatch_config_entry, &config); + free(normalized_url); + string_list_clear(&config.vars, 1); + urlmatch_config_release(&config); } /* Returns a color setting (GIT_COLOR_NEVER, etc). */ @@ -80,20 +121,12 @@ static enum git_colorbool use_sideband_colors(void) if (use_sideband_colors_cached != GIT_COLOR_UNKNOWN) return use_sideband_colors_cached; - switch (repo_config_get_maybe_bool(the_repository, "sideband.allowcontrolcharacters", &i)) { - case 0: /* Boolean value */ - allow_control_characters = i ? ALLOW_ALL_CONTROL_CHARACTERS : - ALLOW_NO_CONTROL_CHARACTERS; - break; - case -1: /* non-Boolean value */ - if (repo_config_get_string_tmp(the_repository, "sideband.allowcontrolcharacters", - &value)) - ; /* huh? `get_maybe_bool()` returned -1 */ - else - parse_allow_control_characters(value); - break; - default: - break; /* not configured */ + if (allow_control_characters == ALLOW_CONTROL_SEQUENCES_UNSET) { + if (!repo_config_get_value(the_repository, "sideband.allowcontrolcharacters", &value)) + sideband_allow_control_characters_config("sideband.allowcontrolcharacters", value); + + if (allow_control_characters == ALLOW_CONTROL_SEQUENCES_UNSET) + allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; } if (!repo_config_get_string_tmp(the_repository, key, &value)) diff --git a/sideband.h b/sideband.h index 5a25331be55d30..d15fa4015fa0a3 100644 --- a/sideband.h +++ b/sideband.h @@ -30,4 +30,18 @@ int demultiplex_sideband(const char *me, int status, void send_sideband(int fd, int band, const char *data, ssize_t sz, int packet_max); +/* + * Apply sideband configuration for the given URL. This should be called + * when a transport is created to allow URL-specific configuration of + * sideband behavior (e.g., sideband..allowControlCharacters). + */ +void sideband_apply_url_config(const char *url); + +/* + * Parse and set the sideband allow control characters configuration. + * The var parameter should be the key name (without section prefix). + * Returns 0 if the variable was recognized and handled, non-zero otherwise. + */ +int sideband_allow_control_characters_config(const char *var, const char *value); + #endif diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index 896e790bf955cd..3010913bb113e4 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -166,4 +166,28 @@ test_expect_success 'control sequences in sideband allowed by default' ' test_grep ! "\\^\\[\\[G" decoded ' +test_expect_success 'allow all control sequences for a specific URL' ' + write_script .git/eraser <<-\EOF && + printf "error: Ohai!\\r\\033[K" >&2 + exec "$@" + EOF + test_config_global uploadPack.packObjectsHook ./eraser && + test_commit one-more-please && + + rm -rf throw-away && + git clone --no-local . throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep ! "CSI \\[K" decoded && + test_grep "\\^\\[\\[K" decoded && + + rm -rf throw-away && + git -c "sideband.file://.allowControlCharacters=true" \ + clone --no-local "file://$PWD" throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep "CSI \\[K" decoded && + test_grep ! "\\^\\[\\[K" decoded +' + test_done diff --git a/transport.c b/transport.c index c7f06a7382e605..1602065953a54e 100644 --- a/transport.c +++ b/transport.c @@ -29,6 +29,7 @@ #include "object-name.h" #include "color.h" #include "bundle-uri.h" +#include "sideband.h" static enum git_colorbool transport_use_color = GIT_COLOR_UNKNOWN; static char transport_colors[][COLOR_MAXLEN] = { @@ -1245,6 +1246,8 @@ struct transport *transport_get(struct remote *remote, const char *url) ret->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; + sideband_apply_url_config(ret->url); + return ret; } From 826cc4722088a02d0ae240c1267b5b74d476b153 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 5 Mar 2026 15:34:51 -0800 Subject: [PATCH 006/185] sideband: drop 'default' configuration The topic so far allows users to tweak the configuration variable sideband.allowControlCharacters to override the hardcoded default, but among which there is the value called 'default'. The plan [*] of the series is to loosen the setting by a later commit in the series and schedule it to tighten at the Git 3.0 boundary for end users, at which point, the meaning of this 'default' value will change. Which is a dubious design. A user expresses their preference by setting configuration variable in order to guard against sudden change brought in by changes to the hardcoded default behaviour, and letting them set it to 'default' that will change at the Git 3.0 boundary defeats its purpose. If a user wants to say "I am easy and can go with whatever hardcoded default Git implementors choose for me", they simply leave the configuration variable unspecified. Let's remove it from the state before Git 3.0 so that those users who set it to 'default' will not see the behaviour changed under their feet all of sudden. Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 1 - sideband.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 32088bbf2f0a40..96fade7f5fee39 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -6,7 +6,6 @@ sideband.allowControlCharacters:: a comma-separated list of the following keywords): + -- - `default`:: `color`:: Allow ANSI color sequences, line feeds and horizontal tabs, but mask all other control characters. This is the default. diff --git a/sideband.c b/sideband.c index a90db9e2880cba..04282a568edd90 100644 --- a/sideband.c +++ b/sideband.c @@ -33,8 +33,8 @@ static enum { ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, ALLOW_ANSI_ERASE = 1<<2, - ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES } allow_control_characters = ALLOW_CONTROL_SEQUENCES_UNSET; static inline int skip_prefix_in_csv(const char *value, const char *prefix, @@ -62,9 +62,7 @@ int sideband_allow_control_characters_config(const char *var, const char *value) allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; while (*value) { - if (skip_prefix_in_csv(value, "default", &value)) - allow_control_characters |= ALLOW_DEFAULT_ANSI_SEQUENCES; - else if (skip_prefix_in_csv(value, "color", &value)) + if (skip_prefix_in_csv(value, "color", &value)) allow_control_characters |= ALLOW_ANSI_COLOR_SEQUENCES; else if (skip_prefix_in_csv(value, "cursor", &value)) allow_control_characters |= ALLOW_ANSI_CURSOR_MOVEMENTS; From 818fbfd208f919e7a4fd9c827b65e5ce5372479b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:52 -0800 Subject: [PATCH 007/185] sideband: delay sanitizing by default to Git v3.0 The sideband sanitization patches allow ANSI color sequences through by default, preserving compatibility with pre-receive hooks that provide colored output during `git push`. Even so, there is concern that changing any default behavior in a minor release may have unforeseen consequences. To accommodate this, defer the secure-by-default behavior to Git v3.0, where breaking changes are expected. This gives users and tooling time to prepare, while committing to address CVE-2024-52005 in Git v3.0. Signed-off-by: Johannes Schindelin [jc: adjusted for the removal of 'default' value] Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 12 ++++++++++-- sideband.c | 6 +++++- t/t5409-colorize-remote-messages.sh | 18 +++++++++++++----- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 96fade7f5fee39..ddba93393ccadc 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -1,8 +1,16 @@ sideband.allowControlCharacters:: +ifdef::with-breaking-changes[] By default, control characters that are delivered via the sideband are masked, except ANSI color sequences. This prevents potentially - unwanted ANSI escape sequences from being sent to the terminal. Use - this config setting to override this behavior (the value can be + unwanted ANSI escape sequences from being sent to the terminal. +endif::with-breaking-changes[] +ifndef::with-breaking-changes[] + By default, no control characters delivered via the sideband + are masked. This is unsafe and will change in Git v3.* to only + allow ANSI color sequences by default, preventing potentially + unwanted ANSI escape sequences from being sent to the terminal. +endif::with-breaking-changes[] + Use this config setting to override this behavior (the value can be a comma-separated list of the following keywords): + -- diff --git a/sideband.c b/sideband.c index 04282a568edd90..5fb60e52bf00b2 100644 --- a/sideband.c +++ b/sideband.c @@ -34,7 +34,11 @@ static enum { ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, ALLOW_ANSI_ERASE = 1<<2, ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, - ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES +#ifdef WITH_BREAKING_CHANGES + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, +#else + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ALL_CONTROL_CHARACTERS, +#endif } allow_control_characters = ALLOW_CONTROL_SEQUENCES_UNSET; static inline int skip_prefix_in_csv(const char *value, const char *prefix, diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index 3010913bb113e4..07cbc62736bd26 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -98,6 +98,13 @@ test_expect_success 'fallback to color.ui' ' grep "error: error" decoded ' +if test_have_prereq WITH_BREAKING_CHANGES +then + TURN_ON_SANITIZING=already.turned=on +else + TURN_ON_SANITIZING=sideband.allowControlCharacters=color +fi + test_expect_success 'disallow (color) control sequences in sideband' ' write_script .git/color-me-surprised <<-\EOF && printf "error: Have you \\033[31mread\\033[m this?\\a\\n" >&2 @@ -106,7 +113,7 @@ test_expect_success 'disallow (color) control sequences in sideband' ' test_config_global uploadPack.packObjectsHook ./color-me-surprised && test_commit need-at-least-one-commit && - git clone --no-local . throw-away 2>stderr && + git -c $TURN_ON_SANITIZING clone --no-local . throw-away 2>stderr && test_decode_color decoded && test_grep RED decoded && test_grep "\\^G" stderr && @@ -138,7 +145,7 @@ test_decode_csi() { }' } -test_expect_success 'control sequences in sideband allowed by default' ' +test_expect_success 'control sequences in sideband allowed by default (in Git v3.8)' ' write_script .git/color-me-surprised <<-\EOF && printf "error: \\033[31mcolor\\033[m\\033[Goverwrite\\033[Gerase\\033[K\\033?25l\\n" >&2 exec "$@" @@ -147,7 +154,7 @@ test_expect_success 'control sequences in sideband allowed by default' ' test_commit need-at-least-one-commit-at-least && rm -rf throw-away && - git clone --no-local . throw-away 2>stderr && + git -c $TURN_ON_SANITIZING clone --no-local . throw-away 2>stderr && test_decode_color color-decoded && test_decode_csi decoded && test_grep ! "CSI \\[K" decoded && @@ -175,14 +182,15 @@ test_expect_success 'allow all control sequences for a specific URL' ' test_commit one-more-please && rm -rf throw-away && - git clone --no-local . throw-away 2>stderr && + git -c $TURN_ON_SANITIZING clone --no-local . throw-away 2>stderr && test_decode_color color-decoded && test_decode_csi decoded && test_grep ! "CSI \\[K" decoded && test_grep "\\^\\[\\[K" decoded && rm -rf throw-away && - git -c "sideband.file://.allowControlCharacters=true" \ + git -c sideband.allowControlCharacters=false \ + -c "sideband.file://.allowControlCharacters=true" \ clone --no-local "file://$PWD" throw-away 2>stderr && test_decode_color color-decoded && test_decode_csi decoded && From f8e90b972ef9567df2d6983ae2c5f1f2659e86ad Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Thu, 5 Mar 2026 17:55:47 -0600 Subject: [PATCH 008/185] contrib/subtree: reduce function side-effects `process_subtree_split_trailer()` communicates its return value to the caller by setting a variable (`sub`) that is also defined by the calling function. This is both unclear and encourages side-effects. Invoke this function in a sub-shell instead. Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 791fd8260c4703..bae5d9170bd7a3 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -373,6 +373,10 @@ try_remove_previous () { } # Usage: process_subtree_split_trailer SPLIT_HASH MAIN_HASH [REPOSITORY] +# +# Parse SPLIT_HASH as a commit. If the commit is not found, fetches +# REPOSITORY and tries again. If found, prints full commit hash. +# Otherwise, dies. process_subtree_split_trailer () { assert test $# -ge 2 assert test $# -le 3 @@ -400,6 +404,7 @@ process_subtree_split_trailer () { die "$fail_msg" fi fi + echo "${sub}" } # Usage: find_latest_squash DIR [REPOSITORY] @@ -432,7 +437,7 @@ find_latest_squash () { main="$b" ;; git-subtree-split:) - process_subtree_split_trailer "$b" "$sq" "$repository" + sub="$(process_subtree_split_trailer "$b" "$sq" "$repository")" || exit 1 ;; END) if test -n "$sub" @@ -489,7 +494,7 @@ find_existing_splits () { main="$b" ;; git-subtree-split:) - process_subtree_split_trailer "$b" "$sq" "$repository" + sub="$(process_subtree_split_trailer "$b" "$sq" "$repository")" || exit 1 ;; END) debug "Main is: '$main'" From 3b3ace4d5bb72cb1845e547439b53e00dcf49b8e Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Thu, 5 Mar 2026 17:55:48 -0600 Subject: [PATCH 009/185] contrib/subtree: functionalize split traversal `git subtree split` requires an ancestor-first history traversal. Refactor the existing rev-list traversal into its own function, `find_commits_to_split`. Pass unrevs via stdin to avoid limits on the maximum length of command-line arguments. Also remove an unnecessary `eval`. Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index bae5d9170bd7a3..c1756b3e74cc35 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -519,6 +519,31 @@ find_existing_splits () { done || exit $? } +# Usage: find_commits_to_split REV UNREVS [ARGS...] +# +# List each commit to split, with its parents. +# +# Specify the starting REV for the split, which is usually +# a branch tip. Populate UNREVS with the last --rejoin for +# this prefix, if any. Typically, `subtree split` ignores +# history prior to the last --rejoin... unless and if it +# becomes necessary to consider it. `find_existing_splits` is +# a convenient source of UNREVS. +# +# Remaining arguments are passed to rev-list. +# +# Outputs commits in ancestor-first order, one per line, with +# parent information. Outputs all parents before any child. +find_commits_to_split() { + assert test $# -ge 2 + rev="$1" + unrevs="$2" + shift 2 + + echo "$unrevs" | + git rev-list --topo-order --reverse --parents --stdin "$rev" "$@" +} + # Usage: copy_commit REV TREE FLAGS_STR copy_commit () { assert test $# = 3 @@ -976,12 +1001,11 @@ cmd_split () { # We can't restrict rev-list to only $dir here, because some of our # parents have the $dir contents the root, and those won't match. # (and rev-list --follow doesn't seem to solve this) - grl='git rev-list --topo-order --reverse --parents $rev $unrevs' - revmax=$(eval "$grl" | wc -l) + revmax="$(find_commits_to_split "$rev" "$unrevs" --count)" revcount=0 createcount=0 extracount=0 - eval "$grl" | + find_commits_to_split "$rev" "$unrevs" | while read rev parents do process_split_commit "$rev" "$parents" From c30871b91d4d01ddf24f8129e23aff9da0a57575 Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Thu, 5 Mar 2026 17:55:49 -0600 Subject: [PATCH 010/185] contrib/subtree: reduce recursion during split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Debian-alikes, POSIX sh has a hardcoded recursion depth of 1000. This limit operates like bash's `$FUNCNEST` [1], but it does not actually respect `$FUNCNEST`. This is non-standard behavior. On other distros, the sh recursion depth is limited only by the available stack size. With certain history graphs, subtree splits are recursive—with one recursion per commit. Attempting to split complex repos that have thousands of commits, like [2], may fail on these distros. Reduce the amount of recursion required by eagerly discovering the complete range of commits to process. The recursion is a side-effect of the rejoin-finder in `find_existing_splits`. Rejoin mode, as in git subtree split --rejoin -b hax main ... improves the speed of later splits by merging the split history back into `main`. This gives the splitting algorithm a stopping point. The rejoin maps one commit on `main` to one split commit on `hax`. If we encounter this commit, we know that it maps to `hax`. But this is only a single point in the history. Many splits require history from before the rejoin. See patch content for examples. If pre-rejoin history is required, `check_parents` recursively discovers each individual parent, with one recursion per commit. The recursion deepens the entire tree, even if an older rejoin is available. This quickly overwhelms the Debian sh stack. Instead of recursively processing each commit, process *all* the commits back to the next obvious starting point: i.e., either the next-oldest --rejoin or the beginning of history. This is where the recursion is likely to stop anyway. While this still requires recursion, it is *considerably* less recursive. [1]: https://www.gnu.org/software/bash/manual/html_node/Bash-Variables.html#index-FUNCNEST [2]: https://github.com/christian-heusel/aur.git Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 56 ++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index c1756b3e74cc35..c649a9e393a96c 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -315,6 +315,46 @@ cache_miss () { } # Usage: check_parents [REVS...] +# +# During a split, check that every commit in REVS has already been +# processed via `process_split_commit`. If not, deepen the history +# until it is. +# +# Commits authored by `subtree split` have to be created in the +# same order as every other git commit: ancestor-first, with new +# commits building on old commits. The traversal order normally +# ensures this is the case, but it also excludes --rejoins commits +# by default. +# +# The --rejoin tells us, "this mainline commit is equivalent to +# this split commit." The relationship is only known for that +# exact commit---and not before or after it. Frequently, commits +# prior to a rejoin are not needed... but, just as often, they +# are! Consider this history graph: +# +# --D--- +# / \ +# A--B--C--R--X--Y main +# / / +# a--b--c / split +# \ / +# --e--/ +# +# The main branch has commits A, B, and C. main is split into +# commits a, b, and c. The split history is rejoined at R. +# +# There are at least two cases where we might need the A-B-C +# history that is prior to R: +# +# 1. Commit D is based on history prior to R, but +# it isn't merged into mainline until after R. +# +# 2. Commit e is based on old split history. It is merged +# back into mainline with a subtree merge. Again, this +# happens after R. +# +# check_parents detects these cases and deepens the history +# to the next available rejoin. check_parents () { missed=$(cache_miss "$@") || exit $? local indent=$(($indent + 1)) @@ -322,8 +362,20 @@ check_parents () { do if ! test -r "$cachedir/notree/$miss" then - debug "incorrect order: $miss" - process_split_commit "$miss" "" + debug "found commit excluded by --rejoin: $miss. skipping to the next --rejoin..." + unrevs="$(find_existing_splits "$dir" "$miss" "$repository")" || exit 1 + + find_commits_to_split "$miss" "$unrevs" | + while read -r rev parents + do + process_split_commit "$rev" "$parents" + done + + if ! test -r "$cachedir/$miss" && + ! test -r "$cachedir/notree/$miss" + then + die "failed to deepen history at $miss" + fi fi done } From f584f9d36129f8af18251bd0f193c914ed8b0cfb Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 11 Mar 2026 14:19:38 +0000 Subject: [PATCH 011/185] promisor-remote: prevent lazy-fetch recursion in child fetch fetch_objects() spawns a child `git fetch` to lazily fill in missing objects. That child's index-pack, when it receives a thin pack containing a REF_DELTA against a still-missing base, calls promisor_remote_get_direct() -- which is fetch_objects() again. With negotiationAlgorithm=noop the client advertises no "have" lines, so a well-behaved server sends requested objects un-deltified or deltified only against objects in the same pack. A server that nevertheless sends REF_DELTA against a base the client does not have is misbehaving; however the client should not recurse unboundedly in response. Propagate GIT_NO_LAZY_FETCH=1 into the child fetch's environment so that if the child's index-pack encounters such a REF_DELTA, it hits the existing guard at the top of fetch_objects() and fails fast instead of recursing. Depth-1 lazy fetch (the whole point of fetch_objects()) is unaffected: only the child and its descendants see the variable. Add a test that injects a thin pack containing a REF_DELTA against a missing base via HTTP, triggering the recursion path through index-pack's promisor_remote_get_direct() call. With the fix, the child's fetch_objects() sees GIT_NO_LAZY_FETCH=1 and blocks the depth-2 fetch with a "lazy fetching disabled" warning. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- promisor-remote.c | 7 +++++ t/t5616-partial-clone.sh | 60 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/promisor-remote.c b/promisor-remote.c index 77ebf537e2b3ee..2f56e89404b19a 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -42,6 +42,13 @@ static int fetch_objects(struct repository *repo, child.in = -1; if (repo != the_repository) prepare_other_repo_env(&child.env, repo->gitdir); + /* + * Prevent the child's index-pack from recursing back into + * fetch_objects() when resolving REF_DELTA bases it does not + * have. With noop negotiation the server should never need + * to send such deltas, so a depth-2 fetch would not help. + */ + strvec_pushf(&child.env, "%s=1", NO_LAZY_FETCH_ENVIRONMENT); strvec_pushl(&child.args, "-c", "fetch.negotiationAlgorithm=noop", "fetch", remote_name, "--no-tags", "--no-write-fetch-head", "--recurse-submodules=no", diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 1e354e057fa12c..27f131c8d9ba57 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -907,6 +907,66 @@ test_expect_success PERL_TEST_HELPERS 'tolerate server sending REF_DELTA against ! test -e "$HTTPD_ROOT_PATH/one-time-script" ' +test_expect_success PERL_TEST_HELPERS 'lazy-fetch of REF_DELTA with missing base does not recurse' ' + SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" && + rm -rf "$SERVER" repo && + test_create_repo "$SERVER" && + test_config -C "$SERVER" uploadpack.allowfilter 1 && + test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 && + + # Create a commit with 2 blobs to be used as delta base and content. + for i in $(test_seq 10) + do + echo "this is a line" >>"$SERVER/foo.txt" && + echo "this is another line" >>"$SERVER/bar.txt" || return 1 + done && + git -C "$SERVER" add foo.txt bar.txt && + git -C "$SERVER" commit -m initial && + BLOB_FOO=$(git -C "$SERVER" rev-parse HEAD:foo.txt) && + BLOB_BAR=$(git -C "$SERVER" rev-parse HEAD:bar.txt) && + + # Partial clone with blob:none. The client has commits and + # trees but no blobs. + test_config -C "$SERVER" protocol.version 2 && + git -c protocol.version=2 clone --no-checkout \ + --filter=blob:none $HTTPD_URL/one_time_script/server repo && + + # Sanity check: client does not have either blob locally. + git -C repo rev-list --objects --ignore-missing \ + -- $BLOB_FOO >objlist && + test_line_count = 0 objlist && + + # Craft a thin pack where BLOB_FOO is a REF_DELTA against + # BLOB_BAR. Since the client has neither blob (blob:none + # filter), the delta base will be missing. This simulates a + # misbehaving server that sends REF_DELTA against an object + # the client does not have. + test-tool -C "$SERVER" pack-deltas --num-objects=1 >thin.pack <<-EOF && + REF_DELTA $BLOB_FOO $BLOB_BAR + EOF + + replace_packfile thin.pack && + + # Trigger a lazy fetch for BLOB_FOO. The child fetch spawned + # by fetch_objects() receives our crafted thin pack. Its + # index-pack encounters the missing delta base (BLOB_BAR) and + # tries to lazy-fetch it via promisor_remote_get_direct(). + # + # With the fix: fetch_objects() propagates GIT_NO_LAZY_FETCH=1 + # to the child, so the depth-2 fetch is blocked and we see the + # "lazy fetching disabled" warning. The object cannot be + # resolved, so cat-file fails. + # + # Without the fix: the depth-2 fetch would proceed, potentially + # recursing unboundedly with a persistently misbehaving server. + test_must_fail git -C repo -c protocol.version=2 \ + cat-file -p $BLOB_FOO 2>err && + test_grep "lazy fetching disabled" err && + + # Ensure that the one-time-script was used. + ! test -e "$HTTPD_ROOT_PATH/one-time-script" +' + # DO NOT add non-httpd-specific tests here, because the last part of this # test script is only executed when httpd is available and enabled. From acefb71d0d4e5a16ccda9226acac0920202de771 Mon Sep 17 00:00:00 2001 From: Alan Braithwaite Date: Sun, 15 Mar 2026 05:37:02 +0000 Subject: [PATCH 012/185] clone: add clone..defaultObjectFilter config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new configuration option that lets users specify a default partial clone filter, optionally scoped by URL pattern. When cloning a repository whose URL matches a configured pattern, git-clone automatically applies the filter, equivalent to passing --filter on the command line. [clone] defaultObjectFilter = blob:limit=1m [clone "https://github.com/"] defaultObjectFilter = blob:limit=5m [clone "https://internal.corp.com/large-project/"] defaultObjectFilter = blob:none The bare clone.defaultObjectFilter applies to all clones. The URL-qualified form clone..defaultObjectFilter restricts the setting to matching URLs. URL matching uses the existing urlmatch_config_entry() infrastructure, following the same rules as http..* — a domain, namespace, or specific project can be matched, and the most specific match wins. The config only affects the initial clone. Once the clone completes, the filter is recorded in remote..partialCloneFilter, so subsequent fetches inherit it automatically. An explicit --filter on the command line takes precedence, and --no-filter defeats the configured default entirely. Signed-off-by: Alan Braithwaite Signed-off-by: Junio C Hamano --- Documentation/config/clone.adoc | 34 +++++++++ builtin/clone.c | 54 ++++++++++++++ t/t5616-partial-clone.sh | 126 ++++++++++++++++++++++++++++++++ 3 files changed, 214 insertions(+) diff --git a/Documentation/config/clone.adoc b/Documentation/config/clone.adoc index 0a10efd174ea4b..1d6c0957a066c5 100644 --- a/Documentation/config/clone.adoc +++ b/Documentation/config/clone.adoc @@ -21,3 +21,37 @@ endif::[] If a partial clone filter is provided (see `--filter` in linkgit:git-rev-list[1]) and `--recurse-submodules` is used, also apply the filter to submodules. + +`clone.defaultObjectFilter`:: +`clone..defaultObjectFilter`:: + When set to a filter spec string (e.g., `blob:limit=1m`, + `blob:none`, `tree:0`), linkgit:git-clone[1] will automatically + use `--filter=` to enable partial clone behavior. + Objects matching the filter are excluded from the initial + transfer and lazily fetched on demand (e.g., during checkout). + Subsequent fetches inherit the filter via the per-remote config + that is written during the clone. ++ +The bare `clone.defaultObjectFilter` applies to all clones. The +URL-qualified form `clone..defaultObjectFilter` restricts the +setting to clones whose URL matches ``, following the same +rules as `http..*` (see linkgit:git-config[1]). The most +specific URL match wins. You can match a domain, a namespace, or a +specific project: ++ +---- +[clone] + defaultObjectFilter = blob:limit=1m + +[clone "https://github.com/"] + defaultObjectFilter = blob:limit=5m + +[clone "https://internal.corp.com/large-project/"] + defaultObjectFilter = blob:none +---- ++ +An explicit `--filter` option on the command line takes precedence +over this config, and `--no-filter` defeats it entirely to force a +full clone. Only affects the initial clone; it has no effect on +later fetches into an existing repository. If the server does not +support object filtering, the setting is silently ignored. diff --git a/builtin/clone.c b/builtin/clone.c index fba3c9c508bc06..ed3af9325921f1 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -44,6 +44,7 @@ #include "path.h" #include "pkt-line.h" #include "list-objects-filter-options.h" +#include "urlmatch.h" #include "hook.h" #include "bundle.h" #include "bundle-uri.h" @@ -759,6 +760,51 @@ static int git_clone_config(const char *k, const char *v, return git_default_config(k, v, ctx, cb); } +static int clone_filter_collect(const char *var, const char *value, + const struct config_context *ctx UNUSED, + void *cb) +{ + char **filter_spec_p = cb; + + if (!strcmp(var, "clone.defaultobjectfilter")) { + if (!value) + return config_error_nonbool(var); + free(*filter_spec_p); + *filter_spec_p = xstrdup(value); + } + return 0; +} + +/* + * Look up clone.defaultObjectFilter or clone..defaultObjectFilter + * using the urlmatch infrastructure. A URL-qualified entry that matches + * the clone URL takes precedence over the bare form, following the same + * rules as http..* configuration variables. + */ +static char *get_default_object_filter(const char *url) +{ + struct urlmatch_config config = URLMATCH_CONFIG_INIT; + char *filter_spec = NULL; + char *normalized_url; + + config.section = "clone"; + config.key = "defaultobjectfilter"; + config.collect_fn = clone_filter_collect; + config.cb = &filter_spec; + + normalized_url = url_normalize(url, &config.url); + if (!normalized_url) { + urlmatch_config_release(&config); + return NULL; + } + + repo_config(the_repository, urlmatch_config_entry, &config); + free(normalized_url); + urlmatch_config_release(&config); + + return filter_spec; +} + static int write_one_config(const char *key, const char *value, const struct config_context *ctx, void *data) @@ -1059,6 +1105,14 @@ int cmd_clone(int argc, } else die(_("repository '%s' does not exist"), repo_name); + if (!filter_options.choice && !filter_options.no_filter) { + char *config_filter = get_default_object_filter(repo); + if (config_filter) { + parse_list_objects_filter(&filter_options, config_filter); + free(config_filter); + } + } + /* no need to be strict, transport_set_option() will validate it again */ if (option_depth && atoi(option_depth) < 1) die(_("depth %s is not a positive number"), option_depth); diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 1c2805accac636..cff3e06873bdb7 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -723,6 +723,132 @@ test_expect_success 'after fetching descendants of non-promisor commits, gc work git -C partial gc --prune=now ' +# Test clone..defaultObjectFilter config + +test_expect_success 'setup for clone.defaultObjectFilter tests' ' + git init default-filter-src && + echo "small" >default-filter-src/small.txt && + git -C default-filter-src add . && + git -C default-filter-src commit -m "initial" && + + git clone --bare "file://$(pwd)/default-filter-src" default-filter-srv.bare && + git -C default-filter-srv.bare config --local uploadpack.allowfilter 1 && + git -C default-filter-srv.bare config --local uploadpack.allowanysha1inwant 1 +' + +test_expect_success 'clone with clone..defaultObjectFilter applies filter' ' + test_when_finished "rm -r default-filter-clone" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:limit=1k" clone \ + "$SERVER_URL" default-filter-clone && + + echo true >expect && + git -C default-filter-clone config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "blob:limit=1024" >expect && + git -C default-filter-clone config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'clone with --filter overrides clone..defaultObjectFilter' ' + test_when_finished "rm -r default-filter-override" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:limit=1k" \ + clone --filter=blob:none "$SERVER_URL" default-filter-override && + + echo "blob:none" >expect && + git -C default-filter-override config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'clone with clone..defaultObjectFilter=blob:none works' ' + test_when_finished "rm -r default-filter-blobnone" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" clone \ + "$SERVER_URL" default-filter-blobnone && + + echo true >expect && + git -C default-filter-blobnone config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "blob:none" >expect && + git -C default-filter-blobnone config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'clone..defaultObjectFilter with tree:0 works' ' + test_when_finished "rm -r default-filter-tree0" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=tree:0" clone \ + "$SERVER_URL" default-filter-tree0 && + + echo true >expect && + git -C default-filter-tree0 config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "tree:0" >expect && + git -C default-filter-tree0 config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'most specific URL match wins for clone.defaultObjectFilter' ' + test_when_finished "rm -r default-filter-url-specific" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git \ + -c "clone.file://.defaultObjectFilter=blob:limit=1k" \ + -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" \ + clone "$SERVER_URL" default-filter-url-specific && + + echo "blob:none" >expect && + git -C default-filter-url-specific config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'non-matching URL does not apply clone.defaultObjectFilter' ' + test_when_finished "rm -r default-filter-url-nomatch" && + git \ + -c "clone.https://other.example.com/.defaultObjectFilter=blob:none" \ + clone "file://$(pwd)/default-filter-srv.bare" default-filter-url-nomatch && + + test_must_fail git -C default-filter-url-nomatch config --local remote.origin.promisor +' + +test_expect_success 'bare clone.defaultObjectFilter applies to all clones' ' + test_when_finished "rm -r default-filter-bare-key" && + git -c clone.defaultObjectFilter=blob:none \ + clone "file://$(pwd)/default-filter-srv.bare" default-filter-bare-key && + + echo true >expect && + git -C default-filter-bare-key config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "blob:none" >expect && + git -C default-filter-bare-key config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'URL-specific clone.defaultObjectFilter overrides bare form' ' + test_when_finished "rm -r default-filter-url-over-bare" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git \ + -c clone.defaultObjectFilter=blob:limit=1k \ + -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" \ + clone "$SERVER_URL" default-filter-url-over-bare && + + echo "blob:none" >expect && + git -C default-filter-url-over-bare config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success '--no-filter defeats clone.defaultObjectFilter' ' + test_when_finished "rm -r default-filter-no-filter" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" \ + clone --no-filter "$SERVER_URL" default-filter-no-filter && + + test_must_fail git -C default-filter-no-filter config --local remote.origin.promisor +' . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd From 386fe44951c3d0f8eaee98809aae5f2d886bac83 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:14 +0900 Subject: [PATCH 013/185] parseopt: extract subcommand handling from parse_options_step() Move the subcommand branch out of parse_options_step() into a new handle_subcommand() helper. Also, make parse_subcommand() return a simple success/failure status. This removes the switch over impossible parse_opt_result values and makes the non-option path easier to follow and maintain. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- parse-options.c | 87 ++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/parse-options.c b/parse-options.c index c9cafc21b90355..02a4f00919f6d6 100644 --- a/parse-options.c +++ b/parse-options.c @@ -605,17 +605,44 @@ static enum parse_opt_result parse_nodash_opt(struct parse_opt_ctx_t *p, return PARSE_OPT_ERROR; } -static enum parse_opt_result parse_subcommand(const char *arg, - const struct option *options) +static int parse_subcommand(const char *arg, const struct option *options) { - for (; options->type != OPTION_END; options++) - if (options->type == OPTION_SUBCOMMAND && - !strcmp(options->long_name, arg)) { - *(parse_opt_subcommand_fn **)options->value = options->subcommand_fn; - return PARSE_OPT_SUBCOMMAND; - } + for (; options->type != OPTION_END; options++) { + parse_opt_subcommand_fn **opt_val; - return PARSE_OPT_UNKNOWN; + if (options->type != OPTION_SUBCOMMAND || + strcmp(options->long_name, arg)) + continue; + + opt_val = options->value; + *opt_val = options->subcommand_fn; + return 0; + } + + return -1; +} + +static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, + const char *arg, + const struct option *options, + const char * const usagestr[]) +{ + int err = parse_subcommand(arg, options); + + if (!err) + return PARSE_OPT_SUBCOMMAND; + + /* + * arg is neither a short or long option nor a subcommand. Since this + * command has a default operation mode, we have to treat this arg and + * all remaining args as args meant to that default operation mode. + * So we are done parsing. + */ + if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) + return PARSE_OPT_DONE; + + error(_("unknown subcommand: `%s'"), arg); + usage_with_options(usagestr, options); } static void check_typos(const char *arg, const struct option *options) @@ -990,38 +1017,16 @@ enum parse_opt_result parse_options_step(struct parse_opt_ctx_t *ctx, if (*arg != '-' || !arg[1]) { if (parse_nodash_opt(ctx, arg, options) == 0) continue; - if (!ctx->has_subcommands) { - if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) - return PARSE_OPT_NON_OPTION; - ctx->out[ctx->cpidx++] = ctx->argv[0]; - continue; - } - switch (parse_subcommand(arg, options)) { - case PARSE_OPT_SUBCOMMAND: - return PARSE_OPT_SUBCOMMAND; - case PARSE_OPT_UNKNOWN: - if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) - /* - * arg is neither a short or long - * option nor a subcommand. Since - * this command has a default - * operation mode, we have to treat - * this arg and all remaining args - * as args meant to that default - * operation mode. - * So we are done parsing. - */ - return PARSE_OPT_DONE; - error(_("unknown subcommand: `%s'"), arg); - usage_with_options(usagestr, options); - case PARSE_OPT_COMPLETE: - case PARSE_OPT_HELP: - case PARSE_OPT_ERROR: - case PARSE_OPT_DONE: - case PARSE_OPT_NON_OPTION: - /* Impossible. */ - BUG("parse_subcommand() cannot return these"); - } + + if (ctx->has_subcommands) + return handle_subcommand(ctx, arg, options, + usagestr); + + if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) + return PARSE_OPT_NON_OPTION; + + ctx->out[ctx->cpidx++] = ctx->argv[0]; + continue; } /* lone -h asks for help */ From e0245a1169b2acddd94be4da02c426419507f0b5 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:15 +0900 Subject: [PATCH 014/185] help: make autocorrect handling reusable Move config parsing and prompt/delay handling into autocorrect.c and expose them in autocorrect.h. This makes autocorrect reusable regardless of which target links against it. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- Makefile | 1 + autocorrect.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ autocorrect.h | 16 ++++++++++++ help.c | 64 +++------------------------------------------ meson.build | 1 + 5 files changed, 94 insertions(+), 60 deletions(-) create mode 100644 autocorrect.c create mode 100644 autocorrect.h diff --git a/Makefile b/Makefile index f3264d0a37cc50..6111631c2caaea 100644 --- a/Makefile +++ b/Makefile @@ -1098,6 +1098,7 @@ LIB_OBJS += archive-tar.o LIB_OBJS += archive-zip.o LIB_OBJS += archive.o LIB_OBJS += attr.o +LIB_OBJS += autocorrect.o LIB_OBJS += base85.o LIB_OBJS += bisect.o LIB_OBJS += blame.o diff --git a/autocorrect.c b/autocorrect.c new file mode 100644 index 00000000000000..97145d3a53ce07 --- /dev/null +++ b/autocorrect.c @@ -0,0 +1,72 @@ +#include "git-compat-util.h" +#include "autocorrect.h" +#include "config.h" +#include "parse.h" +#include "strbuf.h" +#include "prompt.h" +#include "gettext.h" + +static int parse_autocorrect(const char *value) +{ + switch (git_parse_maybe_bool_text(value)) { + case 1: + return AUTOCORRECT_IMMEDIATELY; + case 0: + return AUTOCORRECT_SHOW; + default: /* other random text */ + break; + } + + if (!strcmp(value, "prompt")) + return AUTOCORRECT_PROMPT; + if (!strcmp(value, "never")) + return AUTOCORRECT_NEVER; + if (!strcmp(value, "immediate")) + return AUTOCORRECT_IMMEDIATELY; + if (!strcmp(value, "show")) + return AUTOCORRECT_SHOW; + + return 0; +} + +void autocorrect_resolve_config(const char *var, const char *value, + const struct config_context *ctx, void *data) +{ + int *out = data; + + if (!strcmp(var, "help.autocorrect")) { + int v = parse_autocorrect(value); + + if (!v) { + v = git_config_int(var, value, ctx->kvi); + if (v < 0 || v == 1) + v = AUTOCORRECT_IMMEDIATELY; + } + + *out = v; + } +} + +void autocorrect_confirm(int autocorrect, const char *assumed) +{ + if (autocorrect == AUTOCORRECT_IMMEDIATELY) { + fprintf_ln(stderr, + _("Continuing under the assumption that you meant '%s'."), + assumed); + } else if (autocorrect == AUTOCORRECT_PROMPT) { + char *answer; + struct strbuf msg = STRBUF_INIT; + + strbuf_addf(&msg, _("Run '%s' instead [y/N]? "), assumed); + answer = git_prompt(msg.buf, PROMPT_ECHO); + strbuf_release(&msg); + + if (!(starts_with(answer, "y") || starts_with(answer, "Y"))) + exit(1); + } else { + fprintf_ln(stderr, + _("Continuing in %0.1f seconds, assuming that you meant '%s'."), + (float)autocorrect / 10.0, assumed); + sleep_millisec(autocorrect * 100); + } +} diff --git a/autocorrect.h b/autocorrect.h new file mode 100644 index 00000000000000..f5fadf9d96059b --- /dev/null +++ b/autocorrect.h @@ -0,0 +1,16 @@ +#ifndef AUTOCORRECT_H +#define AUTOCORRECT_H + +#define AUTOCORRECT_SHOW (-4) +#define AUTOCORRECT_PROMPT (-3) +#define AUTOCORRECT_NEVER (-2) +#define AUTOCORRECT_IMMEDIATELY (-1) + +struct config_context; + +void autocorrect_resolve_config(const char *var, const char *value, + const struct config_context *ctx, void *data); + +void autocorrect_confirm(int autocorrect, const char *assumed); + +#endif /* AUTOCORRECT_H */ diff --git a/help.c b/help.c index 95f576c5c81d9f..4acb6ca585ff8f 100644 --- a/help.c +++ b/help.c @@ -22,6 +22,7 @@ #include "repository.h" #include "alias.h" #include "utf8.h" +#include "autocorrect.h" #ifndef NO_CURL #include "git-curl-compat.h" /* For LIBCURL_VERSION only */ @@ -541,34 +542,6 @@ struct help_unknown_cmd_config { struct cmdnames aliases; }; -#define AUTOCORRECT_SHOW (-4) -#define AUTOCORRECT_PROMPT (-3) -#define AUTOCORRECT_NEVER (-2) -#define AUTOCORRECT_IMMEDIATELY (-1) - -static int parse_autocorrect(const char *value) -{ - switch (git_parse_maybe_bool_text(value)) { - case 1: - return AUTOCORRECT_IMMEDIATELY; - case 0: - return AUTOCORRECT_SHOW; - default: /* other random text */ - break; - } - - if (!strcmp(value, "prompt")) - return AUTOCORRECT_PROMPT; - if (!strcmp(value, "never")) - return AUTOCORRECT_NEVER; - if (!strcmp(value, "immediate")) - return AUTOCORRECT_IMMEDIATELY; - if (!strcmp(value, "show")) - return AUTOCORRECT_SHOW; - - return 0; -} - static int git_unknown_cmd_config(const char *var, const char *value, const struct config_context *ctx, void *cb) @@ -577,17 +550,7 @@ static int git_unknown_cmd_config(const char *var, const char *value, const char *subsection, *key; size_t subsection_len; - if (!strcmp(var, "help.autocorrect")) { - int v = parse_autocorrect(value); - - if (!v) { - v = git_config_int(var, value, ctx->kvi); - if (v < 0 || v == 1) - v = AUTOCORRECT_IMMEDIATELY; - } - - cfg->autocorrect = v; - } + autocorrect_resolve_config(var, value, ctx, &cfg->autocorrect); /* Also use aliases for command lookup */ if (!parse_config_key(var, "alias", &subsection, &subsection_len, @@ -724,27 +687,8 @@ char *help_unknown_cmd(const char *cmd) _("WARNING: You called a Git command named '%s', " "which does not exist."), cmd); - if (cfg.autocorrect == AUTOCORRECT_IMMEDIATELY) - fprintf_ln(stderr, - _("Continuing under the assumption that " - "you meant '%s'."), - assumed); - else if (cfg.autocorrect == AUTOCORRECT_PROMPT) { - char *answer; - struct strbuf msg = STRBUF_INIT; - strbuf_addf(&msg, _("Run '%s' instead [y/N]? "), assumed); - answer = git_prompt(msg.buf, PROMPT_ECHO); - strbuf_release(&msg); - if (!(starts_with(answer, "y") || - starts_with(answer, "Y"))) - exit(1); - } else { - fprintf_ln(stderr, - _("Continuing in %0.1f seconds, " - "assuming that you meant '%s'."), - (float)cfg.autocorrect/10.0, assumed); - sleep_millisec(cfg.autocorrect * 100); - } + + autocorrect_confirm(cfg.autocorrect, assumed); cmdnames_release(&cfg.aliases); cmdnames_release(&main_cmds); diff --git a/meson.build b/meson.build index 4b536e012481ca..0429e80a5c966c 100644 --- a/meson.build +++ b/meson.build @@ -283,6 +283,7 @@ libgit_sources = [ 'archive-zip.c', 'archive.c', 'attr.c', + 'autocorrect.c', 'base85.c', 'bisect.c', 'blame.c', From 916b96c0ec006216fcb9475fea37c9bc8e6b6505 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:16 +0900 Subject: [PATCH 015/185] help: move tty check for autocorrection to autocorrect.c TTY checking is the autocorrect config parser's responsibility. It must ensure the parsed value is correct and reliable. Thus, move the check to autocorrect_resolve_config(). Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 24 ++++++++++++++++-------- help.c | 6 ------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index 97145d3a53ce07..887d2396da44b9 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -33,18 +33,26 @@ void autocorrect_resolve_config(const char *var, const char *value, const struct config_context *ctx, void *data) { int *out = data; + int parsed; - if (!strcmp(var, "help.autocorrect")) { - int v = parse_autocorrect(value); + if (strcmp(var, "help.autocorrect")) + return; - if (!v) { - v = git_config_int(var, value, ctx->kvi); - if (v < 0 || v == 1) - v = AUTOCORRECT_IMMEDIATELY; - } + parsed = parse_autocorrect(value); - *out = v; + /* + * Disable autocorrection prompt in a non-interactive session + */ + if (parsed == AUTOCORRECT_PROMPT && (!isatty(0) || !isatty(2))) + parsed = AUTOCORRECT_NEVER; + + if (!parsed) { + parsed = git_config_int(var, value, ctx->kvi); + if (parsed < 0 || parsed == 1) + parsed = AUTOCORRECT_IMMEDIATELY; } + + *out = parsed; } void autocorrect_confirm(int autocorrect, const char *assumed) diff --git a/help.c b/help.c index 4acb6ca585ff8f..983057970e7c7f 100644 --- a/help.c +++ b/help.c @@ -607,12 +607,6 @@ char *help_unknown_cmd(const char *cmd) read_early_config(the_repository, git_unknown_cmd_config, &cfg); - /* - * Disable autocorrection prompt in a non-interactive session - */ - if ((cfg.autocorrect == AUTOCORRECT_PROMPT) && (!isatty(0) || !isatty(2))) - cfg.autocorrect = AUTOCORRECT_NEVER; - if (cfg.autocorrect == AUTOCORRECT_NEVER) { fprintf_ln(stderr, _("git: '%s' is not a git command. See 'git --help'."), cmd); exit(1); From a6e0ccbd38e4b274fa2360bc8a49d8049d1ded95 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:17 +0900 Subject: [PATCH 016/185] autocorrect: use mode and delay instead of magic numbers Drop magic numbers and describe autocorrect config with a mode enum and an integer delay. This reduces errors when mutating config values and makes the values easier to access. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 46 +++++++++++++++++++++++----------------------- autocorrect.h | 20 ++++++++++++++------ help.c | 9 +++++---- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index 887d2396da44b9..2484546fc731d9 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -6,7 +6,7 @@ #include "prompt.h" #include "gettext.h" -static int parse_autocorrect(const char *value) +static enum autocorrect_mode parse_autocorrect(const char *value) { switch (git_parse_maybe_bool_text(value)) { case 1: @@ -19,49 +19,49 @@ static int parse_autocorrect(const char *value) if (!strcmp(value, "prompt")) return AUTOCORRECT_PROMPT; - if (!strcmp(value, "never")) + else if (!strcmp(value, "never")) return AUTOCORRECT_NEVER; - if (!strcmp(value, "immediate")) + else if (!strcmp(value, "immediate")) return AUTOCORRECT_IMMEDIATELY; - if (!strcmp(value, "show")) + else if (!strcmp(value, "show")) return AUTOCORRECT_SHOW; - - return 0; + else + return AUTOCORRECT_DELAY; } void autocorrect_resolve_config(const char *var, const char *value, const struct config_context *ctx, void *data) { - int *out = data; - int parsed; + struct autocorrect *conf = data; if (strcmp(var, "help.autocorrect")) return; - parsed = parse_autocorrect(value); + conf->mode = parse_autocorrect(value); /* * Disable autocorrection prompt in a non-interactive session */ - if (parsed == AUTOCORRECT_PROMPT && (!isatty(0) || !isatty(2))) - parsed = AUTOCORRECT_NEVER; + if (conf->mode == AUTOCORRECT_PROMPT && (!isatty(0) || !isatty(2))) + conf->mode = AUTOCORRECT_NEVER; - if (!parsed) { - parsed = git_config_int(var, value, ctx->kvi); - if (parsed < 0 || parsed == 1) - parsed = AUTOCORRECT_IMMEDIATELY; - } + if (conf->mode == AUTOCORRECT_DELAY) { + conf->delay = git_config_int(var, value, ctx->kvi); - *out = parsed; + if (!conf->delay) + conf->mode = AUTOCORRECT_SHOW; + else if (conf->delay < 0 || conf->delay == 1) + conf->mode = AUTOCORRECT_IMMEDIATELY; + } } -void autocorrect_confirm(int autocorrect, const char *assumed) +void autocorrect_confirm(struct autocorrect *conf, const char *assumed) { - if (autocorrect == AUTOCORRECT_IMMEDIATELY) { + if (conf->mode == AUTOCORRECT_IMMEDIATELY) { fprintf_ln(stderr, _("Continuing under the assumption that you meant '%s'."), assumed); - } else if (autocorrect == AUTOCORRECT_PROMPT) { + } else if (conf->mode == AUTOCORRECT_PROMPT) { char *answer; struct strbuf msg = STRBUF_INIT; @@ -71,10 +71,10 @@ void autocorrect_confirm(int autocorrect, const char *assumed) if (!(starts_with(answer, "y") || starts_with(answer, "Y"))) exit(1); - } else { + } else if (conf->mode == AUTOCORRECT_DELAY) { fprintf_ln(stderr, _("Continuing in %0.1f seconds, assuming that you meant '%s'."), - (float)autocorrect / 10.0, assumed); - sleep_millisec(autocorrect * 100); + conf->delay / 10.0, assumed); + sleep_millisec(conf->delay * 100); } } diff --git a/autocorrect.h b/autocorrect.h index f5fadf9d96059b..5506a36f11a7cc 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -1,16 +1,24 @@ #ifndef AUTOCORRECT_H #define AUTOCORRECT_H -#define AUTOCORRECT_SHOW (-4) -#define AUTOCORRECT_PROMPT (-3) -#define AUTOCORRECT_NEVER (-2) -#define AUTOCORRECT_IMMEDIATELY (-1) - struct config_context; +enum autocorrect_mode { + AUTOCORRECT_SHOW, + AUTOCORRECT_NEVER, + AUTOCORRECT_PROMPT, + AUTOCORRECT_IMMEDIATELY, + AUTOCORRECT_DELAY, +}; + +struct autocorrect { + enum autocorrect_mode mode; + int delay; +}; + void autocorrect_resolve_config(const char *var, const char *value, const struct config_context *ctx, void *data); -void autocorrect_confirm(int autocorrect, const char *assumed); +void autocorrect_confirm(struct autocorrect *conf, const char *assumed); #endif /* AUTOCORRECT_H */ diff --git a/help.c b/help.c index 983057970e7c7f..a89ac5aced9994 100644 --- a/help.c +++ b/help.c @@ -538,7 +538,7 @@ int is_in_cmdlist(struct cmdnames *c, const char *s) } struct help_unknown_cmd_config { - int autocorrect; + struct autocorrect autocorrect; struct cmdnames aliases; }; @@ -607,7 +607,7 @@ char *help_unknown_cmd(const char *cmd) read_early_config(the_repository, git_unknown_cmd_config, &cfg); - if (cfg.autocorrect == AUTOCORRECT_NEVER) { + if (cfg.autocorrect.mode == AUTOCORRECT_NEVER) { fprintf_ln(stderr, _("git: '%s' is not a git command. See 'git --help'."), cmd); exit(1); } @@ -673,7 +673,8 @@ char *help_unknown_cmd(const char *cmd) n++) ; /* still counting */ } - if (cfg.autocorrect && cfg.autocorrect != AUTOCORRECT_SHOW && n == 1 && + + if (cfg.autocorrect.mode != AUTOCORRECT_SHOW && n == 1 && SIMILAR_ENOUGH(best_similarity)) { char *assumed = xstrdup(main_cmds.names[0]->name); @@ -682,7 +683,7 @@ char *help_unknown_cmd(const char *cmd) "which does not exist."), cmd); - autocorrect_confirm(cfg.autocorrect, assumed); + autocorrect_confirm(&cfg.autocorrect, assumed); cmdnames_release(&cfg.aliases); cmdnames_release(&main_cmds); From f06f1f043cbf58b82f1243fd09fc2c8b0202a853 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:18 +0900 Subject: [PATCH 017/185] autocorrect: rename AUTOCORRECT_SHOW to AUTOCORRECT_HINT AUTOCORRECT_SHOW is ambiguous. Its purpose is to show commands similar to the unknown one and take no other action. Rename it to fit the semantics. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 6 +++--- autocorrect.h | 2 +- help.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index 2484546fc731d9..de0fa282c934a8 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -12,7 +12,7 @@ static enum autocorrect_mode parse_autocorrect(const char *value) case 1: return AUTOCORRECT_IMMEDIATELY; case 0: - return AUTOCORRECT_SHOW; + return AUTOCORRECT_HINT; default: /* other random text */ break; } @@ -24,7 +24,7 @@ static enum autocorrect_mode parse_autocorrect(const char *value) else if (!strcmp(value, "immediate")) return AUTOCORRECT_IMMEDIATELY; else if (!strcmp(value, "show")) - return AUTOCORRECT_SHOW; + return AUTOCORRECT_HINT; else return AUTOCORRECT_DELAY; } @@ -49,7 +49,7 @@ void autocorrect_resolve_config(const char *var, const char *value, conf->delay = git_config_int(var, value, ctx->kvi); if (!conf->delay) - conf->mode = AUTOCORRECT_SHOW; + conf->mode = AUTOCORRECT_HINT; else if (conf->delay < 0 || conf->delay == 1) conf->mode = AUTOCORRECT_IMMEDIATELY; } diff --git a/autocorrect.h b/autocorrect.h index 5506a36f11a7cc..328807242c15ab 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -4,7 +4,7 @@ struct config_context; enum autocorrect_mode { - AUTOCORRECT_SHOW, + AUTOCORRECT_HINT, AUTOCORRECT_NEVER, AUTOCORRECT_PROMPT, AUTOCORRECT_IMMEDIATELY, diff --git a/help.c b/help.c index a89ac5aced9994..2d441ded3f1489 100644 --- a/help.c +++ b/help.c @@ -674,7 +674,7 @@ char *help_unknown_cmd(const char *cmd) ; /* still counting */ } - if (cfg.autocorrect.mode != AUTOCORRECT_SHOW && n == 1 && + if (cfg.autocorrect.mode != AUTOCORRECT_HINT && n == 1 && SIMILAR_ENOUGH(best_similarity)) { char *assumed = xstrdup(main_cmds.names[0]->name); From 7cd07f167d2980ad58de08a8bd7787d2ef5882b9 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:19 +0900 Subject: [PATCH 018/185] autocorrect: provide config resolution API Add autocorrect_resolve(). This resolves and populates the correct values for autocorrect config. Make autocorrect config callback internal. The API is meant to provide a high-level way to retrieve the config. Allowing access to the config callback from outside violates that intent. Additionally, in some cases, without access to the config callback, two config iterations cannot be merged into one, which can hurt performance. This is fine, as the code path that calls autocorrect_resolve() is cold. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 15 ++++++++++++--- autocorrect.h | 5 +---- help.c | 40 +++++++++++++++++----------------------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index de0fa282c934a8..b2ee9f51e8c09a 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -1,3 +1,5 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "git-compat-util.h" #include "autocorrect.h" #include "config.h" @@ -29,13 +31,13 @@ static enum autocorrect_mode parse_autocorrect(const char *value) return AUTOCORRECT_DELAY; } -void autocorrect_resolve_config(const char *var, const char *value, - const struct config_context *ctx, void *data) +static int resolve_autocorrect(const char *var, const char *value, + const struct config_context *ctx, void *data) { struct autocorrect *conf = data; if (strcmp(var, "help.autocorrect")) - return; + return 0; conf->mode = parse_autocorrect(value); @@ -53,6 +55,13 @@ void autocorrect_resolve_config(const char *var, const char *value, else if (conf->delay < 0 || conf->delay == 1) conf->mode = AUTOCORRECT_IMMEDIATELY; } + + return 0; +} + +void autocorrect_resolve(struct autocorrect *conf) +{ + read_early_config(the_repository, resolve_autocorrect, conf); } void autocorrect_confirm(struct autocorrect *conf, const char *assumed) diff --git a/autocorrect.h b/autocorrect.h index 328807242c15ab..0d3e819262edee 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -1,8 +1,6 @@ #ifndef AUTOCORRECT_H #define AUTOCORRECT_H -struct config_context; - enum autocorrect_mode { AUTOCORRECT_HINT, AUTOCORRECT_NEVER, @@ -16,8 +14,7 @@ struct autocorrect { int delay; }; -void autocorrect_resolve_config(const char *var, const char *value, - const struct config_context *ctx, void *data); +void autocorrect_resolve(struct autocorrect *conf); void autocorrect_confirm(struct autocorrect *conf, const char *assumed); diff --git a/help.c b/help.c index 2d441ded3f1489..81efdb13d4a375 100644 --- a/help.c +++ b/help.c @@ -537,32 +537,23 @@ int is_in_cmdlist(struct cmdnames *c, const char *s) return 0; } -struct help_unknown_cmd_config { - struct autocorrect autocorrect; - struct cmdnames aliases; -}; - -static int git_unknown_cmd_config(const char *var, const char *value, - const struct config_context *ctx, - void *cb) +static int resolve_aliases(const char *var, const char *value UNUSED, + const struct config_context *ctx UNUSED, void *data) { - struct help_unknown_cmd_config *cfg = cb; + struct cmdnames *aliases = data; const char *subsection, *key; size_t subsection_len; - autocorrect_resolve_config(var, value, ctx, &cfg->autocorrect); - - /* Also use aliases for command lookup */ if (!parse_config_key(var, "alias", &subsection, &subsection_len, &key)) { if (subsection) { /* [alias "name"] command = value */ if (!strcmp(key, "command")) - add_cmdname(&cfg->aliases, subsection, + add_cmdname(aliases, subsection, subsection_len); } else { /* alias.name = value */ - add_cmdname(&cfg->aliases, key, strlen(key)); + add_cmdname(aliases, key, strlen(key)); } } @@ -599,22 +590,26 @@ static const char bad_interpreter_advice[] = char *help_unknown_cmd(const char *cmd) { - struct help_unknown_cmd_config cfg = { 0 }; + struct cmdnames aliases = { 0 }; + struct autocorrect autocorrect = { 0 }; int i, n, best_similarity = 0; struct cmdnames main_cmds = { 0 }; struct cmdnames other_cmds = { 0 }; struct cmdname_help *common_cmds; - read_early_config(the_repository, git_unknown_cmd_config, &cfg); + autocorrect_resolve(&autocorrect); - if (cfg.autocorrect.mode == AUTOCORRECT_NEVER) { + if (autocorrect.mode == AUTOCORRECT_NEVER) { fprintf_ln(stderr, _("git: '%s' is not a git command. See 'git --help'."), cmd); exit(1); } load_command_list("git-", &main_cmds, &other_cmds); - add_cmd_list(&main_cmds, &cfg.aliases); + /* Also use aliases for command lookup */ + read_early_config(the_repository, resolve_aliases, &aliases); + + add_cmd_list(&main_cmds, &aliases); add_cmd_list(&main_cmds, &other_cmds); QSORT(main_cmds.names, main_cmds.cnt, cmdname_compare); uniq(&main_cmds); @@ -674,18 +669,17 @@ char *help_unknown_cmd(const char *cmd) ; /* still counting */ } - if (cfg.autocorrect.mode != AUTOCORRECT_HINT && n == 1 && + if (autocorrect.mode != AUTOCORRECT_HINT && n == 1 && SIMILAR_ENOUGH(best_similarity)) { char *assumed = xstrdup(main_cmds.names[0]->name); fprintf_ln(stderr, - _("WARNING: You called a Git command named '%s', " - "which does not exist."), + _("WARNING: You called a Git command named '%s', which does not exist."), cmd); - autocorrect_confirm(&cfg.autocorrect, assumed); + autocorrect_confirm(&autocorrect, assumed); - cmdnames_release(&cfg.aliases); + cmdnames_release(&aliases); cmdnames_release(&main_cmds); cmdnames_release(&other_cmds); return assumed; From be9df6de6e2cfd09ffc327e9d4edd451248296f9 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:20 +0900 Subject: [PATCH 019/185] parseopt: autocorrect mistyped subcommands Try to autocorrect the mistyped mandatory subcommand before showing an error and exiting. Subcommands parsed with PARSE_OPT_SUBCOMMAND_OPTIONAL are skipped. Use standard Damerau-Levenshtein distance (weights 1, 1, 1, 1) to establish a predictable, mathematically sound baseline. Scale the allowed edit distance based on input length to prevent false positives on short commands, following common practice for fuzziness thresholds (e.g., Elasticsearch's AUTO fuzziness): - Length 0-2: 0 edits allowed - Length 3-5: 1 edit allowed - Length 6+: 2 edits allowed Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- parse-options.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/parse-options.c b/parse-options.c index 02a4f00919f6d6..1f1b72762790c0 100644 --- a/parse-options.c +++ b/parse-options.c @@ -6,6 +6,8 @@ #include "strbuf.h" #include "string-list.h" #include "utf8.h" +#include "autocorrect.h" +#include "levenshtein.h" static int disallow_abbreviated_options; @@ -622,13 +624,77 @@ static int parse_subcommand(const char *arg, const struct option *options) return -1; } +static void find_subcommands(struct string_list *list, + const struct option *options) +{ + for (; options->type != OPTION_END; options++) { + if (options->type == OPTION_SUBCOMMAND) + string_list_append(list, options->long_name); + } +} + +static int similar_enough(const char *cmd, unsigned int edit) +{ + size_t len = strlen(cmd); + unsigned int allowed = len < 3 ? 0 : len < 6 ? 1 : 2; + + return edit <= allowed; +} + +static const char *autocorrect_subcommand(const char *cmd, + struct string_list *cmds) +{ + struct autocorrect autocorrect = { 0 }; + unsigned int min = UINT_MAX; + unsigned int ties = 0; + struct string_list_item *cand; + struct string_list_item *best = NULL; + + autocorrect_resolve(&autocorrect); + + /* + * Builtin subcommands are small enough that printing them all via + * usage_with_options() is sufficient. Therefore, AUTOCORRECT_HINT + * acts like AUTOCORRECT_NEVER. + */ + if (autocorrect.mode == AUTOCORRECT_HINT || + autocorrect.mode == AUTOCORRECT_NEVER) + return NULL; + + for_each_string_list_item(cand, cmds) { + unsigned int edit = levenshtein(cmd, cand->string, 1, 1, 1, 1); + + if (edit < min) { + min = edit; + best = cand; + ties = 0; + } else if (edit == min) { + ties++; + } + } + + if (!ties && similar_enough(cmd, min)) { + fprintf_ln(stderr, + _("WARNING: You called a subcommand named '%s', which does not exist."), + cmd); + + autocorrect_confirm(&autocorrect, best->string); + return best->string; + } + + return NULL; +} + static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, const char *arg, const struct option *options, const char * const usagestr[]) { - int err = parse_subcommand(arg, options); + int err; + const char *assumed; + struct string_list cmds = STRING_LIST_INIT_NODUP; + err = parse_subcommand(arg, options); if (!err) return PARSE_OPT_SUBCOMMAND; @@ -641,8 +707,17 @@ static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) return PARSE_OPT_DONE; - error(_("unknown subcommand: `%s'"), arg); - usage_with_options(usagestr, options); + find_subcommands(&cmds, options); + assumed = autocorrect_subcommand(arg, &cmds); + + if (!assumed) { + error(_("unknown subcommand: `%s'"), arg); + usage_with_options(usagestr, options); + } + + string_list_clear(&cmds, 0); + parse_subcommand(assumed, options); + return PARSE_OPT_SUBCOMMAND; } static void check_typos(const char *arg, const struct option *options) From ae8b7e1d200977165755c2e6d5a22e1df8ab6bf1 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:21 +0900 Subject: [PATCH 020/185] parseopt: enable subcommand autocorrection for git-remote and git-notes Add PARSE_OPT_SUBCOMMAND_AUTOCORR to enable autocorrection for subcommands parsed with PARSE_OPT_SUBCOMMAND_OPTIONAL. Use it for git-remote and git-notes, so mistyped subcommands can be automatically corrected, and builtin entry points no longer need to handle the unknown subcommand error path themselves. This is safe for these two builtins, because they either resolve to a single subcommand or take no subcommand at all. This means that if the subcommand parser encounters an unknown argument, it must be a mistyped subcommand. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- builtin/notes.c | 10 +++------- builtin/remote.c | 12 ++++-------- parse-options.c | 16 +++++++++------- parse-options.h | 1 + 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/builtin/notes.c b/builtin/notes.c index 9af602bdd7b402..087eb898a4415f 100644 --- a/builtin/notes.c +++ b/builtin/notes.c @@ -1149,14 +1149,10 @@ int cmd_notes(int argc, repo_config(the_repository, git_default_config, NULL); argc = parse_options(argc, argv, prefix, options, git_notes_usage, - PARSE_OPT_SUBCOMMAND_OPTIONAL); - if (!fn) { - if (argc) { - error(_("unknown subcommand: `%s'"), argv[0]); - usage_with_options(git_notes_usage, options); - } + PARSE_OPT_SUBCOMMAND_OPTIONAL | + PARSE_OPT_SUBCOMMAND_AUTOCORR); + if (!fn) fn = list; - } if (override_notes_ref) { struct strbuf sb = STRBUF_INIT; diff --git a/builtin/remote.c b/builtin/remote.c index 0fddaa177331f6..9415f6cb03e807 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -1953,15 +1953,11 @@ int cmd_remote(int argc, }; argc = parse_options(argc, argv, prefix, options, builtin_remote_usage, - PARSE_OPT_SUBCOMMAND_OPTIONAL); + PARSE_OPT_SUBCOMMAND_OPTIONAL | + PARSE_OPT_SUBCOMMAND_AUTOCORR); - if (fn) { + if (fn) return !!fn(argc, argv, prefix, repo); - } else { - if (argc) { - error(_("unknown subcommand: `%s'"), argv[0]); - usage_with_options(builtin_remote_usage, options); - } + else return !!show_all(); - } } diff --git a/parse-options.c b/parse-options.c index 1f1b72762790c0..0b84061a381153 100644 --- a/parse-options.c +++ b/parse-options.c @@ -698,14 +698,16 @@ static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, if (!err) return PARSE_OPT_SUBCOMMAND; - /* - * arg is neither a short or long option nor a subcommand. Since this - * command has a default operation mode, we have to treat this arg and - * all remaining args as args meant to that default operation mode. - * So we are done parsing. - */ - if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) + if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL && + !(ctx->flags & PARSE_OPT_SUBCOMMAND_AUTOCORR)) { + /* + * arg is neither a short or long option nor a subcommand. + * Since this command has a default operation mode, we have to + * treat this arg and all remaining args as args meant to that + * default operation mode. So we are done parsing. + */ return PARSE_OPT_DONE; + } find_subcommands(&cmds, options); assumed = autocorrect_subcommand(arg, &cmds); diff --git a/parse-options.h b/parse-options.h index 706de9729f6b3f..f29ac337893c92 100644 --- a/parse-options.h +++ b/parse-options.h @@ -40,6 +40,7 @@ enum parse_opt_flags { PARSE_OPT_ONE_SHOT = 1 << 5, PARSE_OPT_SHELL_EVAL = 1 << 6, PARSE_OPT_SUBCOMMAND_OPTIONAL = 1 << 7, + PARSE_OPT_SUBCOMMAND_AUTOCORR = 1 << 8, }; enum parse_opt_option_flags { From 273faabea8db47247564df092adf5a675ddfa284 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:22 +0900 Subject: [PATCH 021/185] parseopt: add tests for subcommand autocorrection These tests cover default behavior (help.autocorrect is unset), no correction, immediate correction, delayed correction, and rejection when the typo is too dissimilar. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- t/meson.build | 1 + t/t9004-autocorrect-subcommand.sh | 51 +++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100755 t/t9004-autocorrect-subcommand.sh diff --git a/t/meson.build b/t/meson.build index f66a73f8a07d93..bf0503d705a9a3 100644 --- a/t/meson.build +++ b/t/meson.build @@ -973,6 +973,7 @@ integration_tests = [ 't9001-send-email.sh', 't9002-column.sh', 't9003-help-autocorrect.sh', + 't9004-autocorrect-subcommand.sh', 't9100-git-svn-basic.sh', 't9101-git-svn-props.sh', 't9102-git-svn-deep-rmdir.sh', diff --git a/t/t9004-autocorrect-subcommand.sh b/t/t9004-autocorrect-subcommand.sh new file mode 100755 index 00000000000000..d10031659b940b --- /dev/null +++ b/t/t9004-autocorrect-subcommand.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +test_description='subcommand auto-correction test + +Test autocorrection for subcommands with different +help.autocorrect mode.' + +. ./test-lib.sh + +test_expect_success 'setup' " + echo '^error: unknown subcommand: ' >grep_unknown +" + +test_expect_success 'default is not to autocorrect' ' + test_must_fail git worktree lsit 2>actual && + head -n1 actual >first && test_grep -f grep_unknown first +' + +for mode in false no off 0 show never +do + test_expect_success "'$mode' disables autocorrection" " + test_config help.autocorrect $mode && + + test_must_fail git worktree lsit 2>actual && + head -n1 actual >first && test_grep -f grep_unknown first + " +done + +for mode in -39 immediate 1 +do + test_expect_success "autocorrect immediately with '$mode'" - <<-EOT + test_config help.autocorrect $mode && + + git worktree lsit 2>actual && + test_grep "you meant 'list'\.$" actual + EOT +done + +test_expect_success 'delay path is executed' - <<-\EOT + test_config help.autocorrect 2 && + + git worktree lsit 2>actual && + test_grep '^Continuing in 0.2 seconds, ' actual +EOT + +test_expect_success 'deny if too dissimilar' - <<-\EOT + test_must_fail git remote rensnr 2>actual && + head -n1 actual >first && test_grep -f grep_unknown first +EOT + +test_done From 916b45080534e9603094dda39c4599a2f2a466d1 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:23 +0900 Subject: [PATCH 022/185] doc: document autocorrect API Explain behaviors for autocorrect_resolve(), autocorrect_confirm(), and struct autocorrect. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/autocorrect.h b/autocorrect.h index 0d3e819262edee..bfa3ba20a4fb73 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -9,13 +9,24 @@ enum autocorrect_mode { AUTOCORRECT_DELAY, }; +/** + * `mode` indicates which action will be performed by autocorrect_confirm(). + * `delay` is the timeout before autocorrect_confirm() returns, in tenths of a + * second. Use it only with AUTOCORRECT_DELAY. + */ struct autocorrect { enum autocorrect_mode mode; int delay; }; +/** + * Resolve the autocorrect configuration into `conf`. + */ void autocorrect_resolve(struct autocorrect *conf); +/** + * Interact with the user in different ways depending on `conf->mode`. + */ void autocorrect_confirm(struct autocorrect *conf, const char *assumed); #endif /* AUTOCORRECT_H */ From 331e1b1e9c399e0fa4b6d46b183620e1d12b0a51 Mon Sep 17 00:00:00 2001 From: Harald Nordgren Date: Tue, 17 Mar 2026 09:35:36 +0000 Subject: [PATCH 023/185] stash: add --ours-label, --theirs-label, --base-label for apply Allow callers of "git stash apply" to pass custom labels for conflict markers instead of the default "Updated upstream" and "Stashed changes". Document the new options and add a test. Signed-off-by: Harald Nordgren Signed-off-by: Junio C Hamano --- Documentation/git-stash.adoc | 11 ++++++++++- builtin/stash.c | 2 +- t/t3903-stash.sh | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/Documentation/git-stash.adoc b/Documentation/git-stash.adoc index 235d57ddd8f5d1..43650a3c1a092f 100644 --- a/Documentation/git-stash.adoc +++ b/Documentation/git-stash.adoc @@ -12,7 +12,7 @@ git stash list [] git stash show [-u | --include-untracked | --only-untracked] [] [] git stash drop [-q | --quiet] [] git stash pop [--index] [-q | --quiet] [] -git stash apply [--index] [-q | --quiet] [] +git stash apply [--index] [-q | --quiet] [--ours-label=