python · methane · Apr 18, 2025 · Apr 29, 2023 · Apr 29, 2023 · Apr 29, 2023
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
@@ -901,6 +901,75 @@ def res2int(res):
         )
         self.assertEqual(res2int(res), (6000, 6000))
 
+    def test_cmd_dedent(self):
+        # test that -c auto-dedents its arguments
+        from textwrap import dedent
+        test_cases = [
+            {
+                'code': '''
+                    print('space-auto-dedent')
+                ''',
+                'expected': b'space-auto-dedent',
+            },
+            {
+                'code': dedent('''
+                ^^^print('tab-auto-dedent')
+                ''').replace('^', '\t'),
+                'expected': b'tab-auto-dedent',
+            },
+            {
+                'code': dedent('''
+                ^^if 1:
+                ^^^^print('mixed-auto-dedent-1')
+                ^^print('mixed-auto-dedent-2')
+                ''').replace('^', '\t \t'),
+                'expected': b'mixed-auto-dedent-1\nmixed-auto-dedent-2',
+            },
+            {
+                'code': '''
+                    data = """$
+
+                    this data has an empty newline above and a newline with spaces below $
+                                            $
+                    """$
+                    if 1:         $
+                        print(repr(data))$
+                '''.replace('$', ''),
+                # Note: entirely blank lines are normalized to \n, even if they
+                # are part of a data string. This is consistent with
+                # textwrap.dedent behavior, but might not be intuitive.
+                'expected': b"'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
+            },
+        ]
+        for case in test_cases:
+            # Run the auto-dedent case
+            args1 = sys.executable, '-c', case['code']
+            proc1 = subprocess.run(args1, stdout=subprocess.PIPE)
+            self.assertEqual(proc1.returncode, 0, proc1)
+            output1 = proc1.stdout.strip()
+
+            # Manually dedent beforehand, check the result is the same.
+            args2 = sys.executable, '-c', dedent(case['code'])
+            proc2 = subprocess.run(args2, stdout=subprocess.PIPE)
+            self.assertEqual(proc2.returncode, 0, proc2)
+            output2 = proc2.stdout.strip()
+
+            self.assertEqual(output1, output2)
+            self.assertEqual(output1.replace(b'\r\n', b'\n'), case['expected'])
+
+    def test_cmd_dedent_failcase(self):
+        # Mixing tabs and spaces is not allowed
+        from textwrap import dedent
+        template = dedent(
+            '''
+            -+if 1:
+            +-++ print('will fail')
+            ''')
+        code = template.replace('-', ' ').replace('+', '\t')
+        assert_python_failure('-c', code)
+        code = template.replace('-', '\t').replace('+', ' ')
+        assert_python_failure('-c', code)
+
 
 @unittest.skipIf(interpreter_requires_environment(),
                  'Cannot run -I tests when PYTHON env vars are required.')

diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
@@ -0,0 +1 @@
+Strings passed to "-c" are now automatically dedented (common leading whitespace is removed). This allows "python -c" invocations to be indented in shell scripts without causing indentation errors.
@@ -228,6 +228,150 @@ pymain_import_readline(const PyConfig *config)
     }
 }
 
+/* Strip common leading whitespace from every line, just as textwrap.dedent.
+
+   `bytes` must be an exact bytes object. Only ' ' and '\t' count as
+   indentation, and lines consisting solely of those characters are ignored
+   when computing the common prefix.
+
+   Return value:
+     - `bytes` itself (same reference) when there is nothing to strip: the
+       input is empty, some non-blank line has no indentation, or the
+       non-blank lines share no common leading whitespace;
+     - otherwise a new bytes object; in that case the reference to `bytes`
+       is released here. In the new object all-whitespace lines are reduced
+       to a bare '\n' and every line is terminated with '\n', so the result
+       may gain a trailing newline the input did not have;
+     - NULL on failure. The reference to `bytes` is NOT released then, so a
+       caller that overwrites its only pointer with the result loses the
+       original object.
+
+   NOTE(review): the NULL / non-bytes guard returns NULL without setting an
+   exception -- confirm callers do not rely on one being set. */
+static PyObject *dedent_utf8_bytes(PyObject *bytes) {
+    if (bytes == NULL || !PyBytes_CheckExact(bytes)) {
+        return NULL;
+    }
+
+    char *start;
+    Py_ssize_t nchars;
+
+    if (PyBytes_AsStringAndSize(bytes, &start, &nchars) != 0) {
+        return NULL;
+    }
+
+    // A zero-length input has no lines to scan; hand it back unchanged
+    // instead of tripping the assertion below.
+    if (nchars == 0) {
+        return bytes;
+    }
+
+    char *end = start + nchars;
+    assert(start < end);
+
+    // First pass: find the longest whitespace prefix shared by all
+    // non-blank lines. It is tracked as a slice of the input buffer.
+    char *candidate_start = NULL;
+    Py_ssize_t candidate_len = 0;
+
+    for (char *iter = start; iter < end; ++iter) {
+        char *line_start = iter;
+        char *leading_whitespace_end = NULL;
+
+        // scan the whole line
+        while (iter < end && *iter != '\n') {
+            if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
+                if (iter == line_start) {
+                    // some line has no indent, fast exit!
+                    return bytes;
+                }
+                leading_whitespace_end = iter;
+            }
+            ++iter;
+        }
+
+        // if this line has all white space, skip it
+        if (!leading_whitespace_end) {
+            continue;
+        }
+
+        if (!candidate_start) {
+            // first non-blank line: its whole indent is the candidate
+            candidate_start = line_start;
+            candidate_len = leading_whitespace_end - line_start;
+            assert(candidate_len > 0);
+        } else {
+            /* We then compare with the current longest leading whitespace.
+
+               [line_start, leading_whitespace_end) is the leading whitespace of
+               this line,
+
+               [candidate_start, candidate_start + candidate_len)
+               is the leading whitespace of the current longest leading
+               whitespace. */
+            Py_ssize_t new_candidate_len = 0;
+
+            for (char *candidate_iter = candidate_start,
+                      *line_iter = line_start;
+                 candidate_iter < candidate_start + candidate_len &&
+                 line_iter < leading_whitespace_end;
+                 ++candidate_iter, ++line_iter) {
+                if (*candidate_iter != *line_iter) {
+                    break;
+                }
+                ++new_candidate_len;
+            }
+
+            // shrink the candidate to the prefix both indents agree on
+            candidate_len = new_candidate_len;
+            if (candidate_len == 0) {
+                return bytes;
+            }
+        }
+    }
+
+    // no non-blank line was seen, so there is nothing to strip
+    if (candidate_len == 0) {
+        return bytes;
+    }
+    assert(candidate_len > 0);
+
+    // trigger a dedent
+    // Second pass: copy each line into a new buffer without its first
+    // `candidate_len` bytes.
+    char *p;
+    PyObject *new_bytes;
+    char *line_start;
+    Py_ssize_t new_line_len;
+    bool in_leading_space;
+    _PyBytesWriter writer;
+
+    _PyBytesWriter_Init(&writer);
+    p = _PyBytesWriter_Alloc(&writer, nchars);
+    if (p == NULL) {
+        goto error;
+    }
+
+    for (char *iter = start; iter < end; ++iter) {
+        line_start = iter;
+        in_leading_space = true;
+
+        // iterate over a line
+        while (iter < end && *iter != '\n') {
+            if (in_leading_space && *iter != ' ' && *iter != '\t') {
+                in_leading_space = false;
+            }
+            ++iter;
+        }
+
+        // invariant: *iter == '\n' or iter == end
+
+        // if this line has all white space, write '\n'
+        if (in_leading_space) {
+            p = _PyBytesWriter_Prepare(&writer, p, 1);
+            if (p == NULL) {
+                goto error;
+            }
+            *p++ = '\n';
+            continue;
+        }
+
+        // copy [line_start + candidate_len, iter) to buffer, then append
+        // '\n'
+        new_line_len = iter - line_start - candidate_len;
+        assert(new_line_len >= 0);
+        p = _PyBytesWriter_Prepare(&writer, p, new_line_len + 1);
+        if (p == NULL) {
+            goto error;
+        }
+        memcpy(p, line_start + candidate_len, new_line_len);
+
+        p += new_line_len;
+
+        // this may always append '\n' at the end of `new_bytes`
+        *p++ = '\n';
+    }
+
+    new_bytes = _PyBytesWriter_Finish(&writer, p);
+    if (new_bytes == NULL) {
+        goto error;
+    }
+    // success: the caller's reference to the original object is consumed
+    Py_DECREF(bytes);
+    return new_bytes;
+
+error:
+    _PyBytesWriter_Dealloc(&writer);
+    return NULL;
+}
 
 static int
 pymain_run_command(wchar_t *command)
@@ -250,6 +394,11 @@ pymain_run_command(wchar_t *command)
         goto error;
     }
 
+    bytes = dedent_utf8_bytes(bytes);
+    if (bytes == NULL) {
+        goto error;
+    }
+
     PyCompilerFlags cf = _PyCompilerFlags_INIT;
     cf.cf_flags |= PyCF_IGNORE_COOKIE;
     ret = PyRun_SimpleStringFlags(PyBytes_AsString(bytes), &cf);
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Strings passed to "-c" are now automatically dedented (common leading whitespace is removed). This allows "python -c" invocations to be indented in shell scripts without causing indentation errors.