automark add reasons to reason-less expectedFailures and handles parent better (#6936)

youknowone · web-flow · commit 153b0da60d4d · 2026-02-01T22:58:07.000+09:00
diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs
@@ -1219,7 +1219,10 @@ pub(crate) fn label_exception_targets(blocks: &mut [Block]) {
                     preserve_lasti,
                 });
             } else if is_pop {
-                debug_assert!(!stack.is_empty(), "POP_BLOCK with empty except stack at block {bi} instruction {i}");
+                debug_assert!(
+                    !stack.is_empty(),
+                    "POP_BLOCK with empty except stack at block {bi} instruction {i}"
+                );
                 stack.pop();
                 // POP_BLOCK → NOP
                 blocks[bi].instructions[i].instr = Instruction::Nop.into();
diff --git a/scripts/update_lib/cmd_auto_mark.py b/scripts/update_lib/cmd_auto_mark.py
@@ -250,6 +250,99 @@ def path_to_test_parts(path: str) -> list[str]:
     return parts[-2:]
 
 
+def _expand_stripped_to_children(
+    contents: str,
+    stripped_tests: set[tuple[str, str]],
+    all_failing_tests: set[tuple[str, str]],
+) -> set[tuple[str, str]]:
+    """Find child-class failures that correspond to stripped parent-class markers.
+
+    When ``strip_reasonless_expected_failures`` removes a marker from a parent
+    (mixin) class, test failures are reported against the concrete subclasses,
+    not the parent itself.  This function maps those child failures back so
+    they get re-marked (and later consolidated to the parent by
+    ``_consolidate_to_parent``).
+
+    Returns the set of ``(class, method)`` pairs from *all_failing_tests* that
+    should be re-marked.
+    """
+    # Direct matches (stripped test itself is a concrete TestCase)
+    result = stripped_tests & all_failing_tests
+
+    unmatched = stripped_tests - all_failing_tests
+    if not unmatched:
+        return result
+
+    tree = ast.parse(contents)
+    class_bases, class_methods = _build_inheritance_info(tree)
+
+    for parent_cls, method_name in unmatched:
+        if method_name not in class_methods.get(parent_cls, set()):
+            continue
+        for cls in _find_all_inheritors(
+            parent_cls, method_name, class_bases, class_methods
+        ):
+            if (cls, method_name) in all_failing_tests:
+                result.add((cls, method_name))
+
+    return result
+
+
+def _consolidate_to_parent(
+    contents: str,
+    failing_tests: set[tuple[str, str]],
+    error_messages: dict[tuple[str, str], str] | None = None,
+) -> tuple[set[tuple[str, str]], dict[tuple[str, str], str] | None]:
+    """Move failures to the parent class when ALL inheritors fail.
+
+    If every concrete subclass that inherits a method from a parent class
+    appears in *failing_tests*, replace those per-subclass entries with a
+    single entry on the parent.  This avoids creating redundant super-call
+    overrides in every child.
+
+    Returns:
+        (consolidated_failing_tests, consolidated_error_messages)
+    """
+    tree = ast.parse(contents)
+    class_bases, class_methods = _build_inheritance_info(tree)
+
+    # Group by (defining_parent, method) → set of failing children
+    from collections import defaultdict
+
+    groups: dict[tuple[str, str], set[str]] = defaultdict(set)
+    for class_name, method_name in failing_tests:
+        defining = _find_method_definition(
+            class_name, method_name, class_bases, class_methods
+        )
+        if defining and defining != class_name:
+            groups[(defining, method_name)].add(class_name)
+
+    if not groups:
+        return failing_tests, error_messages
+
+    result = set(failing_tests)
+    new_error_messages = dict(error_messages) if error_messages else {}
+
+    for (parent, method_name), failing_children in groups.items():
+        all_inheritors = _find_all_inheritors(
+            parent, method_name, class_bases, class_methods
+        )
+
+        if all_inheritors and failing_children >= all_inheritors:
+            # All inheritors fail → mark on parent instead
+            children_keys = {(child, method_name) for child in failing_children}
+            result -= children_keys
+            result.add((parent, method_name))
+            # Pick any child's error message for the parent
+            if new_error_messages:
+                for child in failing_children:
+                    msg = new_error_messages.pop((child, method_name), "")
+                    if msg:
+                        new_error_messages[(parent, method_name)] = msg
+
+    return result, new_error_messages or error_messages
+
+
 def build_patches(
     test_parts_set: set[tuple[str, str]],
     error_messages: dict[tuple[str, str], str] | None = None,
@@ -293,6 +386,24 @@ def _is_super_call_only(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> bo
     return True
 
 
+def _method_removal_range(
+    func_node: ast.FunctionDef | ast.AsyncFunctionDef, lines: list[str]
+) -> range:
+    """Line range covering an entire method including decorators and a preceding COMMENT line."""
+    first = (
+        func_node.decorator_list[0].lineno - 1
+        if func_node.decorator_list
+        else func_node.lineno - 1
+    )
+    if (
+        first > 0
+        and lines[first - 1].strip().startswith("#")
+        and COMMENT in lines[first - 1]
+    ):
+        first -= 1
+    return range(first, func_node.end_lineno)
+
+
 def _build_inheritance_info(tree: ast.Module) -> tuple[dict, dict]:
     """
     Build inheritance information from AST.
@@ -348,6 +459,20 @@ def _find_method_definition(
     return None
 
 
+def _find_all_inheritors(
+    parent: str, method_name: str, class_bases: dict, class_methods: dict
+) -> set[str]:
+    """Find all classes that inherit *method_name* from *parent* (not overriding it)."""
+    return {
+        cls
+        for cls in class_bases
+        if cls != parent
+        and method_name not in class_methods.get(cls, set())
+        and _find_method_definition(cls, method_name, class_bases, class_methods)
+        == parent
+    }
+
+
 def remove_expected_failures(
     contents: str, tests_to_remove: set[tuple[str, str]]
 ) -> str:
@@ -383,15 +508,7 @@ def remove_expected_failures(
             remove_entire_method = _is_super_call_only(item)
 
             if remove_entire_method:
-                first_line = item.lineno - 1
-                if item.decorator_list:
-                    first_line = item.decorator_list[0].lineno - 1
-                if first_line > 0:
-                    prev_line = lines[first_line - 1].strip()
-                    if prev_line.startswith("#") and COMMENT in prev_line:
-                        first_line -= 1
-                for i in range(first_line, item.end_lineno):
-                    lines_to_remove.add(i)
+                lines_to_remove.update(_method_removal_range(item, lines))
             else:
                 for dec in item.decorator_list:
                     dec_line = dec.lineno - 1
@@ -406,11 +523,18 @@ def remove_expected_failures(
                         and lines[dec_line - 1].strip().startswith("#")
                         and COMMENT in lines[dec_line - 1]
                     )
+                    has_comment_after = (
+                        dec_line + 1 < len(lines)
+                        and lines[dec_line + 1].strip().startswith("#")
+                        and COMMENT not in lines[dec_line + 1]
+                    )
 
                     if has_comment_on_line or has_comment_before:
                         lines_to_remove.add(dec_line)
                         if has_comment_before:
                             lines_to_remove.add(dec_line - 1)
+                        if has_comment_after and has_comment_on_line:
+                            lines_to_remove.add(dec_line + 1)
 
     for line_idx in sorted(lines_to_remove, reverse=True):
         del lines[line_idx]
@@ -481,12 +605,98 @@ def apply_test_changes(
         contents = remove_expected_failures(contents, unexpected_successes)
 
     if failing_tests:
+        failing_tests, error_messages = _consolidate_to_parent(
+            contents, failing_tests, error_messages
+        )
         patches = build_patches(failing_tests, error_messages)
         contents = apply_patches(contents, patches)
 
     return contents
 
 
+def strip_reasonless_expected_failures(
+    contents: str,
+) -> tuple[str, set[tuple[str, str]]]:
+    """Strip @expectedFailure decorators that have no failure reason.
+
+    Markers like ``@unittest.expectedFailure  # TODO: RUSTPYTHON`` (without a
+    reason after the semicolon) are removed so the tests fail normally during
+    the next test run and error messages can be captured.
+
+    Returns:
+        (modified_contents, stripped_tests) where stripped_tests is a set of
+        (class_name, method_name) tuples whose markers were removed.
+    """
+    tree = ast.parse(contents)
+    lines = contents.splitlines()
+    stripped_tests: set[tuple[str, str]] = set()
+    lines_to_remove: set[int] = set()
+
+    for node in ast.walk(tree):
+        if not isinstance(node, ast.ClassDef):
+            continue
+        for item in node.body:
+            if not isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                continue
+            for dec in item.decorator_list:
+                dec_line = dec.lineno - 1
+                line_content = lines[dec_line]
+
+                if "expectedFailure" not in line_content:
+                    continue
+
+                has_comment_on_line = COMMENT in line_content
+                has_comment_before = (
+                    dec_line > 0
+                    and lines[dec_line - 1].strip().startswith("#")
+                    and COMMENT in lines[dec_line - 1]
+                )
+
+                if not has_comment_on_line and not has_comment_before:
+                    continue  # not our marker
+
+                # Check if there's a reason (on either the decorator or before)
+                for check_line in (
+                    line_content,
+                    lines[dec_line - 1] if has_comment_before else "",
+                ):
+                    match = re.search(rf"{COMMENT}(.*)", check_line)
+                    if match and match.group(1).strip(";:, "):
+                        break  # has a reason, keep it
+                else:
+                    # No reason found — strip this decorator
+                    stripped_tests.add((node.name, item.name))
+
+                    if _is_super_call_only(item):
+                        # Remove entire super-call override (the method
+                        # exists only to apply the decorator; without it
+                        # the override is pointless and blocks parent
+                        # consolidation)
+                        lines_to_remove.update(_method_removal_range(item, lines))
+                    else:
+                        lines_to_remove.add(dec_line)
+
+                        if has_comment_before:
+                            lines_to_remove.add(dec_line - 1)
+
+                        # Also remove a reason-comment on the line after (old format)
+                        if (
+                            has_comment_on_line
+                            and dec_line + 1 < len(lines)
+                            and lines[dec_line + 1].strip().startswith("#")
+                            and COMMENT not in lines[dec_line + 1]
+                        ):
+                            lines_to_remove.add(dec_line + 1)
+
+    if not lines_to_remove:
+        return contents, stripped_tests
+
+    for idx in sorted(lines_to_remove, reverse=True):
+        del lines[idx]
+
+    return "\n".join(lines) + "\n" if lines else "", stripped_tests
+
+
 def extract_test_methods(contents: str) -> set[tuple[str, str]]:
     """
     Extract all test method names from file contents.
@@ -529,6 +739,13 @@ def auto_mark_file(
     if not test_path.exists():
         raise FileNotFoundError(f"File not found: {test_path}")
 
+    # Strip reason-less markers so those tests fail normally and we capture
+    # their error messages during the test run.
+    contents = test_path.read_text(encoding="utf-8")
+    contents, stripped_tests = strip_reasonless_expected_failures(contents)
+    if stripped_tests:
+        test_path.write_text(contents, encoding="utf-8")
+
     test_name = get_test_module_name(test_path)
     if verbose:
         print(f"Running test: {test_name}")
@@ -559,6 +776,13 @@ def auto_mark_file(
     else:
         failing_tests = set()
 
+    # Re-mark stripped tests that still fail (to restore markers with reasons).
+    # Uses inheritance expansion: if a parent marker was stripped, child
+    # failures are included so _consolidate_to_parent can re-mark the parent.
+    failing_tests |= _expand_stripped_to_children(
+        contents, stripped_tests, all_failing_tests
+    )
+
     regressions = all_failing_tests - failing_tests
 
     if verbose:
@@ -626,6 +850,19 @@ def auto_mark_directory(
     if not test_dir.is_dir():
         raise ValueError(f"Not a directory: {test_dir}")
 
+    # Get all .py files in directory
+    test_files = sorted(test_dir.glob("**/*.py"))
+
+    # Strip reason-less markers from ALL files before running tests so those
+    # tests fail normally and we capture their error messages.
+    stripped_per_file: dict[pathlib.Path, set[tuple[str, str]]] = {}
+    for test_file in test_files:
+        contents = test_file.read_text(encoding="utf-8")
+        contents, stripped = strip_reasonless_expected_failures(contents)
+        if stripped:
+            test_file.write_text(contents, encoding="utf-8")
+            stripped_per_file[test_file] = stripped
+
     test_name = get_test_module_name(test_dir)
     if verbose:
         print(f"Running test: {test_name}")
@@ -644,9 +881,6 @@ def auto_mark_directory(
     total_regressions = 0
     all_regressions: list[tuple[str, str, str, str]] = []
 
-    # Get all .py files in directory
-    test_files = sorted(test_dir.glob("**/*.py"))
-
     for test_file in test_files:
         # Get module prefix for this file (e.g., "test_inspect.test_inspect")
         module_prefix = get_test_module_name(test_file)
@@ -671,6 +905,15 @@ def auto_mark_directory(
         else:
             failing_tests = set()
 
+        # Re-mark stripped tests that still fail (restore markers with reasons).
+        # Uses inheritance expansion for parent→child mapping.
+        stripped = stripped_per_file.get(test_file, set())
+        if stripped:
+            file_contents = test_file.read_text(encoding="utf-8")
+            failing_tests |= _expand_stripped_to_children(
+                file_contents, stripped, all_failing_tests
+            )
+
         regressions = all_failing_tests - failing_tests
 
         if failing_tests or unexpected_successes:
diff --git a/scripts/update_lib/tests/test_auto_mark.py b/scripts/update_lib/tests/test_auto_mark.py