Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Make Instruction and MacroInstruction pre-compute active cache effects
  • Loading branch information
gvanrossum committed Jun 28, 2023
commit ea90c43d9d511847f2c591ada61aa967144cdce5
175 changes: 81 additions & 94 deletions Tools/cases_generator/generate_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,13 @@ def emit_macros(cls, out: Formatter):
f"(_PyOpcode_opcode_metadata[(OP)].flags & ({name}))")


@dataclasses.dataclass
class ActiveCacheEffect:
    """Wraps a CacheEffect that is actually used, in context.

    "Active" means the effect's name is not UNUSED, so the generated code
    actually declares and reads a variable for it.  Pre-computing these
    (with their offsets) lets callers iterate only the used cache entries
    instead of re-scanning and re-filtering all cache effects each time.
    """
    # The underlying cache effect from the instruction definition.
    effect: parser.CacheEffect
    # Offset (in cache code units) of this effect from the start of the
    # instruction's cache region; accumulated over ALL preceding effects,
    # including unused ones.
    offset: int


FORBIDDEN_NAMES_IN_UOPS = (
"resume_with_error", # Proxy for "goto", which isn't an IDENTIFIER
"unbound_local_error",
Expand Down Expand Up @@ -344,6 +351,7 @@ class Instruction:
unmoved_names: frozenset[str]
instr_fmt: str
instr_flags: InstructionFlags
active_caches: list[ActiveCacheEffect]

# Set later
family: parser.Family | None = None
Expand Down Expand Up @@ -375,15 +383,19 @@ def __init__(self, inst: parser.InstDef):

self.instr_flags = InstructionFlags.fromInstruction(inst)

self.active_caches = []
offset = 0
for effect in self.cache_effects:
if effect.name != UNUSED:
self.active_caches.append(ActiveCacheEffect(effect, offset))
offset += effect.size

if self.instr_flags.HAS_ARG_FLAG:
fmt = "IB"
else:
fmt = "IX"
cache = "C"
for ce in self.cache_effects:
for _ in range(ce.size):
fmt += cache
cache = "0"
if offset:
fmt += "C" + "0"*(offset-1)
self.instr_fmt = fmt

def is_viable_uop(self) -> bool:
Expand All @@ -392,18 +404,11 @@ def is_viable_uop(self) -> bool:
return False
if self.instr_flags.HAS_ARG_FLAG:
# If the instruction uses oparg, it cannot use any caches
for c in self.cache_effects:
if c.name != UNUSED:
return False
if self.active_caches:
return False
else:
# If it doesn't use oparg, it can have one cache entry
caches: list[parser.CacheEffect] = []
cache_offset = 0
for c in self.cache_effects:
if c.name != UNUSED:
caches.append(c)
cache_offset += c.size
if len(caches) > 1:
if len(self.active_caches) > 1:
return False
for forbidden in FORBIDDEN_NAMES_IN_UOPS:
# TODO: Don't check in '#ifdef ENABLE_SPECIALIZATION' regions
Expand Down Expand Up @@ -458,7 +463,7 @@ def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:

# out.emit(f"next_instr += OPSIZE({self.inst.name}) - 1;")

self.write_body(out, 0, tier=tier)
self.write_body(out, 0, self.active_caches, tier=tier)

# Skip the rest if the block always exits
if self.always_exits:
Expand Down Expand Up @@ -492,33 +497,30 @@ def write_body(
self,
out: Formatter,
dedent: int,
cache_adjust: int = 0,
active_caches: list[ActiveCacheEffect],
tier: Tiers = TIER_ONE,
) -> None:
"""Write the instruction body."""
# Write cache effect variable declarations and initializations
cache_offset = cache_adjust
for ceffect in self.cache_effects:
if ceffect.name != UNUSED:
bits = ceffect.size * BITS_PER_CODE_UNIT
if bits == 64:
# NOTE: We assume that 64-bit data in the cache
# is always an object pointer.
# If this becomes false, we need a way to specify
# syntactically what type the cache data is.
typ = "PyObject *"
func = "read_obj"
else:
typ = f"uint{bits}_t "
func = f"read_u{bits}"
if tier == TIER_ONE:
out.emit(
f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);"
)
else:
out.emit(f"{typ}{ceffect.name} = operand;")
cache_offset += ceffect.size
assert cache_offset == self.cache_offset + cache_adjust
for active in active_caches:
ceffect = active.effect
bits = ceffect.size * BITS_PER_CODE_UNIT
if bits == 64:
# NOTE: We assume that 64-bit data in the cache
# is always an object pointer.
# If this becomes false, we need a way to specify
# syntactically what type the cache data is.
typ = "PyObject *"
func = "read_obj"
else:
typ = f"uint{bits}_t "
func = f"read_u{bits}"
if tier == TIER_ONE:
out.emit(
f"{typ}{ceffect.name} = {func}(&next_instr[{active.offset}].cache);"
)
else:
out.emit(f"{typ}{ceffect.name} = operand;")

# Write the body, substituting a goto for ERROR_IF() and other stuff
assert dedent <= 0
Expand Down Expand Up @@ -583,8 +585,9 @@ class Component:
instr: Instruction
input_mapping: StackEffectMapping
output_mapping: StackEffectMapping
active_caches: list[ActiveCacheEffect]

def write_body(self, out: Formatter, cache_adjust: int) -> None:
def write_body(self, out: Formatter) -> None:
with out.block(""):
input_names = {ieffect.name for _, ieffect in self.input_mapping}
for var, ieffect in self.input_mapping:
Expand All @@ -593,7 +596,7 @@ def write_body(self, out: Formatter, cache_adjust: int) -> None:
if oeffect.name not in input_names:
out.declare(oeffect, None)

self.instr.write_body(out, dedent=-4, cache_adjust=cache_adjust)
self.instr.write_body(out, -4, self.active_caches)

for var, oeffect in self.output_mapping:
out.assign(var, oeffect)
Expand All @@ -611,6 +614,7 @@ class MacroInstruction:
instr_flags: InstructionFlags
macro: parser.Macro
parts: list[Component | parser.CacheEffect]
cache_offset: int
predicted: bool = False


Expand Down Expand Up @@ -873,21 +877,18 @@ def effect_counts(self, name: str) -> tuple[int, int, int]:
cache = instr.cache_offset
input = len(instr.input_effects)
output = len(instr.output_effects)
elif macro := self.macro_instrs.get(name):
cache, input, output = 0, 0, 0
for part in macro.parts:
elif mac := self.macro_instrs.get(name):
cache = mac.cache_offset
input, output = 0, 0
for part in mac.parts:
if isinstance(part, Component):
cache += part.instr.cache_offset
# A component may pop what the previous component pushed,
# so we offset the input/output counts by that.
delta_i = len(part.instr.input_effects)
delta_o = len(part.instr.output_effects)
offset = min(delta_i, output)
input += delta_i - offset
output += delta_o - offset
else:
assert isinstance(part, parser.CacheEffect), part
cache += part.size
else:
assert False, f"Unknown instruction {name!r}"
return cache, input, output
Expand All @@ -906,29 +907,25 @@ def analyze_macro(self, macro: parser.Macro) -> MacroInstruction:
stack, initial_sp = self.stack_analysis(components)
sp = initial_sp
parts: list[Component | parser.CacheEffect] = []
format = "IB"
flags = InstructionFlags.newEmpty()
cache = "C"
offset = 0
for component in components:
match component:
case parser.CacheEffect() as ceffect:
parts.append(ceffect)
for _ in range(ceffect.size):
format += cache
cache = "0"
offset += ceffect.size
case Instruction() as instr:
part, sp = self.analyze_instruction(instr, stack, sp)
part, sp, offset = self.analyze_instruction(instr, stack, sp, offset)
parts.append(part)
for ce in instr.cache_effects:
for _ in range(ce.size):
format += cache
cache = "0"
flags.add(instr.instr_flags)
case _:
typing.assert_never(component)
final_sp = sp
format = "IB"
if offset:
format += "C" + "0"*(offset-1)
return MacroInstruction(
macro.name, stack, initial_sp, final_sp, format, flags, macro, parts
macro.name, stack, initial_sp, final_sp, format, flags, macro, parts, offset
)

def analyze_pseudo(self, pseudo: parser.Pseudo) -> PseudoInstruction:
Expand All @@ -941,8 +938,8 @@ def analyze_pseudo(self, pseudo: parser.Pseudo) -> PseudoInstruction:
return PseudoInstruction(pseudo.name, targets, fmts[0], targets[0].instr_flags)

def analyze_instruction(
self, instr: Instruction, stack: list[StackEffect], sp: int
) -> tuple[Component, int]:
self, instr: Instruction, stack: list[StackEffect], sp: int, offset: int
) -> tuple[Component, int, int]:
input_mapping: StackEffectMapping = []
for ieffect in reversed(instr.input_effects):
sp -= 1
Expand All @@ -951,7 +948,12 @@ def analyze_instruction(
for oeffect in instr.output_effects:
output_mapping.append((stack[sp], oeffect))
sp += 1
return Component(instr, input_mapping, output_mapping), sp
active_effects: list[ActiveCacheEffect] = []
for ceffect in instr.cache_effects:
if ceffect.name != UNUSED:
active_effects.append(ActiveCacheEffect(ceffect, offset))
offset += ceffect.size
return Component(instr, input_mapping, output_mapping, active_effects), sp, offset

def check_macro_components(
self, macro: parser.Macro
Expand Down Expand Up @@ -1030,7 +1032,7 @@ def stack_analysis(

def get_stack_effect_info(
self, thing: parser.InstDef | parser.Macro | parser.Pseudo
) -> tuple[AnyInstruction | None, str, str]:
) -> tuple[AnyInstruction | None, str | None, str | None]:
def effect_str(effects: list[StackEffect]) -> str:
n_effect, sym_effect = list_effect_size(effects)
if sym_effect:
Expand Down Expand Up @@ -1108,6 +1110,7 @@ def write_stack_effect_functions(self) -> None:
continue
instr, popped, pushed = self.get_stack_effect_info(thing)
if instr is not None:
assert popped is not None and pushed is not None
popped_data.append((instr, popped))
pushed_data.append((instr, pushed))

Expand Down Expand Up @@ -1254,8 +1257,7 @@ def write_metadata(self) -> None:
if instr.kind != "op" and instr.is_viable_uop():
# Double check there aren't any used cache effects.
# If this fails, see write_macro_expansions().
assert not [e for e in instr.cache_effects if e.name != UNUSED], \
(instr.name, instr.cache_effects)
assert not instr.active_caches, (instr.name, instr.cache_effects)
self.out.emit(
f"[{name}] = "
f"{{ .nuops = 1, .uops = {{ {{ {name}, 0, 0 }} }} }},"
Expand Down Expand Up @@ -1332,34 +1334,19 @@ def write_macro_expansions(self, mac: MacroInstruction) -> None:
offset = 0 # Cache effect offset
expansions: list[tuple[str, int, int]] = [] # [(name, size, offset), ...]
for part in mac.parts:
match part:
case parser.CacheEffect():
offset += part.size
case Component():
# All component instructions must be viable uops
if not part.instr.is_viable_uop():
print(f"Part {part.instr.name} of {mac.name} is not a viable uop")
return
effect = None
effect_offset = 0
for eff in part.instr.cache_effects:
if eff.name != UNUSED:
assert effect is None, \
f"{mac.name} has multiple cache effects {eff.name}"
effect = eff
effect_offset = offset
offset += eff.size
if effect is not None:
# We don't seem to have examples of this yet; remove the print once we do
print(f"Part {part.instr.name} of {mac.name} has cache effect {effect.name}")
assert not part.instr.instr_flags.HAS_ARG_FLAG, \
f"{mac.name} has both cache effect and arg flag"
exp = (part.instr.name, effect.size, effect_offset)
else:
exp = (part.instr.name, 0, 0)
expansions.append(exp)
case _:
typing.assert_never(part)
if isinstance(part, Component):
# All component instructions must be viable uops
if not part.instr.is_viable_uop():
print(f"NOTE: Part {part.instr.name} of {mac.name} is not a viable uop")
return
if part.instr.instr_flags.HAS_ARG_FLAG or not part.active_caches:
size, offset = 0, 0
else:
# If this assert triggers, is_viable_uops() lied
assert len(part.active_caches) == 1, (mac.name, part.instr.name)
cache = part.active_caches[0]
size, offset = cache.effect.size, cache.offset
expansions.append((part.instr.name, size, offset))
assert len(expansions) > 0, f"Macro {mac.name} has empty expansion?!"
pieces = [f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions]
self.out.emit(
Expand Down Expand Up @@ -1483,7 +1470,7 @@ def write_macro(self, mac: MacroInstruction) -> None:
cache_adjust += size
case Component() as comp:
last_instr = comp.instr
comp.write_body(self.out, cache_adjust)
comp.write_body(self.out)
cache_adjust += comp.instr.cache_offset

if cache_adjust:
Expand Down