Skip to content

Commit 6388800

Browse files
committed
fix performance regression in tco and fploop caused by the lazify changes
1 parent 79ef391 commit 6388800

3 files changed

Lines changed: 124 additions & 38 deletions

File tree

unpythonic/lazyutil.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,25 @@
88
__all__ = ["mark_lazy", "lazycall", "force1", "force"]
99

1010
from .regutil import register_decorator
11+
from .dynassign import make_dynvar
1112

1213
# HACK: break dependency loop llist -> fun -> lazyutil -> collections -> llist
1314
#from .collections import mogrify
1415
_init_done = False
16+
jump = object() # gensym, nothing else "is" this
1517
def _init_module(): # called by unpythonic.__init__ when otherwise done
16-
global mogrify, _init_done
18+
global mogrify, jump, _init_done
1719
from .collections import mogrify
20+
from .tco import jump
1821
_init_done = True
1922

2023
try: # MacroPy is optional for unpythonic
2124
from macropy.quick_lambda import Lazy
2225
except ImportError:
2326
Lazy = type()
2427

28+
make_dynvar(_build_lazy_trampoline=False) # interaction with TCO
29+
2530
# -----------------------------------------------------------------------------
2631

2732
@register_decorator(priority=95)
@@ -37,6 +42,9 @@ def islazy(f):
3742

3843
def lazycall(f, *thunks, **kwthunks):
3944
"""Internal. Helps calling strict functions from inside a ``with lazify`` block."""
45+
if f is jump: # special case to avoid drastic performance hit in strict code
46+
target, *argthunks = thunks
47+
return jump(force(target), *argthunks, **kwthunks)
4048
if islazy(f):
4149
return f(*thunks, **kwthunks)
4250
return f(*force(thunks), **force(kwthunks))

unpythonic/syntax/lazify.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from ast import Lambda, FunctionDef, Call, Name, Attribute, \
55
Starred, keyword, List, Tuple, Dict, Set, \
6-
Subscript, Load
6+
Subscript, Load, With, withitem
77
from .astcompat import AsyncFunctionDef
88

99
from macropy.core.quotes import macros, q, ast_literal
@@ -16,6 +16,7 @@
1616
isx, make_isxpred, getname, is_decorator
1717
from .letdoutil import islet, isdo, ExpandedLetView
1818
from ..lazyutil import mark_lazy, force, force1, lazycall
19+
from ..dynassign import dyn
1920

2021
# -----------------------------------------------------------------------------
2122

@@ -349,6 +350,52 @@ def transform_starred(tree, dstarred=False):
349350
newbody = []
350351
for stmt in body:
351352
newbody.append(transform.recurse(stmt, forcing_mode="full"))
352-
return newbody
353+
354+
# Pay-as-you-go: to avoid a drastic performance hit (~10x) in trampolines
355+
# built by unpythonic.tco.trampolined for regular strict code, a special mode
356+
# must be enabled to build lazify-aware trampolines.
357+
#
358+
# The idea is that the mode is enabled while any function definitions in the
359+
# "with lazify" block run, so they get a lazify-aware trampoline.
360+
# This should be determined lexically, but that's complicated to do API-wise,
361+
# so we currently enable the mode for the dynamic extent of the "with lazify".
362+
# Usually this is close enough; the main case where this can behave
363+
# unexpectedly is::
364+
#
365+
# @trampolined # strict trampoline
366+
# def g():
367+
# ...
368+
#
369+
# def make_f():
370+
# @trampolined # which kind of trampoline is this?
371+
# def f():
372+
# ...
373+
# return f
374+
#
375+
# f1 = make_f() # f1 gets the strict trampoline
376+
#
377+
# with lazify:
378+
# @trampolined # lazify-aware trampoline
379+
# def h():
380+
# ...
381+
#
382+
# f2 = make_f() # f2 gets the lazify-aware trampoline
383+
#
384+
# TCO chains with an arbitrary mix of lazy and strict functions should work
385+
# as long as the first function in the chain has a lazify-aware trampoline
386+
# (because the chain runs under the trampoline of the first function).
387+
#
388+
# Tail-calling from a strict function into a lazy function should work, because
389+
# all arguments are evaluated at the strict side before the call is made.
390+
#
391+
# But tail-calling strict -> lazy -> strict will fail in some cases.
392+
# The second strict callee may get promises instead of values, because the
393+
# strict trampoline does not call through lazycall (which usually forces the args
394+
# when lazy code calls into strict code).
395+
item = hq[dyn.let(_build_lazy_trampoline=True)]
396+
wrapped = With(items=[withitem(context_expr=item, optional_vars=None)],
397+
body=newbody,
398+
lineno=body[0].lineno, col_offset=body[0].col_offset)
399+
return [wrapped]
353400

354401
# -----------------------------------------------------------------------------

unpythonic/tco.py

Lines changed: 66 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,12 @@ def baz():
131131
from sys import stderr
132132

133133
from .regutil import register_decorator
134-
from .lazyutil import islazy, mark_lazy, force, lazycall
134+
from .lazyutil import islazy, mark_lazy, lazycall
135+
from .dynassign import dyn
135136

136-
@mark_lazy
137+
# In principle, jump should have @mark_lazy, but for performance reasons
138+
# it doesn't. "force(target)" is slow, so strict code shouldn't have to do that.
139+
# This is handled by a special case in lazycall.
137140
def jump(target, *args, **kwargs):
138141
"""A jump (noun, not verb).
139142
@@ -151,7 +154,7 @@ def jump(target, *args, **kwargs):
151154
**kwargs:
152155
Named arguments to be passed to `target`.
153156
"""
154-
return _jump(force(target), args, kwargs)
157+
return _jump(target, args, kwargs)
155158

156159
class _jump:
157160
"""The actual class representing a jump.
@@ -234,35 +237,63 @@ def trampolined(function):
234237
to perform optimized tail calls. (*Optimized* in the sense of not
235238
increasing the call stack depth, not for speed.)
236239
"""
237-
@wraps(function)
238-
def trampoline(*args, **kwargs):
239-
f = function
240-
while True:
241-
if callable(f): # general case
242-
v = lazycall(f, *args, **kwargs)
243-
else: # inert-data return value from call_ec or similar
244-
v = f
245-
if isinstance(v, _jump):
246-
f = v.target
247-
if not callable(f): # protect against jump() to inert data from call_ec or similar
248-
raise RuntimeError("Cannot jump into a non-callable value '{}'".format(f))
249-
args = v.args
250-
kwargs = v.kwargs
251-
v._claimed = True
252-
else: # final result, exit trampoline
253-
return v
254-
# Work together with call_ec and other do-it-now decorators.
255-
#
256-
# The function has already been replaced by its return value. E.g. call_ec
257-
# must work that way, because the ec is only valid during the dynamic extent
258-
# of the call_ec. OTOH, the trampoline must be **outside**, to be able to
259-
# catch a jump() from the result of the call_ec. So we treat a non-callable
260-
# "function" as an inert-data return value.
261-
if callable(function):
262-
# fortunately functions in Python are just objects; stash for jump constructor
263-
trampoline._entrypoint = function
264-
if islazy(function):
265-
trampoline = mark_lazy(trampoline)
266-
return trampoline
267-
else: # return value from call_ec or similar do-it-now decorator
268-
return trampoline()
240+
if not dyn._build_lazy_trampoline:
241+
# building a trampoline for regular strict code
242+
@wraps(function)
243+
def trampoline(*args, **kwargs):
244+
f = function
245+
while True:
246+
if callable(f): # general case
247+
v = f(*args, **kwargs)
248+
else: # inert-data return value from call_ec or similar
249+
v = f
250+
if isinstance(v, _jump):
251+
f = v.target
252+
if not callable(f): # protect against jump() to inert data from call_ec or similar
253+
raise RuntimeError("Cannot jump into a non-callable value '{}'".format(f))
254+
args = v.args
255+
kwargs = v.kwargs
256+
v._claimed = True
257+
else: # final result, exit trampoline
258+
return v
259+
# Work together with call_ec and other do-it-now decorators.
260+
#
261+
# The function has already been replaced by its return value. E.g. call_ec
262+
# must work that way, because the ec is only valid during the dynamic extent
263+
# of the call_ec. OTOH, the trampoline must be **outside**, to be able to
264+
# catch a jump() from the result of the call_ec. So we treat a non-callable
265+
# "function" as an inert-data return value.
266+
if callable(function):
267+
# fortunately functions in Python are just objects; stash for jump constructor
268+
trampoline._entrypoint = function
269+
return trampoline
270+
else: # return value from call_ec or similar do-it-now decorator
271+
return trampoline()
272+
else:
273+
# Same code as the strict branch above, plus the lazify-aware parts (the lines tagged "<--"): calling via lazycall and propagating mark_lazy.
274+
# This is to avoid a drastic (~10x) performance hit in trampolines
275+
# built for regular strict code.
276+
@wraps(function)
277+
def trampoline(*args, **kwargs):
278+
f = function
279+
while True:
280+
if callable(f):
281+
v = lazycall(f, *args, **kwargs) # <-- this causes the performance hit
282+
else:
283+
v = f
284+
if isinstance(v, _jump):
285+
f = v.target
286+
if not callable(f):
287+
raise RuntimeError("Cannot jump into a non-callable value '{}'".format(f))
288+
args = v.args
289+
kwargs = v.kwargs
290+
v._claimed = True
291+
else: # final result, exit trampoline
292+
return v
293+
if callable(function):
294+
trampoline._entrypoint = function
295+
if islazy(function): # <--
296+
trampoline = mark_lazy(trampoline) # <--
297+
return trampoline
298+
else:
299+
return trampoline()

0 commit comments

Comments
 (0)