Skip to content

Commit 5296d7b

Browse files
committed
C++: reachableRecursive refactor for performance
The `reachable` predicate is large and slow to compute. It's part of a mutual recursion that's non-linear, meaning it has a recursive call on both sides of an `and`. This change removes a part of the base case that has no effect on recursive cases. The removed part is added back after the recursion has finished. That change itself was only a few lines long, but it broke the caching of the `aborting` and `abortingFunction` predicates in `ConstantExprs.qll`. These predicates could previously be recomputed cheaply from cached information, but that's no longer the case because they now refer (indirectly) to the new `reachableRecursive` predicate, which is not cached, instead of the `reachable` predicate. To fix that, I moved all predicates related to CFG pruning into a single `cached module`, which required most of the predicates to be moved from `ControlFlowGraph.qll` to `ConstantExprs.qll`. Before, on Wireshark: ControlFlowGraph::Cached::reachable#f .......... 20.8s (executed 9800 times) ConstantExprs::successors_adapted#ff ........... 4.2s (executed 615 times) ConstantExprs::potentiallyReturningFunction#f .. 3.9s (executed 9799 times) ConstantExprs::possiblePredecessor#f ........... 2.9s (executed 788 times) After, on Wireshark: ConstantExprs::reachableRecursive#f ............ 13.2s (executed 9800 times) ConstantExprs::successors_adapted#ff ........... 4.2s (executed 615 times) ConstantExprs::potentiallyReturningFunction#f .. 4.3s (executed 9799 times) ConstantExprs::possiblePredecessor#f ........... 2.6s (executed 788 times) I've verified that this change doesn't change what's computed by checking that the output of the following query is unchanged: import cpp import semmle.code.cpp.controlflow.internal.ConstantExprs select strictcount(ControlFlowNode n | reachable(n)) as reachable, strictcount(ControlFlowNode n1, ControlFlowNode n2 | n2 = n1.getASuccessor()) as edges, strictcount(FunctionCall c | aborting(c)) as abortingCall, strictcount(Function f | abortingFunction(f)) as abortingFunction
1 parent 0096024 commit 5296d7b

2 files changed

Lines changed: 111 additions & 101 deletions

File tree

cpp/ql/src/semmle/code/cpp/controlflow/ControlFlowGraph.qll

Lines changed: 1 addition & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -75,85 +75,7 @@ class ControlFlowNode extends Locatable, ControlFlowNodeBase {
7575
}
7676
}
7777

78-
import Cached
79-
private cached module Cached {
80-
// The base case of `reachable` is factored out for performance. If it's
81-
// written in-line in `reachable`, the compiler inserts a `n instanceof
82-
// ControlFlowNode` check because the `not ... and not ...` case doesn't
83-
// otherwise bind `n`. The problem is that this check is inserted at the
84-
// outermost level of this predicate, so it covers all cases including the
85-
// recursive case. The optimizer doesn't eliminate the check even though it's
86-
// redundant, and having the check leads to needless extra computation and a
87-
// risk of bad join orders.
88-
private predicate reachableBaseCase(ControlFlowNode n) {
89-
exists(Function f | f.getEntryPoint() = n)
90-
or
91-
// Okay to use successors_extended directly here
92-
(not successors_extended(_,n) and not successors_extended(n,_))
93-
or
94-
n instanceof Handler
95-
}
96-
97-
/**
98-
* Holds if the control-flow node `n` is reachable, meaning that either
99-
* it is an entry point, or there exists a path in the control-flow
100-
* graph of its function that connects an entry point to it.
101-
* Compile-time constant conditions are taken into account, so that
102-
* the call to `f` is not reachable in `if (0) f();` even if the
103-
* `if` statement as a whole is reachable.
104-
*/
105-
cached
106-
predicate reachable(ControlFlowNode n)
107-
{
108-
reachableBaseCase(n)
109-
or
110-
reachable(n.getAPredecessor())
111-
}
112-
113-
/** Holds if `condition` always evaluates to a nonzero value. */
114-
cached
115-
predicate conditionAlwaysTrue(Expr condition) {
116-
conditionAlways(condition, true)
117-
}
118-
119-
/** Holds if `condition` always evaluates to zero. */
120-
cached
121-
predicate conditionAlwaysFalse(Expr condition) {
122-
conditionAlways(condition, false)
123-
or
124-
// If a loop condition evaluates to false upon entry, it will always
125-
// be false
126-
loopConditionAlwaysUponEntry(_, condition, false)
127-
}
128-
129-
/**
130-
* The condition `condition` for the loop `loop` is provably `true` upon entry.
131-
* That is, at least one iteration of the loop is guaranteed.
132-
*/
133-
cached
134-
predicate loopConditionAlwaysTrueUponEntry(ControlFlowNode loop, Expr condition) {
135-
loopConditionAlwaysUponEntry(loop, condition, true)
136-
}
137-
}
138-
139-
private predicate conditionAlways(Expr condition, boolean b) {
140-
exists(ConditionEvaluator x, int val |
141-
val = x.getValue(condition) |
142-
val != 0 and b = true
143-
or
144-
val = 0 and b = false
145-
)
146-
}
147-
148-
private predicate loopConditionAlwaysUponEntry(ControlFlowNode loop, Expr condition, boolean b) {
149-
exists(LoopEntryConditionEvaluator x, int val |
150-
x.isLoopEntry(condition, loop) and
151-
val = x.getValue(condition) |
152-
val != 0 and b = true
153-
or
154-
val = 0 and b = false
155-
)
156-
}
78+
import ControlFlowGraphPublic
15779

15880
/**
15981
* An element that is convertible to `ControlFlowNode`. This class is similar

cpp/ql/src/semmle/code/cpp/controlflow/internal/ConstantExprs.qll

Lines changed: 110 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,107 @@
11
import cpp
22
private import PrimitiveBasicBlocks
33
private class Node = ControlFlowNodeBase;
4+
import Cached
5+
6+
cached
7+
private module Cached {
8+
/** A call to a function known not to return. */
9+
cached
10+
predicate aborting(FunctionCall c) {
11+
not potentiallyReturningFunctionCall(c)
12+
}
13+
14+
/**
15+
* Functions that are known not to return. This is normally because the function
16+
* exits the program or longjmps to another location.
17+
*/
18+
cached
19+
predicate abortingFunction(Function f) {
20+
not potentiallyReturningFunction(f)
21+
}
22+
23+
cached
24+
module ControlFlowGraphPublic {
25+
/**
26+
* Holds if the control-flow node `n` is reachable, meaning that either
27+
* it is an entry point, or there exists a path in the control-flow
28+
* graph of its function that connects an entry point to it.
29+
* Compile-time constant conditions are taken into account, so that
30+
* the call to `f` is not reachable in `if (0) f();` even if the
31+
* `if` statement as a whole is reachable.
32+
*/
33+
cached
34+
predicate reachable(ControlFlowNode n)
35+
{
36+
// Okay to use successors_extended directly here
37+
reachableRecursive(n)
38+
or
39+
(not successors_extended(_,n) and not successors_extended(n,_))
40+
}
41+
42+
/** Holds if `condition` always evaluates to a nonzero value. */
43+
cached
44+
predicate conditionAlwaysTrue(Expr condition) {
45+
conditionAlways(condition, true)
46+
}
47+
48+
/** Holds if `condition` always evaluates to zero. */
49+
cached
50+
predicate conditionAlwaysFalse(Expr condition) {
51+
conditionAlways(condition, false)
52+
or
53+
// If a loop condition evaluates to false upon entry, it will always
54+
// be false
55+
loopConditionAlwaysUponEntry(_, condition, false)
56+
}
57+
58+
/**
59+
* The condition `condition` for the loop `loop` is provably `true` upon entry.
60+
* That is, at least one iteration of the loop is guaranteed.
61+
*/
62+
cached
63+
predicate loopConditionAlwaysTrueUponEntry(ControlFlowNode loop, Expr condition) {
64+
loopConditionAlwaysUponEntry(loop, condition, true)
65+
}
66+
}
67+
68+
/**
69+
* An adapted version of the `successors_extended` relation that excludes
70+
* impossible control-flow edges - flow will never occur along these
71+
* edges, so it is safe (and indeed sensible) to remove them.
72+
*/
73+
cached predicate successors_adapted(Node pred, Node succ) {
74+
successors_extended(pred, succ)
75+
and possiblePredecessor(pred)
76+
and not impossibleFalseEdge(pred, succ)
77+
and not impossibleTrueEdge(pred, succ)
78+
and not impossibleSwitchEdge(pred, succ)
79+
and not impossibleDefaultSwitchEdge(pred, succ)
80+
and not impossibleFunctionReturn(pred, succ)
81+
and not getOptions().exprExits(pred)
82+
}
83+
}
484

5-
/** A call to a function known not to return. */
6-
predicate aborting(FunctionCall c) {
7-
not potentiallyReturningFunctionCall(c)
85+
private predicate conditionAlways(Expr condition, boolean b) {
86+
exists(ConditionEvaluator x, int val |
87+
val = x.getValue(condition) |
88+
val != 0 and b = true
89+
or
90+
val = 0 and b = false
91+
)
892
}
993

10-
/**
11-
* Functions that are known not to return. This is normally because the function
12-
* exits the program or longjmps to another location.
13-
*/
14-
predicate abortingFunction(Function f) {
15-
not potentiallyReturningFunction(f)
94+
private predicate loopConditionAlwaysUponEntry(ControlFlowNode loop, Expr condition, boolean b) {
95+
exists(LoopEntryConditionEvaluator x, int val |
96+
x.isLoopEntry(condition, loop) and
97+
val = x.getValue(condition) |
98+
val != 0 and b = true
99+
or
100+
val = 0 and b = false
101+
)
16102
}
17103

104+
18105
/**
19106
* This relation is the same as the `el instanceof Function`, only obfuscated
20107
* so the optimizer will not understand that any `FunctionCall.getTarget()`
@@ -61,7 +148,7 @@ private predicate potentiallyReturningFunction(Function f) {
61148
nonAnalyzableFunction(f)
62149
or
63150
// otherwise the exit-point of `f` must be reachable
64-
reachable(f)
151+
reachableRecursive(f)
65152
)
66153
}
67154

@@ -202,19 +289,20 @@ private predicate possiblePredecessor(Node pred) {
202289
}
203290

204291
/**
205-
* An adapted version of the `successors_extended` relation that excludes
206-
* impossible control-flow edges - flow will never occur along these
207-
* edges, so it is safe (and indeed sensible) to remove them.
292+
* Holds if the control-flow node `n` is reachable, meaning that either
293+
* it is an entry point, or there exists a path in the control-flow
294+
* graph of its function that connects an entry point to it.
295+
* Compile-time constant conditions are taken into account, so that
296+
* the call to `f` is not reachable in `if (0) f();` even if the
297+
* `if` statement as a whole is reachable.
208298
*/
209-
cached predicate successors_adapted(Node pred, Node succ) {
210-
successors_extended(pred, succ)
211-
and possiblePredecessor(pred)
212-
and not impossibleFalseEdge(pred, succ)
213-
and not impossibleTrueEdge(pred, succ)
214-
and not impossibleSwitchEdge(pred, succ)
215-
and not impossibleDefaultSwitchEdge(pred, succ)
216-
and not impossibleFunctionReturn(pred, succ)
217-
and not getOptions().exprExits(pred)
299+
private predicate reachableRecursive(ControlFlowNode n)
300+
{
301+
exists(Function f | f.getEntryPoint() = n)
302+
or
303+
n instanceof Handler
304+
or
305+
reachableRecursive(n.getAPredecessor())
218306
}
219307

220308
private predicate compileTimeConstantInt(Expr e, int val) {

0 commit comments

Comments
 (0)