From e3765ced7809ae800bdade1fb1543a1e598a3426 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 23 Aug 2021 15:29:07 +0200 Subject: [PATCH 01/24] Python: Add tests for modification of defaults --- ...odificationOfParameterWithDefault.expected | 28 +++++++ .../ModificationOfParameterWithDefault.qlref | 1 + .../test.py | 80 +++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected create mode 100644 python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.qlref create mode 100644 python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected new file mode 100644 index 000000000000..bf51e172916e --- /dev/null +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected @@ -0,0 +1,28 @@ +edges +| test.py:2:12:2:12 | empty mutable value | test.py:3:5:3:5 | empty mutable value | +| test.py:7:14:7:14 | empty mutable value | test.py:9:14:9:14 | empty mutable value | +| test.py:9:5:9:15 | empty mutable value | test.py:10:5:10:5 | empty mutable value | +| test.py:9:14:9:14 | empty mutable value | test.py:9:5:9:15 | empty mutable value | +| test.py:13:13:13:13 | empty mutable value | test.py:14:5:14:5 | empty mutable value | +| test.py:18:14:18:14 | empty mutable value | test.py:19:13:19:13 | empty mutable value | +| test.py:19:13:19:13 | empty mutable value | test.py:13:13:13:13 | empty mutable value | +| test.py:23:14:23:14 | non-empty mutable value | test.py:24:5:24:5 | non-empty mutable value | +| test.py:52:17:52:17 | empty mutable value | test.py:53:5:53:5 | empty mutable value | +| test.py:57:26:57:26 | non-empty mutable value | test.py:58:5:58:5 | non-empty mutable value | +| test.py:62:35:62:35 | non-empty mutable value | test.py:63:5:63:5 | non-empty mutable value | +| test.py:66:21:66:21 | empty mutable value | test.py:67:5:67:5 | empty mutable value | +| test.py:71:26:71:26 | empty mutable value | test.py:72:21:72:21 | empty mutable value | +| test.py:72:21:72:21 | empty mutable value | test.py:66:21:66:21 | empty mutable value | +| test.py:76:19:76:19 | empty mutable value | test.py:78:14:78:14 | empty mutable value | +| test.py:78:5:78:15 | empty mutable value | test.py:79:5:79:5 | empty mutable value | +| test.py:78:14:78:14 | empty mutable value | test.py:78:5:78:15 | empty mutable value | +#select +| test.py:3:5:3:5 | l | test.py:2:12:2:12 | empty mutable value | test.py:3:5:3:5 | empty mutable value | $@ flows to here and is mutated. | test.py:2:12:2:12 | l | Default value | +| test.py:10:5:10:5 | x | test.py:7:14:7:14 | empty mutable value | test.py:10:5:10:5 | empty mutable value | $@ flows to here and is mutated. | test.py:7:14:7:14 | l | Default value | +| test.py:14:5:14:5 | l | test.py:18:14:18:14 | empty mutable value | test.py:14:5:14:5 | empty mutable value | $@ flows to here and is mutated. | test.py:18:14:18:14 | l | Default value | +| test.py:24:5:24:5 | l | test.py:23:14:23:14 | non-empty mutable value | test.py:24:5:24:5 | non-empty mutable value | $@ flows to here and is mutated. | test.py:23:14:23:14 | l | Default value | +| test.py:53:5:53:5 | d | test.py:52:17:52:17 | empty mutable value | test.py:53:5:53:5 | empty mutable value | $@ flows to here and is mutated. | test.py:52:17:52:17 | d | Default value | +| test.py:58:5:58:5 | d | test.py:57:26:57:26 | non-empty mutable value | test.py:58:5:58:5 | non-empty mutable value | $@ flows to here and is mutated. | test.py:57:26:57:26 | d | Default value | +| test.py:63:5:63:5 | d | test.py:62:35:62:35 | non-empty mutable value | test.py:63:5:63:5 | non-empty mutable value | $@ flows to here and is mutated. | test.py:62:35:62:35 | d | Default value | +| test.py:67:5:67:5 | d | test.py:71:26:71:26 | empty mutable value | test.py:67:5:67:5 | empty mutable value | $@ flows to here and is mutated. | test.py:71:26:71:26 | d | Default value | +| test.py:79:5:79:5 | x | test.py:76:19:76:19 | empty mutable value | test.py:79:5:79:5 | empty mutable value | $@ flows to here and is mutated. | test.py:76:19:76:19 | d | Default value | diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.qlref b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.qlref new file mode 100644 index 000000000000..8c4044e8feeb --- /dev/null +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.qlref @@ -0,0 +1 @@ +Functions/ModificationOfParameterWithDefault.ql diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py new file mode 100644 index 000000000000..239492f8ead5 --- /dev/null +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -0,0 +1,80 @@ +# Not OK +def simple(l = []): + l.append(1) + return l + +# OK +def includes(l = []): + x = [0] + x.extend(l) + x.extend([1]) # FP + return x + +def extends(l): + l.extend([1]) + return l + +# Not OK +def deferred(l = []): + extends(l) + return l + +# Not OK +def nonempty(l = [5]): + l.append(1) + return l + +# Not OK +def dict(d = {}): + d['a'] = 1 # FN + return d + +# Not OK +def dict_nonempty(d = {'a': 1}): + d['a'] = 2 # FN + return d + +# OK +def dict_nonempty_nochange(d = {'a': 1}): + d['a'] = 1 + return d + +def modifies(d): + d['a'] = 1 # FN + return d + +# Not OK +def dict_deferred(d = {}): + modifies(d) + return d + +# Not OK +def dict_method(d = {}): + d.update({'a': 1}) + return d + +# Not OK +def dict_method_nonempty(d = {'a': 1}): + d.update({'a': 2}) + return d + +# OK +def dict_method_nonempty_nochange(d = {'a': 1}): + d.update({'a': 1}) # FP + return d + +def modifies_method(d): + d.update({'a': 1}) + return d + +# Not OK +def dict_deferred_method(d = {}): + modifies_method(d) + return d + +# OK +def dict_includes(d = {}): + x = {} + x.update(d) + x.update({'a': 1}) # FP + return x From e865a290dee644a12a9cd7bdd377ecc5a9b3682b Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 24 Aug 2021 16:34:12 +0200 Subject: [PATCH 02/24] Python: straight port of query The old query uses `pointsTo` to limit the sinks to methods on lists and dictionaries. That constraint is omitted here which could hurt performance. --- .../ModificationOfParameterWithDefault.ql | 92 ++----------------- .../ModificationOfParameterWithDefault.qll | 34 +++++++ ...onOfParameterWithDefaultCustomizations.qll | 90 ++++++++++++++++++ ...odificationOfParameterWithDefault.expected | 63 +++++++------ .../test.py | 4 +- 5 files changed, 171 insertions(+), 112 deletions(-) create mode 100644 python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll create mode 100644 python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll diff --git a/python/ql/src/Functions/ModificationOfParameterWithDefault.ql b/python/ql/src/Functions/ModificationOfParameterWithDefault.ql index 03edc35fa17c..9ff89d30e3fa 100644 --- a/python/ql/src/Functions/ModificationOfParameterWithDefault.ql +++ b/python/ql/src/Functions/ModificationOfParameterWithDefault.ql @@ -12,88 +12,12 @@ */ import python -import semmle.python.security.Paths - -predicate safe_method(string name) { - name = "count" or - name = "index" or - name = "copy" or - name = "get" or - name = "has_key" or - name = "items" or - name = "keys" or - name = "values" or - name = "iteritems" or - name = "iterkeys" or - name = "itervalues" or - name = "__contains__" or - name = "__getitem__" or - name = "__getattribute__" -} - -/** Gets the truthiness (non emptyness) of the default of `p` if that value is mutable */ -private boolean mutableDefaultValue(Parameter p) { - exists(Dict d | p.getDefault() = d | - exists(d.getAKey()) and result = true - or - not exists(d.getAKey()) and result = false - ) - or - exists(List l | p.getDefault() = l | - exists(l.getAnElt()) and result = true - or - not exists(l.getAnElt()) and result = false - ) -} - -class NonEmptyMutableValue extends TaintKind { - NonEmptyMutableValue() { this = "non-empty mutable value" } -} - -class EmptyMutableValue extends TaintKind { - EmptyMutableValue() { this = "empty mutable value" } - - override boolean booleanValue() { result = false } -} - -class MutableDefaultValue extends TaintSource { - boolean nonEmpty; - - MutableDefaultValue() { nonEmpty = mutableDefaultValue(this.(NameNode).getNode()) } - - override string toString() { result = "mutable default value" } - - override predicate isSourceOf(TaintKind kind) { - nonEmpty = false and kind instanceof EmptyMutableValue - or - nonEmpty = true and kind instanceof NonEmptyMutableValue - } -} - -private ClassValue mutable_class() { - result = Value::named("list") or - result = Value::named("dict") -} - -class Mutation extends TaintSink { - Mutation() { - exists(AugAssign a | a.getTarget().getAFlowNode() = this) - or - exists(Call c, Attribute a | c.getFunc() = a | - a.getObject().getAFlowNode() = this and - not safe_method(a.getName()) and - this.(ControlFlowNode).pointsTo().getClass() = mutable_class() - ) - } - - override predicate sinks(TaintKind kind) { - kind instanceof EmptyMutableValue - or - kind instanceof NonEmptyMutableValue - } -} - -from TaintedPathSource src, TaintedPathSink sink -where src.flowsTo(sink) -select sink.getSink(), src, sink, "$@ flows to here and is mutated.", src.getSource(), +import semmle.python.functions.ModificationOfParameterWithDefault +import DataFlow::PathGraph + +from + ModificationOfParameterWithDefault::Configuration config, DataFlow::PathNode source, + DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "$@ flows to here and is mutated.", source.getNode(), "Default value" diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll new file mode 100644 index 000000000000..914eb7a473e0 --- /dev/null +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll @@ -0,0 +1,34 @@ +/** + * Provides a data-flow configuration for detecting modifications of a parameters default value. + * + * Note, for performance reasons: only import this file if + * `ModificationOfParameterWithDefault::Configuration` is needed, otherwise + * `ModificationOfParameterWithDefaultCustomizations` should be imported instead. + */ + +private import python +import semmle.python.dataflow.new.DataFlow + +/** + * Provides a data-flow configuration for detecting modifications of a parameters default value. + */ +module ModificationOfParameterWithDefault { + import ModificationOfParameterWithDefaultCustomizations::ModificationOfParameterWithDefault + + /** + * A data-flow configuration for detecting modifications of a parameters default value. + */ + class Configuration extends DataFlow::Configuration { + Configuration() { this = "ModificationOfParameterWithDefault" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isBarrier(DataFlow::Node node) { node instanceof Barrier } + + override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { + guard instanceof BarrierGuard + } + } +} diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll new file mode 100644 index 000000000000..90ab83e6753c --- /dev/null +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -0,0 +1,90 @@ +/** + * Provides default sources, sinks and sanitizers for detecting + * modifications of a parameters default value, as well as extension points for adding your own. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.BarrierGuards + +/** + * Provides default sources, sinks and sanitizers for detecting + * "command injection" + * vulnerabilities, as well as extension points for adding your own. + */ +module ModificationOfParameterWithDefault { + /** + * A data flow source for detecting modifications of a parameters default value. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for detecting modifications of a parameters default value. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for detecting modifications of a parameters default value. + */ + abstract class Barrier extends DataFlow::Node { } + + /** + * A sanitizer guard for detecting modifications of a parameters default value. + */ + abstract class BarrierGuard extends DataFlow::BarrierGuard { } + + /** Gets the truthiness (non emptyness) of the default of `p` if that value is mutable */ + private boolean mutableDefaultValue(Parameter p) { + exists(Dict d | p.getDefault() = d | + exists(d.getAKey()) and result = true + or + not exists(d.getAKey()) and result = false + ) + or + exists(List l | p.getDefault() = l | + exists(l.getAnElt()) and result = true + or + not exists(l.getAnElt()) and result = false + ) + } + + /** + * A source of remote user input, considered as a flow source. + */ + class MutableDefaultValue extends Source { + boolean nonEmpty; + + MutableDefaultValue() { nonEmpty = mutableDefaultValue(this.asCfgNode().(NameNode).getNode()) } + } + + predicate safe_method(string name) { + name = "count" or + name = "index" or + name = "copy" or + name = "get" or + name = "has_key" or + name = "items" or + name = "keys" or + name = "values" or + name = "iteritems" or + name = "iterkeys" or + name = "itervalues" or + name = "__contains__" or + name = "__getitem__" or + name = "__getattribute__" + } + + /** + * A mutation is considered a flow sink. + */ + class Mutation extends Sink { + Mutation() { + exists(AugAssign a | a.getTarget().getAFlowNode() = this.asCfgNode()) + or + exists(Call c, Attribute a | c.getFunc() = a | + a.getObject().getAFlowNode() = this.asCfgNode() and + not safe_method(a.getName()) + ) + } + } +} diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected index bf51e172916e..3032f6b1f807 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected @@ -1,28 +1,39 @@ edges -| test.py:2:12:2:12 | empty mutable value | test.py:3:5:3:5 | empty mutable value | -| test.py:7:14:7:14 | empty mutable value | test.py:9:14:9:14 | empty mutable value | -| test.py:9:5:9:15 | empty mutable value | test.py:10:5:10:5 | empty mutable value | -| test.py:9:14:9:14 | empty mutable value | test.py:9:5:9:15 | empty mutable value | -| test.py:13:13:13:13 | empty mutable value | test.py:14:5:14:5 | empty mutable value | -| test.py:18:14:18:14 | empty mutable value | test.py:19:13:19:13 | empty mutable value | -| test.py:19:13:19:13 | empty mutable value | test.py:13:13:13:13 | empty mutable value | -| test.py:23:14:23:14 | non-empty mutable value | test.py:24:5:24:5 | non-empty mutable value | -| test.py:52:17:52:17 | empty mutable value | test.py:53:5:53:5 | empty mutable value | -| test.py:57:26:57:26 | non-empty mutable value | test.py:58:5:58:5 | non-empty mutable value | -| test.py:62:35:62:35 | non-empty mutable value | test.py:63:5:63:5 | non-empty mutable value | -| test.py:66:21:66:21 | empty mutable value | test.py:67:5:67:5 | empty mutable value | -| test.py:71:26:71:26 | empty mutable value | test.py:72:21:72:21 | empty mutable value | -| test.py:72:21:72:21 | empty mutable value | test.py:66:21:66:21 | empty mutable value | -| test.py:76:19:76:19 | empty mutable value | test.py:78:14:78:14 | empty mutable value | -| test.py:78:5:78:15 | empty mutable value | test.py:79:5:79:5 | empty mutable value | -| test.py:78:14:78:14 | empty mutable value | test.py:78:5:78:15 | empty mutable value | +| test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | +| test.py:13:13:13:13 | ControlFlowNode for l | test.py:14:5:14:5 | ControlFlowNode for l | +| test.py:18:14:18:14 | ControlFlowNode for l | test.py:19:13:19:13 | ControlFlowNode for l | +| test.py:19:13:19:13 | ControlFlowNode for l | test.py:13:13:13:13 | ControlFlowNode for l | +| test.py:23:14:23:14 | ControlFlowNode for l | test.py:24:5:24:5 | ControlFlowNode for l | +| test.py:52:17:52:17 | ControlFlowNode for d | test.py:53:5:53:5 | ControlFlowNode for d | +| test.py:57:26:57:26 | ControlFlowNode for d | test.py:58:5:58:5 | ControlFlowNode for d | +| test.py:62:35:62:35 | ControlFlowNode for d | test.py:63:5:63:5 | ControlFlowNode for d | +| test.py:66:21:66:21 | ControlFlowNode for d | test.py:67:5:67:5 | ControlFlowNode for d | +| test.py:71:26:71:26 | ControlFlowNode for d | test.py:72:21:72:21 | ControlFlowNode for d | +| test.py:72:21:72:21 | ControlFlowNode for d | test.py:66:21:66:21 | ControlFlowNode for d | +nodes +| test.py:2:12:2:12 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:3:5:3:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:13:13:13:13 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:14:5:14:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:18:14:18:14 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:19:13:19:13 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:23:14:23:14 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:24:5:24:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:52:17:52:17 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:53:5:53:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:57:26:57:26 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:58:5:58:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:62:35:62:35 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:63:5:63:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:66:21:66:21 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:67:5:67:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:71:26:71:26 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:72:21:72:21 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | #select -| test.py:3:5:3:5 | l | test.py:2:12:2:12 | empty mutable value | test.py:3:5:3:5 | empty mutable value | $@ flows to here and is mutated. | test.py:2:12:2:12 | l | Default value | -| test.py:10:5:10:5 | x | test.py:7:14:7:14 | empty mutable value | test.py:10:5:10:5 | empty mutable value | $@ flows to here and is mutated. | test.py:7:14:7:14 | l | Default value | -| test.py:14:5:14:5 | l | test.py:18:14:18:14 | empty mutable value | test.py:14:5:14:5 | empty mutable value | $@ flows to here and is mutated. | test.py:18:14:18:14 | l | Default value | -| test.py:24:5:24:5 | l | test.py:23:14:23:14 | non-empty mutable value | test.py:24:5:24:5 | non-empty mutable value | $@ flows to here and is mutated. | test.py:23:14:23:14 | l | Default value | -| test.py:53:5:53:5 | d | test.py:52:17:52:17 | empty mutable value | test.py:53:5:53:5 | empty mutable value | $@ flows to here and is mutated. | test.py:52:17:52:17 | d | Default value | -| test.py:58:5:58:5 | d | test.py:57:26:57:26 | non-empty mutable value | test.py:58:5:58:5 | non-empty mutable value | $@ flows to here and is mutated. | test.py:57:26:57:26 | d | Default value | -| test.py:63:5:63:5 | d | test.py:62:35:62:35 | non-empty mutable value | test.py:63:5:63:5 | non-empty mutable value | $@ flows to here and is mutated. | test.py:62:35:62:35 | d | Default value | -| test.py:67:5:67:5 | d | test.py:71:26:71:26 | empty mutable value | test.py:67:5:67:5 | empty mutable value | $@ flows to here and is mutated. | test.py:71:26:71:26 | d | Default value | -| test.py:79:5:79:5 | x | test.py:76:19:76:19 | empty mutable value | test.py:79:5:79:5 | empty mutable value | $@ flows to here and is mutated. | test.py:76:19:76:19 | d | Default value | +| test.py:3:5:3:5 | ControlFlowNode for l | test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:2:12:2:12 | ControlFlowNode for l | Default value | +| test.py:14:5:14:5 | ControlFlowNode for l | test.py:18:14:18:14 | ControlFlowNode for l | test.py:14:5:14:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:18:14:18:14 | ControlFlowNode for l | Default value | +| test.py:24:5:24:5 | ControlFlowNode for l | test.py:23:14:23:14 | ControlFlowNode for l | test.py:24:5:24:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:23:14:23:14 | ControlFlowNode for l | Default value | +| test.py:53:5:53:5 | ControlFlowNode for d | test.py:52:17:52:17 | ControlFlowNode for d | test.py:53:5:53:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:52:17:52:17 | ControlFlowNode for d | Default value | +| test.py:58:5:58:5 | ControlFlowNode for d | test.py:57:26:57:26 | ControlFlowNode for d | test.py:58:5:58:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:57:26:57:26 | ControlFlowNode for d | Default value | +| test.py:63:5:63:5 | ControlFlowNode for d | test.py:62:35:62:35 | ControlFlowNode for d | test.py:63:5:63:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:62:35:62:35 | ControlFlowNode for d | Default value | +| test.py:67:5:67:5 | ControlFlowNode for d | test.py:71:26:71:26 | ControlFlowNode for d | test.py:67:5:67:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:71:26:71:26 | ControlFlowNode for d | Default value | diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py index 239492f8ead5..81319ed909f0 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -7,7 +7,7 @@ def simple(l = []): def includes(l = []): x = [0] x.extend(l) - x.extend([1]) # FP + x.extend([1]) return x def extends(l): @@ -76,5 +76,5 @@ def dict_deferred_method(d = {}): def dict_includes(d = {}): x = {} x.update(d) - x.update({'a': 1}) # FP + x.update({'a': 1}) return x From 5bff5188ac32a656a1204eef6f5351ffe430a78e Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 25 Aug 2021 23:52:42 +0200 Subject: [PATCH 03/24] Python: switch from negative to positive list This should avoid potentially terrible performance. Also noted the missing syntactic constructs, as I went through the documnetation. --- ...onOfParameterWithDefaultCustomizations.qll | 56 ++++++++++++------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 90ab83e6753c..c9be49b1feef 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -57,33 +57,51 @@ module ModificationOfParameterWithDefault { MutableDefaultValue() { nonEmpty = mutableDefaultValue(this.asCfgNode().(NameNode).getNode()) } } - predicate safe_method(string name) { - name = "count" or - name = "index" or - name = "copy" or - name = "get" or - name = "has_key" or - name = "items" or - name = "keys" or - name = "values" or - name = "iteritems" or - name = "iterkeys" or - name = "itervalues" or - name = "__contains__" or - name = "__getitem__" or - name = "__getattribute__" + /** + * A name of a list function that modifies the list. + * See https://docs.python.org/3/tutorial/datastructures.html#more-on-lists + */ + string list_modifying_method() { + result in ["append", "extend", "insert", "remove", "pop", "clear", "sort", "reverse"] } /** - * A mutation is considered a flow sink. + * A name of a dict function that modifies the dict. + * See https://docs.python.org/3/library/stdtypes.html#dict + */ + string dict_modifying_method() { result in ["clear", "pop", "popitem", "setdefault", "update"] } + + /** + * A mutation of the default value is a flow sink. + * + * Syntactic constructs that modify a list are: + * - s[i] = x + * - s[i:j] = t + * - del s[i:j] + * - s[i:j:k] = t + * - del s[i:j:k] + * - s += t + * - s *= n + * See https://docs.python.org/3/library/stdtypes.html#mutable-sequence-types + * + * Syntactic constructs that modify a dictionary are: + * - d[key] = value + * - del d[key] + * - d |= other + * See https://docs.python.org/3/library/stdtypes.html#dict + * + * These are all covered by: + * - assignment to a subscript (includes slices) + * - deletion of a subscript + * - augmented assignment to the value */ class Mutation extends Sink { Mutation() { exists(AugAssign a | a.getTarget().getAFlowNode() = this.asCfgNode()) or - exists(Call c, Attribute a | c.getFunc() = a | - a.getObject().getAFlowNode() = this.asCfgNode() and - not safe_method(a.getName()) + exists(DataFlow::CallCfgNode c, DataFlow::AttrRead a | c.getFunction() = a | + a.getObject() = this and + a.getAttributeName() in [list_modifying_method(), dict_modifying_method()] ) } } From 8614563b4284b293df0cd9cf71ac1a12fbdd80ee Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 26 Aug 2021 10:56:41 +0200 Subject: [PATCH 04/24] Python: More tests of syntactic constructs --- ...odificationOfParameterWithDefault.expected | 88 +++++++++++-------- .../test.py | 44 +++++++++- 2 files changed, 95 insertions(+), 37 deletions(-) diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected index 3032f6b1f807..6bd668fe43f2 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected @@ -1,39 +1,55 @@ edges -| test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | -| test.py:13:13:13:13 | ControlFlowNode for l | test.py:14:5:14:5 | ControlFlowNode for l | -| test.py:18:14:18:14 | ControlFlowNode for l | test.py:19:13:19:13 | ControlFlowNode for l | -| test.py:19:13:19:13 | ControlFlowNode for l | test.py:13:13:13:13 | ControlFlowNode for l | -| test.py:23:14:23:14 | ControlFlowNode for l | test.py:24:5:24:5 | ControlFlowNode for l | -| test.py:52:17:52:17 | ControlFlowNode for d | test.py:53:5:53:5 | ControlFlowNode for d | -| test.py:57:26:57:26 | ControlFlowNode for d | test.py:58:5:58:5 | ControlFlowNode for d | -| test.py:62:35:62:35 | ControlFlowNode for d | test.py:63:5:63:5 | ControlFlowNode for d | -| test.py:66:21:66:21 | ControlFlowNode for d | test.py:67:5:67:5 | ControlFlowNode for d | -| test.py:71:26:71:26 | ControlFlowNode for d | test.py:72:21:72:21 | ControlFlowNode for d | -| test.py:72:21:72:21 | ControlFlowNode for d | test.py:66:21:66:21 | ControlFlowNode for d | +| test.py:17:15:17:15 | ControlFlowNode for l | test.py:18:5:18:5 | ControlFlowNode for l | +| test.py:22:15:22:15 | ControlFlowNode for l | test.py:23:5:23:5 | ControlFlowNode for l | +| test.py:27:12:27:12 | ControlFlowNode for l | test.py:28:5:28:5 | ControlFlowNode for l | +| test.py:38:13:38:13 | ControlFlowNode for l | test.py:39:5:39:5 | ControlFlowNode for l | +| test.py:43:14:43:14 | ControlFlowNode for l | test.py:44:13:44:13 | ControlFlowNode for l | +| test.py:44:13:44:13 | ControlFlowNode for l | test.py:38:13:38:13 | ControlFlowNode for l | +| test.py:48:14:48:14 | ControlFlowNode for l | test.py:49:5:49:5 | ControlFlowNode for l | +| test.py:77:17:77:17 | ControlFlowNode for d | test.py:78:5:78:5 | ControlFlowNode for d | +| test.py:82:26:82:26 | ControlFlowNode for d | test.py:83:5:83:5 | ControlFlowNode for d | +| test.py:87:35:87:35 | ControlFlowNode for d | test.py:88:5:88:5 | ControlFlowNode for d | +| test.py:91:21:91:21 | ControlFlowNode for d | test.py:92:5:92:5 | ControlFlowNode for d | +| test.py:96:26:96:26 | ControlFlowNode for d | test.py:97:21:97:21 | ControlFlowNode for d | +| test.py:97:21:97:21 | ControlFlowNode for d | test.py:91:21:91:21 | ControlFlowNode for d | +| test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | +| test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | nodes -| test.py:2:12:2:12 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:3:5:3:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:13:13:13:13 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:14:5:14:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:18:14:18:14 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:19:13:19:13 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:23:14:23:14 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:24:5:24:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:52:17:52:17 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:53:5:53:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:57:26:57:26 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:58:5:58:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:62:35:62:35 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:63:5:63:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:66:21:66:21 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:67:5:67:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:71:26:71:26 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:72:21:72:21 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:17:15:17:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:18:5:18:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:22:15:22:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:23:5:23:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:27:12:27:12 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:28:5:28:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:38:13:38:13 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:39:5:39:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:43:14:43:14 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:44:13:44:13 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:48:14:48:14 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:49:5:49:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:77:17:77:17 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:78:5:78:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:82:26:82:26 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:83:5:83:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:87:35:87:35 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:88:5:88:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:91:21:91:21 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:92:5:92:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:96:26:96:26 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:97:21:97:21 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:113:20:113:20 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:115:5:115:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:119:29:119:29 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:121:5:121:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | #select -| test.py:3:5:3:5 | ControlFlowNode for l | test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:2:12:2:12 | ControlFlowNode for l | Default value | -| test.py:14:5:14:5 | ControlFlowNode for l | test.py:18:14:18:14 | ControlFlowNode for l | test.py:14:5:14:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:18:14:18:14 | ControlFlowNode for l | Default value | -| test.py:24:5:24:5 | ControlFlowNode for l | test.py:23:14:23:14 | ControlFlowNode for l | test.py:24:5:24:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:23:14:23:14 | ControlFlowNode for l | Default value | -| test.py:53:5:53:5 | ControlFlowNode for d | test.py:52:17:52:17 | ControlFlowNode for d | test.py:53:5:53:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:52:17:52:17 | ControlFlowNode for d | Default value | -| test.py:58:5:58:5 | ControlFlowNode for d | test.py:57:26:57:26 | ControlFlowNode for d | test.py:58:5:58:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:57:26:57:26 | ControlFlowNode for d | Default value | -| test.py:63:5:63:5 | ControlFlowNode for d | test.py:62:35:62:35 | ControlFlowNode for d | test.py:63:5:63:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:62:35:62:35 | ControlFlowNode for d | Default value | -| test.py:67:5:67:5 | ControlFlowNode for d | test.py:71:26:71:26 | ControlFlowNode for d | test.py:67:5:67:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:71:26:71:26 | ControlFlowNode for d | Default value | +| test.py:18:5:18:5 | ControlFlowNode for l | test.py:17:15:17:15 | ControlFlowNode for l | test.py:18:5:18:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:17:15:17:15 | ControlFlowNode for l | Default value | +| test.py:23:5:23:5 | ControlFlowNode for l | test.py:22:15:22:15 | ControlFlowNode for l | test.py:23:5:23:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:22:15:22:15 | ControlFlowNode for l | Default value | +| test.py:28:5:28:5 | ControlFlowNode for l | test.py:27:12:27:12 | ControlFlowNode for l | test.py:28:5:28:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:27:12:27:12 | ControlFlowNode for l | Default value | +| test.py:39:5:39:5 | ControlFlowNode for l | test.py:43:14:43:14 | ControlFlowNode for l | test.py:39:5:39:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:43:14:43:14 | ControlFlowNode for l | Default value | +| test.py:49:5:49:5 | ControlFlowNode for l | test.py:48:14:48:14 | ControlFlowNode for l | test.py:49:5:49:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:48:14:48:14 | ControlFlowNode for l | Default value | +| test.py:78:5:78:5 | ControlFlowNode for d | test.py:77:17:77:17 | ControlFlowNode for d | test.py:78:5:78:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:77:17:77:17 | ControlFlowNode for d | Default value | +| test.py:83:5:83:5 | ControlFlowNode for d | test.py:82:26:82:26 | ControlFlowNode for d | test.py:83:5:83:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:82:26:82:26 | ControlFlowNode for d | Default value | +| test.py:88:5:88:5 | ControlFlowNode for d | test.py:87:35:87:35 | ControlFlowNode for d | test.py:88:5:88:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:87:35:87:35 | ControlFlowNode for d | Default value | +| test.py:92:5:92:5 | ControlFlowNode for d | test.py:96:26:96:26 | ControlFlowNode for d | test.py:92:5:92:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:96:26:96:26 | ControlFlowNode for d | Default value | +| test.py:115:5:115:5 | ControlFlowNode for d | test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:113:20:113:20 | ControlFlowNode for d | Default value | +| test.py:121:5:121:5 | ControlFlowNode for d | test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:119:29:119:29 | ControlFlowNode for d | Default value | diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py index 81319ed909f0..60c3ff01b545 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -1,5 +1,30 @@ # Not OK -def simple(l = []): +def simple(l = [0]): + l[0] = 1 # FN + return l + +# Not OK +def slice(l = [0]): + l[0:1] = 1 # FN + return l + +# Not OK +def list_del(l = [0]): + del l[0] # FN + return l + +# Not OK +def append_op(l = []): + l += 1 + return l + +# Not OK +def repeat_op(l = [0]): + l *= 3 + return l + +# Not OK +def append(l = []): l.append(1) return l @@ -78,3 +103,20 @@ def dict_includes(d = {}): x.update(d) x.update({'a': 1}) return x + +# Not OK +def dict_del(d = {'a': 1}): + del d['a'] # FN + return d + +# Not OK +def dict_update_op(d = {}): + x = {'a': 1} + d |= x + return d + +# OK +def dict_update_op_nochange(d = {}): + x = {} + d |= x # FP + return d From d834cec9b93f5aaa9b5087cb4dd834070449fc03 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 26 Aug 2021 11:31:20 +0200 Subject: [PATCH 05/24] Python: test simple sanitizer --- .../ModificationOfParameterWithDefault.expected | 4 ++++ .../Functions/ModificationOfParameterWithDefault/test.py | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected index 6bd668fe43f2..c07a609dcebd 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected @@ -14,6 +14,7 @@ edges | test.py:97:21:97:21 | ControlFlowNode for d | test.py:91:21:91:21 | ControlFlowNode for d | | test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | | test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | +| test.py:125:15:125:15 | ControlFlowNode for l | test.py:127:9:127:9 | ControlFlowNode for l | nodes | test.py:17:15:17:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:18:5:18:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | @@ -41,6 +42,8 @@ nodes | test.py:115:5:115:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:119:29:119:29 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:121:5:121:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:125:15:125:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:127:9:127:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | #select | test.py:18:5:18:5 | ControlFlowNode for l | test.py:17:15:17:15 | ControlFlowNode for l | test.py:18:5:18:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:17:15:17:15 | ControlFlowNode for l | Default value | | test.py:23:5:23:5 | ControlFlowNode for l | test.py:22:15:22:15 | ControlFlowNode for l | test.py:23:5:23:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:22:15:22:15 | ControlFlowNode for l | Default value | @@ -53,3 +56,4 @@ nodes | test.py:92:5:92:5 | ControlFlowNode for d | test.py:96:26:96:26 | ControlFlowNode for d | test.py:92:5:92:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:96:26:96:26 | ControlFlowNode for d | Default value | | test.py:115:5:115:5 | ControlFlowNode for d | test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:113:20:113:20 | ControlFlowNode for d | Default value | | test.py:121:5:121:5 | ControlFlowNode for d | test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:119:29:119:29 | ControlFlowNode for d | Default value | +| test.py:127:9:127:9 | ControlFlowNode for l | test.py:125:15:125:15 | ControlFlowNode for l | test.py:127:9:127:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:125:15:125:15 | ControlFlowNode for l | Default value | diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py index 60c3ff01b545..25ad530310e4 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -120,3 +120,9 @@ def dict_update_op_nochange(d = {}): x = {} d |= x # FP return d + +# OK +def sanitizer(l = []): + if not l == []: + l.append(1) # FP + return l From 097c23e4379ed184320a908beada05869c81ff3c Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 26 Aug 2021 14:08:52 +0200 Subject: [PATCH 06/24] Python: add inline expectations test Consider removing the original test --- .../test.expected | 0 .../test.py | 38 +++++++++---------- .../test.ql | 24 ++++++++++++ 3 files changed, 43 insertions(+), 19 deletions(-) create mode 100644 python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.expected create mode 100644 python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.ql diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.expected b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py index 25ad530310e4..3c656611c59e 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -1,31 +1,31 @@ # Not OK def simple(l = [0]): - l[0] = 1 # FN + l[0] = 1 #$ MISSING: modification=l return l # Not OK def slice(l = [0]): - l[0:1] = 1 # FN + l[0:1] = 1 #$ MISSING: modification=l return l # Not OK def list_del(l = [0]): - del l[0] # FN + del l[0] #$ MISSING: modification=l return l # Not OK def append_op(l = []): - l += 1 + l += 1 #$ modification=l return l # Not OK def repeat_op(l = [0]): - l *= 3 + l *= 3 #$ modification=l return l # Not OK def append(l = []): - l.append(1) + l.append(1) #$ modification=l return l # OK @@ -36,7 +36,7 @@ def includes(l = []): return x def extends(l): - l.extend([1]) + l.extend([1]) #$ modification=l return l # Not OK @@ -46,17 +46,17 @@ def deferred(l = []): # Not OK def nonempty(l = [5]): - l.append(1) + l.append(1) #$ modification=l return l # Not OK def dict(d = {}): - d['a'] = 1 # FN + d['a'] = 1 #$ MISSING: modification=d return d # Not OK def dict_nonempty(d = {'a': 1}): - d['a'] = 2 # FN + d['a'] = 2 #$ MISSING: modification=d return d # OK @@ -65,7 +65,7 @@ def dict_nonempty_nochange(d = {'a': 1}): return d def modifies(d): - d['a'] = 1 # FN + d['a'] = 1 #$ MISSING: modification=d return d # Not OK @@ -75,21 +75,21 @@ def dict_deferred(d = {}): # Not OK def dict_method(d = {}): - d.update({'a': 1}) + d.update({'a': 1}) #$ modification=d return d # Not OK def dict_method_nonempty(d = {'a': 1}): - d.update({'a': 2}) + d.update({'a': 2}) #$ modification=d return d # OK def dict_method_nonempty_nochange(d = {'a': 1}): - d.update({'a': 1}) # FP + d.update({'a': 1}) #$ SPURIOUS:modification=d return d def modifies_method(d): - d.update({'a': 1}) + d.update({'a': 1}) #$ modification=d return d # Not OK @@ -106,23 +106,23 @@ def dict_includes(d = {}): # Not OK def dict_del(d = {'a': 1}): - del d['a'] # FN + del d['a'] #$ MISSING: modification=d return d # Not OK def dict_update_op(d = {}): x = {'a': 1} - d |= x + d |= x #$ modification=d return d # OK def dict_update_op_nochange(d = {}): x = {} - d |= x # FP + d |= x #$ SPURIOUS: modification=d return d # OK def sanitizer(l = []): if not l == []: - l.append(1) # FP + l.append(1) #$ SPURIOUS: modification=l return l diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.ql b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.ql new file mode 100644 index 000000000000..cac05eebd22f --- /dev/null +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.ql @@ -0,0 +1,24 @@ +import python +import semmle.python.dataflow.new.DataFlow +import TestUtilities.InlineExpectationsTest +import experimental.dataflow.TestUtil.PrintNode +import semmle.python.functions.ModificationOfParameterWithDefault + +class ModificationOfParameterWithDefaultTest extends InlineExpectationsTest { + ModificationOfParameterWithDefaultTest() { this = "ModificationOfParameterWithDefaultTest" } + + override string getARelevantTag() { result = "modification" } + + predicate relevant_node(DataFlow::Node sink) { + exists(ModificationOfParameterWithDefault::Configuration cfg | cfg.hasFlowTo(sink)) + } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(DataFlow::Node n | relevant_node(n) | + n.getLocation() = location and + tag = "modification" and + value = prettyNode(n) and + element = n.toString() + ) + } +} From 49ae549e892b20e254c3fe169638196d5d171a54 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 26 Aug 2021 14:29:18 +0200 Subject: [PATCH 07/24] Python: Implement modifying syntax --- ...ationOfParameterWithDefaultCustomizations.qll | 8 ++++++++ .../ModificationOfParameterWithDefault/test.py | 16 ++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index c9be49b1feef..12255c5a6413 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -97,8 +97,16 @@ module ModificationOfParameterWithDefault { */ class Mutation extends Sink { Mutation() { + // assignment to a subscript (includes slices) + exists(DefinitionNode d | d.(SubscriptNode).getObject() = this.asCfgNode()) + or + // deletion of a subscript + exists(DeletionNode d | d.getTarget().(SubscriptNode).getObject() = this.asCfgNode()) + or + // augmented assignment to the value exists(AugAssign a | a.getTarget().getAFlowNode() = this.asCfgNode()) or + // modifying function call exists(DataFlow::CallCfgNode c, DataFlow::AttrRead a | c.getFunction() = a | a.getObject() = this and a.getAttributeName() in [list_modifying_method(), dict_modifying_method()] diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py index 3c656611c59e..09ad7ed986ca 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -1,16 +1,16 @@ # Not OK def simple(l = [0]): - l[0] = 1 #$ MISSING: modification=l + l[0] = 1 #$ modification=l return l # Not OK def slice(l = [0]): - l[0:1] = 1 #$ MISSING: modification=l + l[0:1] = 1 #$ modification=l return l # Not OK def list_del(l = [0]): - del l[0] #$ MISSING: modification=l + del l[0] #$ modification=l return l # Not OK @@ -51,21 +51,21 @@ def nonempty(l = [5]): # Not OK def dict(d = {}): - d['a'] = 1 #$ MISSING: modification=d + d['a'] = 1 #$ modification=d return d # Not OK def dict_nonempty(d = {'a': 1}): - d['a'] = 2 #$ MISSING: modification=d + d['a'] = 2 #$ modification=d return d # OK def dict_nonempty_nochange(d = {'a': 1}): - d['a'] = 1 + d['a'] = 1 #$ SPURIOUS: modification=d return d def modifies(d): - d['a'] = 1 #$ MISSING: modification=d + d['a'] = 1 #$ modification=d return d # Not OK @@ -106,7 +106,7 @@ def dict_includes(d = {}): # Not OK def dict_del(d = {'a': 1}): - del d['a'] #$ MISSING: modification=d + del d['a'] #$ modification=d return d # Not OK From a762373ad6565c5af8cfa987b6c105a414ada26f Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 30 Aug 2021 11:04:27 +0200 Subject: [PATCH 08/24] Python: Implement simple barrier guard The one found in the original test case --- .../ModificationOfParameterWithDefault.qll | 11 ++- ...onOfParameterWithDefaultCustomizations.qll | 53 +++++++++++++- ...odificationOfParameterWithDefault.expected | 36 ++++++++++ ...odificationOfParameterWithDefault.expected | 70 +++++++++++++------ 4 files changed, 143 insertions(+), 27 deletions(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll index 914eb7a473e0..82acd10c3d79 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll @@ -19,16 +19,21 @@ module ModificationOfParameterWithDefault { * A data-flow configuration for detecting modifications of a parameters default value. */ class Configuration extends DataFlow::Configuration { - Configuration() { this = "ModificationOfParameterWithDefault" } + boolean nonEmpty; - override predicate isSource(DataFlow::Node source) { source instanceof Source } + Configuration() { + nonEmpty in [true, false] and + this = "ModificationOfParameterWithDefault:" + nonEmpty.toString() + } + + override predicate isSource(DataFlow::Node source) { source.(Source).isNonEmpty() = nonEmpty } override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } override predicate isBarrier(DataFlow::Node node) { node instanceof Barrier } override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { - guard instanceof BarrierGuard + guard.(BarrierGuard).blocksNonEmpty() = nonEmpty } } } diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 12255c5a6413..e7be292513eb 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -16,7 +16,9 @@ module ModificationOfParameterWithDefault { /** * A data flow source for detecting modifications of a parameters default value. */ - abstract class Source extends DataFlow::Node { } + abstract class Source extends DataFlow::Node { + abstract boolean isNonEmpty(); + } /** * A data flow sink for detecting modifications of a parameters default value. @@ -31,7 +33,9 @@ module ModificationOfParameterWithDefault { /** * A sanitizer guard for detecting modifications of a parameters default value. */ - abstract class BarrierGuard extends DataFlow::BarrierGuard { } + abstract class BarrierGuard extends DataFlow::BarrierGuard { + abstract boolean blocksNonEmpty(); + } /** Gets the truthiness (non emptyness) of the default of `p` if that value is mutable */ private boolean mutableDefaultValue(Parameter p) { @@ -55,6 +59,8 @@ module ModificationOfParameterWithDefault { boolean nonEmpty; MutableDefaultValue() { nonEmpty = mutableDefaultValue(this.asCfgNode().(NameNode).getNode()) } + + override boolean isNonEmpty() { result = nonEmpty } } /** @@ -113,4 +119,47 @@ module ModificationOfParameterWithDefault { ) } } + + private class IdentityGuarded extends Expr { + boolean inverted; + + IdentityGuarded() { + this = any(If i).getTest() and + inverted = false + or + exists(IdentityGuarded ig, UnaryExpr notExp | + notExp.getOp() instanceof Not and + ig = notExp and + notExp.getOperand() = this + | + inverted = ig.isInverted().booleanNot() + ) + } + + boolean isInverted() { result = inverted } + } + + class IdentityGuard extends BarrierGuard { + ControlFlowNode checked_node; + boolean safe_branch; + boolean nonEmpty; + + IdentityGuard() { + nonEmpty in [true, false] and + exists(IdentityGuarded ig | + this.getNode() = ig and + checked_node = this and + // The raw guard is true if the value is non-empty + // So we are safe either if we are looking for a non-empty value + // or if we are looking for an empty value and the guard is inverted. + safe_branch = ig.isInverted().booleanXor(nonEmpty) + ) + } + + override predicate checks(ControlFlowNode node, boolean branch) { + node = checked_node and branch = safe_branch + } + + override boolean blocksNonEmpty() { result = nonEmpty } + } } diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected index c07a609dcebd..f5c6f3a6baa3 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected @@ -1,4 +1,7 @@ edges +| test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | +| test.py:7:11:7:11 | ControlFlowNode for l | test.py:8:5:8:5 | ControlFlowNode for l | +| test.py:12:14:12:14 | ControlFlowNode for l | test.py:13:9:13:9 | ControlFlowNode for l | | test.py:17:15:17:15 | ControlFlowNode for l | test.py:18:5:18:5 | ControlFlowNode for l | | test.py:22:15:22:15 | ControlFlowNode for l | test.py:23:5:23:5 | ControlFlowNode for l | | test.py:27:12:27:12 | ControlFlowNode for l | test.py:28:5:28:5 | ControlFlowNode for l | @@ -6,16 +9,29 @@ edges | test.py:43:14:43:14 | ControlFlowNode for l | test.py:44:13:44:13 | ControlFlowNode for l | | test.py:44:13:44:13 | ControlFlowNode for l | test.py:38:13:38:13 | ControlFlowNode for l | | test.py:48:14:48:14 | ControlFlowNode for l | test.py:49:5:49:5 | ControlFlowNode for l | +| test.py:53:10:53:10 | ControlFlowNode for d | test.py:54:5:54:5 | ControlFlowNode for d | +| test.py:58:19:58:19 | ControlFlowNode for d | test.py:59:5:59:5 | ControlFlowNode for d | +| test.py:63:28:63:28 | ControlFlowNode for d | test.py:64:5:64:5 | ControlFlowNode for d | +| test.py:67:14:67:14 | ControlFlowNode for d | test.py:68:5:68:5 | ControlFlowNode for d | +| test.py:72:19:72:19 | ControlFlowNode for d | test.py:73:14:73:14 | ControlFlowNode for d | +| test.py:73:14:73:14 | ControlFlowNode for d | test.py:67:14:67:14 | ControlFlowNode for d | | test.py:77:17:77:17 | ControlFlowNode for d | test.py:78:5:78:5 | ControlFlowNode for d | | test.py:82:26:82:26 | ControlFlowNode for d | test.py:83:5:83:5 | ControlFlowNode for d | | test.py:87:35:87:35 | ControlFlowNode for d | test.py:88:5:88:5 | ControlFlowNode for d | | test.py:91:21:91:21 | ControlFlowNode for d | test.py:92:5:92:5 | ControlFlowNode for d | | test.py:96:26:96:26 | ControlFlowNode for d | test.py:97:21:97:21 | ControlFlowNode for d | | test.py:97:21:97:21 | ControlFlowNode for d | test.py:91:21:91:21 | ControlFlowNode for d | +| test.py:108:14:108:14 | ControlFlowNode for d | test.py:109:9:109:9 | ControlFlowNode for d | | test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | | test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | | test.py:125:15:125:15 | ControlFlowNode for l | test.py:127:9:127:9 | ControlFlowNode for l | nodes +| test.py:2:12:2:12 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:3:5:3:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:7:11:7:11 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:8:5:8:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:12:14:12:14 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:13:9:13:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:17:15:17:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:18:5:18:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:22:15:22:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | @@ -28,6 +44,16 @@ nodes | test.py:44:13:44:13 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:48:14:48:14 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:49:5:49:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:53:10:53:10 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:54:5:54:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:58:19:58:19 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:59:5:59:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:63:28:63:28 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:64:5:64:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:67:14:67:14 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:68:5:68:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:72:19:72:19 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:73:14:73:14 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:77:17:77:17 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:78:5:78:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:82:26:82:26 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | @@ -38,6 +64,8 @@ nodes | test.py:92:5:92:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:96:26:96:26 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:97:21:97:21 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:108:14:108:14 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | +| test.py:109:9:109:9 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:113:20:113:20 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:115:5:115:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:119:29:119:29 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | @@ -45,15 +73,23 @@ nodes | test.py:125:15:125:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:127:9:127:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | #select +| test.py:3:5:3:5 | ControlFlowNode for l | test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:2:12:2:12 | ControlFlowNode for l | Default value | +| test.py:8:5:8:5 | ControlFlowNode for l | test.py:7:11:7:11 | ControlFlowNode for l | test.py:8:5:8:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:7:11:7:11 | ControlFlowNode for l | Default value | +| test.py:13:9:13:9 | ControlFlowNode for l | test.py:12:14:12:14 | ControlFlowNode for l | test.py:13:9:13:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:12:14:12:14 | ControlFlowNode for l | Default value | | test.py:18:5:18:5 | ControlFlowNode for l | test.py:17:15:17:15 | ControlFlowNode for l | test.py:18:5:18:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:17:15:17:15 | ControlFlowNode for l | Default value | | test.py:23:5:23:5 | ControlFlowNode for l | test.py:22:15:22:15 | ControlFlowNode for l | test.py:23:5:23:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:22:15:22:15 | ControlFlowNode for l | Default value | | test.py:28:5:28:5 | ControlFlowNode for l | test.py:27:12:27:12 | ControlFlowNode for l | test.py:28:5:28:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:27:12:27:12 | ControlFlowNode for l | Default value | | test.py:39:5:39:5 | ControlFlowNode for l | test.py:43:14:43:14 | ControlFlowNode for l | test.py:39:5:39:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:43:14:43:14 | ControlFlowNode for l | Default value | | test.py:49:5:49:5 | ControlFlowNode for l | test.py:48:14:48:14 | ControlFlowNode for l | test.py:49:5:49:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:48:14:48:14 | ControlFlowNode for l | Default value | +| test.py:54:5:54:5 | ControlFlowNode for d | test.py:53:10:53:10 | ControlFlowNode for d | test.py:54:5:54:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:53:10:53:10 | ControlFlowNode for d | Default value | +| test.py:59:5:59:5 | ControlFlowNode for d | test.py:58:19:58:19 | ControlFlowNode for d | test.py:59:5:59:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:58:19:58:19 | ControlFlowNode for d | Default value | +| test.py:64:5:64:5 | ControlFlowNode for d | test.py:63:28:63:28 | ControlFlowNode for d | test.py:64:5:64:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:63:28:63:28 | ControlFlowNode for d | Default value | +| test.py:68:5:68:5 | ControlFlowNode for d | test.py:72:19:72:19 | ControlFlowNode for d | test.py:68:5:68:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:72:19:72:19 | ControlFlowNode for d | Default value | | test.py:78:5:78:5 | ControlFlowNode for d | test.py:77:17:77:17 | ControlFlowNode for d | test.py:78:5:78:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:77:17:77:17 | ControlFlowNode for d | Default value | | test.py:83:5:83:5 | ControlFlowNode for d | test.py:82:26:82:26 | ControlFlowNode for d | test.py:83:5:83:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:82:26:82:26 | ControlFlowNode for d | Default value | | test.py:88:5:88:5 | ControlFlowNode for d | test.py:87:35:87:35 | ControlFlowNode for d | test.py:88:5:88:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:87:35:87:35 | ControlFlowNode for d | Default value | | test.py:92:5:92:5 | ControlFlowNode for d | test.py:96:26:96:26 | ControlFlowNode for d | test.py:92:5:92:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:96:26:96:26 | ControlFlowNode for d | Default value | +| test.py:109:9:109:9 | ControlFlowNode for d | test.py:108:14:108:14 | ControlFlowNode for d | test.py:109:9:109:9 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:108:14:108:14 | ControlFlowNode for d | Default value | | test.py:115:5:115:5 | ControlFlowNode for d | test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:113:20:113:20 | ControlFlowNode for d | Default value | | test.py:121:5:121:5 | ControlFlowNode for d | test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:119:29:119:29 | ControlFlowNode for d | Default value | | test.py:127:9:127:9 | ControlFlowNode for l | test.py:125:15:125:15 | ControlFlowNode for l | test.py:127:9:127:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:125:15:125:15 | ControlFlowNode for l | Default value | diff --git a/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected index 08586d02c100..c9460845e295 100644 --- a/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected @@ -1,24 +1,50 @@ edges -| functions_test.py:39:9:39:9 | empty mutable value | functions_test.py:40:5:40:5 | empty mutable value | -| functions_test.py:133:15:133:15 | empty mutable value | functions_test.py:134:5:134:5 | empty mutable value | -| functions_test.py:151:25:151:25 | empty mutable value | functions_test.py:152:5:152:5 | empty mutable value | -| functions_test.py:154:21:154:21 | empty mutable value | functions_test.py:155:5:155:5 | empty mutable value | -| functions_test.py:157:27:157:27 | empty mutable value | functions_test.py:158:25:158:25 | empty mutable value | -| functions_test.py:157:27:157:27 | empty mutable value | functions_test.py:159:21:159:21 | empty mutable value | -| functions_test.py:158:25:158:25 | empty mutable value | functions_test.py:151:25:151:25 | empty mutable value | -| functions_test.py:159:21:159:21 | empty mutable value | functions_test.py:154:21:154:21 | empty mutable value | -| functions_test.py:175:28:175:28 | non-empty mutable value | functions_test.py:179:9:179:9 | non-empty mutable value | -| functions_test.py:175:28:175:28 | non-empty mutable value | functions_test.py:181:9:181:9 | non-empty mutable value | -| functions_test.py:188:18:188:18 | non-empty mutable value | functions_test.py:189:28:189:28 | non-empty mutable value | -| functions_test.py:189:28:189:28 | non-empty mutable value | functions_test.py:175:28:175:28 | non-empty mutable value | -| functions_test.py:191:18:191:18 | non-empty mutable value | functions_test.py:192:28:192:28 | non-empty mutable value | -| functions_test.py:192:28:192:28 | non-empty mutable value | functions_test.py:175:28:175:28 | non-empty mutable value | +| functions_test.py:39:9:39:9 | ControlFlowNode for x | functions_test.py:40:5:40:5 | ControlFlowNode for x | +| functions_test.py:133:15:133:15 | ControlFlowNode for x | functions_test.py:134:5:134:5 | ControlFlowNode for x | +| functions_test.py:151:25:151:25 | ControlFlowNode for x | functions_test.py:152:5:152:5 | ControlFlowNode for x | +| functions_test.py:154:21:154:21 | ControlFlowNode for x | functions_test.py:155:5:155:5 | ControlFlowNode for x | +| functions_test.py:157:27:157:27 | ControlFlowNode for y | functions_test.py:158:25:158:25 | ControlFlowNode for y | +| functions_test.py:157:27:157:27 | ControlFlowNode for y | functions_test.py:159:21:159:21 | ControlFlowNode for y | +| functions_test.py:158:25:158:25 | ControlFlowNode for y | functions_test.py:151:25:151:25 | ControlFlowNode for x | +| functions_test.py:159:21:159:21 | ControlFlowNode for y | functions_test.py:154:21:154:21 | ControlFlowNode for x | +| functions_test.py:166:21:166:25 | ControlFlowNode for param | functions_test.py:170:9:170:13 | ControlFlowNode for param | +| functions_test.py:166:21:166:25 | ControlFlowNode for param | functions_test.py:170:9:170:13 | ControlFlowNode for param | +| functions_test.py:175:28:175:28 | ControlFlowNode for x | functions_test.py:179:9:179:9 | ControlFlowNode for x | +| functions_test.py:175:28:175:28 | ControlFlowNode for x | functions_test.py:181:9:181:9 | ControlFlowNode for x | +| functions_test.py:188:18:188:18 | ControlFlowNode for x | functions_test.py:189:28:189:28 | ControlFlowNode for x | +| functions_test.py:189:28:189:28 | ControlFlowNode for x | functions_test.py:175:28:175:28 | ControlFlowNode for x | +| functions_test.py:191:18:191:18 | ControlFlowNode for x | functions_test.py:192:28:192:28 | ControlFlowNode for x | +| functions_test.py:192:28:192:28 | ControlFlowNode for x | functions_test.py:175:28:175:28 | ControlFlowNode for x | +nodes +| functions_test.py:39:9:39:9 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:40:5:40:5 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:133:15:133:15 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:134:5:134:5 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:151:25:151:25 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:152:5:152:5 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:154:21:154:21 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:155:5:155:5 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:157:27:157:27 | ControlFlowNode for y | semmle.label | ControlFlowNode for y | +| functions_test.py:158:25:158:25 | ControlFlowNode for y | semmle.label | ControlFlowNode for y | +| functions_test.py:159:21:159:21 | ControlFlowNode for y | semmle.label | ControlFlowNode for y | +| functions_test.py:166:21:166:25 | ControlFlowNode for param | semmle.label | ControlFlowNode for param | +| functions_test.py:170:9:170:13 | ControlFlowNode for param | semmle.label | ControlFlowNode for param | +| functions_test.py:170:9:170:13 | ControlFlowNode for param | semmle.label | ControlFlowNode for param | +| functions_test.py:175:28:175:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:179:9:179:9 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:181:9:181:9 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:188:18:188:18 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:189:28:189:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:191:18:191:18 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:192:28:192:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | #select -| functions_test.py:40:5:40:5 | x | functions_test.py:39:9:39:9 | empty mutable value | functions_test.py:40:5:40:5 | empty mutable value | $@ flows to here and is mutated. | functions_test.py:39:9:39:9 | x | Default value | -| functions_test.py:134:5:134:5 | x | functions_test.py:133:15:133:15 | empty mutable value | functions_test.py:134:5:134:5 | empty mutable value | $@ flows to here and is mutated. | functions_test.py:133:15:133:15 | x | Default value | -| functions_test.py:152:5:152:5 | x | functions_test.py:157:27:157:27 | empty mutable value | functions_test.py:152:5:152:5 | empty mutable value | $@ flows to here and is mutated. | functions_test.py:157:27:157:27 | y | Default value | -| functions_test.py:155:5:155:5 | x | functions_test.py:157:27:157:27 | empty mutable value | functions_test.py:155:5:155:5 | empty mutable value | $@ flows to here and is mutated. | functions_test.py:157:27:157:27 | y | Default value | -| functions_test.py:179:9:179:9 | x | functions_test.py:188:18:188:18 | non-empty mutable value | functions_test.py:179:9:179:9 | non-empty mutable value | $@ flows to here and is mutated. | functions_test.py:188:18:188:18 | x | Default value | -| functions_test.py:179:9:179:9 | x | functions_test.py:191:18:191:18 | non-empty mutable value | functions_test.py:179:9:179:9 | non-empty mutable value | $@ flows to here and is mutated. | functions_test.py:191:18:191:18 | x | Default value | -| functions_test.py:181:9:181:9 | x | functions_test.py:188:18:188:18 | non-empty mutable value | functions_test.py:181:9:181:9 | non-empty mutable value | $@ flows to here and is mutated. | functions_test.py:188:18:188:18 | x | Default value | -| functions_test.py:181:9:181:9 | x | functions_test.py:191:18:191:18 | non-empty mutable value | functions_test.py:181:9:181:9 | non-empty mutable value | $@ flows to here and is mutated. | functions_test.py:191:18:191:18 | x | Default value | +| functions_test.py:40:5:40:5 | ControlFlowNode for x | functions_test.py:39:9:39:9 | ControlFlowNode for x | functions_test.py:40:5:40:5 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:39:9:39:9 | ControlFlowNode for x | Default value | +| functions_test.py:134:5:134:5 | ControlFlowNode for x | functions_test.py:133:15:133:15 | ControlFlowNode for x | functions_test.py:134:5:134:5 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:133:15:133:15 | ControlFlowNode for x | Default value | +| functions_test.py:152:5:152:5 | ControlFlowNode for x | functions_test.py:157:27:157:27 | ControlFlowNode for y | functions_test.py:152:5:152:5 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:157:27:157:27 | ControlFlowNode for y | Default value | +| functions_test.py:155:5:155:5 | ControlFlowNode for x | functions_test.py:157:27:157:27 | ControlFlowNode for y | functions_test.py:155:5:155:5 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:157:27:157:27 | ControlFlowNode for y | Default value | +| functions_test.py:170:9:170:13 | ControlFlowNode for param | functions_test.py:166:21:166:25 | ControlFlowNode for param | functions_test.py:170:9:170:13 | ControlFlowNode for param | $@ flows to here and is mutated. | functions_test.py:166:21:166:25 | ControlFlowNode for param | Default value | +| functions_test.py:170:9:170:13 | ControlFlowNode for param | functions_test.py:166:21:166:25 | ControlFlowNode for param | functions_test.py:170:9:170:13 | ControlFlowNode for param | $@ flows to here and is mutated. | functions_test.py:166:21:166:25 | ControlFlowNode for param | Default value | +| functions_test.py:179:9:179:9 | ControlFlowNode for x | functions_test.py:188:18:188:18 | ControlFlowNode for x | functions_test.py:179:9:179:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:188:18:188:18 | ControlFlowNode for x | Default value | +| functions_test.py:179:9:179:9 | ControlFlowNode for x | functions_test.py:191:18:191:18 | ControlFlowNode for x | functions_test.py:179:9:179:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:191:18:191:18 | ControlFlowNode for x | Default value | +| functions_test.py:181:9:181:9 | ControlFlowNode for x | functions_test.py:188:18:188:18 | ControlFlowNode for x | functions_test.py:181:9:181:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:188:18:188:18 | ControlFlowNode for x | Default value | +| functions_test.py:181:9:181:9 | ControlFlowNode for x | functions_test.py:191:18:191:18 | ControlFlowNode for x | functions_test.py:181:9:181:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:191:18:191:18 | ControlFlowNode for x | Default value | From 1903cb8f829949d38d10020c7986e9011866a105 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 30 Aug 2021 11:27:55 +0200 Subject: [PATCH 09/24] Python: Add change note --- python/change-notes/2021-08-30-port-modifying-default-query.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 python/change-notes/2021-08-30-port-modifying-default-query.md diff --git a/python/change-notes/2021-08-30-port-modifying-default-query.md b/python/change-notes/2021-08-30-port-modifying-default-query.md new file mode 100644 index 000000000000..2fcd9a11ded1 --- /dev/null +++ b/python/change-notes/2021-08-30-port-modifying-default-query.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* Updated _Modification of parameter with default_ (`py/modification-of-default-value`) query to use the new data flow library instead of the old taint tracking library and to remove the use of points-to analysis. You may see differences in the results found by the query, but overall this change should result in a more robust and accurate analysis. From 0de621edf97253a215150e0d52d494fe7c2244c7 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 30 Aug 2021 15:03:58 +0200 Subject: [PATCH 10/24] Python: Add qldoc --- .../ModificationOfParameterWithDefaultCustomizations.qll | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index e7be292513eb..96dd41adfcbe 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -139,6 +139,9 @@ module ModificationOfParameterWithDefault { boolean isInverted() { result = inverted } } + /** + * A check for the value being truthy or falsy can guard against modifying the default value. + */ class IdentityGuard extends BarrierGuard { ControlFlowNode checked_node; boolean safe_branch; @@ -149,7 +152,7 @@ module ModificationOfParameterWithDefault { exists(IdentityGuarded ig | this.getNode() = ig and checked_node = this and - // The raw guard is true if the value is non-empty + // The raw guard is true if the value is non-empty. // So we are safe either if we are looking for a non-empty value // or if we are looking for an empty value and the guard is inverted. safe_branch = ig.isInverted().booleanXor(nonEmpty) From a855074588b1b007a771984ae5bd88364366e3a9 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 30 Aug 2021 15:41:51 +0200 Subject: [PATCH 11/24] Python: Try to remove py2/3 differences --- python/ql/test/query-tests/Functions/general/options | 1 + 1 file changed, 1 insertion(+) create mode 100644 python/ql/test/query-tests/Functions/general/options diff --git a/python/ql/test/query-tests/Functions/general/options b/python/ql/test/query-tests/Functions/general/options new file mode 100644 index 000000000000..2032dd9e54c5 --- /dev/null +++ b/python/ql/test/query-tests/Functions/general/options @@ -0,0 +1 @@ +semmle-extractor-options: --max-import-depth=1 --dont-split-graph From c6eb795e76aaaa728eb3c61d5e0b1eef533c6284 Mon Sep 17 00:00:00 2001 From: yoff Date: Fri, 3 Sep 2021 14:23:57 +0200 Subject: [PATCH 12/24] Apply suggestions from code review Co-authored-by: Rasmus Wriedt Larsen --- .../ModificationOfParameterWithDefaultCustomizations.qll | 8 +++++++- .../Functions/ModificationOfParameterWithDefault/test.py | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 96dd41adfcbe..7cd16415dd96 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -53,7 +53,7 @@ module ModificationOfParameterWithDefault { } /** - * A source of remote user input, considered as a flow source. + * A mutable default value for a parameter, considered as a flow source. */ class MutableDefaultValue extends Source { boolean nonEmpty; @@ -120,6 +120,9 @@ module ModificationOfParameterWithDefault { } } + /** + * An expression that is checked directly in an `if`, possibly with `not`, such as `if x:` or `if not x:`. + */ private class IdentityGuarded extends Expr { boolean inverted; @@ -136,6 +139,9 @@ module ModificationOfParameterWithDefault { ) } + /** + * Whether this guard has been inverted. For `if x:` the result is `false`, and for `if not x:` the result is `true`. + */ boolean isInverted() { result = inverted } } diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py index 09ad7ed986ca..3246d7362a2d 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -15,7 +15,7 @@ def list_del(l = [0]): # Not OK def append_op(l = []): - l += 1 #$ modification=l + l += [1, 2, 3] #$ modification=l return l # Not OK @@ -123,6 +123,6 @@ def dict_update_op_nochange(d = {}): # OK def sanitizer(l = []): - if not l == []: + if l: l.append(1) #$ SPURIOUS: modification=l return l From 913990bc6224ec0d1248bf373f9a713d0931611c Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 3 Sep 2021 14:40:16 +0200 Subject: [PATCH 13/24] Python: Add suggested comments and test case --- .../ModificationOfParameterWithDefaultCustomizations.qll | 6 ++++-- .../Functions/ModificationOfParameterWithDefault/test.py | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 7cd16415dd96..3aed2eeaf16d 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -17,6 +17,7 @@ module ModificationOfParameterWithDefault { * A data flow source for detecting modifications of a parameters default value. */ abstract class Source extends DataFlow::Node { + /** Result is true if the default value is non-empty for this source and false if not. */ abstract boolean isNonEmpty(); } @@ -34,6 +35,7 @@ module ModificationOfParameterWithDefault { * A sanitizer guard for detecting modifications of a parameters default value. */ abstract class BarrierGuard extends DataFlow::BarrierGuard { + /** Result is true if this guard blocks non-empty values and false if it blocks empty values. */ abstract boolean blocksNonEmpty(); } @@ -120,7 +122,7 @@ module ModificationOfParameterWithDefault { } } - /** + /** * An expression that is checked directly in an `if`, possibly with `not`, such as `if x:` or `if not x:`. */ private class IdentityGuarded extends Expr { @@ -139,7 +141,7 @@ module ModificationOfParameterWithDefault { ) } - /** + /** * Whether this guard has been inverted. For `if x:` the result is `false`, and for `if not x:` the result is `true`. */ boolean isInverted() { result = inverted } diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py index 3246d7362a2d..d7aef0b24d00 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -124,5 +124,11 @@ def dict_update_op_nochange(d = {}): # OK def sanitizer(l = []): if l: + l.append(1) #$ modification=l + return l + +# OK +def sanitizer_negated(l = [1]): + if not l: l.append(1) #$ SPURIOUS: modification=l return l From 4998a48f99bbbb7f39b5223f8c1626d7b1cef7ed Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 6 Sep 2021 22:40:30 +0200 Subject: [PATCH 14/24] Python: Fix simple guards --- .../ModificationOfParameterWithDefault.qll | 17 +++-- ...onOfParameterWithDefaultCustomizations.qll | 76 +++++++++++++------ .../test.py | 16 +++- 3 files changed, 78 insertions(+), 31 deletions(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll index 82acd10c3d79..61ec0ecd7db3 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll @@ -19,21 +19,28 @@ module ModificationOfParameterWithDefault { * A data-flow configuration for detecting modifications of a parameters default value. */ class Configuration extends DataFlow::Configuration { - boolean nonEmpty; + /** Record whether the default value being tracked is non-empty. */ + boolean nonEmptyDefault; Configuration() { - nonEmpty in [true, false] and - this = "ModificationOfParameterWithDefault:" + nonEmpty.toString() + nonEmptyDefault in [true, false] and + this = "ModificationOfParameterWithDefault:" + nonEmptyDefault.toString() } - override predicate isSource(DataFlow::Node source) { source.(Source).isNonEmpty() = nonEmpty } + override predicate isSource(DataFlow::Node source) { + source.(Source).isNonEmpty() = nonEmptyDefault + } override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } override predicate isBarrier(DataFlow::Node node) { node instanceof Barrier } override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { - guard.(BarrierGuard).blocksNonEmpty() = nonEmpty + // if we are tracking a emmpty default, then we should not modify falsy values + nonEmptyDefault = false and guard instanceof BlocksFalsey + or + // if we are tracking a non-empty default, then we should not modify truthy values + nonEmptyDefault = true and guard instanceof BlocksTruthy } } } diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 3aed2eeaf16d..9d5616d67a50 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -32,12 +32,24 @@ module ModificationOfParameterWithDefault { abstract class Barrier extends DataFlow::Node { } /** - * A sanitizer guard for detecting modifications of a parameters default value. + * A sanitizer guard that does not let a truthy value flow to the true branch. + * + * Since guards with different behaviour cannot exist on the same node, + * we let all guards have the same behaviour, in the sense that they all check + * the true branch. Instead, we partition guards into those that block + * truthy values and those that block falsy values. */ - abstract class BarrierGuard extends DataFlow::BarrierGuard { - /** Result is true if this guard blocks non-empty values and false if it blocks empty values. */ - abstract boolean blocksNonEmpty(); - } + abstract class BlocksTruthy extends DataFlow::BarrierGuard { } + + /** + * A sanitizer guard that does not let a falsy value flow to the true branch. + * + * Since guards with different behaviour cannot exist on the same node, + * we let all guards have the same behaviour, in the sense that they all check + * the true branch. Instead, we partition guards into those that block + * truthy values and those that block falsy values. + */ + abstract class BlocksFalsey extends DataFlow::BarrierGuard { } /** Gets the truthiness (non emptyness) of the default of `p` if that value is mutable */ private boolean mutableDefaultValue(Parameter p) { @@ -147,30 +159,46 @@ module ModificationOfParameterWithDefault { boolean isInverted() { result = inverted } } - /** - * A check for the value being truthy or falsy can guard against modifying the default value. - */ - class IdentityGuard extends BarrierGuard { - ControlFlowNode checked_node; - boolean safe_branch; - boolean nonEmpty; + boolean isIdentityGuard(DataFlow::GuardNode guard, ControlFlowNode guarded) { + exists(IdentityGuarded ig | + ig instanceof Name and + // In `not l`, the `ControlFlowNode` for `l` is not an instance of `GuardNode`. + // TODO: This is slightly naive, we should change it when we have a proper guards library. + guard.getNode().getAChildNode*() = ig and + result = ig.isInverted() and + guarded.getNode() = ig + ) + } - IdentityGuard() { - nonEmpty in [true, false] and - exists(IdentityGuarded ig | - this.getNode() = ig and - checked_node = this and - // The raw guard is true if the value is non-empty. - // So we are safe either if we are looking for a non-empty value - // or if we are looking for an empty value and the guard is inverted. - safe_branch = ig.isInverted().booleanXor(nonEmpty) - ) + class BlocksTruthyGuard extends BlocksTruthy { + ControlFlowNode guarded; + + BlocksTruthyGuard() { + // The raw guard is true if the value is non-empty. + // We wish to send truthy falues to the false branch, + // se we are looking for inverted guards. + isIdentityGuard(this, guarded) = true } override predicate checks(ControlFlowNode node, boolean branch) { - node = checked_node and branch = safe_branch + node = guarded and + branch = true } + } + + class BlocksFalseyGuard extends BlocksFalsey { + ControlFlowNode guarded; - override boolean blocksNonEmpty() { result = nonEmpty } + BlocksFalseyGuard() { + // The raw guard is true if the value is non-empty. + // We wish to send falsy falues to the false branch, + // se we are looking for guards that are not inverted. + isIdentityGuard(this, guarded) = false + } + + override predicate checks(ControlFlowNode node, boolean branch) { + node = guarded and + branch = true + } } } diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py index d7aef0b24d00..10dee114d36b 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -124,11 +124,23 @@ def dict_update_op_nochange(d = {}): # OK def sanitizer(l = []): if l: - l.append(1) #$ modification=l + l.append(1) return l # OK def sanitizer_negated(l = [1]): if not l: - l.append(1) #$ SPURIOUS: modification=l + l.append(1) + return l + +# Not OK +def sanitizer(l = []): + if not l: + l.append(1) #$ modification=l + return l + +# Not OK +def sanitizer_negated(l = [1]): + if l: + l.append(1) #$ modification=l return l From ae8408bcab8822085a21be75eac18a4663d8b4c1 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 7 Sep 2021 10:09:02 +0200 Subject: [PATCH 15/24] Python: Add missing qldoc --- ...cationOfParameterWithDefaultCustomizations.qll | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 9d5616d67a50..50a97d4d5566 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -159,17 +159,26 @@ module ModificationOfParameterWithDefault { boolean isInverted() { result = inverted } } + /** + * Holds iff `guard` is checking the `Name` represented by `guarded` for truthyness. + * `result` is true if the check is inverted and false if it is not. + */ boolean isIdentityGuard(DataFlow::GuardNode guard, ControlFlowNode guarded) { exists(IdentityGuarded ig | ig instanceof Name and // In `not l`, the `ControlFlowNode` for `l` is not an instance of `GuardNode`. - // TODO: This is slightly naive, we should change it when we have a proper guards library. + // TODO: This is slightly naive, not handling e.g. `l or cond` correctly. + // We should change it when we have a proper guards library. guard.getNode().getAChildNode*() = ig and result = ig.isInverted() and guarded.getNode() = ig ) } + /** + * A sanitizer guard that does not let a truthy value flow to the true branch. + * Based on `isIdentityGuard`, so comes with the same caveats. + */ class BlocksTruthyGuard extends BlocksTruthy { ControlFlowNode guarded; @@ -186,6 +195,10 @@ module ModificationOfParameterWithDefault { } } + /** + * A sanitizer guard that does not let a falsy value flow to the true branch. + * Based on `isIdentityGuard`, so comes with the same caveats. + */ class BlocksFalseyGuard extends BlocksFalsey { ControlFlowNode guarded; From 29cb0677697f421bb223fa45441d0bdc18acb2a6 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 7 Sep 2021 10:13:17 +0200 Subject: [PATCH 16/24] Python: Remember to update test expectations --- .../ModificationOfParameterWithDefault.expected | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected index f5c6f3a6baa3..547665631cbf 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected @@ -24,7 +24,8 @@ edges | test.py:108:14:108:14 | ControlFlowNode for d | test.py:109:9:109:9 | ControlFlowNode for d | | test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | | test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | -| test.py:125:15:125:15 | ControlFlowNode for l | test.py:127:9:127:9 | ControlFlowNode for l | +| test.py:137:15:137:15 | ControlFlowNode for l | test.py:139:9:139:9 | ControlFlowNode for l | +| test.py:143:23:143:23 | ControlFlowNode for l | test.py:145:9:145:9 | ControlFlowNode for l | nodes | test.py:2:12:2:12 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:3:5:3:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | @@ -70,8 +71,10 @@ nodes | test.py:115:5:115:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:119:29:119:29 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:121:5:121:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:125:15:125:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:127:9:127:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:137:15:137:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:139:9:139:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:143:23:143:23 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:145:9:145:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | #select | test.py:3:5:3:5 | ControlFlowNode for l | test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:2:12:2:12 | ControlFlowNode for l | Default value | | test.py:8:5:8:5 | ControlFlowNode for l | test.py:7:11:7:11 | ControlFlowNode for l | test.py:8:5:8:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:7:11:7:11 | ControlFlowNode for l | Default value | @@ -92,4 +95,5 @@ nodes | test.py:109:9:109:9 | ControlFlowNode for d | test.py:108:14:108:14 | ControlFlowNode for d | test.py:109:9:109:9 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:108:14:108:14 | ControlFlowNode for d | Default value | | test.py:115:5:115:5 | ControlFlowNode for d | test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:113:20:113:20 | ControlFlowNode for d | Default value | | test.py:121:5:121:5 | ControlFlowNode for d | test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:119:29:119:29 | ControlFlowNode for d | Default value | -| test.py:127:9:127:9 | ControlFlowNode for l | test.py:125:15:125:15 | ControlFlowNode for l | test.py:127:9:127:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:125:15:125:15 | ControlFlowNode for l | Default value | +| test.py:139:9:139:9 | ControlFlowNode for l | test.py:137:15:137:15 | ControlFlowNode for l | test.py:139:9:139:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:137:15:137:15 | ControlFlowNode for l | Default value | +| test.py:145:9:145:9 | ControlFlowNode for l | test.py:143:23:143:23 | ControlFlowNode for l | test.py:145:9:145:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:143:23:143:23 | ControlFlowNode for l | Default value | From b48caaf46532b620167a40a5897ec3709c1242e3 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 7 Sep 2021 10:19:42 +0200 Subject: [PATCH 17/24] Python: fix reference to PrintNode.qll --- .../Functions/ModificationOfParameterWithDefault/test.ql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.ql b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.ql index cac05eebd22f..de516c7ec9ba 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.ql +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.ql @@ -1,8 +1,8 @@ import python import semmle.python.dataflow.new.DataFlow import TestUtilities.InlineExpectationsTest -import experimental.dataflow.TestUtil.PrintNode import semmle.python.functions.ModificationOfParameterWithDefault +private import semmle.python.dataflow.new.internal.PrintNode class ModificationOfParameterWithDefaultTest extends InlineExpectationsTest { ModificationOfParameterWithDefaultTest() { this = "ModificationOfParameterWithDefaultTest" } From e8644f6f2ae1b3294a6be14e567e8b88289a5e35 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 7 Sep 2021 10:30:38 +0200 Subject: [PATCH 18/24] Python: coment out discriminating test The test case has different behaviour between py2/3. When merging this, we should create an issue to resolve it. --- ...odificationOfParameterWithDefault.expected | 41 ++++++++----------- .../Functions/general/functions_test.py | 14 ++++--- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected index c9460845e295..37fb38475af1 100644 --- a/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected @@ -7,14 +7,12 @@ edges | functions_test.py:157:27:157:27 | ControlFlowNode for y | functions_test.py:159:21:159:21 | ControlFlowNode for y | | functions_test.py:158:25:158:25 | ControlFlowNode for y | functions_test.py:151:25:151:25 | ControlFlowNode for x | | functions_test.py:159:21:159:21 | ControlFlowNode for y | functions_test.py:154:21:154:21 | ControlFlowNode for x | -| functions_test.py:166:21:166:25 | ControlFlowNode for param | functions_test.py:170:9:170:13 | ControlFlowNode for param | -| functions_test.py:166:21:166:25 | ControlFlowNode for param | functions_test.py:170:9:170:13 | ControlFlowNode for param | -| functions_test.py:175:28:175:28 | ControlFlowNode for x | functions_test.py:179:9:179:9 | ControlFlowNode for x | -| functions_test.py:175:28:175:28 | ControlFlowNode for x | functions_test.py:181:9:181:9 | ControlFlowNode for x | -| functions_test.py:188:18:188:18 | ControlFlowNode for x | functions_test.py:189:28:189:28 | ControlFlowNode for x | -| functions_test.py:189:28:189:28 | ControlFlowNode for x | functions_test.py:175:28:175:28 | ControlFlowNode for x | -| functions_test.py:191:18:191:18 | ControlFlowNode for x | functions_test.py:192:28:192:28 | ControlFlowNode for x | -| functions_test.py:192:28:192:28 | ControlFlowNode for x | functions_test.py:175:28:175:28 | ControlFlowNode for x | +| functions_test.py:179:28:179:28 | ControlFlowNode for x | functions_test.py:183:9:183:9 | ControlFlowNode for x | +| functions_test.py:179:28:179:28 | ControlFlowNode for x | functions_test.py:185:9:185:9 | ControlFlowNode for x | +| functions_test.py:192:18:192:18 | ControlFlowNode for x | functions_test.py:193:28:193:28 | ControlFlowNode for x | +| functions_test.py:193:28:193:28 | ControlFlowNode for x | functions_test.py:179:28:179:28 | ControlFlowNode for x | +| functions_test.py:195:18:195:18 | ControlFlowNode for x | functions_test.py:196:28:196:28 | ControlFlowNode for x | +| functions_test.py:196:28:196:28 | ControlFlowNode for x | functions_test.py:179:28:179:28 | ControlFlowNode for x | nodes | functions_test.py:39:9:39:9 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | | functions_test.py:40:5:40:5 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | @@ -27,24 +25,19 @@ nodes | functions_test.py:157:27:157:27 | ControlFlowNode for y | semmle.label | ControlFlowNode for y | | functions_test.py:158:25:158:25 | ControlFlowNode for y | semmle.label | ControlFlowNode for y | | functions_test.py:159:21:159:21 | ControlFlowNode for y | semmle.label | ControlFlowNode for y | -| functions_test.py:166:21:166:25 | ControlFlowNode for param | semmle.label | ControlFlowNode for param | -| functions_test.py:170:9:170:13 | ControlFlowNode for param | semmle.label | ControlFlowNode for param | -| functions_test.py:170:9:170:13 | ControlFlowNode for param | semmle.label | ControlFlowNode for param | -| functions_test.py:175:28:175:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | -| functions_test.py:179:9:179:9 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | -| functions_test.py:181:9:181:9 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | -| functions_test.py:188:18:188:18 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | -| functions_test.py:189:28:189:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | -| functions_test.py:191:18:191:18 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | -| functions_test.py:192:28:192:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:179:28:179:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:183:9:183:9 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:185:9:185:9 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:192:18:192:18 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:193:28:193:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:195:18:195:18 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +| functions_test.py:196:28:196:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | #select | functions_test.py:40:5:40:5 | ControlFlowNode for x | functions_test.py:39:9:39:9 | ControlFlowNode for x | functions_test.py:40:5:40:5 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:39:9:39:9 | ControlFlowNode for x | Default value | | functions_test.py:134:5:134:5 | ControlFlowNode for x | functions_test.py:133:15:133:15 | ControlFlowNode for x | functions_test.py:134:5:134:5 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:133:15:133:15 | ControlFlowNode for x | Default value | | functions_test.py:152:5:152:5 | ControlFlowNode for x | functions_test.py:157:27:157:27 | ControlFlowNode for y | functions_test.py:152:5:152:5 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:157:27:157:27 | ControlFlowNode for y | Default value | | functions_test.py:155:5:155:5 | ControlFlowNode for x | functions_test.py:157:27:157:27 | ControlFlowNode for y | functions_test.py:155:5:155:5 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:157:27:157:27 | ControlFlowNode for y | Default value | -| functions_test.py:170:9:170:13 | ControlFlowNode for param | functions_test.py:166:21:166:25 | ControlFlowNode for param | functions_test.py:170:9:170:13 | ControlFlowNode for param | $@ flows to here and is mutated. | functions_test.py:166:21:166:25 | ControlFlowNode for param | Default value | -| functions_test.py:170:9:170:13 | ControlFlowNode for param | functions_test.py:166:21:166:25 | ControlFlowNode for param | functions_test.py:170:9:170:13 | ControlFlowNode for param | $@ flows to here and is mutated. | functions_test.py:166:21:166:25 | ControlFlowNode for param | Default value | -| functions_test.py:179:9:179:9 | ControlFlowNode for x | functions_test.py:188:18:188:18 | ControlFlowNode for x | functions_test.py:179:9:179:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:188:18:188:18 | ControlFlowNode for x | Default value | -| functions_test.py:179:9:179:9 | ControlFlowNode for x | functions_test.py:191:18:191:18 | ControlFlowNode for x | functions_test.py:179:9:179:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:191:18:191:18 | ControlFlowNode for x | Default value | -| functions_test.py:181:9:181:9 | ControlFlowNode for x | functions_test.py:188:18:188:18 | ControlFlowNode for x | functions_test.py:181:9:181:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:188:18:188:18 | ControlFlowNode for x | Default value | -| functions_test.py:181:9:181:9 | ControlFlowNode for x | functions_test.py:191:18:191:18 | ControlFlowNode for x | functions_test.py:181:9:181:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:191:18:191:18 | ControlFlowNode for x | Default value | +| functions_test.py:183:9:183:9 | ControlFlowNode for x | functions_test.py:192:18:192:18 | ControlFlowNode for x | functions_test.py:183:9:183:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:192:18:192:18 | ControlFlowNode for x | Default value | +| functions_test.py:183:9:183:9 | ControlFlowNode for x | functions_test.py:195:18:195:18 | ControlFlowNode for x | functions_test.py:183:9:183:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:195:18:195:18 | ControlFlowNode for x | Default value | +| functions_test.py:185:9:185:9 | ControlFlowNode for x | functions_test.py:192:18:192:18 | ControlFlowNode for x | functions_test.py:185:9:185:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:192:18:192:18 | ControlFlowNode for x | Default value | +| functions_test.py:185:9:185:9 | ControlFlowNode for x | functions_test.py:195:18:195:18 | ControlFlowNode for x | functions_test.py:185:9:185:9 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:195:18:195:18 | ControlFlowNode for x | Default value | diff --git a/python/ql/test/query-tests/Functions/general/functions_test.py b/python/ql/test/query-tests/Functions/general/functions_test.py index 95e0923ea216..c47794fef28f 100644 --- a/python/ql/test/query-tests/Functions/general/functions_test.py +++ b/python/ql/test/query-tests/Functions/general/functions_test.py @@ -163,11 +163,15 @@ def guarded_modification(z=[]): z.append(0) return z -def issue1143(expr, param=[]): - if not param: - return result - for i in param: - param.remove(i) # Mutation here +# This function causes a discrepancy between the +# Python 2 and 3 versions of the analysis. +# We comment it out until we have resoved the issue. +# +# def issue1143(expr, param=[]): +# if not param: +# return result +# for i in param: +# param.remove(i) # Mutation here # Type guarding of modification of parameter with default: From 43effd2b406a6f64e8cd6bd00d0296e334c74885 Mon Sep 17 00:00:00 2001 From: yoff Date: Tue, 7 Sep 2021 15:08:50 +0200 Subject: [PATCH 19/24] Update python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll Co-authored-by: Rasmus Wriedt Larsen --- .../python/functions/ModificationOfParameterWithDefault.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll index 61ec0ecd7db3..98c33f86c945 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll @@ -36,7 +36,7 @@ module ModificationOfParameterWithDefault { override predicate isBarrier(DataFlow::Node node) { node instanceof Barrier } override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { - // if we are tracking a emmpty default, then we should not modify falsy values + // if we are tracking a empty default, then we should not modify falsey values nonEmptyDefault = false and guard instanceof BlocksFalsey or // if we are tracking a non-empty default, then we should not modify truthy values From b20232db3c92a7a51570044c6a46799665f75149 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 10 Sep 2021 10:31:48 +0200 Subject: [PATCH 20/24] Python: Simplify guards as suggested --- .../ModificationOfParameterWithDefault.qll | 10 ++- ...onOfParameterWithDefaultCustomizations.qll | 82 ++++++------------- ...odificationOfParameterWithDefault.expected | 30 +++++-- .../test.py | 12 ++- 4 files changed, 61 insertions(+), 73 deletions(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll index 98c33f86c945..cf1fafd5fa1c 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll @@ -36,11 +36,13 @@ module ModificationOfParameterWithDefault { override predicate isBarrier(DataFlow::Node node) { node instanceof Barrier } override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { - // if we are tracking a empty default, then we should not modify falsey values - nonEmptyDefault = false and guard instanceof BlocksFalsey + // if we are tracking a empty default, then it is ok to modify truthy values, + // so our tracking ends at those. + nonEmptyDefault = false and guard instanceof BlocksTruthy or - // if we are tracking a non-empty default, then we should not modify truthy values - nonEmptyDefault = true and guard instanceof BlocksTruthy + // if we are tracking a non-empty default, then it is ok to modify falsy values, + // so our tracking ends at those. + nonEmptyDefault = true and guard instanceof BlocksFalsey } } } diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 50a97d4d5566..35edc7911660 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -34,20 +34,26 @@ module ModificationOfParameterWithDefault { /** * A sanitizer guard that does not let a truthy value flow to the true branch. * + * Implementation note: * Since guards with different behaviour cannot exist on the same node, * we let all guards have the same behaviour, in the sense that they all check * the true branch. Instead, we partition guards into those that block * truthy values and those that block falsy values. + * + * If you extend this class, make sure that your barrier checks the true branch. */ abstract class BlocksTruthy extends DataFlow::BarrierGuard { } /** * A sanitizer guard that does not let a falsy value flow to the true branch. * + * Implementation note: * Since guards with different behaviour cannot exist on the same node, * we let all guards have the same behaviour, in the sense that they all check * the true branch. Instead, we partition guards into those that block * truthy values and those that block falsy values. + * + * If you extend this class, make sure that your barrier checks the true branch. */ abstract class BlocksFalsey extends DataFlow::BarrierGuard { } @@ -135,78 +141,40 @@ module ModificationOfParameterWithDefault { } /** - * An expression that is checked directly in an `if`, possibly with `not`, such as `if x:` or `if not x:`. - */ - private class IdentityGuarded extends Expr { - boolean inverted; - - IdentityGuarded() { - this = any(If i).getTest() and - inverted = false - or - exists(IdentityGuarded ig, UnaryExpr notExp | - notExp.getOp() instanceof Not and - ig = notExp and - notExp.getOperand() = this - | - inverted = ig.isInverted().booleanNot() - ) - } - - /** - * Whether this guard has been inverted. For `if x:` the result is `false`, and for `if not x:` the result is `true`. - */ - boolean isInverted() { result = inverted } - } - - /** - * Holds iff `guard` is checking the `Name` represented by `guarded` for truthyness. - * `result` is true if the check is inverted and false if it is not. - */ - boolean isIdentityGuard(DataFlow::GuardNode guard, ControlFlowNode guarded) { - exists(IdentityGuarded ig | - ig instanceof Name and - // In `not l`, the `ControlFlowNode` for `l` is not an instance of `GuardNode`. - // TODO: This is slightly naive, not handling e.g. `l or cond` correctly. - // We should change it when we have a proper guards library. - guard.getNode().getAChildNode*() = ig and - result = ig.isInverted() and - guarded.getNode() = ig - ) - } - - /** - * A sanitizer guard that does not let a truthy value flow to the true branch. - * Based on `isIdentityGuard`, so comes with the same caveats. + * A simple sanitizer guard that does not let a truthy value flow to the true branch. + * + * Blocks flow of `x` in the true branch in the example below. + * ```py + * if x: + * x.append(42) + * ``` */ class BlocksTruthyGuard extends BlocksTruthy { ControlFlowNode guarded; - BlocksTruthyGuard() { - // The raw guard is true if the value is non-empty. - // We wish to send truthy falues to the false branch, - // se we are looking for inverted guards. - isIdentityGuard(this, guarded) = true - } + BlocksTruthyGuard() { this instanceof NameNode } override predicate checks(ControlFlowNode node, boolean branch) { - node = guarded and + node = this and branch = true } } /** - * A sanitizer guard that does not let a falsy value flow to the true branch. - * Based on `isIdentityGuard`, so comes with the same caveats. + * A simple sanitizer guard that does not let a truthy value flow to the true branch. + * + * Blocks flow of `x` in the true branch in the example below. + * ```py + * if not x: + * x.append(42) + * ``` */ class BlocksFalseyGuard extends BlocksFalsey { - ControlFlowNode guarded; + NameNode guarded; BlocksFalseyGuard() { - // The raw guard is true if the value is non-empty. - // We wish to send falsy falues to the false branch, - // se we are looking for guards that are not inverted. - isIdentityGuard(this, guarded) = false + this.(UnaryExprNode).getNode().getOp() instanceof Not and + guarded = this.(UnaryExprNode).getOperand() } override predicate checks(ControlFlowNode node, boolean branch) { diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected index 547665631cbf..9d64f42a61bb 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected @@ -24,8 +24,12 @@ edges | test.py:108:14:108:14 | ControlFlowNode for d | test.py:109:9:109:9 | ControlFlowNode for d | | test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | | test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | -| test.py:137:15:137:15 | ControlFlowNode for l | test.py:139:9:139:9 | ControlFlowNode for l | -| test.py:143:23:143:23 | ControlFlowNode for l | test.py:145:9:145:9 | ControlFlowNode for l | +| test.py:124:15:124:15 | ControlFlowNode for l | test.py:128:9:128:9 | ControlFlowNode for l | +| test.py:131:23:131:23 | ControlFlowNode for l | test.py:135:9:135:9 | ControlFlowNode for l | +| test.py:138:15:138:15 | ControlFlowNode for l | test.py:140:9:140:9 | ControlFlowNode for l | +| test.py:138:15:138:15 | ControlFlowNode for l | test.py:142:9:142:9 | ControlFlowNode for l | +| test.py:145:23:145:23 | ControlFlowNode for l | test.py:147:9:147:9 | ControlFlowNode for l | +| test.py:145:23:145:23 | ControlFlowNode for l | test.py:149:9:149:9 | ControlFlowNode for l | nodes | test.py:2:12:2:12 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:3:5:3:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | @@ -71,10 +75,16 @@ nodes | test.py:115:5:115:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:119:29:119:29 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | | test.py:121:5:121:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d | -| test.py:137:15:137:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:139:9:139:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:143:23:143:23 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:145:9:145:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:124:15:124:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:128:9:128:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:131:23:131:23 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:135:9:135:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:138:15:138:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:140:9:140:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:142:9:142:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:145:23:145:23 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:147:9:147:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +| test.py:149:9:149:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | #select | test.py:3:5:3:5 | ControlFlowNode for l | test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:2:12:2:12 | ControlFlowNode for l | Default value | | test.py:8:5:8:5 | ControlFlowNode for l | test.py:7:11:7:11 | ControlFlowNode for l | test.py:8:5:8:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:7:11:7:11 | ControlFlowNode for l | Default value | @@ -95,5 +105,9 @@ nodes | test.py:109:9:109:9 | ControlFlowNode for d | test.py:108:14:108:14 | ControlFlowNode for d | test.py:109:9:109:9 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:108:14:108:14 | ControlFlowNode for d | Default value | | test.py:115:5:115:5 | ControlFlowNode for d | test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:113:20:113:20 | ControlFlowNode for d | Default value | | test.py:121:5:121:5 | ControlFlowNode for d | test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:119:29:119:29 | ControlFlowNode for d | Default value | -| test.py:139:9:139:9 | ControlFlowNode for l | test.py:137:15:137:15 | ControlFlowNode for l | test.py:139:9:139:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:137:15:137:15 | ControlFlowNode for l | Default value | -| test.py:145:9:145:9 | ControlFlowNode for l | test.py:143:23:143:23 | ControlFlowNode for l | test.py:145:9:145:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:143:23:143:23 | ControlFlowNode for l | Default value | +| test.py:128:9:128:9 | ControlFlowNode for l | test.py:124:15:124:15 | ControlFlowNode for l | test.py:128:9:128:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:124:15:124:15 | ControlFlowNode for l | Default value | +| test.py:135:9:135:9 | ControlFlowNode for l | test.py:131:23:131:23 | ControlFlowNode for l | test.py:135:9:135:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:131:23:131:23 | ControlFlowNode for l | Default value | +| test.py:140:9:140:9 | ControlFlowNode for l | test.py:138:15:138:15 | ControlFlowNode for l | test.py:140:9:140:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:138:15:138:15 | ControlFlowNode for l | Default value | +| test.py:142:9:142:9 | ControlFlowNode for l | test.py:138:15:138:15 | ControlFlowNode for l | test.py:142:9:142:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:138:15:138:15 | ControlFlowNode for l | Default value | +| test.py:147:9:147:9 | ControlFlowNode for l | test.py:145:23:145:23 | ControlFlowNode for l | test.py:147:9:147:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:145:23:145:23 | ControlFlowNode for l | Default value | +| test.py:149:9:149:9 | ControlFlowNode for l | test.py:145:23:145:23 | ControlFlowNode for l | test.py:149:9:149:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:145:23:145:23 | ControlFlowNode for l | Default value | diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py index 10dee114d36b..412a6da0137d 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -121,26 +121,30 @@ def dict_update_op_nochange(d = {}): d |= x #$ SPURIOUS: modification=d return d -# OK def sanitizer(l = []): if l: l.append(1) + else: + l.append(1) #$ modification=l return l -# OK def sanitizer_negated(l = [1]): if not l: l.append(1) + else: + l.append(1) #$ modification=l return l -# Not OK def sanitizer(l = []): if not l: l.append(1) #$ modification=l + else: + l.append(1) #$ SPURIOUS: modification=l return l -# Not OK def sanitizer_negated(l = [1]): if l: l.append(1) #$ modification=l + else: + l.append(1) #$ SPURIOUS: modification=l return l From 7cfa08abc86046c7423c385df50c061a33f51856 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 10 Sep 2021 12:48:24 +0200 Subject: [PATCH 21/24] Python: Do not use BarrierGuards They are simply not right for this problem. We should not even make them available as an extension point. --- .../ModificationOfParameterWithDefault.qll | 12 +- ...onOfParameterWithDefaultCustomizations.qll | 113 +++++++++--------- ...odificationOfParameterWithDefault.expected | 6 - .../test.py | 4 +- 4 files changed, 63 insertions(+), 72 deletions(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll index cf1fafd5fa1c..24103a33aceb 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll @@ -33,16 +33,14 @@ module ModificationOfParameterWithDefault { override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } - override predicate isBarrier(DataFlow::Node node) { node instanceof Barrier } - - override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { - // if we are tracking a empty default, then it is ok to modify truthy values, + override predicate isBarrier(DataFlow::Node node) { + // if we are tracking a non-empty default, then it is ok to modify empty values, // so our tracking ends at those. - nonEmptyDefault = false and guard instanceof BlocksTruthy + nonEmptyDefault = true and node instanceof MustBeEmpty or - // if we are tracking a non-empty default, then it is ok to modify falsy values, + // if we are tracking a empty default, then it is ok to modify non-empty values, // so our tracking ends at those. - nonEmptyDefault = true and guard instanceof BlocksFalsey + nonEmptyDefault = false and node instanceof MustBeNonEmpty } } } diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 35edc7911660..9f5da1d14c86 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -14,7 +14,8 @@ private import semmle.python.dataflow.new.BarrierGuards */ module ModificationOfParameterWithDefault { /** - * A data flow source for detecting modifications of a parameters default value. + * A data flow source for detecting modifications of a parameters default value, + * that is a default value for some parameter. */ abstract class Source extends DataFlow::Node { /** Result is true if the default value is non-empty for this source and false if not. */ @@ -22,40 +23,36 @@ module ModificationOfParameterWithDefault { } /** - * A data flow sink for detecting modifications of a parameters default value. + * A data flow sink for detecting modifications of a parameters default value, + * that is a node representing a modification. */ abstract class Sink extends DataFlow::Node { } /** - * A sanitizer for detecting modifications of a parameters default value. - */ - abstract class Barrier extends DataFlow::Node { } - - /** - * A sanitizer guard that does not let a truthy value flow to the true branch. + * A sanitizer for detecting modifications of a parameters default value + * should determine if the node (which is perhaps about to be modified) + * can be the default value or not. * - * Implementation note: - * Since guards with different behaviour cannot exist on the same node, - * we let all guards have the same behaviour, in the sense that they all check - * the true branch. Instead, we partition guards into those that block - * truthy values and those that block falsy values. + * In this query we do not track the default value exactly, but rather wheter + * it is empty or not (see `Source`). * - * If you extend this class, make sure that your barrier checks the true branch. + * This is the extension point for determining that a node must be empty and + * therefor is allowed to be modified if the tracked default value is non-empty. */ - abstract class BlocksTruthy extends DataFlow::BarrierGuard { } + abstract class MustBeEmpty extends DataFlow::Node { } /** - * A sanitizer guard that does not let a falsy value flow to the true branch. + * A sanitizer for detecting modifications of a parameters default value + * should determine if the node (which is perhaps about to be modified) + * can be the default value or not. * - * Implementation note: - * Since guards with different behaviour cannot exist on the same node, - * we let all guards have the same behaviour, in the sense that they all check - * the true branch. Instead, we partition guards into those that block - * truthy values and those that block falsy values. + * In this query we do not track the default value exactly, but rather wheter + * it is empty or not (see `Source`). * - * If you extend this class, make sure that your barrier checks the true branch. + * This is the extension point for determining that a node must be non-empty + * and therefor is allowed to be modified if the tracked default value is empty. */ - abstract class BlocksFalsey extends DataFlow::BarrierGuard { } + abstract class MustBeNonEmpty extends DataFlow::Node { } /** Gets the truthiness (non emptyness) of the default of `p` if that value is mutable */ private boolean mutableDefaultValue(Parameter p) { @@ -140,46 +137,48 @@ module ModificationOfParameterWithDefault { } } - /** - * A simple sanitizer guard that does not let a truthy value flow to the true branch. - * - * Blocks flow of `x` in the true branch in the example below. - * ```py - * if x: - * x.append(42) - * ``` - */ - class BlocksTruthyGuard extends BlocksTruthy { - ControlFlowNode guarded; - - BlocksTruthyGuard() { this instanceof NameNode } - - override predicate checks(ControlFlowNode node, boolean branch) { - node = this and - branch = true - } - } + // This to reimplement some of the functionality of the DataFlow::BarrierGuard + private import semmle.python.essa.SsaCompute /** - * A simple sanitizer guard that does not let a truthy value flow to the true branch. + * Simple detection of truthy and falsey values. * - * Blocks flow of `x` in the true branch in the example below. - * ```py - * if not x: - * x.append(42) - * ``` + * It handles the cases `if x` and `if not x`. */ - class BlocksFalseyGuard extends BlocksFalsey { + private class MustBe extends DataFlow::Node { + DataFlow::GuardNode guard; NameNode guarded; - - BlocksFalseyGuard() { - this.(UnaryExprNode).getNode().getOp() instanceof Not and - guarded = this.(UnaryExprNode).getOperand() + boolean branch; + boolean truthy; + + MustBe() { + ( + // case: if x + guard = guarded and + branch = truthy + or + // case: if not x + guard.(UnaryExprNode).getNode().getOp() instanceof Not and + guarded = guard.(UnaryExprNode).getOperand() and + branch = truthy.booleanNot() + ) and + // guard controls this + guard.controlsBlock(this.asCfgNode().getBasicBlock(), branch) and + // there is a definition tying the guarded value to this + exists(EssaDefinition def | + AdjacentUses::useOfDef(def, this.asCfgNode()) and + AdjacentUses::useOfDef(def, guarded) + ) } + } - override predicate checks(ControlFlowNode node, boolean branch) { - node = guarded and - branch = true - } + /** Simple guard detecting truthy values. */ + private class MustBeTruthy extends MustBe, MustBeNonEmpty { + MustBeTruthy() { truthy = true } + } + + /** Simple guard detecting falsey values. */ + private class MustBeFalsey extends MustBe, MustBeEmpty { + MustBeFalsey() { truthy = false } } } diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected index 9d64f42a61bb..3cef69d5921c 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected @@ -27,9 +27,7 @@ edges | test.py:124:15:124:15 | ControlFlowNode for l | test.py:128:9:128:9 | ControlFlowNode for l | | test.py:131:23:131:23 | ControlFlowNode for l | test.py:135:9:135:9 | ControlFlowNode for l | | test.py:138:15:138:15 | ControlFlowNode for l | test.py:140:9:140:9 | ControlFlowNode for l | -| test.py:138:15:138:15 | ControlFlowNode for l | test.py:142:9:142:9 | ControlFlowNode for l | | test.py:145:23:145:23 | ControlFlowNode for l | test.py:147:9:147:9 | ControlFlowNode for l | -| test.py:145:23:145:23 | ControlFlowNode for l | test.py:149:9:149:9 | ControlFlowNode for l | nodes | test.py:2:12:2:12 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:3:5:3:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | @@ -81,10 +79,8 @@ nodes | test.py:135:9:135:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:138:15:138:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:140:9:140:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:142:9:142:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:145:23:145:23 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:147:9:147:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | -| test.py:149:9:149:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | #select | test.py:3:5:3:5 | ControlFlowNode for l | test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:2:12:2:12 | ControlFlowNode for l | Default value | | test.py:8:5:8:5 | ControlFlowNode for l | test.py:7:11:7:11 | ControlFlowNode for l | test.py:8:5:8:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:7:11:7:11 | ControlFlowNode for l | Default value | @@ -108,6 +104,4 @@ nodes | test.py:128:9:128:9 | ControlFlowNode for l | test.py:124:15:124:15 | ControlFlowNode for l | test.py:128:9:128:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:124:15:124:15 | ControlFlowNode for l | Default value | | test.py:135:9:135:9 | ControlFlowNode for l | test.py:131:23:131:23 | ControlFlowNode for l | test.py:135:9:135:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:131:23:131:23 | ControlFlowNode for l | Default value | | test.py:140:9:140:9 | ControlFlowNode for l | test.py:138:15:138:15 | ControlFlowNode for l | test.py:140:9:140:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:138:15:138:15 | ControlFlowNode for l | Default value | -| test.py:142:9:142:9 | ControlFlowNode for l | test.py:138:15:138:15 | ControlFlowNode for l | test.py:142:9:142:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:138:15:138:15 | ControlFlowNode for l | Default value | | test.py:147:9:147:9 | ControlFlowNode for l | test.py:145:23:145:23 | ControlFlowNode for l | test.py:147:9:147:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:145:23:145:23 | ControlFlowNode for l | Default value | -| test.py:149:9:149:9 | ControlFlowNode for l | test.py:145:23:145:23 | ControlFlowNode for l | test.py:149:9:149:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:145:23:145:23 | ControlFlowNode for l | Default value | diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py index 412a6da0137d..f1377f36e56e 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/test.py @@ -139,12 +139,12 @@ def sanitizer(l = []): if not l: l.append(1) #$ modification=l else: - l.append(1) #$ SPURIOUS: modification=l + l.append(1) return l def sanitizer_negated(l = [1]): if l: l.append(1) #$ modification=l else: - l.append(1) #$ SPURIOUS: modification=l + l.append(1) return l From 2eb11731e2a74ca2a09ec7b9c142ae5f69043a42 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 10 Sep 2021 14:04:57 +0200 Subject: [PATCH 22/24] Python: Subpaths in test output --- .../ModificationOfParameterWithDefault.expected | 1 + .../general/ModificationOfParameterWithDefault.expected | 1 + 2 files changed, 2 insertions(+) diff --git a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected index 3cef69d5921c..86daf9cb09ea 100644 --- a/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/ModificationOfParameterWithDefault/ModificationOfParameterWithDefault.expected @@ -81,6 +81,7 @@ nodes | test.py:140:9:140:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:145:23:145:23 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | | test.py:147:9:147:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l | +subpaths #select | test.py:3:5:3:5 | ControlFlowNode for l | test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:2:12:2:12 | ControlFlowNode for l | Default value | | test.py:8:5:8:5 | ControlFlowNode for l | test.py:7:11:7:11 | ControlFlowNode for l | test.py:8:5:8:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:7:11:7:11 | ControlFlowNode for l | Default value | diff --git a/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected b/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected index 37fb38475af1..c32326d49422 100644 --- a/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected +++ b/python/ql/test/query-tests/Functions/general/ModificationOfParameterWithDefault.expected @@ -32,6 +32,7 @@ nodes | functions_test.py:193:28:193:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | | functions_test.py:195:18:195:18 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | | functions_test.py:196:28:196:28 | ControlFlowNode for x | semmle.label | ControlFlowNode for x | +subpaths #select | functions_test.py:40:5:40:5 | ControlFlowNode for x | functions_test.py:39:9:39:9 | ControlFlowNode for x | functions_test.py:40:5:40:5 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:39:9:39:9 | ControlFlowNode for x | Default value | | functions_test.py:134:5:134:5 | ControlFlowNode for x | functions_test.py:133:15:133:15 | ControlFlowNode for x | functions_test.py:134:5:134:5 | ControlFlowNode for x | $@ flows to here and is mutated. | functions_test.py:133:15:133:15 | ControlFlowNode for x | Default value | From 758b6bd4dda679455286deeb0d31580b0eee8b0d Mon Sep 17 00:00:00 2001 From: yoff Date: Wed, 15 Sep 2021 12:25:27 +0200 Subject: [PATCH 23/24] Update python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll Co-authored-by: Rasmus Wriedt Larsen --- ...odificationOfParameterWithDefaultCustomizations.qll | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 9f5da1d14c86..4617e60cbe5f 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -141,9 +141,17 @@ module ModificationOfParameterWithDefault { private import semmle.python.essa.SsaCompute /** - * Simple detection of truthy and falsey values. + * A data-flow node that is known to be either truthy or falsey. * * It handles the cases `if x` and `if not x`. + * + * For example, in the following code, `this` will be the `x` that is printed, + * which we will know is truthy: + * + * ```py + * if x: + * print(x) + * ``` */ private class MustBe extends DataFlow::Node { DataFlow::GuardNode guard; From 8ea7a28a77d8f9693323a3dad8861e8a585edc57 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 15 Sep 2021 12:32:21 +0200 Subject: [PATCH 24/24] Python: Unexpose fields as suggested. --- ...onOfParameterWithDefaultCustomizations.qll | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index 4617e60cbe5f..6abb3f800858 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -144,23 +144,20 @@ module ModificationOfParameterWithDefault { * A data-flow node that is known to be either truthy or falsey. * * It handles the cases `if x` and `if not x`. - * + * * For example, in the following code, `this` will be the `x` that is printed, * which we will know is truthy: - * + * * ```py * if x: * print(x) * ``` */ private class MustBe extends DataFlow::Node { - DataFlow::GuardNode guard; - NameNode guarded; - boolean branch; boolean truthy; MustBe() { - ( + exists(DataFlow::GuardNode guard, NameNode guarded, boolean branch | // case: if x guard = guarded and branch = truthy @@ -169,13 +166,14 @@ module ModificationOfParameterWithDefault { guard.(UnaryExprNode).getNode().getOp() instanceof Not and guarded = guard.(UnaryExprNode).getOperand() and branch = truthy.booleanNot() - ) and - // guard controls this - guard.controlsBlock(this.asCfgNode().getBasicBlock(), branch) and - // there is a definition tying the guarded value to this - exists(EssaDefinition def | - AdjacentUses::useOfDef(def, this.asCfgNode()) and - AdjacentUses::useOfDef(def, guarded) + | + // guard controls this + guard.controlsBlock(this.asCfgNode().getBasicBlock(), branch) and + // there is a definition tying the guarded value to this + exists(EssaDefinition def | + AdjacentUses::useOfDef(def, this.asCfgNode()) and + AdjacentUses::useOfDef(def, guarded) + ) ) } }