From 4d856d4461179f2e22f0655344d95d915453b443 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 12 Mar 2021 18:04:13 +0100 Subject: [PATCH 1/5] Python: Add small api enhancements determined useful during documentation work. --- .../2021-03-12-small-api-enhancements.md | 5 + .../dataflow/new/internal/DataFlowPublic.qll | 108 ++------------- .../dataflow/new/internal/LocalSources.qll | 131 ++++++++++++++++++ 3 files changed, 146 insertions(+), 98 deletions(-) create mode 100644 python/change-notes/2021-03-12-small-api-enhancements.md create mode 100644 python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll diff --git a/python/change-notes/2021-03-12-small-api-enhancements.md b/python/change-notes/2021-03-12-small-api-enhancements.md new file mode 100644 index 000000000000..f75a91e548a5 --- /dev/null +++ b/python/change-notes/2021-03-12-small-api-enhancements.md @@ -0,0 +1,5 @@ +lgtm,codescanning +* Make `ParameterNode` extend `LocalSourceNode`, thus making members like `flowsTo` available. +* Add member predicate `taintFlowsTo` to `LocalSourceNode` facilitating smoother syntax for local taint tracking. +* Add member `getALocalTaintSource` to `DataFlow::Node` facilitating smoother syntax for local taint tracking. +* Add predicate `parameterNode` to map from a `Parameter` to a data-flow node. diff --git a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 7b75fd70f2aa..aa69f829b9fd 100644 --- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -6,6 +6,7 @@ private import python private import DataFlowPrivate import semmle.python.dataflow.new.TypeTracker import Attributes +import LocalSources private import semmle.python.essa.SsaCompute /** @@ -138,6 +139,11 @@ class Node extends TNode { * Gets a local source node from which data may flow to this node in zero or more local steps. */ LocalSourceNode getALocalSource() { result.flowsTo(this) } + + /** + * Gets a local source node from which data may flow to this node in zero or more local steps. + */ + LocalSourceNode getALocalTaintSource() { result.taintFlowsTo(this) } } /** A data-flow node corresponding to an SSA variable. */ @@ -215,7 +221,7 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e } * The value of a parameter at function entry, viewed as a node in a data * flow graph. */ -class ParameterNode extends CfgNode { +class ParameterNode extends CfgNode, LocalSourceNode { ParameterDefinition def; ParameterNode() { @@ -237,6 +243,9 @@ class ParameterNode extends CfgNode { Parameter getParameter() { result = def.getParameter() } } +/** Gets a node corresponding to parameter `p`. */ +ParameterNode parameterNode(Parameter p) { result.getParameter() = p } + /** A data flow node that represents a call argument. */ class ArgumentNode extends Node { ArgumentNode() { this = any(DataFlowCall c).getArg(_) } @@ -467,103 +476,6 @@ class BarrierGuard extends GuardNode { } } -/** - * A data flow node that is a source of local flow. This includes things like - * - Expressions - * - Function parameters - */ -class LocalSourceNode extends Node { - LocalSourceNode() { - not simpleLocalFlowStep+(any(CfgNode n), this) and - not this instanceof ModuleVariableNode - or - this = any(ModuleVariableNode mvn).getARead() - } - - /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */ - pragma[inline] - predicate flowsTo(Node nodeTo) { Cached::hasLocalSource(nodeTo, this) } - - /** - * Gets a reference (read or write) of attribute `attrName` on this node. - */ - AttrRef getAnAttributeReference(string attrName) { Cached::namedAttrRef(this, attrName, result) } - - /** - * Gets a read of attribute `attrName` on this node. - */ - AttrRead getAnAttributeRead(string attrName) { result = getAnAttributeReference(attrName) } - - /** - * Gets a reference (read or write) of any attribute on this node. - */ - AttrRef getAnAttributeReference() { - Cached::namedAttrRef(this, _, result) - or - Cached::dynamicAttrRef(this, result) - } - - /** - * Gets a read of any attribute on this node. - */ - AttrRead getAnAttributeRead() { result = getAnAttributeReference() } - - /** - * Gets a call to this node. - */ - CallCfgNode getACall() { Cached::call(this, result) } -} - -cached -private module Cached { - /** - * Holds if `source` is a `LocalSourceNode` that can reach `sink` via local flow steps. - * - * The slightly backwards parametering ordering is to force correct indexing. - */ - cached - predicate hasLocalSource(Node sink, Node source) { - // Declaring `source` to be a `SourceNode` currently causes a redundant check in the - // recursive case, so instead we check it explicitly here. - source = sink and - source instanceof LocalSourceNode - or - exists(Node mid | - hasLocalSource(mid, source) and - simpleLocalFlowStep(mid, sink) - ) - } - - /** - * Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`. - */ - cached - predicate namedAttrRef(LocalSourceNode base, string attr, AttrRef ref) { - base.flowsTo(ref.getObject()) and - ref.getAttributeName() = attr - } - - /** - * Holds if `base` flows to the base of `ref` and `ref` has no known attribute name. - */ - cached - predicate dynamicAttrRef(LocalSourceNode base, AttrRef ref) { - base.flowsTo(ref.getObject()) and - not exists(ref.getAttributeName()) - } - - /** - * Holds if `func` flows to the callee of `call`. - */ - cached - predicate call(LocalSourceNode func, CallCfgNode call) { - exists(CfgNode n | - func.flowsTo(n) and - n = call.getFunction() - ) - } -} - /** * Algebraic datatype for tracking data content associated with values. * Content can be collection elements or object attributes. diff --git a/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll new file mode 100644 index 000000000000..a84fed4824af --- /dev/null +++ b/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll @@ -0,0 +1,131 @@ +/** + * Provides support for intra-procedural tracking of a customizable + * set of data flow nodes. + * + * Note that unlike `TypeTracker.qll`, this library only performs + * local tracking within a function. + */ + +import python +import DataFlowPublic +private import DataFlowPrivate +private import TaintTrackingPublic + +/** + * A data flow node that is a source of local flow. This includes things like + * - Expressions + * - Function parameters + */ +class LocalSourceNode extends Node { + LocalSourceNode() { + not simpleLocalFlowStep+(any(CfgNode n), this) and + not this instanceof ModuleVariableNode + or + this = any(ModuleVariableNode mvn).getARead() + } + + /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */ + pragma[inline] + predicate flowsTo(Node nodeTo) { Cached::hasLocalSource(nodeTo, this) } + + /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local taint steps. */ + pragma[inline] + predicate taintFlowsTo(Node nodeTo) { Cached::hasLocalTaintSource(nodeTo, this) } + + /** + * Gets a reference (read or write) of attribute `attrName` on this node. + */ + AttrRef getAnAttributeReference(string attrName) { Cached::namedAttrRef(this, attrName, result) } + + /** + * Gets a read of attribute `attrName` on this node. + */ + AttrRead getAnAttributeRead(string attrName) { result = getAnAttributeReference(attrName) } + + /** + * Gets a reference (read or write) of any attribute on this node. + */ + AttrRef getAnAttributeReference() { + Cached::namedAttrRef(this, _, result) + or + Cached::dynamicAttrRef(this, result) + } + + /** + * Gets a read of any attribute on this node. + */ + AttrRead getAnAttributeRead() { result = getAnAttributeReference() } + + /** + * Gets a call to this node. + */ + CallCfgNode getACall() { Cached::call(this, result) } +} + +cached +private module Cached { + /** + * Holds if `source` is a `LocalSourceNode` that can reach `sink` via local flow steps. + * + * The slightly backwards parametering ordering is to force correct indexing. + */ + cached + predicate hasLocalSource(Node sink, Node source) { + // Declaring `source` to be a `SourceNode` currently causes a redundant check in the + // recursive case, so instead we check it explicitly here. + source = sink and + source instanceof LocalSourceNode + or + exists(Node mid | + hasLocalSource(mid, source) and + simpleLocalFlowStep(mid, sink) + ) + } + + /** + * Holds if `source` is a `LocalSourceNode` that can reach `sink` via local taint steps. + * + * The slightly backwards parametering ordering is to force correct indexing. + */ + cached + predicate hasLocalTaintSource(Node sink, Node source) { + // Declaring `source` to be a `SourceNode` currently causes a redundant check in the + // recursive case, so instead we check it explicitly here. + source = sink and + source instanceof LocalSourceNode + or + exists(Node mid | + hasLocalTaintSource(mid, source) and + localTaintStep(mid, sink) + ) + } + + /** + * Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`. + */ + cached + predicate namedAttrRef(LocalSourceNode base, string attr, AttrRef ref) { + base.flowsTo(ref.getObject()) and + ref.getAttributeName() = attr + } + + /** + * Holds if `base` flows to the base of `ref` and `ref` has no known attribute name. + */ + cached + predicate dynamicAttrRef(LocalSourceNode base, AttrRef ref) { + base.flowsTo(ref.getObject()) and + not exists(ref.getAttributeName()) + } + + /** + * Holds if `func` flows to the callee of `call`. + */ + cached + predicate call(LocalSourceNode func, CallCfgNode call) { + exists(CfgNode n | + func.flowsTo(n) and + n = call.getFunction() + ) + } +} From 63b732ce1f78b2639b796b2ccc12bffae03bef23 Mon Sep 17 00:00:00 2001 From: yoff Date: Fri, 12 Mar 2021 18:36:20 +0100 Subject: [PATCH 2/5] Apply suggestions from code review Co-authored-by: Taus --- .../semmle/python/dataflow/new/internal/DataFlowPublic.qll | 2 +- .../src/semmle/python/dataflow/new/internal/LocalSources.qll | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll index aa69f829b9fd..aac5f501263f 100644 --- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -141,7 +141,7 @@ class Node extends TNode { LocalSourceNode getALocalSource() { result.flowsTo(this) } /** - * Gets a local source node from which data may flow to this node in zero or more local steps. + * Gets a local source node from which data may flow to this node in zero or more local taint-flow steps. */ LocalSourceNode getALocalTaintSource() { result.taintFlowsTo(this) } } diff --git a/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll index a84fed4824af..26d731061b4c 100644 --- a/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll @@ -71,7 +71,7 @@ private module Cached { */ cached predicate hasLocalSource(Node sink, Node source) { - // Declaring `source` to be a `SourceNode` currently causes a redundant check in the + // Declaring `source` to be a `LocalSourceNode` currently causes a redundant check in the // recursive case, so instead we check it explicitly here. source = sink and source instanceof LocalSourceNode @@ -89,7 +89,7 @@ private module Cached { */ cached predicate hasLocalTaintSource(Node sink, Node source) { - // Declaring `source` to be a `SourceNode` currently causes a redundant check in the + // Declaring `source` to be a `LocalSourceNode` currently causes a redundant check in the // recursive case, so instead we check it explicitly here. source = sink and source instanceof LocalSourceNode From 8f467003d257dd018c0d3034ff077e90ed7f68dc Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 12 Mar 2021 18:45:09 +0100 Subject: [PATCH 3/5] Python: More review suggestions --- python/change-notes/2021-03-12-small-api-enhancements.md | 7 +++---- .../semmle/python/dataflow/new/internal/DataFlowPublic.qll | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/python/change-notes/2021-03-12-small-api-enhancements.md b/python/change-notes/2021-03-12-small-api-enhancements.md index f75a91e548a5..ce7421cde7c7 100644 --- a/python/change-notes/2021-03-12-small-api-enhancements.md +++ b/python/change-notes/2021-03-12-small-api-enhancements.md @@ -1,5 +1,4 @@ lgtm,codescanning -* Make `ParameterNode` extend `LocalSourceNode`, thus making members like `flowsTo` available. -* Add member predicate `taintFlowsTo` to `LocalSourceNode` facilitating smoother syntax for local taint tracking. -* Add member `getALocalTaintSource` to `DataFlow::Node` facilitating smoother syntax for local taint tracking. -* Add predicate `parameterNode` to map from a `Parameter` to a data-flow node. +* The class ParameterNode now extends LocalSourceNode, thus making methods like flowsTo available. +* Local taint tracking can now be performed using the `taintFlowsTo` method on the `LocalSourceNode` class. Conversely, the new member predicate `getALocalTaintSource` can be called on a `DataFlow::Node` to obtain a `LocalSourceNode` from which taint can be tracked locally to that data-flow node. +* The new predicate `parameterNode` can now be used to map from a `Parameter` to a data-flow node. diff --git a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll index aac5f501263f..e802373a527b 100644 --- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -136,7 +136,7 @@ class Node extends TNode { LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) } /** - * Gets a local source node from which data may flow to this node in zero or more local steps. + * Gets a local source node from which data may flow to this node in zero or more local data-flow steps. */ LocalSourceNode getALocalSource() { result.flowsTo(this) } From b3ff3f7ee7f0838f43c29417033c89322b77dcb2 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 12 Mar 2021 18:59:49 +0100 Subject: [PATCH 4/5] =?UTF-8?q?Python=C3=86=20adjust=20test=20expectations?= =?UTF-8?q?=20I=20suspect=20it=20has=20to=20do=20with=20ParameterNode=20be?= =?UTF-8?q?ing=20a=20LocalSourceNode,=20but=20I=20really=20have=20no=20ide?= =?UTF-8?q?a...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ql/test/2/library-tests/locations/general/Locations.expected | 1 + .../ql/test/3/library-tests/locations/general/Locations.expected | 1 + 2 files changed, 2 insertions(+) diff --git a/python/ql/test/2/library-tests/locations/general/Locations.expected b/python/ql/test/2/library-tests/locations/general/Locations.expected index 3c74898d80f2..c6473cb2cc22 100644 --- a/python/ql/test/2/library-tests/locations/general/Locations.expected +++ b/python/ql/test/2/library-tests/locations/general/Locations.expected @@ -55,6 +55,7 @@ | Dict | 46 | 54 | 46 | 55 | | Dict | 48 | 9 | 48 | 19 | | DictUnpacking | 46 | 52 | 46 | 55 | +| DjangoViewClassHelper | 4 | 1 | 4 | 8 | | Ellipsis | 7 | 7 | 7 | 9 | | Ellipsis | 50 | 14 | 50 | 16 | | ExceptStmt | 32 | 9 | 32 | 31 | diff --git a/python/ql/test/3/library-tests/locations/general/Locations.expected b/python/ql/test/3/library-tests/locations/general/Locations.expected index 8da184f747e8..70217f5117ad 100644 --- a/python/ql/test/3/library-tests/locations/general/Locations.expected +++ b/python/ql/test/3/library-tests/locations/general/Locations.expected @@ -44,6 +44,7 @@ | Dict | 46 | 54 | 46 | 55 | | Dict | 48 | 9 | 48 | 19 | | DictUnpacking | 46 | 52 | 46 | 55 | +| DjangoViewClassHelper | 4 | 1 | 4 | 8 | | Ellipsis | 7 | 7 | 7 | 9 | | Ellipsis | 50 | 14 | 50 | 16 | | ExceptStmt | 32 | 9 | 32 | 31 | From a9af135d7e892f690669f57e1a8662970cdd2acf Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 24 Mar 2021 01:22:21 +0100 Subject: [PATCH 5/5] Python: Remove `getALocalTaintSource` and `taintFlowsTo` for now.. --- .../2021-03-12-small-api-enhancements.md | 1 - .../dataflow/new/internal/DataFlowPublic.qll | 5 ---- .../dataflow/new/internal/LocalSources.qll | 23 ------------------- .../locations/general/Locations.expected | 1 - .../locations/general/Locations.expected | 1 - 5 files changed, 31 deletions(-) diff --git a/python/change-notes/2021-03-12-small-api-enhancements.md b/python/change-notes/2021-03-12-small-api-enhancements.md index ce7421cde7c7..8d3c5c67b5f7 100644 --- a/python/change-notes/2021-03-12-small-api-enhancements.md +++ b/python/change-notes/2021-03-12-small-api-enhancements.md @@ -1,4 +1,3 @@ lgtm,codescanning * The class ParameterNode now extends LocalSourceNode, thus making methods like flowsTo available. -* Local taint tracking can now be performed using the `taintFlowsTo` method on the `LocalSourceNode` class. Conversely, the new member predicate `getALocalTaintSource` can be called on a `DataFlow::Node` to obtain a `LocalSourceNode` from which taint can be tracked locally to that data-flow node. * The new predicate `parameterNode` can now be used to map from a `Parameter` to a data-flow node. diff --git a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll index e802373a527b..36db37a0a0f9 100644 --- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -139,11 +139,6 @@ class Node extends TNode { * Gets a local source node from which data may flow to this node in zero or more local data-flow steps. */ LocalSourceNode getALocalSource() { result.flowsTo(this) } - - /** - * Gets a local source node from which data may flow to this node in zero or more local taint-flow steps. - */ - LocalSourceNode getALocalTaintSource() { result.taintFlowsTo(this) } } /** A data-flow node corresponding to an SSA variable. */ diff --git a/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll index 26d731061b4c..205188dd5fdc 100644 --- a/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll @@ -9,7 +9,6 @@ import python import DataFlowPublic private import DataFlowPrivate -private import TaintTrackingPublic /** * A data flow node that is a source of local flow. This includes things like @@ -28,10 +27,6 @@ class LocalSourceNode extends Node { pragma[inline] predicate flowsTo(Node nodeTo) { Cached::hasLocalSource(nodeTo, this) } - /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local taint steps. */ - pragma[inline] - predicate taintFlowsTo(Node nodeTo) { Cached::hasLocalTaintSource(nodeTo, this) } - /** * Gets a reference (read or write) of attribute `attrName` on this node. */ @@ -82,24 +77,6 @@ private module Cached { ) } - /** - * Holds if `source` is a `LocalSourceNode` that can reach `sink` via local taint steps. - * - * The slightly backwards parametering ordering is to force correct indexing. - */ - cached - predicate hasLocalTaintSource(Node sink, Node source) { - // Declaring `source` to be a `LocalSourceNode` currently causes a redundant check in the - // recursive case, so instead we check it explicitly here. - source = sink and - source instanceof LocalSourceNode - or - exists(Node mid | - hasLocalTaintSource(mid, source) and - localTaintStep(mid, sink) - ) - } - /** * Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`. */ diff --git a/python/ql/test/2/library-tests/locations/general/Locations.expected b/python/ql/test/2/library-tests/locations/general/Locations.expected index c6473cb2cc22..3c74898d80f2 100644 --- a/python/ql/test/2/library-tests/locations/general/Locations.expected +++ b/python/ql/test/2/library-tests/locations/general/Locations.expected @@ -55,7 +55,6 @@ | Dict | 46 | 54 | 46 | 55 | | Dict | 48 | 9 | 48 | 19 | | DictUnpacking | 46 | 52 | 46 | 55 | -| DjangoViewClassHelper | 4 | 1 | 4 | 8 | | Ellipsis | 7 | 7 | 7 | 9 | | Ellipsis | 50 | 14 | 50 | 16 | | ExceptStmt | 32 | 9 | 32 | 31 | diff --git a/python/ql/test/3/library-tests/locations/general/Locations.expected b/python/ql/test/3/library-tests/locations/general/Locations.expected index 70217f5117ad..8da184f747e8 100644 --- a/python/ql/test/3/library-tests/locations/general/Locations.expected +++ b/python/ql/test/3/library-tests/locations/general/Locations.expected @@ -44,7 +44,6 @@ | Dict | 46 | 54 | 46 | 55 | | Dict | 48 | 9 | 48 | 19 | | DictUnpacking | 46 | 52 | 46 | 55 | -| DjangoViewClassHelper | 4 | 1 | 4 | 8 | | Ellipsis | 7 | 7 | 7 | 9 | | Ellipsis | 50 | 14 | 50 | 16 | | ExceptStmt | 32 | 9 | 32 | 31 |