diff --git a/python/ql/src/semmle/python/dataflow/new/internal/Attributes.qll b/python/ql/src/semmle/python/dataflow/new/internal/Attributes.qll index acdf3ea255f0..9d3e55c507b0 100644 --- a/python/ql/src/semmle/python/dataflow/new/internal/Attributes.qll +++ b/python/ql/src/semmle/python/dataflow/new/internal/Attributes.qll @@ -17,6 +17,13 @@ abstract class AttrRef extends Node { */ abstract Node getObject(); + /** + * Holds if this data flow node accesses attribute named `attrName` on object `object`. + */ + predicate accesses(Node object, string attrName) { + this.getObject() = object and this.getAttributeName() = attrName + } + /** * Gets the expression node that defines the attribute being accessed, if any. This is * usually an identifier or literal. diff --git a/python/ql/src/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll b/python/ql/src/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll index a6e169243db1..4d3353705eef 100644 --- a/python/ql/src/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll +++ b/python/ql/src/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll @@ -1,7 +1,8 @@ private import python private import semmle.python.dataflow.new.DataFlow -private import semmle.python.dataflow.new.internal.DataFlowPrivate +private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate private import semmle.python.dataflow.new.internal.TaintTrackingPublic +private import semmle.python.ApiGraphs /** * Holds if `node` should be a sanitizer in all global taint flow configurations @@ -75,13 +76,13 @@ predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) { */ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) { // transforming something tainted into a string will make the string tainted - exists(CallNode call | call = nodeTo.getNode() | - call.getFunction().(NameNode).getId() in ["str", "bytes", "unicode"] and + exists(DataFlow::CallCfgNode call | call = nodeTo | ( - nodeFrom.getNode() = call.getArg(0) + call = API::builtin(["str", "bytes", "unicode"]).getACall() or - nodeFrom.getNode() = call.getArgByName("object") - ) + call.getFunction().asCfgNode().(NameNode).getId() in ["str", "bytes", "unicode"] + ) and + nodeFrom in [call.getArg(0), call.getArgByName("object")] ) or // String methods. Note that this doesn't recognize `meth = "foo".upper; meth()` @@ -148,39 +149,37 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) { // construction by literal // TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :| - storeStep(nodeFrom, _, nodeTo) + DataFlowPrivate::storeStep(nodeFrom, _, nodeTo) or // constructor call - exists(CallNode call | call = nodeTo.asCfgNode() | - call.getFunction().(NameNode).getId() in [ - "list", "set", "frozenset", "dict", "defaultdict", "tuple" - ] and - call.getArg(0) = nodeFrom.getNode() + exists(DataFlow::CallCfgNode call | call = nodeTo | + call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and + call.getArg(0) = nodeFrom + // TODO: Properly handle defaultdict/namedtuple ) or // functions operating on collections - exists(CallNode call | call = nodeTo.asCfgNode() | - call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and - call.getArg(0) = nodeFrom.getNode() + exists(DataFlow::CallCfgNode call | call = nodeTo | + call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and + call.getArg(0) = nodeFrom ) or // methods - exists(CallNode call, string name | call = nodeTo.asCfgNode() | - name in [ + exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo | + methodName in [ // general "copy", "pop", // dict "values", "items", "get", "popitem" ] and - call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode() + call.calls(nodeFrom, methodName) ) or // list.append, set.add - exists(CallNode call, string name | - name in ["append", "add"] and - call.getFunction().(AttrNode).getObject(name) = - nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode().asCfgNode() and - call.getArg(0) = nodeFrom.getNode() + exists(DataFlow::MethodCallNode call, DataFlow::Node obj | + call.calls(obj, ["append", "add"]) and + obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and + call.getArg(0) = nodeFrom ) } @@ -188,14 +187,9 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) { * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying. */ predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) { - exists(CallNode call | call = nodeTo.getNode() | - // Fully qualified: copy.copy, copy.deepcopy - ( - call.getFunction().(NameNode).getId() in ["copy", "deepcopy"] - or - call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy" - ) and - call.getArg(0) = nodeFrom.getNode() + exists(DataFlow::CallCfgNode call | call = nodeTo | + call = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall() and + call.getArg(0) = nodeFrom ) } diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py index eed3543bc170..42ac758bfffa 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py +++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py @@ -104,7 +104,7 @@ def non_syntactic(): _str = str ensure_tainted( meth(), # $ MISSING: tainted - _str(ts), # $ MISSING: tainted + _str(ts), # $ tainted )