Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* More precise modelling of several container functions (such as `sorted`, `reversed`) and methods (such as `set.add`, `list.append`).
Original file line number Diff line number Diff line change
Expand Up @@ -183,25 +183,6 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// longer -- but there needs to be a matching read-step for the store-step, and we
// don't provide that right now.
DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo)
or
// functions operating on collections
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and
call.getArg(0) = nodeFrom
)
or
// dict methods
exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo |
methodName in ["values", "items"] and
call.calls(nodeFrom, methodName)
)
or
// list.append, set.add
exists(DataFlow::MethodCallNode call, DataFlow::Node obj |
call.calls(obj, ["append", "add"]) and
obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and
call.getArg(0) = nodeFrom
)
}

/**
Expand Down
247 changes: 246 additions & 1 deletion python/ql/lib/semmle/python/frameworks/Stdlib.qll
Original file line number Diff line number Diff line change
Expand Up @@ -3883,6 +3883,9 @@ private module StdlibPrivate {
}
}

// ---------------------------------------------------------------------------
// Flow summaries for functions operating on containers
// ---------------------------------------------------------------------------
/** A flow summary for `reversed`. */
class ReversedSummary extends SummarizedCallable {
ReversedSummary() { this = "builtins.reversed" }
Expand All @@ -3894,9 +3897,114 @@ private module StdlibPrivate {
}

override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0].ListElement" and
(
input = "Argument[0].ListElement"
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement" and
preservesValue = true
or
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}

/**
 * A flow summary for the builtin `sorted`.
 *
 * Content stored in the first argument as a list element, set element, or
 * tuple element is value-preserved into the list-element content of the
 * return value. Separately, the first argument as a whole flows to the return
 * value as a non-value-preserving step.
 */
class SortedSummary extends SummarizedCallable {
  SortedSummary() { this = "builtins.sorted" }

  /** Gets a call to the builtin `sorted`. */
  override DataFlow::CallCfgNode getACall() { result = API::builtin("sorted").getACall() }

  /** Gets a node that a reference to the builtin `sorted` can reach. */
  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("sorted").getAValueReachableFromSource()
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Value step: any element content of the argument ends up as list element
    // content of the result.
    // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
    (
      input = "Argument[0].ListElement"
      or
      input = "Argument[0].SetElement"
      or
      exists(DataFlow::TupleElementContent tupleContent |
        input = "Argument[0].TupleElement[" + tupleContent.getIndex().toString() + "]"
      )
    ) and
    output = "ReturnValue.ListElement" and
    preservesValue = true
    or
    // Non-value-preserving step: the whole argument to the whole result.
    input = "Argument[0]" and
    output = "ReturnValue" and
    preservesValue = false
  }
}

/**
 * A flow summary for the builtin `iter`.
 *
 * List, set, and tuple element content of the first argument is
 * value-preserved into the list-element content of the return value. The
 * first argument as a whole additionally flows to the return value as a
 * non-value-preserving step.
 */
class IterSummary extends SummarizedCallable {
  IterSummary() { this = "builtins.iter" }

  /** Gets a call to the builtin `iter`. */
  override DataFlow::CallCfgNode getACall() { result = API::builtin("iter").getACall() }

  /** Gets a node that a reference to the builtin `iter` can reach. */
  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("iter").getAValueReachableFromSource()
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Value step: each supported element content of the argument is mapped to
    // list element content of the result.
    exists(string elementContent |
      elementContent = ["ListElement", "SetElement"]
      or
      exists(DataFlow::TupleElementContent tupleContent |
        elementContent = "TupleElement[" + tupleContent.getIndex().toString() + "]"
      )
    |
      // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
      input = "Argument[0]." + elementContent and
      output = "ReturnValue.ListElement" and
      preservesValue = true
    )
    or
    // Non-value-preserving step: the whole argument to the whole result.
    input = "Argument[0]" and
    output = "ReturnValue" and
    preservesValue = false
  }
}

/**
 * A flow summary for the builtin `next`.
 *
 * Both steps modelled here are value-preserving and target the return value:
 * one reads list, set, or tuple element content out of the first argument,
 * the other passes the second argument through unchanged.
 */
class NextSummary extends SummarizedCallable {
  NextSummary() { this = "builtins.next" }

  /** Gets a call to the builtin `next`. */
  override DataFlow::CallCfgNode getACall() { result = API::builtin("next").getACall() }

  /** Gets a node that a reference to the builtin `next` can reach. */
  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("next").getAValueReachableFromSource()
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Every summary of `next` is a value step into the return value; only the
    // input access path differs.
    output = "ReturnValue" and
    preservesValue = true and
    (
      // An element held by the first argument.
      // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
      exists(string elementContent |
        elementContent = ["ListElement", "SetElement"]
        or
        exists(DataFlow::TupleElementContent tupleContent |
          elementContent = "TupleElement[" + tupleContent.getIndex().toString() + "]"
        )
      |
        input = "Argument[0]." + elementContent
      )
      or
      // The second argument itself.
      input = "Argument[1]"
    )
  }
}

Expand Down Expand Up @@ -4127,6 +4235,143 @@ private module StdlibPrivate {
preservesValue = true
}
}

/**
 * A flow summary for `dict.values`.
 *
 * Content stored under any known dictionary key of the receiver is
 * value-preserved into the list-element content of the return value. The
 * receiver as a whole also flows to the return value as a
 * non-value-preserving step.
 *
 * See https://docs.python.org/3.10/library/stdtypes.html#dict.values
 */
class DictValues extends SummarizedCallable {
  DictValues() { this = "dict.values" }

  /** Gets a call to a method named `values`, on any receiver. */
  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode valuesCall | valuesCall.calls(_, "values") |
      result = valuesCall
    )
  }

  /** Gets a read of an attribute named `values`. */
  override DataFlow::ArgumentNode getACallback() {
    exists(DataFlow::AttrRead valuesRead | valuesRead.getAttributeName() = "values" |
      result = valuesRead
    )
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Value step: one summary per dictionary key known to the content model.
    exists(DataFlow::DictionaryElementContent dictContent |
      input = "Argument[self].DictionaryElement[" + dictContent.getKey() + "]"
    ) and
    output = "ReturnValue.ListElement" and
    preservesValue = true
    or
    // Non-value-preserving step: the whole receiver to the whole result.
    input = "Argument[self]" and
    output = "ReturnValue" and
    preservesValue = false
  }
}

/**
 * A flow summary for `dict.keys`.
 *
 * Only a non-value-preserving step from the receiver to the return value is
 * modelled; no per-key content is tracked yet.
 *
 * See https://docs.python.org/3.10/library/stdtypes.html#dict.keys
 */
class DictKeys extends SummarizedCallable {
  DictKeys() { this = "dict.keys" }

  /** Gets a call to a method named `keys`, on any receiver. */
  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode keysCall | keysCall.calls(_, "keys") | result = keysCall)
  }

  /** Gets a read of an attribute named `keys`. */
  override DataFlow::ArgumentNode getACallback() {
    exists(DataFlow::AttrRead keysRead | keysRead.getAttributeName() = "keys" |
      result = keysRead
    )
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
    preservesValue = false and
    input = "Argument[self]" and
    output = "ReturnValue"
  }
}

/**
 * A flow summary for `dict.items`.
 *
 * Content stored under any known dictionary key of the receiver is
 * value-preserved into index 1 of the tuples held as list elements of the
 * return value. The receiver as a whole also flows to the return value as a
 * non-value-preserving step.
 *
 * See https://docs.python.org/3.10/library/stdtypes.html#dict.items
 */
class DictItems extends SummarizedCallable {
  DictItems() { this = "dict.items" }

  /** Gets a call to a method named `items`, on any receiver. */
  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode itemsCall | itemsCall.calls(_, "items") |
      result = itemsCall
    )
  }

  /** Gets a read of an attribute named `items`. */
  override DataFlow::ArgumentNode getACallback() {
    exists(DataFlow::AttrRead itemsRead | itemsRead.getAttributeName() = "items" |
      result = itemsRead
    )
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Value step: each dictionary value lands in the second slot of a result tuple.
    exists(DataFlow::DictionaryElementContent dictContent |
      input = "Argument[self].DictionaryElement[" + dictContent.getKey() + "]"
    ) and
    output = "ReturnValue.ListElement.TupleElement[1]" and
    preservesValue = true
    or
    // Non-value-preserving step: the whole receiver to the whole result.
    // TODO: Add the keys to output list
    input = "Argument[self]" and
    output = "ReturnValue" and
    preservesValue = false
  }
}

/**
 * A flow summary for `list.append`.
 *
 * The first argument is value-preserved into the list-element content of the
 * receiver, and also flows to the receiver as a whole as a
 * non-value-preserving step.
 *
 * See https://docs.python.org/3.10/library/stdtypes.html#typesseq-mutable
 */
class ListAppend extends SummarizedCallable {
  ListAppend() { this = "list.append" }

  /** Gets a call to a method named `append`, on any receiver. */
  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode appendCall | appendCall.calls(_, "append") |
      result = appendCall
    )
  }

  /** Gets a read of an attribute named `append`. */
  override DataFlow::ArgumentNode getACallback() {
    exists(DataFlow::AttrRead appendRead | appendRead.getAttributeName() = "append" |
      result = appendRead
    )
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Both steps start at the appended element.
    input = "Argument[0]" and
    (
      // the appended element becomes an element of the receiver
      output = "Argument[self].ListElement" and
      preservesValue = true
      or
      // transfer taint from new element to this (TODO: remove in future when taint-handling is more in line with other languages)
      output = "Argument[self]" and
      preservesValue = false
    )
  }
}

/**
 * A flow summary for `set.add`.
 *
 * The first argument is value-preserved into the set-element content of the
 * receiver, and also flows to the receiver as a whole as a
 * non-value-preserving step.
 *
 * See https://docs.python.org/3.10/library/stdtypes.html#frozenset.add
 */
class SetAdd extends SummarizedCallable {
  SetAdd() { this = "set.add" }

  /** Gets a call to a method named `add`, on any receiver. */
  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode addCall | addCall.calls(_, "add") | result = addCall)
  }

  /** Gets a read of an attribute named `add`. */
  override DataFlow::ArgumentNode getACallback() {
    exists(DataFlow::AttrRead addRead | addRead.getAttributeName() = "add" | result = addRead)
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Both steps start at the added element.
    input = "Argument[0]" and
    (
      // the added element becomes an element of the receiver
      output = "Argument[self].SetElement" and
      preservesValue = true
      or
      // transfer taint from new element to this (TODO: remove in future when taint-handling is more in line with other languages)
      output = "Argument[self]" and
      preservesValue = false
    )
  }
}
}

// ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion python/ql/test/experimental/dataflow/coverage/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def test_nested_comprehension_deep_with_local_flow():
def test_nested_comprehension_dict():
d = {"s": [SOURCE]}
x = [y for k, v in d.items() for y in v]
SINK(x[0]) #$ MISSING:flow="SOURCE, l:-2 -> x[0]"
SINK(x[0]) #$ flow="SOURCE, l:-2 -> x[0]"


def test_nested_comprehension_paren():
Expand Down
Loading