Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
27e2307
Python: Add import regression for refined variable
RasmusWL Feb 17, 2023
fb425b7
Python: Add import test of `py/insecure-protocol`
RasmusWL Feb 21, 2023
00eec69
Python: Allow import of refined variable
RasmusWL Feb 17, 2023
e522009
Python: More complex import examples
RasmusWL Feb 17, 2023
4a66e48
Python: Allow import resolution with recursive phi/refine steps
RasmusWL Feb 17, 2023
6a5eebe
Python: Add test of `module_export`
RasmusWL Feb 22, 2023
6ba39d5
Python: Add import regression for re-exported things
RasmusWL Feb 22, 2023
4df7dfb
Python: Don't import module as module_attr
RasmusWL Feb 22, 2023
d77ce4f
Python: minor rewrite of `from <pkg> import *` handling
RasmusWL Feb 22, 2023
be5812c
Python: `from <pkg> import *` ignores `__all__` regression
RasmusWL Feb 22, 2023
c8a7624
Python: Take `__all__` into consideration for re-export of `from <pkg…
RasmusWL Feb 22, 2023
8eaaf8e
Python: Ignore `trace.py` in `ModuleExport.ql` test
RasmusWL Feb 22, 2023
321a4b4
Python: `ModuleExport.ql` test: ignore `main.py`
RasmusWL Feb 22, 2023
bea0acb
Python: Add barrier test to import resolution
RasmusWL Feb 22, 2023
97fefd2
Python: Attempt to fix import flow
RasmusWL Feb 22, 2023
3739072
Python: Fixed most problems from last commit
RasmusWL Feb 22, 2023
13ae98e
Python: Fix submodule exported under wrong name (when attribute clash)
RasmusWL Feb 22, 2023
96c0d95
Python: Illustrate that `clashing_attr` can be submodule
RasmusWL Feb 22, 2023
b7bdc55
Python: Show import resolution is a bit generous with exported value
RasmusWL Feb 22, 2023
11000fd
Python: Fix `ModuleExport.ql` test for Python 2
RasmusWL Feb 27, 2023
be7d668
Merge branch 'main' into import-refined
RasmusWL Feb 27, 2023
35bd809
Merge branch 'main' into import-refined
RasmusWL Mar 6, 2023
2cc8fba
Python: Accept changes due to better import resolution of `operator.py`
RasmusWL Mar 6, 2023
93c9f59
Python: Extract version specific coverage/classes.py tests
RasmusWL Mar 20, 2023
2ee09cc
Merge branch 'main' into import-refined
RasmusWL Mar 20, 2023
e90559b
Python: Add missing `options` files
RasmusWL Mar 21, 2023
b2f34ef
Merge branch 'main' into import-refined
RasmusWL Mar 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Fixed module resolution so that we allow imports of definitions that have had an attribute assigned to them, such as `class Foo; Foo.bar = 42`.
Original file line number Diff line number Diff line change
Expand Up @@ -65,31 +65,75 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate
*/
module ImportResolution {
/**
* Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
* overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
* not include `name`).
* Holds if there is an ESSA step from `defFrom` to `defTo`, which should be allowed
* for import resolution.
*/
private predicate allowedEssaImportStep(EssaDefinition defFrom, EssaDefinition defTo) {
  // Phi step: `defFrom` is one of the inputs of the phi-function `defTo`.
  // This handles definitions guarded by if-then-else.
  defFrom = defTo.(PhiFunction).getAnInput()
  or
  // Refinement step: `defTo` is a refinement whose input variable is defined by `defFrom`.
  // example: https://github.com/nvbn/thefuck/blob/ceeaeab94b5df5a4fe9d94d61e4f6b0bbea96378/thefuck/utils.py#L25-L45
  defFrom = defTo.(EssaNodeRefinement).getInput().getDefinition()
}

/**
* Holds if the module `m` defines a name `name` with the value `val`. The value
* represents the value `name` will have at the end of the module (the last place we
* have def-use flow to).
*
* Note: The handling of re-exporting imports is a bit simplistic. We assume that if
* an import is made, it will be re-exported (which will not be the case if a new
* value is assigned to the name, or it is deleted).
*/
pragma[nomagic]
predicate module_export(Module m, string name, DataFlow::CfgNode defn) {
exists(EssaVariable v, EssaDefinition essaDef |
v.getName() = name and
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit() and
(
essaDef = v.getDefinition()
or
// to handle definitions guarded by if-then-else
essaDef = v.getDefinition().(PhiFunction).getAnInput()
)
predicate module_export(Module m, string name, DataFlow::Node val) {
// Definitions made inside `m` itself
//
// for code such as `foo = ...; foo.bar = ...` there will be TWO
// EssaDefinition/EssaVariable: one for `foo = ...` (AssignmentDefinition) and one
// for `foo.bar = ...` (EssaNodeRefinement). The EssaNodeRefinement is the one
// that will reach the end of the module (normal exit).
//
// However, we cannot just use the EssaNodeRefinement as the `val`, because the
// normal data-flow depends on use-use flow, and use-use flow targets CFG nodes not
// EssaNodes. So we need to go back from the EssaDefinition/EssaVariable that
// reaches the end of the module, to the first definition of the variable, and then
// track forwards using use-use flow to find a suitable CFG node that has flow into
// it from use-use flow.
exists(EssaVariable lastUseVar, EssaVariable firstDef |
lastUseVar.getName() = name and
// we ignore special variable $ introduced by our analysis (not used for anything)
// we ignore special variable * introduced by `from <pkg> import *` -- TODO: understand why we even have this?
not name in ["$", "*"] and
lastUseVar.getAUse() = m.getANormalExit() and
allowedEssaImportStep*(firstDef, lastUseVar) and
not allowedEssaImportStep(_, firstDef)
|
defn.getNode() = essaDef.(AssignmentDefinition).getValue()
not EssaFlow::defToFirstUse(firstDef, _) and
val.asVar() = firstDef
or
defn.getNode() = essaDef.(ArgumentRefinement).getArgument()
exists(ControlFlowNode mid, ControlFlowNode end |
EssaFlow::defToFirstUse(firstDef, mid) and
EssaFlow::useToNextUse*(mid, end) and
not EssaFlow::useToNextUse(end, _) and
val.asCfgNode() = end
)
)
or
// re-exports from `from <pkg> import *`
exists(Module importedFrom |
importedFrom = ImportStar::getStarImported(m) and
module_export(importedFrom, name, val) and
potential_module_export(importedFrom, name)
)
or
// re-exports from `import <pkg>` or `from <pkg> import <stuff>`
exists(Alias a |
defn.asExpr() = [a.getValue(), a.getValue().(ImportMember).getModule()] and
val.asExpr() = a.getValue() and
a.getAsname().(Name).getId() = name and
defn.getScope() = m
val.getScope() = m
)
}

Expand Down Expand Up @@ -263,9 +307,21 @@ module ImportResolution {
module_reexport(reexporter, attr_name, m)
)
or
// Submodules that are implicitly defined with relative imports of the form `from .foo import ...`.
// In practice, we create a definition for each module in a package, even if it is not imported.
// submodules of packages will be available as `<pkg>.<submodule>` after doing
// `import <pkg>.<submodule>` at least once in the program, or can be directly
// imported with `from <pkg> import <submodule>` (even with an empty
// `<pkg>.__init__` file).
//
// Until an import of `<pkg>.<submodule>` is executed, it is technically possible
// that `<pkg>.<submodule>` (or `from <pkg> import <submodule>`) can refer to an
// attribute set in `<pkg>.__init__`.
//
// Therefore, if there is an attribute defined in `<pkg>.__init__` with the same
// name as a submodule, we always consider that this attribute _could_ be a
// reference to the submodule, even if we don't know that the submodule has been
// imported yet.
exists(string submodule, Module package |
submodule = result.asVar().getName() and
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
package.getEntryNode()) and
m = getModuleFromName(package.getPackageName() + "." + submodule)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
../coverage/argumentRoutingTest.ql
54 changes: 54 additions & 0 deletions python/ql/test/experimental/dataflow/coverage-py2/classes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Python 2 specific tests, like the one in coverage/classes.py
#
# User-defined methods, both instance methods and class methods, can be called in many non-standard ways
# i.e. differently from simply `c.f()` or `C.f()`. For example, a user-defined `__await__` method on a
# class `C` will be called by the syntactic construct `await c` when `c` is an instance of `C`.
#
# These tests should cover all the class calls that we hope to support.
# It is based on https://docs.python.org/3/reference/datamodel.html, and headings refer there.
#
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.

import sys
import os

sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects


# No-op sink: accepts one value and does nothing with it.
# In this file it is called from With_index.__index__ with `self`.
# NOTE(review): presumably the routing test matches calls by this name -- confirm
# against argumentRoutingTest.ql before renaming or changing the body.
def SINK1(x):
    pass


# No-op sink: accepts one value and does nothing with it.
# Not called anywhere in this file; presumably kept for parity with
# coverage/classes.py -- confirm before removing.
def SINK2(x):
    pass


# No-op sink: accepts one value and does nothing with it.
# Not called anywhere in this file; presumably kept for parity with
# coverage/classes.py -- confirm before removing.
def SINK3(x):
    pass


# No-op sink: accepts one value and does nothing with it.
# Not called anywhere in this file; presumably kept for parity with
# coverage/classes.py -- confirm before removing.
def SINK4(x):
    pass


# Prints the literal "OK". Per the file header, every `test_` function is
# expected to trigger this print exactly once when run.
def OK():
    print("OK")


# 3.3.8. Emulating numeric types

# object.__index__(self)
class With_index:
    # Special method under test: passes the instance to SINK1, reports via OK(),
    # and returns 0 as the index value.
    def __index__(self):
        SINK1(self)
        OK() # Call not found
        return 0


# Exercises With_index.__index__ through operator.index(), which calls the
# argument's __index__ method.
def test_index():
    # imported locally, inside the test, rather than at module level
    import operator

    # The trailing `#$ ...` comment is a test annotation and must stay on this line.
    # NOTE(review): `MISSING:` presumably marks a result that is expected but not
    # yet reported by the analysis -- confirm against the test harness.
    with_index = With_index() #$ MISSING: arg1="SSA variable with_index" func=With_index.__index__
    operator.index(with_index)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=2
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
../coverage/argumentRoutingTest.ql
72 changes: 72 additions & 0 deletions python/ql/test/experimental/dataflow/coverage-py3/classes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Python 3 specific tests, like the one in coverage/classes.py
#
# User-defined methods, both instance methods and class methods, can be called in many non-standard ways
# i.e. differently from simply `c.f()` or `C.f()`. For example, a user-defined `__await__` method on a
# class `C` will be called by the syntactic construct `await c` when `c` is an instance of `C`.
#
# These tests should cover all the class calls that we hope to support.
# It is based on https://docs.python.org/3/reference/datamodel.html, and headings refer there.
#
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.

import sys
import os

sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects


# No-op sink: accepts one value and does nothing with it.
# In this file it is called from With_length_hint.__length_hint__ and
# With_index.__index__, each time with `self`.
# NOTE(review): presumably the routing test matches calls by this name -- confirm
# against argumentRoutingTest.ql before renaming or changing the body.
def SINK1(x):
    pass


# No-op sink: accepts one value and does nothing with it.
# Not called anywhere in this file; presumably kept for parity with
# coverage/classes.py -- confirm before removing.
def SINK2(x):
    pass


# No-op sink: accepts one value and does nothing with it.
# Not called anywhere in this file; presumably kept for parity with
# coverage/classes.py -- confirm before removing.
def SINK3(x):
    pass


# No-op sink: accepts one value and does nothing with it.
# Not called anywhere in this file; presumably kept for parity with
# coverage/classes.py -- confirm before removing.
def SINK4(x):
    pass


# Prints the literal "OK". Per the file header, every `test_` function is
# expected to trigger this print exactly once when run.
def OK():
    print("OK")



# 3.3.7. Emulating container types

# object.__length_hint__(self)
class With_length_hint:
    # Special method under test: passes the instance to SINK1, reports via OK(),
    # and returns 0 as the length hint.
    def __length_hint__(self):
        SINK1(self)
        OK()
        return 0


# Exercises With_length_hint.__length_hint__ through operator.length_hint().
# The class defines no __len__, so the hint method is the one consulted.
def test_length_hint():
    # imported locally, inside the test, rather than at module level
    import operator

    # The trailing `#$ ...` comment is a test annotation and must stay on this line.
    with_length_hint = With_length_hint() #$ arg1="SSA variable with_length_hint" func=With_length_hint.__length_hint__
    operator.length_hint(with_length_hint)


# 3.3.8. Emulating numeric types

# object.__index__(self)
class With_index:
    # Special method under test: passes the instance to SINK1, reports via OK(),
    # and returns 0 as the index value.
    def __index__(self):
        SINK1(self)
        # NOTE(review): test_index in this file carries no MISSING marker, so the
        # "Call not found" remark on the next line may be stale -- confirm before
        # removing it.
        OK() # Call not found
        return 0


# Exercises With_index.__index__ through operator.index(), which calls the
# argument's __index__ method.
def test_index():
    # imported locally, inside the test, rather than at module level
    import operator

    # The trailing `#$ ...` comment is a test annotation and must stay on this line.
    with_index = With_index() #$ arg1="SSA variable with_index" func=With_index.__index__
    operator.index(with_index)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=3
22 changes: 0 additions & 22 deletions python/ql/test/experimental/dataflow/coverage/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,21 +535,6 @@ def test_len_if():
pass


# object.__length_hint__(self)
class With_length_hint:
def __length_hint__(self):
SINK1(self)
OK() # Call not found
return 0


def test_length_hint():
import operator

with_length_hint = With_length_hint() #$ MISSING: arg1="SSA variable with_length_hint" func=With_length_hint.__length_hint__
operator.length_hint(with_length_hint)


# object.__getitem__(self, key)
class With_getitem:
def __getitem__(self, key):
Expand Down Expand Up @@ -1378,13 +1363,6 @@ def __index__(self):
return 0


def test_index():
import operator

with_index = With_index() #$ MISSING: arg1="SSA variable with_index" func=With_index.__index__
operator.index(with_index)


def test_index_slicing():
with_index = With_index() #$ MISSING: arg1="SSA variable with_index" func=With_index.__index__
[0][with_index:1]
Expand Down
2 changes: 2 additions & 0 deletions python/ql/test/experimental/dataflow/validTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def check_tests_valid_after_version(testFile, version):
check_tests_valid("coverage.test")
check_tests_valid("coverage.argumentPassing")
check_tests_valid("coverage.datamodel")
check_tests_valid("coverage-py2.classes")
check_tests_valid("coverage-py3.classes")
check_tests_valid("variable-capture.in")
check_tests_valid("variable-capture.nonlocal")
check_tests_valid("variable-capture.dict")
Expand Down
Loading