Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/change-notes/2021-03-11-api-graph-builtins.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lgtm,codescanning
* API graphs now contain nodes for built-in functions and classes. For instance, `API::builtin("open")` is the API graph node corresponding to the built-in `open` function.
45 changes: 41 additions & 4 deletions python/ql/src/semmle/python/ApiGraphs.qll
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ module API {
*/
Node moduleImport(string m) { result = Impl::MkModuleImport(m) }

/**
 * Gets a node corresponding to the built-in with the given name, if any.
 *
 * Built-ins are modelled as members of the `builtins` module, so this is shorthand for
 * `moduleImport("builtins").getMember(n)`.
 */
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }

/**
* Provides the actual implementation of API graphs, cached for performance.
*
Expand Down Expand Up @@ -300,11 +303,18 @@ module API {
MkRoot() or
/** An abstract representative for imports of the module called `name`. */
MkModuleImport(string name) {
imports(_, name)
// Ignore the following module name for Python 2, as we alias `__builtin__` to `builtins` elsewhere
(name != "__builtin__" or major_version() = 3) and
(
imports(_, name)
or
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
// `foo` and `foo.bar`:
name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName()
)
or
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
// `foo` and `foo.bar`:
name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName()
// The `builtins` module should always be implicitly available
name = "builtins"
} or
/** A use of an API member at the node `nd`. */
MkUse(DataFlow::Node nd) { use(_, _, nd) }
Expand Down Expand Up @@ -339,6 +349,24 @@ module API {
)
}

private import semmle.python.types.Builtins as Builtins

/**
 * Gets a data-flow node that probably refers to the built-in called `name`.
 *
 * A node qualifies when it is a load of a global name whose identifier equals the name of
 * some built-in. This is an over-approximation: for instance, a global that has been
 * reassigned, and so no longer holds the built-in value, is still reported.
 */
private DataFlow::Node likely_builtin(string name) {
  exists(NameNode load |
    load.isGlobal() and
    load.isLoad() and
    load.getId() = name and
    exists(Builtins::Builtin b | b.getName() = name)
  |
    result.asCfgNode() = load
  )
}

/**
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
* `lbl` in the API graph.
Expand Down Expand Up @@ -369,6 +397,10 @@ module API {
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
)
)
or
// Built-ins, treated as members of the module `builtins`
base = MkModuleImport("builtins") and
lbl = Label::member(any(string name | ref = likely_builtin(name)))
}

/**
Expand All @@ -381,6 +413,11 @@ module API {
imports(ref, name)
)
or
// Ensure the Python 2 `__builtin__` module gets the name of the Python 3 `builtins` module.
major_version() = 2 and
nd = MkModuleImport("builtins") and
imports(ref, "__builtin__")
or
nd = MkUse(ref)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
semmle-extractor-options: --lang=2 --max-import-depth=1
3 changes: 3 additions & 0 deletions python/ql/test/experimental/dataflow/ApiGraphs-py2/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
def python2_style():
from __builtin__ import open #$ use=moduleImport("builtins").getMember("open")
open("hello.txt") #$ use=moduleImport("builtins").getMember("open").getReturn()
Empty file.
30 changes: 30 additions & 0 deletions python/ql/test/experimental/dataflow/ApiGraphs-py2/use.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
import semmle.python.ApiGraphs

/**
 * An inline-expectations test that reports, under the `use` tag, the longest API-graph path
 * among the uses found on each source line.
 */
class ApiUseTest extends InlineExpectationsTest {
  ApiUseTest() { this = "ApiUseTest" }

  override string getARelevantTag() { result = "use" }

  /** Holds if `n` is a use of the API-graph node `a`, and `l` is the location of `n`. */
  private predicate relevant_node(API::Node a, DataFlow::Node n, Location l) {
    n = a.getAUse() and l = n.getLocation()
  }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(DataFlow::Node n | relevant_node(_, n, location) |
      tag = "use" and
      // Only report the longest path on this line. A plain `max` over strings would select
      // the lexicographically greatest path, which is the longest one only when all paths
      // on the line are prefixes of each other. Order by length first and fall back to the
      // path itself to break ties deterministically.
      value =
        max(API::Node a2, Location l2 |
          relevant_node(a2, _, l2) and
          l2.getFile() = location.getFile() and
          l2.getStartLine() = location.getStartLine()
        |
          a2.getPath() order by a2.getPath().length(), a2.getPath()
        ) and
      element = n.toString()
    )
  }
}
39 changes: 39 additions & 0 deletions python/ql/test/experimental/dataflow/ApiGraphs/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,42 @@ def my_internal_method(self): #$ use=moduleImport("pflask").getMember("views").g
pass

int_instance = IntMyView() #$ use=moduleImport("pflask").getMember("views").getMember("View").getASubclass().getReturn()


# Built-ins

def use_of_builtins():
for x in range(5): #$ use=moduleImport("builtins").getMember("range").getReturn()
if x < len([]): #$ use=moduleImport("builtins").getMember("len").getReturn()
print("Hello") #$ use=moduleImport("builtins").getMember("print").getReturn()
raise Exception("Farewell") #$ use=moduleImport("builtins").getMember("Exception").getReturn()

def imported_builtins():
import builtins #$ use=moduleImport("builtins")
def open(f):
return builtins.open(f) #$ MISSING: use=moduleImport("builtins").getMember("open").getReturn()

def redefine_print():
def my_print(x):
import builtins #$ use=moduleImport("builtins")
builtins.print("I'm printing", x) #$ use=moduleImport("builtins").getMember("print").getReturn()
print = my_print
print("these words")

def local_redefine_range():
range = 5
return range

def global_redefine_range():
global range
range = 6
return range #$ SPURIOUS: use=moduleImport("builtins").getMember("range")

def obscured_print():
p = print #$ use=moduleImport("builtins").getMember("print")
p("Can you see me?") #$ use=moduleImport("builtins").getMember("print").getReturn()

def python2_style():
# In Python 3, `__builtin__` has no special meaning.
from __builtin__ import open #$ use=moduleImport("__builtin__").getMember("open")
open("hello.txt") #$ use=moduleImport("__builtin__").getMember("open").getReturn()
4 changes: 2 additions & 2 deletions python/ql/test/experimental/dataflow/ApiGraphs/test1.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
try:
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar")
except AttributeError as e:
print(e) # module 'mypkg' has no attribute 'bar'
except AttributeError as e: #$ use=moduleImport("builtins").getMember("AttributeError")
print(e) #$ use=moduleImport("builtins").getMember("print").getReturn() // module 'mypkg' has no attribute 'bar'
4 changes: 2 additions & 2 deletions python/ql/test/experimental/dataflow/ApiGraphs/test5.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42
try:
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar")
except AttributeError as e:
print(e) # module 'mypkg' has no attribute 'bar'
except AttributeError as e: #$ use=moduleImport("builtins").getMember("AttributeError")
print(e) #$ use=moduleImport("builtins").getMember("print").getReturn() // module 'mypkg' has no attribute 'bar'

from mypkg import bar as _bar #$ use=moduleImport("mypkg").getMember("bar")
print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar") // <module 'mypkg.bar' ...