diff --git a/python/change-notes/2021-03-11-api-graph-builtins.md b/python/change-notes/2021-03-11-api-graph-builtins.md new file mode 100644 index 000000000000..fdcf090c8ccb --- /dev/null +++ b/python/change-notes/2021-03-11-api-graph-builtins.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* API graphs now contain nodes for built-in functions and classes. For instance, `API::builtin("open")` is the API graph node corresponding to the built-in `open` function. \ No newline at end of file diff --git a/python/ql/src/semmle/python/ApiGraphs.qll b/python/ql/src/semmle/python/ApiGraphs.qll index 40a3bf4f756e..b05a8910530f 100644 --- a/python/ql/src/semmle/python/ApiGraphs.qll +++ b/python/ql/src/semmle/python/ApiGraphs.qll @@ -216,6 +216,9 @@ module API { */ Node moduleImport(string m) { result = Impl::MkModuleImport(m) } + /** Gets a node corresponding to the built-in with the name `n`, if any. Built-ins are modeled as members of the `builtins` module. */ + Node builtin(string n) { result = moduleImport("builtins").getMember(n) } + /** * Provides the actual implementation of API graphs, cached for performance. * @@ -300,11 +303,18 @@ module API { MkRoot() or /** An abstract representative for imports of the module called `name`. */ MkModuleImport(string name) { - imports(_, name) + // Only create a node for `__builtin__` when analyzing Python 3; for Python 2 we alias `__builtin__` to `builtins` elsewhere + (name != "__builtin__" or major_version() = 3) and + ( + imports(_, name) + or + // When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes + // `foo` and `foo.bar`: + name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName() + ) or - // When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes - // `foo` and `foo.bar`: - name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName() + // The `builtins` module is always implicitly available, so it gets a node even if it is never explicitly imported + name = "builtins" } or /** A use of an API member at the node `nd`. 
*/ MkUse(DataFlow::Node nd) { use(_, _, nd) } @@ -339,6 +349,24 @@ module API { ) } + private import semmle.python.types.Builtins as Builtins + + /** + * Gets a data flow node that is likely to refer to a built-in with the name `name`. + * + * Currently this is an over-approximation: any `NameNode` that is a global load and whose identifier matches the name of + * some built-in is included, without accounting for things like overwriting a built-in with a different value. + */ + private DataFlow::Node likely_builtin(string name) { + result.asCfgNode() = + any(NameNode n | + n.isGlobal() and + n.isLoad() and + name = n.getId() and + name = any(Builtins::Builtin b).getName() + ) + } + /** * Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled * `lbl` in the API graph. @@ -369,6 +397,10 @@ module API { ref.asExpr().(ClassExpr).getABase() = superclass.asExpr() ) ) + or + // Built-ins, treated as members of the module `builtins` + base = MkModuleImport("builtins") and + lbl = Label::member(any(string name | ref = likely_builtin(name))) } /** @@ -381,6 +413,11 @@ module API { imports(ref, name) ) or + // Ensure the Python 2 `__builtin__` module gets the name of the Python 3 `builtins` module. 
+ major_version() = 2 and + nd = MkModuleImport("builtins") and + imports(ref, "__builtin__") + or nd = MkUse(ref) } diff --git a/python/ql/test/experimental/dataflow/ApiGraphs-py2/options b/python/ql/test/experimental/dataflow/ApiGraphs-py2/options new file mode 100644 index 000000000000..4d24074fe196 --- /dev/null +++ b/python/ql/test/experimental/dataflow/ApiGraphs-py2/options @@ -0,0 +1 @@ +semmle-extractor-options: --lang=2 --max-import-depth=1 \ No newline at end of file diff --git a/python/ql/test/experimental/dataflow/ApiGraphs-py2/test.py b/python/ql/test/experimental/dataflow/ApiGraphs-py2/test.py new file mode 100644 index 000000000000..d98eae408528 --- /dev/null +++ b/python/ql/test/experimental/dataflow/ApiGraphs-py2/test.py @@ -0,0 +1,3 @@ +def python2_style(): + from __builtin__ import open #$ use=moduleImport("builtins").getMember("open") + open("hello.txt") #$ use=moduleImport("builtins").getMember("open").getReturn() diff --git a/python/ql/test/experimental/dataflow/ApiGraphs-py2/use.expected b/python/ql/test/experimental/dataflow/ApiGraphs-py2/use.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/experimental/dataflow/ApiGraphs-py2/use.ql b/python/ql/test/experimental/dataflow/ApiGraphs-py2/use.ql new file mode 100644 index 000000000000..f02bb048c585 --- /dev/null +++ b/python/ql/test/experimental/dataflow/ApiGraphs-py2/use.ql @@ -0,0 +1,30 @@ +import python +import semmle.python.dataflow.new.DataFlow +import TestUtilities.InlineExpectationsTest +import semmle.python.ApiGraphs + +class ApiUseTest extends InlineExpectationsTest { + ApiUseTest() { this = "ApiUseTest" } + + override string getARelevantTag() { result = "use" } + + private predicate relevant_node(API::Node a, DataFlow::Node n, Location l) { + n = a.getAUse() and l = n.getLocation() + } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(API::Node a, DataFlow::Node n | relevant_node(a, 
n, location) | + tag = "use" and + // Report a single path per line: the `max` of the `getPath()` values of all uses on this line (presumably string order, which picks the longest path when the candidates extend one another): + value = + max(API::Node a2, Location l2 | + relevant_node(a2, _, l2) and + l2.getFile() = location.getFile() and + l2.getStartLine() = location.getStartLine() + | + a2.getPath() + ) and + element = n.toString() + ) + } +} diff --git a/python/ql/test/experimental/dataflow/ApiGraphs/test.py b/python/ql/test/experimental/dataflow/ApiGraphs/test.py index 179f0f522dfa..5a382f3d6cc9 100644 --- a/python/ql/test/experimental/dataflow/ApiGraphs/test.py +++ b/python/ql/test/experimental/dataflow/ApiGraphs/test.py @@ -100,3 +100,42 @@ def my_internal_method(self): #$ use=moduleImport("pflask").getMember("views").g pass int_instance = IntMyView() #$ use=moduleImport("pflask").getMember("views").getMember("View").getASubclass().getReturn() + + +# Built-ins + +def use_of_builtins(): + for x in range(5): #$ use=moduleImport("builtins").getMember("range").getReturn() + if x < len([]): #$ use=moduleImport("builtins").getMember("len").getReturn() + print("Hello") #$ use=moduleImport("builtins").getMember("print").getReturn() + raise Exception("Farewell") #$ use=moduleImport("builtins").getMember("Exception").getReturn() + +def imported_builtins(): + import builtins #$ use=moduleImport("builtins") + def open(f): + return builtins.open(f) #$ MISSING: use=moduleImport("builtins").getMember("open").getReturn() + +def redefine_print(): + def my_print(x): + import builtins #$ use=moduleImport("builtins") + builtins.print("I'm printing", x) #$ use=moduleImport("builtins").getMember("print").getReturn() + print = my_print + print("these words") + +def local_redefine_range(): + range = 5 + return range + +def global_redefine_range(): + global range + range = 6 + return range #$ SPURIOUS: use=moduleImport("builtins").getMember("range") + +def obscured_print(): + p = print #$ use=moduleImport("builtins").getMember("print") + p("Can you see me?") #$ use=moduleImport("builtins").getMember("print").getReturn() + 
+def python2_style(): + # In Python 3, `__builtin__` has no special meaning, so it is expected to show up as an ordinary module import rather than as `builtins`. + from __builtin__ import open #$ use=moduleImport("__builtin__").getMember("open") + open("hello.txt") #$ use=moduleImport("__builtin__").getMember("open").getReturn() diff --git a/python/ql/test/experimental/dataflow/ApiGraphs/test1.py b/python/ql/test/experimental/dataflow/ApiGraphs/test1.py index 847abda7749a..471cb09d4fa8 100644 --- a/python/ql/test/experimental/dataflow/ApiGraphs/test1.py +++ b/python/ql/test/experimental/dataflow/ApiGraphs/test1.py @@ -2,5 +2,5 @@ print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42 try: print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar") -except AttributeError as e: - print(e) # module 'mypkg' has no attribute 'bar' +except AttributeError as e: #$ use=moduleImport("builtins").getMember("AttributeError") + print(e) #$ use=moduleImport("builtins").getMember("print").getReturn() // module 'mypkg' has no attribute 'bar' diff --git a/python/ql/test/experimental/dataflow/ApiGraphs/test5.py b/python/ql/test/experimental/dataflow/ApiGraphs/test5.py index 8d1bc48b9a4b..1825937a6a23 100644 --- a/python/ql/test/experimental/dataflow/ApiGraphs/test5.py +++ b/python/ql/test/experimental/dataflow/ApiGraphs/test5.py @@ -3,8 +3,8 @@ print(mypkg.foo) #$ use=moduleImport("mypkg").getMember("foo") // 42 try: print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar") -except AttributeError as e: - print(e) # module 'mypkg' has no attribute 'bar' +except AttributeError as e: #$ use=moduleImport("builtins").getMember("AttributeError") + print(e) #$ use=moduleImport("builtins").getMember("print").getReturn() // module 'mypkg' has no attribute 'bar' from mypkg import bar as _bar #$ use=moduleImport("mypkg").getMember("bar") print(mypkg.bar) #$ use=moduleImport("mypkg").getMember("bar") //