From 77958849468c2946496e300343c4a9fca310fd01 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Jun 2026 05:30:20 +0000 Subject: [PATCH 1/5] Initial plan From a4585d8d948ea028eb91335536458c0e5bf50727 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Jun 2026 05:48:40 +0000 Subject: [PATCH 2/5] Add test documenting missing PEP249 alerts for connection stored in self attribute --- .../library-tests/frameworks/hdbcli/pep249.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py index 713f15cb6d4f..f317a4958169 100644 --- a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py +++ b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py @@ -7,3 +7,31 @@ cursor.executemany("some sql", (42,)) # $ getSql="some sql" cursor.close() + + +# Connection stored in a class attribute (`self._conn`) and used in another method. +# +# This is currently NOT detected: the `Connection::instance()`/`execute()` predicates in +# PEP249.qll are based on type tracking, which cannot follow a value that is stored into a +# `self` attribute in one method and read from a `self` attribute in another method (see the +# `MISSING` markers below). Regular (global) data flow handles this case correctly, so the +# limitation is specific to the type-tracking-based modeling. +class Database: + def __init__(self): + self._conn = dbapi.connect(address="hostname", port=300, user="username") + + def get_connection(self): + return self._conn + + def run_via_getter(self): + conn = self.get_connection() + cursor = conn.cursor() + cursor.execute("getter sql") # $ MISSING: getSql="getter sql" + + def run_direct(self): + self._conn.execute("direct sql") # $ MISSING: getSql="direct sql" + + +db = Database() +db.run_via_getter() +db.run_direct() From 73bc2d70ae1067b8bc0ee0d8f55c7e9a0c38326e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Jun 2026 06:08:49 +0000 Subject: [PATCH 3/5] Model instance-attribute type flow Use a field level step like JS and Ruby. --- .../new/internal/TypeTrackingImpl.qll | 47 +++++++++++++++++++ .../dataflow/typetracking/attribute_tests.py | 4 +- .../library-tests/frameworks/hdbcli/pep249.py | 13 +++-- 3 files changed, 55 insertions(+), 9 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 215c7906e655..a242c1d8e50c 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -172,6 +172,8 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ predicate levelStepNoCall(Node nodeFrom, LocalSourceNode nodeTo) { TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo) + or + localFieldStep(nodeFrom, nodeTo) } /** @@ -317,6 +319,51 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { ) } + /** + * Holds if `ref` accesses attribute `attr` of `self`, where `self` is the first + * parameter of an instance method of `cls` (i.e. an access of the form `self.attr`). + * + * Static methods and class methods are excluded, since their first parameter is not a + * `self` instance reference. + */ + private predicate selfAttrRef(Class cls, string attr, DataFlowPublic::AttrRef ref) { + exists(Function method, Name selfUse | + method = cls.getAMethod() and + not DataFlowDispatch::isStaticmethod(method) and + not DataFlowDispatch::isClassmethod(method) and + selfUse.getVariable() = method.getArg(0).(Name).getVariable() and + ref.getObject().asCfgNode().getNode() = selfUse and + ref.mayHaveAttributeName(attr) + ) + } + + /** + * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a + * class, and `nodeTo` reads attribute `self.attr` in some (possibly different) instance + * method of the same class. + * + * This models flow through instance attributes (`self.foo`): a value stored into + * `self.foo` in one method can be read from `self.foo` in another method. Type-tracking + * handles the store and read steps via `AttrWrite`/`AttrRead`, but on its own it cannot + * relate the `self` of the writing method to the `self` of the reading method. Following + * the approach used for Ruby and JavaScript, we model this directly as a level step from + * the written value to the read reference, for any pair of methods on the class (not + * just from `__init__`). + * + * This is an over-approximation: it is instance-insensitive (it does not distinguish + * between different instances of the same class) and order-insensitive (it does not + * require the write to happen before the read), matching the precision of + * instance-attribute handling for Ruby and JavaScript. + */ + private predicate localFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { + exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read | + selfAttrRef(cls, attr, write) and + nodeFrom = write.getValue() and + selfAttrRef(cls, attr, read) and + nodeTo = read + ) + } + /** * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. */ diff --git a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py index c49cdf77fcdf..05496ad74d09 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py +++ b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py @@ -151,10 +151,10 @@ def __init__(self): # $ tracked=foo self.foo = tracked # $ tracked=foo tracked def print_foo(self): # $ MISSING: tracked=foo - print(self.foo) # $ MISSING: tracked=foo tracked + print(self.foo) # $ tracked MISSING: tracked=foo def possibly_uncalled_method(self): # $ MISSING: tracked=foo - print(self.foo) # $ MISSING: tracked=foo tracked + print(self.foo) # $ tracked MISSING: tracked=foo instance = MyClass2() print(instance.foo) # $ MISSING: tracked=foo tracked diff --git a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py index f317a4958169..0c6c39086482 100644 --- a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py +++ b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py @@ -11,11 +11,10 @@ # Connection stored in a class attribute (`self._conn`) and used in another method. # -# This is currently NOT detected: the `Connection::instance()`/`execute()` predicates in -# PEP249.qll are based on type tracking, which cannot follow a value that is stored into a -# `self` attribute in one method and read from a `self` attribute in another method (see the -# `MISSING` markers below). Regular (global) data flow handles this case correctly, so the -# limitation is specific to the type-tracking-based modeling. +# This is detected because type tracking includes a level step modelling flow through +# instance attributes: a value written to `self._conn` in one method (here `__init__`) can +# be read back from `self._conn` (directly or via a getter) in any other method on the same +# class. This follows the same approach used for instance fields in Ruby and JavaScript. class Database: def __init__(self): self._conn = dbapi.connect(address="hostname", port=300, user="username") @@ -26,10 +25,10 @@ def get_connection(self): def run_via_getter(self): conn = self.get_connection() cursor = conn.cursor() - cursor.execute("getter sql") # $ MISSING: getSql="getter sql" + cursor.execute("getter sql") # $ getSql="getter sql" def run_direct(self): - self._conn.execute("direct sql") # $ MISSING: getSql="direct sql" + self._conn.execute("direct sql") # $ getSql="direct sql" db = Database() From befb557bfd8b0c3d1800177c155a580765abbe22 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Thu, 11 Jun 2026 15:44:20 +0200 Subject: [PATCH 4/5] Accept fixed MISSING tests --- .../library-tests/CallGraph/InlineCallGraphTest.expected | 3 --- .../library-tests/CallGraph/code/class_attr_assign.py | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected index b353309e852f..1cd62c6de347 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected +++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected @@ -1,9 +1,6 @@ testFailures debug_callableNotUnique pointsTo_found_typeTracker_notFound -| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func | -| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func | -| code/class_attr_assign.py:26:9:26:25 | ControlFlowNode for Attribute() | DummyObject.method | | code/class_super.py:50:1:50:6 | ControlFlowNode for Attribute() | outside_def | | code/conditional_in_argument.py:18:5:18:11 | ControlFlowNode for Attribute() | X.bar | | code/func_defined_outside_class.py:21:1:21:11 | ControlFlowNode for Attribute() | A.foo | diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py index 605375925f72..714e27dba1a0 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py +++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py @@ -7,8 +7,8 @@ def __init__(self, func): self.direct_ref = my_func def later(self): - self.indirect_ref() # $ pt=my_func MISSING: tt=my_func - self.direct_ref() # $ pt=my_func MISSING: tt=my_func + self.indirect_ref() # $ pt=my_func tt=my_func + self.direct_ref() # $ pt=my_func tt=my_func foo = Foo(my_func) # $ tt=Foo.__init__ foo.later() # $ pt,tt=Foo.later @@ -23,7 +23,7 @@ def __init__(self): self.obj = DummyObject() def later(self): - self.obj.method() # $ pt=DummyObject.method MISSING: tt=DummyObject.method + self.obj.method() # $ pt=DummyObject.method tt=DummyObject.method bar = Bar(my_func) # $ tt=Bar.__init__ From 913dcb119059b42918661e644333e5052f7f6a6b Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Thu, 11 Jun 2026 22:20:52 +0200 Subject: [PATCH 5/5] Add change note --- .../2026-06-11-fix-type-tracking-instance-attributes.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md diff --git a/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md new file mode 100644 index 000000000000..25bc0e0f31f9 --- /dev/null +++ b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Python type tracking now follows values stored in instance attributes such as `self.attr` across instance methods on the same class. As a result, analysis is more likely to recognize user-defined objects that are stored on `self` and used later in other methods, which may produce additional results.