Skip to content

Commit 6d40e7e

Browse files
tausbn authored and RasmusWL committed
Python: Add extensible modelling for lxml.etree
1 parent 5b9d567 commit 6d40e7e

File tree

2 files changed

+26
-34
lines changed
  • python/ql

2 files changed

+26
-34
lines changed

python/ql/lib/semmle/python/frameworks/Lxml.qll

Lines changed: 19 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -10,15 +10,25 @@ private import python
1010
private import semmle.python.dataflow.new.DataFlow
1111
private import semmle.python.Concepts
1212
private import semmle.python.ApiGraphs
13+
private import semmle.python.frameworks.data.ModelsAsData
1314

1415
/**
16+
* INTERNAL: Do not use.
17+
*
1518
* Provides classes modeling security-relevant aspects of the `lxml` PyPI package
1619
*
1720
* See
1821
* - https://pypi.org/project/lxml/
1922
* - https://lxml.de/tutorial.html
2023
*/
21-
private module Lxml {
24+
module Lxml {
25+
/** Gets a reference to the `lxml.etree` module */
26+
API::Node etreeRef() {
27+
result = API::moduleImport("lxml").getMember("etree")
28+
or
29+
result = ModelOutput::getATypeNode("lxml.etree~Alias")
30+
}
31+
2232
// ---------------------------------------------------------------------------
2333
// XPath
2434
// ---------------------------------------------------------------------------
@@ -34,9 +44,7 @@ private module Lxml {
3444
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ETXPath
3545
*/
3646
private class XPathClassCall extends XML::XPathConstruction::Range, DataFlow::CallCfgNode {
37-
XPathClassCall() {
38-
this = API::moduleImport("lxml").getMember("etree").getMember(["XPath", "ETXPath"]).getACall()
39-
}
47+
XPathClassCall() { this = etreeRef().getMember(["XPath", "ETXPath"]).getACall() }
4048

4149
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("path")] }
4250

@@ -62,20 +70,11 @@ private module Lxml {
6270
XPathCall() {
6371
exists(API::Node parseResult |
6472
parseResult =
65-
API::moduleImport("lxml")
66-
.getMember("etree")
67-
.getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"])
68-
.getReturn()
73+
etreeRef().getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"]).getReturn()
6974
or
7075
// TODO: lxml.etree.parseid(<text>)[0] will contain the root element from parsing <text>
7176
// but we don't really have a way to model that nicely.
72-
parseResult =
73-
API::moduleImport("lxml")
74-
.getMember("etree")
75-
.getMember("XMLParser")
76-
.getReturn()
77-
.getMember("close")
78-
.getReturn()
77+
parseResult = etreeRef().getMember("XMLParser").getReturn().getMember("close").getReturn()
7978
|
8079
this = parseResult.getMember("xpath").getACall()
8180
)
@@ -87,14 +86,7 @@ private module Lxml {
8786
}
8887

8988
class XPathEvaluatorCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
90-
XPathEvaluatorCall() {
91-
this =
92-
API::moduleImport("lxml")
93-
.getMember("etree")
94-
.getMember("XPathEvaluator")
95-
.getReturn()
96-
.getACall()
97-
}
89+
XPathEvaluatorCall() { this = etreeRef().getMember("XPathEvaluator").getReturn().getACall() }
9890

9991
override DataFlow::Node getXPath() { result = this.getArg(0) }
10092

@@ -130,9 +122,7 @@ private module Lxml {
130122
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
131123
*/
132124
private class LxmlParser extends InstanceSource, API::CallNode {
133-
LxmlParser() {
134-
this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
135-
}
125+
LxmlParser() { this = etreeRef().getMember("XMLParser").getACall() }
136126

137127
// NOTE: it's not possible to change settings of a parser after constructing it
138128
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
@@ -162,10 +152,7 @@ private module Lxml {
162152
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
163153
*/
164154
private class LxmlDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
165-
LxmlDefaultParser() {
166-
this =
167-
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
168-
}
155+
LxmlDefaultParser() { this = etreeRef().getMember("get_default_parser").getACall() }
169156

170157
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
171158
// as highlighted by
@@ -240,7 +227,7 @@ private module Lxml {
240227

241228
LxmlParsing() {
242229
functionName in ["fromstring", "fromstringlist", "XML", "XMLID", "parse", "parseid"] and
243-
this = API::moduleImport("lxml").getMember("etree").getMember(functionName).getACall()
230+
this = etreeRef().getMember(functionName).getACall()
244231
}
245232

246233
override DataFlow::Node getAnInput() {
@@ -309,9 +296,7 @@ private module Lxml {
309296
private class LxmlIterparseCall extends API::CallNode, XML::XmlParsing::Range,
310297
FileSystemAccess::Range
311298
{
312-
LxmlIterparseCall() {
313-
this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall()
314-
}
299+
LxmlIterparseCall() { this = etreeRef().getMember("iterparse").getACall() }
315300

316301
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
317302

python/ql/src/meta/ClassHierarchy/Find.ql

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ private import semmle.python.frameworks.Urllib3
3030
private import semmle.python.frameworks.Pydantic
3131
private import semmle.python.frameworks.Peewee
3232
private import semmle.python.frameworks.Aioch
33+
private import semmle.python.frameworks.Lxml
3334
import semmle.python.frameworks.data.internal.ApiGraphModelsExtensions as Extensions
3435

3536
class FlaskViewClasses extends FindSubclassesSpec {
@@ -457,6 +458,12 @@ class ElementTree extends FindSubclassesSpec {
457458
override API::Node getAlreadyModeledClass() { result = StdlibPrivate::elementTreeClassRef() }
458459
}
459460

461+
class LxmlETreeAlias extends FindSubclassesSpec {
462+
LxmlETreeAlias() { this = "lxml.etree~Alias" }
463+
464+
override API::Node getAlreadyModeledClass() { result = Lxml::etreeRef() }
465+
}
466+
460467
bindingset[fullyQualified]
461468
predicate fullyQualifiedToYamlFormat(string fullyQualified, string type2, string path) {
462469
exists(int firstDot | firstDot = fullyQualified.indexOf(".", 0, 0) |

0 commit comments

Comments
 (0)