@@ -10,15 +10,25 @@ private import python
1010private import semmle.python.dataflow.new.DataFlow
1111private import semmle.python.Concepts
1212private import semmle.python.ApiGraphs
13+ private import semmle.python.frameworks.data.ModelsAsData
1314
1415/**
16+ * INTERNAL: Do not use.
17+ *
1518 * Provides classes modeling security-relevant aspects of the `lxml` PyPI package.
1619 *
1720 * See
1821 * - https://pypi.org/project/lxml/
1922 * - https://lxml.de/tutorial.html
2023 */
21- private module Lxml {
24+ module Lxml {
25+ /** Gets a reference to the `lxml.etree` module: either the `etree` member of the imported `lxml` module, or a node of the modeled type `lxml.etree~Alias` as reported by `ModelOutput::getATypeNode`. */
26+ API:: Node etreeRef ( ) {
27+ result = API:: moduleImport ( "lxml" ) .getMember ( "etree" )
28+ or
29+ result = ModelOutput:: getATypeNode ( "lxml.etree~Alias" )
30+ }
31+
2232 // ---------------------------------------------------------------------------
2333 // XPath
2434 // ---------------------------------------------------------------------------
@@ -34,9 +44,7 @@ private module Lxml {
3444 * - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ETXPath
3545 */
3646 private class XPathClassCall extends XML:: XPathConstruction:: Range , DataFlow:: CallCfgNode {
37- XPathClassCall ( ) {
38- this = API:: moduleImport ( "lxml" ) .getMember ( "etree" ) .getMember ( [ "XPath" , "ETXPath" ] ) .getACall ( )
39- }
47+ XPathClassCall ( ) { this = etreeRef ( ) .getMember ( [ "XPath" , "ETXPath" ] ) .getACall ( ) }
4048
4149 override DataFlow:: Node getXPath ( ) { result in [ this .getArg ( 0 ) , this .getArgByName ( "path" ) ] }
4250
@@ -62,20 +70,11 @@ private module Lxml {
6270 XPathCall ( ) {
6371 exists ( API:: Node parseResult |
6472 parseResult =
65- API:: moduleImport ( "lxml" )
66- .getMember ( "etree" )
67- .getMember ( [ "parse" , "fromstring" , "fromstringlist" , "HTML" , "XML" ] )
68- .getReturn ( )
73+ etreeRef ( ) .getMember ( [ "parse" , "fromstring" , "fromstringlist" , "HTML" , "XML" ] ) .getReturn ( )
6974 or
7075 // TODO: lxml.etree.parseid(<text>)[0] will contain the root element from parsing <text>
7176 // but we don't really have a way to model that nicely.
72- parseResult =
73- API:: moduleImport ( "lxml" )
74- .getMember ( "etree" )
75- .getMember ( "XMLParser" )
76- .getReturn ( )
77- .getMember ( "close" )
78- .getReturn ( )
77+ parseResult = etreeRef ( ) .getMember ( "XMLParser" ) .getReturn ( ) .getMember ( "close" ) .getReturn ( )
7978 |
8079 this = parseResult .getMember ( "xpath" ) .getACall ( )
8180 )
@@ -87,14 +86,7 @@ private module Lxml {
8786 }
8887
8988 class XPathEvaluatorCall extends XML:: XPathExecution:: Range , DataFlow:: CallCfgNode {
90- XPathEvaluatorCall ( ) {
91- this =
92- API:: moduleImport ( "lxml" )
93- .getMember ( "etree" )
94- .getMember ( "XPathEvaluator" )
95- .getReturn ( )
96- .getACall ( )
97- }
89+ XPathEvaluatorCall ( ) { this = etreeRef ( ) .getMember ( "XPathEvaluator" ) .getReturn ( ) .getACall ( ) }
9890
9991 override DataFlow:: Node getXPath ( ) { result = this .getArg ( 0 ) }
10092
@@ -130,9 +122,7 @@ private module Lxml {
130122 * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
131123 */
132124 private class LxmlParser extends InstanceSource , API:: CallNode {
133- LxmlParser ( ) {
134- this = API:: moduleImport ( "lxml" ) .getMember ( "etree" ) .getMember ( "XMLParser" ) .getACall ( )
135- }
125+ LxmlParser ( ) { this = etreeRef ( ) .getMember ( "XMLParser" ) .getACall ( ) }
136126
137127 // NOTE: it's not possible to change settings of a parser after constructing it
138128 override predicate vulnerableTo ( XML:: XmlParsingVulnerabilityKind kind ) {
@@ -162,10 +152,7 @@ private module Lxml {
162152 * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
163153 */
164154 private class LxmlDefaultParser extends InstanceSource , DataFlow:: CallCfgNode {
165- LxmlDefaultParser ( ) {
166- this =
167- API:: moduleImport ( "lxml" ) .getMember ( "etree" ) .getMember ( "get_default_parser" ) .getACall ( )
168- }
155+ LxmlDefaultParser ( ) { this = etreeRef ( ) .getMember ( "get_default_parser" ) .getACall ( ) }
169156
170157 override predicate vulnerableTo ( XML:: XmlParsingVulnerabilityKind kind ) {
171158 // as highlighted by
@@ -240,7 +227,7 @@ private module Lxml {
240227
241228 LxmlParsing ( ) {
242229 functionName in [ "fromstring" , "fromstringlist" , "XML" , "XMLID" , "parse" , "parseid" ] and
243- this = API :: moduleImport ( "lxml" ) . getMember ( "etree" ) .getMember ( functionName ) .getACall ( )
230+ this = etreeRef ( ) .getMember ( functionName ) .getACall ( )
244231 }
245232
246233 override DataFlow:: Node getAnInput ( ) {
@@ -309,9 +296,7 @@ private module Lxml {
309296 private class LxmlIterparseCall extends API:: CallNode , XML:: XmlParsing:: Range ,
310297 FileSystemAccess:: Range
311298 {
312- LxmlIterparseCall ( ) {
313- this = API:: moduleImport ( "lxml" ) .getMember ( "etree" ) .getMember ( "iterparse" ) .getACall ( )
314- }
299+ LxmlIterparseCall ( ) { this = etreeRef ( ) .getMember ( "iterparse" ) .getACall ( ) }
315300
316301 override DataFlow:: Node getAnInput ( ) { result in [ this .getArg ( 0 ) , this .getArgByName ( "source" ) ] }
317302
0 commit comments