Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions python/ql/lib/semmle/python/frameworks/Django.qll
Original file line number Diff line number Diff line change
Expand Up @@ -2295,4 +2295,22 @@ module PrivateDjango {

override string getMimetypeDefault() { none() }
}

// ---------------------------------------------------------------------------
// Logging
// ---------------------------------------------------------------------------
/**
* A standard Python logger instance from Django.
* see https://github.com/django/django/blob/stable/4.0.x/django/utils/log.py#L11
*/
Comment thread
yoff marked this conversation as resolved.
private class DjangoLogger extends Stdlib::Logger::InstanceSource {
DjangoLogger() {
this =
API::moduleImport("django")
.getMember("utils")
.getMember("log")
.getMember("request_logger")
.getAnImmediateUse()
}
}
}
15 changes: 15 additions & 0 deletions python/ql/lib/semmle/python/frameworks/Flask.qll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.frameworks.Werkzeug
private import semmle.python.frameworks.Stdlib
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.security.dataflow.PathInjectionCustomizations
Expand Down Expand Up @@ -569,4 +570,18 @@ module Flask {
result in [this.getArg(0), this.getArgByName("filename_or_fp")]
}
}

// ---------------------------------------------------------------------------
// Logging
// ---------------------------------------------------------------------------
/**
* A Flask application provides a standard Python logger via the `logger` attribute.
*
* See
* - https://flask.palletsprojects.com/en/2.0.x/api/#flask.Flask.logger
* - https://flask.palletsprojects.com/en/2.0.x/logging/
*/
private class FlaskLogger extends Stdlib::Logger::InstanceSource {
FlaskLogger() { this = FlaskApp::instance().getMember("logger").getAnImmediateUse() }
}
}
73 changes: 50 additions & 23 deletions python/ql/lib/semmle/python/frameworks/Stdlib.qll
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,54 @@ module Stdlib {
}
}
}

// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
/**
* Provides models for the `logging.Logger` class
Comment thread
yoff marked this conversation as resolved.
Outdated
*
* See https://docs.python.org/3.9/library/logging.html#logging.Logger.
*/
module Logger {
/** Gets a reference to the `logging.Logger` class or any subclass. */
private API::Node subclassRef() {
result = API::moduleImport("logging").getMember("Logger").getASubclass*()
}

/**
* A source of instances of `logging.Logger`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Logger::instance()` to get references to instances of `logging.Logger`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }

/** A direct instantiation of `logging.Logger`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
ClassInstantiation() {
this = subclassRef().getACall()
or
this = API::moduleImport("logging").getMember("root").getAnImmediateUse()
or
this = API::moduleImport("logging").getMember("getLogger").getACall()
}
}

/** Gets a reference to an instance of `logging.Logger`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}

/** Gets a reference to an instance of `logging.Logger`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
Comment on lines +249 to +287
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that I created this non-standard setup with just using API::Node, but now that we're exposing it more publicly, we need to rewrite this to use the standard library modeling setup with InstanceSource and type-tracking (which has a snippet, so it's quite easy).

Looking over the code again, I don't quite know why I did this in the first place. LoggerLogCall can easily be written with the standard approach together with DataFlow::MethodCallNode 🤷

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Surely it is nicer, if we can get away with using an API:Node?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved to standard setup as discussed offline.

}

/**
Expand Down Expand Up @@ -2642,27 +2690,6 @@ private module StdlibPrivate {
// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
/**
* Provides models for the `logging.Logger` class and subclasses.
*
* See https://docs.python.org/3.9/library/logging.html#logging.Logger.
*/
module Logger {
/** Gets a reference to the `logging.Logger` class or any subclass. */
API::Node subclassRef() {
result = API::moduleImport("logging").getMember("Logger").getASubclass*()
}

/** Gets a reference to an instance of `logging.Logger` or any subclass. */
API::Node instance() {
result = subclassRef().getReturn()
or
result = API::moduleImport("logging").getMember("root")
or
result = API::moduleImport("logging").getMember("getLogger").getReturn()
}
}

/**
* A call to one of the logging methods from `logging` or on a `logging.Logger`
* subclass.
Expand All @@ -2683,14 +2710,14 @@ private module StdlibPrivate {
method = "log" and
msgIndex = 1
|
this = Logger::instance().getMember(method).getACall()
this.(DataFlow::MethodCallNode).calls(Stdlib::Logger::instance(), method)
or
this = API::moduleImport("logging").getMember(method).getACall()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't agree about this removal. I see that API::moduleImport("logging") is now modeled as a LoggerInstance... but that is not true, these are helper methods defined on the module leve.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there value in modelling instances specifically? Or could we just rename instance to something like loggingMethodProvider?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted to previous modelling approach as discussed offline.

)
}

override DataFlow::Node getAnInput() {
result = this.getArgByName("msg")
result = this.getArgByName(["msg", "extra"])
or
result = this.getArg(any(int i | i >= msgIndex))
}
Expand Down
35 changes: 35 additions & 0 deletions python/ql/lib/semmle/python/security/dataflow/LogInjection.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/**
* Provides a taint-tracking configuration for tracking untrusted user input used in log entries.
*
* Note, for performance reasons: only import this file if
* `LogInjection::Configuration` is needed, otherwise
* `LogInjectionCustomizations` should be imported instead.
*/

import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking

/**
* Provides a taint-tracking configuration for tracking untrusted user input used in log entries.
*/
module LogInjection {
import LogInjectionCustomizations::LogInjection

/**
* A taint-tracking configuration for tracking untrusted user input used in log entries.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "LogInjection" }

override predicate isSource(DataFlow::Node source) { source instanceof Source }

override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }

override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "log injection"
* vulnerabilities, as well as extension points for adding your own.
*/

private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards

/**
* Provides default sources, sinks and sanitizers for detecting
* "log injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module LogInjection {
/**
* A data flow source for "log injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }

/**
* A data flow sink for "log injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }

/**
* A sanitizer for "log injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }

/**
* A sanitizer guard for "log injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

/**
* A logging operation, considered as a flow sink.
*/
class LoggingAsSink extends Sink {
LoggingAsSink() { this = any(Logging write).getAnInput() }
}

/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }

/**
* A call to replace line breaks functions as a sanitizer.
Comment thread
yoff marked this conversation as resolved.
Outdated
*/
class ReplaceLineBreaksSanitizer extends Sanitizer, DataFlow::CallCfgNode {
ReplaceLineBreaksSanitizer() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "replace" and
this.getArg(0).asExpr().(StrConst).getText() in ["\r\n", "\n"]
}
}
Comment on lines +59 to +72
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sanitizer check seems a bit weak. What if it only did text.replace("\r\n", "\n")? I guess it's not trivial to write a sanitizer that knows we've eliminated both, so maybe this is ok? (what are your thoughts on this)

Copy link
Copy Markdown
Contributor Author

@yoff yoff Feb 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have had the same concerns from the beginning.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be great to have a small comment in the code to explain this dilemma, and explain why the solution implemented was deemed ok.

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@

<p>If unsanitized user input is written to a log entry, a malicious user may be able to forge new log entries.</p>

<p>Forgery can occur if a user provides some input creating the appearance of multiple
log entries. This can include unescaped new-line characters, or HTML or other markup.</p>
<p>Forgery can occur if a user provides some input with characters that are interpreted
when the log output is displayed. If the log is displayed as a plain text file, then new
line characters can be used by a malicious user to create the appearance of multiple log
entries. If the log is displayed as HTML, then arbitrary HTML may be included to spoof
log entries.</p>
</overview>

<recommendation>
Expand All @@ -29,14 +32,14 @@ other forms of HTML injection.

<example>
<p>
In the example, the name provided by the user is recorded using the log output function (<code>logging.info</code> or <code>app.logger.info</code>, etc.).
In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides <code>Guest%0D%0AUser name: Admin</code>
In the example, the name provided by the user is recorded using the log output function (<code>logging.info</code> or <code>app.logger.info</code>, etc.).
In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides <code>Guest%0D%0AUser name: Admin</code>
as a parameter, the log entry will be divided into two lines, the first line is <code>User name: Guest</code> code>, the second line is <code>User name: Admin</code>.
</p>
<sample src="LogInjectionBad.py" />

<p>
In a good example, the program uses the <code>replace</code> function to provide parameter processing to the user, and replace <code>\r\n</code> and <code>\n</code>
In a good example, the program uses the <code>replace</code> function to provide parameter processing to the user, and replace <code>\r\n</code> and <code>\n</code>
with empty characters. To a certain extent, the occurrence of log injection vulnerabilities is reduced.
</p>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@
* insertion of forged log entries by a malicious user.
* @kind path-problem
* @problem.severity error
* @precision high
* @security-severity 7.8
* @precision medium
Comment thread
RasmusWL marked this conversation as resolved.
* @id py/log-injection
* @tags security
* external/cwe/cwe-117
*/

import python
import experimental.semmle.python.security.injection.LogInjection
import semmle.python.security.dataflow.LogInjection
import DataFlow::PathGraph

from LogInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
from LogInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to log entry.", source.getNode(),
"User-provided value"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: newQuery
---
* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182).
30 changes: 0 additions & 30 deletions python/ql/src/experimental/semmle/python/Concepts.qll
Original file line number Diff line number Diff line change
Expand Up @@ -14,36 +14,6 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks

/** Provides classes for modeling log related APIs. */
module LogOutput {
/**
* A data flow node for log output.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LogOutput` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Get the parameter value of the log output function.
*/
abstract DataFlow::Node getAnInput();
}
}

/**
* A data flow node for log output.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LogOutput::Range` instead.
*/
class LogOutput extends DataFlow::Node {
LogOutput::Range range;

LogOutput() { this = range }

DataFlow::Node getAnInput() { result = range.getAnInput() }
}

/** Provides classes for modeling LDAP query execution-related APIs. */
module LDAPQuery {
/**
Expand Down
1 change: 0 additions & 1 deletion python/ql/src/experimental/semmle/python/Frameworks.qll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL
private import experimental.semmle.python.frameworks.Log
private import experimental.semmle.python.frameworks.JWT
private import experimental.semmle.python.libraries.PyJWT
private import experimental.semmle.python.libraries.Authlib
Expand Down
Loading