Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/codeql/support/reusables/frameworks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,10 @@ Python built-in support
Flask, Web framework
Tornado, Web framework
Twisted, Web framework
PyYAML, Serialization
starlette, Asynchronous Server Gateway Interface (ASGI)
dill, Serialization
PyYAML, Serialization
ruamel.yaml, Serialization
simplejson, Serialization
ujson, Serialization
fabric, Utility library
Expand Down
2 changes: 2 additions & 0 deletions python/change-notes/2021-10-26-ruamel.yaml-modeling.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the `ruamel.yaml` PyPI package, resulting in additional sinks for the _Deserializing untrusted input_ (`py/unsafe-deserialization`) query (since `ruamel.yaml.load` can lead to code execution).
1 change: 1 addition & 0 deletions python/ql/lib/semmle/python/Frameworks.qll
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.RuamelYaml
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Stdlib
Expand Down
57 changes: 57 additions & 0 deletions python/ql/lib/semmle/python/frameworks/RuamelYaml.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/**
* Provides classes modeling security-relevant aspects of the `ruamel.yaml` PyPI package
*
* See
* - https://pypi.org/project/ruamel.yaml/
*/

private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs

/**
* Provides models for the `ruamel.yaml` PyPI package.
*
* See
* - https://pypi.org/project/ruamel.yaml/
*/
private module RuamelYaml {
// Note: `ruamel.yaml` is a fork of the `PyYAML` PyPI package, so that's why the
// interface is so similar.
/**
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `safe_load`, `safe_load_all`)
*
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
*/
private class RuamelYamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
string func_name;

RuamelYamlLoadCall() {
func_name in ["load", "load_all", "safe_load", "safe_load_all"] and
this = API::moduleImport("ruamel").getMember("yaml").getMember(func_name).getACall()
}

override predicate mayExecuteInput() {
func_name in ["load", "load_all"] and
// If the `Loader` argument is not set, the default loader will be used, which is
// not safe. The only safe loaders are `SafeLoader` or `BaseLoader` (and their
// variants with C implementation).
not exists(DataFlow::Node loader_arg |
loader_arg in [this.getArg(1), this.getArgByName("Loader")]
|
loader_arg =
API::moduleImport("ruamel")
.getMember("yaml")
.getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"])
.getAUse()
)
}

override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("stream")] }

override DataFlow::Node getOutput() { result = this }

override string getFormat() { result = "YAML" }
}
}
19 changes: 12 additions & 7 deletions python/ql/lib/semmle/python/frameworks/Yaml.qll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs

Expand Down Expand Up @@ -41,11 +40,17 @@ private module Yaml {
}

/**
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
* This function was thought safe from the 5.1 release in 2017, when the default
* loader was changed to `FullLoader` (see
* https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation).
*
* In 2020 new exploits were found, meaning it's not safe. With the 6.0 release (see
* https://github.com/yaml/pyyaml/commit/8cdff2c80573b8be8e8ad28929264a913a63aa33),
* when using `load` and `load_all` you are now required to specify a Loader. But
* from what I (@RasmusWL) can gather, `FullLoader` is not to be considered safe,
* although known exploits have been mitigated (is at least my impression). Also see
* https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389 for more
* details.
*/
override predicate mayExecuteInput() {
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
Expand All @@ -63,7 +68,7 @@ private module Yaml {
)
}

override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("stream")] }

override DataFlow::Node getOutput() { result = this }

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest
33 changes: 33 additions & 0 deletions python/ql/test/library-tests/frameworks/ruamel.yaml/Decoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import ruamel.yaml

# Unsafe:
ruamel.yaml.load(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
ruamel.yaml.load(stream=payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
ruamel.yaml.load(payload, ruamel.yaml.Loader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput

# Safe:
ruamel.yaml.load(payload, ruamel.yaml.SafeLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
ruamel.yaml.load(payload, Loader=ruamel.yaml.SafeLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
ruamel.yaml.load(payload, ruamel.yaml.BaseLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
ruamel.yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.safe_load(..) decodeFormat=YAML

################################################################################
# load_all variants
################################################################################

# Unsafe:
ruamel.yaml.load_all(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.load_all(..) decodeFormat=YAML decodeMayExecuteInput

# Safe:
ruamel.yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.safe_load_all(..) decodeFormat=YAML

################################################################################
# C-based loaders with `libyaml`
################################################################################

# Unsafe:
ruamel.yaml.load(payload, ruamel.yaml.CLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput

# Safe:
ruamel.yaml.load(payload, ruamel.yaml.CSafeLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
ruamel.yaml.load(payload, ruamel.yaml.CBaseLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
63 changes: 63 additions & 0 deletions python/ql/test/library-tests/frameworks/ruamel.yaml/PoC
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python3

# this file doesn't have a .py extension so the extractor doesn't pick it up, so it
# doesn't have to be annotated

# This file is just a Proof of Concept for how code execution can be triggered.

import os
import ruamel.yaml

class Exploit(object):
def __reduce__(self):
return (os.system, ('ls',))

data = Exploit()
serialized_data = ruamel.yaml.dump(data)

# All these will execute `ls`
print("!!! ruamel.yaml.load")
ruamel.yaml.load(serialized_data)

print("!!! ruamel.yaml.load kwarg")
ruamel.yaml.load(stream=serialized_data)

print("!!! ruamel.yaml.load with Loader=ruamel.yaml.Loader")
ruamel.yaml.load(serialized_data, ruamel.yaml.Loader)

print("!!! ruamel.yaml.load with Loader=ruamel.yaml.UnsafeLoader")
ruamel.yaml.load(serialized_data, ruamel.yaml.UnsafeLoader)

print("!!! ruamel.yaml.load with Loader=ruamel.yaml.CLoader")
ruamel.yaml.load(serialized_data, ruamel.yaml.CLoader)

# you need to iterate through the result for it to execute... but it still works
print("!!! ruamel.yaml.load_all")
for _ in ruamel.yaml.load_all(serialized_data):
pass

# check that the safe version is actually safe
print("\n" + "-"*80)
print("safe versions")
print("-" * 80)

print("!!! ruamel.yaml.safe_load")
try:
ruamel.yaml.safe_load(serialized_data)
raise Exception("should not happen")
except ruamel.yaml.constructor.ConstructorError:
pass

print("!!! ruamel.yaml.load with Loader=ruamel.yaml.SafeLoader")
try:
ruamel.yaml.load(serialized_data, ruamel.yaml.SafeLoader)
raise Exception("should not happen")
except ruamel.yaml.constructor.ConstructorError:
pass

print("!!! ruamel.yaml.load with Loader=ruamel.yaml.CSafeLoader")
try:
ruamel.yaml.load(serialized_data, ruamel.yaml.CSafeLoader)
raise Exception("should not happen")
except ruamel.yaml.constructor.ConstructorError:
pass
1 change: 1 addition & 0 deletions python/ql/test/library-tests/frameworks/yaml/Decoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# Unsafe:
yaml.load(payload) # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
yaml.load(stream=payload) # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
yaml.load(payload, yaml.Loader) # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=yaml.unsafe_load(..) decodeFormat=YAML decodeMayExecuteInput
yaml.full_load(payload) # $ decodeInput=payload decodeOutput=yaml.full_load(..) decodeFormat=YAML decodeMayExecuteInput
Expand Down
58 changes: 58 additions & 0 deletions python/ql/test/library-tests/frameworks/yaml/PoC
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python3

# this file doesn't have a .py extension so the extractor doesn't pick it up, so it
# doesn't have to be annotated

# This file is just a Proof of Concept for how code execution can be triggered.


import os
import yaml

class Exploit(object):
def __reduce__(self):
return (os.system, ('ls',))

data = Exploit()
serialized_data = yaml.dump(data)

# All these will execute `ls`
print("!!! yaml.unsafe_load")
yaml.unsafe_load(serialized_data)

print("!!! yaml.unsafe_load kwarg")
yaml.unsafe_load(stream=serialized_data)

print("!!! yaml.load with Loader=yaml.UnsafeLoader")
yaml.load(serialized_data, yaml.UnsafeLoader)

# you need to iterate through the result for it to execute... but it still works
print("!!! yaml.unsafe_load_all")
for _ in yaml.unsafe_load_all(serialized_data):
pass

# check that the safe version is actually safe
print("\n" + "-"*80)
print("safe versions")
print("-" * 80)

print("!!! yaml.load")
try:
yaml.load(serialized_data)
raise Exception("should not happen")
except yaml.constructor.ConstructorError:
pass

print("!!! yaml.safe_load")
try:
yaml.safe_load(serialized_data)
raise Exception("should not happen")
except yaml.constructor.ConstructorError:
pass

print("!!! yaml.load with Loader=yaml.SafeLoader")
try:
yaml.load(serialized_data, yaml.SafeLoader)
raise Exception("should not happen")
except yaml.constructor.ConstructorError:
pass