Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Address reviewer feedback: fix sink modeling for shutil and subprocess
- Remove ShutilUnpackArchiveSource (should not be a source)
- Change ShutilUnpackArchiveSink to target getArg(0) (the filename arg, not the whole call); removes the now-redundant literal check
- Remove SubprocessTarExtractionSource (should not be a source)
- Change SubprocessTarExtractionSink to target the specific non-literal element in the command list (the filename), not the call itself
- Remove private isSubprocessTarExtraction predicate (inlined into the sink)
- Revert TarSlip.expected to its pre-PR state (the new sinks require proper source taint flow to fire)

Agent-Logs-Url: https://github.com/github/codeql/sessions/833673da-f868-4c3b-8bff-62364ee0ed19

Co-authored-by: hvitved <3667920+hvitved@users.noreply.github.com>
  • Loading branch information
Copilot and hvitved authored Apr 16, 2026
commit 50848834a88c70ca67adf1d9c1d6c422c5a3ae12
Original file line number Diff line number Diff line change
Expand Up @@ -132,77 +132,50 @@ module TarSlip {
}
}

/**
* A call to `shutil.unpack_archive`, considered as a flow source.
*
* The archive filename is not hardcoded, so it may come from user input.
*/
class ShutilUnpackArchiveSource extends Source {
ShutilUnpackArchiveSource() {
this = API::moduleImport("shutil").getMember("unpack_archive").getACall() and
not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StringLiteral
}
}

/**
* A call to `shutil.unpack_archive`, considered as a flow sink.
*
* The archive filename is not hardcoded, so it may come from user input.
* The first argument (the archive filename) is the sink.
*/
class ShutilUnpackArchiveSink extends Sink {
ShutilUnpackArchiveSink() {
this = API::moduleImport("shutil").getMember("unpack_archive").getACall() and
not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StringLiteral
this = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
}
}

/**
* Holds if `call` is a subprocess call that invokes `tar` for archive extraction
* with at least one non-literal argument (the archive filename).
*
* Detects patterns like `subprocess.run(["tar", "-xf", untrusted_filename])`.
*/
private predicate isSubprocessTarExtraction(DataFlow::CallCfgNode call) {
exists(SequenceNode cmdList |
call =
API::moduleImport("subprocess")
.getMember(["run", "call", "check_call", "check_output", "Popen"])
.getACall() and
cmdList = call.getArg(0).asCfgNode() and
// Command must be "tar" exactly or a path ending in "/tar" (e.g. "/usr/bin/tar")
exists(string cmd |
cmd = cmdList.getElement(0).getNode().(StringLiteral).getText() and
(cmd = "tar" or cmd.matches("%/tar"))
) and
// At least one extraction-related flag must be present:
// single-dash flags containing 'x' (like -x, -xf, -xvf) or the long option --extract
exists(string flag |
flag = cmdList.getElement(_).getNode().(StringLiteral).getText() and
(flag.regexpMatch("-[a-zA-Z]*x[a-zA-Z]*") or flag = "--extract")
) and
// At least one non-literal argument (the archive filename)
exists(int i |
i > 0 and
exists(cmdList.getElement(i)) and
not cmdList.getElement(i).getNode() instanceof StringLiteral
)
)
}

/**
* A call to `subprocess` functions that invokes `tar` for archive extraction,
* considered as a flow source.
*/
class SubprocessTarExtractionSource extends Source {
SubprocessTarExtractionSource() { isSubprocessTarExtraction(this) }
}

/**
* A call to `subprocess` functions that invokes `tar` for archive extraction,
* considered as a flow sink.
*
* The sink is the non-literal element in the command list that corresponds
* to the archive filename (e.g. the `unsafe_filename` in
* `subprocess.run(["tar", "-xf", unsafe_filename])`).
*/
class SubprocessTarExtractionSink extends Sink {
SubprocessTarExtractionSink() { isSubprocessTarExtraction(this) }
SubprocessTarExtractionSink() {
exists(SequenceNode cmdList, DataFlow::CallCfgNode call, int i |
call =
API::moduleImport("subprocess")
.getMember(["run", "call", "check_call", "check_output", "Popen"])
.getACall() and
cmdList = call.getArg(0).asCfgNode() and
// Command must be "tar" exactly or a path ending in "/tar" (e.g. "/usr/bin/tar")
exists(string cmd |
cmd = cmdList.getElement(0).getNode().(StringLiteral).getText() and
(cmd = "tar" or cmd.matches("%/tar"))
) and
// At least one extraction-related flag must be present:
// single-dash flags containing 'x' (like -x, -xf, -xvf) or the long option --extract
exists(string flag |
flag = cmdList.getElement(_).getNode().(StringLiteral).getText() and
(flag.regexpMatch("-[a-zA-Z]*x[a-zA-Z]*") or flag = "--extract")
) and
// The sink is the specific non-literal argument (the archive filename)
i > 0 and
not cmdList.getElement(i).getNode() instanceof StringLiteral and
this.asCfgNode() = cmdList.getElement(i)
)
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ nodes
| tarslip.py:112:1:112:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
subpaths
#select
| tarslip.py:15:1:15:3 | ControlFlowNode for tar | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | tarslip.py:15:1:15:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | potentially untrusted source |
Expand All @@ -67,6 +64,3 @@ subpaths
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:96:17:96:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:110:1:110:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:113:24:113:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | potentially untrusted source |
Loading