Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,15 @@ private predicate tempToDestructorSink(DataFlow::Node sink, CallInstruction call
call.getStaticCallTarget() instanceof Destructor
}

/**
* A configuration to track flow from a temporary variable to the qualifier of
* a destructor call
*/
module TempToDestructorConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
source.asInstruction().(VariableAddressInstruction).getIRVariable() instanceof IRTempVariable
}

predicate isSink(DataFlow::Node sink) { tempToDestructorSink(sink, _) }
}

module TempToDestructorFlow = DataFlow::Global<TempToDestructorConfig>;

/** Holds if `pun` is the post-update node of the `this`-argument (qualifier) of `call`. */
private predicate isPostUpdateOfQualifier(CallInstruction call, DataFlow::PostUpdateNode pun) {
call.getThisArgumentOperand() = pun.getPreUpdateNode().asOperand()
}

/**
* Gets a `DataFlow::Node` that represents a temporary that will be destroyed
* by a call to a destructor, or a `DataFlow::Node` that will transitively be
* destroyed by a call to a destructor.
* by a call to a destructor when `n` is destroyed, or a `DataFlow::Node` that
* will transitively be destroyed by a call to a destructor.
*
* For the latter case, consider something like:
* ```
Expand All @@ -57,23 +43,21 @@ private predicate isPostUpdateOfQualifier(CallInstruction call, DataFlow::PostUp
* destroyed by a call to `std::vector<std::vector<int>>::~vector`,
* and thus the result of `get_2d_vector()[0]` is also an invalid reference.
*/
DataFlow::Node getADestroyedNode() {
exists(DataFlow::Node n | TempToDestructorFlow::flowTo(n) |
// Case 1: The pointer that goes into the destructor call is destroyed
exists(CallInstruction destructorCall |
tempToDestructorSink(n, destructorCall) and
isPostUpdateOfQualifier(destructorCall, result)
)
or
// Case 2: Anything that was derived from the temporary that is now destroyed
// is also destroyed.
exists(CallInstruction call |
result.asInstruction() = call and
DataFlow::localFlow(DataFlow::operandNode(call.getThisArgumentOperand()), n)
|
call.getStaticCallTarget() instanceof StdSequenceContainerAt or
call.getStaticCallTarget() instanceof StdMapAt
)
// `n` is the node flowing into a destructor-call qualifier (see `tempToDestructorSink`);
// the result is a node whose value is invalidated when `n` is destroyed.
DataFlow::Node getADestroyedNode(DataFlow::Node n) {
// Case 1: The pointer that goes into the destructor call is destroyed
// (the result is the post-update node of the destructor call's qualifier).
exists(CallInstruction destructorCall |
tempToDestructorSink(n, destructorCall) and
isPostUpdateOfQualifier(destructorCall, result)
)
or
// Case 2: Anything that was derived from the temporary that is now destroyed
// is also destroyed. Here "derived" means obtained through an element-access
// call (`at`/`operator[]`) whose qualifier locally flows to `n`.
exists(CallInstruction call |
result.asInstruction() = call and
DataFlow::localFlow(DataFlow::operandNode(call.getThisArgumentOperand()), n)
|
call.getStaticCallTarget() instanceof StdSequenceContainerAt or
call.getStaticCallTarget() instanceof StdMapAt
)
}

Expand All @@ -86,12 +70,39 @@ predicate destroyedToBeginSink(DataFlow::Node sink, FunctionCall fc) {
}

/**
* Flow from any destroyed object to the qualifier of a `begin` or `end` call
* A configuration to track flow from a temporary variable to the qualifier of
* a destructor call, and subsequently to a qualifier of a call to `begin` or
* `end`.
*/
module DestroyedToBeginConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source = getADestroyedNode() }
module Config implements DataFlow::StateConfigSig {
newtype FlowState =
additional TempToDestructor() or
additional DestroyedToBegin(DataFlow::Node n) {
exists(DataFlow::Node thisOperand |
tempToDestructorSink(thisOperand, _) and
n = getADestroyedNode(thisOperand)
)
}

predicate isSource(DataFlow::Node source, FlowState state) {
source.asInstruction().(VariableAddressInstruction).getIRVariable() instanceof IRTempVariable and
state = TempToDestructor()
}

predicate isAdditionalFlowStep(
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
) {
tempToDestructorSink(node1, _) and
state1 = TempToDestructor() and
state2 = DestroyedToBegin(node2) and
node2 = getADestroyedNode(node1)
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So what exactly is different now compared to when we had two sequential dataflows? The source of the second flow (DestroyedToBeginFlow) still had to be a reached sink of the first (TempToDestructorFlow) flow. I can see that we now know the correct overall source ... but that isn't reported by the query anyway.

Copy link
Copy Markdown
Contributor Author

@MathiasVP MathiasVP Apr 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider this hypothetical example:

int id(int x) {
  return x;
}

void f1() {
  int x2 = id(source());
  safe(x2);
}

void f2() {
  int x1 = id(0);
  sink(x1);
}

And consider a QL query that has two configurations:

  • One configuration (C1) from source() to x in return x (in id), and
  • One configuration (C2) starting at any sink from C1 to the argument of sink.

C1 finds a flow path from f1 to id, and C2 finds a flow path from id to f2. So if we simply use the result of C2 we get a FP saying that there's flow from source() to sink(x1).

However, if we combine these into a single configuration then the dataflow library won't lose track of the call context. This means flow can no longer enter id from f1 and exit id to f2.

Does that make sense?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, that makes sense and fits with what's going on in the full test case. Thanks!


predicate isSink(DataFlow::Node sink) { destroyedToBeginSink(sink, _) }
predicate isSink(DataFlow::Node sink, FlowState state) {
// Note: This is a non-trivial cartesian product!
// Hopefully, both of these sets are quite small in practice
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was a tiny bit concerned that in a database with lots of results for this query, both sets might be distinctly "medium" rather than clearly "small". Mind you a cartesian product of even a few thousand nodes / states still shouldn't be enough to matter.

Based on the stage timings table on the DCA run, I decided to have a closer look at an abseil-cpp-linux run locally. It took one second to execute (with a cache warmed up on another dataflow query), no more than 122ms on any single predicate. So I'm reassured.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, this is basically the same CP as the one we fixed for a different query in #10123. We could make this a lot safer by doing a similar transformation, but I didn't want to do it before we had seen a DB that needed this.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can also consider doing this transformation before we pull the query out of experimental. But in any case, I would prefer to keep that transformation out of this PR

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds like it might be worth doing before we pull the query out of experimental, or alternatively we can rely on QA to catch any problems. Either way you're right, it doesn't need addressing here.

destroyedToBeginSink(sink, _) and state instanceof DestroyedToBegin
}

DataFlow::FlowFeature getAFeature() {
// By blocking argument-to-parameter flow we ensure that we don't enter a
Expand All @@ -108,8 +119,11 @@ module DestroyedToBeginConfig implements DataFlow::ConfigSig {
}
}

module DestroyedToBeginFlow = DataFlow::Global<DestroyedToBeginConfig>;
module Flow = DataFlow::GlobalWithState<Config>;

from DataFlow::Node source, DataFlow::Node sink, FunctionCall beginOrEnd
where DestroyedToBeginFlow::flow(source, sink) and destroyedToBeginSink(sink, beginOrEnd)
select source, "This object is destroyed before $@ is called.", beginOrEnd, beginOrEnd.toString()
from Flow::PathNode source, Flow::PathNode sink, FunctionCall beginOrEnd, DataFlow::Node mid
where
Flow::flowPath(source, sink) and
destroyedToBeginSink(sink.getNode(), beginOrEnd) and
sink.getState() = Config::DestroyedToBegin(mid)
select mid, "This object is destroyed before $@ is called.", beginOrEnd, beginOrEnd.toString()
Original file line number Diff line number Diff line change
Expand Up @@ -770,4 +770,26 @@ void test2() {
void test3() {
const std::vector<std::vector<int>>& v = returnValue(); // GOOD
for(const std::vector<int>& x : v) {}
}

// Regression-test fixture: a type deriving from std::vector<int> so that the
// compiler-generated part of `~A` contains a call to `~vector<int>`.
struct A : public std::vector<int> {
// Inserts the contents of a locally-constructed `A` into `result` each
// iteration; `chunk` is destroyed at the end of every loop body, so the
// destructor call here can be reached via a back-edge by flow analyses.
void foo(std::vector<int>& result) {
int i = 0;
while (i < 10) {
A chunk;
result.insert(result.end(), chunk.begin(), chunk.end());
++i;
}
}

// Explicitly defaulted so `~A` still invokes `~vector<int>` implicitly.
~A() = default;
};

void test4() {
// This creates a temporary, after which `~A` is called at the semicolon, and
// `~A` calls `~vector<int>` inside the compiler-generated destructor.
// If we don't preserve the call context and return to the destructor call in this
// function we may end up in the destructor call `chunk.~A()` in `A.foo`. This destructor
// call can flow to `begin` through the back-edge and cause a strange FP.
auto zero = A().size();
}