-
Notifications
You must be signed in to change notification settings - Fork 2k
Fix more FPs in cpp/iterator-to-expired-container
#16255
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2b9c96d
cedc84d
4aee6d5
77a7e00
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,29 +23,15 @@ private predicate tempToDestructorSink(DataFlow::Node sink, CallInstruction call | |
| call.getStaticCallTarget() instanceof Destructor | ||
| } | ||
|
|
||
| /** | ||
| * A configuration to track flow from a temporary variable to the qualifier of | ||
| * a destructor call | ||
| */ | ||
| module TempToDestructorConfig implements DataFlow::ConfigSig { | ||
| predicate isSource(DataFlow::Node source) { | ||
| source.asInstruction().(VariableAddressInstruction).getIRVariable() instanceof IRTempVariable | ||
| } | ||
|
|
||
| predicate isSink(DataFlow::Node sink) { tempToDestructorSink(sink, _) } | ||
| } | ||
|
|
||
| module TempToDestructorFlow = DataFlow::Global<TempToDestructorConfig>; | ||
|
|
||
| /** Holds if `pun` is the post-update node of the qualifier of `Call`. */ | ||
| private predicate isPostUpdateOfQualifier(CallInstruction call, DataFlow::PostUpdateNode pun) { | ||
| call.getThisArgumentOperand() = pun.getPreUpdateNode().asOperand() | ||
| } | ||
|
|
||
| /** | ||
| * Gets a `DataFlow::Node` that represents a temporary that will be destroyed | ||
| * by a call to a destructor, or a `DataFlow::Node` that will transitively be | ||
| * destroyed by a call to a destructor. | ||
| * by a call to a destructor when `n` is destroyed, or a `DataFlow::Node` that | ||
| * will transitively be destroyed by a call to a destructor. | ||
| * | ||
| * For the latter case, consider something like: | ||
| * ``` | ||
|
|
@@ -57,23 +43,21 @@ private predicate isPostUpdateOfQualifier(CallInstruction call, DataFlow::PostUp | |
| * destroyed by a call to `std::vector<std::vector<int>>::~vector`, | ||
| * and thus the result of `get_2d_vector()[0]` is also an invalid reference. | ||
| */ | ||
| DataFlow::Node getADestroyedNode() { | ||
| exists(DataFlow::Node n | TempToDestructorFlow::flowTo(n) | | ||
| // Case 1: The pointer that goes into the destructor call is destroyed | ||
| exists(CallInstruction destructorCall | | ||
| tempToDestructorSink(n, destructorCall) and | ||
| isPostUpdateOfQualifier(destructorCall, result) | ||
| ) | ||
| or | ||
| // Case 2: Anything that was derived from the temporary that is now destroyed | ||
| // is also destroyed. | ||
| exists(CallInstruction call | | ||
| result.asInstruction() = call and | ||
| DataFlow::localFlow(DataFlow::operandNode(call.getThisArgumentOperand()), n) | ||
| | | ||
| call.getStaticCallTarget() instanceof StdSequenceContainerAt or | ||
| call.getStaticCallTarget() instanceof StdMapAt | ||
| ) | ||
| DataFlow::Node getADestroyedNode(DataFlow::Node n) { | ||
| // Case 1: The pointer that goes into the destructor call is destroyed | ||
| exists(CallInstruction destructorCall | | ||
| tempToDestructorSink(n, destructorCall) and | ||
| isPostUpdateOfQualifier(destructorCall, result) | ||
| ) | ||
| or | ||
| // Case 2: Anything that was derived from the temporary that is now destroyed | ||
| // is also destroyed. | ||
| exists(CallInstruction call | | ||
| result.asInstruction() = call and | ||
| DataFlow::localFlow(DataFlow::operandNode(call.getThisArgumentOperand()), n) | ||
| | | ||
| call.getStaticCallTarget() instanceof StdSequenceContainerAt or | ||
| call.getStaticCallTarget() instanceof StdMapAt | ||
| ) | ||
| } | ||
|
|
||
|
|
@@ -86,12 +70,39 @@ predicate destroyedToBeginSink(DataFlow::Node sink, FunctionCall fc) { | |
| } | ||
|
|
||
| /** | ||
| * Flow from any destroyed object to the qualifier of a `begin` or `end` call | ||
| * A configuration to track flow from a temporary variable to the qualifier of | ||
| * a destructor call, and subsequently to a qualifier of a call to `begin` or | ||
| * `end`. | ||
| */ | ||
| module DestroyedToBeginConfig implements DataFlow::ConfigSig { | ||
| predicate isSource(DataFlow::Node source) { source = getADestroyedNode() } | ||
| module Config implements DataFlow::StateConfigSig { | ||
| newtype FlowState = | ||
| additional TempToDestructor() or | ||
| additional DestroyedToBegin(DataFlow::Node n) { | ||
| exists(DataFlow::Node thisOperand | | ||
| tempToDestructorSink(thisOperand, _) and | ||
| n = getADestroyedNode(thisOperand) | ||
| ) | ||
| } | ||
|
|
||
| predicate isSource(DataFlow::Node source, FlowState state) { | ||
| source.asInstruction().(VariableAddressInstruction).getIRVariable() instanceof IRTempVariable and | ||
| state = TempToDestructor() | ||
| } | ||
|
|
||
| predicate isAdditionalFlowStep( | ||
| DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 | ||
| ) { | ||
| tempToDestructorSink(node1, _) and | ||
| state1 = TempToDestructor() and | ||
| state2 = DestroyedToBegin(node2) and | ||
| node2 = getADestroyedNode(node1) | ||
| } | ||
|
|
||
| predicate isSink(DataFlow::Node sink) { destroyedToBeginSink(sink, _) } | ||
| predicate isSink(DataFlow::Node sink, FlowState state) { | ||
| // Note: This is a non-trivial cartesian product! | ||
| // Hopefully, both of these sets are quite small in practice | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was a tiny bit concerned that in a database with lots of results for this query, both sets might be distinctly "medium" rather than clearly "small". Mind you a cartesian product of even a few thousand nodes / states still shouldn't be enough to matter. Based on the stage timings table on the DCA run, I decided to have a closer look at an
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed, this is basically the same CP as the one we fixed for a different query in #10123. We could make this a lot safer by doing a similar transformation, but I didn't want to do it before we had seen a DB that needed this.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can also consider doing this transformation before we pull the query out of experimental. But in any case, I would prefer to keep that transformation out of this PR
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds like it might be worth doing before we pull the query out of experimental, or alternatively we can rely on QA to catch any problems. Either way you're right, it doesn't need addressing here. |
||
| destroyedToBeginSink(sink, _) and state instanceof DestroyedToBegin | ||
| } | ||
|
|
||
| DataFlow::FlowFeature getAFeature() { | ||
| // By blocking argument-to-parameter flow we ensure that we don't enter a | ||
|
|
@@ -108,8 +119,11 @@ module DestroyedToBeginConfig implements DataFlow::ConfigSig { | |
| } | ||
| } | ||
|
|
||
| module DestroyedToBeginFlow = DataFlow::Global<DestroyedToBeginConfig>; | ||
| module Flow = DataFlow::GlobalWithState<Config>; | ||
|
|
||
| from DataFlow::Node source, DataFlow::Node sink, FunctionCall beginOrEnd | ||
| where DestroyedToBeginFlow::flow(source, sink) and destroyedToBeginSink(sink, beginOrEnd) | ||
| select source, "This object is destroyed before $@ is called.", beginOrEnd, beginOrEnd.toString() | ||
| from Flow::PathNode source, Flow::PathNode sink, FunctionCall beginOrEnd, DataFlow::Node mid | ||
| where | ||
| Flow::flowPath(source, sink) and | ||
| destroyedToBeginSink(sink.getNode(), beginOrEnd) and | ||
| sink.getState() = Config::DestroyedToBegin(mid) | ||
| select mid, "This object is destroyed before $@ is called.", beginOrEnd, beginOrEnd.toString() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So what exactly is different now compared to when we had two sequential dataflows? The source of the second flow (
DestroyedToBeginFlow) still had to be a reached sink of the first (TempToDestructorFlow) flow. I can see that we now know the correct overall source ... but that isn't reported by the query anyway.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider this hypothetical example:
And consider a QL query that has two configurations:
C1) fromsource()toxinreturn x(inid), andC2) starting at any sink fromC1to the argument ofsink.C1finds a flow path fromf1toid, andC2finds a flow path fromidtof2. So if we simply use the result ofC2we get a FP saying that there's flow fromsource()tosink(x1).However, if we combine these into a single configuration then the dataflow library won't lose track of the call context. This means flow can no longer enter
idfromf1and exitidtof2.Does that make sense?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes, that makes sense and fits with what's going on in the full test case. Thanks!