Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 23 additions & 19 deletions java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,20 @@ private predicate isCanonicalTerm(RelevantRegExpTerm term, string str) {
min(RelevantRegExpTerm t, Location loc, File file |
loc = t.getLocation() and
file = t.getFile() and
str = t.getRawValue() + "|" + getCanonicalizationFlags(t.getRootTerm())
str = getCanonicalizationString(t)
|
t order by t.getFile().getRelativePath(), loc.getStartLine(), loc.getStartColumn()
)
}

/**
* Gets a string representation of the flags used with the regular expression.
* Only the flags that are relevant for the canonicalization are included.
* Gets a string representation of `term` that is used for canonicalization.
*/
string getCanonicalizationFlags(RegExpTerm root) {
root.isRootTerm() and
(if RegExpFlags::isIgnoreCase(root) then result = "i" else result = "")
private string getCanonicalizationString(RelevantRegExpTerm term) {
  // The result is the raw value of `term`, a "|" separator, and a marker
  // derived from the ignore-case flag of the root term of `term`.
  exists(string caseMarker |
    result = term.getRawValue() + "|" + caseMarker and
    (
      // "i" when the root term ignores case, otherwise the empty string.
      if RegExpFlags::isIgnoreCase(term.getRootTerm())
      then caseMarker = "i"
      else caseMarker = ""
    )
  )
}

/**
Expand Down Expand Up @@ -186,12 +187,19 @@ private newtype TInputSymbol =
Epsilon()

/**
* Gets the canonical CharClass for `term`.
* Gets the CharClass corresponding to the canonical representative `term`.
*/
CharClass getCanonicalCharClass(RegExpTerm term) {
private CharClass getCharClassForCanonicalTerm(RegExpTerm term) {
  // Only has a result when `term` is the canonical term for some string `key`;
  // the result is the `CharClass` symbol built from that same string.
  exists(string key | isCanonicalTerm(term, key) and result = CharClass(key))
}

/**
 * Gets the char class built from the canonicalization string of `term`.
 * Unlike `getCharClassForCanonicalTerm`, this also has a result when `term`
 * is not itself the canonical representative.
 */
CharacterClass getCanonicalCharClass(RegExpTerm term) {
  // `getCanonicalizationString` is declared on `RelevantRegExpTerm`.
  result = CharClass(getCanonicalizationString(term))
}

/**
* Holds if `a` and `b` are input symbols from the same regexp.
*/
Expand Down Expand Up @@ -284,7 +292,7 @@ private module CharacterClasses {
*/
pragma[noinline]
predicate hasChildThatMatchesIgnoringCasingFlags(RegExpCharacterClass cc, string char) {
exists(getCanonicalCharClass(cc)) and
exists(getCharClassForCanonicalTerm(cc)) and
exists(RegExpTerm child | child = cc.getAChild() |
char = child.(RegexpCharacterConstant).getValue()
or
Expand Down Expand Up @@ -387,7 +395,7 @@ private module CharacterClasses {
private class PositiveCharacterClass extends CharacterClass {
RegExpCharacterClass cc;

PositiveCharacterClass() { this = getCanonicalCharClass(cc) and not cc.isInverted() }
PositiveCharacterClass() { this = getCharClassForCanonicalTerm(cc) and not cc.isInverted() }

override string getARelevantChar() { result = caseNormalize(getAMentionedChar(cc), cc) }

Expand All @@ -400,7 +408,7 @@ private module CharacterClasses {
private class InvertedCharacterClass extends CharacterClass {
RegExpCharacterClass cc;

InvertedCharacterClass() { this = getCanonicalCharClass(cc) and cc.isInverted() }
InvertedCharacterClass() { this = getCharClassForCanonicalTerm(cc) and cc.isInverted() }

override string getARelevantChar() {
result = nextChar(caseNormalize(getAMentionedChar(cc), cc)) or
Expand Down Expand Up @@ -435,7 +443,7 @@ private module CharacterClasses {

PositiveCharacterClassEscape() {
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
this = getCharClassForCanonicalTerm(cc) and
charClass = ["d", "s", "w"]
}

Expand Down Expand Up @@ -475,7 +483,7 @@ private module CharacterClasses {
NegativeCharacterClassEscape() {
exists(RegExpTerm cc |
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
this = getCharClassForCanonicalTerm(cc) and
charClass = ["D", "S", "W"]
)
}
Expand Down Expand Up @@ -652,17 +660,13 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
or
q1 = before(cc) and
lbl =
CharacterClasses::normalize(CharClass(cc.getRawValue() + "|" +
getCanonicalizationFlags(cc.getRootTerm()))) and
lbl = CharacterClasses::normalize(CharClass(getCanonicalizationString(cc))) and
q2 = after(cc)
)
or
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl =
CharacterClasses::normalize(CharClass(cc.getRawValue() + "|" +
getCanonicalizationFlags(cc.getRootTerm()))) and
lbl = CharacterClasses::normalize(CharClass(getCanonicalizationString(cc))) and
q2 = after(cc)
)
or
Expand Down
42 changes: 23 additions & 19 deletions javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,20 @@ private predicate isCanonicalTerm(RelevantRegExpTerm term, string str) {
min(RelevantRegExpTerm t, Location loc, File file |
loc = t.getLocation() and
file = t.getFile() and
str = t.getRawValue() + "|" + getCanonicalizationFlags(t.getRootTerm())
str = getCanonicalizationString(t)
|
t order by t.getFile().getRelativePath(), loc.getStartLine(), loc.getStartColumn()
)
}

/**
* Gets a string representation of the flags used with the regular expression.
* Only the flags that are relevant for the canonicalization are included.
* Gets a string representation of `term` that is used for canonicalization.
*/
string getCanonicalizationFlags(RegExpTerm root) {
root.isRootTerm() and
(if RegExpFlags::isIgnoreCase(root) then result = "i" else result = "")
private string getCanonicalizationString(RelevantRegExpTerm term) {
  // The result is the raw value of `term`, a "|" separator, and a marker
  // derived from the ignore-case flag of the root term of `term`.
  exists(string caseMarker |
    result = term.getRawValue() + "|" + caseMarker and
    (
      // "i" when the root term ignores case, otherwise the empty string.
      if RegExpFlags::isIgnoreCase(term.getRootTerm())
      then caseMarker = "i"
      else caseMarker = ""
    )
  )
}

/**
Expand Down Expand Up @@ -186,12 +187,19 @@ private newtype TInputSymbol =
Epsilon()

/**
* Gets the canonical CharClass for `term`.
* Gets the CharClass corresponding to the canonical representative `term`.
*/
CharClass getCanonicalCharClass(RegExpTerm term) {
private CharClass getCharClassForCanonicalTerm(RegExpTerm term) {
  // Only has a result when `term` is the canonical term for some string `key`;
  // the result is the `CharClass` symbol built from that same string.
  exists(string key | isCanonicalTerm(term, key) and result = CharClass(key))
}

/**
 * Gets the char class built from the canonicalization string of `term`.
 * Unlike `getCharClassForCanonicalTerm`, this also has a result when `term`
 * is not itself the canonical representative.
 */
CharacterClass getCanonicalCharClass(RegExpTerm term) {
  // `getCanonicalizationString` is declared on `RelevantRegExpTerm`.
  result = CharClass(getCanonicalizationString(term))
}

/**
* Holds if `a` and `b` are input symbols from the same regexp.
*/
Expand Down Expand Up @@ -284,7 +292,7 @@ private module CharacterClasses {
*/
pragma[noinline]
predicate hasChildThatMatchesIgnoringCasingFlags(RegExpCharacterClass cc, string char) {
exists(getCanonicalCharClass(cc)) and
exists(getCharClassForCanonicalTerm(cc)) and
exists(RegExpTerm child | child = cc.getAChild() |
char = child.(RegexpCharacterConstant).getValue()
or
Expand Down Expand Up @@ -387,7 +395,7 @@ private module CharacterClasses {
private class PositiveCharacterClass extends CharacterClass {
RegExpCharacterClass cc;

PositiveCharacterClass() { this = getCanonicalCharClass(cc) and not cc.isInverted() }
PositiveCharacterClass() { this = getCharClassForCanonicalTerm(cc) and not cc.isInverted() }

override string getARelevantChar() { result = caseNormalize(getAMentionedChar(cc), cc) }

Expand All @@ -400,7 +408,7 @@ private module CharacterClasses {
private class InvertedCharacterClass extends CharacterClass {
RegExpCharacterClass cc;

InvertedCharacterClass() { this = getCanonicalCharClass(cc) and cc.isInverted() }
InvertedCharacterClass() { this = getCharClassForCanonicalTerm(cc) and cc.isInverted() }

override string getARelevantChar() {
result = nextChar(caseNormalize(getAMentionedChar(cc), cc)) or
Expand Down Expand Up @@ -435,7 +443,7 @@ private module CharacterClasses {

PositiveCharacterClassEscape() {
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
this = getCharClassForCanonicalTerm(cc) and
charClass = ["d", "s", "w"]
}

Expand Down Expand Up @@ -475,7 +483,7 @@ private module CharacterClasses {
NegativeCharacterClassEscape() {
exists(RegExpTerm cc |
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
this = getCharClassForCanonicalTerm(cc) and
charClass = ["D", "S", "W"]
)
}
Expand Down Expand Up @@ -652,17 +660,13 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
or
q1 = before(cc) and
lbl =
CharacterClasses::normalize(CharClass(cc.getRawValue() + "|" +
getCanonicalizationFlags(cc.getRootTerm()))) and
lbl = CharacterClasses::normalize(CharClass(getCanonicalizationString(cc))) and
q2 = after(cc)
)
or
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl =
CharacterClasses::normalize(CharClass(cc.getRawValue() + "|" +
getCanonicalizationFlags(cc.getRootTerm()))) and
lbl = CharacterClasses::normalize(CharClass(getCanonicalizationString(cc))) and
q2 = after(cc)
)
or
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,5 @@
| tst-multi-character-sanitization.js:143:13:143:56 | content ... /g, '') | This string may still contain $@, which may cause an HTML element injection vulnerability. | tst-multi-character-sanitization.js:143:30:143:30 | < | <script |
| tst-multi-character-sanitization.js:144:13:144:91 | content ... /g, '') | This string may still contain $@, which may cause an HTML element injection vulnerability. | tst-multi-character-sanitization.js:144:30:144:30 | < | <script |
| tst-multi-character-sanitization.js:145:13:145:90 | content ... /g, '') | This string may still contain $@, which may cause an HTML element injection vulnerability. | tst-multi-character-sanitization.js:145:30:145:30 | < | <script |
| tst-multi-character-sanitization.js:148:3:148:99 | n.clone ... gi, '') | This string may still contain $@, which may cause an HTML element injection vulnerability. | tst-multi-character-sanitization.js:148:41:148:41 | < | <script |
| tst-multi-character-sanitization.js:152:3:152:99 | n.clone ... gi, '') | This string may still contain $@, which may cause an HTML element injection vulnerability. | tst-multi-character-sanitization.js:152:41:152:41 | < | <script |
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,12 @@
content = content.replace(/<(script|iframe|video)[\s\S]*?<\/(script|iframe|video)>/g, '') // NOT OK
content = content.replace(/<(script|iframe|video)(.|\s)*?\/(script|iframe|video)>/g, '') // NOT OK
content = content.replace(/<[^<]*>/g, ""); // OK

n.cloneNode(false).outerHTML.replace(/<\/?[\w:\-]+ ?|=[\"][^\"]+\"|=\'[^\']+\'|=[\w\-]+|>/gi, '').replace(/[\w:\-]+/gi, function(a) { // NOT OK
o.push({specified : 1, nodeName : a});
});

n.cloneNode(false).outerHTML.replace(/<\/?[\w:\-]+ ?|=[\"][^\"]+\"|=\'[^\']+\'|=[\w\-]+|>/gi, '').replace(/[\w:\-]+/gi, function(a) { // NOT OK
o.push({specified : 1, nodeName : a});
});
});
42 changes: 23 additions & 19 deletions python/ql/lib/semmle/python/security/regexp/NfaUtils.qll
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,20 @@ private predicate isCanonicalTerm(RelevantRegExpTerm term, string str) {
min(RelevantRegExpTerm t, Location loc, File file |
loc = t.getLocation() and
file = t.getFile() and
str = t.getRawValue() + "|" + getCanonicalizationFlags(t.getRootTerm())
str = getCanonicalizationString(t)
|
t order by t.getFile().getRelativePath(), loc.getStartLine(), loc.getStartColumn()
)
}

/**
* Gets a string representation of the flags used with the regular expression.
* Only the flags that are relevant for the canonicalization are included.
* Gets a string representation of `term` that is used for canonicalization.
*/
string getCanonicalizationFlags(RegExpTerm root) {
root.isRootTerm() and
(if RegExpFlags::isIgnoreCase(root) then result = "i" else result = "")
private string getCanonicalizationString(RelevantRegExpTerm term) {
  // The result is the raw value of `term`, a "|" separator, and a marker
  // derived from the ignore-case flag of the root term of `term`.
  exists(string caseMarker |
    result = term.getRawValue() + "|" + caseMarker and
    (
      // "i" when the root term ignores case, otherwise the empty string.
      if RegExpFlags::isIgnoreCase(term.getRootTerm())
      then caseMarker = "i"
      else caseMarker = ""
    )
  )
}

/**
Expand Down Expand Up @@ -186,12 +187,19 @@ private newtype TInputSymbol =
Epsilon()

/**
* Gets the canonical CharClass for `term`.
* Gets the CharClass corresponding to the canonical representative `term`.
*/
CharClass getCanonicalCharClass(RegExpTerm term) {
private CharClass getCharClassForCanonicalTerm(RegExpTerm term) {
  // Only has a result when `term` is the canonical term for some string `key`;
  // the result is the `CharClass` symbol built from that same string.
  exists(string key | isCanonicalTerm(term, key) and result = CharClass(key))
}

/**
 * Gets the char class built from the canonicalization string of `term`.
 * Unlike `getCharClassForCanonicalTerm`, this also has a result when `term`
 * is not itself the canonical representative.
 */
CharacterClass getCanonicalCharClass(RegExpTerm term) {
  // `getCanonicalizationString` is declared on `RelevantRegExpTerm`.
  result = CharClass(getCanonicalizationString(term))
}

/**
* Holds if `a` and `b` are input symbols from the same regexp.
*/
Expand Down Expand Up @@ -284,7 +292,7 @@ private module CharacterClasses {
*/
pragma[noinline]
predicate hasChildThatMatchesIgnoringCasingFlags(RegExpCharacterClass cc, string char) {
exists(getCanonicalCharClass(cc)) and
exists(getCharClassForCanonicalTerm(cc)) and
exists(RegExpTerm child | child = cc.getAChild() |
char = child.(RegexpCharacterConstant).getValue()
or
Expand Down Expand Up @@ -387,7 +395,7 @@ private module CharacterClasses {
private class PositiveCharacterClass extends CharacterClass {
RegExpCharacterClass cc;

PositiveCharacterClass() { this = getCanonicalCharClass(cc) and not cc.isInverted() }
PositiveCharacterClass() { this = getCharClassForCanonicalTerm(cc) and not cc.isInverted() }

override string getARelevantChar() { result = caseNormalize(getAMentionedChar(cc), cc) }

Expand All @@ -400,7 +408,7 @@ private module CharacterClasses {
private class InvertedCharacterClass extends CharacterClass {
RegExpCharacterClass cc;

InvertedCharacterClass() { this = getCanonicalCharClass(cc) and cc.isInverted() }
InvertedCharacterClass() { this = getCharClassForCanonicalTerm(cc) and cc.isInverted() }

override string getARelevantChar() {
result = nextChar(caseNormalize(getAMentionedChar(cc), cc)) or
Expand Down Expand Up @@ -435,7 +443,7 @@ private module CharacterClasses {

PositiveCharacterClassEscape() {
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
this = getCharClassForCanonicalTerm(cc) and
charClass = ["d", "s", "w"]
}

Expand Down Expand Up @@ -475,7 +483,7 @@ private module CharacterClasses {
NegativeCharacterClassEscape() {
exists(RegExpTerm cc |
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
this = getCharClassForCanonicalTerm(cc) and
charClass = ["D", "S", "W"]
)
}
Expand Down Expand Up @@ -652,17 +660,13 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
or
q1 = before(cc) and
lbl =
CharacterClasses::normalize(CharClass(cc.getRawValue() + "|" +
getCanonicalizationFlags(cc.getRootTerm()))) and
lbl = CharacterClasses::normalize(CharClass(getCanonicalizationString(cc))) and
q2 = after(cc)
)
or
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl =
CharacterClasses::normalize(CharClass(cc.getRawValue() + "|" +
getCanonicalizationFlags(cc.getRootTerm()))) and
lbl = CharacterClasses::normalize(CharClass(getCanonicalizationString(cc))) and
q2 = after(cc)
)
or
Expand Down
Loading