Skip to content

Commit 5f204f2

Browse files
committed
Have a limit on how many characters are presented to the Parser
1 parent 4aedac7 commit 5f204f2

File tree

8 files changed

+276
-44
lines changed

8 files changed

+276
-44
lines changed

src/main/java/graphql/parser/Parser.java

Lines changed: 78 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import graphql.parser.antlr.GraphqlParser;
1515
import graphql.parser.exceptions.ParseCancelledException;
1616
import graphql.parser.exceptions.ParseCancelledTooDeepException;
17+
import graphql.parser.exceptions.ParseCancelledTooManyCharsException;
1718
import org.antlr.v4.runtime.BaseErrorListener;
1819
import org.antlr.v4.runtime.CharStreams;
1920
import org.antlr.v4.runtime.CodePointCharStream;
@@ -25,6 +26,7 @@
2526
import org.antlr.v4.runtime.atn.PredictionMode;
2627
import org.antlr.v4.runtime.tree.ParseTreeListener;
2728
import org.antlr.v4.runtime.tree.TerminalNode;
29+
import org.jetbrains.annotations.NotNull;
2830

2931
import java.io.IOException;
3032
import java.io.Reader;
@@ -33,6 +35,7 @@
3335
import java.util.Optional;
3436
import java.util.function.BiConsumer;
3537
import java.util.function.BiFunction;
38+
import java.util.function.Consumer;
3639

3740
/**
3841
* This can parse graphql syntax, both Query syntax and Schema Definition Language (SDL) syntax, into an
@@ -259,6 +262,57 @@ private Node<?> parseImpl(ParserEnvironment environment, BiFunction<GraphqlParse
259262
ParserOptions parserOptions = environment.getParserOptions();
260263
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
261264

265+
MultiSourceReader multiSourceReader = setupMultiSourceReader(environment, parserOptions);
266+
267+
SafeTokenReader safeTokenReader = setupSafeTokenReader(environment, parserOptions, multiSourceReader);
268+
269+
CodePointCharStream charStream = setupCharStream(safeTokenReader);
270+
271+
GraphqlLexer lexer = setupGraphqlLexer(environment, multiSourceReader, charStream);
272+
273+
// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
274+
SafeTokenSource safeTokenSource = getSafeTokenSource(environment, parserOptions, multiSourceReader, lexer);
275+
276+
CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);
277+
278+
GraphqlParser parser = new GraphqlParser(tokens);
279+
parser.removeErrorListeners();
280+
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
281+
282+
ExtendedBailStrategy bailStrategy = new ExtendedBailStrategy(multiSourceReader, environment);
283+
parser.setErrorHandler(bailStrategy);
284+
285+
// preserve old protected call semantics - remove at some point
286+
GraphqlAntlrToLanguage toLanguage = getAntlrToLanguage(tokens, multiSourceReader, environment);
287+
288+
setupParserListener(environment, multiSourceReader, parser, toLanguage);
289+
290+
291+
//
292+
// parsing starts ...... now!
293+
//
294+
Object[] contextAndNode = nodeFunction.apply(parser, toLanguage);
295+
ParserRuleContext parserRuleContext = (ParserRuleContext) contextAndNode[0];
296+
Node<?> node = (Node<?>) contextAndNode[1];
297+
298+
Token stop = parserRuleContext.getStop();
299+
List<Token> allTokens = tokens.getTokens();
300+
if (stop != null && allTokens != null && !allTokens.isEmpty()) {
301+
Token last = allTokens.get(allTokens.size() - 1);
302+
//
303+
// do we have more tokens in the stream than we consumed in the parse?
304+
// if yes then it's invalid. We make sure it's the same channel
305+
boolean notEOF = last.getType() != Token.EOF;
306+
boolean lastGreaterThanDocument = last.getTokenIndex() > stop.getTokenIndex();
307+
boolean sameChannel = last.getChannel() == stop.getChannel();
308+
if (notEOF && lastGreaterThanDocument && sameChannel) {
309+
throw bailStrategy.mkMoreTokensException(last);
310+
}
311+
}
312+
return node;
313+
}
314+
315+
private static MultiSourceReader setupMultiSourceReader(ParserEnvironment environment, ParserOptions parserOptions) {
262316
MultiSourceReader multiSourceReader;
263317
Reader reader = environment.getDocument();
264318
if (reader instanceof MultiSourceReader) {
@@ -269,13 +323,31 @@ private Node<?> parseImpl(ParserEnvironment environment, BiFunction<GraphqlParse
269323
.trackData(parserOptions.isReaderTrackData())
270324
.build();
271325
}
326+
return multiSourceReader;
327+
}
328+
329+
@NotNull
330+
private static SafeTokenReader setupSafeTokenReader(ParserEnvironment environment, ParserOptions parserOptions, MultiSourceReader multiSourceReader) {
331+
int maxCharacters = parserOptions.getMaxCharacters();
332+
Consumer<Integer> onTooManyCharacters = it -> {
333+
throw new ParseCancelledTooManyCharsException(environment.getI18N(), maxCharacters);
334+
};
335+
return new SafeTokenReader(multiSourceReader, maxCharacters, onTooManyCharacters);
336+
}
337+
338+
@NotNull
339+
private static CodePointCharStream setupCharStream(SafeTokenReader safeTokenReader) {
272340
CodePointCharStream charStream;
273341
try {
274-
charStream = CharStreams.fromReader(multiSourceReader);
342+
charStream = CharStreams.fromReader(safeTokenReader);
275343
} catch (IOException e) {
276344
throw new UncheckedIOException(e);
277345
}
346+
return charStream;
347+
}
278348

349+
@NotNull
350+
private static GraphqlLexer setupGraphqlLexer(ParserEnvironment environment, MultiSourceReader multiSourceReader, CodePointCharStream charStream) {
279351
GraphqlLexer lexer = new GraphqlLexer(charStream);
280352
lexer.removeErrorListeners();
281353
lexer.addErrorListener(new BaseErrorListener() {
@@ -296,8 +368,11 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
296368
throw new InvalidSyntaxException(msg, sourceLocation, null, preview, null);
297369
}
298370
});
371+
return lexer;
372+
}
299373

300-
// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
374+
@NotNull
375+
private SafeTokenSource getSafeTokenSource(ParserEnvironment environment, ParserOptions parserOptions, MultiSourceReader multiSourceReader, GraphqlLexer lexer) {
301376
int maxTokens = parserOptions.getMaxTokens();
302377
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
303378
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems(
@@ -306,45 +381,7 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
306381
maxTokenCount,
307382
multiSourceReader,
308383
ParseCancelledException.class);
309-
SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
310-
311-
CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);
312-
313-
GraphqlParser parser = new GraphqlParser(tokens);
314-
parser.removeErrorListeners();
315-
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
316-
317-
ExtendedBailStrategy bailStrategy = new ExtendedBailStrategy(multiSourceReader, environment);
318-
parser.setErrorHandler(bailStrategy);
319-
320-
// preserve old protected call semantics - remove at some point
321-
GraphqlAntlrToLanguage toLanguage = getAntlrToLanguage(tokens, multiSourceReader, environment);
322-
323-
setupParserListener(environment, multiSourceReader, parser, toLanguage);
324-
325-
326-
//
327-
// parsing starts ...... now!
328-
//
329-
Object[] contextAndNode = nodeFunction.apply(parser, toLanguage);
330-
ParserRuleContext parserRuleContext = (ParserRuleContext) contextAndNode[0];
331-
Node<?> node = (Node<?>) contextAndNode[1];
332-
333-
Token stop = parserRuleContext.getStop();
334-
List<Token> allTokens = tokens.getTokens();
335-
if (stop != null && allTokens != null && !allTokens.isEmpty()) {
336-
Token last = allTokens.get(allTokens.size() - 1);
337-
//
338-
// do we have more tokens in the stream than we consumed in the parse?
339-
// if yes then it's invalid. We make sure it's the same channel
340-
boolean notEOF = last.getType() != Token.EOF;
341-
boolean lastGreaterThanDocument = last.getTokenIndex() > stop.getTokenIndex();
342-
boolean sameChannel = last.getChannel() == stop.getChannel();
343-
if (notEOF && lastGreaterThanDocument && sameChannel) {
344-
throw bailStrategy.mkMoreTokensException(last);
345-
}
346-
}
347-
return node;
384+
return new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
348385
}
349386

350387
private void setupParserListener(ParserEnvironment environment, MultiSourceReader multiSourceReader, GraphqlParser parser, GraphqlAntlrToLanguage toLanguage) {

src/main/java/graphql/parser/ParserOptions.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,16 @@
1111
*/
1212
@PublicApi
1313
public class ParserOptions {
14+
/**
15+
* A graphql hacking vector is to send nonsensical queries that contain repeated characters that burn lots of parsing CPU time and burn
16+
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
17+
sets this value to 1MB. ANTLR parsing time is linear to the number of characters presented. The more you
18+
* allow the longer it takes.
19+
* <p>
20+
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
21+
* JVM wide.
22+
*/
23+
public static final int MAX_QUERY_CHARACTERS = 1024 * 1024; // 1 MB
1424

1525
/**
1626
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
@@ -47,6 +57,7 @@ public class ParserOptions {
4757
.captureSourceLocation(true)
4858
.captureLineComments(true)
4959
.readerTrackData(true)
60+
.maxCharacters(MAX_QUERY_CHARACTERS)
5061
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
5162
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
5263
.maxRuleDepth(MAX_RULE_DEPTH)
@@ -57,6 +68,7 @@ public class ParserOptions {
5768
.captureSourceLocation(true)
5869
.captureLineComments(false) // #comments are not useful in query parsing
5970
.readerTrackData(true)
71+
.maxCharacters(MAX_QUERY_CHARACTERS)
6072
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
6173
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
6274
.maxRuleDepth(MAX_RULE_DEPTH)
@@ -67,6 +79,7 @@ public class ParserOptions {
6779
.captureSourceLocation(true)
6880
.captureLineComments(true) // #comments are useful in SDL parsing
6981
.readerTrackData(true)
82+
.maxCharacters(Integer.MAX_VALUE)
7083
.maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
7184
.maxWhitespaceTokens(Integer.MAX_VALUE)
7285
.maxRuleDepth(Integer.MAX_VALUE)
@@ -171,6 +184,7 @@ public static void setDefaultSdlParserOptions(ParserOptions options) {
171184
private final boolean captureSourceLocation;
172185
private final boolean captureLineComments;
173186
private final boolean readerTrackData;
187+
private final int maxCharacters;
174188
private final int maxTokens;
175189
private final int maxWhitespaceTokens;
176190
private final int maxRuleDepth;
@@ -181,6 +195,7 @@ private ParserOptions(Builder builder) {
181195
this.captureSourceLocation = builder.captureSourceLocation;
182196
this.captureLineComments = builder.captureLineComments;
183197
this.readerTrackData = builder.readerTrackData;
198+
this.maxCharacters = builder.maxCharacters;
184199
this.maxTokens = builder.maxTokens;
185200
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
186201
this.maxRuleDepth = builder.maxRuleDepth;
@@ -233,6 +248,18 @@ public boolean isReaderTrackData() {
233248
return readerTrackData;
234249
}
235250

251+
/**
252+
* A graphql hacking vector is to send nonsensical queries that contain repeated characters that burn lots of parsing CPU time and burn
253+
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
254+
* sets this value to 1MB.
255+
*
256+
* @return the maximum number of characters the parser will accept, after which an exception will be thrown.
257+
*/
258+
public int getMaxCharacters() {
259+
return maxCharacters;
260+
}
261+
262+
236263
/**
237264
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burns
238265
* memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
@@ -287,6 +314,7 @@ public static class Builder {
287314
private boolean captureLineComments = true;
288315
private boolean readerTrackData = true;
289316
private ParsingListener parsingListener = ParsingListener.NOOP;
317+
private int maxCharacters = MAX_QUERY_CHARACTERS;
290318
private int maxTokens = MAX_QUERY_TOKENS;
291319
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;
292320
private int maxRuleDepth = MAX_RULE_DEPTH;
@@ -298,6 +326,7 @@ public static class Builder {
298326
this.captureIgnoredChars = parserOptions.captureIgnoredChars;
299327
this.captureSourceLocation = parserOptions.captureSourceLocation;
300328
this.captureLineComments = parserOptions.captureLineComments;
329+
this.maxCharacters = parserOptions.maxCharacters;
301330
this.maxTokens = parserOptions.maxTokens;
302331
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
303332
this.maxRuleDepth = parserOptions.maxRuleDepth;
@@ -324,6 +353,11 @@ public Builder readerTrackData(boolean readerTrackData) {
324353
return this;
325354
}
326355

356+
public Builder maxCharacters(int maxCharacters) {
357+
this.maxCharacters = maxCharacters;
358+
return this;
359+
}
360+
327361
public Builder maxTokens(int maxTokens) {
328362
this.maxTokens = maxTokens;
329363
return this;
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package graphql.parser;
2+
3+
import graphql.Internal;
4+
import org.jetbrains.annotations.NotNull;
5+
6+
import java.io.IOException;
7+
import java.io.Reader;
8+
import java.nio.CharBuffer;
9+
import java.util.function.Consumer;
10+
11+
/**
12+
* This reader will only emit a maximum number of characters from it. This is used to protect us from evil input.
13+
* <p>
14+
* If a graphql system does not have some max HTTP input limit, then this will help protect the system. This is a limit
15+
* of last resort. Ideally the http input should be limited, but if its not, we have this.
16+
*/
17+
@Internal
18+
public class SafeTokenReader extends Reader {
19+
20+
private final Reader delegate;
21+
private final int maxCharacters;
22+
private final Consumer<Integer> whenMaxCharactersExceeded;
23+
private int count;
24+
25+
public SafeTokenReader(Reader delegate, int maxCharacters, Consumer<Integer> whenMaxCharactersExceeded) {
26+
this.delegate = delegate;
27+
this.maxCharacters = maxCharacters;
28+
this.whenMaxCharactersExceeded = whenMaxCharactersExceeded;
29+
count = 0;
30+
}
31+
32+
private int checkHowMany(int read, int howMany) {
33+
if (read != -1) {
34+
count += howMany;
35+
if (count > maxCharacters) {
36+
whenMaxCharactersExceeded.accept(maxCharacters);
37+
}
38+
}
39+
return read;
40+
}
41+
42+
@Override
43+
public int read(char @NotNull [] buff, int off, int len) throws IOException {
44+
int howMany = delegate.read(buff, off, len);
45+
return checkHowMany(howMany, howMany);
46+
}
47+
48+
@Override
49+
public int read() throws IOException {
50+
int ch = delegate.read();
51+
return checkHowMany(ch, 1);
52+
}
53+
54+
@Override
55+
public int read(@NotNull CharBuffer target) throws IOException {
56+
int howMany = delegate.read(target);
57+
return checkHowMany(howMany, howMany);
58+
}
59+
60+
@Override
61+
public int read( char @NotNull [] buff) throws IOException {
62+
int howMany = delegate.read(buff);
63+
return checkHowMany(howMany, howMany);
64+
}
65+
66+
@Override
67+
public void close() throws IOException {
68+
delegate.close();
69+
}
70+
71+
@Override
72+
public long skip(long n) throws IOException {
73+
return delegate.skip(n);
74+
}
75+
76+
@Override
77+
public boolean ready() throws IOException {
78+
return delegate.ready();
79+
}
80+
81+
@Override
82+
public boolean markSupported() {
83+
return delegate.markSupported();
84+
}
85+
86+
@Override
87+
public void mark(int readAheadLimit) throws IOException {
88+
delegate.mark(readAheadLimit);
89+
}
90+
91+
@Override
92+
public void reset() throws IOException {
93+
delegate.reset();
94+
}
95+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package graphql.parser.exceptions;
2+
3+
import graphql.Internal;
4+
import graphql.i18n.I18n;
5+
import graphql.parser.InvalidSyntaxException;
6+
import org.jetbrains.annotations.NotNull;
7+
8+
@Internal
9+
public class ParseCancelledTooManyCharsException extends InvalidSyntaxException {
10+
11+
@Internal
12+
public ParseCancelledTooManyCharsException(@NotNull I18n i18N, int maxCharacters) {
13+
super(i18N.msg("ParseCancelled.tooManyChars", maxCharacters),
14+
null, null, null, null);
15+
}
16+
}

src/main/resources/i18n/Parsing.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ InvalidSyntaxMoreTokens.full=Invalid syntax encountered. There are extra tokens
2020
#
2121
ParseCancelled.full=More than {0} ''{1}'' tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.
2222
ParseCancelled.tooDeep=More than {0} deep ''{1}'' rules have been entered. To prevent Denial Of Service attacks, parsing has been cancelled.
23+
ParseCancelled.tooManyChars=More than {0} characters have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.
2324
#
2425
InvalidUnicode.trailingLeadingSurrogate=Invalid unicode encountered. Trailing surrogate must be preceded with a leading surrogate. Offending token ''{0}'' at line {1} column {2}
2526
InvalidUnicode.leadingTrailingSurrogate=Invalid unicode encountered. Leading surrogate must be followed by a trailing surrogate. Offending token ''{0}'' at line {1} column {2}

0 commit comments

Comments
 (0)