-
-
Notifications
You must be signed in to change notification settings - Fork 35.4k
url: make WHATWG URL implementation more spec compliant #10317
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
This patch contains the following changes:

url: make IPv4 parser more spec compliant
* Return int64_t from ParseNumber to prevent overflow for valid big numbers
* Don't throw when there are more than 4 parts (it cannot be an IP address)
* Correctly interpret the address and don't always throw when there are numbers > 255
Ref: https://url.spec.whatwg.org/#concept-ipv4-parser
Fixes: #10306

url: percent encode fragment to follow spec change
Ref: whatwg/url#150
Ref: whatwg/url@373dbed

url: fix URL#search setter
The check for empty string must be done before removing the leading '?'.
Ref: https://url.spec.whatwg.org/#dom-url-search

url: set port to null if an empty string is given
This is to follow a spec change.
Ref: whatwg/url#113

url: fix parsing of paths with Windows drive letter

test: update WHATWG URL test fixtures

PR-URL: #10317
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -261,7 +261,7 @@ namespace url { | |
| return type; | ||
| } | ||
|
|
||
| static inline int ParseNumber(const char* start, const char* end) { | ||
| static inline int64_t ParseNumber(const char* start, const char* end) { | ||
| unsigned R = 10; | ||
| if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { | ||
| start += 2; | ||
|
|
@@ -293,7 +293,7 @@ namespace url { | |
| } | ||
| p++; | ||
| } | ||
| return strtol(start, NULL, R); | ||
| return strtoll(start, NULL, R); | ||
| } | ||
|
|
||
| static url_host_type ParseIPv4Host(url_host* host, | ||
|
|
@@ -305,28 +305,25 @@ namespace url { | |
| const char* end = pointer + length; | ||
| int parts = 0; | ||
| uint32_t val = 0; | ||
| unsigned numbers[4]; | ||
| uint64_t numbers[4]; | ||
| int tooBigNumbers = 0; | ||
| if (length == 0) | ||
| goto end; | ||
|
|
||
| while (pointer <= end) { | ||
| const char ch = pointer < end ? pointer[0] : kEOL; | ||
| const int remaining = end - pointer - 1; | ||
| if (ch == '.' || ch == kEOL) { | ||
| if (++parts > 4 || pointer - mark == 0) | ||
| break; | ||
| int n = ParseNumber(mark, pointer); | ||
| if (n < 0) { | ||
| type = HOST_TYPE_DOMAIN; | ||
| if (++parts > 4) | ||
| goto end; | ||
| } | ||
| if (pointer - mark == 10) { | ||
| numbers[parts - 1] = n; | ||
| if (pointer - mark == 0) | ||
| break; | ||
| } | ||
| if (n > 255) { | ||
| type = HOST_TYPE_FAILED; | ||
| int64_t n = ParseNumber(mark, pointer); | ||
| if (n < 0) | ||
| goto end; | ||
|
|
||
| if (n > 255) { | ||
| tooBigNumbers++; | ||
| } | ||
| numbers[parts - 1] = n; | ||
| mark = pointer + 1; | ||
|
|
@@ -335,14 +332,23 @@ namespace url { | |
| } | ||
| pointer++; | ||
| } | ||
| CHECK_GT(parts, 0); | ||
|
|
||
| // If any but the last item in numbers is greater than 255, return failure. | ||
| // If the last item in numbers is greater than or equal to | ||
| // 256^(5 - the number of items in numbers), return failure. | ||
| if (tooBigNumbers > 1 || | ||
| (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || | ||
| numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) { | ||
| type = HOST_TYPE_FAILED; | ||
| goto end; | ||
| } | ||
|
|
||
| type = HOST_TYPE_IPV4; | ||
| if (parts > 0) { | ||
| val = numbers[parts - 1]; | ||
| for (int n = 0; n < parts - 1; n++) { | ||
| double b = 3-n; | ||
| val += numbers[n] * pow(256, b); | ||
| } | ||
| val = numbers[parts - 1]; | ||
| for (int n = 0; n < parts - 1; n++) { | ||
| double b = 3 - n; | ||
| val += numbers[n] * pow(256, b); | ||
| } | ||
|
|
||
| host->value.ipv4 = val; | ||
|
|
@@ -618,6 +624,13 @@ namespace url { | |
| } | ||
| } | ||
|
|
||
| static inline void ShortenUrlPath(struct url_data* url) { | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it better to use a const reference?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Possibly, no strong opinion.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, it cannot be a const reference because the url is modified in the function. Non-const references are not allowed by the linter, so I'll keep the pointer. |
||
| if (url->path.empty()) return; | ||
| if (url->path.size() == 1 && url->scheme == "file:" && | ||
| NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0])) return; | ||
| url->path.pop_back(); | ||
| } | ||
|
|
||
| static void Parse(Environment* env, | ||
| Local<Value> recv, | ||
| const char* input, | ||
|
|
@@ -895,8 +908,7 @@ namespace url { | |
| if (DOES_HAVE_PATH(base)) { | ||
| SET_HAVE_PATH() | ||
| url.path = base.path; | ||
| if (!url.path.empty()) | ||
| url.path.pop_back(); | ||
| ShortenUrlPath(&url); | ||
| } | ||
| url.port = base.port; | ||
| state = kPath; | ||
|
|
@@ -1112,8 +1124,7 @@ namespace url { | |
| SET_HAVE_PATH() | ||
| url.path = base.path; | ||
| } | ||
| if (!url.path.empty()) | ||
| url.path.pop_back(); | ||
| ShortenUrlPath(&url); | ||
| } | ||
| state = kPath; | ||
| continue; | ||
|
|
@@ -1172,8 +1183,7 @@ namespace url { | |
| special_back_slash || | ||
| (!state_override && (ch == '?' || ch == '#'))) { | ||
| if (IsDoubleDotSegment(buffer)) { | ||
| if (!url.path.empty()) | ||
| url.path.pop_back(); | ||
| ShortenUrlPath(&url); | ||
| if (ch != '/' && !special_back_slash) { | ||
| SET_HAVE_PATH() | ||
| url.path.push_back(""); | ||
|
|
@@ -1247,7 +1257,7 @@ namespace url { | |
| case 0: | ||
| break; | ||
| default: | ||
| buffer += ch; | ||
| AppendOrEscape(&buffer, ch, SimpleEncodeSet); | ||
| } | ||
| break; | ||
| default: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jasnell Just wondering if you remember what this condition was for? Maybe it was a mistake to remove it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not off the top of my head... I'll be able to take a look in the next day or two. (sorry, company in town for the holiday is taking up quite a bit of my time this week)