Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
url: improve spec compliance of WHATWG URL
This patch contains the following changes:

url: make IPv4 parser more spec compliant

* Return int64_t from ParseNumber to prevent overflow for valid big numbers
* Don't throw when there are more than 4 parts (it cannot be an IP
address)
* Correctly interpret the address and don't always throw when there are
numbers > 255

Ref: https://url.spec.whatwg.org/#concept-ipv4-parser
Fixes: #10306

url: percent encode fragment to follow spec change

Ref: whatwg/url#150
Ref: whatwg/url@373dbed

url: fix URL#search setter

The check for empty string must be done before removing the leading '?'.

Ref: https://url.spec.whatwg.org/#dom-url-search

url: set port to null if an empty string is given

This is to follow a spec change.

Ref: whatwg/url#113

url: fix parsing of paths with Windows drive letter

test: update WHATWG URL test fixtures

PR-URL: #10317
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
  • Loading branch information
targos committed Jan 1, 2017
commit db18dd83567ff45b7d6236053053b3b90aaca357
5 changes: 2 additions & 3 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -444,8 +444,7 @@ Object.defineProperties(URL.prototype, {
return;
port = String(port);
if (port === '') {
// Currently, if port number is empty, left unchanged.
// TODO(jasnell): This might be changing in the spec
ctx.port = undefined;
return;
}
binding.parse(port, binding.kPort, null, ctx,
Expand Down Expand Up @@ -478,13 +477,13 @@ Object.defineProperties(URL.prototype, {
set(search) {
const ctx = this[context];
search = String(search);
if (search[0] === '?') search = search.slice(1);
if (!search) {
ctx.query = null;
ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY;
this[searchParams][searchParams] = {};
return;
}
if (search[0] === '?') search = search.slice(1);
ctx.query = '';
binding.parse(search, binding.kQuery, null, ctx,
onParseSearchComplete.bind(this));
Expand Down
64 changes: 37 additions & 27 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ namespace url {
return type;
}

static inline int ParseNumber(const char* start, const char* end) {
static inline int64_t ParseNumber(const char* start, const char* end) {
unsigned R = 10;
if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
start += 2;
Expand Down Expand Up @@ -293,7 +293,7 @@ namespace url {
}
p++;
}
return strtol(start, NULL, R);
return strtoll(start, NULL, R);
}

static url_host_type ParseIPv4Host(url_host* host,
Expand All @@ -305,28 +305,25 @@ namespace url {
const char* end = pointer + length;
int parts = 0;
uint32_t val = 0;
unsigned numbers[4];
uint64_t numbers[4];
int tooBigNumbers = 0;
if (length == 0)
goto end;

while (pointer <= end) {
const char ch = pointer < end ? pointer[0] : kEOL;
const int remaining = end - pointer - 1;
if (ch == '.' || ch == kEOL) {
if (++parts > 4 || pointer - mark == 0)
break;
int n = ParseNumber(mark, pointer);
if (n < 0) {
type = HOST_TYPE_DOMAIN;
if (++parts > 4)
goto end;
}
if (pointer - mark == 10) {
numbers[parts - 1] = n;
if (pointer - mark == 0)
break;
}
if (n > 255) {
type = HOST_TYPE_FAILED;
int64_t n = ParseNumber(mark, pointer);
if (n < 0)
goto end;
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jasnell Just wondering if you remember what this condition was for? Maybe it was a mistake to remove it.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not off the top of my head... I'll be able to take a look in the next day or two. (sorry, company in town for the holiday is taking up quite a bit of my time this week)


if (n > 255) {
tooBigNumbers++;
}
numbers[parts - 1] = n;
mark = pointer + 1;
Expand All @@ -335,14 +332,23 @@ namespace url {
}
pointer++;
}
CHECK_GT(parts, 0);

// If any but the last item in numbers is greater than 255, return failure.
// If the last item in numbers is greater than or equal to
// 256^(5 - the number of items in numbers), return failure.
if (tooBigNumbers > 1 ||
(tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
type = HOST_TYPE_FAILED;
goto end;
}

type = HOST_TYPE_IPV4;
if (parts > 0) {
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3-n;
val += numbers[n] * pow(256, b);
}
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3 - n;
val += numbers[n] * pow(256, b);
}

host->value.ipv4 = val;
Expand Down Expand Up @@ -618,6 +624,13 @@ namespace url {
}
}

static inline void ShortenUrlPath(struct url_data* url) {
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it better to use a const reference?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

possibly, no strong opinion

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually it cannot be a const because the url is modified in the function. Non-const references are not allowed by the linter, so I'll keep the pointer.

if (url->path.empty()) return;
if (url->path.size() == 1 && url->scheme == "file:" &&
NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0])) return;
url->path.pop_back();
}

static void Parse(Environment* env,
Local<Value> recv,
const char* input,
Expand Down Expand Up @@ -895,8 +908,7 @@ namespace url {
if (DOES_HAVE_PATH(base)) {
SET_HAVE_PATH()
url.path = base.path;
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
}
url.port = base.port;
state = kPath;
Expand Down Expand Up @@ -1112,8 +1124,7 @@ namespace url {
SET_HAVE_PATH()
url.path = base.path;
}
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
}
state = kPath;
continue;
Expand Down Expand Up @@ -1172,8 +1183,7 @@ namespace url {
special_back_slash ||
(!state_override && (ch == '?' || ch == '#'))) {
if (IsDoubleDotSegment(buffer)) {
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
if (ch != '/' && !special_back_slash) {
SET_HAVE_PATH()
url.path.push_back("");
Expand Down Expand Up @@ -1247,7 +1257,7 @@ namespace url {
case 0:
break;
default:
buffer += ch;
AppendOrEscape(&buffer, ch, SimpleEncodeSet);
}
break;
default:
Expand Down
34 changes: 20 additions & 14 deletions test/fixtures/url-setter-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@
}
},
{
"comment": "Port number is unchanges if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
"comment": "Port number is unchanged if not specified",
"href": "http://example.net:8080",
"new_value": "example.com:",
"expected": {
Expand All @@ -358,7 +358,6 @@
}
},
{

"comment": "The empty host is not valid for special schemes",
"href": "http://example.net",
"new_value": "",
Expand Down Expand Up @@ -763,14 +762,14 @@
}
},
{
"comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
"comment": "Port number is removed if empty is the new value",
"href": "http://example.net:8080",
"new_value": "",
"expected": {
"href": "http://example.net:8080/",
"host": "example.net:8080",
"href": "http://example.net/",
"host": "example.net",
"hostname": "example.net",
"port": "8080"
"port": ""
}
},
{
Expand Down Expand Up @@ -975,6 +974,15 @@
"href": "http://example.net/..%c3%89t%C3%A9",
"pathname": "/..%c3%89t%C3%A9"
}
},
{
"comment": "? needs to be encoded",
"href": "http://example.net",
"new_value": "?",
"expected": {
"href": "http://example.net/%3F",
"pathname": "/%3F"
}
}
],
"search": [
Expand Down Expand Up @@ -1011,7 +1019,6 @@
}
},
{
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav",
"new_value": "?",
"expected": {
Expand Down Expand Up @@ -1096,7 +1103,6 @@
}
},
{
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav",
"new_value": "#",
"expected": {
Expand All @@ -1113,22 +1119,22 @@
}
},
{
"comment": "No percent-encoding at all (!); nuls, tabs, and newlines are removed",
"comment": "Simple percent-encoding; nuls, tabs, and newlines are removed",
"href": "a:/",
"new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"expected": {
"href": "a:/#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"hash": "#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé"
"href": "a:/#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
"hash": "#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
}
},
{
"comment": "Bytes already percent-encoded are left as-is",
"href": "http://example.net",
"new_value": "%c3%89té",
"expected": {
"href": "http://example.net/#%c3%89té",
"hash": "#%c3%89té"
"href": "http://example.net/#%c3%89t%C3%A9",
"hash": "#%c3%89t%C3%A9"
}
}
]
}
}
Loading