Skip to content

Commit 4e18190

Browse files
jungshik authored and
Commit bot committed
Timezone name check fix
1. Location names with more than one underscore (e.g. Ho_Chi_Minh) didn't work because of the way capturing works with repeated patterns in RE. It's now supported by changing the RE to capture the whole string and splitting on '_' in the next step. 2. Adds support for location names with a hyphen. 3. Adds support for timezone ids with three parts (e.g. America/Argentina/Buenos_Aires). 4. Adds special handling of 'au', 'es' and 'of' in zone ids. They need to be kept in lowercase. (see the full list at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones ) 5. Adds regression tests for all the above and makes the existing tests more robust against future ICU changes. ICU canonicalizes zone names to deprecated names, but it may change. ( http://bugs.icu-project.org/trac/ticket/12044 ) BUG=364374 LOG=Y Review URL: https://codereview.chromium.org/1529363005 Cr-Commit-Position: refs/heads/master@{#33097}
1 parent af95a4d commit 4e18190

File tree

5 files changed

+145
-28
lines changed

5 files changed

+145
-28
lines changed

src/js/i18n.js

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -176,12 +176,25 @@ var TIMEZONE_NAME_CHECK_RE = UNDEFINED;
176176

177177
function GetTimezoneNameCheckRE() {
178178
if (IS_UNDEFINED(TIMEZONE_NAME_CHECK_RE)) {
179-
TIMEZONE_NAME_CHECK_RE =
180-
new GlobalRegExp('^([A-Za-z]+)/([A-Za-z]+)(?:_([A-Za-z]+))*$');
179+
TIMEZONE_NAME_CHECK_RE = new GlobalRegExp(
180+
'^([A-Za-z]+)/([A-Za-z_-]+)((?:\/[A-Za-z_-]+)+)*$');
181181
}
182182
return TIMEZONE_NAME_CHECK_RE;
183183
}
184184

185+
/**
186+
* Matches valid location parts of IANA time zone names.
187+
*/
188+
var TIMEZONE_NAME_LOCATION_PART_RE = UNDEFINED;
189+
190+
function GetTimezoneNameLocationPartRE() {
191+
if (IS_UNDEFINED(TIMEZONE_NAME_LOCATION_PART_RE)) {
192+
TIMEZONE_NAME_LOCATION_PART_RE =
193+
new GlobalRegExp('^([A-Za-z]+)((?:[_-][A-Za-z]+)+)*$');
194+
}
195+
return TIMEZONE_NAME_LOCATION_PART_RE;
196+
}
197+
185198
/**
186199
* Adds bound method to the prototype of the given object.
187200
*/
@@ -678,6 +691,34 @@ function toTitleCaseWord(word) {
678691
%StringToLowerCase(%_Call(StringSubstr, word, 1));
679692
}
680693

694+
/**
695+
* Returns titlecased location, bueNos_airES -> Buenos_Aires
696+
* or ho_cHi_minH -> Ho_Chi_Minh. It is locale-agnostic and only
697+
* deals with ASCII characters.
698+
* 'of', 'au' and 'es' are special-cased and lowercased.
699+
*/
700+
function toTitleCaseTimezoneLocation(location) {
701+
var match = %_Call(StringMatch, location, GetTimezoneNameLocationPartRE());
702+
if (IS_NULL(match)) throw MakeRangeError(kExpectedLocation, location);
703+
704+
var result = toTitleCaseWord(match[1]);
705+
if (!IS_UNDEFINED(match[2]) && 2 < match.length) {
706+
// The first character is a separator, '_' or '-'.
707+
// None of IANA zone names has both '_' and '-'.
708+
var separator = %_Call(StringSubstring, match[2], 0, 1);
709+
var parts = %_Call(StringSplit, match[2], separator);
710+
for (var i = 1; i < parts.length; i++) {
711+
var part = parts[i]
712+
var lowercasedPart = %StringToLowerCase(part);
713+
result = result + separator +
714+
((lowercasedPart !== 'es' &&
715+
lowercasedPart !== 'of' && lowercasedPart !== 'au') ?
716+
toTitleCaseWord(part) : lowercasedPart);
717+
}
718+
}
719+
return result;
720+
}
721+
681722
/**
682723
* Canonicalizes the language tag, or throws in case the tag is invalid.
683724
*/
@@ -1735,8 +1776,8 @@ addBoundMethod(Intl.DateTimeFormat, 'v8Parse', parseDate, 1);
17351776

17361777

17371778
/**
1738-
* Returns canonical Area/Location name, or throws an exception if the zone
1739-
* name is invalid IANA name.
1779+
* Returns canonical Area/Location(/Location) name, or throws an exception
1780+
* if the zone name is an invalid IANA name.
17401781
*/
17411782
function canonicalizeTimeZoneID(tzID) {
17421783
// Skip undefined zones.
@@ -1751,16 +1792,22 @@ function canonicalizeTimeZoneID(tzID) {
17511792
return 'UTC';
17521793
}
17531794

1754-
// We expect only _ and / beside ASCII letters.
1755-
// All inputs should conform to Area/Location from now on.
1795+
// TODO(jshin): Add support for Etc/GMT[+-]([1-9]|1[0-2])
1796+
1797+
// We expect only '_', '-' and '/' besides ASCII letters.
1798+
// All inputs should conform to Area/Location(/Location)* from now on.
17561799
var match = %_Call(StringMatch, tzID, GetTimezoneNameCheckRE());
1757-
if (IS_NULL(match)) throw MakeRangeError(kExpectedLocation, tzID);
1800+
if (IS_NULL(match)) throw MakeRangeError(kExpectedTimezoneID, tzID);
1801+
1802+
var result = toTitleCaseTimezoneLocation(match[1]) + '/' +
1803+
toTitleCaseTimezoneLocation(match[2]);
17581804

1759-
var result = toTitleCaseWord(match[1]) + '/' + toTitleCaseWord(match[2]);
1760-
var i = 3;
1761-
while (!IS_UNDEFINED(match[i]) && i < match.length) {
1762-
result = result + '_' + toTitleCaseWord(match[i]);
1763-
i++;
1805+
if (!IS_UNDEFINED(match[3]) && 3 < match.length) {
1806+
var locations = %_Call(StringSplit, match[3], '/');
1807+
// The 1st element is empty. Starts with i=1.
1808+
for (var i = 1; i < locations.length; i++) {
1809+
result = result + '/' + toTitleCaseTimezoneLocation(locations[i]);
1810+
}
17641811
}
17651812

17661813
return result;

src/messages.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,11 @@ class CallSite {
329329
T(UnsupportedSuper, "Unsupported reference to 'super'") \
330330
/* RangeError */ \
331331
T(DateRange, "Provided date is not in valid range.") \
332-
T(ExpectedLocation, "Expected Area/Location for time zone, got %") \
332+
T(ExpectedTimezoneID, \
333+
"Expected Area/Location(/Location)* for time zone, got %") \
334+
T(ExpectedLocation, \
335+
"Expected letters optionally connected with underscores or hyphens for " \
336+
"a location, got %") \
333337
T(InvalidArrayBufferLength, "Invalid array buffer length") \
334338
T(ArrayBufferAllocationFailed, "Array buffer allocation failed") \
335339
T(InvalidArrayLength, "Invalid array length") \

test/mjsunit/regress/regress-487322.js

Lines changed: 0 additions & 15 deletions
This file was deleted.
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright 2015 the V8 project authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style license that can be
3+
// found in the LICENSE file.
4+
5+
if (this.Intl) {
6+
// chromium:364374
7+
8+
// Locations with 2 underscores are accepted and normalized.
9+
// 'of' and 'es' are always lowercased.
10+
df = new Intl.DateTimeFormat('en-US', {'timeZone': 'eUrope/isLe_OF_man'})
11+
assertEquals('Europe/Isle_of_Man', df.resolvedOptions().timeZone);
12+
13+
df = new Intl.DateTimeFormat('en-US', {'timeZone': 'africa/Dar_eS_salaam'})
14+
assertEquals('Africa/Dar_es_Salaam', df.resolvedOptions().timeZone);
15+
16+
df = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/port_of_spain'})
17+
assertEquals('America/Port_of_Spain', df.resolvedOptions().timeZone);
18+
19+
// Zone ids with more than 2 parts are accepted and normalized.
20+
df = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/north_Dakota/new_salem'})
21+
assertEquals('America/North_Dakota/New_Salem', df.resolvedOptions().timeZone);
22+
23+
// 3-part zone IDs are accepted and normalized.
24+
// Two Buenos Aires aliases are identical.
25+
df1 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/aRgentina/buenos_aIres'})
26+
df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/Argentina/Buenos_Aires'})
27+
assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);
28+
29+
df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/Buenos_Aires'})
30+
assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);
31+
32+
df1 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/Indiana/Indianapolis'})
33+
df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/Indianapolis'})
34+
assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);
35+
36+
// ICU does not recognize East-Indiana. Add later when it does.
37+
// df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/East-Indiana'})
38+
// assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);
39+
40+
41+
// Zone IDs with hyphens. 'au' has to be in lowercase.
42+
df = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/port-aU-pRince'})
43+
assertEquals('America/Port-au-Prince', df.resolvedOptions().timeZone);
44+
45+
// Accepts Ho_Chi_Minh and treats it as identical to Saigon
46+
df1 = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Ho_Chi_Minh'})
47+
df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Saigon'})
48+
assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);
49+
50+
// Throws for invalid timezone ids.
51+
assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'Europe/_Paris'}));
52+
assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'America/New__York'}));
53+
assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'America//New_York'}));
54+
assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'America/New_York_'}));
55+
assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'America/New_Y0rk'}));
56+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright 2015 the V8 project authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style license that can be
3+
// found in the LICENSE file.
4+
5+
if (this.Intl) {
6+
// Normalizes Kat{h,}mandu (chromium:487322)
7+
// According to the IANA timezone db, Kathmandu is the current canonical
8+
// name, but ICU got it backward. To make this test robust against a future
9+
// ICU change ( http://bugs.icu-project.org/trac/ticket/12044 ),
10+
// just check that Kat(h)mandu is resolved identically.
11+
df1 = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Katmandu'})
12+
df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Kathmandu'})
13+
assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);
14+
15+
// Normalizes Ulan_Bator to Ulaanbaatar. Unlike Kat(h)mandu, ICU got this
16+
// right so that we make sure that Ulan_Bator is resolved to Ulaanbaatar.
17+
df = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Ulaanbaatar'})
18+
assertEquals('Asia/Ulaanbaatar', df.resolvedOptions().timeZone);
19+
20+
df = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Ulan_Bator'})
21+
assertEquals('Asia/Ulaanbaatar', df.resolvedOptions().timeZone);
22+
23+
// Throws for unsupported time zones.
24+
assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'Aurope/Paris'}));
25+
}

0 commit comments

Comments
 (0)