Skip to content

Commit 0054eae

Browse files
committed
Handle numeric separators in integer literals while tokenizing
1 parent f69bccf commit 0054eae

7 files changed

Lines changed: 126 additions & 12 deletions

dist/assemblyscript.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/assemblyscript.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/diagnosticMessages.generated.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ export enum DiagnosticCode {
111111
Expected_0_type_arguments_but_got_1 = 2558,
112112
A_member_initializer_in_a_enum_declaration_cannot_reference_members_declared_after_it_including_members_defined_in_other_enums = 2651,
113113
Namespace_0_has_no_exported_member_1 = 2694,
114-
File_0_not_found = 6054
114+
File_0_not_found = 6054,
115+
Numeric_separators_are_not_allowed_here = 6188,
116+
Multiple_consecutive_numeric_separators_are_not_permitted = 6189
115117
}
116118

117119
/** Translates a diagnostic code to its respective string. */
@@ -222,6 +224,8 @@ export function diagnosticCodeToString(code: DiagnosticCode): string {
222224
case 2651: return "A member initializer in a enum declaration cannot reference members declared after it, including members defined in other enums.";
223225
case 2694: return "Namespace '{0}' has no exported member '{1}'.";
224226
case 6054: return "File '{0}' not found.";
227+
case 6188: return "Numeric separators are not allowed here.";
228+
case 6189: return "Multiple consecutive numeric separators are not permitted.";
225229
default: return "";
226230
}
227231
}

src/diagnosticMessages.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,5 +106,7 @@
106106
"A member initializer in a enum declaration cannot reference members declared after it, including members defined in other enums.": 2651,
107107
"Namespace '{0}' has no exported member '{1}'.": 2694,
108108

109-
"File '{0}' not found.": 6054
109+
"File '{0}' not found.": 6054,
110+
"Numeric separators are not allowed here.": 6188,
111+
"Multiple consecutive numeric separators are not permitted.": 6189
110112
}

src/tokenizer.ts

Lines changed: 78 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,7 +1222,8 @@ export class Tokenizer extends DiagnosticEmitter {
12221222
if (c == CharCode.DOT || c == CharCode.E || c == CharCode.e) {
12231223
return false;
12241224
}
1225-
if (c < CharCode._0 || c > CharCode._9) break;
1225+
if ((c < CharCode._0 || c > CharCode._9) && c != CharCode._) break;
1226+
// does not validate separator placement (this is done in readXYInteger)
12261227
pos++;
12271228
}
12281229
return true;
@@ -1267,8 +1268,10 @@ export class Tokenizer extends DiagnosticEmitter {
12671268
var start = this.pos;
12681269
var value = i64_new(0, 0);
12691270
var i64_16 = i64_new(16, 0);
1271+
var sepEnd = start;
12701272
while (this.pos < this.end) {
1271-
let c = text.charCodeAt(this.pos);
1273+
let pos = this.pos;
1274+
let c = text.charCodeAt(pos);
12721275
if (c >= CharCode._0 && c <= CharCode._9) {
12731276
// value = value * 16 + c - CharCode._0;
12741277
value = i64_add(
@@ -1287,16 +1290,31 @@ export class Tokenizer extends DiagnosticEmitter {
12871290
i64_mul(value, i64_16),
12881291
i64_new(10 + c - CharCode.a, 0)
12891292
);
1293+
} else if (c == CharCode._) {
1294+
if (sepEnd == pos) {
1295+
this.error(
1296+
sepEnd == start
1297+
? DiagnosticCode.Numeric_separators_are_not_allowed_here
1298+
: DiagnosticCode.Multiple_consecutive_numeric_separators_are_not_permitted,
1299+
this.range(pos)
1300+
);
1301+
}
1302+
sepEnd = pos + 1;
12901303
} else {
12911304
break;
12921305
}
1293-
++this.pos;
1306+
this.pos = pos + 1;
12941307
}
12951308
if (this.pos == start) {
12961309
this.error(
12971310
DiagnosticCode.Hexadecimal_digit_expected,
12981311
this.range(start)
12991312
);
1313+
} else if (sepEnd == this.pos) {
1314+
this.error(
1315+
DiagnosticCode.Numeric_separators_are_not_allowed_here,
1316+
this.range(sepEnd - 1)
1317+
);
13001318
}
13011319
return value;
13021320
}
@@ -1306,24 +1324,41 @@ export class Tokenizer extends DiagnosticEmitter {
13061324
var start = this.pos;
13071325
var value = i64_new(0, 0);
13081326
var i64_10 = i64_new(10, 0);
1327+
var sepEnd = start;
13091328
while (this.pos < this.end) {
1310-
let c = text.charCodeAt(this.pos);
1329+
let pos = this.pos;
1330+
let c = text.charCodeAt(pos);
13111331
if (c >= CharCode._0 && c <= CharCode._9) {
13121332
// value = value * 10 + c - CharCode._0;
13131333
value = i64_add(
13141334
i64_mul(value, i64_10),
13151335
i64_new(c - CharCode._0, 0)
13161336
);
1337+
} else if (c == CharCode._) {
1338+
if (sepEnd == pos) {
1339+
this.error(
1340+
sepEnd == start
1341+
? DiagnosticCode.Numeric_separators_are_not_allowed_here
1342+
: DiagnosticCode.Multiple_consecutive_numeric_separators_are_not_permitted,
1343+
this.range(pos)
1344+
);
1345+
}
1346+
sepEnd = pos + 1;
13171347
} else {
13181348
break;
13191349
}
1320-
++this.pos;
1350+
this.pos = pos + 1;
13211351
}
13221352
if (this.pos == start) {
13231353
this.error(
13241354
DiagnosticCode.Digit_expected,
13251355
this.range(start)
13261356
);
1357+
} else if (sepEnd == this.pos) {
1358+
this.error(
1359+
DiagnosticCode.Numeric_separators_are_not_allowed_here,
1360+
this.range(sepEnd - 1)
1361+
);
13271362
}
13281363
return value;
13291364
}
@@ -1333,14 +1368,26 @@ export class Tokenizer extends DiagnosticEmitter {
13331368
var start = this.pos;
13341369
var value = i64_new(0, 0);
13351370
var i64_8 = i64_new(8, 0);
1371+
var sepEnd = start;
13361372
while (this.pos < this.end) {
1337-
let c = text.charCodeAt(this.pos);
1373+
let pos = this.pos;
1374+
let c = text.charCodeAt(pos);
13381375
if (c >= CharCode._0 && c <= CharCode._7) {
13391376
// value = value * 8 + c - CharCode._0;
13401377
value = i64_add(
13411378
i64_mul(value, i64_8),
13421379
i64_new(c - CharCode._0, 0)
13431380
);
1381+
} else if (c == CharCode._) {
1382+
if (sepEnd == pos) {
1383+
this.error(
1384+
sepEnd == start
1385+
? DiagnosticCode.Numeric_separators_are_not_allowed_here
1386+
: DiagnosticCode.Multiple_consecutive_numeric_separators_are_not_permitted,
1387+
this.range(pos)
1388+
);
1389+
}
1390+
sepEnd = pos + 1;
13441391
} else {
13451392
break;
13461393
}
@@ -1351,6 +1398,11 @@ export class Tokenizer extends DiagnosticEmitter {
13511398
DiagnosticCode.Octal_digit_expected,
13521399
this.range(start)
13531400
);
1401+
} else if (sepEnd == this.pos) {
1402+
this.error(
1403+
DiagnosticCode.Numeric_separators_are_not_allowed_here,
1404+
this.range(sepEnd - 1)
1405+
);
13541406
}
13551407
return value;
13561408
}
@@ -1361,8 +1413,10 @@ export class Tokenizer extends DiagnosticEmitter {
13611413
var value = i64_new(0, 0);
13621414
var i64_2 = i64_new(2, 0);
13631415
var i64_1 = i64_new(1, 0);
1416+
var sepEnd = start;
13641417
while (this.pos < this.end) {
1365-
let c = text.charCodeAt(this.pos);
1418+
let pos = this.pos;
1419+
let c = text.charCodeAt(pos);
13661420
if (c == CharCode._0) {
13671421
// value = value * 2;
13681422
value = i64_mul(
@@ -1375,16 +1429,31 @@ export class Tokenizer extends DiagnosticEmitter {
13751429
i64_mul(value, i64_2),
13761430
i64_1
13771431
);
1432+
} else if (c == CharCode._) {
1433+
if (sepEnd == pos) {
1434+
this.error(
1435+
sepEnd == start
1436+
? DiagnosticCode.Numeric_separators_are_not_allowed_here
1437+
: DiagnosticCode.Multiple_consecutive_numeric_separators_are_not_permitted,
1438+
this.range(pos)
1439+
);
1440+
}
1441+
sepEnd = pos + 1;
13781442
} else {
13791443
break;
13801444
}
1381-
++this.pos;
1445+
this.pos = pos + 1;
13821446
}
13831447
if (this.pos == start) {
13841448
this.error(
13851449
DiagnosticCode.Binary_digit_expected,
13861450
this.range(start)
13871451
);
1452+
} else if (sepEnd == this.pos) {
1453+
this.error(
1454+
DiagnosticCode.Numeric_separators_are_not_allowed_here,
1455+
this.range(sepEnd - 1)
1456+
);
13881457
}
13891458
return value;
13901459
}
@@ -1404,6 +1473,7 @@ export class Tokenizer extends DiagnosticEmitter {
14041473
}
14051474

14061475
readDecimalFloat(): f64 {
1476+
// TODO: numeric separators (parseFloat can't handle these)
14071477
var start = this.pos;
14081478
var text = this.source.text;
14091479
while (this.pos < this.end && isDecimalDigit(text.charCodeAt(this.pos))) {

tests/parser/numeric-separators.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
11_11_11;
2+
0b01_01_01;
3+
0o12_12_12;
4+
0x23_23_23;
5+
6+
// error cases that should still continue parsing:
7+
8+
11_11_11_; // 6188
9+
11__11_11; // 6189
10+
11+
0b01_01_01_; // 6188
12+
0b01__01_01; // 6189
13+
14+
0o12_12_12_; // 6188
15+
0o12__12_12; // 6189
16+
17+
0x23_23_23_; // 6188
18+
0x23__23_23; // 6189
tests/parser/numeric-separators.ts.fixture.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
111111;
2+
21;
3+
41610;
4+
2302755;
5+
111111;
6+
111111;
7+
21;
8+
21;
9+
41610;
10+
41610;
11+
2302755;
12+
2302755;
13+
// ERROR 6188: "Numeric separators are not allowed here." in numeric-separators.ts:8:8
14+
// ERROR 6189: "Multiple consecutive numeric separators are not permitted." in numeric-separators.ts:9:3
15+
// ERROR 6188: "Numeric separators are not allowed here." in numeric-separators.ts:11:10
16+
// ERROR 6189: "Multiple consecutive numeric separators are not permitted." in numeric-separators.ts:12:5
17+
// ERROR 6188: "Numeric separators are not allowed here." in numeric-separators.ts:14:10
18+
// ERROR 6189: "Multiple consecutive numeric separators are not permitted." in numeric-separators.ts:15:5
19+
// ERROR 6188: "Numeric separators are not allowed here." in numeric-separators.ts:17:10
20+
// ERROR 6189: "Multiple consecutive numeric separators are not permitted." in numeric-separators.ts:18:5

0 commit comments

Comments
 (0)