Skip to content

Commit 626000e

Browse files
committed
[[ LCB ]] Make \u parsing more permissive.
Any number of nibbles may now be specified in a \u escape sequence. If the resulting codepoint value is greater than 0x10FFFF then it is clamped and a warning is issued. If there are no nibbles specified, a warning is issued.
1 parent 010b7d3 commit 626000e

6 files changed

Lines changed: 29 additions & 11 deletions

File tree

docs/specs/livecode_builder_language_reference.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Strings use backslash ('\') as an escape - the following are understood:
3030
- **\r**: CR (ASCII 13)
3131
- **\t**: TAB (ASCII 9)
3232
- **\q**: quote '"'
33-
- **\u{FEDCBA}: character with unicode codepoint hex value 0xFEDCBA - BCDEF are optional.
33+
- **\u{X...X}**: character with Unicode codepoint hex value 0xX...X - any number of nibbles may be specified, but the value will be clamped to 0x10FFFF.
3434
- **\\**: backslash '\'
3535

3636
> **Note:** A '.' in an identifier is used as a namespace scope delimiter.

toolchain/lc-compile/src/grammar.g

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1089,7 +1089,7 @@
10891089
'rule' StringLiteral(-> Value):
10901090
STRING_LITERAL(-> EscapedValue) @(-> Position)
10911091
(|
1092-
UnescapeStringLiteral(EscapedValue -> Value)
1092+
UnescapeStringLiteral(Position, EscapedValue -> Value)
10931093
||
10941094
Error_MalformedEscapedString(Position, EscapedValue)
10951095
where(EscapedValue -> Value)

toolchain/lc-compile/src/literal.c

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ void MakeDoubleLiteral(const char *p_token, long *r_literal)
7878
*r_literal = (long)t_value;
7979
}
8080

81-
static int char_to_nibble(char p_char, int *r_nibble)
81+
static int char_to_nibble(char p_char, unsigned int *r_nibble)
8282
{
8383
if (isdigit(p_char))
8484
*r_nibble = p_char - '0';
@@ -122,7 +122,7 @@ void append_utf8_char(char *p_string, int *x_index, int p_char)
122122
}
123123
}
124124

125-
int UnescapeStringLiteral(const char *p_string, long *r_unescaped_string)
125+
int UnescapeStringLiteral(long p_position, const char *p_string, long *r_unescaped_string)
126126
{
127127
// Allocate enough room for the length of the string including a NUL char.
128128
// This is more than enough to handle any escapes as escaped chars are always
@@ -141,6 +141,10 @@ int UnescapeStringLiteral(const char *p_string, long *r_unescaped_string)
141141
{
142142
if (*t_ptr == '\\')
143143
{
144+
// Record the start of the escape.
145+
const char *t_escape;
146+
t_escape = t_ptr;
147+
144148
if (t_ptr + 1 < t_limit)
145149
{
146150
t_ptr += 1;
@@ -163,9 +167,11 @@ int UnescapeStringLiteral(const char *p_string, long *r_unescaped_string)
163167
t_ptr += 1;
164168
if (t_ptr < t_limit && *t_ptr == '{')
165169
{
166-
int t_char, i;
170+
int t_overflow;
171+
unsigned int t_char;
167172
t_char = 0;
168-
for(i = 0; i < 6; i++)
173+
t_overflow = 0;
174+
for(;;)
169175
{
170176
// Advance the input ptr - if we are at the end here
171177
// it is an error.
@@ -177,26 +183,33 @@ int UnescapeStringLiteral(const char *p_string, long *r_unescaped_string)
177183
// a nibble yet, in which case it is an error.
178184
if (*t_ptr == '}')
179185
{
180-
if (i == 0)
181-
goto error_exit;
186+
if (t_ptr == t_escape + 3)
187+
Warning_EmptyUnicodeEscape(p_position + (t_escape - p_string));
182188
break;
183189
}
184190

185191
// Parse the next nibble, shift and add it.
186-
int t_nibble;
192+
unsigned int t_nibble;
187193
if (!char_to_nibble(*t_ptr, &t_nibble))
188194
goto error_exit;
189195

190196
t_char = t_char << 4;
191197
t_char |= t_nibble;
198+
199+
if (t_char > 0x10FFFF)
200+
t_overflow = 1;
192201
}
193202

194203
// If we get here and we are not looking at } then it
195204
// is an error.
196205
if (*t_ptr != '}')
197206
goto error_exit;
198207

199-
t_ptr += 1;
208+
if (t_overflow)
209+
{
210+
Warning_UnicodeEscapeTooBig(p_position + (t_escape - p_string));
211+
t_char = 0x10FFFF;
212+
}
200213

201214
append_utf8_char(t_value, &t_length, t_char);
202215
}

toolchain/lc-compile/src/report.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ DEFINE_ERROR(NonHandlerTypeVariablesCannotBeCalled, "Variables must have handler
185185
void Warning_##Name(long p_position) { _Warning(p_position, Message); }
186186

187187
DEFINE_WARNING(MetadataClausesShouldComeAfterUseClauses, "Metadata clauses should come after use clauses")
188+
DEFINE_WARNING(EmptyUnicodeEscape, "Unicode escape sequence specified with no nibbles")
189+
DEFINE_WARNING(UnicodeEscapeTooBig, "Unicode escape sequence too big, character clamped to \\u{10FFFF}");
188190

189191
////////////////////////////////////////////////////////////////////////////////
190192

toolchain/lc-compile/src/report.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ void Error_CouldNotOpenInputFile(const char *path);
3333
void Error_MalformedToken(long position, const char *token);
3434
void Error_MalformedSyntax(long position);
3535

36+
void Warning_EmptyUnicodeEscape(long position);
37+
void Warning_UnicodeEscapeTooBig(long position);
38+
3639
#ifdef __cplusplus
3740
}
3841
#endif

toolchain/lc-compile/src/support.g

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@
314314
'action' MakeIntegerLiteral(Token: STRING -> Literal: INT)
315315
'action' MakeDoubleLiteral(Token: STRING -> Literal: DOUBLE)
316316
'action' MakeStringLiteral(Token: STRING -> Literal: STRING)
317-
'condition' UnescapeStringLiteral(String: STRING -> UnescapedString: STRING)
317+
'condition' UnescapeStringLiteral(Position:POS, String: STRING -> UnescapedString: STRING)
318318
'action' MakeNameLiteral(Token: STRING -> Literal: NAME)
319319

320320
'action' GetStringOfNameLiteral(Name: NAME -> String: STRING)

0 commit comments

Comments
 (0)