Skip to content

Commit 4097c18

Browse files
committed
drop unicode support in keywords
1 parent a12f0ea commit 4097c18

2 files changed

Lines changed: 20 additions & 78 deletions

File tree

src/main/javacc/CSS3Parser.jj

Lines changed: 7 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -193,66 +193,6 @@ TOKEN_MGR_DECLS :
193193
// url ([!#$%&*-~]|{nonascii}|{escape})*
194194
| < #URL: ( ["!","#","$","%","&","*"-"[","]"-"~"] | <NONASCII> | <ESCAPE> )* >
195195

196-
// A a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?
197-
| < #A_LETTER: "a" | "\\" ("0")? ("0")? ("0")? ("0")? ( "41" | "61" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? >
198-
199-
// C c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?
200-
| < #C_LETTER: "c" | "\\" ("0")? ("0")? ("0")? ("0")? ( "43" | "63" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? >
201-
202-
// D d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?
203-
| < #D_LETTER: "d" | "\\" ("0")? ("0")? ("0")? ("0")? ( "44" | "64" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? >
204-
205-
// E e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?
206-
| < #E_LETTER: "e" | "\\" ("0")? ("0")? ("0")? ("0")? ( "45" | "65" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? >
207-
208-
// F f|\\0{0,4}(46|66)(\r\n|[ \t\r\n\f])?
209-
| < #F_LETTER: "f" | "\\" ("0")? ("0")? ("0")? ("0")? ( "46" | "66" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? >
210-
211-
// G g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g
212-
| < #G_LETTER: "g" | "\\" ("0")? ("0")? ("0")? ("0")? ( "47" | "67" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "g" >
213-
214-
// H h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h
215-
| < #H_LETTER: "h" | "\\" ("0")? ("0")? ("0")? ("0")? ( "48" | "68" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "h" >
216-
217-
// I i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i
218-
| < #I_LETTER: "i" | "\\" ("0")? ("0")? ("0")? ("0")? ( "49" | "69" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "i" >
219-
220-
// K k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k
221-
| < #K_LETTER: "k" | "\\" ("0")? ("0")? ("0")? ("0")? ( "4b" | "6b" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "k" >
222-
223-
// L l|\\0{0,4}(4c|6c)(\r\n|[ \t\r\n\f])?|\\l
224-
| < #L_LETTER: "l" | "\\" ("0")? ("0")? ("0")? ("0")? ( "4c" | "6c" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "l" >
225-
226-
// M m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m
227-
| < #M_LETTER: "m" | "\\" ("0")? ("0")? ("0")? ("0")? ( "4d" | "6d" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "m" >
228-
229-
// N n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n
230-
| < #N_LETTER: "n" | "\\" ("0")? ("0")? ("0")? ("0")? ( "4e" | "6e" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "n" >
231-
232-
// O o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o
233-
| < #O_LETTER: "o" | "\\" ("0")? ("0")? ("0")? ("0")? ( "4f" | "6f" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "o" >
234-
235-
// P p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p
236-
| < #P_LETTER: "p" | "\\" ("0")? ("0")? ("0")? ("0")? ( "50" | "70" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "p" >
237-
238-
// R r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r
239-
| < #R_LETTER: "r" | "\\" ("0")? ("0")? ("0")? ("0")? ( "52" | "72" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "r" >
240-
241-
// S s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s
242-
| < #S_LETTER: "s" | "\\" ("0")? ("0")? ("0")? ("0")? ( "53" | "73" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "s" >
243-
244-
// T t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t
245-
| < #T_LETTER: "t" | "\\" ("0")? ("0")? ("0")? ("0")? ( "54" | "74" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "t" >
246-
247-
// U u|\\0{0,4}(55|75)(\r\n|[ \t\r\n\f])?|\\u
248-
| < #U_LETTER: "u" | "\\" ("0")? ("0")? ("0")? ("0")? ( "55" | "75" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "u" >
249-
250-
// X x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x
251-
| < #X_LETTER: "x" | "\\" ("0")? ("0")? ("0")? ("0")? ( "58" | "78" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "x" >
252-
253-
// Z z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z
254-
| < #Z_LETTER: "z" | "\\" ("0")? ("0")? ("0")? ("0")? ( "5a" | "7a" ) ( "\r\n" | [ " ", "\t", "\r", "\n", "\f"])? | "\\" "z" >
255-
256196
// "<!--" {return CDO;}
257197
| < CDO: "<!--" >
258198

@@ -307,25 +247,25 @@ TOKEN_MGR_DECLS :
307247
| < HASH: "#" <NAME> >
308248

309249
// @{I}{M}{P}{O}{R}{T} {return IMPORT_SYM;}
310-
| < IMPORT_SYM: "@" <I_LETTER> <M_LETTER> <P_LETTER> <O_LETTER> <R_LETTER> <T_LETTER>>
250+
| < IMPORT_SYM: "@import" >
311251

312252
// @{P}{A}{G}{E} {return PAGE_SYM;}
313-
| < PAGE_SYM: "@" <P_LETTER> <A_LETTER> <G_LETTER> <E_LETTER> >
253+
| < PAGE_SYM: "@page" >
314254

315255
// @{M}{E}{D}{I}{A} {return MEDIA_SYM;}
316-
| < MEDIA_SYM: "@" <M_LETTER> <E_LETTER> <D_LETTER> <I_LETTER> <A_LETTER> >
256+
| < MEDIA_SYM: "@media" >
317257

318258
// "@{F}{O}{N}{T}-{F}{A}{C}{E}" {return FONT_FACE_SYM;}
319-
| < FONT_FACE_SYM: "@" <F_LETTER> <O_LETTER> <N_LETTER> <T_LETTER> < MINUS > <F_LETTER> <A_LETTER> <C_LETTER> <E_LETTER> >
259+
| < FONT_FACE_SYM: "@font-face" >
320260

321261
// @{C}{H}{A}{R}{S}{E}{T} {return CHARSET_SYM;}
322-
| < CHARSET_SYM: "@" <C_LETTER> <H_LETTER> <A_LETTER> <R_LETTER> <S_LETTER> <E_LETTER> <T_LETTER> >
262+
| < CHARSET_SYM: "@charset" >
323263

324264
// "!"({w}|{comment})*{I}{M}{P}{O}{R}{T}{A}{N}{T} {return IMPORTANT_SYM;}
325-
| < IMPORTANT_SYM: "!" ( <S> | <COMMENT_> )* <I_LETTER> <M_LETTER> <P_LETTER> <O_LETTER> <R_LETTER> <T_LETTER> <A_LETTER> <N_LETTER> <T_LETTER> >
265+
| < IMPORTANT_SYM: "!" ( <S> | <COMMENT_> )* "important" >
326266

327267
// {num}{E}{M} {return EMS;}
328-
| < EMS: <NUM> <E_LETTER> <M_LETTER> > { matchedToken.image = ParserUtils.trimBy(image, 0, 2); }
268+
| < EMS: <NUM> "em" > { matchedToken.image = ParserUtils.trimBy(image, 0, 2); }
329269

330270
// {num}{R}{E}{M} {return REM;}
331271
| < REM: <NUM> "rem" > { matchedToken.image = ParserUtils.trimBy(image, 0, 3); }

src/test/java/org/htmlunit/cssparser/parser/CSS3ParserTest.java

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3727,25 +3727,27 @@ public void backslashZeroHack() throws Exception {
37273727
*/
37283728
@Test
37293729
public void unicode() throws Exception {
3730-
unicode("@p\\41ge :right {}", "@page :right { }");
3731-
unicode("@p\\041ge :right {}", "@page :right { }");
3732-
unicode("@p\\0041ge :right {}", "@page :right { }");
3733-
unicode("@p\\00041ge :right {}", "@page :right { }");
3734-
unicode("@p\\000041ge :right {}", "@page :right { }");
3730+
// no longer supported starting from version 5
3731+
// escaped keywords are really uncommon, and dropping support for them makes the parser much simpler
3732+
// unicode("@p\\41ge :right {}", "@page :right { }");
3733+
// unicode("@p\\041ge :right {}", "@page :right { }");
3734+
// unicode("@p\\0041ge :right {}", "@page :right { }");
3735+
// unicode("@p\\00041ge :right {}", "@page :right { }");
3736+
// unicode("@p\\000041ge :right {}", "@page :right { }");
37353737

37363738
// \\0000041 - fails
3737-
unicode("@p\\0000041ge :right {}", "@p\\0000041ge :right {}");
3739+
// unicode("@p\\0000041ge :right {}", "@p\\0000041ge :right {}");
37383740

37393741
// terminated by whitespace
3740-
unicode("@\\0070 age :right {}", "@page :right { }");
3741-
unicode("@\\0070\tage :right {}", "@page :right { }");
3742-
unicode("@\\0070\r\nage :right {}", "@page :right { }");
3742+
// unicode("@\\0070 age :right {}", "@page :right { }");
3743+
// unicode("@\\0070\tage :right {}", "@page :right { }");
3744+
// unicode("@\\0070\r\nage :right {}", "@page :right { }");
37433745

37443746
// terminated by length
3745-
unicode("@\\000070age :right {}", "@page :right { }");
3747+
// unicode("@\\000070age :right {}", "@page :right { }");
37463748

37473749
// backslash ignored
3748-
unicode("@\\page :right {}", "@page :right { }");
3750+
// unicode("@\\page :right {}", "@page :right { }");
37493751
}
37503752

37513753
/**

0 commit comments

Comments
 (0)