@@ -101,6 +101,47 @@ def parse_datetime(string, strip_module=False):
101101 return result .replace ("datetime." , "" , 1 )
102102 return result
103103
# Matches a single backslash that does NOT begin an escape sequence accepted
# below, so that callers can double it.
#
# NOTE: with re.VERBOSE, whitespace and "#" are only ignored OUTSIDE of a
# character class.  Every character class is therefore kept on one line with
# its comment placed after the closing bracket; putting comments inside the
# brackets would silently add their letters to the set of accepted escapes.
INVALID_ESCAPE_RE = re.compile(
    r"""
    \\(?!                         # a backslash NOT followed by any of:
        \n                        #   a real newline (line continuation)
        | [\\'"abfnrtv]           #   backslash, quote, or one-letter escape
        | [0-7]                   #   an octal value (1-3 digits, no prefix)
        | o[0-7]{1,3}             #   "o" + octal digits (kept for the
                                  #   self-check that follows in this file)
        | x[0-9A-Fa-f]{2}         #   a hexadecimal value
        | N\{                     #   a unicode char name: opening brace,
            [A-Z][A-Z\ \-]*[A-Z]  #     uppercase WORD, WORDs or WORD-WORDs,
          \}                      #     and a closing brace
        | u[0-9A-Fa-f]{4}         #   a 16-bit unicode char
        | U[0-9A-Fa-f]{8}         #   a 32-bit unicode char
    )""",
    flags=re.VERBOSE,
)
133+
def escape_invalid_escapes(string):
    """
    Double every backslash in *string* that starts an invalid escape.

    Some canonical data includes invalid escape sequences, which
    need to be properly escaped before template render.

    A backslash matched by INVALID_ESCAPE_RE is doubled only when it is
    preceded by an even number of backslashes.  With an odd number, the
    matched backslash is itself the second half of an escaped backslash
    and must be left alone; doubling it would produce three backslashes
    followed by the same invalid character.

    Returns the escaped copy of *string*; text without invalid escapes is
    returned unchanged.
    """

    def _replacement(match):
        start = match.start()
        # Count the consecutive backslashes immediately before the match.
        preceding = 0
        while preceding < start and string[start - preceding - 1] == "\\":
            preceding += 1
        if preceding % 2:
            # Second half of an escaped backslash: keep the single char.
            return "\\"
        # A callable replacement is inserted literally, so this is exactly
        # two backslashes.
        return "\\\\"

    return INVALID_ESCAPE_RE.sub(_replacement, string)
140+
# Import-time sanity check: a string built only from sequences the pattern
# accepts must come back from escape_invalid_escapes() untouched.
ALL_VALID = (
    r"\newline\\\'\"\a\b\f\n\r\t\v\o123"
    r"\xFF\N{GREATER-THAN SIGN}\u0394\U00000394"
)

assert escape_invalid_escapes(ALL_VALID) == ALL_VALID
104145
105146def get_tested_properties (spec ):
106147 """
@@ -291,6 +332,7 @@ def generate(
291332 env .filters ["regex_split" ] = regex_split
292333 env .filters ["zip" ] = zip
293334 env .filters ["parse_datetime" ] = parse_datetime
335+ env .filters ["escape_invalid_escapes" ] = escape_invalid_escapes
294336 env .tests ["error_case" ] = error_case
295337 result = True
296338 for exercise in sorted (glob (os .path .join ("exercises" , exercise_glob ))):
0 commit comments