Skip to content

Commit 0b3847d

Browse files
author
Christian Heimes
committed
Issue python#15096: Drop support for the ur string prefix
1 parent 10c8791 commit 0b3847d

6 files changed

Lines changed: 28 additions & 37 deletions

File tree

Doc/reference/lexical_analysis.rst

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ String literals are described by the following lexical definitions:
401401

402402
.. productionlist::
403403
stringliteral: [`stringprefix`](`shortstring` | `longstring`)
404-
stringprefix: "r" | "u" | "ur" | "R" | "U" | "UR" | "Ur" | "uR"
404+
stringprefix: "r" | "u" | "R" | "U"
405405
shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"'
406406
longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""'
407407
shortstringitem: `shortstringchar` | `stringescapeseq`
@@ -444,19 +444,21 @@ must be expressed with escapes.
444444
As of Python 3.3 it is possible again to prefix unicode strings with a
445445
``u`` prefix to simplify maintenance of dual 2.x and 3.x codebases.
446446

447-
Both string and bytes literals may optionally be prefixed with a letter ``'r'``
447+
String and bytes literals may optionally be prefixed with a letter ``'r'``
448448
or ``'R'``; such strings are called :dfn:`raw strings` and treat backslashes as
449449
literal characters. As a result, in string literals, ``'\U'`` and ``'\u'``
450-
escapes in raw strings are not treated specially.
450+
escapes in raw strings are not treated specially. Given that Python 2.x's raw
451+
unicode literals behave differently than Python 3.x's, the ``'ur'`` syntax
452+
is not supported.
451453

452454
.. versionadded:: 3.3
453455
The ``'rb'`` prefix of raw bytes literals has been added as a synonym
454456
of ``'br'``.
455457

456458
.. versionadded:: 3.3
457-
Support for the unicode legacy literal (``u'value'``) and other
458-
versions were reintroduced to simplify the maintenance of dual
459-
Python 2.x and 3.x codebases. See :pep:`414` for more information.
459+
Support for the unicode legacy literal (``u'value'``) was reintroduced
460+
to simplify the maintenance of dual Python 2.x and 3.x codebases.
461+
See :pep:`414` for more information.
460462

461463
In triple-quoted strings, unescaped newlines and quotes are allowed (and are
462464
retained), except that three unescaped quotes in a row terminate the string. (A

Lib/test/test_strlit.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,15 @@ def test_eval_bytes_raw(self):
123123
self.assertRaises(SyntaxError, eval, """ rrb'' """)
124124
self.assertRaises(SyntaxError, eval, """ rbb'' """)
125125

126+
def test_eval_str_u(self):
127+
self.assertEqual(eval(""" u'x' """), 'x')
128+
self.assertEqual(eval(""" U'\u00e4' """), 'ä')
129+
self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
130+
self.assertRaises(SyntaxError, eval, """ ur'' """)
131+
self.assertRaises(SyntaxError, eval, """ ru'' """)
132+
self.assertRaises(SyntaxError, eval, """ bu'' """)
133+
self.assertRaises(SyntaxError, eval, """ ub'' """)
134+
126135
def check_encoding(self, encoding, extra=""):
127136
modname = "xx_" + encoding.replace("-", "_")
128137
fn = os.path.join(self.tmpdir, modname + ".py")

Lib/test/test_tokenize.py

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -299,24 +299,6 @@
299299
STRING 'u"abc"' (1, 0) (1, 6)
300300
OP '+' (1, 7) (1, 8)
301301
STRING 'U"abc"' (1, 9) (1, 15)
302-
>>> dump_tokens("ur'abc' + uR'abc' + Ur'abc' + UR'abc'")
303-
ENCODING 'utf-8' (0, 0) (0, 0)
304-
STRING "ur'abc'" (1, 0) (1, 7)
305-
OP '+' (1, 8) (1, 9)
306-
STRING "uR'abc'" (1, 10) (1, 17)
307-
OP '+' (1, 18) (1, 19)
308-
STRING "Ur'abc'" (1, 20) (1, 27)
309-
OP '+' (1, 28) (1, 29)
310-
STRING "UR'abc'" (1, 30) (1, 37)
311-
>>> dump_tokens('ur"abc" + uR"abc" + Ur"abc" + UR"abc"')
312-
ENCODING 'utf-8' (0, 0) (0, 0)
313-
STRING 'ur"abc"' (1, 0) (1, 7)
314-
OP '+' (1, 8) (1, 9)
315-
STRING 'uR"abc"' (1, 10) (1, 17)
316-
OP '+' (1, 18) (1, 19)
317-
STRING 'Ur"abc"' (1, 20) (1, 27)
318-
OP '+' (1, 28) (1, 29)
319-
STRING 'UR"abc"' (1, 30) (1, 37)
320302
321303
>>> dump_tokens("b'abc' + B'abc'")
322304
ENCODING 'utf-8' (0, 0) (0, 0)
@@ -642,15 +624,15 @@
642624
643625
Legacy unicode literals:
644626
645-
>>> dump_tokens("Örter = u'places'\\ngrün = UR'green'")
627+
>>> dump_tokens("Örter = u'places'\\ngrün = U'green'")
646628
ENCODING 'utf-8' (0, 0) (0, 0)
647629
NAME 'Örter' (1, 0) (1, 5)
648630
OP '=' (1, 6) (1, 7)
649631
STRING "u'places'" (1, 8) (1, 17)
650632
NEWLINE '\\n' (1, 17) (1, 18)
651633
NAME 'grün' (2, 0) (2, 4)
652634
OP '=' (2, 5) (2, 6)
653-
STRING "UR'green'" (2, 7) (2, 16)
635+
STRING "U'green'" (2, 7) (2, 15)
654636
"""
655637

656638
from test import support

Lib/tokenize.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def maybe(*choices): return group(*choices) + '?'
127127
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
128128
Number = group(Imagnumber, Floatnumber, Intnumber)
129129

130-
StringPrefix = r'(?:[uUbB][rR]?|[rR][bB]?)?'
130+
StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'
131131

132132
# Tail end of ' string.
133133
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
@@ -183,12 +183,8 @@ def _compile(expr):
183183
"rB'''": Single3, 'rB"""': Double3,
184184
"RB'''": Single3, 'RB"""': Double3,
185185
"u'''": Single3, 'u"""': Double3,
186-
"ur'''": Single3, 'ur"""': Double3,
187186
"R'''": Single3, 'R"""': Double3,
188187
"U'''": Single3, 'U"""': Double3,
189-
"uR'''": Single3, 'uR"""': Double3,
190-
"Ur'''": Single3, 'Ur"""': Double3,
191-
"UR'''": Single3, 'UR"""': Double3,
192188
'r': None, 'R': None, 'b': None, 'B': None,
193189
'u': None, 'U': None}
194190

@@ -201,8 +197,7 @@ def _compile(expr):
201197
"rb'''", 'rb"""', "rB'''", 'rB"""',
202198
"Rb'''", 'Rb"""', "RB'''", 'RB"""',
203199
"u'''", 'u"""', "U'''", 'U"""',
204-
"ur'''", 'ur"""', "Ur'''", 'Ur"""',
205-
"uR'''", 'uR"""', "UR'''", 'UR"""'):
200+
):
206201
triple_quoted[t] = t
207202
single_quoted = {}
208203
for t in ("'", '"',
@@ -213,8 +208,7 @@ def _compile(expr):
213208
"rb'", 'rb"', "rB'", 'rB"',
214209
"Rb'", 'Rb"', "RB'", 'RB"' ,
215210
"u'", 'u"', "U'", 'U"',
216-
"ur'", 'ur"', "Ur'", 'Ur"',
217-
"uR'", 'uR"', "UR'", 'UR"' ):
211+
):
218212
single_quoted[t] = t
219213

220214
tabsize = 8

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.3.0 Beta 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #15096: Removed support for ur'' as the raw notation isn't
14+
compatible with Python 2.x's raw unicode strings.
15+
1316
- Issue #13783: Generator objects now use the identifier APIs internally
1417

1518
- Issue #14874: Restore charmap decoding speed to pre-PEP 393 levels.

Parser/tokenizer.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,7 +1412,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
14121412
/* Identifier (most frequent token!) */
14131413
nonascii = 0;
14141414
if (is_potential_identifier_start(c)) {
1415-
/* Process b"", r"", u"", br"", rb"" and ur"" */
1415+
/* Process b"", r"", u"", br"" and rb"" */
14161416
int saw_b = 0, saw_r = 0, saw_u = 0;
14171417
while (1) {
14181418
if (!(saw_b || saw_u) && (c == 'b' || c == 'B'))
@@ -1421,7 +1421,8 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
14211421
want to support it in arbitrary order like byte literals. */
14221422
else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U'))
14231423
saw_u = 1;
1424-
else if (!saw_r && (c == 'r' || c == 'R'))
1424+
/* ur"" and ru"" are not supported */
1425+
else if (!(saw_r || saw_u) && (c == 'r' || c == 'R'))
14251426
saw_r = 1;
14261427
else
14271428
break;

0 commit comments

Comments
 (0)