Skip to content

Commit faacd60

Browse files
committed
Simplify handling of encoding in lexer.py
1 parent ce13747 commit faacd60

1 file changed

Lines changed: 10 additions & 22 deletions

File tree

sqlparse/lexer.py

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# and to allow some customizations.
1414

1515
import re
16-
import sys
1716

1817
from sqlparse import tokens
1918
from sqlparse.keywords import SQL_REGEX
@@ -42,14 +41,6 @@ def __init__(self):
4241
new_state = (tdef[2],)
4342
self._tokens[state].append((rex, tdef[1], new_state))
4443

45-
def _decode(self, text):
46-
if not isinstance(text, text_type):
47-
try:
48-
text = text.decode(self.encoding)
49-
except UnicodeDecodeError:
50-
text = text.decode('unicode-escape')
51-
return text
52-
5344
def get_tokens(self, text):
5445
"""
5546
Return an iterable of (tokentype, value) pairs generated from
@@ -58,27 +49,24 @@ def get_tokens(self, text):
5849
5950
Also preprocess the text, i.e. expand tabs and strip it if
6051
wanted and apply registered filters.
61-
"""
62-
if isinstance(text, string_types):
63-
if sys.version_info[0] < 3 and isinstance(text, text_type):
64-
text = StringIO(text.encode('utf-8'))
65-
self.encoding = 'utf-8'
66-
else:
67-
text = StringIO(text)
68-
69-
return self.get_tokens_unprocessed(text)
7052
71-
def get_tokens_unprocessed(self, stream):
72-
"""
7353
Split ``text`` into (tokentype, text) pairs.
7454
7555
``stack`` is the initial stack (default: ``['root']``)
7656
"""
7757
statestack = ['root', ]
7858
statetokens = self._tokens['root']
7959

80-
text = stream.read()
81-
text = self._decode(text)
60+
if isinstance(text, string_types):
61+
text = StringIO(text)
62+
63+
text = text.read()
64+
if not isinstance(text, text_type):
65+
try:
66+
text = text.decode(self.encoding)
67+
except UnicodeDecodeError:
68+
text = text.decode('unicode-escape')
69+
8270
iterable = iter(range(len(text)))
8371

8472
for pos in iterable:

0 commit comments

Comments
 (0)