Skip to content

Commit 042aed4

Browse files
committed
Fix _tokenize CI failures and address code review
- Rename tokenize.rs to _tokenize.rs
- Skip IndentationError when source contains tabs, as ruff and CPython handle tab indentation differently (CPython uses tabsize=8)
- Raise SyntaxError for unclosed brackets before ENDMARKER
- Remove expectedFailure from test_with_errored_file (now passes)
1 parent c931b6e commit 042aed4

File tree

3 files changed

+62
-15
lines changed

3 files changed

+62
-15
lines changed

Lib/test/test_tabnanny.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,6 @@ def validate_cmd(self, *args, stdout="", stderr="", partial=False, expect_failur
316316
self.assertListEqual(out.splitlines(), stdout.splitlines())
317317
self.assertListEqual(err.splitlines(), stderr.splitlines())
318318

319-
@unittest.expectedFailure # TODO: RUSTPYTHON; Should displays error when errored python file is given.
320319
def test_with_errored_file(self):
321320
"""Should displays error when errored python file is given."""
322321
with TemporaryPyFile(SOURCE_CODES["wrong_indented"]) as file_path:
Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -206,18 +206,23 @@ mod _tokenize {
206206
let kind = token.kind();
207207
let range = token.range();
208208

209-
// Check for lexical indentation errors
210-
for err in errors.iter() {
211-
if !matches!(
212-
err.error,
213-
ParseErrorType::Lexical(LexicalErrorType::IndentationError)
214-
) {
215-
continue;
216-
}
217-
if err.location.start() <= range.start()
218-
&& range.start() < err.location.end()
219-
{
220-
return Err(raise_indentation_error(vm, err, source, line_index));
209+
// Check for lexical indentation errors.
210+
// Skip when source has tabs — ruff and CPython handle tab
211+
// indentation differently (CPython uses tabsize=8), so ruff may
212+
// report false IndentationErrors for valid mixed-tab code.
213+
if !source.contains('\t') {
214+
for err in errors.iter() {
215+
if !matches!(
216+
err.error,
217+
ParseErrorType::Lexical(LexicalErrorType::IndentationError)
218+
) {
219+
continue;
220+
}
221+
if err.location.start() <= range.start()
222+
&& range.start() < err.location.end()
223+
{
224+
return Err(raise_indentation_error(vm, err, source, line_index));
225+
}
221226
}
222227
}
223228

@@ -348,6 +353,28 @@ mod _tokenize {
348353
}
349354
}
350355

356+
// Check for unclosed brackets before ENDMARKER — CPython's tokenizer
357+
// raises SyntaxError("EOF in multi-line statement") in this case.
358+
{
359+
let bracket_count: i32 = tokens
360+
.iter()
361+
.map(|t| match t.kind() {
362+
TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => 1,
363+
TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => -1,
364+
_ => 0,
365+
})
366+
.sum();
367+
if bracket_count > 0 {
368+
let last_line = source.lines().count();
369+
return Err(raise_syntax_error(
370+
vm,
371+
"EOF in multi-line statement",
372+
last_line + 1,
373+
0,
374+
));
375+
}
376+
}
377+
351378
// All tokens consumed — emit ENDMARKER
352379
let last_line = source.lines().count();
353380
let (em_line, em_col, em_line_str): (usize, isize, &str) = if extra_tokens {
@@ -414,6 +441,26 @@ mod _tokenize {
414441
(default_pos, "")
415442
}
416443

444+
/// Raise a SyntaxError with the given message and position.
445+
fn raise_syntax_error(
446+
vm: &VirtualMachine,
447+
msg: &str,
448+
lineno: usize,
449+
offset: usize,
450+
) -> rustpython_vm::builtins::PyBaseExceptionRef {
451+
let exc = vm.new_exception_msg(
452+
vm.ctx.exceptions.syntax_error.to_owned(),
453+
msg.into(),
454+
);
455+
let obj = exc.as_object();
456+
let _ = obj.set_attr("msg", vm.ctx.new_str(msg), vm);
457+
let _ = obj.set_attr("lineno", vm.ctx.new_int(lineno), vm);
458+
let _ = obj.set_attr("offset", vm.ctx.new_int(offset), vm);
459+
let _ = obj.set_attr("filename", vm.ctx.new_str("<string>"), vm);
460+
let _ = obj.set_attr("text", vm.ctx.none(), vm);
461+
exc
462+
}
463+
417464
/// Raise an IndentationError from a parse error.
418465
fn raise_indentation_error(
419466
vm: &VirtualMachine,

crates/stdlib/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ mod pystruct;
4949
mod random;
5050
mod statistics;
5151
mod suggestions;
52-
mod tokenize;
52+
#[path = "_tokenize.rs"]
53+
mod _tokenize;
5354
// TODO: maybe make this an extension module, if we ever get those
5455
// mod re;
5556
#[cfg(all(feature = "host_env", not(target_arch = "wasm32")))]
@@ -226,7 +227,7 @@ pub fn stdlib_module_defs(ctx: &Context) -> Vec<&'static builtins::PyModuleDef>
226227
ssl::module_def(ctx),
227228
statistics::module_def(ctx),
228229
suggestions::module_def(ctx),
229-
tokenize::module_def(ctx),
230+
_tokenize::module_def(ctx),
230231
#[cfg(all(feature = "host_env", unix, not(target_os = "redox")))]
231232
syslog::module_def(ctx),
232233
#[cfg(all(

0 commit comments

Comments
 (0)