Skip to content

Commit d1643ac

Browse files
committed
Fix syntax_non_utf8 test to not depend on locale encoding
Use an explicit encoding='latin-1' so the test works regardless of the system locale (e.g. the C/POSIX locale defaults to ASCII).
1 parent 6418578 commit d1643ac

File tree

6 files changed

+13
-7
lines changed

6 files changed

+13
-7
lines changed

.cspell.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@
147147
// "stat"
148148
"FIRMLINK",
149149
// CPython internal names
150+
"PYTHONUTF",
150151
"sysdict",
151152
"settraceallthreads",
152153
"setprofileallthreads"

crates/vm/src/stdlib/_winapi.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1944,9 +1944,7 @@ mod _winapi {
19441944
} else {
19451945
hr as u32
19461946
};
1947-
return Err(
1948-
std::io::Error::from_raw_os_error(err as i32).to_pyexception(vm),
1949-
);
1947+
return Err(std::io::Error::from_raw_os_error(err as i32).to_pyexception(vm));
19501948
}
19511949
Ok(())
19521950
}

crates/vm/src/stdlib/sys.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1588,7 +1588,7 @@ mod sys {
15881588
hash_randomization: settings.hash_seed.is_none() as u8,
15891589
isolated: settings.isolated as u8,
15901590
dev_mode: settings.dev_mode,
1591-
utf8_mode: settings.utf8_mode,
1591+
utf8_mode: settings.utf8_mode as u8,
15921592
int_max_str_digits: settings.int_max_str_digits,
15931593
safe_path: settings.safe_path,
15941594
warn_default_encoding: settings.warn_default_encoding as u8,

crates/vm/src/vm/setting.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ pub struct Settings {
124124
pub stdio_encoding: Option<String>,
125125
/// PYTHONIOENCODING - stdio error handler
126126
pub stdio_errors: Option<String>,
127-
pub utf8_mode: u8,
127+
pub utf8_mode: i8,
128128
/// --check-hash-based-pycs
129129
pub check_hash_pycs_mode: CheckHashPycsMode,
130130

@@ -211,7 +211,7 @@ impl Default for Settings {
211211
allow_external_library: cfg!(feature = "importlib"),
212212
stdio_encoding: None,
213213
stdio_errors: None,
214-
utf8_mode: 0,
214+
utf8_mode: -1,
215215
int_max_str_digits: 4300,
216216
#[cfg(feature = "flame-it")]
217217
profile_output: None,

extra_tests/snippets/syntax_non_utf8.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@
55
dir_path = os.path.dirname(os.path.realpath(__file__))
66

77
with assert_raises(SyntaxError):
8-
with open(os.path.join(dir_path , "non_utf8.txt")) as f:
8+
with open(os.path.join(dir_path , "non_utf8.txt"), encoding="latin-1") as f:
99
eval(f.read())

src/settings.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,13 @@ pub fn parse_opts() -> Result<(Settings, RunMode), lexopt::Error> {
335335
});
336336
settings.xoptions.extend(xopts);
337337

338+
// Resolve utf8_mode if not explicitly set by PYTHONUTF8 or -X utf8.
339+
// Default to UTF-8 mode since RustPython's locale encoding detection
340+
// is incomplete. Users can set PYTHONUTF8=0 or -X utf8=0 to disable.
341+
if settings.utf8_mode < 0 {
342+
settings.utf8_mode = 1;
343+
}
344+
338345
settings.warn_default_encoding =
339346
settings.warn_default_encoding || env_bool("PYTHONWARNDEFAULTENCODING");
340347
settings.faulthandler = settings.faulthandler || env_bool("PYTHONFAULTHANDLER");

0 commit comments

Comments
 (0)