Skip to content

Commit 4ef242f

Browse files
committed
Fix syntax_non_utf8 test to not depend on locale encoding
Pass an explicit encoding="latin-1" when opening the file so the test works regardless of the system locale (e.g. the C/POSIX locale defaults to ASCII).
1 parent c7c0fe5 commit 4ef242f

File tree

6 files changed

+31
-25
lines changed

.cspell.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@
147147
// "stat"
148148
"FIRMLINK",
149149
// CPython internal names
150+
"PYTHONUTF",
150151
"sysdict",
151152
"settraceallthreads",
152153
"setprofileallthreads"

crates/vm/src/stdlib/_winapi.rs

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -279,9 +279,9 @@ mod _winapi {
279279
}
280280
si_attr!(dwFlags);
281281
si_attr!(wShowWindow);
282-
si_attr!(hStdInput, usize);
283-
si_attr!(hStdOutput, usize);
284-
si_attr!(hStdError, usize);
282+
si_attr!(hStdInput, isize);
283+
si_attr!(hStdOutput, isize);
284+
si_attr!(hStdError, isize);
285285

286286
let mut env = args
287287
.env_mapping
@@ -1160,7 +1160,7 @@ mod _winapi {
11601160
initial_state: bool,
11611161
name: Option<PyStrRef>,
11621162
vm: &VirtualMachine,
1163-
) -> PyResult<Option<WinHandle>> {
1163+
) -> PyResult<WinHandle> {
11641164
use windows_sys::Win32::System::Threading::CreateEventW as WinCreateEventW;
11651165

11661166
let _ = security_attributes; // Ignored, always NULL
@@ -1177,15 +1177,11 @@ mod _winapi {
11771177
)
11781178
};
11791179

1180-
if handle == windows_sys::Win32::Foundation::INVALID_HANDLE_VALUE {
1181-
return Err(vm.new_last_os_error());
1182-
}
1183-
11841180
if handle.is_null() {
1185-
return Ok(None);
1181+
return Err(vm.new_last_os_error());
11861182
}
11871183

1188-
Ok(Some(WinHandle(handle)))
1184+
Ok(WinHandle(handle))
11891185
}
11901186

11911187
/// SetEvent - Set the specified event object to the signaled state.
@@ -1944,9 +1940,7 @@ mod _winapi {
19441940
} else {
19451941
hr as u32
19461942
};
1947-
return Err(
1948-
std::io::Error::from_raw_os_error(err as i32).to_pyexception(vm),
1949-
);
1943+
return Err(std::io::Error::from_raw_os_error(err as i32).to_pyexception(vm));
19501944
}
19511945
Ok(())
19521946
}
@@ -1967,6 +1961,7 @@ mod _winapi {
19671961
if err != 0 {
19681962
return Err(vm.new_os_error(err as i32));
19691963
}
1964+
scopeguard::defer! { unsafe { RegCloseKey(hkcr) }; }
19701965

19711966
let mut i: u32 = 0;
19721967
let mut entries: Vec<(String, String)> = Vec::new();
@@ -1993,7 +1988,6 @@ mod _winapi {
19931988
break;
19941989
}
19951990
if err != 0 && err != windows_sys::Win32::Foundation::ERROR_MORE_DATA {
1996-
unsafe { RegCloseKey(hkcr) };
19971991
return Err(vm.new_os_error(err as i32));
19981992
}
19991993

@@ -2013,7 +2007,6 @@ mod _winapi {
20132007
continue;
20142008
}
20152009
if err != 0 {
2016-
unsafe { RegCloseKey(hkcr) };
20172010
return Err(vm.new_os_error(err as i32));
20182011
}
20192012

@@ -2057,8 +2050,6 @@ mod _winapi {
20572050
}
20582051
}
20592052

2060-
unsafe { RegCloseKey(hkcr) };
2061-
20622053
// Process remaining entries
20632054
for (mime_type, ext) in entries {
20642055
on_type_read.call((vm.ctx.new_str(mime_type), vm.ctx.new_str(ext)), vm)?;

crates/vm/src/stdlib/sys.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1588,7 +1588,11 @@ mod sys {
15881588
hash_randomization: settings.hash_seed.is_none() as u8,
15891589
isolated: settings.isolated as u8,
15901590
dev_mode: settings.dev_mode,
1591-
utf8_mode: settings.utf8_mode,
1591+
utf8_mode: if settings.utf8_mode < 0 {
1592+
1
1593+
} else {
1594+
settings.utf8_mode as u8
1595+
},
15921596
int_max_str_digits: settings.int_max_str_digits,
15931597
safe_path: settings.safe_path,
15941598
warn_default_encoding: settings.warn_default_encoding as u8,

crates/vm/src/vm/setting.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ pub struct Settings {
124124
pub stdio_encoding: Option<String>,
125125
/// PYTHONIOENCODING - stdio error handler
126126
pub stdio_errors: Option<String>,
127-
pub utf8_mode: u8,
127+
pub utf8_mode: i8,
128128
/// --check-hash-based-pycs
129129
pub check_hash_pycs_mode: CheckHashPycsMode,
130130

@@ -211,7 +211,7 @@ impl Default for Settings {
211211
allow_external_library: cfg!(feature = "importlib"),
212212
stdio_encoding: None,
213213
stdio_errors: None,
214-
utf8_mode: 0,
214+
utf8_mode: -1,
215215
int_max_str_digits: 4300,
216216
#[cfg(feature = "flame-it")]
217217
profile_output: None,

extra_tests/snippets/syntax_non_utf8.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@
55
dir_path = os.path.dirname(os.path.realpath(__file__))
66

77
with assert_raises(SyntaxError):
8-
with open(os.path.join(dir_path , "non_utf8.txt")) as f:
8+
with open(os.path.join(dir_path , "non_utf8.txt"), encoding="latin-1") as f:
99
eval(f.read())

src/settings.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -260,10 +260,13 @@ pub fn parse_opts() -> Result<(Settings, RunMode), lexopt::Error> {
260260

261261
settings.check_hash_pycs_mode = args.check_hash_based_pycs;
262262

263-
if let Some(val) = get_env("PYTHONUTF8") {
264-
settings.utf8_mode = match val.to_str() {
265-
Some("1") | Some("") => 1,
266-
Some("0") => 0,
263+
if let Some(val) = get_env("PYTHONUTF8")
264+
&& let Some(val_str) = val.to_str()
265+
&& !val_str.is_empty()
266+
{
267+
settings.utf8_mode = match val_str {
268+
"1" => 1,
269+
"0" => 0,
267270
_ => {
268271
error!(
269272
"Fatal Python error: config_init_utf8_mode: \
@@ -335,6 +338,13 @@ pub fn parse_opts() -> Result<(Settings, RunMode), lexopt::Error> {
335338
});
336339
settings.xoptions.extend(xopts);
337340

341+
// Resolve utf8_mode if not explicitly set by PYTHONUTF8 or -X utf8.
342+
// Default to UTF-8 mode since RustPython's locale encoding detection
343+
// is incomplete. Users can set PYTHONUTF8=0 or -X utf8=0 to disable.
344+
if settings.utf8_mode < 0 {
345+
settings.utf8_mode = 1;
346+
}
347+
338348
settings.warn_default_encoding =
339349
settings.warn_default_encoding || env_bool("PYTHONWARNDEFAULTENCODING");
340350
settings.faulthandler = settings.faulthandler || env_bool("PYTHONFAULTHANDLER");

0 commit comments

Comments
 (0)