Skip to content

Commit 86e1a0b

Browse files
committed
oem codec
1 parent 9c3b789 commit 86e1a0b

File tree

1 file changed

+198
-0
lines changed

1 file changed

+198
-0
lines changed

crates/vm/src/stdlib/codecs.rs

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,204 @@ mod _codecs {
421421
fn mbcs_decode(args: FuncArgs, vm: &VirtualMachine) -> PyResult {
422422
delegate_pycodecs!(mbcs_decode, args, vm)
423423
}
424+
425+
// Argument bundle for the Windows implementation of `oem_encode`, populated from the
// Python call through `#[derive(FromArgs)]`. Only compiled on Windows targets.
#[cfg(windows)]
426+
#[derive(FromArgs)]
427+
struct OemEncodeArgs {
428+
// Required positional argument: the string to encode.
#[pyarg(positional)]
429+
s: PyStrRef,
430+
// Optional positional argument: the error-handler name. `oem_encode` treats a
// missing value as "strict".
#[pyarg(positional, optional)]
431+
errors: Option<PyStrRef>,
432+
}
433+
434+
// Encode a Python `str` to bytes using the system OEM code page (`CP_OEMCP`).
//
// Arguments (see `OemEncodeArgs`):
//   * `s`      - the string to encode.
//   * `errors` - error-handler name; defaults to "strict" when omitted.
//
// Returns `(encoded_bytes, char_len)` where `char_len` is the number of characters
// of `s` that were consumed (always the whole string).
//
// Errors:
//   * `UnicodeEncodeError` if `s` contains surrogates, or if `errors` is "strict" and
//     the code page cannot represent some character.
//   * `OverflowError` if the UTF-16 form of `s` does not fit the `i32` length that
//     `WideCharToMultiByte` accepts.
//   * `OSError` if `WideCharToMultiByte` itself reports a failure.
//
// For any handler other than "strict" the Win32 API substitutes the code page's
// default character for unrepresentable input.
#[cfg(windows)]
#[pyfunction]
fn oem_encode(args: OemEncodeArgs, vm: &VirtualMachine) -> PyResult<(Vec<u8>, usize)> {
    use windows_sys::Win32::Globalization::{
        CP_OEMCP, WC_NO_BEST_FIT_CHARS, WideCharToMultiByte,
    };

    let strict = args
        .errors
        .as_ref()
        .map_or(true, |e| e.as_str() == "strict");

    let Some(s) = args.s.to_str() else {
        // String contains surrogates - not encodable with oem
        return Err(vm.new_unicode_encode_error(
            "'oem' codec can't encode character: surrogates not allowed".to_string(),
        ));
    };
    let char_len = args.s.char_len();

    if s.is_empty() {
        return Ok((Vec::new(), char_len));
    }

    // The Win32 API works on UTF-16 code units.
    let wide: Vec<u16> = s.encode_utf16().collect();

    // The API takes the length as an `i32`. A wrapping cast could produce a negative
    // value, and -1 would make it treat `wide` as NUL-terminated and read past the
    // end of the buffer, so reject oversized input explicitly.
    let wide_len = i32::try_from(wide.len()).map_err(|_| {
        vm.new_overflow_error("string is too long to encode with the 'oem' codec".to_owned())
    })?;

    // First pass: ask for the required output size.
    // SAFETY: `wide` is a live allocation holding exactly `wide_len` UTF-16 units, and
    // a null output pointer with a zero size only requests the byte count.
    let size = unsafe {
        WideCharToMultiByte(
            CP_OEMCP,
            WC_NO_BEST_FIT_CHARS,
            wide.as_ptr(),
            wide_len,
            std::ptr::null_mut(),
            0,
            std::ptr::null(),
            std::ptr::null_mut(),
        )
    };

    // The input is non-empty, so a non-positive size can only mean failure.
    if size <= 0 {
        let err = std::io::Error::last_os_error();
        return Err(vm.new_os_error(format!("oem_encode failed: {err}")));
    }

    let mut buffer = vec![0u8; size as usize];
    let mut used_default_char: i32 = 0;
    // Only strict mode needs to learn whether a substitution happened.
    let used_default_ptr: *mut i32 = if strict {
        &mut used_default_char
    } else {
        std::ptr::null_mut()
    };

    // Second pass: perform the conversion into `buffer`.
    // SAFETY: `wide` is valid for `wide_len` units, `buffer` is valid for `size` bytes,
    // and `used_default_ptr` is either null or points at a live `i32` on this stack frame.
    let result = unsafe {
        WideCharToMultiByte(
            CP_OEMCP,
            WC_NO_BEST_FIT_CHARS,
            wide.as_ptr(),
            wide_len,
            buffer.as_mut_ptr().cast(),
            size,
            std::ptr::null(),
            used_default_ptr,
        )
    };

    if result <= 0 {
        let err = std::io::Error::last_os_error();
        return Err(vm.new_os_error(format!("oem_encode failed: {err}")));
    }

    if strict && used_default_char != 0 {
        return Err(vm.new_unicode_encode_error(
            "'oem' codec can't encode characters: invalid character",
        ));
    }

    buffer.truncate(result as usize);
    Ok((buffer, char_len))
}
514+
515+
// `oem_encode` for non-Windows targets: passes the unparsed call arguments and the VM
// straight to `delegate_pycodecs!`.
// NOTE(review): the macro is defined outside this view; presumably it forwards to a
// Python-level `oem_encode` implementation - confirm against its definition.
#[cfg(not(windows))]
516+
#[pyfunction]
517+
fn oem_encode(args: FuncArgs, vm: &VirtualMachine) -> PyResult {
518+
delegate_pycodecs!(oem_encode, args, vm)
519+
}
520+
521+
// Argument bundle for the Windows implementation of `oem_decode`, populated from the
// Python call through `#[derive(FromArgs)]`. Only compiled on Windows targets.
#[cfg(windows)]
522+
#[derive(FromArgs)]
523+
struct OemDecodeArgs {
524+
// Required positional argument: the bytes-like object to decode.
#[pyarg(positional)]
525+
data: ArgBytesLike,
526+
// Optional positional argument: the error-handler name. `oem_decode` treats a
// missing value as "strict".
#[pyarg(positional, optional)]
527+
errors: Option<PyStrRef>,
528+
// Third positional argument, defaulting to `false`. It is accepted as part of the
// call signature but `oem_decode` never reads it, hence the `dead_code` allowance.
#[pyarg(positional, default = false)]
529+
#[allow(dead_code)]
530+
r#final: bool,
531+
}
532+
533+
// Decode bytes to a `str` using the system OEM code page (`CP_OEMCP`).
//
// Arguments (see `OemDecodeArgs`):
//   * `data`   - bytes-like object to decode.
//   * `errors` - error-handler name; defaults to "strict" when omitted.
//   * `final`  - accepted but not used here.
//
// Returns `(decoded_string, bytes_consumed)`; the whole input is always consumed.
//
// Errors:
//   * `UnicodeDecodeError` if `errors` is "strict" and the input contains bytes that
//     are invalid in the code page, or if the API yields ill-formed UTF-16.
//   * `OverflowError` if the input length does not fit the `i32` that
//     `MultiByteToWideChar` accepts.
//   * `OSError` if `MultiByteToWideChar` itself reports a failure.
//
// For any handler other than "strict", invalid bytes are converted with flags 0, which
// lets the Win32 API apply its own replacement behavior.
#[cfg(windows)]
#[pyfunction]
fn oem_decode(args: OemDecodeArgs, vm: &VirtualMachine) -> PyResult<(String, usize)> {
    use windows_sys::Win32::Globalization::{
        CP_OEMCP, MB_ERR_INVALID_CHARS, MultiByteToWideChar,
    };

    // Win32 error code that `MultiByteToWideChar` reports when `MB_ERR_INVALID_CHARS`
    // is set and the input contains an invalid sequence.
    const ERROR_NO_UNICODE_TRANSLATION: i32 = 1113;

    let strict = args
        .errors
        .as_ref()
        .map_or(true, |e| e.as_str() == "strict");
    let data = args.data.borrow_buf();
    let len = data.len();

    if data.is_empty() {
        return Ok((String::new(), 0));
    }

    // The API takes the length as an `i32`. A wrapping cast could produce a negative
    // value, and -1 would make it treat `data` as NUL-terminated and read past the
    // end of the buffer, so reject oversized input explicitly.
    let byte_len = i32::try_from(len).map_err(|_| {
        vm.new_overflow_error("data is too long to decode with the 'oem' codec".to_owned())
    })?;

    // Validating probe: ask for the UTF-16 size while rejecting invalid input.
    // SAFETY: `data` is a live borrow holding exactly `byte_len` bytes, and a null output
    // pointer with a zero size only requests the unit count.
    let strict_size = unsafe {
        MultiByteToWideChar(
            CP_OEMCP,
            MB_ERR_INVALID_CHARS,
            data.as_ptr().cast(),
            byte_len,
            std::ptr::null_mut(),
            0,
        )
    };

    // Pick the flags for the real conversion together with the size they produced.
    let (flags, size) = if strict_size > 0 {
        (MB_ERR_INVALID_CHARS, strict_size)
    } else {
        let probe_err = std::io::Error::last_os_error();
        if strict && probe_err.raw_os_error() == Some(ERROR_NO_UNICODE_TRANSLATION) {
            // Strict mode must surface undecodable input instead of replacing it.
            return Err(vm.new_unicode_decode_error(
                "'oem' codec can't decode bytes: invalid character".to_string(),
            ));
        }

        // Non-strict handler, or the probe failed for a reason other than bad input
        // (for example the flag being unsupported): retry with flags 0.
        // SAFETY: same buffer and length as the probe above; output pointer is null.
        let lenient_size = unsafe {
            MultiByteToWideChar(
                CP_OEMCP,
                0,
                data.as_ptr().cast(),
                byte_len,
                std::ptr::null_mut(),
                0,
            )
        };
        if lenient_size <= 0 {
            let err = std::io::Error::last_os_error();
            return Err(vm.new_os_error(format!("oem_decode failed: {err}")));
        }
        (0, lenient_size)
    };

    let mut buffer = vec![0u16; size as usize];

    // SAFETY: `data` is valid for `byte_len` bytes and `buffer` is valid for `size`
    // UTF-16 units, which is the count the API just reported for these flags.
    let result = unsafe {
        MultiByteToWideChar(
            CP_OEMCP,
            flags,
            data.as_ptr().cast(),
            byte_len,
            buffer.as_mut_ptr(),
            size,
        )
    };

    if result <= 0 {
        let err = std::io::Error::last_os_error();
        return Err(vm.new_os_error(format!("oem_decode failed: {err}")));
    }

    buffer.truncate(result as usize);

    // Convert UTF-16 to UTF-8 String
    let s = String::from_utf16(&buffer)
        .map_err(|e| vm.new_unicode_decode_error(format!("oem_decode failed: {e}")))?;

    Ok((s, len))
}
615+
616+
// `oem_decode` for non-Windows targets: passes the unparsed call arguments and the VM
// straight to `delegate_pycodecs!`.
// NOTE(review): the macro is defined outside this view; presumably it forwards to a
// Python-level `oem_decode` implementation - confirm against its definition.
#[cfg(not(windows))]
617+
#[pyfunction]
618+
fn oem_decode(args: FuncArgs, vm: &VirtualMachine) -> PyResult {
619+
delegate_pycodecs!(oem_decode, args, vm)
620+
}
621+
424622
#[pyfunction]
425623
fn readbuffer_encode(args: FuncArgs, vm: &VirtualMachine) -> PyResult {
426624
delegate_pycodecs!(readbuffer_encode, args, vm)

0 commit comments

Comments
 (0)