Skip to content

Commit 7f2560c

Browse files
committed
Add str.encode for utf-8
1 parent ad357d0 commit 7f2560c

4 files changed

Lines changed: 62 additions & 11 deletions

File tree

tests/snippets/strings.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,3 +206,13 @@ def try_mutate_str():
206206
word[0] = 'x'
207207

208208
assert_raises(TypeError, try_mutate_str)
209+
210+
# Sample strings (ASCII, Hangul, emoji) paired with their expected UTF-8 encodings.
ss = ['Hello', '안녕', '👋']
bs = [b'Hello', b'\xec\x95\x88\xeb\x85\x95', b'\xf0\x9f\x91\x8b']

# Spellings of the UTF-8 codec name that str.encode must accept.
encodings = ['u8', 'U8', 'utf-8', 'UTF-8', 'utf_8']

# With no argument, encode() defaults to UTF-8.
for s, b in zip(ss, bs):
    assert s.encode() == b

# Check every spelling against every sample. Zipping the three lists together
# would stop after three items and never reach 'UTF-8' or 'utf_8'.
for e in encodings:
    for s, b in zip(ss, bs):
        assert s.encode(e) == b
        # Keyword form is disabled; presumably keyword arguments are not
        # accepted by encode() yet -- TODO confirm and re-enable.
        # assert s.encode(encoding=e) == b

vm/src/function.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,17 @@ impl<T> OptionalArg<T> {
374374
Missing => f(),
375375
}
376376
}
377+
378+
pub fn map_or_else<U, D, F>(self, default: D, f: F) -> U
379+
where
380+
D: FnOnce() -> U,
381+
F: FnOnce(T) -> U,
382+
{
383+
match self {
384+
Present(value) => f(value),
385+
Missing => default(),
386+
}
387+
}
377388
}
378389

379390
impl<T> FromArgs for OptionalArg<T>

vm/src/obj/objbyteinner.rs

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -93,24 +93,28 @@ pub fn normalize_encoding(encoding: &str) -> String {
9393
res
9494
}
9595

96+
/// Encodes `value` into bytes using the codec named by `encoding`.
///
/// The name is passed through `normalize_encoding` before it is matched, so
/// the comparison is made on the normalized spelling. Only UTF-8 is
/// implemented at the moment.
///
/// # Errors
///
/// Returns a `ValueError` ("unknown encoding: ...") carrying the encoding name
/// exactly as the caller spelled it when the codec is not supported.
pub fn encode_to_vec(value: &str, encoding: &str, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
    match normalize_encoding(encoding).as_str() {
        // `utf8` is matched explicitly because the separator-less spelling was
        // accepted before this helper existed; presumably normalization does
        // not insert an underscore into it -- the arm is harmless either way.
        "utf_8" | "utf8" | "u8" => Ok(value.as_bytes().to_vec()),
        // TODO: different encoding
        // Report the caller's original spelling, not the normalized one.
        _ => Err(vm.new_value_error(format!("unknown encoding: {}", encoding))), // should be lookup error
    }
}
107+
96108
impl ByteInnerNewOptions {
97109
pub fn get_value(self, vm: &VirtualMachine) -> PyResult<PyByteInner> {
98110
// First handle bytes(string, encoding[, errors])
99111
if let OptionalArg::Present(enc) = self.encoding {
100112
if let OptionalArg::Present(eval) = self.val_option {
101113
if let Ok(input) = eval.downcast::<PyString>() {
102114
let encoding = enc.as_str();
103-
if encoding.to_lowercase() == "utf8" || encoding.to_lowercase() == "utf-8"
104-
// TODO: different encoding
105-
{
106-
return Ok(PyByteInner {
107-
elements: input.value.as_bytes().to_vec(),
108-
});
109-
} else {
110-
return Err(
111-
vm.new_value_error(format!("unknown encoding: {}", encoding)), //should be lookup error
112-
);
113-
}
115+
return Ok(PyByteInner {
116+
elements: encode_to_vec(&input.value, &encoding, vm)?,
117+
});
114118
} else {
115119
return Err(vm.new_type_error("encoding without a string argument".to_string()));
116120
}

vm/src/obj/objstr.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use crate::pyobject::{
1919
};
2020
use crate::vm::VirtualMachine;
2121

22+
use super::objbyteinner;
2223
use super::objdict::PyDict;
2324
use super::objint::{self, PyInt};
2425
use super::objnone::PyNone;
@@ -957,6 +958,31 @@ impl PyString {
957958
}
958959
}
959960
}
961+
962+
#[pymethod]
963+
fn encode(
964+
&self,
965+
encoding: OptionalArg<PyObjectRef>,
966+
_errors: OptionalArg<PyObjectRef>,
967+
vm: &VirtualMachine,
968+
) -> PyResult {
969+
let encoding = encoding.map_or_else(
970+
|| Ok("utf-8".to_string()),
971+
|v| {
972+
if objtype::isinstance(&v, &vm.ctx.str_type()) {
973+
Ok(get_value(&v))
974+
} else {
975+
Err(vm.new_type_error(format!(
976+
"encode() argument 1 must be str, not {}",
977+
v.class().name
978+
)))
979+
}
980+
},
981+
)?;
982+
983+
let encoded = objbyteinner::encode_to_vec(&self.value, &encoding, vm)?;
984+
Ok(vm.ctx.new_bytes(encoded))
985+
}
960986
}
961987

962988
impl PyValue for PyString {

0 commit comments

Comments
 (0)