@@ -74,24 +74,33 @@ pub struct ByteInnerNewOptions {
7474 encoding : OptionalArg < PyStringRef > ,
7575}
7676
/// Normalizes a codec name before it is compared against known encodings.
///
/// ASCII letters, ASCII digits and `.` are kept, with letters lower-cased.
/// Every run of any other characters that sits between two kept characters
/// collapses into a single `_`; leading and trailing runs are dropped.
/// For example `"UTF-8"` becomes `"utf_8"` and `" Latin--1 "` becomes
/// `"latin_1"`.
///
/// Intended to mirror CPython's lower-casing encoding-name normalization,
/// which classifies characters with ASCII-only tests; non-ASCII characters
/// are therefore treated as separators rather than copied through.
pub fn normalize_encoding(encoding: &str) -> String {
    // The result is never longer than the input, so one allocation suffices.
    let mut res = String::with_capacity(encoding.len());
    // True while a run of separator characters has been seen since the last
    // kept character.
    let mut punct = false;

    for c in encoding.chars() {
        if c.is_ascii_alphanumeric() || c == '.' {
            // Emit the pending separator only between kept characters, never
            // at the very start of the result.
            if punct && !res.is_empty() {
                res.push('_');
            }
            res.push(c.to_ascii_lowercase());
            punct = false;
        } else {
            punct = true;
        }
    }
    res
}
95+
7796impl ByteInnerNewOptions {
7897 pub fn get_value ( self , vm : & VirtualMachine ) -> PyResult < PyByteInner > {
7998 // First handle bytes(string, encoding[, errors])
8099 if let OptionalArg :: Present ( enc) = self . encoding {
81100 if let OptionalArg :: Present ( eval) = self . val_option {
82101 if let Ok ( input) = eval. downcast :: < PyString > ( ) {
83- let encoding = enc. as_str ( ) ;
84- if encoding. to_lowercase ( ) == "utf8" || encoding. to_lowercase ( ) == "utf-8"
85- // TODO: different encoding
86- {
87- return Ok ( PyByteInner {
88- elements : input. value . as_bytes ( ) . to_vec ( ) ,
89- } ) ;
90- } else {
91- return Err (
92- vm. new_value_error ( format ! ( "unknown encoding: {}" , encoding) ) , //should be lookup error
93- ) ;
94- }
102+ let inner = PyByteInner :: from_string ( & input. value , enc. as_str ( ) , vm) ?;
103+ return Ok ( inner) ;
95104 } else {
96105 return Err ( vm. new_type_error ( "encoding without a string argument" . to_string ( ) ) ) ;
97106 }
@@ -311,6 +320,20 @@ impl ByteInnerSplitlinesOptions {
311320}
312321
313322impl PyByteInner {
323+ pub fn from_string ( value : & str , encoding : & str , vm : & VirtualMachine ) -> PyResult < Self > {
324+ let normalized = normalize_encoding ( encoding) ;
325+ if normalized == "utf_8" || normalized == "utf8" || normalized == "u8" {
326+ Ok ( PyByteInner {
327+ elements : value. as_bytes ( ) . to_vec ( ) ,
328+ } )
329+ } else {
330+ // TODO: different encoding
331+ Err (
332+ vm. new_value_error ( format ! ( "unknown encoding: {}" , encoding) ) , // should be lookup error
333+ )
334+ }
335+ }
336+
314337 pub fn repr ( & self ) -> PyResult < String > {
315338 let mut res = String :: with_capacity ( self . elements . len ( ) ) ;
316339 for i in self . elements . iter ( ) {
0 commit comments