44
55// spell-checker:ignore codep decomp DECOMP nfkc unistr unidata
66
7- use core:: {
8- cmp:: Ordering ,
9- fmt:: { self , Display , Formatter } ,
10- hint:: cold_path,
11- } ;
7+ use core:: { cmp:: Ordering , hint:: cold_path} ;
128
139pub ( crate ) use unicodedata:: module_def;
1410
@@ -28,25 +24,6 @@ include!(concat!(
2824 "/generated/unicode_numeric_value.rs"
2925) ) ;
3026
31- #[ derive( Clone , Copy , Debug , PartialEq ) ]
32- struct UnicodeVersion {
33- pub major : u8 ,
34- pub minor : u8 ,
35- pub micro : u8 ,
36- }
37-
38- impl Display for UnicodeVersion {
39- fn fmt ( & self , f : & mut Formatter < ' _ > ) -> fmt:: Result {
40- write ! ( f, "{}.{}.{}" , self . major, self . minor, self . micro)
41- }
42- }
43-
44- const UNICODE_VERSION : UnicodeVersion = UnicodeVersion {
45- major : char:: UNICODE_VERSION . 0 ,
46- minor : char:: UNICODE_VERSION . 1 ,
47- micro : char:: UNICODE_VERSION . 2 ,
48- } ;
49-
5027#[ derive( Clone , Copy ) ]
5128#[ repr( u8 ) ]
5229enum DecompositionType {
@@ -118,8 +95,8 @@ fn lookup_property<T: Copy>(table: &[(u32, u32, T)], ch: char) -> Option<T> {
11895 . map ( |i| table[ i] . 2 )
11996}
12097
121- fn lookup_numeric_val ( ch : char , version : UnicodeVersion ) -> Option < f64 > {
122- if version . major > 3 {
98+ fn lookup_numeric_val ( ch : char , modern : bool ) -> Option < f64 > {
99+ if modern {
123100 lookup_property ( NUMERIC_VALUES , ch)
124101 } else {
125102 cold_path ( ) ;
@@ -162,8 +139,8 @@ mod unicodedata {
162139
163140 use super :: {
164141 BIDI_CLASS , BIDI_MIRRORED , COMBINING_CLASS , DECOMP_COMPAT , DECOMP_RANGE , DECOMP_UPDATES ,
165- EAST_ASIAN_WIDTH , GENERAL_CATEGORY , NUMERIC_TYPE_DIFF , NormalizeForm , UNICODE_VERSION ,
166- UnicodeVersion , lookup_numeric_val , lookup_property,
142+ EAST_ASIAN_WIDTH , GENERAL_CATEGORY , NUMERIC_TYPE_DIFF , NormalizeForm , lookup_numeric_val ,
143+ lookup_property,
167144 } ;
168145 use crate :: vm:: {
169146 Py , PyObjectRef , PyPayload , PyRef , PyResult , VirtualMachine ,
@@ -186,7 +163,7 @@ mod unicodedata {
186163 __module_exec ( vm, module) ;
187164
188165 // Add UCD methods as module-level functions
189- let ucd: PyObjectRef = Ucd :: new ( UNICODE_VERSION ) . into_ref ( & vm. ctx ) . into ( ) ;
166+ let ucd: PyObjectRef = Ucd :: new ( true ) . into_ref ( & vm. ctx ) . into ( ) ;
190167
191168 for attr in [
192169 "category" ,
@@ -213,12 +190,12 @@ mod unicodedata {
213190 #[ pyclass( name = "UCD" ) ]
214191 #[ derive( Debug , PyPayload ) ]
215192 pub ( super ) struct Ucd {
216- unic_version : UnicodeVersion ,
193+ modern : bool ,
217194 }
218195
219196 impl Ucd {
220- pub ( super ) const fn new ( unic_version : UnicodeVersion ) -> Self {
221- Self { unic_version }
197+ pub ( super ) const fn new ( modern : bool ) -> Self {
198+ Self { modern }
222199 }
223200
224201 fn extract_char ( & self , character : PyStrRef , vm : & VirtualMachine ) -> PyResult < CodePoint > {
@@ -238,7 +215,7 @@ mod unicodedata {
238215 let Some ( c) = c. to_char ( ) else {
239216 return GeneralCategory :: Surrogate . short_name ( ) ;
240217 } ;
241- if self . unic_version . major > 3 {
218+ if self . modern {
242219 Some ( GeneralCategory :: for_char ( c) )
243220 } else {
244221 cold_path ( ) ;
@@ -291,7 +268,7 @@ mod unicodedata {
291268 self . extract_char ( character, vm) . map ( |c| {
292269 c. to_char ( )
293270 . and_then ( |c| {
294- if self . unic_version . major > 3 {
271+ if self . modern {
295272 Some ( BidiClass :: for_char ( c) )
296273 } else {
297274 cold_path ( ) ;
@@ -312,7 +289,7 @@ mod unicodedata {
312289 self . extract_char ( character, vm) . map ( |c| {
313290 c. to_char ( )
314291 . and_then ( |c| {
315- if self . unic_version . major > 3 {
292+ if self . modern {
316293 Some ( EastAsianWidth :: for_char ( c) )
317294 } else {
318295 cold_path ( ) ;
@@ -392,7 +369,7 @@ mod unicodedata {
392369 fn mirrored ( & self , character : PyStrRef , vm : & VirtualMachine ) -> PyResult < i32 > {
393370 self . extract_char ( character, vm) . map ( |c| {
394371 c. to_char ( ) . map_or ( 0 , |c| {
395- ( if self . unic_version . major > 3 {
372+ ( if self . modern {
396373 BidiMirrored :: for_char ( c)
397374 } else {
398375 cold_path ( ) ;
@@ -418,7 +395,7 @@ mod unicodedata {
418395 self . extract_char ( character, vm) . map ( |c| {
419396 c. to_char ( )
420397 . and_then ( |c| {
421- if self . unic_version . major > 3 {
398+ if self . modern {
422399 Some ( CanonicalCombiningClass :: for_char ( c) )
423400 } else {
424401 cold_path ( ) ;
@@ -442,7 +419,7 @@ mod unicodedata {
442419 // For 3.2.0, we use the original decomp for compatibility while ignoring the update.
443420 //
444421 // Finally, we don't have to do anything for the latest UCD as it's already updated.
445- if self . unic_version . major == 3
422+ if ! self . modern
446423 && let Some ( ( _, original) ) = DECOMP_UPDATES
447424 . iter ( )
448425 . find ( |& & ( codep, _original) | codep == ch as u32 )
@@ -485,7 +462,7 @@ mod unicodedata {
485462 fn numeric_type_matches ( & self , ch : CodePoint , expected : & [ NumericType ] ) -> Option < char > {
486463 let ch = ch. to_char ( ) ?;
487464
488- let actual = if self . unic_version . major > 3 {
465+ let actual = if self . modern {
489466 NumericType :: for_char ( ch)
490467 } else {
491468 cold_path ( ) ;
@@ -506,7 +483,7 @@ mod unicodedata {
506483 let expected = [ NumericType :: Decimal , NumericType :: Digit ] ;
507484 self . numeric_type_matches ( ch, & expected)
508485 . and_then ( |ch| {
509- let value = lookup_numeric_val ( ch, UNICODE_VERSION ) ?;
486+ let value = lookup_numeric_val ( ch, self . modern ) ?;
510487 ( value. trunc ( ) == value) . then ( || vm. ctx . new_int ( value as u64 ) . into ( ) )
511488 } )
512489 . or_else ( || default. present ( ) )
@@ -525,7 +502,7 @@ mod unicodedata {
525502 let expected = [ NumericType :: Decimal ] ;
526503 self . numeric_type_matches ( ch, & expected)
527504 . and_then ( |ch| {
528- let value = lookup_numeric_val ( ch, self . unic_version ) ?;
505+ let value = lookup_numeric_val ( ch, self . modern ) ?;
529506 ( value. trunc ( ) == value) . then ( || vm. ctx . new_int ( value as u64 ) . into ( ) )
530507 } )
531508 . or_else ( || default. present ( ) )
@@ -544,34 +521,30 @@ mod unicodedata {
544521 let expected = & NumericType :: ALL_VALUES [ 1 ..] ;
545522 self . numeric_type_matches ( ch, expected)
546523 . and_then ( |ch| {
547- lookup_numeric_val ( ch, self . unic_version )
548- . map ( |value| vm. ctx . new_float ( value) . into ( ) )
524+ lookup_numeric_val ( ch, self . modern ) . map ( |value| vm. ctx . new_float ( value) . into ( ) )
549525 } )
550526 . or_else ( || default. present ( ) )
551527 . map ( Option :: Some )
552528 . ok_or_else ( || vm. new_value_error ( "not a numeric character" ) )
553529 }
554530
555531 #[ pygetset]
556- fn unidata_version ( & self ) -> String {
557- self . unic_version . to_string ( )
532+ const fn unidata_version ( & self ) -> & ' static str {
533+ if self . modern {
534+ env ! ( "RUST_UNICODE_VERSION" )
535+ } else {
536+ "3.2.0"
537+ }
558538 }
559539 }
560540
561541 #[ pyattr]
562542 fn ucd_3_2_0 ( vm : & VirtualMachine ) -> PyRef < Ucd > {
563- Ucd {
564- unic_version : UnicodeVersion {
565- major : 3 ,
566- minor : 2 ,
567- micro : 0 ,
568- } ,
569- }
570- . into_ref ( & vm. ctx )
543+ Ucd :: new ( false ) . into_ref ( & vm. ctx )
571544 }
572545
573546 #[ pyattr]
574- fn unidata_version ( _vm : & VirtualMachine ) -> String {
575- UNICODE_VERSION . to_string ( )
547+ const fn unidata_version ( _vm : & VirtualMachine ) -> & ' static str {
548+ env ! ( "RUST_UNICODE_VERSION" )
576549 }
577550}
0 commit comments