11use num_bigint:: BigInt ;
22use num_complex:: Complex64 ;
33use num_traits:: ToPrimitive ;
4- use std:: collections:: hash_map:: DefaultHasher ;
5- use std:: hash:: { Hash , Hasher } ;
4+ use siphasher:: sip:: SipHasher24 ;
5+ use std:: convert:: TryInto ;
6+ use std:: hash:: { BuildHasher , Hash , Hasher } ;
67use std:: num:: Wrapping ;
78
89pub type PyHash = i64 ;
@@ -16,24 +17,75 @@ pub const MODULUS: PyUHash = (1 << BITS) - 1;
1617pub const INF : PyHash = 314_159 ;
1718pub const NAN : PyHash = 0 ;
1819pub const IMAG : PyHash = MULTIPLIER ;
19- pub const ALGO : & str = "siphasher13 " ;
20+ pub const ALGO : & str = "siphash24 " ;
2021pub const HASH_BITS : usize = std:: mem:: size_of :: < PyHash > ( ) * 8 ;
21- // internally DefaultHasher uses 2 u64s as the seed, but
22- // that's not guaranteed to be consistent across Rust releases
23- // TODO: use something like the siphasher crate as our hash algorithm
24- pub const SEED_BITS : usize = std:: mem:: size_of :: < PyHash > ( ) * 2 * 8 ;
22+ // SipHasher24 takes 2 u64s as a seed
23+ pub const SEED_BITS : usize = std:: mem:: size_of :: < u64 > ( ) * 2 * 8 ;
2524
2625// pub const CUTOFF: usize = 7;
2726
28- #[ inline]
29- pub fn mod_int ( value : i64 ) -> PyHash {
30- value % MODULUS as i64
27+ pub struct HashSecret {
28+ k0 : u64 ,
29+ k1 : u64 ,
30+ }
31+
32+ impl BuildHasher for HashSecret {
33+ type Hasher = SipHasher24 ;
34+ fn build_hasher ( & self ) -> Self :: Hasher {
35+ SipHasher24 :: new_with_keys ( self . k0 , self . k1 )
36+ }
37+ }
38+
39+ impl rand:: distributions:: Distribution < HashSecret > for rand:: distributions:: Standard {
40+ fn sample < R : rand:: Rng + ?Sized > ( & self , rng : & mut R ) -> HashSecret {
41+ HashSecret {
42+ k0 : rng. gen ( ) ,
43+ k1 : rng. gen ( ) ,
44+ }
45+ }
3146}
3247
33- pub fn hash_value < T : Hash + ?Sized > ( data : & T ) -> PyHash {
34- let mut hasher = DefaultHasher :: new ( ) ;
35- data. hash ( & mut hasher) ;
36- mod_int ( hasher. finish ( ) as PyHash )
48+ impl HashSecret {
49+ pub fn new ( seed : u32 ) -> Self {
50+ let mut buf = [ 0u8 ; 16 ] ;
51+ lcg_urandom ( seed, & mut buf) ;
52+ let k0 = u64:: from_le_bytes ( buf[ ..8 ] . try_into ( ) . unwrap ( ) ) ;
53+ let k1 = u64:: from_le_bytes ( buf[ 8 ..] . try_into ( ) . unwrap ( ) ) ;
54+ Self { k0, k1 }
55+ }
56+ }
57+
58+ impl HashSecret {
59+ pub fn hash_value < T : Hash + ?Sized > ( & self , data : & T ) -> PyHash {
60+ let mut hasher = self . build_hasher ( ) ;
61+ data. hash ( & mut hasher) ;
62+ mod_int ( hasher. finish ( ) as PyHash )
63+ }
64+
65+ pub fn hash_iter < ' a , T : ' a , I , F , E > ( & self , iter : I , hashf : F ) -> Result < PyHash , E >
66+ where
67+ I : IntoIterator < Item = & ' a T > ,
68+ F : Fn ( & ' a T ) -> Result < PyHash , E > ,
69+ {
70+ let mut hasher = self . build_hasher ( ) ;
71+ for element in iter {
72+ let item_hash = hashf ( element) ?;
73+ item_hash. hash ( & mut hasher) ;
74+ }
75+ Ok ( mod_int ( hasher. finish ( ) as PyHash ) )
76+ }
77+
78+ pub fn hash_bytes ( & self , value : & [ u8 ] ) -> PyHash {
79+ if value. is_empty ( ) {
80+ 0
81+ } else {
82+ self . hash_value ( value)
83+ }
84+ }
85+
86+ pub fn hash_str ( & self , value : & str ) -> PyHash {
87+ self . hash_bytes ( value. as_bytes ( ) )
88+ }
3789}
3890
3991pub fn hash_float ( value : f64 ) -> PyHash {
@@ -84,21 +136,8 @@ pub fn hash_float(value: f64) -> PyHash {
84136pub fn hash_complex ( value : & Complex64 ) -> PyHash {
85137 let re_hash = hash_float ( value. re ) ;
86138 let im_hash = hash_float ( value. im ) ;
87- let ret = Wrapping ( re_hash) + Wrapping ( im_hash) * Wrapping ( IMAG ) ;
88- ret. 0
89- }
90-
91- pub fn hash_iter < ' a , T : ' a , I , F , E > ( iter : I , hashf : F ) -> Result < PyHash , E >
92- where
93- I : IntoIterator < Item = & ' a T > ,
94- F : Fn ( & ' a T ) -> Result < PyHash , E > ,
95- {
96- let mut hasher = DefaultHasher :: new ( ) ;
97- for element in iter {
98- let item_hash = hashf ( element) ?;
99- item_hash. hash ( & mut hasher) ;
100- }
101- Ok ( mod_int ( hasher. finish ( ) as PyHash ) )
139+ let Wrapping ( ret) = Wrapping ( re_hash) + Wrapping ( im_hash) * Wrapping ( IMAG ) ;
140+ ret
102141}
103142
104143pub fn hash_iter_unordered < ' a , T : ' a , I , F , E > ( iter : I , hashf : F ) -> Result < PyHash , E >
@@ -126,6 +165,15 @@ pub fn hash_bigint(value: &BigInt) -> PyHash {
126165 )
127166}
128167
129- pub fn hash_str ( value : & str ) -> PyHash {
130- hash_value ( value. as_bytes ( ) )
168+ #[ inline]
169+ pub fn mod_int ( value : i64 ) -> PyHash {
170+ value % MODULUS as i64
171+ }
172+
173+ pub fn lcg_urandom ( mut x : u32 , buf : & mut [ u8 ] ) {
174+ for b in buf {
175+ x *= 214013 ;
176+ x += 2531011 ;
177+ * b = ( ( x >> 16 ) & 0xff ) as u8 ;
178+ }
131179}
0 commit comments