RustPython · coolreader18 · Jul 29, 2019 · Jul 28, 2019 · Jul 28, 2019 · coolreader18
diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs
@@ -2,6 +2,7 @@ use crate::obj::objbool;
 use crate::pyhash;
 use crate::pyobject::{IdProtocol, PyObjectRef, PyResult};
 use crate::vm::VirtualMachine;
+use num_bigint::ToBigInt;
 /// Ordered dictionary implementation.
 /// Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
 /// And: https://www.youtube.com/watch?v=p33CVV29OG8
@@ -93,7 +94,7 @@ impl<T: Clone> Dict<T> {
         }
     }
 
-    pub fn contains(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<bool> {
+    pub fn contains<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<bool> {
         if let LookupResult::Existing(_) = self.lookup(vm, key)? {
             Ok(true)
         } else {
@@ -111,7 +112,7 @@ impl<T: Clone> Dict<T> {
 
     /// Retrieve a key
     #[cfg_attr(feature = "flame-it", flame("Dict"))]
-    pub fn get(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> {
+    pub fn get<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> {
         if let LookupResult::Existing(index) = self.lookup(vm, key)? {
             Ok(Some(self.unchecked_get(index)))
         } else {
@@ -149,7 +150,7 @@ impl<T: Clone> Dict<T> {
         key: &PyObjectRef,
         value: T,
     ) -> PyResult<()> {
-        match self.lookup(vm, &key)? {
+        match self.lookup(vm, key)? {
             LookupResult::Existing(entry_index) => self.unchecked_delete(entry_index),
             LookupResult::NewIndex {
                 hash_value,
@@ -199,8 +200,8 @@ impl<T: Clone> Dict<T> {
 
     /// Lookup the index for the given key.
     #[cfg_attr(feature = "flame-it", flame("Dict"))]
-    fn lookup(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<LookupResult> {
-        let hash_value = collection_hash(vm, key)?;
+    fn lookup<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<LookupResult> {
+        let hash_value = key.do_hash(vm)?;
         let perturb = hash_value;
         let mut hash_index: HashIndex = hash_value;
         loop {
@@ -209,11 +210,11 @@ impl<T: Clone> Dict<T> {
                 let index = self.indices[&hash_index];
                 if let Some(entry) = &self.entries[index] {
                     // Okay, we have an entry at this place
-                    if entry.key.is(key) {
+                    if key.do_is(&entry.key) {
                         // Literally the same object
                         break Ok(LookupResult::Existing(index));
                     } else if entry.hash == hash_value {
-                        if do_eq(vm, &entry.key, key)? {
+                        if key.do_eq(vm, &entry.key)? {
                             break Ok(LookupResult::Existing(index));
                         } else {
                             // entry mismatch.
@@ -242,7 +243,7 @@ impl<T: Clone> Dict<T> {
     }
 
     /// Retrieve and delete a key
-    pub fn pop(&mut self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> {
+    pub fn pop<K: DictKey>(&mut self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> {
         if let LookupResult::Existing(index) = self.lookup(vm, key)? {
             let value = self.unchecked_get(index);
             self.unchecked_delete(index);
@@ -273,23 +274,63 @@ enum LookupResult {
     Existing(EntryIndex), // Existing record, index into entries
 }
 
-#[cfg_attr(feature = "flame-it", flame())]
-fn collection_hash(vm: &VirtualMachine, object: &PyObjectRef) -> PyResult<HashValue> {
-    let raw_hash = vm._hash(object)?;
-    let mut hasher = DefaultHasher::new();
-    raw_hash.hash(&mut hasher);
-    Ok(hasher.finish() as HashValue)
+/// Types implementing this trait can be used to index
+/// the dictionary. Typical usecases are:
+/// - PyObjectRef -> arbitrary python type used as key
+/// - str -> string reference used as key, this is often used internally
+pub trait DictKey {
+    fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue>;
+    fn do_is(&self, other: &PyObjectRef) -> bool;
+    fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool>;
 }
 
-/// Invoke __eq__ on two keys
-fn do_eq(vm: &VirtualMachine, key1: &PyObjectRef, key2: &PyObjectRef) -> Result<bool, PyObjectRef> {
-    let result = vm._eq(key1.clone(), key2.clone())?;
-    objbool::boolval(vm, result)
+/// Implement trait for PyObjectRef such that we can use python objects
+/// to index dictionaries.
+impl DictKey for PyObjectRef {
+    fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue> {
+        let raw_hash = vm._hash(self)?;
+        let mut hasher = DefaultHasher::new();
+        raw_hash.hash(&mut hasher);
+        Ok(hasher.finish() as HashValue)
+    }
+
+    fn do_is(&self, other: &PyObjectRef) -> bool {
+        self.is(other)
+    }
+
+    fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
+        let result = vm._eq(self.clone(), other_key.clone())?;
+        objbool::boolval(vm, result)
+    }
+}
+
+/// Implement trait for the str type, so that we can use strings
+/// to index dictionaries.
+impl DictKey for String {
-impl DictKey for String {
+impl DictKey for str {
-impl DictKey for String {
+impl DictKey for str {
+    fn do_hash(&self, _vm: &VirtualMachine) -> PyResult<HashValue> {
+        let raw_hash = pyhash::hash_value(self).to_bigint().unwrap();
+        let raw_hash = pyhash::hash_bigint(&raw_hash);
+        let mut hasher = DefaultHasher::new();
+        raw_hash.hash(&mut hasher);
+        Ok(hasher.finish() as HashValue)
+    }
+
+    fn do_is(&self, _other: &PyObjectRef) -> bool {
+        // No matter who the other pyobject is, we are never the same thing, since
+        // we are a str, not a pyobject.
+        false
+    }
+
+    fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
+        // Fall back to PyString implementation.
+        let s = vm.new_str(self.to_string());
+        s.do_eq(vm, other_key)
+    }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::{Dict, VirtualMachine};
+    use super::{Dict, DictKey, VirtualMachine};
 
     #[test]
     fn test_insert() {
@@ -313,9 +354,40 @@ mod tests {
         dict.delete(&vm, &key1).unwrap();
         assert_eq!(1, dict.len());
 
-        dict.insert(&vm, &key1, value2).unwrap();
+        dict.insert(&vm, &key1, value2.clone()).unwrap();
         assert_eq!(2, dict.len());
 
         assert_eq!(true, dict.contains(&vm, &key1).unwrap());
+        assert_eq!(true, dict.contains(&vm, &"x".to_string()).unwrap());
+
+        let val = dict.get(&vm, &"x".to_string()).unwrap().unwrap();
+        vm._eq(val, value2)
+            .expect("retrieved value must be equal to inserted value.");
+    }
+
+    macro_rules! hash_tests {
+        ($($name:ident: $example_hash:expr,)*) => {
+            $(
+                #[test]
+                fn $name() {
+                    check_hash_equivalence($example_hash);
+                }
+            )*
+        }
+    }
+
+    hash_tests! {
+        test_abc: "abc",
+        test_x: "x",
+    }
+
+    fn check_hash_equivalence(text: &str) {
+        let vm: VirtualMachine = Default::default();
+        let value1 = text.to_string();
+        let value2 = vm.new_str(value1.clone());
+
+        let hash1 = value1.do_hash(&vm).expect("Hash should not fail.");
+        let hash2 = value2.do_hash(&vm).expect("Hash should not fail.");
+        assert_eq!(hash1, hash2);
     }
 }
diff --git a/vm/src/obj/objint.rs b/vm/src/obj/objint.rs
@@ -418,10 +418,7 @@ impl PyInt {
 
     #[pymethod(name = "__hash__")]
     pub fn hash(&self, _vm: &VirtualMachine) -> pyhash::PyHash {
-        match self.value.to_i64() {
-            Some(value) => (value % pyhash::MODULUS as i64),
-            None => (&self.value % pyhash::MODULUS).to_i64().unwrap(),
-        }
+        pyhash::hash_bigint(&self.value)
     }
 
     #[pymethod(name = "__abs__")]

diff --git a/vm/src/pyhash.rs b/vm/src/pyhash.rs
@@ -1,3 +1,5 @@
+use num_bigint::BigInt;
+use num_traits::ToPrimitive;
 use std::hash::{Hash, Hasher};
 
 use crate::obj::objfloat;
@@ -81,3 +83,10 @@ pub fn hash_iter<'a, I: std::iter::Iterator<Item = &'a PyObjectRef>>(
     }
     Ok(hasher.finish() as PyHash)
 }
+
+pub fn hash_bigint(value: &BigInt) -> PyHash {
+    match value.to_i64() {
+        Some(i64_value) => (i64_value % MODULUS as i64),
+        None => (value % MODULUS).to_i64().unwrap(),
+    }
+}