-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Implement dictionary indexing by trait. #1187
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ use crate::obj::objbool; | |
| use crate::pyhash; | ||
| use crate::pyobject::{IdProtocol, PyObjectRef, PyResult}; | ||
| use crate::vm::VirtualMachine; | ||
| use num_bigint::ToBigInt; | ||
| /// Ordered dictionary implementation. | ||
| /// Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html | ||
| /// And: https://www.youtube.com/watch?v=p33CVV29OG8 | ||
|
|
@@ -93,7 +94,7 @@ impl<T: Clone> Dict<T> { | |
| } | ||
| } | ||
|
|
||
| pub fn contains(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<bool> { | ||
| pub fn contains<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<bool> { | ||
| if let LookupResult::Existing(_) = self.lookup(vm, key)? { | ||
| Ok(true) | ||
| } else { | ||
|
|
@@ -111,7 +112,7 @@ impl<T: Clone> Dict<T> { | |
|
|
||
| /// Retrieve a key | ||
| #[cfg_attr(feature = "flame-it", flame("Dict"))] | ||
| pub fn get(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> { | ||
| pub fn get<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> { | ||
| if let LookupResult::Existing(index) = self.lookup(vm, key)? { | ||
| Ok(Some(self.unchecked_get(index))) | ||
| } else { | ||
|
|
@@ -149,7 +150,7 @@ impl<T: Clone> Dict<T> { | |
| key: &PyObjectRef, | ||
| value: T, | ||
| ) -> PyResult<()> { | ||
| match self.lookup(vm, &key)? { | ||
| match self.lookup(vm, key)? { | ||
| LookupResult::Existing(entry_index) => self.unchecked_delete(entry_index), | ||
| LookupResult::NewIndex { | ||
| hash_value, | ||
|
|
@@ -199,8 +200,8 @@ impl<T: Clone> Dict<T> { | |
|
|
||
| /// Lookup the index for the given key. | ||
| #[cfg_attr(feature = "flame-it", flame("Dict"))] | ||
| fn lookup(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<LookupResult> { | ||
| let hash_value = collection_hash(vm, key)?; | ||
| fn lookup<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<LookupResult> { | ||
| let hash_value = key.do_hash(vm)?; | ||
| let perturb = hash_value; | ||
| let mut hash_index: HashIndex = hash_value; | ||
| loop { | ||
|
|
@@ -209,11 +210,11 @@ impl<T: Clone> Dict<T> { | |
| let index = self.indices[&hash_index]; | ||
| if let Some(entry) = &self.entries[index] { | ||
| // Okay, we have an entry at this place | ||
| if entry.key.is(key) { | ||
| if key.do_is(&entry.key) { | ||
| // Literally the same object | ||
| break Ok(LookupResult::Existing(index)); | ||
| } else if entry.hash == hash_value { | ||
| if do_eq(vm, &entry.key, key)? { | ||
| if key.do_eq(vm, &entry.key)? { | ||
| break Ok(LookupResult::Existing(index)); | ||
| } else { | ||
| // entry mismatch. | ||
|
|
@@ -242,7 +243,7 @@ impl<T: Clone> Dict<T> { | |
| } | ||
|
|
||
| /// Retrieve and delete a key | ||
| pub fn pop(&mut self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> { | ||
| pub fn pop<K: DictKey>(&mut self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> { | ||
| if let LookupResult::Existing(index) = self.lookup(vm, key)? { | ||
| let value = self.unchecked_get(index); | ||
| self.unchecked_delete(index); | ||
|
|
@@ -273,23 +274,63 @@ enum LookupResult { | |
| Existing(EntryIndex), // Existing record, index into entries | ||
| } | ||
|
|
||
| #[cfg_attr(feature = "flame-it", flame())] | ||
| fn collection_hash(vm: &VirtualMachine, object: &PyObjectRef) -> PyResult<HashValue> { | ||
| let raw_hash = vm._hash(object)?; | ||
| let mut hasher = DefaultHasher::new(); | ||
| raw_hash.hash(&mut hasher); | ||
| Ok(hasher.finish() as HashValue) | ||
| /// Types implementing this trait can be used to index | ||
| /// the dictionary. Typical usecases are: | ||
| /// - PyObjectRef -> arbitrary python type used as key | ||
| /// - str -> string reference used as key, this is often used internally | ||
| pub trait DictKey { | ||
| fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue>; | ||
| fn do_is(&self, other: &PyObjectRef) -> bool; | ||
| fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool>; | ||
| } | ||
|
|
||
| /// Invoke __eq__ on two keys | ||
| fn do_eq(vm: &VirtualMachine, key1: &PyObjectRef, key2: &PyObjectRef) -> Result<bool, PyObjectRef> { | ||
| let result = vm._eq(key1.clone(), key2.clone())?; | ||
| objbool::boolval(vm, result) | ||
| /// Implement trait for PyObjectRef such that we can use python objects | ||
| /// to index dictionaries. | ||
| impl DictKey for PyObjectRef { | ||
| fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue> { | ||
| let raw_hash = vm._hash(self)?; | ||
| let mut hasher = DefaultHasher::new(); | ||
| raw_hash.hash(&mut hasher); | ||
| Ok(hasher.finish() as HashValue) | ||
| } | ||
|
|
||
| fn do_is(&self, other: &PyObjectRef) -> bool { | ||
| self.is(other) | ||
| } | ||
|
|
||
| fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> { | ||
| let result = vm._eq(self.clone(), other_key.clone())?; | ||
| objbool::boolval(vm, result) | ||
| } | ||
| } | ||
|
|
||
| /// Implement trait for the str type, so that we can use strings | ||
| /// to index dictionaries. | ||
| impl DictKey for String { | ||
| fn do_hash(&self, _vm: &VirtualMachine) -> PyResult<HashValue> { | ||
| let raw_hash = pyhash::hash_value(self).to_bigint().unwrap(); | ||
| let raw_hash = pyhash::hash_bigint(&raw_hash); | ||
| let mut hasher = DefaultHasher::new(); | ||
| raw_hash.hash(&mut hasher); | ||
| Ok(hasher.finish() as HashValue) | ||
| } | ||
|
|
||
| fn do_is(&self, _other: &PyObjectRef) -> bool { | ||
| // No matter who the other pyobject is, we are never the same thing, since | ||
| // we are a str, not a pyobject. | ||
| false | ||
| } | ||
|
|
||
| fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> { | ||
| // Fall back to PyString implementation. | ||
| let s = vm.new_str(self.to_string()); | ||
| s.do_eq(vm, other_key) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel like we could have a more efficient eq implementation here than reallocating the string.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Absolutely, this involves checking the specific python type to be of |
||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::{Dict, VirtualMachine}; | ||
| use super::{Dict, DictKey, VirtualMachine}; | ||
|
|
||
| #[test] | ||
| fn test_insert() { | ||
|
|
@@ -313,9 +354,40 @@ mod tests { | |
| dict.delete(&vm, &key1).unwrap(); | ||
| assert_eq!(1, dict.len()); | ||
|
|
||
| dict.insert(&vm, &key1, value2).unwrap(); | ||
| dict.insert(&vm, &key1, value2.clone()).unwrap(); | ||
| assert_eq!(2, dict.len()); | ||
|
|
||
| assert_eq!(true, dict.contains(&vm, &key1).unwrap()); | ||
| assert_eq!(true, dict.contains(&vm, &"x".to_string()).unwrap()); | ||
|
|
||
| let val = dict.get(&vm, &"x".to_string()).unwrap().unwrap(); | ||
| vm._eq(val, value2) | ||
| .expect("retrieved value must be equal to inserted value."); | ||
| } | ||
|
|
||
| macro_rules! hash_tests { | ||
| ($($name:ident: $example_hash:expr,)*) => { | ||
| $( | ||
| #[test] | ||
| fn $name() { | ||
| check_hash_equivalence($example_hash); | ||
| } | ||
| )* | ||
| } | ||
| } | ||
|
|
||
| hash_tests! { | ||
| test_abc: "abc", | ||
| test_x: "x", | ||
| } | ||
|
|
||
| fn check_hash_equivalence(text: &str) { | ||
| let vm: VirtualMachine = Default::default(); | ||
| let value1 = text.to_string(); | ||
| let value2 = vm.new_str(value1.clone()); | ||
|
|
||
| let hash1 = value1.do_hash(&vm).expect("Hash should not fail."); | ||
| let hash2 = value2.do_hash(&vm).expect("Hash should not fail."); | ||
| assert_eq!(hash1, hash2); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried this, without success. I get this compilation error:
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I think it's because it's trying to make a double-fat pointer, with both a vtable and a slice length, and it doesn't like that.