Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 92 additions & 20 deletions vm/src/dictdatatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::obj::objbool;
use crate::pyhash;
use crate::pyobject::{IdProtocol, PyObjectRef, PyResult};
use crate::vm::VirtualMachine;
use num_bigint::ToBigInt;
/// Ordered dictionary implementation.
/// Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
/// And: https://www.youtube.com/watch?v=p33CVV29OG8
Expand Down Expand Up @@ -93,7 +94,7 @@ impl<T: Clone> Dict<T> {
}
}

pub fn contains(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<bool> {
pub fn contains<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<bool> {
if let LookupResult::Existing(_) = self.lookup(vm, key)? {
Ok(true)
} else {
Expand All @@ -111,7 +112,7 @@ impl<T: Clone> Dict<T> {

/// Retrieve a key
#[cfg_attr(feature = "flame-it", flame("Dict"))]
pub fn get(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> {
pub fn get<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> {
if let LookupResult::Existing(index) = self.lookup(vm, key)? {
Ok(Some(self.unchecked_get(index)))
} else {
Expand Down Expand Up @@ -149,7 +150,7 @@ impl<T: Clone> Dict<T> {
key: &PyObjectRef,
value: T,
) -> PyResult<()> {
match self.lookup(vm, &key)? {
match self.lookup(vm, key)? {
LookupResult::Existing(entry_index) => self.unchecked_delete(entry_index),
LookupResult::NewIndex {
hash_value,
Expand Down Expand Up @@ -199,8 +200,8 @@ impl<T: Clone> Dict<T> {

/// Lookup the index for the given key.
#[cfg_attr(feature = "flame-it", flame("Dict"))]
fn lookup(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<LookupResult> {
let hash_value = collection_hash(vm, key)?;
fn lookup<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<LookupResult> {
let hash_value = key.do_hash(vm)?;
let perturb = hash_value;
let mut hash_index: HashIndex = hash_value;
loop {
Expand All @@ -209,11 +210,11 @@ impl<T: Clone> Dict<T> {
let index = self.indices[&hash_index];
if let Some(entry) = &self.entries[index] {
// Okay, we have an entry at this place
if entry.key.is(key) {
if key.do_is(&entry.key) {
// Literally the same object
break Ok(LookupResult::Existing(index));
} else if entry.hash == hash_value {
if do_eq(vm, &entry.key, key)? {
if key.do_eq(vm, &entry.key)? {
break Ok(LookupResult::Existing(index));
} else {
// entry mismatch.
Expand Down Expand Up @@ -242,7 +243,7 @@ impl<T: Clone> Dict<T> {
}

/// Retrieve and delete a key
pub fn pop(&mut self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> {
pub fn pop<K: DictKey>(&mut self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> {
if let LookupResult::Existing(index) = self.lookup(vm, key)? {
let value = self.unchecked_get(index);
self.unchecked_delete(index);
Expand Down Expand Up @@ -273,23 +274,63 @@ enum LookupResult {
Existing(EntryIndex), // Existing record, index into entries
}

#[cfg_attr(feature = "flame-it", flame())]
fn collection_hash(vm: &VirtualMachine, object: &PyObjectRef) -> PyResult<HashValue> {
let raw_hash = vm._hash(object)?;
let mut hasher = DefaultHasher::new();
raw_hash.hash(&mut hasher);
Ok(hasher.finish() as HashValue)
/// Types implementing this trait can be used to index
/// the dictionary. Typical usecases are:
/// - PyObjectRef -> arbitrary python type used as key
/// - str -> string reference used as key, this is often used internally
pub trait DictKey {
fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue>;
fn do_is(&self, other: &PyObjectRef) -> bool;
fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool>;
}

/// Invoke __eq__ on two keys
fn do_eq(vm: &VirtualMachine, key1: &PyObjectRef, key2: &PyObjectRef) -> Result<bool, PyObjectRef> {
let result = vm._eq(key1.clone(), key2.clone())?;
objbool::boolval(vm, result)
/// Implement trait for PyObjectRef such that we can use python objects
/// to index dictionaries.
impl DictKey for PyObjectRef {
fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue> {
let raw_hash = vm._hash(self)?;
let mut hasher = DefaultHasher::new();
raw_hash.hash(&mut hasher);
Ok(hasher.finish() as HashValue)
}

fn do_is(&self, other: &PyObjectRef) -> bool {
self.is(other)
}

fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
let result = vm._eq(self.clone(), other_key.clone())?;
objbool::boolval(vm, result)
}
}

/// Implement trait for the str type, so that we can use strings
/// to index dictionaries.
impl DictKey for String {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
impl DictKey for String {
impl DictKey for str {

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried this, without success. I get this compilation error:

 --> vm/src/dictdatatype.rs:363:24
    |
363 |         let val = dict.get(&vm, "x").unwrap().unwrap();
    |                        ^^^ doesn't have a size known at compile-time

Copy link
Copy Markdown
Member

@coolreader18 coolreader18 Jul 28, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I think it's because it's trying to make a double-fat pointer, with both a vtable and a slice length, and it doesn't like that.

fn do_hash(&self, _vm: &VirtualMachine) -> PyResult<HashValue> {
let raw_hash = pyhash::hash_value(self).to_bigint().unwrap();
let raw_hash = pyhash::hash_bigint(&raw_hash);
let mut hasher = DefaultHasher::new();
raw_hash.hash(&mut hasher);
Ok(hasher.finish() as HashValue)
}

fn do_is(&self, _other: &PyObjectRef) -> bool {
// No matter who the other pyobject is, we are never the same thing, since
// we are a str, not a pyobject.
false
}

fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
// Fall back to PyString implementation.
let s = vm.new_str(self.to_string());
s.do_eq(vm, other_key)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like we could have a more efficient eq implementation here than reallocating the string.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Absolutely, this involves checking the specific python type to be of PyStringRef, and then doing a string compare. I suspect this happens only in a couple of cases, after the first hash compare is a success. I will implement this though, since I think it is a good idea.

}
}

#[cfg(test)]
mod tests {
use super::{Dict, VirtualMachine};
use super::{Dict, DictKey, VirtualMachine};

#[test]
fn test_insert() {
Expand All @@ -313,9 +354,40 @@ mod tests {
dict.delete(&vm, &key1).unwrap();
assert_eq!(1, dict.len());

dict.insert(&vm, &key1, value2).unwrap();
dict.insert(&vm, &key1, value2.clone()).unwrap();
assert_eq!(2, dict.len());

assert_eq!(true, dict.contains(&vm, &key1).unwrap());
assert_eq!(true, dict.contains(&vm, &"x".to_string()).unwrap());

let val = dict.get(&vm, &"x".to_string()).unwrap().unwrap();
vm._eq(val, value2)
.expect("retrieved value must be equal to inserted value.");
}

macro_rules! hash_tests {
($($name:ident: $example_hash:expr,)*) => {
$(
#[test]
fn $name() {
check_hash_equivalence($example_hash);
}
)*
}
}

hash_tests! {
test_abc: "abc",
test_x: "x",
}

fn check_hash_equivalence(text: &str) {
let vm: VirtualMachine = Default::default();
let value1 = text.to_string();
let value2 = vm.new_str(value1.clone());

let hash1 = value1.do_hash(&vm).expect("Hash should not fail.");
let hash2 = value2.do_hash(&vm).expect("Hash should not fail.");
assert_eq!(hash1, hash2);
}
}
5 changes: 1 addition & 4 deletions vm/src/obj/objint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,10 +418,7 @@ impl PyInt {

#[pymethod(name = "__hash__")]
pub fn hash(&self, _vm: &VirtualMachine) -> pyhash::PyHash {
match self.value.to_i64() {
Some(value) => (value % pyhash::MODULUS as i64),
None => (&self.value % pyhash::MODULUS).to_i64().unwrap(),
}
pyhash::hash_bigint(&self.value)
}

#[pymethod(name = "__abs__")]
Expand Down
9 changes: 9 additions & 0 deletions vm/src/pyhash.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use num_bigint::BigInt;
use num_traits::ToPrimitive;
use std::hash::{Hash, Hasher};

use crate::obj::objfloat;
Expand Down Expand Up @@ -81,3 +83,10 @@ pub fn hash_iter<'a, I: std::iter::Iterator<Item = &'a PyObjectRef>>(
}
Ok(hasher.finish() as PyHash)
}

pub fn hash_bigint(value: &BigInt) -> PyHash {
match value.to_i64() {
Some(i64_value) => (i64_value % MODULUS as i64),
None => (value % MODULUS).to_i64().unwrap(),
}
}