-
Notifications
You must be signed in to change notification settings - Fork 128
Expand file tree
/
Copy pathutil.py
More file actions
137 lines (101 loc) · 3.2 KB
/
util.py
File metadata and controls
137 lines (101 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from rpython.rlib.rarithmetic import r_uint, LONG_BIT, intmask, LONG_MASK
from rpython.rlib.runicode import str_decode_utf_8, unicode_encode_utf_8
from pixie.vm.object import affirm
seed = 0
C1 = r_uint(0xcc9e2d51)
C2 = r_uint(0x1b873593)
def rotr(value, shift):
return (value >> shift) | (value << (LONG_BIT - shift))
def rotl(value, shift):
return (value << shift) | (value >> (LONG_BIT - shift))
def hash_int(input):
if input == 0:
return r_uint(0)
k1 = mix_k1(input)
h1 = mix_h1(seed, k1)
return fmix(h1, 4)
def mix_k1(k1):
k1 *= C1
k1 = rotl(k1, 15)
k1 *= C2
return k1
def mix_h1(h1, k1):
h1 ^= k1
h1 = rotl(h1, 13)
h1 = h1 * 5 + r_uint(0xe6546b64)
return h1
def hash_unencoded_chars(u):
#assert isinstance(u, unicode)
h1 = seed
# step through the CharSequence 2 chars at a time
for i in range(1, len(u), 2):
k1 = r_uint(ord(u[i-1]) | ord(u[i]))
k1 = mix_k1(k1)
h1 = mix_h1(h1, k1)
# deal with any remaining characters
if (len(u) & 1) == 1:
k1 = r_uint(ord(u[(len(u) - 1)]))
k1 = mix_k1(k1)
h1 ^= k1
return fmix(h1, 2 * len(u))
def fmix(h1, length):
h1 ^= length
h1 ^= h1 >> 16
h1 *= r_uint(0x85ebca6b)
h1 ^= h1 >> 13
h1 *= r_uint(0xc2b2ae35)
h1 ^= h1 >> 16
return h1
def mix_coll_hash(hash, count):
h1 = seed
k1 = mix_k1(hash)
h1 = mix_h1(h1, k1)
return fmix(h1, count)
from pixie.vm.object import Object, Type
import pixie.vm.code as code
from pixie.vm.code import as_var
import pixie.vm.rt as rt
import pixie.vm.numbers as numbers
class HashingState(Object):
_type = Type(u"pixie.stdlib.HashingState")
def type(self):
return HashingState._type
def __init__(self):
self._n = r_uint(0)
self._hash = r_uint(1)
def update_hash_ordered(self, itm):
self._n += 1
self._hash = 31 * self._hash + rt.hash(itm)
return self
def update_hash_unordered(self, itm):
self._n += 1
self._hash += rt.hash(itm)
return self
def finish(self):
return rt.wrap(intmask(mix_coll_hash(self._hash, self._n)))
@as_var("new-hash-state")
def new_hash_state():
return HashingState()
@as_var("update-hash-ordered!")
def update_hash_ordered(acc, val):
affirm(isinstance(acc, HashingState), u"Expected HashingState as first argument")
return acc.update_hash_ordered(val)
@as_var("update-hash-unordered!")
def update_hash_ordered(acc, val):
affirm(isinstance(acc, HashingState), u"Expected HashingState as first argument")
return acc.update_hash_unordered(val)
@as_var("finish-hash-state")
def finish_hash_state(acc):
affirm(isinstance(acc, HashingState), u"Expected HashingState as first argument")
return acc.finish()
@as_var("hash-int")
def _hash_int(acc):
return rt.wrap(intmask(hash_int(acc.r_uint_val())))
# unicode utils
def unicode_from_utf8(s):
"""Converts a `str` value to a `unicode` value assuming it's encoded in UTF8."""
res, _ = str_decode_utf_8(s, len(s), 'strict')
return res
def unicode_to_utf8(s):
"""Converts a `unicode` value to a UTF8 encoded `str` value."""
return unicode_encode_utf_8(s, len(s), 'strict')