Skip to content

Commit fe35829

Browse files
author
Tom De Smedt
committed
pattern.search bug fixes
notably: do not cache patterns that use a taxonomy, because taxonomies are mutable dicts.
1 parent 078e46d commit fe35829

1 file changed

Lines changed: 19 additions & 13 deletions

File tree

pattern/text/search.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def push(self, kv):
205205
""" Adds a new item from the given (key, value)-tuple.
206206
If the key exists, pushes the updated item to the head of the dict.
207207
"""
208-
if k in self:
208+
if kv[0] in self:
209209
self.__delitem__(kv[0])
210210
self.__setitem__(kv[0], kv[1])
211211
append = push
@@ -699,7 +699,7 @@ def _ungroup(sequence, groups=None):
699699
# - Pattern.greedy(chunk, constraint) determines (True/False) if a chunk is a match.
700700
self.strict = kwargs.get("strict", STRICT in args and not GREEDY in args)
701701
self.greedy = kwargs.get("greedy", lambda chunk, constraint: True)
702-
702+
703703
def __iter__(self):
704704
return iter(self.sequence)
705705
def __len__(self):
@@ -781,11 +781,13 @@ def scan(self, string):
781781
def search(self, sentence):
782782
""" Returns a list of all matches found in the given sentence.
783783
"""
784-
if isinstance(sentence, list) or sentence.__class__.__name__ == "Text":
784+
if sentence.__class__.__name__ == "Sentence":
785+
pass
786+
elif isinstance(sentence, list) or sentence.__class__.__name__ == "Text":
785787
a=[]; [a.extend(self.search(s)) for s in sentence]; return a
786-
if isinstance(sentence, basestring):
788+
elif isinstance(sentence, basestring):
787789
sentence = Sentence(sentence)
788-
if isinstance(sentence, Match) and len(sentence) > 0:
790+
elif isinstance(sentence, Match) and len(sentence) > 0:
789791
sentence = sentence[0].sentence.slice(sentence[0].index, sentence[-1].index + 1)
790792
a = []
791793
v = self._variations()
@@ -799,11 +801,13 @@ def search(self, sentence):
799801
def match(self, sentence, start=0, _v=None, _u=None):
800802
""" Returns the first match found in the given sentence, or None.
801803
"""
802-
if isinstance(sentence, list) or sentence.__class__.__name__ == "Text":
804+
if sentence.__class__.__name__ == "Sentence":
805+
pass
806+
elif isinstance(sentence, list) or sentence.__class__.__name__ == "Text":
803807
return find(lambda m,s: m is not None, ((self.match(s, start, _v), s) for s in sentence))[0]
804-
if isinstance(sentence, basestring):
808+
elif isinstance(sentence, basestring):
805809
sentence = Sentence(sentence)
806-
if isinstance(sentence, Match) and len(sentence) > 0:
810+
elif isinstance(sentence, Match) and len(sentence) > 0:
807811
sentence = sentence[0].sentence.slice(sentence[0].index, sentence[-1].index + 1)
808812
# Variations (_v) further down the list may match words more to the front.
809813
# We need to check all of them. Unmatched variations are blacklisted (_u).
@@ -841,9 +845,9 @@ def _match(self, sequence, sentence, start=0, i=0, w0=None, map=None, d=0):
841845

842846
if map is None:
843847
map = {}
844-
848+
845849
n = len(sequence)
846-
850+
847851
# --- MATCH ----------
848852
if i == n:
849853
if w0 is not None:
@@ -915,19 +919,21 @@ def string(self):
915919
_CACHE_SIZE = 100 # Number of dynamic Pattern objects to keep in cache.
916920
def compile(pattern, *args, **kwargs):
917921
""" Returns a Pattern from the given string or regular expression.
918-
Recently compiled patterns are kept in cache.
922+
Recently compiled patterns are kept in cache
923+
(if they do not use taxonomies, which are mutable dicts).
919924
"""
920925
id, p = repr(pattern) + repr(args), pattern
921-
if id in _cache:
926+
if id in _cache and not kwargs:
922927
return _cache[id]
923928
if isinstance(pattern, basestring):
924929
p = Pattern.fromstring(pattern, *args, **kwargs)
925930
if isinstance(pattern, regexp):
926931
p = Pattern([Constraint(words=[pattern], taxonomy=kwargs.get("taxonomy", TAXONOMY))], *args, **kwargs)
927932
if len(_cache) > _CACHE_SIZE:
928933
_cache.clear()
929-
if isinstance(p, Pattern):
934+
if isinstance(p, Pattern) and not kwargs:
930935
_cache[id] = p
936+
if isinstance(p, Pattern):
931937
return p
932938
else:
933939
raise TypeError("can't compile '%s' object" % pattern.__class__.__name__)

0 commit comments

Comments
 (0)