forked from aimacode/aima-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_nlp4e.py
More file actions
139 lines (98 loc) · 4.16 KB
/
test_nlp4e.py
File metadata and controls
139 lines (98 loc) · 4.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import random
import pytest
import nlp
from nlp4e import Rules, Lexicon, Grammar, ProbRules, ProbLexicon, ProbGrammar, E0
from nlp4e import Chart, CYK_parse, subspan, astar_search_parsing, beam_search_parsing
# Clumsy imports because we want to access certain nlp.py globals explicitly, because
# they are accessed by functions within nlp.py
# Fixed seed makes the random grammar-generation tests deterministic across runs.
random.seed("aima-python")
def test_rules():
    """Rules() should expand 'X Y | Z' strings into lists of symbol lists."""
    expected = {'A': [['B', 'C'], ['D', 'E']], 'B': [['E'], ['a'], ['b', 'c']]}
    assert Rules(A="B C | D E", B="E | a | b c") == expected
def test_lexicon():
    """Lexicon() should map each category to the list of words it produces."""
    expected = {'Article': ['the', 'a', 'an'], 'Pronoun': ['i', 'you', 'he']}
    assert Lexicon(Article="the | a | an", Pronoun="i | you | he") == expected
def test_grammar():
    """Grammar exposes rewrites, word-category tests and CNF rule extraction."""
    simple = Grammar("Simplegram",
                     Rules(A="B C | D E", B="E | a | b c"),
                     Lexicon(Article="the | a | an", Pronoun="i | you | he"))
    assert simple.rewrites_for('A') == [['B', 'C'], ['D', 'E']]
    assert simple.isa('the', 'Article')
    # Each rule of a Chomsky-normal-form grammar should be a 3-tuple.
    assert all(len(rule) == 3 for rule in nlp.E_Chomsky.cnf_rules())
def test_generation():
    """Every token of a randomly generated sentence must occur in the lexicon."""
    grammar = Grammar(
        "Simplegram",
        Rules(S="Article | More | Pronoun",
              More="Article Pronoun | Pronoun Pronoun"),
        Lexicon(Article="the | a | an", Pronoun="i | you | he"))
    sentence = grammar.generate_random('S')
    for token in sentence.split():
        # The token must belong to at least one terminal category.
        assert any(token in terminals
                   for terminals in grammar.lexicon.values())
def test_prob_rules():
    """ProbRules() should parse '[p]' annotations into (symbols, prob) pairs."""
    expected = {'A': [(['B', 'C'], 0.3), (['D', 'E'], 0.7)],
                'B': [(['E'], 0.1), (['a'], 0.2), (['b', 'c'], 0.7)]}
    actual = ProbRules(A="B C [0.3] | D E [0.7]",
                       B="E [0.1] | a [0.2] | b c [0.7]")
    assert actual == expected
def test_prob_lexicon():
    """ProbLexicon() should parse '[p]' annotations into (word, prob) pairs."""
    expected = {'Article': [('the', 0.5), ('a', 0.25), ('an', 0.25)],
                'Pronoun': [('i', 0.4), ('you', 0.3), ('he', 0.3)]}
    actual = ProbLexicon(Article="the [0.5] | a [0.25] | an [0.25]",
                         Pronoun="i [0.4] | you [0.3] | he [0.3]")
    assert actual == expected
def test_prob_grammar():
    """ProbGrammar keeps probabilities on rewrites; its CNF rules are 4-tuples."""
    simple = ProbGrammar(
        "Simplegram",
        ProbRules(A="B C [0.3] | D E [0.7]",
                  B="E [0.1] | a [0.2] | b c [0.7]"),
        ProbLexicon(Article="the [0.5] | a [0.25] | an [0.25]",
                    Pronoun="i [0.4] | you [0.3] | he [0.3]"))
    assert simple.rewrites_for('A') == [(['B', 'C'], 0.3), (['D', 'E'], 0.7)]
    assert simple.isa('the', 'Article')
    # Probabilistic CNF rules carry an extra probability element (4-tuples).
    assert all(len(rule) == 4 for rule in nlp.E_Prob_Chomsky.cnf_rules())
def test_prob_generation():
    """generate_random on a probabilistic grammar yields a 2-element result."""
    grammar = ProbGrammar(
        "Simplegram",
        ProbRules(
            S="Verb [0.5] | More [0.3] | Pronoun [0.1] | nobody is here [0.1]",
            More="Pronoun Verb [0.7] | Pronoun Pronoun [0.3]"),
        ProbLexicon(Verb="am [0.5] | are [0.25] | is [0.25]",
                    Pronoun="i [0.4] | you [0.3] | he [0.3]"))
    # The result has length 2 — presumably (sentence, probability); only the
    # length is asserted here.
    assert len(grammar.generate_random('S')) == 2
def test_chart_parsing():
    """The chart parser should find exactly one parse of a simple E0 sentence."""
    found = Chart(nlp.E0).parses('the stench is in 2 2')
    assert len(found) == 1
def test_CYK_parse():
    """CYK_parse builds a table of the expected size for each CNF grammar."""
    table = CYK_parse(['the', 'robot', 'is', 'good'], nlp.E_Prob_Chomsky)
    assert len(table) == 5
    table = CYK_parse(['astronomers', 'saw', 'stars'], nlp.E_Prob_Chomsky_)
    assert len(table) == 3
def test_subspan():
    """subspan(3) should yield (i, j, k) index triples in the expected order."""
    spans = subspan(3)
    # next() is the idiomatic way to advance an iterator; calling the
    # dunder spans.__next__() directly is discouraged.
    assert next(spans) == (1, 1, 2)
    assert next(spans) == (2, 2, 3)
    assert next(spans) == (1, 1, 3)
    assert next(spans) == (1, 2, 3)
def test_text_parsing():
    """Search parsers accept a grammatical sentence and reject a scrambled one."""
    grammar = E0  # renamed from the misspelled local 'grammer'
    valid = ["the", "wumpus", "is", "dead"]
    assert astar_search_parsing(valid, grammar) == 'S'
    assert beam_search_parsing(valid, grammar) == 'S'
    # Scrambled word order (and a misspelled word) should fail to parse.
    invalid = ["the", "is", "wupus", "dead"]
    assert astar_search_parsing(invalid, grammar) is False
    assert beam_search_parsing(invalid, grammar) is False
if __name__ == '__main__':
    # Allow running this test module directly; delegates to pytest's runner.
    pytest.main()