-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathModel.py
More file actions
157 lines (134 loc) · 5.45 KB
/
Model.py
File metadata and controls
157 lines (134 loc) · 5.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
from abc import abstractmethod
from io import TextIOWrapper
from Math.DiscreteDistribution import DiscreteDistribution
from Math.Matrix import Matrix
from Classification.Attribute.DiscreteAttribute import DiscreteAttribute
from Classification.Attribute.DiscreteIndexedAttribute import DiscreteIndexedAttribute
from Classification.Instance.Instance import Instance
from Classification.InstanceList.InstanceList import InstanceList
from Classification.Parameter.Parameter import Parameter
from Classification.Performance.ConfusionMatrix import ConfusionMatrix
from Classification.Performance.DetailedClassificationPerformance import DetailedClassificationPerformance
from Classification.Performance.Performance import Performance
class Model(object):
    """
    Base class for classification models.

    Declares the train / loadModel / predict / predictProbability hooks that concrete models override, and
    provides shared helpers for reading instances, matrices and class distributions from a model file as well
    as for evaluating the model on a test set.

    NOTE: the class derives from ``object`` rather than ``abc.ABC``, so the ``@abstractmethod`` markers below
    are not enforced when a subclass is instantiated.
    """

    @abstractmethod
    def train(self,
              trainSet: InstanceList,
              parameters: Parameter):
        """
        Abstract training hook; the base implementation does nothing and returns None.

        PARAMETERS
        ----------
        trainSet : InstanceList
            Training data handed to the model.
        parameters : Parameter
            Parameters handed to the model.
        """

    @abstractmethod
    def loadModel(self, fileName: str):
        """
        Abstract model-loading hook; the base implementation does nothing and returns None.

        PARAMETERS
        ----------
        fileName : str
            Presumably the name of the model file to read; the interpretation is left to the subclass.
        """

    @abstractmethod
    def predict(self, instance: Instance) -> str:
        """
        Abstract prediction hook that receives a single Instance.

        PARAMETERS
        ----------
        instance : Instance
            Instance to make a prediction for.

        RETURNS
        -------
        str
            The predicted class label (the base implementation returns None).
        """

    @abstractmethod
    def predictProbability(self, instance: Instance) -> dict:
        """
        Abstract hook that receives a single Instance and is annotated to return a dict.

        PARAMETERS
        ----------
        instance : Instance
            Instance to make a prediction for.

        RETURNS
        -------
        dict
            Presumably maps class labels to probabilities -- confirm against the subclasses (the base
            implementation returns None).
        """

    def loadInstance(self, line: str, attributeTypes: list) -> Instance:
        """
        Builds an Instance from one comma separated line.

        The last field of the line is used as the class label; every preceding field is an attribute value
        whose kind is looked up at the same position in attributeTypes. Fields whose kind is neither
        "DISCRETE" nor "CONTINUOUS" are skipped.

        PARAMETERS
        ----------
        line : str
            Comma separated attribute values followed by the class label.
        attributeTypes : list
            Kind ("DISCRETE" or "CONTINUOUS") of each attribute, in the order the values appear in line.

        RETURNS
        -------
        Instance
            Instance carrying the class label and the parsed attributes.
        """
        *raw_values, class_label = line.split(",")
        loaded = Instance(class_label)
        for position, raw_value in enumerate(raw_values):
            kind = attributeTypes[position]
            if kind == "DISCRETE":
                loaded.addDiscreteAttribute(raw_value)
            elif kind == "CONTINUOUS":
                loaded.addContinuousAttribute(float(raw_value))
        return loaded

    def loadMatrix(self, inputFile: TextIOWrapper) -> Matrix:
        """
        Reads a matrix from the current position of an input model file.

        The first line read holds the row count and the column count separated by a single space. Each of
        the following lines holds the values of one row, again separated by single spaces.

        PARAMETERS
        ----------
        inputFile : TextIOWrapper
            Input model file.

        RETURNS
        -------
        Matrix
            Matrix filled with the values read from the file.
        """
        dimensions = inputFile.readline().strip().split(" ")
        result = Matrix(int(dimensions[0]), int(dimensions[1]))
        for row in range(result.getRow()):
            cells = inputFile.readline().strip().split(" ")
            for column in range(result.getColumn()):
                result.setValue(row, column, float(cells[column]))
        return result

    @staticmethod
    def loadClassDistribution(inputFile: TextIOWrapper) -> DiscreteDistribution:
        """
        Reads a class distribution from the current position of an input model file.

        The first line read holds the number of entries. Each following line holds an item and its count
        separated by a single space; the item is added to the distribution that many times.

        PARAMETERS
        ----------
        inputFile : TextIOWrapper
            Input model file.

        RETURNS
        -------
        DiscreteDistribution
            Distribution built from the entries read from the file.
        """
        result = DiscreteDistribution()
        entry_count = int(inputFile.readline().strip())
        for _ in range(entry_count):
            fields = inputFile.readline().strip().split(" ")
            item = fields[0]
            repetitions = int(fields[1])
            for _ in range(repetitions):
                result.addItem(item)
        return result

    def loadInstanceList(self, inputFile: TextIOWrapper) -> InstanceList:
        """
        Reads an instance list from the current position of an input model file.

        The first line read holds the attribute types separated by single spaces, the second line holds the
        number of instances, and each following line is parsed into one instance with loadInstance.

        PARAMETERS
        ----------
        inputFile : TextIOWrapper
            Input model file.

        RETURNS
        -------
        InstanceList
            Instance list read from the input model file.
        """
        attribute_types = inputFile.readline().strip().split(" ")
        how_many = int(inputFile.readline().strip())
        loaded = InstanceList()
        for _ in range(how_many):
            current_line = inputFile.readline().strip()
            loaded.add(self.loadInstance(current_line, attribute_types))
        return loaded

    def discreteCheck(self, instance: Instance) -> bool:
        """
        Checks that every discrete attribute of the given instance is a discrete indexed attribute.

        PARAMETERS
        ----------
        instance : Instance
            Instance whose attributes are checked.

        RETURNS
        -------
        bool
            False as soon as an attribute is a DiscreteAttribute but not a DiscreteIndexedAttribute,
            True otherwise (including when the instance has no attributes).
        """
        for index in range(instance.attributeSize()):
            # Attributes that are not discrete at all can never fail the check.
            if not isinstance(instance.getAttribute(index), DiscreteAttribute):
                continue
            if not isinstance(instance.getAttribute(index), DiscreteIndexedAttribute):
                return False
        return True

    def test(self, testSet: InstanceList) -> Performance:
        """
        Evaluates the current model on an instance list.

        Every instance of the test set is passed to predict and the (actual label, predicted label) pair is
        recorded in a confusion matrix built over the union of possible class labels of the test set.

        PARAMETERS
        ----------
        testSet : InstanceList
            Test data (list of instances) to be tested.

        RETURNS
        -------
        Performance
            DetailedClassificationPerformance constructed from the filled confusion matrix.
        """
        confusion_matrix = ConfusionMatrix(testSet.getUnionOfPossibleClassLabels())
        for position in range(testSet.size()):
            current = testSet.get(position)
            actual_label = current.getClassLabel()
            predicted_label = self.predict(current)
            confusion_matrix.classify(actual_label, predicted_label)
        return DetailedClassificationPerformance(confusion_matrix)

    def singleRun(self,
                  parameter: Parameter,
                  trainSet: InstanceList,
                  testSet: InstanceList) -> Performance:
        """
        Trains the model on the given training data and then evaluates it on the given test data.

        PARAMETERS
        ----------
        parameter : Parameter
            Parameter passed to train.
        trainSet : InstanceList
            Training data passed to train.
        testSet : InstanceList
            Test data passed to test once training has finished.

        RETURNS
        -------
        Performance
            The result of test on testSet.
        """
        self.train(trainSet, parameter)
        performance = self.test(testSet)
        return performance