-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSVMImageClassifier.py
More file actions
167 lines (123 loc) · 5.8 KB
/
Copy pathSVMImageClassifier.py
File metadata and controls
167 lines (123 loc) · 5.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"""
Image Classifier extracts shape features from images using Haralick and Hu Moments, as well as Histogram information.
Combining these features gives a certain resilience to the variability of shapes (such as cat ears and the ears of certain dogs,
like a Chihuahua, for example): similar shapes are told apart by the colors (Histograms) they delineate and by the variability of those colors (Haralick).
For example: cats have pointy ears but are mostly gray or tortoiseshell colored, whereas Chihuahuas also have pointy ears
but are normally beige or brown colored.
"""
import cv2
import numpy as np
import mahotas
import os
from sklearn.svm import SVC
from sklearn import svm
import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
def fd_hu_moments(image):
    """
    fd_hu_moments: image -> flattened vector with Hu moments
    Purpose: Converts a BGR image to grayscale, computes its image moments and returns the
    Hu moments derived from them as a flat vector, a feature that can be used for SVM Classification
    Example: fd_hu_moments(cv2.imread('cat.01.jpg')) -> vector with the Hu moments of cat.01.jpg
    """
    # Hu moments are computed from a single-channel image, so drop the color first.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    raw_moments = cv2.moments(gray)
    hu = cv2.HuMoments(raw_moments)
    return hu.flatten()
def fd_haralick(image):
    """
    fd_haralick: image -> flattened vector with a Haralick texture feature vector
    Purpose: Converts a BGR image to grayscale, computes its Haralick texture features with mahotas
    and averages them along axis 0, giving one vector that can be used for SVM Classification
    Example: fd_haralick(cv2.imread('cat.01.jpg')) -> vector with the Haralick texture feature of cat.01.jpg
    """
    # Haralick features are computed on a single-channel (grayscale) image.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # mahotas returns a 2-D array here; collapse its rows into a single mean vector.
    texture_rows = mahotas.features.haralick(grayscale)
    return texture_rows.mean(axis=0)
def fd_histogram(image):
    """
    fd_histogram: image -> flattened vector with a normalized HSV color histogram
    Purpose: Receives a BGR image and returns a flattened vector with its 3-D HSV color histogram
    (5 bins per channel, 5 * 5 * 5 = 125 values), which is a feature that can be used for SVM Classification
    Example: fd_histogram(cv2.imread('cat.01.jpg')) -> vector with the color histogram of cat.01.jpg
    """
    # convert the image to HSV color-space
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # compute the color histogram
    # For 8-bit images OpenCV stores hue in [0, 180), so binning hue over [0, 256)
    # would leave the top hue bin permanently empty. Saturation and value use the
    # full [0, 256) range. (Assumes 8-bit input, as produced by cv2.imread.)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, [5, 5, 5], [0, 180, 0, 256, 0, 256])
    # normalize the histogram in place (cv2.normalize default settings)
    cv2.normalize(hist, hist)
    return hist.flatten()
def _feature_row(path, klass):
    """Load the image at `path` and return its feature vector with the integer class label appended."""
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/corrupt file by returning None instead of raising;
        # fail here with the offending path rather than deep inside cvtColor.
        raise IOError("Could not read image file: {}".format(path))
    return np.hstack([fd_histogram(image), fd_haralick(image), fd_hu_moments(image), int(klass)])


def myFeatureExtract(fileList, numImages):
    """ myFeatureExtract: array + int -> training and testing sets of features for SVM classification
    Purpose: Receive a 2-column array (image path, class label) and the number of images to use, and return
    training and testing lists of feature rows. Each row is the histogram, Haralick and Hu-moment features
    of one image followed by its class label as the last element.
    The images are drawn at random WITHOUT replacement, so no image appears twice and the training and
    testing sets never share an image. 80% of the drawn images go to training, the rest to testing.
    If numImages exceeds the number of available files, every file is used.
    Raises: IOError if a selected image cannot be read.
    Example: myFeatureExtract(fileList, 1000) -> returns training set (800 rows) and testing set (200 rows)
    """
    total = len(fileList)
    # Clamp the request to what is available; sampling without replacement cannot exceed it.
    numImages = max(0, min(int(numImages), total))
    nTrain = int(numImages * .80)
    # A random permutation gives distinct indices, keeping the two splits disjoint.
    chosen = np.random.permutation(total)[:numImages]
    # we extract features of each image file and collect them into the rows used for classification
    trainFeat = [_feature_row(fileList[i, 0], fileList[i, 1]) for i in chosen[:nTrain]]
    testFeat = [_feature_row(fileList[i, 0], fileList[i, 1]) for i in chosen[nTrain:]]
    return trainFeat, testFeat
def myKittyClassifier(numImages, pth='D:\\Dataset'):
    """
    myKittyClassifier: int (+ optional dataset path) -> None (prints evaluation reports)
    Purpose: Walks `pth` collecting '.jpg' files whose name contains 'cat' (class 0) or 'dog' (class 1),
    extracts features for `numImages` of them with myFeatureExtract (80% train / 20% test), rescales the
    features to [0, 1], trains a linear SVM and prints the accuracy, the classification report and the
    confusion matrix for the test set.
    Raises: ValueError if no matching image is found under `pth`, or if the train or test split is empty.
    Example: myKittyClassifier(5000) -> trains and evaluates on 5000 images from the default dataset folder
    """
    fileList = []
    # Listing files from the Dataset, and storing their paths for automation of feature extraction
    # Done by adapting algorithm from https://note.nkmk.me/en/python-os-getcwd-chdir/
    for root, _dirs, files in os.walk(pth):
        for name in files:
            if not name.endswith('.jpg'):
                continue
            if 'cat' in name:
                fileList.append([os.path.join(root, name), 0])
            elif 'dog' in name:
                fileList.append([os.path.join(root, name), 1])
    if not fileList:
        raise ValueError("No cat/dog .jpg images found under: {}".format(pth))
    fileList = np.array(fileList)

    # We specify the # of Images we want to analyze in our Dataset. It will
    # be distributed in a 80% train 20% test distribution. "0" for cats and "1" for dogs
    trainRows, testRows = myFeatureExtract(fileList, numImages)
    trainRows = np.array(trainRows)
    testRows = np.array(testRows)
    if trainRows.size == 0 or testRows.size == 0:
        raise ValueError("numImages is too small to build both a training and a testing set")

    # The class label is the last column of every row; everything before it is a feature.
    yTrain = trainRows[:, -1].astype(int)
    yTest = testRows[:, -1].astype(int)

    # we re-scale: the scaler is fit on the training features only and the same
    # transformation is then applied to the test features, so both live on one scale.
    scaler = MinMaxScaler(feature_range=(0, 1))
    xTrain = scaler.fit_transform(trainRows[:, :-1])
    xTest = scaler.transform(testRows[:, :-1])

    mySVC = sk.svm.SVC(C=1000, kernel='linear')
    mySVC.fit(xTrain, yTrain)
    # Predicting with our model
    yPred = mySVC.predict(xTest)

    # Running Reports
    # Fraction of correctly classified test images (not the raw count).
    myAccu = sk.metrics.accuracy_score(yTest, yPred)
    # labels=[0, 1] keeps the reports well-formed even if one class is absent from the test split.
    myReport = classification_report(yTest, yPred, labels=[0, 1], target_names=['Cats', 'Dogs'])
    myConfMat = confusion_matrix(yTest, yPred, labels=[0, 1])
    print("This is the Accuracy \n", myAccu)
    print("This is the Classification Report \n", myReport)
    print("This is the Confusion matrix \n", myConfMat)
# Run the whole pipeline (feature extraction, training, evaluation) on 5000 images.
myKittyClassifier(numImages=5000)
# In[ ]: