
Commit 7f3710b

add adaboost
1 parent 01c18b5 commit 7f3710b

2 files changed

Lines changed: 150 additions & 1 deletion

File tree

adaboost/AdaBoost.py

Lines changed: 147 additions & 0 deletions
import numpy as np  # np is used throughout; don't rely on the star import to provide it
from sklearn.ensemble import AdaBoostClassifier

from utils.data_generater import *


class AdaBoost:
    def __init__(self, n_estimators=50, learning_rate=1.0):
        self.clf_num = n_estimators
        self.learning_rate = learning_rate

    def init_args(self, datasets, labels):
        self.X = datasets
        self.Y = labels
        self.M, self.N = datasets.shape

        # number and collection of weak classifiers
        self.clf_sets = []

        # initialize the sample weights uniformly
        self.weights = [1.0 / self.M] * self.M

        # coefficients alpha of the weak classifiers G(x)
        self.alpha = []

    def _G(self, features, labels, weights):
        m = len(features)
        error = 100000.0  # effectively infinity
        best_v = 0.0
        # features is a single column (one feature dimension)
        features_min = min(features)
        features_max = max(features)
        n_step = (features_max - features_min + self.learning_rate) // self.learning_rate
        # print('n_step:{}'.format(n_step))
        direct, compare_array = None, None
        for i in range(1, int(n_step)):
            v = features_min + self.learning_rate * i

            if v not in features:
                # weighted misclassification error of both stump directions
                compare_array_positive = np.array([1 if features[k] > v else -1 for k in range(m)])
                weight_error_positive = sum([weights[k] for k in range(m) if compare_array_positive[k] != labels[k]])

                compare_array_negative = np.array([-1 if features[k] > v else 1 for k in range(m)])
                weight_error_negative = sum([weights[k] for k in range(m) if compare_array_negative[k] != labels[k]])

                if weight_error_positive < weight_error_negative:
                    weight_error = weight_error_positive
                    _compare_array = compare_array_positive
                    _direct = 'positive'
                else:
                    weight_error = weight_error_negative
                    _compare_array = compare_array_negative
                    _direct = 'negative'

                # print('v:{} error:{}'.format(v, weight_error))
                if weight_error < error:
                    error = weight_error
                    compare_array = _compare_array
                    # keep the direction together with the best threshold
                    direct = _direct
                    best_v = v
        return best_v, direct, error, compare_array
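
    # Note on `learning_rate`: _G uses it as the stride of the threshold grid
    # scanned between a feature's min and max, i.e. a search step size, not the
    # shrinkage factor that sklearn's AdaBoostClassifier calls learning_rate.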

    # compute the classifier coefficient alpha from its weighted error
    def _alpha(self, error):
        return 0.5 * np.log((1 - error) / error)

    # normalization factor
    def _Z(self, weights, a, clf):
        return sum([weights[i] * np.exp(-1 * a * self.Y[i] * clf[i]) for i in range(self.M)])

    # weight update
    def _w(self, a, clf, Z):
        for i in range(self.M):
            self.weights[i] = self.weights[i] * np.exp(-1 * a * self.Y[i] * clf[i]) / Z

    # linear combination of the G(x); unused, predict() sums the votes directly
    def _f(self, alpha, clf_sets):
        pass

    def G(self, x, v, direct):
        if direct == 'positive':
            return 1 if x > v else -1
        else:
            return -1 if x > v else 1

    def fit(self, X, y):
        self.init_args(X, y)

        for epoch in range(self.clf_num):
            best_clf_error, best_v, clf_result = 100000, None, None
            final_direct, axis = None, None
            # across the feature dimensions, pick the stump with the smallest weighted error
            for j in range(self.N):
                features = self.X[:, j]
                # threshold, direction, weighted error, and raw predictions for this feature
                v, direct, error, compare_array = self._G(features, self.Y, self.weights)

                if error < best_clf_error:
                    best_clf_error = error
                    best_v = v
                    final_direct = direct
                    clf_result = compare_array
                    axis = j

                # print('epoch:{}/{} feature:{} error:{} v:{}'.format(epoch, self.clf_num, j, error, best_v))
            if best_clf_error == 0:
                # a perfect stump separates the data: record it with a clamped
                # error so _alpha stays finite, then stop boosting
                self.alpha.append(self._alpha(1e-10))
                self.clf_sets.append((axis, best_v, final_direct))
                break

            # compute the coefficient a of this G(x)
            a = self._alpha(best_clf_error)
            self.alpha.append(a)
            # record the classifier
            self.clf_sets.append((axis, best_v, final_direct))
            # normalization factor
            Z = self._Z(self.weights, a, clf_result)
            # weight update
            self._w(a, clf_result, Z)

            # print('classifier:{}/{} error:{:.3f} v:{} direct:{} a:{:.5f}'.format(epoch + 1, self.clf_num, error, best_v, final_direct, a))
            # print('weight:{}'.format(self.weights))

    def predict(self, feature):
        result = 0.0
        for i in range(len(self.clf_sets)):
            axis, clf_v, direct = self.clf_sets[i]
            f_input = feature[axis]
            result += self.alpha[i] * self.G(f_input, clf_v, direct)
        # sign of the weighted vote
        return 1 if result > 0 else -1

    def score(self, X_test, y_test):
        right_count = 0
        for i in range(len(X_test)):
            feature = X_test[i]
            if self.predict(feature) == y_test[i]:
                right_count += 1

        return right_count / len(X_test)


if __name__ == "__main__":
    X_train, X_test, y_train, y_test = create_svm_data()
    my_ada = AdaBoost(n_estimators=10, learning_rate=0.2)
    my_ada.fit(X_train, y_train)
    print("my AdaBoost score", my_ada.score(X_test, y_test))

    sk_ada = AdaBoostClassifier(n_estimators=100, learning_rate=0.5)
    sk_ada.fit(X_train, y_train)
    print("sklearn AdaBoost score", sk_ada.score(X_test, y_test))

svm/svm.py

Lines changed: 3 additions & 1 deletion
@@ -1,6 +1,8 @@
-from utils.data_generater import *
+
 from sklearn.svm import SVC
 
+from utils.data_generater import *
+
 class SVM:
     def __init__(self, max_iter=100, kernel='linear'):
         self.max_iter = max_iter
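
Both files star-import utils.data_generater, which is not part of this commit, so create_svm_data is opaque here. A minimal stand-in sketch, assuming it returns a train/test split of two separable clusters with labels in {-1, +1} (the form AdaBoost.fit and predict expect); this is a guess at the helper, not the repo's actual code:

    # utils/data_generater.py (hypothetical sketch)
    from sklearn.datasets import make_blobs
    from sklearn.model_selection import train_test_split

    def create_svm_data():
        # two Gaussian clusters; remap labels from {0, 1} to {-1, +1}
        X, y = make_blobs(n_samples=200, centers=2, n_features=2, random_state=0)
        y = 2 * y - 1
        return train_test_split(X, y, test_size=0.25, random_state=0)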
