Skip to content

Commit 6a5a1e5

Browse files
add naivebayes notebook
1 parent 897629e commit 6a5a1e5

4 files changed

Lines changed: 348 additions & 2 deletions

File tree

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import sys\n",
10+
"import os\n",
11+
"searchPath=os.path.abspath('..')\n",
12+
"sys.path.append(searchPath)"
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": 2,
18+
"metadata": {},
19+
"outputs": [],
20+
"source": [
21+
"import numpy as np\n",
22+
"from sklearn.datasets import load_iris\n",
23+
"from sklearn.model_selection import train_test_split\n",
24+
"from naiveBayesBase import NaiveBayesBase\n",
25+
"from naiveBayesGaussian import GaussianNaiveBayes\n",
26+
"from utils.word_utils import *"
27+
]
28+
},
29+
{
30+
"cell_type": "markdown",
31+
"metadata": {},
32+
"source": [
33+
"# Test NaiveBayesBase"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": 3,
39+
"metadata": {},
40+
"outputs": [],
41+
"source": [
42+
"def loadDataSet():\n",
43+
" '''数据加载函数。这里是一个小例子'''\n",
44+
" postingList = [['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],\n",
45+
" ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],\n",
46+
" ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],\n",
47+
" ['stop', 'posting', 'stupid', 'worthless', 'garbage'],\n",
48+
" ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],\n",
49+
" ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]\n",
50+
" classVec = [0, 1, 0, 1, 0, 1] # 1代表侮辱性文字,0代表正常言论,代表上面6个样本的类别\n",
51+
" return postingList, classVec"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": 4,
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"def checkNB():\n",
61+
" '''测试'''\n",
62+
" listOPosts, lisClasses = loadDataSet()\n",
63+
" myVocabList = createVocabList(listOPosts)\n",
64+
" trainMat = []\n",
65+
" for postinDoc in listOPosts:\n",
66+
" trainMat.append(setOfWord2Vec(myVocabList, postinDoc))\n",
67+
"\n",
68+
" nb = NaiveBayesBase()\n",
69+
" nb.fit(np.array(trainMat), np.array(lisClasses))\n",
70+
"\n",
71+
" testEntry1 = ['love', 'my', 'dalmation']\n",
72+
" thisDoc = np.array(setOfWord2Vec(myVocabList, testEntry1))\n",
73+
" print(testEntry1, 'classified as:', nb.predict(thisDoc))\n",
74+
"\n",
75+
" testEntry2 = ['stupid', 'garbage']\n",
76+
" thisDoc2 = np.array(setOfWord2Vec(myVocabList, testEntry2))\n",
77+
" print(testEntry2, 'classified as:', nb.predict(thisDoc2))"
78+
]
79+
},
80+
{
81+
"cell_type": "code",
82+
"execution_count": 5,
83+
"metadata": {},
84+
"outputs": [
85+
{
86+
"name": "stdout",
87+
"output_type": "stream",
88+
"text": [
89+
"['love', 'my', 'dalmation'] classified as: 0\n",
90+
"['stupid', 'garbage'] classified as: 1\n"
91+
]
92+
}
93+
],
94+
"source": [
95+
"checkNB()"
96+
]
97+
},
98+
{
99+
"cell_type": "markdown",
100+
"metadata": {},
101+
"source": [
102+
"# Test GaussianNaiveBayes"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": 6,
108+
"metadata": {},
109+
"outputs": [
110+
{
111+
"ename": "NameError",
112+
"evalue": "name 'create_data' is not defined",
113+
"output_type": "error",
114+
"traceback": [
115+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
116+
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
117+
"\u001b[1;32m<ipython-input-6-4e0a4be29ab1>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0miris\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mload_iris\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcreate_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_test\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miris\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0miris\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtarget\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
118+
"\u001b[1;31mNameError\u001b[0m: name 'create_data' is not defined"
119+
]
120+
}
121+
],
122+
"source": [
123+
"iris = load_iris()\n",
124+
"X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)"
125+
]
126+
},
127+
{
128+
"cell_type": "code",
129+
"execution_count": null,
130+
"metadata": {},
131+
"outputs": [],
132+
"source": [
133+
"print(len(X_train))\n",
134+
"print(len(X_test))\n",
135+
"model = GaussianNaiveBayes()\n",
136+
"model.fit(X_train, y_train)\n",
137+
"print(model.predict([4.4, 3.2, 1.3, 0.2]))\n",
138+
"print(model.score(X_test, y_test))"
139+
]
140+
},
141+
{
142+
"cell_type": "code",
143+
"execution_count": null,
144+
"metadata": {},
145+
"outputs": [],
146+
"source": []
147+
}
148+
],
149+
"metadata": {
150+
"kernelspec": {
151+
"display_name": "Python 3",
152+
"language": "python",
153+
"name": "python3"
154+
},
155+
"language_info": {
156+
"codemirror_mode": {
157+
"name": "ipython",
158+
"version": 3
159+
},
160+
"file_extension": ".py",
161+
"mimetype": "text/x-python",
162+
"name": "python",
163+
"nbconvert_exporter": "python",
164+
"pygments_lexer": "ipython3",
165+
"version": "3.6.5"
166+
}
167+
},
168+
"nbformat": 4,
169+
"nbformat_minor": 2
170+
}

naive_bayes/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# 实现朴素贝叶斯的基本算法和高斯朴素贝叶斯(Gaussian Naive Bayes)算法
2+
3+
# 结果比较
4+
结果在naiveBayes.ipynb中展示
5+
6+
# 相关博客
7+
#### [朴素贝叶斯算法(Naive Bayes)](https://www.cnblogs.com/huangyc/p/9734956.html)

naive_bayes/naiveBayes.ipynb

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import sys\n",
10+
"import os\n",
11+
"searchPath=os.path.abspath('..')\n",
12+
"sys.path.append(searchPath)"
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": 2,
18+
"metadata": {},
19+
"outputs": [],
20+
"source": [
21+
"import numpy as np\n",
22+
"from sklearn.datasets import load_iris\n",
23+
"from sklearn.model_selection import train_test_split\n",
24+
"from naiveBayesBase import NaiveBayesBase\n",
25+
"from naiveBayesGaussian import GaussianNaiveBayes\n",
26+
"from utils.word_utils import *"
27+
]
28+
},
29+
{
30+
"cell_type": "markdown",
31+
"metadata": {},
32+
"source": [
33+
"# Test NaiveBayesBase"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": 3,
39+
"metadata": {},
40+
"outputs": [],
41+
"source": [
42+
"def loadDataSet():\n",
43+
" '''数据加载函数。这里是一个小例子'''\n",
44+
" postingList = [['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],\n",
45+
" ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],\n",
46+
" ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],\n",
47+
" ['stop', 'posting', 'stupid', 'worthless', 'garbage'],\n",
48+
" ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],\n",
49+
" ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]\n",
50+
" classVec = [0, 1, 0, 1, 0, 1] # 1代表侮辱性文字,0代表正常言论,代表上面6个样本的类别\n",
51+
" return postingList, classVec"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": 4,
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"def checkNB():\n",
61+
" '''测试'''\n",
62+
" listOPosts, lisClasses = loadDataSet()\n",
63+
" myVocabList = createVocabList(listOPosts)\n",
64+
" trainMat = []\n",
65+
" for postinDoc in listOPosts:\n",
66+
" trainMat.append(setOfWord2Vec(myVocabList, postinDoc))\n",
67+
"\n",
68+
" nb = NaiveBayesBase()\n",
69+
" nb.fit(np.array(trainMat), np.array(lisClasses))\n",
70+
"\n",
71+
" testEntry1 = ['love', 'my', 'dalmation']\n",
72+
" thisDoc = np.array(setOfWord2Vec(myVocabList, testEntry1))\n",
73+
" print(testEntry1, 'classified as:', nb.predict(thisDoc))\n",
74+
"\n",
75+
" testEntry2 = ['stupid', 'garbage']\n",
76+
" thisDoc2 = np.array(setOfWord2Vec(myVocabList, testEntry2))\n",
77+
" print(testEntry2, 'classified as:', nb.predict(thisDoc2))"
78+
]
79+
},
80+
{
81+
"cell_type": "code",
82+
"execution_count": 5,
83+
"metadata": {},
84+
"outputs": [
85+
{
86+
"name": "stdout",
87+
"output_type": "stream",
88+
"text": [
89+
"['love', 'my', 'dalmation'] classified as: 0\n",
90+
"['stupid', 'garbage'] classified as: 1\n"
91+
]
92+
}
93+
],
94+
"source": [
95+
"checkNB()"
96+
]
97+
},
98+
{
99+
"cell_type": "markdown",
100+
"metadata": {},
101+
"source": [
102+
"# Test GaussianNaiveBayes"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": 6,
108+
"metadata": {},
109+
"outputs": [],
110+
"source": [
111+
"iris = load_iris()\n",
112+
"X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)"
113+
]
114+
},
115+
{
116+
"cell_type": "code",
117+
"execution_count": 7,
118+
"metadata": {},
119+
"outputs": [
120+
{
121+
"name": "stdout",
122+
"output_type": "stream",
123+
"text": [
124+
"105\n",
125+
"45\n",
126+
"0\n",
127+
"0.9333333333333333\n"
128+
]
129+
}
130+
],
131+
"source": [
132+
"print(len(X_train))\n",
133+
"print(len(X_test))\n",
134+
"model = GaussianNaiveBayes()\n",
135+
"model.fit(X_train, y_train)\n",
136+
"print(model.predict([4.4, 3.2, 1.3, 0.2]))\n",
137+
"print(model.score(X_test, y_test))"
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": null,
143+
"metadata": {},
144+
"outputs": [],
145+
"source": []
146+
}
147+
],
148+
"metadata": {
149+
"kernelspec": {
150+
"display_name": "Python 3",
151+
"language": "python",
152+
"name": "python3"
153+
},
154+
"language_info": {
155+
"codemirror_mode": {
156+
"name": "ipython",
157+
"version": 3
158+
},
159+
"file_extension": ".py",
160+
"mimetype": "text/x-python",
161+
"name": "python",
162+
"nbconvert_exporter": "python",
163+
"pygments_lexer": "ipython3",
164+
"version": "3.6.5"
165+
}
166+
},
167+
"nbformat": 4,
168+
"nbformat_minor": 2
169+
}

naive_bayes/naiveBayesBase.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44

55

6-
class NavieBayesNase(object):
6+
class NaiveBayesBase(object):
77

88
def __init__(self):
99
pass
@@ -86,7 +86,7 @@ def checkNB():
8686
for postinDoc in listOPosts:
8787
trainMat.append(setOfWord2Vec(myVocabList, postinDoc))
8888

89-
nb = NavieBayesNase()
89+
nb = NaiveBayesBase()
9090
nb.fit(np.array(trainMat), np.array(lisClasses))
9191

9292
testEntry1 = ['love', 'my', 'dalmation']

0 commit comments

Comments
 (0)