@@ -639,19 +639,62 @@ words in documents
639639
640640.. code :: python
641641
642- from sklearn.naive_bayes import MultinomialNB
643- clf = MultinomialNB().fit(X_train_tfidf, twenty_train.target)
642+ from sklearn.naive_bayes import MultinomialNB
643+ from sklearn.pipeline import Pipeline
644+ from sklearn import metrics
645+ from sklearn.feature_extraction.text import CountVectorizer
646+ from sklearn.feature_extraction.text import TfidfTransformer
647+ from sklearn.datasets import fetch_20newsgroups
644648
649+ newsgroups_train = fetch_20newsgroups(subset='train')
650+ newsgroups_test = fetch_20newsgroups(subset='test')
651+ X_train = newsgroups_train.data
652+ X_test = newsgroups_test.data
653+ y_train = newsgroups_train.target
654+ y_test = newsgroups_test.target
645655
646- docs_new = [' God is love' , ' OpenGL on the GPU is fast' ]
647- X_new_counts = count_vect.transform(docs_new)
648- X_new_tfidf = tfidf_transformer.transform(X_new_counts)
656+ text_clf = Pipeline([('vect', CountVectorizer()),
657+ ('tfidf', TfidfTransformer()),
658+ ('clf', MultinomialNB()),
659+ ])
649660
650- predicted = clf.predict(X_new_tfidf)
661+ text_clf.fit(X_train, y_train)
662+
663+
664+ predicted = text_clf.predict(X_test)
651665
652- for doc, category in zip (docs_new , predicted):
653- print ( ' %r => %s ' % (doc, twenty_train.target_names[category]))
666+ print(metrics.classification_report(y_test, predicted))
667+
654668
669+ Output:
670+
671+ .. code :: python
672+
673+ precision recall f1-score support
674+
675+ 0 0.80 0.52 0.63 319
676+ 1 0.81 0.65 0.72 389
677+ 2 0.82 0.65 0.73 394
678+ 3 0.67 0.78 0.72 392
679+ 4 0.86 0.77 0.81 385
680+ 5 0.89 0.75 0.82 395
681+ 6 0.93 0.69 0.80 390
682+ 7 0.85 0.92 0.88 396
683+ 8 0.94 0.93 0.93 398
684+ 9 0.92 0.90 0.91 397
685+ 10 0.89 0.97 0.93 399
686+ 11 0.59 0.97 0.74 396
687+ 12 0.84 0.60 0.70 393
688+ 13 0.92 0.74 0.82 396
689+ 14 0.84 0.89 0.87 394
690+ 15 0.44 0.98 0.61 398
691+ 16 0.64 0.94 0.76 364
692+ 17 0.93 0.91 0.92 376
693+ 18 0.96 0.42 0.58 310
694+ 19 0.97 0.14 0.24 251
695+
696+ avg / total 0.82 0.77 0.77 7532
697+
655698
656699~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
657700K-nearest Neighbor
0 commit comments