@@ -592,16 +592,63 @@ When in nearest centroid classifier, we used for text as input data for classifi
592592
593593.. code :: python
594594
595- from sklearn.neighbors.nearest_centroid import NearestCentroid
596- import numpy as np
597- X = np.array([[ - 1 , - 1 ], [ - 2 , - 1 ], [ - 3 , - 2 ], [ 1 , 1 ], [ 2 , 1 ], [ 3 , 2 ]])
598- y = np.array([ 1 , 1 , 1 , 2 , 2 , 2 ])
599- clf = NearestCentroid()
600- clf.fit(X, y)
595+ from sklearn.neighbors import NearestCentroid
596+ from sklearn.pipeline import Pipeline
597+ from sklearn import metrics
598+ from sklearn.feature_extraction.text import CountVectorizer
599+ from sklearn.feature_extraction.text import TfidfTransformer
600+ from sklearn.datasets import fetch_20newsgroups
601601
602+ newsgroups_train = fetch_20newsgroups(subset='train')
603+ newsgroups_test = fetch_20newsgroups(subset='test')
604+ X_train = newsgroups_train.data
605+ X_test = newsgroups_test.data
606+ y_train = newsgroups_train.target
607+ y_test = newsgroups_test.target
602608
609+ text_clf = Pipeline([('vect', CountVectorizer()),
610+                      ('tfidf', TfidfTransformer()),
611+                      ('clf', NearestCentroid()),
612+                      ])
603613
614+ text_clf.fit(X_train, y_train)
615+
616+
617+ predicted = text_clf.predict(X_test)
618+
619+ print(metrics.classification_report(y_test, predicted))
620+
621+
622+
623+
624+ Output:
625+
626+ .. code :: python
604627
628+ precision recall f1-score support
629+
630+ 0 0.75 0.49 0.60 319
631+ 1 0.44 0.76 0.56 389
632+ 2 0.75 0.68 0.71 394
633+ 3 0.71 0.59 0.65 392
634+ 4 0.81 0.71 0.76 385
635+ 5 0.83 0.66 0.74 395
636+ 6 0.49 0.88 0.63 390
637+ 7 0.86 0.76 0.80 396
638+ 8 0.91 0.86 0.89 398
639+ 9 0.85 0.79 0.82 397
640+ 10 0.95 0.80 0.87 399
641+ 11 0.94 0.66 0.78 396
642+ 12 0.40 0.70 0.51 393
643+ 13 0.84 0.49 0.62 396
644+ 14 0.89 0.72 0.80 394
645+ 15 0.55 0.73 0.63 398
646+ 16 0.68 0.76 0.71 364
647+ 17 0.97 0.70 0.81 376
648+ 18 0.54 0.53 0.53 310
649+ 19 0.58 0.39 0.47 251
650+
651+ avg / total 0.74 0.69 0.70 7532
605652
606653
607654
0 commit comments