@@ -561,43 +561,42 @@ Non-negative Matrix Factorization (NMF)
561561.. code :: python
562562
563563
564- from sklearn.feature_extraction.text import TfidfVectorizer
565- import numpy as np
566- from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
567-
564+ from sklearn.feature_extraction.text import TfidfVectorizer
565+ import numpy as np
566+ from sklearn.decomposition import NMF
568567
569- def TFIDF (X_train , X_test , MAX_NB_WORDS = 75000 ):
570- vectorizer_x = TfidfVectorizer(max_features = MAX_NB_WORDS )
571- X_train = vectorizer_x.fit_transform(X_train).toarray()
572- X_test = vectorizer_x.transform(X_test).toarray()
573- print (" tf-idf with" , str (np.array(X_train).shape[1 ]), " features" )
574- return (X_train, X_test)
575568
569+ def TFIDF (X_train , X_test , MAX_NB_WORDS = 75000 ):
570+ vectorizer_x = TfidfVectorizer(max_features = MAX_NB_WORDS )
571+ X_train = vectorizer_x.fit_transform(X_train).toarray()
572+ X_test = vectorizer_x.transform(X_test).toarray()
573+ print (" tf-idf with" , str (np.array(X_train).shape[1 ]), " features" )
574+ return (X_train, X_test)
576575
577- from sklearn.datasets import fetch_20newsgroups
578576
579- newsgroups_train = fetch_20newsgroups(subset = ' train' )
580- newsgroups_test = fetch_20newsgroups(subset = ' test' )
581- X_train = newsgroups_train.data
582- X_test = newsgroups_test.data
583- y_train = newsgroups_train.target
584- y_test = newsgroups_test.target
577+ from sklearn.datasets import fetch_20newsgroups
585578
586- X_train,X_test = TFIDF(X_train,X_test)
579+ newsgroups_train = fetch_20newsgroups(subset = 'train' )
580+ newsgroups_test = fetch_20newsgroups(subset = 'test' )
581+ X_train = newsgroups_train.data
582+ X_test = newsgroups_test.data
583+ y_train = newsgroups_train.target
584+ y_test = newsgroups_test.target
587585
586+ X_train,X_test = TFIDF(X_train,X_test)
588587
589588
590- LDA = LinearDiscriminantAnalysis(n_components = 2000 )
591- X_train_new = LDA .fit(X_train,y_train)
592- X_train_new = LDA .transform(X_train)
593- X_test_new = LDA .transform(X_test)
594589
595- print (" train with old features: " ,np.array(X_train).shape)
596- print (" train with new features:" ,np.array(X_train_new).shape)
590+ NMF_ = NMF(n_components = 2000 )
591+ NMF_ .fit(X_train)
592+ X_train_new = NMF_ .transform(X_train)
593+ X_test_new = NMF_ .transform(X_test)
597594
598- print (" test with old features: " ,np.array(X_test ).shape)
599- print (" test with new features:" ,np.array(X_test_new ).shape)
595+ print (" train with old features: " ,np.array(X_train ).shape)
596+ print (" train with new features:" ,np.array(X_train_new ).shape)
600597
598+ print (" test with old features: " ,np.array(X_test).shape)
599+ print (" test with new features:" ,np.array(X_test_new).shape)
601600
602601 output:
603602
0 commit comments