@@ -450,15 +450,50 @@ we start to review some random projection techniques.
450450
451451.. code:: python
452452
453- import numpy as np
454- from sklearn import random_projection
455- X = np.random.rand(100 , 10000 )
456- transformer = random_projection.GaussianRandomProjection()
457- X_new = transformer.fit_transform(X)
458- X_new.shape
459- (100 , 3947 )
453+ from sklearn.feature_extraction.text import TfidfVectorizer
454+ import numpy as np
455+
def TFIDF(X_train, X_test, MAX_NB_WORDS=75000):
    """Convert raw training and test documents into dense TF-IDF matrices.

    The vectorizer is fitted on ``X_train`` only and then re-used to
    transform ``X_test``, so both returned matrices share the same columns.

    Args:
        X_train: Training documents, passed to ``fit_transform``.
        X_test: Test documents, passed to ``transform``.
        MAX_NB_WORDS: Forwarded to ``TfidfVectorizer`` as ``max_features``.

    Returns:
        A ``(X_train, X_test)`` tuple holding the ``.toarray()`` result for
        each input.
    """
    vectorizer_x = TfidfVectorizer(max_features=MAX_NB_WORDS)
    # NOTE(review): .toarray() materialises the full dense matrix; callers in
    # this document index the result as an ndarray, so the return type is
    # kept as-is.
    X_train = vectorizer_x.fit_transform(X_train).toarray()
    X_test = vectorizer_x.transform(X_test).toarray()
    # np.shape reads the dimensions without copying the (potentially very
    # large) array, unlike np.array(X_train).shape.
    print(" tf-idf with", np.shape(X_train)[1], " features")
    return (X_train, X_test)
462+
463+
464+ from sklearn.datasets import fetch_20newsgroups
465+
# Load the 20 Newsgroups train and test subsets. The subset name must be
# exactly 'train' or 'test' (no surrounding whitespace), otherwise
# fetch_20newsgroups rejects it.
newsgroups_train = fetch_20newsgroups(subset='train')
newsgroups_test = fetch_20newsgroups(subset='test')

# .data is fed to TFIDF below; .target is kept alongside as the labels.
X_train = newsgroups_train.data
X_test = newsgroups_test.data
y_train = newsgroups_train.target
y_test = newsgroups_test.target

# Replace the raw documents with their TF-IDF feature matrices.
X_train, X_test = TFIDF(X_train, X_test)
474+
475+ from sklearn import random_projection
476+
# Project the features down to 2000 components with a Gaussian random matrix.
# The projection is fitted on the training matrix and re-used unchanged for
# the test matrix so both end up in the same reduced space.
RandomProjection = random_projection.GaussianRandomProjection(n_components=2000)
X_train_new = RandomProjection.fit_transform(X_train)
X_test_new = RandomProjection.transform(X_test)

# np.shape reports the dimensions without duplicating the matrices in memory
# (np.array(X).shape would copy each one just to read its shape).
print(" train with old features: ", np.shape(X_train))
print(" train with new features:", np.shape(X_train_new))

print(" test with old features: ", np.shape(X_test))
print(" test with new features:", np.shape(X_test_new))
486+
487+ output:
488+
489+ .. code:: python
490+
491+ tf-idf with 75000 features
492+ train with old features: (11314, 75000)
493+ train with new features: (11314, 2000)
494+ test with old features: (7532, 75000)
495+ test with new features: (7532, 2000)
496+
462497~~~~~~~~~~~
463498Autoencoder
464499~~~~~~~~~~~
0 commit comments