-from keras.layers import Dropout, Dense, Input, Embedding, Flatten, MaxPooling1D, Conv1D
+from keras.layers import Dropout, Dense, Input, Embedding, Flatten, AveragePooling2D, Conv2D, Reshape
 from keras.models import Sequential, Model
 from sklearn.feature_extraction.text import TfidfVectorizer
 import numpy as np
@@ -26,7 +26,7 @@ def loadData_Tokenizer(X_train, X_test,MAX_NB_WORDS=75000,MAX_SEQUENCE_LENGTH=50
     X_train = text[0:len(X_train), ]
     X_test = text[len(X_train):, ]
     embeddings_index = {}
-    f = open("C:\\Users\\kamran\\Documents\\GitHub\\RMDL\\Examples\\Glove\\glove.6B.50d.txt", encoding="utf8")
+    f = open("C:\\Users\\kamran\\Documents\\GitHub\\RMDL\\Examples\\Glove\\glove.6B.100d.txt", encoding="utf8")
     for line in f:
         values = line.split()
         word = values[0]
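The dimensionality of the GloVe file has to match EMBEDDING_DIM, which is why the switch to glove.6B.100d.txt goes together with EMBEDDING_DIM=100 below. The rest of this loop falls outside the diff context; a minimal sketch of the usual parsing pattern (the shortened path is for illustration only):

import numpy as np

embeddings_index = {}
with open("glove.6B.100d.txt", encoding="utf8") as f:
    for line in f:
        values = line.split()
        word = values[0]                                 # the token itself
        coefs = np.asarray(values[1:], dtype='float32')  # its 100-d vector
        embeddings_index[word] = coefs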
@@ -41,7 +41,7 @@ def loadData_Tokenizer(X_train, X_test,MAX_NB_WORDS=75000,MAX_SEQUENCE_LENGTH=50
 
 
 
-def Build_Model_CNN_Text(word_index, embeddings_index, nclasses, MAX_SEQUENCE_LENGTH=500, EMBEDDING_DIM=50, dropout=0.5):
+def Build_Model_CNN_Text(word_index, embeddings_index, nclasses, MAX_SEQUENCE_LENGTH=500, EMBEDDING_DIM=100, dropout=0.5):
 
     """
     def buildModel_CNN(word_index, embeddings_index, nclasses, MAX_SEQUENCE_LENGTH=500, EMBEDDING_DIM=50, dropout=0.5):
@@ -78,30 +78,29 @@ def buildModel_CNN(word_index, embeddings_index, nclasses, MAX_SEQUENCE_LENGTH=5
     layer = 5
     print("Filter ", layer)
     for fl in range(0, layer):
-        filter_sizes.append((fl + 2))
+        filter_sizes.append((fl + 2, fl + 2))
 
     node = 128
     sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
     embedded_sequences = embedding_layer(sequence_input)
+    emb = Reshape((500, 10, 10), input_shape=(500, 100))(embedded_sequences)
 
     for fsz in filter_sizes:
-        l_conv = Conv1D(node, kernel_size=fsz, activation='relu')(embedded_sequences)
-        l_pool = MaxPooling1D(5)(l_conv)
+        l_conv = Conv2D(node, padding="same", kernel_size=fsz, activation='relu')(emb)
+        l_pool = AveragePooling2D(pool_size=(5, 1), padding="same")(l_conv)
         #l_pool = Dropout(0.25)(l_pool)
         convs.append(l_pool)
 
     l_merge = Concatenate(axis=1)(convs)
-    l_cov1 = Conv1D(node, 5, activation='relu')(l_merge)
-    l_cov1 = Dropout(dropout)(l_cov1)
-    l_pool1 = MaxPooling1D(5)(l_cov1)
-    l_cov2 = Conv1D(node, 5, activation='relu')(l_pool1)
-    l_cov2 = Dropout(dropout)(l_cov2)
-    l_pool2 = MaxPooling1D(30)(l_cov2)
-    l_flat = Flatten()(l_pool2)
-    l_dense = Dense(1024, activation='relu')(l_flat)
-    l_dense = Dropout(dropout)(l_dense)
-    l_dense = Dense(512, activation='relu')(l_dense)
+    l_cov1 = Conv2D(node, (5, 5), padding="same", activation='relu')(l_merge)
+    l_cov1 = AveragePooling2D(pool_size=(5, 2), padding="same")(l_cov1)
+    l_cov2 = Conv2D(node, (5, 5), padding="same", activation='relu')(l_cov1)
+    l_pool2 = AveragePooling2D(pool_size=(5, 2), padding="same")(l_cov2)
+    l_cov2 = Dropout(dropout)(l_pool2)
+    l_flat = Flatten()(l_cov2)
+    l_dense = Dense(128, activation='relu')(l_flat)
     l_dense = Dropout(dropout)(l_dense)
+
     preds = Dense(nclasses, activation='softmax')(l_dense)
     model = Model(sequence_input, preds)
 
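A minimal shape check of the new 2D path, assuming a channels_last backend and the constants hard-coded above (MAX_SEQUENCE_LENGTH=500, EMBEDDING_DIM=100): Reshape turns each sequence of 100-d GloVe vectors into a 500x10 "image" with 10 channels, and the "same"-padded Conv2D/AveragePooling2D stack keeps all branch outputs the same shape for the later Concatenate(axis=1).

from keras.layers import Input, Reshape, Conv2D, AveragePooling2D
from keras.models import Model

inp = Input(shape=(500, 100))                              # stands in for the embedding output
img = Reshape((500, 10, 10))(inp)                          # (None, 500, 10, 10)
c = Conv2D(128, kernel_size=(2, 2), padding="same")(img)   # (None, 500, 10, 128)
p = AveragePooling2D(pool_size=(5, 1), padding="same")(c)  # (None, 100, 10, 128)
print(Model(inp, p).output_shape)                          # (None, 100, 10, 128)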
@@ -115,8 +114,8 @@ def buildModel_CNN(word_index, embeddings_index, nclasses, MAX_SEQUENCE_LENGTH=5
 
 
 
-
-
+from sklearn.datasets import fetch_20newsgroups
+from RMDL import text_feature_extraction as txt
 
 newsgroups_train = fetch_20newsgroups(subset='train')
 newsgroups_test = fetch_20newsgroups(subset='test')
@@ -125,6 +124,7 @@ def buildModel_CNN(word_index, embeddings_index, nclasses, MAX_SEQUENCE_LENGTH=5
 y_train = newsgroups_train.target
 y_test = newsgroups_test.target
 
+
 X_train_Glove, X_test_Glove, word_index, embeddings_index = loadData_Tokenizer(X_train, X_test)
 
 
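The lines that construct and compile model_CNN sit outside this diff's context; a hedged sketch of what they would typically look like for the 20 Newsgroups labels above (the optimizer and loss here are assumptions, chosen because y_train holds integer class ids):

model_CNN = Build_Model_CNN_Text(word_index, embeddings_index, 20)
model_CNN.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])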
@@ -135,7 +135,7 @@ def buildModel_CNN(word_index, embeddings_index, nclasses, MAX_SEQUENCE_LENGTH=5
 
 model_CNN.fit(X_train_Glove, y_train,
               validation_data=(X_test_Glove, y_test),
-              epochs=15,
+              epochs=1000,
               batch_size=128,
               verbose=2)
 