@@ -42,7 +42,33 @@ def prepare_data(seqs, labels, maxlen=None):
4242 return x , x_mask , labels
4343
4444
45- def load_data (path = "imdb.pkl" , n_words = 100000 , valid_portion = 0.1 ):
def get_dataset_file(dataset, default_dataset, origin):
    '''Resolve the path of a dataset file, downloading it if necessary.

    The lookup order is:

    1. ``dataset`` exactly as given (a full path, or a file relative to
       the current working directory);
    2. if ``dataset`` has no directory part and is not a local file,
       ``<directory of this module>/../data/<dataset>``.

    If the file is still missing and its base name equals
    ``default_dataset``, it is downloaded from ``origin``.

    :type dataset: string
    :param dataset: path, or bare file name, of the dataset

    :type default_dataset: string
    :param default_dataset: base name of the only file that may be
        downloaded automatically

    :type origin: string
    :param origin: URL the default dataset is fetched from

    :return: the resolved path. It is not guaranteed to exist when the
        file could not be found and is not the default dataset.
    '''
    data_dir, data_file = os.path.split(dataset)
    is_default = data_file == default_dataset

    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory next to this module.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        # The default dataset is redirected there even when absent, so
        # that the download below lands in the data directory.
        if os.path.isfile(new_path) or is_default:
            dataset = new_path

    if is_default and not os.path.isfile(dataset):
        # Imported lazily: only needed on the (rare) download path.
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2

        # The destination directory may not exist yet on a fresh checkout.
        target_dir = os.path.dirname(dataset)
        if target_dir and not os.path.isdir(target_dir):
            os.makedirs(target_dir)

        print('Downloading data from %s' % origin)

        # Download to a temporary name and rename on success; otherwise an
        # interrupted transfer would leave a truncated file that the
        # isfile() checks above accept as valid on the next run.
        partial = dataset + '.part'
        try:
            urlretrieve(origin, partial)
            os.rename(partial, dataset)
        finally:
            if os.path.isfile(partial):
                os.remove(partial)
    return dataset
69+
70+
71+ def load_data (path = "imdb.pkl.gz" , n_words = 100000 , valid_portion = 0.1 ):
4672 ''' Loads the dataset
4773
4874 :type dataset: string
@@ -53,10 +79,12 @@ def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1):
5379 # LOAD DATA #
5480 #############
5581
56- print '... loading data'
57-
5882 # Load the dataset
59- f = open (path , 'rb' )
83+ path = get_dataset_file (
84+ path , "imdb.pkl.gz" ,
85+ "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz" )
86+
87+ f = gzip .open (path , 'rb' )
6088 train_set = cPickle .load (f )
6189 test_set = cPickle .load (f )
6290 f .close ()
0 commit comments