|
| 1 | +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' |
| 2 | +RMDL: Random Multimodel Deep Learning for Classification |
| 3 | + * Copyright (C) 2018 Kamran Kowsari <kk7nc@virginia.edu> |
| 4 | + * Last Update: 04/25/2018 |
| 5 | + * This file is part of RMDL project, University of Virginia. |
| 6 | + * Free to use, change, share and distribute source code of RMDL |
| 7 | + * Referenced paper : RMDL: Random Multimodel Deep Learning for Classification |
| 8 | + * Referenced paper : An Improvement of Data Classification using Random Multimodel Deep Learning (RMDL) |
| 9 | + * Comments and Error: email: kk7nc@virginia.edu |
| 10 | +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' |
| 11 | + |
| 12 | + |
| 13 | +from __future__ import print_function |
| 14 | + |
| 15 | +import os, sys, tarfile |
| 16 | +import numpy as np |
| 17 | +import zipfile |
| 18 | + |
# Resolve a Python 2/3 compatible ``urllib`` module: ``urlretrieve`` moved
# into ``urllib.request`` in Python 3, so alias it back under the old name.
if sys.version_info >= (3, 0, 0):
    import urllib.request as urllib  # Python 3 location of urlretrieve
else:
    import urllib  # Python 2: urlretrieve lives directly on urllib

print(sys.version_info)
| 25 | + |
| 26 | +# image shape |
| 27 | + |
| 28 | + |
| 29 | +# path to the directory with the data |
| 30 | +DATA_DIR = '.\Glove' |
| 31 | + |
| 32 | +# url of the binary data |
| 33 | + |
| 34 | + |
| 35 | + |
| 36 | +# path to the binary train file with image data |
| 37 | + |
| 38 | + |
def download_and_extract(data='Wikipedia'):
    """
    Download a pre-trained GloVe embedding archive and extract it.

    Parameters
    ----------
    data : str
        Which pre-trained corpus to fetch; one of ``'Wikipedia'``,
        ``'Common_Crawl_840B'``, ``'Common_Crawl_42B'``, or ``'Twitter'``.

    Returns
    -------
    str
        Absolute path of the directory the archive was extracted into.

    Raises
    ------
    ValueError
        If ``data`` is not one of the supported corpus names.
    """
    # Map the supported corpus names to their download URLs (replaces the
    # original if/elif chain).
    urls = {
        'Wikipedia': 'http://nlp.stanford.edu/data/glove.6B.zip',
        'Common_Crawl_840B': 'http://nlp.stanford.edu/data/wordvecs/glove.840B.300d.zip',
        'Common_Crawl_42B': 'http://nlp.stanford.edu/data/wordvecs/glove.42B.300d.zip',
        'Twitter': 'http://nlp.stanford.edu/data/wordvecs/glove.twitter.27B.zip',
    }
    try:
        data_url = urls[data]
    except KeyError:
        # The original printed a message and called exit(0): killing the host
        # process with a *success* status on bad input hides the error from
        # callers and shells alike.  Raise a descriptive exception instead.
        raise ValueError(
            "parameter should be Twitter, Common_Crawl_42B, "
            "Common_Crawl_840B, or Wikipedia")

    dest_directory = DATA_DIR
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = data_url.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    print(filepath)

    path = os.path.abspath(dest_directory)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # total_size can be -1 when the server sends no Content-Length;
            # skip the percentage in that case to avoid nonsense output.
            if total_size > 0:
                pct = float(count * block_size) / float(total_size) * 100.0
                sys.stdout.write('\rDownloading %s %.2f%%' % (filename, pct))
                sys.stdout.flush()

        # Re-enable the progress reporthook the original left commented out.
        filepath, _ = urllib.urlretrieve(data_url, filepath,
                                         reporthook=_progress)

    # Context manager guarantees the zip handle is closed even if
    # extraction raises (the original leaked the handle on error).
    with zipfile.ZipFile(filepath, 'r') as zip_ref:
        zip_ref.extractall(DATA_DIR)
    return path
0 commit comments