Skip to content

Commit 4c61493

Browse files
committed
Update prepare-wmt14en2de.sh
1 parent 36ad5fe commit 4c61493

1 file changed

Lines changed: 10 additions & 7 deletions

File tree

PyTorch/Translation/Transformer/examples/translation/prepare-wmt14en2de.sh

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -31,21 +31,24 @@ BPEROOT=subword-nmt
31 31
BPE_TOKENS=40000
32 32

33 33
URLS=(
34-
"http://statmt.org/wmt13/training-parallel-europarl-v7.tgz"
35-
"http://statmt.org/wmt13/training-parallel-commoncrawl.tgz"
36-
"http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz"
34+
#"http://statmt.org/wmt13/training-parallel-europarl-v7.tgz"
35+
"https://gluonnlp-numpy-data.s3-accelerate.amazonaws.com/datasets/third_party_mirror/de-en-53bb5408d22977c89284bd755717e6bbb5b12bc5.tgz"
36+
#"http://statmt.org/wmt13/training-parallel-commoncrawl.tgz"
37+
"https://gluonnlp-numpy-data.s3-accelerate.amazonaws.com/datasets/third_party_mirror/training-parallel-commoncrawl-1c0ad85f0ebaf1d543acb009607205f5dae6627d.tgz"
38+
#"http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz"
39+
"https://gluonnlp-numpy-data.s3-accelerate.amazonaws.com/datasets/third_party_mirror/training-parallel-nc-v12-d98afc59e1d753485530b377ff65f1f891d3bced.tgz"
37 40
"http://data.statmt.org/wmt17/translation-task/dev.tgz"
38 41
"http://statmt.org/wmt14/test-full.tgz"
39 42
)
40 43
FILES=(
41-
"training-parallel-europarl-v7.tgz"
42-
"training-parallel-commoncrawl.tgz"
43-
"training-parallel-nc-v12.tgz"
44+
"de-en-53bb5408d22977c89284bd755717e6bbb5b12bc5.tgz"
45+
"training-parallel-commoncrawl-1c0ad85f0ebaf1d543acb009607205f5dae6627d.tgz"
46+
"training-parallel-nc-v12-d98afc59e1d753485530b377ff65f1f891d3bced.tgz"
44 47
"dev.tgz"
45 48
"test-full.tgz"
46 49
)
47 50
CORPORA=(
48-
"training/europarl-v7.de-en"
51+
"europarl-v7.de-en"
49 52
"commoncrawl.de-en"
50 53
"training/news-commentary-v12.de-en"
51 54
)

0 commit comments

Comments
 (0)