feifeibear
diff --git a/‎PyTorch/LanguageModeling/BERT/.dockerignore‎
Lines changed: 19 additions & 7 deletions b/‎PyTorch/LanguageModeling/BERT/.dockerignore‎
Lines changed: 19 additions & 7 deletions
diff --git a/‎PyTorch/LanguageModeling/BERT/.gitignore‎
Lines changed: 3 additions & 6 deletions b/‎PyTorch/LanguageModeling/BERT/.gitignore‎
Lines changed: 3 additions & 6 deletions
diff --git a/‎PyTorch/LanguageModeling/BERT/Dockerfile‎
Lines changed: 14 additions & 16 deletions b/‎PyTorch/LanguageModeling/BERT/Dockerfile‎
Lines changed: 14 additions & 16 deletions
diff --git a/‎PyTorch/LanguageModeling/BERT/LICENSE‎
Lines changed: 2 additions & 1 deletion b/‎PyTorch/LanguageModeling/BERT/LICENSE‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎PyTorch/LanguageModeling/BERT/README.md‎
Lines changed: 278 additions & 186 deletions b/‎PyTorch/LanguageModeling/BERT/README.md‎
Lines changed: 278 additions & 186 deletions
diff --git a/‎PyTorch/LanguageModeling/BERT/bind_pyt.py‎
Lines changed: 13 additions & 0 deletions b/‎PyTorch/LanguageModeling/BERT/bind_pyt.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎PyTorch/LanguageModeling/BERT/configurations.yml‎
Lines changed: 182 additions & 0 deletions b/‎PyTorch/LanguageModeling/BERT/configurations.yml‎
Lines changed: 182 additions & 0 deletions
diff --git a/‎PyTorch/LanguageModeling/BERT/create_pretraining_data.py‎
Lines changed: 3 additions & 2 deletions b/‎PyTorch/LanguageModeling/BERT/create_pretraining_data.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎PyTorch/LanguageModeling/BERT/data/BooksDownloader.py‎
Lines changed: 12 additions & 0 deletions b/‎PyTorch/LanguageModeling/BERT/data/BooksDownloader.py‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎PyTorch/LanguageModeling/BERT/data/BookscorpusTextFormatting.py‎
Lines changed: 12 additions & 0 deletions b/‎PyTorch/LanguageModeling/BERT/data/BookscorpusTextFormatting.py‎
Lines changed: 12 additions & 0 deletions
@@ -1,8 +1,20 @@
-data/download/
-data/extracted/
-data/formatted_one_article_per_line/
-data/sharded/
-data/hdf5/
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+data/download
+data/extracted
+data/formatted_one_article_per_line
+data/sharded
+data/hdf5
 vocab/
-results/
-checkpoints/*
+results/
@@ -8,14 +8,11 @@ __pycache__/
 # C extensions
 *.so
 
-#Data       
+#Data checkpoints and results       
 data/*/*/   
 data/*/*.zip
-data/*
-
-#checkpoints and results
-checkpoints/*
-results/*
+checkpoints/
+results/
 
 # Distribution / packaging
 .Python
 
@@ -1,24 +1,22 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.07-py3
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.08-py3
 FROM ${FROM_IMAGE_NAME}
 RUN apt-get update && apt-get install -y pbzip2 pv bzip2 cabextract
 
 ENV BERT_PREP_WORKING_DIR /workspace/bert/data
 
-WORKDIR /opt
-RUN rm -rf /opt/pytorch/apex ; \
-  git clone https://github.com/NVIDIA/apex.git pytorch/apex ; \
-  cd pytorch/apex ; \
-  pip uninstall --yes apex; \
-  git checkout 880ab925bce9f817a93988b021e12db5f67f7787;  \
-  git pull; \
-  pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
-
-#WORKDIR /opt
-#RUN cd pytorch/apex \
-# && git fetch origin pull/334/head:multi_tensor_lamb_optimizer \
-# && git checkout multi_tensor_lamb_optimizer \
-# && python setup.py develop --cuda_ext --cpp_ext
-
 WORKDIR /workspace
 RUN git clone https://github.com/attardi/wikiextractor.git
 RUN git clone https://github.com/soskek/bookcorpus.git
 
@@ -1,4 +1,3 @@
-
                                  Apache License
                            Version 2.0, January 2004
                         http://www.apache.org/licenses/
@@ -176,6 +175,8 @@
 
    END OF TERMS AND CONDITIONS
 
+   Copyright 2019 NVIDIA CORPORATION. All rights reserved.
+
    APPENDIX: How to apply the Apache License to your work.
 
       To apply the Apache License to your work, attach the following
 
@@ -1,3 +1,16 @@
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import sys
 import subprocess
 import os
 
@@ -0,0 +1,182 @@
+# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#1 DGX1 phase1
+bert--DGX1:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX1
+  variables:
+    <<: *DGX1_VARS
+    NNODES: "1"
+    BATCHSIZE: "8192"
+    LR: "6e-3"
+    GRADIENT_STEPS: "512"
+    PHASE: "1"
+
+#4 DGX1 phase1
+bert--DGX1_4x8x16x128:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX1
+  variables:
+    <<: *DGX1_VARS
+    NNODES: "4"
+    BATCHSIZE: "2048"
+    LR: "6e-3"
+    GRADIENT_STEPS: "128"
+    PHASE: "1"
+
+#16 DGX1 phase1
+bert--DGX1_16x8x16x32:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX1
+  variables:
+    <<: *DGX1_VARS
+    NNODES: "16"
+    BATCHSIZE: "512"
+    LR: "6e-3"
+    GRADIENT_STEPS: "32"
+    PHASE: "1"
+
+#1 DGX2 phase1
+bert--DGX2:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX2
+  variables:
+    <<: *DGX2_VARS
+    NNODES: "1"
+    BATCHSIZE: "4096"
+    LR: "6e-3"
+    GRADIENT_STEPS: "64"
+    PHASE: "1"
+
+#4 DGX2 phase1
+bert--DGX2_4x16x64x16:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX2
+  variables:
+    <<: *DGX2_VARS
+    NNODES: "4"
+    BATCHSIZE: "1024"
+    LR: "6e-3"
+    GRADIENT_STEPS: "16"
+    PHASE: "1"
+
+#16 DGX2 phase1
+bert--DGX2_16x16x64x4:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX2
+  variables:
+    <<: *DGX2_VARS
+    NNODES: "16"
+    BATCHSIZE: "256"
+    LR: "6e-3"
+    GRADIENT_STEPS: "4"
+    PHASE: "1"
+
+#64 DGX2 phase1
+bert--DGX2_64x16x64:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX2
+  variables:
+    <<: *DGX2_VARS
+    NNODES: "64"
+    BATCHSIZE: "64"
+    LR: "6e-3"
+    GRADIENT_STEPS: "1"
+    PHASE: "1"
+
+#1 DGX1 phase2
+bert--DGX1_1x8x4x1024:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX1
+  variables:
+    <<: *DGX1_VARS
+    NNODES: "1"
+    BATCHSIZE: "4096"
+    LR: "4e-3"
+    GRADIENT_STEPS: "1024"
+    PHASE: "2"
+
+#4 DGX1 phase2
+bert--DGX1_4x8x4x256:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX1
+  variables:
+    <<: *DGX1_VARS
+    NNODES: "4"
+    BATCHSIZE: "1024"
+    LR: "4e-3"
+    GRADIENT_STEPS: "256"
+    PHASE: "2"
+
+#16 DGX1 phase2
+bert--DGX1_16x8x4x64:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX1
+  variables:
+    <<: *DGX1_VARS
+    NNODES: "16"
+    BATCHSIZE: "256"
+    LR: "4e-3"
+    GRADIENT_STEPS: "64"
+    PHASE: "2"
+
+#1 DGX2 phase2
+bert--DGX2_1x16x8x256:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX2
+  variables:
+    <<: *DGX2_VARS
+    NNODES: "1"
+    BATCHSIZE: "2048"
+    LR: "4e-3"
+    GRADIENT_STEPS: "256"
+    PHASE: "2"
+
+#4 DGX2 phase2
+bert--DGX2_4x16x8x64:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX2
+  variables:
+    <<: *DGX2_VARS
+    NNODES: "4"
+    BATCHSIZE: "512"
+    LR: "4e-3"
+    GRADIENT_STEPS: "64"
+    PHASE: "2"
+
+#16 DGX2 phase2
+bert--DGX2_16x16x8x16:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX2
+  variables:
+    <<: *DGX2_VARS
+    NNODES: "16"
+    BATCHSIZE: "128"
+    LR: "4e-3"
+    GRADIENT_STEPS: "16"
+    PHASE: "2"
+
+#64 DGX2 phase2
+bert--DGX2_64x16x8x4:
+  <<: *BERT_ON_CLUSTER
+  <<: *DGX2
+  variables:
+    <<: *DGX2_VARS
+    NNODES: "64"
+    BATCHSIZE: "32"
+    LR: "4e-3"
+    GRADIENT_STEPS: "4"
+    PHASE: "2"
+
@@ -1,6 +1,6 @@
 # coding=utf-8
-# Copyright 2018 The Google AI Language Team Authors.
-#
+# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 """Create masked LM/next sentence masked_lm TF examples for BERT."""
 from __future__ import absolute_import, division, print_function, unicode_literals
 
 
@@ -1,4 +1,16 @@
 # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import subprocess
 
 class BooksDownloader:
 
@@ -1,4 +1,16 @@
 # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import glob
 import os