[Convnets/TF] Added barrier at the end of multiprocess run to allow step synchronization

hXl3s · nv-kkudrynski · commit e1af187301ab · 2022-02-02T05:03:43.000-08:00
diff --git a/TensorFlow/Classification/ConvNets/main.py b/TensorFlow/Classification/ConvNets/main.py
@@ -15,20 +15,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from utils.cmdline_helper import parse_cmdline
+from model.resnet import model_architectures
+from runtime import Runner
+import dllogger
+from utils import hvd_wrapper as hvd
+import tensorflow as tf
 import os
 
 import warnings
 warnings.simplefilter("ignore")
 
-import tensorflow as tf
-
-from utils import hvd_wrapper as hvd
-import dllogger
-
-from runtime import Runner
-from model.resnet import model_architectures
-
-from utils.cmdline_helper import parse_cmdline
 
 if __name__ == "__main__":
 
@@ -42,7 +39,8 @@
         os.makedirs(FLAGS.results_dir, exist_ok=True)
 
         dllogger.init(backends=[
-            dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=log_path),
+            dllogger.JSONStreamBackend(
+                verbosity=dllogger.Verbosity.VERBOSE, filename=log_path),
             dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)
         ])
     else:
@@ -100,7 +98,8 @@
     if FLAGS.mode in ["train_and_evaluate", 'evaluate', 'inference_benchmark']:
 
         if FLAGS.mode == 'inference_benchmark' and hvd.size() > 1:
-            raise NotImplementedError("Only single GPU inference is implemented.")
+            raise NotImplementedError(
+                "Only single GPU inference is implemented.")
 
         elif hvd.rank() == 0:
             runner.evaluate(iter_unit=FLAGS.iter_unit if FLAGS.mode != "train_and_evaluate" else "epoch",
@@ -114,6 +113,10 @@
                             symmetric=FLAGS.symmetric,
                             use_final_conv=FLAGS.use_final_conv,
                             use_qdq=FLAGS.use_qdq)
+        if hvd.size() > 1:
+            # Wait for all processes to finish
+            from mpi4py import MPI
+            MPI.COMM_WORLD.Barrier()
 
     if FLAGS.mode == 'predict':
         if FLAGS.to_predict is None:
@@ -123,7 +126,8 @@
             raise ValueError("Only prediction on single images is supported!")
 
         if hvd.size() > 1:
-            raise NotImplementedError("Only single GPU inference is implemented.")
+            raise NotImplementedError(
+                "Only single GPU inference is implemented.")
 
         else:
             runner.predict(FLAGS.to_predict,
diff --git a/TensorFlow/Classification/ConvNets/requirements.txt b/TensorFlow/Classification/ConvNets/requirements.txt
@@ -1 +1,2 @@
 git+https://github.com/NVIDIA/dllogger#egg=dllogger 
+mpi4py
diff --git a/TensorFlow/Classification/ConvNets/resnet50v1.5/README.md b/TensorFlow/Classification/ConvNets/resnet50v1.5/README.md
@@ -856,6 +856,7 @@ on NVIDIA T4 with (1x T4 16G) GPU.
 8. August 2020
   * Updated command line argument names
   * Added support for synthetic dataset with different image size
-
+9. January 2022
+  * Added barrier at the end of multiprocess run
 ### Known issues
 Performance without XLA enabled is low due to BN + ReLU fusion bug.
diff --git a/TensorFlow/Classification/ConvNets/resnext101-32x4d/README.md b/TensorFlow/Classification/ConvNets/resnext101-32x4d/README.md
@@ -796,6 +796,7 @@ June 2020
 August 2020
    - Updated command line argument names
    - Added support for synthetic dataset with different image size
-
+January 2022
+   - Added barrier at the end of multiprocess run
 ### Known issues
 Performance without XLA enabled is low due to BN + ReLU fusion bug.
diff --git a/TensorFlow/Classification/ConvNets/se-resnext101-32x4d/README.md b/TensorFlow/Classification/ConvNets/se-resnext101-32x4d/README.md
@@ -789,6 +789,8 @@ April 2020
 August 2020
    - Updated command line argument names
    - Added support for synthetic dataset with different image size
+January 2022
+   - Added barrier at the end of multiprocess run
 
 ### Known issues
 Performance without XLA enabled is low due to BN + ReLU fusion bug.

Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`	`1`	`git+https://github.com/NVIDIA/dllogger#egg=dllogger`
	`2`	`+mpi4py`