[EfficientNet/TF2] Fix inference numbers and calculations

subhankar-ghosh · web-flow · commit ec2bef8444c4 · 2021-04-22T13:52:13.000+02:00
Co-authored-by: Subhankar Ghosh <subhankarg@nvidia.com>
diff --git a/TensorFlow2/Classification/ConvNets/efficientnet/README.md b/TensorFlow2/Classification/ConvNets/efficientnet/README.md
@@ -577,20 +577,20 @@ FP16 Inference Latency
 | Batch size | Resolution | Throughput Avg | Latency Avg (ms) | Latency 90% (ms) |Latency 95% (ms) |Latency 99% (ms) |
 |------------|-----------------|-----|-----|-----|-----|-----|
 |      1      |    224x224    | 111 |  8.97   | 8.88  | 8.92 | 8.96 |
-|      2      |    224x224      | 233 | 4.28 | 4.22 | 4.25 | 4.27 |
-|      4      |    224x224      | 432 | 2.31 | 2.28 | 2.29 | 2.30 |
-|      8      |    224x224      | 771 | 1.29 | 1.27 | 1.28 | 1.28 |
-|     1024       |    224x224     | 10269 |  0.10   |  0.10   |   0.10  | 0.10 |
+|      2      |    224x224      | 233 | 8.56 | 8.44 | 8.5 | 8.54 |
+|      4      |    224x224      | 432 | 9.24 | 9.12 | 9.16 | 9.2 |
+|      8      |    224x224      | 771 | 10.32 | 10.16 | 10.24 | 10.24 |
+|     1024       |    224x224     | 10269 |  102.4   |  102.4   |   102.4  | 102.4 |
 
 TF32 Inference Latency
 
 | Batch size | Resolution | Throughput Avg | Latency Avg (ms) | Latency 90% (ms) | Latency 95% (ms) | Latency 99% (ms) |
 |------------|-----------------|-----|-----|-----|-----|-----|
 |     1       |      224x224   |   101     |  9.87   | 9.78    |  9.82   | 9.86    |
-|     2       |      224x224    | 204 |   4.89  |  4.83   |  4.85   |   4.88  |
-|     4       |      224x224    | 381 |  2.62   |  2.59   |  2.60   |  2.61   |
-|     8      |      224x224   | 584 |  1.71   |  1.69   |  1.70   | 1.71    |
-|      512      |   224x224      | 5480 | 0.18 | 0.18 | 0.18 | 0.18 |
+|     2       |      224x224    | 204 |   9.78  |  9.66   |  9.7   |   9.76  |
+|     4       |      224x224    | 381 |  10.48   |  10.36   |  10.4   |  10.44   |
+|     8      |      224x224   | 584 |  13.68   |  13.52   |  13.6   | 13.68   |
+|      512      |   224x224      | 5480 | 92.16 | 92.16 | 92.16 | 92.16 |
 
 To achieve these same results, follow the steps in the [Quick Start Guide](#quick-start-guide).
 
@@ -602,23 +602,22 @@ Our results were obtained by running the `inference-script-name.sh` inferencing
 
 FP16 Inference Latency
 
-| Batch size | Resolution | Throughput Avg | Latency Avg | Latency 90% |Latency 95% |Latency 99% |
+| Batch size | Resolution | Throughput Avg | Latency Avg (ms) | Latency 90% (ms) |Latency 95% (ms) |Latency 99% (ms) |
 |------------|-----------------|-----|-----|-----|-----|-----|
 |     1      |    224x224     | 98.8 | 10.12 | 10.03 | 10.06 | 10.10 |
-|     2      |    224x224      | 199.3 | 5.01 | 4.95 | 4.97 | 5.00 |
-|     4      |    224x224      | 382.5 | 2.61 | 2.57 | 2.59 | 2.60 |
-|     8      |    224x224      | 681.2 | 1.46 | 1.44 | 1.45 | 1.46 |
-|      256      |   224x224      | 5271 | 0.19 | 0.18 | 0.18 | 0.19 |
-
+|     2      |    224x224      | 199.3 | 10.02 | 9.9 | 9.94 | 10.0 |
+|     4      |    224x224      | 382.5 | 10.44 | 10.28 | 10.36 | 10.4 |
+|     8      |    224x224      | 681.2 | 11.68 | 11.52 | 11.6 | 11.68 |
+|      256      |   224x224      | 5271 | 48.64 | 46.08 | 46.08 | 48.64 |
 FP32 Inference Latency
 
-| Batch size | Resolution | Throughput Avg | Latency Avg | Latency 90% | Latency 95% | Latency 99% |
+| Batch size | Resolution | Throughput Avg | Latency Avg (ms) | Latency 90% (ms) | Latency 95% (ms) | Latency 99% (ms) |
 |------------|-----------------|-----|-----|-----|-----|-----|
 |      1      |    224x224     | 68.39 | 14.62 | 14.45 | 14.51 | 14.56 |
-|      2      |    224x224      | 125.62 | 7.96 | 7.89 | 7.91 | 7.94 |
-|      4      |    224x224      | 216.41 | 4.62 | 4.56 | 4.60 | 4.61 |
-|      8      |    224x224      | 401.60 | 2.49 | 2.45 | 2.47 | 2.48 |
-|     128      |   224x224       | 2713 | 0.37 | 0.36 | 0.36 | 0.37 |
+|      2      |    224x224      | 125.62 | 15.92 | 15.78 | 15.82 | 15.82 |
+|      4      |    224x224      | 216.41 | 18.48 | 18.24 | 18.4 | 18.44 |
+|      8      |    224x224      | 401.60 | 19.92 | 19.6 | 19.76 | 19.84 |
+|     128      |   224x224       | 2713 | 47.36 | 46.08 | 46.08 | 47.36 |
 
 To achieve these same results, follow the steps in the [Quick Start Guide](#quick-start-guide).
 
@@ -634,19 +633,19 @@ FP16 Inference Latency
 | Batch size | Resolution | Throughput Avg | Latency Avg (ms) | Latency 90% (ms) |Latency 95% (ms) |Latency 99% (ms) |
 |------------|-----------------|-----|-----|-----|-----|-----|
 |      1      |    380x380    | 57.54 |  17.37   | 17.24  | 17.30 | 17.35 |
-|      2      |    380x380      | 112.06 | 8.92 | 8.85 | 8.88 | 8.91 |
-|      4      |    380x380      | 219.71 | 4.55 | 4.52 | 4.53 | 4.54 |
-|      8      |    380x380      | 383.39 | 2.60 | 2.58 | 2.59 | 2.60 |
-|     128       |    380x380     | 1470 |  0.68   |  0.67   |  0.67  | 0.68 |
+|      2      |    380x380      | 112.06 | 17.84 | 17.7 | 17.76 | 17.82 |
+|      4      |    380x380      | 219.71 | 18.2 | 18.08 | 18.12 | 18.16 |
+|      8      |    380x380      | 383.39 | 20.8 | 20.64 | 20.72 | 20.8 |
+|     128       |    380x380     | 1470 |  87.04   |  85.76   |  85.76  | 87.04 |
 
 TF32 Inference Latency
 | Batch size | Resolution | Throughput Avg | Latency Avg (ms) | Latency 90% (ms) | Latency 95% (ms) | Latency 99% (ms) |
 |------------|-----------------|-----|-----|-----|-----|-----|
 |     1       |      380x380   |   52.68     |  18.98   | 18.86    |  18.91   | 18.96    |
-|     2       |      380x380    | 95.32 |   10.49  |  10.42   |  10.45   |  10.48  |
-|     4       |      380x380    | 182.14 |  5.49   | 5.46   |  5.47   |  5.48   |
-|     8      |      380x380   | 325.72 |  3.07   |  3.05   |  3.05   | 3.06    |
-|      64      |   380x380      | 694 | 1.43 | 1.42 | 1.43 | 1.43 |
+|     2       |      380x380    | 95.32 |   20.98  |  20.84   |  20.9   |  20.96  |
+|     4       |      380x380    | 182.14 |  21.96  | 21.84   |  21.88   |  21.92   |
+|     8      |      380x380   | 325.72 |  24.56   |  24.4   |  24.4   | 24.48  |
+|      64      |   380x380      | 694 | 91.52 | 90.88 | 91.52 | 91.52 |
 
 To achieve these same results, follow the steps in the [Quick Start Guide](#quick-start-guide).
 
@@ -658,24 +657,24 @@ Our results were obtained by running the `inference-script-name.sh` inferencing
 
 FP16 Inference Latency
 
-| Batch size | Resolution | Throughput Avg | Latency Avg | Latency 90% | Latency 95% | Latency 99% |
+| Batch size | Resolution | Throughput Avg | Latency Avg (ms) | Latency 90% (ms) | Latency 95% (ms) | Latency 99% (ms) |
 |------------|-----------------|-----|-----|-----|-----|-----|
 |     1      |    380x380     | 54.27 | 18.35 | 18.20 | 18.25 | 18.32 |
-|     2      |    380x380      | 104.27 | 9.59 | 9.51 | 9.54 | 9.58 |
-|     4      |    380x380      | 182.61 | 5.47 | 5.41 | 5.43 | 5.46 |
-|     8      |    380x380      | 234.06 | 4.27 | 4.24 | 4.25 | 4.26 |
-|      64      |   380x380      | 782.47 | 1.28 | 1.25 | 1.26 | 1.27 |
+|     2      |    380x380      | 104.27 | 19.18 | 19.02 | 19.08 | 19.16 |
+|     4      |    380x380      | 182.61 | 21.88 | 21.64 | 21.72 | 21.84 |
+|     8      |    380x380      | 234.06 | 34.16 | 33.92 | 34.0 | 34.08 |
+|      64      |   380x380      | 782.47 | 81.92 | 80.0 | 80.64 | 81.28 |
 
 
 FP32 Inference Latency
 
-| Batch size | Resolution | Throughput Avg | Latency Avg | Latency 90% |Latency 95% |Latency 99% |
+| Batch size | Resolution | Throughput Avg | Latency Avg (ms) | Latency 90% (ms) |Latency 95% (ms) |Latency 99% (ms) |
 |------------|-----------------|-----|-----|-----|-----|-----|
 |      1      |    380x380     | 30.48 | 32.80 | 32.86 | 31.83 | 32.60 |
-|      2      |    380x380      | 58.59 | 17.06 | 15.96 | 16.51 | 16.95 |
-|      4      |    380x380      | 111.35 | 8.98 | 8.75 | 8.78 | 8.92 |
-|      8      |    380x380      | 199.00 | 5.03 | 4.84 | 4.88 | 5.00 |
-|     32      |   380x380       | 307.04  | 3.25 | 3.25 | 3.25 | 3.25 |
+|      2      |    380x380      | 58.59 | 34.12 | 31.92 | 33.02 | 33.9 |
+|      4      |    380x380      | 111.35 | 35.92 | 35.0 | 35.12 | 35.68 |
+|      8      |    380x380      | 199.00 | 40.24 | 38.72 | 39.04 | 40.0 |
+|     32      |   380x380       | 307.04  | 104.0 | 104.0 | 104.0 | 104.0 |
 
 To achieve these same results, follow the steps in the [Quick Start Guide](#quick-start-guide).
 
diff --git a/TensorFlow2/Classification/ConvNets/efficientnet/runtime/runner_utils.py b/TensorFlow2/Classification/ConvNets/efficientnet/runtime/runner_utils.py
@@ -219,12 +219,14 @@ def build_stats(history, validation_output, train_callbacks, eval_callback, logg
         stats['avg_time_per_exp_eval'] = 1000./stats['avg_exp_per_second_eval']
         batch_time = eval_callback.batch_time
         batch_time.sort()
+        latency_pct_per_batch = sum( batch_time[:-1] ) / int( len(batch_time) - 1 )
+        stats['latency_pct'] = 1000.0 * latency_pct_per_batch
         latency_90pct_per_batch = sum( batch_time[:int( 0.9 * len(batch_time) )] ) / int( 0.9 * len(batch_time) )
-        stats['latency_90pct'] = 1000.0 * latency_90pct_per_batch / eval_callback.batch_size
+        stats['latency_90pct'] = 1000.0 * latency_90pct_per_batch
         latency_95pct_per_batch = sum( batch_time[:int( 0.95 * len(batch_time) )] ) / int( 0.95 * len(batch_time) )
-        stats['latency_95pct'] = 1000.0 * latency_95pct_per_batch / eval_callback.batch_size
+        stats['latency_95pct'] = 1000.0 * latency_95pct_per_batch
         latency_99pct_per_batch = sum( batch_time[:int( 0.99 * len(batch_time) )] ) / int( 0.99 * len(batch_time) )
-        stats['latency_99pct'] = 1000.0 * latency_99pct_per_batch / eval_callback.batch_size
+        stats['latency_99pct'] = 1000.0 * latency_99pct_per_batch
 
     if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
         logger.log(step=(), data=stats)