@@ -35,7 +35,24 @@ def before_run(self, run_context):
3535 self ._timestamps .append (time .time ())
3636
def end(self, session):
    """Log summary performance statistics for the recorded steps and flush.

    The elapsed time between each pair of consecutive entries in
    ``self._timestamps`` is computed, summarized by
    ``process_performance_stats`` together with ``self._global_batch_size``,
    and written to ``self._logger`` as a single record with ``step=()``.

    Args:
        session: Unused by this method.
            NOTE(review): presumably required by the hook interface this
            class implements -- confirm against the base class.
    """
    # np.diff yields timestamps[i + 1] - timestamps[i] for every adjacent
    # pair; with fewer than two timestamps the result is an empty array.
    deltas = np.diff(self._timestamps)
    stats = process_performance_stats(deltas, self._global_batch_size)

    # stats is a sequence of (metric, value) pairs, so dict() builds the
    # mapping directly.
    self._logger.log(step=(), data=dict(stats))
    self._logger.flush()
44+
45+
def process_performance_stats(timestamps, batch_size):
    """Summarize per-step durations as throughput and latency statistics.

    Args:
        timestamps: Per-step durations in seconds. Despite the name, these
            are elapsed times between steps, not absolute time stamps. Any
            array-like of numbers is accepted (list, tuple, ndarray).
        batch_size: Number of samples processed in each step.

    Returns:
        A list of ``(metric_name, value)`` pairs where every value is a
        string:

        * ``"Throughput Avg"``: mean of the per-step throughputs
          (``batch_size`` divided by each step duration), in samples per
          second.
        * ``"Latency Avg:"``: mean step duration in milliseconds.
        * ``"Latency_90%:"``, ``"Latency_95%:"``, ``"Latency_99%:"``:
          ``mean + z * std / sqrt(n)`` in milliseconds, i.e. the upper bound
          of a normal-approximation confidence interval for the mean
          latency. These are not latency percentiles.

        With empty input every value is ``"nan"``.
    """
    # Coerce to a float array so plain Python sequences work too; without
    # this, ``1000 * some_list`` would repeat the list instead of scaling it.
    durations_ms = 1000.0 * np.asarray(timestamps, dtype=np.float64)

    latency_ms = durations_ms.mean()
    std = durations_ms.std()
    # Square root of the sample count, used to turn the standard deviation
    # into the standard error of the mean.
    n = np.sqrt(durations_ms.size)
    # Durations are in ms, hence the factor 1000 to get samples per second.
    throughput_imgps = (1000.0 * batch_size / durations_ms).mean()

    stats = [("Throughput Avg", str(throughput_imgps)),
             ('Latency Avg:', str(latency_ms))]
    # Standard-normal z-scores for the labelled confidence levels.
    confidence_levels = (("90%:", 1.645), ("95%:", 1.960), ("99%:", 2.576))
    for label, z_score in confidence_levels:
        stats.append(("Latency_" + label,
                      str(latency_ms + z_score * std / n)))
    return stats
0 commit comments