forked from tusen-ai/simpledet
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcallback.py
More file actions
91 lines (80 loc) · 3.55 KB
/
callback.py
File metadata and controls
91 lines (80 loc) · 3.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import time
import logging
import mxnet as mx
class Speedometer(object):
    """Batch-end callback that periodically logs training throughput.

    Once its timer has been started, the callback reports on every call
    whose batch index is a multiple of ``frequent``: it logs the throughput
    since the timer was last reset, followed by the current values of
    ``param.eval_metric`` when a metric is attached.

    Parameters
    ----------
    batch_size : int
        Multiplier applied to the number of batches to obtain the sample
        count used in the ``samples/sec`` figure.
    frequent : int, optional
        Report whenever ``param.nbatch`` is a multiple of this value.
    """

    def __init__(self, batch_size, frequent=50):
        self.batch_size = batch_size
        self.frequent = frequent
        # False until the timer has been started for the current run of
        # increasing batch indices.
        self.init = False
        # Wall-clock time (time.time()) of the last report or timer start.
        self.tic = 0
        # Batch index seen on the previous call; used to detect a restart.
        self.last_count = 0

    @staticmethod
    def _metric_pairs(eval_metric):
        """Return ``(name, value)`` pairs from ``eval_metric.get()``.

        ``get()`` may return either parallel sequences of names and values
        or a single ``(name, value)`` pair with a plain string name. The
        latter is wrapped so that it is not zipped character by character
        (or rejected because the value is not iterable).
        """
        names, values = eval_metric.get()
        if isinstance(names, str):
            names, values = [names], [values]
        return zip(names, values)

    def __call__(self, param):
        """Callback to Show speed.

        ``param`` must expose ``nbatch``, ``epoch`` and ``eval_metric``
        (which may be ``None``).
        """
        count = param.nbatch
        # A batch index lower than the previous one means counting restarted
        # (presumably a new epoch), so the timer has to be re-armed.
        if self.last_count > count:
            self.init = False
        self.last_count = count

        if not self.init:
            self.init = True
            self.tic = time.time()
            return

        if count % self.frequent != 0:
            return

        elapsed = time.time() - self.tic
        samples = self.frequent * self.batch_size
        # Coarse clocks can report a zero interval; avoid ZeroDivisionError.
        speed = samples / elapsed if elapsed > 0 else float("inf")

        if param.eval_metric is not None:
            pieces = [
                "Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-"
                % (param.epoch, count, speed)
            ]
            pieces.extend(
                "%s=%f,\t" % (metric_name, metric_value)
                for metric_name, metric_value in self._metric_pairs(param.eval_metric)
            )
            logging.info("".join(pieces))
        else:
            logging.info("Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec",
                         param.epoch, count, speed)
        self.tic = time.time()
class DetailSpeedometer(object):
    """Batch-end callback that logs throughput plus per-phase timings.

    Once its timer has been started, the callback reports on every call
    whose batch index is a multiple of ``frequent``. Each report contains
    the epoch, rank, batch index and total iteration count, the current and
    average timing values read from ``param``, the throughput since the
    timer was last reset, and the values of ``param.eval_metric`` when a
    metric is attached.

    Parameters
    ----------
    batch_size : int
        Multiplier applied to the number of batches to obtain the sample
        count used in the ``samples/sec`` figure.
    frequent : int, optional
        Report whenever ``param.nbatch`` is a multiple of this value.
    """

    # Shared by both report variants so the two stay in sync.
    _REPORT_FORMAT = (
        "Epoch[%d] Rank[%d] Batch[%d] TotalIter[%d] Train:%.3f(%.3f)\tkv_sync:%.3f(%.3f)\t"
        "data:%.3f(%.3f)\titer_total_time:%.3f(%.3f)\tSpeed: %.2f samples/sec"
    )

    def __init__(self, batch_size, frequent=50):
        self.batch_size = batch_size
        self.frequent = frequent
        # False until the timer has been started for the current run of
        # increasing batch indices.
        self.init = False
        # Wall-clock time (time.time()) of the last report or timer start.
        self.tic = 0
        # Batch index seen on the previous call; used to detect a restart.
        self.last_count = 0

    @staticmethod
    def _metric_pairs(eval_metric):
        """Return ``(name, value)`` pairs from ``eval_metric.get()``.

        ``get()`` may return either parallel sequences of names and values
        or a single ``(name, value)`` pair with a plain string name. The
        latter is wrapped so that it is not zipped character by character
        (or rejected because the value is not iterable).
        """
        names, values = eval_metric.get()
        if isinstance(names, str):
            names, values = [names], [values]
        return zip(names, values)

    def __call__(self, param):
        """Callback to Show speed.

        ``param`` must expose ``nbatch``, ``rank``, ``total_iter``,
        ``epoch``, ``eval_metric`` (which may be ``None``) and the
        ``cur_*``/``avg_*`` timing attributes used in the report.
        """
        count = param.nbatch
        rank = param.rank
        total_iter = param.total_iter
        # A batch index lower than the previous one means counting restarted
        # (presumably a new epoch), so the timer has to be re-armed.
        if self.last_count > count:
            self.init = False
        self.last_count = count

        if not self.init:
            self.init = True
            self.tic = time.time()
            return

        if count % self.frequent != 0:
            return

        elapsed = time.time() - self.tic
        samples = self.frequent * self.batch_size
        # Coarse clocks can report a zero interval; avoid ZeroDivisionError.
        speed = samples / elapsed if elapsed > 0 else float("inf")

        if param.eval_metric is not None:
            # Read the metric before the timing fields, as a single snapshot.
            metric_pairs = list(self._metric_pairs(param.eval_metric))
        else:
            metric_pairs = None

        fields = (
            param.epoch, rank, count, total_iter,
            param.cur_batch_time, param.avg_batch_time,
            param.cur_kvstore_sync_time, param.avg_kvstore_sync_time,
            param.cur_data_time, param.avg_data_time,
            param.cur_iter_total_time, param.avg_iter_total_time,
            speed,
        )

        if metric_pairs is not None:
            pieces = [self._REPORT_FORMAT % fields, "\tTrain-"]
            pieces.extend("%s=%f,\t" % pair for pair in metric_pairs)
            logging.info("".join(pieces))
        else:
            logging.info(self._REPORT_FORMAT, *fields)
        self.tic = time.time()
def do_checkpoint(prefix):
    """Build a callback that saves a checkpoint under ``prefix``.

    The returned callable takes ``(iter_no, sym, arg, aux)`` and forwards
    them to ``mx.model.save_checkpoint``, passing ``iter_no + 1`` as the
    checkpoint number.
    """
    def _callback(iter_no, sym, arg, aux):
        # Checkpoints are numbered one past the index handed to the callback
        # (presumably a zero-based epoch index -- confirm against the caller).
        checkpoint_no = iter_no + 1
        mx.model.save_checkpoint(prefix, checkpoint_no, sym, arg, aux)

    return _callback