Skip to content

Commit e372b7b

Browse files
jan-golda, nv-kkudrynski
authored and committed
Add logging of units to dllogger metrics
1 parent 048370a commit e372b7b

57 files changed

Lines changed: 382 additions & 77 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

DGLPyTorch/DrugDiscovery/SE3Transformer/se3_transformer/runtime/callbacks.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,8 @@ def __init__(self, logger: Optional[Logger] = None):
6464
self.logger = logger
6565
self.scheduler = None
6666

67+
self.logger.log_metadata('learning rate', {'unit': None})
68+
6769
@abstractmethod
6870
def get_scheduler(self, optimizer, args):
6971
pass
@@ -93,6 +95,9 @@ def __init__(self, logger, targets_std, prefix=''):
9395
self.prefix = prefix
9496
self.best_mae = float('inf')
9597

98+
self.logger.log_metadata(f'{self.prefix} MAE', {'unit': None})
99+
self.logger.log_metadata(f'{self.prefix} best MAE', {'unit': None})
100+
96101
def on_validation_step(self, input, target, pred):
97102
self.mae(pred.detach(), target.detach())
98103

@@ -126,6 +131,12 @@ def __init__(self, logger, batch_size: int, warmup_epochs: int = 1, mode: str =
126131
self.mode = mode
127132
self.logger = logger
128133

134+
logger.log_metadata(f"throughput_{self.mode}", {'unit': 'molecules/s'})
135+
logger.log_metadata(f"total_time_{self.mode}", {'unit': 's'})
136+
logger.log_metadata(f"latency_{self.mode}_mean", {'unit': 's'})
137+
for level in [90, 95, 99]:
138+
logger.log_metadata(f"latency_{self.mode}_{level}", {'unit': 's'})
139+
129140
def on_batch_start(self):
130141
if self.epoch >= self.warmup_epochs:
131142
self.timestamps.append(time.time() * 1000.0)

DGLPyTorch/DrugDiscovery/SE3Transformer/se3_transformer/runtime/loggers.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ class Logger(ABC):
4040
def log_hyperparams(self, params):
4141
pass
4242

43+
@rank_zero_only
44+
@abstractmethod
45+
def log_metadata(self, metric, metadata):
46+
pass
47+
4348
@rank_zero_only
4449
@abstractmethod
4550
def log_metrics(self, metrics, step=None):
@@ -81,6 +86,11 @@ def log_hyperparams(self, params):
8186
for logger in self.loggers:
8287
logger.log_hyperparams(params)
8388

89+
@rank_zero_only
90+
def log_metadata(self, metric, metadata):
91+
for logger in self.loggers:
92+
logger.log_metadata(metric, metadata)
93+
8494

8595
class DLLogger(Logger):
8696
def __init__(self, save_dir: pathlib.Path, filename: str):
@@ -95,6 +105,10 @@ def log_hyperparams(self, params):
95105
params = self._sanitize_params(params)
96106
dllogger.log(step="PARAMETER", data=params)
97107

108+
@rank_zero_only
109+
def log_metadata(self, metric, metadata):
110+
dllogger.metadata(metric, metadata)
111+
98112
@rank_zero_only
99113
def log_metrics(self, metrics, step=None):
100114
if step is None:
@@ -126,6 +140,10 @@ def log_hyperparams(self, params: Dict[str, Any]) -> None:
126140
params = self._sanitize_params(params)
127141
self.experiment.config.update(params, allow_val_change=True)
128142

143+
@rank_zero_only
144+
def log_metadata(self, metric, metadata):
145+
pass
146+
129147
@rank_zero_only
130148
def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
131149
if step is not None:

MxNet/Classification/RN50v1.5/log_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,11 @@ def setup_logging(args):
3131
])
3232
else:
3333
dllogger.init([])
34+
35+
dllogger.metadata("val.accuracy", {"unit": None})
36+
dllogger.metadata("val.top_k_accuracy_5", {"unit": None})
37+
dllogger.metadata("train.ips", {"unit": "images/s"})
38+
dllogger.metadata("val.ips", {"unit": "images/s"})
39+
dllogger.metadata("val.latency_50", {"unit": "s"})
40+
dllogger.metadata("val.latency_90", {"unit": "s"})
41+
dllogger.metadata("val.latency_avg", {"unit": "s"})

PyTorch/Classification/ConvNets/image_classification/logger.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -338,10 +338,10 @@ def epoch_generator_wrapper(self, gen):
338338

339339
class Metrics:
340340
ACC_METADATA = {"unit": "%", "format": ":.2f"}
341-
IPS_METADATA = {"unit": "img/s", "format": ":.2f"}
341+
IPS_METADATA = {"unit": "images/s", "format": ":.2f"}
342342
TIME_METADATA = {"unit": "s", "format": ":.5f"}
343-
LOSS_METADATA = {"format": ":.5f"}
344-
LR_METADATA = {"format": ":.5f"}
343+
LOSS_METADATA = {"unit": None, "format": ":.5f"}
344+
LR_METADATA = {"unit": None, "format": ":.5f"}
345345

346346
def __init__(self, logger):
347347
self.logger = logger

PyTorch/Classification/GPUNet/train.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,11 @@ def main():
11001100
)
11011101
else:
11021102
dllogger.init(backends=[])
1103+
1104+
dllogger.metadata("top1", {"unit": None})
1105+
dllogger.metadata("top5", {"unit": None})
1106+
dllogger.metadata("average_ips", {"unit": "images/s"})
1107+
11031108
data_config = resolve_data_config(
11041109
vars(args), model=model, verbose=args.local_rank == 0
11051110
)

PyTorch/Detection/Efficientdet/train.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,13 @@ def main():
275275
assert args.rank >= 0
276276

277277
setup_dllogger(args.rank, filename=args.dllogger_file)
278+
dllogger.metadata('eval_batch_time', {'unit': 's'})
279+
dllogger.metadata('train_batch_time', {'unit': 's'})
280+
dllogger.metadata('eval_throughput', {'unit': 'images/s'})
281+
dllogger.metadata('train_throughout', {'unit': 'images/s'})
282+
dllogger.metadata('eval_loss', {'unit': None})
283+
dllogger.metadata('train_loss', {'unit': None})
284+
dllogger.metadata('map', {'unit': None})
278285

279286
if args.distributed:
280287
logging.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'

PyTorch/Detection/Efficientdet/validate.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,11 @@ def add_bool_arg(parser, name, default=False, help=''): # FIXME move to utils
114114

115115
def validate(args):
116116
setup_dllogger(0, filename=args.dllogger_file)
117+
dllogger.metadata('total_inference_time', {'unit': 's'})
118+
dllogger.metadata('inference_throughput', {'unit': 'images/s'})
119+
dllogger.metadata('inference_time', {'unit': 's'})
120+
dllogger.metadata('map', {'unit': None})
121+
dllogger.metadata('total_eval_time', {'unit': 's'})
117122

118123
if args.checkpoint != '':
119124
args.pretrained = True

PyTorch/Detection/SSD/ssd/logger.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ def __init__(self, name, json_output=None, log_interval=20):
6666
backends.append(DLLogger.JSONStreamBackend(DLLogger.Verbosity.VERBOSE, json_output))
6767

6868
DLLogger.init(backends)
69+
DLLogger.metadata("mAP", {"unit": None})
6970

7071
self.epoch = 0
7172
self.train_iter = 0
@@ -137,6 +138,11 @@ def __init__(self, *args, **kwargs):
137138
super().__init__(*args, **kwargs)
138139
self.images_per_ses = BenchmarkMeter(self.name)
139140

141+
DLLogger.metadata("avg_img/sec", {"unit": "images/s"})
142+
DLLogger.metadata("med_img/sec", {"unit": "images/s"})
143+
DLLogger.metadata("min_img/sec", {"unit": "images/s"})
144+
DLLogger.metadata("max_img/sec", {"unit": "images/s"})
145+
140146
def update(self, bs, time):
141147
self.images_per_ses.update(bs, time)
142148

PyTorch/Forecasting/TFT/log_helper.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -101,23 +101,26 @@ def step_format(step):
101101
container_setup_info = {**get_framework_env_vars(), **get_system_info()}
102102
dllogger.log(step='ENVIRONMENT', data=container_setup_info, verbosity=0)
103103

104-
dllogger.metadata('loss', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
105-
dllogger.metadata('P10', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
106-
dllogger.metadata('P50', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
107-
dllogger.metadata('P90', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
108-
dllogger.metadata('items/s', {'GOAL': 'MAXIMIZE', 'STAGE': 'TRAIN', 'format': ':1f'})
109-
dllogger.metadata('val_loss', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format':':5f'})
110-
dllogger.metadata('val_P10', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
111-
dllogger.metadata('val_P50', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
112-
dllogger.metadata('val_P90', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
113-
dllogger.metadata('val_items/s', {'GOAL': 'MAXIMIZE', 'STAGE': 'VAL', 'format': ':1f'})
114-
dllogger.metadata('test_P10', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
115-
dllogger.metadata('test_P50', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
116-
dllogger.metadata('test_P90', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
117-
dllogger.metadata('throughput', {'GOAL': 'MAXIMIZE', 'STAGE': 'TEST', 'format': ':1f'})
118-
dllogger.metadata('latency_p90', {'GOAL': 'MIMIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
119-
dllogger.metadata('latency_p95', {'GOAL': 'MIMIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
120-
dllogger.metadata('latency_p99', {'GOAL': 'MIMIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
104+
dllogger.metadata('loss', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f', 'unit': None})
105+
dllogger.metadata('P10', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f', 'unit': None})
106+
dllogger.metadata('P50', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f', 'unit': None})
107+
dllogger.metadata('P90', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f', 'unit': None})
108+
dllogger.metadata('items/s', {'GOAL': 'MAXIMIZE', 'STAGE': 'TRAIN', 'format': ':1f', 'unit': 'items/s'})
109+
dllogger.metadata('val_loss', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format':':5f', 'unit': None})
110+
dllogger.metadata('val_P10', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f', 'unit': None})
111+
dllogger.metadata('val_P50', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f', 'unit': None})
112+
dllogger.metadata('val_P90', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f', 'unit': None})
113+
dllogger.metadata('val_items/s', {'GOAL': 'MAXIMIZE', 'STAGE': 'VAL', 'format': ':1f', 'unit': 'items/s'})
114+
dllogger.metadata('test_P10', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f', 'unit': None})
115+
dllogger.metadata('test_P50', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f', 'unit': None})
116+
dllogger.metadata('test_P90', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f', 'unit': None})
117+
dllogger.metadata('sum', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f', 'unit': None})
118+
dllogger.metadata('throughput', {'GOAL': 'MAXIMIZE', 'STAGE': 'TEST', 'format': ':1f', 'unit': 'items/s'})
119+
dllogger.metadata('latency_avg', {'GOAL': 'MIMIMIZE', 'STAGE': 'TEST', 'format': ':5f', 'unit': 's'})
120+
dllogger.metadata('latency_p90', {'GOAL': 'MIMIMIZE', 'STAGE': 'TEST', 'format': ':5f', 'unit': 's'})
121+
dllogger.metadata('latency_p95', {'GOAL': 'MIMIMIZE', 'STAGE': 'TEST', 'format': ':5f', 'unit': 's'})
122+
dllogger.metadata('latency_p99', {'GOAL': 'MIMIMIZE', 'STAGE': 'TEST', 'format': ':5f', 'unit': 's'})
123+
dllogger.metadata('average_ips', {'GOAL': 'MAXIMIZE', 'STAGE': 'TEST', 'format': ':1f', 'unit': 'items/s'})
121124

122125

123126
def get_framework_env_vars():

PyTorch/LanguageModeling/BART/finetune.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,9 @@ def main(args, model=None) -> SummarizationModule:
560560
else:
561561
dllogger.init(backends=[])
562562

563+
dllogger.metadata("avg_train_time", {"unit": "s"})
564+
dllogger.metadata("avg_train_throughput", {"unit": "tokens/s"})
565+
563566
main(args)
564567

565568
dllogger.flush()

0 commit comments

Comments
 (0)