We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 8cc635f commit 7121e21 Copy full SHA for 7121e21
1 file changed
PyTorch/LanguageModeling/BERT/run_pretraining.py
@@ -527,7 +527,7 @@ def main():
527
average_loss /= torch.distributed.get_world_size()
528
torch.distributed.all_reduce(average_loss)
529
if is_main_process():
530
- logger.info("Total Steps:{} Final Loss = {}".format(training_steps, average_loss.item()))
+ logger.info("Total Steps:{} Final Loss = {}".format(training_steps / args.gradient_accumulation_steps, average_loss.item()))
531
elif training_steps % (args.log_freq * args.gradient_accumulation_steps) == 0:
532
533
print("Step:{} Average Loss = {} Step Loss = {} LR {}".format(global_step, average_loss / (
0 commit comments