Merge pull request NVIDIA#286 from eric-haibin-lin/patch-1

nvpstr · web-flow · commit 55c5f99f5993 · 2019-11-19T10:07:01.000+01:00
Fix a typo ("ot" -> "to") in a comment in optimization.py
diff --git a/TensorFlow/LanguageModeling/BERT/optimization.py b/TensorFlow/LanguageModeling/BERT/optimization.py
@@ -356,7 +356,7 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None,
       # the correct way of using L2 regularization/weight decay with Adam,
       # since that will interact with the m and v parameters in strange ways.
       #
-      # Instead we want ot decay the weights in a manner that doesn't interact
+      # Instead we want to decay the weights in a manner that doesn't interact
       # with the m/v parameters. This is equivalent to adding the square
       # of the weights to the loss with plain (non-momentum) SGD.
       if self._do_use_weight_decay(param_name):

Original file line number	Diff line number	Diff line change
`@@ -356,7 +356,7 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None,`
`356`	`356`	`# the correct way of using L2 regularization/weight decay with Adam,`
`357`	`357`	`# since that will interact with the m and v parameters in strange ways.`
`358`	`358`	`#`
`359`		`- # Instead we want ot decay the weights in a manner that doesn't interact`
	`359`	`+ # Instead we want to decay the weights in a manner that doesn't interact`
`360`	`360`	`# with the m/v parameters. This is equivalent to adding the square`
`361`	`361`	`# of the weights to the loss with plain (non-momentum) SGD.`
`362`	`362`	`if self._do_use_weight_decay(param_name):`