
Commit b03ef02

trunk,nnet: changing nnet prototype, making the bottleneck networks more stable by reducing learning rates of weights around the bottleneck.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4076 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
1 parent: d65e118
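For context: in Kaldi's nnet1 prototypes, the <LearnRateCoef> and <BiasLearnRateCoef> tags act as per-component multipliers on the trainer's global learning rate, so the 0.1 values introduced here make the transforms around the bottleneck train roughly ten times more slowly than the rest of the network. A minimal sketch of that scaling, with hypothetical numbers (none of the values below come from this commit):

# Illustrative only: how a per-component <LearnRateCoef> scales the global
# learning rate in an SGD update (hypothetical values, not from this commit).
global_learn_rate = 0.008      # e.g. a typical --learn-rate passed to the trainer
learn_rate_coef = 0.1          # <LearnRateCoef> written into the bottleneck proto
bias_learn_rate_coef = 0.1     # <BiasLearnRateCoef> for the bias term

effective_weight_lr = global_learn_rate * learn_rate_coef       # 0.0008
effective_bias_lr = global_learn_rate * bias_learn_rate_coef    # 0.0008
print("weights lr: %g  bias lr: %g" % (effective_weight_lr, effective_bias_lr))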

1 file changed: egs/wsj/s5/utils/nnet/make_nnet_proto.py (10 additions, 8 deletions)
@@ -115,18 +115,20 @@ def Glorot(dim1, dim2):
 # Optionaly add bottleneck
 if o.bottleneck_dim != 0:
   assert(o.bottleneck_dim > 0)
-  print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f" % \
-   (num_hid_neurons, o.bottleneck_dim, 0.0, 0.0, \
-    (o.param_stddev_factor * Glorot(num_hid_neurons, o.bottleneck_dim)))
-  print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f" % \
+  # 25% smaller stddev -> small bottleneck range, 10x smaller learning rate
+  print "<AffineTransformNobias> <InputDim> %d <OutputDim> %d <ParamStddev> %f <LearnRateCoef> %f" % \
+   (num_hid_neurons, o.bottleneck_dim, \
+    (o.param_stddev_factor * Glorot(num_hid_neurons, o.bottleneck_dim) * 0.75 ), 0.1)
+  # 25% smaller stddev -> smaller gradient in prev. layer, 10x smaller learning rate for weigts & biases
+  print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <LearnRateCoef> %f <BiasLearnRateCoef> %f" % \
   (o.bottleneck_dim, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \
-   (o.param_stddev_factor * Glorot(o.bottleneck_dim, num_hid_neurons)))
+   (o.param_stddev_factor * Glorot(o.bottleneck_dim, num_hid_neurons) * 0.75 ), 0.1, 0.1)
   print "%s <InputDim> %d <OutputDim> %d" % (o.activation_type, num_hid_neurons, num_hid_neurons)
 
-# Last AffineTransform
-print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f" % \
+# Last AffineTransform (10x smaller learning rate on bias)
+print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <LearnRateCoef> %f <BiasLearnRateCoef> %f" % \
  (num_hid_neurons, num_leaves, 0.0, 0.0, \
-  (o.param_stddev_factor * Glorot(num_hid_neurons, num_leaves)))
+  (o.param_stddev_factor * Glorot(num_hid_neurons, num_leaves)), 1.0, 0.1)
 
 # Optionaly append softmax
 if o.with_softmax:
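To make the effect concrete, here is a small standalone sketch of the bottleneck fragment the script would now emit. The dimensions, param_stddev_factor, bias init values, and the Glorot() helper below are hypothetical stand-ins, not the exact values or helper used by make_nnet_proto.py:

import math

def Glorot(dim1, dim2):
  # simplified stand-in for the script's Glorot-style scaling helper
  return math.sqrt(6.0 / (dim1 + dim2))

num_hid_neurons = 1500      # hypothetical hidden-layer width
bottleneck_dim = 40         # hypothetical --bottleneck-dim
param_stddev_factor = 0.1   # hypothetical --param-stddev-factor
hid_bias_mean, hid_bias_range = -2.0, 4.0   # hypothetical bias init

# Into the bottleneck: no bias, 25% smaller init stddev, 10x smaller learning rate.
print("<AffineTransformNobias> <InputDim> %d <OutputDim> %d <ParamStddev> %f <LearnRateCoef> %f" %
      (num_hid_neurons, bottleneck_dim,
       param_stddev_factor * Glorot(num_hid_neurons, bottleneck_dim) * 0.75, 0.1))

# Out of the bottleneck: 25% smaller init stddev, 10x smaller rate for weights and biases.
print("<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f "
      "<ParamStddev> %f <LearnRateCoef> %f <BiasLearnRateCoef> %f" %
      (bottleneck_dim, num_hid_neurons, hid_bias_mean, hid_bias_range,
       param_stddev_factor * Glorot(bottleneck_dim, num_hid_neurons) * 0.75, 0.1, 0.1))

The design intent, per the commit message and the in-line comments, is that a low-dimensional bottleneck is sensitive to large updates, so a slightly smaller initialization combined with a 10x smaller per-component learning rate keeps it stable while the rest of the network trains at the normal rate.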
