@@ -115,18 +115,20 @@ def Glorot(dim1, dim2):
115115# Optionally add bottleneck
116116if o .bottleneck_dim != 0 :
117117 assert (o .bottleneck_dim > 0 )
118- print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f" % \
119- (num_hid_neurons , o .bottleneck_dim , 0.0 , 0.0 , \
120- (o .param_stddev_factor * Glorot (num_hid_neurons , o .bottleneck_dim )))
121- print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f" % \
118+ # 25% smaller stddev -> small bottleneck range, 10x smaller learning rate
119+ print "<AffineTransformNobias> <InputDim> %d <OutputDim> %d <ParamStddev> %f <LearnRateCoef> %f" % \
120+ (num_hid_neurons , o .bottleneck_dim , \
121+ (o .param_stddev_factor * Glorot (num_hid_neurons , o .bottleneck_dim ) * 0.75 ), 0.1 )
122+ # 25% smaller stddev -> smaller gradient in prev. layer, 10x smaller learning rate for weights & biases
123+ print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <LearnRateCoef> %f <BiasLearnRateCoef> %f" % \
122124 (o .bottleneck_dim , num_hid_neurons , o .hid_bias_mean , o .hid_bias_range , \
123- (o .param_stddev_factor * Glorot (o .bottleneck_dim , num_hid_neurons )))
125+ (o .param_stddev_factor * Glorot (o .bottleneck_dim , num_hid_neurons ) * 0.75 ), 0.1 , 0.1 )
124126 print "%s <InputDim> %d <OutputDim> %d" % (o .activation_type , num_hid_neurons , num_hid_neurons )
125127
126- # Last AffineTransform
127- print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f" % \
128+ # Last AffineTransform (10x smaller learning rate on bias)
129+ print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <LearnRateCoef> %f <BiasLearnRateCoef> %f " % \
128130 (num_hid_neurons , num_leaves , 0.0 , 0.0 , \
129- (o .param_stddev_factor * Glorot (num_hid_neurons , num_leaves )))
131+ (o .param_stddev_factor * Glorot (num_hid_neurons , num_leaves )), 1.0 , 0.1 )
130132
131133# Optionally append softmax
132134if o .with_softmax :
0 commit comments