Skip to content

Commit d65e118

Browse files
committed
trunk:added PowerComponent in the p-norm nnet training recipe.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4074 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
1 parent b26429b commit d65e118

15 files changed

Lines changed: 285 additions & 5 deletions

egs/wsj/s5/steps/nnet2/train_pnorm.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ softmax_learning_rate_factor=1.0 # In the default setting keep the same learning
2727
combine_regularizer=1.0e-14 # Small regularizer so that parameters won't go crazy.
2828
pnorm_input_dim=3000
2929
pnorm_output_dim=300
30+
first_component_power=1.0 # could set this to 0.5, often seems to improve results.
3031
p=2
3132
minibatch_size=128 # by default use a smallish minibatch size for neural net
3233
# training; this controls instability which would otherwise
@@ -213,6 +214,11 @@ SpliceComponent input-dim=$ext_feat_dim left-context=$splice_width right-context
213214
FixedAffineComponent matrix=$lda_mat
214215
AffineComponentPreconditioned input-dim=$ext_lda_dim output-dim=$pnorm_input_dim alpha=$alpha max-change=$max_change learning-rate=$initial_learning_rate param-stddev=$stddev bias-stddev=$bias_stddev
215216
PnormComponent input-dim=$pnorm_input_dim output-dim=$pnorm_output_dim p=$p
217+
EOF
218+
# Append a PowerComponent after the p-norm layer only when a non-default
# power was requested.  The comparison is a string comparison against the
# default "1.0"; the expansions are quoted so that an empty value or a path
# containing spaces does not break the test or the redirection.
if [ "$first_component_power" != "1.0" ]; then
  echo "PowerComponent dim=$pnorm_output_dim power=$first_component_power" >> "$dir/nnet.config"
fi
221+
cat >>$dir/nnet.config <<EOF
216222
NormalizeComponent dim=$pnorm_output_dim
217223
AffineComponentPreconditioned input-dim=$pnorm_output_dim output-dim=$num_leaves alpha=$alpha max-change=$max_change learning-rate=$initial_learning_rate param-stddev=0 bias-stddev=0
218224
SoftmaxComponent dim=$num_leaves

src/cudamatrix/cu-kernels-ansi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ void cudaFD_copy_from_tp(dim3 Gr, dim3 Bl, float* A, const double* B, MatrixDim
5858
void cudaF_copy_col_from_vec(int Gr, int Bl, float* mat, const float* v, int col, MatrixDim d);
5959
void cudaF_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d);
6060
void cudaF_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim d);
61+
void cudaF_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, bool include_sign, MatrixDim d);
6162
void cudaF_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim d);
6263
void cudaF_apply_floor(dim3 Gr, dim3 Bl, float* mat, float floor_val, MatrixDim d);
6364
void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);
@@ -187,6 +188,7 @@ void cudaDF_copy_from_tp(dim3 Gr, dim3 Bl, double* A, const float* B, MatrixDim
187188
void cudaD_copy_col_from_vec(int Gr, int Bl, double* mat, const double* v, int col, MatrixDim d);
188189
void cudaD_apply_exp(dim3 Gr, dim3 Bl, double* mat, MatrixDim d);
189190
void cudaD_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, MatrixDim d);
191+
void cudaD_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, bool include_sign, MatrixDim d);
190192
void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d);
191193
void cudaD_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, MatrixDim d);
192194
void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);

src/cudamatrix/cu-kernels.cu

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,6 @@ static void _apply_log(Real* mat, MatrixDim d) {
387387
mat[index] = log(mat[index]);
388388
}
389389

390-
391390
template<typename Real>
392391
__global__
393392
static void _mul_elements(Real* mat, const Real* A, MatrixDim dst_d, int src_stride) {
@@ -1161,6 +1160,40 @@ static void _apply_pow(Real* mat, Real power, MatrixDim d) {
11611160
}
11621161
}
11631162

1163+
// Element-wise kernel: replaces each element x of mat with |x|^power.
// If include_sign is true and x < 0 the result is negated, so the sign of
// the input is kept.  If power is negative and x == 0 the result is defined
// as 0 instead of calling pow().
// Here i (the x dimension of the launch) is the row index and j (the y
// dimension) is the column index; threads outside d.rows x d.cols do nothing.
template<typename Real>
__global__
static void _apply_pow_abs(Real* mat, Real power, bool include_sign, MatrixDim d) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  int j = blockIdx.y * blockDim.y + threadIdx.y;
  int index = i * d.stride + j;

  if (i < d.rows && j < d.cols) {
    // Load the element once and work on the local copy.
    const Real x = mat[index];
    const Real abs_x = std::abs(x);

    // Exactly one branch computes the magnitude; the cheap special cases
    // (1, 2, 0.5) avoid the generic pow() call.
    Real result;
    if (power == 1.0) {
      result = abs_x;
    } else if (power == 2.0) {
      result = x * x;
    } else if (power == 0.5) {
      result = sqrt(abs_x);
    } else if (power < 0.0 && x == 0.0) {
      result = 0.0;
    } else {
      result = pow(abs_x, power);
    }

    // The sign is taken from the original input value.
    mat[index] = (include_sign && x < 0) ? -result : result;
  }
}
1196+
11641197
// Caution, here i/block{idx,dim}.x is the row index and j/block{idx,dim}.y is the col index.
11651198
// this is for no reason, really, I just happened to prefer this
11661199
// at the time. [dan]
@@ -1953,6 +1986,10 @@ void cudaF_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim d) {
19531986
_apply_pow<<<Gr,Bl>>>(mat, power, d);
19541987
}
19551988

1989+
// Launches the float instantiation of the _apply_pow_abs kernel with grid
// dimensions Gr and block dimensions Bl.
void cudaF_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power,
                         bool include_sign, MatrixDim d) {
  _apply_pow_abs<float><<<Gr, Bl>>>(mat, power, include_sign, d);
}
1992+
19561993
void cudaF_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) {
19571994
_apply_heaviside<<<Gr,Bl>>>(mat, d);
19581995

@@ -2372,6 +2409,10 @@ void cudaD_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, MatrixDim d) {
23722409
_apply_pow<<<Gr,Bl>>>(mat, power, d);
23732410
}
23742411

2412+
// Launches the double instantiation of the _apply_pow_abs kernel with grid
// dimensions Gr and block dimensions Bl.
void cudaD_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power,
                         bool include_sign, MatrixDim d) {
  _apply_pow_abs<double><<<Gr, Bl>>>(mat, power, include_sign, d);
}
2415+
23752416
void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d) {
23762417
_apply_heaviside<<<Gr,Bl>>>(mat, d);
23772418
}

src/cudamatrix/cu-kernels.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ inline void cuda_copy_from_mat_trans(dim3 Gr, dim3 Bl, double* mat_out, const fl
8585
inline void cuda_copy_col_from_vec(int Gr, int Bl, float* mat, const float* v, int col, MatrixDim d) { cudaF_copy_col_from_vec(Gr,Bl,mat,v,col,d); }
8686
inline void cuda_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) { cudaF_apply_exp(Gr,Bl,mat,d); }
8787
inline void cuda_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim dim) { cudaF_apply_pow(Gr,Bl,mat,power,dim); }
88+
inline void cuda_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, bool include_sign, MatrixDim dim) { cudaF_apply_pow_abs(Gr,Bl,mat,power,include_sign, dim); }
8889
inline void cuda_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim dim) { cudaF_apply_heaviside(Gr,Bl,mat,dim); }
8990
inline void cuda_apply_floor(dim3 Gr, dim3 Bl, float* mat, float floor_val, MatrixDim dim) { cudaF_apply_floor(Gr,Bl,mat,floor_val,dim); }
9091
inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, float* mat, float ceiling_val, MatrixDim dim) { cudaF_apply_ceiling(Gr,Bl,mat,ceiling_val,dim); }
@@ -254,6 +255,7 @@ inline void cuda_copy_from_tp(dim3 Gr, dim3 Bl, double* A, const float* B, Matri
254255
inline void cuda_copy_col_from_vec(int Gr, int Bl, double* mat, const double* v, int col, MatrixDim d) { cudaD_copy_col_from_vec(Gr,Bl,mat,v,col,d); }
255256
inline void cuda_apply_exp(dim3 Gr, dim3 Bl, double* mat, MatrixDim d) { cudaD_apply_exp(Gr,Bl,mat,d); }
256257
inline void cuda_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, MatrixDim dim) { cudaD_apply_pow(Gr,Bl,mat,power,dim); }
258+
inline void cuda_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, bool include_sign, MatrixDim dim) { cudaD_apply_pow_abs(Gr,Bl,mat,power,include_sign,dim); }
257259
inline void cuda_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim dim) { cudaD_apply_heaviside(Gr,Bl,mat,dim); }
258260
inline void cuda_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, MatrixDim dim) { cudaD_apply_floor(Gr,Bl,mat,floor_val,dim); }
259261
inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, double ceiling_val, MatrixDim dim) { cudaD_apply_ceiling(Gr,Bl,mat,ceiling_val,dim); }

src/cudamatrix/cu-matrix-test.cc

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,27 @@ static void UnitTestCuMatrixApplyPow() {
305305
}
306306
}
307307

308+
template<typename Real>
309+
static void UnitTestCuMatrixApplyPowAbs() {
310+
311+
for (int32 i = 0; i < 2; i++) {
312+
BaseFloat pow = 0.5 * (rand() % 6);
313+
314+
Matrix<Real> H(10 + rand() % 60, 10 + rand() % 20);
315+
H.SetRandn();
316+
H.Row(0).Set(0.0);
317+
if (i == 2) { Matrix<Real> tmp(H, kTrans); H = tmp; }
318+
319+
CuMatrix<Real> cH(H);
320+
321+
cH.ApplyPowAbs(pow, true);
322+
323+
H.ApplyPowAbs(pow, true);
324+
Matrix<Real> H2(cH);
325+
AssertEqual(H, H2);
326+
}
327+
}
328+
308329

309330
template<typename Real>
310331
static void UnitTestCuMatrixCopyRowsFromVec() {
@@ -509,7 +530,6 @@ static void UnitTestCuMatrixApplyHeaviside() {
509530
}
510531

511532

512-
513533
template<typename Real>
514534
static void UnitTestCuMatrixMulElements() {
515535
for (int32 i = 0; i < 2; i++) {
@@ -1923,6 +1943,7 @@ template<typename Real> void CudaMatrixUnitTest() {
19231943
UnitTestCuMatrixSigmoid<Real>();
19241944
UnitTestCuMatrixSoftHinge<Real>();
19251945
UnitTestCuMatrixApplyPow<Real>();
1946+
UnitTestCuMatrixApplyPowAbs<Real>();
19261947
UnitTestCuMatrixSet<Real>();
19271948
UnitTestCuMatrixAdd<Real>();
19281949
UnitTestCuMatrixApplyFloor<Real>();

src/cudamatrix/cu-matrix.cc

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -602,8 +602,6 @@ void CuMatrixBase<Real>::ApplyLog() {
602602
}
603603
}
604604

605-
606-
607605
template<typename Real>
608606
void CuMatrixBase<Real>::MulElements(const CuMatrixBase<Real>& A) {
609607
#if HAVE_CUDA == 1
@@ -1632,6 +1630,25 @@ void CuMatrixBase<Real>::ApplyPow(Real power) {
16321630
}
16331631
}
16341632

1633+
template<typename Real>
1634+
void CuMatrixBase<Real>::ApplyPowAbs(Real power, bool include_sign) {
1635+
#if HAVE_CUDA == 1
1636+
if (CuDevice::Instantiate().Enabled()) {
1637+
Timer tim;
1638+
dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
1639+
dim3 dimGrid(n_blocks(NumRows(), CU2DBLOCK),
1640+
n_blocks(NumCols(), CU2DBLOCK));
1641+
1642+
cuda_apply_pow_abs(dimGrid, dimBlock, data_, power, include_sign, Dim());
1643+
CU_SAFE_CALL(cudaGetLastError());
1644+
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
1645+
} else
1646+
#endif
1647+
{
1648+
Mat().ApplyPowAbs(power, include_sign);
1649+
}
1650+
}
1651+
16351652
template<typename Real>
16361653
void CuMatrixBase<Real>::ApplyHeaviside() {
16371654
#if HAVE_CUDA == 1

src/cudamatrix/cu-matrix.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,13 @@ class CuMatrixBase {
233233
void SymInvertPosDef();
234234

235235
void ApplyPow(Real power);
236+
/// Apply power to the absolute value of each element.
237+
/// If include_sign is true, the result will be multiplied with
238+
/// the sign of the input value.
239+
/// If the power is negative and the input to the power is zero,
240+
/// the output will be set to zero. If include_sign is true, the
241+
/// result is multiplied by the sign of the input.
242+
void ApplyPowAbs(Real power, bool include_sign=false);
236243
void ApplyHeaviside(); ///< For each element, sets x = (x > 0 ? 1.0 : 0.0)
237244
void ApplyFloor(Real floor_val);
238245
void ApplyCeiling(Real ceiling_val);

src/matrix/kaldi-matrix.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1847,6 +1847,13 @@ void MatrixBase<Real>::ApplyPow(Real power) {
18471847
}
18481848
}
18491849

1850+
template<typename Real>
1851+
void MatrixBase<Real>::ApplyPowAbs(Real power, bool include_sign) {
1852+
for (MatrixIndexT i = 0; i < num_rows_; i++) {
1853+
Row(i).ApplyPowAbs(power, include_sign);
1854+
}
1855+
}
1856+
18501857
template<typename Real>
18511858
void MatrixBase<Real>::ApplyHeaviside() {
18521859
MatrixIndexT num_rows = num_rows_, num_cols = num_cols_;

src/matrix/kaldi-matrix.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,12 @@ class MatrixBase {
298298
/// Applies power to all matrix elements
299299
void ApplyPow(Real power);
300300

301+
/// Apply power to the absolute value of each element.
302+
/// Include the sign of the input element if include_sign == true.
303+
/// If the power is negative and the input to the power is zero,
304+
/// The output will be set zero.
305+
void ApplyPowAbs(Real power, bool include_sign=false);
306+
301307
/// Applies the Heaviside step function (x > 0 ? 1 : 0) to all matrix elements
302308
/// Note: in general you can make different choices for x = 0, but for now
303309
/// please leave it as it (i.e. returning zero) because it affects the

src/matrix/kaldi-vector.cc

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,40 @@ void VectorBase<Real>::ApplyPow(Real power) {
451451
}
452452
#endif
453453

454+
// takes absolute value of the elements to a power.
455+
// Throws exception if could not (but only for power != 1 and power != 2).
456+
template<typename Real>
457+
void VectorBase<Real>::ApplyPowAbs(Real power, bool include_sign) {
458+
if (power == 1.0)
459+
for (MatrixIndexT i = 0; i < dim_; i++)
460+
data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * std::abs(data_[i]);
461+
if (power == 2.0) {
462+
for (MatrixIndexT i = 0; i < dim_; i++)
463+
data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * data_[i] * data_[i];
464+
} else if (power == 0.5) {
465+
for (MatrixIndexT i = 0; i < dim_; i++) {
466+
data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * std::sqrt(std::abs(data_[i]));
467+
}
468+
} else if (power < 0.0) {
469+
for (MatrixIndexT i = 0; i < dim_; i++) {
470+
data_[i] = (data_[i] == 0.0 ? 0.0 : pow(std::abs(data_[i]), power));
471+
data_[i] *= (include_sign && data_[i] < 0 ? -1 : 1);
472+
if (data_[i] == HUGE_VAL) { // HUGE_VAL is what errno returns on error.
473+
KALDI_ERR << "Could not raise element " << i << "to power "
474+
<< power << ": returned value = " << data_[i];
475+
}
476+
}
477+
} else {
478+
for (MatrixIndexT i = 0; i < dim_; i++) {
479+
data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * pow(std::abs(data_[i]), power);
480+
if (data_[i] == HUGE_VAL) { // HUGE_VAL is what errno returns on error.
481+
KALDI_ERR << "Could not raise element " << i << "to power "
482+
<< power << ": returned value = " << data_[i];
483+
}
484+
}
485+
}
486+
}
487+
454488
// Computes the p-th norm. Throws exception if could not.
455489
template<typename Real>
456490
Real VectorBase<Real>::Norm(Real p) const {

0 commit comments

Comments
 (0)