You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
trunk: changes to Dan's neural net setup, with a new preconditioning method (speed roughly doubled if you use train_pnorm_online.sh, which uses the new preconditioning method). Various bug-fixes, optimizations and cleanups in matrix code, cuda-matrix code and thread code. Still tuning this, so recipes are not checked in yet.
inlinevoidcuda_vec_max(constfloat* v, float* value, int dim) { cudaF_vec_max(v,value,dim); }
142
142
inlinevoidcuda_trace_mat_mat_trans(constfloat* A, constfloat* B, MatrixDim dA, int B_stride, float* value) { cudaF_trace_mat_mat_trans(A,B,dA,B_stride,value); }
143
143
inlinevoidcuda_trace_mat_mat(constfloat* A, constfloat* B, MatrixDim dA, int B_stride, float* value) { cudaF_trace_mat_mat(A,B,dA,B_stride,value); }
144
-
inlinevoidcuda_add_diag_mat_trans(int Gr, int Bl, float alpha, float* v, constfloat* mat, float beta, MatrixDim dmat, int dim) { cudaF_add_diag_mat_trans(Gr,Bl,alpha,v,mat,beta,dmat,dim); }
145
144
inlinevoidcuda_add_diag_mat_mat(int Gr, int Bl, float alpha, float* v, int v_dim, constfloat* M,
146
145
int M_cols, int M_row_stride, int M_col_stride, constfloat *N, int N_row_stride,
147
146
int N_col_stride, int threads_per_element, float beta) {
148
147
cudaF_add_diag_mat_mat(Gr, Bl, alpha, v, v_dim, M, M_cols, M_row_stride, M_col_stride, N, N_row_stride,
149
148
N_col_stride, threads_per_element, beta);
150
149
}
151
-
inlinevoidcuda_add_diag_mat(int Gr, int Bl, float alpha, float* v, constfloat* mat, float beta, MatrixDim dmat, int dim) { cudaF_add_diag_mat(Gr,Bl,alpha,v,mat,beta,dmat,dim); }
152
150
inlinevoidcuda_add_vec_vec(int Gr, int Bl, float alpha, float* v, constfloat* x, constfloat* y, float beta, int dim) { cudaF_add_vec_vec(Gr,Bl,alpha,v,x,y,beta,dim); }
153
151
inlinevoidcuda_copy_col_from_mat(int Gr, int Bl, float* v, int col, constfloat* mat, MatrixDim dmat, int dim) { cudaF_copy_col_from_mat(Gr,Bl,v,col,mat,dmat,dim); }
154
152
inlinevoidcuda_copy_col_from_mat_df(int Gr, int Bl, double* v, int col, constfloat* mat, MatrixDim dmat, int dim) { cudaF_copy_col_from_mat_df(Gr,Bl,v,col,mat,dmat,dim); }
inlinevoidcuda_vec_max(constdouble* v, double* value, int dim) { cudaD_vec_max(v,value,dim); }
312
310
inlinevoidcuda_trace_mat_mat_trans(constdouble* A, constdouble* B, MatrixDim dA, int B_stride, double* value) { cudaD_trace_mat_mat_trans(A,B,dA,B_stride,value); }
313
311
inlinevoidcuda_trace_mat_mat(constdouble* A, constdouble* B, MatrixDim dA, int B_stride, double* value) { cudaD_trace_mat_mat(A,B,dA,B_stride,value); }
314
-
inlinevoidcuda_add_diag_mat_trans(int Gr, int Bl, double alpha, double* v, constdouble* mat, double beta, MatrixDim dmat, int dim) { cudaD_add_diag_mat_trans(Gr,Bl,alpha,v,mat,beta,dmat,dim); }
315
312
inlinevoidcuda_add_diag_mat_mat(int Gr, int Bl, double alpha, double* v, int v_dim, constdouble* M,
316
313
int M_cols, int M_row_stride, int M_col_stride, constdouble *N, int N_row_stride,
317
314
int N_col_stride, int threads_per_element, double beta) {
318
315
cudaD_add_diag_mat_mat(Gr, Bl, alpha, v, v_dim, M, M_cols, M_row_stride, M_col_stride, N, N_row_stride,
319
316
N_col_stride, threads_per_element, beta);
320
317
}
321
-
inlinevoidcuda_add_diag_mat(int Gr, int Bl, double alpha, double* v, constdouble* mat, double beta, MatrixDim dmat, int dim) { cudaD_add_diag_mat(Gr,Bl,alpha,v,mat,beta,dmat,dim); }
322
318
inlinevoidcuda_add_vec_vec(int Gr, int Bl, double alpha, double* v, constdouble* x, constdouble* y, double beta, int dim) { cudaD_add_vec_vec(Gr,Bl,alpha,v,x,y,beta,dim); }
323
319
inlinevoidcuda_copy_col_from_mat(int Gr, int Bl, double* v, int col, constdouble* mat, MatrixDim dmat, int dim) { cudaD_copy_col_from_mat(Gr,Bl,v,col,mat,dmat,dim); }
324
320
inlinevoidcuda_copy_col_from_mat_df(int Gr, int Bl, double* v, int col, constdouble* mat, MatrixDim dmat, int dim) { cudaD_copy_col_from_mat_df(Gr,Bl,v,col,mat,dmat,dim); }
0 commit comments