Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
model_trainer_kernels.cxx
Go to the documentation of this file.
1 #include <ctf.hpp>
2 using namespace CTF;
3 
/**
 * \brief Collection of static scalar and array operations on doubles.
 *
 * op1 and op2 are additionally marked __device__ __host__ when the file is
 * compiled with a CUDA compiler (__CUDACC__ defined).
 */
struct grp {
  /** \brief Evaluates b - b/a. */
#ifdef __CUDACC__
  __device__ __host__
#endif
  static double op1(double a, double b) {
    return b - (b / a);
  }

  /** \brief Accumulates a into b in place (b becomes b + a). */
#ifdef __CUDACC__
  __device__ __host__
#endif
  static void op2(double a, double & b) {
    b = b + a;
  }

  /** \brief Returns the sum a + b. */
  static double op2_t2(double a, double b) {
    return a + b;
  }

  /**
   * \brief Elementwise accumulation: b[k] becomes b[k] + a[k] for k in [0, n).
   *
   * The loop carries an OpenMP parallel-for pragma; each iteration touches
   * only its own index.
   *
   * \param[in]     a  addends, at least n entries
   * \param[in,out] b  accumulators, at least n entries
   * \param[in]     n  number of entries to process
   */
  static void op2_red(double const * a, double * b, int n) {
    #pragma omp parallel for
    for (int k = 0; k < n; ++k) {
      b[k] = b[k] + a[k];
    }
  }
};
21 
22 
23 void train_off_vec_mat(int64_t n, int64_t m, World & dw, bool sp_A, bool sp_B, bool sp_C){
24  MPI_Op madd;
25  MPI_Op_create([](void * a, void * b, int * n, MPI_Datatype*){
26  grp::op2_red((double*)a, (double*)b, *n);
27  }, 1, &madd);
28  Monoid<> mon(0, grp::op2_t2, madd);
29  for (double sp = .005; sp<.32; sp*=2.){
30  Matrix<> A(m, n, dw, mon);
31  Matrix<> B(m, n, dw, mon);
32  Matrix<> G(n, n, dw, mon);
33  Vector<> b(n, dw, mon);
34  Vector<> c(m, dw, mon);
35 
36  srand48(dw.rank);
37  b.fill_random(-.5, .5);
38  c.fill_random(-.5, .5);
39  A.fill_random(-.5, .5);
40  B.fill_random(-.5, .5);
41  G.fill_random(-.5, .5);
42 
44 
45  if (sp > .009){
46  if (sp_A)
47  A.sparsify([=](double a){ return fabs(a)<=.5*sp; });
48  if (sp_B){
49  G.sparsify([=](double a){ return fabs(a)<=.5*sp; });
50  b.sparsify([=](double a){ return fabs(a)<=.5*sp; });
51  }
52  if (sp_C){
53  B.sparsify([=](double a){ return fabs(a)<=.5*sp; });
54  c.sparsify([=](double a){ return fabs(a)<=.5*sp; });
55  }
56  }
57 
58  k1(A["ik"],G["kj"],B["ij"]);
59  k1(A["ij"],b["j"],c["i"]);
60 
61  }
62 }
63 
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18
static double op2_t2(double a, double b)
Vector class which encapsulates a 1D tensor.
Definition: vector.h:14
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max]; works only for dtype in {f...
Definition: tensor.cxx:928
static void op2_red(double const *a, double *b, int n)
static void op2(double a, double &b)
int rank
rank of local processor
Definition: world.h:24
void train_off_vec_mat(int64_t n, int64_t m, World &dw, bool sp_A, bool sp_B, bool sp_C)
void sparsify()
reduce tensor to sparse format, storing only nonzero data, or data above a specified threshold...
Definition: tensor.cxx:449
static double op1(double a, double b)
Definition: apsp.cxx:17
A Monoid is a Set equipped with a binary addition operator '+' or a custom function; addition must hav...
Definition: monoid.h:69