Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
matmul.cxx
Go to the documentation of this file.
1 
8 #include <ctf.hpp>
9 #include <float.h>
10 using namespace CTF;
11 
28 int matmul(int m,
29  int n,
30  int k,
31  World & dw,
32  int sym_A=NS,
33  int sym_B=NS,
34  int sym_C=NS,
35  double sp_A=1.,
36  double sp_B=1.,
37  double sp_C=1.,
38  bool test=true,
39  bool bench=false,
40  int niter=10){
41  assert(test || bench);
42 
43  int sA = sp_A < 1. ? SP : 0;
44  int sB = sp_B < 1. ? SP : 0;
45  int sC = sp_C < 1. ? SP : 0;
46 
47  /* initialize matrices with attributes */
48  Matrix<> A(m, k, sym_A|sA, dw);
49  Matrix<> B(k, n, sym_B|sB, dw);
50  Matrix<> C(m, n, sym_C|sC, dw, "C");
51 
52 
53  /* fill with random data */
54  srand48(dw.rank);
55  if (sp_A < 1.)
56  A.fill_sp_random(0.0,1.0,sp_A);
57  else
58  A.fill_random(0.0,1.0);
59  if (sp_B < 1.)
60  B.fill_sp_random(0.0,1.0,sp_B);
61  else
62  B.fill_random(0.0,1.0);
63  if (sp_C < 1.)
64  C.fill_sp_random(0.0,1.0,sp_C);
65  else
66  C.fill_random(0.0,1.0);
67 
68  bool pass = true;
69 
70  if (test){
71  /* initialize matrices with default attributes (nonsymmetric, dense) */
72  Matrix<> ref_A(m, k, dw);
73  Matrix<> ref_B(k, n, dw);
74  Matrix<> ref_C(m, n, dw, "ref_C");
75 
76  /* copy (sparse) initial data to a set of reference matrices */
77  ref_A["ij"] = A["ij"];
78  ref_B["ij"] = B["ij"];
79  ref_C["ij"] = C["ij"];
80 
81  /* compute reference answer */
82  switch (sym_C){
83  case NS:
84  ref_C["ik"] += ref_A["ij"]*ref_B["jk"];
85  break;
86  case SY:
87  case SH:
88  ref_C["ik"] += ref_A["ij"]*ref_B["jk"];
89  ref_C["ik"] += ref_A["kj"]*ref_B["ji"];
90  if (sym_C == SH) ref_C["ii"] = 0.0;
91  break;
92  case AS:
93  ref_C["ik"] += ref_A["ij"]*ref_B["jk"];
94  ref_C["ik"] -= ref_A["kj"]*ref_B["ji"];
95  break;
96  }
97  /* compute answer for matrices with attributes as specified */
98  C["ik"] += .5*A["ij"]*B["jk"];
99  C["ik"] += .5*B["jk"]*A["ij"];
100 
101 
102  /* compute difference in answer */
103  ref_C["ik"] -= C["ik"];
104 
105  pass = ref_C.norm2() <= 1.E-6;
106 
107  if (dw.rank == 0){
108  if (pass)
109  printf("{ C[\"ik\"] += A[\"ij\"]*B[\"jk\"] with A (%d*%d sym %d sp %lf), B (%d*%d sym %d sp %lf), C (%d*%d sym %d sp %lf) } passed \n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C);
110  else
111  printf("{ C[\"ik\"] += A[\"ij\"]*B[\"jk\"] with A (%d*%d sym %d sp %lf), B (%d*%d sym %d sp %lf), C (%d*%d sym %d sp %lf) } failed \n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C);
112  }
113  }
114  if (bench){
115  double min_time = DBL_MAX;
116  double max_time = 0.0;
117  double tot_time = 0.0;
118  double times[niter];
119 
120  if (dw.rank == 0){
121  printf("Starting %d benchmarking iterations of matrix multiplication with specified attributes...\n", niter);
122  }
123  min_time = DBL_MAX;
124  max_time = 0.0;
125  tot_time = 0.0;
126  Timer_epoch smatmul("specified matmul");
127  smatmul.begin();
128  for (int i=0; i<niter; i++){
129  //A.print();
130  //B.print();
131 
132  double start_time = MPI_Wtime();
133  C["ik"] = A["ij"]*B["jk"];
134  //C.print();
135  double end_time = MPI_Wtime();
136  double iter_time = end_time-start_time;
137  times[i] = iter_time;
138  tot_time += iter_time;
139  if (iter_time < min_time) min_time = iter_time;
140  if (iter_time > max_time) max_time = iter_time;
141  }
142  smatmul.end();
143 
144  if (dw.rank == 0){
145  printf("iterations completed.\n");
146  printf("All iterations times: ");
147  for (int i=0; i<niter; i++){
148  printf("%lf ", times[i]);
149  }
150  printf("\n");
151  std::sort(times,times+niter);
152  printf("A (%d*%d sym %d sp %lf), B (%d*%d sym %d sp %lf), C (%d*%d sym %d sp %lf) Min time = %lf, Avg time = %lf, Med time = %lf, Max time = %lf\n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C,min_time,tot_time/niter, times[niter/2], max_time);
153  }
154 
155  }
156 
157  return pass;
158 }
159 
160 
161 #ifndef TEST_SUITE
162 char* getCmdOption(char ** begin,
163  char ** end,
164  const std::string & option){
165  char ** itr = std::find(begin, end, option);
166  if (itr != end && ++itr != end){
167  return *itr;
168  }
169  return 0;
170 }
171 
172 
173 int main(int argc, char ** argv){
174  int rank, np, m, n, k, pass, niter, bench, sym_A, sym_B, sym_C, test;
175  double sp_A, sp_B, sp_C;
176  int const in_num = argc;
177  char ** input_str = argv;
178 
179  MPI_Init(&argc, &argv);
180  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
181  MPI_Comm_size(MPI_COMM_WORLD, &np);
182 
183  if (getCmdOption(input_str, input_str+in_num, "-m")){
184  m = atoi(getCmdOption(input_str, input_str+in_num, "-m"));
185  if (m < 0) m = 17;
186  } else m = 17;
187 
188  if (getCmdOption(input_str, input_str+in_num, "-n")){
189  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
190  if (n < 0) n = 32;
191  } else n = 32;
192 
193  if (getCmdOption(input_str, input_str+in_num, "-k")){
194  k = atoi(getCmdOption(input_str, input_str+in_num, "-k"));
195  if (k < 0) k = 9;
196  } else k = 9;
197 
198  if (getCmdOption(input_str, input_str+in_num, "-sym_A")){
199  sym_A = atoi(getCmdOption(input_str, input_str+in_num, "-sym_A"));
200  if (sym_A != AS && sym_A != SY && sym_A != SH) sym_A = NS;
201  } else sym_A = NS;
202 
203  if (getCmdOption(input_str, input_str+in_num, "-sym_B")){
204  sym_B = atoi(getCmdOption(input_str, input_str+in_num, "-sym_B"));
205  if (sym_B != AS && sym_B != SY && sym_B != SH) sym_B = NS;
206  } else sym_B = NS;
207 
208  if (getCmdOption(input_str, input_str+in_num, "-sym_C")){
209  sym_C = atoi(getCmdOption(input_str, input_str+in_num, "-sym_C"));
210  if (sym_C != AS && sym_C != SY && sym_C != SH) sym_C = NS;
211  } else sym_C = NS;
212 
213  if (getCmdOption(input_str, input_str+in_num, "-sp_A")){
214  sp_A = atof(getCmdOption(input_str, input_str+in_num, "-sp_A"));
215  if (sp_A < 0.0 || sp_A > 1.0) sp_A = .2;
216  } else sp_A = .2;
217 
218  if (getCmdOption(input_str, input_str+in_num, "-sp_B")){
219  sp_B = atof(getCmdOption(input_str, input_str+in_num, "-sp_B"));
220  if (sp_B < 0.0 || sp_B > 1.0) sp_B = .2;
221  } else sp_B = .2;
222 
223  if (getCmdOption(input_str, input_str+in_num, "-sp_C")){
224  sp_C = atof(getCmdOption(input_str, input_str+in_num, "-sp_C"));
225  if (sp_C < 0.0 || sp_C > 1.0) sp_C = .2;
226  } else sp_C = .2;
227 
228  if (getCmdOption(input_str, input_str+in_num, "-niter")){
229  niter = atoi(getCmdOption(input_str, input_str+in_num, "-niter"));
230  if (niter < 0) niter = 10;
231  } else niter = 10;
232 
233  if (getCmdOption(input_str, input_str+in_num, "-bench")){
234  bench = atoi(getCmdOption(input_str, input_str+in_num, "-bench"));
235  if (bench != 0 && bench != 1) bench = 1;
236  } else bench = 1;
237 
238  if (getCmdOption(input_str, input_str+in_num, "-test")){
239  test = atoi(getCmdOption(input_str, input_str+in_num, "-test"));
240  if (test != 0 && test != 1) test = 1;
241  } else test = 1;
242 
243 
244  {
245  World dw(argc, argv);
246 
247  if (rank == 0){
248  printf("Multiplying A (%d*%d sym %d sp %lf) and B (%d*%d sym %d sp %lf) into C (%d*%d sym %d sp %lf) \n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C);
249  }
250  pass = matmul(m, n, k, dw, sym_A, sym_B, sym_C, sp_A, sp_B, sp_C, test, bench, niter);
251  assert(pass);
252  }
253 
254  MPI_Finalize();
255  return 0;
256 }
262 #endif
int main(int argc, char **argv)
Definition: matmul.cxx:173
char * getCmdOption(char **begin, char **end, const std::string &option)
Definition: matmul.cxx:162
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18
def rank(self)
Definition: core.pyx:312
Definition: common.h:37
Definition: common.h:37
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
string
Definition: core.pyx:456
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
Definition: tensor.h:811
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
Definition: tensor.cxx:928
int rank
rank of local processor
Definition: world.h:24
int matmul(int m, int n, int k, World &dw, int sym_A=NS, int sym_B=NS, int sym_C=NS, double sp_A=1., double sp_B=1., double sp_C=1., bool test=true, bool bench=false, int niter=10)
(if test) tests and (if bench) benchmarks m*n*k matrix multiplication with matrices of specified symm...
Definition: matmul.cxx:28
epoch during which to measure timers
Definition: timer.h:69
void fill_sp_random(dtype rmin, dtype rmax, double frac_sp)
generate roughly frac_sp*dense_tensor_size nonzeros between rmin and rmax, works only for dtype in {f...
Definition: tensor.cxx:969
Definition: apsp.cxx:17
Definition: common.h:37
Definition: common.h:37
Definition: common.h:37
def np(self)
Definition: core.pyx:315