Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
gemm_4D.cxx
Go to the documentation of this file.
1 
8 #include <ctf.hpp>
9 using namespace CTF;
10 
11 int gemm_4D(int const n,
12  int const sym,
13  int const niter,
14  World &dw){
15  int rank, i, num_pes;
16  int64_t np;
17  double * pairs, * pairs_AB, * pairs_BC;
18  int64_t * indices, * indices_AB, * indices_BC;
19 
20  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
21  MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
22 
23 
24  int shapeN4[] = {sym,NS,sym,NS};
25  int sizeN4[] = {n,n,n,n};
26 
27  //* Creates distributed tensors initialized with zeros
28  Tensor<> A(4, sizeN4, shapeN4, dw);
29  Tensor<> B(4, sizeN4, shapeN4, dw);
30  Tensor<> C(4, sizeN4, shapeN4, dw);
31 
32  srand48(13*rank);
33  //* Writes noise to local data based on global index
34  A.get_local_data(&np, &indices, &pairs);
35  for (i=0; i<np; i++ ) pairs[i] = drand48()-.5; //(1.E-3)*sin(indices[i]);
36  A.write(np, indices, pairs);
37  delete [] pairs;
38  free(indices);
39  B.get_local_data(&np, &indices, &pairs);
40  for (i=0; i<np; i++ ) pairs[i] = drand48()-.5; //(1.E-3)*sin(indices[i]);
41  B.write(np, indices, pairs);
42  delete [] pairs;
43  free(indices);
44  C.get_local_data(&np, &indices, &pairs);
45  for (i=0; i<np; i++ ) pairs[i] = drand48()-.5; //(1.E-3)*sin(indices[i]);
46  C.write(np, indices, pairs);
47  delete [] pairs;
48  free(indices);
49 
50 
51 #ifndef TEST_SUITE
52  double time;
53  double t = MPI_Wtime();
54  for (i=0; i<niter; i++){
55  C["ijkl"] += (.3*i)*A["ijmn"]*B["mnkl"];
56  }
57  time = MPI_Wtime()- t;
58  if (rank == 0){
59  double nd = (double)n;
60  double c = 2.E-9;
61  if (sym == SY || sym == AS){
62  c = c/8.;
63  }
64  printf("%lf seconds/GEMM %lf GF\n",
65  time/niter,niter*c*nd*nd*nd*nd*nd*nd/time);
66  printf("Verifying associativity\n");
67  }
68 #endif
69 
70  /* verify D=(A*B)*C = A*(B*C) */
71  Tensor<> D(4, sizeN4, shapeN4, dw);
72 
73  D["ijkl"] = A["ijmn"]*B["mnkl"];
74  D["ijkl"] = D["ijmn"]*C["mnkl"];
75  C["ijkl"] = B["ijmn"]*C["mnkl"];
76  C["ijkl"] = A["ijmn"]*C["mnkl"];
77 
78  C.align(D);
79  C.get_local_data(&np, &indices_BC, &pairs_BC);
80  D.get_local_data(&np, &indices_AB, &pairs_AB);
81  int pass = 1;
82  for (i=0; i<np; i++){
83  if (fabs((double)pairs_BC[i]-(double)pairs_AB[i])>=1.E-6) pass = 0;
84  }
85  delete [] pairs_AB;
86  delete [] pairs_BC;
87  free(indices_AB);
88  free(indices_BC);
89  if (rank == 0){
90  MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
91  if (pass)
92  printf("{ (A[\"ijmn\"]*B[\"mnpq\"])*C[\"pqkl\"] = A[\"ijmn\"]*(B[\"mnpq\"]*C[\"pqkl\"]) } passed\n");
93  else
94  printf("{ (A[\"ijmn\"]*B[\"mnpq\"])*C[\"pqkl\"] = A[\"ijmn\"]*(B[\"mnpq\"]*C[\"pqkl\"]) } failed!\n");
95  } else
96  MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
97  return pass;
98 }
99 
100 
101 #ifndef TEST_SUITE
102 char* getCmdOption(char ** begin,
103  char ** end,
104  const std::string & option){
105  char ** itr = std::find(begin, end, option);
106  if (itr != end && ++itr != end){
107  return *itr;
108  }
109  return 0;
110 }
111 
112 
113 int main(int argc, char ** argv){
114  int rank, np, niter, n, pass;
115  int const in_num = argc;
116  char ** input_str = argv;
117 
118  MPI_Init(&argc, &argv);
119  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
120  MPI_Comm_size(MPI_COMM_WORLD, &np);
121 
122  if (getCmdOption(input_str, input_str+in_num, "-n")){
123  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
124  if (n < 0) n = 7;
125  } else n = 7;
126 
127  if (getCmdOption(input_str, input_str+in_num, "-niter")){
128  niter = atoi(getCmdOption(input_str, input_str+in_num, "-niter"));
129  if (niter < 0) niter = 3;
130  } else niter = 3;
131 
132 
133 
134  {
135  World dw(argc, argv);
136 
137  if (rank == 0){
138  printf("Computing C_ijkl = A_ijmn*B_klmn\n");
139  printf("Non-symmetric: NS = NS*NS gemm:\n");
140  }
141  pass = gemm_4D(n, NS, niter, dw);
142  assert(pass);
143  if (rank == 0){
144  printf("Symmetric: SY = SY*SY gemm:\n");
145  }
146  pass = gemm_4D(n, SY, niter, dw);
147  assert(pass);
148  if (rank == 0){
149  printf("(Anti-)Skew-symmetric: AS = AS*AS gemm:\n");
150  }
151  pass = gemm_4D(n, AS, niter, dw);
152  assert(pass);
153  if (rank == 0){
154  printf("Symmetric-hollow: SH = SH*SH gemm:\n");
155  }
156  pass = gemm_4D(n, SH, niter, dw);
157  assert(pass);
158  }
159 
160  MPI_Finalize();
161  return 0;
162 }
168 #endif
def rank(self)
Definition: core.pyx:312
char * getCmdOption(char **begin, char **end, const std::string &option)
Definition: gemm_4D.cxx:102
Definition: common.h:37
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
string
Definition: core.pyx:456
int gemm_4D(int const n, int const sym, int const niter, World &dw)
Definition: gemm_4D.cxx:11
void align(CTF_int::tensor const &A)
aligns data mapping with tensor A
Definition: tensor.cxx:720
int main(int argc, char **argv)
Definition: gemm_4D.cxx:113
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const
Gives the global indices and values associated with the local data.
Definition: tensor.cxx:159
Definition: apsp.cxx:17
an instance of a tensor within a CTF world
Definition: tensor.h:74
Definition: common.h:37
Definition: common.h:37
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
Definition: tensor.cxx:264
Definition: common.h:37
def np(self)
Definition: core.pyx:315