Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
fast_3mm.cxx
Go to the documentation of this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
2 
3 #include <ctf.hpp>
4 
5 using namespace CTF;
6 
7 int fast_diagram(int const n,
8  World &ctf){
9  int rank, i, num_pes;
10 
11  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
12  MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
13 
14 
15  Matrix<> T(n,n,NS,ctf);
16  Matrix<> V(n,n,NS,ctf);
17  Matrix<> Z_SY(n,n,SY,ctf);
18  Matrix<> Z_AS(n,n,AS,ctf);
19  Matrix<> Z_NS(n,n,NS,ctf);
20  Vector<> Z_D(n,ctf);
21  Matrix<> W(n,n,SH,ctf);
22  Matrix<> W_ans(n,n,SH,ctf);
23 
24  int64_t * indices;
25  double * values;
26  int64_t size;
27  srand48(173*rank);
28 
29  T.read_local(&size, &indices, &values);
30  for (i=0; i<size; i++){
31  values[i] = drand48();
32  }
33  T.write(size, indices, values);
34  free(indices);
35  free(values);
36  V.read_local(&size, &indices, &values);
37  for (i=0; i<size; i++){
38  values[i] = drand48();
39  }
40  V.write(size, indices, values);
41  free(indices);
42  free(values);
43  Z_NS["af"] = T["ae"]*V["ef"];
44  W_ans["ab"] = Z_NS["af"]*T["fb"];
45  Z_AS["af"] = T["ae"]*V["ef"];
46  Z_SY["af"] = T["ae"]*V["ef"];
47 
48  W["ab"] = .5*Z_SY["af"]*T["fb"];
49  W["ab"] += .5*Z_SY["aa"]*T["ab"];
50  W["ab"] += .5*Z_AS["af"]*T["fb"];
51  W["ab"] -= W_ans["ab"];
52 
53  int pass = (W.norm2() <=1.E-10);
54 
55  if (rank == 0){
56  MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
57  if (pass) printf("{ C[\"(ij)\"] = A[\"(ik)\"]*B[\"(kj)\"] } passed\n");
58  else printf("{ C[\"(ij)\"] = A[\"(ik)\"]*B[\"(kj)\"] } failed\n");
59  } else
60  MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
61  return pass;
62 }
63 
64 #ifndef TEST_SUITE
65 char* getCmdOption(char ** begin,
66  char ** end,
67  const std::string & option){
68  char ** itr = std::find(begin, end, option);
69  if (itr != end && ++itr != end){
70  return *itr;
71  }
72  return 0;
73 }
74 
75 
76 int main(int argc, char ** argv){
77  int rank, np, n;
78  int const in_num = argc;
79  char ** input_str = argv;
80 
81  MPI_Init(&argc, &argv);
82  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
83  MPI_Comm_size(MPI_COMM_WORLD, &np);
84 
85  if (getCmdOption(input_str, input_str+in_num, "-n")){
86  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
87  if (n < 0) n = 5;
88  } else n = 5;
89 
90  {
91  World dw(MPI_COMM_WORLD, argc, argv);
92  if (rank == 0){
93  printf("Computing W^(ab)=sum_(ef)T^(ae)*V^(ef)*T^(fb)\n");
94  }
95  int pass = fast_diagram(n, dw);
96  assert(pass);
97  }
98 
99  MPI_Finalize();
100  return 0;
101 }
102 #endif
103 
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18
def rank(self)
Definition: core.pyx:312
Vector class which encapsulates a 1D tensor.
Definition: vector.h:14
Definition: common.h:37
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
void read_local(int64_t *npair, int64_t **global_idx, dtype **data, bool unpack_sym=false) const
Using get_local_data(), which returns an array that must be freed with delete [], is more efficient...
Definition: tensor.cxx:182
string
Definition: core.pyx:456
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
Definition: tensor.h:811
int main(int argc, char **argv)
Definition: fast_3mm.cxx:76
Definition: apsp.cxx:17
Definition: common.h:37
char * getCmdOption(char **begin, char **end, const std::string &option)
Definition: fast_3mm.cxx:65
Definition: common.h:37
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
Definition: tensor.cxx:264
Definition: common.h:37
int fast_diagram(int const n, World &ctf)
Definition: fast_3mm.cxx:7
def np(self)
Definition: core.pyx:315