Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
fast_sym.cxx
Go to the documentation of this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
8 #include <ctf.hpp>
9 
10 using namespace CTF;
11 
12 int fast_sym(int const n,
13  World &ctf){
14  int rank, i, num_pes;
15 
16  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
17  MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
18 
19  int len3[] = {n,n,n};
20  int YYN[] = {SY,SY,NS};
21 
22  Matrix<> A(n, n, SH, ctf, "A");
23  Matrix<> B(n, n, SH, ctf, "B");
24  Matrix<> C(n, n, SH, ctf, "C");
25  Matrix<> C_ans(n, n, SH, ctf, "C_ans");
26 
27  //Tensor<> A_rep(3, len3, YYN, ctf);
28  //Tensor<> B_rep(3, len3, YYN, ctf);
29  //Tensor<> Z(3, len3, YYN, ctf);
30  Tensor<> A_rep(3, len3, YYN, ctf, "B_rep");
31  Tensor<> B_rep(3, len3, YYN, ctf, "A_rep");
32  Tensor<> Z(3, len3, YYN, ctf, "Z");
33  Vector<> As(n, ctf, "As");
34  Vector<> Bs(n, ctf, "Bs");
35  Vector<> Cs(n, ctf, "Cs");
36 
37  {
38  int64_t * indices;
39  double * values;
40  int64_t size;
41  srand48(173*rank);
42 
43  A.get_local_data(&size, &indices, &values);
44  for (i=0; i<size; i++){
45  values[i] = drand48();
46  }
47  A.write(size, indices, values);
48  free(indices);
49  delete [] values;
50  B.get_local_data(&size, &indices, &values);
51  for (i=0; i<size; i++){
52  values[i] = drand48();
53  }
54  B.write(size, indices, values);
55  free(indices);
56  delete [] values;
57  }
58  C_ans["ij"] = A["ik"]*B["kj"];
59  A_rep["ijk"] += A["ij"];
60  B_rep["ijk"] += B["ij"];
61  Z["ijk"] += A_rep["ijk"]*B_rep["ijk"];
62  C["ij"] += Z["ijk"];
63  Cs["i"] += A["ik"]*B["ik"];
64  As["i"] += A["ik"];
65  Bs["i"] += B["ik"];
66  C["ij"] -= ((double)n)*A["ij"]*B["ij"];
67  C["ij"] -= Cs["i"];
68  C["ij"] -= As["i"]*B["ij"];
69  C["ij"] -= A["ij"]*Bs["j"];
70  /*A_rep["ijk"] += A["ij"];
71  A_rep["ijk"] += A["ik"];
72  A_rep["ijk"] += A["jk"];
73  B_rep["ijk"] += B["ij"];
74  B_rep["ijk"] += B["ik"];
75  B_rep["ijk"] += B["jk"];
76  Z["ijk"] += A_rep["ijk"]*B_rep["ijk"];
77  C["ij"] += Z["ijk"];
78  C["ij"] += Z["ikj"];
79  C["ij"] += Z["kij"];
80  C["ij"] -= 2.*Z["ijj"];
81 // C["ij"] -= Z["ijj"];
82  Cs["i"] += A["ik"]*B["ik"];
83  As["i"] += A["ik"];
84  As["i"] += A["ki"];
85  Bs["i"] += B["ik"];
86  Bs["i"] += B["ki"];
87  C["ij"] -= ((double)n)*A["ij"]*B["ij"];
88  C["ij"] -= Cs["i"];
89  C["ij"] -= Cs["j"];
90  C["ij"] -= As["i"]*B["ij"];
91  C["ij"] -= A["ij"]*Bs["j"];*/
92 
93  if (n<8){
94  if (rank == 0) printf("A:\n");
95  A.print();
96  if (rank == 0) printf("B:\n");
97  B.print();
98  if (rank == 0) printf("C_ans:\n");
99  C_ans.print();
100  if (rank == 0) printf("C:\n");
101  C.print();
102  }
103  Matrix<> Diff(n, n, SY, ctf, "Diff");
104  Diff["ij"] += C["ij"];
105  Diff["ij"] -= C_ans["ij"];
106  double nrm = sqrt((double)(Diff["ij"]*Diff["ij"]));
107  int pass = (nrm <=1.E-10);
108  if (nrm > 1.E-10 && rank == 0) printf("nrm = %lf\n",nrm);
109 
110  if (rank == 0){
111  MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
112  if (pass) printf("{ C[\"(ij)\"] = A[\"(ik)\"]*B[\"(kj)\"] } passed\n");
113  else printf("{ C[\"(ij)\"] = A[\"(ik)\"]*B[\"(kj)\"] } failed\n");
114  } else
115  MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
116  return pass;
117 }
118 
119 #ifndef TEST_SUITE
120 char* getCmdOption(char ** begin,
121  char ** end,
122  const std::string & option){
123  char ** itr = std::find(begin, end, option);
124  if (itr != end && ++itr != end){
125  return *itr;
126  }
127  return 0;
128 }
129 
130 
131 int main(int argc, char ** argv){
132  int rank, np, n;
133  int const in_num = argc;
134  char ** input_str = argv;
135 
136  MPI_Init(&argc, &argv);
137  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
138  MPI_Comm_size(MPI_COMM_WORLD, &np);
139 
140  if (getCmdOption(input_str, input_str+in_num, "-n")){
141  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
142  if (n < 0) n = 13;
143  } else n = 13;
144 
145  {
146  World dw(MPI_COMM_WORLD, argc, argv);
147  if (rank == 0){
148  printf("Computing C_(ij) = A_(ik)*B_(kj)\n");
149  }
150  int pass = fast_sym(n, dw);
151  assert(pass);
152  }
153 
154  MPI_Finalize();
155  return 0;
156 }
162 #endif
163 
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18
def rank(self)
Definition: core.pyx:312
Vector class which encapsulates a 1D tensor.
Definition: vector.h:14
Definition: common.h:37
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
string
Definition: core.pyx:456
int main(int argc, char **argv)
Definition: fast_sym.cxx:131
char * getCmdOption(char **begin, char **end, const std::string &option)
Definition: fast_sym.cxx:120
void print(FILE *fp, dtype cutoff) const
prints tensor data to file using process 0 (modify print(...) overload in set.h if you would like a d...
Definition: tensor.cxx:407
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const
Gives the global indices and values associated with the local data.
Definition: tensor.cxx:159
int fast_sym(int const n, World &ctf)
Definition: fast_sym.cxx:12
Definition: apsp.cxx:17
an instance of a tensor within a CTF world
Definition: tensor.h:74
Definition: common.h:37
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
Definition: tensor.cxx:264
Definition: common.h:37
def np(self)
Definition: core.pyx:315