Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
fast_sym_4D.cxx
Go to the documentation of this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
8 #include <ctf.hpp>
9 
10 using namespace CTF;
11 
12 int fast_sym_4D(int const n,
13  World &ctf){
14  int rank, num_pes;
15 
16  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
17  MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
18 
19  int len3[] = {n,n,n};
20  int len4[] = {n,n,n,n};
21  int len5[] = {n,n,n,n,n};
22  int NNN[] = {NS,NS,NS};
23  int HNNN[] = {SH,NS,NS,NS};
24  int YYNNN[] = {SY,SY,NS,NS,NS};
25 
26  Tensor<> A(4, len4, HNNN, ctf);
27  Tensor<> B(4, len4, HNNN, ctf);
28  Tensor<> C(4, len4, HNNN, ctf);
29  Tensor<> C_ans(4, len4, HNNN, ctf);
30 
31  Tensor<> A_rep(5, len5, YYNNN, ctf);
32  Tensor<> B_rep(5, len5, YYNNN, ctf);
33  Tensor<> Z(5, len5, YYNNN, ctf);
34  Tensor<> As(3, len3, NNN, ctf);
35  Tensor<> Bs(3, len3, NNN, ctf);
36  Tensor<> Cs(3, len3, NNN, ctf);
37 
38  srand48(rank*347+23);
39 
40  A.fill_random(-1.0, 1.0);
41  B.fill_random(-1.0, 1.0);
42 
43  /*{
44  int64_t * indices;
45  double * values;
46  int64_t size;
47  srand48(173*rank);
48 
49  A.read_local(&size, &indices, &values);
50  for (i=0; i<size; i++){
51  values[i] = drand48();
52  }
53  A.write(size, indices, values);
54  free(indices);
55  free(values);
56  B.read_local(&size, &indices, &values);
57  for (i=0; i<size; i++){
58  values[i] = drand48();
59  }
60  B.write(size, indices, values);
61  free(indices);
62  free(values);
63  }*/
64 
65 
66  C_ans["ijab"] = A["ikal"]*B["kjlb"];
67 
68  A_rep["ijkal"] += A["ijal"];
69  B_rep["ijklb"] += B["ijlb"];
70  Z["ijkab"] += A_rep["ijkal"]*B_rep["ijklb"];
71  C["ijab"] += Z["ijkab"];
72 /* Cs["iab"] += A["ikal"]*B["iklb"];
73  As["ial"] += A["ikal"];
74  Bs["ilb"] += B["iklb"];*/
75  C["ijab"] -= ((double)n)*A["ijal"]*B["ijlb"];
76  C["ijab"] -= A["ikal"]*B["iklb"];
77  C["ijab"] -= A["ikal"]*B["ijlb"];
78  C["ijab"] -= A["ijal"]*B["iklb"];
79 /*
80  A_rep["ijkal"] += A["ijal"];
81  A_rep["ijkal"] += A["ikal"];
82  A_rep["ijkal"] += A["jkal"];
83  B_rep["ijklb"] += B["ijlb"];
84  B_rep["ijklb"] += B["iklb"];
85  B_rep["ijklb"] += B["jklb"];
86  Z["ijkab"] += A_rep["ijkal"]*B_rep["ijklb"];
87  C["ijab"] += Z["ijkab"];
88  C["ijab"] += Z["ikjab"];
89  C["ijab"] += Z["kijab"];
90  C["ijab"] -= Z["ijjab"];
91  C["ijab"] -= Z["iijab"];
92  Cs["iab"] += A["ikal"]*B["iklb"];
93  As["ial"] += A["ikal"];
94  As["ial"] += A["kial"];
95  Bs["ilb"] += B["iklb"];
96  Bs["ilb"] += B["kilb"];
97  C["ijab"] -= ((double)n)*A["ijal"]*B["ijlb"];
98  C["ijab"] -= Cs["iab"];
99  C["ijab"] -= Cs["jab"];
100  C["ijab"] -= As["ial"]*B["ijlb"];
101  C["ijab"] -= A["ijal"]*Bs["jlb"];*/
102 
103  if (n<4){
104  printf("A:\n");
105  A.print();
106  printf("B:\n");
107  B.print();
108  printf("C_ans:\n");
109  C_ans.print();
110  printf("C:\n");
111  C.print();
112  }
113  Tensor<> Diff(4, len4, HNNN, ctf);
114  Diff["ijab"] += C["ijab"];
115  Diff["ijab"] -= C_ans["ijab"];
116  double nrm = Diff.norm2();
117  int pass = (nrm <=1.E-10);
118 
119  if (rank == 0){
120  MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
121  if (pass) printf("{ C[\"(ij)ab\"] = A[\"(ik)al\"]*B[\"(kj)lb\"] } passed\n");
122  else printf("{ C[\"(ij)ab\"] = A[\"(ik)al\"]*B[\"(kj)lb\"] } failed\n");
123  } else
124  MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
125  return pass;
126 }
127 
128 #ifndef TEST_SUITE
129 char* getCmdOption(char ** begin,
130  char ** end,
131  const std::string & option){
132  char ** itr = std::find(begin, end, option);
133  if (itr != end && ++itr != end){
134  return *itr;
135  }
136  return 0;
137 }
138 
139 
140 int main(int argc, char ** argv){
141  int rank, np, n;
142  int const in_num = argc;
143  char ** input_str = argv;
144 
145  MPI_Init(&argc, &argv);
146  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
147  MPI_Comm_size(MPI_COMM_WORLD, &np);
148 
149  if (getCmdOption(input_str, input_str+in_num, "-n")){
150  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
151  if (n < 0) n = 6;
152  } else n = 6;
153 
154  {
155  World dw(MPI_COMM_WORLD, argc, argv);
156  if (rank == 0){
157  printf("Computing C_(ij)ab = A_(ik)al*B_(kj)lb\n");
158  }
159  int pass = fast_sym_4D(n, dw);
160  assert(pass);
161  }
162 
163  MPI_Finalize();
164  return 0;
165 }
171 #endif
172 
char * getCmdOption(char **begin, char **end, const std::string &option)
def rank(self)
Definition: core.pyx:312
Definition: common.h:37
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
string
Definition: core.pyx:456
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
Definition: tensor.h:811
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
Definition: tensor.cxx:928
void print(FILE *fp, dtype cutoff) const
prints tensor data to file using process 0 (modify print(...) overload in set.h if you would like a d...
Definition: tensor.cxx:407
int fast_sym_4D(int const n, World &ctf)
Definition: fast_sym_4D.cxx:12
Definition: apsp.cxx:17
an instance of a tensor within a CTF world
Definition: tensor.h:74
Definition: common.h:37
int main(int argc, char **argv)
Definition: common.h:37
def np(self)
Definition: core.pyx:315