Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
scan.cxx
Go to the documentation of this file.
1 
8 #include <ctf.hpp>
9 #include <float.h>
10 using namespace CTF;
11 
12 template <typename dtype>
14 
15  if (V.order == 1){
16  Matrix<dtype> W(2, V.lens[0], V.lens[0], *V.wrld, *V.sr);
17  dtype mulid = ((dtype*)V.sr->mulid())[0];
18  W["ij"], [=](dtype & a){ a=mulid; };
19  int ssym[] = {SH, NS};
20  int nsym[] = {NS, NS};
21  Tensor<dtype> W1(W, ssym);
22  Tensor<dtype> W2(W1, nsym);
23  V["i"] = W2["ji"]*V["j"];
24  } else {
25  Tensor<dtype> V2(V.order-1, V.lens, *V.wrld, *V.sr);
26  char str[V.order];
27  for (int i=0; i<V.order; i++){ str[i] = 'a'+i; }
28  V2[str+1] += V[str];
29  rec_scan(V2);
30 
31  Matrix<dtype> W(2, V.lens[V.order-1], V.lens[V.order-1], *V.wrld, *V.sr);
32  dtype mulid = ((dtype*)V.sr->mulid())[0];
33  W["ij"], [=](dtype & a){ a=mulid; };
34  int hsym[] = {SH, NS};
35  int nsym[] = {NS, NS};
36  Tensor<dtype> W1(W, hsym);
37  Tensor<dtype> W2(W1, nsym);
38  char str2[V.order];
39  memcpy(str2+1, str+1, V.order-1);
40  str2[0] = 'a'+V.order;
41  char strW[2] = {str2[0],'a'};
42  V[str] = W2[strW]*V[str2];
43  V[str] += V2[str+1];
44  }
45 }
46 
47 template<typename dtype>
48 void scan(Vector<dtype> & v, int logn){
49  int64_t np;
50  int64_t * inds;
51  double * data;
52 
53  int lens[logn];
54  std::fill(lens, lens+logn, 2);
55 
56  // represent vector to scan as 2-by-...-by-2 tensor
57  Tensor<dtype> V(logn, lens, *v.wrld, *v.sr);
58 
59  v.get_local_data(&np, &inds, &data);
60  V.write(np, inds, data);
61 
62  free(inds);
63  delete [] data;
64 
65  rec_scan(V);
66 
67  // put the data from the tensor back into the vector
68  V.get_local_data(&np, &inds, &data);
69  v.write(np, inds, data);
70 
71  free(inds);
72  delete [] data;
73 }
74 
75 int scan_test(int logn,
76  World & dw){
77 
78  Vector<> v(1<<logn, dw);
79 
80  srand48(dw.rank*27);
81  v.fill_random(0.0, 1.0);
82 
83  double start_data[1<<logn];
84 
85  v.read_all(start_data);
86 
87  scan(v, logn);
88 
89  double data[1<<logn];
90 
91  v.read_all(data);
92 
93  int pass = 1;
94  for (int i=1; i<1<<logn; i++){
95  if (std::abs(data[i] - start_data[i-1] - data[i-1]) >= 1.E-9*(1<<logn)) pass = 0;
96  }
97  if (dw.rank == 0){
98  if (pass)
99  printf("{ scan via tensor contractions } passed \n");
100  else
101  printf("{ scan via tensor contractions } failed \n");
102  }
103  return pass;
104 }
105 
106 
107 #ifndef TEST_SUITE
108 char* getCmdOption(char ** begin,
109  char ** end,
110  const std::string & option){
111  char ** itr = std::find(begin, end, option);
112  if (itr != end && ++itr != end){
113  return *itr;
114  }
115  return 0;
116 }
117 
118 
119 int main(int argc, char ** argv){
120  int rank, np, logn, pass;
121  int const in_num = argc;
122  char ** input_str = argv;
123 
124  MPI_Init(&argc, &argv);
125  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
126  MPI_Comm_size(MPI_COMM_WORLD, &np);
127 
128  if (getCmdOption(input_str, input_str+in_num, "-logn")){
129  logn = atoi(getCmdOption(input_str, input_str+in_num, "-logn"));
130  if (logn < 0) logn = 4;
131  } else logn = 4;
132 
133 
134  {
135  World dw(argc, argv);
136 
137  if (rank == 0){
138  printf("Running scan on dimension %d vector\n",1<<logn);
139  }
140  pass = scan_test(logn, dw);
141  assert(pass);
142  }
143 
144  MPI_Finalize();
145  return 0;
146 }
152 #endif
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18
def rank(self)
Definition: core.pyx:312
Vector class which encapsulates a 1D tensor.
Definition: vector.h:14
void read_all(int64_t *npair, dtype **data, bool unpack=false)
collects the entire tensor data on each process (not memory scalable)
Definition: tensor.cxx:377
Definition: common.h:37
char * getCmdOption(char **begin, char **end, const std::string &option)
Definition: scan.cxx:108
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
int order
number of tensor dimensions
string
Definition: core.pyx:456
CTF::World * wrld
distributed processor context on which tensor is defined
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
Definition: tensor.cxx:928
int rank
rank of local processor
Definition: world.h:24
int * lens
unpadded tensor edge lengths
void rec_scan(Tensor< dtype > &V)
Definition: scan.cxx:13
algstrct * sr
algstrct on which tensor elements and operations are defined
void scan(Vector< dtype > &v, int logn)
Definition: scan.cxx:48
def abs(initA)
Definition: core.pyx:5440
int scan_test(int logn, World &dw)
Definition: scan.cxx:75
int main(int argc, char **argv)
Definition: scan.cxx:119
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const
Gives the global indices and values associated with the local data.
Definition: tensor.cxx:159
Definition: apsp.cxx:17
an instance of a tensor within a CTF world
Definition: tensor.h:74
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
Definition: tensor.cxx:264
Definition: common.h:37
def np(self)
Definition: core.pyx:315