Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
block_sparse.cxx
#include <ctf.hpp>
#include <float.h>
using namespace CTF;

namespace CTF {
  // print specialization so that a Tensor<>-valued element of a Matrix< Tensor<> >
  // is displayed via Tensor::print()
  template <>
  inline void Set< Tensor<>, false >::print(char const * a, FILE * fp) const {
    ((Tensor<>*)a)->print(fp);
  }
}

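// assemble a block-sparse matrix, stored as a sparse Matrix whose nonzero elements are
// dense Tensor<> blocks, into a single flat sparse Matrix<> of scalars; ranges[i] gives
// the number of rows/columns contributed by block index i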
Matrix<> flatten_block_sparse_matrix(Matrix< Tensor<> > A, std::vector<int> ranges, World & dw){
  int64_t nrng = ranges.size();
  int64_t range_sum = 0;
  // prefix sums of the ranges give the offset of each block row/column in the flat matrix
  int64_t range_pfx[nrng];
  for (int64_t i=0; i<nrng; i++){
    if (i==0) range_pfx[i] = 0;
    else      range_pfx[i] = range_sum;
    range_sum += ranges[i];
  }

  Matrix<> flatA(range_sum, range_sum, SP, dw);

  // the block-sparse matrix A lives on a single-processor world, so every rank sees
  // all blocks via get_local_data(), while the blocks themselves are distributed over dw
  assert(A.wrld->np == 1);
  int64_t nblk;
  int64_t * blk_inds;
  Tensor<> * A_blks;

  A.get_local_data(&nblk, &blk_inds, &A_blks, true);

  int zeros[] = {0,0};
  int offs[2];
  int ends[2];
  for (int64_t b=0; b<nblk; b++){
    assert(A_blks[b].order == 2);
    // decode the global key (row + nrng*col) of block b within the nrng-by-nrng block grid
    int64_t i = blk_inds[b] / nrng;
    int64_t j = blk_inds[b] % nrng;
    offs[0] = range_pfx[j];
    offs[1] = range_pfx[i];
    ends[0] = range_pfx[j]+ranges[j];
    ends[1] = range_pfx[i]+ranges[i];
    // write the block with row index j and column index i into its slice of the flat matrix
    flatA.slice(offs,ends,0.0,A_blks[b],zeros,A_blks[b].lens,1.0);
  }
  free(blk_inds);
  delete [] A_blks;
  return flatA;
}

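/**
 * \brief perform block sparse matrix-matrix product
 * \param[in] ranges sizes of the blocks
 * \param[in] dw CTF world (set of processors) on which to execute
 */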
int block_sparse(std::vector<int> ranges, World & dw){

  // define a Monoid over tensors that understands how to add two tensor blocks:
  // the sum has the larger of the two orders, and each mode takes its length and
  // symmetry from whichever operand defines it
  Monoid< Tensor<>, false >
    tmon(Scalar<>(0.0,dw),
      [](Tensor<> A, Tensor<> B){
        int order_C = std::max(A.order,B.order);
        int lens_C[order_C];
        int sym_C[order_C];
        char idx_A[order_C];
        char idx_B[order_C];
        char idx_C[order_C];
        for (int i=0; i<order_C; i++){
          sym_C[i] = NS;
          lens_C[i] = -1;
          if (A.order > i){
            lens_C[i] = A.lens[i];
            sym_C[i] = A.sym[i];
            idx_A[i] = 'i'+i;
          }
          if (B.order > i){
            assert(lens_C[i] == B.lens[i]);
            if (B.sym[i] != NS){
              assert(sym_C[i] == B.sym[i]);
            } else {
              sym_C[i] = NS;
            }
            lens_C[i] = B.lens[i];
            idx_B[i] = 'i'+i;
          }
          idx_C[i] = 'i'+i;
        }
        // the sum is sparse if the higher-order operand is sparse
        int sp_C = (A.is_sparse && (A.order >= B.order)) || (B.is_sparse && (B.order >= A.order));
        Tensor<> C(order_C, sp_C, lens_C, sym_C);
        C[idx_C] += A[idx_A] + B[idx_B];
        return C;
      },
      MPI_SUM); // the MPI op is not really valid, but it is never used as long as the
                // Monoid is only employed on a world with a single processor

  int nblk = ranges.size();

  World self_world(MPI_COMM_SELF);
  Matrix< Tensor<> > A(nblk, nblk, SP, self_world, tmon);
  Matrix< Tensor<> > B(nblk, nblk, SP, self_world, tmon);

  // use the same integer random seed on all processors, so they generate the same set of blocks
  srand(1000);
  // use a different floating-point random seed on each processor, so the locally owned
  // elements of each block are filled with different values
  srand48(dw.rank);

  // create one random nonzero block per block column of A, writing (key, block) pairs
  CTF::Pair< Tensor<> > * A_blks = new CTF::Pair< Tensor<> >[nblk];
  for (int64_t i=0; i<nblk; i++){
    int64_t j = rand()%nblk;
    A_blks[i].k = j + nblk*i; // key encodes block row j and block column i
    A_blks[i].d = Matrix<>(ranges[j],ranges[i],dw);
    A_blks[i].d.fill_random(0,1);
  }
  A.write(nblk,A_blks);
  delete [] A_blks;
  // create one random nonzero block per block row of B, with keys and values in separate arrays
  int64_t * B_blk_inds = new int64_t[nblk];
  Tensor<> * B_blks = new Tensor<>[nblk];
  for (int64_t i=0; i<nblk; i++){
    int64_t j = rand()%nblk;
    B_blk_inds[i] = i + j*nblk; // key encodes block row i and block column j
    B_blks[i] = Matrix<>(ranges[i],ranges[j],dw);
    B_blks[i].fill_random(1.,1.); // fill the block with ones
  }
  B.write(nblk,B_blk_inds,B_blks);
  delete [] B_blks;
  delete [] B_blk_inds;
  Matrix< Tensor<> > C(nblk, nblk, SP, self_world, tmon);

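  // the block-sparse product: the CTF Function<> below is applied to each pair of
  // structurally matching blocks A(i,k) and B(k,j); the lambda multiplies the two dense
  // blocks, and contributions to the same output block C(i,j) are accumulated using the
  // Monoid tmon with which C was constructed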
  C["ij"] = Function< Tensor<> >(
              [](Tensor<> mA, Tensor<> mB){
                assert(mA.order == 2 && mB.order == 2);
                Matrix<> mC(mA.lens[0], mB.lens[1]);
                mC["ij"] += mA["ik"]*mB["kj"];
                return mC;
              }
            )(A["ik"],B["kj"]);

  // verify the result: flatten A, B, and C to ordinary sparse matrices, form the
  // reference product of the flattened operands, and compare against the flattened C
  Matrix<> refA = flatten_block_sparse_matrix(A,ranges,dw);
  Matrix<> refB = flatten_block_sparse_matrix(B,ranges,dw);
  Matrix<> cmpC = flatten_block_sparse_matrix(C,ranges,dw);

  Matrix<> refC(cmpC);

  refC["ij"] = refA["ik"]*refB["kj"];

  /*refA.print_matrix();
  refB.print_matrix();
  cmpC.print_matrix();
  refC.print_matrix();*/

  refC["ij"] -= cmpC["ij"];
  double err_nrm = refC.norm2();

  bool pass = err_nrm <= 1.e-4;

  return pass;
}

#ifndef TEST_SUITE
// return the argument following a command-line option, or NULL if the option is absent
char* getCmdOption(char ** begin,
                   char ** end,
                   const std::string & option){
  char ** itr = std::find(begin, end, option);
  if (itr != end && ++itr != end){
    return *itr;
  }
  return 0;
}

int main(int argc, char ** argv){
  int rank, np, n, pass, r;
  int const in_num = argc;
  char ** input_str = argv;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &np);

  // -n sets the size of each block range (default 7)
  if (getCmdOption(input_str, input_str+in_num, "-n")){
    n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
    if (n < 0) n = 7;
  } else n = 7;

  // -r sets the number of block ranges (default 10)
  if (getCmdOption(input_str, input_str+in_num, "-r")){
    r = atoi(getCmdOption(input_str, input_str+in_num, "-r"));
    if (r < 0) r = 10;
  } else r = 10;

  {
    World dw(argc, argv);

    if (rank == 0){
      printf("Computing block-sparse matrix product with %d block ranges, all of size %d... ",r,n);
    }
    std::vector<int> ranges;
    for (int i=0; i<r; i++){
      ranges.push_back(n);
    }
    pass = block_sparse(ranges, dw);
    if (rank == 0){
      if (pass){
        printf("successful, answer correct.\n");
      } else {
        printf("failed, answer wrong.\n");
      }
    }
    assert(pass);
  }

  MPI_Finalize();
  return 0;
}
#endif
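
The driver above exercises only uniform block sizes; block_sparse itself accepts non-uniform ranges. A minimal sketch of such a call (the block sizes and surrounding setup are illustrative assumptions, not part of this example):

  // hypothetical usage with unequal block ranges; assumes an initialized World dw
  std::vector<int> uneven_ranges = {4, 7, 5};
  int ok = block_sparse(uneven_ranges, dw); // returns 1 when the block-sparse product matches the flat reference
  assert(ok);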