Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
spr_seq_sum.cxx
Go to the documentation of this file.
1 #include "spr_seq_sum.h"
2 #include "../shared/iter_tsr.h"
3 #include "../shared/util.h"
4 
5 namespace CTF_int{
6  template<int idim>
7  void spA_dnB_seq_sum_loop(char const * alpha,
9  int64_t & size_A,
10  algstrct const * sr_A,
11  char const * beta,
12  char *& B,
13  algstrct const * sr_B,
14  int order_B,
15  int64_t idx_B,
16  int const * edge_len_B,
17  int64_t const * lda_B,
18  int const * sym_B,
19  univar_function const * func){
20  int imax = edge_len_B[idim];
21  if (sym_B[idim] != NS) imax = (idx_B/lda_B[idim+1])%edge_len_B[idim+1];
22 
23  for (int i=0; i<imax; i++){
24  //int nidx_B[order_B];
25  //memcpy(nidx_B, idx_B, order_B*sizeof(int));
26  spA_dnB_seq_sum_loop<idim-1>(alpha,A,size_A,sr_A,beta,B,sr_B,order_B,
27  idx_B+i*lda_B[idim],
28  edge_len_B, lda_B, sym_B, func);
29  }
30  }
31 
32  template<>
33  void spA_dnB_seq_sum_loop<0>(char const * alpha,
35  int64_t & size_A,
36  algstrct const * sr_A,
37  char const * beta,
38  char *& B,
39  algstrct const * sr_B,
40  int order_B,
41  int64_t idx_B,
42  int const * edge_len_B,
43  int64_t const * lda_B,
44  int const * sym_B,
45  univar_function const * func){
46 
47  int imax = edge_len_B[0];
48  if (sym_B[0] != NS) imax = (idx_B/lda_B[0+1])%edge_len_B[0+1];
49 
50  for (int i=0; i<imax; i++){
51  while (size_A > 0 && idx_B == A.k()){
52  if (func == NULL){
53  if (sr_A->isequal(alpha, sr_A->mulid()) || alpha == NULL){
54  sr_B->add(A.d(), B, B);
55  } else {
56  char tmp[sr_A->el_size];
57  sr_A->mul(A.d(), alpha, tmp);
58  sr_B->add(tmp, B, B);
59  }
60  } else {
61  if (sr_A->isequal(alpha, sr_A->mulid()) || alpha == NULL){
62  func->acc_f(A.d(), B, sr_B);
63  } else {
64  char tmp[sr_A->el_size];
65  sr_A->mul(A.d(), alpha, tmp);
66  func->acc_f(tmp, B, sr_B);
67  }
68  }
69  A = A[1];
70  size_A--;
71  }
72  B += sr_B->el_size;
73  idx_B++;
74  }
75  }
76 
77  template
79  (char const * alpha,
81  int64_t & size_A,
82  algstrct const * sr_A,
83  char const * beta,
84  char *& B,
85  algstrct const * sr_B,
86  int order_B,
87  int64_t idx_B,
88  int const * edge_len_B,
89  int64_t const * lda_B,
90  int const * sym_B,
91  univar_function const * func);
92 
93 
94 
95  void spA_dnB_seq_sum(char const * alpha,
96  char const * A,
97  int64_t size_A,
98  algstrct const * sr_A,
99  char const * beta,
100  char * B,
101  algstrct const * sr_B,
102  int order_B,
103  int const * edge_len_B,
104  int const * sym_B,
105  univar_function const * func){
107  if (order_B == 0){
108  if (!sr_B->isequal(beta, sr_B->mulid())){
109  sr_B->mul(beta, B, B);
110  }
111  ConstPairIterator pi(sr_A, A);
112  for (int64_t i=0; i<size_A; i++){
113  char tmp_buf[sr_A->el_size];
114  char const * tmp_ptr;
115  if (alpha != NULL){
116  sr_A->mul(alpha, pi[i].d(), tmp_buf);
117  tmp_ptr = tmp_buf;
118  } else tmp_ptr = pi[i].d();
119  if (func != NULL){
120  func->acc_f(tmp_ptr, B, sr_B);
121  } else {
122  sr_B->add(tmp_ptr, B, B);
123  }
124  }
125  } else {
126  int64_t sz_B = sy_packed_size(order_B, edge_len_B, sym_B);
127  if (!sr_B->isequal(beta, sr_B->mulid())){
128  if (sr_B->isequal(beta, sr_B->addid()) || sr_B->isequal(beta, NULL))
129  sr_B->set(B, sr_B->addid(), sz_B);
130  else
131  sr_B->scal(sz_B, beta, B, 1);
132  }
133 
134  int64_t lda_B[order_B];
135  for (int i=0; i<order_B; i++){
136  if (i==0) lda_B[i] = 1;
137  else lda_B[i] = lda_B[i-1]*edge_len_B[i-1];
138  }
139 
140  ASSERT(order_B<=MAX_ORD);
141 
142  ConstPairIterator pA(sr_A, A);
143  int64_t idx = 0;
144  SWITCH_ORD_CALL(spA_dnB_seq_sum_loop, order_B-1, alpha, pA, size_A, sr_A, beta, B, sr_B, order_B, idx, edge_len_B, lda_B, sym_B, func);
145  }
147  }
148 
149  void dnA_spB_seq_sum(char const * alpha,
150  char const * A,
151  algstrct const * sr_A,
152  int order_A,
153  int const * edge_len_A,
154  int const * sym_A,
155  char const * beta,
156  char const * B,
157  int64_t size_B,
158  char *& new_B,
159  int64_t & new_size_B,
160  algstrct const * sr_B,
161  univar_function const * func){
162  assert(0);
163  }
164 
183  void spspsum(algstrct const * sr_A,
184  int64_t nA,
185  ConstPairIterator prs_A,
186  char const * beta,
187  algstrct const * sr_B,
188  int64_t nB,
189  ConstPairIterator prs_B,
190  char const * alpha,
191  int64_t & nnew,
192  char *& pprs_new,
193  univar_function const * func,
194  int64_t map_pfx){
195 
197  // determine how many unique keys there are in prs_tsr and prs_Write
198  nnew = nB;
199  bool is_acc = (func != NULL && func->is_accumulator());
200  TAU_FSTART(spA_spB_seq_sum_pre);
201  for (int64_t t=0,ww=0; ww<nA*map_pfx; ww++){
202  while (ww<nA*map_pfx){
203  int64_t w = ww/map_pfx;
204  int64_t mw = ww%map_pfx;
205  if (t<nB && prs_B[t].k() < prs_A[w].k()*map_pfx+mw)
206  t++;
207  else if (t<nB && prs_B[t].k() == prs_A[w].k()*map_pfx+mw){
208  t++;
209  ww++;
210  } else {
211  //ASSERT(map_pfx == 1);
212  if (!is_acc && (map_pfx != 1 || ww==0 || prs_A[ww-1].k() != prs_A[ww].k()))
213  nnew++;
214  ww++; w=ww;
215  }
216  }
217  }
218  TAU_FSTOP(spA_spB_seq_sum_pre);
219 // printf("nB = %ld nA = %ld nnew = %ld\n",nB,nA,nnew);
220  pprs_new = sr_B->pair_alloc(nnew);
221  PairIterator prs_new(sr_B, pprs_new);
222  // each for loop computes one new value of prs_new
223  // (multiple writes may contribute to it),
224  // t, w, and n are incremented within
225  // only incrementing r allows multiple writes of the same val
226  int64_t n=0;
227  for (int64_t t=0,ww=0; n<nnew; n++){
228  /*if (n>0){
229  printf("n=%ld\n",n-1);
230  sr_A->print(prs_new[n-1].d());
231  }*/
232  int64_t w = ww/map_pfx;
233  int64_t mw = ww%map_pfx;
234  bool skip = 0;
235  if (t<nB && (w==nA || prs_B[t].k() < prs_A[w].k()*map_pfx+mw)){
236  sr_B->copy_pair(prs_new[n].ptr, prs_B[t].ptr);
237  if (beta != NULL)
238  sr_B->mul(prs_B[t].d(), beta, prs_new[n].d());
239  t++;
240  } else {
241  /*if (t<nB)
242  printf("%ld %ld\n",prs_B[t].k(), prs_A[w].k()*map_pfx+mw);*/
243  if (t>=nB || prs_B[t].k() > prs_A[w].k()*map_pfx+mw){
244  if (func == NULL){
245  if (map_pfx == 1){
246  sr_A->copy_pair(prs_new[n].ptr, prs_A[w].ptr);
247  } else {
248  ((int64_t*)prs_new[n].ptr)[0] = prs_A[w].k()*map_pfx+mw;
249  prs_new[n].write_val(prs_A[w].d());
250  }
251  if (alpha != NULL)
252  sr_A->mul(prs_new[n].d(), alpha, prs_new[n].d());
253  } else {
254  //((int64_t*)prs_new[n].ptr)[0] = prs_A[w].k();
255  if (!is_acc){
256  ((int64_t*)prs_new[n].ptr)[0] = prs_A[w].k()*map_pfx+mw;
257  if (alpha != NULL){
258  char a[sr_A->el_size];
259  sr_A->mul(prs_A[w].d(), alpha, a);
260  //if (sr_B->addid() != NULL){
261  // prs_new[n].write_val(sr_B->addid());
262  func->apply_f(a, prs_new[n].d());
263  // } else {
264 
265  //}
266  } else {
267  // prs_new[n].write_val(sr_B->addid());
268  func->apply_f(prs_A[w].d(), prs_new[n].d());
269  }
270  } else { n--; skip=1; }
271  }
272  ww++;
273  } else {
274  char a[sr_A->el_size];
275  char b[sr_B->el_size];
276  if (alpha != NULL){
277  sr_A->mul(prs_A[w].d(), alpha, a);
278  } else {
279  prs_A[w].read_val(a);
280  }
281  if (beta != NULL){
282  sr_B->mul(prs_B[t].d(), beta, b);
283  } else {
284  prs_B[t].read_val(b);
285  }
286  if (func == NULL){
287  sr_B->add(a, b, b);
288  } else {
289  func->acc_f(a, b, sr_B);
290  }
291  prs_new[n].write_val(b);
292  ((int64_t*)(prs_new[n].ptr))[0] = prs_B[t].k();
293  t++;
294  ww++;
295  }
296  // accumulate any repeated key writes
297  while (map_pfx == 1 && ww > 0 && ww<nA && prs_A[ww].k() == prs_A[ww-1].k()){
298  if (!skip){
299  if (alpha != NULL){
300  char a[sr_A->el_size];
301  sr_A->mul(prs_A[ww].d(), alpha, a);
302  if (func == NULL)
303  sr_B->add(prs_new[n].d(), a, prs_new[n].d());
304  else
305  func->acc_f(a, prs_new[n].d(), sr_B);
306  } else {
307  if (func == NULL)
308  sr_B->add(prs_new[n].d(), prs_A[ww].d(), prs_new[n].d());
309  else
310  func->acc_f(prs_A[ww].d(), prs_new[n].d(), sr_B);
311  }
312  }
313  ww++; w=ww;
314  }
315  }
316  /*if (n>=0){
317  printf("%ldth value is ", n);
318  sr_B->print(prs_new[n].d());
319  printf(" with key %ld\n",prs_new[n].k());
320  }*/
321  }
322  ASSERT(n==nnew);
324  }
325 
326 
327  void spA_spB_seq_sum(char const * alpha,
328  char const * A,
329  int64_t size_A,
330  algstrct const * sr_A,
331  char const * beta,
332  char * B,
333  int64_t size_B,
334  char *& new_B,
335  int64_t & new_size_B,
336  algstrct const * sr_B,
337  univar_function const * func,
338  int64_t map_pfx){
339 
340 /* if (!sr_B->isequal(beta, sr_B->mulid())){
341  printf("scaling B by 0\n");
342  sr_B->scal(size_B, beta, B, 1);
343  }*/
344  spspsum(sr_A, size_A, ConstPairIterator(sr_A, A), beta,
345  sr_B, size_B, ConstPairIterator(sr_B, B),alpha,
346  new_size_B, new_B, func, map_pfx);
347  }
348 
349 }
void dnA_spB_seq_sum(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, char const *beta, char const *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func)
performs summation between two sparse tensors assumes B contain key value pairs sorted by key...
char const * d() const
returns value of pair at head of ptr
Definition: algstrct.cxx:768
void spA_dnB_seq_sum_loop(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
Definition: spr_seq_sum.cxx:7
virtual char * pair_alloc(int64_t n) const
allocate space for n (int64_t,dtype) pairs, necessary for object types
Definition: algstrct.cxx:681
virtual bool isequal(char const *a, char const *b) const
returns true if algstrct elements a and b are equal
Definition: algstrct.cxx:340
#define MAX_ORD
Definition: util.h:103
void spA_dnB_seq_sum(char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, univar_function const *func)
performs summation between two sparse tensors assumes A contains key value pairs sorted by key...
Definition: spr_seq_sum.cxx:95
#define ASSERT(...)
Definition: util.h:88
Definition: common.h:37
untyped internal class for doubly-typed univariate function
Definition: sum_tsr.h:14
virtual char const * addid() const
MPI datatype for pairs.
Definition: algstrct.cxx:89
virtual void copy_pair(char *a, char const *b) const
copies pair b to element a
Definition: algstrct.cxx:542
virtual void set(char *a, char const *b, int64_t n) const
sets n elements of array a to value b
Definition: algstrct.cxx:629
virtual void acc_f(char const *a, char *b, CTF_int::algstrct const *sr_B) const
compute b = b+f(a)
Definition: sum_tsr.h:33
virtual bool is_accumulator() const
Definition: sum_tsr.h:63
int64_t k() const
returns key of pair at head of ptr
Definition: algstrct.cxx:764
#define SWITCH_ORD_CALL(F, act_ord,...)
Definition: util.h:119
#define TAU_FSTOP(ARG)
Definition: util.h:281
#define TAU_FSTART(ARG)
Definition: util.h:280
virtual void scal(int n, char const *alpha, char *X, int incX) const
X["i"]=alpha*X["i"];.
Definition: algstrct.cxx:262
void read_val(char *buf) const
sets value to the value pointed by the iterator
Definition: algstrct.cxx:776
virtual void add(char const *a, char const *b, char *c) const
c = a+b
Definition: algstrct.cxx:109
int el_size
size of each element of algstrct in bytes
Definition: algstrct.h:16
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
Definition: algstrct.h:34
void write_val(char const *buf)
sets value of head pair to what is in buf
Definition: algstrct.cxx:817
virtual void mul(char const *a, char const *b, char *c) const
c = a*b
Definition: algstrct.cxx:120
template void spA_dnB_seq_sum_loop< MAX_ORD >(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
virtual char const * mulid() const
identity element for multiplication i.e. 1
Definition: algstrct.cxx:93
void spA_spB_seq_sum(char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func, int64_t map_pfx)
performs summation between two sparse tensors assumes A and B contain key value pairs sorted by key...
void spspsum(algstrct const *sr_A, int64_t nA, ConstPairIterator prs_A, char const *beta, algstrct const *sr_B, int64_t nB, ConstPairIterator prs_B, char const *alpha, int64_t &nnew, char *&pprs_new, univar_function const *func, int64_t map_pfx)
As pairs in a sparse A set to the sparse set of elements defining the tensor, resulting in a set of s...
void spA_dnB_seq_sum_loop< 0 >(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
Definition: spr_seq_sum.cxx:33
int64_t sy_packed_size(int order, const int *len, const int *sym)
computes the size of a tensor in SY (NOT HOLLOW) packed symmetric layout
Definition: util.cxx:10
virtual void apply_f(char const *a, char *b) const
apply function f to value stored at a
Definition: sum_tsr.h:25