Cyclops Tensor Framework — parallel arithmetic on multidimensional arrays.
Source listing of sym_seq_sum.cxx; see the generated documentation for this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
2 
3 #include "../shared/iter_tsr.h"
4 #include "../shared/util.h"
5 #include <limits.h>
6 #include "sym_seq_sum.h"
7 
8 namespace CTF_int {
9 
  /**
   * \brief one level of the unrolled loop nest for the summation
   *        B[idx_map_B] += alpha*A[idx_map_A]: iterates global index idim
   *        over its symmetry-restricted range and recurses to depth idim-1;
   *        the explicit <0> specialization performs the actual accumulation.
   * \param[in] alpha scaling factor for A (consumed by the depth-0 kernel)
   * \param[in] A data of summand tensor, already offset for outer indices
   * \param[in] sr_A algebraic structure defining element ops of A
   * \param[in] order_A number of dimensions of A
   * \param[in] edge_len_A edge lengths of A
   * \param[in] sym_A symmetry relations between adjacent dimensions of A
   * \param[in] idx_map_A map from dimensions of A to global indices
   * \param[in] offsets_A offsets_A[d][i] = byte offset into A when global
   *            index d takes value i (precomputed by compute_syoffs)
   * \param[in,out] B data of output tensor, already offset for outer indices
   * \param[in] sr_B algebraic structure defining element ops of B
   * \param[in] order_B number of dimensions of B
   * \param[in] edge_len_B edge lengths of B
   * \param[in] sym_B symmetry relations between adjacent dimensions of B
   * \param[in] idx_map_B map from dimensions of B to global indices
   * \param[in] offsets_B analogous to offsets_A, for B
   * \param[in] func optional elementwise function (used at depth 0)
   * \param[in] idx current values of all global indices
   * \param[in] rev_idx_map for each global index, its dimension in A and in B
   *            (2 entries per index, -1 when absent from that tensor)
   * \param[in] idx_max total number of distinct global indices
   */
  template <int idim>
  void sym_seq_sum_loop(char const *            alpha,
                        char const *            A,
                        algstrct const *        sr_A,
                        int                     order_A,
                        int const *             edge_len_A,
                        int const *             sym_A,
                        int const *             idx_map_A,
                        uint64_t *const*        offsets_A,
                        char *                  B,
                        algstrct const *        sr_B,
                        int                     order_B,
                        int const *             edge_len_B,
                        int const *             sym_B,
                        int const *             idx_map_B,
                        uint64_t *const*        offsets_B,
                        univar_function const * func,
                        int const *             idx,
                        int const *             rev_idx_map,
                        int                     idx_max){
    int imax=0;
    // dimension of A and of B mapped to global index idim (-1 if absent)
    int rA = rev_idx_map[2*idim+0];
    int rB = rev_idx_map[2*idim+1];

    // default range of this index: the full edge length
    if (rA != -1)
      imax = edge_len_A[rA];
    else if (rB != -1)
      imax = edge_len_B[rB];

    // tighten the upper bound when idim sits inside a symmetric group of A:
    // scan forward through the group for a partner index already fixed above
    if (rA != -1 && sym_A[rA] != NS){
      int rrA = rA;
      do {
        if (idx_map_A[rrA+1] > idim)
          imax = idx[idx_map_A[rrA+1]]+1;
        rrA++;
      } while (sym_A[rrA] != NS && idx_map_A[rrA] < idim);
    }

    // same for B; take the tighter of the two bounds
    if (rB != -1 && sym_B[rB] != NS){
      int rrB = rB;
      do {
        if (idx_map_B[rrB+1] > idim)
          imax = std::min(imax,idx[idx_map_B[rrB+1]]+1);
        rrB++;
      } while (sym_B[rrB] != NS && idx_map_B[rrB] < idim);
    }

    int imin = 0;

    // raise the lower bound when the preceding dimension of A is symmetric
    // with this one and its index is already fixed
    if (rA > 0 && sym_A[rA-1] != NS){
      int rrA = rA;
      do {
        if (idx_map_A[rrA-1] > idim)
          imin = idx[idx_map_A[rrA-1]];
        rrA--;
      } while (rrA>0 && sym_A[rrA-1] != NS && idx_map_A[rrA] < idim);
    }

    // same for B; take the larger lower bound
    if (rB > 0 && sym_B[rB-1] != NS){
      int rrB = rB;
      do {
        if (idx_map_B[rrB-1] > idim)
          imin = std::max(imin,idx[idx_map_B[rrB-1]]);
        rrB--;
      } while (rrB>0 && sym_B[rrB-1] != NS && idx_map_B[rrB] < idim);
    }

    if (rB != -1){
      // NOTE(review): '#pragma omp for' appears here without an enclosing
      // parallel region, while '#pragma omp parallel' opens inside the loop
      // body below — confirm this pragma nesting is intended.
#ifdef USE_OMP
  #pragma omp for
#endif
      for (int i=imin; i<imax; i++){
#ifdef USE_OMP
  #pragma omp parallel
#endif
        {
          // copy the index tuple, set this level's value, and recurse with
          // A and B advanced by the precomputed per-index byte offsets
          int nidx[idx_max];
          memcpy(nidx, idx, idx_max*sizeof(int));
          nidx[idim] = i;
          sym_seq_sum_loop<idim-1>(alpha, A+offsets_A[idim][nidx[idim]], sr_A, order_A, edge_len_A, sym_A, idx_map_A, offsets_A, B+offsets_B[idim][nidx[idim]], sr_B, order_B, edge_len_B, sym_B, idx_map_B, offsets_B, func, nidx, rev_idx_map, idx_max);
        }
      }
    } else {
      // index absent from B: plain sequential recursion
      for (int i=imin; i<imax; i++){
        int nidx[idx_max];
        memcpy(nidx, idx, idx_max*sizeof(int));
        nidx[idim] = i;
        sym_seq_sum_loop<idim-1>(alpha, A+offsets_A[idim][nidx[idim]], sr_A, order_A, edge_len_A, sym_A, idx_map_A, offsets_A, B+offsets_B[idim][nidx[idim]], sr_B, order_B, edge_len_B, sym_B, idx_map_B, offsets_B, func, nidx, rev_idx_map, idx_max);
      }

    }
// idx[idim] = 0;
  }
103 
104 
105  template <>
107  (char const * alpha,
108  char const * A,
109  algstrct const * sr_A,
110  int order_A,
111  int const * edge_len_A,
112  int const * sym_A,
113  int const * idx_map_A,
114  uint64_t *const* offsets_A,
115  char * B,
116  algstrct const * sr_B,
117  int order_B,
118  int const * edge_len_B,
119  int const * sym_B,
120  int const * idx_map_B,
121  uint64_t *const* offsets_B,
122  univar_function const * func,
123  int const * idx,
124  int const * rev_idx_map,
125  int idx_max){
126  int imax=0;
127  int rA = rev_idx_map[0];
128  int rB = rev_idx_map[1];
129 
130  if (rA != -1)
131  imax = edge_len_A[rA];
132  else if (rB != -1)
133  imax = edge_len_B[rB];
134 
135  if (rA != -1 && sym_A[rA] != NS)
136  imax = idx[idx_map_A[rA+1]]+1;
137  if (rB != -1 && sym_B[rB] != NS)
138  imax = std::min(imax,idx[idx_map_B[rB+1]]+1);
139 
140  int imin = 0;
141 
142  if (rA > 0 && sym_A[rA-1] != NS)
143  imin = idx[idx_map_A[rA-1]];
144  if (rB > 0 && sym_B[rB-1] != NS)
145  imin = std::max(imin,idx[idx_map_B[rB-1]]);
146 
147  if (func == NULL){
148  if (alpha == NULL){
149  for (int i=imin; i<imax; i++){
150  sr_B->add(A+offsets_A[0][i],
151  B+offsets_B[0][i],
152  B+offsets_B[0][i]);
153  }
154  CTF_FLOPS_ADD(imax-imin);
155  } else {
156  for (int i=imin; i<imax; i++){
157  char tmp[sr_A->el_size];
158  sr_A->mul(A+offsets_A[0][i],
159  alpha,
160  tmp);
161  sr_B->add(tmp,
162  B+offsets_B[0][i],
163  B+offsets_B[0][i]);
164  }
165  CTF_FLOPS_ADD(2*(imax-imin));
166  }
167  } else assert(0); //FIXME else
168  }
169 
170  template
172  (char const * alpha,
173  char const * A,
174  algstrct const * sr_A,
175  int order_A,
176  int const * edge_len_A,
177  int const * sym_A,
178  int const * idx_map_A,
179  uint64_t *const* offsets_A,
180  char * B,
181  algstrct const * sr_B,
182  int order_B,
183  int const * edge_len_B,
184  int const * sym_B,
185  int const * idx_map_B,
186  uint64_t *const* offsets_B,
187  univar_function const * func,
188  int const * idx,
189  int const * rev_idx_map,
190  int idx_max);
191 
192 
193  void compute_syoffs(algstrct const * sr_A,
194  int order_A,
195  int const * edge_len_A,
196  int const * sym_A,
197  int const * idx_map_A,
198  algstrct const * sr_B,
199  int order_B,
200  int const * edge_len_B,
201  int const * sym_B,
202  int const * idx_map_B,
203  int tot_order,
204  int const * rev_idx_map,
205  uint64_t **& offsets_A,
206  uint64_t **& offsets_B){
208  offsets_A = (uint64_t**)CTF_int::alloc(sizeof(uint64_t*)*tot_order);
209  offsets_B = (uint64_t**)CTF_int::alloc(sizeof(uint64_t*)*tot_order);
210 
211  for (int idim=0; idim<tot_order; idim++){
212  int len=0;
213 
214  int rA = rev_idx_map[2*idim+0];
215  int rB = rev_idx_map[2*idim+1];
216 
217  if (rA != -1)
218  len = edge_len_A[rA];
219  else if (rB != -1)
220  len = edge_len_B[rB];
221 
222  offsets_A[idim] = (uint64_t*)CTF_int::alloc(sizeof(uint64_t)*len);
223  offsets_B[idim] = (uint64_t*)CTF_int::alloc(sizeof(uint64_t)*len);
224  compute_syoff(rA, len, sr_A, edge_len_A, sym_A, offsets_A[idim]);
225  compute_syoff(rB, len, sr_B, edge_len_B, sym_B, offsets_B[idim]);
226  }
228  }
229 
230 
  // Pre-scales every (symmetry-unique) element of B by beta before the
  // summation accumulates into it; no-op when beta is the multiplicative
  // identity.  Expects the caller's locals to be in scope: sr_B, beta, B,
  // idx_glb, idx_max, idx, idx_A, idx_B, imin, imax, sym_pass, order_B,
  // rev_idx_map, and the iteration macros GET_MIN_MAX/CHECK_SYM/RESET_IDX.
  // The inner for(;;) walks the packed index space of B exactly like the
  // main loops of sym_seq_sum_ref below.
  #define SCAL_B do { \
  if (!sr_B->isequal(beta, sr_B->mulid())){ \
    memset(idx_glb, 0, sizeof(int)*idx_max); \
    idx_A = 0, idx_B = 0; \
    sym_pass = 1; \
    for (;;){ \
      if (sym_pass){ \
        sr_B->mul(beta, B+idx_B*sr_B->el_size, B+idx_B*sr_B->el_size); \
        CTF_FLOPS_ADD(1); \
      } \
      for (idx=0; idx<idx_max; idx++){ \
        imin = 0, imax = INT_MAX; \
        GET_MIN_MAX(B,1,2); \
        if (rev_idx_map[2*idx+1] == -1) imax = imin+1; \
        idx_glb[idx]++; \
        if (idx_glb[idx] >= imax){ \
          idx_glb[idx] = imin; \
        } \
        if (idx_glb[idx] != imin) { \
          break; \
        } \
      } \
      if (idx == idx_max) break; \
      CHECK_SYM(B); \
      if (!sym_pass) continue; \
      if (order_B > 0) \
        RESET_IDX(B); \
    } \
  } } while (0)
260 
261 
  // Blocked variant of SCAL_B for tensors whose elements are inner blocks of
  // inr_stride scalars: scales each block of B by beta via sr_B->scal instead
  // of a single sr_B->mul.  Same scope requirements as SCAL_B, plus
  // inr_stride; no-op when beta is the multiplicative identity.
  #define SCAL_B_inr do { \
  if (!sr_B->isequal(beta, sr_B->mulid())){ \
    memset(idx_glb, 0, sizeof(int)*idx_max); \
    idx_A = 0, idx_B = 0; \
    sym_pass = 1; \
    for (;;){ \
      if (sym_pass){ \
        sr_B->scal(inr_stride, beta, B+idx_B*inr_stride*sr_B->el_size, 1); \
        CTF_FLOPS_ADD(inr_stride); \
      } \
      for (idx=0; idx<idx_max; idx++){ \
        imin = 0, imax = INT_MAX; \
        GET_MIN_MAX(B,1,2); \
        if (rev_idx_map[2*idx+1] == -1) imax = imin+1; \
        idx_glb[idx]++; \
        if (idx_glb[idx] >= imax){ \
          idx_glb[idx] = imin; \
        } \
        if (idx_glb[idx] != imin) { \
          break; \
        } \
      } \
      if (idx == idx_max) break; \
      CHECK_SYM(B); \
      if (!sym_pass) continue; \
      if (order_B > 0) \
        RESET_IDX(B); \
    } \
  } } while (0)
291 
292  int sym_seq_sum_ref( char const * alpha,
293  char const * A,
294  algstrct const * sr_A,
295  int order_A,
296  int const * edge_len_A,
297  int const * sym_A,
298  int const * idx_map_A,
299  char const * beta,
300  char * B,
301  algstrct const * sr_B,
302  int order_B,
303  int const * edge_len_B,
304  int const * sym_B,
305  int const * idx_map_B){
307  int idx, i, idx_max, imin, imax, iA, iB, j, k;
308  int off_idx, sym_pass;
309  int * rev_idx_map;
310  int * dlen_A, * dlen_B;
311  int64_t idx_A, idx_B, off_lda;
312 
313  inv_idx(order_A, idx_map_A,
314  order_B, idx_map_B,
315  &idx_max, &rev_idx_map);
316 
317  bool rep_idx = false;
318  for (i=0; i<order_A; i++){
319  for (j=0; j<order_A; j++){
320  if (i!=j && idx_map_A[i] == idx_map_A[j]) rep_idx = true;
321  }
322  }
323  for (i=0; i<order_B; i++){
324  for (j=0; j<order_B; j++){
325  if (i!=j && idx_map_B[i] == idx_map_B[j]) rep_idx = true;
326  }
327  }
328 
329  dlen_A = (int*)CTF_int::alloc(sizeof(int)*order_A);
330  dlen_B = (int*)CTF_int::alloc(sizeof(int)*order_B);
331  memcpy(dlen_A, edge_len_A, sizeof(int)*order_A);
332  memcpy(dlen_B, edge_len_B, sizeof(int)*order_B);
333 
334  int * idx_glb = (int*)CTF_int::alloc(sizeof(int)*idx_max);
335  memset(idx_glb, 0, sizeof(int)*idx_max);
336  //FIXME do via scal()
338  if (rep_idx)
339  SCAL_B;
340  else {
341  int64_t sz_B = sy_packed_size(order_B, edge_len_B, sym_B);
342  if (beta != NULL || sr_B->mulid() != NULL){
343  if (beta == NULL || sr_B->isequal(beta, sr_B->addid()))
344  sr_B->set(B, sr_B->addid(), sz_B);
345  else if (!sr_B->isequal(beta, sr_B->mulid()))
346  sr_B->scal(sz_B, beta, B, 1);
347  }
348  }
349  TAU_FSTOP(SCAL_B);
350 
351  memset(idx_glb, 0, sizeof(int)*idx_max);
352  if (!rep_idx && idx_max>0 && idx_max <= MAX_ORD){
353  uint64_t ** offsets_A;
354  uint64_t ** offsets_B;
355  compute_syoffs(sr_A, order_A, edge_len_A, sym_A, idx_map_A, sr_B, order_B, edge_len_B, sym_B, idx_map_B, idx_max, rev_idx_map, offsets_A, offsets_B);
356  if (order_B > 1 || (order_B > 0 && idx_map_B[0] != 0)){
357 #ifdef USE_OMP
358  #pragma omp parallel
359 #endif
360  {
361  int * nidx_glb = (int*)CTF_int::alloc(sizeof(int)*idx_max);
362  memset(nidx_glb, 0, sizeof(int)*idx_max);
363 
364  SWITCH_ORD_CALL(sym_seq_sum_loop, idx_max-1, alpha, A, sr_A, order_A, edge_len_A, sym_A, idx_map_A, offsets_A, B, sr_B, order_B, edge_len_B, sym_B, idx_map_B, offsets_B, NULL, nidx_glb, rev_idx_map, idx_max);
365  cdealloc(nidx_glb);
366  }
367  } else {
368  SWITCH_ORD_CALL(sym_seq_sum_loop, idx_max-1, alpha, A, sr_A, order_A, edge_len_A, sym_A, idx_map_A, offsets_A, B, sr_B, order_B, edge_len_B, sym_B, idx_map_B, offsets_B, NULL, idx_glb, rev_idx_map, idx_max);
369  }
370  for (int l=0; l<idx_max; l++){
371  cdealloc(offsets_A[l]);
372  cdealloc(offsets_B[l]);
373  }
374  cdealloc(offsets_A);
375  cdealloc(offsets_B);
376  } else {
377  idx_A = 0, idx_B = 0;
378  sym_pass = 1;
379  for (;;){
380  if (sym_pass){
381  /* printf("B[%d] = %lf*(A[%d]=%lf)+%lf*(B[%d]=%lf\n",
382  idx_B,alpha,idx_A,A[idx_A],beta,idx_B,B[idx_B]);*/
383 // printf("adding to %d ",idx_B); sr_B->print(B+sr_B->el_size*idx_B); printf("\n");
384  if (alpha != NULL){
385  char tmp[sr_A->el_size];
386  sr_A->mul(A+sr_A->el_size*idx_A, alpha, tmp);
387  sr_B->add(tmp, B+sr_B->el_size*idx_B, B+sr_B->el_size*idx_B);
388  CTF_FLOPS_ADD(2);
389  } else {
390  sr_B->add(A+sr_A->el_size*idx_A, B+sr_B->el_size*idx_B, B+sr_B->el_size*idx_B);
391  CTF_FLOPS_ADD(1);
392  }
393 // printf("computed %d ",idx_B); sr_B->print(B+sr_B->el_size*idx_B); printf("\n");
394  }
395 
396  for (idx=0; idx<idx_max; idx++){
397  imin = 0, imax = INT_MAX;
398 
399  GET_MIN_MAX(A,0,2);
400  GET_MIN_MAX(B,1,2);
401 
402  ASSERT(idx_glb[idx] >= imin && idx_glb[idx] < imax);
403 
404  idx_glb[idx]++;
405 
406  if (idx_glb[idx] >= imax){
407  idx_glb[idx] = imin;
408  }
409  if (idx_glb[idx] != imin) {
410  break;
411  }
412  }
413  if (idx == idx_max) break;
414 
415  CHECK_SYM(A);
416  if (!sym_pass) continue;
417  CHECK_SYM(B);
418  if (!sym_pass) continue;
419 
420  if (order_A > 0)
421  RESET_IDX(A);
422  if (order_B > 0)
423  RESET_IDX(B);
424  }
425  }
426  CTF_int::cdealloc(dlen_A);
427  CTF_int::cdealloc(dlen_B);
428  CTF_int::cdealloc(idx_glb);
429  CTF_int::cdealloc(rev_idx_map);
431  return 0;
432  }
433 
434  int sym_seq_sum_inr( char const * alpha,
435  char const * A,
436  algstrct const * sr_A,
437  int order_A,
438  int const * edge_len_A,
439  int const * sym_A,
440  int const * idx_map_A,
441  char const * beta,
442  char * B,
443  algstrct const * sr_B,
444  int order_B,
445  int const * edge_len_B,
446  int const * sym_B,
447  int const * idx_map_B,
448  int inr_stride){
450  int idx, i, idx_max, imin, imax, iA, iB, j, k;
451  int off_idx, sym_pass;
452  int * idx_glb, * rev_idx_map;
453  int * dlen_A, * dlen_B;
454  int64_t idx_A, idx_B, off_lda;
455 
456  inv_idx(order_A, idx_map_A,
457  order_B, idx_map_B,
458  &idx_max, &rev_idx_map);
459 
460  dlen_A = (int*)CTF_int::alloc(sizeof(int)*order_A);
461  dlen_B = (int*)CTF_int::alloc(sizeof(int)*order_B);
462  memcpy(dlen_A, edge_len_A, sizeof(int)*order_A);
463  memcpy(dlen_B, edge_len_B, sizeof(int)*order_B);
464 
465  idx_glb = (int*)CTF_int::alloc(sizeof(int)*idx_max);
466 
467  SCAL_B_inr;
468 
469  memset(idx_glb, 0, sizeof(int)*idx_max);
470 
471  idx_A = 0, idx_B = 0;
472  sym_pass = 1;
473  for (;;){
474  if (sym_pass){
475  /* printf("B[%d] = %lf*(A[%d]=%lf)+%lf*(B[%d]=%lf\n",
476  idx_B,alpha,idx_A,A[idx_A],beta,idx_B,B[idx_B]);*/
477  // B[idx_B] = alpha*A[idx_A] + beta*B[idx_B];
478  /* if (beta != 1.0){
479  cxaxpy<dtype>(inr_stride, beta-1.0, B+idx_B*inr_stride, 1, B+idx_B*inr_stride, 1);
480  CTF_FLOPS_ADD(2*inr_stride);
481  }*/
482  //cxaxpy<dtype>(inr_stride, alpha, A+idx_A*inr_stride, 1, B+idx_B*inr_stride, 1);
483  sr_B->axpy(inr_stride, alpha, A+idx_A*sr_A->el_size*inr_stride, 1, B+idx_B*sr_B->el_size*inr_stride, 1);
484  CTF_FLOPS_ADD(2*inr_stride);
485  }
486 
487  for (idx=0; idx<idx_max; idx++){
488  imin = 0, imax = INT_MAX;
489 
490  GET_MIN_MAX(A,0,2);
491  GET_MIN_MAX(B,1,2);
492 
493  ASSERT(idx_glb[idx] >= imin && idx_glb[idx] < imax);
494 
495  idx_glb[idx]++;
496 
497  if (idx_glb[idx] >= imax){
498  idx_glb[idx] = imin;
499  }
500  if (idx_glb[idx] != imin) {
501  break;
502  }
503  }
504  if (idx == idx_max) break;
505 
506  CHECK_SYM(A);
507  if (!sym_pass) continue;
508  CHECK_SYM(B);
509  if (!sym_pass) continue;
510 
511  if (order_A > 0)
512  RESET_IDX(A);
513  if (order_B > 0)
514  RESET_IDX(B);
515  }
516  CTF_int::cdealloc(dlen_A);
517  CTF_int::cdealloc(dlen_B);
518  CTF_int::cdealloc(idx_glb);
519  CTF_int::cdealloc(rev_idx_map);
521  return 0;
522  }
523 
524  int sym_seq_sum_cust(char const * alpha,
525  char const * A,
526  algstrct const * sr_A,
527  int order_A,
528  int const * edge_len_A,
529  int const * sym_A,
530  int const * idx_map_A,
531  char const * beta,
532  char * B,
533  algstrct const * sr_B,
534  int order_B,
535  int const * edge_len_B,
536  int const * sym_B,
537  int const * idx_map_B,
538  univar_function const * func){
540  int idx, i, idx_max, imin, imax, iA, iB, j, k;
541  int off_idx, sym_pass;
542  int * idx_glb, * rev_idx_map;
543  int * dlen_A, * dlen_B;
544  int64_t idx_A, idx_B, off_lda;
545 
546  inv_idx(order_A, idx_map_A,
547  order_B, idx_map_B,
548  &idx_max, &rev_idx_map);
549 
550  dlen_A = (int*)CTF_int::alloc(sizeof(int)*order_A);
551  dlen_B = (int*)CTF_int::alloc(sizeof(int)*order_B);
552  memcpy(dlen_A, edge_len_A, sizeof(int)*order_A);
553  memcpy(dlen_B, edge_len_B, sizeof(int)*order_B);
554 
555  idx_glb = (int*)CTF_int::alloc(sizeof(int)*idx_max);
556  memset(idx_glb, 0, sizeof(int)*idx_max);
557 
558  SCAL_B;
559 
560  idx_A = 0, idx_B = 0;
561  sym_pass = 1;
562  for (;;){
563  if (sym_pass){
564  if (alpha != NULL){
565  char tmp_A[sr_A->el_size];
566  sr_A->mul(A+sr_A->el_size*idx_A, alpha, tmp_A);
567  func->acc_f(tmp_A, B+idx_B*sr_B->el_size, sr_B);
568 // func->apply_f(tmp_A, tmp_B);
569  // sr_B->add(B+idx_B*sr_B->el_size, tmp_B, B+sr_B->el_size*idx_B);
570  CTF_FLOPS_ADD(2);
571  } else {
572  func->acc_f(A+idx_A*sr_A->el_size, B+idx_B*sr_B->el_size, sr_B);
573  //func->apply_f(A+idx_A*sr_A->el_size, tmp_B);
574  //sr_B->add(B+idx_B*sr_B->el_size, tmp_B, B+idx_B*sr_B->el_size);
575  CTF_FLOPS_ADD(1);
576  }
577  }
578 
579  for (idx=0; idx<idx_max; idx++){
580  imin = 0, imax = INT_MAX;
581 
582  GET_MIN_MAX(A,0,2);
583  GET_MIN_MAX(B,1,2);
584 
585  ASSERT(idx_glb[idx] >= imin && idx_glb[idx] < imax);
586 
587  idx_glb[idx]++;
588 
589  if (idx_glb[idx] >= imax){
590  idx_glb[idx] = imin;
591  }
592  if (idx_glb[idx] != imin) {
593  break;
594  }
595  }
596  if (idx == idx_max) break;
597 
598  CHECK_SYM(A);
599  if (!sym_pass) continue;
600  CHECK_SYM(B);
601  if (!sym_pass) continue;
602 
603  if (order_A > 0)
604  RESET_IDX(A);
605  if (order_B > 0)
606  RESET_IDX(B);
607  }
608  CTF_int::cdealloc(dlen_A);
609  CTF_int::cdealloc(dlen_B);
610  CTF_int::cdealloc(idx_glb);
611  CTF_int::cdealloc(rev_idx_map);
613  return 0;
614  }
615 }
void compute_syoffs(algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, int tot_order, int const *rev_idx_map, uint64_t **&offsets_A, uint64_t **&offsets_B, uint64_t **&offsets_C)
virtual bool isequal(char const *a, char const *b) const
returns true if algstrct elements a and b are equal
Definition: algstrct.cxx:340
#define RESET_IDX(__X)
Definition: iter_tsr.h:67
int sym_seq_sum_ref(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B)
performs symmetric summation with unblocked reference kernel
#define SCAL_B
void inv_idx(int order_A, int const *idx_A, int order_B, int const *idx_B, int order_C, int const *idx_C, int *order_tot, int **idx_arr)
invert index map
Definition: ctr_tsr.cxx:592
#define MAX_ORD
Definition: util.h:103
#define ASSERT(...)
Definition: util.h:88
void * alloc(int64_t len)
alloc abstraction
Definition: memcontrol.cxx:365
Definition: common.h:37
untyped internal class for doubly-typed univariate function
Definition: sum_tsr.h:14
virtual char const * addid() const
identity element for addition, i.e. 0
Definition: algstrct.cxx:89
#define GET_MIN_MAX(__X, nr, wd)
Definition: iter_tsr.h:16
#define CTF_FLOPS_ADD(n)
Definition: util.h:138
virtual void set(char *a, char const *b, int64_t n) const
sets n elements of array a to value b
Definition: algstrct.cxx:629
virtual void acc_f(char const *a, char *b, CTF_int::algstrct const *sr_B) const
compute b = b+f(a)
Definition: sum_tsr.h:33
#define SWITCH_ORD_CALL(F, act_ord,...)
Definition: util.h:119
#define TAU_FSTOP(ARG)
Definition: util.h:281
#define TAU_FSTART(ARG)
Definition: util.h:280
#define CHECK_SYM(__X)
Definition: iter_tsr.h:52
virtual void scal(int n, char const *alpha, char *X, int incX) const
X["i"]=alpha*X["i"];.
Definition: algstrct.cxx:262
template void sym_seq_sum_loop< MAX_ORD >(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, univar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
virtual void axpy(int n, char const *alpha, char const *X, int incX, char *Y, int incY) const
Y["i"]+=alpha*X["i"];.
Definition: algstrct.cxx:280
void sym_seq_sum_loop< 0 >(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, univar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
virtual void add(char const *a, char const *b, char *c) const
c = a+b
Definition: algstrct.cxx:109
int sym_seq_sum_cust(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, univar_function const *func)
performs symmetric summation with custom elementwise function
#define SCAL_B_inr
int el_size
size of each element of algstrct in bytes
Definition: algstrct.h:16
int cdealloc(void *ptr)
free abstraction
Definition: memcontrol.cxx:480
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
Definition: algstrct.h:34
int sym_seq_sum_inr(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, int inr_stride)
performs symmetric summation with blocked daxpy
void compute_syoff(int r, int len, algstrct const *sr, int const *edge_len, int const *sym, uint64_t *offsets)
void sym_seq_sum_loop(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, univar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
Definition: sym_seq_sum.cxx:11
virtual void mul(char const *a, char const *b, char *c) const
c = a*b
Definition: algstrct.cxx:120
virtual char const * mulid() const
identity element for multiplication i.e. 1
Definition: algstrct.cxx:93
int64_t sy_packed_size(int order, const int *len, const int *sym)
computes the size of a tensor in SY (NOT HOLLOW) packed symmetric layout
Definition: util.cxx:10