Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
sym_seq_scl.cxx
Go to the documentation of this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
2 
3 #include "../shared/iter_tsr.h"
4 #include "../shared/util.h"
5 #include <limits.h>
6 #include "sym_seq_scl.h"
7 #include "scaling.h"
8 #include "../interface/idx_tensor.h"
9 
10 namespace CTF_int {
11 
12  void endomorphism::operator()(Term const & A) const {
14  scaling s(op_A.parent, op_A.idx_map, op_A.scale, this);
15  s.execute();
16  }
17 
18 
19  void inv_idx(int const order_A,
20  int const * idx_A,
21  int * order_tot,
22  int ** idx_arr){
23  int i, dim_max;
24 
25  dim_max = -1;
26  for (i=0; i<order_A; i++){
27  if (idx_A[i] > dim_max) dim_max = idx_A[i];
28  }
29  dim_max++;
30  *order_tot = dim_max;
31  *idx_arr = (int*)CTF_int::alloc(sizeof(int)*dim_max);
32  std::fill((*idx_arr), (*idx_arr)+dim_max, -1);
33 
34  for (i=0; i<order_A; i++){
35  (*idx_arr)[idx_A[i]] = i;
36  }
37  }
38 
39 
40  int sym_seq_scl_ref(char const * alpha,
41  char * A,
42  algstrct const * sr_A,
43  int order_A,
44  int const * edge_len_A,
45  int const * sym_A,
46  int const * idx_map_A){
48  int idx, i, idx_max, imin, imax, iA, j, k;
49  int off_idx, sym_pass;
50  int * idx_glb, * rev_idx_map;
51  int * dlen_A;
52  int64_t idx_A, off_lda;
53 
54  inv_idx(order_A, idx_map_A,
55  &idx_max, &rev_idx_map);
56 
57  dlen_A = (int*)CTF_int::alloc(sizeof(int)*order_A);
58  memcpy(dlen_A, edge_len_A, sizeof(int)*order_A);
59 
60  idx_glb = (int*)CTF_int::alloc(sizeof(int)*idx_max);
61  memset(idx_glb, 0, sizeof(int)*idx_max);
62 
63 
64  idx_A = 0;
65  sym_pass = 1;
66  for (;;){
67  if (sym_pass){
68  //A[idx_A] = alpha*A[idx_A];
69  sr_A->mul(A+idx_A*sr_A->el_size, alpha, A+idx_A*sr_A->el_size);
70  CTF_FLOPS_ADD(1);
71  }
72 
73  for (idx=0; idx<idx_max; idx++){
74  imin = 0, imax = INT_MAX;
75 
76  GET_MIN_MAX(A,0,1);
77 
78  ASSERT(idx_glb[idx] >= imin && idx_glb[idx] < imax);
79 
80  idx_glb[idx]++;
81 
82  if (idx_glb[idx] >= imax){
83  idx_glb[idx] = imin;
84  }
85  if (idx_glb[idx] != imin) {
86  break;
87  }
88  }
89  if (idx == idx_max) break;
90 
91  CHECK_SYM(A);
92  if (!sym_pass) continue;
93 
94  if (order_A > 0)
95  RESET_IDX(A);
96  }
97  CTF_int::cdealloc(dlen_A);
98  CTF_int::cdealloc(idx_glb);
99  CTF_int::cdealloc(rev_idx_map);
101  return 0;
102  }
103 
104 
105  int sym_seq_scl_cust(char const * alpha,
106  char * A,
107  algstrct const * sr_A,
108  int const order_A,
109  int const * edge_len_A,
110  int const * sym_A,
111  int const * idx_map_A,
112  endomorphism const * func){
114  int idx, i, idx_max, imin, imax, iA, j, k;
115  int off_idx, sym_pass;
116  int * idx_glb, * rev_idx_map;
117  int * dlen_A;
118  int64_t idx_A, off_lda;
119 
120  inv_idx(order_A, idx_map_A,
121  &idx_max, &rev_idx_map);
122 
123  dlen_A = (int*)CTF_int::alloc(sizeof(int)*order_A);
124  memcpy(dlen_A, edge_len_A, sizeof(int)*order_A);
125 
126  idx_glb = (int*)CTF_int::alloc(sizeof(int)*idx_max);
127  memset(idx_glb, 0, sizeof(int)*idx_max);
128 
129 
130  idx_A = 0;
131  sym_pass = 1;
132  for (;;){
133  if (sym_pass){
134  if (alpha != NULL)
135  sr_A->mul(A+idx_A*sr_A->el_size, alpha, A+idx_A*sr_A->el_size);
136  func->apply_f(A+idx_A*sr_A->el_size);
137  CTF_FLOPS_ADD(1);
138  }
139 
140  for (idx=0; idx<idx_max; idx++){
141  imin = 0, imax = INT_MAX;
142 
143  GET_MIN_MAX(A,0,1);
144 
145  ASSERT(idx_glb[idx] >= imin && idx_glb[idx] < imax);
146 
147  idx_glb[idx]++;
148 
149  if (idx_glb[idx] >= imax){
150  idx_glb[idx] = imin;
151  }
152  if (idx_glb[idx] != imin) {
153  break;
154  }
155  }
156  if (idx == idx_max) break;
157 
158  CHECK_SYM(A);
159  if (!sym_pass) continue;
160 
161  if (order_A > 0)
162  RESET_IDX(A);
163  }
164  CTF_int::cdealloc(dlen_A);
165  CTF_int::cdealloc(idx_glb);
166  CTF_int::cdealloc(rev_idx_map);
168  return 0;
169  }
170 
171 
172 }
a term is an abstract object representing some expression of tensors
Definition: term.h:33
virtual void execute(CTF::Idx_Tensor output) const =0
evaluates the expression, which just scales by default
untyped internal class for singly-typed single variable function (Endomorphism)
Definition: sym_seq_scl.h:12
#define RESET_IDX(__X)
Definition: iter_tsr.h:67
int sym_seq_sum_ref(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B)
performs symmetric summation with unblocked reference kernel
char * idx_map
Definition: idx_tensor.h:18
void inv_idx(int order_A, int const *idx_A, int order_B, int const *idx_B, int order_C, int const *idx_C, int *order_tot, int **idx_arr)
invert index map
Definition: ctr_tsr.cxx:592
#define ASSERT(...)
Definition: util.h:88
void * alloc(int64_t len)
alloc abstraction
Definition: memcontrol.cxx:365
#define GET_MIN_MAX(__X, nr, wd)
Definition: iter_tsr.h:16
#define CTF_FLOPS_ADD(n)
Definition: util.h:138
virtual void apply_f(char *a) const
apply function f to value stored at a
Definition: sym_seq_scl.h:19
class for execution distributed scaling of a tensor
Definition: scaling.h:14
int sym_seq_scl_ref(char const *alpha, char *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A)
performs symmetric scaling using algstrct const * sr_A
Definition: sym_seq_scl.cxx:40
#define TAU_FSTOP(ARG)
Definition: util.h:281
int sym_seq_scl_cust(char const *alpha, char *A, algstrct const *sr_A, int const order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, endomorphism const *func)
performs symmetric scaling using custom func
#define TAU_FSTART(ARG)
Definition: util.h:280
#define CHECK_SYM(__X)
Definition: iter_tsr.h:52
virtual std::vector< char > get_uniq_inds() const =0
find list of unique indices that are involved in this term
int sym_seq_sum_cust(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, univar_function const *func)
performs symmetric summation with custom elementwise function
int el_size
size of each element of algstrct in bytes
Definition: algstrct.h:16
int cdealloc(void *ptr)
free abstraction
Definition: memcontrol.cxx:480
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
Definition: algstrct.h:34
int execute()
run scaling
Definition: scaling.cxx:64
char * scale
Definition: term.h:35
void operator()(Term const &A) const
apply f to A
Definition: sym_seq_scl.cxx:12
virtual void mul(char const *a, char const *b, char *c) const
c = a*b
Definition: algstrct.cxx:120
a tensor with an index map associated with it (necessary for overloaded operators) ...
Definition: idx_tensor.h:15
CTF_int::tensor * parent
Definition: idx_tensor.h:17