Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
summation.h
Go to the documentation of this file.
1 #ifndef __INT_SUMMATION_H__
2 #define __INT_SUMMATION_H__
3 
4 #include <assert.h>
5 #include "sum_tsr.h"
6 #include "spsum_tsr.h"
7 
8 namespace CTF_int {
9  class tensor;
10  class topology;
11 
// Class for executing a distributed summation of tensors (A summed into B).
// Descriptions below are taken from the reference entries of this listing or
// from what the signatures themselves show; anything else is marked as unknown.
15  class summation {
16  public:
// left operand
18  tensor * A;
// output
20  tensor * B;
21 
// scaling of A
23  char const * alpha;
// scaling of existing B
25  char const * beta;
26 
// indices of left operand
28  int * idx_A;
// indices of output
30  int * idx_B;
// whether there is an elementwise custom function
32  bool is_custom;
// NOTE(review): the reference entries of this listing cite a member
// `univar_function const * func` ("function to execute on elements",
// summation.h:34), but no line 34 appears here -- confirm against the real header.
35 
// Default constructor, currently commented out (kept as-is below).
37 // summation(){ idx_A = NULL; idx_B = NULL; alpha=NULL; beta=NULL; is_custom=0; };
38 
// destructor
40  ~summation();
41 
// copy constructor
43  summation(summation const & other);
44 
// Constructor taking both operands, their index maps as int arrays, and the
// scaling factors alpha (for A) and beta (for existing B).
55  summation(tensor * A,
56  int const * idx_A,
57  char const * alpha,
58  tensor * B,
59  int const * idx_B,
60  char const * beta);
// Same parameter list as above, but the index maps are passed as char arrays.
61  summation(tensor * A,
62  char const * idx_A,
63  char const * alpha,
64  tensor * B,
65  char const * idx_B,
66  char const * beta);
67 
// Constructor with int-array index maps that additionally takes a
// univar_function (`func`); presumably the elementwise custom function
// flagged by is_custom -- TODO confirm in summation.cxx.
80  summation(tensor * A,
81  int const * idx_A,
82  char const * alpha,
83  tensor * B,
84  int const * idx_B,
85  char const * beta,
86  univar_function const * func);
// Char-array index map variant of the constructor taking `func`.
87  summation(tensor * A,
88  char const * idx_A,
89  char const * alpha,
90  tensor * B,
91  char const * idx_B,
92  char const * beta,
93  univar_function const * func);
94 
// run summation; run_diag defaults to false (its meaning is not shown in this listing)
98  void execute(bool run_diag=false);
99 
// predicts execution time in seconds using performance models
101  double estimate_time();
102 
// returns 1 if summations have same tensors and index map
107  int is_equal(summation const & os);
108 
109 
// PDAXPY: a*idx_map_A(A) + b*idx_map_B(B) -> idx_map_B(B).
// Treats symmetric as lower triangular (the description is truncated in this listing).
115  int sum_tensors(bool run_diag);
116 
// prints details of this operation
118  void print();
119 
// Private helpers. This listing carries no descriptions for them, so only
// what the signatures show is noted; see summation.cxx for actual behavior.
120  private:
// Both parameters are pointers the callee can write through
// (presumably outputs -- TODO confirm).
129  void get_fold_indices(int * num_fold,
130  int ** fold_idx);
131 
136  int can_fold();
137 
// All five parameters are non-const references
// (presumably outputs -- TODO confirm).
146  void get_fold_sum(summation *& fold_sum,
147  int & all_fdim_A,
148  int & all_fdim_B,
149  int *& all_flen_A,
150  int *& all_flen_B);
151 
152 
157  int map_fold();
158 
163  double est_time_fold();
164 
165 
// Both parameters are int** (presumably newly produced orderings for A and B
// returned to the caller -- TODO confirm ownership).
172  void get_len_ordering(int ** new_ordering_A,
173  int ** new_ordering_B);
174 
175 
// inner_stride defaults to -1; the meaning of that value is not shown here.
180  tsum * construct_sum(int inner_stride=-1);
181 
189  tspsum * construct_sparse_sum(int const * phys_mapped);
190 
197  tsum * construct_dense_sum(int inner_stride,
198  int const * phys_mapped);
199 
200 
206  int home_sum_tsr(bool run_diag);
207 
213  int sym_sum_tsr(bool run_diag);
214 
// new_sum is a summation** (presumably receives a newly created summation --
// TODO confirm).
220  int unfold_broken_sym(summation ** new_sum);
221 
227  bool check_consistency();
228 
229 
234  int check_mapping();
235 
242  int map_sum_indices(topology const * topo);
243 
248  int map();
249 
253  void sp_sum();
254  };
255 }
256 
257 #endif
bool is_custom
whether there is an elementwise custom function
Definition: summation.h:32
int * idx_A
indices of left operand
Definition: summation.h:28
void execute(bool run_diag=false)
run summation
Definition: summation.cxx:119
summation(summation const &other)
copy constructor
Definition: summation.cxx:24
untyped internal class for doubly-typed univariate function
Definition: sum_tsr.h:14
double estimate_time()
predicts execution time in seconds using performance models
Definition: summation.cxx:132
~summation()
destructor
Definition: summation.cxx:19
char const * alpha
scaling of A
Definition: summation.h:23
int * idx_B
indices of output
Definition: summation.h:30
void print()
print summation details
Definition: summation.cxx:2362
univar_function const * func
function to execute on elements
Definition: summation.h:34
char const * beta
scaling of existing B
Definition: summation.h:25
tensor * B
output
Definition: summation.h:20
int is_equal(summation const &os)
returns 1 if summations have same tensors and index map
Definition: summation.cxx:1821
internal distributed tensor class
tensor * A
left operand
Definition: summation.h:18
class for executing distributed summation of tensors
Definition: summation.h:15
int sum_tensors(bool run_diag)
PDAXPY: a*idx_map_A(A) + b*idx_map_B(B) -> idx_map_B(B). Treats symmetric as lower triangular...
Definition: summation.cxx:1384