1 #ifndef __INT_SUMMATION_H__ 2 #define __INT_SUMMATION_H__ 98 void execute(
bool run_diag=
false);
129 void get_fold_indices(
int * num_fold,
163 double est_time_fold();
172 void get_len_ordering(
int ** new_ordering_A,
173 int ** new_ordering_B);
180 tsum * construct_sum(
int inner_stride=-1);
189 tspsum * construct_sparse_sum(
int const * phys_mapped);
197 tsum * construct_dense_sum(
int inner_stride,
198 int const * phys_mapped);
206 int home_sum_tsr(
bool run_diag);
213 int sym_sum_tsr(
bool run_diag);
220 int unfold_broken_sym(
summation ** new_sum);
227 bool check_consistency();
242 int map_sum_indices(
topology const * topo);
bool is_custom
whether there is an elementwise custom function
int * idx_A
indices of left operand
void execute(bool run_diag=false)
run summation
summation(summation const &other)
copy constructor
untyped internal class for doubly-typed univariate function
double estimate_time()
predicts execution time in seconds using performance models
~summation()
destructor
char const * alpha
scaling of A
int * idx_B
indices of output
void print()
print summation details
univar_function const * func
function to execute on elements
char const * beta
scaling of existing B
int is_equal(summation const &os)
returns 1 if summations have same tensors and index map
internal distributed tensor class
class for executing distributed summation of tensors
int sum_tensors(bool run_diag)
PDAXPY: a*idx_map_A(A) + b*idx_map_B(B) -> idx_map_B(B). Treats symmetric as lower triangular...