3 #include "../shared/util.h" 27 for (i=0; i<
order; i++){
35 int i, ilda, toff, boff, ret;
49 memset(idx_arr, 0,
sizeof(
int)*
order);
52 for (i=0; i<
order; i++){
56 toff += idx_arr[i]*lda[i];
71 for (i=1; i<
order; i++){
72 toff -= idx_arr[i]*lda[i];
76 toff += idx_arr[i]*lda[i];
197 char * bA, * bB, * bC;
285 int * pmap, * edge_len, * sdim, * sidx;
290 std::fill(pmap, pmap+order_tot, -1);
294 for (i=0; i<order; i++){
296 ASSERT(pmap[idx_map[i]] == -1);
297 pmap[idx_map[i]] = i;
300 for (i=0; i<order; i++){
301 if (edge_map[i].type ==
VIRTUAL_MAP && pmap[idx_map[i]] != -1)
304 if (need_strip == 0) {
314 std::fill(sdim, sdim+order, 1);
315 std::fill(sidx, sidx+order, 0);
317 for (i=0; i<order; i++){
328 if (edge_map[i].type ==
VIRTUAL_MAP && pmap[idx_map[i]] != -1) {
329 sdim[i] = edge_len[i];
331 ASSERT(edge_map[i].
np == edge_map[pmap[idx_map[i]]].
np);
333 blk_edge_len[i] = blk_edge_len[i] / sdim[i];
334 *blk_sz = (*blk_sz) / sdim[i];
338 stripper->
order = order;
343 stripper->
blk_sz = vrt_sz;
scl * clone()
copies strp_scl object
int calc_phys_rank(topology const *topo) const
compute the physical rank of a mapping
int64_t mem_fp()
gets memory usage of op
strp_tsr * clone()
copies strp_tsr object
int64_t mem_fp()
returns the number of bytes of buffer space we need recursively
void run(char *A, char *B, char *C)
runs strip for contraction of tensors
int calc_phase() const
compute the phase of a mapping
virtual int64_t mem_rec()
int calc_phys_phase() const
compute the physical phase of a mapping
virtual void copy(char *a, char const *b) const
copies element b to element a
void * alloc(int64_t len)
alloc abstraction
tsum * clone()
copies strp_sum object
int strip_diag(int order, int order_tot, int const *idx_map, int64_t vrt_sz, mapping const *edge_map, topology const *topo, algstrct const *sr, int *blk_edge_len, int64_t *blk_sz, strp_tsr **stpr)
build stack required for stripping out diagonals of tensor
int alloc_ptr(int64_t len, void **const ptr)
alloc abstraction
void run()
runs strip for scale of tensor
strp_ctr(ctr *other)
copies strp_ctr object
virtual double est_time_rec(int nlyr)
void run()
runs strip for sum of tensors
virtual void run(char *A, char *B, char *C)
int el_size
size of each element of algstrct in bytes
int cdealloc(void *ptr)
free abstraction
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
int64_t mem_fp()
gets memory usage of op
class for execution distributed summation of tensors
void free_exp()
deallocates buffer
ctr * clone()
copies strp_ctr object
int64_t mem_fp()
returns the number of bytes of buffer space we need
double est_time_rec(int nlyr)
returns the number of bytes sent recursively
void run(int const dir)
strips out part of tensor to be operated on