#include "../mapping/mapping.h"
#include "../mapping/distribution.h"
#include "../tensor/untyped_tensor.h"
#include "../shared/util.h"
#include "../shared/memcontrol.h"

// Excerpts from the scaling implementation follow; elided lines are marked "// ...".

// Constructor taking an integer index map: copy it into the object.
idx_map = (int*)alloc(sizeof(int)*A->order);
memcpy(idx_map, idx_map_, sizeof(int)*A->order);
// Constructor overload taking a character index map instead:
//     ... char const * cidx_map, ...
conv_idx(A->order, cidx_map, &idx_map);

// The same two steps recur, presumably in the overloads that also take
// a custom elementwise function (endomorphism):
idx_map = (int*)alloc(sizeof(int)*A->order);
memcpy(idx_map, idx_map_, sizeof(int)*A->order);
//     ... char const *         cidx_map, ...
conv_idx(A->order, cidx_map, &idx_map);
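The character-string flavor is just a convenience over the integer one: conv_idx (documented in the member list at the end of this listing) assigns each distinct character its own integer label. A minimal sketch of that conversion, with conv_idx_sketch a hypothetical stand-in for the real function:

#include <cstdio>
#include <cstdlib>

// Hypothetical stand-in for conv_idx: give each distinct character in
// cidx the next unused integer, reusing it when a character repeats
// (so "ii" becomes {0,0} and "ij" becomes {0,1}).
static int conv_idx_sketch(int order, char const * cidx, int ** iidx){
  *iidx = (int*)malloc(sizeof(int)*order);
  int next = 0;
  for (int i = 0; i < order; i++){
    int found = -1;
    for (int j = 0; j < i; j++)
      if (cidx[j] == cidx[i]) found = (*iidx)[j];
    (*iidx)[i] = (found == -1) ? next++ : found;
  }
  return 0;
}

int main(){
  int * idx;
  conv_idx_sketch(2, "ii", &idx);
  printf("\"ii\" -> {%d,%d}\n", idx[0], idx[1]);  // {0,0}: a diagonal scale
  free(idx);
}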
// Locals of the routine that remaps the (dense) tensor and executes the scale:
int st, is_top, order_tot, iA, ret, itopo, btopo;
int64_t blk_sz, vrt_sz;
int * virt_dim, * idx_arr;
int * virt_blk_len, * blk_len;
int64_t memuse, bmemuse;
scl * hscl = NULL, ** rec_scl = NULL;
printf("Scaling tensor %s.\n", tsr->name);
printf("The index mapping is");
for (int i=0; i<tsr->order; i++){
  printf(" %d", idx_map[i]);
}
// ...
printf("Old mapping for tensor %s\n", tsr->name);
// Duplicate the tensor object so its mapping can be changed freely.
ntsr = new tensor(tsr, 0, 0);
// ...
inv_idx(/* ... */, &order_tot, &idx_arr);
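inv_idx (documented for the three-tensor case in the member list) inverts the index map: it builds an array idx_arr indexed by index label rather than by tensor dimension. A one-tensor sketch of the same idea, with names of my own choosing:

#include <cstdio>
#include <cstdlib>

// Hypothetical one-tensor analogue of inv_idx: for each index label,
// record which dimension of A carries it (-1 if none), and report the
// number of distinct labels in order_tot.
static void inv_idx_sketch(int order_A, int const * idx_A,
                           int * order_tot, int ** idx_arr){
  int tot = 0;
  for (int i = 0; i < order_A; i++)
    if (idx_A[i] >= tot) tot = idx_A[i] + 1;
  *order_tot = tot;
  *idx_arr = (int*)malloc(sizeof(int)*tot);
  for (int i = 0; i < tot; i++) (*idx_arr)[i] = -1;
  for (int i = 0; i < order_A; i++) (*idx_arr)[idx_A[i]] = i;
}

int main(){
  int idx_A[2] = {0, 0};  // "ii": both dimensions carry label 0
  int order_tot, * idx_arr;
  inv_idx_sketch(2, idx_A, &order_tot, &idx_arr);
  printf("order_tot=%d, idx_arr[0]=%d\n", order_tot, idx_arr[0]);  // 1, 1
  free(idx_arr);
}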
DPRINTF(1,"Not enough memory to scale tensor on topo %d\n", itopo);
// ...
} else if (memuse < bmemuse){
// ...
if (btopo == -1 || btopo == INT_MAX){
  printf("ERROR: FAILED TO MAP TENSOR SCALE\n");
printf("New mapping for tensor %s\n", ntsr->name);
// ...
calc_dim(/* ... */, &vrt_sz, virt_blk_len, blk_len);
// ...
strip_diag(/* ... */, blk_len, &blk_sz, &str);
// ...
DPRINTF(2,"Stripping tensor\n");
for (int i=0; i<order_tot; i++){
  // ...
      virt_dim[i] = map->np;
      if (st) virt_dim[i] = virt_dim[i]/str->strip_dim[iA];
    // ...
    else virt_dim[i] = 1;
  } else virt_dim[i] = 1;
  nvirt *= virt_dim[i];
}
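The nvirt accumulated here is the tensor's virtualization factor: the product over all indices of how many virtual blocks each is split into (compare calc_nvirt in the member list). A toy version of the accumulation:

#include <cstdint>
#include <cstdio>

// The virtualization factor is the product of the per-index virtual
// dimensions (1 for indices that are not virtualized).
int main(){
  int virt_dim[3] = {2, 1, 4};  // hypothetical per-index virtual dims
  int64_t nvirt = 1;
  for (int i = 0; i < 3; i++) nvirt *= virt_dim[i];
  printf("nvirt = %lld\n", (long long)nvirt);  // 8 virtual blocks per process
}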
// Wire up the scale-kernel hierarchy: the sequential kernel gets alpha,
// and the top-level scl object points at the remapped data.
sclseq->alpha = alpha;
// ...
hscl->A = ntsr->data;
if (was_home && !ntsr->is_home){
  // ...
  DPRINTF(2,"Migrating tensor %s back to home\n", tsr->name);
  // ...
  if (old_dst != NULL) delete old_dst;
  // ...
} else if (was_home){
  // ...
  printf("Tensor %s is a copy of %s and did not leave home but buffer is %p was %p\n",
         ntsr->name, tsr->name, ntsr->data, tsr->data);
  // ...
}
// ...
if (old_dst != NULL) delete old_dst;
// ...
printf("Done scaling tensor %s.\n", tsr->name);
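These branches implement the home-buffer protocol described by the has_home/is_home fields in the member list: if the tensor had to leave its home mapping to be scaled, its data is migrated back afterwards, and the distribution snapshot saved before remapping (old_dst in the fragments) is deleted. A toy model of that save/remap/restore pattern; every type and function here is a stand-in, not CTF's:

#include <cstdio>

// A tensor remembers whether its data currently lives in its original
// ("home") layout; an operation that remaps it must migrate it back.
struct ToyTensor {
  bool is_home = true;  // data currently lives in the home layout
  int  layout  = 0;     // 0 = home layout in this toy
};

void remap_for_scale(ToyTensor & t){ t.layout = 1; t.is_home = false; }
void redistribute_back(ToyTensor & t){ t.layout = 0; t.is_home = true; }

int main(){
  ToyTensor t;
  bool was_home = t.is_home;
  remap_for_scale(t);            // the scale may require a different mapping
  /* ... scale the local data ... */
  if (was_home && !t.is_home){   // same condition as in the fragment above
    printf("Migrating tensor back to home\n");
    redistribute_back(t);
  }
}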
// sp_scl(): the sparse path. First detect repeated (diagonal) indices.
bool has_rep_idx = false;
for (int i=0; i<A->order; i++){
  for (int j=0; j<i; j++){
    if (idx_map[i] == idx_map[j]) has_rep_idx = true;
  }
}
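A repeated index restricts the scale to the matching "diagonal": with idx_map {0,0} on a matrix (the string "ii"), only entries whose two coordinates agree are scaled. A dense reference version of what the sparse path computes in that case:

#include <cstdio>

// Dense reference for a repeated-index scale (idx_map {0,0}, i.e. "ii"):
// only entries whose coordinates agree on the repeated index are scaled.
int main(){
  double A[3][3] = {{1,2,3},{4,5,6},{7,8,9}};
  double alpha = 10.0;
  for (int i = 0; i < 3; i++)
    for (int j = 0; j < 3; j++)
      if (i == j) A[i][j] *= alpha;  // the "diagonal" selected by "ii"
  printf("A[0][0]=%g A[0][1]=%g\n", A[0][0], A[0][1]);  // 10 and 2
}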
// No repeated indices: scale (and, in the custom case, apply func to)
// every local nonzero.
#pragma omp parallel for
for (int64_t i=0; i<A->nnz_loc; i++){
  // ...
  A->sr->mul(pi[i].d(), alpha, pi[i].d());
  func->apply_f(pi[i].d());
}
// ...
#pragma omp parallel for
for (int64_t i=0; i<A->nnz_loc; i++){
  A->sr->mul(pi[i].d(), alpha, pi[i].d());
}
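A->sr->mul(a, b, c) is the elementwise product of the tensor's algebraic structure (algstrct, listed below); for the ordinary ring of doubles it reduces to c = a*b, so each loop iteration above performs an in-place scale of one nonzero. A minimal stand-in:

#include <cstdio>

// Stand-in for algstrct::mul over doubles: c = a * b, reading raw
// (char *) element pointers the way the sparse loop above does.
static void mul_double(char const * a, char const * b, char * c){
  *(double*)c = (*(double const*)a) * (*(double const*)b);
}

int main(){
  double d = 3.0, alpha = 2.0;
  // mirrors A->sr->mul(pi[i].d(), alpha, pi[i].d()): an in-place scale
  mul_double((char*)&d, (char*)&alpha, (char*)&d);
  printf("%g\n", d);  // 6
}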
// Repeated indices: record which dimensions share an index label.
int rep_inds[A->order];
for (int i=0; i<A->order; i++){
  for (int j=0; j<A->order; j++){
    if (i!=j && idx_map[i] == idx_map[j]){
      rep_inds[nrep_idx] = i;
      // ...
    }
  }
}
// Strides (ldas) of the first-index-fastest key linearization.
int64_t ldas[A->order];
ldas[0] = 1;
for (int i=1; i<A->order; i++){
  ldas[i] = ldas[i-1]*A->lens[i-1];
}
#pragma omp parallel for
for (int64_t i=0; i<A->nnz_loc; i++){
  // Decode the coordinates of each nonzero along the repeated
  // dimensions; scale only if they all agree (a "diagonal" entry).
  int64_t pkey[A->order];
  for (int j=0; j<nrep_idx; j++){
    pkey[rep_inds[j]] = (pi[i].k()/ldas[rep_inds[j]])%A->lens[rep_inds[j]];
    for (int k=0; k<j; k++){
      if (idx_map[rep_inds[j]] == idx_map[rep_inds[k]] &&
          pkey[rep_inds[j]] != pkey[rep_inds[k]]){
        // ... (coordinates disagree on a repeated index: entry not scaled)
      }
    }
  }
  // ...
  A->sr->mul(pi[i].d(), alpha, pi[i].d());
  // ...
  func->apply_f(pi[i].d());
}
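The ldas/pkey arithmetic is the usual mixed-radix decoding of a linearized key, as implied by the recurrence ldas[i] = ldas[i-1]*lens[i-1] and the modular decode above. A worked check:

#include <cstdint>
#include <cstdio>

// With lens {3,4,5} the strides are ldas {1,3,12}, and coordinate d of
// key k is (k/ldas[d]) % lens[d], exactly the pkey computation above.
int main(){
  int64_t lens[3] = {3, 4, 5};
  int64_t ldas[3];
  ldas[0] = 1;
  for (int i = 1; i < 3; i++) ldas[i] = ldas[i-1]*lens[i-1];

  int64_t key = 2*ldas[0] + 1*ldas[1] + 3*ldas[2];  // encodes (2,1,3) as 41
  for (int d = 0; d < 3; d++)
    printf("coord[%d] = %lld\n", d, (long long)((key/ldas[d])%lens[d]));
  // prints 2, 1, 3
}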
 
char * home_buffer
buffer associated with home mapping of tensor, to which it is returned 
CTF_int::CommData cdt
communicator data for MPI comm defining this world 
bool is_home
whether the latest tensor data is in the home buffer 
int64_t * nnz_blk
nonzero elements in each block owned locally 
int * sym
symmetries among tensor dimensions 
void calc_dim(int order, int64_t size, int const *edge_len, mapping const *edge_map, int64_t *vrt_sz, int *vrt_edge_len, int *blk_edge_len)
calculate the block-sizes of a tensor 
untyped internal class for singly-typed single variable function (Endomorphism) 
int * pad_edge_len
padded tensor edge lengths 
void inv_idx(int order_A, int const *idx_A, int order_B, int const *idx_B, int order_C, int const *idx_C, int *order_tot, int **idx_arr)
invert index map 
scaling(tensor *A, int const *idx_map, char const *alpha)
constructor defining scaling with the algstrct's mul and add ops 
virtual void copy(char *a, char const *b) const 
copies element b to element a 
bool has_home
whether the tensor has a home mapping/buffer 
int64_t size
current size of local tensor data chunk (mapping-dependent) 
void * alloc(int64_t len)
alloc abstraction 
int get_best_topo(int64_t nvirt, int topo, CommData global_comm, int64_t bcomm_vol, int64_t bmemuse)
get the best topology (least nvirt) over all procs 
void set_new_nnz_glb(int64_t const *nnz_blk)
sets the number of nonzeros both locally (nnz_loc) and overall globally (nnz_tot) ...
virtual void dealloc(char *ptr) const 
deallocate given pointer containing contiguous array of values 
void copy_mapping(int order, mapping const *mapping_A, mapping *mapping_B)
copies mapping A to B 
bool is_sparse
whether only the non-zero elements of the tensor are stored 
int order
number of tensor dimensions 
void set_padding()
sets padding and local size of a tensor given a mapping 
CTF::World * wrld
distributed processor context on which tensor is defined 
class for executing distributed scaling of a tensor 
bool is_cyclic
whether the tensor data is cyclically distributed (blocked if false) 
int strip_diag(int order, int order_tot, int const *idx_map, int64_t vrt_sz, mapping const *edge_map, topology const *topo, algstrct const *sr, int *blk_edge_len, int64_t *blk_sz, strp_tsr **stpr)
build stack required for stripping out diagonals of tensor 
endomorphism const * func
elementwise function applied by a custom scaling 
int alloc_ptr(int64_t len, void **const ptr)
alloc abstraction 
void sp_scl()
scales a sparse tensor 
void print_map(FILE *stream=stdout, bool allcall=1) const 
displays mapping information 
bool is_data_aliased
whether the tensor data is an alias of another tensor object's data 
int64_t k() const 
returns key of pair at head of ptr 
algstrct * sr
algstrct on which tensor elements and operations are defined 
virtual void pair_dealloc(char *ptr) const 
deallocate given pointer containing contiguous array of pairs 
mapping * edge_map
mappings of each tensor dimension onto topology dimensions 
void unfold(bool was_mod=0)
undo the folding of a local tensor block; unsets is_folded and deletes rec_tsr 
bool is_mapped
whether a mapping has been selected 
int map_tensor_rem(int num_phys_dims, CommData *phys_comm, int fill=0)
map the remainder of a tensor 
int64_t calc_nvirt() const 
calculate the virtualization factor of the tensor; returns the virtualization factor 
int check_self_mapping(tensor const *tsr, int const *idx_map)
checks a mapping in preparation for tensor scale, sum, or contract 
int cdealloc(void *ptr)
free abstraction 
int64_t proc_bytes_available()
gives total memory available on this MPI process 
std::vector< CTF_int::topology * > topovec
derived topologies 
char * data
tensor data, either the data or the key-value pairs should exist at any given time ...
internal distributed tensor class 
int map_self_indices(tensor const *tsr, int const *idx_map)
create virtual mapping for idx_maps that have repeating indices 
topology * topo
topology to which the tensor is mapped 
int redistribute(distribution const &old_dist, int const *old_offsets=NULL, int *const *old_permutation=NULL, int const *new_offsets=NULL, int *const *new_permutation=NULL)
permutes the data of a tensor to its new layout 
bool has_zero_edge_len
if true, the tensor has a zero edge length, so it is identically zero and operations short-circuit 
char * name
name given to tensor 
int conv_idx(int order, type const *cidx, int **iidx)
converts a character index map to an integer index map 
void clear_mapping()
zeros out mapping