#include "../mapping/mapping.h"
#include "../mapping/distribution.h"
#include "../tensor/untyped_tensor.h"
#include "../shared/util.h"
#include "../shared/memcontrol.h"

// integer-index-map constructor: copy the caller's index map
idx_map = (int*)alloc(sizeof(int)*A->order);
memcpy(idx_map, idx_map_, sizeof(int)*A->order);

// character-index-map constructor: convert labels such as "ij" to integers
char const * cidx_map,
conv_idx(A->order, cidx_map, &idx_map);

// the custom-function (endomorphism) constructors repeat the same index-map handling
idx_map = (int*)alloc(sizeof(int)*A->order);
memcpy(idx_map, idx_map_, sizeof(int)*A->order);

char const * cidx_map,
conv_idx(A->order, cidx_map, &idx_map);
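// Usage sketch (an assumption, not part of this file): driving the two
// constructor flavors above, assuming scaling::execute() performs the mapped
// scaling. "T" and "alpha_buf" are hypothetical placeholders for a mapped
// CTF_int::tensor* and a scalar packed according to T->sr.
int usage_sketch(CTF_int::tensor * T, char const * alpha_buf){
  int idx[2] = {0, 1};                         // one integer label per mode
  CTF_int::scaling s_int(T, idx, alpha_buf);   // copies idx via alloc+memcpy
  CTF_int::scaling s_chr(T, "ij", alpha_buf);  // converts "ij" with conv_idx
  (void)s_chr;                                 // alternative spelling, same effect
  s_int.execute();                             // performs T["ij"] *= alpha
  return 0;
}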
int st, is_top, order_tot, iA, ret, itopo, btopo;
int64_t blk_sz, vrt_sz;
int * virt_dim, * idx_arr;
int * virt_blk_len, * blk_len;
int64_t memuse, bmemuse;
scl * hscl = NULL, ** rec_scl = NULL;
printf("Scaling tensor %s.\n", tsr->name);
printf("The index mapping is");
for (int i=0; i<tsr->order; i++){
  printf(" %d",idx_map[i]);
}

printf("Old mapping for tensor %s\n",tsr->name);
ntsr = new tensor(tsr, 0, 0);

        &order_tot, &idx_arr);

DPRINTF(1,"Not enough memory to scale tensor on topo %d\n", itopo);

} else if (memuse < bmemuse){

if (btopo == -1 || btopo == INT_MAX) {
  printf("ERROR: FAILED TO MAP TENSOR SCALE\n");

printf("New mapping for tensor %s\n",ntsr->name);

        &vrt_sz, virt_blk_len, blk_len);

        blk_len, &blk_sz, &str);
DPRINTF(2,"Stripping tensor\n");
for (int i=0; i<order_tot; i++){
    virt_dim[i] = map->np;
    if (st) virt_dim[i] = virt_dim[i]/str->strip_dim[iA];
    else virt_dim[i] = 1;
  }
  else virt_dim[i] = 1;
  nvirt *= virt_dim[i];
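// Standalone illustration (hypothetical values): nvirt is the product of the
// per-index virtual dimensions accumulated by the loop above.
int64_t nvirt_example(){
  int virt_dim[3] = {2, 1, 4};     // e.g. 2-way and 4-way virtualized modes
  int64_t nvirt = 1;
  for (int i=0; i<3; i++) nvirt *= virt_dim[i];
  return nvirt;                    // 8 virtual blocks per processor
}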
sclseq->alpha = alpha;

hscl->A = ntsr->data;
if (was_home && !ntsr->is_home){
  DPRINTF(2,"Migrating tensor %s back to home\n", tsr->name);
  if (old_dst != NULL) delete old_dst;

} else if (was_home){
  printf("Tensor %s is a copy of %s and did not leave home but buffer is %p was %p\n",
         ntsr->name, tsr->name, ntsr->data, tsr->data);

if (old_dst != NULL) delete old_dst;

printf("Done scaling tensor %s.\n", tsr->name);
bool has_rep_idx = false;
for (int i=0; i<A->order; i++){
  for (int j=0; j<i; j++){
    if (idx_map[i] == idx_map[j]) has_rep_idx = true;
  }
}

// custom-function case: scale each local nonzero, then apply func
#pragma omp parallel for
for (int64_t i=0; i<A->nnz_loc; i++){
  A->sr->mul(pi[i].d(), alpha, pi[i].d());
  func->apply_f(pi[i].d());

// plain case: scale each local nonzero
#pragma omp parallel for
for (int64_t i=0; i<A->nnz_loc; i++){
  A->sr->mul(pi[i].d(), alpha, pi[i].d());
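// Standalone analogue of the loops above, with plain doubles standing in for
// algstrct-typed values; Pair, pairs and nnz_loc are hypothetical.
struct PairSketch { int64_t k; double d; };
void scale_pairs(PairSketch * pairs, int64_t nnz_loc, double alpha){
  #pragma omp parallel for
  for (int64_t i=0; i<nnz_loc; i++){
    pairs[i].d *= alpha;   // plays the role of A->sr->mul(pi[i].d(), alpha, pi[i].d())
  }
}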
int rep_inds[A->order];
for (int i=0; i<A->order; i++){
  for (int j=0; j<A->order; j++){
    if (i!=j && idx_map[i] == idx_map[j]){
      rep_inds[nrep_idx] = i;
      nrep_idx++;

// strides for decoding a packed key into per-mode coordinates
int64_t ldas[A->order];
ldas[0] = 1;
for (int i=1; i<A->order; i++){
  ldas[i] = ldas[i-1]*A->lens[i-1];
}

#pragma omp parallel for
for (int64_t i=0; i<A->nnz_loc; i++){
  int64_t pkey[A->order];
  for (int j=0; j<nrep_idx; j++){
    pkey[rep_inds[j]] = (pi[i].k()/ldas[rep_inds[j]])%A->lens[rep_inds[j]];
    // repeated indices must agree: only (generalized-)diagonal entries are scaled
    for (int k=0; k<j; k++){
      if (idx_map[rep_inds[j]] == idx_map[rep_inds[k]] &&
          pkey[rep_inds[j]] != pkey[rep_inds[k]]){

  A->sr->mul(pi[i].d(), alpha, pi[i].d());
  func->apply_f(pi[i].d());
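// Worked example of the key decoding above: with ldas[0] = 1 and
// ldas[i] = ldas[i-1]*lens[i-1], coordinate d of a packed key k is
// (k/ldas[d]) % lens[d]. Hypothetical standalone program.
#include <cstdint>
#include <cstdio>
int main(){
  int64_t lens[3] = {4, 5, 6};
  int64_t ldas[3];
  ldas[0] = 1;
  for (int i=1; i<3; i++) ldas[i] = ldas[i-1]*lens[i-1];   // {1, 4, 20}
  int64_t k = 2 + 3*ldas[1] + 5*ldas[2];                   // encodes (2,3,5)
  for (int d=0; d<3; d++)
    printf("coord[%d] = %lld\n", d, (long long)((k/ldas[d])%lens[d]));
  return 0;                                                // prints 2, 3, 5
}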
char * home_buffer
buffer associated with home mapping of tensor, to which it is returned
CTF_int::CommData cdt
communicator data for MPI comm defining this world
bool is_home
whether the latest tensor data is in the home buffer
int64_t * nnz_blk
nonzero elements in each block owned locally
int * sym
symmetries among tensor dimensions
void calc_dim(int order, int64_t size, int const *edge_len, mapping const *edge_map, int64_t *vrt_sz, int *vrt_edge_len, int *blk_edge_len)
calculate the block-sizes of a tensor
untyped internal class for singly-typed single variable function (Endomorphism)
int * pad_edge_len
padded tensor edge lengths
void inv_idx(int order_A, int const *idx_A, int order_B, int const *idx_B, int order_C, int const *idx_C, int *order_tot, int **idx_arr)
invert index map
scaling(tensor *A, int const *idx_map, char const *alpha)
constructor defining scaling with A's mul and add ops
virtual void copy(char *a, char const *b) const
copies element b to element a
bool has_home
whether the tensor has a home mapping/buffer
int64_t size
current size of local tensor data chunk (mapping-dependent)
void * alloc(int64_t len)
alloc abstraction
int get_best_topo(int64_t nvirt, int topo, CommData global_comm, int64_t bcomm_vol, int64_t bmemuse)
get the best topology (least nvirt) over all procs
void set_new_nnz_glb(int64_t const *nnz_blk)
sets the number of nonzeros both locally (nnz_loc) and overall globally (nnz_tot) ...
virtual void dealloc(char *ptr) const
deallocate given pointer containing contiguous array of values
void copy_mapping(int order, mapping const *mapping_A, mapping *mapping_B)
copies mapping A to B
bool is_sparse
whether only the non-zero elements of the tensor are stored
int order
number of tensor dimensions
void set_padding()
sets padding and local size of a tensor given a mapping
CTF::World * wrld
distributed processor context on which tensor is defined
class for executing distributed scaling of a tensor
bool is_cyclic
whether the tensor data is cyclically distributed (blocked if false)
int strip_diag(int order, int order_tot, int const *idx_map, int64_t vrt_sz, mapping const *edge_map, topology const *topo, algstrct const *sr, int *blk_edge_len, int64_t *blk_sz, strp_tsr **stpr)
build stack required for stripping out diagonals of tensor
endomorphism const * func
custom elementwise function applied to each element (via apply_f)
int alloc_ptr(int64_t len, void **const ptr)
alloc abstraction
void sp_scl()
scales a sparse tensor
void print_map(FILE *stream=stdout, bool allcall=1) const
displays mapping information
bool is_data_aliased
whether the tensor data is an alias of another tensor object's data
int64_t k() const
returns key of pair at head of ptr
algstrct * sr
algstrct on which tensor elements and operations are defined
virtual void pair_dealloc(char *ptr) const
deallocate given pointer containing contiguous array of pairs
mapping * edge_map
mappings of each tensor dimension onto topology dimensions
void unfold(bool was_mod=0)
undo the folding of a local tensor block; unsets is_folded and deletes rec_tsr
bool is_mapped
whether a mapping has been selected
int map_tensor_rem(int num_phys_dims, CommData *phys_comm, int fill=0)
map the remainder of a tensor
int64_t calc_nvirt() const
calculate virtualization factor of tensor (returns the virtualization factor)
int check_self_mapping(tensor const *tsr, int const *idx_map)
checks mapping in preparation for tensor scale, sum, or contract
int cdealloc(void *ptr)
free abstraction
int64_t proc_bytes_available()
gives total memory available on this MPI process
std::vector< CTF_int::topology * > topovec
derived topologies
char * data
tensor data, either the data or the key-value pairs should exist at any given time ...
internal distributed tensor class
int map_self_indices(tensor const *tsr, int const *idx_map)
create virtual mapping for idx_maps that have repeating indices
topology * topo
topology to which the tensor is mapped
int redistribute(distribution const &old_dist, int const *old_offsets=NULL, int *const *old_permutation=NULL, int const *new_offsets=NULL, int *const *new_permutation=NULL)
permutes the data of a tensor to its new layout
bool has_zero_edge_len
if true, the tensor has a zero edge length and is therefore zero, which lets many operations short-circuit
char * name
name given to tensor
int conv_idx(int order, type const *cidx, int **iidx)
convert character index labels to integer indices (see the sketch after this list)
void clear_mapping()
zeros out mapping
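A standalone sketch of conv_idx's presumable behavior: assign consecutive integer ids to distinct index characters in order of first appearance, so "ij" becomes {0,1} and "ii" becomes {0,0}. Illustrative only, not the library's exact implementation.

#include <cstdlib>
int conv_idx_sketch(int order, char const * cidx, int ** iidx){
  *iidx = (int*)malloc(sizeof(int)*order);
  int n = 0;                                    // distinct labels seen so far
  for (int i=0; i<order; i++){
    int j;
    for (j=0; j<i; j++){
      if (cidx[j] == cidx[i]){ (*iidx)[i] = (*iidx)[j]; break; }
    }
    if (j == i) (*iidx)[i] = n++;               // new label gets the next id
  }
  return n;                                     // number of distinct indices
}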