#ifndef __UNTYPED_TENSOR_H__
#define __UNTYPED_TENSOR_H__

#include "../mapping/mapping.h"
#include "../mapping/distribution.h"
#include "../interface/world.h"
#include "../interface/partition.h"

PairIterator read_all_pairs(int64_t * num_pair, bool unpack);
void copy_tensor_data(tensor const * other);
void set_distribution(char const * idx, /* ... */);
/* fragment of a longer declaration: ... int const * edge_len, ... bool alloc_data=true, char const * name=NULL, ... */
/* fragment of a longer declaration: ... int const * edge_len, ... char const * name=NULL, ... */
int * calc_phase() const;
int calc_tot_phase() const;
int64_t calc_nvirt() const;
int64_t calc_npe() const;
int set(char const * val);
int zero_out_padding();
void scale_diagonals(int const * sym_mask);
void print_map(FILE * stream=stdout, bool allcall=1) const;
void set_name(char const * name);
char const * get_name() const;
void get_raw_data(char ** data, int64_t * size) const;
int write(int64_t num_pair, /* ... */);
void write(int64_t num_pair, /* ... */ int64_t const * inds, /* ... */);
void read(int64_t num_pair, /* ... */ int64_t const * inds, /* ... */);
int read(int64_t num_pair, /* ... */);
char * read(char const * idx, /* ... */);
int read(int64_t num_pair, /* ... */);
int64_t get_tot_size(bool packed);
int allread(int64_t * num_pair, /* ... */);
int allread(int64_t * num_pair, /* ... */);
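A hedged sketch of how the bulk pair write/read declared above is normally driven from the typed CTF::Tensor wrapper, of which these untyped members are presumably the internal counterparts. The World and Tensor constructors, the NS symmetry constant, and the write/read signatures used here are assumed from CTF's public examples rather than taken from this header.

#include <ctf.hpp>
using namespace CTF;

int main(int argc, char ** argv){
  MPI_Init(&argc, &argv);
  {
    World dw(argc, argv);                 // distributed context (assumed constructor)
    int lens[2] = {4, 4};
    int syms[2] = {NS, NS};               // NS = nonsymmetric (assumed constant)
    Tensor<double> A(2, lens, syms, dw);

    // write() is collective: every rank passes a (possibly empty) list of
    // global-index/value pairs.
    if (dw.rank == 0){
      int64_t inds[2] = {0, 5};
      double  vals[2] = {1.0, 2.0};
      A.write(2, inds, vals);
    } else {
      A.write(0, NULL, NULL);
    }

    // read() is likewise collective and fills values for the requested indices.
    int64_t inds[2] = {0, 5};
    double  out[2]  = {0.0, 0.0};
    A.read(2, inds, out);
  }
  MPI_Finalize();
  return 0;
}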
void slice(int const * offsets_B, /* ... */ int const * offsets_A, /* ... */);
/* fragment of a longer declaration: ... int * const * permutation_A, ... int * const * permutation_B, ... */
int sparsify(char const * threshold=NULL, /* ... */);
int sparsify(std::function<bool(char const*)> f);
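The std::function overload of sparsify above takes a predicate over raw element bytes. A minimal sketch, assuming the tensor's algebraic structure stores doubles and that returning true means the element is retained:

#include <cmath>
#include <cstring>
#include <functional>

// Predicate for sparsify(std::function<bool(char const*)>): the argument points to
// one element in the algstrct's raw layout, decoded here as a double.
std::function<bool(char const *)> keep_large = [](char const * raw){
  double v;
  std::memcpy(&v, raw, sizeof(double));   // copy out to respect alignment/aliasing
  return std::fabs(v) > 1e-12;            // assumed: true => element is kept
};
// usage (t is a tensor*): t->sparsify(keep_large);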
int read_local(int64_t * num_pair, /* ... */ bool unpack_sym=false) const;
int read_local_nnz(int64_t * num_pair, /* ... */ bool unpack_sym=false) const;
void read_local(int64_t * num_pair, /* ... */ bool unpack_sym=false) const;
void read_local_nnz(int64_t * num_pair, /* ... */ bool unpack_sym=false) const;
int align(tensor const * B);
int reduce_sum(char * result);
int reduce_sum(char * result, algstrct const * sr_other);
int reduce_sumabs(char * result);
int reduce_sumabs(char * result, algstrct const * sr_other);
int reduce_sumsq(char * result);
int get_max_abs(int n, char * data) const;
void print(FILE * fp = stdout, char const * cutoff = NULL) const;
void compare(const tensor * A, FILE * fp, char const * cutoff);
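The reduce_* members above write their scalar result into a caller-supplied raw buffer. A hedged sketch of that calling pattern, assuming algstrct exposes an element size el_size and that the elements are doubles:

#include <cstring>
#include <vector>
// #include "untyped_tensor.h"   // internal header; the include path depends on the build tree

// Hedged sketch: t is a tensor*, and the buffer holds one element of t's algstrct.
double global_sum(tensor * t){
  std::vector<char> buf(t->sr->el_size);  // el_size assumed to exist on algstrct
  t->reduce_sum(buf.data());              // sums all elements across the tensor's world
  double s;
  std::memcpy(&s, buf.data(), sizeof(double));  // assumes a double-valued algstrct
  return s;
}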
void orient_subworld(CTF::World * greater_world, int & bw_mirror_rank, int & fw_mirror_rank, /* ... */ char ** sub_buffer_);
void add_to_subworld(tensor * tsr_sub, /* ... */);
void add_from_subworld(tensor * tsr_sub, /* ... */);
void unfold(bool was_mod=0);
double est_time_unfold();
/* fragment of a longer declaration: ... int const * fold_idx, ... */
void pull_alias(tensor const * other);
void clear_mapping();
/* fragment of a longer declaration: ... int const * old_offsets = NULL, int * const * old_permutation = NULL, int const * new_offsets = NULL, int * const * new_permutation = NULL); */
double est_redist_time(distribution const & old_dist, double nnz_frac);
int64_t get_redist_mem(distribution const & old_dist, double nnz_frac);
int map_tensor_rem(int num_phys_dims, /* ... */);
int extract_diag(int const * idx_map, /* ... */);
void set_sym(int const * sym);
void set_new_nnz_glb(int64_t const * nnz_blk);
void spmatricize(int m, int n, int nrow_idx, bool csr);
void despmatricize(int nrow_idx, bool csr);
void leave_home_with_buffer();
void register_size(int64_t size);
void deregister_size();
void write_dense_to_file(MPI_File & file, int64_t offset=0);
void write_dense_to_file(char const * filename);
void read_dense_from_file(MPI_File & file, int64_t offset=0);
void read_dense_from_file(char const * filename);
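A hedged sketch of driving the MPI-IO overload of write_dense_to_file declared above. The file-handling calls are standard MPI-IO; t is assumed to be a pointer to this internal tensor class.

#include <mpi.h>
// #include "untyped_tensor.h"   // internal header; the include path depends on the build tree

// Write the dense form of *t starting at byte offset 0 of a shared file.
void dump_dense(tensor * t, char const * path){
  MPI_File fh;
  MPI_File_open(MPI_COMM_WORLD, path,
                MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
  t->write_dense_to_file(fh, 0);   // presumably collective over the tensor's world
  MPI_File_close(&fh);
}
// The filename overload, t->write_dense_to_file(path), presumably wraps the same steps.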
template <typename dtype_A, typename dtype_B> /* ... */
template <typename dtype_A, typename dtype_B> void exp_helper(tensor * A);
template <typename dtype> /* ... (several templated helper declarations not preserved in this listing) */
template <typename dtype> void true_divide(tensor * A);
tensor * self_reduce(int const * idx_A, /* ... */ int const * idx_C=NULL, int ** new_idx_C=NULL);

#endif // __UNTYPED_TENSOR_H__

void permute(int order, int const *perm, int *arr)
permute an array
char * home_buffer
buffer associated with home mapping of tensor, to which it is returned
bool is_home
whether the latest tensor data is in the home buffer
int64_t * nnz_blk
nonzero elements in each block owned locally
int * sym
symmetries among tensor dimensions
int * pad_edge_len
padded tensor edge lengths
int * inner_ordering
ordering of the dimensions according to which the tensor is folded
bool has_home
whether the tensor has a home mapping/buffer
int64_t size
current size of local tensor data chunk (mapping-dependent)
bool is_csr
whether CSR or COO if folded
bool is_folded
whether the data is folded/transposed into a (lower-order) tensor
int64_t home_size
size of home buffer
bool is_sparse
whether only the non-zero elements of the tensor are stored
int order
number of tensor dimensions
CTF::World * wrld
distributed processor context on which tensor is defined
int is_scp_padded
whether tensor data has additional padding
bool is_cyclic
whether the tensor data is cyclically distributed (blocked if false)
int * padding
padding along each edge length (less than distribution phase)
int * lens
unpadded tensor edge lengths
int nrow_idx
how many modes are folded into matricized row
bool is_data_aliased
whether the tensor data is an alias of another tensor object's data
int64_t nnz_tot
maximum number of local nonzero elements over all procs
algstrct * sr
algstrct on which tensor elements and operations are defined
mapping * edge_map
mappings of each tensor dimension onto topology dimensions
tensor * rec_tsr
representation of folded tensor (shares data pointer)
bool left_home_transp
whether the tensor left home to transpose
bool is_mapped
whether a mapping has been selected
int64_t registered_alloc_size
size CTF keeps track of for memory usage
int * sym_table
order-by-order table of dimensional symmetry relations
int * scp_padding
additional padding, may be greater than ScaLAPACK phase
int64_t nnz_loc
number of local nonzero elements
bool profile
whether profiling should be done for contractions/sums involving this tensor
char * data
tensor data, either the data or the key-value pairs should exist at any given time ...
topology * topo
topology to which the tensor is mapped
bool has_zero_edge_len
if true, the tensor has a zero edge length and is therefore identically zero, which lets many operations short-circuit
tensor * slay
tensor object associated with this tensor whose data pointer needs to be preserved; needed for the ScaLAPACK wrapper. FIXME: the home buffer should presumably take care of this
char * name
name given to tensor
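To tie together the sparse bookkeeping fields above, a toy illustration (not CTF code): nnz_blk holds one nonzero count per locally owned virtual block, and nnz_loc is assumed to be their sum, with the block count corresponding to the value returned by calc_nvirt().

#include <cstdint>

// Hypothetical helper: recompute the local nonzero count from per-block counts.
int64_t sum_block_nnz(int64_t const * nnz_blk, int64_t nvirt){
  int64_t nnz_loc = 0;
  for (int64_t b = 0; b < nvirt; b++)
    nnz_loc += nnz_blk[b];   // per-block nonzero counts
  return nnz_loc;
}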