3 #include "../interface/common.h"     6 #include "../tensor/untyped_tensor.h"    11   template<
typename dtype>
    15   template<
typename dtype>
    23     : 
CTF_int::
tensor(&sr, order, len, sym, &world, 1, name, profile) {
    27   template<
typename dtype>
    35     : 
CTF_int::
tensor(&sr, order, len, sym, &world, 1, name, profile) {
    40   template<
typename dtype>
    49     : 
CTF_int::
tensor(&sr, order, len, sym, &world, 1, name, profile, is_sparse) {
    53   template<
typename dtype>
    61     : 
CTF_int::
tensor(&sr, order, len, NULL, &world, 1, name, profile, is_sparse) {
    66   template<
typename dtype>
    73     : 
CTF_int::
tensor(&sr, order, len, NULL, &world, 1, name, profile) {
    78   template<
typename dtype>
    89     : 
CTF_int::
tensor(&sr_, order, 0, len, sym, &world, idx, prl, blk, name, profile) {
    93   template<
typename dtype>
   105     : 
CTF_int::
tensor(&sr_, order, is_sparse_, len, sym, &world, idx, prl, blk, name, profile) {
   110   template<
typename dtype>
   115   template<
typename dtype>
   119   template<
typename dtype>
   123   template<
typename dtype>
   128   template<
typename dtype>
   133   template<
typename dtype>
   140   template<
typename dtype>
   148   template<
typename dtype>
   151   template<
typename dtype>
   154     tensor::get_raw_data((
char**)&data, size);
   158   template<
typename dtype>
   160                                      int64_t ** global_idx,
   163                                      bool       unpack_sym)
 const {
   170     if (ret != 
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function read_local\n"); 
IASSERT(0); 
return; }
   174     for (i=0; i<(*npair); i++){
   175       (*global_idx)[
i] = pairs[
i].
k();
   181   template<
typename dtype>
   183                                  int64_t ** global_idx,
   185                                  bool       unpack_sym)
 const {
   189     if (ret != 
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function read_local\n"); 
IASSERT(0); 
return; }
   193     for (i=0; i<(*npair); i++){
   194       (*global_idx)[
i] = pairs[
i].
k();
   200   template<
typename dtype>
   204                                       bool           unpack_sym)
 const {
   212     if (ret != 
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function read_local\n"); 
IASSERT(0); 
return; }
   215   template<
typename dtype>
   218                                  bool           unpack_sym)
 const {
   222     if (ret != 
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function read_local\n"); 
IASSERT(0); 
return; }
   225   template<
typename dtype>
   227                            int64_t 
const * global_idx,
   233     for (i=0; i<npair; i++){
   234       pairs[
i].
k = global_idx[
i];
   235       pairs[
i].
d = data[
i];
   239     for (i=0; i<npair; i++){
   240       data[
i] = pairs[
i].
d;
   245   template<
typename dtype>
   250     char * cpairs = (
char*)pairs; 
   252     IASSERT(cpairs == (
char*)pairs);
   263   template<
typename dtype>
   265                             int64_t 
const * global_idx,
   270     for (i=0; i<npair; i++){
   271       pairs[
i].
k = global_idx[
i];
   272       pairs[
i].
d = data[
i];
   285   template<
typename dtype>
   290     char const * cpairs = (
char const*)pairs; 
   297   template<
typename dtype>
   301                             int64_t 
const * global_idx,
   307     for (i=0; i<npair; i++){
   308       pairs[
i].
k = global_idx[
i];
   309       pairs[
i].
d = data[
i];
   323   template<
typename dtype>
   328     char const * cpairs = (
char const*)pairs; 
   335   template<
typename dtype>
   339                            int64_t 
const * global_idx,
   344     for (i=0; i<npair; i++){
   345       pairs[
i].
k = global_idx[
i];
   346       pairs[
i].
d = data[
i];
   350     for (i=0; i<npair; i++){
   351       data[
i] = pairs[
i].
d;
   356   template<
typename dtype>
   361     char * cpairs = (
char*)pairs; 
   363     IASSERT(cpairs == (
char*)pairs);
   376   template<
typename dtype>
   380     if (ret != 
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function read_all\n"); 
IASSERT(0); 
return; }
   383   template<
typename dtype>
   391   template<
typename dtype>
   396   template<
typename dtype>
   401   template<
typename dtype>
   406   template<
typename dtype>
   411   template<
typename dtype>
   416   template<
typename dtype>
   423   template<
typename dtype>
   428   template<
typename dtype>
   431                               int * 
const *     perms_A,
   438   template<
typename dtype>
   444                                        perms_B, (
char*)&beta);
   448   template<
typename dtype>
   451     if (ret != 
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function sparsify\n"); 
IASSERT(0); 
return; }
   454   template<
typename dtype>
   457     if (ret != 
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function sparsify\n"); 
IASSERT(0); 
return; }
   460   template<
typename dtype>
   463     if (ret != 
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function sparisfy\n"); 
IASSERT(0); 
return; }
   466   template <
typename dtype>
   469     int64_t my_nvals = CTF_int::read_data_mpiio<dtype>(T->
wrld, fpath, &datastr);
   473     CTF_int::parse_sparse_tensor_data<dtype>(datastr, T->
order, (
dtype*)T->
sr->mulid(), T->
lens, my_nvals, pairs, with_vals);
   479     T->
write(my_nvals,pairs);
   481     T->
sr->pair_dealloc((
char*)pairs);
   486     read_sparse_from_file_base<int>(fpath, with_vals, 
this);
   491     read_sparse_from_file_base<double>(fpath, with_vals, 
this);
   496     read_sparse_from_file_base<float>(fpath, with_vals, 
this);
   501     read_sparse_from_file_base<int64_t>(fpath, with_vals, 
this);
   505   template <
typename dtype>
   512     char * datastr = CTF_int::serialize_sparse_tensor_data<dtype>(T->
order, T->
lens, my_nvals, pairs, with_vals, str_len);
   513     CTF_int::write_data_mpiio<dtype>(T->
wrld, fpath, datastr, str_len);
   515     T->
sr->pair_dealloc((
char*)pairs);
   520     write_sparse_to_file_base<int>(fpath, with_vals, 
this);
   525     write_sparse_to_file_base<double>(fpath, with_vals, 
this);
   530     write_sparse_to_file_base<float>(fpath, with_vals, 
this);
   535     write_sparse_to_file_base<int64_t>(fpath, with_vals, 
this);
   539   template<
typename dtype>
   553   template<
typename dtype>
   559   template<
typename dtype>
   573   template<
typename dtype>
   585   template<
typename dtype>
   590                             int const *             offsets_A,
   598         printf(
"CTF ERROR: number of processors should not match in slice if worlds are different\n");
   604           offsets, ends, (
char*)&beta, (
Tensor *)&A,
   605           offsets_A, ends_A, (
char*)&alpha);
   608           offsets, ends, (
char*)&beta, (
Tensor *)&A,
   609           offsets_A, ends_A, (
char*)&alpha);
   613   template<
typename dtype>
   618                             int64_t                 corner_off_A,
   619                             int64_t                 corner_end_A,
   621     int * offsets, * ends, * offsets_A, * ends_A;
   642   template<
typename dtype>
   644                                      int const * ends)
 const {
   649   template<
typename dtype>
   651                                      int64_t corner_end)
 const {
   653     return slice(corner_off, corner_end, 
wrld);
   656   template<
typename dtype>
   659                                      World *      owrld)
 const {
   663     for (i=0; i<
order; i++){
   664       if (!(ends[i] - offsets[i] > 0 &&
   666                   ends[i] <= 
lens[i])){
   667         printf(
"CTF ERROR: invalid slice dimensions\n");
   672         if (offsets[i] == offsets[i+1] && ends[i] == ends[i+1]){
   675           if (!(ends[i+1] >= offsets[i])){
   676             printf(
"CTF ERROR: slice dimensions don't respect tensor symmetry\n");
   682       } 
else new_sym[
i] = 
NS;
   683       new_lens[
i] = ends[
i] - offsets[
i];
   688     std::fill(new_sym, new_sym+order, 0);
   698   template<
typename dtype>
   701                                      World *  owrld)
 const {
   703     int * offsets, * ends;
   719   template<
typename dtype>
   722       printf(
"CTF ERROR: cannot align tensors on different CTF instances\n");
   730   template<
typename dtype>
   767           sr->
min((
char*)&minval);
   775           sr->
max((
char*)&maxval);
   783           sr->
min((
char*)&minval);
   791           sr->
max((
char*)&maxval);
   802   template<
typename dtype>
   814     for (
int i=0; 
i<A.order; 
i++){
   822   template<
typename dtype>
   825       printf(
"CTF ERROR: norm not available for the type of tensor %s\n",
name);
   829 #define NORM1_INST(dtype) \   831   inline void Tensor<dtype>::norm1(double & nrm){ \   832     real_norm1<dtype>(*this, nrm); \   843   template<typename 
dtype>
   844   static 
void real_norm2(
Tensor<
dtype> & A, 
double & nrm){
   846     for (
int i=0; 
i<A.order; 
i++){
   850     nrm = std::sqrt((
double)Function<dtype,double>([](
dtype a){ 
return (
double)(a*a); })(A[inds]));
   853   template<
typename dtype>
   854   static void complex_norm2(Tensor<dtype> & A, 
double & nrm){
   856     for (
int i=0; 
i<A.order; 
i++){
   859     nrm = std::sqrt((
double)Function<dtype,double>([](
dtype a){ 
return (
double)std::norm(a); })(A[inds]));
   863   template<
typename dtype>
   866       printf(
"CTF ERROR: norm not available for the type of tensor %s\n",
name);
   870 #define NORM2_REAL_INST(dtype) \   872   inline void Tensor<dtype>::norm2(double & nrm){ \   873     real_norm2<dtype>(*this, nrm); \   876 #define NORM2_COMPLEX_INST(dtype) \   878   inline void Tensor< std::complex<dtype> >::norm2(double & nrm){ \   879     complex_norm2< std::complex<dtype> >(*this, nrm); \   893   template<typename 
dtype>
   896       printf(
"CTF ERROR: norm not available for the type of tensor %s\n",
name);
   900 #define NORM_INFTY_INST(dtype) \   902   inline void Tensor<dtype>::norm_infty(double & nrm){ \   903     nrm = this->norm_infty(); \   915 #undef NORM2_REAL_INST   916 #undef NORM2_COMPLEX_INST   917 #undef NORM_INFTY_INST   919   template<
typename dtype>
   924     if (ret != 
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function get_max_abs\n"); 
IASSERT(0); 
return; }
   927   template<
typename dtype>
   930       printf(
"CTF ERROR: fill_random(rmin, rmax) not available for the type of tensor %s\n",
name);
   934   template <
typename dtype>
   937       printf(
"CTF ERROR: fill_random should not be called on a sparse tensor, use fill_random_sp instead\n");
   941     for (int64_t 
i=0; 
i<T.
size; 
i++){
   949     fill_random_base<double>(rmin, rmax, *
this);
   954     fill_random_base<float>(rmin, rmax, *
this);
   959     fill_random_base<int64_t>(rmin, rmax, *
this);
   964     fill_random_base<int>(rmin, rmax, *
this);
   968   template<
typename dtype>
   971       printf(
"CTF ERROR: fill_sp_random(rmin, rmax, frac_sp) not available for the type of tensor %s\n",
name);
   975   template <
typename dtype>
   977     int64_t tot_size = 1; 
   979     double sf = tot_size*frac_sp;
   982     for (
int i=2; 
i<20; 
i++){
   986     int64_t gen_size = (int64_t)(dg+.5);
   987     int64_t my_gen_size = gen_size/T->
wrld->
np;
   992     for (int64_t 
i=0; 
i<my_gen_size; 
i++){
   995     T->
write(my_gen_size,pairs);
   996     T->
sr->pair_dealloc((
char*)pairs);
  1019     fill_sp_random_base<double>(rmin, rmax, frac_sp, 
this);
  1024     fill_sp_random_base<float>(rmin, rmax, frac_sp, 
this);
  1029     fill_sp_random_base<int>(rmin, rmax, frac_sp, 
this);
  1034     fill_sp_random_base<int64_t>(rmin, rmax, frac_sp, 
this);
  1037   template<
typename dtype>
  1044                                const char *     idx_C){
  1046       printf(
"CTF ERROR: worlds of contracted tensors must match\n");
  1055   template<
typename dtype>
  1065       printf(
"CTF ERROR: worlds of contracted tensors must match\n");
  1070       = 
CTF_int::contraction(&A, idx_A, &B, idx_B, (
char const *)&alpha, 
this, idx_C, (
char const *)&beta, &fseq);
  1075   template<
typename dtype>
  1080                           const char *     idx_B){
  1082       printf(
"CTF ERROR: worlds of summed tensors must match\n");
  1094   template<
typename dtype>
  1102       printf(
"CTF ERROR: worlds of summed tensors must match\n");
  1112   template<
typename dtype>
  1114                             const char * idx_A){
  1120   template<
typename dtype>
  1128   template<
typename dtype>
  1138   template<
typename dtype>
  1140     set((
char const*)&val);
  1149   template<
typename dtype>
  1155                                     const char *     idx_C){
  1161   template<
typename dtype>
  1165                                     const char *     idx_B){
  1172   template<
typename dtype>
  1198   template<
typename dtype>
 
void read(int64_t num_pair, char const *alpha, char const *beta, int64_t const *inds, char *data)
read tensor data with <key, value> pairs where key is the global index for the value, which gets filled in with beta times the old values plus alpha times the values read from the tensor. 
int reduce_sumabs(char *result)
Performs an elementwise absolute value summation reduction on a tensor. 
void profile_off()
turns off profiling for tensor 
void contract(dtype alpha, CTF_int::tensor &A, char const *idx_A, CTF_int::tensor &B, char const *idx_B, dtype beta, char const *idx_C)
contracts C[idx_C] = beta*C[idx_C] + alpha*A[idx_A]*B[idx_B] 
OP
reduction types for tensor data deprecated types: OP_NORM1=OP_SUMABS, OP_NORM2=call norm2()...
custom scalar function on tensor: e.g. A["ij"] = f(A["ij"]) 
CTF_int::CommData cdt
communicator data for MPI comm defining this world 
int * sym
symmetries among tensor dimensions 
void execute()
run contraction 
virtual algstrct * clone() const  =0
''copy constructor'' 
void slice(int const *offsets_B, int const *ends_B, char const *beta, tensor *A, int const *offsets_A, int const *ends_A, char const *alpha)
accumulates out a slice (block) of this tensor (= B): B[offsets,ends) = beta*B[offsets,ends) + alpha*A[offsets_A,ends_A) 
int reduce_sum(char *result)
Performs an elementwise summation reduction on a tensor. 
void profile_on()
turns on profiling for tensor 
dtype d
tensor value associated with index 
#define NORM1_INST(dtype)
Ring class defined by a datatype and addition and multiplication functions; addition must have an ident...
virtual char * pair_alloc(int64_t n) const 
allocate space for n (int64_t,dtype) pairs, necessary for object types 
void add_to_subworld(Tensor< dtype > *tsr, dtype alpha, dtype beta)
accumulates this tensor to a tensor object defined on a different world 
Typ_Idx_Tensor< dtype > i(char const *idx_map)
void execute(bool run_diag=false)
run summation 
void get_local_pairs(int64_t *npair, Pair< dtype > **pairs, bool nonzeros_only=false, bool unpack_sym=false) const 
gives the global indices and values associated with the local data 
Tensor< dtype > slice(int const *offsets, int const *ends) const 
cuts out a slice (block) of this tensor, A[offsets,ends); the result will always be fully nonsymmetric ...
void real_norm1< bool >(Tensor< bool > &A, double &nrm)
void profile_off()
turn off profiling 
virtual bool is_ordered() const  =0
#define NORM2_COMPLEX_INST(dtype)
void permute(dtype beta, CTF_int::tensor &A, int *const *perms_A, dtype alpha)
Apply permutation to matrix, potentially extracting a slice B[i,j,...] = beta*B[...] + alpha*A[perms_A[0][i],perms_A[1][j],...]. 
double get_rand48()
returns new random number in [0,1) 
int sparsify(char const *threshold=NULL, bool take_abs=true)
reduce tensor to sparse format, storing only nonzero data, or data above a specified threshold...
int64_t size
current size of local tensor data chunk (mapping-dependent) 
Semiring is a Monoid with an additional multiplication function; addition must have an identity and be as...
void * alloc(int64_t len)
alloc abstraction 
dtype reduce(OP op)
performs a reduction on the tensor 
void read_all(int64_t *npair, dtype **data, bool unpack=false)
collects the entire tensor data on each process (not memory scalable) 
custom bivariate function on two tensors: e.g. C["ij"] = f(A["ik"],B["kj"]) 
virtual char const * addid() const 
MPI datatype for pairs. 
an instance of the CTF library (world) on a MPI communicator 
void set_name(char const *name)
sets tensor name 
bool is_sparse
whether only the non-zero elements of the tensor are stored 
int order
number of tensor dimensions 
int64_t k
key, global index [i1,i2,...] specified as i1+len[0]*i2+... 
void read_val(char *buf) const 
sets external value to the value pointed by the iterator 
void read_local(int64_t *npair, int64_t **global_idx, dtype **data, bool unpack_sym=false) const 
Using get_local_data(), which returns an array that must be freed with delete [], is more efficient...
void compare(const Tensor< dtype > &A, FILE *fp=stdout, double cutoff=-1.0)
prints two sets of tensor data side-by-side to file using process 0 
int permute(tensor *A, int *const *permutation_A, char const *alpha, int *const *permutation_B, char const *beta)
virtual char * alloc(int64_t n) const 
allocate space for n items, necessary for object types 
#define NORM_INFTY_INST(dtype)
double estimate_time()
predicts execution time in seconds using performance models 
dtype norm2()
computes the Frobenius norm of the tensor (needs sqrt()!) 
int align(tensor const *B)
align mapping of this tensor to that of B 
index-value pair used for tensor data input 
void read_sparse_from_file(const char *fpath, bool with_vals=true)
read sparse tensor from file, entries of tensor must be stored one per line, as i_1 ...
CTF::World * wrld
distributed processor context on which tensor is defined 
custom function f : X -> Y to be applied to tensor elements: e.g. B["ij"] = f(A["ij"]) ...
class for execution distributed scaling of a tensor 
a sparse subset of a tensor 
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements with random values in the range [min,max]; works only for dtype in {f...
int rank
rank of local processor 
virtual void min(char const *a, char const *b, char *c) const 
c = min(a,b) 
int write(int64_t num_pair, char const *alpha, char const *beta, char *mapped_data, char const rw='w')
Add tensor data new=alpha*new+beta*old with <key, value> pairs where key is the global index for the ...
class for execution distributed contraction of tensors 
void print(FILE *fp=stdout, char const *cutoff=NULL) const 
prints tensor data to file using process 0 
int zero_out_padding()
sets padded portion of tensor to zero (this should be maintained internally) 
int * lens
unpadded tensor edge lengths 
void add_from_subworld(tensor *tsr_sub, char const *alpha, char const *beta)
accumulates this tensor from a tensor object defined on a different world 
dtype norm_infty()
finds the max absolute value element of the tensor 
int allread(int64_t *num_pair, char **all_data, bool unpack)
read entire tensor with each processor (in packed layout). WARNING: will use an 'unscalable' amount o...
void sparsify()
reduce tensor to sparse format, storing only nonzero data, or data above a specified threshold...
~Tensor()
frees CTF tensor 
void align(CTF_int::tensor const &A)
aligns data mapping with tensor A 
Typ_Idx_Tensor< dtype > operator[](char const *idx_map)
associates an index map with the tensor for future operation 
int64_t k() const 
returns key of pair at head of ptr 
void compare(const tensor *A, FILE *fp, char const *cutoff)
prints two sets of tensor data side-by-side to file using process 0 
void print(FILE *fp, dtype cutoff) const 
prints tensor data to file using process 0 (modify print(...) overload in set.h if you would like a d...
algstrct * sr
algstrct on which tensor elements and operations are defined 
virtual void pair_dealloc(char *ptr) const 
deallocate given pointer containing contiguous array of pairs 
void read_sparse_from_file_base(const char *fpath, bool with_vals, Tensor< dtype > *T)
void fill_sp_random(dtype rmin, dtype rmax, double frac_sp)
generate roughly frac_sp*dense_tensor_size nonzeros between rmin and rmax, works only for dtype in {f...
double estimate_time()
predicts execution time in seconds using performance models 
void add_from_subworld(Tensor< dtype > *tsr, dtype alpha, dtype beta)
accumulates this tensor from a tensor object defined on a different world 
void fill_sp_random_base(dtype rmin, dtype rmax, double frac_sp, Tensor< dtype > *T)
void write_sparse_to_file(const char *fpath, bool with_vals=true)
write sparse tensor to file, entries of tensor will be stored one per line, as i_1 ...
void real_norm1(Tensor< dtype > &A, double &nrm)
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const 
Gives the global indices and values associated with the local data. 
Tensor()
default constructor 
virtual void max(char const *a, char const *b, char *c) const 
c = max(a,b) 
void fill_random_base(dtype rmin, dtype rmax, Tensor< dtype > &T)
void add_to_subworld(tensor *tsr_sub, char const *alpha, char const *beta)
accumulates this tensor to a tensor object defined on a different world 
int el_size
size of each element of algstrct in bytes 
bool profile
whether profiling should be done for contractions/sums involving this tensor 
int cdealloc(void *ptr)
free abstraction 
dtype * get_raw_data(int64_t *size) const 
gives the raw current local data with padding included 
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
void sum(dtype alpha, CTF_int::tensor &A, char const *idx_A, dtype beta, char const *idx_B)
sums B[idx_B] = beta*B[idx_B] + alpha*A[idx_A] 
char * data
tensor data, either the data or the key-value pairs should exist at any given time ...
A Monoid is a Set equipped with a binary addition operator '+' or a custom function addition must hav...
internal distributed tensor class 
dtype norm1()
computes the entrywise 1-norm of the tensor 
void profile_on()
turn on profiling 
dtype * get_mapped_data(char const *idx, Idx_Partition const &prl, Idx_Partition const &blk=Idx_Partition(), bool unpack=true)
returns local data of tensor with parallel distribution prl and local blocking blk ...
an instance of a tensor within a CTF world 
void read(int64_t npair, Pair< dtype > *pairs)
Gives the values associated with any set of indices. 
void scale(dtype alpha, char const *idx_A)
scales A[idx_A] = alpha*A[idx_A] 
Tensor< dtype > & operator=(dtype val)
sets all values in the tensor to val 
#define NORM2_REAL_INST(dtype)
class for execution distributed summation of tensors 
int read_local_nnz(int64_t *num_pair, char **mapped_data, bool unpack_sym=false) const 
read tensor data pairs local to processor that have nonzero values 
double estimate_time(CTF_int::tensor &A, char const *idx_A, CTF_int::tensor &B, char const *idx_B, char const *idx_C)
estimate the time of a contraction C[idx_C] = A[idx_A]*B[idx_B] 
void free_self()
destructor 
virtual char const * mulid() const 
identity element for multiplication i.e. 1 
int reduce_sumsq(char *result)
computes the sum of squares of the elements 
void write_sparse_to_file_base(const char *fpath, bool with_vals, Tensor< dtype > *T)
void copy_tensor_data(tensor const *other)
copies all tensor data from other 
void set_name(char const *name)
set the tensor name 
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices. The sparse data is defined in coordinate format...
char * name
name given to tensor 
int np
number of processors 
void init(algstrct const *sr, int order, int const *edge_len, int const *sym, CTF::World *wrld, bool alloc_data, char const *name, bool profile, bool is_sparse)
initializes tensor data 
MPI_Comm comm
set of processors making up this world 
void get_max_abs(int n, dtype *data) const 
obtains a small number of the biggest elements of the tensor in sorted order (e.g. eigenvalues) 
int read_local(int64_t *num_pair, char **mapped_data, bool unpack_sym=false) const 
read tensor data pairs local to processor including those with zero values WARNING: for sparse tensor...
int get_max_abs(int n, char *data) const 
obtains the largest n elements (in absolute value) of the tensor 
void cvrt_idx(int order, int const *lens, int64_t idx, int *idx_arr)