Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
CTF_int Namespace Reference

Data Structures

class  accumulatable
 abstract class that knows how to add More...
 
class  algstrct
 algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction, virtual classes defined in derived typed classes or algstrctcpy More...
 
class  Bifun_Term
 
class  bivar_function
 untyped internal class for triply-typed bivariate function More...
 
struct  BoolPair
 
class  CommData
 
struct  CompPair
 
struct  CompPtrPair
 
class  ConstPairIterator
 
class  Contract_Term
 An expression representing a contraction of a set of tensors contained in operands. More...
 
class  contraction
 class for executing distributed contraction of tensors More...
 
class  COO_Matrix
 serialized matrix in coordinate format, meaning three arrays of dimension nnz are stored, one of values, and two of row and column indices More...
 
class  CSR_Matrix
 abstraction for a serialized sparse matrix stored in compressed-sparse-row (CSR) layout More...
 
class  ctr
 
class  ctr_2d_general
 
class  ctr_replicate
 
class  ctr_virt
 
class  CubicModel
 Cubic performance model which, given measurements, provides a new model guess. More...
 
class  distribution
 
class  endomorphism
 untyped internal class for singly-typed single variable function (Endomorphism) More...
 
class  grid_wrapper
 
struct  int1
 
struct  int2
 
struct  IntPair
 
struct  iparam
 
class  LinModel
 Linear performance model which, given measurements, provides a new model guess. More...
 
class  mapping
 
struct  mem_loc
 
struct  mem_transfer
 
class  Model
 
class  offload_arr
 offloaded array/buffer More...
 
class  offload_tsr
 offloaded and serialized tensor data More...
 
class  PairIterator
 
class  scaling
 class for executing distributed scaling of a tensor More...
 
class  scl
 
class  scl_virt
 
class  seq_tsr_ctr
 
class  seq_tsr_scl
 
class  seq_tsr_spctr
 
class  seq_tsr_spsum
 
class  seq_tsr_sum
 
struct  ShortPair
 
class  spctr
 
class  spctr_2d_general
 
class  spctr_pin_keys
 
class  spctr_replicate
 
class  spctr_virt
 
class  strp_ctr
 
class  strp_scl
 
class  strp_sum
 
class  strp_tsr
 
class  Sum_Term
 
class  summation
 class for executing distributed summation of tensors More...
 
class  tensor
 internal distributed tensor class More...
 
struct  tensor_name_less
 comparison function for sets of tensor pointers. This ensures the set iteration order is consistent across nodes. More...
 
class  Term
 a term is an abstract object representing some expression of tensors More...
 
struct  time_param
 
class  topology
 
class  tspsum
 
class  tspsum_map
 
class  tspsum_permute
 
class  tspsum_pin_keys
 
class  tspsum_replicate
 performs replication along a dimension, generates 2.5D algs More...
 
class  tspsum_virt
 
class  tsum
 
class  tsum_replicate
 performs replication along a dimension, generates 2.5D algs More...
 
class  tsum_virt
 
class  Unifun_Term
 
class  univar_function
 untyped internal class for doubly-typed univariate function More...
 

Typedefs

typedef bool TYPE1
 
typedef int TYPE2
 
typedef int64_t TYPE3
 
typedef float TYPE4
 
typedef double TYPE5
 
typedef std::complex< float > TYPE6
 
typedef std::complex< double > TYPE7
 
typedef int16_t TYPE8
 
typedef int8_t TYPE9
 

Enumerations

enum  { SUCCESS, ERROR, NEGATIVE }
 
enum  map_type { NOT_MAPPED, PHYSICAL_MAP, VIRTUAL_MAP }
 
enum  TOPOLOGY {
  TOPOLOGY_GENERIC, TOPOLOGY_BGP, TOPOLOGY_BGQ, TOPOLOGY_8D,
  NO_TOPOLOGY
}
 

Functions

void update_all_models (MPI_Comm comm)
 
void factorize (int n, int *nfactor, int **factor)
 computes a factorization of the integer n (the factor count and the factors are returned via nfactor and factor) More...
 
template<typename ptype >
void get_perm (int perm_order, ptype A, ptype B, ptype C, ptype &tA, ptype &tB, ptype &tC)
 
void calc_fold_lnmk (tensor const *A, tensor const *B, tensor const *C, int const *idx_A, int const *idx_B, int const *idx_C, int const *ordering_A, int const *ordering_B, iparam *inner_prm)
 calculate the dimensions of the matrix the contraction gets reduced to (A, B, and C may be permuted) More...
 
void get_len_ordering (tensor const *A, tensor const *B, tensor const *C, int const *idx_A, int const *idx_B, int const *idx_C, int **new_ordering_A, int **new_ordering_B, int **new_ordering_C)
 find ordering of indices of tensor to reduce to DGEMM (A, B, and C may be permuted) More...
 
int ctr_2d_gen_build (int is_used, CommData global_comm, int i, int *virt_dim, int &cg_edge_len, int &total_iter, tensor *A, int i_A, CommData *&cg_cdt_A, int64_t &cg_ctr_lda_A, int64_t &cg_ctr_sub_lda_A, bool &cg_move_A, int *blk_len_A, int64_t &blk_sz_A, int const *virt_blk_len_A, int &load_phase_A, tensor *B, int i_B, CommData *&cg_cdt_B, int64_t &cg_ctr_lda_B, int64_t &cg_ctr_sub_lda_B, bool &cg_move_B, int *blk_len_B, int64_t &blk_sz_B, int const *virt_blk_len_B, int &load_phase_B, tensor *C, int i_C, CommData *&cg_cdt_C, int64_t &cg_ctr_lda_C, int64_t &cg_ctr_sub_lda_C, bool &cg_move_C, int *blk_len_C, int64_t &blk_sz_C, int const *virt_blk_len_C, int &load_phase_C)
 sets up a ctr_2d_general (2D SUMMA) level where A is not communicated; the function will be called with A/B/C permuted depending on the desired algorithm More...
 
void inv_idx (int order_A, int const *idx_A, int order_B, int const *idx_B, int order_C, int const *idx_C, int *order_tot, int **idx_arr)
 invert index map More...
 
template<int idim>
void spA_dnB_dnC_ctrloop (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, int order_A, int const *edge_len_A, int64_t const *lda_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
 
template<>
void spA_dnB_dnC_ctrloop< 0 > (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, int order_A, int const *edge_len_A, int64_t const *lda_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
 
template<>
void spA_dnB_dnC_ctrloop< MAX_ORD > (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, int order_A, int const *edge_len_A, int64_t const *lda_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
 
void spA_dnB_dnC_seq_ctr (char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, bivar_function const *func)
 
char * bcast_step (int edge_len, char *A, bool is_sparse_A, bool move_A, algstrct const *sr_A, int64_t b_A, int64_t s_A, char *buf_A, CommData *cdt_A, int64_t ctr_sub_lda_A, int64_t ctr_lda_A, int nblk_A, int64_t const *size_blk_A, int &new_nblk_A, int64_t *&new_size_blk_A, int64_t *offsets_A, int ib)
 
char * reduce_step_pre (int edge_len, char *C, bool is_sparse_C, bool move_C, algstrct const *sr_C, int64_t b_C, int64_t s_C, char *buf_C, CommData *cdt_C, int64_t ctr_sub_lda_C, int64_t ctr_lda_C, int nblk_C, int64_t const *size_blk_C, int &new_nblk_C, int64_t *&new_size_blk_C, int64_t *offsets_C, int ib, char const *&rec_beta)
 
void reduce_step_post (int edge_len, char *C, bool is_sparse_C, bool move_C, algstrct const *sr_C, int64_t b_C, int64_t s_C, char *buf_C, CommData *cdt_C, int64_t ctr_sub_lda_C, int64_t ctr_lda_C, int nblk_C, int64_t *size_blk_C, int &new_nblk_C, int64_t *&new_size_blk_C, int64_t *offsets_C, int ib, char const *&rec_beta, char const *beta, char *&up_C, char *&new_C, int n_new_C_grps, int &i_new_C_grp, char **new_C_grps)
 
int spctr_2d_gen_build (int is_used, CommData global_comm, int i, int *virt_dim, int &cg_edge_len, int &total_iter, tensor *A, int i_A, CommData *&cg_cdt_A, int64_t &cg_spctr_lda_A, int64_t &cg_spctr_sub_lda_A, bool &cg_move_A, int *blk_len_A, int64_t &blk_sz_A, int const *virt_blk_len_A, int &load_phase_A, tensor *B, int i_B, CommData *&cg_cdt_B, int64_t &cg_spctr_lda_B, int64_t &cg_spctr_sub_lda_B, bool &cg_move_B, int *blk_len_B, int64_t &blk_sz_B, int const *virt_blk_len_B, int &load_phase_B, tensor *C, int i_C, CommData *&cg_cdt_C, int64_t &cg_spctr_lda_C, int64_t &cg_spctr_sub_lda_C, bool &cg_move_C, int *blk_len_C, int64_t &blk_sz_C, int const *virt_blk_len_C, int &load_phase_C)
 
template<int idim>
void sym_seq_ctr_loop (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
 
template<>
void sym_seq_ctr_loop< 0 > (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
 
template void sym_seq_ctr_loop< MAX_ORD > (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
 
void compute_syoff (int r, int len, algstrct const *sr, int const *edge_len, int const *sym, uint64_t *offsets)
 
void compute_syoffs (algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, int tot_order, int const *rev_idx_map, uint64_t **&offsets_A, uint64_t **&offsets_B, uint64_t **&offsets_C)
 
int sym_seq_ctr_ref (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C)
 performs symmetric contraction with reference (unblocked) kernel More...
 
int sym_seq_ctr_cust (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, bivar_function const *func)
 performs symmetric contraction with custom elementwise function More...
 
int sym_seq_ctr_inr (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, iparam const *prm, bivar_function const *func)
 performs symmetric contraction with blocked gemm More...
 
void init_rng (int rank)
 initializes random number generator More...
 
double get_rand48 ()
 returns new random number in [0,1) More...
 
template<typename type >
int conv_idx (int order, type const *cidx, int **iidx)
 
template<typename type >
int conv_idx (int order_A, type const *cidx_A, int **iidx_A, int order_B, type const *cidx_B, int **iidx_B)
 
template<typename type >
int conv_idx (int order_A, type const *cidx_A, int **iidx_A, int order_B, type const *cidx_B, int **iidx_B, int order_C, type const *cidx_C, int **iidx_C)
 
template int conv_idx< int > (int, int const *, int **)
 
template int conv_idx< char > (int, char const *, int **)
 
template int conv_idx< int > (int, int const *, int **, int, int const *, int **)
 
template int conv_idx< char > (int, char const *, int **, int, char const *, int **)
 
template int conv_idx< int > (int, int const *, int **, int, int const *, int **, int, int const *, int **)
 
template int conv_idx< char > (int, char const *, int **, int, char const *, int **, int, char const *, int **)
 
void flops_add (int64_t n)
 
int64_t get_flops ()
 
void handler ()
 
void cvrt_idx (int order, int const *lens, int64_t idx, int *idx_arr)
 
void cvrt_idx (int order, int const *lens, int64_t idx, int **idx_arr)
 
void cvrt_idx (int order, int const *lens, int const *idx_arr, int64_t *idx)
 
bool get_mpi_dt (int64_t count, int64_t datum_size, MPI_Datatype &dt)
 gives a datatype for arbitrary datum_size, errors if exceeding 32-bits More...
 
int64_t sy_packed_size (int order, const int *len, const int *sym)
 computes the size of a tensor in SY (NOT HOLLOW) packed symmetric layout More...
 
int64_t packed_size (int order, const int *len, const int *sym)
 computes the size of a tensor in packed symmetric (SY, SH, or AS) layout More...
 
int alloc_ptr (int64_t const len_, void **const ptr)
 alloc abstraction More...
 
int mst_alloc_ptr (int64_t const len, void **const ptr)
 mst_alloc abstraction More...
 
void * alloc (int64_t const len)
 alloc abstraction More...
 
void * mst_alloc (int64_t const len)
 mst_alloc allocates buffer on the specialized memory stack More...
 
int cdealloc (void *ptr)
 free abstraction More...
 
template<typename dtype >
const char * get_fmt ()
 return format string for templated type More...
 
template<>
const char * get_fmt< float > ()
 
template<>
const char * get_fmt< double > ()
 
template<>
const char * get_fmt< int > ()
 
template<>
const char * get_fmt< int64_t > ()
 
template<typename dtype >
void parse_sparse_tensor_data (char **lvals, int order, dtype const *pmulid, int *lens, int64_t nvals, CTF::Pair< dtype > *pairs, bool with_vals)
 parse string containing sparse tensor into data More...
 
template<typename dtype >
char * serialize_sparse_tensor_data (int order, int *lens, int64_t nvals, CTF::Pair< dtype > *pairs, bool with_vals, int64_t &str_len)
 serialize sparse tensor data to create string More...
 
template<typename dtype >
int64_t read_data_mpiio (CTF::World const *dw, char const *fpath, char ***datastr)
 read sparse tensor data from file using MPI-I/O, creating string with one entry per line (different entries on each process) More...
 
template<typename dtype >
void write_data_mpiio (CTF::World const *dw, char const *fpath, char *datastr, int64_t str_len)
 write sparse tensor data to file using MPI-I/O, from string with one entry per line (different entries on each process) More...
 
template<typename dtype >
dtype default_add (dtype a, dtype b)
 
template<typename dtype , void(*)(int, dtype const *, dtype *) fxpy>
void default_mxpy (void *X, void *Y, int *n, MPI_Datatype *d)
 
template<typename dtype >
void default_fxpy (int n, dtype const *X, dtype *Y)
 
template<typename dtype >
MPI_Op get_default_maddop ()
 
template<>
MPI_Op get_default_maddop< char > ()
 
template<>
MPI_Op get_default_maddop< bool > ()
 
template<>
MPI_Op get_default_maddop< int > ()
 
template<>
MPI_Op get_default_maddop< int64_t > ()
 
template<>
MPI_Op get_default_maddop< unsigned int > ()
 
template<>
MPI_Op get_default_maddop< uint64_t > ()
 
template<>
MPI_Op get_default_maddop< float > ()
 
template<>
MPI_Op get_default_maddop< double > ()
 
template<>
MPI_Op get_default_maddop< long double > ()
 
template<>
MPI_Op get_default_maddop< std::complex< float > > ()
 
template<>
MPI_Op get_default_maddop< std::complex< double > > ()
 
template<typename dtype >
MPI_Op get_maddop (void(*fxpy)(int, dtype const *, dtype *))
 
CTF_int::algstrct const * get_float_ring ()
 
CTF_int::algstrct const * get_double_ring ()
 
CTF_int::algstrct const * get_int_ring ()
 
CTF_int::algstrct const * get_int64_t_ring ()
 
template<typename dtype >
void gemm_batch (char taA, char taB, int l, int m, int n, int k, dtype alpha, dtype const *A, dtype const *B, dtype beta, dtype *C)
 
template<typename dtype >
void gemm (char tA, char tB, int m, int n, int k, dtype alpha, dtype const *A, dtype const *B, dtype beta, dtype *C)
 
template<>
void default_axpy< float > (int n, float alpha, float const *X, int incX, float *Y, int incY)
 
template<>
void default_axpy< double > (int n, double alpha, double const *X, int incX, double *Y, int incY)
 
template<>
void default_axpy< std::complex< float > > (int n, std::complex< float > alpha, std::complex< float > const *X, int incX, std::complex< float > *Y, int incY)
 
template<>
void default_axpy< std::complex< double > > (int n, std::complex< double > alpha, std::complex< double > const *X, int incX, std::complex< double > *Y, int incY)
 
template<>
void default_scal< float > (int n, float alpha, float *X, int incX)
 
template<>
void default_scal< double > (int n, double alpha, double *X, int incX)
 
template<>
void default_scal< std::complex< float > > (int n, std::complex< float > alpha, std::complex< float > *X, int incX)
 
template<>
void default_scal< std::complex< double > > (int n, std::complex< double > alpha, std::complex< double > *X, int incX)
 
template<>
void default_coomm< float > (int m, int n, int k, float alpha, float const *A, int const *rows_A, int const *cols_A, int nnz_A, float const *B, float beta, float *C)
 
template<>
void default_coomm< double > (int m, int n, int k, double alpha, double const *A, int const *rows_A, int const *cols_A, int nnz_A, double const *B, double beta, double *C)
 
template<>
void default_coomm< std::complex< float > > (int m, int n, int k, std::complex< float > alpha, std::complex< float > const *A, int const *rows_A, int const *cols_A, int nnz_A, std::complex< float > const *B, std::complex< float > beta, std::complex< float > *C)
 
template<>
void default_coomm< std::complex< double > > (int m, int n, int k, std::complex< double > alpha, std::complex< double > const *A, int const *rows_A, int const *cols_A, int nnz_A, std::complex< double > const *B, std::complex< double > beta, std::complex< double > *C)
 
template<typename dtype >
void muladd_csrmm (int m, int n, int k, dtype alpha, dtype const *A, int const *JA, int const *IA, int nnz_A, dtype const *B, dtype beta, dtype *C)
 
template<typename dtype >
void muladd_csrmultd (int m, int n, int k, dtype const *A, int const *JA, int const *IA, int nnz_A, dtype const *B, int const *JB, int const *IB, int nnz_B, dtype *C)
 
template<typename dtype >
dtype default_mul (dtype a, dtype b)
 
template<typename dtype >
void default_axpy (int n, dtype alpha, dtype const *X, int incX, dtype *Y, int incY)
 
template<typename dtype >
void default_scal (int n, dtype alpha, dtype *X, int incX)
 
template<typename dtype >
void default_gemm (char tA, char tB, int m, int n, int k, dtype alpha, dtype const *A, dtype const *B, dtype beta, dtype *C)
 
template<typename dtype >
dtype ** get_grp_ptrs (int64_t grp_sz, int64_t ngrp, dtype const *data)
 
template<>
void default_gemm< float > (char tA, char tB, int m, int n, int k, float alpha, float const *A, float const *B, float beta, float *C)
 
template<>
void default_gemm< double > (char tA, char tB, int m, int n, int k, double alpha, double const *A, double const *B, double beta, double *C)
 
template<>
void default_gemm< std::complex< float > > (char tA, char tB, int m, int n, int k, std::complex< float > alpha, std::complex< float > const *A, std::complex< float > const *B, std::complex< float > beta, std::complex< float > *C)
 
template<>
void default_gemm< std::complex< double > > (char tA, char tB, int m, int n, int k, std::complex< double > alpha, std::complex< double > const *A, std::complex< double > const *B, std::complex< double > beta, std::complex< double > *C)
 
template<typename dtype >
void default_gemm_batch (char taA, char taB, int l, int m, int n, int k, dtype alpha, dtype const *A, dtype const *B, dtype beta, dtype *C)
 
template<>
void default_gemm_batch< float > (char taA, char taB, int l, int m, int n, int k, float alpha, float const *A, float const *B, float beta, float *C)
 
template<>
void default_gemm_batch< double > (char taA, char taB, int l, int m, int n, int k, double alpha, double const *A, double const *B, double beta, double *C)
 
template<>
void default_gemm_batch< std::complex< float > > (char taA, char taB, int l, int m, int n, int k, std::complex< float > alpha, std::complex< float > const *A, std::complex< float > const *B, std::complex< float > beta, std::complex< float > *C)
 
template<>
void default_gemm_batch< std::complex< double > > (char taA, char taB, int l, int m, int n, int k, std::complex< double > alpha, std::complex< double > const *A, std::complex< double > const *B, std::complex< double > beta, std::complex< double > *C)
 
template<typename dtype >
void default_coomm (int m, int n, int k, dtype alpha, dtype const *A, int const *rows_A, int const *cols_A, int nnz_A, dtype const *B, dtype beta, dtype *C)
 
bool try_mkl_coo_to_csr (int64_t nz, int nrow, char *csr_vs, int *csr_ja, int *csr_ia, char const *coo_vs, int const *coo_rs, int const *coo_cs, int el_size)
 
bool try_mkl_csr_to_coo (int64_t nz, int nrow, char const *csr_vs, int const *csr_ja, int const *csr_ia, char *coo_vs, int *coo_rs, int *coo_cs, int el_size)
 
template<typename dtype >
void seq_coo_to_csr (int64_t nz, int nrow, dtype *csr_vs, int *csr_ja, int *csr_ia, dtype const *coo_vs, int const *coo_rs, int const *coo_cs)
 
template<typename dtype >
void seq_csr_to_coo (int64_t nz, int nrow, dtype const *csr_vs, int const *csr_ja, int const *csr_ia, dtype *coo_vs, int *coo_rs, int *coo_cs)
 
template<typename dtype >
void def_coo_to_csr (int64_t nz, int nrow, dtype *csr_vs, int *csr_ja, int *csr_ia, dtype const *coo_vs, int const *coo_rs, int const *coo_cs)
 
template<typename dtype >
void def_csr_to_coo (int64_t nz, int nrow, dtype const *csr_vs, int const *csr_ja, int const *csr_ia, dtype *coo_vs, int *coo_rs, int *coo_cs)
 
template<typename dtype >
dtype default_addinv (dtype a)
 
template<typename dtype , bool is_ord>
std::enable_if< is_ord, dtype >::type default_abs (dtype a)
 
template<typename dtype , bool is_ord>
std::enable_if<!is_ord, dtype >::type default_abs (dtype a)
 
template<typename dtype , dtype(*)(dtype) abs>
void char_abs (char const *a, char *b)
 
template<typename dtype , bool is_ord>
std::enable_if< is_ord, dtype >::type default_min (dtype a, dtype b)
 
template<typename dtype , bool is_ord>
std::enable_if<!is_ord, dtype >::type default_min (dtype a, dtype b)
 
template<typename dtype , bool is_ord>
std::enable_if< is_ord, dtype >::type default_max_lim ()
 
template<typename dtype , bool is_ord>
std::enable_if<!is_ord, dtype >::type default_max_lim ()
 
template<typename dtype , bool is_ord>
std::enable_if< is_ord, dtype >::type default_min_lim ()
 
template<typename dtype , bool is_ord>
std::enable_if<!is_ord, dtype >::type default_min_lim ()
 
template<typename dtype , bool is_ord>
std::enable_if< is_ord, dtype >::type default_max (dtype a, dtype b)
 
template<typename dtype , bool is_ord>
std::enable_if<!is_ord, dtype >::type default_max (dtype a, dtype b)
 
template<typename dtype >
MPI_Datatype get_default_mdtype (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< bool > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< std::complex< double > > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< std::complex< long double > > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< char > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< int > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< int64_t > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< unsigned int > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< uint64_t > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< float > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< double > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< long double > (bool &is_custom)
 
template<>
MPI_Datatype get_default_mdtype< std::complex< float > > (bool &is_custom)
 
template<typename dtype >
constexpr bool get_default_is_ord ()
 
Idx_Tensor * get_full_intm (Idx_Tensor &A, Idx_Tensor &B, std::vector< char > out_inds, bool create_dummy=false)
 
std::vector< char > det_uniq_inds (std::vector< Term * > const operands, std::vector< char > const out_inds)
 
std::vector< Term * > contract_down_terms (algstrct *sr, char *tscale, std::vector< Term * > operands, std::vector< char > out_inds, int terms_to_leave, bool est_time=false, double *cost=NULL)
 
void operator-= (double &d, CTF_int::Term const &tsr)
 
void operator+= (double &d, CTF_int::Term const &tsr)
 
void operator-= (int64_t &d, CTF_int::Term const &tsr)
 
void operator+= (int64_t &d, CTF_int::Term const &tsr)
 
CTF_int::Contract_Term operator* (double const &d, CTF_int::Term const &tsr)
 
CTF_int::Contract_Term operator* (int64_t const &i, CTF_int::Term const &tsr)
 
void calc_dim (int order, int64_t size, int const *edge_len, mapping const *edge_map, int64_t *vrt_sz, int *vrt_edge_len, int *blk_edge_len)
 calculate the block-sizes of a tensor More...
 
int get_distribution_size (int order)
 
int comp_dim_map (mapping const *map_A, mapping const *map_B)
 compares two mappings More...
 
void copy_mapping (int order, mapping const *mapping_A, mapping *mapping_B)
 copies mapping A to B More...
 
int copy_mapping (int order_A, int order_B, int const *idx_A, mapping const *mapping_A, int const *idx_B, mapping *mapping_B, int make_virt=1)
 copies mapping A to B More...
 
int map_tensor (int num_phys_dims, int tsr_order, int const *tsr_edge_len, int const *tsr_sym_table, int *restricted, CommData *phys_comm, int const *comm_idx, int fill, mapping *tsr_edge_map)
 map a tensor More...
 
int check_self_mapping (tensor const *tsr, int const *idx_map)
 checks mapping in preparation for tensor scale, sum, or contract More...
 
int map_self_indices (tensor const *tsr, int const *idx_map)
 create virtual mapping for idx_maps that have repeating indices More...
 
int map_symtsr (int tsr_order, int const *tsr_sym_table, mapping *tsr_edge_map)
 adjust a mapping to maintain symmetry More...
 
int stretch_virt (int order, int stretch_factor, mapping *maps)
 stretch virtualization by a factor More...
 
topology * get_phys_topo (CommData glb_comm, TOPOLOGY mach)
 get dimension and torus lengths of specified topology More...
 
std::vector< topology * > get_all_topos (CommData cdt, int n_uf, int const *uniq_fact, int const *mults, int n_prepend, int const *prelens)
 computes all unique factorizations into non-primes each yielding a topology, prepending additional factors as specified More...
 
std::vector< topology * > get_generic_topovec (CommData cdt)
 computes all topology configurations given underlying physical topology information More...
 
std::vector< topology * > peel_perm_torus (topology *phys_topology, CommData cdt)
 folds specified topology and all of its permutations into all configurations of lesser dimensionality More...
 
std::vector< topology * > peel_torus (topology const *topo, CommData glb_comm)
 folds specified topology into all configurations of lesser dimensionality More...
 
int find_topology (topology const *topo, std::vector< topology * > &topovec)
 searches for an equivalent topology in a vector of topologies More...
 
int get_best_topo (int64_t nvirt, int topo, CommData global_comm, int64_t bcomm_vol=0, int64_t bmemuse=0)
 get the best topologies (least nvirt) over all procs More...
 
void extract_free_comms (topology const *topo, int order_A, mapping const *edge_map_A, int order_B, mapping const *edge_map_B, int &num_sub_phys_dims, CommData **psub_phys_comm, int **pcomm_idx)
 extracts the set of physical dimensions still available for mapping More...
 
int can_morph (topology const *topo_keep, topology const *topo_change)
 determines if two topologies are compatible with each other More...
 
void morph_topo (topology const *new_topo, topology const *old_topo, int order, mapping *edge_map)
 morphs a tensor topology into another More...
 
void pad_cyclic_pup_virt_buff (int const *sym, distribution const &old_dist, distribution const &new_dist, int const *len, int const *old_phys_dim, int const *old_phys_edge_len, int const *old_virt_edge_len, int64_t old_virt_nelem, int const *old_offsets, int *const *old_permutation, int total_np, int const *new_phys_dim, int const *new_phys_edge_len, int const *new_virt_edge_len, int64_t new_virt_nelem, char *old_data, char **new_data, int forward, int *const *bucket_offset, char const *alpha, char const *beta, algstrct const *sr)
 
void cyclic_reshuffle (int const *sym, distribution const &old_dist, int const *old_offsets, int *const *old_permutation, distribution const &new_dist, int const *new_offsets, int *const *new_permutation, char **tsr_data, char **tsr_cyclic_data, algstrct const *sr, CommData ord_glb_comm, bool reuse_buffers, char const *alpha, char const *beta)
 Goes from any set of phases to any new set of phases. More...
 
template<int idim>
void redist_bucket (int *const *bucket_offset, int64_t *const *data_offset, int *const *ivmax_pre, int rep_phase0, int virt_dim0, bool data_to_buckets, char *__restrict__ data, char **__restrict__ buckets, int64_t *counts, algstrct const *sr, int64_t data_off, int bucket_off, int prev_idx)
 
template<>
void redist_bucket< 0 > (int *const *bucket_offset, int64_t *const *data_offset, int *const *ivmax_pre, int rep_phase0, int virt_dim0, bool data_to_buckets, char *__restrict__ data, char **__restrict__ buckets, int64_t *counts, algstrct const *sr, int64_t data_off, int bucket_off, int prev_idx)
 
void redist_bucket_r0 (int *const *bucket_offset, int64_t *const *data_offset, int *const *ivmax_pre, int rep_phase0, int rep_idx0, int virt_dim0, bool data_to_buckets, char *__restrict__ data, char **__restrict__ buckets, int64_t *counts, algstrct const *sr, int64_t data_off, int bucket_off, int prev_idx)
 
int get_glb (int i, int s, int t)
 
int get_loc (int g, int s, int t)
 
template<int idim>
int64_t calc_cnt (int const *sym, int const *rep_phase, int const *sphase, int const *gidx_off, int const *edge_len, int const *loc_edge_len)
 computes the cardinality of the set of elements of a tensor of order idim+1 that are owned by processor index gidx_off in a distribution with dimensions sphase More...
 
template<>
int64_t calc_cnt< 0 > (int const *sym, int const *rep_phase, int const *sphase, int const *gidx_off, int const *edge_len, int const *loc_edge_len)
 
template<int idim>
int64_t * calc_sy_pfx (int const *sym, int const *rep_phase, int const *sphase, int const *gidx_off, int const *edge_len, int const *loc_edge_len)
 computes the cardinality of the sets of elements of a tensor of order idim+1 for different values of the idim'th tensor dimension More...
 
template<>
int64_t * calc_sy_pfx< 1 > (int const *sym, int const *rep_phase, int const *sphase, int const *gidx_off, int const *edge_len, int const *loc_edge_len)
 
template<int idim>
void calc_drv_cnts (int order, int const *sym, int64_t *counts, int const *rep_phase, int const *rep_phase_lda, int const *sphase, int const *phys_phase, int *gidx_off, int const *edge_len, int const *loc_edge_len)
 
template<>
void calc_drv_cnts< 0 > (int order, int const *sym, int64_t *counts, int const *rep_phase, int const *rep_phase_lda, int const *sphase, int const *phys_phase, int *gidx_off, int const *edge_len, int const *loc_edge_len)
 
template<int idim>
void calc_cnt_from_rep_cnt (int const *rep_phase, int *const *pe_offset, int *const *bucket_offset, int64_t const *old_counts, int64_t *counts, int bucket_off, int pe_off, int dir)
 
template<>
void calc_cnt_from_rep_cnt< 0 > (int const *rep_phase, int *const *pe_offset, int *const *bucket_offset, int64_t const *old_counts, int64_t *counts, int bucket_off, int pe_off, int dir)
 
void calc_drv_displs (int const *sym, int const *edge_len, distribution const &old_dist, distribution const &new_dist, int64_t *counts, int idx_lyr)
 
void precompute_offsets (distribution const &old_dist, distribution const &new_dist, int const *sym, int const *len, int const *rep_phase, int const *phys_edge_len, int const *virt_edge_len, int const *virt_dim, int const *virt_lda, int64_t virt_nelem, int **pe_offset, int **bucket_offset, int64_t **data_offset, int **ivmax_pre)
 
double dgtog_est_time (int64_t tot_sz, int np)
 estimates execution time, given this processor sends and receives tot_sz across np procs More...
 
void dgtog_reshuffle (int const *sym, int const *edge_len, distribution const &old_dist, distribution const &new_dist, char **ptr_tsr_data, char **ptr_tsr_new_data, algstrct const *sr, CommData ord_glb_comm)
 
void glb_ord_pup (int const *sym, distribution const &old_dist, distribution const &new_dist, int const *len, int const *old_phys_dim, int const *old_phys_edge_len, int const *old_virt_edge_len, int64_t old_virt_nelem, int const *old_offsets, int *const *old_permutation, int total_np, int const *new_phys_dim, int const *new_phys_edge_len, int const *new_virt_edge_len, int64_t new_virt_nelem, char *old_data, char **new_data, int forward, int *const *bucket_offset, char const *alpha, char const *beta, algstrct const *sr)
 
template<int idim>
void ord_glb (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int prev_idx=0, int64_t glb_ord_offset=0, int64_t blk_ord_offset=0)
 
template<>
void ord_glb< 0 > (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int prev_idx, int64_t glb_ord_offset, int64_t blk_ord_offset)
 
template void ord_glb< 7 > (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int prev_idx, int64_t glb_ord_offset, int64_t blk_ord_offset)
 
template<int idim>
void ord_glb_omp (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int const *idx_st, int const *idx_end, int prev_idx=0, int64_t glb_ord_offset=0, int64_t blk_ord_offset=0)
 
template<>
void ord_glb_omp< 0 > (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int const *idx_st, int const *idx_end, int prev_idx, int64_t glb_ord_offset, int64_t blk_ord_offset)
 
template void ord_glb_omp< 7 > (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int const *idx_st, int const *idx_end, int prev_idx, int64_t glb_ord_offset, int64_t blk_ord_offset)
 
void order_globally (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr)
 reorder local buffer so that elements are ordered according to where they are in the global tensor (interleave virtual blocks) More...
 
char * glb_cyclic_reshuffle (int const *sym, distribution const &old_dist, int const *old_offsets, int *const *old_permutation, distribution const &new_dist, int const *new_offsets, int *const *new_permutation, char **ptr_tsr_data, char **ptr_tsr_cyclic_data, algstrct const *sr, CommData ord_glb_comm, bool reuse_buffers, char const *alpha, char const *beta)
 Goes from any set of phases to any new set of phases. More...
 
bool hptt_is_applicable (int order, int const *new_order, int elementSize)
 Checks if the HPTT library is applicable. More...
 
void nosym_transpose_hptt (int order, int const *st_new_order, int const *st_edge_len, int dir, char const *st_buffer, char *new_buffer, algstrct const *sr)
 
void nosym_transpose (tensor *A, int all_fdim_A, int const *all_flen_A, int const *new_order, int dir)
 
void nosym_transpose (int order, int const *new_order, int const *edge_len, char *data, int dir, algstrct const *sr)
 transposes a non-symmetric (folded) tensor More...
 
void nosym_transpose (int order, int const *new_order, int const *edge_len, char const *data, int dir, int max_ntd, char **tswap_data, int64_t *chunk_size, algstrct const *sr)
 transposes a non-symmetric (folded) tensor internal kernel More...
 
double est_time_transp (int order, int const *new_order, int const *edge_len, int dir, algstrct const *sr)
 estimates time needed to transpose a non-symmetric (folded) tensor based on performance models More...
 
void nosym_transpose_hptt (int order, int const *edge_len, int dir, tensor *&A)
 High-performance implementation of nosym_transpose using HPTT. More...
 
void pad_key (int order, int64_t num_pair, int const *edge_len, int const *padding, PairIterator pairs, algstrct const *sr, int const *offsets=NULL)
 applies padding to keys More...
 
void depad_tsr (int order, int64_t num_pair, int const *edge_len, int const *sym, int const *padding, int const *prepadding, char const *pairsb, char *new_pairsb, int64_t *new_num_pair, algstrct const *sr)
 retrieves the unpadded pairs More...
 
void zero_padding (int order, int64_t size, int nvirt, int const *edge_len, int const *sym, int const *padding, int const *phase, int const *phys_phase, int const *virt_phase, int const *cphase_rank, char *vdata, algstrct const *sr)
 sets to zero all values in padded region of tensor More...
 
void scal_diag (int order, int64_t size, int nvirt, int const *edge_len, int const *sym, int const *padding, int const *phase, int const *phys_phase, int const *virt_phase, int const *cphase_rank, char *vdata, algstrct const *sr, int const *sym_mask)
 scales each element by 1/(number of entries equivalent to it after permutation of indices for which sym_mask is 1) More...
 
void padded_reshuffle (int const *sym, distribution const &old_dist, distribution const &new_dist, char *tsr_data, char **tsr_cyclic_data, algstrct const *sr, CommData ord_glb_comm)
 Reshuffle elements using key-value pair read/write. More...
 
int ** compute_bucket_offsets (distribution const &old_dist, distribution const &new_dist, int const *len, int const *old_phys_edge_len, int const *old_virt_lda, int const *old_offsets, int *const *old_permutation, int const *new_phys_edge_len, int const *new_virt_lda, int forward, int old_virt_np, int new_virt_np, int const *old_virt_edge_len)
 computes offsets for redistribution targets along each edge length More...
 
void calc_cnt_displs (int const *sym, distribution const &old_dist, distribution const &new_dist, int new_nvirt, int np, int const *old_virt_edge_len, int const *new_virt_lda, int64_t *send_counts, int64_t *recv_counts, int64_t *send_displs, int64_t *recv_displs, CommData ord_glb_comm, int idx_lyr, int *const *bucket_offset)
 assigns keys to an array of values More...
 
double blres_est_time (int64_t tot_sz, int nv0, int nv1)
 estimates execution time, given this processor sends and receives tot_sz across np procs More...
 
void block_reshuffle (distribution const &old_dist, distribution const &new_dist, char *tsr_data, char *&tsr_cyclic_data, algstrct const *sr, CommData glb_comm)
 Reshuffle elements by block given the global phases stay the same. More...
 
int can_block_reshuffle (int order, int const *old_phase, mapping const *map)
 determines if tensor can be permuted by block More...
 
void permute_keys (int order, int num_pair, int const *edge_len, int const *new_edge_len, int *const *permutation, char *pairs, int64_t *new_num_pair, algstrct const *sr)
 permutes keys More...
 
void depermute_keys (int order, int num_pair, int const *edge_len, int const *new_edge_len, int *const *permutation, char *pairs, algstrct const *sr)
 depermutes keys (apply P^T) More...
 
void assign_keys (int order, int64_t size, int nvirt, int const *edge_len, int const *sym, int const *phase, int const *phys_phase, int const *virt_dim, int *phase_rank, char const *vdata, char *vpairs, algstrct const *sr)
 assigns keys to an array of values More...
 
void spsfy_tsr (int order, int64_t size, int nvirt, int const *edge_len, int const *sym, int const *phase, int const *phys_phase, int const *virt_dim, int *phase_rank, char const *vdata, char *&vpairs, int64_t *nnz_blk, algstrct const *sr, int64_t const *edge_lda, std::function< bool(char const *)> f)
 extracts all tensor values (in pair format) that pass a sparsifier function (including padded zeros if they pass the filter) More...
 
void bucket_by_pe (int order, int64_t num_pair, int64_t np, int const *phys_phase, int const *virt_phase, int const *bucket_lda, int const *edge_len, ConstPairIterator mapped_data, int64_t *bucket_counts, int64_t *bucket_off, PairIterator bucket_data, algstrct const *sr)
 buckets key-value pairs by processor according to distribution More...
 
int64_t * bucket_by_virt (int order, int num_virt, int64_t num_pair, int const *phys_phase, int const *virt_phase, int const *edge_len, ConstPairIterator mapped_data, PairIterator bucket_data, algstrct const *sr)
 buckets key value pairs by block/virtual-processor More...
 
void readwrite (int order, int64_t size, char const *alpha, char const *beta, int nvirt, int const *edge_len, int const *sym, int const *phase, int const *phys_phase, int const *virt_dim, int *phase_rank, char *vdata, char *pairs, char rw, algstrct const *sr)
 read or write pairs from / to tensor More...
 
void wr_pairs_layout (int order, int np, int64_t inwrite, char const *alpha, char const *beta, char rw, int num_virt, int const *sym, int const *edge_len, int const *padding, int const *phase, int const *phys_phase, int const *virt_phase, int *virt_phys_rank, int const *bucket_lda, char *wr_pairs_buf, char *rw_data, CommData glb_comm, algstrct const *sr, bool is_sparse, int64_t nnz_loc, int64_t *nnz_blk, char *&pprs_new, int64_t &nnz_loc_new)
 read or write pairs from / to tensor More...
 
void read_loc_pairs (int order, int64_t nval, int num_virt, int const *sym, int const *edge_len, int const *padding, int const *phase, int const *phys_phase, int const *virt_phase, int *phase_rank, int64_t *nread, char const *data, char **pairs, algstrct const *sr)
 read tensor pairs local to processor More...
 
void sp_read (algstrct const *sr, int64_t ntsr, ConstPairIterator prs_tsr, char const *alpha, int64_t nread, PairIterator prs_read, char const *beta)
 reads elements of a sparse set defining the tensor, into a sparse read set with potentially repeating keys More...
 
void sp_write (int num_virt, algstrct const *sr, int64_t *vntsr, ConstPairIterator vprs_tsr, char const *beta, int64_t *vnwrite, ConstPairIterator vprs_write, char const *alpha, int64_t *vnnew, char *&pprs_new)
 writes pairs in a sparse write set to the sparse set of elements defining the tensor, resulting in a set of size between ntsr and ntsr+nwrite More...
 
void inv_idx (int order_A, int const *idx_A, int *order_tot, int **idx_arr)
 invert index map More...
 
int strip_diag (int order, int order_tot, int const *idx_map, int64_t vrt_sz, mapping const *edge_map, topology const *topo, algstrct const *sr, int *blk_edge_len, int64_t *blk_sz, strp_tsr **stpr)
 build stack required for stripping out diagonals of tensor More...
 
int sym_seq_scl_ref (char const *alpha, char *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A)
 performs symmetric scaling using algstrct const * sr_A More...
 
int sym_seq_scl_cust (char const *alpha, char *A, algstrct const *sr_A, int const order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, endomorphism const *func)
 performs symmetric scaling using custom func More...
 
void inc_tot_mem_used (int64_t a)
 
void set_mem_size (int64_t size)
 sets what fraction of the memory capacity CTF can use More...
 
void set_memcap (double cap)
 sets what fraction of the memory capacity CTF can use More...
 
std::list< mem_transfer > contract_mst ()
 gets rid of empty space on the stack More...
 
std::list< mem_loc > * get_mst ()
 
void mst_create (int64_t size)
 initializes stack buffer More...
 
void mem_create ()
 create instance of memory manager More...
 
void mem_exit (int rank)
 exit instance of memory manager More...
 
int mst_free (void *ptr)
 frees buffer allocated on stack More...
 
int untag_mem (void *ptr)
 stops tracking memory allocated by CTF, so user doesn't have to call free More...
 
int cdealloc (void *ptr, int const tid)
 free abstraction More...
 
int cdealloc_cond (void *ptr)
 free abstraction (conditional (no error if not found)) More...
 
int get_num_instances ()
 
int64_t proc_bytes_used ()
 gives total memory used on this MPI process More...
 
int64_t proc_bytes_total ()
 gives total memory size per MPI process More...
 
int64_t proc_bytes_available ()
 gives total memory available on this MPI process More...
 
std::vector< Model * > & get_all_models ()
 
void print_all_models ()
 
void load_all_models (std::string file_name)
 
void write_all_models (std::string file_name)
 
void dump_all_models (std::string path)
 
double cddot (int n, const double *dX, int incX, const double *dY, int incY)
 
void cdgeqrf (int const M, int const N, double *A, int const LDA, double *TAU2, double *WORK, int const LWORK, int *INFO)
 
void cdormqr (char SIDE, char TRANS, int M, int N, int K, double const *A, int LDA, double const *TAU2, double *C, int LDC, double *WORK, int LWORK, int *INFO)
 
void cdgelsd (int m, int n, int k, double const *A, int lda_A, double *B, int lda_B, double *S, double cond, int *rank, double *work, int lwork, int *iwork, int *info)
 
template<int nparam>
bool comp_time_param (const time_param< nparam > &a, const time_param< nparam > &b)
 
void offload_init ()
 initialize offloading, e.g. create cublas More...
 
void offload_exit ()
 exit offloading, e.g. destroy cublas More...
 
double estimate_download_time (int64_t size)
 estimate time it takes to download More...
 
double estimate_upload_time (int64_t size)
 estimate time it takes to upload More...
 
void host_pinned_alloc (void **ptr, int64_t size)
 allocate a pinned host buffer More...
 
void host_pinned_free (void *ptr)
 free a pinned host buffer More...
 
template<typename dtype >
void offload_gemm (char tA, char tB, int m, int n, int k, dtype alpha, offload_tsr &A, int lda_A, offload_tsr &B, int lda_B, dtype beta, offload_tsr &C, int lda_C)
 
template<typename dtype >
void offload_gemm (char tA, char tB, int m, int n, int k, dtype alpha, dtype const *dev_A, int lda_A, dtype const *dev_B, int lda_B, dtype beta, dtype *dev_C, int lda_C)
 
void calc_idx_arr (int order, int const *lens, int const *sym, int64_t idx, int *idx_arr)
 
void sy_calc_idx_arr (int order, int const *lens, int const *sym, int64_t idx, int *idx_arr)
 same as above except assumes sym only NS or SY More...
 
void permute (int order, int const *perm, int *arr)
 permute an array More...
 
void permute_target (int order, int const *perm, int *arr)
 permutes a permutation array More...
 
void socopy (int64_t m, int64_t n, int64_t lda_a, int64_t lda_b, int64_t const *sizes_a, int64_t *&sizes_b, int64_t *&offsets_b)
 
void spcopy (int64_t m, int64_t n, int64_t lda_a, int64_t lda_b, int64_t const *sizes_a, int64_t const *offsets_a, char const *a, int64_t const *sizes_b, int64_t const *offsets_b, char *b)
 
int64_t fact (int64_t n)
 
int64_t choose (int64_t n, int64_t k)
 
void get_choice (int64_t n, int64_t k, int64_t ch, int *chs)
 
int64_t chchoose (int64_t n, int64_t k)
 
int64_t getTotalSystemMemory ()
 
int free_cond (void *ptr)
 
int gcd (int a, int b)
 
int lcm (int a, int b)
 
void lda_cpy (int el_size, int nrow, int ncol, int lda_A, int lda_B, const char *A, char *B)
 Copies submatrix to submatrix (column-major) More...
 
void coalesce_bwd (int el_size, char *B, char const *B_aux, int k, int n, int kb)
 we receive a contiguous buffer kb-by-n B and (k-kb)-by-n B_aux which is the block below. To get a k-by-n buffer, we need to combine this buffer with our original block. Since we are working with column-major ordering we need to interleave the blocks. That's what this function does. More...
 
int64_t get_coo_size (int64_t nnz, int val_size)
 
int64_t get_csr_size (int64_t nnz, int nrow, int val_size)
 computes the size of a serialized CSR matrix More...
 
template<int idim>
void spA_dnB_seq_sum_loop (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
 
template<>
void spA_dnB_seq_sum_loop< 0 > (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
 
template void spA_dnB_seq_sum_loop< MAX_ORD > (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
 
void spA_dnB_seq_sum (char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, univar_function const *func)
 performs summation between two sparse tensors; assumes A contains key value pairs sorted by key, with index permutation preapplied and with no repeated indices More...
 
void dnA_spB_seq_sum (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, char const *beta, char const *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func)
 performs summation between two sparse tensors; assumes B contains key value pairs sorted by key, with index permutation preapplied and with no repeated indices More...
 
void spspsum (algstrct const *sr_A, int64_t nA, ConstPairIterator prs_A, char const *beta, algstrct const *sr_B, int64_t nB, ConstPairIterator prs_B, char const *alpha, int64_t &nnew, char *&pprs_new, univar_function const *func, int64_t map_pfx)
 Adds pairs in a sparse A set to the sparse set of elements defining the tensor, resulting in a set of size between nB and nB+nA. More...
 
void spA_spB_seq_sum (char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func, int64_t map_pfx)
 performs summation between two sparse tensors; assumes A and B contain key value pairs sorted by key, with index permutation preapplied and with no repeated indices More...
 
void inv_idx (int order_A, int const *idx_A, int order_B, int const *idx_B, int *order_tot, int **idx_arr)
 invert index map More...
 
template<int idim>
void sym_seq_sum_loop (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, univar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
 
template<>
void sym_seq_sum_loop< 0 > (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, univar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
 
template void sym_seq_sum_loop< MAX_ORD > (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, univar_function const *func, int const *idx, int const *rev_idx_map, int idx_max)
 
void compute_syoffs (algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, int tot_order, int const *rev_idx_map, uint64_t **&offsets_A, uint64_t **&offsets_B)
 
int sym_seq_sum_ref (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B)
 performs symmetric summation with unblocked reference kernel More...
 
int sym_seq_sum_inr (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, int inr_stride)
 performs symmetric summation with blocked daxpy More...
 
int sym_seq_sum_cust (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, univar_function const *func)
 performs symmetric summation with custom elementwise function More...
 
void desymmetrize (tensor *sym_tsr, tensor *nonsym_tsr, bool is_C)
 unfolds the data of a tensor More...
 
void symmetrize (tensor *sym_tsr, tensor *nonsym_tsr)
 folds the data of a tensor More...
 
void cmp_sym_perms (int ndim, int const *sym, int *nperm, int **perm, double *sign)
 finds all permutations of a tensor according to a symmetry More...
 
void order_perm (tensor const *A, tensor const *B, int *idx_arr, int off_A, int off_B, int *idx_A, int *idx_B, int &add_sign, int &mod)
 orders the summation indices of one tensor that don't break summation symmetries More...
 
void order_perm (tensor const *A, tensor const *B, tensor const *C, int *idx_arr, int off_A, int off_B, int off_C, int *idx_A, int *idx_B, int *idx_C, int &add_sign, int &mod)
 orders the contraction indices of one tensor that don't break contraction symmetries More...
 
void add_sym_perm (std::vector< summation > &perms, std::vector< int > &signs, summation const &new_perm, int new_sign)
 puts a summation map into a nice ordering according to preserved symmetries, and adds it if it is distinct More...
 
void add_sym_perm (std::vector< contraction > &perms, std::vector< int > &signs, contraction const &new_perm, int new_sign)
 puts a contraction map into a nice ordering according to preserved symmetries, and adds it if it is distinct More...
 
void get_sym_perms (summation const &sum, std::vector< summation > &perms, std::vector< int > &signs)
 finds all permutations of a summation that must be done for a broken symmetry More...
 
void get_sym_perms (contraction const &ctr, std::vector< contraction > &perms, std::vector< int > &signs)
 finds all permutations of a contraction that must be done for a broken symmetry More...
 
void depin (algstrct const *sr, int order, int const *lens, int const *divisor, int nvirt, int const *virt_dim, int const *phys_rank, char *X, int64_t &new_nnz_B, int64_t *nnz_blk, char *&new_B, bool check_padding)
 depins keys of n pairs More...
 
double spredist_est_time (int64_t size, int np)
 
template<typename dtype >
void abs_helper (tensor *A, tensor *B)
 
template<typename dtype >
void pow_helper (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C)
 
template<typename dtype >
void all_helper (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template<typename dtype >
void conj_helper (tensor *A, tensor *B)
 
template<typename dtype >
void get_real (tensor *A, tensor *B)
 
template<typename dtype >
void get_imag (tensor *A, tensor *B)
 
template<typename dtype >
void set_real (tensor *A, tensor *B)
 
template<typename dtype >
void set_imag (tensor *A, tensor *B)
 
template<typename dtype >
void any_helper (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
int64_t sum_bool_tsr (tensor *A)
 sum all 1 values in boolean tensor More...
 
void subsample (tensor *A, double probability)
 extract a sample of the entries (if sparse of the current nonzeros) More...
 
void matrix_qr (tensor *A, tensor *Q, tensor *R)
 
void matrix_qr_cmplx (tensor *A, tensor *Q, tensor *R)
 
void matrix_svd (tensor *A, tensor *U, tensor *S, tensor *VT, int rank)
 
void matrix_svd_cmplx (tensor *A, tensor *U, tensor *S, tensor *VT, int rank)
 
void conv_type (int type_idx1, int type_idx2, tensor *A, tensor *B)
 convert tensor from one type to another More...
 
template void conj_helper< float > (tensor *A, tensor *B)
 
template void conj_helper< double > (tensor *A, tensor *B)
 
template void set_real< float > (tensor *A, tensor *B)
 
template void set_imag< float > (tensor *A, tensor *B)
 
template void set_real< double > (tensor *A, tensor *B)
 
template void set_imag< double > (tensor *A, tensor *B)
 
template void get_real< float > (tensor *A, tensor *B)
 
template void get_imag< float > (tensor *A, tensor *B)
 
template void get_real< double > (tensor *A, tensor *B)
 
template void get_imag< double > (tensor *A, tensor *B)
 
template void tensor::compare_elementwise< std::complex< double > > (tensor *A, tensor *B)
 
template void tensor::compare_elementwise< std::complex< float > > (tensor *A, tensor *B)
 
template void abs_helper< std::complex< double > > (tensor *A, tensor *B)
 
template void abs_helper< std::complex< float > > (tensor *A, tensor *B)
 
template void abs_helper< double > (tensor *A, tensor *B)
 
template void abs_helper< float > (tensor *A, tensor *B)
 
template void abs_helper< int64_t > (tensor *A, tensor *B)
 
template void abs_helper< bool > (tensor *A, tensor *B)
 
template void abs_helper< int32_t > (tensor *A, tensor *B)
 
template void abs_helper< int16_t > (tensor *A, tensor *B)
 
template void abs_helper< int8_t > (tensor *A, tensor *B)
 
template void pow_helper< std::complex< double > > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C)
 
template void pow_helper< std::complex< float > > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C)
 
template void pow_helper< double > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C)
 
template void pow_helper< float > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C)
 
template void pow_helper< int64_t > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C)
 
template void pow_helper< bool > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C)
 
template void pow_helper< int32_t > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C)
 
template void pow_helper< int16_t > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C)
 
template void pow_helper< int8_t > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C)
 
template void all_helper< std::complex< double > > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void all_helper< std::complex< float > > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void all_helper< int64_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void all_helper< double > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void all_helper< float > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void all_helper< bool > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void all_helper< int32_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void all_helper< int16_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void all_helper< int8_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void any_helper< std::complex< double > > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void any_helper< std::complex< float > > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void any_helper< double > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void any_helper< float > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void any_helper< int64_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void any_helper< bool > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void any_helper< int32_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void any_helper< int16_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 
template void any_helper< int8_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B)
 

Variables

LinModel< 3 > seq_tsr_ctr_mdl_cst (seq_tsr_ctr_mdl_cst_init,"seq_tsr_ctr_mdl_cst")
 
LinModel< 3 > seq_tsr_ctr_mdl_ref (seq_tsr_ctr_mdl_ref_init,"seq_tsr_ctr_mdl_ref")
 
LinModel< 3 > seq_tsr_ctr_mdl_inr (seq_tsr_ctr_mdl_inr_init,"seq_tsr_ctr_mdl_inr")
 
LinModel< 3 > seq_tsr_ctr_mdl_off (seq_tsr_ctr_mdl_off_init,"seq_tsr_ctr_mdl_off")
 
LinModel< 3 > seq_tsr_ctr_mdl_cst_inr (seq_tsr_ctr_mdl_cst_inr_init,"seq_tsr_ctr_mdl_cst_inr")
 
LinModel< 3 > seq_tsr_ctr_mdl_cst_off (seq_tsr_ctr_mdl_cst_off_init,"seq_tsr_ctr_mdl_cst_off")
 
LinModel< 3 > seq_tsr_spctr_cst_off_k0 (seq_tsr_spctr_cst_off_k0_init,"seq_tsr_spctr_cst_off_k0")
 
LinModel< 3 > seq_tsr_spctr_cst_off_k1 (seq_tsr_spctr_cst_off_k1_init,"seq_tsr_spctr_cst_off_k1")
 
LinModel< 3 > seq_tsr_spctr_cst_off_k2 (seq_tsr_spctr_cst_off_k2_init,"seq_tsr_spctr_cst_off_k2")
 
LinModel< 3 > seq_tsr_spctr_off_k0 (seq_tsr_spctr_off_k0_init,"seq_tsr_spctr_off_k0")
 
LinModel< 3 > seq_tsr_spctr_off_k1 (seq_tsr_spctr_off_k1_init,"seq_tsr_spctr_off_k1")
 
LinModel< 3 > seq_tsr_spctr_off_k2 (seq_tsr_spctr_off_k2_init,"seq_tsr_spctr_off_k2")
 
LinModel< 3 > seq_tsr_spctr_cst_k0 (seq_tsr_spctr_cst_k0_init,"seq_tsr_spctr_cst_k0")
 
LinModel< 3 > seq_tsr_spctr_cst_k1 (seq_tsr_spctr_cst_k1_init,"seq_tsr_spctr_cst_k1")
 
LinModel< 3 > seq_tsr_spctr_cst_k2 (seq_tsr_spctr_cst_k2_init,"seq_tsr_spctr_cst_k2")
 
LinModel< 3 > seq_tsr_spctr_cst_k3 (seq_tsr_spctr_cst_k3_init,"seq_tsr_spctr_cst_k3")
 
LinModel< 3 > seq_tsr_spctr_cst_k4 (seq_tsr_spctr_cst_k4_init,"seq_tsr_spctr_cst_k4")
 
LinModel< 3 > seq_tsr_spctr_k0 (seq_tsr_spctr_k0_init,"seq_tsr_spctr_k0")
 
LinModel< 3 > seq_tsr_spctr_k1 (seq_tsr_spctr_k1_init,"seq_tsr_spctr_k1")
 
LinModel< 3 > seq_tsr_spctr_k2 (seq_tsr_spctr_k2_init,"seq_tsr_spctr_k2")
 
LinModel< 3 > seq_tsr_spctr_k3 (seq_tsr_spctr_k3_init,"seq_tsr_spctr_k3")
 
LinModel< 3 > seq_tsr_spctr_k4 (seq_tsr_spctr_k4_init,"seq_tsr_spctr_k4")
 
LinModel< 2 > pin_keys_mdl (pin_keys_mdl_init,"pin_keys_mdl")
 
std::mersenne_twister_engine< std::uint_fast64_t, 64, 312, 156, 31, 0xb5026f5aa96619e9, 29, 0x5555555555555555, 17, 0x71d67fffeda60000, 37, 0xfff7eee000000000, 43, 6364136223846793005 > rng
 
LinModel< 3 > alltoall_mdl (alltoall_mdl_init,"alltoall_mdl")
 
LinModel< 3 > alltoallv_mdl (alltoallv_mdl_init,"alltoallv_mdl")
 
LinModel< 3 > red_mdl (red_mdl_init,"red_mdl")
 
LinModel< 3 > red_mdl_cst (red_mdl_cst_init,"red_mdl_cst")
 
LinModel< 3 > allred_mdl (allred_mdl_init,"allred_mdl")
 
LinModel< 3 > allred_mdl_cst (allred_mdl_cst_init,"allred_mdl_cst")
 
LinModel< 3 > bcast_mdl (bcast_mdl_init,"bcast_mdl")
 
int64_t total_flop_count = 0
 
MPI_Datatype MPI_CTF_DOUBLE_COMPLEX = MPI_CXX_DOUBLE_COMPLEX
 
CTF::Ring< float > float_ring = CTF::Ring<float>()
 
CTF::Ring< double > double_ring = CTF::Ring<double>()
 
CTF::Ring< int > int_ring = CTF::Ring<int>()
 
CTF::Ring< int64_t > int64_t_ring = CTF::Ring<int64_t>()
 
MPI_Datatype MPI_CTF_BOOL = MPI_CXX_BOOL
 
MPI_Datatype MPI_CTF_LONG_DOUBLE_COMPLEX = MPI_CXX_LONG_DOUBLE_COMPLEX
 
std::set< grid_wrapper > scalapack_grids
 index for ScaLAPACK processor grids More...
 
LinModel< 3 > dgtog_res_mdl (dgtog_res_mdl_init,"dgtog_res_mdl")
 
LinModel< 2 > long_contig_transp_mdl (long_contig_transp_mdl_init,"long_contig_transp_mdl")
 
LinModel< 2 > shrt_contig_transp_mdl (shrt_contig_transp_mdl_init,"shrt_contig_transp_mdl")
 
LinModel< 2 > non_contig_transp_mdl (non_contig_transp_mdl_init,"non_contig_transp_mdl")
 
LinModel< 2 > blres_mdl (blres_mdl_init,"blres_mdl")
 
double seq_tsr_spctr_cst_off_k0_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10}
 
double seq_tsr_spctr_cst_off_k1_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10}
 
double seq_tsr_spctr_cst_off_k2_init [] = {-2.1996E-04, 3.1883E-09, 3.8743E-11}
 
double seq_tsr_spctr_off_k0_init [] = {8.6970E-06, 4.5598E-11, 1.1544E-09}
 
double seq_tsr_spctr_off_k1_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10}
 
double seq_tsr_spctr_off_k2_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10}
 
double seq_tsr_spctr_cst_k0_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10}
 
double seq_tsr_spctr_cst_k1_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10}
 
double seq_tsr_spctr_cst_k2_init [] = {-8.8459E-08, 8.1207E-10, -2.8486E-12}
 
double seq_tsr_spctr_cst_k3_init [] = {1.8504E-08, 2.9154E-11, 2.1973E-11}
 
double seq_tsr_spctr_cst_k4_init [] = {2.0948E-05, 1.2294E-09, 8.0037E-10}
 
double seq_tsr_spctr_k0_init [] = {2.2620E-08, -5.7494E-10, 2.2146E-09}
 
double seq_tsr_spctr_k1_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10}
 
double seq_tsr_spctr_k2_init [] = {3.0917E-08, 5.2181E-11, 4.1634E-12}
 
double seq_tsr_spctr_k3_init [] = {7.2456E-08, 1.5128E-10, -1.5528E-12}
 
double seq_tsr_spctr_k4_init [] = {1.6880E-07, 4.9411E-10, 9.2847E-13}
 
double pin_keys_mdl_init [] = {3.1189E-09, 6.6717E-08}
 
double seq_tsr_ctr_mdl_cst_init [] = {5.1626E-06, -6.3215E-11, 3.9638E-09}
 
double seq_tsr_ctr_mdl_ref_init [] = {4.9138E-08, 5.8290E-10, 4.8575E-11}
 
double seq_tsr_ctr_mdl_inr_init [] = {2.0647E-08, 1.9721E-10, 2.9948E-11}
 
double seq_tsr_ctr_mdl_off_init [] = {6.2925E-05, 1.7449E-11, 1.7211E-12}
 
double seq_tsr_ctr_mdl_cst_inr_init [] = {1.3863E-04, 2.0119E-10, 9.8820E-09}
 
double seq_tsr_ctr_mdl_cst_off_init [] = {8.4844E-04, -5.9246E-11, 3.5247E-10}
 
double long_contig_transp_mdl_init [] = {2.9158E-10, 3.0501E-09}
 
double shrt_contig_transp_mdl_init [] = {1.3427E-08, 4.3168E-09}
 
double non_contig_transp_mdl_init [] = {4.0475E-08, 4.0463E-09}
 
double dgtog_res_mdl_init [] = {2.9786E-05, 2.4335E-04, 1.0845E-08}
 
double blres_mdl_init [] = {1.0598E-05, 7.2741E-08}
 
double alltoall_mdl_init [] = {1.0000E-06, 1.0000E-06, 5.0000E-10}
 
double alltoallv_mdl_init [] = {2.7437E-06, 2.2416E-05, 1.0469E-08}
 
double red_mdl_init [] = {6.2935E-07, 4.6276E-06, 9.2245E-10}
 
double red_mdl_cst_init [] = {5.7302E-07, 4.7347E-06, 6.0191E-10}
 
double allred_mdl_init [] = {8.4416E-07, 6.8651E-06, 3.5845E-08}
 
double allred_mdl_cst_init [] = {-3.3754E-04, 2.1343E-04, 3.0801E-09}
 
double bcast_mdl_init [] = {1.5045E-06, 1.4485E-05, 3.2876E-09}
 
double spredist_mdl_init [] = {1.2744E-04, 1.0278E-03, 7.6837E-08}
 
double csrred_mdl_init [] = {3.7005E-05, 1.1854E-04, 5.5165E-09}
 
double csrred_mdl_cst_init [] = {-1.8323E-04, 1.3076E-04, 2.8732E-09}
 
double upload_mdl_init []
 
double download_mdl_init []
 
double memcap = 0.5
 
int64_t mem_size = 0
 
int max_threads
 
int instance_counter = 0
 
int64_t mem_used [MAX_THREADS]
 
int64_t tot_mem_used
 
int64_t tot_mem_available = -1
 
std::list< mem_loc > mem_stacks [MAX_THREADS]
 
void * mst_buffer = 0
 
int64_t mst_buffer_size = 0
 
int64_t mst_buffer_used = 0
 
int64_t mst_buffer_ptr = 0
 
std::list< mem_loc > mst
 
char * cpy_buffer [CPY_BUFFER_SIZE]
 
LinModel< 3 > csrred_mdl (csrred_mdl_init,"csrred_mdl")
 
LinModel< 3 > csrred_mdl_cst (csrred_mdl_cst_init,"csrred_mdl_cst")
 
LinModel< 3 > spredist_mdl (spredist_mdl_init,"spredist_mdl")
 

Typedef Documentation

typedef bool CTF_int::TYPE1

Definition at line 6 of file ctf_ext.cxx.

typedef int CTF_int::TYPE2

Definition at line 7 of file ctf_ext.cxx.

typedef int64_t CTF_int::TYPE3

Definition at line 8 of file ctf_ext.cxx.

typedef float CTF_int::TYPE4

Definition at line 9 of file ctf_ext.cxx.

typedef double CTF_int::TYPE5

Definition at line 10 of file ctf_ext.cxx.

typedef std::complex<float> CTF_int::TYPE6

Definition at line 11 of file ctf_ext.cxx.

typedef std::complex<double> CTF_int::TYPE7

Definition at line 12 of file ctf_ext.cxx.

typedef int16_t CTF_int::TYPE8

Definition at line 13 of file ctf_ext.cxx.

typedef int8_t CTF_int::TYPE9

Definition at line 14 of file ctf_ext.cxx.

Enumeration Type Documentation

anonymous enum
Enumerator
SUCCESS 
ERROR 
NEGATIVE 

Definition at line 97 of file common.h.

Enumerator
NOT_MAPPED 
PHYSICAL_MAP 
VIRTUAL_MAP 

Definition at line 13 of file mapping.h.

Enumerator
TOPOLOGY_GENERIC 
TOPOLOGY_BGP 
TOPOLOGY_BGQ 
TOPOLOGY_8D 
NO_TOPOLOGY 

Definition at line 10 of file topology.h.

Function Documentation

template<typename dtype >
void CTF_int::abs_helper ( tensor A,
tensor B 
)

absolute value function

Parameters
[in] A tensor
[in,out] B tensor (becomes absolute value of A)
Returns
None

Definition at line 17 of file ctf_ext.cxx.

References ctf.core::a, ctf.core::abs(), ctf.core::dtype, and CTF_int::tensor::order.

template void CTF_int::abs_helper< bool > ( tensor A,
tensor B 
)

Referenced by conv_type().

template void CTF_int::abs_helper< double > ( tensor A,
tensor B 
)

Referenced by conv_type().

template void CTF_int::abs_helper< float > ( tensor A,
tensor B 
)

Referenced by conv_type().

template void CTF_int::abs_helper< int16_t > ( tensor A,
tensor B 
)

Referenced by conv_type().

template void CTF_int::abs_helper< int32_t > ( tensor A,
tensor B 
)

Referenced by conv_type().

template void CTF_int::abs_helper< int64_t > ( tensor A,
tensor B 
)

Referenced by conv_type().

template void CTF_int::abs_helper< int8_t > ( tensor A,
tensor B 
)

Referenced by conv_type().

template void CTF_int::abs_helper< std::complex< double > > ( tensor A,
tensor B 
)
template void CTF_int::abs_helper< std::complex< float > > ( tensor A,
tensor B 
)
void CTF_int::add_sym_perm ( std::vector< summation > &  perms,
std::vector< int > &  signs,
summation const &  new_perm,
int  new_sign 
)

puts a summation map into a nice ordering according to preserved symmetries, and adds it if it is distinct

Parameters
[in,out] perms the permuted summation specifications
[in,out] signs sign of each summation
[in] new_perm summation signature
[in] new_sign alpha

Definition at line 549 of file symmetrization.cxx.

References CTF_int::summation::A, align_symmetric_indices(), CTF_int::summation::B, cdealloc(), CTF_int::summation::idx_A, CTF_int::summation::idx_B, inv_idx(), CTF_int::tensor::order, order_perm(), and CTF_int::tensor::sym.

void CTF_int::add_sym_perm ( std::vector< contraction > &  perms,
std::vector< int > &  signs,
contraction const &  new_perm,
int  new_sign 
)

puts a contraction map into a nice ordering according to preserved symmetries, and adds it if it is distinct

Parameters
[in,out] perms the permuted contraction specifications
[in,out] signs sign of each contraction
[in] new_perm contraction signature
[in] new_sign alpha

Definition at line 593 of file symmetrization.cxx.

References CTF_int::contraction::A, align_symmetric_indices(), CTF_int::contraction::B, CTF_int::contraction::C, cdealloc(), CTF_int::contraction::idx_A, CTF_int::contraction::idx_B, CTF_int::contraction::idx_C, inv_idx(), CTF_int::tensor::order, order_perm(), and CTF_int::tensor::sym.

Referenced by get_sym_perms().

template<typename dtype >
void CTF_int::all_helper ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

all function

Parameters
[in] A tensor
[in] B_bool tensor with bool values created
[in] idx_A index of A
[in] idx_B index of B
Returns
None

Definition at line 33 of file ctf_ext.cxx.

References ctf.core::a, and ctf.core::dtype.

template void CTF_int::all_helper< bool > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::all_helper< double > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::all_helper< float > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::all_helper< int16_t > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::all_helper< int32_t > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::all_helper< int64_t > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::all_helper< int8_t > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::all_helper< std::complex< double > > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)
template void CTF_int::all_helper< std::complex< float > > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)
void * CTF_int::alloc ( int64_t const  len)

alloc abstraction

Parameters
[in] len number of bytes

Definition at line 365 of file memcontrol.cxx.

References alloc_ptr(), ASSERT, and SUCCESS.

Referenced by CTF_int::algstrct::alloc(), calc_drv_displs(), calc_sy_pfx(), calc_sy_pfx< 1 >(), compute_syoffs(), CTF_int::contraction::contraction(), conv_idx(), CTF_int::COO_Matrix::COO_Matrix(), CTF_int::CSR_Matrix::csr_add(), CTF_int::CSR_Matrix::CSR_Matrix(), CTF_int::algstrct::csr_reduce(), CTF::Bivar_Function< dtype_A, dtype_B, dtype_C >::csrmultcsr(), CTF::Bivar_Kernel< dtype_A, dtype_B, dtype_C, f, g >::csrmultcsr(), CTF::Bivar_Kernel< dtype_A, dtype_B, dtype_C, f, g >::csrmultcsr_old(), CTF_int::ctr_virt::ctr_virt(), cvrt_idx(), depad_tsr(), depermute_keys(), depin(), desymmetrize(), dgtog_reshuffle(), CTF_int::summation::estimate_time(), factorize(), CTF::Semiring< dtype, is_ord >::gen_csrmultcsr(), CTF_int::COO_Matrix::get_data(), get_full_intm(), get_grp_ptrs(), get_len_ordering(), CTF::Tensor< dtype >::get_local_data(), get_phys_topo(), CTF_int::algstrct::has_mul(), CTF::Idx_Tensor::Idx_Tensor(), CTF_int::tensor::init(), inv_idx(), CTF_int::LinModel< nparam >::LinModel(), CTF_int::Term::operator-(), CTF::Partition::operator=(), order_globally(), CTF_int::tensor::orient_subworld(), CTF_int::algstrct::pair_alloc(), CTF::Partition::Partition(), peel_torus(), CTF::Function_timer::print(), CTF::print_timers(), read_data_mpiio(), CTF_int::tensor::read_dense_from_file(), CTF::Tensor< dtype >::read_local(), CTF_int::tensor::read_local(), CTF_int::tensor::read_local_nnz(), CTF_int::tensor::redistribute(), CTF_int::ctr_virt::run(), CTF_int::spctr_replicate::run(), CTF_int::strp_tsr::run(), CTF_int::tspsum_virt::run(), CTF_int::tspsum_replicate::run(), CTF_int::spctr_2d_general::run(), CTF_int::tsum_virt::run(), CTF_int::spctr_virt::run(), CTF_int::tspsum_permute::run(), CTF_int::algstrct::safecopy(), CTF::Semiring< dtype, is_ord >::safemul(), CTF_int::scaling::scaling(), CTF_int::scl_virt::scl_virt(), CTF_int::tensor::self_reduce(), CTF_int::seq_tsr_ctr::seq_tsr_ctr(), CTF_int::seq_tsr_scl::seq_tsr_scl(), CTF_int::seq_tsr_spctr::seq_tsr_spctr(), 
CTF_int::seq_tsr_spsum::seq_tsr_spsum(), CTF_int::seq_tsr_sum::seq_tsr_sum(), serialize_sparse_tensor_data(), CTF_int::COO_Matrix::set_data(), CTF_int::tensor::set_name(), CTF_int::tensor::set_zero(), CTF_int::tensor::slice(), CTF::Tensor< dtype >::slice(), socopy(), spA_dnB_dnC_seq_ctr(), CTF_int::tensor::sparsify(), CTF_int::spctr_pin_keys::spctr_pin_keys(), CTF_int::spctr_virt::spctr_virt(), CTF_int::tensor::spmatricize(), spsfy_tsr(), CTF_int::summation::summation(), CTF::Matrix< dtype >::svd(), sym_seq_ctr_cust(), sym_seq_ctr_inr(), sym_seq_ctr_ref(), sym_seq_scl_cust(), sym_seq_scl_ref(), sym_seq_sum_cust(), sym_seq_sum_inr(), sym_seq_sum_ref(), symmetrize(), CTF_int::tensor::tensor(), CTF_int::topology::topology(), CTF_int::tspsum::tspsum(), CTF_int::tspsum_map::tspsum_map(), CTF_int::tspsum_permute::tspsum_permute(), CTF_int::tspsum_pin_keys::tspsum_pin_keys(), CTF_int::tspsum_virt::tspsum_virt(), CTF_int::tsum_virt::tsum_virt(), CTF_int::LinModel< nparam >::update(), and CTF_int::tensor::write_dense_to_file().

int CTF_int::alloc_ptr ( int64_t const  len_,
void **const  ptr 
)

alloc abstraction

Parameters
[in] len_ number of bytes
[in,out] ptr pointer to set to new allocation address

Definition at line 320 of file memcontrol.cxx.

References ALIGN_BYTES, ASSERT, CTF_int::mem_loc::len, MAX, CTF_int::mem_loc::ptr, and SUCCESS.

Referenced by alloc(), assign_keys(), bcast_step(), block_reshuffle(), bucket_by_pe(), bucket_by_virt(), calc_cnt_displs(), calc_fold_lnmk(), CTF_int::tensor::calc_phase(), check_self_mapping(), cmp_sym_perms(), CTF_int::tensor::compare(), compute_bucket_offsets(), CTF_int::tensor::copy_tensor_data(), CTF::Monoid< dtype, is_ord >::csr_add(), CTF_int::ctr_replicate::ctr_replicate(), cyclic_reshuffle(), depad_tsr(), depin(), desymmetrize(), dgtog_reshuffle(), CTF_int::distribution::distribution(), CTF_int::tensor::est_time_unfold(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), CTF_int::tensor::extract_diag(), extract_free_comms(), CTF_int::tensor::fold(), get_len_ordering(), glb_cyclic_reshuffle(), glb_ord_pup(), CTF_int::tensor::init(), CTF_int::summation::is_equal(), map_self_indices(), CTF_int::tensor::map_tensor_rem(), morph_topo(), mst_alloc_ptr(), nosym_transpose(), pad_cyclic_pup_virt_buff(), padded_reshuffle(), CTF_int::CSR_Matrix::partition(), permute(), permute_target(), CTF_int::ConstPairIterator::pin(), precompute_offsets(), CTF_int::summation::print(), CTF_int::tensor::print(), CTF_int::tensor::read_all_pairs(), read_loc_pairs(), CTF_int::tensor::read_local(), readwrite(), reduce_step_post(), reduce_step_pre(), CTF_int::ctr_virt::run(), CTF_int::spctr_replicate::run(), CTF_int::strp_tsr::run(), CTF_int::scl_virt::run(), CTF_int::tspsum_virt::run(), CTF_int::spctr_2d_general::run(), CTF_int::tsum_virt::run(), CTF_int::spctr_virt::run(), CTF_int::tspsum_map::run(), CTF_int::tspsum_permute::run(), scal_diag(), CTF_int::tensor::scale_diagonals(), CTF_int::seq_tsr_ctr::seq_tsr_ctr(), CTF_int::seq_tsr_spctr::seq_tsr_spctr(), CTF_int::distribution::serialize(), CTF_int::tensor::set_padding(), CTF_int::tensor::set_zero(), CTF_int::tensor::sparsify(), CTF_int::spctr_replicate::spctr_replicate(), CTF_int::tensor::spmatricize(), spsfy_tsr(), strip_diag(), CTF_int::summation::sum_tensors(), symmetrize(), 
CTF_int::tspsum_replicate::tspsum_replicate(), CTF_int::tsum_replicate::tsum_replicate(), CTF_int::tensor::unfold(), wr_pairs_layout(), CTF_int::tensor::write(), CTF_int::tensor::zero_out_padding(), and zero_padding().

template<typename dtype >
void CTF_int::any_helper ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

any function

Parameters
[in] A tensor
[in] B_bool tensor with bool values created
[in] idx_A index of A
[in] idx_B index of B
Returns
None

Definition at line 88 of file ctf_ext.cxx.

References ctf.core::a, and ctf.core::dtype.

template void CTF_int::any_helper< bool > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::any_helper< double > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::any_helper< float > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::any_helper< int16_t > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::any_helper< int32_t > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::any_helper< int64_t > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::any_helper< int8_t > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)

Referenced by conv_type().

template void CTF_int::any_helper< std::complex< double > > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)
template void CTF_int::any_helper< std::complex< float > > ( tensor A,
tensor B_bool,
char const *  idx_A,
char const *  idx_B 
)
void CTF_int::assign_keys ( int  order,
int64_t  size,
int  nvirt,
int const *  edge_len,
int const *  sym,
int const *  phase,
int const *  phys_phase,
int const *  virt_dim,
int *  phase_rank,
char const *  vdata,
char *  vpairs,
algstrct const *  sr 
)

assigns keys to an array of values

Parameters
[in] order tensor dimension
[in] size number of values
[in] nvirt total virtualization factor
[in] edge_len tensor edge lengths
[in] sym symmetries of tensor
[in] phase total phase of the tensor on virtualized processor grid
[in] phys_phase physical phase of the tensor
[in] virt_dim virtual phase in each dimension
[in] phase_rank physical phase rank multiplied by virtual phase
[in] vdata array of input values
[out] vpairs pairs of keys and inputted values
[in] sr algstrct defining data type of array

Definition at line 180 of file sparse_rw.cxx.

References ABORT, alloc_ptr(), ASSERT, cdealloc(), CTF_int::accumulatable::el_size, NS, CTF_int::algstrct::pair_size(), CTF_int::algstrct::set_pair(), TAU_FSTART, and TAU_FSTOP.

Referenced by read_loc_pairs().

char* CTF_int::bcast_step ( int  edge_len,
char *  A,
bool  is_sparse_A,
bool  move_A,
algstrct const *  sr_A,
int64_t  b_A,
int64_t  s_A,
char *  buf_A,
CommData cdt_A,
int64_t  ctr_sub_lda_A,
int64_t  ctr_lda_A,
int  nblk_A,
int64_t const *  size_blk_A,
int &  new_nblk_A,
int64_t *&  new_size_blk_A,
int64_t *  offsets_A,
int  ib 
)
void CTF_int::block_reshuffle ( distribution const &  old_dist,
distribution const &  new_dist,
char *  tsr_data,
char *&  tsr_cyclic_data,
algstrct const *  sr,
CommData  glb_comm 
)

Reshuffle elements by block given the global phases stay the same.

Parameters
[in] old_dist starting data distribution
[in] new_dist target data distribution
[in] tsr_data starting data buffer
[out] tsr_cyclic_data target data buffer
[in] sr algstrct defining data
[in] glb_comm communicator on which to redistribute

Definition at line 454 of file redist.cxx.

References CTF_int::algstrct::addid(), CTF_int::algstrct::alloc(), alloc_ptr(), cdealloc(), CTF_int::CommData::cm, CTF_int::algstrct::copy(), DPRINTF, CTF_int::accumulatable::el_size, CTF_int::algstrct::mdtype(), CTF_int::LinModel< nparam >::observe(), CTF_int::distribution::order, CTF_int::distribution::pe_lda, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, CTF_int::CommData::rank, CTF_int::algstrct::set(), CTF_int::LinModel< nparam >::should_observe(), CTF_int::distribution::size, TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.

Referenced by CTF_int::tensor::redistribute().

double CTF_int::blres_est_time ( int64_t  tot_sz,
int  nv0,
int  nv1 
)

estimates execution time, given this processor sends and receives tot_sz across np procs

Parameters
[in] tot_sz amount of data sent/recved
[in] nv0 starting number of blocks
[in] nv1 ending number of blocks

Definition at line 449 of file redist.cxx.

References CTF_int::LinModel< nparam >::est_time().

Referenced by CTF_int::tensor::est_redist_time().

void CTF_int::bucket_by_pe ( int  order,
int64_t  num_pair,
int64_t  np,
int const *  phys_phase,
int const *  virt_phase,
int const *  bucket_lda,
int const *  edge_len,
ConstPairIterator  mapped_data,
int64_t *  bucket_counts,
int64_t *  bucket_off,
PairIterator  bucket_data,
algstrct const *  sr 
)

buckets key-value pairs by processor according to distribution

Parameters
[in] order number of tensor dims
[in] num_pair numbers of values being written
[in] np number of processor buckets
[in] phys_phase physical distribution phase
[in] virt_phase factor of phase due to local blocking
[in] bucket_lda iterator hop along each bucket dim
[in] edge_len padded edge lengths of tensor
[in] mapped_data set of sparse key-value pairs
[out] bucket_counts how many keys belong to each processor
[out] bucket_off prefix sum of bucket_counts
[out] bucket_data mapped_data reordered by bucket
[in] sr algstrct context defining values

Definition at line 432 of file sparse_rw.cxx.

References alloc_ptr(), ASSERT, cdealloc(), CTF_int::ConstPairIterator::k(), ctf.core::np(), TAU_FSTART, TAU_FSTOP, and CTF_int::PairIterator::write().

Referenced by wr_pairs_layout().

int64_t * CTF_int::bucket_by_virt ( int  order,
int  num_virt,
int64_t  num_pair,
int const *  phys_phase,
int const *  virt_phase,
int const *  edge_len,
ConstPairIterator  mapped_data,
PairIterator  bucket_data,
algstrct const *  sr 
)

buckets key value pairs by block/virtual-processor

Parameters
[in] order number of tensor dims
[in] num_virt number of local blocks
[in] num_pair numbers of values being written
[in] phys_phase physical distribution phase
[in] virt_phase factor of phase due to local blocking
[in] edge_len padded edge lengths of tensor
[in] mapped_data set of sparse key-value pairs
[out] bucket_data mapped_data reordered by bucket
[in] sr algstrct context defining values

Definition at line 539 of file sparse_rw.cxx.

References alloc_ptr(), ASSERT, cdealloc(), CTF_int::ConstPairIterator::k(), CTF_int::PairIterator::sort(), TAU_FSTART, TAU_FSTOP, and CTF_int::PairIterator::write().

Referenced by wr_pairs_layout().

template<int idim>
int64_t CTF_int::calc_cnt ( int const *  sym,
int const *  rep_phase,
int const *  sphase,
int const *  gidx_off,
int const *  edge_len,
int const *  loc_edge_len 
)

computes the cardinality of the set of elements of a tensor of order idim+1 that are owned by processor index gidx_off in a distribution with dimensions sphase

Definition at line 23 of file dgtog_calc_cnt.cxx.

References cdealloc(), get_loc(), and NS.

Referenced by calc_drv_cnts< 0 >(), and calc_sy_pfx().

template<>
int64_t CTF_int::calc_cnt< 0 > ( int const *  sym,
int const *  rep_phase,
int const *  sphase,
int const *  gidx_off,
int const *  edge_len,
int const *  loc_edge_len 
)

Definition at line 44 of file dgtog_calc_cnt.cxx.

References get_loc(), and NS.

Referenced by calc_sy_pfx< 1 >().

void CTF_int::calc_cnt_displs ( int const *  sym,
distribution const &  old_dist,
distribution const &  new_dist,
int  new_nvirt,
int  np,
int const *  old_virt_edge_len,
int const *  new_virt_lda,
int64_t *  send_counts,
int64_t *  recv_counts,
int64_t *  send_displs,
int64_t *  recv_displs,
CommData  ord_glb_comm,
int  idx_lyr,
int *const *  bucket_offset 
)

assigns keys to an array of values

Parameters
[in] sym symmetry relations between tensor dimensions
[in] old_dist starting data distribution
[in] new_dist target data distribution
[in] new_nvirt new total virtualization factor
[in] np number of processors
[in] old_virt_edge_len old edge lengths of blocks
[in] new_virt_lda prefix sum of new_dist.virt_phase
[out] send_counts outgoing counts of pairs by pe
[out] recv_counts incoming counts of pairs by pe
[out] send_displs outgoing displs of pairs by pe
[out] recv_displs incoming displs of pairs by pe
[in] ord_glb_comm the global communicator
[in] idx_lyr starting processor layer (2.5D)
[in] bucket_offset offsets for target index for each dimension

Definition at line 170 of file redist.cxx.

References alloc_ptr(), ASSERT, blres_mdl, blres_mdl_init, cdealloc(), CTF_int::CommData::cm, ctf.core::dim, MAX, MIN, mst_alloc_ptr(), ctf.core::np(), NS, CTF_int::distribution::order, CTF_int::distribution::pad_edge_len, CTF_int::distribution::padding, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, SY, sy_packed_size(), and CTF_int::distribution::virt_phase.

Referenced by cyclic_reshuffle(), and glb_cyclic_reshuffle().

template<int idim>
void CTF_int::calc_cnt_from_rep_cnt ( int const *  rep_phase,
int *const *  pe_offset,
int *const *  bucket_offset,
int64_t const *  old_counts,
int64_t *  counts,
int  bucket_off,
int  pe_off,
int  dir 
)

Definition at line 155 of file dgtog_calc_cnt.cxx.

References calc_cnt_from_rep_cnt< 0 >().

Referenced by dgtog_reshuffle().

template<>
void CTF_int::calc_cnt_from_rep_cnt< 0 > ( int const *  rep_phase,
int *const *  pe_offset,
int *const *  bucket_offset,
int64_t const *  old_counts,
int64_t *  counts,
int  bucket_off,
int  pe_off,
int  dir 
)

Definition at line 174 of file dgtog_calc_cnt.cxx.

Referenced by calc_cnt_from_rep_cnt().

void CTF_int::calc_dim ( int  order,
int64_t  size,
int const *  edge_len,
mapping const *  edge_map,
int64_t *  vrt_sz,
int *  vrt_edge_len,
int *  blk_edge_len 
)

calculate the block-sizes of a tensor

Parameters
[in] order number of dimensions of this tensor
[in] size is the size of the local tensor stored
[in] edge_len edge lengths of global tensor
[in] edge_map mapping of each dimension
[out] vrt_sz size of virtual block
[out] vrt_edge_len edge lengths of virtual block
[out] blk_edge_len edge lengths of local block

Definition at line 143 of file distribution.cxx.

References CTF_int::mapping::child, CTF_int::mapping::has_child, CTF_int::mapping::np, CTF_int::distribution::order, PHYSICAL_MAP, CTF_int::distribution::size, CTF_int::mapping::type, and VIRTUAL_MAP.

Referenced by CTF_int::tensor::est_time_unfold(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), CTF_int::tensor::fold(), get_len_ordering(), and CTF_int::tensor::unfold().

template<int idim>
void CTF_int::calc_drv_cnts ( int  order,
int const *  sym,
int64_t *  counts,
int const *  rep_phase,
int const *  rep_phase_lda,
int const *  sphase,
int const *  phys_phase,
int *  gidx_off,
int const *  edge_len,
int const *  loc_edge_len 
)

Definition at line 120 of file dgtog_calc_cnt.cxx.

Referenced by calc_drv_displs().

template<>
void CTF_int::calc_drv_cnts< 0 > ( int  order,
int const *  sym,
int64_t *  counts,
int const *  rep_phase,
int const *  rep_phase_lda,
int const *  sphase,
int const *  phys_phase,
int *  gidx_off,
int const *  edge_len,
int const *  loc_edge_len 
)

Definition at line 138 of file dgtog_calc_cnt.cxx.

References calc_cnt(), and SWITCH_ORD_CALL_RET.

void CTF_int::calc_drv_displs ( int const *  sym,
int const *  edge_len,
distribution const &  old_dist,
distribution const &  new_dist,
int64_t *  counts,
int  idx_lyr 
)

Definition at line 220 of file dgtog_calc_cnt.cxx.

References alloc(), calc_drv_cnts(), cdealloc(), lcm(), SWITCH_ORD_CALL, TAU_FSTART, and TAU_FSTOP.

Referenced by dgtog_reshuffle().

void CTF_int::calc_fold_lnmk ( tensor const *  A,
tensor const *  B,
tensor const *  C,
int const *  idx_A,
int const *  idx_B,
int const *  idx_C,
int const *  ordering_A,
int const *  ordering_B,
iparam inner_prm 
)

calculate the dimensions of the matrix the contraction gets reduced to (A, B, and C may be permuted)

Parameters
[in] A tensor 1
[in] B tensor 2
[in] C tensor 3
[in] idx_A indices of tensor 1
[in] idx_B indices of tensor 2
[in] idx_C indices of tensor 3
[in] ordering_A the dimensional-ordering of the inner mapping of A
[in] ordering_B the dimensional-ordering of the inner mapping of B
[out] inner_prm parameters including l (number of matrix multiplications), n, m, k

Definition at line 200 of file contraction.cxx.

References alloc_ptr(), cdealloc(), inv_idx(), CTF_int::iparam::k, CTF_int::iparam::l, CTF_int::iparam::m, CTF_int::iparam::n, NS, CTF_int::tensor::order, CTF_int::tensor::pad_edge_len, and CTF_int::iparam::sz_C.

Referenced by get_len_ordering().

void CTF_int::calc_idx_arr ( int  order,
int const *  lens,
int const *  sym,
int64_t  idx,
int *  idx_arr 
)

Definition at line 72 of file util.cxx.

References ASSERT, ctf.core::dim, NS, and packed_size().

Referenced by get_choice(), glb_ord_pup(), pad_cyclic_pup_virt_buff(), scal_diag(), and zero_padding().

template<int idim>
int64_t * CTF_int::calc_sy_pfx ( int const *  sym,
int const *  rep_phase,
int const *  sphase,
int const *  gidx_off,
int const *  edge_len,
int const *  loc_edge_len 
)

computes the cardinality of the sets of elements of a tensor of order idim+1 for different values of the idim'th tensor dimension

Definition at line 55 of file dgtog_calc_cnt.cxx.

References alloc(), calc_cnt(), cdealloc(), get_glb(), get_loc(), NS, and SY.

template<>
int64_t* CTF_int::calc_sy_pfx< 1 > ( int const *  sym,
int const *  rep_phase,
int const *  sphase,
int const *  gidx_off,
int const *  edge_len,
int const *  loc_edge_len 
)

Definition at line 97 of file dgtog_calc_cnt.cxx.

References alloc(), calc_cnt< 0 >(), get_glb(), get_loc(), NS, and SY.

int CTF_int::can_block_reshuffle ( int  order,
int const *  old_phase,
mapping const *  map 
)

determines if tensor can be permuted by block

Parameters
[in]orderdimension of tensor
[in]old_phaseold cyclic phases in each dimension
[in]mapnew mapping for each edge length
Returns
1 if block reshuffle allowed, 0 if not

Definition at line 618 of file redist.cxx.

References CTF_int::mapping::calc_phase().

Referenced by CTF_int::tensor::est_redist_time(), CTF_int::tensor::get_redist_mem(), CTF_int::summation::is_equal(), and CTF_int::tensor::redistribute().

int CTF_int::can_morph ( topology const *  topo_keep,
topology const *  topo_change 
)

determines if two topologies are compatible with each other

Parameters
topo_keeptopology to keep (larger dimension)
topo_changetopology to change (smaller dimension)
Returns
true if it's possible to change

Definition at line 683 of file topology.cxx.

References CTF_int::topology::dim_comm, CTF_int::topology::lda, CTF_int::CommData::np, and CTF_int::topology::order.

Referenced by get_len_ordering().

double CTF_int::cddot ( int  n,
const double *  dX,
int  incX,
const double *  dY,
int  incY 
)

Definition at line 60 of file model.cxx.

References CTF_BLAS::DDOT().

int CTF_int::cdealloc ( void *  ptr)

free abstraction

Parameters
[in,out]ptrpointer to set to address to free

Definition at line 480 of file memcontrol.cxx.

References ABORT, cdealloc(), ERROR, max_threads, mst_free(), NEGATIVE, and SUCCESS.

Referenced by add_sym_perm(), CTF_int::CommData::all_to_allv(), assign_keys(), bcast_step(), block_reshuffle(), bucket_by_pe(), bucket_by_virt(), calc_cnt(), calc_cnt_displs(), calc_drv_displs(), calc_fold_lnmk(), calc_sy_pfx(), CTF_int::tensor::calc_tot_phase(), check_self_mapping(), CTF_int::tensor::compare(), copy_mapping(), CTF_int::tensor::copy_tensor_data(), CTF::Monoid< dtype, is_ord >::csr_add(), CTF_int::CSR_Matrix::csr_add(), CTF_int::algstrct::csr_reduce(), CTF::Bivar_Function< dtype_A, dtype_B, dtype_C >::csrmultcsr(), CTF::Bivar_Kernel< dtype_A, dtype_B, dtype_C, f, g >::csrmultcsr(), CTF::Bivar_Kernel< dtype_A, dtype_B, dtype_C, f, g >::csrmultcsr_old(), cyclic_reshuffle(), CTF_int::algstrct::dealloc(), depad_tsr(), depermute_keys(), depin(), CTF_int::tensor::despmatricize(), desymmetrize(), dgtog_reshuffle(), CTF_int::seq_tsr_spctr::est_fp(), CTF_int::seq_tsr_ctr::est_fp(), CTF_int::tensor::est_time_unfold(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), CTF_int::Contract_Term::execute(), CTF_int::tensor::extract_diag(), CTF_int::tensor::fold(), CTF_int::strp_tsr::free_exp(), CTF_int::tensor::free_self(), CTF::Semiring< dtype, is_ord >::gen_csrmultcsr(), CTF_int::COO_Matrix::get_data(), get_full_intm(), get_generic_topovec(), get_len_ordering(), get_phys_topo(), glb_cyclic_reshuffle(), glb_ord_pup(), CTF_int::summation::is_equal(), map_self_indices(), CTF_int::tensor::map_tensor_rem(), morph_topo(), nosym_transpose(), CTF::Idx_Tensor::operator-=(), order_globally(), CTF_int::tensor::orient_subworld(), pad_cyclic_pup_virt_buff(), padded_reshuffle(), CTF_int::algstrct::pair_dealloc(), peel_torus(), permute(), permute_target(), CTF_int::ConstPairIterator::pin(), CTF::Function_timer::print(), CTF_int::summation::print(), CTF_int::tensor::print(), CTF::print_timers(), CTF_int::tensor::read_all_pairs(), read_data_mpiio(), CTF_int::tensor::read_dense_from_file(), read_loc_pairs(), CTF_int::tensor::read_local(), 
CTF::read_sparse_from_file_base(), readwrite(), CTF_int::tensor::redistribute(), reduce_step_post(), CTF_int::tensor::remove_fold(), CTF_int::ctr_virt::run(), CTF_int::spctr_replicate::run(), CTF_int::strp_tsr::run(), CTF_int::scl_virt::run(), CTF_int::tspsum_virt::run(), CTF_int::tspsum_replicate::run(), CTF_int::ctr_2d_general::run(), CTF_int::spctr_2d_general::run(), CTF_int::tsum_virt::run(), CTF_int::tsum_replicate::run(), CTF_int::spctr_virt::run(), CTF_int::tspsum_map::run(), CTF_int::tspsum_permute::run(), CTF_int::spctr_pin_keys::run(), CTF_int::algstrct::safecopy(), CTF::Semiring< dtype, is_ord >::safemul(), scal_diag(), CTF_int::tensor::scale_diagonals(), CTF_int::distribution::serialize(), CTF_int::COO_Matrix::set_data(), CTF_int::tensor::set_distribution(), CTF_int::tensor::set_name(), CTF_int::tensor::set_padding(), CTF_int::tensor::set_zero(), CTF_int::tensor::slice(), CTF::Tensor< dtype >::slice(), spA_dnB_dnC_seq_ctr(), CTF_int::tensor::sparsify(), CTF_int::tensor::spmatricize(), spsfy_tsr(), strip_diag(), CTF_int::summation::sum_tensors(), CTF::Matrix< dtype >::svd(), sym_seq_ctr_cust(), sym_seq_ctr_inr(), sym_seq_ctr_ref(), sym_seq_scl_cust(), sym_seq_scl_ref(), sym_seq_sum_cust(), sym_seq_sum_inr(), sym_seq_sum_ref(), symmetrize(), CTF_int::tensor::tensor(), CTF_int::tensor::unfold(), CTF_int::LinModel< nparam >::update(), wr_pairs_layout(), CTF_int::tensor::write(), CTF_int::tensor::write_dense_to_file(), CTF::write_sparse_to_file_base(), CTF_int::tensor::zero_out_padding(), zero_padding(), CTF_int::contraction::~contraction(), CTF_int::ctr_replicate::~ctr_replicate(), CTF_int::ctr_virt::~ctr_virt(), CTF::Idx_Tensor::~Idx_Tensor(), CTF_int::LinModel< nparam >::~LinModel(), CTF::Partition::~Partition(), CTF_int::scaling::~scaling(), CTF_int::scl::~scl(), CTF_int::scl_virt::~scl_virt(), CTF_int::seq_tsr_ctr::~seq_tsr_ctr(), CTF_int::seq_tsr_scl::~seq_tsr_scl(), CTF_int::seq_tsr_spctr::~seq_tsr_spctr(), CTF_int::seq_tsr_spsum::~seq_tsr_spsum(), 
CTF_int::seq_tsr_sum::~seq_tsr_sum(), CTF_int::spctr_pin_keys::~spctr_pin_keys(), CTF_int::spctr_replicate::~spctr_replicate(), CTF_int::spctr_virt::~spctr_virt(), CTF_int::strp_tsr::~strp_tsr(), CTF_int::summation::~summation(), CTF_int::Term::~Term(), CTF_int::topology::~topology(), CTF_int::tspsum::~tspsum(), CTF_int::tspsum_map::~tspsum_map(), CTF_int::tspsum_permute::~tspsum_permute(), CTF_int::tspsum_pin_keys::~tspsum_pin_keys(), CTF_int::tspsum_replicate::~tspsum_replicate(), CTF_int::tspsum_virt::~tspsum_virt(), CTF_int::tsum::~tsum(), CTF_int::tsum_replicate::~tsum_replicate(), and CTF_int::tsum_virt::~tsum_virt().

int CTF_int::cdealloc ( void *  ptr,
int const  tid 
)

free abstraction

Parameters
[in,out]ptrpointer to set to address to free
[in]tidthread id from whose stack the pointer needs to be freed

Definition at line 410 of file memcontrol.cxx.

References ctf.core::it, mst_free(), NEGATIVE, and SUCCESS.

Referenced by cdealloc(), and cdealloc_cond().

int CTF_int::cdealloc_cond ( void *  ptr)

free abstraction (conditional (no error if not found))

Parameters
[in,out]ptrpointer to set to address to free

Definition at line 448 of file memcontrol.cxx.

References cdealloc(), max_threads, NEGATIVE, and SUCCESS.

void CTF_int::cdgelsd ( int  m,
int  n,
int  k,
double const *  A,
int  lda_A,
double *  B,
int  lda_B,
double *  S,
double  cond,
int *  rank,
double *  work,
int  lwork,
int *  iwork,
int *  info 
)

Definition at line 102 of file model.cxx.

References CTF_LAPACK::cdgelsd().

Referenced by CTF_int::LinModel< nparam >::update().

void CTF_int::cdgeqrf ( int const  M,
int const  N,
double *  A,
int const  LDA,
double *  TAU2,
double *  WORK,
int const  LWORK,
int *  INFO 
)

Definition at line 67 of file model.cxx.

References CTF_LAPACK::cdgeqrf().

Referenced by CTF_int::LinModel< nparam >::update().

void CTF_int::cdormqr ( char  SIDE,
char  TRANS,
int  M,
int  N,
int  K,
double const *  A,
int  LDA,
double const *  TAU2,
double *  C,
int  LDC,
double *  WORK,
int  LWORK,
int *  INFO 
)

Definition at line 80 of file model.cxx.

References CTF_LAPACK::cdormqr().

Referenced by CTF_int::LinModel< nparam >::update().

template<typename dtype , dtype(*)(dtype) abs>
void CTF_int::char_abs ( char const *  a,
char *  b 
)

Definition at line 130 of file set.h.

References ctf.core::abs(), and ctf.core::dtype.

int64_t CTF_int::chchoose ( int64_t  n,
int64_t  k 
)

Definition at line 305 of file util.cxx.

References fact().

Referenced by coalesce_bwd().

int CTF_int::check_self_mapping ( tensor const *  tsr,
int const *  idx_map 
)

checks mapping in preparation for tensor scale, sum or contract

Parameters
[in]tsrhandle to tensor
[in]idx_mapis the mapping of tensor to global indices
Returns
whether the self mapping is consistent

Definition at line 332 of file mapping.cxx.

References alloc_ptr(), CTF_int::mapping::calc_phase(), cdealloc(), CTF_int::mapping::cdt, CTF_int::mapping::child, DPRINTF, CTF_int::tensor::edge_map, CTF_int::mapping::has_child, CTF_int::tensor::order, PHYSICAL_MAP, and CTF_int::mapping::type.

Referenced by CTF_int::scaling::execute(), get_len_ordering(), CTF_int::summation::is_equal(), and CTF_int::tensor::set_distribution().

int64_t CTF_int::choose ( int64_t  n,
int64_t  k 
)

Definition at line 285 of file util.cxx.

References fact().

Referenced by coalesce_bwd(), and get_len_ordering().

void CTF_int::cmp_sym_perms ( int  ndim,
int const *  sym,
int *  nperm,
int **  perm,
double *  sign 
)

finds all permutations of a tensor according to a symmetry

Parameters
[in]ndimdimension of tensor
[in]symsymmetry specification of tensor
[out]npermnumber of symmetric permutations to do
[out]permthe permutation
[out]signsign of each permutation

Definition at line 400 of file symmetrization.cxx.

References alloc_ptr(), AS, ASSERT, ctf.core::np(), and NS.

void CTF_int::coalesce_bwd ( int  el_size,
char *  B,
char const *  B_aux,
int  k,
int  n,
int  kb 
)
inline

We receive a contiguous kb-by-n buffer B and a (k-kb)-by-n buffer B_aux, which is the block below. To get a k-by-n buffer, we need to combine this buffer with our original block. Since we are working with column-major ordering, we need to interleave the blocks. That's what this function does.

Parameters
[in]el_sizeelement size
[in,out]Bthe buffer to coalesce into
[in]B_auxthe second buffer to coalesce from
[in]kthe total number of rows
[in]nthe number of columns
[in]kbthe number of rows in B originally

Definition at line 398 of file util.h.

References ctf.core::a, ctf.core::b, chchoose(), choose(), fact(), get_choice(), permute(), permute_target(), socopy(), and spcopy().

int CTF_int::comp_dim_map ( mapping const *  map_A,
mapping const *  map_B 
)

compares two mappings

Parameters
map_Afirst map
map_Bsecond map
Returns
true if mapping is exactly the same, false otherwise

Definition at line 143 of file mapping.cxx.

References ASSERT, CTF_int::mapping::cdt, CTF_int::mapping::child, DEBUG_PRINTF, CTF_int::mapping::has_child, NOT_MAPPED, CTF_int::mapping::np, PHYSICAL_MAP, CTF_int::mapping::type, and VIRTUAL_MAP.

Referenced by CTF_int::tensor::align(), ctr_2d_gen_build(), get_len_ordering(), and CTF_int::summation::is_equal().

template<int nparam>
bool CTF_int::comp_time_param ( const time_param< nparam > &  a,
const time_param< nparam > &  b 
)

Definition at line 114 of file model.cxx.

References CTF_int::time_param< nparam >::p.

int ** CTF_int::compute_bucket_offsets ( distribution const &  old_dist,
distribution const &  new_dist,
int const *  len,
int const *  old_phys_edge_len,
int const *  old_virt_lda,
int const *  old_offsets,
int *const *  old_permutation,
int const *  new_phys_edge_len,
int const *  new_virt_lda,
int  forward,
int  old_virt_np,
int  new_virt_np,
int const *  old_virt_edge_len 
)

computes offsets for redistribution targets along each edge length

Parameters
[in]old_diststarting data distribution
[in]new_disttarget data distribution
[in]lenunpadded edge lengths
[in]old_phys_edge_lentotal edge lengths of old local tensor chunk
[in]old_virt_ldaprefix sum of old_dist.virt_phase
[in]old_offsetsold offsets of each tensor edge (corner 1 of slice)
[in]old_permutationpermutation array for each edge length (no perm if NULL)
[in]new_phys_edge_lentotal edge lengths of new local tensor chunk
[in]new_virt_ldaprefix sum of new_dist.virt_phase
[in]forward1 for sending 0 for receiving
[in]old_virt_npnumber of blocks per processor in old_dist
[in]new_virt_npnumber of blocks per processor in new_dist
[in]old_virt_edge_lenedge lengths of each block in old_dist
Returns
2D array with dims [order][old_phys_edge_len[i]] with bucket offsets for each edge length

Definition at line 111 of file redist.cxx.

References alloc_ptr(), ctf.core::dim, MAX, CTF_int::distribution::order, CTF_int::distribution::pe_lda, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.

Referenced by cyclic_reshuffle(), and glb_cyclic_reshuffle().

void CTF_int::compute_syoff ( int  r,
int  len,
algstrct const *  sr,
int const *  edge_len,
int const *  sym,
uint64_t *  offsets 
)

Definition at line 301 of file sym_seq_ctr.cxx.

References CTF_int::accumulatable::el_size, NS, and sy_packed_size().

Referenced by compute_syoffs().

void CTF_int::compute_syoffs ( algstrct const *  sr_A,
int  order_A,
int const *  edge_len_A,
int const *  sym_A,
int const *  idx_map_A,
algstrct const *  sr_B,
int  order_B,
int const *  edge_len_B,
int const *  sym_B,
int const *  idx_map_B,
int  tot_order,
int const *  rev_idx_map,
uint64_t **&  offsets_A,
uint64_t **&  offsets_B 
)

Definition at line 193 of file sym_seq_sum.cxx.

References alloc(), compute_syoff(), compute_syoffs(), TAU_FSTART, and TAU_FSTOP.

void CTF_int::compute_syoffs ( algstrct const *  sr_A,
int  order_A,
int const *  edge_len_A,
int const *  sym_A,
int const *  idx_map_A,
algstrct const *  sr_B,
int  order_B,
int const *  edge_len_B,
int const *  sym_B,
int const *  idx_map_B,
algstrct const *  sr_C,
int  order_C,
int const *  edge_len_C,
int const *  sym_C,
int const *  idx_map_C,
int  tot_order,
int const *  rev_idx_map,
uint64_t **&  offsets_A,
uint64_t **&  offsets_B,
uint64_t **&  offsets_C 
)
template<typename dtype >
void CTF_int::conj_helper ( tensor A,
tensor B 
)

Definition at line 42 of file ctf_ext.cxx.

References ctf.core::a, and CTF_int::tensor::order.

template void CTF_int::conj_helper< double > ( tensor A,
tensor B 
)

Referenced by conv_type().

template void CTF_int::conj_helper< float > ( tensor A,
tensor B 
)

Referenced by conv_type().

std::vector< Term* > CTF_int::contract_down_terms ( algstrct sr,
char *  tscale,
std::vector< Term * >  operands,
std::vector< char >  out_inds,
int  terms_to_leave,
bool  est_time = false,
double *  cost = NULL 
)
std::list< mem_transfer > CTF_int::contract_mst ( )
template<typename type >
int CTF_int::conv_idx ( int  order,
type const *  cidx,
int **  iidx 
)
template<typename type >
int CTF_int::conv_idx ( int  order_A,
type const *  cidx_A,
int **  iidx_A,
int  order_B,
type const *  cidx_B,
int **  iidx_B 
)

Definition at line 76 of file common.cxx.

References alloc(), and conv_idx().

template<typename type >
int CTF_int::conv_idx ( int  order_A,
type const *  cidx_A,
int **  iidx_A,
int  order_B,
type const *  cidx_B,
int **  iidx_B,
int  order_C,
type const *  cidx_C,
int **  iidx_C 
)

Definition at line 114 of file common.cxx.

References alloc(), conv_idx< char >(), and conv_idx< int >().

Referenced by conv_idx().

template int CTF_int::conv_idx< char > ( int  ,
char const *  ,
int **   
)
template int CTF_int::conv_idx< char > ( int  ,
char const *  ,
int **  ,
int  ,
char const *  ,
int **   
)
template int CTF_int::conv_idx< char > ( int  ,
char const *  ,
int **  ,
int  ,
char const *  ,
int **  ,
int  ,
char const *  ,
int **   
)

Referenced by conv_idx().

template int CTF_int::conv_idx< int > ( int  ,
int const *  ,
int **   
)
template int CTF_int::conv_idx< int > ( int  ,
int const *  ,
int **  ,
int  ,
int const *  ,
int **   
)
template int CTF_int::conv_idx< int > ( int  ,
int const *  ,
int **  ,
int  ,
int const *  ,
int **  ,
int  ,
int const *  ,
int **   
)

Referenced by conv_idx().

void CTF_int::copy_mapping ( int  order,
mapping const *  mapping_A,
mapping mapping_B 
)

copies mapping A to B

Parameters
[in]ordernumber of dimensions
[in]mapping_Amapping to copy from
[in,out]mapping_Bmapping to copy to

Definition at line 190 of file mapping.cxx.

References CTF_int::mapping::clear(), and CTF_int::mapping::mapping().

Referenced by CTF_int::tensor::align(), copy_mapping(), CTF_int::tensor::copy_tensor_data(), desymmetrize(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), get_len_ordering(), CTF_int::summation::is_equal(), CTF_int::tensor::pull_alias(), and symmetrize().

int CTF_int::copy_mapping ( int  order_A,
int  order_B,
int const *  idx_A,
mapping const *  mapping_A,
int const *  idx_B,
mapping mapping_B,
int  make_virt = 1 
)

copies mapping A to B

Parameters
[in]order_Anumber of dimensions in A
[in]order_Bnumber of dimensions in B
[in]idx_Aindex mapping of A
[in]mapping_Amapping to copy from
[in]idx_Bindex mapping of B
[in,out]mapping_Bmapping to copy to
[in]make_virtmakes virtual

Definition at line 210 of file mapping.cxx.

References ASSERT, cdealloc(), CTF_int::mapping::clear(), copy_mapping(), CTF_int::mapping::has_child, inv_idx(), CTF_int::mapping::np, SUCCESS, CTF_int::mapping::type, and VIRTUAL_MAP.

int CTF_int::ctr_2d_gen_build ( int  is_used,
CommData  global_comm,
int  i,
int *  virt_dim,
int &  cg_edge_len,
int &  total_iter,
tensor A,
int  i_A,
CommData *&  cg_cdt_A,
int64_t &  cg_ctr_lda_A,
int64_t &  cg_ctr_sub_lda_A,
bool &  cg_move_A,
int *  blk_len_A,
int64_t &  blk_sz_A,
int const *  virt_blk_len_A,
int &  load_phase_A,
tensor B,
int  i_B,
CommData *&  cg_cdt_B,
int64_t &  cg_ctr_lda_B,
int64_t &  cg_ctr_sub_lda_B,
bool &  cg_move_B,
int *  blk_len_B,
int64_t &  blk_sz_B,
int const *  virt_blk_len_B,
int &  load_phase_B,
tensor C,
int  i_C,
CommData *&  cg_cdt_C,
int64_t &  cg_ctr_lda_C,
int64_t &  cg_ctr_sub_lda_C,
bool &  cg_move_C,
int *  blk_len_C,
int64_t &  blk_sz_C,
int const *  virt_blk_len_C,
int &  load_phase_C 
)

sets up a ctr_2d_general (2D SUMMA) level where A is not communicated; the function will be called with A/B/C permuted depending on the desired algorithm

Parameters
[in]is_usedwhether this ctr will actually be run
[in]global_commcomm for this CTF instance
[in]iindex in the total index map currently worked on
[in,out]virt_dimvirtual processor grid lengths
[out]cg_edge_lenedge lengths of ctr_2d_gen object to set
[in,out]total_iterthe total number of ctr_2d_gen iterations
[in]AA tensor
[in]i_Athe index in A to which index i corresponds
[out]cg_cdt_Athe communicator for A to be set for ctr_2d_gen
[out]cg_ctr_lda_Aparameter of ctr_2d_gen corresponding to upper lda for lda_cpy
[out]cg_ctr_sub_lda_Aparameter of ctr_2d_gen corresponding to lower lda for lda_cpy
[out]cg_move_Atells ctr_2d_gen whether A should be communicated
[in,out]blk_len_Alengths of local A piece after this ctr_2d_gen level
[in,out]blk_sz_Asize of local A piece after this ctr_2d_gen level
[in]virt_blk_len_Aedge lengths of virtual blocks of A
[in]load_phase_Atells the offloader how often A buffer changes for ctr_2d_gen

... the other parameters are specified the same as for _A but this time for _B and _C

Definition at line 12 of file ctr_2d_general.cxx.

References ASSERT, CTF_int::mapping::calc_phase(), CTF_int::mapping::cdt, CTF_int::mapping::child, comp_dim_map(), CTF_int::topology::dim_comm, CTF_int::tensor::edge_map, CTF_int::mapping::has_child, lcm(), MAX, CTF_int::mapping::np, CTF_int::tensor::order, PHYSICAL_MAP, CTF_int::tensor::topo, CTF_int::mapping::type, and VIRTUAL_MAP.

Referenced by CTF_int::contraction::contraction(), and get_len_ordering().

void CTF_int::cvrt_idx ( int  order,
int const *  lens,
int64_t  idx,
int *  idx_arr 
)

Definition at line 533 of file common.cxx.

Referenced by CTF::Tensor< dtype >::slice(), and wr_pairs_layout().

void CTF_int::cvrt_idx ( int  order,
int const *  lens,
int64_t  idx,
int **  idx_arr 
)

Definition at line 545 of file common.cxx.

References alloc(), and cvrt_idx().

void CTF_int::cvrt_idx ( int  order,
int const *  lens,
int const *  idx_arr,
int64_t *  idx 
)

Definition at line 553 of file common.cxx.

References MPI_CTF_DOUBLE_COMPLEX.

Referenced by cvrt_idx().

void CTF_int::cyclic_reshuffle ( int const *  sym,
distribution const &  old_dist,
int const *  old_offsets,
int *const *  old_permutation,
distribution const &  new_dist,
int const *  new_offsets,
int *const *  new_permutation,
char **  tsr_data,
char **  tsr_cyclic_data,
algstrct const *  sr,
CommData  ord_glb_comm,
bool  reuse_buffers,
char const *  alpha,
char const *  beta 
)

Goes from any set of phases to any new set of phases.

Parameters
[in]symsymmetry relations between tensor dimensions
[in]old_diststarting data distribution
[in]old_offsetsold offsets of each tensor edge (corner 1 of slice)
[in]old_permutationpermutation array for each edge length (no perm if NULL)
[in]new_disttarget data distribution
[in]new_offsetsnew offsets of each tensor edge (corner 1 of slice)
[in]new_permutationpermutation array for each edge length (no perm if NULL)
[in]tsr_datastarting data buffer
[out]tsr_cyclic_datatarget data buffer
[in]sralgstrct defining data
[in]ord_glb_commcommunicator on which to redistribute
[in]reuse_buffersif 1: ptr_tsr_cyclic_data is allocated dynamically and ptr_tsr_data is overwritten with intermediate data; if 0: ptr_tsr_cyclic_data is preallocated and can be scaled by beta, however, more memory is used for temp buffers
[in]alphascaling tensor for new data
[in]betascaling tensor for original data

Definition at line 477 of file cyclic_reshuffle.cxx.

References CTF_int::algstrct::acc(), CTF_int::algstrct::addid(), CTF_int::CommData::all_to_allv(), alloc_ptr(), ASSERT, calc_cnt_displs(), cdealloc(), compute_bucket_offsets(), CTF_int::algstrct::copy(), ctf.core::dim, CTF_int::accumulatable::el_size, CTF_int::distribution::is_cyclic, CTF_int::algstrct::isequal(), MAX, mst_alloc_ptr(), CTF_int::algstrct::mulid(), CTF_int::CommData::np, ctf.core::np(), CTF_int::distribution::order, pad_cyclic_pup_virt_buff(), CTF_int::distribution::pad_edge_len, CTF_int::distribution::padding, CTF_int::distribution::pe_lda, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::CommData::rank, CTF_int::algstrct::set(), CTF_int::distribution::size, sy_packed_size(), TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.

Referenced by CTF_int::tensor::add_from_subworld(), CTF_int::tensor::add_to_subworld(), and glb_cyclic_reshuffle().

template<typename dtype >
void CTF_int::def_coo_to_csr ( int64_t  nz,
int  nrow,
dtype *  csr_vs,
int *  csr_ja,
int *  csr_ia,
dtype const *  coo_vs,
int const *  coo_rs,
int const *  coo_cs 
)

Definition at line 100 of file set.h.

Referenced by CTF::Set< dtype, is_ord >::coo_to_csr().

template<typename dtype >
void CTF_int::def_csr_to_coo ( int64_t  nz,
int  nrow,
dtype const *  csr_vs,
int const *  csr_ja,
int const *  csr_ia,
dtype *  coo_vs,
int *  coo_rs,
int *  coo_cs 
)

Definition at line 105 of file set.h.

Referenced by CTF::Set< dtype, is_ord >::csr_to_coo().

template<typename dtype , bool is_ord>
std::enable_if<is_ord, dtype>::type CTF_int::default_abs ( dtype  a)
inline

Definition at line 116 of file set.h.

References ctf.core::a, ctf.core::b, and ctf.core::dtype.

template<typename dtype , bool is_ord>
std::enable_if<!is_ord, dtype>::type CTF_int::default_abs ( dtype  a)
inline

Definition at line 123 of file set.h.

References ctf.core::a.

template<typename dtype >
dtype CTF_int::default_add ( dtype  a,
dtype  b 
)

Definition at line 6 of file monoid.h.

References ctf.core::b.

template<typename dtype >
dtype CTF_int::default_addinv ( dtype  a)

Definition at line 110 of file set.h.

References ctf.core::a.

template<typename dtype >
void CTF_int::default_axpy ( int  n,
dtype  alpha,
dtype const *  X,
int  incX,
dtype *  Y,
int  incY 
)

Definition at line 19 of file semiring.h.

References default_axpy< double >(), and default_axpy< float >().

template<>
void CTF_int::default_axpy< double > ( int  n,
double  alpha,
double const *  X,
int  incX,
double *  Y,
int  incY 
)

Definition at line 139 of file semiring.cxx.

References CTF_BLAS::DAXPY().

Referenced by default_axpy(), and default_axpy< float >().

template<>
void CTF_int::default_axpy< float > ( int  n,
float  alpha,
float const *  X,
int  incX,
float *  Y,
int  incY 
)

Definition at line 128 of file semiring.cxx.

References default_axpy< double >(), and CTF_BLAS::SAXPY().

Referenced by default_axpy().

template<>
void CTF_int::default_axpy< std::complex< double > > ( int  n,
std::complex< double >  alpha,
std::complex< double > const *  X,
int  incX,
std::complex< double > *  Y,
int  incY 
)

Definition at line 161 of file semiring.cxx.

References CTF_BLAS::ZAXPY().

template<>
void CTF_int::default_axpy< std::complex< float > > ( int  n,
std::complex< float >  alpha,
std::complex< float > const *  X,
int  incX,
std::complex< float > *  Y,
int  incY 
)

Definition at line 150 of file semiring.cxx.

References CTF_BLAS::CAXPY().

template<typename dtype >
void CTF_int::default_coomm ( int  m,
int  n,
int  k,
dtype  alpha,
dtype const *  A,
int const *  rows_A,
int const *  cols_A,
int  nnz_A,
dtype const *  B,
dtype  beta,
dtype *  C 
)
template<>
void CTF_int::default_coomm< double > ( int  m,
int  n,
int  k,
double  alpha,
double const *  A,
int const *  rows_A,
int const *  cols_A,
int  nnz_A,
double const *  B,
double  beta,
double *  C 
)

Definition at line 233 of file semiring.cxx.

References DEF_COOMM_KERNEL, and CTF_BLAS::MKL_DCOOMM().

Referenced by default_coomm(), and default_coomm< float >().

template<>
void CTF_int::default_coomm< float > ( int  m,
int  n,
int  k,
float  alpha,
float const *  A,
int const *  rows_A,
int const *  cols_A,
int  nnz_A,
float const *  B,
float  beta,
float *  C 
)

Definition at line 208 of file semiring.cxx.

References DEF_COOMM_KERNEL, default_coomm< double >(), and CTF_BLAS::MKL_SCOOMM().

Referenced by default_coomm().

template<>
void CTF_int::default_coomm< std::complex< double > > ( int  m,
int  n,
int  k,
std::complex< double >  alpha,
std::complex< double > const *  A,
int const *  rows_A,
int const *  cols_A,
int  nnz_A,
std::complex< double > const *  B,
std::complex< double >  beta,
std::complex< double > *  C 
)

Definition at line 286 of file semiring.cxx.

References DEF_COOMM_KERNEL, CTF_BLAS::MKL_ZCOOMM(), and muladd_csrmm().

template<>
void CTF_int::default_coomm< std::complex< float > > ( int  m,
int  n,
int  k,
std::complex< float >  alpha,
std::complex< float > const *  A,
int const *  rows_A,
int const *  cols_A,
int  nnz_A,
std::complex< float > const *  B,
std::complex< float >  beta,
std::complex< float > *  C 
)

Definition at line 261 of file semiring.cxx.

References DEF_COOMM_KERNEL, and CTF_BLAS::MKL_CCOOMM().

template<typename dtype >
void CTF_int::default_fxpy ( int  n,
dtype const *  X,
dtype *  Y 
)

Definition at line 19 of file monoid.h.

template<typename dtype >
void CTF_int::default_gemm ( char  tA,
char  tB,
int  m,
int  n,
int  k,
dtype  alpha,
dtype const *  A,
dtype const *  B,
dtype  beta,
dtype *  C 
)

Definition at line 71 of file semiring.h.

template<>
void CTF_int::default_gemm< double > ( char  tA,
char  tB,
int  m,
int  n,
int  k,
double  alpha,
double const *  A,
double const *  B,
double  beta,
double *  C 
)
inline
template<>
void CTF_int::default_gemm< float > ( char  tA,
char  tB,
int  m,
int  n,
int  k,
float  alpha,
float const *  A,
float const *  B,
float  beta,
float *  C 
)
inline

Definition at line 151 of file semiring.h.

References default_gemm< double >().

Referenced by get_grp_ptrs(), and CTF::Semiring< dtype, is_ord >::is_offloadable().

template<>
void CTF_int::default_gemm< std::complex< double > > ( char  tA,
char  tB,
int  m,
int  n,
int  k,
std::complex< double >  alpha,
std::complex< double > const *  A,
std::complex< double > const *  B,
std::complex< double >  beta,
std::complex< double > *  C 
)
inline

Definition at line 196 of file semiring.h.

References default_gemm_batch().

template<>
void CTF_int::default_gemm< std::complex< float > > ( char  tA,
char  tB,
int  m,
int  n,
int  k,
std::complex< float >  alpha,
std::complex< float > const *  A,
std::complex< float > const *  B,
std::complex< float >  beta,
std::complex< float > *  C 
)
inline

Definition at line 181 of file semiring.h.

template<typename dtype >
void CTF_int::default_gemm_batch ( char  taA,
char  taB,
int  l,
int  m,
int  n,
int  k,
dtype  alpha,
dtype const *  A,
dtype const *  B,
dtype  beta,
dtype *  C 
)

Definition at line 211 of file semiring.h.

References default_gemm_batch< float >().

Referenced by default_gemm< std::complex< double > >().

template<>
void CTF_int::default_gemm_batch< double > ( char  taA,
char  taB,
int  l,
int  m,
int  n,
int  k,
double  alpha,
double const *  A,
double const *  B,
double  beta,
double *  C 
)
inline

Definition at line 251 of file semiring.h.

Referenced by default_gemm_batch< float >().

template<>
void CTF_int::default_gemm_batch< float > ( char  taA,
char  taB,
int  l,
int  m,
int  n,
int  k,
float  alpha,
float const *  A,
float const *  B,
float  beta,
float *  C 
)
inline

Definition at line 235 of file semiring.h.

References default_gemm_batch< double >().

Referenced by default_gemm_batch().

template<>
void CTF_int::default_gemm_batch< std::complex< double > > ( char  taA,
char  taB,
int  l,
int  m,
int  n,
int  k,
std::complex< double >  alpha,
std::complex< double > const *  A,
std::complex< double > const *  B,
std::complex< double >  beta,
std::complex< double > *  C 
)
inline

Definition at line 283 of file semiring.h.

References default_coomm().

template<>
void CTF_int::default_gemm_batch< std::complex< float > > ( char  taA,
char  taB,
int  l,
int  m,
int  n,
int  k,
std::complex< float >  alpha,
std::complex< float > const *  A,
std::complex< float > const *  B,
std::complex< float >  beta,
std::complex< float > *  C 
)
inline

Definition at line 267 of file semiring.h.

template<typename dtype , bool is_ord>
std::enable_if<is_ord, dtype>::type CTF_int::default_max ( dtype  a,
dtype  b 
)
inline

Definition at line 182 of file set.h.

References ctf.core::a.

template<typename dtype , bool is_ord>
std::enable_if<!is_ord, dtype>::type CTF_int::default_max ( dtype  a,
dtype  b 
)
inline

Definition at line 188 of file set.h.

References ctf.core::a.

template<typename dtype , bool is_ord>
std::enable_if<is_ord, dtype>::type CTF_int::default_max_lim ( )
inline

Definition at line 152 of file set.h.

template<typename dtype , bool is_ord>
std::enable_if<!is_ord, dtype>::type CTF_int::default_max_lim ( )
inline

Definition at line 158 of file set.h.

References ctf.core::a, and ctf.core::dtype.

template<typename dtype , bool is_ord>
std::enable_if<is_ord, dtype>::type CTF_int::default_min ( dtype  a,
dtype  b 
)
inline

Definition at line 138 of file set.h.

References ctf.core::a.

template<typename dtype , bool is_ord>
std::enable_if<!is_ord, dtype>::type CTF_int::default_min ( dtype  a,
dtype  b 
)
inline

Definition at line 144 of file set.h.

References ctf.core::a.

template<typename dtype , bool is_ord>
std::enable_if<is_ord, dtype>::type CTF_int::default_min_lim ( )
inline

Definition at line 167 of file set.h.

template<typename dtype , bool is_ord>
std::enable_if<!is_ord, dtype>::type CTF_int::default_min_lim ( )
inline

Definition at line 173 of file set.h.

References ctf.core::a, and ctf.core::dtype.

template<typename dtype >
dtype CTF_int::default_mul ( dtype  a,
dtype  b 
)

Definition at line 14 of file semiring.h.

References ctf.core::b.

template<typename dtype , void(*)(int, dtype const *, dtype *) fxpy>
void CTF_int::default_mxpy ( void *  X,
void *  Y,
int *  n,
MPI_Datatype *  d 
)

Definition at line 11 of file monoid.h.

References ctf.core::dtype.

Referenced by get_default_maddop().

template<typename dtype >
void CTF_int::default_scal ( int  n,
dtype  alpha,
dtype *  X,
int  incX 
)

Definition at line 47 of file semiring.h.

References default_scal< double >(), and default_scal< float >().

template<>
void CTF_int::default_scal< double > ( int  n,
double  alpha,
double *  X,
int  incX 
)

Definition at line 176 of file semiring.cxx.

References CTF_BLAS::DSCAL().

Referenced by default_scal().

template<>
void CTF_int::default_scal< float > ( int  n,
float  alpha,
float *  X,
int  incX 
)

Definition at line 171 of file semiring.cxx.

References CTF_BLAS::SSCAL().

Referenced by default_scal().

template<>
void CTF_int::default_scal< std::complex< double > > ( int  n,
std::complex< double >  alpha,
std::complex< double > *  X,
int  incX 
)

Definition at line 188 of file semiring.cxx.

References CTF_BLAS::ZSCAL().

template<>
void CTF_int::default_scal< std::complex< float > > ( int  n,
std::complex< float >  alpha,
std::complex< float > *  X,
int  incX 
)

Definition at line 182 of file semiring.cxx.

References CTF_BLAS::CSCAL().

void CTF_int::depad_tsr ( int  order,
int64_t  num_pair,
int const *  edge_len,
int const *  sym,
int const *  padding,
int const *  prepadding,
char const *  pairsb,
char *  new_pairsb,
int64_t *  new_num_pair,
algstrct const *  sr 
)

retrieves the unpadded pairs

Parameters
[in]ordertensor dimension
[in]num_pairnumber of pairs
[in]edge_lentensor edge lengths
[in]symsymmetry types of tensor
[in]paddingpadding of tensor (included in edge_len)
[in]prepaddingpadding at start of tensor (included in edge_len)
[in]pairsbpadded array of pairs
[out]new_pairsbunpadded pairs
[out]new_num_pairnumber of unpadded pairs
[in]sralgstrct defines size of each pair

Definition at line 51 of file pad.cxx.

References alloc(), alloc_ptr(), AS, cdealloc(), CTF_int::ConstPairIterator::k(), SH, SY, TAU_FSTART, TAU_FSTOP, and CTF_int::PairIterator::write().

Referenced by read_loc_pairs(), CTF_int::tensor::slice(), and CTF_int::tensor::sparsify().

void CTF_int::depermute_keys ( int  order,
int  num_pair,
int const *  edge_len,
int const *  new_edge_len,
int *const *  permutation,
char *  pairs,
algstrct const *  sr 
)

depermutes keys (apply P^T)

Parameters
[in]ordertensor dimension
[in]num_pairnumber of pairs
[in]edge_lenold nonpadded tensor edge lengths
[in]new_edge_lennew nonpadded tensor edge lengths
[in]permutationpermutation to apply to keys of each pair
[in,out]pairsthe keys and values as pairs
[in]sralgstrct defining data type of array

Definition at line 99 of file sparse_rw.cxx.

References alloc(), ASSERT, cdealloc(), CTF_int::PairIterator::k(), MIN, CTF_int::algstrct::pair_size(), TAU_FSTART, TAU_FSTOP, and CTF_int::PairIterator::write_key().

Referenced by CTF_int::tensor::permute().

void CTF_int::depin ( algstrct const *  sr,
int  order,
int const *  lens,
int const *  divisor,
int  nvirt,
int const *  virt_dim,
int const *  phys_rank,
char *  X,
int64_t &  new_nnz_B,
int64_t *  nnz_blk,
char *&  new_B,
bool  check_padding 
)
void CTF_int::desymmetrize ( tensor sym_tsr,
tensor nonsym_tsr,
bool  is_C 
)
std::vector<char> CTF_int::det_uniq_inds ( std::vector< Term * > const  operands,
std::vector< char > const  out_inds 
)
double CTF_int::dgtog_est_time ( int64_t  tot_sz,
int  np 
)

estimates execution time, given this processor sends and receives tot_sz across np procs

Parameters
[in]tot_szamount of data sent/recved
[in]npnumber of procs involved

Definition at line 11 of file dgtog_redist.cxx.

References dgtog_res_mdl.

Referenced by CTF_int::tensor::est_redist_time().

void CTF_int::dgtog_reshuffle ( int const *  sym,
int const *  edge_len,
distribution const &  old_dist,
distribution const &  new_dist,
char **  ptr_tsr_data,
char **  ptr_tsr_new_data,
algstrct const *  sr,
CommData  ord_glb_comm 
)

Definition at line 74 of file dgtog_redist.cxx.

Referenced by dgtog_reshuffle().

void CTF_int::dnA_spB_seq_sum ( char const *  alpha,
char const *  A,
algstrct const *  sr_A,
int  order_A,
int const *  edge_len_A,
int const *  sym_A,
char const *  beta,
char const *  B,
int64_t  size_B,
char *&  new_B,
int64_t &  new_size_B,
algstrct const *  sr_B,
univar_function const *  func 
)

performs summation between two sparse tensors; assumes B contains key-value pairs sorted by key, with index permutation preapplied and with no repeated indices

Parameters
[in]alphascaling factor of A
[in]Adata of right operand
[in]sr_Aalgebraic structure of right operand
[in]order_Aorder of tensor A
[in]edge_len_Adimensions of tensor A
[in]sym_Asymmetry relations of tensor A
[in]betascaling factor of left operand
[in]Bdata of left operand
[in]size_Bnumber of nonzero entries in left operand
[in,out]new_Bnew data of output
[in,out]new_size_Bnumber of nonzero entries in output
[in]sr_Balgebraic structure of left operand and output
[in]funcfunction (or NULL) to apply to right operand

Definition at line 149 of file spr_seq_sum.cxx.

Referenced by CTF_int::seq_tsr_spsum::run().

void CTF_int::dump_all_models ( std::string  path)

Definition at line 50 of file model.cxx.

References get_all_models().

Referenced by CTF_int::Model::dump_data(), and train_all().

double CTF_int::est_time_transp ( int  order,
int const *  new_order,
int const *  edge_len,
int  dir,
algstrct const *  sr 
)

estimates time needed to transpose a non-symmetric (folded) tensor based on performance models

Parameters
[in]orderdimension of tensor
[in]new_ordernew ordering of dimensions
[in]edge_lenoriginal edge lengths
[in]dirwhich way are we going?
[in]sralgstrct defining element size
Returns
estimated time in seconds

Definition at line 757 of file nosym_transp.cxx.

References long_contig_transp_mdl, non_contig_transp_mdl, and shrt_contig_transp_mdl.

Referenced by CTF_int::tensor::est_time_unfold(), CTF_int::summation::estimate_time(), and get_len_ordering().

double CTF_int::estimate_download_time ( int64_t  size)

estimate time it takes to download

double CTF_int::estimate_upload_time ( int64_t  size)

estimate time it takes to upload

void CTF_int::extract_free_comms ( topology const *  topo,
int  order_A,
mapping const *  edge_map_A,
int  order_B,
mapping const *  edge_map_B,
int &  num_sub_phys_dims,
CommData **  psub_phys_comm,
int **  pcomm_idx 
)

extracts the set of physical dimensions still available for mapping

Parameters
[in]topotopology
[in]order_Adimension of A
[in]edge_map_Amapping of A
[in]order_Bdimension of B
[in]edge_map_Bmapping of B
[out]num_sub_phys_dimsnumber of free torus dimensions
[out]psub_phys_commthe torus dimensions
[out]pcomm_idxindex of the free torus dimensions in the origin topology

Definition at line 628 of file topology.cxx.

References alloc_ptr(), CTF_int::mapping::cdt, CTF_int::mapping::child, CTF_int::topology::dim_comm, CTF_int::mapping::has_child, CTF_int::topology::order, PHYSICAL_MAP, and CTF_int::mapping::type.

Referenced by get_len_ordering().

int64_t CTF_int::fact ( int64_t  n)
void CTF_int::factorize ( int  n,
int *  nfactor,
int **  factor 
)

computes the factorization of a positive number n into an array of factors

Parameters
[in]na positive number
[out]nfactornumber of factors in n
[out]factorarray of length nfactor, corresponding to factorization of n

Definition at line 170 of file util.cxx.

References alloc().

Referenced by fft(), get_generic_topovec(), and get_phys_topo().

int CTF_int::find_topology ( topology const *  topo,
std::vector< topology * > &  topovec 
)

searches for an equivalent topology in a vector of topologies

Parameters
[in]topotopology to match
[in]topovecvector of existing topologies
Returns
-1 if not found, otherwise index of first found topology

Definition at line 571 of file topology.cxx.

References CTF_int::topology::lens, and CTF_int::topology::order.

Referenced by peel_perm_torus(), peel_torus(), and CTF_int::tensor::set_distribution().

void CTF_int::flops_add ( int64_t  n)

Definition at line 173 of file common.cxx.

int CTF_int::free_cond ( void *  ptr)
int CTF_int::gcd ( int  a,
int  b 
)
inline

Definition at line 334 of file util.h.

References ctf.core::a.

Referenced by lcm().

template<typename dtype >
void CTF_int::gemm ( char  tA,
char  tB,
int  m,
int  n,
int  k,
dtype  alpha,
dtype const *  A,
dtype const *  B,
dtype  beta,
dtype *  C 
)
template<typename dtype >
void CTF_int::gemm_batch ( char  taA,
char  taB,
int  l,
int  m,
int  n,
int  k,
dtype  alpha,
dtype const *  A,
dtype const *  B,
dtype  beta,
dtype *  C 
)

Definition at line 15 of file semiring.cxx.

References ctf.core::dtype, and get_grp_ptrs().

Referenced by get_grp_ptrs(), and CTF_int::algstrct::has_mul().

std::vector<Model*>& CTF_int::get_all_models ( )
std::vector< topology* > CTF_int::get_all_topos ( CommData  cdt,
int  n_uf,
int const *  uniq_fact,
int const *  mults,
int  n_prepend,
int const *  prelens 
)

computes all unique factorizations into non-primes each yielding a topology, prepending additional factors as specified

Parameters
[in]cdtglobal communicator
[in]n_ufnumber of unique prime factors
[in]uniq_factlist of prime factors
[in]n_prependnumber of factors to prepend
[in]mults?
[in]prelensfactors to prepend
Returns
lens vector of factorizations

Definition at line 410 of file topology.cxx.

References ASSERT, and CTF_int::topology::topology().

Referenced by get_generic_topovec().

int CTF_int::get_best_topo ( int64_t  nvirt,
int  topo,
CommData  global_comm,
int64_t  bcomm_vol = 0,
int64_t  bmemuse = 0 
)

get the best topologies (least nvirt) over all procs

Parameters
[in]nvirtbest virtualization achieved by this proc
[in]topotopology index corresponding to best virtualization
[in]global_commis the global communicator
[in]bcomm_volbest comm volume computed
[in]bmemusebest memory usage computed; returns virtualization factor

Definition at line 591 of file topology.cxx.

References ASSERT, and CTF_int::CommData::cm.

Referenced by CTF_int::scaling::execute(), CTF_int::summation::is_equal(), and CTF_int::tensor::set_zero().

void CTF_int::get_choice ( int64_t  n,
int64_t  k,
int64_t  ch,
int *  chs 
)

Definition at line 289 of file util.cxx.

References calc_idx_arr(), NS, and SH.

Referenced by coalesce_bwd(), and get_len_ordering().

int64_t CTF_int::get_coo_size ( int64_t  nnz,
int  val_size 
)
int64_t CTF_int::get_csr_size ( int64_t  nnz,
int  nrow,
int  val_size 
)

computes the size of a serialized CSR matrix

Parameters
[in]nnznumber of nonzeros in matrix
[in]nrownumber of rows in matrix
[in]val_sizesize of each matrix entry

Definition at line 8 of file csr.cxx.

References ALIGN.

Referenced by CTF_int::CSR_Matrix::CSR_Matrix(), CTF_int::CSR_Matrix::partition(), CTF_int::CSR_Matrix::size(), and CTF_int::tensor::spmatricize().

template<typename dtype >
constexpr bool CTF_int::get_default_is_ord ( )

Definition at line 234 of file set.h.

template<typename dtype >
MPI_Op CTF_int::get_default_maddop ( )

Definition at line 28 of file monoid.h.

References default_mxpy(), and ctf.core::dtype.

template<>
MPI_Op CTF_int::get_default_maddop< bool > ( )
inline

Definition at line 38 of file monoid.h.

template<>
MPI_Op CTF_int::get_default_maddop< char > ( )
inline

Definition at line 37 of file monoid.h.

template<>
MPI_Op CTF_int::get_default_maddop< double > ( )
inline

Definition at line 44 of file monoid.h.

template<>
MPI_Op CTF_int::get_default_maddop< float > ( )
inline

Definition at line 43 of file monoid.h.

template<>
MPI_Op CTF_int::get_default_maddop< int > ( )
inline

Definition at line 39 of file monoid.h.

template<>
MPI_Op CTF_int::get_default_maddop< int64_t > ( )
inline

Definition at line 40 of file monoid.h.

template<>
MPI_Op CTF_int::get_default_maddop< long double > ( )
inline

Definition at line 45 of file monoid.h.

template<>
MPI_Op CTF_int::get_default_maddop< std::complex< double > > ( )
inline

Definition at line 47 of file monoid.h.

template<>
MPI_Op CTF_int::get_default_maddop< std::complex< float > > ( )
inline

Definition at line 46 of file monoid.h.

template<>
MPI_Op CTF_int::get_default_maddop< uint64_t > ( )
inline

Definition at line 42 of file monoid.h.

template<>
MPI_Op CTF_int::get_default_maddop< unsigned int > ( )
inline

Definition at line 41 of file monoid.h.

template<typename dtype >
MPI_Datatype CTF_int::get_default_mdtype ( bool &  is_custom)
template<>
MPI_Datatype CTF_int::get_default_mdtype< bool > ( bool &  is_custom)
inline

Definition at line 207 of file set.h.

References MPI_CTF_BOOL.

template<>
MPI_Datatype CTF_int::get_default_mdtype< char > ( bool &  is_custom)
inline

Definition at line 213 of file set.h.

template<>
MPI_Datatype CTF_int::get_default_mdtype< double > ( bool &  is_custom)
inline

Definition at line 225 of file set.h.

template<>
MPI_Datatype CTF_int::get_default_mdtype< float > ( bool &  is_custom)
inline

Definition at line 223 of file set.h.

template<>
MPI_Datatype CTF_int::get_default_mdtype< int > ( bool &  is_custom)
inline

Definition at line 215 of file set.h.

template<>
MPI_Datatype CTF_int::get_default_mdtype< int64_t > ( bool &  is_custom)
inline

Definition at line 217 of file set.h.

template<>
MPI_Datatype CTF_int::get_default_mdtype< long double > ( bool &  is_custom)
inline

Definition at line 227 of file set.h.

template<>
MPI_Datatype CTF_int::get_default_mdtype< std::complex< double > > ( bool &  is_custom)
inline

Definition at line 209 of file set.h.

References MPI_CTF_DOUBLE_COMPLEX.

template<>
MPI_Datatype CTF_int::get_default_mdtype< std::complex< float > > ( bool &  is_custom)
inline

Definition at line 229 of file set.h.

template<>
MPI_Datatype CTF_int::get_default_mdtype< std::complex< long double > > ( bool &  is_custom)
inline

Definition at line 211 of file set.h.

References MPI_CTF_LONG_DOUBLE_COMPLEX.

template<>
MPI_Datatype CTF_int::get_default_mdtype< uint64_t > ( bool &  is_custom)
inline

Definition at line 221 of file set.h.

template<>
MPI_Datatype CTF_int::get_default_mdtype< unsigned int > ( bool &  is_custom)
inline

Definition at line 219 of file set.h.

int CTF_int::get_distribution_size ( int  order)
inline
algstrct const * CTF_int::get_double_ring ( )

Definition at line 10 of file ring.cxx.

References double_ring.

Referenced by CTF_int::Term::operator double().

algstrct const * CTF_int::get_float_ring ( )

Definition at line 6 of file ring.cxx.

References float_ring.

Referenced by CTF_int::Term::operator float().

int64_t CTF_int::get_flops ( )
template<typename dtype >
const char* CTF_int::get_fmt ( )

return format string for templated type

Definition at line 6 of file graph_io_aux.cxx.

References IASSERT.

template<>
const char* CTF_int::get_fmt< double > ( )
inline

Definition at line 18 of file graph_io_aux.cxx.

template<>
const char* CTF_int::get_fmt< float > ( )
inline

Definition at line 13 of file graph_io_aux.cxx.

template<>
const char* CTF_int::get_fmt< int > ( )
inline

Definition at line 23 of file graph_io_aux.cxx.

template<>
const char* CTF_int::get_fmt< int64_t > ( )
inline

Definition at line 28 of file graph_io_aux.cxx.

std::vector< topology * > CTF_int::get_generic_topovec ( CommData  cdt)

computes all topology configurations given underlying physical topology information

Parameters
[in]cdtglobal communicator

Definition at line 449 of file topology.cxx.

References cdealloc(), DPRINTF, factorize(), get_all_topos(), CTF_int::CommData::np, CTF_int::CommData::rank, and CTF_int::topology::topology().

Referenced by CTF::World::~World().

int CTF_int::get_glb ( int  i,
int  s,
int  t 
)
inline

Definition at line 9 of file dgtog_calc_cnt.cxx.

Referenced by calc_sy_pfx(), calc_sy_pfx< 1 >(), and precompute_offsets().

template<typename dtype >
dtype** CTF_int::get_grp_ptrs ( int64_t  grp_sz,
int64_t  ngrp,
dtype const *  data 
)

Definition at line 110 of file semiring.h.

References alloc(), default_gemm< float >(), ctf.core::dtype, gemm(), and gemm_batch().

Referenced by gemm_batch().

template<typename dtype >
void CTF_int::get_imag ( tensor A,
tensor B 
)

function that gets the imaginary part from complex numbers

Parameters
[in]Atensor
[in]Btensor that stores the imaginary part from tensor A
Returns
None

Definition at line 60 of file ctf_ext.cxx.

References ctf.core::a, ctf.core::dtype, and CTF_int::tensor::order.

template void CTF_int::get_imag< double > ( tensor A,
tensor B 
)

Referenced by conv_type().

template void CTF_int::get_imag< float > ( tensor A,
tensor B 
)

Referenced by conv_type().

algstrct const * CTF_int::get_int64_t_ring ( )

Definition at line 18 of file ring.cxx.

References int64_t_ring.

Referenced by CTF_int::Term::operator int64_t().

algstrct const * CTF_int::get_int_ring ( )

Definition at line 14 of file ring.cxx.

References int_ring.

Referenced by CTF_int::Term::operator int().

void CTF_int::get_len_ordering ( tensor const *  A,
tensor const *  B,
tensor const *  C,
int const *  idx_A,
int const *  idx_B,
int const *  idx_C,
int **  new_ordering_A,
int **  new_ordering_B,
int **  new_ordering_C 
)

find ordering of indices of tensor to reduce to DGEMM (A, B, and C may be permuted)

Parameters
[in]Atensor 1
[in]Btensor 2
[in]Ctensor 3
[in]idx_Aindices of tensor 1
[in]idx_Bindices of tensor 2
[in]idx_Cindices of tensor 3
[out]new_ordering_Athe new ordering for indices of A
[out]new_ordering_Bthe new ordering for indices of B
[out]new_ordering_Cthe new ordering for indices of C

Definition at line 451 of file contraction.cxx.

References CTF_int::contraction::A, ctf.core::a, ABORT, CTF_int::topology::activate(), CTF_int::algstrct::add(), CTF_int::algstrct::addid(), CTF_int::algstrct::addinv(), align_symmetric_indices(), alloc(), CTF_int::algstrct::alloc(), alloc_ptr(), CTF_int::contraction::alpha, ASSERT, CTF_int::mapping::aug_phys(), CTF_int::mapping::aug_virt(), CTF_int::contraction::B, CTF_int::contraction::beta, CTF_int::contraction::C, calc_dim(), calc_fold_lnmk(), CTF_int::tensor::calc_npe(), CTF_int::tensor::calc_nvirt(), CTF_int::mapping::calc_phase(), CTF_int::mapping::calc_phys_phase(), can_morph(), cdealloc(), CTF_int::mapping::cdt, CTF::World::cdt, CTF_int::ctr_2d_general::cdt_A, CTF_int::spctr_2d_general::cdt_A, CTF_int::ctr_2d_general::cdt_B, CTF_int::spctr_2d_general::cdt_B, CTF_int::ctr_2d_general::cdt_C, CTF_int::spctr_2d_general::cdt_C, check_self_mapping(), CTF_int::mapping::child, choose(), CTF_int::mapping::clear(), CTF_int::tensor::clear_mapping(), CTF_int::CommData::cm, CTF::World::comm, comp_dim_map(), contract_mst(), conv_idx< int >(), CTF_int::algstrct::copy(), copy_mapping(), ctr_2d_gen_build(), CTF_int::ctr_2d_general::ctr_lda_A, CTF_int::spctr_2d_general::ctr_lda_A, CTF_int::spctr_2d_general::ctr_lda_B, CTF_int::ctr_2d_general::ctr_lda_B, CTF_int::spctr_2d_general::ctr_lda_C, CTF_int::ctr_2d_general::ctr_lda_C, CTF_int::spctr_2d_general::ctr_sub_lda_A, CTF_int::ctr_2d_general::ctr_sub_lda_A, CTF_int::spctr_2d_general::ctr_sub_lda_B, CTF_int::ctr_2d_general::ctr_sub_lda_B, CTF_int::spctr_2d_general::ctr_sub_lda_C, CTF_int::ctr_2d_general::ctr_sub_lda_C, CTF_int::tensor::data, CTF_int::topology::deactivate(), CTF_int::algstrct::dealloc(), DEBUG_PRINTF, desymmetrize(), CTF_int::topology::dim_comm, CTF_int::spctr_2d_general::dns_vrt_sz_A, CTF_int::spctr_2d_general::dns_vrt_sz_B, CTF_int::spctr_2d_general::dns_vrt_sz_C, DPRINTF, ctf.core::dtype, CTF_int::ctr_2d_general::edge_len, CTF_int::spctr_2d_general::edge_len, CTF_int::tensor::edge_map, 
CTF_int::accumulatable::el_size, ERROR, CTF_int::tensor::est_redist_time(), CTF_int::ctr::est_time_rec(), est_time_transp(), CTF_int::scaling::execute(), CTF_int::summation::execute(), CTF_int::contraction::execute(), CTF_int::tensor::extract_diag(), extract_free_comms(), get_choice(), CTF_int::tensor::get_redist_mem(), get_sym_perms(), CTF_int::mapping::has_child, CTF_int::algstrct::has_coo_ker, CTF_int::tensor::has_home, CTF_int::tensor::has_zero_edge_len, CTF_int::tensor::home_buffer, CTF_int::tensor::home_size, CTF_int::contraction::idx_A, CTF_int::contraction::idx_B, CTF_int::contraction::idx_C, CTF_int::tensor::inner_ordering, inv_idx(), CTF_int::tensor::is_cyclic, CTF_int::tensor::is_data_aliased, CTF_int::tensor::is_home, CTF_int::tensor::is_mapped, CTF_int::algstrct::is_offloadable(), CTF_int::tensor::is_sparse, CTF_int::algstrct::isequal(), CTF_int::iparam::k, lcm(), CTF_int::topology::lens, CTF_int::tensor::lens, CTF_int::iparam::m, map_symtsr(), map_tensor(), CTF_int::tensor::map_tensor_rem(), MAX, CTF_int::ctr::mem_rec(), MIN, morph_topo(), CTF_int::ctr_2d_general::move_A, CTF_int::spctr_2d_general::move_A, CTF_int::ctr_2d_general::move_B, CTF_int::spctr_2d_general::move_B, CTF_int::ctr_2d_general::move_C, CTF_int::spctr_2d_general::move_C, mst_alloc(), CTF_int::algstrct::mulid(), CTF_int::iparam::n, CTF_int::tensor::name, NEGATIVE, CTF_int::tensor::nnz_blk, CTF_int::tensor::nnz_tot, nosym_transpose(), NOT_MAPPED, CTF_int::mapping::np, CTF::World::np, CTF_int::CommData::np, ctf.core::np(), NS, CTF_int::ctr::num_lyr, CTF_int::iparam::offload, CTF_int::topology::order, CTF_int::tensor::order, overcounting_factor(), CTF_int::tensor::pad_edge_len, CTF_int::algstrct::pair_size(), permute_target(), PHYSICAL_MAP, CTF_int::ctr::print(), CTF_int::tensor::print_map(), proc_bytes_available(), CTF_int::tensor::profile, CTF_int::tensor::pull_alias(), CTF::World::rank, CTF_int::CommData::rank, CTF_int::ctr_virt::rec_ctr, CTF_int::spctr_replicate::rec_ctr, 
CTF_int::ctr_2d_general::rec_ctr, CTF_int::spctr_2d_general::rec_ctr, CTF_int::spctr_virt::rec_ctr, CTF_int::spctr_pin_keys::rec_ctr, CTF_int::ctr_replicate::rec_ctr, CTF_int::tensor::rec_tsr, CTF_int::tensor::redistribute(), CTF_int::tensor::remove_fold(), CTF_int::ctr::run(), CTF_int::algstrct::safecopy(), CTF_int::tensor::scale_diagonals(), CTF_int::tensor::self_reduce(), CTF_int::tensor::set_new_nnz_glb(), CTF_int::tensor::set_padding(), CTF_int::tensor::set_sym(), CTF_int::tensor::set_zero(), sign(), CTF_int::tensor::size, CTF_int::tensor::sparsify(), CTF_int::tensor::sr, SUCCESS, SY, sy_packed_size(), CTF_int::tensor::sym, CTF_int::tensor::sym_table, symmetrize(), CTF_int::iparam::tA, TAU_FSTART, TAU_FSTOP, CTF_int::iparam::tB, CTF_int::iparam::tC, CTF_int::tensor::topo, CTF::World::topovec, CTF_int::mapping::type, CTF_int::tensor::unfold(), VIRTUAL_MAP, VPRINTF, CTF_int::tensor::wrld, and CTF_int::tensor::zero_out_padding().

int CTF_int::get_loc ( int  g,
int  s,
int  t 
)
inline
template<typename dtype >
MPI_Op CTF_int::get_maddop ( void(*)(int, dtype const *, dtype *)  fxpy)

Definition at line 50 of file monoid.h.

bool CTF_int::get_mpi_dt ( int64_t  count,
int64_t  datum_size,
MPI_Datatype &  dt 
)

gives a datatype for arbitrary datum_size, errors if exceeding 32-bits

Parameters
[in]countnumber of elements we want to communicate
[in]datum_sizeelement size
[out]dtnew datatype to pass to MPI routine
Returns
whether the datatype is custom and needs to be freed

Definition at line 587 of file common.cxx.

References ASSERT, and MPI_CTF_DOUBLE_COMPLEX.

Referenced by CTF_int::tspsum_replicate::run().

std::list<mem_loc>* CTF_int::get_mst ( )

Definition at line 163 of file memcontrol.cxx.

References mst.

int CTF_int::get_num_instances ( )

Definition at line 531 of file memcontrol.cxx.

References instance_counter.

Referenced by CTF::World::~World().

template<typename ptype >
void CTF_int::get_perm ( int  perm_order,
ptype  A,
ptype  B,
ptype  C,
ptype &  tA,
ptype &  tB,
ptype &  tC 
)

Definition at line 117 of file contraction.cxx.

topology * CTF_int::get_phys_topo ( CommData  glb_comm,
TOPOLOGY  mach 
)

get dimension and torus lengths of specified topology

Parameters
[in]glb_commcommunicator
[in]machspecified topology

Definition at line 94 of file topology.cxx.

References alloc(), cdealloc(), ctf.core::dim, factorize(), MIN, NO_TOPOLOGY, CTF_int::CommData::np, ctf.core::np(), CTF_int::topology::order, CTF_int::topology::topology(), TOPOLOGY_8D, TOPOLOGY_BGP, TOPOLOGY_BGQ, and TOPOLOGY_GENERIC.

Referenced by CTF::World::~World().

template<typename dtype >
void CTF_int::get_real ( tensor A,
tensor B 
)

function that gets the real part from complex numbers

Parameters
[in]Atensor
[in]Btensor that stores the real part from tensor A
Returns
None

Definition at line 51 of file ctf_ext.cxx.

References ctf.core::a, ctf.core::dtype, and CTF_int::tensor::order.

template void CTF_int::get_real< double > ( tensor A,
tensor B 
)

Referenced by conv_type().

template void CTF_int::get_real< float > ( tensor A,
tensor B 
)

Referenced by conv_type().

void CTF_int::get_sym_perms ( summation const &  sum,
std::vector< summation > &  perms,
std::vector< int > &  signs 
)

finds all permutations of a summation that must be done for a broken symmetry

Parameters
[in]sumsummation specification
[out]permsthe permuted summation specifications
[out]signssign of each summation

Definition at line 647 of file symmetrization.cxx.

References CTF_int::summation::A, add_sym_perm(), AS, CTF_int::summation::B, CTF_int::summation::idx_A, CTF_int::summation::idx_B, NS, CTF_int::tensor::order, sign(), and CTF_int::tensor::sym.

Referenced by CTF_int::summation::estimate_time(), and get_len_ordering().

void CTF_int::get_sym_perms ( contraction const &  ctr,
std::vector< contraction > &  perms,
std::vector< int > &  signs 
)

finds all permutations of a contraction that must be done for a broken symmetry

Parameters
[in]ctrcontraction specification
[out]permsthe permuted contraction specifications
[out]signssign of each contraction

Definition at line 691 of file symmetrization.cxx.

References CTF_int::contraction::A, add_sym_perm(), AS, CTF_int::contraction::B, CTF_int::contraction::C, CTF_int::contraction::idx_A, CTF_int::contraction::idx_B, CTF_int::contraction::idx_C, NS, CTF_int::tensor::order, sign(), and CTF_int::tensor::sym.

int64_t CTF_int::getTotalSystemMemory ( )
inline

Definition at line 155 of file util.h.

char * CTF_int::glb_cyclic_reshuffle ( int const *  sym,
distribution const &  old_dist,
int const *  old_offsets,
int *const *  old_permutation,
distribution const &  new_dist,
int const *  new_offsets,
int *const *  new_permutation,
char **  ptr_tsr_data,
char **  ptr_tsr_cyclic_data,
algstrct const *  sr,
CommData  ord_glb_comm,
bool  reuse_buffers,
char const *  alpha,
char const *  beta 
)

Goes from any set of phases to any new set of phases.

Parameters
[in]symsymmetry relations between tensor dimensions
[in]old_diststarting data distribution
[in]old_offsetsold offsets of each tensor edge (corner 1 of slice)
[in]old_permutationpermutation array for each edge length (no perm if NULL)
[in]new_disttarget data distribution
[in]new_offsetsnew offsets of each tensor edge (corner 1 of slice)
[in]new_permutationpermutation array for each edge length (no perm if NULL)
[in]ptr_tsr_datastarting data buffer
[out]ptr_tsr_cyclic_datatarget data buffer
[in]sralgstrct defining data
[in]ord_glb_commcommunicator on which to redistribute
[in]reuse_buffersif 1: ptr_tsr_cyclic_data is allocated dynamically and ptr_tsr_data is overwritten with intermediate data if 0: ptr_tsr_cyclic_data is preallocated and can be scaled by beta, however, more memory is used for temp buffers
[in]alphascaling tensor for new data
[in]betascaling tensor for original data

Definition at line 879 of file glb_cyclic_reshuffle.cxx.

References CTF_int::algstrct::acc(), CTF_int::algstrct::addid(), alloc_ptr(), ASSERT, calc_cnt_displs(), cdealloc(), compute_bucket_offsets(), CTF_int::algstrct::copy(), cyclic_reshuffle(), ctf.core::dim, CTF_int::accumulatable::el_size, glb_ord_pup(), CTF_int::distribution::is_cyclic, CTF_int::algstrct::isequal(), MAX, mst_alloc_ptr(), CTF_int::algstrct::mulid(), CTF_int::CommData::np, ctf.core::np(), CTF_int::distribution::order, order_globally(), CTF_int::distribution::pad_edge_len, CTF_int::distribution::padding, CTF_int::distribution::pe_lda, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, CTF_int::CommData::rank, CTF_int::distribution::size, sy_packed_size(), TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.

void CTF_int::glb_ord_pup ( int const *  sym,
distribution const &  old_dist,
distribution const &  new_dist,
int const *  len,
int const *  old_phys_dim,
int const *  old_phys_edge_len,
int const *  old_virt_edge_len,
int64_t  old_virt_nelem,
int const *  old_offsets,
int *const *  old_permutation,
int  total_np,
int const *  new_phys_dim,
int const *  new_phys_edge_len,
int const *  new_virt_edge_len,
int64_t  new_virt_nelem,
char *  old_data,
char **  new_data,
int  forward,
int *const *  bucket_offset,
char const *  alpha,
char const *  beta,
algstrct const *  sr 
)
void CTF_int::handler ( )

Definition at line 181 of file common.cxx.

References ctf.core::array(), and ctf.core::rank().

void CTF_int::host_pinned_alloc ( void **  ptr,
int64_t  size 
)

allocate a pinned host buffer

Parameters
[out]ptrpointer to define
[in]sizeamount of buffer space to allocate

Referenced by CTF_int::ctr_2d_general::run(), and CTF_int::spctr_2d_general::run().

void CTF_int::host_pinned_free ( void *  ptr)

free a pinned host buffer

Parameters
[in]ptrpointer to free

Referenced by CTF_int::ctr_2d_general::run(), and CTF_int::spctr_2d_general::run().

bool CTF_int::hptt_is_applicable ( int  order,
int const *  new_order,
int  elementSize 
)

Checks if the HPTT library is applicable.

Parameters
[in]orderdimension of tensor
[in]new_ordernew ordering of dimensions
[in]elementSizeelement size

Definition at line 319 of file nosym_transp.cxx.

Referenced by nosym_transpose().

void CTF_int::inc_tot_mem_used ( int64_t  a)

Definition at line 80 of file memcontrol.cxx.

References ctf.core::a, and ASSERT.

Referenced by CTF_int::tensor::deregister_size(), and CTF_int::tensor::register_size().

void CTF_int::init_rng ( int  rank)

initializes the random number generator

Parameters
[in]rankprocessor index

Definition at line 23 of file common.cxx.

References rng.

Referenced by ctf.random::all_seed(), ctf.random::seed(), and CTF::World::~World().

void CTF_int::inv_idx ( int  order_A,
int const *  idx_A,
int *  order_tot,
int **  idx_arr 
)

invert index map

Parameters
[in]order_Anumber of dimensions of A
[in]idx_Aindex map of A
[in]order_Bnumber of dimensions of B
[in]idx_Bindex map of B
[out]order_totnumber of total dimensions
[out]idx_arr2*order_tot index array
[in]order_Anumber of dimensions of A
[in]idx_Aindex map of A
[out]order_totnumber of total dimensions
[out]idx_arr2*ndim_tot index array

Definition at line 19 of file sym_seq_scl.cxx.

References alloc().

void CTF_int::inv_idx ( int  order_A,
int const *  idx_A,
int  order_B,
int const *  idx_B,
int  order_C,
int const *  idx_C,
int *  order_tot,
int **  idx_arr 
)
void CTF_int::inv_idx ( int  order_A,
int const *  idx_A,
int  order_B,
int const *  idx_B,
int *  order_tot,
int **  idx_arr 
)

invert index map

Parameters
[in]order_Anumber of dimensions of A
[in]idx_Aindex map of A
[in]order_Bnumber of dimensions of B
[in]idx_Bindex map of B
[out]order_totnumber of total dimensions
[out]idx_arr2*order_tot index array

Definition at line 913 of file spsum_tsr.cxx.

References alloc().

int CTF_int::lcm ( int  a,
int  b 
)
inline
void CTF_int::lda_cpy ( int  el_size,
int  nrow,
int  ncol,
int  lda_A,
int  lda_B,
const char *  A,
char *  B 
)
inline

Copies submatrix to submatrix (column-major)

Parameters
[in]el_sizeelement size
[in]nrownumber of rows
[in]ncolnumber of columns
[in]lda_Alda along rows for A
[in]lda_Blda along rows for B
[in]Amatrix to read from
[in,out]Bmatrix to write to

Definition at line 355 of file util.h.

Referenced by CTF_int::LinModel< nparam >::update().

void CTF_int::load_all_models ( std::string  file_name)

Definition at line 34 of file model.cxx.

References get_all_models().

Referenced by CTF_int::Model::dump_data(), and CTF::World::~World().

int CTF_int::map_self_indices ( tensor const *  tsr,
int const *  idx_map 
)

create virtual mapping for idx_maps that have repeating indices

Parameters
[in]tsrtensor handle
[in]idx_mapmapping of tensor indices to contraction map

Definition at line 423 of file mapping.cxx.

References alloc_ptr(), cdealloc(), CTF_int::tensor::edge_map, CTF_int::mapping::has_child, map_symtsr(), NOT_MAPPED, CTF_int::mapping::np, CTF_int::tensor::order, SUCCESS, CTF_int::tensor::sym_table, CTF_int::mapping::type, and VIRTUAL_MAP.

Referenced by CTF_int::scaling::execute(), and CTF_int::summation::is_equal().

int CTF_int::map_symtsr ( int  tsr_order,
int const *  tsr_sym_table,
mapping tsr_edge_map 
)

adjust a mapping to maintain symmetry

Parameters
[in]tsr_orderis the number of dimensions of the tensor
[in]tsr_sym_tablethe symmetry table of a tensor
[in,out]tsr_edge_mapis the mapping
Returns
CTF::SUCCESS if mapping successful, CTF::NEGATIVE if not, CTF::ERROR if err'ed out

Definition at line 470 of file mapping.cxx.

References ASSERT, CTF_int::mapping::calc_phase(), CTF_int::mapping::child, CTF_int::mapping::has_child, lcm(), CTF_int::mapping::mapping(), MAX_PHASE, MAXLOOP, NEGATIVE, NOT_MAPPED, CTF_int::mapping::np, PHYSICAL_MAP, SUCCESS, CTF_int::mapping::type, and VIRTUAL_MAP.

Referenced by get_len_ordering(), map_self_indices(), and map_tensor().

int CTF_int::map_tensor ( int  num_phys_dims,
int  tsr_order,
int const *  tsr_edge_len,
int const *  tsr_sym_table,
int *  restricted,
CommData phys_comm,
int const *  comm_idx,
int  fill,
mapping tsr_edge_map 
)

map a tensor

Parameters
[in]num_phys_dimsnumber of physical processor grid dimensions
[in]tsr_ordernumber of tensor dimensions
[in]tsr_edge_lenedge lengths of the tensor
[in]tsr_sym_tablethe symmetry table of a tensor
[in,out]restrictedan array used to restrict the mapping of tensor dims
[in]phys_commdimensional communicators
[in]comm_idxdimensional ordering
[in]fillif set does recursive mappings and uses all phys dims
[in,out]tsr_edge_mapmapping of tensor
Returns
CTF_SUCCESS if mapping successful, CTF_NEGATIVE if not, CTF_ERROR if err'ed out

Definition at line 244 of file mapping.cxx.

References CTF_int::mapping::calc_phys_phase(), CTF_int::mapping::cdt, CTF_int::mapping::child, CTF_int::mapping::has_child, lcm(), map_symtsr(), CTF_int::mapping::mapping(), MAX_PHASE, NEGATIVE, NOT_MAPPED, CTF_int::mapping::np, CTF_int::CommData::np, PHYSICAL_MAP, SUCCESS, CTF_int::mapping::type, and VIRTUAL_MAP.

Referenced by get_len_ordering(), CTF_int::summation::is_equal(), CTF_int::tensor::map_tensor_rem(), and CTF_int::tensor::set_zero().

void CTF_int::matrix_qr ( tensor A,
tensor Q,
tensor R 
)
void CTF_int::matrix_qr_cmplx ( tensor A,
tensor Q,
tensor R 
)
void CTF_int::matrix_svd ( tensor A,
tensor U,
tensor S,
tensor VT,
int  rank 
)
void CTF_int::matrix_svd_cmplx ( tensor A,
tensor U,
tensor S,
tensor VT,
int  rank 
)
void CTF_int::mem_create ( )

create instance of memory manager

Definition at line 187 of file memcontrol.cxx.

References max_threads.

Referenced by CTF::World::~World().

void CTF_int::mem_exit ( int  rank)

exit instance of memory manager

Parameters
[in]rankprocessor index

Definition at line 207 of file memcontrol.cxx.

References max_threads, and mst_buffer_ptr.

Referenced by CTF::World::~World().

void CTF_int::morph_topo ( topology const *  new_topo,
topology const *  old_topo,
int  order,
mapping edge_map 
)

morphs a tensor topology into another

Parameters
[in]new_topotopology to change to
[in]old_topotopology we are changing from
[in]ordernumber of tensor dimensions
[in,out]edge_mapmapping whose topology mapping we are changing

Definition at line 700 of file topology.cxx.

References alloc_ptr(), ASSERT, cdealloc(), CTF_int::mapping::cdt, CTF_int::mapping::child, CTF_int::mapping::clear(), CTF_int::topology::dim_comm, CTF_int::mapping::has_child, CTF_int::topology::lda, CTF_int::mapping::np, CTF_int::CommData::np, ctf.core::np(), CTF_int::topology::order, PHYSICAL_MAP, CTF_int::mapping::type, and VIRTUAL_MAP.

Referenced by get_len_ordering().

void * CTF_int::mst_alloc ( int64_t const  len)

mst_alloc allocates buffer on the specialized memory stack

Parameters
[in]lennumber of bytes

Definition at line 307 of file memcontrol.cxx.

References ASSERT, mst_alloc_ptr(), and SUCCESS.

Referenced by contract_mst(), and get_len_ordering().

int CTF_int::mst_alloc_ptr ( int64_t const  len,
void **const  ptr 
)
void CTF_int::mst_create ( int64_t  size)

initializes stack buffer

Definition at line 170 of file memcontrol.cxx.

References ALIGN_BYTES, and ASSERT.

Referenced by CTF::World::~World().

int CTF_int::mst_free ( void *  ptr)

frees buffer allocated on stack

Parameters
[in]ptrpointer to buffer on stack

Definition at line 234 of file memcontrol.cxx.

References ABORT, ASSERT, ERROR, ctf.core::it, and SUCCESS.

Referenced by cdealloc().

template<typename dtype >
void CTF_int::muladd_csrmm ( int  m,
int  n,
int  k,
dtype  alpha,
dtype const *  A,
int const *  JA,
int const *  IA,
int  nnz_A,
dtype const *  B,
dtype  beta,
dtype *  C 
)

Definition at line 332 of file semiring.cxx.

References ctf.core::dtype, and muladd_csrmultd().

Referenced by default_coomm< std::complex< double > >().

template<typename dtype >
void CTF_int::muladd_csrmultd ( int  m,
int  n,
int  k,
dtype const *  A,
int const *  JA,
int const *  IA,
int  nnz_A,
dtype const *  B,
int const *  JB,
int const *  IB,
int  nnz_B,
dtype *  C 
)

Definition at line 370 of file semiring.cxx.

References CTF::Semiring< dtype, is_ord >::default_csrmm().

Referenced by muladd_csrmm().

void CTF_int::nosym_transpose ( int  order,
int const *  new_order,
int const *  edge_len,
char *  data,
int  dir,
algstrct const *  sr 
)

transposes a non-symmetric (folded) tensor

Parameters
[in]orderdimension of tensor
[in]new_ordernew ordering of dimensions
[in]edge_lenoriginal edge lengths
[in,out]datadata to transpose
[in]dirwhich way are we going?
[in]sralgstrct defining element size

Definition at line 519 of file nosym_transp.cxx.

References alloc_ptr(), cdealloc(), CTF_int::accumulatable::el_size, MIN, nosym_transpose(), and TAU_FSTOP.

void CTF_int::nosym_transpose ( int  order,
int const *  new_order,
int const *  edge_len,
char const *  data,
int  dir,
int  max_ntd,
char **  tswap_data,
int64_t *  chunk_size,
algstrct const *  sr 
)

transposes a non-symmetric (folded) tensor internal kernel

Parameters
[in]orderdimension of tensor
[in]new_ordernew ordering of dimensions
[in]edge_lenoriginal edge lengths
[in]datadata to transpose
[in]dirwhich way are we going?
[in]max_ntdhow many threads to use
[out]tswap_datatransposed data
[out]chunk_sizechunk sizes of transposed data
[in]sralgstrct defining element size

Definition at line 586 of file nosym_transp.cxx.

References alloc_ptr(), ASSERT, cdealloc(), CTF_int::algstrct::copy(), CTF_int::accumulatable::el_size, TAU_FSTART, and TAU_FSTOP.

void CTF_int::nosym_transpose_hptt ( int  order,
int const *  edge_len,
int  dir,
tensor *&  A 
)

High-performance implementation of nosym_transpose using HPTT.

Parameters
[in]orderdimension of tensor
[in]edge_lenoriginal edge lengths
[in]dirwhich way are we going?
[in,out]Atensor to be transposed
void CTF_int::nosym_transpose_hptt ( int  order,
int const *  st_new_order,
int const *  st_edge_len,
int  dir,
char const *  st_buffer,
char *  new_buffer,
algstrct const *  sr 
)

Definition at line 332 of file nosym_transp.cxx.

References ABORT, CTF_int::accumulatable::el_size, and MIN.

Referenced by nosym_transpose().

void CTF_int::offload_exit ( )

exit offloading, e.g. destroy cublas

Referenced by CTF::World::~World().

template<typename dtype >
void CTF_int::offload_gemm ( char  tA,
char  tB,
int  m,
int  n,
int  k,
dtype  alpha,
offload_tsr A,
int  lda_A,
offload_tsr B,
int  lda_B,
dtype  beta,
offload_tsr C,
int  lda_C 
)
template<typename dtype >
void CTF_int::offload_gemm ( char  tA,
char  tB,
int  m,
int  n,
int  k,
dtype  alpha,
dtype const *  dev_A,
int  lda_A,
dtype const *  dev_B,
int  lda_B,
dtype  beta,
dtype *  dev_C,
int  lda_C 
)
void CTF_int::offload_init ( )

initialize offloading, e.g. create cublas

Referenced by CTF::World::~World().

template<int idim>
void CTF_int::ord_glb ( int const *  sym,
distribution const &  dist,
int const *  virt_edge_len,
int const *  virt_phase_lda,
int64_t  vbs,
bool  dir,
char const *  tsr_data_in,
char *  tsr_data_out,
algstrct const *  sr,
int  prev_idx = 0,
int64_t  glb_ord_offset = 0,
int64_t  blk_ord_offset = 0 
)

Definition at line 533 of file glb_cyclic_reshuffle.cxx.

References NS, and CTF_int::distribution::virt_phase.

Referenced by ord_glb_omp().

template<>
void CTF_int::ord_glb< 0 > ( int const *  sym,
distribution const &  dist,
int const *  virt_edge_len,
int const *  virt_phase_lda,
int64_t  vbs,
bool  dir,
char const *  tsr_data_in,
char *  tsr_data_out,
algstrct const *  sr,
int  prev_idx,
int64_t  glb_ord_offset,
int64_t  blk_ord_offset 
)
inline

Definition at line 566 of file glb_cyclic_reshuffle.cxx.

References NS, and ord_glb< 7 >().

Referenced by ord_glb_omp< 0 >(), and order_globally().

template void CTF_int::ord_glb< 7 > ( int const *  sym,
distribution const &  dist,
int const *  virt_edge_len,
int const *  virt_phase_lda,
int64_t  vbs,
bool  dir,
char const *  tsr_data_in,
char *  tsr_data_out,
algstrct const *  sr,
int  prev_idx,
int64_t  glb_ord_offset,
int64_t  blk_ord_offset 
)

Referenced by ord_glb< 0 >().

template<int idim>
void CTF_int::ord_glb_omp ( int const *  sym,
distribution const &  dist,
int const *  virt_edge_len,
int const *  virt_phase_lda,
int64_t  vbs,
bool  dir,
char const *  tsr_data_in,
char *  tsr_data_out,
algstrct const *  sr,
int const *  idx_st,
int const *  idx_end,
int  prev_idx = 0,
int64_t  glb_ord_offset = 0,
int64_t  blk_ord_offset = 0 
)

Definition at line 606 of file glb_cyclic_reshuffle.cxx.

References MIN, NS, ord_glb(), and CTF_int::distribution::virt_phase.

template<>
void CTF_int::ord_glb_omp< 0 > ( int const *  sym,
distribution const &  dist,
int const *  virt_edge_len,
int const *  virt_phase_lda,
int64_t  vbs,
bool  dir,
char const *  tsr_data_in,
char *  tsr_data_out,
algstrct const *  sr,
int const *  idx_st,
int const *  idx_end,
int  prev_idx,
int64_t  glb_ord_offset,
int64_t  blk_ord_offset 
)

Definition at line 655 of file glb_cyclic_reshuffle.cxx.

References ord_glb< 0 >(), and ord_glb_omp< 7 >().

template void CTF_int::ord_glb_omp< 7 > ( int const *  sym,
distribution const &  dist,
int const *  virt_edge_len,
int const *  virt_phase_lda,
int64_t  vbs,
bool  dir,
char const *  tsr_data_in,
char *  tsr_data_out,
algstrct const *  sr,
int const *  idx_st,
int const *  idx_end,
int  prev_idx,
int64_t  glb_ord_offset,
int64_t  blk_ord_offset 
)

Referenced by ord_glb_omp< 0 >().

void CTF_int::order_globally ( int const *  sym,
distribution const &  dist,
int const *  virt_edge_len,
int const *  virt_phase_lda,
int64_t  vbs,
bool  dir,
char const *  tsr_data_in,
char *  tsr_data_out,
algstrct const *  sr 
)

reorder local buffer so that elements are ordered according to where they are in the global tensor (interleave virtual blocks)

Parameters
[in]symsymmetry relations between tensor dimensions
[in]distdistribution of data
[in]virt_edge_lendimensions of each block
[in]virt_phase_ldaprefix sum of virtual blocks
[in]vbssize of virtual blocks
[in]dirif 1 then go to global layout, if 0 then from
[in]tsr_data_instarting data buffer
[out]tsr_data_outtarget data buffer
[in]sralgstrct defining data

Definition at line 689 of file glb_cyclic_reshuffle.cxx.

References alloc(), ASSERT, CASE_ORD_GLB, cdealloc(), CTF_int::algstrct::copy(), ctf.core::dim, CTF_int::accumulatable::el_size, MIN, NS, ord_glb< 0 >(), CTF_int::distribution::order, sy_calc_idx_arr(), TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.

Referenced by glb_cyclic_reshuffle().

void CTF_int::order_perm ( tensor const *  A,
tensor const *  B,
int *  idx_arr,
int  off_A,
int  off_B,
int *  idx_A,
int *  idx_B,
int &  add_sign,
int &  mod 
)

orders the summation indices of one tensor that don't break summation symmetries

Parameters
[in]A
[in]B
[in]idx_arrinverted summation index map
[in]off_Aoffset of A in inverted index map
[in]off_Boffset of B in inverted index map
[in]idx_Aindex map of A
[in]idx_Bindex map of B
[in,out]add_signsign of contraction
[in,out]mod1 if sum is permuted

Definition at line 440 of file symmetrization.cxx.

References AS, ASSERT, MAX, MIN, NS, CTF_int::tensor::order, and CTF_int::tensor::sym.

void CTF_int::order_perm ( tensor const *  A,
tensor const *  B,
tensor const *  C,
int *  idx_arr,
int  off_A,
int  off_B,
int  off_C,
int *  idx_A,
int *  idx_B,
int *  idx_C,
int &  add_sign,
int &  mod 
)

orders the contraction indices of one tensor that don't break contraction symmetries

Parameters
[in]A
[in]B
[in]C
[in]idx_arrinverted contraction index map
[in]off_Aoffset of A in inverted index map
[in]off_Boffset of B in inverted index map
[in]off_Coffset of C in inverted index map
[in]idx_Aindex map of A
[in]idx_Bindex map of B
[in]idx_Cindex map of C
[in,out]add_signsign of contraction
[in,out]mod1 if permutation done

Definition at line 492 of file symmetrization.cxx.

References AS, MAX, MIN, NS, CTF_int::tensor::order, and CTF_int::tensor::sym.

Referenced by add_sym_perm().

int64_t CTF_int::packed_size ( int  order,
const int *  len,
const int *  sym 
)

computes the size of a tensor in packed symmetric (SY, SH, or AS) layout

Parameters
[in]ordertensor dimension
[in]lentensor edge lengths
[in]symtensor symmetries
Returns
size of tensor in packed layout

Definition at line 38 of file util.cxx.

Referenced by calc_idx_arr(), CTF_int::tensor::get_tot_size(), glb_ord_pup(), pad_cyclic_pup_virt_buff(), CTF_int::tensor::read_dense_from_file(), and CTF_int::tensor::write_dense_to_file().

void CTF_int::pad_cyclic_pup_virt_buff ( int const *  sym,
distribution const &  old_dist,
distribution const &  new_dist,
int const *  len,
int const *  old_phys_dim,
int const *  old_phys_edge_len,
int const *  old_virt_edge_len,
int64_t  old_virt_nelem,
int const *  old_offsets,
int *const *  old_permutation,
int  total_np,
int const *  new_phys_dim,
int const *  new_phys_edge_len,
int const *  new_virt_edge_len,
int64_t  new_virt_nelem,
char *  old_data,
char **  new_data,
int  forward,
int *const *  bucket_offset,
char const *  alpha,
char const *  beta,
algstrct const *  sr 
)
Parameters
[in]symsymmetry relations between tensor dimensions
[in]old_diststarting data distribution
[in]new_disttarget data distribution
[in]lennon-padded edge lengths of tensor
[in]old_phys_dimedge lengths of the old processor grid
[in]old_phys_edge_lenthe old tensor processor block lengths
[in]old_virt_edge_lenthe old tensor block lengths
[in]old_virt_nelemthe old number of elements per block
[in]old_offsetsold offsets of each tensor edge (corner 1 of slice)
[in]old_permutationpermutation array for each edge length (no perm if NULL)
[in]total_npthe total number of processors
[in]new_phys_dimedge lengths of the new processor grid
[in]new_phys_edge_lenthe new tensor processor block lengths
[in]new_virt_edge_lenthe new tensor block lengths
[in]new_virt_nelemthe new number of elements per block
[in,out]old_datathe previous set of values stored locally
[in,out]new_databuffers to fill with data to send to each process and virtual bucket
[in]forwardis 0 on the receiving side and reverses the role of all the previous parameters
[in]bucket_offsetoffsets for target index for each dimension
[in]alphascaling factor for received data
[in]betascaling factor for previous data
[in]sralgstrct defining elements and ops

Definition at line 8 of file cyclic_reshuffle.cxx.

References ABORT, CTF_int::algstrct::acc(), CTF_int::algstrct::addid(), alloc_ptr(), ASSERT, calc_idx_arr(), cdealloc(), CTF_int::algstrct::copy(), ctf.core::dim, CTF_int::accumulatable::el_size, CTF_int::algstrct::isequal(), MAX, MIN, mst_alloc_ptr(), CTF_int::algstrct::mulid(), NS, CTF_int::distribution::order, packed_size(), CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, ctf.core::rank(), SY, sy_packed_size(), TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.

Referenced by cyclic_reshuffle().

void CTF_int::pad_key ( int  order,
int64_t  num_pair,
int const *  edge_len,
int const *  padding,
PairIterator  pairs,
algstrct const *  sr,
int const *  offsets = NULL 
)

applies padding to keys

Parameters
[in]ordertensor dimension
[in]num_pairnumber of pairs
[in]edge_lentensor edge lengths
[in]paddingpadding of tensor (included in edge_len)
[in,out]pairsset of pairs which to pad
[in]sralgstrct defining size of each pair
[in]offsets(default NULL, none applied), offsets keys

Definition at line 6 of file pad.cxx.

References CTF_int::PairIterator::k(), TAU_FSTART, TAU_FSTOP, and CTF_int::PairIterator::write_key().

Referenced by read_loc_pairs(), CTF_int::tensor::slice(), CTF_int::tensor::sparsify(), and wr_pairs_layout().

void CTF_int::padded_reshuffle ( int const *  sym,
distribution const &  old_dist,
distribution const &  new_dist,
char *  tsr_data,
char **  tsr_cyclic_data,
algstrct const *  sr,
CommData  ord_glb_comm 
)

Reshuffle elements using key-value pair read/write.

Parameters
[in]symsymmetry relations between tensor dimensions
[in]old_diststarting data distribution
[in]new_disttarget data distribution
[in]tsr_datastarting data buffer
[out]tsr_cyclic_datatarget data buffer
[in]sralgstrct defining data
[in]ord_glb_commcommunicator on which to redistribute

Definition at line 8 of file redist.cxx.

References CTF_int::algstrct::addid(), alloc_ptr(), cdealloc(), DEBUG_PRINTF, DPRINTF, CTF_int::accumulatable::el_size, CTF_int::algstrct::mulid(), CTF_int::CommData::np, CTF_int::distribution::order, CTF_int::distribution::pad_edge_len, CTF_int::distribution::padding, CTF_int::distribution::pe_lda, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, CTF_int::CommData::rank, read_loc_pairs(), CTF_int::algstrct::set(), CTF_int::distribution::size, sy_packed_size(), TAU_FSTART, TAU_FSTOP, CTF_int::distribution::virt_phase, and wr_pairs_layout().

Referenced by CTF_int::tensor::redistribute().

template<typename dtype >
void CTF_int::parse_sparse_tensor_data ( char **  lvals,
int  order,
dtype const *  pmulid,
int *  lens,
int64_t  nvals,
CTF::Pair< dtype > *  pairs,
bool  with_vals 
)

parse string containing sparse tensor into data

Parameters
[in]lvalsarray of strings, one per line/entry, formatted as i1, ..., i_order v or i1, ..., i_order if with_vals=false
[in]ordernum modes in tensor
[in]pmulidpointer to multiplicative identity, used only if with_vals=false
[in]lensdimensions of tensor
[in]nvalsnumber of entries in lvals
[in]pairsarray of tensor index/value pairs to fill
[in]with_valswhether values are included in file

Definition at line 45 of file graph_io_aux.cxx.

References CTF::Pair< dtype >::d, ctf.core::dtype, and CTF::Pair< dtype >::k.

std::vector< topology * > CTF_int::peel_perm_torus ( topology phys_topology,
CommData  cdt 
)

folds specified topology and all of its permutations into all configurations of lesser dimensionality

Parameters
[in]phys_topologytopology to fold
[in]cdtglobal communicator

Definition at line 488 of file topology.cxx.

References find_topology(), CTF_int::topology::lens, CTF_int::topology::order, peel_torus(), and CTF_int::topology::topology().

Referenced by CTF::World::~World().

std::vector< topology * > CTF_int::peel_torus ( topology const *  topo,
CommData  glb_comm 
)

folds specified topology into all configurations of lesser dimensionality

Parameters
[in]topotopology to fold
[in]glb_commglobal communicator

Definition at line 537 of file topology.cxx.

References alloc(), cdealloc(), find_topology(), CTF_int::topology::lens, CTF_int::topology::order, and CTF_int::topology::topology().

Referenced by peel_perm_torus().

void CTF_int::permute ( int  order,
int const *  perm,
int *  arr 
)

permute an array

Parameters
ordernumber of elements
permpermutation array