Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
|
Data Structures | |
class | accumulatable |
abstract class that knows how to add More... | |
class | algstrct |
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction, virtual classes defined in derived typed classes or algstrctcpy More... | |
class | Bifun_Term |
class | bivar_function |
untyped internal class for triply-typed bivariate function More... | |
struct | BoolPair |
class | CommData |
struct | CompPair |
struct | CompPtrPair |
class | ConstPairIterator |
class | Contract_Term |
An expression representing a contraction of a set of tensors contained in operands. More... | |
class | contraction |
class for executing distributed contraction of tensors More... | |
class | COO_Matrix |
serialized matrix in coordinate format, meaning three arrays of dimension nnz are stored, one of values, and two of row and column indices More... | |
class | CSR_Matrix |
abstraction for a serialized sparse matrix stored in compressed-sparse-row (CSR) layout More... | |
class | ctr |
class | ctr_2d_general |
class | ctr_replicate |
class | ctr_virt |
class | CubicModel |
Cubic performance model which, given measurements, provides a new model guess. More... | |
class | distribution |
class | endomorphism |
untyped internal class for singly-typed single variable function (Endomorphism) More... | |
class | grid_wrapper |
struct | int1 |
struct | int2 |
struct | IntPair |
struct | iparam |
class | LinModel |
Linear performance model which, given measurements, provides a new model guess. More... | |
class | mapping |
struct | mem_loc |
struct | mem_transfer |
class | Model |
class | offload_arr |
offloaded array/buffer More... | |
class | offload_tsr |
offloaded and serialized tensor data More... | |
class | PairIterator |
class | scaling |
class for executing distributed scaling of a tensor More... | |
class | scl |
class | scl_virt |
class | seq_tsr_ctr |
class | seq_tsr_scl |
class | seq_tsr_spctr |
class | seq_tsr_spsum |
class | seq_tsr_sum |
struct | ShortPair |
class | spctr |
class | spctr_2d_general |
class | spctr_pin_keys |
class | spctr_replicate |
class | spctr_virt |
class | strp_ctr |
class | strp_scl |
class | strp_sum |
class | strp_tsr |
class | Sum_Term |
class | summation |
class for executing distributed summation of tensors More... | |
class | tensor |
internal distributed tensor class More... | |
struct | tensor_name_less |
comparison function for sets of tensor pointers. This ensures the set iteration order is consistent across nodes More... | |
class | Term |
a term is an abstract object representing some expression of tensors More... | |
struct | time_param |
class | topology |
class | tspsum |
class | tspsum_map |
class | tspsum_permute |
class | tspsum_pin_keys |
class | tspsum_replicate |
performs replication along a dimension, generates 2.5D algs More... | |
class | tspsum_virt |
class | tsum |
class | tsum_replicate |
performs replication along a dimension, generates 2.5D algs More... | |
class | tsum_virt |
class | Unifun_Term |
class | univar_function |
untyped internal class for doubly-typed univariate function More... | |
Typedefs | |
typedef bool | TYPE1 |
typedef int | TYPE2 |
typedef int64_t | TYPE3 |
typedef float | TYPE4 |
typedef double | TYPE5 |
typedef std::complex< float > | TYPE6 |
typedef std::complex< double > | TYPE7 |
typedef int16_t | TYPE8 |
typedef int8_t | TYPE9 |
Enumerations | |
enum | { SUCCESS, ERROR, NEGATIVE } |
enum | map_type { NOT_MAPPED, PHYSICAL_MAP, VIRTUAL_MAP } |
enum | TOPOLOGY { TOPOLOGY_GENERIC, TOPOLOGY_BGP, TOPOLOGY_BGQ, TOPOLOGY_8D, NO_TOPOLOGY } |
Functions | |
void | update_all_models (MPI_Comm comm) |
void | factorize (int n, int *nfactor, int **factor) |
computes the factorization of n, returning the number of factors in nfactor and the factors in factor More... | |
template<typename ptype > | |
void | get_perm (int perm_order, ptype A, ptype B, ptype C, ptype &tA, ptype &tB, ptype &tC) |
void | calc_fold_lnmk (tensor const *A, tensor const *B, tensor const *C, int const *idx_A, int const *idx_B, int const *idx_C, int const *ordering_A, int const *ordering_B, iparam *inner_prm) |
calculate the dimensions of the matrix the contraction gets reduced to (A, B, and C may be permuted) More... | |
void | get_len_ordering (tensor const *A, tensor const *B, tensor const *C, int const *idx_A, int const *idx_B, int const *idx_C, int **new_ordering_A, int **new_ordering_B, int **new_ordering_C) |
find ordering of indices of tensor to reduce to DGEMM (A, B, and C may be permuted) More... | |
int | ctr_2d_gen_build (int is_used, CommData global_comm, int i, int *virt_dim, int &cg_edge_len, int &total_iter, tensor *A, int i_A, CommData *&cg_cdt_A, int64_t &cg_ctr_lda_A, int64_t &cg_ctr_sub_lda_A, bool &cg_move_A, int *blk_len_A, int64_t &blk_sz_A, int const *virt_blk_len_A, int &load_phase_A, tensor *B, int i_B, CommData *&cg_cdt_B, int64_t &cg_ctr_lda_B, int64_t &cg_ctr_sub_lda_B, bool &cg_move_B, int *blk_len_B, int64_t &blk_sz_B, int const *virt_blk_len_B, int &load_phase_B, tensor *C, int i_C, CommData *&cg_cdt_C, int64_t &cg_ctr_lda_C, int64_t &cg_ctr_sub_lda_C, bool &cg_move_C, int *blk_len_C, int64_t &blk_sz_C, int const *virt_blk_len_C, int &load_phase_C) |
sets up a ctr_2d_general (2D SUMMA) level where A is not communicated; the function will be called with A/B/C permuted depending on the desired algorithm More... | |
void | inv_idx (int order_A, int const *idx_A, int order_B, int const *idx_B, int order_C, int const *idx_C, int *order_tot, int **idx_arr) |
invert index map More... | |
template<int idim> | |
void | spA_dnB_dnC_ctrloop (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, int order_A, int const *edge_len_A, int64_t const *lda_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max) |
template<> | |
void | spA_dnB_dnC_ctrloop< 0 > (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, int order_A, int const *edge_len_A, int64_t const *lda_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max) |
template<> | |
void | spA_dnB_dnC_ctrloop< MAX_ORD > (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, int order_A, int const *edge_len_A, int64_t const *lda_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max) |
void | spA_dnB_dnC_seq_ctr (char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, bivar_function const *func) |
char * | bcast_step (int edge_len, char *A, bool is_sparse_A, bool move_A, algstrct const *sr_A, int64_t b_A, int64_t s_A, char *buf_A, CommData *cdt_A, int64_t ctr_sub_lda_A, int64_t ctr_lda_A, int nblk_A, int64_t const *size_blk_A, int &new_nblk_A, int64_t *&new_size_blk_A, int64_t *offsets_A, int ib) |
char * | reduce_step_pre (int edge_len, char *C, bool is_sparse_C, bool move_C, algstrct const *sr_C, int64_t b_C, int64_t s_C, char *buf_C, CommData *cdt_C, int64_t ctr_sub_lda_C, int64_t ctr_lda_C, int nblk_C, int64_t const *size_blk_C, int &new_nblk_C, int64_t *&new_size_blk_C, int64_t *offsets_C, int ib, char const *&rec_beta) |
void | reduce_step_post (int edge_len, char *C, bool is_sparse_C, bool move_C, algstrct const *sr_C, int64_t b_C, int64_t s_C, char *buf_C, CommData *cdt_C, int64_t ctr_sub_lda_C, int64_t ctr_lda_C, int nblk_C, int64_t *size_blk_C, int &new_nblk_C, int64_t *&new_size_blk_C, int64_t *offsets_C, int ib, char const *&rec_beta, char const *beta, char *&up_C, char *&new_C, int n_new_C_grps, int &i_new_C_grp, char **new_C_grps) |
int | spctr_2d_gen_build (int is_used, CommData global_comm, int i, int *virt_dim, int &cg_edge_len, int &total_iter, tensor *A, int i_A, CommData *&cg_cdt_A, int64_t &cg_spctr_lda_A, int64_t &cg_spctr_sub_lda_A, bool &cg_move_A, int *blk_len_A, int64_t &blk_sz_A, int const *virt_blk_len_A, int &load_phase_A, tensor *B, int i_B, CommData *&cg_cdt_B, int64_t &cg_spctr_lda_B, int64_t &cg_spctr_sub_lda_B, bool &cg_move_B, int *blk_len_B, int64_t &blk_sz_B, int const *virt_blk_len_B, int &load_phase_B, tensor *C, int i_C, CommData *&cg_cdt_C, int64_t &cg_spctr_lda_C, int64_t &cg_spctr_sub_lda_C, bool &cg_move_C, int *blk_len_C, int64_t &blk_sz_C, int const *virt_blk_len_C, int &load_phase_C) |
template<int idim> | |
void | sym_seq_ctr_loop (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max) |
template<> | |
void | sym_seq_ctr_loop< 0 > (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max) |
template void | sym_seq_ctr_loop< MAX_ORD > (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, uint64_t *const *offsets_C, bivar_function const *func, int const *idx, int const *rev_idx_map, int idx_max) |
void | compute_syoff (int r, int len, algstrct const *sr, int const *edge_len, int const *sym, uint64_t *offsets) |
void | compute_syoffs (algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, int tot_order, int const *rev_idx_map, uint64_t **&offsets_A, uint64_t **&offsets_B, uint64_t **&offsets_C) |
int | sym_seq_ctr_ref (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C) |
performs symmetric contraction with reference (unblocked) kernel More... | |
int | sym_seq_ctr_cust (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, bivar_function const *func) |
performs symmetric contraction with custom elementwise function More... | |
int | sym_seq_ctr_inr (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, char const *beta, char *C, algstrct const *sr_C, int order_C, int const *edge_len_C, int const *sym_C, int const *idx_map_C, iparam const *prm, bivar_function const *func) |
performs symmetric contraction with blocked gemm More... | |
void | init_rng (int rank) |
initializes random number generator More... | |
double | get_rand48 () |
returns new random number in [0,1) More... | |
template<typename type > | |
int | conv_idx (int order, type const *cidx, int **iidx) |
template<typename type > | |
int | conv_idx (int order_A, type const *cidx_A, int **iidx_A, int order_B, type const *cidx_B, int **iidx_B) |
template<typename type > | |
int | conv_idx (int order_A, type const *cidx_A, int **iidx_A, int order_B, type const *cidx_B, int **iidx_B, int order_C, type const *cidx_C, int **iidx_C) |
template int | conv_idx< int > (int, int const *, int **) |
template int | conv_idx< char > (int, char const *, int **) |
template int | conv_idx< int > (int, int const *, int **, int, int const *, int **) |
template int | conv_idx< char > (int, char const *, int **, int, char const *, int **) |
template int | conv_idx< int > (int, int const *, int **, int, int const *, int **, int, int const *, int **) |
template int | conv_idx< char > (int, char const *, int **, int, char const *, int **, int, char const *, int **) |
void | flops_add (int64_t n) |
int64_t | get_flops () |
void | handler () |
void | cvrt_idx (int order, int const *lens, int64_t idx, int *idx_arr) |
void | cvrt_idx (int order, int const *lens, int64_t idx, int **idx_arr) |
void | cvrt_idx (int order, int const *lens, int const *idx_arr, int64_t *idx) |
bool | get_mpi_dt (int64_t count, int64_t datum_size, MPI_Datatype &dt) |
gives a datatype for arbitrary datum_size, errors if exceeding 32-bits More... | |
int64_t | sy_packed_size (int order, const int *len, const int *sym) |
computes the size of a tensor in SY (NOT HOLLOW) packed symmetric layout More... | |
int64_t | packed_size (int order, const int *len, const int *sym) |
computes the size of a tensor in packed symmetric (SY, SH, or AS) layout More... | |
int | alloc_ptr (int64_t const len_, void **const ptr) |
alloc abstraction More... | |
int | mst_alloc_ptr (int64_t const len, void **const ptr) |
mst_alloc abstraction More... | |
void * | alloc (int64_t const len) |
alloc abstraction More... | |
void * | mst_alloc (int64_t const len) |
mst_alloc allocates buffer on the specialized memory stack More... | |
int | cdealloc (void *ptr) |
free abstraction More... | |
template<typename dtype > | |
const char * | get_fmt () |
return format string for templated type More... | |
template<> | |
const char * | get_fmt< float > () |
template<> | |
const char * | get_fmt< double > () |
template<> | |
const char * | get_fmt< int > () |
template<> | |
const char * | get_fmt< int64_t > () |
template<typename dtype > | |
void | parse_sparse_tensor_data (char **lvals, int order, dtype const *pmulid, int *lens, int64_t nvals, CTF::Pair< dtype > *pairs, bool with_vals) |
parse string containing sparse tensor into data More... | |
template<typename dtype > | |
char * | serialize_sparse_tensor_data (int order, int *lens, int64_t nvals, CTF::Pair< dtype > *pairs, bool with_vals, int64_t &str_len) |
serialize sparse tensor data to create string More... | |
template<typename dtype > | |
int64_t | read_data_mpiio (CTF::World const *dw, char const *fpath, char ***datastr) |
read sparse tensor data from file using MPI-I/O, creating string with one entry per line (different entries on each process) More... | |
template<typename dtype > | |
void | write_data_mpiio (CTF::World const *dw, char const *fpath, char *datastr, int64_t str_len) |
write sparse tensor data to file using MPI-I/O, from string with one entry per line (different entries on each process) More... | |
template<typename dtype > | |
dtype | default_add (dtype a, dtype b) |
template<typename dtype , void(*)(int, dtype const *, dtype *) fxpy> | |
void | default_mxpy (void *X, void *Y, int *n, MPI_Datatype *d) |
template<typename dtype > | |
void | default_fxpy (int n, dtype const *X, dtype *Y) |
template<typename dtype > | |
MPI_Op | get_default_maddop () |
template<> | |
MPI_Op | get_default_maddop< char > () |
template<> | |
MPI_Op | get_default_maddop< bool > () |
template<> | |
MPI_Op | get_default_maddop< int > () |
template<> | |
MPI_Op | get_default_maddop< int64_t > () |
template<> | |
MPI_Op | get_default_maddop< unsigned int > () |
template<> | |
MPI_Op | get_default_maddop< uint64_t > () |
template<> | |
MPI_Op | get_default_maddop< float > () |
template<> | |
MPI_Op | get_default_maddop< double > () |
template<> | |
MPI_Op | get_default_maddop< long double > () |
template<> | |
MPI_Op | get_default_maddop< std::complex< float > > () |
template<> | |
MPI_Op | get_default_maddop< std::complex< double > > () |
template<typename dtype > | |
MPI_Op | get_maddop (void(*fxpy)(int, dtype const *, dtype *)) |
CTF_int::algstrct const * | get_float_ring () |
CTF_int::algstrct const * | get_double_ring () |
CTF_int::algstrct const * | get_int_ring () |
CTF_int::algstrct const * | get_int64_t_ring () |
template<typename dtype > | |
void | gemm_batch (char taA, char taB, int l, int m, int n, int k, dtype alpha, dtype const *A, dtype const *B, dtype beta, dtype *C) |
template<typename dtype > | |
void | gemm (char tA, char tB, int m, int n, int k, dtype alpha, dtype const *A, dtype const *B, dtype beta, dtype *C) |
template<> | |
void | default_axpy< float > (int n, float alpha, float const *X, int incX, float *Y, int incY) |
template<> | |
void | default_axpy< double > (int n, double alpha, double const *X, int incX, double *Y, int incY) |
template<> | |
void | default_axpy< std::complex< float > > (int n, std::complex< float > alpha, std::complex< float > const *X, int incX, std::complex< float > *Y, int incY) |
template<> | |
void | default_axpy< std::complex< double > > (int n, std::complex< double > alpha, std::complex< double > const *X, int incX, std::complex< double > *Y, int incY) |
template<> | |
void | default_scal< float > (int n, float alpha, float *X, int incX) |
template<> | |
void | default_scal< double > (int n, double alpha, double *X, int incX) |
template<> | |
void | default_scal< std::complex< float > > (int n, std::complex< float > alpha, std::complex< float > *X, int incX) |
template<> | |
void | default_scal< std::complex< double > > (int n, std::complex< double > alpha, std::complex< double > *X, int incX) |
template<> | |
void | default_coomm< float > (int m, int n, int k, float alpha, float const *A, int const *rows_A, int const *cols_A, int nnz_A, float const *B, float beta, float *C) |
template<> | |
void | default_coomm< double > (int m, int n, int k, double alpha, double const *A, int const *rows_A, int const *cols_A, int nnz_A, double const *B, double beta, double *C) |
template<> | |
void | default_coomm< std::complex< float > > (int m, int n, int k, std::complex< float > alpha, std::complex< float > const *A, int const *rows_A, int const *cols_A, int nnz_A, std::complex< float > const *B, std::complex< float > beta, std::complex< float > *C) |
template<> | |
void | default_coomm< std::complex< double > > (int m, int n, int k, std::complex< double > alpha, std::complex< double > const *A, int const *rows_A, int const *cols_A, int nnz_A, std::complex< double > const *B, std::complex< double > beta, std::complex< double > *C) |
template<typename dtype > | |
void | muladd_csrmm (int m, int n, int k, dtype alpha, dtype const *A, int const *JA, int const *IA, int nnz_A, dtype const *B, dtype beta, dtype *C) |
template<typename dtype > | |
void | muladd_csrmultd (int m, int n, int k, dtype const *A, int const *JA, int const *IA, int nnz_A, dtype const *B, int const *JB, int const *IB, int nnz_B, dtype *C) |
template<typename dtype > | |
dtype | default_mul (dtype a, dtype b) |
template<typename dtype > | |
void | default_axpy (int n, dtype alpha, dtype const *X, int incX, dtype *Y, int incY) |
template<typename dtype > | |
void | default_scal (int n, dtype alpha, dtype *X, int incX) |
template<typename dtype > | |
void | default_gemm (char tA, char tB, int m, int n, int k, dtype alpha, dtype const *A, dtype const *B, dtype beta, dtype *C) |
template<typename dtype > | |
dtype ** | get_grp_ptrs (int64_t grp_sz, int64_t ngrp, dtype const *data) |
template<> | |
void | default_gemm< float > (char tA, char tB, int m, int n, int k, float alpha, float const *A, float const *B, float beta, float *C) |
template<> | |
void | default_gemm< double > (char tA, char tB, int m, int n, int k, double alpha, double const *A, double const *B, double beta, double *C) |
template<> | |
void | default_gemm< std::complex< float > > (char tA, char tB, int m, int n, int k, std::complex< float > alpha, std::complex< float > const *A, std::complex< float > const *B, std::complex< float > beta, std::complex< float > *C) |
template<> | |
void | default_gemm< std::complex< double > > (char tA, char tB, int m, int n, int k, std::complex< double > alpha, std::complex< double > const *A, std::complex< double > const *B, std::complex< double > beta, std::complex< double > *C) |
template<typename dtype > | |
void | default_gemm_batch (char taA, char taB, int l, int m, int n, int k, dtype alpha, dtype const *A, dtype const *B, dtype beta, dtype *C) |
template<> | |
void | default_gemm_batch< float > (char taA, char taB, int l, int m, int n, int k, float alpha, float const *A, float const *B, float beta, float *C) |
template<> | |
void | default_gemm_batch< double > (char taA, char taB, int l, int m, int n, int k, double alpha, double const *A, double const *B, double beta, double *C) |
template<> | |
void | default_gemm_batch< std::complex< float > > (char taA, char taB, int l, int m, int n, int k, std::complex< float > alpha, std::complex< float > const *A, std::complex< float > const *B, std::complex< float > beta, std::complex< float > *C) |
template<> | |
void | default_gemm_batch< std::complex< double > > (char taA, char taB, int l, int m, int n, int k, std::complex< double > alpha, std::complex< double > const *A, std::complex< double > const *B, std::complex< double > beta, std::complex< double > *C) |
template<typename dtype > | |
void | default_coomm (int m, int n, int k, dtype alpha, dtype const *A, int const *rows_A, int const *cols_A, int nnz_A, dtype const *B, dtype beta, dtype *C) |
bool | try_mkl_coo_to_csr (int64_t nz, int nrow, char *csr_vs, int *csr_ja, int *csr_ia, char const *coo_vs, int const *coo_rs, int const *coo_cs, int el_size) |
bool | try_mkl_csr_to_coo (int64_t nz, int nrow, char const *csr_vs, int const *csr_ja, int const *csr_ia, char *coo_vs, int *coo_rs, int *coo_cs, int el_size) |
template<typename dtype > | |
void | seq_coo_to_csr (int64_t nz, int nrow, dtype *csr_vs, int *csr_ja, int *csr_ia, dtype const *coo_vs, int const *coo_rs, int const *coo_cs) |
template<typename dtype > | |
void | seq_csr_to_coo (int64_t nz, int nrow, dtype const *csr_vs, int const *csr_ja, int const *csr_ia, dtype *coo_vs, int *coo_rs, int *coo_cs) |
template<typename dtype > | |
void | def_coo_to_csr (int64_t nz, int nrow, dtype *csr_vs, int *csr_ja, int *csr_ia, dtype const *coo_vs, int const *coo_rs, int const *coo_cs) |
template<typename dtype > | |
void | def_csr_to_coo (int64_t nz, int nrow, dtype const *csr_vs, int const *csr_ja, int const *csr_ia, dtype *coo_vs, int *coo_rs, int *coo_cs) |
template<typename dtype > | |
dtype | default_addinv (dtype a) |
template<typename dtype , bool is_ord> | |
std::enable_if< is_ord, dtype >::type | default_abs (dtype a) |
template<typename dtype , bool is_ord> | |
std::enable_if<!is_ord, dtype >::type | default_abs (dtype a) |
template<typename dtype , dtype(*)(dtype) abs> | |
void | char_abs (char const *a, char *b) |
template<typename dtype , bool is_ord> | |
std::enable_if< is_ord, dtype >::type | default_min (dtype a, dtype b) |
template<typename dtype , bool is_ord> | |
std::enable_if<!is_ord, dtype >::type | default_min (dtype a, dtype b) |
template<typename dtype , bool is_ord> | |
std::enable_if< is_ord, dtype >::type | default_max_lim () |
template<typename dtype , bool is_ord> | |
std::enable_if<!is_ord, dtype >::type | default_max_lim () |
template<typename dtype , bool is_ord> | |
std::enable_if< is_ord, dtype >::type | default_min_lim () |
template<typename dtype , bool is_ord> | |
std::enable_if<!is_ord, dtype >::type | default_min_lim () |
template<typename dtype , bool is_ord> | |
std::enable_if< is_ord, dtype >::type | default_max (dtype a, dtype b) |
template<typename dtype , bool is_ord> | |
std::enable_if<!is_ord, dtype >::type | default_max (dtype a, dtype b) |
template<typename dtype > | |
MPI_Datatype | get_default_mdtype (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< bool > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< std::complex< double > > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< std::complex< long double > > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< char > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< int > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< int64_t > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< unsigned int > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< uint64_t > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< float > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< double > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< long double > (bool &is_custom) |
template<> | |
MPI_Datatype | get_default_mdtype< std::complex< float > > (bool &is_custom) |
template<typename dtype > | |
constexpr bool | get_default_is_ord () |
Idx_Tensor * | get_full_intm (Idx_Tensor &A, Idx_Tensor &B, std::vector< char > out_inds, bool create_dummy=false) |
std::vector< char > | det_uniq_inds (std::vector< Term * > const operands, std::vector< char > const out_inds) |
std::vector< Term * > | contract_down_terms (algstrct *sr, char *tscale, std::vector< Term * > operands, std::vector< char > out_inds, int terms_to_leave, bool est_time=false, double *cost=NULL) |
void | operator-= (double &d, CTF_int::Term const &tsr) |
void | operator+= (double &d, CTF_int::Term const &tsr) |
void | operator-= (int64_t &d, CTF_int::Term const &tsr) |
void | operator+= (int64_t &d, CTF_int::Term const &tsr) |
CTF_int::Contract_Term | operator* (double const &d, CTF_int::Term const &tsr) |
CTF_int::Contract_Term | operator* (int64_t const &i, CTF_int::Term const &tsr) |
void | calc_dim (int order, int64_t size, int const *edge_len, mapping const *edge_map, int64_t *vrt_sz, int *vrt_edge_len, int *blk_edge_len) |
calculate the block-sizes of a tensor More... | |
int | get_distribution_size (int order) |
int | comp_dim_map (mapping const *map_A, mapping const *map_B) |
compares two mappings More... | |
void | copy_mapping (int order, mapping const *mapping_A, mapping *mapping_B) |
copies mapping A to B More... | |
int | copy_mapping (int order_A, int order_B, int const *idx_A, mapping const *mapping_A, int const *idx_B, mapping *mapping_B, int make_virt=1) |
copies mapping A to B More... | |
int | map_tensor (int num_phys_dims, int tsr_order, int const *tsr_edge_len, int const *tsr_sym_table, int *restricted, CommData *phys_comm, int const *comm_idx, int fill, mapping *tsr_edge_map) |
map a tensor More... | |
int | check_self_mapping (tensor const *tsr, int const *idx_map) |
checks mapping in preparation for tensor scale, sum, or contract More... | |
int | map_self_indices (tensor const *tsr, int const *idx_map) |
create virtual mapping for idx_maps that have repeating indices More... | |
int | map_symtsr (int tsr_order, int const *tsr_sym_table, mapping *tsr_edge_map) |
adjust a mapping to maintain symmetry More... | |
int | stretch_virt (int order, int stretch_factor, mapping *maps) |
stretch virtualization by a factor More... | |
topology * | get_phys_topo (CommData glb_comm, TOPOLOGY mach) |
get dimension and torus lengths of specified topology More... | |
std::vector< topology * > | get_all_topos (CommData cdt, int n_uf, int const *uniq_fact, int const *mults, int n_prepend, int const *prelens) |
computes all unique factorizations into non-primes each yielding a topology, prepending additional factors as specified More... | |
std::vector< topology * > | get_generic_topovec (CommData cdt) |
computes all topology configurations given underlying physical topology information More... | |
std::vector< topology * > | peel_perm_torus (topology *phys_topology, CommData cdt) |
folds specified topology and all of its permutations into all configurations of lesser dimensionality More... | |
std::vector< topology * > | peel_torus (topology const *topo, CommData glb_comm) |
folds specified topology into all configurations of lesser dimensionality More... | |
int | find_topology (topology const *topo, std::vector< topology * > &topovec) |
searches for an equivalent topology in a vector of topologies More... | |
int | get_best_topo (int64_t nvirt, int topo, CommData global_comm, int64_t bcomm_vol=0, int64_t bmemuse=0) |
get the best topologies (least nvirt) over all procs More... | |
void | extract_free_comms (topology const *topo, int order_A, mapping const *edge_map_A, int order_B, mapping const *edge_map_B, int &num_sub_phys_dims, CommData **psub_phys_comm, int **pcomm_idx) |
extracts the set of physical dimensions still available for mapping More... | |
int | can_morph (topology const *topo_keep, topology const *topo_change) |
determines if two topologies are compatible with each other More... | |
void | morph_topo (topology const *new_topo, topology const *old_topo, int order, mapping *edge_map) |
morphs a tensor topology into another More... | |
void | pad_cyclic_pup_virt_buff (int const *sym, distribution const &old_dist, distribution const &new_dist, int const *len, int const *old_phys_dim, int const *old_phys_edge_len, int const *old_virt_edge_len, int64_t old_virt_nelem, int const *old_offsets, int *const *old_permutation, int total_np, int const *new_phys_dim, int const *new_phys_edge_len, int const *new_virt_edge_len, int64_t new_virt_nelem, char *old_data, char **new_data, int forward, int *const *bucket_offset, char const *alpha, char const *beta, algstrct const *sr) |
void | cyclic_reshuffle (int const *sym, distribution const &old_dist, int const *old_offsets, int *const *old_permutation, distribution const &new_dist, int const *new_offsets, int *const *new_permutation, char **tsr_data, char **tsr_cyclic_data, algstrct const *sr, CommData ord_glb_comm, bool reuse_buffers, char const *alpha, char const *beta) |
Goes from any set of phases to any new set of phases. More... | |
template<int idim> | |
void | redist_bucket (int *const *bucket_offset, int64_t *const *data_offset, int *const *ivmax_pre, int rep_phase0, int virt_dim0, bool data_to_buckets, char *__restrict__ data, char **__restrict__ buckets, int64_t *counts, algstrct const *sr, int64_t data_off, int bucket_off, int prev_idx) |
template<> | |
void | redist_bucket< 0 > (int *const *bucket_offset, int64_t *const *data_offset, int *const *ivmax_pre, int rep_phase0, int virt_dim0, bool data_to_buckets, char *__restrict__ data, char **__restrict__ buckets, int64_t *counts, algstrct const *sr, int64_t data_off, int bucket_off, int prev_idx) |
void | redist_bucket_r0 (int *const *bucket_offset, int64_t *const *data_offset, int *const *ivmax_pre, int rep_phase0, int rep_idx0, int virt_dim0, bool data_to_buckets, char *__restrict__ data, char **__restrict__ buckets, int64_t *counts, algstrct const *sr, int64_t data_off, int bucket_off, int prev_idx) |
int | get_glb (int i, int s, int t) |
int | get_loc (int g, int s, int t) |
template<int idim> | |
int64_t | calc_cnt (int const *sym, int const *rep_phase, int const *sphase, int const *gidx_off, int const *edge_len, int const *loc_edge_len) |
computes the cardinality of the set of elements of a tensor of order idim+1 that are owned by processor index gidx_off in a distribution with dimensions sphase More... | |
template<> | |
int64_t | calc_cnt< 0 > (int const *sym, int const *rep_phase, int const *sphase, int const *gidx_off, int const *edge_len, int const *loc_edge_len) |
template<int idim> | |
int64_t * | calc_sy_pfx (int const *sym, int const *rep_phase, int const *sphase, int const *gidx_off, int const *edge_len, int const *loc_edge_len) |
computes the cardinality of the sets of elements of a tensor of order idim+1 for different values of the idim'th tensor dimension More... | |
template<> | |
int64_t * | calc_sy_pfx< 1 > (int const *sym, int const *rep_phase, int const *sphase, int const *gidx_off, int const *edge_len, int const *loc_edge_len) |
template<int idim> | |
void | calc_drv_cnts (int order, int const *sym, int64_t *counts, int const *rep_phase, int const *rep_phase_lda, int const *sphase, int const *phys_phase, int *gidx_off, int const *edge_len, int const *loc_edge_len) |
template<> | |
void | calc_drv_cnts< 0 > (int order, int const *sym, int64_t *counts, int const *rep_phase, int const *rep_phase_lda, int const *sphase, int const *phys_phase, int *gidx_off, int const *edge_len, int const *loc_edge_len) |
template<int idim> | |
void | calc_cnt_from_rep_cnt (int const *rep_phase, int *const *pe_offset, int *const *bucket_offset, int64_t const *old_counts, int64_t *counts, int bucket_off, int pe_off, int dir) |
template<> | |
void | calc_cnt_from_rep_cnt< 0 > (int const *rep_phase, int *const *pe_offset, int *const *bucket_offset, int64_t const *old_counts, int64_t *counts, int bucket_off, int pe_off, int dir) |
void | calc_drv_displs (int const *sym, int const *edge_len, distribution const &old_dist, distribution const &new_dist, int64_t *counts, int idx_lyr) |
void | precompute_offsets (distribution const &old_dist, distribution const &new_dist, int const *sym, int const *len, int const *rep_phase, int const *phys_edge_len, int const *virt_edge_len, int const *virt_dim, int const *virt_lda, int64_t virt_nelem, int **pe_offset, int **bucket_offset, int64_t **data_offset, int **ivmax_pre) |
double | dgtog_est_time (int64_t tot_sz, int np) |
estimates execution time, given this processor sends and receives tot_sz across np procs More... | |
void | dgtog_reshuffle (int const *sym, int const *edge_len, distribution const &old_dist, distribution const &new_dist, char **ptr_tsr_data, char **ptr_tsr_new_data, algstrct const *sr, CommData ord_glb_comm) |
void | glb_ord_pup (int const *sym, distribution const &old_dist, distribution const &new_dist, int const *len, int const *old_phys_dim, int const *old_phys_edge_len, int const *old_virt_edge_len, int64_t old_virt_nelem, int const *old_offsets, int *const *old_permutation, int total_np, int const *new_phys_dim, int const *new_phys_edge_len, int const *new_virt_edge_len, int64_t new_virt_nelem, char *old_data, char **new_data, int forward, int *const *bucket_offset, char const *alpha, char const *beta, algstrct const *sr) |
template<int idim> | |
void | ord_glb (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int prev_idx=0, int64_t glb_ord_offset=0, int64_t blk_ord_offset=0) |
template<> | |
void | ord_glb< 0 > (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int prev_idx, int64_t glb_ord_offset, int64_t blk_ord_offset) |
template void | ord_glb< 7 > (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int prev_idx, int64_t glb_ord_offset, int64_t blk_ord_offset) |
template<int idim> | |
void | ord_glb_omp (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int const *idx_st, int const *idx_end, int prev_idx=0, int64_t glb_ord_offset=0, int64_t blk_ord_offset=0) |
template<> | |
void | ord_glb_omp< 0 > (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int const *idx_st, int const *idx_end, int prev_idx, int64_t glb_ord_offset, int64_t blk_ord_offset) |
template void | ord_glb_omp< 7 > (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr, int const *idx_st, int const *idx_end, int prev_idx, int64_t glb_ord_offset, int64_t blk_ord_offset) |
void | order_globally (int const *sym, distribution const &dist, int const *virt_edge_len, int const *virt_phase_lda, int64_t vbs, bool dir, char const *tsr_data_in, char *tsr_data_out, algstrct const *sr) |
reorder local buffer so that elements are ordered according to where they are in the global tensor (interleave virtual blocks) More... | |
char * | glb_cyclic_reshuffle (int const *sym, distribution const &old_dist, int const *old_offsets, int *const *old_permutation, distribution const &new_dist, int const *new_offsets, int *const *new_permutation, char **ptr_tsr_data, char **ptr_tsr_cyclic_data, algstrct const *sr, CommData ord_glb_comm, bool reuse_buffers, char const *alpha, char const *beta) |
Goes from any set of phases to any new set of phases. More... | |
bool | hptt_is_applicable (int order, int const *new_order, int elementSize) |
Checks if the HPTT library is applicable. More... | |
void | nosym_transpose_hptt (int order, int const *st_new_order, int const *st_edge_len, int dir, char const *st_buffer, char *new_buffer, algstrct const *sr) |
void | nosym_transpose (tensor *A, int all_fdim_A, int const *all_flen_A, int const *new_order, int dir) |
void | nosym_transpose (int order, int const *new_order, int const *edge_len, char *data, int dir, algstrct const *sr) |
transposes a non-symmetric (folded) tensor More... | |
void | nosym_transpose (int order, int const *new_order, int const *edge_len, char const *data, int dir, int max_ntd, char **tswap_data, int64_t *chunk_size, algstrct const *sr) |
transposes a non-symmetric (folded) tensor internal kernel More... | |
double | est_time_transp (int order, int const *new_order, int const *edge_len, int dir, algstrct const *sr) |
estimates time needed to transpose a non-symmetric (folded) tensor based on performance models More... | |
void | nosym_transpose_hptt (int order, int const *edge_len, int dir, tensor *&A) |
High-performance implementation of nosym_transpose using HPTT. More... | |
void | pad_key (int order, int64_t num_pair, int const *edge_len, int const *padding, PairIterator pairs, algstrct const *sr, int const *offsets=NULL) |
applies padding to keys More... | |
void | depad_tsr (int order, int64_t num_pair, int const *edge_len, int const *sym, int const *padding, int const *prepadding, char const *pairsb, char *new_pairsb, int64_t *new_num_pair, algstrct const *sr) |
retrieves the unpadded pairs More... | |
void | zero_padding (int order, int64_t size, int nvirt, int const *edge_len, int const *sym, int const *padding, int const *phase, int const *phys_phase, int const *virt_phase, int const *cphase_rank, char *vdata, algstrct const *sr) |
sets to zero all values in padded region of tensor More... | |
void | scal_diag (int order, int64_t size, int nvirt, int const *edge_len, int const *sym, int const *padding, int const *phase, int const *phys_phase, int const *virt_phase, int const *cphase_rank, char *vdata, algstrct const *sr, int const *sym_mask) |
scales each element by 1/(number of entries equivalent to it after permutation of indices for which sym_mask is 1) More... | |
void | padded_reshuffle (int const *sym, distribution const &old_dist, distribution const &new_dist, char *tsr_data, char **tsr_cyclic_data, algstrct const *sr, CommData ord_glb_comm) |
Reshuffle elements using key-value pair read/write. More... | |
int ** | compute_bucket_offsets (distribution const &old_dist, distribution const &new_dist, int const *len, int const *old_phys_edge_len, int const *old_virt_lda, int const *old_offsets, int *const *old_permutation, int const *new_phys_edge_len, int const *new_virt_lda, int forward, int old_virt_np, int new_virt_np, int const *old_virt_edge_len) |
computes offsets for redistribution targets along each edge length More... | |
void | calc_cnt_displs (int const *sym, distribution const &old_dist, distribution const &new_dist, int new_nvirt, int np, int const *old_virt_edge_len, int const *new_virt_lda, int64_t *send_counts, int64_t *recv_counts, int64_t *send_displs, int64_t *recv_displs, CommData ord_glb_comm, int idx_lyr, int *const *bucket_offset) |
assigns keys to an array of values More... | |
double | blres_est_time (int64_t tot_sz, int nv0, int nv1) |
estimates execution time, given this processor sends and receives tot_sz across np procs More... | |
void | block_reshuffle (distribution const &old_dist, distribution const &new_dist, char *tsr_data, char *&tsr_cyclic_data, algstrct const *sr, CommData glb_comm) |
Reshuffle elements by block given the global phases stay the same. More... | |
int | can_block_reshuffle (int order, int const *old_phase, mapping const *map) |
determines if tensor can be permuted by block More... | |
void | permute_keys (int order, int num_pair, int const *edge_len, int const *new_edge_len, int *const *permutation, char *pairs, int64_t *new_num_pair, algstrct const *sr) |
permutes keys More... | |
void | depermute_keys (int order, int num_pair, int const *edge_len, int const *new_edge_len, int *const *permutation, char *pairs, algstrct const *sr) |
depermutes keys (apply P^T) More... | |
void | assign_keys (int order, int64_t size, int nvirt, int const *edge_len, int const *sym, int const *phase, int const *phys_phase, int const *virt_dim, int *phase_rank, char const *vdata, char *vpairs, algstrct const *sr) |
assigns keys to an array of values More... | |
void | spsfy_tsr (int order, int64_t size, int nvirt, int const *edge_len, int const *sym, int const *phase, int const *phys_phase, int const *virt_dim, int *phase_rank, char const *vdata, char *&vpairs, int64_t *nnz_blk, algstrct const *sr, int64_t const *edge_lda, std::function< bool(char const *)> f) |
extracts all tensor values (in pair format) that pass a sparsifier function (including padded zeros if they pass the filter) More... | |
void | bucket_by_pe (int order, int64_t num_pair, int64_t np, int const *phys_phase, int const *virt_phase, int const *bucket_lda, int const *edge_len, ConstPairIterator mapped_data, int64_t *bucket_counts, int64_t *bucket_off, PairIterator bucket_data, algstrct const *sr) |
buckets key-value pairs by processor according to distribution More... | |
int64_t * | bucket_by_virt (int order, int num_virt, int64_t num_pair, int const *phys_phase, int const *virt_phase, int const *edge_len, ConstPairIterator mapped_data, PairIterator bucket_data, algstrct const *sr) |
buckets key value pairs by block/virtual-processor More... | |
void | readwrite (int order, int64_t size, char const *alpha, char const *beta, int nvirt, int const *edge_len, int const *sym, int const *phase, int const *phys_phase, int const *virt_dim, int *phase_rank, char *vdata, char *pairs, char rw, algstrct const *sr) |
read or write pairs from / to tensor More... | |
void | wr_pairs_layout (int order, int np, int64_t inwrite, char const *alpha, char const *beta, char rw, int num_virt, int const *sym, int const *edge_len, int const *padding, int const *phase, int const *phys_phase, int const *virt_phase, int *virt_phys_rank, int const *bucket_lda, char *wr_pairs_buf, char *rw_data, CommData glb_comm, algstrct const *sr, bool is_sparse, int64_t nnz_loc, int64_t *nnz_blk, char *&pprs_new, int64_t &nnz_loc_new) |
read or write pairs from / to tensor More... | |
void | read_loc_pairs (int order, int64_t nval, int num_virt, int const *sym, int const *edge_len, int const *padding, int const *phase, int const *phys_phase, int const *virt_phase, int *phase_rank, int64_t *nread, char const *data, char **pairs, algstrct const *sr) |
read tensor pairs local to processor More... | |
void | sp_read (algstrct const *sr, int64_t ntsr, ConstPairIterator prs_tsr, char const *alpha, int64_t nread, PairIterator prs_read, char const *beta) |
reads elements of a sparse set defining the tensor, into a sparse read set with potentially repeating keys More... | |
void | sp_write (int num_virt, algstrct const *sr, int64_t *vntsr, ConstPairIterator vprs_tsr, char const *beta, int64_t *vnwrite, ConstPairIterator vprs_write, char const *alpha, int64_t *vnnew, char *&pprs_new) |
writes pairs in a sparse write set to the sparse set of elements defining the tensor, resulting in a set of size between ntsr and ntsr+nwrite More... | |
void | inv_idx (int order_A, int const *idx_A, int *order_tot, int **idx_arr) |
invert index map More... | |
int | strip_diag (int order, int order_tot, int const *idx_map, int64_t vrt_sz, mapping const *edge_map, topology const *topo, algstrct const *sr, int *blk_edge_len, int64_t *blk_sz, strp_tsr **stpr) |
build stack required for stripping out diagonals of tensor More... | |
int | sym_seq_scl_ref (char const *alpha, char *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A) |
performs symmetric scaling using algstrct const * sr_A More... | |
int | sym_seq_scl_cust (char const *alpha, char *A, algstrct const *sr_A, int const order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, endomorphism const *func) |
performs symmetric scaling using custom func More... | |
void | inc_tot_mem_used (int64_t a) |
void | set_mem_size (int64_t size) |
sets what fraction of the memory capacity CTF can use More... | |
void | set_memcap (double cap) |
sets what fraction of the memory capacity CTF can use More... | |
std::list< mem_transfer > | contract_mst () |
gets rid of empty space on the stack More... | |
std::list< mem_loc > * | get_mst () |
void | mst_create (int64_t size) |
initializes stack buffer More... | |
void | mem_create () |
create instance of memory manager More... | |
void | mem_exit (int rank) |
exit instance of memory manager More... | |
int | mst_free (void *ptr) |
frees buffer allocated on stack More... | |
int | untag_mem (void *ptr) |
stops tracking memory allocated by CTF, so user doesn't have to call free More... | |
int | cdealloc (void *ptr, int const tid) |
free abstraction More... | |
int | cdealloc_cond (void *ptr) |
free abstraction (conditional (no error if not found)) More... | |
int | get_num_instances () |
int64_t | proc_bytes_used () |
gives total memory used on this MPI process More... | |
int64_t | proc_bytes_total () |
gives total memory size per MPI process More... | |
int64_t | proc_bytes_available () |
gives total memory available on this MPI process More... | |
std::vector< Model * > & | get_all_models () |
void | print_all_models () |
void | load_all_models (std::string file_name) |
void | write_all_models (std::string file_name) |
void | dump_all_models (std::string path) |
double | cddot (int n, const double *dX, int incX, const double *dY, int incY) |
void | cdgeqrf (int const M, int const N, double *A, int const LDA, double *TAU2, double *WORK, int const LWORK, int *INFO) |
void | cdormqr (char SIDE, char TRANS, int M, int N, int K, double const *A, int LDA, double const *TAU2, double *C, int LDC, double *WORK, int LWORK, int *INFO) |
void | cdgelsd (int m, int n, int k, double const *A, int lda_A, double *B, int lda_B, double *S, double cond, int *rank, double *work, int lwork, int *iwork, int *info) |
template<int nparam> | |
bool | comp_time_param (const time_param< nparam > &a, const time_param< nparam > &b) |
void | offload_init () |
initialize offloading, e.g. create cublas More... | |
void | offload_exit () |
exit offloading, e.g. destroy cublas More... | |
double | estimate_download_time (int64_t size) |
estimate time it takes to download More... | |
double | estimate_upload_time (int64_t size) |
estimate time it takes to upload More... | |
void | host_pinned_alloc (void **ptr, int64_t size) |
allocate a pinned host buffer More... | |
void | host_pinned_free (void *ptr) |
free a pinned host buffer More... | |
template<typename dtype > | |
void | offload_gemm (char tA, char tB, int m, int n, int k, dtype alpha, offload_tsr &A, int lda_A, offload_tsr &B, int lda_B, dtype beta, offload_tsr &C, int lda_C) |
template<typename dtype > | |
void | offload_gemm (char tA, char tB, int m, int n, int k, dtype alpha, dtype const *dev_A, int lda_A, dtype const *dev_B, int lda_B, dtype beta, dtype *dev_C, int lda_C) |
void | calc_idx_arr (int order, int const *lens, int const *sym, int64_t idx, int *idx_arr) |
void | sy_calc_idx_arr (int order, int const *lens, int const *sym, int64_t idx, int *idx_arr) |
same as above except assumes sym only NS or SY More... | |
void | permute (int order, int const *perm, int *arr) |
permute an array More... | |
void | permute_target (int order, int const *perm, int *arr) |
permutes a permutation array More... | |
void | socopy (int64_t m, int64_t n, int64_t lda_a, int64_t lda_b, int64_t const *sizes_a, int64_t *&sizes_b, int64_t *&offsets_b) |
void | spcopy (int64_t m, int64_t n, int64_t lda_a, int64_t lda_b, int64_t const *sizes_a, int64_t const *offsets_a, char const *a, int64_t const *sizes_b, int64_t const *offsets_b, char *b) |
int64_t | fact (int64_t n) |
int64_t | choose (int64_t n, int64_t k) |
void | get_choice (int64_t n, int64_t k, int64_t ch, int *chs) |
int64_t | chchoose (int64_t n, int64_t k) |
int64_t | getTotalSystemMemory () |
int | free_cond (void *ptr) |
int | gcd (int a, int b) |
int | lcm (int a, int b) |
void | lda_cpy (int el_size, int nrow, int ncol, int lda_A, int lda_B, const char *A, char *B) |
Copies submatrix to submatrix (column-major) More... | |
void | coalesce_bwd (int el_size, char *B, char const *B_aux, int k, int n, int kb) |
we receive a contiguous buffer kb-by-n B and (k-kb)-by-n B_aux which is the block below. To get a k-by-n buffer, we need to combine this buffer with our original block. Since we are working with column-major ordering we need to interleave the blocks. That's what this function does. More... | |
int64_t | get_coo_size (int64_t nnz, int val_size) |
int64_t | get_csr_size (int64_t nnz, int nrow, int val_size) |
computes the size of a serialized CSR matrix More... | |
template<int idim> | |
void | spA_dnB_seq_sum_loop (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func) |
template<> | |
void | spA_dnB_seq_sum_loop< 0 > (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func) |
template void | spA_dnB_seq_sum_loop< MAX_ORD > (char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func) |
void | spA_dnB_seq_sum (char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, univar_function const *func) |
performs summation between two sparse tensors; assumes A contains key-value pairs sorted by key, with index permutation preapplied and with no repeated indices More... | |
void | dnA_spB_seq_sum (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, char const *beta, char const *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func) |
performs summation between two sparse tensors; assumes B contains key-value pairs sorted by key, with index permutation preapplied and with no repeated indices More... | |
void | spspsum (algstrct const *sr_A, int64_t nA, ConstPairIterator prs_A, char const *beta, algstrct const *sr_B, int64_t nB, ConstPairIterator prs_B, char const *alpha, int64_t &nnew, char *&pprs_new, univar_function const *func, int64_t map_pfx) |
Adds pairs in a sparse set A to the sparse set of elements defining the tensor, resulting in a set of size between nB and nB+nA. More... | |
void | spA_spB_seq_sum (char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func, int64_t map_pfx) |
performs summation between two sparse tensors; assumes A and B contain key-value pairs sorted by key, with index permutation preapplied and with no repeated indices More... | |
void | inv_idx (int order_A, int const *idx_A, int order_B, int const *idx_B, int *order_tot, int **idx_arr) |
invert index map More... | |
template<int idim> | |
void | sym_seq_sum_loop (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, univar_function const *func, int const *idx, int const *rev_idx_map, int idx_max) |
template<> | |
void | sym_seq_sum_loop< 0 > (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, univar_function const *func, int const *idx, int const *rev_idx_map, int idx_max) |
template void | sym_seq_sum_loop< MAX_ORD > (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, uint64_t *const *offsets_A, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, uint64_t *const *offsets_B, univar_function const *func, int const *idx, int const *rev_idx_map, int idx_max) |
void | compute_syoffs (algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, int tot_order, int const *rev_idx_map, uint64_t **&offsets_A, uint64_t **&offsets_B) |
int | sym_seq_sum_ref (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B) |
performs symmetric summation with unblocked reference kernel More... | |
int | sym_seq_sum_inr (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, int inr_stride) |
performs symmetric summation with blocked daxpy More... | |
int | sym_seq_sum_cust (char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, int const *idx_map_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, int const *idx_map_B, univar_function const *func) |
performs symmetric summation with custom elementwise function More... | |
void | desymmetrize (tensor *sym_tsr, tensor *nonsym_tsr, bool is_C) |
unfolds the data of a tensor More... | |
void | symmetrize (tensor *sym_tsr, tensor *nonsym_tsr) |
folds the data of a tensor More... | |
void | cmp_sym_perms (int ndim, int const *sym, int *nperm, int **perm, double *sign) |
finds all permutations of a tensor according to a symmetry More... | |
void | order_perm (tensor const *A, tensor const *B, int *idx_arr, int off_A, int off_B, int *idx_A, int *idx_B, int &add_sign, int &mod) |
orders the summation indices of one tensor that don't break summation symmetries More... | |
void | order_perm (tensor const *A, tensor const *B, tensor const *C, int *idx_arr, int off_A, int off_B, int off_C, int *idx_A, int *idx_B, int *idx_C, int &add_sign, int &mod) |
orders the contraction indices of one tensor that don't break contraction symmetries More... | |
void | add_sym_perm (std::vector< summation > &perms, std::vector< int > &signs, summation const &new_perm, int new_sign) |
puts a summation map into a nice ordering according to preserved symmetries, and adds it if it is distinct More... | |
void | add_sym_perm (std::vector< contraction > &perms, std::vector< int > &signs, contraction const &new_perm, int new_sign) |
puts a contraction map into a nice ordering according to preserved symmetries, and adds it if it is distinct More... | |
void | get_sym_perms (summation const &sum, std::vector< summation > &perms, std::vector< int > &signs) |
finds all permutations of a summation that must be done for a broken symmetry More... | |
void | get_sym_perms (contraction const &ctr, std::vector< contraction > &perms, std::vector< int > &signs) |
finds all permutations of a contraction that must be done for a broken symmetry More... | |
void | depin (algstrct const *sr, int order, int const *lens, int const *divisor, int nvirt, int const *virt_dim, int const *phys_rank, char *X, int64_t &new_nnz_B, int64_t *nnz_blk, char *&new_B, bool check_padding) |
depins keys of n pairs More... | |
double | spredist_est_time (int64_t size, int np) |
template<typename dtype > | |
void | abs_helper (tensor *A, tensor *B) |
template<typename dtype > | |
void | pow_helper (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C) |
template<typename dtype > | |
void | all_helper (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template<typename dtype > | |
void | conj_helper (tensor *A, tensor *B) |
template<typename dtype > | |
void | get_real (tensor *A, tensor *B) |
template<typename dtype > | |
void | get_imag (tensor *A, tensor *B) |
template<typename dtype > | |
void | set_real (tensor *A, tensor *B) |
template<typename dtype > | |
void | set_imag (tensor *A, tensor *B) |
template<typename dtype > | |
void | any_helper (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
int64_t | sum_bool_tsr (tensor *A) |
sum all 1 values in boolean tensor More... | |
void | subsample (tensor *A, double probability) |
extract a sample of the entries (if sparse of the current nonzeros) More... | |
void | matrix_qr (tensor *A, tensor *Q, tensor *R) |
void | matrix_qr_cmplx (tensor *A, tensor *Q, tensor *R) |
void | matrix_svd (tensor *A, tensor *U, tensor *S, tensor *VT, int rank) |
void | matrix_svd_cmplx (tensor *A, tensor *U, tensor *S, tensor *VT, int rank) |
void | conv_type (int type_idx1, int type_idx2, tensor *A, tensor *B) |
convert tensor from one type to another More... | |
template void | conj_helper< float > (tensor *A, tensor *B) |
template void | conj_helper< double > (tensor *A, tensor *B) |
template void | set_real< float > (tensor *A, tensor *B) |
template void | set_imag< float > (tensor *A, tensor *B) |
template void | set_real< double > (tensor *A, tensor *B) |
template void | set_imag< double > (tensor *A, tensor *B) |
template void | get_real< float > (tensor *A, tensor *B) |
template void | get_imag< float > (tensor *A, tensor *B) |
template void | get_real< double > (tensor *A, tensor *B) |
template void | get_imag< double > (tensor *A, tensor *B) |
template void | tensor::compare_elementwise< std::complex< double > > (tensor *A, tensor *B) |
template void | tensor::compare_elementwise< std::complex< float > > (tensor *A, tensor *B) |
template void | abs_helper< std::complex< double > > (tensor *A, tensor *B) |
template void | abs_helper< std::complex< float > > (tensor *A, tensor *B) |
template void | abs_helper< double > (tensor *A, tensor *B) |
template void | abs_helper< float > (tensor *A, tensor *B) |
template void | abs_helper< int64_t > (tensor *A, tensor *B) |
template void | abs_helper< bool > (tensor *A, tensor *B) |
template void | abs_helper< int32_t > (tensor *A, tensor *B) |
template void | abs_helper< int16_t > (tensor *A, tensor *B) |
template void | abs_helper< int8_t > (tensor *A, tensor *B) |
template void | pow_helper< std::complex< double > > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C) |
template void | pow_helper< std::complex< float > > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C) |
template void | pow_helper< double > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C) |
template void | pow_helper< float > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C) |
template void | pow_helper< int64_t > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C) |
template void | pow_helper< bool > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C) |
template void | pow_helper< int32_t > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C) |
template void | pow_helper< int16_t > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C) |
template void | pow_helper< int8_t > (tensor *A, tensor *B, tensor *C, char const *idx_A, char const *idx_B, char const *idx_C) |
template void | all_helper< std::complex< double > > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | all_helper< std::complex< float > > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | all_helper< int64_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | all_helper< double > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | all_helper< float > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | all_helper< bool > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | all_helper< int32_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | all_helper< int16_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | all_helper< int8_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | any_helper< std::complex< double > > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | any_helper< std::complex< float > > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | any_helper< double > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | any_helper< float > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | any_helper< int64_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | any_helper< bool > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | any_helper< int32_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | any_helper< int16_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
template void | any_helper< int8_t > (tensor *A, tensor *B_bool, char const *idx_A, char const *idx_B) |
Variables | |
LinModel< 3 > | seq_tsr_ctr_mdl_cst (seq_tsr_ctr_mdl_cst_init,"seq_tsr_ctr_mdl_cst") |
LinModel< 3 > | seq_tsr_ctr_mdl_ref (seq_tsr_ctr_mdl_ref_init,"seq_tsr_ctr_mdl_ref") |
LinModel< 3 > | seq_tsr_ctr_mdl_inr (seq_tsr_ctr_mdl_inr_init,"seq_tsr_ctr_mdl_inr") |
LinModel< 3 > | seq_tsr_ctr_mdl_off (seq_tsr_ctr_mdl_off_init,"seq_tsr_ctr_mdl_off") |
LinModel< 3 > | seq_tsr_ctr_mdl_cst_inr (seq_tsr_ctr_mdl_cst_inr_init,"seq_tsr_ctr_mdl_cst_inr") |
LinModel< 3 > | seq_tsr_ctr_mdl_cst_off (seq_tsr_ctr_mdl_cst_off_init,"seq_tsr_ctr_mdl_cst_off") |
LinModel< 3 > | seq_tsr_spctr_cst_off_k0 (seq_tsr_spctr_cst_off_k0_init,"seq_tsr_spctr_cst_off_k0") |
LinModel< 3 > | seq_tsr_spctr_cst_off_k1 (seq_tsr_spctr_cst_off_k1_init,"seq_tsr_spctr_cst_off_k1") |
LinModel< 3 > | seq_tsr_spctr_cst_off_k2 (seq_tsr_spctr_cst_off_k2_init,"seq_tsr_spctr_cst_off_k2") |
LinModel< 3 > | seq_tsr_spctr_off_k0 (seq_tsr_spctr_off_k0_init,"seq_tsr_spctr_off_k0") |
LinModel< 3 > | seq_tsr_spctr_off_k1 (seq_tsr_spctr_off_k1_init,"seq_tsr_spctr_off_k1") |
LinModel< 3 > | seq_tsr_spctr_off_k2 (seq_tsr_spctr_off_k2_init,"seq_tsr_spctr_off_k2") |
LinModel< 3 > | seq_tsr_spctr_cst_k0 (seq_tsr_spctr_cst_k0_init,"seq_tsr_spctr_cst_k0") |
LinModel< 3 > | seq_tsr_spctr_cst_k1 (seq_tsr_spctr_cst_k1_init,"seq_tsr_spctr_cst_k1") |
LinModel< 3 > | seq_tsr_spctr_cst_k2 (seq_tsr_spctr_cst_k2_init,"seq_tsr_spctr_cst_k2") |
LinModel< 3 > | seq_tsr_spctr_cst_k3 (seq_tsr_spctr_cst_k3_init,"seq_tsr_spctr_cst_k3") |
LinModel< 3 > | seq_tsr_spctr_cst_k4 (seq_tsr_spctr_cst_k4_init,"seq_tsr_spctr_cst_k4") |
LinModel< 3 > | seq_tsr_spctr_k0 (seq_tsr_spctr_k0_init,"seq_tsr_spctr_k0") |
LinModel< 3 > | seq_tsr_spctr_k1 (seq_tsr_spctr_k1_init,"seq_tsr_spctr_k1") |
LinModel< 3 > | seq_tsr_spctr_k2 (seq_tsr_spctr_k2_init,"seq_tsr_spctr_k2") |
LinModel< 3 > | seq_tsr_spctr_k3 (seq_tsr_spctr_k3_init,"seq_tsr_spctr_k3") |
LinModel< 3 > | seq_tsr_spctr_k4 (seq_tsr_spctr_k4_init,"seq_tsr_spctr_k4") |
LinModel< 2 > | pin_keys_mdl (pin_keys_mdl_init,"pin_keys_mdl") |
std::mersenne_twister_engine< std::uint_fast64_t, 64, 312, 156, 31, 0xb5026f5aa96619e9, 29, 0x5555555555555555, 17, 0x71d67fffeda60000, 37, 0xfff7eee000000000, 43, 6364136223846793005 > | rng |
LinModel< 3 > | alltoall_mdl (alltoall_mdl_init,"alltoall_mdl") |
LinModel< 3 > | alltoallv_mdl (alltoallv_mdl_init,"alltoallv_mdl") |
LinModel< 3 > | red_mdl (red_mdl_init,"red_mdl") |
LinModel< 3 > | red_mdl_cst (red_mdl_cst_init,"red_mdl_cst") |
LinModel< 3 > | allred_mdl (allred_mdl_init,"allred_mdl") |
LinModel< 3 > | allred_mdl_cst (allred_mdl_cst_init,"allred_mdl_cst") |
LinModel< 3 > | bcast_mdl (bcast_mdl_init,"bcast_mdl") |
int64_t | total_flop_count = 0 |
MPI_Datatype | MPI_CTF_DOUBLE_COMPLEX = MPI_CXX_DOUBLE_COMPLEX |
CTF::Ring< float > | float_ring = CTF::Ring<float>() |
CTF::Ring< double > | double_ring = CTF::Ring<double>() |
CTF::Ring< int > | int_ring = CTF::Ring<int>() |
CTF::Ring< int64_t > | int64_t_ring = CTF::Ring<int64_t>() |
MPI_Datatype | MPI_CTF_BOOL = MPI_CXX_BOOL |
MPI_Datatype | MPI_CTF_LONG_DOUBLE_COMPLEX = MPI_CXX_LONG_DOUBLE_COMPLEX |
std::set< grid_wrapper > | scalapack_grids |
index for ScaLAPACK processor grids More... | |
LinModel< 3 > | dgtog_res_mdl (dgtog_res_mdl_init,"dgtog_res_mdl") |
LinModel< 2 > | long_contig_transp_mdl (long_contig_transp_mdl_init,"long_contig_transp_mdl") |
LinModel< 2 > | shrt_contig_transp_mdl (shrt_contig_transp_mdl_init,"shrt_contig_transp_mdl") |
LinModel< 2 > | non_contig_transp_mdl (non_contig_transp_mdl_init,"non_contig_transp_mdl") |
LinModel< 2 > | blres_mdl (blres_mdl_init,"blres_mdl") |
double | seq_tsr_spctr_cst_off_k0_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10} |
double | seq_tsr_spctr_cst_off_k1_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10} |
double | seq_tsr_spctr_cst_off_k2_init [] = {-2.1996E-04, 3.1883E-09, 3.8743E-11} |
double | seq_tsr_spctr_off_k0_init [] = {8.6970E-06, 4.5598E-11, 1.1544E-09} |
double | seq_tsr_spctr_off_k1_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10} |
double | seq_tsr_spctr_off_k2_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10} |
double | seq_tsr_spctr_cst_k0_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10} |
double | seq_tsr_spctr_cst_k1_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10} |
double | seq_tsr_spctr_cst_k2_init [] = {-8.8459E-08, 8.1207E-10, -2.8486E-12} |
double | seq_tsr_spctr_cst_k3_init [] = {1.8504E-08, 2.9154E-11, 2.1973E-11} |
double | seq_tsr_spctr_cst_k4_init [] = {2.0948E-05, 1.2294E-09, 8.0037E-10} |
double | seq_tsr_spctr_k0_init [] = {2.2620E-08, -5.7494E-10, 2.2146E-09} |
double | seq_tsr_spctr_k1_init [] = {5.3745E-06, 3.6464E-08, 2.2334E-10} |
double | seq_tsr_spctr_k2_init [] = {3.0917E-08, 5.2181E-11, 4.1634E-12} |
double | seq_tsr_spctr_k3_init [] = {7.2456E-08, 1.5128E-10, -1.5528E-12} |
double | seq_tsr_spctr_k4_init [] = {1.6880E-07, 4.9411E-10, 9.2847E-13} |
double | pin_keys_mdl_init [] = {3.1189E-09, 6.6717E-08} |
double | seq_tsr_ctr_mdl_cst_init [] = {5.1626E-06, -6.3215E-11, 3.9638E-09} |
double | seq_tsr_ctr_mdl_ref_init [] = {4.9138E-08, 5.8290E-10, 4.8575E-11} |
double | seq_tsr_ctr_mdl_inr_init [] = {2.0647E-08, 1.9721E-10, 2.9948E-11} |
double | seq_tsr_ctr_mdl_off_init [] = {6.2925E-05, 1.7449E-11, 1.7211E-12} |
double | seq_tsr_ctr_mdl_cst_inr_init [] = {1.3863E-04, 2.0119E-10, 9.8820E-09} |
double | seq_tsr_ctr_mdl_cst_off_init [] = {8.4844E-04, -5.9246E-11, 3.5247E-10} |
double | long_contig_transp_mdl_init [] = {2.9158E-10, 3.0501E-09} |
double | shrt_contig_transp_mdl_init [] = {1.3427E-08, 4.3168E-09} |
double | non_contig_transp_mdl_init [] = {4.0475E-08, 4.0463E-09} |
double | dgtog_res_mdl_init [] = {2.9786E-05, 2.4335E-04, 1.0845E-08} |
double | blres_mdl_init [] = {1.0598E-05, 7.2741E-08} |
double | alltoall_mdl_init [] = {1.0000E-06, 1.0000E-06, 5.0000E-10} |
double | alltoallv_mdl_init [] = {2.7437E-06, 2.2416E-05, 1.0469E-08} |
double | red_mdl_init [] = {6.2935E-07, 4.6276E-06, 9.2245E-10} |
double | red_mdl_cst_init [] = {5.7302E-07, 4.7347E-06, 6.0191E-10} |
double | allred_mdl_init [] = {8.4416E-07, 6.8651E-06, 3.5845E-08} |
double | allred_mdl_cst_init [] = {-3.3754E-04, 2.1343E-04, 3.0801E-09} |
double | bcast_mdl_init [] = {1.5045E-06, 1.4485E-05, 3.2876E-09} |
double | spredist_mdl_init [] = {1.2744E-04, 1.0278E-03, 7.6837E-08} |
double | csrred_mdl_init [] = {3.7005E-05, 1.1854E-04, 5.5165E-09} |
double | csrred_mdl_cst_init [] = {-1.8323E-04, 1.3076E-04, 2.8732E-09} |
double | upload_mdl_init [] |
double | download_mdl_init [] |
double | memcap = 0.5 |
int64_t | mem_size = 0 |
int | max_threads |
int | instance_counter = 0 |
int64_t | mem_used [MAX_THREADS] |
int64_t | tot_mem_used |
int64_t | tot_mem_available = -1 |
std::list< mem_loc > | mem_stacks [MAX_THREADS] |
void * | mst_buffer = 0 |
int64_t | mst_buffer_size = 0 |
int64_t | mst_buffer_used = 0 |
int64_t | mst_buffer_ptr = 0 |
std::list< mem_loc > | mst |
char * | cpy_buffer [CPY_BUFFER_SIZE] |
LinModel< 3 > | csrred_mdl (csrred_mdl_init,"csrred_mdl") |
LinModel< 3 > | csrred_mdl_cst (csrred_mdl_cst_init,"csrred_mdl_cst") |
LinModel< 3 > | spredist_mdl (spredist_mdl_init,"spredist_mdl") |
typedef bool CTF_int::TYPE1 |
Definition at line 6 of file ctf_ext.cxx.
typedef int CTF_int::TYPE2 |
Definition at line 7 of file ctf_ext.cxx.
typedef int64_t CTF_int::TYPE3 |
Definition at line 8 of file ctf_ext.cxx.
typedef float CTF_int::TYPE4 |
Definition at line 9 of file ctf_ext.cxx.
typedef double CTF_int::TYPE5 |
Definition at line 10 of file ctf_ext.cxx.
typedef std::complex<float> CTF_int::TYPE6 |
Definition at line 11 of file ctf_ext.cxx.
typedef std::complex<double> CTF_int::TYPE7 |
Definition at line 12 of file ctf_ext.cxx.
typedef int16_t CTF_int::TYPE8 |
Definition at line 13 of file ctf_ext.cxx.
typedef int8_t CTF_int::TYPE9 |
Definition at line 14 of file ctf_ext.cxx.
enum CTF_int::map_type |
enum CTF_int::TOPOLOGY |
Enumerator | |
---|---|
TOPOLOGY_GENERIC | |
TOPOLOGY_BGP | |
TOPOLOGY_BGQ | |
TOPOLOGY_8D | |
NO_TOPOLOGY |
Definition at line 10 of file topology.h.
absolute value function
[in] | A | tensor |
[in,out] | B | tensor (becomes absolute value of A) |
Definition at line 17 of file ctf_ext.cxx.
References ctf.core::a, ctf.core::abs(), ctf.core::dtype, and CTF_int::tensor::order.
template void CTF_int::abs_helper< bool > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
template void CTF_int::abs_helper< double > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
template void CTF_int::abs_helper< float > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
template void CTF_int::abs_helper< int16_t > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
template void CTF_int::abs_helper< int32_t > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
template void CTF_int::abs_helper< int64_t > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
template void CTF_int::abs_helper< int8_t > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
template void CTF_int::abs_helper< std::complex< double > > | ( | tensor * | A, |
tensor * | B | ||
) |
template void CTF_int::abs_helper< std::complex< float > > | ( | tensor * | A, |
tensor * | B | ||
) |
void CTF_int::add_sym_perm | ( | std::vector< summation > & | perms, |
std::vector< int > & | signs, | ||
summation const & | new_perm, | ||
int | new_sign | ||
) |
puts a summation map into a nice ordering according to preserved symmetries, and adds it if it is distinct
[in,out] | perms | the permuted summation specifications |
[in,out] | signs | sign of each summation |
[in] | new_perm | summation signature |
[in] | new_sign | alpha |
Definition at line 549 of file symmetrization.cxx.
References CTF_int::summation::A, align_symmetric_indices(), CTF_int::summation::B, cdealloc(), CTF_int::summation::idx_A, CTF_int::summation::idx_B, inv_idx(), CTF_int::tensor::order, order_perm(), and CTF_int::tensor::sym.
void CTF_int::add_sym_perm | ( | std::vector< contraction > & | perms, |
std::vector< int > & | signs, | ||
contraction const & | new_perm, | ||
int | new_sign | ||
) |
puts a contraction map into a nice ordering according to preserved symmetries, and adds it if it is distinct
[in,out] | perms | the permuted contraction specifications |
[in,out] | signs | sign of each contraction |
[in] | new_perm | contraction signature |
[in] | new_sign | alpha |
Definition at line 593 of file symmetrization.cxx.
References CTF_int::contraction::A, align_symmetric_indices(), CTF_int::contraction::B, CTF_int::contraction::C, cdealloc(), CTF_int::contraction::idx_A, CTF_int::contraction::idx_B, CTF_int::contraction::idx_C, inv_idx(), CTF_int::tensor::order, order_perm(), and CTF_int::tensor::sym.
Referenced by get_sym_perms().
void CTF_int::all_helper | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
all function
[in] | A | tensor |
[in] | B_bool | tensor with bool values created |
[in] | idx_A | index of A |
[in] | idx_B | index of B |
Definition at line 33 of file ctf_ext.cxx.
References ctf.core::a, and ctf.core::dtype.
template void CTF_int::all_helper< bool > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::all_helper< double > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::all_helper< float > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::all_helper< int16_t > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::all_helper< int32_t > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::all_helper< int64_t > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::all_helper< int8_t > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::all_helper< std::complex< double > > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
template void CTF_int::all_helper< std::complex< float > > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
void * CTF_int::alloc | ( | int64_t const | len | ) |
alloc abstraction
[in] | len | number of bytes |
Definition at line 365 of file memcontrol.cxx.
References alloc_ptr(), ASSERT, and SUCCESS.
Referenced by CTF_int::algstrct::alloc(), calc_drv_displs(), calc_sy_pfx(), calc_sy_pfx< 1 >(), compute_syoffs(), CTF_int::contraction::contraction(), conv_idx(), CTF_int::COO_Matrix::COO_Matrix(), CTF_int::CSR_Matrix::csr_add(), CTF_int::CSR_Matrix::CSR_Matrix(), CTF_int::algstrct::csr_reduce(), CTF::Bivar_Function< dtype_A, dtype_B, dtype_C >::csrmultcsr(), CTF::Bivar_Kernel< dtype_A, dtype_B, dtype_C, f, g >::csrmultcsr(), CTF::Bivar_Kernel< dtype_A, dtype_B, dtype_C, f, g >::csrmultcsr_old(), CTF_int::ctr_virt::ctr_virt(), cvrt_idx(), depad_tsr(), depermute_keys(), depin(), desymmetrize(), dgtog_reshuffle(), CTF_int::summation::estimate_time(), factorize(), CTF::Semiring< dtype, is_ord >::gen_csrmultcsr(), CTF_int::COO_Matrix::get_data(), get_full_intm(), get_grp_ptrs(), get_len_ordering(), CTF::Tensor< dtype >::get_local_data(), get_phys_topo(), CTF_int::algstrct::has_mul(), CTF::Idx_Tensor::Idx_Tensor(), CTF_int::tensor::init(), inv_idx(), CTF_int::LinModel< nparam >::LinModel(), CTF_int::Term::operator-(), CTF::Partition::operator=(), order_globally(), CTF_int::tensor::orient_subworld(), CTF_int::algstrct::pair_alloc(), CTF::Partition::Partition(), peel_torus(), CTF::Function_timer::print(), CTF::print_timers(), read_data_mpiio(), CTF_int::tensor::read_dense_from_file(), CTF::Tensor< dtype >::read_local(), CTF_int::tensor::read_local(), CTF_int::tensor::read_local_nnz(), CTF_int::tensor::redistribute(), CTF_int::ctr_virt::run(), CTF_int::spctr_replicate::run(), CTF_int::strp_tsr::run(), CTF_int::tspsum_virt::run(), CTF_int::tspsum_replicate::run(), CTF_int::spctr_2d_general::run(), CTF_int::tsum_virt::run(), CTF_int::spctr_virt::run(), CTF_int::tspsum_permute::run(), CTF_int::algstrct::safecopy(), CTF::Semiring< dtype, is_ord >::safemul(), CTF_int::scaling::scaling(), CTF_int::scl_virt::scl_virt(), CTF_int::tensor::self_reduce(), CTF_int::seq_tsr_ctr::seq_tsr_ctr(), CTF_int::seq_tsr_scl::seq_tsr_scl(), CTF_int::seq_tsr_spctr::seq_tsr_spctr(), 
CTF_int::seq_tsr_spsum::seq_tsr_spsum(), CTF_int::seq_tsr_sum::seq_tsr_sum(), serialize_sparse_tensor_data(), CTF_int::COO_Matrix::set_data(), CTF_int::tensor::set_name(), CTF_int::tensor::set_zero(), CTF_int::tensor::slice(), CTF::Tensor< dtype >::slice(), socopy(), spA_dnB_dnC_seq_ctr(), CTF_int::tensor::sparsify(), CTF_int::spctr_pin_keys::spctr_pin_keys(), CTF_int::spctr_virt::spctr_virt(), CTF_int::tensor::spmatricize(), spsfy_tsr(), CTF_int::summation::summation(), CTF::Matrix< dtype >::svd(), sym_seq_ctr_cust(), sym_seq_ctr_inr(), sym_seq_ctr_ref(), sym_seq_scl_cust(), sym_seq_scl_ref(), sym_seq_sum_cust(), sym_seq_sum_inr(), sym_seq_sum_ref(), symmetrize(), CTF_int::tensor::tensor(), CTF_int::topology::topology(), CTF_int::tspsum::tspsum(), CTF_int::tspsum_map::tspsum_map(), CTF_int::tspsum_permute::tspsum_permute(), CTF_int::tspsum_pin_keys::tspsum_pin_keys(), CTF_int::tspsum_virt::tspsum_virt(), CTF_int::tsum_virt::tsum_virt(), CTF_int::LinModel< nparam >::update(), and CTF_int::tensor::write_dense_to_file().
int CTF_int::alloc_ptr | ( | int64_t const | len_, |
void **const | ptr | ||
) |
alloc abstraction
[in] | len_ | number of bytes |
[in,out] | ptr | pointer to set to new allocation address |
Definition at line 320 of file memcontrol.cxx.
References ALIGN_BYTES, ASSERT, CTF_int::mem_loc::len, MAX, CTF_int::mem_loc::ptr, and SUCCESS.
Referenced by alloc(), assign_keys(), bcast_step(), block_reshuffle(), bucket_by_pe(), bucket_by_virt(), calc_cnt_displs(), calc_fold_lnmk(), CTF_int::tensor::calc_phase(), check_self_mapping(), cmp_sym_perms(), CTF_int::tensor::compare(), compute_bucket_offsets(), CTF_int::tensor::copy_tensor_data(), CTF::Monoid< dtype, is_ord >::csr_add(), CTF_int::ctr_replicate::ctr_replicate(), cyclic_reshuffle(), depad_tsr(), depin(), desymmetrize(), dgtog_reshuffle(), CTF_int::distribution::distribution(), CTF_int::tensor::est_time_unfold(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), CTF_int::tensor::extract_diag(), extract_free_comms(), CTF_int::tensor::fold(), get_len_ordering(), glb_cyclic_reshuffle(), glb_ord_pup(), CTF_int::tensor::init(), CTF_int::summation::is_equal(), map_self_indices(), CTF_int::tensor::map_tensor_rem(), morph_topo(), mst_alloc_ptr(), nosym_transpose(), pad_cyclic_pup_virt_buff(), padded_reshuffle(), CTF_int::CSR_Matrix::partition(), permute(), permute_target(), CTF_int::ConstPairIterator::pin(), precompute_offsets(), CTF_int::summation::print(), CTF_int::tensor::print(), CTF_int::tensor::read_all_pairs(), read_loc_pairs(), CTF_int::tensor::read_local(), readwrite(), reduce_step_post(), reduce_step_pre(), CTF_int::ctr_virt::run(), CTF_int::spctr_replicate::run(), CTF_int::strp_tsr::run(), CTF_int::scl_virt::run(), CTF_int::tspsum_virt::run(), CTF_int::spctr_2d_general::run(), CTF_int::tsum_virt::run(), CTF_int::spctr_virt::run(), CTF_int::tspsum_map::run(), CTF_int::tspsum_permute::run(), scal_diag(), CTF_int::tensor::scale_diagonals(), CTF_int::seq_tsr_ctr::seq_tsr_ctr(), CTF_int::seq_tsr_spctr::seq_tsr_spctr(), CTF_int::distribution::serialize(), CTF_int::tensor::set_padding(), CTF_int::tensor::set_zero(), CTF_int::tensor::sparsify(), CTF_int::spctr_replicate::spctr_replicate(), CTF_int::tensor::spmatricize(), spsfy_tsr(), strip_diag(), CTF_int::summation::sum_tensors(), symmetrize(), 
CTF_int::tspsum_replicate::tspsum_replicate(), CTF_int::tsum_replicate::tsum_replicate(), CTF_int::tensor::unfold(), wr_pairs_layout(), CTF_int::tensor::write(), CTF_int::tensor::zero_out_padding(), and zero_padding().
void CTF_int::any_helper | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
any function
[in] | A | tensor |
[in] | B_bool | tensor with bool values created |
[in] | idx_A | index of A |
[in] | idx_B | index of B |
Definition at line 88 of file ctf_ext.cxx.
References ctf.core::a, and ctf.core::dtype.
template void CTF_int::any_helper< bool > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::any_helper< double > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::any_helper< float > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::any_helper< int16_t > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::any_helper< int32_t > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::any_helper< int64_t > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::any_helper< int8_t > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
Referenced by conv_type().
template void CTF_int::any_helper< std::complex< double > > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
template void CTF_int::any_helper< std::complex< float > > | ( | tensor * | A, |
tensor * | B_bool, | ||
char const * | idx_A, | ||
char const * | idx_B | ||
) |
void CTF_int::assign_keys | ( | int | order, |
int64_t | size, | ||
int | nvirt, | ||
int const * | edge_len, | ||
int const * | sym, | ||
int const * | phase, | ||
int const * | phys_phase, | ||
int const * | virt_dim, | ||
int * | phase_rank, | ||
char const * | vdata, | ||
char * | vpairs, | ||
algstrct const * | sr | ||
) |
assigns keys to an array of values
[in] | order | tensor dimension |
[in] | size | number of values |
[in] | nvirt | total virtualization factor |
[in] | edge_len | tensor edge lengths |
[in] | sym | symmetries of tensor |
[in] | phase | total phase of the tensor on virtualized processor grid |
[in] | phys_phase | physical phase of the tensor |
[in] | virt_dim | virtual phase in each dimension |
[in] | phase_rank | physical phase rank multiplied by virtual phase |
[in] | vdata | array of input values |
[out] | vpairs | pairs of keys and inputted values |
[in] | sr | algstrct defining data type of array |
Definition at line 180 of file sparse_rw.cxx.
References ABORT, alloc_ptr(), ASSERT, cdealloc(), CTF_int::accumulatable::el_size, NS, CTF_int::algstrct::pair_size(), CTF_int::algstrct::set_pair(), TAU_FSTART, and TAU_FSTOP.
Referenced by read_loc_pairs().
char* CTF_int::bcast_step | ( | int | edge_len, |
char * | A, | ||
bool | is_sparse_A, | ||
bool | move_A, | ||
algstrct const * | sr_A, | ||
int64_t | b_A, | ||
int64_t | s_A, | ||
char * | buf_A, | ||
CommData * | cdt_A, | ||
int64_t | ctr_sub_lda_A, | ||
int64_t | ctr_lda_A, | ||
int | nblk_A, | ||
int64_t const * | size_blk_A, | ||
int & | new_nblk_A, | ||
int64_t *& | new_size_blk_A, | ||
int64_t * | offsets_A, | ||
int | ib | ||
) |
Definition at line 138 of file spctr_2d_general.cxx.
References alloc_ptr(), ASSERT, CTF_int::CommData::bcast(), cdealloc(), CTF_int::algstrct::copy(), CTF_int::spctr_2d_general::ctr_sub_lda_A, CTF_int::spctr_2d_general::edge_len, CTF_int::accumulatable::el_size, CTF_int::algstrct::mdtype(), mst_alloc_ptr(), CTF_int::CommData::np, CTF_int::CommData::rank, socopy(), and spcopy().
Referenced by CTF_int::spctr_2d_general::run().
void CTF_int::block_reshuffle | ( | distribution const & | old_dist, |
distribution const & | new_dist, | ||
char * | tsr_data, | ||
char *& | tsr_cyclic_data, | ||
algstrct const * | sr, | ||
CommData | glb_comm | ||
) |
Reshuffle elements by block given the global phases stay the same.
[in] | old_dist | starting data distrubtion |
[in] | new_dist | target data distrubtion |
[in] | tsr_data | starting data buffer |
[out] | tsr_cyclic_data | target data buffer |
[in] | sr | algstrct defining data |
[in] | glb_comm | communicator on which to redistribute |
Definition at line 454 of file redist.cxx.
References CTF_int::algstrct::addid(), CTF_int::algstrct::alloc(), alloc_ptr(), cdealloc(), CTF_int::CommData::cm, CTF_int::algstrct::copy(), DPRINTF, CTF_int::accumulatable::el_size, CTF_int::algstrct::mdtype(), CTF_int::LinModel< nparam >::observe(), CTF_int::distribution::order, CTF_int::distribution::pe_lda, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, CTF_int::CommData::rank, CTF_int::algstrct::set(), CTF_int::LinModel< nparam >::should_observe(), CTF_int::distribution::size, TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.
Referenced by CTF_int::tensor::redistribute().
double CTF_int::blres_est_time | ( | int64_t | tot_sz, |
int | nv0, | ||
int | nv1 | ||
) |
estimates execution time, given this processor sends and receives tot_sz across np procs
[in] | tot_sz | amount of data sent/recved |
[in] | nv0 | starting number of blocks |
[in] | nv1 | ending number of blocks |
Definition at line 449 of file redist.cxx.
References CTF_int::LinModel< nparam >::est_time().
Referenced by CTF_int::tensor::est_redist_time().
void CTF_int::bucket_by_pe | ( | int | order, |
int64_t | num_pair, | ||
int64_t | np, | ||
int const * | phys_phase, | ||
int const * | virt_phase, | ||
int const * | bucket_lda, | ||
int const * | edge_len, | ||
ConstPairIterator | mapped_data, | ||
int64_t * | bucket_counts, | ||
int64_t * | bucket_off, | ||
PairIterator | bucket_data, | ||
algstrct const * | sr | ||
) |
buckets key-value pairs by processor according to distribution
[in] | order | number of tensor dims |
[in] | num_pair | numbers of values being written |
[in] | np | number of processor buckets |
[in] | phys_phase | physical distribution phase |
[in] | virt_phase | factor of phase due to local blocking |
[in] | bucket_lda | iterator hop along each bucket dim |
[in] | edge_len | padded edge lengths of tensor |
[in] | mapped_data | set of sparse key-value pairs |
[out] | bucket_counts | how many keys belong to each processor |
[out] | bucket_off | prefix sum of bucket_counts |
[out] | bucket_data | mapped_data reordered by bucket |
[in] | sr | algstrct context defining values |
Definition at line 432 of file sparse_rw.cxx.
References alloc_ptr(), ASSERT, cdealloc(), CTF_int::ConstPairIterator::k(), ctf.core::np(), TAU_FSTART, TAU_FSTOP, and CTF_int::PairIterator::write().
Referenced by wr_pairs_layout().
int64_t * CTF_int::bucket_by_virt | ( | int | order, |
int | num_virt, | ||
int64_t | num_pair, | ||
int const * | phys_phase, | ||
int const * | virt_phase, | ||
int const * | edge_len, | ||
ConstPairIterator | mapped_data, | ||
PairIterator | bucket_data, | ||
algstrct const * | sr | ||
) |
buckets key value pairs by block/virtual-processor
[in] | order | number of tensor dims |
[in] | num_virt | number of local blocks |
[in] | num_pair | numbers of values being written |
[in] | phys_phase | physical distribution phase |
[in] | virt_phase | factor of phase due to local blocking |
[in] | edge_len | padded edge lengths of tensor |
[in] | mapped_data | set of sparse key-value pairs |
[out] | bucket_data | mapped_data reordered by bucket |
[in] | sr | algstrct context defining values |
Definition at line 539 of file sparse_rw.cxx.
References alloc_ptr(), ASSERT, cdealloc(), CTF_int::ConstPairIterator::k(), CTF_int::PairIterator::sort(), TAU_FSTART, TAU_FSTOP, and CTF_int::PairIterator::write().
Referenced by wr_pairs_layout().
int64_t CTF_int::calc_cnt | ( | int const * | sym, |
int const * | rep_phase, | ||
int const * | sphase, | ||
int const * | gidx_off, | ||
int const * | edge_len, | ||
int const * | loc_edge_len | ||
) |
computes the cardinality of the set of elements of a tensor of order idim+1 that are owned by processor index gidx_off in a distribution with dimensions sphase
Definition at line 23 of file dgtog_calc_cnt.cxx.
References cdealloc(), get_loc(), and NS.
Referenced by calc_drv_cnts< 0 >(), and calc_sy_pfx().
int64_t CTF_int::calc_cnt< 0 > | ( | int const * | sym, |
int const * | rep_phase, | ||
int const * | sphase, | ||
int const * | gidx_off, | ||
int const * | edge_len, | ||
int const * | loc_edge_len | ||
) |
Definition at line 44 of file dgtog_calc_cnt.cxx.
Referenced by calc_sy_pfx< 1 >().
void CTF_int::calc_cnt_displs | ( | int const * | sym, |
distribution const & | old_dist, | ||
distribution const & | new_dist, | ||
int | new_nvirt, | ||
int | np, | ||
int const * | old_virt_edge_len, | ||
int const * | new_virt_lda, | ||
int64_t * | send_counts, | ||
int64_t * | recv_counts, | ||
int64_t * | send_displs, | ||
int64_t * | recv_displs, | ||
CommData | ord_glb_comm, | ||
int | idx_lyr, | ||
int *const * | bucket_offset | ||
) |
assigns keys to an array of values
[in] | sym | symmetry relations between tensor dimensions |
[in] | old_dist | starting data distribution |
[in] | new_dist | target data distribution |
[in] | new_nvirt | new total virtualization factor |
[in] | np | number of processors |
[in] | old_virt_edge_len | old edge lengths of blocks |
[in] | new_virt_lda | prefix sum of new_dist.virt_phase |
[out] | send_counts | outgoing counts of pairs by pe |
[out] | recv_counts | incoming counts of pairs by pe |
[out] | send_displs | outgoing displs of pairs by pe |
[out] | recv_displs | incoming displs of pairs by pe |
[in] | ord_glb_comm | the global communicator |
[in] | idx_lyr | starting processor layer (2.5D) |
[in] | bucket_offset | offsets for target index for each dimension |
Definition at line 170 of file redist.cxx.
References alloc_ptr(), ASSERT, blres_mdl, blres_mdl_init, cdealloc(), CTF_int::CommData::cm, ctf.core::dim, MAX, MIN, mst_alloc_ptr(), ctf.core::np(), NS, CTF_int::distribution::order, CTF_int::distribution::pad_edge_len, CTF_int::distribution::padding, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, SY, sy_packed_size(), and CTF_int::distribution::virt_phase.
Referenced by cyclic_reshuffle(), and glb_cyclic_reshuffle().
void CTF_int::calc_cnt_from_rep_cnt | ( | int const * | rep_phase, |
int *const * | pe_offset, | ||
int *const * | bucket_offset, | ||
int64_t const * | old_counts, | ||
int64_t * | counts, | ||
int | bucket_off, | ||
int | pe_off, | ||
int | dir | ||
) |
Definition at line 155 of file dgtog_calc_cnt.cxx.
References calc_cnt_from_rep_cnt< 0 >().
Referenced by dgtog_reshuffle().
void CTF_int::calc_cnt_from_rep_cnt< 0 > | ( | int const * | rep_phase, |
int *const * | pe_offset, | ||
int *const * | bucket_offset, | ||
int64_t const * | old_counts, | ||
int64_t * | counts, | ||
int | bucket_off, | ||
int | pe_off, | ||
int | dir | ||
) |
Definition at line 174 of file dgtog_calc_cnt.cxx.
Referenced by calc_cnt_from_rep_cnt().
void CTF_int::calc_dim | ( | int | order, |
int64_t | size, | ||
int const * | edge_len, | ||
mapping const * | edge_map, | ||
int64_t * | vrt_sz, | ||
int * | vrt_edge_len, | ||
int * | blk_edge_len | ||
) |
calculate the block-sizes of a tensor
[in] | order | number of dimensions of this tensor |
[in] | size | is the size of the local tensor stored |
[in] | edge_len | edge lengths of global tensor |
[in] | edge_map | mapping of each dimension |
[out] | vrt_sz | size of virtual block |
[out] | vrt_edge_len | edge lengths of virtual block |
[out] | blk_edge_len | edge lengths of local block |
Definition at line 143 of file distribution.cxx.
References CTF_int::mapping::child, CTF_int::mapping::has_child, CTF_int::mapping::np, CTF_int::distribution::order, PHYSICAL_MAP, CTF_int::distribution::size, CTF_int::mapping::type, and VIRTUAL_MAP.
Referenced by CTF_int::tensor::est_time_unfold(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), CTF_int::tensor::fold(), get_len_ordering(), and CTF_int::tensor::unfold().
void CTF_int::calc_drv_cnts | ( | int | order, |
int const * | sym, | ||
int64_t * | counts, | ||
int const * | rep_phase, | ||
int const * | rep_phase_lda, | ||
int const * | sphase, | ||
int const * | phys_phase, | ||
int * | gidx_off, | ||
int const * | edge_len, | ||
int const * | loc_edge_len | ||
) |
Definition at line 120 of file dgtog_calc_cnt.cxx.
Referenced by calc_drv_displs().
void CTF_int::calc_drv_cnts< 0 > | ( | int | order, |
int const * | sym, | ||
int64_t * | counts, | ||
int const * | rep_phase, | ||
int const * | rep_phase_lda, | ||
int const * | sphase, | ||
int const * | phys_phase, | ||
int * | gidx_off, | ||
int const * | edge_len, | ||
int const * | loc_edge_len | ||
) |
Definition at line 138 of file dgtog_calc_cnt.cxx.
References calc_cnt(), and SWITCH_ORD_CALL_RET.
void CTF_int::calc_drv_displs | ( | int const * | sym, |
int const * | edge_len, | ||
distribution const & | old_dist, | ||
distribution const & | new_dist, | ||
int64_t * | counts, | ||
int | idx_lyr | ||
) |
Definition at line 220 of file dgtog_calc_cnt.cxx.
References alloc(), calc_drv_cnts(), cdealloc(), lcm(), SWITCH_ORD_CALL, TAU_FSTART, and TAU_FSTOP.
Referenced by dgtog_reshuffle().
void CTF_int::calc_fold_lnmk | ( | tensor const * | A, |
tensor const * | B, | ||
tensor const * | C, | ||
int const * | idx_A, | ||
int const * | idx_B, | ||
int const * | idx_C, | ||
int const * | ordering_A, | ||
int const * | ordering_B, | ||
iparam * | inner_prm | ||
) |
calculate the dimensions of the matrix the contraction gets reduced to (A, B, and C may be permuted)
[in] | A | tensor 1 |
[in] | B | tensor 2 |
[in] | C | tensor 3 |
[in] | idx_A | indices of tensor 1 |
[in] | idx_B | indices of tensor 2 |
[in] | idx_C | indices of tensor 3 |
[in] | ordering_A | the dimensional-ordering of the inner mapping of A |
[in] | ordering_B | the dimensional-ordering of the inner mapping of B |
[out] | inner_prm | parameters including l (number of matrix multiplications), n, m, k |
Definition at line 200 of file contraction.cxx.
References alloc_ptr(), cdealloc(), inv_idx(), CTF_int::iparam::k, CTF_int::iparam::l, CTF_int::iparam::m, CTF_int::iparam::n, NS, CTF_int::tensor::order, CTF_int::tensor::pad_edge_len, and CTF_int::iparam::sz_C.
Referenced by get_len_ordering().
void CTF_int::calc_idx_arr | ( | int | order, |
int const * | lens, | ||
int const * | sym, | ||
int64_t | idx, | ||
int * | idx_arr | ||
) |
Definition at line 72 of file util.cxx.
References ASSERT, ctf.core::dim, NS, and packed_size().
Referenced by get_choice(), glb_ord_pup(), pad_cyclic_pup_virt_buff(), scal_diag(), and zero_padding().
int64_t * CTF_int::calc_sy_pfx | ( | int const * | sym, |
int const * | rep_phase, | ||
int const * | sphase, | ||
int const * | gidx_off, | ||
int const * | edge_len, | ||
int const * | loc_edge_len | ||
) |
computes the cardinality of the sets of elements of a tensor of order idim+1 for different values of the idim'th tensor dimension
Definition at line 55 of file dgtog_calc_cnt.cxx.
References alloc(), calc_cnt(), cdealloc(), get_glb(), get_loc(), NS, and SY.
int64_t* CTF_int::calc_sy_pfx< 1 > | ( | int const * | sym, |
int const * | rep_phase, | ||
int const * | sphase, | ||
int const * | gidx_off, | ||
int const * | edge_len, | ||
int const * | loc_edge_len | ||
) |
Definition at line 97 of file dgtog_calc_cnt.cxx.
References alloc(), calc_cnt< 0 >(), get_glb(), get_loc(), NS, and SY.
int CTF_int::can_block_reshuffle | ( | int | order, |
int const * | old_phase, | ||
mapping const * | map | ||
) |
determines if tensor can be permuted by block
[in] | order | dimension of tensor |
[in] | old_phase | old cyclic phases in each dimension |
[in] | map | new mapping for each edge length |
Definition at line 618 of file redist.cxx.
References CTF_int::mapping::calc_phase().
Referenced by CTF_int::tensor::est_redist_time(), CTF_int::tensor::get_redist_mem(), CTF_int::summation::is_equal(), and CTF_int::tensor::redistribute().
determines if two topologies are compatible with each other
topo_keep | topology to keep (larger dimension) |
topo_change | topology to change (smaller dimension) |
Definition at line 683 of file topology.cxx.
References CTF_int::topology::dim_comm, CTF_int::topology::lda, CTF_int::CommData::np, and CTF_int::topology::order.
Referenced by get_len_ordering().
double CTF_int::cddot | ( | int | n, |
const double * | dX, | ||
int | incX, | ||
const double * | dY, | ||
int | incY | ||
) |
Definition at line 60 of file model.cxx.
References CTF_BLAS::DDOT().
int CTF_int::cdealloc | ( | void * | ptr | ) |
free abstraction
[in,out] | ptr | pointer to set to address to free |
Definition at line 480 of file memcontrol.cxx.
References ABORT, cdealloc(), ERROR, max_threads, mst_free(), NEGATIVE, and SUCCESS.
Referenced by add_sym_perm(), CTF_int::CommData::all_to_allv(), assign_keys(), bcast_step(), block_reshuffle(), bucket_by_pe(), bucket_by_virt(), calc_cnt(), calc_cnt_displs(), calc_drv_displs(), calc_fold_lnmk(), calc_sy_pfx(), CTF_int::tensor::calc_tot_phase(), check_self_mapping(), CTF_int::tensor::compare(), copy_mapping(), CTF_int::tensor::copy_tensor_data(), CTF::Monoid< dtype, is_ord >::csr_add(), CTF_int::CSR_Matrix::csr_add(), CTF_int::algstrct::csr_reduce(), CTF::Bivar_Function< dtype_A, dtype_B, dtype_C >::csrmultcsr(), CTF::Bivar_Kernel< dtype_A, dtype_B, dtype_C, f, g >::csrmultcsr(), CTF::Bivar_Kernel< dtype_A, dtype_B, dtype_C, f, g >::csrmultcsr_old(), cyclic_reshuffle(), CTF_int::algstrct::dealloc(), depad_tsr(), depermute_keys(), depin(), CTF_int::tensor::despmatricize(), desymmetrize(), dgtog_reshuffle(), CTF_int::seq_tsr_spctr::est_fp(), CTF_int::seq_tsr_ctr::est_fp(), CTF_int::tensor::est_time_unfold(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), CTF_int::Contract_Term::execute(), CTF_int::tensor::extract_diag(), CTF_int::tensor::fold(), CTF_int::strp_tsr::free_exp(), CTF_int::tensor::free_self(), CTF::Semiring< dtype, is_ord >::gen_csrmultcsr(), CTF_int::COO_Matrix::get_data(), get_full_intm(), get_generic_topovec(), get_len_ordering(), get_phys_topo(), glb_cyclic_reshuffle(), glb_ord_pup(), CTF_int::summation::is_equal(), map_self_indices(), CTF_int::tensor::map_tensor_rem(), morph_topo(), nosym_transpose(), CTF::Idx_Tensor::operator-=(), order_globally(), CTF_int::tensor::orient_subworld(), pad_cyclic_pup_virt_buff(), padded_reshuffle(), CTF_int::algstrct::pair_dealloc(), peel_torus(), permute(), permute_target(), CTF_int::ConstPairIterator::pin(), CTF::Function_timer::print(), CTF_int::summation::print(), CTF_int::tensor::print(), CTF::print_timers(), CTF_int::tensor::read_all_pairs(), read_data_mpiio(), CTF_int::tensor::read_dense_from_file(), read_loc_pairs(), CTF_int::tensor::read_local(), 
CTF::read_sparse_from_file_base(), readwrite(), CTF_int::tensor::redistribute(), reduce_step_post(), CTF_int::tensor::remove_fold(), CTF_int::ctr_virt::run(), CTF_int::spctr_replicate::run(), CTF_int::strp_tsr::run(), CTF_int::scl_virt::run(), CTF_int::tspsum_virt::run(), CTF_int::tspsum_replicate::run(), CTF_int::ctr_2d_general::run(), CTF_int::spctr_2d_general::run(), CTF_int::tsum_virt::run(), CTF_int::tsum_replicate::run(), CTF_int::spctr_virt::run(), CTF_int::tspsum_map::run(), CTF_int::tspsum_permute::run(), CTF_int::spctr_pin_keys::run(), CTF_int::algstrct::safecopy(), CTF::Semiring< dtype, is_ord >::safemul(), scal_diag(), CTF_int::tensor::scale_diagonals(), CTF_int::distribution::serialize(), CTF_int::COO_Matrix::set_data(), CTF_int::tensor::set_distribution(), CTF_int::tensor::set_name(), CTF_int::tensor::set_padding(), CTF_int::tensor::set_zero(), CTF_int::tensor::slice(), CTF::Tensor< dtype >::slice(), spA_dnB_dnC_seq_ctr(), CTF_int::tensor::sparsify(), CTF_int::tensor::spmatricize(), spsfy_tsr(), strip_diag(), CTF_int::summation::sum_tensors(), CTF::Matrix< dtype >::svd(), sym_seq_ctr_cust(), sym_seq_ctr_inr(), sym_seq_ctr_ref(), sym_seq_scl_cust(), sym_seq_scl_ref(), sym_seq_sum_cust(), sym_seq_sum_inr(), sym_seq_sum_ref(), symmetrize(), CTF_int::tensor::tensor(), CTF_int::tensor::unfold(), CTF_int::LinModel< nparam >::update(), wr_pairs_layout(), CTF_int::tensor::write(), CTF_int::tensor::write_dense_to_file(), CTF::write_sparse_to_file_base(), CTF_int::tensor::zero_out_padding(), zero_padding(), CTF_int::contraction::~contraction(), CTF_int::ctr_replicate::~ctr_replicate(), CTF_int::ctr_virt::~ctr_virt(), CTF::Idx_Tensor::~Idx_Tensor(), CTF_int::LinModel< nparam >::~LinModel(), CTF::Partition::~Partition(), CTF_int::scaling::~scaling(), CTF_int::scl::~scl(), CTF_int::scl_virt::~scl_virt(), CTF_int::seq_tsr_ctr::~seq_tsr_ctr(), CTF_int::seq_tsr_scl::~seq_tsr_scl(), CTF_int::seq_tsr_spctr::~seq_tsr_spctr(), CTF_int::seq_tsr_spsum::~seq_tsr_spsum(), 
CTF_int::seq_tsr_sum::~seq_tsr_sum(), CTF_int::spctr_pin_keys::~spctr_pin_keys(), CTF_int::spctr_replicate::~spctr_replicate(), CTF_int::spctr_virt::~spctr_virt(), CTF_int::strp_tsr::~strp_tsr(), CTF_int::summation::~summation(), CTF_int::Term::~Term(), CTF_int::topology::~topology(), CTF_int::tspsum::~tspsum(), CTF_int::tspsum_map::~tspsum_map(), CTF_int::tspsum_permute::~tspsum_permute(), CTF_int::tspsum_pin_keys::~tspsum_pin_keys(), CTF_int::tspsum_replicate::~tspsum_replicate(), CTF_int::tspsum_virt::~tspsum_virt(), CTF_int::tsum::~tsum(), CTF_int::tsum_replicate::~tsum_replicate(), and CTF_int::tsum_virt::~tsum_virt().
int CTF_int::cdealloc | ( | void * | ptr, |
int const | tid | ||
) |
free abstraction
[in,out] | ptr | pointer to set to address to free |
[in] | tid | thread id from whose stack the pointer needs to be freed |
Definition at line 410 of file memcontrol.cxx.
References ctf.core::it, mst_free(), NEGATIVE, and SUCCESS.
Referenced by cdealloc(), and cdealloc_cond().
int CTF_int::cdealloc_cond | ( | void * | ptr | ) |
free abstraction (conditional (no error if not found))
[in,out] | ptr | pointer to set to address to free |
Definition at line 448 of file memcontrol.cxx.
References cdealloc(), max_threads, NEGATIVE, and SUCCESS.
void CTF_int::cdgelsd | ( | int | m, |
int | n, | ||
int | k, | ||
double const * | A, | ||
int | lda_A, | ||
double * | B, | ||
int | lda_B, | ||
double * | S, | ||
double | cond, | ||
int * | rank, | ||
double * | work, | ||
int | lwork, | ||
int * | iwork, | ||
int * | info | ||
) |
Definition at line 102 of file model.cxx.
References CTF_LAPACK::cdgelsd().
Referenced by CTF_int::LinModel< nparam >::update().
void CTF_int::cdgeqrf | ( | int const | M, |
int const | N, | ||
double * | A, | ||
int const | LDA, | ||
double * | TAU2, | ||
double * | WORK, | ||
int const | LWORK, | ||
int * | INFO | ||
) |
Definition at line 67 of file model.cxx.
References CTF_LAPACK::cdgeqrf().
Referenced by CTF_int::LinModel< nparam >::update().
void CTF_int::cdormqr | ( | char | SIDE, |
char | TRANS, | ||
int | M, | ||
int | N, | ||
int | K, | ||
double const * | A, | ||
int | LDA, | ||
double const * | TAU2, | ||
double * | C, | ||
int | LDC, | ||
double * | WORK, | ||
int | LWORK, | ||
int * | INFO | ||
) |
Definition at line 80 of file model.cxx.
References CTF_LAPACK::cdormqr().
Referenced by CTF_int::LinModel< nparam >::update().
void CTF_int::char_abs | ( | char const * | a, |
char * | b | ||
) |
Definition at line 130 of file set.h.
References ctf.core::abs(), and ctf.core::dtype.
int64_t CTF_int::chchoose | ( | int64_t | n, |
int64_t | k | ||
) |
int CTF_int::check_self_mapping | ( | tensor const * | tsr, |
int const * | idx_map | ||
) |
checks mapping in preparation for tensor scale, sum, or contract
[in] | tsr | handle to tensor |
[in] | idx_map | is the mapping of tensor to global indices |
Definition at line 332 of file mapping.cxx.
References alloc_ptr(), CTF_int::mapping::calc_phase(), cdealloc(), CTF_int::mapping::cdt, CTF_int::mapping::child, DPRINTF, CTF_int::tensor::edge_map, CTF_int::mapping::has_child, CTF_int::tensor::order, PHYSICAL_MAP, and CTF_int::mapping::type.
Referenced by CTF_int::scaling::execute(), get_len_ordering(), CTF_int::summation::is_equal(), and CTF_int::tensor::set_distribution().
int64_t CTF_int::choose | ( | int64_t | n, |
int64_t | k | ||
) |
Definition at line 285 of file util.cxx.
References fact().
Referenced by coalesce_bwd(), and get_len_ordering().
void CTF_int::cmp_sym_perms | ( | int | ndim, |
int const * | sym, | ||
int * | nperm, | ||
int ** | perm, | ||
double * | sign | ||
) |
finds all permutations of a tensor according to a symmetry
[in] | ndim | dimension of tensor |
[in] | sym | symmetry specification of tensor |
[out] | nperm | number of symmetric permutations to do |
[out] | perm | the permutation |
[out] | sign | sign of each permutation |
Definition at line 400 of file symmetrization.cxx.
References alloc_ptr(), AS, ASSERT, ctf.core::np(), and NS.
|
inline |
we receive a contiguous buffer kb-by-n B and (k-kb)-by-n B_aux which is the block below. To get a k-by-n buffer, we need to combine this buffer with our original block. Since we are working with column-major ordering we need to interleave the blocks. That's what this function does.
[in] | el_size | element size |
[in,out] | B | the buffer to coalesce into |
[in] | B_aux | the second buffer to coalesce from |
[in] | k | the total number of rows |
[in] | n | the number of columns |
[in] | kb | the number of rows in B originally |
Definition at line 398 of file util.h.
References ctf.core::a, ctf.core::b, chchoose(), choose(), fact(), get_choice(), permute(), permute_target(), socopy(), and spcopy().
compares two mappings
map_A | first map |
map_B | second map; returns true if the mappings are exactly the same, false otherwise |
Definition at line 143 of file mapping.cxx.
References ASSERT, CTF_int::mapping::cdt, CTF_int::mapping::child, DEBUG_PRINTF, CTF_int::mapping::has_child, NOT_MAPPED, CTF_int::mapping::np, PHYSICAL_MAP, CTF_int::mapping::type, and VIRTUAL_MAP.
Referenced by CTF_int::tensor::align(), ctr_2d_gen_build(), get_len_ordering(), and CTF_int::summation::is_equal().
bool CTF_int::comp_time_param | ( | const time_param< nparam > & | a, |
const time_param< nparam > & | b | ||
) |
Definition at line 114 of file model.cxx.
References CTF_int::time_param< nparam >::p.
int ** CTF_int::compute_bucket_offsets | ( | distribution const & | old_dist, |
distribution const & | new_dist, | ||
int const * | len, | ||
int const * | old_phys_edge_len, | ||
int const * | old_virt_lda, | ||
int const * | old_offsets, | ||
int *const * | old_permutation, | ||
int const * | new_phys_edge_len, | ||
int const * | new_virt_lda, | ||
int | forward, | ||
int | old_virt_np, | ||
int | new_virt_np, | ||
int const * | old_virt_edge_len | ||
) |
computes offsets for redistribution targets along each edge length
[in] | old_dist | starting data distrubtion |
[in] | new_dist | target data distrubtion |
[in] | len | unpadded edge lengths |
[in] | old_phys_edge_len | total edge lengths of old local tensor chunk |
[in] | old_virt_lda | prefix sum of old_dist.virt_phase |
[in] | old_offsets | old offsets of each tensor edge (corner 1 of slice) |
[in] | old_permutation | permutation array for each edge length (no perm if NULL) |
[in] | new_phys_edge_len | total edge lengths of new local tensor chunk |
[in] | new_virt_lda | prefix sum of new_dist.virt_phase |
[in] | forward | 1 for sending 0 for receiving |
[in] | old_virt_np | number of blocks per processor in old_dist |
[in] | new_virt_np | number of blocks per processor in new_dist |
[in] | old_virt_edge_len | edge lengths of each block in old_dist |
Definition at line 111 of file redist.cxx.
References alloc_ptr(), ctf.core::dim, MAX, CTF_int::distribution::order, CTF_int::distribution::pe_lda, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.
Referenced by cyclic_reshuffle(), and glb_cyclic_reshuffle().
void CTF_int::compute_syoff | ( | int | r, |
int | len, | ||
algstrct const * | sr, | ||
int const * | edge_len, | ||
int const * | sym, | ||
uint64_t * | offsets | ||
) |
Definition at line 301 of file sym_seq_ctr.cxx.
References CTF_int::accumulatable::el_size, NS, and sy_packed_size().
Referenced by compute_syoffs().
void CTF_int::compute_syoffs | ( | algstrct const * | sr_A, |
int | order_A, | ||
int const * | edge_len_A, | ||
int const * | sym_A, | ||
int const * | idx_map_A, | ||
algstrct const * | sr_B, | ||
int | order_B, | ||
int const * | edge_len_B, | ||
int const * | sym_B, | ||
int const * | idx_map_B, | ||
int | tot_order, | ||
int const * | rev_idx_map, | ||
uint64_t **& | offsets_A, | ||
uint64_t **& | offsets_B | ||
) |
Definition at line 193 of file sym_seq_sum.cxx.
References alloc(), compute_syoff(), compute_syoffs(), TAU_FSTART, and TAU_FSTOP.
void CTF_int::compute_syoffs | ( | algstrct const * | sr_A, |
int | order_A, | ||
int const * | edge_len_A, | ||
int const * | sym_A, | ||
int const * | idx_map_A, | ||
algstrct const * | sr_B, | ||
int | order_B, | ||
int const * | edge_len_B, | ||
int const * | sym_B, | ||
int const * | idx_map_B, | ||
algstrct const * | sr_C, | ||
int | order_C, | ||
int const * | edge_len_C, | ||
int const * | sym_C, | ||
int const * | idx_map_C, | ||
int | tot_order, | ||
int const * | rev_idx_map, | ||
uint64_t **& | offsets_A, | ||
uint64_t **& | offsets_B, | ||
uint64_t **& | offsets_C | ||
) |
Definition at line 332 of file sym_seq_ctr.cxx.
References alloc(), compute_syoff(), TAU_FSTART, and TAU_FSTOP.
Referenced by compute_syoffs(), spA_dnB_dnC_seq_ctr(), sym_seq_ctr_cust(), sym_seq_ctr_ref(), and sym_seq_sum_ref().
Definition at line 42 of file ctf_ext.cxx.
References ctf.core::a, and CTF_int::tensor::order.
template void CTF_int::conj_helper< double > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
template void CTF_int::conj_helper< float > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
std::vector< Term* > CTF_int::contract_down_terms | ( | algstrct * | sr, |
char * | tscale, | ||
std::vector< Term * > | operands, | ||
std::vector< char > | out_inds, | ||
int | terms_to_leave, | ||
bool | est_time = false , |
||
double * | cost = NULL |
||
) |
Definition at line 563 of file term.cxx.
References CTF::Idx_Tensor::clone(), CTF_int::Contract_Term::clone(), det_uniq_inds(), CTF_int::Term::estimate_time(), CTF_int::contraction::estimate_time(), CTF_int::Term::execute(), CTF_int::contraction::execute(), get_full_intm(), CTF::Idx_Tensor::idx_map, CTF_int::algstrct::mulid(), CTF::Idx_Tensor::parent, CTF_int::algstrct::safecopy(), CTF_int::algstrct::safemul(), and CTF_int::Term::scale.
Referenced by CTF_int::Contract_Term::estimate_time(), and CTF_int::Contract_Term::execute().
std::list< mem_transfer > CTF_int::contract_mst | ( | ) |
gets rid of empty space on the stack
Definition at line 125 of file memcontrol.cxx.
References CPY_BUFFER_SIZE, ctf.core::it, MIN, mst, mst_alloc(), CTF_int::mem_transfer::new_ptr, CTF_int::mem_transfer::old_ptr, TAU_FSTART, and TAU_FSTOP.
Referenced by CTF_int::summation::estimate_time(), and get_len_ordering().
int CTF_int::conv_idx | ( | int | order, |
type const * | cidx, | ||
int ** | iidx | ||
) |
Definition at line 50 of file common.cxx.
References alloc().
Referenced by CTF_int::contraction::contraction(), CTF_int::scaling::scaling(), CTF_int::tensor::set_distribution(), and CTF_int::summation::summation().
int CTF_int::conv_idx | ( | int | order_A, |
type const * | cidx_A, | ||
int ** | iidx_A, | ||
int | order_B, | ||
type const * | cidx_B, | ||
int ** | iidx_B | ||
) |
Definition at line 76 of file common.cxx.
References alloc(), and conv_idx().
int CTF_int::conv_idx | ( | int | order_A, |
type const * | cidx_A, | ||
int ** | iidx_A, | ||
int | order_B, | ||
type const * | cidx_B, | ||
int ** | iidx_B, | ||
int | order_C, | ||
type const * | cidx_C, | ||
int ** | iidx_C | ||
) |
Definition at line 114 of file common.cxx.
References alloc(), conv_idx< char >(), and conv_idx< int >().
Referenced by conv_idx().
template int CTF_int::conv_idx< char > | ( | int | , |
char const * | , | ||
int ** | |||
) |
template int CTF_int::conv_idx< char > | ( | int | , |
char const * | , | ||
int ** | , | ||
int | , | ||
char const * | , | ||
int ** | |||
) |
template int CTF_int::conv_idx< char > | ( | int | , |
char const * | , | ||
int ** | , | ||
int | , | ||
char const * | , | ||
int ** | , | ||
int | , | ||
char const * | , | ||
int ** | |||
) |
Referenced by conv_idx().
template int CTF_int::conv_idx< int > | ( | int | , |
int const * | , | ||
int ** | |||
) |
Referenced by CTF_int::summation::estimate_time(), and get_len_ordering().
template int CTF_int::conv_idx< int > | ( | int | , |
int const * | , | ||
int ** | , | ||
int | , | ||
int const * | , | ||
int ** | |||
) |
template int CTF_int::conv_idx< int > | ( | int | , |
int const * | , | ||
int ** | , | ||
int | , | ||
int const * | , | ||
int ** | , | ||
int | , | ||
int const * | , | ||
int ** | |||
) |
Referenced by conv_idx().
convert tensor from one type to another
[in] | type_idx1 | index of first type |
[in] | type_idx2 | index of second type |
[in] | A | tensor to convert |
[in] | B | tensor to convert to |
Definition at line 327 of file ctf_ext.cxx.
References abs_helper< bool >(), abs_helper< double >(), abs_helper< float >(), abs_helper< int16_t >(), abs_helper< int32_t >(), abs_helper< int64_t >(), abs_helper< int8_t >(), all_helper< bool >(), all_helper< double >(), all_helper< float >(), all_helper< int16_t >(), all_helper< int32_t >(), all_helper< int64_t >(), all_helper< int8_t >(), any_helper< bool >(), any_helper< double >(), any_helper< float >(), any_helper< int16_t >(), any_helper< int32_t >(), any_helper< int64_t >(), any_helper< int8_t >(), conj_helper< double >(), conj_helper< float >(), get_imag< double >(), get_imag< float >(), get_real< double >(), get_real< float >(), pow_helper< bool >(), pow_helper< double >(), pow_helper< float >(), pow_helper< int16_t >(), pow_helper< int32_t >(), pow_helper< int64_t >(), pow_helper< int8_t >(), set_imag< double >(), set_imag< float >(), set_real< double >(), set_real< float >(), and SWITCH_TYPE.
copies mapping A to B
[in] | order | number of dimensions |
[in] | mapping_A | mapping to copy from |
[in,out] | mapping_B | mapping to copy to |
Definition at line 190 of file mapping.cxx.
References CTF_int::mapping::clear(), and CTF_int::mapping::mapping().
Referenced by CTF_int::tensor::align(), copy_mapping(), CTF_int::tensor::copy_tensor_data(), desymmetrize(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), get_len_ordering(), CTF_int::summation::is_equal(), CTF_int::tensor::pull_alias(), and symmetrize().
int CTF_int::copy_mapping | ( | int | order_A, |
int | order_B, | ||
int const * | idx_A, | ||
mapping const * | mapping_A, | ||
int const * | idx_B, | ||
mapping * | mapping_B, | ||
int | make_virt = 1 |
||
) |
copies mapping A to B
[in] | order_A | number of dimensions in A |
[in] | order_B | number of dimensions in B |
[in] | idx_A | index mapping of A |
[in] | mapping_A | mapping to copy from |
[in] | idx_B | index mapping of B |
[in,out] | mapping_B | mapping to copy to |
[in] | make_virt | makes virtual |
Definition at line 210 of file mapping.cxx.
References ASSERT, cdealloc(), CTF_int::mapping::clear(), copy_mapping(), CTF_int::mapping::has_child, inv_idx(), CTF_int::mapping::np, SUCCESS, CTF_int::mapping::type, and VIRTUAL_MAP.
int CTF_int::ctr_2d_gen_build | ( | int | is_used, |
CommData | global_comm, | ||
int | i, | ||
int * | virt_dim, | ||
int & | cg_edge_len, | ||
int & | total_iter, | ||
tensor * | A, | ||
int | i_A, | ||
CommData *& | cg_cdt_A, | ||
int64_t & | cg_ctr_lda_A, | ||
int64_t & | cg_ctr_sub_lda_A, | ||
bool & | cg_move_A, | ||
int * | blk_len_A, | ||
int64_t & | blk_sz_A, | ||
int const * | virt_blk_len_A, | ||
int & | load_phase_A, | ||
tensor * | B, | ||
int | i_B, | ||
CommData *& | cg_cdt_B, | ||
int64_t & | cg_ctr_lda_B, | ||
int64_t & | cg_ctr_sub_lda_B, | ||
bool & | cg_move_B, | ||
int * | blk_len_B, | ||
int64_t & | blk_sz_B, | ||
int const * | virt_blk_len_B, | ||
int & | load_phase_B, | ||
tensor * | C, | ||
int | i_C, | ||
CommData *& | cg_cdt_C, | ||
int64_t & | cg_ctr_lda_C, | ||
int64_t & | cg_ctr_sub_lda_C, | ||
bool & | cg_move_C, | ||
int * | blk_len_C, | ||
int64_t & | blk_sz_C, | ||
int const * | virt_blk_len_C, | ||
int & | load_phase_C | ||
) |
sets up a ctr_2d_general (2D SUMMA) level where A is not communicated; the function will be called with A/B/C permuted depending on the desired algorithm
[in] | is_used | whether this ctr will actually be run |
[in] | global_comm | comm for this CTF instance |
[in] | i | index in the total index map currently worked on |
[in,out] | virt_dim | virtual processor grid lengths |
[out] | cg_edge_len | edge lengths of ctr_2d_gen object to set |
[in,out] | total_iter | the total number of ctr_2d_gen iterations |
[in] | A | A tensor |
[in] | i_A | the index in A to which index i corresponds |
[out] | cg_cdt_A | the communicator for A to be set for ctr_2d_gen |
[out] | cg_ctr_lda_A | parameter of ctr_2d_gen corresponding to upper lda for lda_cpy |
[out] | cg_ctr_sub_lda_A | parameter of ctr_2d_gen corresponding to lower lda for lda_cpy |
[out] | cg_move_A | tells ctr_2d_gen whether A should be communicated |
[in,out] | blk_len_A | lengths of local A piece after this ctr_2d_gen level |
[in,out] | blk_sz_A | size of local A piece after this ctr_2d_gen level |
[in] | virt_blk_len_A | edge lengths of virtual blocks of A |
[in] | load_phase_A | tells the offloader how often A buffer changes for ctr_2d_gen |
... the other parameters are specified the same as for _A but this time for _B and _C
Definition at line 12 of file ctr_2d_general.cxx.
References ASSERT, CTF_int::mapping::calc_phase(), CTF_int::mapping::cdt, CTF_int::mapping::child, comp_dim_map(), CTF_int::topology::dim_comm, CTF_int::tensor::edge_map, CTF_int::mapping::has_child, lcm(), MAX, CTF_int::mapping::np, CTF_int::tensor::order, PHYSICAL_MAP, CTF_int::tensor::topo, CTF_int::mapping::type, and VIRTUAL_MAP.
Referenced by CTF_int::contraction::contraction(), and get_len_ordering().
void CTF_int::cvrt_idx | ( | int | order, |
int const * | lens, | ||
int64_t | idx, | ||
int * | idx_arr | ||
) |
Definition at line 533 of file common.cxx.
Referenced by CTF::Tensor< dtype >::slice(), and wr_pairs_layout().
void CTF_int::cvrt_idx | ( | int | order, |
int const * | lens, | ||
int64_t | idx, | ||
int ** | idx_arr | ||
) |
Definition at line 545 of file common.cxx.
References alloc(), and cvrt_idx().
void CTF_int::cvrt_idx | ( | int | order, |
int const * | lens, | ||
int const * | idx_arr, | ||
int64_t * | idx | ||
) |
Definition at line 553 of file common.cxx.
References MPI_CTF_DOUBLE_COMPLEX.
Referenced by cvrt_idx().
void CTF_int::cyclic_reshuffle | ( | int const * | sym, |
distribution const & | old_dist, | ||
int const * | old_offsets, | ||
int *const * | old_permutation, | ||
distribution const & | new_dist, | ||
int const * | new_offsets, | ||
int *const * | new_permutation, | ||
char ** | tsr_data, | ||
char ** | tsr_cyclic_data, | ||
algstrct const * | sr, | ||
CommData | ord_glb_comm, | ||
bool | reuse_buffers, | ||
char const * | alpha, | ||
char const * | beta | ||
) |
Goes from any set of phases to any new set of phases.
[in] | sym | symmetry relations between tensor dimensions |
[in] | old_dist | starting data distrubtion |
[in] | old_offsets | old offsets of each tensor edge (corner 1 of slice) |
[in] | old_permutation | permutation array for each edge length (no perm if NULL) |
[in] | new_dist | target data distribution |
[in] | new_offsets | new offsets of each tensor edge (corner 1 of slice) |
[in] | new_permutation | permutation array for each edge length (no perm if NULL) |
[in] | tsr_data | starting data buffer |
[out] | tsr_cyclic_data | target data buffer |
[in] | sr | algstrct defining data |
[in] | ord_glb_comm | communicator on which to redistribute |
[in] | reuse_buffers | if 1: ptr_tsr_cyclic_data is allocated dynamically and ptr_tsr_data is overwritten with intermediate data; if 0: ptr_tsr_cyclic_data is preallocated and can be scaled by beta, however, more memory is used for temp buffers |
[in] | alpha | scaling tensor for new data |
[in] | beta | scaling tensor for original data |
Definition at line 477 of file cyclic_reshuffle.cxx.
References CTF_int::algstrct::acc(), CTF_int::algstrct::addid(), CTF_int::CommData::all_to_allv(), alloc_ptr(), ASSERT, calc_cnt_displs(), cdealloc(), compute_bucket_offsets(), CTF_int::algstrct::copy(), ctf.core::dim, CTF_int::accumulatable::el_size, CTF_int::distribution::is_cyclic, CTF_int::algstrct::isequal(), MAX, mst_alloc_ptr(), CTF_int::algstrct::mulid(), CTF_int::CommData::np, ctf.core::np(), CTF_int::distribution::order, pad_cyclic_pup_virt_buff(), CTF_int::distribution::pad_edge_len, CTF_int::distribution::padding, CTF_int::distribution::pe_lda, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::CommData::rank, CTF_int::algstrct::set(), CTF_int::distribution::size, sy_packed_size(), TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.
Referenced by CTF_int::tensor::add_from_subworld(), CTF_int::tensor::add_to_subworld(), and glb_cyclic_reshuffle().
void CTF_int::def_coo_to_csr | ( | int64_t | nz, |
int | nrow, | ||
dtype * | csr_vs, | ||
int * | csr_ja, | ||
int * | csr_ia, | ||
dtype const * | coo_vs, | ||
int const * | coo_rs, | ||
int const * | coo_cs | ||
) |
Definition at line 100 of file set.h.
Referenced by CTF::Set< dtype, is_ord >::coo_to_csr().
void CTF_int::def_csr_to_coo | ( | int64_t | nz, |
int | nrow, | ||
dtype const * | csr_vs, | ||
int const * | csr_ja, | ||
int const * | csr_ia, | ||
dtype * | coo_vs, | ||
int * | coo_rs, | ||
int * | coo_cs | ||
) |
Definition at line 105 of file set.h.
Referenced by CTF::Set< dtype, is_ord >::csr_to_coo().
|
inline |
Definition at line 116 of file set.h.
References ctf.core::a, ctf.core::b, and ctf.core::dtype.
|
inline |
Definition at line 123 of file set.h.
References ctf.core::a.
dtype CTF_int::default_add | ( | dtype | a, |
dtype | b | ||
) |
Definition at line 6 of file monoid.h.
References ctf.core::b.
dtype CTF_int::default_addinv | ( | dtype | a | ) |
Definition at line 110 of file set.h.
References ctf.core::a.
void CTF_int::default_axpy | ( | int | n, |
dtype | alpha, | ||
dtype const * | X, | ||
int | incX, | ||
dtype * | Y, | ||
int | incY | ||
) |
Definition at line 19 of file semiring.h.
References default_axpy< double >(), and default_axpy< float >().
void CTF_int::default_axpy< double > | ( | int | n, |
double | alpha, | ||
double const * | X, | ||
int | incX, | ||
double * | Y, | ||
int | incY | ||
) |
Definition at line 139 of file semiring.cxx.
References CTF_BLAS::DAXPY().
Referenced by default_axpy(), and default_axpy< float >().
void CTF_int::default_axpy< float > | ( | int | n, |
float | alpha, | ||
float const * | X, | ||
int | incX, | ||
float * | Y, | ||
int | incY | ||
) |
Definition at line 128 of file semiring.cxx.
References default_axpy< double >(), and CTF_BLAS::SAXPY().
Referenced by default_axpy().
void CTF_int::default_axpy< std::complex< double > > | ( | int | n, |
std::complex< double > | alpha, | ||
std::complex< double > const * | X, | ||
int | incX, | ||
std::complex< double > * | Y, | ||
int | incY | ||
) |
Definition at line 161 of file semiring.cxx.
References CTF_BLAS::ZAXPY().
void CTF_int::default_axpy< std::complex< float > > | ( | int | n, |
std::complex< float > | alpha, | ||
std::complex< float > const * | X, | ||
int | incX, | ||
std::complex< float > * | Y, | ||
int | incY | ||
) |
Definition at line 150 of file semiring.cxx.
References CTF_BLAS::CAXPY().
void CTF_int::default_coomm | ( | int | m, |
int | n, | ||
int | k, | ||
dtype | alpha, | ||
dtype const * | A, | ||
int const * | rows_A, | ||
int const * | cols_A, | ||
int | nnz_A, | ||
dtype const * | B, | ||
dtype | beta, | ||
dtype * | C | ||
) |
Definition at line 299 of file semiring.h.
References default_coomm< double >(), and default_coomm< float >().
Referenced by default_gemm_batch< std::complex< double > >().
void CTF_int::default_coomm< double > | ( | int | m, |
int | n, | ||
int | k, | ||
double | alpha, | ||
double const * | A, | ||
int const * | rows_A, | ||
int const * | cols_A, | ||
int | nnz_A, | ||
double const * | B, | ||
double | beta, | ||
double * | C | ||
) |
Definition at line 233 of file semiring.cxx.
References DEF_COOMM_KERNEL, and CTF_BLAS::MKL_DCOOMM().
Referenced by default_coomm(), and default_coomm< float >().
void CTF_int::default_coomm< float > | ( | int | m, |
int | n, | ||
int | k, | ||
float | alpha, | ||
float const * | A, | ||
int const * | rows_A, | ||
int const * | cols_A, | ||
int | nnz_A, | ||
float const * | B, | ||
float | beta, | ||
float * | C | ||
) |
Definition at line 208 of file semiring.cxx.
References DEF_COOMM_KERNEL, default_coomm< double >(), and CTF_BLAS::MKL_SCOOMM().
Referenced by default_coomm().
void CTF_int::default_coomm< std::complex< double > > | ( | int | m, |
int | n, | ||
int | k, | ||
std::complex< double > | alpha, | ||
std::complex< double > const * | A, | ||
int const * | rows_A, | ||
int const * | cols_A, | ||
int | nnz_A, | ||
std::complex< double > const * | B, | ||
std::complex< double > | beta, | ||
std::complex< double > * | C | ||
) |
Definition at line 286 of file semiring.cxx.
References DEF_COOMM_KERNEL, CTF_BLAS::MKL_ZCOOMM(), and muladd_csrmm().
void CTF_int::default_coomm< std::complex< float > > | ( | int | m, |
int | n, | ||
int | k, | ||
std::complex< float > | alpha, | ||
std::complex< float > const * | A, | ||
int const * | rows_A, | ||
int const * | cols_A, | ||
int | nnz_A, | ||
std::complex< float > const * | B, | ||
std::complex< float > | beta, | ||
std::complex< float > * | C | ||
) |
Definition at line 261 of file semiring.cxx.
References DEF_COOMM_KERNEL, and CTF_BLAS::MKL_CCOOMM().
void CTF_int::default_fxpy | ( | int | n, |
dtype const * | X, | ||
dtype * | Y | ||
) |
void CTF_int::default_gemm | ( | char | tA, |
char | tB, | ||
int | m, | ||
int | n, | ||
int | k, | ||
dtype | alpha, | ||
dtype const * | A, | ||
dtype const * | B, | ||
dtype | beta, | ||
dtype * | C | ||
) |
Definition at line 71 of file semiring.h.
|
inline |
Definition at line 166 of file semiring.h.
Referenced by default_gemm< float >(), and CTF::Semiring< dtype, is_ord >::is_offloadable().
|
inline |
Definition at line 151 of file semiring.h.
References default_gemm< double >().
Referenced by get_grp_ptrs(), and CTF::Semiring< dtype, is_ord >::is_offloadable().
|
inline |
Definition at line 196 of file semiring.h.
References default_gemm_batch().
|
inline |
Definition at line 181 of file semiring.h.
void CTF_int::default_gemm_batch | ( | char | taA, |
char | taB, | ||
int | l, | ||
int | m, | ||
int | n, | ||
int | k, | ||
dtype | alpha, | ||
dtype const * | A, | ||
dtype const * | B, | ||
dtype | beta, | ||
dtype * | C | ||
) |
Definition at line 211 of file semiring.h.
References default_gemm_batch< float >().
Referenced by default_gemm< std::complex< double > >().
|
inline |
Definition at line 251 of file semiring.h.
Referenced by default_gemm_batch< float >().
|
inline |
Definition at line 235 of file semiring.h.
References default_gemm_batch< double >().
Referenced by default_gemm_batch().
|
inline |
Definition at line 283 of file semiring.h.
References default_coomm().
|
inline |
Definition at line 267 of file semiring.h.
|
inline |
Definition at line 182 of file set.h.
References ctf.core::a.
|
inline |
Definition at line 188 of file set.h.
References ctf.core::a.
|
inline |
|
inline |
Definition at line 158 of file set.h.
References ctf.core::a, and ctf.core::dtype.
|
inline |
Definition at line 138 of file set.h.
References ctf.core::a.
|
inline |
Definition at line 144 of file set.h.
References ctf.core::a.
|
inline |
|
inline |
Definition at line 173 of file set.h.
References ctf.core::a, and ctf.core::dtype.
dtype CTF_int::default_mul | ( | dtype | a, |
dtype | b | ||
) |
Definition at line 14 of file semiring.h.
References ctf.core::b.
void CTF_int::default_mxpy | ( | void * | X, |
void * | Y, | ||
int * | n, | ||
MPI_Datatype * | d | ||
) |
Definition at line 11 of file monoid.h.
References ctf.core::dtype.
Referenced by get_default_maddop().
void CTF_int::default_scal | ( | int | n, |
dtype | alpha, | ||
dtype * | X, | ||
int | incX | ||
) |
Definition at line 47 of file semiring.h.
References default_scal< double >(), and default_scal< float >().
void CTF_int::default_scal< double > | ( | int | n, |
double | alpha, | ||
double * | X, | ||
int | incX | ||
) |
Definition at line 176 of file semiring.cxx.
References CTF_BLAS::DSCAL().
Referenced by default_scal().
void CTF_int::default_scal< float > | ( | int | n, |
float | alpha, | ||
float * | X, | ||
int | incX | ||
) |
Definition at line 171 of file semiring.cxx.
References CTF_BLAS::SSCAL().
Referenced by default_scal().
void CTF_int::default_scal< std::complex< double > > | ( | int | n, |
std::complex< double > | alpha, | ||
std::complex< double > * | X, | ||
int | incX | ||
) |
Definition at line 188 of file semiring.cxx.
References CTF_BLAS::ZSCAL().
void CTF_int::default_scal< std::complex< float > > | ( | int | n, |
std::complex< float > | alpha, | ||
std::complex< float > * | X, | ||
int | incX | ||
) |
Definition at line 182 of file semiring.cxx.
References CTF_BLAS::CSCAL().
void CTF_int::depad_tsr | ( | int | order, |
int64_t | num_pair, | ||
int const * | edge_len, | ||
int const * | sym, | ||
int const * | padding, | ||
int const * | prepadding, | ||
char const * | pairsb, | ||
char * | new_pairsb, | ||
int64_t * | new_num_pair, | ||
algstrct const * | sr | ||
) |
retrieves the unpadded pairs
[in] | order | tensor dimension |
[in] | num_pair | number of pairs |
[in] | edge_len | tensor edge lengths |
[in] | sym | symmetry types of tensor |
[in] | padding | padding of tensor (included in edge_len) |
[in] | prepadding | padding at start of tensor (included in edge_len) |
[in] | pairsb | padded array of pairs |
[out] | new_pairsb | unpadded pairs |
[out] | new_num_pair | number of unpadded pairs |
[in] | sr | algstrct defines size of each pair |
Definition at line 51 of file pad.cxx.
References alloc(), alloc_ptr(), AS, cdealloc(), CTF_int::ConstPairIterator::k(), SH, SY, TAU_FSTART, TAU_FSTOP, and CTF_int::PairIterator::write().
Referenced by read_loc_pairs(), CTF_int::tensor::slice(), and CTF_int::tensor::sparsify().
void CTF_int::depermute_keys | ( | int | order, |
int | num_pair, | ||
int const * | edge_len, | ||
int const * | new_edge_len, | ||
int *const * | permutation, | ||
char * | pairs, | ||
algstrct const * | sr | ||
) |
depermutes keys (apply P^T)
[in] | order | tensor dimension |
[in] | num_pair | number of pairs |
[in] | edge_len | old nonpadded tensor edge lengths |
[in] | new_edge_len | new nonpadded tensor edge lengths |
[in] | permutation | permutation to apply to keys of each pair |
[in,out] | pairs | the keys and values as pairs |
[in] | sr | algstrct defining data type of array |
Definition at line 99 of file sparse_rw.cxx.
References alloc(), ASSERT, cdealloc(), CTF_int::PairIterator::k(), MIN, CTF_int::algstrct::pair_size(), TAU_FSTART, TAU_FSTOP, and CTF_int::PairIterator::write_key().
Referenced by CTF_int::tensor::permute().
void CTF_int::depin | ( | algstrct const * | sr, |
int | order, | ||
int const * | lens, | ||
int const * | divisor, | ||
int | nvirt, | ||
int const * | virt_dim, | ||
int const * | phys_rank, | ||
char * | X, | ||
int64_t & | new_nnz_B, | ||
int64_t * | nnz_blk, | ||
char *& | new_B, | ||
bool | check_padding | ||
) |
depins keys of n pairs
Definition at line 883 of file algstrct.cxx.
References alloc(), alloc_ptr(), cdealloc(), CTF_int::PairIterator::k(), CTF_int::algstrct::pair_alloc(), CTF_int::algstrct::pair_size(), TAU_FSTART, and TAU_FSTOP.
Referenced by CTF_int::spctr_pin_keys::run(), CTF_int::tspsum_pin_keys::run(), and CTF_int::algstrct::~algstrct().
unfolds the data of a tensor
[in] | sym_tsr | starting symmetric tensor (where data starts) |
[in] | nonsym_tsr | new tensor with a potentially unfolded symmetry |
[in] | is_C | whether the tensor is an output of the operation |
Definition at line 12 of file symmetrization.cxx.
References CTF_int::algstrct::addinv(), alloc(), CTF_int::algstrct::alloc(), alloc_ptr(), AS, CTF_int::tensor::calc_nvirt(), CTF_int::algstrct::cast_double(), cdealloc(), CTF_int::tensor::clear_mapping(), copy_mapping(), CTF_int::tensor::data, CTF_int::tensor::edge_map, CTF_int::accumulatable::el_size, CTF_int::scaling::execute(), CTF_int::summation::execute(), CTF_int::tensor::has_home, CTF_int::tensor::home_buffer, CTF_int::tensor::home_size, CTF_int::tensor::is_data_aliased, CTF_int::tensor::is_home, CTF_int::tensor::is_mapped, CTF_int::tensor::is_sparse, CTF_int::algstrct::mulid(), CTF_int::tensor::name, CTF_int::tensor::nnz_blk, NS, CTF_int::tensor::order, CTF_int::tensor::profile, CTF::World::rank, scal_diag(), CTF_int::tensor::set_new_nnz_glb(), CTF_int::tensor::set_padding(), CTF_int::tensor::set_zero(), SH, CTF_int::tensor::size, CTF_int::tensor::sr, CTF::Timer::start(), CTF::Timer::stop(), CTF_int::summation::sum_tensors(), SY, CTF_int::tensor::sym, TAU_FSTART, TAU_FSTOP, CTF_int::tensor::topo, VPRINTF, CTF_int::tensor::wrld, and CTF_int::tensor::zero_out_padding().
Referenced by CTF_int::summation::estimate_time(), and get_len_ordering().
std::vector<char> CTF_int::det_uniq_inds | ( | std::vector< Term * > const | operands, |
std::vector< char > const | out_inds | ||
) |
Definition at line 422 of file term.cxx.
Referenced by contract_down_terms(), CTF_int::Contract_Term::estimate_time(), CTF_int::Contract_Term::execute(), CTF_int::Sum_Term::get_uniq_inds(), and CTF_int::Contract_Term::get_uniq_inds().
double CTF_int::dgtog_est_time | ( | int64_t | tot_sz, |
int | np | ||
) |
estimates execution time, given this processor sends and receives tot_sz across np procs
[in] | tot_sz | amount of data sent/recved |
[in] | np | number of procs involved |
Definition at line 11 of file dgtog_redist.cxx.
References dgtog_res_mdl.
Referenced by CTF_int::tensor::est_redist_time().
void CTF_int::dgtog_reshuffle | ( | int const * | sym, |
int const * | edge_len, | ||
distribution const & | old_dist, | ||
distribution const & | new_dist, | ||
char ** | ptr_tsr_data, | ||
char ** | ptr_tsr_new_data, | ||
algstrct const * | sr, | ||
CommData | ord_glb_comm | ||
) |
Definition at line 74 of file dgtog_redist.cxx.
Referenced by dgtog_reshuffle().
void CTF_int::dnA_spB_seq_sum | ( | char const * | alpha, |
char const * | A, | ||
algstrct const * | sr_A, | ||
int | order_A, | ||
int const * | edge_len_A, | ||
int const * | sym_A, | ||
char const * | beta, | ||
char const * | B, | ||
int64_t | size_B, | ||
char *& | new_B, | ||
int64_t & | new_size_B, | ||
algstrct const * | sr_B, | ||
univar_function const * | func | ||
) |
performs summation between two sparse tensors; assumes B contains key-value pairs sorted by key, with index permutation preapplied and with no repeated indices
[in] | alpha | scaling factor of A |
[in] | A | data of right operand |
[in] | sr_A | algebraic structure of right operand |
[in] | order_A | order of tensor A |
[in] | edge_len_A | dimensions of tensor A |
[in] | sym_A | symmetry relations of tensor A |
[in] | beta | scaling factor of left operand |
[in] | B | data of left operand |
[in] | size_B | number of nonzero entries in left operand |
[in,out] | new_B | new data of output |
[in,out] | new_size_B | number of nonzero entries in output |
[in] | sr_B | algebraic structure of left operand and output |
[in] | func | function (or NULL) to apply to right operand |
Definition at line 149 of file spr_seq_sum.cxx.
Referenced by CTF_int::seq_tsr_spsum::run().
void CTF_int::dump_all_models | ( | std::string | path | ) |
Definition at line 50 of file model.cxx.
References get_all_models().
Referenced by CTF_int::Model::dump_data(), and train_all().
double CTF_int::est_time_transp | ( | int | order, |
int const * | new_order, | ||
int const * | edge_len, | ||
int | dir, | ||
algstrct const * | sr | ||
) |
estimates time needed to transpose a non-symmetric (folded) tensor based on performance models
[in] | order | dimension of tensor |
[in] | new_order | new ordering of dimensions |
[in] | edge_len | original edge lengths |
[in] | dir | which way are we going? |
[in] | sr | algstrct defining element size |
Definition at line 757 of file nosym_transp.cxx.
References long_contig_transp_mdl, non_contig_transp_mdl, and shrt_contig_transp_mdl.
Referenced by CTF_int::tensor::est_time_unfold(), CTF_int::summation::estimate_time(), and get_len_ordering().
double CTF_int::estimate_download_time | ( | int64_t | size | ) |
estimate time it takes to download
double CTF_int::estimate_upload_time | ( | int64_t | size | ) |
estimate time it takes to upload
void CTF_int::extract_free_comms | ( | topology const * | topo, |
int | order_A, | ||
mapping const * | edge_map_A, | ||
int | order_B, | ||
mapping const * | edge_map_B, | ||
int & | num_sub_phys_dims, | ||
CommData ** | psub_phys_comm, | ||
int ** | pcomm_idx | ||
) |
extracts the set of physical dimensions still available for mapping
[in] | topo | topology |
[in] | order_A | dimension of A |
[in] | edge_map_A | mapping of A |
[in] | order_B | dimension of B |
[in] | edge_map_B | mapping of B |
[out] | num_sub_phys_dims | number of free torus dimensions |
[out] | psub_phys_comm | the torus dimensions |
[out] | pcomm_idx | index of the free torus dimensions in the origin topology |
Definition at line 628 of file topology.cxx.
References alloc_ptr(), CTF_int::mapping::cdt, CTF_int::mapping::child, CTF_int::topology::dim_comm, CTF_int::mapping::has_child, CTF_int::topology::order, PHYSICAL_MAP, and CTF_int::mapping::type.
Referenced by get_len_ordering().
int64_t CTF_int::fact | ( | int64_t | n | ) |
Definition at line 277 of file util.cxx.
Referenced by align_symmetric_indices(), chchoose(), choose(), coalesce_bwd(), and overcounting_factor().
void CTF_int::factorize | ( | int | n, |
int * | nfactor, | ||
int ** | factor | ||
) |
computes a factorization of the positive integer n
[in] | n | a positive number |
[out] | nfactor | number of factors in n |
[out] | factor | array of length nfactor, corresponding to factorization of n |
Definition at line 170 of file util.cxx.
References alloc().
Referenced by fft(), get_generic_topovec(), and get_phys_topo().
searches for an equivalent topology in a vector of topologies
[in] | topo | topology to match |
[in] | topovec | vector of existing parameters |
Definition at line 571 of file topology.cxx.
References CTF_int::topology::lens, and CTF_int::topology::order.
Referenced by peel_perm_torus(), peel_torus(), and CTF_int::tensor::set_distribution().
void CTF_int::flops_add | ( | int64_t | n | ) |
Definition at line 173 of file common.cxx.
int CTF_int::free_cond | ( | void * | ptr | ) |
|
inline |
void CTF_int::gemm | ( | char | tA, |
char | tB, | ||
int | m, | ||
int | n, | ||
int | k, | ||
dtype | alpha, | ||
dtype const * | A, | ||
dtype const * | B, | ||
dtype | beta, | ||
dtype * | C | ||
) |
Definition at line 82 of file semiring.cxx.
Referenced by CTF::Bivar_Kernel< dtype_A, dtype_B, dtype_C, f, g >::cgemm(), CTF::Semiring< dtype, is_ord >::gemm_batch(), get_grp_ptrs(), CTF_int::algstrct::has_mul(), and sym_seq_ctr_inr().
void CTF_int::gemm_batch | ( | char | taA, |
char | taB, | ||
int | l, | ||
int | m, | ||
int | n, | ||
int | k, | ||
dtype | alpha, | ||
dtype const * | A, | ||
dtype const * | B, | ||
dtype | beta, | ||
dtype * | C | ||
) |
Definition at line 15 of file semiring.cxx.
References ctf.core::dtype, and get_grp_ptrs().
Referenced by get_grp_ptrs(), and CTF_int::algstrct::has_mul().
std::vector<Model*>& CTF_int::get_all_models | ( | ) |
Definition at line 10 of file model.cxx.
Referenced by dump_all_models(), CTF_int::LinModel< nparam >::LinModel(), load_all_models(), print_all_models(), update_all_models(), and write_all_models().
std::vector< topology* > CTF_int::get_all_topos | ( | CommData | cdt, |
int | n_uf, | ||
int const * | uniq_fact, | ||
int const * | mults, | ||
int | n_prepend, | ||
int const * | prelens | ||
) |
computes all unique factorizations into non-primes each yielding a topology, prepending additional factors as specified
[in] | cdt | global communicator |
[in] | n_uf | number of unique prime factors |
[in] | uniq_fact | list of prime factors |
[in] | n_prepend | number of factors to prepend |
[in] | mults | ? |
[in] | prelens | factors to prepend |
Definition at line 410 of file topology.cxx.
References ASSERT, and CTF_int::topology::topology().
Referenced by get_generic_topovec().
int CTF_int::get_best_topo | ( | int64_t | nvirt, |
int | topo, | ||
CommData | global_comm, | ||
int64_t | bcomm_vol = 0 , |
||
int64_t | bmemuse = 0 |
||
) |
get the best topologies (least nvirt) over all procs
[in] | nvirt | best virtualization achieved by this proc |
[in] | topo | topology index corresponding to best virtualization |
[in] | global_comm | is the global communicator |
[in] | bcomm_vol | best comm volume computed |
[in] | bmemuse | best memory usage computed; returns virtualization factor |
Definition at line 591 of file topology.cxx.
References ASSERT, and CTF_int::CommData::cm.
Referenced by CTF_int::scaling::execute(), CTF_int::summation::is_equal(), and CTF_int::tensor::set_zero().
void CTF_int::get_choice | ( | int64_t | n, |
int64_t | k, | ||
int64_t | ch, | ||
int * | chs | ||
) |
Definition at line 289 of file util.cxx.
References calc_idx_arr(), NS, and SH.
Referenced by coalesce_bwd(), and get_len_ordering().
int64_t CTF_int::get_coo_size | ( | int64_t | nnz, |
int | val_size | ||
) |
Definition at line 7 of file coo.cxx.
Referenced by CTF_int::COO_Matrix::COO_Matrix(), CTF_int::COO_Matrix::size(), and CTF_int::tensor::spmatricize().
int64_t CTF_int::get_csr_size | ( | int64_t | nnz, |
int | nrow, | ||
int | val_size | ||
) |
computes the size of a serialized CSR matrix
[in] | nnz | number of nonzeros in matrix |
[in] | nrow | number of rows in matrix |
[in] | val_size | size of each matrix entry |
Definition at line 8 of file csr.cxx.
References ALIGN.
Referenced by CTF_int::CSR_Matrix::CSR_Matrix(), CTF_int::CSR_Matrix::partition(), CTF_int::CSR_Matrix::size(), and CTF_int::tensor::spmatricize().
constexpr bool CTF_int::get_default_is_ord | ( | ) |
MPI_Op CTF_int::get_default_maddop | ( | ) |
Definition at line 28 of file monoid.h.
References default_mxpy(), and ctf.core::dtype.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
MPI_Datatype CTF_int::get_default_mdtype | ( | bool & | is_custom | ) |
Definition at line 194 of file set.h.
References ctf.core::dtype, MPI_CTF_BOOL, MPI_CTF_DOUBLE_COMPLEX, and MPI_CTF_LONG_DOUBLE_COMPLEX.
|
inline |
Definition at line 207 of file set.h.
References MPI_CTF_BOOL.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Definition at line 209 of file set.h.
References MPI_CTF_DOUBLE_COMPLEX.
|
inline |
|
inline |
Definition at line 211 of file set.h.
References MPI_CTF_LONG_DOUBLE_COMPLEX.
|
inline |
|
inline |
|
inline |
Definition at line 13 of file distribution.h.
Referenced by CTF_int::distribution::distribution(), CTF_int::tensor::orient_subworld(), and CTF_int::distribution::serialize().
algstrct const * CTF_int::get_double_ring | ( | ) |
Definition at line 10 of file ring.cxx.
References double_ring.
Referenced by CTF_int::Term::operator double().
algstrct const * CTF_int::get_float_ring | ( | ) |
Definition at line 6 of file ring.cxx.
References float_ring.
Referenced by CTF_int::Term::operator float().
int64_t CTF_int::get_flops | ( | ) |
Definition at line 177 of file common.cxx.
References total_flop_count.
Referenced by CTF::Flop_counter::count(), CTF::Flop_counter::Flop_counter(), and CTF::Flop_counter::zero().
const char* CTF_int::get_fmt | ( | ) |
return format string for templated type
Definition at line 6 of file graph_io_aux.cxx.
References IASSERT.
|
inline |
Definition at line 18 of file graph_io_aux.cxx.
|
inline |
Definition at line 13 of file graph_io_aux.cxx.
|
inline |
Definition at line 23 of file graph_io_aux.cxx.
|
inline |
Definition at line 28 of file graph_io_aux.cxx.
Idx_Tensor* CTF_int::get_full_intm | ( | Idx_Tensor & | A, |
Idx_Tensor & | B, | ||
std::vector< char > | out_inds, | ||
bool | create_dummy = false |
||
) |
Definition at line 121 of file term.cxx.
References alloc(), cdealloc(), CTF::Idx_Tensor::idx_map, CTF::Idx_Tensor::is_intm, CTF_int::tensor::is_sparse, CTF_int::tensor::lens, NS, CTF_int::tensor::order, CTF::Idx_Tensor::parent, CTF_int::tensor::sr, CTF_int::tensor::sym, and CTF_int::tensor::wrld.
Referenced by contract_down_terms(), CTF_int::Sum_Term::estimate_time(), and CTF_int::Sum_Term::execute().
computes all topology configurations given underlying physical topology information
[in] | cdt | global communicator |
Definition at line 449 of file topology.cxx.
References cdealloc(), DPRINTF, factorize(), get_all_topos(), CTF_int::CommData::np, CTF_int::CommData::rank, and CTF_int::topology::topology().
Referenced by CTF::World::~World().
|
inline |
Definition at line 9 of file dgtog_calc_cnt.cxx.
Referenced by calc_sy_pfx(), calc_sy_pfx< 1 >(), and precompute_offsets().
dtype** CTF_int::get_grp_ptrs | ( | int64_t | grp_sz, |
int64_t | ngrp, | ||
dtype const * | data | ||
) |
Definition at line 110 of file semiring.h.
References alloc(), default_gemm< float >(), ctf.core::dtype, gemm(), and gemm_batch().
Referenced by gemm_batch().
function that gets the imaginary part from complex numbers
[in] | A | tensor |
[in] | B | tensor that stores the imaginary part of tensor A |
Definition at line 60 of file ctf_ext.cxx.
References ctf.core::a, ctf.core::dtype, and CTF_int::tensor::order.
template void CTF_int::get_imag< double > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
template void CTF_int::get_imag< float > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
algstrct const * CTF_int::get_int64_t_ring | ( | ) |
Definition at line 18 of file ring.cxx.
References int64_t_ring.
Referenced by CTF_int::Term::operator int64_t().
algstrct const * CTF_int::get_int_ring | ( | ) |
Definition at line 14 of file ring.cxx.
References int_ring.
Referenced by CTF_int::Term::operator int().
void CTF_int::get_len_ordering | ( | tensor const * | A, |
tensor const * | B, | ||
tensor const * | C, | ||
int const * | idx_A, | ||
int const * | idx_B, | ||
int const * | idx_C, | ||
int ** | new_ordering_A, | ||
int ** | new_ordering_B, | ||
int ** | new_ordering_C | ||
) |
find ordering of indices of tensor to reduce to DGEMM (A, B, and C may be permuted)
[in] | A | tensor 1 |
[in] | B | tensor 2 |
[in] | C | tensor 3 |
[in] | idx_A | indices of tensor 1 |
[in] | idx_B | indices of tensor 2 |
[in] | idx_C | indices of tensor 3 |
[out] | new_ordering_A | the new ordering for indices of A |
[out] | new_ordering_B | the new ordering for indices of B |
[out] | new_ordering_C | the new ordering for indices of C |
Definition at line 451 of file contraction.cxx.
References CTF_int::contraction::A, ctf.core::a, ABORT, CTF_int::topology::activate(), CTF_int::algstrct::add(), CTF_int::algstrct::addid(), CTF_int::algstrct::addinv(), align_symmetric_indices(), alloc(), CTF_int::algstrct::alloc(), alloc_ptr(), CTF_int::contraction::alpha, ASSERT, CTF_int::mapping::aug_phys(), CTF_int::mapping::aug_virt(), CTF_int::contraction::B, CTF_int::contraction::beta, CTF_int::contraction::C, calc_dim(), calc_fold_lnmk(), CTF_int::tensor::calc_npe(), CTF_int::tensor::calc_nvirt(), CTF_int::mapping::calc_phase(), CTF_int::mapping::calc_phys_phase(), can_morph(), cdealloc(), CTF_int::mapping::cdt, CTF::World::cdt, CTF_int::ctr_2d_general::cdt_A, CTF_int::spctr_2d_general::cdt_A, CTF_int::ctr_2d_general::cdt_B, CTF_int::spctr_2d_general::cdt_B, CTF_int::ctr_2d_general::cdt_C, CTF_int::spctr_2d_general::cdt_C, check_self_mapping(), CTF_int::mapping::child, choose(), CTF_int::mapping::clear(), CTF_int::tensor::clear_mapping(), CTF_int::CommData::cm, CTF::World::comm, comp_dim_map(), contract_mst(), conv_idx< int >(), CTF_int::algstrct::copy(), copy_mapping(), ctr_2d_gen_build(), CTF_int::ctr_2d_general::ctr_lda_A, CTF_int::spctr_2d_general::ctr_lda_A, CTF_int::spctr_2d_general::ctr_lda_B, CTF_int::ctr_2d_general::ctr_lda_B, CTF_int::spctr_2d_general::ctr_lda_C, CTF_int::ctr_2d_general::ctr_lda_C, CTF_int::spctr_2d_general::ctr_sub_lda_A, CTF_int::ctr_2d_general::ctr_sub_lda_A, CTF_int::spctr_2d_general::ctr_sub_lda_B, CTF_int::ctr_2d_general::ctr_sub_lda_B, CTF_int::spctr_2d_general::ctr_sub_lda_C, CTF_int::ctr_2d_general::ctr_sub_lda_C, CTF_int::tensor::data, CTF_int::topology::deactivate(), CTF_int::algstrct::dealloc(), DEBUG_PRINTF, desymmetrize(), CTF_int::topology::dim_comm, CTF_int::spctr_2d_general::dns_vrt_sz_A, CTF_int::spctr_2d_general::dns_vrt_sz_B, CTF_int::spctr_2d_general::dns_vrt_sz_C, DPRINTF, ctf.core::dtype, CTF_int::ctr_2d_general::edge_len, CTF_int::spctr_2d_general::edge_len, CTF_int::tensor::edge_map, 
CTF_int::accumulatable::el_size, ERROR, CTF_int::tensor::est_redist_time(), CTF_int::ctr::est_time_rec(), est_time_transp(), CTF_int::scaling::execute(), CTF_int::summation::execute(), CTF_int::contraction::execute(), CTF_int::tensor::extract_diag(), extract_free_comms(), get_choice(), CTF_int::tensor::get_redist_mem(), get_sym_perms(), CTF_int::mapping::has_child, CTF_int::algstrct::has_coo_ker, CTF_int::tensor::has_home, CTF_int::tensor::has_zero_edge_len, CTF_int::tensor::home_buffer, CTF_int::tensor::home_size, CTF_int::contraction::idx_A, CTF_int::contraction::idx_B, CTF_int::contraction::idx_C, CTF_int::tensor::inner_ordering, inv_idx(), CTF_int::tensor::is_cyclic, CTF_int::tensor::is_data_aliased, CTF_int::tensor::is_home, CTF_int::tensor::is_mapped, CTF_int::algstrct::is_offloadable(), CTF_int::tensor::is_sparse, CTF_int::algstrct::isequal(), CTF_int::iparam::k, lcm(), CTF_int::topology::lens, CTF_int::tensor::lens, CTF_int::iparam::m, map_symtsr(), map_tensor(), CTF_int::tensor::map_tensor_rem(), MAX, CTF_int::ctr::mem_rec(), MIN, morph_topo(), CTF_int::ctr_2d_general::move_A, CTF_int::spctr_2d_general::move_A, CTF_int::ctr_2d_general::move_B, CTF_int::spctr_2d_general::move_B, CTF_int::ctr_2d_general::move_C, CTF_int::spctr_2d_general::move_C, mst_alloc(), CTF_int::algstrct::mulid(), CTF_int::iparam::n, CTF_int::tensor::name, NEGATIVE, CTF_int::tensor::nnz_blk, CTF_int::tensor::nnz_tot, nosym_transpose(), NOT_MAPPED, CTF_int::mapping::np, CTF::World::np, CTF_int::CommData::np, ctf.core::np(), NS, CTF_int::ctr::num_lyr, CTF_int::iparam::offload, CTF_int::topology::order, CTF_int::tensor::order, overcounting_factor(), CTF_int::tensor::pad_edge_len, CTF_int::algstrct::pair_size(), permute_target(), PHYSICAL_MAP, CTF_int::ctr::print(), CTF_int::tensor::print_map(), proc_bytes_available(), CTF_int::tensor::profile, CTF_int::tensor::pull_alias(), CTF::World::rank, CTF_int::CommData::rank, CTF_int::ctr_virt::rec_ctr, CTF_int::spctr_replicate::rec_ctr, 
CTF_int::ctr_2d_general::rec_ctr, CTF_int::spctr_2d_general::rec_ctr, CTF_int::spctr_virt::rec_ctr, CTF_int::spctr_pin_keys::rec_ctr, CTF_int::ctr_replicate::rec_ctr, CTF_int::tensor::rec_tsr, CTF_int::tensor::redistribute(), CTF_int::tensor::remove_fold(), CTF_int::ctr::run(), CTF_int::algstrct::safecopy(), CTF_int::tensor::scale_diagonals(), CTF_int::tensor::self_reduce(), CTF_int::tensor::set_new_nnz_glb(), CTF_int::tensor::set_padding(), CTF_int::tensor::set_sym(), CTF_int::tensor::set_zero(), sign(), CTF_int::tensor::size, CTF_int::tensor::sparsify(), CTF_int::tensor::sr, SUCCESS, SY, sy_packed_size(), CTF_int::tensor::sym, CTF_int::tensor::sym_table, symmetrize(), CTF_int::iparam::tA, TAU_FSTART, TAU_FSTOP, CTF_int::iparam::tB, CTF_int::iparam::tC, CTF_int::tensor::topo, CTF::World::topovec, CTF_int::mapping::type, CTF_int::tensor::unfold(), VIRTUAL_MAP, VPRINTF, CTF_int::tensor::wrld, and CTF_int::tensor::zero_out_padding().
|
inline |
Definition at line 16 of file dgtog_calc_cnt.cxx.
Referenced by calc_cnt(), calc_cnt< 0 >(), calc_sy_pfx(), calc_sy_pfx< 1 >(), and precompute_offsets().
MPI_Op CTF_int::get_maddop | ( | void(*)(int, dtype const *, dtype *) | fxpy | ) |
bool CTF_int::get_mpi_dt | ( | int64_t | count, |
int64_t | datum_size, | ||
MPI_Datatype & | dt | ||
) |
gives a datatype for arbitrary datum_size, errors if exceeding 32-bits
[in] | count | number of elements we want to communicate |
[in] | datum_size | element size |
[out] | dt | new datatype to pass to MPI routine |
Definition at line 587 of file common.cxx.
References ASSERT, and MPI_CTF_DOUBLE_COMPLEX.
Referenced by CTF_int::tspsum_replicate::run().
std::list<mem_loc>* CTF_int::get_mst | ( | ) |
Definition at line 163 of file memcontrol.cxx.
References mst.
int CTF_int::get_num_instances | ( | ) |
Definition at line 531 of file memcontrol.cxx.
References instance_counter.
Referenced by CTF::World::~World().
void CTF_int::get_perm | ( | int | perm_order, |
ptype | A, | ||
ptype | B, | ||
ptype | C, | ||
ptype & | tA, | ||
ptype & | tB, | ||
ptype & | tC | ||
) |
Definition at line 117 of file contraction.cxx.
get dimension and torus lengths of specified topology
[in] | glb_comm | communicator |
[in] | mach | specified topology |
Definition at line 94 of file topology.cxx.
References alloc(), cdealloc(), ctf.core::dim, factorize(), MIN, NO_TOPOLOGY, CTF_int::CommData::np, ctf.core::np(), CTF_int::topology::order, CTF_int::topology::topology(), TOPOLOGY_8D, TOPOLOGY_BGP, TOPOLOGY_BGQ, and TOPOLOGY_GENERIC.
Referenced by CTF::World::~World().
double CTF_int::get_rand48 | ( | ) |
returns new random number in [0,1)
Definition at line 27 of file common.cxx.
References allred_mdl, allred_mdl_cst, allred_mdl_cst_init, allred_mdl_init, alltoall_mdl, alltoall_mdl_init, alltoallv_mdl, alltoallv_mdl_init, bcast_mdl, bcast_mdl_init, red_mdl, red_mdl_cst, red_mdl_cst_init, red_mdl_init, and rng.
Referenced by CTF::fill_random_base(), CTF::fill_sp_random_base(), and subsample().
function that gets the real part from complex numbers
[in] | A | tensor |
[in] | B | tensor that stores the real part of tensor A |
Definition at line 51 of file ctf_ext.cxx.
References ctf.core::a, ctf.core::dtype, and CTF_int::tensor::order.
template void CTF_int::get_real< double > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
template void CTF_int::get_real< float > | ( | tensor * | A, |
tensor * | B | ||
) |
Referenced by conv_type().
void CTF_int::get_sym_perms | ( | summation const & | sum, |
std::vector< summation > & | perms, | ||
std::vector< int > & | signs | ||
) |
finds all permutations of a summation that must be done for a broken symmetry
[in] | sum | summation specification |
[out] | perms | the permuted summation specifications |
[out] | signs | sign of each summation |
Definition at line 647 of file symmetrization.cxx.
References CTF_int::summation::A, add_sym_perm(), AS, CTF_int::summation::B, CTF_int::summation::idx_A, CTF_int::summation::idx_B, NS, CTF_int::tensor::order, sign(), and CTF_int::tensor::sym.
Referenced by CTF_int::summation::estimate_time(), and get_len_ordering().
void CTF_int::get_sym_perms | ( | contraction const & | ctr, |
std::vector< contraction > & | perms, | ||
std::vector< int > & | signs | ||
) |
finds all permutations of a contraction that must be done for a broken symmetry
[in] | ctr | contraction specification |
[out] | perms | the permuted contraction specifications |
[out] | signs | sign of each contraction |
Definition at line 691 of file symmetrization.cxx.
References CTF_int::contraction::A, add_sym_perm(), AS, CTF_int::contraction::B, CTF_int::contraction::C, CTF_int::contraction::idx_A, CTF_int::contraction::idx_B, CTF_int::contraction::idx_C, NS, CTF_int::tensor::order, sign(), and CTF_int::tensor::sym.
char * CTF_int::glb_cyclic_reshuffle | ( | int const * | sym, |
distribution const & | old_dist, | ||
int const * | old_offsets, | ||
int *const * | old_permutation, | ||
distribution const & | new_dist, | ||
int const * | new_offsets, | ||
int *const * | new_permutation, | ||
char ** | ptr_tsr_data, | ||
char ** | ptr_tsr_cyclic_data, | ||
algstrct const * | sr, | ||
CommData | ord_glb_comm, | ||
bool | reuse_buffers, | ||
char const * | alpha, | ||
char const * | beta | ||
) |
Goes from any set of phases to any new set of phases.
[in] | sym | symmetry relations between tensor dimensions |
[in] | old_dist | starting data distribution |
[in] | old_offsets | old offsets of each tensor edge (corner 1 of slice) |
[in] | old_permutation | permutation array for each edge length (no perm if NULL) |
[in] | new_dist | target data distribution |
[in] | new_offsets | new offsets of each tensor edge (corner 1 of slice) |
[in] | new_permutation | permutation array for each edge length (no perm if NULL) |
[in] | ptr_tsr_data | starting data buffer |
[out] | ptr_tsr_cyclic_data | target data buffer |
[in] | sr | algstrct defining data |
[in] | ord_glb_comm | communicator on which to redistribute |
[in] | reuse_buffers | if 1: ptr_tsr_cyclic_data is allocated dynamically and ptr_tsr_data is overwritten with intermediate data; if 0: ptr_tsr_cyclic_data is preallocated and can be scaled by beta, however, more memory is used for temp buffers |
[in] | alpha | scaling tensor for new data |
[in] | beta | scaling tensor for original data |
Definition at line 879 of file glb_cyclic_reshuffle.cxx.
References CTF_int::algstrct::acc(), CTF_int::algstrct::addid(), alloc_ptr(), ASSERT, calc_cnt_displs(), cdealloc(), compute_bucket_offsets(), CTF_int::algstrct::copy(), cyclic_reshuffle(), ctf.core::dim, CTF_int::accumulatable::el_size, glb_ord_pup(), CTF_int::distribution::is_cyclic, CTF_int::algstrct::isequal(), MAX, mst_alloc_ptr(), CTF_int::algstrct::mulid(), CTF_int::CommData::np, ctf.core::np(), CTF_int::distribution::order, order_globally(), CTF_int::distribution::pad_edge_len, CTF_int::distribution::padding, CTF_int::distribution::pe_lda, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, CTF_int::CommData::rank, CTF_int::distribution::size, sy_packed_size(), TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.
void CTF_int::glb_ord_pup | ( | int const * | sym, |
distribution const & | old_dist, | ||
distribution const & | new_dist, | ||
int const * | len, | ||
int const * | old_phys_dim, | ||
int const * | old_phys_edge_len, | ||
int const * | old_virt_edge_len, | ||
int64_t | old_virt_nelem, | ||
int const * | old_offsets, | ||
int *const * | old_permutation, | ||
int | total_np, | ||
int const * | new_phys_dim, | ||
int const * | new_phys_edge_len, | ||
int const * | new_virt_edge_len, | ||
int64_t | new_virt_nelem, | ||
char * | old_data, | ||
char ** | new_data, | ||
int | forward, | ||
int *const * | bucket_offset, | ||
char const * | alpha, | ||
char const * | beta, | ||
algstrct const * | sr | ||
) |
Definition at line 8 of file glb_cyclic_reshuffle.cxx.
References ABORT, CTF_int::algstrct::acc(), CTF_int::algstrct::addid(), alloc_ptr(), ASSERT, calc_idx_arr(), cdealloc(), CTF_int::algstrct::copy(), ctf.core::dim, CTF_int::accumulatable::el_size, CTF_int::algstrct::isequal(), lcm(), MAX, MIN, mst_alloc_ptr(), CTF_int::algstrct::mulid(), NS, CTF_int::distribution::order, packed_size(), CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, ctf.core::rank(), SY, sy_packed_size(), TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.
Referenced by glb_cyclic_reshuffle().
void CTF_int::handler | ( | ) |
Definition at line 181 of file common.cxx.
References ctf.core::array(), and ctf.core::rank().
void CTF_int::host_pinned_alloc | ( | void ** | ptr, |
int64_t | size | ||
) |
allocate a pinned host buffer
[out] | ptr | pointer to define |
[in] | size | amount of buffer space to allocate |
Referenced by CTF_int::ctr_2d_general::run(), and CTF_int::spctr_2d_general::run().
void CTF_int::host_pinned_free | ( | void * | ptr | ) |
free a pinned host buffer
[in] | ptr | pointer to free |
Referenced by CTF_int::ctr_2d_general::run(), and CTF_int::spctr_2d_general::run().
bool CTF_int::hptt_is_applicable | ( | int | order, |
int const * | new_order, | ||
int | elementSize | ||
) |
Checks if the HPTT library is applicable.
[in] | order | dimension of tensor |
[in] | new_order | new ordering of dimensions |
[in] | elementSize | element size |
Definition at line 319 of file nosym_transp.cxx.
Referenced by nosym_transpose().
void CTF_int::inc_tot_mem_used | ( | int64_t | a | ) |
Definition at line 80 of file memcontrol.cxx.
References ctf.core::a, and ASSERT.
Referenced by CTF_int::tensor::deregister_size(), and CTF_int::tensor::register_size().
void CTF_int::init_rng | ( | int | rank | ) |
initializes the random number generator
[in] | rank | processor index |
Definition at line 23 of file common.cxx.
References rng.
Referenced by ctf.random::all_seed(), ctf.random::seed(), and CTF::World::~World().
void CTF_int::inv_idx | ( | int | order_A, |
int const * | idx_A, | ||
int * | order_tot, | ||
int ** | idx_arr | ||
) |
invert index map
[in] | order_A | number of dimensions of A |
[in] | idx_A | index map of A |
[in] | order_B | number of dimensions of B |
[in] | idx_B | index map of B |
[out] | order_tot | number of total dimensions |
[out] | idx_arr | 2*order_tot index array |
[in] | order_A | number of dimensions of A |
[in] | idx_A | index map of A |
[out] | order_tot | number of total dimensions |
[out] | idx_arr | 2*ndim_tot index array |
Definition at line 19 of file sym_seq_scl.cxx.
References alloc().
void CTF_int::inv_idx | ( | int | order_A, |
int const * | idx_A, | ||
int | order_B, | ||
int const * | idx_B, | ||
int | order_C, | ||
int const * | idx_C, | ||
int * | order_tot, | ||
int ** | idx_arr | ||
) |
invert index map
[in] | order_A | number of dimensions of A |
[in] | idx_A | index map of A |
[in] | order_B | number of dimensions of B |
[in] | idx_B | index map of B |
[in] | order_C | number of dimensions of C |
[in] | idx_C | index map of C |
[out] | order_tot | number of total dimensions |
[out] | idx_arr | 3*order_tot index array |
Definition at line 592 of file ctr_tsr.cxx.
References alloc(), CTF_int::seq_tsr_ctr::order_A, CTF_int::seq_tsr_ctr::order_B, and CTF_int::seq_tsr_ctr::order_C.
Referenced by add_sym_perm(), calc_fold_lnmk(), copy_mapping(), CTF_int::seq_tsr_spctr::est_fp(), CTF_int::seq_tsr_ctr::est_fp(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), get_len_ordering(), CTF_int::summation::is_equal(), CTF_int::seq_tsr_scl::seq_tsr_scl(), spA_dnB_dnC_seq_ctr(), CTF_int::summation::sum_tensors(), sym_seq_ctr_cust(), sym_seq_ctr_inr(), sym_seq_ctr_ref(), sym_seq_scl_cust(), sym_seq_scl_ref(), sym_seq_sum_cust(), sym_seq_sum_inr(), sym_seq_sum_ref(), CTF_int::endomorphism::~endomorphism(), CTF_int::seq_tsr_ctr::~seq_tsr_ctr(), and CTF_int::seq_tsr_sum::~seq_tsr_sum().
void CTF_int::inv_idx | ( | int | order_A, |
int const * | idx_A, | ||
int | order_B, | ||
int const * | idx_B, | ||
int * | order_tot, | ||
int ** | idx_arr | ||
) |
invert index map
[in] | order_A | number of dimensions of A |
[in] | idx_A | index map of A |
[in] | order_B | number of dimensions of B |
[in] | idx_B | index map of B |
[out] | order_tot | number of total dimensions |
[out] | idx_arr | 2*order_tot index array |
Definition at line 913 of file spsum_tsr.cxx.
References alloc().
|
inline |
Definition at line 340 of file util.h.
References gcd().
Referenced by calc_drv_displs(), ctr_2d_gen_build(), dgtog_reshuffle(), get_len_ordering(), glb_ord_pup(), map_symtsr(), map_tensor(), and CTF_int::tensor::set_zero().
|
inline |
Copies submatrix to submatrix (column-major)
[in] | el_size | element size |
[in] | nrow | number of rows |
[in] | ncol | number of columns |
[in] | lda_A | lda along rows for A |
[in] | lda_B | lda along rows for B |
[in] | A | matrix to read from |
[in,out] | B | matrix to write to |
Definition at line 355 of file util.h.
Referenced by CTF_int::LinModel< nparam >::update().
void CTF_int::load_all_models | ( | std::string | file_name | ) |
Definition at line 34 of file model.cxx.
References get_all_models().
Referenced by CTF_int::Model::dump_data(), and CTF::World::~World().
int CTF_int::map_self_indices | ( | tensor const * | tsr, |
int const * | idx_map | ||
) |
create virtual mapping for idx_maps that have repeating indices
[in] | tsr | tensor handle |
[in] | idx_map | mapping of tensor indices to contraction map |
Definition at line 423 of file mapping.cxx.
References alloc_ptr(), cdealloc(), CTF_int::tensor::edge_map, CTF_int::mapping::has_child, map_symtsr(), NOT_MAPPED, CTF_int::mapping::np, CTF_int::tensor::order, SUCCESS, CTF_int::tensor::sym_table, CTF_int::mapping::type, and VIRTUAL_MAP.
Referenced by CTF_int::scaling::execute(), and CTF_int::summation::is_equal().
int CTF_int::map_symtsr | ( | int | tsr_order, |
int const * | tsr_sym_table, | ||
mapping * | tsr_edge_map | ||
) |
adjust a mapping to maintain symmetry
[in] | tsr_order | is the number of dimensions of the tensor |
[in] | tsr_sym_table | the symmetry table of a tensor |
[in,out] | tsr_edge_map | is the mapping |
Definition at line 470 of file mapping.cxx.
References ASSERT, CTF_int::mapping::calc_phase(), CTF_int::mapping::child, CTF_int::mapping::has_child, lcm(), CTF_int::mapping::mapping(), MAX_PHASE, MAXLOOP, NEGATIVE, NOT_MAPPED, CTF_int::mapping::np, PHYSICAL_MAP, SUCCESS, CTF_int::mapping::type, and VIRTUAL_MAP.
Referenced by get_len_ordering(), map_self_indices(), and map_tensor().
int CTF_int::map_tensor | ( | int | num_phys_dims, |
int | tsr_order, | ||
int const * | tsr_edge_len, | ||
int const * | tsr_sym_table, | ||
int * | restricted, | ||
CommData * | phys_comm, | ||
int const * | comm_idx, | ||
int | fill, | ||
mapping * | tsr_edge_map | ||
) |
map a tensor
[in] | num_phys_dims | number of physical processor grid dimensions |
[in] | tsr_order | number of tensor dimensions |
[in] | tsr_edge_len | edge lengths of the tensor |
[in] | tsr_sym_table | the symmetry table of a tensor |
[in,out] | restricted | an array used to restrict the mapping of tensor dims |
[in] | phys_comm | dimensional communicators |
[in] | comm_idx | dimensional ordering |
[in] | fill | if set does recursive mappings and uses all phys dims |
[in,out] | tsr_edge_map | mapping of tensor |
Definition at line 244 of file mapping.cxx.
References CTF_int::mapping::calc_phys_phase(), CTF_int::mapping::cdt, CTF_int::mapping::child, CTF_int::mapping::has_child, lcm(), map_symtsr(), CTF_int::mapping::mapping(), MAX_PHASE, NEGATIVE, NOT_MAPPED, CTF_int::mapping::np, CTF_int::CommData::np, PHYSICAL_MAP, SUCCESS, CTF_int::mapping::type, and VIRTUAL_MAP.
Referenced by get_len_ordering(), CTF_int::summation::is_equal(), CTF_int::tensor::map_tensor_rem(), and CTF_int::tensor::set_zero().
Definition at line 109 of file ctf_ext.cxx.
References CTF_int::accumulatable::el_size, CTF::Matrix< dtype >::qr(), and CTF_int::tensor::sr.
Referenced by ctf.core::MPI_Stop(), and ctf.core::qr().
Definition at line 141 of file ctf_ext.cxx.
References CTF_int::accumulatable::el_size, CTF::Matrix< dtype >::qr(), and CTF_int::tensor::sr.
Referenced by ctf.core::MPI_Stop(), and ctf.core::qr().
Definition at line 175 of file ctf_ext.cxx.
References CTF_int::accumulatable::el_size, CTF_int::tensor::sr, and CTF::Matrix< dtype >::svd().
Referenced by ctf.core::MPI_Stop(), and ctf.core::svd().
Definition at line 213 of file ctf_ext.cxx.
References CTF_int::accumulatable::el_size, CTF_int::tensor::sr, and CTF::Matrix< dtype >::svd().
Referenced by ctf.core::MPI_Stop(), and ctf.core::svd().
void CTF_int::mem_create | ( | ) |
create instance of memory manager
Definition at line 187 of file memcontrol.cxx.
References max_threads.
Referenced by CTF::World::~World().
void CTF_int::mem_exit | ( | int | rank | ) |
exit instance of memory manager
[in] | rank | processor index |
Definition at line 207 of file memcontrol.cxx.
References max_threads, and mst_buffer_ptr.
Referenced by CTF::World::~World().
void CTF_int::morph_topo | ( | topology const * | new_topo, |
topology const * | old_topo, | ||
int | order, | ||
mapping * | edge_map | ||
) |
morphs a tensor topology into another
[in] | new_topo | topology to change to |
[in] | old_topo | topology we are changing from |
[in] | order | number of tensor dimensions |
[in,out] | edge_map | mapping whose topology mapping we are changing |
Definition at line 700 of file topology.cxx.
References alloc_ptr(), ASSERT, cdealloc(), CTF_int::mapping::cdt, CTF_int::mapping::child, CTF_int::mapping::clear(), CTF_int::topology::dim_comm, CTF_int::mapping::has_child, CTF_int::topology::lda, CTF_int::mapping::np, CTF_int::CommData::np, ctf.core::np(), CTF_int::topology::order, PHYSICAL_MAP, CTF_int::mapping::type, and VIRTUAL_MAP.
Referenced by get_len_ordering().
void * CTF_int::mst_alloc | ( | int64_t const | len | ) |
mst_alloc allocates buffer on the specialized memory stack
[in] | len | number of bytes |
Definition at line 307 of file memcontrol.cxx.
References ASSERT, mst_alloc_ptr(), and SUCCESS.
Referenced by contract_mst(), and get_len_ordering().
int CTF_int::mst_alloc_ptr | ( | int64_t const | len, |
void **const | ptr | ||
) |
mst_alloc abstraction
[in] | len | number of bytes |
[in,out] | ptr | pointer to set to new allocation address |
Definition at line 269 of file memcontrol.cxx.
References ALIGN_BYTES, alloc_ptr(), ASSERT, DPRINTF, CTF_int::mem_loc::len, MST_ALIGN_BYTES, mst_buffer_ptr, mst_buffer_used, CTF_int::mem_loc::ptr, and SUCCESS.
Referenced by CTF_int::CommData::all_to_allv(), bcast_step(), calc_cnt_displs(), cyclic_reshuffle(), glb_cyclic_reshuffle(), glb_ord_pup(), mst_alloc(), pad_cyclic_pup_virt_buff(), CTF_int::ctr_2d_general::run(), CTF_int::spctr_2d_general::run(), and CTF_int::spctr_pin_keys::run().
void CTF_int::mst_create | ( | int64_t | size | ) |
initializes stack buffer
Definition at line 170 of file memcontrol.cxx.
References ALIGN_BYTES, and ASSERT.
Referenced by CTF::World::~World().
int CTF_int::mst_free | ( | void * | ptr | ) |
frees buffer allocated on stack
[in] | ptr | pointer to buffer on stack |
Definition at line 234 of file memcontrol.cxx.
References ABORT, ASSERT, ERROR, ctf.core::it, and SUCCESS.
Referenced by cdealloc().
void CTF_int::muladd_csrmm | ( | int | m, |
int | n, | ||
int | k, | ||
dtype | alpha, | ||
dtype const * | A, | ||
int const * | JA, | ||
int const * | IA, | ||
int | nnz_A, | ||
dtype const * | B, | ||
dtype | beta, | ||
dtype * | C | ||
) |
Definition at line 332 of file semiring.cxx.
References ctf.core::dtype, and muladd_csrmultd().
Referenced by default_coomm< std::complex< double > >().
void CTF_int::muladd_csrmultd | ( | int | m, |
int | n, | ||
int | k, | ||
dtype const * | A, | ||
int const * | JA, | ||
int const * | IA, | ||
int | nnz_A, | ||
dtype const * | B, | ||
int const * | JB, | ||
int const * | IB, | ||
int | nnz_B, | ||
dtype * | C | ||
) |
Definition at line 370 of file semiring.cxx.
References CTF::Semiring< dtype, is_ord >::default_csrmm().
Referenced by muladd_csrmm().
void CTF_int::nosym_transpose | ( | tensor * | A, |
int | all_fdim_A, | ||
int const * | all_flen_A, | ||
int const * | new_order, | ||
int | dir | ||
) |
Definition at line 403 of file nosym_transp.cxx.
References CTF_int::algstrct::alloc(), CTF_int::tensor::calc_nvirt(), CTF_int::tensor::data, CTF_int::algstrct::dealloc(), CTF_int::accumulatable::el_size, CTF_int::tensor::has_home, CTF_int::tensor::home_buffer, hptt_is_applicable(), CTF_int::tensor::is_home, CTF_int::tensor::left_home_transp, long_contig_transp_mdl, non_contig_transp_mdl, nosym_transpose_hptt(), shrt_contig_transp_mdl, CTF_int::tensor::size, CTF_int::tensor::sr, TAU_FSTART, and TAU_FSTOP.
Referenced by bench_nosym_transp(), CTF_int::summation::estimate_time(), get_len_ordering(), nosym_transpose(), and CTF_int::tensor::unfold().
void CTF_int::nosym_transpose | ( | int | order, |
int const * | new_order, | ||
int const * | edge_len, | ||
char * | data, | ||
int | dir, | ||
algstrct const * | sr | ||
) |
transposes a non-symmetric (folded) tensor
[in] | order | dimension of tensor |
[in] | new_order | new ordering of dimensions |
[in] | edge_len | original edge lengths |
[in,out] | data | data to transpose |
[in] | dir | which way are we going? |
[in] | sr | algstrct defining element size |
Definition at line 519 of file nosym_transp.cxx.
References alloc_ptr(), cdealloc(), CTF_int::accumulatable::el_size, MIN, nosym_transpose(), and TAU_FSTOP.
void CTF_int::nosym_transpose | ( | int | order, |
int const * | new_order, | ||
int const * | edge_len, | ||
char const * | data, | ||
int | dir, | ||
int | max_ntd, | ||
char ** | tswap_data, | ||
int64_t * | chunk_size, | ||
algstrct const * | sr | ||
) |
transposes a non-symmetric (folded) tensor internal kernel
[in] | order | dimension of tensor |
[in] | new_order | new ordering of dimensions |
[in] | edge_len | original edge lengths |
[in] | data | data to transpose |
[in] | dir | which way are we going? |
[in] | max_ntd | how many threads to use |
[out] | tswap_data | transposed data |
[out] | chunk_size | chunk sizes of transposed data |
[in] | sr | algstrct defining element size |
Definition at line 586 of file nosym_transp.cxx.
References alloc_ptr(), ASSERT, cdealloc(), CTF_int::algstrct::copy(), CTF_int::accumulatable::el_size, TAU_FSTART, and TAU_FSTOP.
void CTF_int::nosym_transpose_hptt | ( | int | order, |
int const * | edge_len, | ||
int | dir, | ||
tensor *& | A | ||
) |
High-performance implementation of nosym_transpose using HPTT.
[in] | order | dimension of tensor |
[in] | edge_len | original edge lengths |
[in] | dir | which way are we going? |
[in,out] | A | tensor to be transposed |
void CTF_int::nosym_transpose_hptt | ( | int | order, |
int const * | st_new_order, | ||
int const * | st_edge_len, | ||
int | dir, | ||
char const * | st_buffer, | ||
char * | new_buffer, | ||
algstrct const * | sr | ||
) |
Definition at line 332 of file nosym_transp.cxx.
References ABORT, CTF_int::accumulatable::el_size, and MIN.
Referenced by nosym_transpose().
void CTF_int::offload_exit | ( | ) |
exit offloading, e.g. destroy cublas
Referenced by CTF::World::~World().
void CTF_int::offload_gemm | ( | char | tA, |
char | tB, | ||
int | m, | ||
int | n, | ||
int | k, | ||
dtype | alpha, | ||
offload_tsr & | A, | ||
int | lda_A, | ||
offload_tsr & | B, | ||
int | lda_B, | ||
dtype | beta, | ||
offload_tsr & | C, | ||
int | lda_C | ||
) |
void CTF_int::offload_gemm | ( | char | tA, |
char | tB, | ||
int | m, | ||
int | n, | ||
int | k, | ||
dtype | alpha, | ||
dtype const * | dev_A, | ||
int | lda_A, | ||
dtype const * | dev_B, | ||
int | lda_B, | ||
dtype | beta, | ||
dtype * | dev_C, | ||
int | lda_C | ||
) |
void CTF_int::offload_init | ( | ) |
initialize offloading, e.g. create cublas
Referenced by CTF::World::~World().
void CTF_int::ord_glb | ( | int const * | sym, |
distribution const & | dist, | ||
int const * | virt_edge_len, | ||
int const * | virt_phase_lda, | ||
int64_t | vbs, | ||
bool | dir, | ||
char const * | tsr_data_in, | ||
char * | tsr_data_out, | ||
algstrct const * | sr, | ||
int | prev_idx = 0 , |
||
int64_t | glb_ord_offset = 0 , |
||
int64_t | blk_ord_offset = 0 |
||
) |
Definition at line 533 of file glb_cyclic_reshuffle.cxx.
References NS, and CTF_int::distribution::virt_phase.
Referenced by ord_glb_omp().
|
inline |
Definition at line 566 of file glb_cyclic_reshuffle.cxx.
References NS, and ord_glb< 7 >().
Referenced by ord_glb_omp< 0 >(), and order_globally().
template void CTF_int::ord_glb< 7 > | ( | int const * | sym, |
distribution const & | dist, | ||
int const * | virt_edge_len, | ||
int const * | virt_phase_lda, | ||
int64_t | vbs, | ||
bool | dir, | ||
char const * | tsr_data_in, | ||
char * | tsr_data_out, | ||
algstrct const * | sr, | ||
int | prev_idx, | ||
int64_t | glb_ord_offset, | ||
int64_t | blk_ord_offset | ||
) |
Referenced by ord_glb< 0 >().
void CTF_int::ord_glb_omp | ( | int const * | sym, |
distribution const & | dist, | ||
int const * | virt_edge_len, | ||
int const * | virt_phase_lda, | ||
int64_t | vbs, | ||
bool | dir, | ||
char const * | tsr_data_in, | ||
char * | tsr_data_out, | ||
algstrct const * | sr, | ||
int const * | idx_st, | ||
int const * | idx_end, | ||
int | prev_idx = 0 , |
||
int64_t | glb_ord_offset = 0 , |
||
int64_t | blk_ord_offset = 0 |
||
) |
Definition at line 606 of file glb_cyclic_reshuffle.cxx.
References MIN, NS, ord_glb(), and CTF_int::distribution::virt_phase.
void CTF_int::ord_glb_omp< 0 > | ( | int const * | sym, |
distribution const & | dist, | ||
int const * | virt_edge_len, | ||
int const * | virt_phase_lda, | ||
int64_t | vbs, | ||
bool | dir, | ||
char const * | tsr_data_in, | ||
char * | tsr_data_out, | ||
algstrct const * | sr, | ||
int const * | idx_st, | ||
int const * | idx_end, | ||
int | prev_idx, | ||
int64_t | glb_ord_offset, | ||
int64_t | blk_ord_offset | ||
) |
Definition at line 655 of file glb_cyclic_reshuffle.cxx.
References ord_glb< 0 >(), and ord_glb_omp< 7 >().
template void CTF_int::ord_glb_omp< 7 > | ( | int const * | sym, |
distribution const & | dist, | ||
int const * | virt_edge_len, | ||
int const * | virt_phase_lda, | ||
int64_t | vbs, | ||
bool | dir, | ||
char const * | tsr_data_in, | ||
char * | tsr_data_out, | ||
algstrct const * | sr, | ||
int const * | idx_st, | ||
int const * | idx_end, | ||
int | prev_idx, | ||
int64_t | glb_ord_offset, | ||
int64_t | blk_ord_offset | ||
) |
Referenced by ord_glb_omp< 0 >().
void CTF_int::order_globally | ( | int const * | sym, |
distribution const & | dist, | ||
int const * | virt_edge_len, | ||
int const * | virt_phase_lda, | ||
int64_t | vbs, | ||
bool | dir, | ||
char const * | tsr_data_in, | ||
char * | tsr_data_out, | ||
algstrct const * | sr | ||
) |
reorder local buffer so that elements are ordered according to where they are in the global tensor (interleave virtual blocks)
[in] | sym | symmetry relations between tensor dimensions |
[in] | dist | distribution of data |
[in] | virt_edge_len | dimensions of each block |
[in] | virt_phase_lda | prefix sum of virtual blocks |
[in] | vbs | size of virtual blocks |
[in] | dir | if 1 then go to global layout, if 0 then from it |
[in] | tsr_data_in | starting data buffer |
[out] | tsr_data_out | target data buffer |
[in] | sr | algstrct defining data |
Definition at line 689 of file glb_cyclic_reshuffle.cxx.
References alloc(), ASSERT, CASE_ORD_GLB, cdealloc(), CTF_int::algstrct::copy(), ctf.core::dim, CTF_int::accumulatable::el_size, MIN, NS, ord_glb< 0 >(), CTF_int::distribution::order, sy_calc_idx_arr(), TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.
Referenced by glb_cyclic_reshuffle().
void CTF_int::order_perm | ( | tensor const * | A, |
tensor const * | B, | ||
int * | idx_arr, | ||
int | off_A, | ||
int | off_B, | ||
int * | idx_A, | ||
int * | idx_B, | ||
int & | add_sign, | ||
int & | mod | ||
) |
orders the summation indices of one tensor that don't break summation symmetries
[in] | A | |
[in] | B | |
[in] | idx_arr | inverted summation index map |
[in] | off_A | offset of A in inverted index map |
[in] | off_B | offset of B in inverted index map |
[in] | idx_A | index map of A |
[in] | idx_B | index map of B |
[in,out] | add_sign | sign of contraction |
[in,out] | mod | 1 if sum is permuted |
Definition at line 440 of file symmetrization.cxx.
References AS, ASSERT, MAX, MIN, NS, CTF_int::tensor::order, and CTF_int::tensor::sym.
void CTF_int::order_perm | ( | tensor const * | A, |
tensor const * | B, | ||
tensor const * | C, | ||
int * | idx_arr, | ||
int | off_A, | ||
int | off_B, | ||
int | off_C, | ||
int * | idx_A, | ||
int * | idx_B, | ||
int * | idx_C, | ||
int & | add_sign, | ||
int & | mod | ||
) |
orders the contraction indices of one tensor that don't break contraction symmetries
[in] | A | |
[in] | B | |
[in] | C | |
[in] | idx_arr | inverted contraction index map |
[in] | off_A | offset of A in inverted index map |
[in] | off_B | offset of B in inverted index map |
[in] | off_C | offset of C in inverted index map |
[in] | idx_A | index map of A |
[in] | idx_B | index map of B |
[in] | idx_C | index map of C |
[in,out] | add_sign | sign of contraction |
[in,out] | mod | 1 if permutation done |
Definition at line 492 of file symmetrization.cxx.
References AS, MAX, MIN, NS, CTF_int::tensor::order, and CTF_int::tensor::sym.
Referenced by add_sym_perm().
int64_t CTF_int::packed_size | ( | int | order, |
const int * | len, | ||
const int * | sym | ||
) |
computes the size of a tensor in packed symmetric (SY, SH, or AS) layout
[in] | order | tensor dimension |
[in] | len | tensor edge lengths |
[in] | sym | tensor symmetries |
Definition at line 38 of file util.cxx.
Referenced by calc_idx_arr(), CTF_int::tensor::get_tot_size(), glb_ord_pup(), pad_cyclic_pup_virt_buff(), CTF_int::tensor::read_dense_from_file(), and CTF_int::tensor::write_dense_to_file().
void CTF_int::pad_cyclic_pup_virt_buff | ( | int const * | sym, |
distribution const & | old_dist, | ||
distribution const & | new_dist, | ||
int const * | len, | ||
int const * | old_phys_dim, | ||
int const * | old_phys_edge_len, | ||
int const * | old_virt_edge_len, | ||
int64_t | old_virt_nelem, | ||
int const * | old_offsets, | ||
int *const * | old_permutation, | ||
int | total_np, | ||
int const * | new_phys_dim, | ||
int const * | new_phys_edge_len, | ||
int const * | new_virt_edge_len, | ||
int64_t | new_virt_nelem, | ||
char * | old_data, | ||
char ** | new_data, | ||
int | forward, | ||
int *const * | bucket_offset, | ||
char const * | alpha, | ||
char const * | beta, | ||
algstrct const * | sr | ||
) |
[in] | sym | symmetry relations between tensor dimensions |
[in] | old_dist | starting data distribution |
[in] | new_dist | target data distribution |
[in] | len | non-padded edge lengths of tensor |
[in] | old_phys_dim | edge lengths of the old processor grid |
[in] | old_phys_edge_len | the old tensor processor block lengths |
[in] | old_virt_edge_len | the old tensor block lengths |
[in] | old_virt_nelem | the old number of elements per block |
[in] | old_offsets | old offsets of each tensor edge (corner 1 of slice) |
[in] | old_permutation | permutation array for each edge length (no perm if NULL) |
[in] | total_np | the total number of processors |
[in] | new_phys_dim | edge lengths of the new processor grid |
[in] | new_phys_edge_len | the new tensor processor block lengths |
[in] | new_virt_edge_len | the new tensor block lengths |
[in] | new_virt_nelem | the new number of elements per block |
[in,out] | old_data | the previous set of values stored locally |
[in,out] | new_data | buffers to fill with data to send to each process and virtual bucket |
[in] | forward | is 0 on the receiving side and reverses the role of all the previous parameters |
[in] | bucket_offset | offsets for target index for each dimension |
[in] | alpha | scaling factor for received data |
[in] | beta | scaling factor for previous data |
[in] | sr | algstrct defining elements and ops |
Definition at line 8 of file cyclic_reshuffle.cxx.
References ABORT, CTF_int::algstrct::acc(), CTF_int::algstrct::addid(), alloc_ptr(), ASSERT, calc_idx_arr(), cdealloc(), CTF_int::algstrct::copy(), ctf.core::dim, CTF_int::accumulatable::el_size, CTF_int::algstrct::isequal(), MAX, MIN, mst_alloc_ptr(), CTF_int::algstrct::mulid(), NS, CTF_int::distribution::order, packed_size(), CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, ctf.core::rank(), SY, sy_packed_size(), TAU_FSTART, TAU_FSTOP, and CTF_int::distribution::virt_phase.
Referenced by cyclic_reshuffle().
void CTF_int::pad_key | ( | int | order, |
int64_t | num_pair, | ||
int const * | edge_len, | ||
int const * | padding, | ||
PairIterator | pairs, | ||
algstrct const * | sr, | ||
int const * | offsets = NULL |
||
) |
applies padding to keys
[in] | order | tensor dimension |
[in] | num_pair | number of pairs |
[in] | edge_len | tensor edge lengths |
[in] | padding | padding of tensor (included in edge_len) |
[in,out] | pairs | set of pairs which to pad |
[in] | sr | algstrct defining size of each pair |
[in] | offsets | (default NULL, none applied), offsets keys |
Definition at line 6 of file pad.cxx.
References CTF_int::PairIterator::k(), TAU_FSTART, TAU_FSTOP, and CTF_int::PairIterator::write_key().
Referenced by read_loc_pairs(), CTF_int::tensor::slice(), CTF_int::tensor::sparsify(), and wr_pairs_layout().
void CTF_int::padded_reshuffle | ( | int const * | sym, |
distribution const & | old_dist, | ||
distribution const & | new_dist, | ||
char * | tsr_data, | ||
char ** | tsr_cyclic_data, | ||
algstrct const * | sr, | ||
CommData | ord_glb_comm | ||
) |
Reshuffle elements using key-value pair read/write.
[in] | sym | symmetry relations between tensor dimensions |
[in] | old_dist | starting data distribution |
[in] | new_dist | target data distribution |
[in] | tsr_data | starting data buffer |
[out] | tsr_cyclic_data | target data buffer |
[in] | sr | algstrct defining data |
[in] | ord_glb_comm | communicator on which to redistribute |
Definition at line 8 of file redist.cxx.
References CTF_int::algstrct::addid(), alloc_ptr(), cdealloc(), DEBUG_PRINTF, DPRINTF, CTF_int::accumulatable::el_size, CTF_int::algstrct::mulid(), CTF_int::CommData::np, CTF_int::distribution::order, CTF_int::distribution::pad_edge_len, CTF_int::distribution::padding, CTF_int::distribution::pe_lda, CTF_int::distribution::perank, CTF_int::distribution::phase, CTF_int::distribution::phys_phase, CTF_int::CommData::rank, read_loc_pairs(), CTF_int::algstrct::set(), CTF_int::distribution::size, sy_packed_size(), TAU_FSTART, TAU_FSTOP, CTF_int::distribution::virt_phase, and wr_pairs_layout().
Referenced by CTF_int::tensor::redistribute().
void CTF_int::parse_sparse_tensor_data | ( | char ** | lvals, |
int | order, | ||
dtype const * | pmulid, | ||
int * | lens, | ||
int64_t | nvals, | ||
CTF::Pair< dtype > * | pairs, | ||
bool | with_vals | ||
) |
parse string containing sparse tensor into data
[in] | lvals | array of string, one per line/entry, formatted as i1, ..., i_order v or i1, ..., i_order if with_vals=false |
[in] | order | num modes in tensor |
[in] | pmulid | pointer to multiplicative identity, used only if with_vals=false |
[in] | lens | dimensions of tensor |
[in] | nvals | number of entries in lvals |
[in] | pairs | array of tensor index/value pairs to fill |
[in] | with_vals | whether values are included in file |
Definition at line 45 of file graph_io_aux.cxx.
References CTF::Pair< dtype >::d, ctf.core::dtype, and CTF::Pair< dtype >::k.
folds specified topology and all of its permutations into all configurations of lesser dimensionality
[in] | phys_topology | topology to fold |
[in] | cdt | global communicator |
Definition at line 488 of file topology.cxx.
References find_topology(), CTF_int::topology::lens, CTF_int::topology::order, peel_torus(), and CTF_int::topology::topology().
Referenced by CTF::World::~World().
folds specified topology into all configurations of lesser dimensionality
[in] | topo | topology to fold |
[in] | glb_comm | global communicator |
Definition at line 537 of file topology.cxx.
References alloc(), cdealloc(), find_topology(), CTF_int::topology::lens, CTF_int::topology::order, and CTF_int::topology::topology().
Referenced by peel_perm_torus().
void CTF_int::permute | ( | int | order, |
int const * | perm, | ||
int * | arr | ||
) |
permute an array
order | number of elements |
perm | permutation array |