Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
util.h File Reference
#include "../interface/common.h"
#include <inttypes.h>
#include <execinfo.h>
#include <signal.h>
#include "omp.h"
#include "int_timer.h"
#include "pmpi.h"
#include "fompi_wrapper.h"
Include dependency graph for util.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  CTF_int::mem_transfer
 

Namespaces

 CTF_int
 

Macros

#define USE_OMP
 
#define COST_LATENCY   (1.e-6)
 
#define COST_MEMBW   (1.e-9)
 
#define COST_NETWBW   (5.e-10)
 
#define COST_FLOP   (2.e-11)
 
#define COST_OFFLOADBW   (5.e-10)
 
#define MAX_NVIRT   256
 
#define MIN_NVIRT   1
 
#define ENABLE_ASSERT   0
 
#define ASSERT(...)   do {} while(0 && (__VA_ARGS__))
 
#define REDIST   0
 
#define VERIFY_REMAP   0
 
#define FOLD_TSR   1
 
#define USE_SYM_SUM
 
#define HOME_CONTRACT
 
#define USE_BLOCK_RESHUFFLE
 
#define MAX_ORD   12
 
#define LOOP_MAX_ORD(F, ...)
 
#define ORD_CASE(ord, F, ...)
 
#define ORD_CASE_RET(ord, R, F, ...)
 
#define SWITCH_ORD_CALL(F, act_ord, ...)
 
#define SWITCH_ORD_CALL_RET(R, F, act_ord, ...)
 
#define CTF_COUNT_FLOPS
 
#define CTF_FLOPS_ADD(n)   CTF_int::flops_add(n)
 
#define ABORT
 
#define WRAP(a, b)    ((a + b)%b)
 
#define ALIGN_BYTES   32
 
#define MIN(a, b)   ( ((a) < (b)) ? (a) : (b) )
 
#define MAX(a, b)   ( ((a) > (b)) ? (a) : (b) )
 
#define LOC   do { printf("debug:%s:%d ",__FILE__,__LINE__); } while(0)
 
#define THROW_ERROR(...)   do { printf("error:%s:%d ",__FILE__,__LINE__); printf(__VA_ARGS__); printf("\n"); quit(1); } while(0)
 
#define WARN(...)   do { printf("warning: "); printf(__VA_ARGS__); printf("\n"); } while(0)
 
#define VPRINTF(...)   do { } while (0)
 
#define DPRINTF(...)   do { } while (0)
 
#define DEBUG_PRINTF(...)   do {} while (0)
 
#define RANK_PRINTF(...)   do { } while (0)
 
#define PRINT_INT(var)
 
#define DUMPDEBUG_PRINTF(...)
 
#define TAU_PROFILE(NAME, ARG, USER)
 
#define TAU_PROFILE_TIMER(ARG1, ARG2, ARG3, ARG4)
 
#define TAU_PROFILER_CREATE(ARG1, ARG2, ARG3, ARG4)
 
#define TAU_PROFILE_STOP(ARG)
 
#define TAU_PROFILE_START(ARG)
 
#define TAU_PROFILE_SET_NODE(ARG)
 
#define TAU_PROFILE_SET_CONTEXT(ARG)
 
#define TAU_FSTART(ARG)
 
#define TAU_FSTOP(ARG)
 
#define TIME(STRING)   TAU_PROFILE(STRING, " ", TAU_DEFAULT)
 
#define MST_ALIGN_BYTES   ALIGN_BYTES
 

Functions

int64_t CTF_int::getTotalSystemMemory ()
 
std::list< mem_transfer > CTF_int::contract_mst ()
 gets rid of empty space on the stack More...
 
int CTF_int::untag_mem (void *ptr)
 stops tracking memory allocated by CTF, so user doesn't have to call free More...
 
int CTF_int::free_cond (void *ptr)
 
void CTF_int::mem_create ()
 create instance of memory manager More...
 
void CTF_int::mst_create (int64_t size)
 initializes stack buffer More...
 
void CTF_int::mem_exit (int rank)
 exit instance of memory manager More...
 
void CTF_int::calc_idx_arr (int order, int const *lens, int const *sym, int64_t idx, int *idx_arr)
 
void CTF_int::sy_calc_idx_arr (int order, int const *lens, int const *sym, int64_t idx, int *idx_arr)
 same as calc_idx_arr() except assumes sym is only NS or SY More...
 
void CTF_int::factorize (int n, int *nfactor, int **factor)
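 factorizes n; presumably returns the number of factors via nfactor and the factor array via factor (NOTE: the upstream brief "computes the size of a tensor in packed symmetric layout" appears to be misattached to this function) More...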
 
int CTF_int::gcd (int a, int b)
 
int CTF_int::lcm (int a, int b)
 
void CTF_int::lda_cpy (int el_size, int nrow, int ncol, int lda_A, int lda_B, const char *A, char *B)
 Copies submatrix to submatrix (column-major) More...
 
void CTF_int::coalesce_bwd (int el_size, char *B, char const *B_aux, int k, int n, int kb)
 We receive a contiguous kb-by-n buffer B and a (k-kb)-by-n buffer B_aux, which is the block below. To get a k-by-n buffer, we need to combine this buffer with our original block. Since we are working with column-major ordering, we need to interleave the blocks. That's what this function does. More...
 
void CTF_int::permute (int order, int const *perm, int *arr)
 permute an array More...
 
void CTF_int::permute_target (int order, int const *perm, int *arr)
 permutes a permutation array More...
 
void CTF_int::socopy (int64_t m, int64_t n, int64_t lda_a, int64_t lda_b, int64_t const *sizes_a, int64_t *&sizes_b, int64_t *&offsets_b)
 
void CTF_int::spcopy (int64_t m, int64_t n, int64_t lda_a, int64_t lda_b, int64_t const *sizes_a, int64_t const *offsets_a, char const *a, int64_t const *sizes_b, int64_t const *offsets_b, char *b)
 
int64_t CTF_int::fact (int64_t n)
 
int64_t CTF_int::choose (int64_t n, int64_t k)
 
void CTF_int::get_choice (int64_t n, int64_t k, int64_t ch, int *chs)
 
int64_t CTF_int::chchoose (int64_t n, int64_t k)
 

Macro Definition Documentation

#define ALIGN_BYTES   32

Definition at line 172 of file util.h.

Referenced by CTF_int::alloc_ptr(), CTF_int::mst_alloc_ptr(), and CTF_int::mst_create().

#define ASSERT (   ...)    do {} while(0 && (__VA_ARGS__))

Definition at line 88 of file util.h.

Referenced by CTF_int::CommData::activate(), CTF_int::algstrct::add(), CTF_int::tensor::add_to_subworld(), CTF_int::algstrct::addinv(), CTF_int::algstrct::addmop(), CTF_int::tensor::align(), CTF_int::alloc(), CTF_int::alloc_ptr(), CTF_int::assign_keys(), CTF_int::mapping::aug_virt(), CTF_int::algstrct::axpy(), CTF_int::bcast_step(), CTF_int::bucket_by_pe(), CTF_int::bucket_by_virt(), CTF_int::calc_cnt_displs(), CTF_int::calc_idx_arr(), CTF_int::mapping::calc_phase(), CTF_int::algstrct::cast_double(), CTF_int::algstrct::cast_int(), CTF_int::algstrct::cast_to_double(), CTF_int::algstrct::cast_to_int(), CTF_int::cmp_sym_perms(), CTF_int::CommData::CommData(), CTF_int::comp_dim_map(), CTF_int::algstrct::coo_to_csr(), CTF_int::COO_Matrix::coomm(), CTF_int::algstrct::coomm(), CTF_int::copy_mapping(), CTF_int::tensor::copy_tensor_data(), CTF_int::CSR_Matrix::csr_add(), CTF_int::CSR_Matrix::CSR_Matrix(), CTF_int::algstrct::csr_to_coo(), CTF_int::CSR_Matrix::csrmm(), CTF_int::algstrct::csrmm(), CTF_int::CSR_Matrix::csrmultcsr(), CTF_int::algstrct::csrmultcsr(), CTF_int::CSR_Matrix::csrmultd(), CTF_int::algstrct::csrmultd(), CTF_int::ctr_2d_gen_build(), CTF_int::cyclic_reshuffle(), CTF_int::depermute_keys(), CTF_int::tensor::despmatricize(), dgtog_reshuffle(), CTF_int::distribution::distribution(), CTF_int::seq_tsr_spctr::est_fp(), CTF_int::seq_tsr_ctr::est_fp(), CTF_int::seq_tsr_ctr::est_membw(), CTF_int::spctr_replicate::est_time_fp(), CTF_int::ctr_replicate::est_time_fp(), CTF_int::summation::estimate_time(), CTF_int::Bifun_Term::execute(), CTF_int::scaling::execute(), CTF::Function_timer::Function_timer(), CTF_BLAS::gemm(), CTF_int::algstrct::gemm(), CTF_int::algstrct::gemm_batch(), CTF_int::get_all_topos(), CTF_int::get_best_topo(), CTF_int::COO_Matrix::get_data(), CTF_int::get_len_ordering(), CTF_int::get_mpi_dt(), CTF_int::glb_cyclic_reshuffle(), CTF_int::glb_ord_pup(), CTF_int::inc_tot_mem_used(), CTF_int::tensor::init(), CTF_int::summation::is_equal(), isendrecv< 0 
>(), CTF_int::map_symtsr(), CTF_int::algstrct::max(), CTF_int::algstrct::mdtype(), CTF_int::algstrct::min(), CTF_int::morph_topo(), CTF_int::mst_alloc(), CTF_int::mst_alloc_ptr(), CTF_int::mst_create(), CTF_int::mst_free(), CTF_int::algstrct::mul(), CTF_int::nosym_transpose(), CTF_int::algstrct::offload_gemm(), CTF_int::order_globally(), CTF_int::order_perm(), CTF_int::tensor::orient_subworld(), CTF_int::pad_cyclic_pup_virt_buff(), CTF_int::tensor::permute(), CTF::print_timers(), CTF_int::tensor::pull_alias(), CTF_int::tensor::read_local(), CTF_int::readwrite(), CTF_int::reduce_step_post(), CTF_int::tensor::reduce_sum(), CTF_int::tensor::reduce_sumabs(), CTF_int::tensor::reduce_sumsq(), CTF_int::ctr_virt::run(), CTF_int::spctr_replicate::run(), CTF_int::strp_tsr::run(), CTF_int::scl_virt::run(), CTF_int::tspsum_virt::run(), CTF_int::tspsum_replicate::run(), CTF_int::ctr_2d_general::run(), CTF_int::seq_tsr_spctr::run(), CTF_int::spctr_2d_general::run(), CTF_int::tsum_virt::run(), CTF_int::seq_tsr_ctr::run(), CTF_int::spctr_virt::run(), CTF_int::seq_tsr_sum::run(), CTF_int::spctr_pin_keys::run(), CTF_int::algstrct::safeaddinv(), CTF_int::algstrct::safemul(), CTF_int::algstrct::scal(), CTF_int::scal_diag(), CTF_int::tensor::scale_diagonals(), CTF_int::distribution::serialize(), CTF_int::tensor::set_distribution(), CTF_int::tensor::set_zero(), CTF_int::LinModel< nparam >::should_observe(), CTF_int::tensor::slice(), CTF_int::algstrct::sort(), CTF_int::spA_dnB_dnC_ctrloop(), CTF_int::spA_dnB_dnC_ctrloop< 0 >(), CTF_int::spA_dnB_dnC_seq_ctr(), CTF_int::spA_dnB_seq_sum(), CTF_int::tensor::sparsify(), CTF_int::tensor::spmatricize(), CTF_int::spsfy_tsr(), CTF_int::spspsum(), CTF_int::strip_diag(), CTF_int::summation::sum_tensors(), CTF_int::sy_calc_idx_arr(), CTF_int::sym_seq_ctr_cust(), CTF_int::sym_seq_ctr_inr(), CTF_int::sym_seq_ctr_ref(), CTF_int::sym_seq_scl_cust(), CTF_int::sym_seq_scl_ref(), CTF_int::sym_seq_sum_cust(), CTF_int::sym_seq_sum_inr(), 
CTF_int::sym_seq_sum_ref(), CTF_int::tspsum_permute::tspsum_permute(), CTF_int::tspsum_replicate::tspsum_replicate(), CTF_int::tsum_replicate::tsum_replicate(), CTF_int::tensor::unfold(), CTF_int::wr_pairs_layout(), CTF_int::tensor::write(), CTF_int::zero_padding(), and CTF::World::~World().

#define COST_FLOP   (2.e-11)

Definition at line 56 of file util.h.

#define COST_LATENCY   (1.e-6)

Definition at line 50 of file util.h.

#define COST_MEMBW   (1.e-9)

Definition at line 52 of file util.h.

Referenced by CTF_int::summation::sum_tensors().

#define COST_NETWBW   (5.e-10)

Definition at line 54 of file util.h.

#define COST_OFFLOADBW   (5.e-10)

Definition at line 58 of file util.h.

#define CTF_COUNT_FLOPS

Definition at line 136 of file util.h.

#define DEBUG_PRINTF (   ...)    do {} while (0)

#define DUMPDEBUG_PRINTF (   ...)

Definition at line 257 of file util.h.

#define ENABLE_ASSERT   0

Definition at line 78 of file util.h.

#define FOLD_TSR   1

Definition at line 98 of file util.h.

#define HOME_CONTRACT

Definition at line 100 of file util.h.

#define LOC   do { printf("debug:%s:%d ",__FILE__,__LINE__); } while(0)

Definition at line 184 of file util.h.

#define LOOP_MAX_ORD (   F,
  ... 
)
Value:
F(0,__VA_ARGS__) F(1,__VA_ARGS__) F(2,__VA_ARGS__) F(3,__VA_ARGS__) \
F(4,__VA_ARGS__) F(5,__VA_ARGS__) F(6,__VA_ARGS__) F(7,__VA_ARGS__) \
F(8,__VA_ARGS__) F(9,__VA_ARGS__) F(10,__VA_ARGS__) F(11,__VA_ARGS__)

Definition at line 104 of file util.h.

#define MAX_NVIRT   256

Definition at line 67 of file util.h.

#define MIN_NVIRT   1

Definition at line 71 of file util.h.

#define MST_ALIGN_BYTES   ALIGN_BYTES

Definition at line 284 of file util.h.

Referenced by CTF_int::mst_alloc_ptr().

#define ORD_CASE (   ord,
  F,
  ... 
)
Value:
case ord: \
F<ord>(__VA_ARGS__); \
break;

Definition at line 109 of file util.h.

#define ORD_CASE_RET (   ord,
  R,
  F,
  ... 
)
Value:
case ord: \
R = F<ord>(__VA_ARGS__); \
break;

Definition at line 114 of file util.h.

#define PRINT_INT (   var)

Definition at line 245 of file util.h.

#define RANK_PRINTF (   ...)    do { } while (0)

Definition at line 241 of file util.h.

#define REDIST   0

Definition at line 95 of file util.h.

#define SWITCH_ORD_CALL (   F,
  act_ord,
  ... 
)
Value:
switch (act_ord){ \
LOOP_MAX_ORD(ORD_CASE,F,__VA_ARGS__) \
default: \
ASSERT(0); \
break; \
}
#define ORD_CASE(ord, F,...)
Definition: util.h:109

Definition at line 119 of file util.h.

Referenced by CTF_int::calc_drv_displs(), dgtog_reshuffle(), isendrecv< 0 >(), CTF_int::spA_dnB_dnC_seq_ctr(), CTF_int::spA_dnB_seq_sum(), CTF_int::sym_seq_ctr_cust(), CTF_int::sym_seq_ctr_ref(), and CTF_int::sym_seq_sum_ref().

#define SWITCH_ORD_CALL_RET (   R,
  F,
  act_ord,
  ... 
)
Value:
switch (act_ord){ \
LOOP_MAX_ORD(ORD_CASE_RET,R,F,__VA_ARGS__) \
default: \
ASSERT(0); \
break; \
}
#define ORD_CASE_RET(ord, R, F,...)
Definition: util.h:114

Definition at line 127 of file util.h.

Referenced by CTF_int::calc_drv_cnts< 0 >().

#define TAU_FSTART (   ARG)

Definition at line 280 of file util.h.

Referenced by CTF_int::assign_keys(), CTF_int::block_reshuffle(), CTF_int::bucket_by_pe(), CTF_int::bucket_by_virt(), CTF_int::calc_drv_displs(), CTF_int::compute_bucket_offsets(), CTF_int::compute_syoffs(), CTF_int::contract_mst(), CTF::Monoid< dtype, is_ord >::csr_add(), CTF_int::CSR_Matrix::csr_add(), CTF_int::algstrct::csr_reduce(), CTF_int::cyclic_reshuffle(), CTF_int::depad_tsr(), CTF_int::depermute_keys(), CTF_int::depin(), CTF_int::tensor::despmatricize(), CTF_int::desymmetrize(), dgtog_reshuffle(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), CTF_int::COO_Matrix::get_data(), CTF_int::get_len_ordering(), CTF_int::glb_cyclic_reshuffle(), CTF_int::glb_ord_pup(), CTF_int::tensor::init(), CTF_int::summation::is_equal(), CTF_int::nosym_transpose(), CTF_int::order_globally(), CTF_int::pad_cyclic_pup_virt_buff(), CTF_int::pad_key(), CTF_int::padded_reshuffle(), CTF_int::permute_keys(), CTF_int::ConstPairIterator::pin(), CTF_int::precompute_offsets(), CTF_int::tensor::read_local(), CTF_int::readwrite(), CTF_int::reduce_step_post(), CTF_int::ctr_virt::run(), CTF_int::spctr_replicate::run(), CTF_int::strp_tsr::run(), CTF_int::scl_virt::run(), CTF_int::tspsum_virt::run(), CTF_int::ctr_2d_general::run(), CTF_int::seq_tsr_spctr::run(), CTF_int::spctr_2d_general::run(), CTF_int::tsum_virt::run(), CTF_int::spctr_virt::run(), CTF_int::tspsum_map::run(), CTF_int::tspsum_permute::run(), CTF_int::spctr_pin_keys::run(), CTF_int::tspsum_pin_keys::run(), CTF_int::scal_diag(), CTF_int::tensor::scale_diagonals(), CTF_int::COO_Matrix::set_data(), CTF_int::tensor::set_zero(), CTF_int::scaling::sp_scl(), CTF_int::spA_dnB_dnC_seq_ctr(), CTF_int::spA_dnB_seq_sum(), CTF_int::tensor::sparsify(), CTF_int::tensor::spmatricize(), CTF_int::spsfy_tsr(), CTF_int::spspsum(), CTF_int::summation::sum_tensors(), CTF_int::sym_seq_ctr_cust(), CTF_int::sym_seq_ctr_inr(), CTF_int::sym_seq_ctr_ref(), CTF_int::sym_seq_scl_cust(), CTF_int::sym_seq_scl_ref(), 
CTF_int::sym_seq_sum_cust(), CTF_int::sym_seq_sum_inr(), CTF_int::sym_seq_sum_ref(), CTF_int::symmetrize(), CTF_int::wr_pairs_layout(), CTF_int::tensor::write(), CTF_int::tensor::zero_out_padding(), CTF_int::zero_padding(), and CTF::World::~World().

#define TAU_FSTOP (   ARG)

Definition at line 281 of file util.h.

Referenced by CTF_int::assign_keys(), CTF_int::block_reshuffle(), CTF_int::bucket_by_pe(), CTF_int::bucket_by_virt(), CTF_int::calc_drv_displs(), CTF_int::compute_bucket_offsets(), CTF_int::compute_syoffs(), CTF_int::contract_mst(), CTF::Monoid< dtype, is_ord >::csr_add(), CTF_int::CSR_Matrix::csr_add(), CTF_int::algstrct::csr_reduce(), CTF_int::cyclic_reshuffle(), CTF_int::depad_tsr(), CTF_int::depermute_keys(), CTF_int::depin(), CTF_int::tensor::despmatricize(), CTF_int::desymmetrize(), dgtog_reshuffle(), CTF_int::summation::estimate_time(), CTF_int::scaling::execute(), CTF_int::COO_Matrix::get_data(), CTF_int::get_len_ordering(), CTF_int::glb_cyclic_reshuffle(), CTF_int::glb_ord_pup(), CTF_int::tensor::init(), CTF_int::summation::is_equal(), CTF_int::nosym_transpose(), CTF_int::order_globally(), CTF_int::pad_cyclic_pup_virt_buff(), CTF_int::pad_key(), CTF_int::padded_reshuffle(), CTF_int::permute_keys(), CTF_int::ConstPairIterator::pin(), CTF_int::precompute_offsets(), CTF_int::tensor::read_local(), CTF_int::readwrite(), CTF_int::reduce_step_post(), CTF_int::ctr_virt::run(), CTF_int::spctr_replicate::run(), CTF_int::strp_tsr::run(), CTF_int::scl_virt::run(), CTF_int::tspsum_virt::run(), CTF_int::ctr_2d_general::run(), CTF_int::seq_tsr_spctr::run(), CTF_int::spctr_2d_general::run(), CTF_int::tsum_virt::run(), CTF_int::spctr_virt::run(), CTF_int::tspsum_map::run(), CTF_int::tspsum_permute::run(), CTF_int::spctr_pin_keys::run(), CTF_int::tspsum_pin_keys::run(), CTF_int::scal_diag(), CTF_int::tensor::scale_diagonals(), CTF_int::COO_Matrix::set_data(), CTF_int::tensor::set_zero(), CTF_int::scaling::sp_scl(), CTF_int::spA_dnB_dnC_seq_ctr(), CTF_int::spA_dnB_seq_sum(), CTF_int::tensor::sparsify(), CTF_int::tensor::spmatricize(), CTF_int::spsfy_tsr(), CTF_int::spspsum(), CTF_int::summation::sum_tensors(), CTF_int::sym_seq_ctr_cust(), CTF_int::sym_seq_ctr_inr(), CTF_int::sym_seq_ctr_ref(), CTF_int::sym_seq_scl_cust(), CTF_int::sym_seq_scl_ref(), 
CTF_int::sym_seq_sum_cust(), CTF_int::sym_seq_sum_inr(), CTF_int::sym_seq_sum_ref(), CTF_int::symmetrize(), CTF_int::wr_pairs_layout(), CTF_int::tensor::write(), CTF_int::tensor::zero_out_padding(), CTF_int::zero_padding(), and CTF::World::~World().

#define TAU_PROFILE (   NAME,
  ARG,
  USER 
)

Definition at line 273 of file util.h.

#define TAU_PROFILE_SET_CONTEXT (   ARG)

Definition at line 279 of file util.h.

#define TAU_PROFILE_SET_NODE (   ARG)

Definition at line 278 of file util.h.

#define TAU_PROFILE_START (   ARG)

Definition at line 277 of file util.h.

#define TAU_PROFILE_STOP (   ARG)

Definition at line 276 of file util.h.

#define TAU_PROFILE_TIMER (   ARG1,
  ARG2,
  ARG3,
  ARG4 
)

Definition at line 274 of file util.h.

#define TAU_PROFILER_CREATE (   ARG1,
  ARG2,
  ARG3,
  ARG4 
)

Definition at line 275 of file util.h.

#define THROW_ERROR (   ...)    do { printf("error:%s:%d ",__FILE__,__LINE__); printf(__VA_ARGS__); printf("\n"); quit(1); } while(0)

Definition at line 189 of file util.h.

#define TIME (   STRING)    TAU_PROFILE(STRING, " ", TAU_DEFAULT)

Definition at line 283 of file util.h.

#define USE_BLOCK_RESHUFFLE

Definition at line 101 of file util.h.

#define USE_OMP

Definition at line 15 of file util.h.

#define USE_SYM_SUM

Definition at line 99 of file util.h.

#define VERIFY_REMAP   0

Definition at line 97 of file util.h.

#define VPRINTF (   ...)    do { } while (0)

#define WARN (   ...)    do { printf("warning: "); printf(__VA_ARGS__); printf("\n"); } while(0)

Definition at line 194 of file util.h.

#define WRAP (   a,
  b 
)    ((a + b)%b)

Definition at line 168 of file util.h.