4 #ifndef __SPCTR_2D_GENERAL_H__ 5 #define __SPCTR_2D_GENERAL_H__ 18 int64_t & cg_spctr_lda_A,
19 int64_t & cg_spctr_sub_lda_A,
23 int const * virt_blk_len_A,
28 int64_t & cg_spctr_lda_B,
29 int64_t & cg_spctr_sub_lda_B,
33 int const * virt_blk_len_B,
38 int64_t & cg_spctr_lda_C,
39 int64_t & cg_spctr_sub_lda_C,
43 int const * virt_blk_len_C,
86 void run(
char * A,
int nblk_A, int64_t
const * size_blk_A,
87 char * B,
int nblk_B, int64_t
const * size_blk_B,
88 char * C,
int nblk_C, int64_t * size_blk_C,
96 int64_t
spmem_fp(
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C);
101 int64_t
spmem_rec(
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C);
106 double est_time_fp(
int nlyr,
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C);
112 double est_time_rec(
int nlyr,
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C);
double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the time this kernel will take including calls to rec_ctr
double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the time this kernel will take including calls to rec_ctr
int spctr_2d_gen_build(int is_used, CommData global_comm, int i, int *virt_dim, int &cg_edge_len, int &total_iter, tensor *A, int i_A, CommData *&cg_cdt_A, int64_t &cg_spctr_lda_A, int64_t &cg_spctr_sub_lda_A, bool &cg_move_A, int *blk_len_A, int64_t &blk_sz_A, int const *virt_blk_len_A, int &load_phase_A, tensor *B, int i_B, CommData *&cg_cdt_B, int64_t &cg_spctr_lda_B, int64_t &cg_spctr_sub_lda_B, bool &cg_move_B, int *blk_len_B, int64_t &blk_sz_B, int const *virt_blk_len_B, int &load_phase_B, tensor *C, int i_C, CommData *&cg_cdt_C, int64_t &cg_spctr_lda_C, int64_t &cg_spctr_sub_lda_C, bool &cg_move_C, int *blk_len_C, int64_t &blk_sz_C, int const *virt_blk_len_C, int &load_phase_C)
int64_t spmem_fp(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the number of bytes of buffer space we need
class for executing distributed contraction of tensors
int64_t spmem_rec(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the number of bytes of buffer space we need recursively
spctr_2d_general(contraction *c)
partial constructor, most of the logic is in the spctr_2d_gen_build function
void run(char *A, int nblk_A, int64_t const *size_blk_A, char *B, int nblk_B, int64_t const *size_blk_B, char *C, int nblk_C, int64_t *size_blk_C, char *&new_C)
Essentially performs SUMMA, except that it assumes an equal block size on each processor. Performs rank-b updates whe...
spctr_2d_general(spctr *other)
copies spctr object
void find_bsizes(int64_t &b_A, int64_t &b_B, int64_t &b_C, int64_t &s_A, int64_t &s_B, int64_t &s_C, int64_t &aux_size)
determines buffer and block sizes needed for spctr_2d_general
void print()
print ctr object
~spctr_2d_general()
deallocates spctr_2d_general object