3 #ifndef __SPCTR_COMM_H__ 4 #define __SPCTR_COMM_H__ 30 void run(
char * A,
int nblk_A, int64_t
const * size_blk_A,
31 char * B,
int nblk_B, int64_t
const * size_blk_B,
32 char * C,
int nblk_C, int64_t * size_blk_C,
43 int64_t
spmem_fp(
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C);
48 int64_t
spmem_rec(
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C);
49 double est_time_fp(
int nlyr,
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C);
50 double est_time_rec(
int nlyr,
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C);
57 int const * phys_mapped,
64 #endif // __SPCTR_COMM_H__
double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the execution time this kernel and its recursive calls are estimated to take ...
void run(char *A, int nblk_A, int64_t const *size_blk_A, char *B, int nblk_B, int64_t const *size_blk_B, char *C, int nblk_C, int64_t *size_blk_C, char *&new_C)
spctr_replicate(spctr *other)
int64_t spmem_fp(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the number of bytes of buffer space we need
class for executing distributed contraction of tensors
double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the execution time the local part of this kernel is estimated to take
int64_t spmem_rec(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the number of bytes needed by each processor in this kernel and its recursive calls ...