4 #include "../shared/util.h"     8   spctr_offload::spctr_offload(contraction 
const * c,
    15                                int download_phase_C_) : spctr(c) {
    19     total_iter = total_iter_; 
    20     upload_phase_A = upload_phase_A_; 
    21     upload_phase_B = upload_phase_B_; 
    22     download_phase_C = download_phase_C_; 
    30   spctr_offload::~spctr_offload(){
    34   spctr_offload::spctr_offload(
spctr * other) : 
spctr(other) {
    35     spctr_offload * o = (spctr_offload*)other;
    40     iter_counter = o->iter_counter;
    41     total_iter = o->total_iter;
    42     upload_phase_A = o->upload_phase_A;
    43     upload_phase_B = o->upload_phase_B;
    44     download_phase_C = o->download_phase_C;
    50   spctr * spctr_offload::clone() {
    51     return new spctr_offload(
this);
    54   void spctr_offload::print() {
    55     printf(
"spctr_offload: \n");
    56     printf(
"total_iter = %d\n", total_iter);
    57     printf(
"upload_phase_A = %d\n",
    59     printf(
"upload_phase_B = %d\n",
    61     printf(
"download_phase_C = %d\n",
    66   double spctr_offload::est_time_fp(
int nlyr, 
double nnz_frac_A, 
double nnz_frac_B, 
double nnz_frac_C){
    67     double tot_time = 0.0;
    76   double spctr_offload::est_time_rec(
int nlyr, 
double nnz_frac_A, 
double nnz_frac_B, 
double nnz_frac_C){
    80   int64_t spctr_offload::spmem_fp(
double nnz_frac_A, 
double nnz_frac_B, 
double nnz_frac_C){
    84   int64_t spctr_offload::mem_rec(
double nnz_frac_A, 
double nnz_frac_B, 
double nnz_frac_C) {
    85     return rec_ctr->
mem_rec(nnz_frac_A, nnz_frac_B, nnz_frac_C) + 
spmem_fp(nnz_frac_A, nnz_frac_B, nnz_frac_C);
    88   void spctr_offload::run(
char * A, 
int nblk_A, int64_t 
const * size_blk_A,
    89                           char * B, 
int nblk_B, int64_t 
const * size_blk_B,
    90                           char * C, 
int nblk_C, int64_t * size_blk_C,
    93     ASSERT(iter_counter < total_iter);
    94     if (iter_counter % upload_phase_A == 0){
    96         if (iter_counter != 0){
    99         int64_t sp_size_A = 0;
   100         for (
int i=0; i<nblk_A; i++){
   101           sp_size_A += size_blk_A[i];
   103         spr_A = 
new offload_arr(sp_size_A);
   105         if (iter_counter == 0){
   111     if (iter_counter % upload_phase_B == 0){
   113         if (iter_counter != 0){
   116         int64_t sp_size_B = 0;
   117         for (
int i=0; i<nblk_B; i++){
   118           sp_size_B += size_blk_B[i];
   120         spr_B = 
new offload_arr(sp_size_B);
   122         if (iter_counter == 0){
   128     if (iter_counter == 0){
   130         int64_t sp_size_C = 0;
   131         for (
int i=0; i<nblk_C; i++){
   132           sp_size_C += size_blk_C[i];
   134         spr_C = 
new offload_arr(sp_size_C);
   137         offload_tsr * tspr_C = 
new offload_tsr(
sr_C, 
size_C);
   145       ASSERT(iter_counter % download_phase_C == 0);
   163     rec_ctr->
run(spr_A->dev_spr, nblk_A, size_blk_A,
   164                  spr_B->dev_spr, nblk_B, size_blk_B,
   165                  spr_C->dev_spr, nblk_C, size_blk_C,
   171     if (iter_counter % download_phase_C == 0){
   175       spr_C->download(C_host_ptr);
   187       if (iter_counter != total_iter)
   188         ((offload_tsr*)spr_C)->set_zero();
   192     if (iter_counter == total_iter){
 
virtual bool isequal(char const *a, char const *b) const 
returns true if algstrct elements a and b are equal 
virtual int64_t mem_rec()
virtual double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the execution time this kernel and its recursive calls are estimated to take ...
void host_pinned_alloc(void **ptr, int64_t size)
allocate a pinned host buffer 
virtual char const * addid() const 
identity element for addition i.e. 0 
int64_t spmem_fp(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the number of bytes of buffer space we need 
virtual void set(char *a, char const *b, int64_t n) const 
sets n elements of array a to value b 
void host_pinned_free(void *ptr)
free a pinned host buffer 
double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the execution time the local part of this kernel is estimated to take 
virtual void scal(int n, char const *alpha, char *X, int incX) const 
X["i"]=alpha*X["i"];. 
virtual void axpy(int n, char const *alpha, char const *X, int incX, char *Y, int incY) const 
Y["i"]+=alpha*X["i"];. 
int el_size
size of each element of algstrct in bytes 
double estimate_upload_time(int64_t size)
estimate time it takes to upload 
virtual char const * mulid() const 
identity element for multiplication i.e. 1 
void run(char *A, char *B, char *C)
double estimate_download_time(int64_t size)
estimate time it takes to download