4 #include "../shared/util.h" 8 spctr_offload::spctr_offload(contraction
const * c,
15 int download_phase_C_) : spctr(c) {
19 total_iter = total_iter_;
20 upload_phase_A = upload_phase_A_;
21 upload_phase_B = upload_phase_B_;
22 download_phase_C = download_phase_C_;
30 spctr_offload::~spctr_offload(){
34 spctr_offload::spctr_offload(
spctr * other) :
spctr(other) {
35 spctr_offload * o = (spctr_offload*)other;
40 iter_counter = o->iter_counter;
41 total_iter = o->total_iter;
42 upload_phase_A = o->upload_phase_A;
43 upload_phase_B = o->upload_phase_B;
44 download_phase_C = o->download_phase_C;
50 spctr * spctr_offload::clone() {
51 return new spctr_offload(
this);
54 void spctr_offload::print() {
55 printf(
"spctr_offload: \n");
56 printf(
"total_iter = %d\n", total_iter);
57 printf(
"upload_phase_A = %d\n",
59 printf(
"upload_phase_B = %d\n",
61 printf(
"download_phase_C = %d\n",
66 double spctr_offload::est_time_fp(
int nlyr,
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C){
67 double tot_time = 0.0;
76 double spctr_offload::est_time_rec(
int nlyr,
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C){
80 int64_t spctr_offload::spmem_fp(
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C){
84 int64_t spctr_offload::mem_rec(
double nnz_frac_A,
double nnz_frac_B,
double nnz_frac_C) {
85 return rec_ctr->
mem_rec(nnz_frac_A, nnz_frac_B, nnz_frac_C) +
spmem_fp(nnz_frac_A, nnz_frac_B, nnz_frac_C);
88 void spctr_offload::run(
char * A,
int nblk_A, int64_t
const * size_blk_A,
89 char * B,
int nblk_B, int64_t
const * size_blk_B,
90 char * C,
int nblk_C, int64_t * size_blk_C,
93 ASSERT(iter_counter < total_iter);
94 if (iter_counter % upload_phase_A == 0){
96 if (iter_counter != 0){
99 int64_t sp_size_A = 0;
100 for (
int i=0; i<nblk_A; i++){
101 sp_size_A += size_blk_A[i];
103 spr_A =
new offload_arr(sp_size_A);
105 if (iter_counter == 0){
111 if (iter_counter % upload_phase_B == 0){
113 if (iter_counter != 0){
116 int64_t sp_size_B = 0;
117 for (
int i=0; i<nblk_B; i++){
118 sp_size_B += size_blk_B[i];
120 spr_B =
new offload_arr(sp_size_B);
122 if (iter_counter == 0){
128 if (iter_counter == 0){
130 int64_t sp_size_C = 0;
131 for (
int i=0; i<nblk_C; i++){
132 sp_size_C += size_blk_C[i];
134 spr_C =
new offload_arr(sp_size_C);
137 offload_tsr * tspr_C =
new offload_tsr(
sr_C,
size_C);
145 ASSERT(iter_counter % download_phase_C == 0);
163 rec_ctr->
run(spr_A->dev_spr, nblk_A, size_blk_A,
164 spr_B->dev_spr, nblk_B, size_blk_B,
165 spr_C->dev_spr, nblk_C, size_blk_C,
171 if (iter_counter % download_phase_C == 0){
175 spr_C->download(C_host_ptr);
187 if (iter_counter != total_iter)
188 ((offload_tsr*)spr_C)->set_zero();
192 if (iter_counter == total_iter){
virtual bool isequal(char const *a, char const *b) const
returns true if algstrct elements a and b are equal
virtual int64_t mem_rec()
virtual double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the execution time this kernel and its recursive calls are estimated to take ...
void host_pinned_alloc(void **ptr, int64_t size)
allocate a pinned host buffer
virtual char const * addid() const
identity element for addition i.e. 0
int64_t spmem_fp(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the number of bytes of buffer space we need
virtual void set(char *a, char const *b, int64_t n) const
sets n elements of array a to value b
void host_pinned_free(void *ptr)
free a pinned host buffer
double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the execution time the local part of this kernel is estimated to take
virtual void scal(int n, char const *alpha, char *X, int incX) const
X["i"]=alpha*X["i"];
virtual void axpy(int n, char const *alpha, char const *X, int incX, char *Y, int incY) const
Y["i"]+=alpha*X["i"];
int el_size
size of each element of algstrct in bytes
double estimate_upload_time(int64_t size)
estimate time it takes to upload
virtual char const * mulid() const
identity element for multiplication i.e. 1
void run(char *A, char *B, char *C)
double estimate_download_time(int64_t size)
estimate time it takes to download