3 #include "../shared/util.h" 7 ctr_offload::ctr_offload(contraction
const * c,
14 int download_phase_C_) : ctr(c) {
18 total_iter = total_iter_;
19 upload_phase_A = upload_phase_A_;
20 upload_phase_B = upload_phase_B_;
21 download_phase_C = download_phase_C_;
29 ctr_offload::~ctr_offload(){
33 ctr_offload::ctr_offload(
ctr * other) :
ctr(other) {
34 ctr_offload * o = (ctr_offload*)other;
39 iter_counter = o->iter_counter;
40 total_iter = o->total_iter;
41 upload_phase_A = o->upload_phase_A;
42 upload_phase_B = o->upload_phase_B;
43 download_phase_C = o->download_phase_C;
49 ctr * ctr_offload::clone() {
50 return new ctr_offload(
this);
53 void ctr_offload::print() {
54 printf(
"ctr_offload: \n");
55 printf(
"total_iter = %d\n", total_iter);
56 printf(
"size_A = %ld, upload_phase_A = %d\n",
58 printf(
"size_B = %ld, upload_phase_B = %d\n",
60 printf(
"size_C = %ld, download_phase_C = %d\n",
65 double ctr_offload::est_time_fp(
int nlyr){
66 double tot_time = 0.0;
73 double ctr_offload::est_time_rec(
int nlyr) {
77 int64_t ctr_offload::mem_fp(){
81 int64_t ctr_offload::mem_rec() {
85 void ctr_offload::run(
char * A,
char * B,
char * C){
87 ASSERT(iter_counter < total_iter);
88 if (iter_counter == 0){
98 if (iter_counter % upload_phase_A == 0)
100 if (iter_counter % upload_phase_B == 0)
104 ASSERT(iter_counter % download_phase_C == 0);
121 rec_ctr->
run(ptr_A->dev_spr, ptr_B->dev_spr, ptr_C->dev_spr);
126 if (iter_counter % download_phase_C == 0){
129 ptr_C->download(C_host_ptr);
135 if (iter_counter != total_iter)
140 if (iter_counter == total_iter){
virtual bool isequal(char const *a, char const *b) const
returns true if algstrct elements a and b are equal
virtual int64_t mem_rec()
int64_t mem_fp()
returns the number of bytes of buffer space we need
void host_pinned_alloc(void **ptr, int64_t size)
allocate a pinned host buffer
virtual char const * addid() const
identity element for addition i.e. 0
virtual void set(char *a, char const *b, int64_t n) const
sets n elements of array a to value b
ctr(ctr *other)
copies generic ctr object
void host_pinned_free(void *ptr)
free a pinned host buffer
virtual void scal(int n, char const *alpha, char *X, int incX) const
X["i"]=alpha*X["i"];.
virtual double est_time_rec(int nlyr)
virtual void axpy(int n, char const *alpha, char const *X, int incX, char *Y, int incY) const
Y["i"]+=alpha*X["i"];.
virtual void run(char *A, char *B, char *C)
int el_size
size of each element of algstrct in bytes
double estimate_upload_time(int64_t size)
estimate time it takes to upload
virtual char const * mulid() const
identity element for multiplication i.e. 1
double estimate_download_time(int64_t size)
estimate time it takes to download
double est_time_fp(int nlyr)
returns the execution time the local part of this kernel is estimated to take