51 void upload(
char const * host_spr);
89 template <
typename dtype>
104 template <
typename dtype>
offloaded and serialized tensor data
void upload(char const *host_spr)
write data from host to device
void host_pinned_alloc(void **ptr, int64_t size)
allocate a pinned host buffer
int64_t nbytes
number of bytes
void host_pinned_free(void *ptr)
free a pinned host buffer
void download(char *host_spr)
read data from device to host pointer
void offload_exit()
exit offloading, e.g. destroy cublas
~offload_arr()
destructor frees device buffer
int64_t size
number of elements
double estimate_upload_time(int64_t size)
estimate time it takes to upload
offload_arr(int64_t nbytes)
constructor allocates device buffer
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
void offload_gemm(char tA, char tB, int m, int n, int k, dtype alpha, offload_tsr &A, int lda_A, offload_tsr &B, int lda_B, dtype beta, offload_tsr &C, int lda_C)
char * dev_spr
device pointer
void offload_init()
initialize offloading, e.g. create cublas
double estimate_download_time(int64_t size)
estimate time it takes to download
algstrct const * sr
algebraic structure