Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
offload.h
Go to the documentation of this file.
1 /*Copyright (c) 2014, Edgar Solomonik, all rights reserved.*/
2 
3 #ifndef __OFFLOAD_H__
4 #define __OFFLOAD_H__
5 
6 //#include "../interface/common.h"
7 
8 namespace CTF_int{
9  class algstrct;
10 
12  void offload_init(); // initialize offloading, e.g. create cublas
14  void offload_exit(); // exit offloading, e.g. destroy cublas
15 
17  double estimate_download_time(int64_t size); // estimate time it takes to download (device to host); NOTE(review): units of size and of the result are not shown here — confirm
18 
20  double estimate_upload_time(int64_t size); // estimate time it takes to upload (host to device); NOTE(review): units of size and of the result are not shown here — confirm
21 
23  class offload_arr { // offloaded array/buffer
24  public:
26  char * dev_spr; // device pointer
28  int64_t nbytes; // number of bytes
29 
34  offload_arr(int64_t nbytes); // constructor allocates device buffer
35 
39  ~offload_arr(); // destructor; presumably releases the device buffer (definition not visible here). NOTE(review): no copy constructor/assignment is declared, so a copy would share dev_spr — confirm copies never happen
40 
45  void download(char * host_spr); // read data from device to host pointer
46 
51  void upload(char const * host_spr); // write data from host to device
52  };
53 
55  class offload_tsr : public offload_arr { // offloaded and serialized tensor data
56  public:
58  algstrct const * sr; // algebraic structure
60  int64_t size; // number of elements
61 
67  offload_tsr(algstrct const * sr, int64_t size); // takes the algebraic structure and the number of elements; NOTE(review): presumably sizes the base buffer from sr's element size — confirm
68 
72  void set_zero(); // NOTE(review): presumably resets the device data to sr's additive identity — confirm against the definition
73  };
74 
75 
81  void host_pinned_alloc(void ** ptr, int64_t size); // allocate a pinned host buffer; the result is returned through *ptr (NOTE(review): size presumably in bytes — confirm)
82 
87  void host_pinned_free(void * ptr); // free a pinned host buffer
88 
89  template <typename dtype> // overload taking offloaded tensors; NOTE(review): presumably a device GEMM, C = alpha*op(A)*op(B) + beta*C, with tA/tB selecting transposition — confirm against the definition
90  void offload_gemm(char tA,
91  char tB,
92  int m,
93  int n,
94  int k,
95  dtype alpha,
96  offload_tsr & A,
97  int lda_A,
98  offload_tsr & B,
99  int lda_B,
100  dtype beta,
101  offload_tsr & C,
102  int lda_C);
103 
104  template <typename dtype> // overload taking typed pointers; NOTE(review): dev_A/dev_B/dev_C are presumably device pointers and the semantics presumably C = alpha*op(A)*op(B) + beta*C — confirm against the definition
105  void offload_gemm(char tA,
106  char tB,
107  int m,
108  int n,
109  int k,
110  dtype alpha,
111  dtype const * dev_A,
112  int lda_A,
113  dtype const * dev_B,
114  int lda_B,
115  dtype beta,
116  dtype * dev_C,
117  int lda_C);
118 }
119 #endif
120 
offloaded and serialized tensor data
Definition: offload.h:55
void upload(char const *host_spr)
write data from host to device
void host_pinned_alloc(void **ptr, int64_t size)
allocate a pinned host buffer
int64_t nbytes
number of bytes
Definition: offload.h:28
void host_pinned_free(void *ptr)
free a pinned host buffer
void download(char *host_spr)
read data from device to host pointer
void offload_exit()
exit offloading, e.g. destroy cublas
offloaded array/buffer
Definition: offload.h:23
~offload_arr()
destructor deallocates device buffer
int64_t size
number of elements
Definition: offload.h:60
double estimate_upload_time(int64_t size)
estimate time it takes to upload
offload_arr(int64_t nbytes)
constructor allocates device buffer
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
Definition: algstrct.h:34
void offload_gemm(char tA, char tB, int m, int n, int k, dtype alpha, offload_tsr &A, int lda_A, offload_tsr &B, int lda_B, dtype beta, offload_tsr &C, int lda_C)
char * dev_spr
device pointer
Definition: offload.h:26
void offload_init()
initialize offloading, e.g. create cublas
double estimate_download_time(int64_t size)
estimate time it takes to download
algstrct const * sr
algebraic structure
Definition: offload.h:58