Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
spsum_tsr.h
Go to the documentation of this file.
1 #ifndef __SPSUM_TSR_H__
2 #define __SPSUM_TSR_H__
3 
4 #include "sum_tsr.h"
5 
6 namespace CTF_int {
7 
// Common base of the sparse-summation classes declared in this header; derives from tsum.
// NOTE(review): the listing numbering skips lines 10 and 14, and the cross-reference
// index under the listing places `bool is_sparse_A` and `bool is_sparse_B` on those
// lines, so the member list shown here is incomplete -- confirm against the real header.
8  class tspsum : public tsum {
9  public:
// Bookkeeping for operand A. Presumably: total nonzero count, number of virtual
// blocks, and nonzero count per block -- TODO confirm the meaning of each field.
11  int64_t nnz_A;
12  int nvirt_A;
// set_nnz_blk_A() writes nvirt_A entries here, so it must have room for that many.
13  int64_t * nnz_blk_A;
// Matching bookkeeping for operand B (same caveat on field meanings).
15  int64_t nnz_B;
16  int nvirt_B;
17  int64_t * nnz_blk_B;
// Presumably describe the B data produced by the summation -- TODO confirm.
18  int64_t new_nnz_B;
19  char * new_B;
20 
21  ~tspsum();
// Construct from another tspsum; the index lists the definition in spsum_tsr.cxx.
22  tspsum(tspsum * other);
// Base implementation returns NULL; each derived class in this header declares its own clone().
23  virtual tspsum * clone() { return NULL; }
24  tspsum(summation const * s);
// Copies nvirt_A int64_t values from nnbA into nnz_blk_A.
// A NULL nnbA leaves nnz_blk_A unchanged. nnz_blk_A itself is not checked.
25  virtual void set_nnz_blk_A(int64_t const * nnbA){
26  if (nnbA != NULL) memcpy(nnz_blk_A, nnbA, nvirt_A*sizeof(int64_t));
27  }
28  };
29 
// tspsum subclass that forwards work to a nested summation object (rec_tsum).
// NOTE(review): rec_tsum is used below but its declaration is not visible; the
// listing numbering skips line 33, which is presumably where it lives -- confirm.
30  class tspsum_virt : public tspsum {
31  public:
32  /* Class to be called on sub-blocks */
34 
// Presumably the number of indices and the virtualization factor along each -- TODO confirm.
35  int num_dim;
36  int * virt_dim;
// Per-operand order, dense block size and index map for A and then B.
37  int order_A;
38  int64_t blk_sz_A; //if dense
39  int const * idx_map_A;
40  int order_B;
41  int64_t blk_sz_B; //if dense
42  int const * idx_map_B;
43 
// run() and print() are declared virtual in sum_tsr.h according to the index.
44  void run();
45  void print();
// The index describes the base mem_fp() as returning the number of bytes of buffer space needed.
46  int64_t mem_fp();
// Passes the per-block counts for A on to rec_tsum.
// NOTE(review): listing line 48 is missing here. tspsum_map, tspsum_permute and
// tspsum_pin_keys call tspsum::set_nnz_blk_A(nnbA) before forwarding, so the
// dropped line may be that call -- confirm against the real header.
47  void set_nnz_blk_A(int64_t const * nnbA){
49  rec_tsum->set_nnz_blk_A(nnbA);
50  }
51  tspsum * clone();
52 
56  tspsum_virt(tspsum * other);
57  ~tspsum_virt();
58  tspsum_virt(summation const * s);
59  };
60 
// Per the index entry for this class: performs replication along a dimension, generates 2.5D algs.
// NOTE(review): rec_tsum is used below but not declared in the visible lines; the
// listing numbering skips line 74 after the sub-block comment -- confirm.
64  class tspsum_replicate : public tspsum {
65  public:
66  int64_t size_A; /* size of A blocks */
67  int64_t size_B; /* size of B blocks */
68  int ncdt_A; /* number of processor dimensions to replicate A along */
69  int ncdt_B; /* number of processor dimensions to replicate B along */
70 
73  /* Class to be called on sub-blocks */
75 
// run() and print() are declared virtual in sum_tsr.h according to the index.
76  void run();
77  void print();
// The index describes the base mem_fp() as returning the number of bytes of buffer space needed.
78  int64_t mem_fp();
79  tspsum * clone();
// Passes the per-block counts for A on to rec_tsum.
// NOTE(review): listing line 81 is missing here. The map/permute/pin_keys classes
// call tspsum::set_nnz_blk_A(nnbA) first, so the dropped line may be that call -- confirm.
80  void set_nnz_blk_A(int64_t const * nnbA){
82  rec_tsum->set_nnz_blk_A(nnbA);
83  }
84 
85  tspsum_replicate(tspsum * other);
// No destructor is declared in the visible lines of this class.
87  tspsum_replicate(summation const * s,
88  int const * phys_mapped,
89  int64_t blk_sz_A,
90  int64_t blk_sz_B);
91  };
92 
// tspsum subclass that does not forward to a nested object: its set_nnz_blk_A()
// only updates this object's own nnz_blk_A.
// NOTE(review): the listing numbering skips lines 106 and 110 among the members,
// so some fields are probably not shown -- confirm against the real header.
93  class seq_tsr_spsum : public tspsum {
94  public:
// Order, edge lengths, index map and symmetry for A.
// edge_len_A and sym_A are released in the destructor; idx_map_A is not.
95  int order_A;
96  int * edge_len_A;
97  int const * idx_map_A;
98  int * sym_A;
// Same set of fields for B, with the same release behaviour.
99  int order_B;
100  int * edge_len_B;
101  int const * idx_map_B;
102  int * sym_B;
103  //fseq_tsr_sum func_ptr;
104 
105  int is_inner;
107 
108  int64_t map_pfx;
109 
// The index describes univar_function as an untyped internal class for a
// doubly-typed univariate function. The destructor does not free it.
111  univar_function const * func; //fseq_elm_sum custom_params;
112 
// run() and print() are declared virtual in sum_tsr.h according to the index.
116  void run();
117  void print();
// The index describes the base mem_fp() as returning the number of bytes of buffer space needed.
118  int64_t mem_fp();
119  tspsum * clone();
// Delegates to the base copy into nnz_blk_A; nothing is forwarded further.
120  void set_nnz_blk_A(int64_t const * nnbA){
121  tspsum::set_nnz_blk_A(nnbA);
122  }
123 
128  seq_tsr_spsum(tspsum * other);
// Releases the four arrays via CTF_int::cdealloc, chained with the comma operator
// into one expression statement. The ';' after the closing brace is an empty declaration.
129  ~seq_tsr_spsum(){ CTF_int::cdealloc(edge_len_A), CTF_int::cdealloc(edge_len_B),
130  CTF_int::cdealloc(sym_A), CTF_int::cdealloc(sym_B); };
131  seq_tsr_spsum(summation const * s);
132 
133  };
134 
// tspsum subclass that keeps its own per-block counts and also forwards them to rec_tsum.
// NOTE(review): rec_tsum is used below but not declared in the visible lines; the
// listing numbering skips line 137 -- confirm against the real header.
135  class tspsum_map : public tspsum {
136  public:
// Presumably the number of mapped indices, with a length and a stride (lda) per
// mapped index -- TODO confirm.
138  int nmap_idx;
139  int64_t * map_idx_len;
140  int64_t * map_idx_lda;
141 
// run() and print() are declared virtual in sum_tsr.h according to the index.
142  void run();
143  void print();
// The index describes the base mem_fp() as returning the number of bytes of buffer space needed.
144  int64_t mem_fp();
145  tspsum * clone();
// Updates this object's nnz_blk_A through the base implementation, then hands
// the same array to rec_tsum.
146  void set_nnz_blk_A(int64_t const * nnbA){
147  tspsum::set_nnz_blk_A(nnbA);
148  rec_tsum->set_nnz_blk_A(nnbA);
149  }
150 
151  tspsum_map(tspsum * other);
152  ~tspsum_map();
153  tspsum_map(summation const * s);
154  };
155 
// tspsum subclass that keeps its own per-block counts and also forwards them to rec_tsum.
// NOTE(review): rec_tsum is used below but not declared in the visible lines; the
// listing numbering skips line 158 -- confirm against the real header.
156  class tspsum_permute : public tspsum {
157  public:
// Selects which operand this object applies to. The existing comment only states
// the false case (B); true presumably means A -- TODO confirm.
159  bool A_or_B; //if false perm_B
160  int order;
161  int * lens_new;
162  int * lens_old; // FIXME = lens_new?
// Presumably the permutation array of length `order` -- TODO confirm.
163  int * p;
164  bool skip;
165 
// run() and print() are declared virtual in sum_tsr.h according to the index.
166  void run();
167  void print();
// The index describes the base mem_fp() as returning the number of bytes of buffer space needed.
168  int64_t mem_fp();
169  tspsum * clone();
// Updates this object's nnz_blk_A through the base implementation, then hands
// the same array to rec_tsum.
170  void set_nnz_blk_A(int64_t const * nnbA){
171  tspsum::set_nnz_blk_A(nnbA);
172  rec_tsum->set_nnz_blk_A(nnbA);
173  }
174 
175  tspsum_permute(tspsum * other);
176  ~tspsum_permute();
177  tspsum_permute(summation const * s, bool A_or_B, int const * lens);
178  };
179 
// tspsum subclass that keeps its own per-block counts and also forwards them to rec_tsum.
// NOTE(review): rec_tsum is used below but not declared in the visible lines; the
// listing numbering skips line 182 -- confirm against the real header.
180  class tspsum_pin_keys : public tspsum {
181  public:
// Selects which operand (A or B) this object applies to; the polarity is not
// visible here -- TODO confirm.
183  bool A_or_B;
184  int order;
185  int const * lens;
// Presumably per-dimension arrays of length `order` -- TODO confirm.
186  int * divisor;
187  int * virt_dim;
188  int * phys_rank;
189 
// run() and print() are declared virtual in sum_tsr.h according to the index.
190  void run();
191  void print();
// The index describes the base mem_fp() as returning the number of bytes of buffer space needed.
192  int64_t mem_fp();
193  tspsum * clone();
// Updates this object's nnz_blk_A through the base implementation, then hands
// the same array to rec_tsum.
194  void set_nnz_blk_A(int64_t const * nnbA){
195  tspsum::set_nnz_blk_A(nnbA);
196  rec_tsum->set_nnz_blk_A(nnbA);
197  }
198 
199  tspsum_pin_keys(tspsum * other);
200  ~tspsum_pin_keys();
201  tspsum_pin_keys(summation const * s, bool A_or_B);
202 
203  };
204 
205 }
206 
207 #endif
void set_nnz_blk_A(int64_t const *nnbA)
Definition: spsum_tsr.h:80
tspsum(tspsum *other)
Definition: spsum_tsr.cxx:11
int64_t * nnz_blk_B
Definition: spsum_tsr.h:17
bool is_sparse_A
Definition: spsum_tsr.h:10
int const * idx_map_B
Definition: spsum_tsr.h:101
virtual void set_nnz_blk_A(int64_t const *nnbA)
Definition: spsum_tsr.h:25
void set_nnz_blk_A(int64_t const *nnbA)
Definition: spsum_tsr.h:47
bool is_sparse_B
Definition: spsum_tsr.h:14
untyped internal class for doubly-typed univariate function
Definition: sum_tsr.h:14
void set_nnz_blk_A(int64_t const *nnbA)
Definition: spsum_tsr.h:194
int64_t new_nnz_B
Definition: spsum_tsr.h:18
void set_nnz_blk_A(int64_t const *nnbA)
Definition: spsum_tsr.h:120
virtual int64_t mem_fp()
returns the number of bytes of buffer space needed
Definition: sum_tsr.h:84
int64_t * map_idx_lda
Definition: spsum_tsr.h:140
int const * idx_map_A
Definition: spsum_tsr.h:97
virtual void run()
Definition: sum_tsr.h:77
int64_t nnz_B
Definition: spsum_tsr.h:15
void set_nnz_blk_A(int64_t const *nnbA)
Definition: spsum_tsr.h:170
univar_function const * func
Definition: spsum_tsr.h:111
virtual tspsum * clone()
Definition: spsum_tsr.h:23
int64_t nnz_A
Definition: spsum_tsr.h:11
int cdealloc(void *ptr)
free abstraction
Definition: memcontrol.cxx:480
int const * idx_map_A
Definition: spsum_tsr.h:39
int const * idx_map_B
Definition: spsum_tsr.h:42
int64_t * nnz_blk_A
Definition: spsum_tsr.h:13
int64_t * map_idx_len
Definition: spsum_tsr.h:139
performs replication along a dimension, generates 2.5D algs
Definition: spsum_tsr.h:64
class for executing distributed summation of tensors
Definition: summation.h:15
char * new_B
Definition: spsum_tsr.h:19
void set_nnz_blk_A(int64_t const *nnbA)
Definition: spsum_tsr.h:146
virtual void print()
Definition: sum_tsr.h:78