Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
coo.cxx
Go to the documentation of this file.
1 #include "coo.h"
2 #include "csr.h"
3 #include "../shared/util.h"
4 #include "../contraction/ctr_comm.h"
5 
6 namespace CTF_int {
7  int64_t get_coo_size(int64_t nnz, int val_size){
8  val_size = std::max(val_size,64*((val_size + 63)/64));
9  return nnz*(val_size+sizeof(int)*2)+2*sizeof(int64_t);
10  }
11 
12  COO_Matrix::COO_Matrix(int64_t nnz, algstrct const * sr){
13  int64_t size = get_coo_size(nnz, sr->el_size);
14  all_data = (char*)alloc(size);
15  ((int64_t*)all_data)[0] = nnz;
16  ((int64_t*)all_data)[1] = sr->el_size;
17  //printf("all_data %p vals %p\n",all_data,this->vals());
18  }
19 
20  COO_Matrix::COO_Matrix(char * all_data_){
21  all_data = all_data_;
22  }
23 
24  COO_Matrix::COO_Matrix(CSR_Matrix const & csr, algstrct const * sr){
25  int64_t nnz = csr.nnz();
26  int64_t v_sz = csr.val_size();
27  int const * csr_ja = csr.JA();
28  int const * csr_ia = csr.IA();
29  char const * csr_vs = csr.vals();
30 
31  int64_t size = get_coo_size(nnz, v_sz);
32  all_data = (char*)alloc(size);
33  ((int64_t*)all_data)[0] = nnz;
34  ((int64_t*)all_data)[1] = v_sz;
35 
36  char * vs = vals();
37  int * coo_rs = rows();
38  int * coo_cs = cols();
39 
40  sr->init_shell(nnz, vs);
41 
42  sr->csr_to_coo(nnz, csr.nrow(), csr_vs, csr_ja, csr_ia, vs, coo_rs, coo_cs);
43  }
44 
45  int64_t COO_Matrix::nnz() const {
46  return ((int64_t*)all_data)[0];
47  }
48 
49  int COO_Matrix::val_size() const {
50  return ((int64_t*)all_data)[1];
51  }
52 
53  int64_t COO_Matrix::size() const {
54  return get_coo_size(nnz(),val_size());
55  }
56 
57  char * COO_Matrix::vals() const {
58  return all_data + 2*sizeof(int64_t);
59  }
60 
61  int * COO_Matrix::rows() const {
62  int64_t n = this->nnz();
63  int v_sz = this->val_size();
64 
65  return (int*)(all_data + n*v_sz+2*sizeof(int64_t));
66  }
67 
68  int * COO_Matrix::cols() const {
69  int64_t n = this->nnz();
70  int v_sz = ((int64_t*)all_data)[1];
71 
72  return (int*)(all_data + n*(v_sz+sizeof(int))+2*sizeof(int64_t));
73  }
74 
75  void COO_Matrix::set_data(int64_t nz, int order, int const * lens, int const * rev_ordering, int nrow_idx, char const * tsr_data, algstrct const * sr, int const * phase){
76  TAU_FSTART(convert_to_COO);
77  ((int64_t*)all_data)[0] = nz;
78  ((int64_t*)all_data)[1] = sr->el_size;
79  int v_sz = sr->el_size;
80 
81  int * rev_ord_lens = (int*)alloc(sizeof(int)*order);
82  int * ordering = (int*)alloc(sizeof(int)*order);
83  int64_t * lda_col = (int64_t*)alloc(sizeof(int64_t)*(order-nrow_idx));
84  int64_t * lda_row = (int64_t*)alloc(sizeof(int64_t)*nrow_idx);
85 
86  for (int i=0; i<order; i++){
87  ordering[rev_ordering[i]]=i;
88  }
89  for (int i=0; i<order; i++){
90  // printf("[%d] %d -> %d\n", lens[i], i, ordering[i]);
91  rev_ord_lens[ordering[i]] = lens[i]/phase[i];
92  if (lens[i]%phase[i] > 0) rev_ord_lens[ordering[i]]++;
93  }
94 
95  for (int i=0; i<order; i++){
96  if (i==0 && i<nrow_idx){
97  lda_row[0] = 1;
98  }
99  if (i>0 && i<nrow_idx){
100  lda_row[i] = lda_row[i-1]*rev_ord_lens[i-1];
101  }
102  if (i==nrow_idx){
103  lda_col[0] = 1;
104  }
105  if (i>nrow_idx){
106  lda_col[i-nrow_idx] = lda_col[i-nrow_idx-1]*rev_ord_lens[i-1];
107  // printf("lda_col[%d] = %ld len[%d] = %d\n",i-nrow_idx, lda_col[i-nrow_idx], i, rev_ord_lens[i]);
108  }
109  }
110 
111  int * rs = rows();
112  int * cs = cols();
113  char * vs = vals();
114 
115 #ifdef USE_OMP
116  #pragma omp parallel for
117 #endif
118  for (int64_t i=0; i<nz; i++){
119  ConstPairIterator pi(sr, tsr_data);
120  int64_t k = pi[i].k();
121  cs[i] = 1;
122  rs[i] = 1;
123  for (int j=0; j<order; j++){
124  int64_t kpart = (k%lens[j])/phase[j];
125  if (ordering[j] < nrow_idx){
126  rs[i] += kpart*lda_row[ordering[j]];
127  } else {
128  cs[i] += kpart*lda_col[ordering[j]-nrow_idx];
129  // printf("%d %ld %d %d %ld\n",j,kpart,ordering[j],nrow_idx,lda_col[ordering[j]-nrow_idx]);
130  }
131  k=k/lens[j];
132  }
133  //printf("k=%ld col = %d row = %d\n", pi[i].k(), cs[i], rs[i]);
134  pi[i].read_val(vs+v_sz*i);
135  //printf("wrote value at %p v_Sz = %d\n",vs+v_sz*i, v_sz);
136  //sr->print(pi[i].d());
137  //sr->print(vs+v_sz*i);
138  }
139  cdealloc(ordering);
140  cdealloc(rev_ord_lens);
141  cdealloc(lda_col);
142  cdealloc(lda_row);
143  TAU_FSTOP(convert_to_COO);
144  }
145 
146  void COO_Matrix::get_data(int64_t nz, int order, int const * lens, int const * rev_ordering, int nrow_idx, char * tsr_data, algstrct const * sr, int const * phase, int const * phase_rank){
147  TAU_FSTART(convert_to_COO);
148  ASSERT(((int64_t*)all_data)[0] == nz);
149  ASSERT(((int64_t*)all_data)[1] == sr->el_size);
150  int v_sz = sr->el_size;
151 
152  int * rev_ord_lens = (int*)alloc(sizeof(int)*order);
153  int * ordering = (int*)alloc(sizeof(int)*order);
154  int64_t * lda_col = (int64_t*)alloc(sizeof(int64_t)*(order-nrow_idx));
155  int64_t * lda_row = (int64_t*)alloc(sizeof(int64_t)*nrow_idx);
156 
157  for (int i=0; i<order; i++){
158  ordering[rev_ordering[i]]=i;
159  }
160  for (int i=0; i<order; i++){
161  // printf("[%d] %d -> %d\n", lens[i], i, ordering[i]);
162  rev_ord_lens[ordering[i]] = lens[i]/phase[i];
163  if (lens[i]%phase[i] > 0) rev_ord_lens[ordering[i]]++;
164  }
165 
166  for (int i=0; i<order; i++){
167  if (i==0 && i<nrow_idx){
168  lda_row[0] = 1;
169  }
170  if (i>0 && i<nrow_idx){
171  lda_row[i] = lda_row[i-1]*rev_ord_lens[i-1];
172  }
173  if (i==nrow_idx){
174  lda_col[0] = 1;
175  }
176  if (i>nrow_idx){
177  lda_col[i-nrow_idx] = lda_col[i-nrow_idx-1]*rev_ord_lens[i-1];
178  // printf("lda_col[%d] = %ld len[%d] = %d\n",i-nrow_idx, lda_col[i-nrow_idx], i, rev_ord_lens[i]);
179  }
180  }
181 
182  int * rs = rows();
183  int * cs = cols();
184  char * vs = vals();
185 
186 #ifdef USE_OMP
187  #pragma omp parallel for
188 #endif
189  for (int64_t i=0; i<nz; i++){
190  PairIterator pi(sr, tsr_data);
191  int64_t k = 0;
192  int64_t lda_k = 1;
193  for (int j=0; j<order; j++){
194  int64_t kpart;
195  if (ordering[j] < nrow_idx){
196  kpart = ((rs[i]-1)/lda_row[ordering[j]])%rev_ord_lens[ordering[j]];
197  } else {
198  kpart = ((cs[i]-1)/lda_col[ordering[j]-nrow_idx])%rev_ord_lens[ordering[j]];
199  }
200  // printf("%d %ld %d %d %ld\n",j,kpart,ordering[j],nrow_idx,lda_col[ordering[j]-nrow_idx]);
201  // if (j>0){ kpart *= lens[j-1]; }
202  k+=(kpart*phase[j]+phase_rank[j])*lda_k;
203 /* if (k>=tot_sz){
204  printf("%d kpart %ld k1 %d phase[j-1] %d phase_rank[j-1] %d lda_k %ld\n",j-1,((k-(kpart*phase[j]+phase_rank[j])*lda_k)/(lda_k/lens[j-1])-phase_rank[j-1])/phase[j-1],k-(kpart*phase[j]+phase_rank[j])*lda_k,phase[j-1],phase_rank[j-1],lda_k/lens[j-1]);
205  printf("%d kpart %ld k2 %ld phase[j] %d phase_rank[j] %d lda_k %ld\n",j,kpart,(kpart*phase[j]+phase_rank[j])*lda_k,phase[j],phase_rank[j],lda_k);
206  }*/
207  lda_k *= lens[j];
208  }
209  //ASSERT(k<tot_sz);
210 // if (k>=tot_sz) printf("k=%ld tot_sz=%ld c = %d r = %d\n",k,tot_sz,cs[i],rs[i]);
211 // printf("p[%d %d] [%d,%d]->%ld\n",phase_rank[0],phase_rank[1],rs[i],cs[i],k);
212  pi[i].write_key(k);
213  pi[i].write_val(vs+v_sz*i);
214  //printf("k=%ld col = %d row = %d\n", pi[i].k(), cs[i], rs[i]);
215  //sr->print(pi[i].d());
216 // memcpy(pi[i].d(), vs+v_sz*i, v_sz);
217  }
218  PairIterator pi2(sr, tsr_data);
219  TAU_FSTART(COO_to_kvpair_sort);
220  pi2.sort(nz);
221  TAU_FSTOP(COO_to_kvpair_sort);
222  cdealloc(ordering);
223  cdealloc(rev_ord_lens);
224  cdealloc(lda_col);
225  cdealloc(lda_row);
226  TAU_FSTOP(convert_to_COO);
227  }
228 
229 
230  void COO_Matrix::coomm(char const * A, algstrct const * sr_A, int m, int n, int k, char const * alpha, char const * B, algstrct const * sr_B, char const * beta, char * C, algstrct const * sr_C, bivar_function const * func){
231  COO_Matrix cA((char*)A);
232  int64_t nz = cA.nnz();
233  int const * rs = cA.rows();
234  int const * cs = cA.cols();
235  char const * vs = cA.vals();
236  if (func != NULL){
237  assert(sr_C->isequal(beta, sr_C->mulid()));
238  assert(alpha == NULL || sr_C->isequal(alpha, sr_C->mulid()));
239  func->ccoomm(m,n,k,vs,rs,cs,nz,B,C);
240  } else {
241  ASSERT(sr_B->el_size == sr_A->el_size);
242  ASSERT(sr_C->el_size == sr_A->el_size);
243  sr_A->coomm(m,n,k,alpha,vs,rs,cs,nz,B,beta,C,func);
244  }
245  }
246 }
int * rows() const
retrieves pointer to array row indices of each value
Definition: coo.cxx:61
void write_key(int64_t key)
sets key of head pair to key
Definition: algstrct.cxx:821
int * IA() const
retrieves prefix sum of number of nonzeros for each row (of size nrow()+1) out of all_data ...
Definition: csr.cxx:107
int64_t get_coo_size(int64_t nnz, int val_size)
Definition: coo.cxx:7
virtual bool isequal(char const *a, char const *b) const
returns true if algstrct elements a and b are equal
Definition: algstrct.cxx:340
int64_t size() const
retrieves buffer size out of all_data
Definition: coo.cxx:53
#define ASSERT(...)
Definition: util.h:88
void * alloc(int64_t len)
alloc abstraction
Definition: memcontrol.cxx:365
void sort(int64_t n)
sorts set of pairs using std::sort
Definition: algstrct.cxx:825
untyped internal class for triply-typed bivariate function
Definition: ctr_comm.h:16
virtual void coomm(int m, int n, int k, char const *alpha, char const *A, int const *rows_A, int const *cols_A, int64_t nnz_A, char const *B, char const *beta, char *C, bivar_function const *func) const
sparse version of gemm using coordinate format for A
Definition: algstrct.cxx:703
char * all_data
serialized buffer containing info and data
Definition: coo.h:17
COO_Matrix(int64_t nnz, algstrct const *sr)
constructor that allocates empty buffer
Definition: coo.cxx:12
int * cols() const
retrieves pointer to array of column indices for each value
Definition: coo.cxx:68
int64_t nnz() const
retrieves number of nonzeros out of all_data
Definition: coo.cxx:45
int * JA() const
retrieves column indices of each value in vals stored in sorted form by row
Definition: csr.cxx:119
virtual void csr_to_coo(int64_t nz, int nrow, char const *csr_vs, int const *csr_ja, int const *csr_ia, char *coo_vs, int *coo_rs, int *coo_cs) const
converts CSR sparse matrix layout to coordinate (COO) layout
Definition: algstrct.cxx:355
virtual void ccoomm(int m, int n, int k, char const *A, int const *rows_A, int const *cols_A, int64_t nnz_A, char const *B, char *C) const
Definition: ctr_comm.h:96
void get_data(int64_t nz, int order, int const *lens, int const *rev_ordering, int nrow_idx, char *tsr_data, algstrct const *sr, int const *phase, int const *phase_rank)
unfolds tensor data from COO format based on prespecification of row and column modes ...
Definition: coo.cxx:146
int64_t nnz() const
retrieves number of nonzeros out of all_data
Definition: csr.cxx:80
int64_t k() const
returns key of pair at head of ptr
Definition: algstrct.cxx:764
serialized matrix in coordinate format, meaning three arrays of dimension nnz are stored...
Definition: coo.h:14
abstraction for a serialized sparse matrix stored in column-sparse-row (CSR) layout ...
Definition: csr.h:22
static void coomm(char const *A, algstrct const *sr_A, int m, int n, int k, char const *alpha, char const *B, algstrct const *sr_B, char const *beta, char *C, algstrct const *sr_C, bivar_function const *func)
computes C = beta*C + func(alpha*A*B) where A is a COO_Matrix, while B and C are dense ...
Definition: coo.cxx:230
#define TAU_FSTOP(ARG)
Definition: util.h:281
#define TAU_FSTART(ARG)
Definition: util.h:280
void read_val(char *buf) const
sets value to the value pointed by the iterator
Definition: algstrct.cxx:776
void set_data(int64_t nz, int order, int const *lens, int const *ordering, int nrow_idx, char const *tsr_data, algstrct const *sr, int const *phase)
folds tensor data into COO format based on prespecification of row and column modes ...
Definition: coo.cxx:75
int nrow() const
retrieves number of rows out of all_data
Definition: csr.cxx:93
int val_size() const
retrieves matrix entry size out of all_data
Definition: coo.cxx:49
char * vals() const
retrieves array of values out of all_data
Definition: csr.cxx:101
int el_size
size of each element of algstrct in bytes
Definition: algstrct.h:16
int cdealloc(void *ptr)
free abstraction
Definition: memcontrol.cxx:480
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
Definition: algstrct.h:34
void write_val(char const *buf)
sets value of head pair to what is in buf
Definition: algstrct.cxx:817
int val_size() const
retrieves matrix entry size out of all_data
Definition: csr.cxx:84
virtual void init_shell(int64_t n, char *arr) const
initialize n objects to zero
Definition: algstrct.h:26
virtual char const * mulid() const
identity element for multiplication i.e. 1
Definition: algstrct.cxx:93
char * vals() const
retrieves pointer to array of values out of all_data
Definition: coo.cxx:57