2 #include "../contraction/ctr_comm.h" 3 #include "../shared/util.h" 9 int offset = 4*
sizeof(int64_t);
11 offset += nnz*val_size;
13 offset += (nrow_+1)*
sizeof(
int);
15 offset +=
sizeof(int)*nnz;
26 ((int64_t*)
all_data)[2] = (int64_t)nrow_;
38 int64_t nz = coom.
nnz();
40 int const * coo_rs = coom.
rows();
41 int const * coo_cs = coom.
cols();
42 char const * vs = coom.
vals();
57 ((int64_t*)
all_data)[2] = (int64_t)nrow_;
60 char * csr_vs =
vals();
70 sr->
coo_to_csr(nz, nrow_, csr_vs, csr_ja, csr_ia, vs, coo_rs, coo_cs);
102 int offset = 4*
sizeof(int64_t);
108 int64_t n = this->
nnz();
111 int offset = 4*
sizeof(int64_t);
120 int64_t n = this->
nnz();
121 int64_t nr = this->
nrow();
124 int offset = 4*
sizeof(int64_t);
128 offset += (nr+1)*
sizeof(
int);
134 void CSR_Matrix::csrmm(
char const * A,
algstrct const * sr_A,
int m,
int n,
int k,
char const * alpha,
char const * B,
algstrct const * sr_B,
char const * beta,
char * C,
algstrct const * sr_C,
bivar_function const * func,
bool do_offload){
137 assert(alpha == NULL || sr_C->
isequal(alpha, sr_C->
mulid()));
141 int64_t nz = cA.
nnz();
142 int const * ja = cA.
JA();
143 int const * ia = cA.
IA();
144 char const * vs = cA.
vals();
147 assert(alpha == NULL || sr_C->
isequal(alpha, sr_C->
mulid()));
148 func->
ccsrmm(m,n,k,vs,ja,ia,nz,B,C,sr_C);
153 sr_C->
csrmm(m,n,k,alpha,vs,ja,ia,nz,B,beta,C,func);
158 void CSR_Matrix::csrmultd(
char const * A,
algstrct const * sr_A,
int m,
int n,
int k,
char const * alpha,
char const * B,
algstrct const * sr_B,
char const * beta,
char * C,
algstrct const * sr_C,
bivar_function const * func,
bool do_offload){
162 assert(alpha == NULL || sr_C->
isequal(alpha, sr_C->
mulid()));
165 int64_t nzA = cA.
nnz();
166 int const * jA = cA.
JA();
167 int const * iA = cA.
IA();
168 char const * vsA = cA.
vals();
170 int64_t nzB = cB.
nnz();
171 int const * jB = cB.
JA();
172 int const * iB = cB.
IA();
173 char const * vsB = cB.
vals();
176 assert(alpha == NULL || sr_C->
isequal(alpha, sr_C->
mulid()));
177 func->
ccsrmultd(m,n,k,vsA,jA,iA,nzA,vsB,jB,iB,nzB,C,sr_C);
182 sr_C->
csrmultd(m,n,k,alpha,vsA,jA,iA,nzA,vsB,jB,iB,nzB,beta,C);
188 void CSR_Matrix::csrmultcsr(
char const * A,
algstrct const * sr_A,
int m,
int n,
int k,
char const * alpha,
char const * B,
algstrct const * sr_B,
char const * beta,
char *& C,
algstrct const * sr_C,
bivar_function const * func,
bool do_offload){
192 assert(alpha == NULL || sr_C->
isequal(alpha, sr_C->
mulid()));
195 int64_t nzA = cA.
nnz();
196 int const * jA = cA.
JA();
197 int const * iA = cA.
IA();
198 char const * vsA = cA.
vals();
200 int64_t nzB = cB.
nnz();
201 int const * jB = cB.
JA();
202 int const * iB = cB.
IA();
203 char const * vsB = cB.
vals();
206 assert(alpha == NULL || sr_C->
isequal(alpha, sr_C->
mulid()));
207 func->
ccsrmultcsr(m,n,k,vsA,jA,iA,nzA,vsB,jB,iB,nzB,C,sr_C);
212 sr_C->
csrmultcsr(m,n,k,alpha,vsA,jA,iA,nzA,vsB,jB,iB,nzB,beta,C);
220 int part_nnz[s], part_nrows[s];
223 char * org_vals =
vals();
226 for (
int i=0; i<s; i++){
230 for (
int i=0; i<m; i++){
232 part_nnz[i%s]+=org_ia[i+1]-org_ia[i];
235 for (
int i=0; i<s; i++){
236 tot_sz +=
get_csr_size(part_nnz[i], part_nrows[i], v_sz);
239 char * part_data = *parts_buffer;
240 for (
int i=0; i<s; i++){
241 ((int64_t*)part_data)[0] = part_nnz[i];
242 ((int64_t*)part_data)[1] = v_sz;
243 ((int64_t*)part_data)[2] = part_nrows[i];
244 ((int64_t*)part_data)[3] =
ncol();
246 char * pvals = parts[i]->
vals();
247 int * pja = parts[i]->
JA();
248 int * pia = parts[i]->
IA();
250 for (
int j=i, k=0; j<m; j+=s, k++){
251 memcpy(pvals+(pia[k]-1)*v_sz, org_vals+(org_ia[j]-1)*v_sz, (org_ia[j+1]-org_ia[j])*v_sz);
252 memcpy(pja+(pia[k]-1), org_ja+(org_ia[j]-1), (org_ia[j+1]-org_ia[j])*
sizeof(
int));
253 pia[k+1] = pia[k]+org_ia[j+1]-org_ia[j];
255 part_data +=
get_csr_size(part_nnz[i], part_nrows[i], v_sz);
261 int64_t tot_nnz=0, tot_nrow=0;
262 for (
int i=0; i<s; i++){
264 tot_nnz += csrs[i]->
nnz();
265 tot_nrow += csrs[i]->
nrow();
268 int64_t tot_ncol = csrs[0]->
ncol();
271 ((int64_t*)all_data)[1] = v_sz;
272 ((int64_t*)all_data)[2] = tot_nrow;
273 ((int64_t*)all_data)[3] = tot_ncol;
275 char * csr_vs =
vals();
281 for (
int i=0; i<tot_nrow; i++){
283 int const * pja = csrs[ipart]->
JA();
284 int const * pia = csrs[ipart]->
IA();
285 int i_nnz = pia[i/s+1]-pia[i/s];
286 memcpy(csr_vs+(csr_ia[i]-1)*v_sz,
287 csrs[ipart]->
vals()+(pia[i/s]-1)*v_sz,
289 memcpy(csr_ja+(csr_ia[i]-1),
292 csr_ia[i+1] = csr_ia[i]+i_nnz;
294 for (
int i=0; i<s; i++){
300 char * csr_vs =
vals();
306 printf(
"CSR Matrix has %ld nonzeros %d rows %d cols\n", nz,
nrow(),
ncol());
307 for (int64_t i=0; i<nz; i++){
308 while (i>=csr_ia[irow+1]-1) irow++;
309 printf(
"[%d,%d] ",irow,csr_ja[i]);
310 sr->
print(csr_vs+v_sz*i);
323 for (
int j=0; j<IA[i+1]-IA[i]; j++){
324 int row_B = JA[IA[i]+j-1]-1;
325 for (
int k=0; k<IB[row_B+1]-IB[row_B]; k++){
326 int idx_B = IB[row_B]+k-1;
327 has_col[JB[idx_B]-1] = 1;
339 char const * vA = A.
vals();
340 int const *
JA = A.
JA();
341 int const *
IA = A.
IA();
343 char const * vB = B.
vals();
344 int const * JB = B.
JA();
345 int const * IB = B.
IA();
348 int * IC = (
int*)
alloc(
sizeof(
int)*(nrow+1));
349 int * has_col = (
int*)
alloc(
sizeof(
int)*
ncol);
351 for (
int i=0; i<
nrow; i++){
352 memset(has_col, 0,
sizeof(
int)*ncol);
354 for (
int j=0; j<IA[i+1]-IA[i]; j++){
355 has_col[JA[IA[i]+j-1]-1] = 1;
357 for (
int j=0; j<IB[i+1]-IB[i]; j++){
358 has_col[JB[IB[i]+j-1]-1] = 1;
360 for (
int j=0; j<
ncol; j++){
361 IC[i+1] += has_col[j];
365 char * vC = C.
vals();
367 memcpy(C.
IA(), IC,
sizeof(int)*(nrow+1));
370 int64_t * rev_col = (int64_t*)
alloc(
sizeof(int64_t)*
ncol);
371 for (
int i=0; i<
nrow; i++){
372 memset(has_col, 0,
sizeof(
int)*ncol);
373 for (
int j=0; j<IA[i+1]-IA[i]; j++){
374 has_col[JA[IA[i]+j-1]-1] = 1;
376 for (
int j=0; j<IB[i+1]-IB[i]; j++){
377 has_col[JB[IB[i]+j-1]-1] = 1;
380 for (
int j=0; j<
ncol; j++){
382 JC[IC[i]+vs-1] = j+1;
384 rev_col[j] = (IC[i]+vs-1)*el_size;
388 memset(has_col, 0,
sizeof(
int)*ncol);
389 for (
int j=0; j<IA[i+1]-IA[i]; j++){
390 int idx_A = IA[i]+j-1;
391 memcpy(vC+rev_col[JA[idx_A]-1],vA+idx_A*el_size,el_size);
392 has_col[JA[idx_A]-1] = 1;
394 for (
int j=0; j<IB[i+1]-IB[i]; j++){
395 int idx_B = IB[i]+j-1;
396 if (has_col[JB[idx_B]-1])
397 adder->
accum(vB+idx_B*el_size,vC+rev_col[JB[idx_B]-1]);
399 memcpy(vC+rev_col[JB[idx_B]-1],vB+idx_B*el_size,el_size);
virtual void csrmultd(int m, int n, int k, char const *alpha, char const *A, int const *JA, int const *IA, int64_t nnz_A, char const *B, int const *JB, int const *IB, int64_t nnz_B, char const *beta, char *C) const
sparse version of gemm using CSR format for A and B
int * rows() const
retrieves pointer to array of row indices for each value
int * IA() const
retrieves prefix sum of number of nonzeros for each row (of size nrow()+1) out of all_data ...
virtual bool isequal(char const *a, char const *b) const
returns true if algstrct elements a and b are equal
virtual void ccsrmultcsr(int m, int n, int k, char const *A, int const *JA, int const *IA, int nnz_A, char const *B, int const *JB, int const *IB, int nnz_B, char *&C_CSR, algstrct const *sr_C) const
static char * csr_add(char *cA, char *cB, accumulatable const *adder)
void * alloc(int64_t len)
alloc abstraction
virtual void accum(char const *a, char *b) const
b+=a
untyped internal class for triply-typed bivariate function
static void csrmultcsr(char const *A, algstrct const *sr_A, int m, int n, int k, char const *alpha, char const *B, algstrct const *sr_B, char const *beta, char *&C, algstrct const *sr_C, bivar_function const *func, bool do_offload)
computes C = beta*C + func(alpha*A*B) where A, B, and C are all CSR_Matrices ...
static void compute_has_col(int const *JA, int const *IA, int const *JB, int const *IB, int i, int *has_col)
int ncol() const
retrieves number of columns out of all_data
void print(algstrct const *sr)
outputs matrix data to stdout, intended for debugging
virtual void coffload_csrmm(int m, int n, int k, char const *all_data, char const *B, char *C) const
int * cols() const
retrieves pointer to array of column indices for each value
int64_t nnz() const
retrieves number of nonzeros out of all_data
int * JA() const
retrieves column indices of each value in vals stored in sorted form by row
int64_t nnz() const
retrieves number of nonzeros out of all_data
serialized matrix in coordinate format, meaning three arrays of dimension nnz are stored...
abstract class that knows how to add
int alloc_ptr(int64_t len, void **const ptr)
alloc abstraction
abstraction for a serialized sparse matrix stored in compressed-sparse-row (CSR) layout ...
virtual void csrmm(int m, int n, int k, char const *alpha, char const *A, int const *JA, int const *IA, int64_t nnz_A, char const *B, char const *beta, char *C, bivar_function const *func) const
sparse version of gemm using CSR format for A
virtual void ccsrmultd(int m, int n, int k, char const *A, int const *JA, int const *IA, int nnz_A, char const *B, int const *JB, int const *IB, int nnz_B, char *C, algstrct const *sr_C) const
virtual void print(char const *a, FILE *fp=stdout) const
prints the value
virtual void ccsrmm(int m, int n, int k, char const *A, int const *JA, int const *IA, int64_t nnz_A, char const *B, char *C, algstrct const *sr_C) const
int nrow() const
retrieves number of rows out of all_data
int val_size() const
retrieves matrix entry size out of all_data
char * all_data
serialized buffer containing all info, index, and values related to matrix
char * vals() const
retrieves array of values out of all_data
int el_size
size of each element of algstrct in bytes
static void csrmm(char const *A, algstrct const *sr_A, int m, int n, int k, char const *alpha, char const *B, algstrct const *sr_B, char const *beta, char *C, algstrct const *sr_C, bivar_function const *func, bool do_offload)
computes C = beta*C + func(alpha*A*B) where A is a CSR_Matrix, while B and C are dense ...
int cdealloc(void *ptr)
free abstraction
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
int64_t get_csr_size(int64_t nnz, int nrow_, int val_size)
computes the size of a serialized CSR matrix
void partition(int s, char **parts_buffer, CSR_Matrix **parts)
splits CSR matrix into s submatrices (returned) corresponding to subsets of rows, all parts allocated...
virtual void csrmultcsr(int m, int n, int k, char const *alpha, char const *A, int const *JA, int const *IA, int64_t nnz_A, char const *B, int const *JB, int const *IB, int64_t nnz_B, char const *beta, char *&C_CSR) const
int val_size() const
retrieves matrix entry size out of all_data
virtual void coo_to_csr(int64_t nz, int nrow, char *csr_vs, int *csr_cs, int *csr_rs, char const *coo_vs, int const *coo_rs, int const *coo_cs) const
converts coordinate sparse matrix layout to CSR layout
virtual void init_shell(int64_t n, char *arr) const
initialize n objects to zero
int64_t size() const
retrieves buffer size out of all_data
static void csrmultd(char const *A, algstrct const *sr_A, int m, int n, int k, char const *alpha, char const *B, algstrct const *sr_B, char const *beta, char *C, algstrct const *sr_C, bivar_function const *func, bool do_offload)
computes C = beta*C + func(alpha*A*B) where A and B are CSR_Matrices, while C is dense ...
virtual char const * mulid() const
identity element for multiplication i.e. 1
char * vals() const
retrieves pointer to array of values out of all_data