3 #include "../shared/util.h" 4 #include "../contraction/ctr_comm.h" 8 val_size = std::max(val_size,64*((val_size + 63)/64));
9 return nnz*(val_size+
sizeof(int)*2)+2*
sizeof(int64_t);
27 int const * csr_ja = csr.
JA();
28 int const * csr_ia = csr.
IA();
29 char const * csr_vs = csr.
vals();
37 int * coo_rs =
rows();
38 int * coo_cs =
cols();
42 sr->
csr_to_coo(nnz, csr.
nrow(), csr_vs, csr_ja, csr_ia, vs, coo_rs, coo_cs);
62 int64_t n = this->
nnz();
65 return (
int*)(
all_data + n*v_sz+2*
sizeof(int64_t));
69 int64_t n = this->
nnz();
72 return (
int*)(
all_data + n*(v_sz+
sizeof(int))+2*
sizeof(int64_t));
75 void COO_Matrix::set_data(int64_t nz,
int order,
int const * lens,
int const * rev_ordering,
int nrow_idx,
char const * tsr_data,
algstrct const * sr,
int const * phase){
81 int * rev_ord_lens = (
int*)
alloc(
sizeof(
int)*order);
82 int * ordering = (
int*)
alloc(
sizeof(
int)*order);
83 int64_t * lda_col = (int64_t*)
alloc(
sizeof(int64_t)*(order-nrow_idx));
84 int64_t * lda_row = (int64_t*)
alloc(
sizeof(int64_t)*nrow_idx);
86 for (
int i=0; i<order; i++){
87 ordering[rev_ordering[i]]=i;
89 for (
int i=0; i<order; i++){
91 rev_ord_lens[ordering[i]] = lens[i]/phase[i];
92 if (lens[i]%phase[i] > 0) rev_ord_lens[ordering[i]]++;
95 for (
int i=0; i<order; i++){
96 if (i==0 && i<nrow_idx){
99 if (i>0 && i<nrow_idx){
100 lda_row[i] = lda_row[i-1]*rev_ord_lens[i-1];
106 lda_col[i-nrow_idx] = lda_col[i-nrow_idx-1]*rev_ord_lens[i-1];
116 #pragma omp parallel for 118 for (int64_t i=0; i<nz; i++){
120 int64_t k = pi[i].
k();
123 for (
int j=0; j<order; j++){
124 int64_t kpart = (k%lens[j])/phase[j];
125 if (ordering[j] < nrow_idx){
126 rs[i] += kpart*lda_row[ordering[j]];
128 cs[i] += kpart*lda_col[ordering[j]-nrow_idx];
146 void COO_Matrix::get_data(int64_t nz,
int order,
int const * lens,
int const * rev_ordering,
int nrow_idx,
char * tsr_data,
algstrct const * sr,
int const * phase,
int const * phase_rank){
152 int * rev_ord_lens = (
int*)
alloc(
sizeof(
int)*order);
153 int * ordering = (
int*)
alloc(
sizeof(
int)*order);
154 int64_t * lda_col = (int64_t*)
alloc(
sizeof(int64_t)*(order-nrow_idx));
155 int64_t * lda_row = (int64_t*)
alloc(
sizeof(int64_t)*nrow_idx);
157 for (
int i=0; i<order; i++){
158 ordering[rev_ordering[i]]=i;
160 for (
int i=0; i<order; i++){
162 rev_ord_lens[ordering[i]] = lens[i]/phase[i];
163 if (lens[i]%phase[i] > 0) rev_ord_lens[ordering[i]]++;
166 for (
int i=0; i<order; i++){
167 if (i==0 && i<nrow_idx){
170 if (i>0 && i<nrow_idx){
171 lda_row[i] = lda_row[i-1]*rev_ord_lens[i-1];
177 lda_col[i-nrow_idx] = lda_col[i-nrow_idx-1]*rev_ord_lens[i-1];
187 #pragma omp parallel for 189 for (int64_t i=0; i<nz; i++){
193 for (
int j=0; j<order; j++){
195 if (ordering[j] < nrow_idx){
196 kpart = ((rs[i]-1)/lda_row[ordering[j]])%rev_ord_lens[ordering[j]];
198 kpart = ((cs[i]-1)/lda_col[ordering[j]-nrow_idx])%rev_ord_lens[ordering[j]];
202 k+=(kpart*phase[j]+phase_rank[j])*lda_k;
230 void COO_Matrix::coomm(
char const * A,
algstrct const * sr_A,
int m,
int n,
int k,
char const * alpha,
char const * B,
algstrct const * sr_B,
char const * beta,
char * C,
algstrct const * sr_C,
bivar_function const * func){
232 int64_t nz = cA.
nnz();
233 int const * rs = cA.
rows();
234 int const * cs = cA.
cols();
235 char const * vs = cA.
vals();
238 assert(alpha == NULL || sr_C->
isequal(alpha, sr_C->
mulid()));
239 func->
ccoomm(m,n,k,vs,rs,cs,nz,B,C);
243 sr_A->
coomm(m,n,k,alpha,vs,rs,cs,nz,B,beta,C,func);
int * rows() const
retrieves pointer to array of row indices for each value
void write_key(int64_t key)
sets key of head pair to key
int * IA() const
retrieves prefix sum of number of nonzeros for each row (of size nrow()+1) out of all_data ...
int64_t get_coo_size(int64_t nnz, int val_size)
virtual bool isequal(char const *a, char const *b) const
returns true if algstrct elements a and b are equal
int64_t size() const
retrieves buffer size out of all_data
void * alloc(int64_t len)
alloc abstraction
void sort(int64_t n)
sorts set of pairs using std::sort
untyped internal class for triply-typed bivariate function
virtual void coomm(int m, int n, int k, char const *alpha, char const *A, int const *rows_A, int const *cols_A, int64_t nnz_A, char const *B, char const *beta, char *C, bivar_function const *func) const
sparse version of gemm using coordinate format for A
char * all_data
serialized buffer containing info and data
COO_Matrix(int64_t nnz, algstrct const *sr)
constructor that allocates empty buffer
int * cols() const
retrieves pointer to array of column indices for each value
int64_t nnz() const
retrieves number of nonzeros out of all_data
int * JA() const
retrieves column indices of each value in vals stored in sorted form by row
virtual void csr_to_coo(int64_t nz, int nrow, char const *csr_vs, int const *csr_ja, int const *csr_ia, char *coo_vs, int *coo_rs, int *coo_cs) const
converts CSR sparse matrix layout to coordinate (COO) layout
virtual void ccoomm(int m, int n, int k, char const *A, int const *rows_A, int const *cols_A, int64_t nnz_A, char const *B, char *C) const
void get_data(int64_t nz, int order, int const *lens, int const *rev_ordering, int nrow_idx, char *tsr_data, algstrct const *sr, int const *phase, int const *phase_rank)
unfolds tensor data from COO format based on prespecification of row and column modes ...
int64_t nnz() const
retrieves number of nonzeros out of all_data
int64_t k() const
returns key of pair at head of ptr
serialized matrix in coordinate format, meaning three arrays of dimension nnz are stored...
abstraction for a serialized sparse matrix stored in compressed-sparse-row (CSR) layout ...
static void coomm(char const *A, algstrct const *sr_A, int m, int n, int k, char const *alpha, char const *B, algstrct const *sr_B, char const *beta, char *C, algstrct const *sr_C, bivar_function const *func)
computes C = beta*C + func(alpha*A*B) where A is a COO_Matrix, while B and C are dense ...
void read_val(char *buf) const
sets value to the value pointed to by the iterator
void set_data(int64_t nz, int order, int const *lens, int const *ordering, int nrow_idx, char const *tsr_data, algstrct const *sr, int const *phase)
folds tensor data into COO format based on prespecification of row and column modes ...
int nrow() const
retrieves number of rows out of all_data
int val_size() const
retrieves matrix entry size out of all_data
char * vals() const
retrieves array of values out of all_data
int el_size
size of each element of algstrct in bytes
int cdealloc(void *ptr)
free abstraction
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
void write_val(char const *buf)
sets value of head pair to what is in buf
int val_size() const
retrieves matrix entry size out of all_data
virtual void init_shell(int64_t n, char *arr) const
initialize n objects to zero
virtual char const * mulid() const
identity element for multiplication i.e. 1
char * vals() const
retrieves pointer to array of values out of all_data