3 #include "../shared/util.h"     4 #include "../contraction/ctr_comm.h"     8     val_size = std::max(val_size,64*((val_size + 63)/64));
     9     return nnz*(val_size+
sizeof(int)*2)+2*
sizeof(int64_t);
    27     int const * csr_ja = csr.
JA();
    28     int const * csr_ia = csr.
IA();
    29     char const * csr_vs = csr.
vals();
    37     int * coo_rs = 
rows();
    38     int * coo_cs = 
cols();
    42     sr->
csr_to_coo(nnz, csr.
nrow(), csr_vs, csr_ja, csr_ia, vs, coo_rs, coo_cs);
    62     int64_t n = this->
nnz();
    65     return (
int*)(
all_data + n*v_sz+2*
sizeof(int64_t));
    69     int64_t n = this->
nnz();
    72     return (
int*)(
all_data + n*(v_sz+
sizeof(int))+2*
sizeof(int64_t));
    75   void COO_Matrix::set_data(int64_t nz, 
int order, 
int const * lens, 
int const * rev_ordering, 
int nrow_idx, 
char const * tsr_data, 
algstrct const * sr, 
int const * phase){
    81     int * rev_ord_lens = (
int*)
alloc(
sizeof(
int)*order);
    82     int * ordering = (
int*)
alloc(
sizeof(
int)*order);
    83     int64_t * lda_col = (int64_t*)
alloc(
sizeof(int64_t)*(order-nrow_idx));
    84     int64_t * lda_row = (int64_t*)
alloc(
sizeof(int64_t)*nrow_idx);
    86     for (
int i=0; i<order; i++){
    87       ordering[rev_ordering[i]]=i;
    89     for (
int i=0; i<order; i++){
    91       rev_ord_lens[ordering[i]] = lens[i]/phase[i];
    92       if (lens[i]%phase[i] > 0) rev_ord_lens[ordering[i]]++;
    95     for (
int i=0; i<order; i++){
    96       if (i==0 && i<nrow_idx){
    99       if (i>0 && i<nrow_idx){
   100         lda_row[i] = lda_row[i-1]*rev_ord_lens[i-1];
   106         lda_col[i-nrow_idx] = lda_col[i-nrow_idx-1]*rev_ord_lens[i-1];
   116     #pragma omp parallel for   118     for (int64_t i=0; i<nz; i++){
   120       int64_t k = pi[i].
k();
   123       for (
int j=0; j<order; j++){
   124         int64_t kpart = (k%lens[j])/phase[j];
   125         if (ordering[j] < nrow_idx){
   126           rs[i] += kpart*lda_row[ordering[j]];
   128           cs[i] += kpart*lda_col[ordering[j]-nrow_idx];
   146   void COO_Matrix::get_data(int64_t nz, 
int order, 
int const * lens, 
int const * rev_ordering, 
int nrow_idx, 
char * tsr_data, 
algstrct const * sr, 
int const * phase, 
int const * phase_rank){
   152     int * rev_ord_lens = (
int*)
alloc(
sizeof(
int)*order);
   153     int * ordering = (
int*)
alloc(
sizeof(
int)*order);
   154     int64_t * lda_col = (int64_t*)
alloc(
sizeof(int64_t)*(order-nrow_idx));
   155     int64_t * lda_row = (int64_t*)
alloc(
sizeof(int64_t)*nrow_idx);
   157     for (
int i=0; i<order; i++){
   158       ordering[rev_ordering[i]]=i;
   160     for (
int i=0; i<order; i++){
   162       rev_ord_lens[ordering[i]] = lens[i]/phase[i];
   163       if (lens[i]%phase[i] > 0) rev_ord_lens[ordering[i]]++;
   166     for (
int i=0; i<order; i++){
   167       if (i==0 && i<nrow_idx){
   170       if (i>0 && i<nrow_idx){
   171         lda_row[i] = lda_row[i-1]*rev_ord_lens[i-1];
   177         lda_col[i-nrow_idx] = lda_col[i-nrow_idx-1]*rev_ord_lens[i-1];
   187     #pragma omp parallel for   189     for (int64_t i=0; i<nz; i++){
   193       for (
int j=0; j<order; j++){
   195         if (ordering[j] < nrow_idx){
   196           kpart = ((rs[i]-1)/lda_row[ordering[j]])%rev_ord_lens[ordering[j]];
   198           kpart = ((cs[i]-1)/lda_col[ordering[j]-nrow_idx])%rev_ord_lens[ordering[j]];
   202         k+=(kpart*phase[j]+phase_rank[j])*lda_k;
   230   void COO_Matrix::coomm(
char const * A, 
algstrct const * sr_A, 
int m, 
int n, 
int k, 
char const * alpha, 
char const * B, 
algstrct const * sr_B, 
char const * beta, 
char * C, 
algstrct const * sr_C, 
bivar_function const * func){
   232     int64_t nz = cA.
nnz(); 
   233     int const * rs = cA.
rows();
   234     int const * cs = cA.
cols();
   235     char const * vs = cA.
vals();
   238       assert(alpha == NULL || sr_C->
isequal(alpha, sr_C->
mulid()));
   239       func->
ccoomm(m,n,k,vs,rs,cs,nz,B,C);
   243       sr_A->
coomm(m,n,k,alpha,vs,rs,cs,nz,B,beta,C,func);
 int * rows() const 
retrieves pointer to array of row indices for each value 
void write_key(int64_t key)
sets key of head pair to key 
int * IA() const 
retrieves prefix sum of number of nonzeros for each row (of size nrow()+1) out of all_data ...
int64_t get_coo_size(int64_t nnz, int val_size)
virtual bool isequal(char const *a, char const *b) const 
returns true if algstrct elements a and b are equal 
int64_t size() const 
retrieves buffer size out of all_data 
void * alloc(int64_t len)
alloc abstraction 
void sort(int64_t n)
sorts set of pairs using std::sort 
untyped internal class for triply-typed bivariate function 
virtual void coomm(int m, int n, int k, char const *alpha, char const *A, int const *rows_A, int const *cols_A, int64_t nnz_A, char const *B, char const *beta, char *C, bivar_function const *func) const 
sparse version of gemm using coordinate format for A 
char * all_data
serialized buffer containing info and data 
COO_Matrix(int64_t nnz, algstrct const *sr)
constructor that allocates empty buffer 
int * cols() const 
retrieves pointer to array of column indices for each value 
int64_t nnz() const 
retrieves number of nonzeros out of all_data 
int * JA() const 
retrieves column indices of each value in vals stored in sorted form by row 
virtual void csr_to_coo(int64_t nz, int nrow, char const *csr_vs, int const *csr_ja, int const *csr_ia, char *coo_vs, int *coo_rs, int *coo_cs) const 
converts CSR sparse matrix layout to coordinate (COO) layout 
virtual void ccoomm(int m, int n, int k, char const *A, int const *rows_A, int const *cols_A, int64_t nnz_A, char const *B, char *C) const 
void get_data(int64_t nz, int order, int const *lens, int const *rev_ordering, int nrow_idx, char *tsr_data, algstrct const *sr, int const *phase, int const *phase_rank)
unfolds tensor data from COO format based on prespecification of row and column modes ...
int64_t nnz() const 
retrieves number of nonzeros out of all_data 
int64_t k() const 
returns key of pair at head of ptr 
serialized matrix in coordinate format, meaning three arrays of dimension nnz are stored...
abstraction for a serialized sparse matrix stored in compressed-sparse-row (CSR) layout ...
static void coomm(char const *A, algstrct const *sr_A, int m, int n, int k, char const *alpha, char const *B, algstrct const *sr_B, char const *beta, char *C, algstrct const *sr_C, bivar_function const *func)
computes C = beta*C + func(alpha*A*B) where A is a COO_Matrix, while B and C are dense ...
void read_val(char *buf) const 
sets value to the value pointed to by the iterator 
void set_data(int64_t nz, int order, int const *lens, int const *ordering, int nrow_idx, char const *tsr_data, algstrct const *sr, int const *phase)
folds tensor data into COO format based on prespecification of row and column modes ...
int nrow() const 
retrieves number of rows out of all_data 
int val_size() const 
retrieves matrix entry size out of all_data 
char * vals() const 
retrieves array of values out of all_data 
int el_size
size of each element of algstrct in bytes 
int cdealloc(void *ptr)
free abstraction 
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
void write_val(char const *buf)
sets value of head pair to what is in buf 
int val_size() const 
retrieves matrix entry size out of all_data 
virtual void init_shell(int64_t n, char *arr) const 
initialize n objects to zero 
virtual char const * mulid() const 
identity element for multiplication i.e. 1 
char * vals() const 
retrieves pointer to array of values out of all_data