ctf/spr__seq__sum_8cxx_source.html

 #include "spr_seq_sum.h"
 #include "../shared/iter_tsr.h"
 #include "../shared/util.h"

 namespace CTF_int{
   /**
    * \brief recursive level of the sparse-A into dense-B accumulation: iterates
    *        over dimension idim of B and recurses into dimension idim-1
    *
    * alpha, A, size_A, sr_A, beta, B, sr_B, order_B and func are not used at
    * this level; they are forwarded unchanged to the next level.
    *
    * \param[in] idx_B index of the first element of B covered by this call,
    *                  expressed with the strides in lda_B
    * \param[in] edge_len_B edge lengths of B
    * \param[in] lda_B stride of each dimension of B
    * \param[in] sym_B symmetry flag of each dimension of B; when sym_B[idim] is
    *                  not NS the range of dimension idim is limited by the
    *                  current index of dimension idim+1
    */
   template<int idim>
   void spA_dnB_seq_sum_loop(char const *            alpha,
                             ConstPairIterator &     A,
                             int64_t &               size_A,
                             algstrct const *        sr_A,
                             char const *            beta,
                             char *&                 B,
                             algstrct const *        sr_B,
                             int                     order_B,
                             int64_t                 idx_B,
                             int const *             edge_len_B,
                             int64_t const *         lda_B,
                             int const *             sym_B,
                             univar_function const * func){
     // number of iterations along this dimension: the full edge length, or for a
     // symmetric dimension the index of dimension idim+1 decoded from idx_B
     int const extent = (sym_B[idim] == NS)
                      ? edge_len_B[idim]
                      : (idx_B/lda_B[idim+1])%edge_len_B[idim+1];

     int64_t const stride = lda_B[idim];
     int64_t sub_idx = idx_B;
     for (int step=0; step<extent; step++){
       spA_dnB_seq_sum_loop<idim-1>(alpha, A, size_A, sr_A, beta, B, sr_B,
                                    order_B, sub_idx,
                                    edge_len_B, lda_B, sym_B, func);
       sub_idx += stride;
     }
   }

   /**
    * \brief innermost level of the sparse-A into dense-B accumulation: walks
    *        dimension 0 of B element by element and folds in every pair of A
    *        whose key equals the current index
    *
    * \param[in] alpha scaling factor applied to each value of A; skipped when
    *                  NULL or equal to sr_A->mulid()
    * \param[in,out] A iterator over the remaining pairs of A, advanced past
    *                  every pair consumed here
    * \param[in,out] size_A number of pairs left in A, decremented per consumed pair
    * \param[in] sr_A algebraic structure of A
    * \param[in] beta not used at this level
    * \param[in,out] B pointer to the current element of B, advanced by
    *                  sr_B->el_size per visited element
    * \param[in] sr_B algebraic structure of B
    * \param[in] order_B not used at this level
    * \param[in] idx_B index of the first element of B visited by this call
    * \param[in] edge_len_B edge lengths of B
    * \param[in] lda_B stride of each dimension of B
    * \param[in] sym_B symmetry flag of each dimension of B
    * \param[in] func if non-NULL, values are accumulated with func->acc_f
    *                 instead of sr_B->add
    */
   template<>
   void spA_dnB_seq_sum_loop<0>(char const *            alpha,
                                ConstPairIterator &     A,
                                int64_t &               size_A,
                                algstrct const *        sr_A,
                                char const *            beta,
                                char *&                 B,
                                algstrct const *        sr_B,
                                int                     order_B,
                                int64_t                 idx_B,
                                int const *             edge_len_B,
                                int64_t const *         lda_B,
                                int const *             sym_B,
                                univar_function const * func){

     // full edge length, or for a symmetric dimension the index of dimension 1
     int row_len = edge_len_B[0];
     if (sym_B[0] != NS) row_len = (idx_B/lda_B[1])%edge_len_B[1];

     for (int col=0; col<row_len; col++, B += sr_B->el_size, idx_B++){
       // consume every remaining pair of A that targets the current element
       for (; size_A > 0 && idx_B == A.k(); A = A[1], size_A--){
         bool const unit_alpha = sr_A->isequal(alpha, sr_A->mulid()) || alpha == NULL;
         if (unit_alpha){
           // no scaling needed, use the stored value directly
           if (func == NULL) sr_B->add(A.d(), B, B);
           else              func->acc_f(A.d(), B, sr_B);
         } else {
           // scale into a scratch element first
           char scaled[sr_A->el_size];
           sr_A->mul(A.d(), alpha, scaled);
           if (func == NULL) sr_B->add(scaled, B, B);
           else              func->acc_f(scaled, B, sr_B);
         }
       }
     }
   }

   // Explicit instantiation of the highest-order loop. Its body calls the
   // <idim-1> version, so each lower order is instantiated in turn down to the
   // <0> specialization.
   template
   void spA_dnB_seq_sum_loop< MAX_ORD >(char const * alpha,
                                        ConstPairIterator & A,
                                        int64_t & size_A,
                                        algstrct const * sr_A,
                                        char const * beta,
                                        char *& B,
                                        algstrct const * sr_B,
                                        int order_B,
                                        int64_t idx_B,
                                        int const * edge_len_B,
                                        int64_t const * lda_B,
                                        int const * sym_B,
                                        univar_function const * func);


   /**
    * \brief accumulates a sparse tensor A, given as key-value pairs, into a
    *        dense tensor B after scaling B by beta
    *
    * B is first scaled by beta: left untouched when beta equals sr_B->mulid(),
    * set to sr_B->addid() when beta equals sr_B->addid() or is NULL, and
    * multiplied by beta otherwise. The same rule is applied to a scalar B
    * (order_B == 0) and to a higher-order B. Each value of A is then scaled by
    * alpha (when alpha is non-NULL) and folded into the element of B selected
    * by its key, using func->acc_f when func is non-NULL and sr_B->add otherwise.
    *
    * \param[in] alpha scaling factor for A, NULL for no scaling
    * \param[in] A buffer of size_A key-value pairs; the dense traversal consumes
    *              them in increasing key order
    * \param[in] size_A number of pairs in A
    * \param[in] sr_A algebraic structure of A
    * \param[in] beta scaling factor for B (see above)
    * \param[in,out] B dense data of B, updated in place
    * \param[in] sr_B algebraic structure of B
    * \param[in] order_B number of dimensions of B, at most MAX_ORD
    * \param[in] edge_len_B edge lengths of B
    * \param[in] sym_B symmetry flag of each dimension of B
    * \param[in] func optional accumulation function, NULL for plain addition
    */
   void spA_dnB_seq_sum(char const *            alpha,
                        char const *            A,
                        int64_t                 size_A,
                        algstrct const *        sr_A,
                        char const *            beta,
                        char *                  B,
                        algstrct const *        sr_B,
                        int                     order_B,
                        int const *             edge_len_B,
                        int const *             sym_B,
                        univar_function const * func){
     TAU_FSTART(spA_dnB_seq_sum);
     if (order_B == 0){
       // scale the single element of B; a zero or NULL beta resets it to the
       // additive identity instead of being passed to mul, matching the dense
       // branch below
       if (!sr_B->isequal(beta, sr_B->mulid())){
         if (sr_B->isequal(beta, sr_B->addid()) || sr_B->isequal(beta, NULL))
           sr_B->set(B, sr_B->addid(), 1);
         else
           sr_B->mul(beta, B, B);
       }
       ConstPairIterator pi(sr_A, A);
       // scratch element for alpha-scaled values, reused for every pair
       char tmp_buf[sr_A->el_size];
       for (int64_t i=0; i<size_A; i++){
         char const * tmp_ptr = pi[i].d();
         if (alpha != NULL){
           sr_A->mul(alpha, tmp_ptr, tmp_buf);
           tmp_ptr = tmp_buf;
         }
         if (func != NULL){
           func->acc_f(tmp_ptr, B, sr_B);
         } else {
           sr_B->add(tmp_ptr, B, B);
         }
       }
     } else {
       // the templated loop is only instantiated up to MAX_ORD
       ASSERT(order_B<=MAX_ORD);

       int64_t sz_B = sy_packed_size(order_B, edge_len_B, sym_B);
       if (!sr_B->isequal(beta, sr_B->mulid())){
         if (sr_B->isequal(beta, sr_B->addid()) || sr_B->isequal(beta, NULL))
           sr_B->set(B, sr_B->addid(), sz_B);
         else
           sr_B->scal(sz_B, beta, B, 1);
       }

       // stride of each dimension: product of the edge lengths below it
       int64_t lda_B[order_B];
       for (int i=0; i<order_B; i++){
         if (i==0) lda_B[i] = 1;
         else      lda_B[i] = lda_B[i-1]*edge_len_B[i-1];
       }

       ConstPairIterator pA(sr_A, A);
       int64_t idx = 0;
       SWITCH_ORD_CALL(spA_dnB_seq_sum_loop, order_B-1, alpha, pA, size_A, sr_A, beta, B, sr_B, order_B, idx, edge_len_B, lda_B, sym_B, func);
     }
     TAU_FSTOP(spA_dnB_seq_sum);
   }

   /**
    * \brief placeholder for summing a dense tensor A into a sparse tensor B;
    *        not implemented, the body only trips an assertion
    *
    * None of the parameters are read and neither new_B nor new_size_B is written.
    * NOTE(review): if assert is the standard macro, a build with NDEBUG defined
    * would return normally with both outputs untouched -- confirm that no caller
    * reaches this function.
    */
   void dnA_spB_seq_sum(char const *            alpha,
                        char const *            A,
                        algstrct const *        sr_A,
                        int                     order_A,
                        int const *             edge_len_A,
                        int const *             sym_A,
                        char const *            beta,
                        char const *            B,
                        int64_t                 size_B,
                        char *&                 new_B,
                        int64_t &               new_size_B,
                        algstrct const *        sr_B,
                        univar_function const * func){
     // unimplemented
     assert(0);
   }

   /**
    * \brief merges two key-sorted pair arrays, prs_A and prs_B, into a newly
    *        allocated pair array pprs_new
    *
    * Each pair of A stands for map_pfx consecutive output keys,
    * prs_A[w].k()*map_pfx+mw for mw in [0,map_pfx). Values of A are scaled by
    * alpha and values of B by beta when the respective factor is non-NULL.
    * Where a key is present in both inputs the values are combined with
    * sr_B->add, or with func->acc_f when func is non-NULL. Keys present only in
    * B are copied (and scaled). Keys present only in A are copied (func NULL),
    * mapped through func->apply_f (func is not an accumulator), or dropped
    * (func is an accumulator). When map_pfx == 1, consecutive pairs of A with
    * equal keys are accumulated into one output pair.
    *
    * \param[in] sr_A algebraic structure of A
    * \param[in] nA number of pairs in prs_A
    * \param[in] prs_A pairs of A, traversed in increasing key order
    * \param[in] beta scaling factor for values of B, NULL for none
    * \param[in] sr_B algebraic structure of B and of the output
    * \param[in] nB number of pairs in prs_B
    * \param[in] prs_B pairs of B, traversed in increasing key order
    * \param[in] alpha scaling factor for values of A, NULL for none
    * \param[out] nnew number of pairs in the output
    * \param[out] pprs_new output pairs, allocated here with sr_B->pair_alloc
    * \param[in] func optional function applied to / accumulated from values of A
    * \param[in] map_pfx number of output keys generated per pair of A
    */
   void spspsum(algstrct const *        sr_A,
                int64_t                 nA,
                ConstPairIterator       prs_A,
                char const *            beta,
                algstrct const *        sr_B,
                int64_t                 nB,
                ConstPairIterator       prs_B,
                char const *            alpha,
                int64_t &               nnew,
                char *&                 pprs_new,
                univar_function const * func,
                int64_t                 map_pfx){

     TAU_FSTART(spA_spB_seq_sum);
     // first pass: count the unique keys in the union of prs_A and prs_B,
     // starting from the nB keys of B and adding each key found only in A
     nnew = nB;
     // with an accumulator func, keys found only in A produce no output pair
     bool is_acc = (func != NULL && func->is_accumulator());
     TAU_FSTART(spA_spB_seq_sum_pre);
     // the inner while consumes all of A, so the outer for body runs at most once
     for (int64_t t=0,ww=0; ww<nA*map_pfx; ww++){
       while (ww<nA*map_pfx){
         // w indexes the pair of A, mw the offset within its map_pfx output keys
         int64_t w = ww/map_pfx;
         int64_t mw = ww%map_pfx;
         if (t<nB && prs_B[t].k() < prs_A[w].k()*map_pfx+mw)
           t++;
         else if (t<nB && prs_B[t].k() == prs_A[w].k()*map_pfx+mw){
           t++;
           ww++;
         } else {
           //ASSERT(map_pfx == 1);
           // key only in A: count it unless it repeats the previous key of A
           // (the comparison is only reached when map_pfx == 1, where ww == w)
           if (!is_acc && (map_pfx != 1 || ww==0 || prs_A[ww-1].k() != prs_A[ww].k()))
             nnew++;
           ww++; w=ww;
         }
       }
     }
     TAU_FSTOP(spA_spB_seq_sum_pre);
 //    printf("nB = %ld nA = %ld nnew = %ld\n",nB,nA,nnew);
     pprs_new = sr_B->pair_alloc(nnew);
     PairIterator prs_new(sr_B, pprs_new);
     // second pass: each iteration of the for loop computes one pair of prs_new
     //    (several pairs of A may contribute to it);
     //    t and ww are advanced inside the body, n by the loop header,
     //    except that a dropped key of A undoes the increment with n--
     int64_t n=0;
     for (int64_t t=0,ww=0; n<nnew; n++){
       /*if (n>0){
         printf("n=%ld\n",n-1);
         sr_A->print(prs_new[n-1].d());
       }*/
       int64_t w = ww/map_pfx;
       int64_t mw = ww%map_pfx;
       // set when the current key of A is dropped (accumulator func, key not in B)
       bool skip = 0;
       if (t<nB && (w==nA || prs_B[t].k() < prs_A[w].k()*map_pfx+mw)){
         // next key comes from B only: copy the pair, then overwrite the value
         // with its beta-scaled form if requested
         sr_B->copy_pair(prs_new[n].ptr, prs_B[t].ptr);
         if (beta != NULL)
           sr_B->mul(prs_B[t].d(), beta, prs_new[n].d());
         t++;
       } else {
         /*if (t<nB)
           printf("%ld %ld\n",prs_B[t].k(), prs_A[w].k()*map_pfx+mw);*/
         if (t>=nB || prs_B[t].k() > prs_A[w].k()*map_pfx+mw){
           // next key comes from A only
           if (func == NULL){
             if (map_pfx == 1){
               sr_A->copy_pair(prs_new[n].ptr, prs_A[w].ptr);
             } else {
               // write the expanded key into the leading int64_t of the pair
               ((int64_t*)prs_new[n].ptr)[0] = prs_A[w].k()*map_pfx+mw;
               prs_new[n].write_val(prs_A[w].d());
             }
             if (alpha != NULL)
               sr_A->mul(prs_new[n].d(), alpha, prs_new[n].d());
           } else {
             //((int64_t*)prs_new[n].ptr)[0] = prs_A[w].k();
             if (!is_acc){
               ((int64_t*)prs_new[n].ptr)[0] = prs_A[w].k()*map_pfx+mw;
               if (alpha != NULL){
                 char a[sr_A->el_size];
                 sr_A->mul(prs_A[w].d(), alpha, a);
                 //if (sr_B->addid() != NULL){
                  // prs_new[n].write_val(sr_B->addid());
                   func->apply_f(a, prs_new[n].d());
                // } else {

                 //}
               } else {
              //   prs_new[n].write_val(sr_B->addid());
                 func->apply_f(prs_A[w].d(), prs_new[n].d());
               }
             // accumulator func: no output pair for this key, so cancel the
             // n++ of the loop header and skip accumulation of its repeats
             } else { n--; skip=1; }
           }
           ww++;
         } else {
           // key present in both A and B: scale each side into a scratch
           // element, combine into b, and store b under the key of B
           char a[sr_A->el_size];
           char b[sr_B->el_size];
           if (alpha != NULL){
             sr_A->mul(prs_A[w].d(), alpha, a);
           } else {
             prs_A[w].read_val(a);
           }
           if (beta != NULL){
             sr_B->mul(prs_B[t].d(), beta, b);
           } else {
             prs_B[t].read_val(b);
           }
           if (func == NULL){
             sr_B->add(a, b, b);
           } else {
             func->acc_f(a, b, sr_B);
           }
           prs_new[n].write_val(b);
           ((int64_t*)(prs_new[n].ptr))[0] = prs_B[t].k();
           t++;
           ww++;
         }
         // accumulate any repeated keys of A into the pair just written
         // (only done when map_pfx == 1)
         while (map_pfx == 1 && ww > 0 && ww<nA && prs_A[ww].k() == prs_A[ww-1].k()){
           if (!skip){
             if (alpha != NULL){
               char a[sr_A->el_size];
               sr_A->mul(prs_A[ww].d(), alpha, a);
               if (func == NULL)
                 sr_B->add(prs_new[n].d(), a, prs_new[n].d());
               else
                 func->acc_f(a, prs_new[n].d(), sr_B);
             } else {
               if (func == NULL)
                 sr_B->add(prs_new[n].d(), prs_A[ww].d(), prs_new[n].d());
               else
                 func->acc_f(prs_A[ww].d(), prs_new[n].d(), sr_B);
             }
           }
           ww++; w=ww;
         }
       }
       /*if (n>=0){
         printf("%ldth value is ", n);
         sr_B->print(prs_new[n].d());
         printf(" with key %ld\n",prs_new[n].k());
       }*/
     }
     // the merge must have produced exactly the number of pairs counted above
     ASSERT(n==nnew);
     TAU_FSTOP(spA_spB_seq_sum);
   }


   void spA_spB_seq_sum(char const *            alpha,
                        char const *            A,
                        int64_t                 size_A,
                        algstrct const *        sr_A,
                        char const *            beta,
                        char *                  B,
                        int64_t                 size_B,
                        char *&                 new_B,
                        int64_t &               new_size_B,
                        algstrct const *        sr_B,
                        univar_function const * func,
                        int64_t                 map_pfx){

 /*      if (!sr_B->isequal(beta, sr_B->mulid())){
         printf("scaling B by 0\n");
         sr_B->scal(size_B, beta, B, 1);
       }*/
       spspsum(sr_A, size_A, ConstPairIterator(sr_A, A), beta,
               sr_B, size_B, ConstPairIterator(sr_B, B),alpha,
               new_size_B, new_B, func, map_pfx);
   }

 }
CTF_int::dnA_spB_seq_sum
void dnA_spB_seq_sum(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, char const *beta, char const *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func)
performs summation between two sparse tensors; assumes B contains key-value pairs sorted by key...
Definition: spr_seq_sum.cxx:149

CTF_int::ConstPairIterator::d
char const * d() const
returns value of pair at head of ptr
Definition: algstrct.cxx:768

CTF_int::spA_dnB_seq_sum_loop
void spA_dnB_seq_sum_loop(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
Definition: spr_seq_sum.cxx:7

CTF_int::algstrct::pair_alloc
virtual char * pair_alloc(int64_t n) const
allocate space for n (int64_t,dtype) pairs, necessary for object types
Definition: algstrct.cxx:681

CTF_int::algstrct::isequal
virtual bool isequal(char const *a, char const *b) const
returns true if algstrct elements a and b are equal
Definition: algstrct.cxx:340

MAX_ORD
#define MAX_ORD
Definition: util.h:103

CTF_int::spA_dnB_seq_sum
void spA_dnB_seq_sum(char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, univar_function const *func)
performs summation between two sparse tensors; assumes A contains key-value pairs sorted by key...
Definition: spr_seq_sum.cxx:95

ctf.core.b
b
Definition: core.pyx:386

ASSERT
#define ASSERT(...)
Definition: util.h:88

NS
Definition: common.h:37

CTF_int::univar_function
untyped internal class for doubly-typed univariate function
Definition: sum_tsr.h:14

CTF_int::algstrct::addid
virtual char const * addid() const
identity element for addition i.e. 0
Definition: algstrct.cxx:89

CTF_int::PairIterator
Definition: algstrct.h:434

CTF_int::algstrct::copy_pair
virtual void copy_pair(char *a, char const *b) const
copies pair b to element a
Definition: algstrct.cxx:542

ctf.core.a
a
Definition: core.pyx:385

CTF_int::algstrct::set
virtual void set(char *a, char const *b, int64_t n) const
sets n elements of array a to value b
Definition: algstrct.cxx:629

spr_seq_sum.h

CTF_int::univar_function::acc_f
virtual void acc_f(char const *a, char *b, CTF_int::algstrct const *sr_B) const
compute b = b+f(a)
Definition: sum_tsr.h:33

CTF_int::univar_function::is_accumulator
virtual bool is_accumulator() const
Definition: sum_tsr.h:63

CTF_int::ConstPairIterator::k
int64_t k() const
returns key of pair at head of ptr
Definition: algstrct.cxx:764

SWITCH_ORD_CALL
#define SWITCH_ORD_CALL(F, act_ord,...)
Definition: util.h:119

CTF_int::ConstPairIterator
Definition: algstrct.h:380

TAU_FSTOP
#define TAU_FSTOP(ARG)
Definition: util.h:281

TAU_FSTART
#define TAU_FSTART(ARG)
Definition: util.h:280

CTF_int::algstrct::scal
virtual void scal(int n, char const *alpha, char *X, int incX) const
X["i"]=alpha*X["i"];.
Definition: algstrct.cxx:262

CTF_int::ConstPairIterator::read_val
void read_val(char *buf) const
copies the value pointed to by the iterator into buf
Definition: algstrct.cxx:776

CTF_int::algstrct::add
virtual void add(char const *a, char const *b, char *c) const
c = a+b
Definition: algstrct.cxx:109

CTF_int::accumulatable::el_size
int el_size
size of each element of algstrct in bytes
Definition: algstrct.h:16

CTF_int::algstrct
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
Definition: algstrct.h:34

CTF_int::PairIterator::write_val
void write_val(char const *buf)
sets value of head pair to what is in buf
Definition: algstrct.cxx:817

ctf.core.w
w
Definition: core.pyx:307

CTF_int::algstrct::mul
virtual void mul(char const *a, char const *b, char *c) const
c = a*b
Definition: algstrct.cxx:120

CTF_int
Definition: model_trainer.cxx:16

CTF_int::spA_dnB_seq_sum_loop< MAX_ORD >
template void spA_dnB_seq_sum_loop< MAX_ORD >(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)

CTF_int::algstrct::mulid
virtual char const * mulid() const
identity element for multiplication i.e. 1
Definition: algstrct.cxx:93

CTF_int::spA_spB_seq_sum
void spA_spB_seq_sum(char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func, int64_t map_pfx)
performs summation between two sparse tensors; assumes A and B contain key-value pairs sorted by key...
Definition: spr_seq_sum.cxx:327

CTF_int::spspsum
void spspsum(algstrct const *sr_A, int64_t nA, ConstPairIterator prs_A, char const *beta, algstrct const *sr_B, int64_t nB, ConstPairIterator prs_B, char const *alpha, int64_t &nnew, char *&pprs_new, univar_function const *func, int64_t map_pfx)
As pairs in a sparse A set to the sparse set of elements defining the tensor, resulting in a set of s...
Definition: spr_seq_sum.cxx:183

CTF_int::spA_dnB_seq_sum_loop< 0 >
void spA_dnB_seq_sum_loop< 0 >(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
Definition: spr_seq_sum.cxx:33

CTF_int::sy_packed_size
int64_t sy_packed_size(int order, const int *len, const int *sym)
computes the size of a tensor in SY (NOT HOLLOW) packed symmetric layout
Definition: util.cxx:10

CTF_int::univar_function::apply_f
virtual void apply_f(char const *a, char *b) const
apply function f to value stored at a
Definition: sum_tsr.h:25