2 #include "../shared/iter_tsr.h"     3 #include "../shared/util.h"    16                             int const *             edge_len_B,
    17                             int64_t 
const *         lda_B,
    20     int imax = edge_len_B[idim];
    21     if (sym_B[idim] != 
NS) imax = (idx_B/lda_B[idim+1])%edge_len_B[idim+1];
    23     for (
int i=0; i<imax; i++){
    28                                    edge_len_B, lda_B, sym_B, func);
    42                                int const *             edge_len_B,
    43                                int64_t 
const *         lda_B,
    47     int imax = edge_len_B[0];
    48     if (sym_B[0] != 
NS) imax = (idx_B/lda_B[0+1])%edge_len_B[0+1];
    50     for (
int i=0; i<imax; i++){
    51       while (size_A > 0 && idx_B == A.k()){
    53           if (sr_A->isequal(alpha, sr_A->mulid()) || alpha == NULL){
    54             sr_B->add(A.d(), B, B);
    56             char tmp[sr_A->el_size];
    57             sr_A->mul(A.d(), alpha, tmp);
    61           if (sr_A->isequal(alpha, sr_A->mulid()) || alpha == NULL){
    62             func->acc_f(A.d(), B, sr_B);
    64             char tmp[sr_A->el_size];
    65             sr_A->mul(A.d(), alpha, tmp);
    66             func->acc_f(tmp, B, sr_B);
    88                                   int const *             edge_len_B,
    89                                   int64_t 
const *         lda_B,
   103                        int const *             edge_len_B,
   109         sr_B->
mul(beta, B, B);
   112       for (int64_t i=0; i<size_A; i++){
   114         char const * tmp_ptr;
   116           sr_A->
mul(alpha, pi[i].d(), tmp_buf);
   118         } 
else tmp_ptr = pi[i].
d();
   120           func->
acc_f(tmp_ptr, B, sr_B); 
   122           sr_B->
add(tmp_ptr, B, B);
   131           sr_B->
scal(sz_B, beta, B, 1);
   134       int64_t lda_B[order_B];
   135       for (
int i=0; i<order_B; i++){
   136         if (i==0) lda_B[i] = 1;
   137         else      lda_B[i] = lda_B[i-1]*edge_len_B[i-1];
   144       SWITCH_ORD_CALL(
spA_dnB_seq_sum_loop, order_B-1, alpha, pA, size_A, sr_A, beta, B, sr_B, order_B, idx, edge_len_B, lda_B, sym_B, func);
   153                        int const *             edge_len_A,
   159                        int64_t &               new_size_B,
   201     for (int64_t t=0,ww=0; ww<nA*map_pfx; ww++){
   202       while (ww<nA*map_pfx){
   203         int64_t 
w = ww/map_pfx;
   204         int64_t mw = ww%map_pfx;
   205         if (t<nB && prs_B[t].k() < prs_A[w].k()*map_pfx+mw)
   207         else if (t<nB && prs_B[t].k() == prs_A[w].k()*map_pfx+mw){
   212           if (!is_acc && (map_pfx != 1 || ww==0 || prs_A[ww-1].k() != prs_A[ww].k()))
   227     for (int64_t t=0,ww=0; n<nnew; n++){
   232       int64_t 
w = ww/map_pfx;
   233       int64_t mw = ww%map_pfx;
   235       if (t<nB && (w==nA || prs_B[t].k() < prs_A[w].k()*map_pfx+mw)){
   236         sr_B->
copy_pair(prs_new[n].ptr, prs_B[t].ptr);
   238           sr_B->
mul(prs_B[t].d(), beta, prs_new[n].d());
   243         if (t>=nB || prs_B[t].k() > prs_A[w].k()*map_pfx+mw){
   246               sr_A->
copy_pair(prs_new[n].ptr, prs_A[w].ptr);
   248               ((int64_t*)prs_new[n].ptr)[0] = prs_A[
w].
k()*map_pfx+mw; 
   252               sr_A->
mul(prs_new[n].d(), alpha, prs_new[n].d());
   256               ((int64_t*)prs_new[n].ptr)[0] = prs_A[
w].
k()*map_pfx+mw; 
   259                 sr_A->
mul(prs_A[w].d(), alpha, a);
   262                   func->
apply_f(a, prs_new[n].d());
   268                 func->
apply_f(prs_A[w].d(), prs_new[n].d());
   270             } 
else { n--; skip=1; }
   277             sr_A->
mul(prs_A[w].d(), alpha, a);
   282             sr_B->
mul(prs_B[t].d(), beta, b);
   289             func->
acc_f(a, b, sr_B);
   292           ((int64_t*)(prs_new[n].ptr))[0] = prs_B[t].k();
   297         while (map_pfx == 1 && ww > 0 && ww<nA && prs_A[ww].k() == prs_A[ww-1].
k()){
   301               sr_A->
mul(prs_A[ww].d(), alpha, a);
   303                 sr_B->
add(prs_new[n].d(), a, prs_new[n].d());
   305                 func->
acc_f(a, prs_new[n].d(), sr_B);
   308                 sr_B->
add(prs_new[n].d(), prs_A[ww].d(), prs_new[n].d());
   310                 func->
acc_f(prs_A[ww].d(), prs_new[n].d(), sr_B);
   335                        int64_t &               new_size_B,
   346               new_size_B, new_B, func, map_pfx);
 void dnA_spB_seq_sum(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, char const *beta, char const *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func)
performs summation between two sparse tensors; assumes B contains key-value pairs sorted by key...
char const * d() const 
returns value of pair at head of ptr 
void spA_dnB_seq_sum_loop(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
virtual char * pair_alloc(int64_t n) const 
allocate space for n (int64_t,dtype) pairs, necessary for object types 
virtual bool isequal(char const *a, char const *b) const 
returns true if algstrct elements a and b are equal 
void spA_dnB_seq_sum(char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, univar_function const *func)
performs summation between two sparse tensors; assumes A contains key-value pairs sorted by key...
untyped internal class for doubly-typed univariate function 
virtual char const * addid() const 
identity element for addition i.e. 0 
virtual void copy_pair(char *a, char const *b) const 
copies pair b to element a 
virtual void set(char *a, char const *b, int64_t n) const 
sets n elements of array a to value b 
virtual void acc_f(char const *a, char *b, CTF_int::algstrct const *sr_B) const 
compute b = b+f(a) 
virtual bool is_accumulator() const 
int64_t k() const 
returns key of pair at head of ptr 
#define SWITCH_ORD_CALL(F, act_ord,...)
virtual void scal(int n, char const *alpha, char *X, int incX) const 
scales X by alpha: X["i"]=alpha*X["i"] 
void read_val(char *buf) const 
sets value to the value pointed to by the iterator 
virtual void add(char const *a, char const *b, char *c) const 
c = a+b 
int el_size
size of each element of algstrct in bytes 
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
void write_val(char const *buf)
sets value of head pair to what is in buf 
virtual void mul(char const *a, char const *b, char *c) const 
c = a*b 
template void spA_dnB_seq_sum_loop< MAX_ORD >(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
virtual char const * mulid() const 
identity element for multiplication i.e. 1 
void spA_spB_seq_sum(char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func, int64_t map_pfx)
performs summation between two sparse tensors; assumes A and B contain key-value pairs sorted by key...
void spspsum(algstrct const *sr_A, int64_t nA, ConstPairIterator prs_A, char const *beta, algstrct const *sr_B, int64_t nB, ConstPairIterator prs_B, char const *alpha, int64_t &nnew, char *&pprs_new, univar_function const *func, int64_t map_pfx)
adds pairs in a sparse A set to the sparse set of elements defining the tensor, resulting in a set of s...
void spA_dnB_seq_sum_loop< 0 >(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
int64_t sy_packed_size(int order, const int *len, const int *sym)
computes the size of a tensor in SY (NOT HOLLOW) packed symmetric layout 
virtual void apply_f(char const *a, char *b) const 
apply function f to value stored at a