#include "../shared/iter_tsr.h"
#include "../shared/util.h"

template <int idim>
void spA_dnB_seq_sum_loop(char const * alpha, ConstPairIterator & A, int64_t & size_A,
                          algstrct const * sr_A, char const * beta, char *& B,
                          algstrct const * sr_B, int order_B, int64_t idx_B,
                          int const * edge_len_B, int64_t const * lda_B,
                          int const * sym_B, univar_function const * func){
  int imax = edge_len_B[idim];
  // for a symmetric (non-NS) dimension, the loop bound is the next dimension's index
  if (sym_B[idim] != NS) imax = (idx_B/lda_B[idim+1])%edge_len_B[idim+1];
  for (int i=0; i<imax; i++){
    spA_dnB_seq_sum_loop<idim-1>(alpha, A, size_A, sr_A, beta, B, sr_B, order_B,
                                 idx_B+i*lda_B[idim], edge_len_B, lda_B, sym_B, func);
  }
}
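The recursion pattern is easiest to see on a plain dense array. The following is a minimal, self-contained sketch of the same idea (the name visit_loop and all types here are illustrative, not CTF's API): each template instantiation peels off one dimension, accumulating the linear index from the strides, until the base case touches an element.

#include <cstdint>
#include <cstdio>

// Minimal sketch of the dimension-peeling recursion; all names are illustrative.
template <int idim>
void visit_loop(double * B, int64_t idx, int const * len, int64_t const * lda){
  for (int i = 0; i < len[idim]; i++)
    visit_loop<idim-1>(B, idx + i*lda[idim], len, lda);  // fix index i of dim idim
}

template <>
void visit_loop<0>(double * B, int64_t idx, int const * len, int64_t const * lda){
  for (int i = 0; i < len[0]; i++)
    B[idx + i] += 1.0;  // base case: touch each element of the innermost dimension
}

int main(){
  int     len[3] = {2, 3, 4};
  double  B[24]  = {0};
  int64_t lda[3] = {1, 2, 6};     // lda[i] = lda[i-1]*len[i-1], first index fastest
  visit_loop<2>(B, 0, len, lda);  // start at the outermost dimension, order-1 = 2
  std::printf("%g\n", B[23]);     // each element visited exactly once -> 1
}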
template <>
void spA_dnB_seq_sum_loop<0>(char const * alpha, ConstPairIterator & A, int64_t & size_A,
                             algstrct const * sr_A, char const * beta, char *& B,
                             algstrct const * sr_B, int order_B, int64_t idx_B,
                             int const * edge_len_B, int64_t const * lda_B,
                             int const * sym_B, univar_function const * func){
  int imax = edge_len_B[0];
  if (sym_B[0] != NS) imax = (idx_B/lda_B[0+1])%edge_len_B[0+1];
  for (int i=0; i<imax; i++){
    // drain every pair of A whose key matches the current linear index into B
    while (size_A > 0 && idx_B == A.k()){
      if (func == NULL){
        if (sr_A->isequal(alpha, sr_A->mulid()) || alpha == NULL){
          sr_B->add(A.d(), B, B);
        } else {
          char tmp[sr_A->el_size];
          sr_A->mul(A.d(), alpha, tmp);
          sr_B->add(tmp, B, B);
        }
      } else {
        if (sr_A->isequal(alpha, sr_A->mulid()) || alpha == NULL){
          func->acc_f(A.d(), B, sr_B);
        } else {
          char tmp[sr_A->el_size];
          sr_A->mul(A.d(), alpha, tmp);
          func->acc_f(tmp, B, sr_B);
        }
      }
      /* ... advance A and decrement size_A ... */
    }
    /* ... advance idx_B and the pointer into B ... */
  }
}
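The base case interleaves two cursors: a dense index idx_B sweeping B and a key-sorted pair stream A. A stripped-down analogue over plain doubles (the names sp_dn_sum and Pair are hypothetical, for illustration only) shows the control flow:

#include <cstdint>
#include <cstdio>

struct Pair { int64_t k; double d; };  // illustrative stand-in for CTF's key/value pairs

// Accumulate alpha*A into dense B: A is sorted by key, keys are linear indices into B.
void sp_dn_sum(double alpha, Pair const * A, int64_t size_A, double * B, int64_t size_B){
  int64_t ia = 0;
  for (int64_t idx_B = 0; idx_B < size_B; idx_B++){
    // drain every pair of A whose key matches the current dense index
    while (ia < size_A && A[ia].k == idx_B){
      B[idx_B] += alpha * A[ia].d;
      ia++;
    }
  }
}

int main(){
  Pair   A[3] = {{0, 1.0}, {2, 2.0}, {2, 3.0}};  // repeated keys accumulate
  double B[4] = {10, 10, 10, 10};
  sp_dn_sum(2.0, A, 3, B, 4);
  std::printf("%g %g %g %g\n", B[0], B[1], B[2], B[3]);  // 12 10 20 10
}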
template void spA_dnB_seq_sum_loop<MAX_ORD>(/* ..., */ int const * edge_len_B,
                                            int64_t const * lda_B /* , ... */);

void spA_dnB_seq_sum(char const * alpha, char const * A, int64_t size_A, algstrct const * sr_A,
                     char const * beta, char * B, algstrct const * sr_B, int order_B,
                     int const * edge_len_B, int const * sym_B, univar_function const * func){
  if (order_B == 0){
    // scalar B: apply beta once, then fold every pair of A into the single element
    sr_B->mul(beta, B, B);
    /* ... */
    ConstPairIterator pi(sr_A, A);
    for (int64_t i=0; i<size_A; i++){
      char const * tmp_ptr;
      char tmp_buf[sr_A->el_size];
      if (!sr_A->isequal(alpha, sr_A->mulid()) && alpha != NULL){
        sr_A->mul(alpha, pi[i].d(), tmp_buf);
        tmp_ptr = tmp_buf;
      } else tmp_ptr = pi[i].d();
      if (func != NULL)
        func->acc_f(tmp_ptr, B, sr_B);
      else
        sr_B->add(tmp_ptr, B, B);
    }
  } else {
    // dense B of order >= 1: scale all sz_B packed elements by beta, then recurse
    sr_B->scal(sz_B, beta, B, 1);
    /* ... */
    int64_t lda_B[order_B];
    for (int i=0; i<order_B; i++){
      if (i==0) lda_B[i] = 1;
      else      lda_B[i] = lda_B[i-1]*edge_len_B[i-1];
    }
    /* ... pA iterates over the pairs of A; idx starts at 0 ... */
    SWITCH_ORD_CALL(spA_dnB_seq_sum_loop, order_B-1, alpha, pA, size_A, sr_A, beta, B,
                    sr_B, order_B, idx, edge_len_B, lda_B, sym_B, func);
  }
}
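For example, with edge_len_B = {4, 3, 5} this loop produces lda_B = {1, 4, 12}: the layout is first-index-fastest, so element (i0, i1, i2) sits at linear offset i0 + 4*i1 + 12*i2, and the recursion above adds i*lda_B[idim] each time it fixes index i of dimension idim.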
void dnA_spB_seq_sum(char const * alpha, char const * A, algstrct const * sr_A, int order_A,
                     int const * edge_len_A, int const * sym_A, char const * beta, char const * B,
                     int64_t size_B, char *& new_B, int64_t & new_size_B,
                     algstrct const * sr_B, univar_function const * func){ /* ... */ }
// spspsum (fragments): two-pass sorted merge of the pair streams of A and B.
// Pass 1: determine how many pairs the merged output will hold.
for (int64_t t=0,ww=0; ww<nA*map_pfx; ww++){
  while (ww<nA*map_pfx){
    int64_t w  = ww/map_pfx;
    int64_t mw = ww%map_pfx;
    if (t<nB && prs_B[t].k() < prs_A[w].k()*map_pfx+mw)
      /* ... only B holds this key ... */;
    else if (t<nB && prs_B[t].k() == prs_A[w].k()*map_pfx+mw){
      /* ... A and B collide on this key ... */
    }
    /* ... */
    if (!is_acc && (map_pfx != 1 || ww==0 || prs_A[ww-1].k() != prs_A[ww].k()))
      /* ... count one output pair unless it folds into the previous one ... */;
    /* ... */
  }
}

// Pass 2: fill prs_new by walking both sorted streams (t indexes B, ww indexes A;
// n, skip, and the scratch values a and b are declared in the elided lines).
for (int64_t t=0,ww=0; n<nnew; n++){
  /* ... */
  int64_t w  = ww/map_pfx;
  int64_t mw = ww%map_pfx;
  if (t<nB && (w==nA || prs_B[t].k() < prs_A[w].k()*map_pfx+mw)){
    // B's key comes first: copy its pair, scaling the value by beta if needed
    sr_B->copy_pair(prs_new[n].ptr, prs_B[t].ptr);
    /* ... */
    sr_B->mul(prs_B[t].d(), beta, prs_new[n].d());
    /* ... */
  }
  if (t>=nB || prs_B[t].k() > prs_A[w].k()*map_pfx+mw){
    // A's key comes first: emit its pair under the key remapped by map_pfx
    sr_A->copy_pair(prs_new[n].ptr, prs_A[w].ptr);
    ((int64_t*)prs_new[n].ptr)[0] = prs_A[w].k()*map_pfx+mw;
    /* ... scale by alpha when alpha != 1 ... */
    sr_A->mul(prs_new[n].d(), alpha, prs_new[n].d());
    /* ... or, when a function is given, store f(alpha*a) ... */
    ((int64_t*)prs_new[n].ptr)[0] = prs_A[w].k()*map_pfx+mw;
    sr_A->mul(prs_A[w].d(), alpha, a);
    func->apply_f(a, prs_new[n].d());
    /* ... respectively f(a) when alpha == 1 ... */
    func->apply_f(prs_A[w].d(), prs_new[n].d());
    /* ... */
  }
  else { n--; skip=1; }
  /* ... keys collide: combine the scaled values of A and B into one pair ... */
  sr_A->mul(prs_A[w].d(), alpha, a);
  /* ... */
  sr_B->mul(prs_B[t].d(), beta, b);
  /* ... */
  func->acc_f(a, b, sr_B);
  /* ... */
  ((int64_t*)(prs_new[n].ptr))[0] = prs_B[t].k();
  /* ... with map_pfx == 1, fold runs of equal keys in A into the pair just written ... */
  while (map_pfx == 1 && ww > 0 && ww<nA && prs_A[ww].k() == prs_A[ww-1].k()){
    /* ... when alpha != 1 ... */
    sr_A->mul(prs_A[ww].d(), alpha, a);
    sr_B->add(prs_new[n].d(), a, prs_new[n].d());
    /* ... or through the function ... */
    func->acc_f(a, prs_new[n].d(), sr_B);
    /* ... when alpha == 1 ... */
    sr_B->add(prs_new[n].d(), prs_A[ww].d(), prs_new[n].d());
    /* ... or ... */
    func->acc_f(prs_A[ww].d(), prs_new[n].d(), sr_B);
    /* ... */
  }
}
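The two-pass structure is a classic sorted-list merge: count first, allocate once, then fill. Ignoring map_pfx, functions, and in-A aggregation, a plain-double sketch of the merge itself (the name sp_sp_sum and the Pair struct are hypothetical) looks like this:

#include <cstdint>
#include <cstdio>
#include <vector>

struct Pair { int64_t k; double d; };  // illustrative stand-in for CTF's pairs

// Merge two key-sorted pair lists into beta*B + alpha*A, summing on equal keys.
std::vector<Pair> sp_sp_sum(double alpha, std::vector<Pair> const & A,
                            double beta,  std::vector<Pair> const & B){
  std::vector<Pair> out;
  size_t w = 0, t = 0;                    // cursors into A and B, as in spspsum
  while (w < A.size() || t < B.size()){
    if (t < B.size() && (w == A.size() || B[t].k < A[w].k)){
      out.push_back({B[t].k, beta * B[t].d});               // key only in B
      t++;
    } else if (w < A.size() && (t == B.size() || A[w].k < B[t].k)){
      out.push_back({A[w].k, alpha * A[w].d});              // key only in A
      w++;
    } else {
      out.push_back({A[w].k, alpha*A[w].d + beta*B[t].d});  // keys collide
      w++; t++;
    }
  }
  return out;  // sorted by key; size between max(|A|,|B|) and |A|+|B|
}

int main(){
  std::vector<Pair> A = {{0, 1.0}, {2, 2.0}}, B = {{2, 5.0}, {3, 1.0}};
  for (Pair const & p : sp_sp_sum(2.0, A, 1.0, B))
    std::printf("%lld:%g ", (long long)p.k, p.d);           // 0:2 2:9 3:1
}

The real spspsum additionally remaps A's keys by map_pfx, folds repeated keys within A into one output pair, and routes the combination through func and the algstrct operations instead of double arithmetic.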
void spA_spB_seq_sum(/* ..., */ int64_t & new_size_B, /* ..., */ int64_t map_pfx){
  spspsum(sr_A, size_A, ConstPairIterator(sr_A, A), beta, sr_B, size_B,
          ConstPairIterator(sr_B, B), alpha, new_size_B, new_B, func, map_pfx);
}
void dnA_spB_seq_sum(char const *alpha, char const *A, algstrct const *sr_A, int order_A, int const *edge_len_A, int const *sym_A, char const *beta, char const *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func)
performs summation between a dense tensor A and a sparse tensor B; assumes B contains key-value pairs sorted by key...
char const * d() const
returns value of pair at head of ptr
void spA_dnB_seq_sum_loop(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
virtual char * pair_alloc(int64_t n) const
allocate space for n (int64_t,dtype) pairs, necessary for object types
virtual bool isequal(char const *a, char const *b) const
returns true if algstrct elements a and b are equal
void spA_dnB_seq_sum(char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, algstrct const *sr_B, int order_B, int const *edge_len_B, int const *sym_B, univar_function const *func)
performs summation between a sparse tensor A and a dense tensor B; assumes A contains key-value pairs sorted by key...
untyped internal class for doubly-typed univariate function
virtual char const * addid() const
identity element for addition i.e. 0
virtual MPI_Datatype mdtype() const
MPI datatype for pairs.
virtual void copy_pair(char *a, char const *b) const
copies pair b to element a
virtual void set(char *a, char const *b, int64_t n) const
sets n elements of array a to value b
virtual void acc_f(char const *a, char *b, CTF_int::algstrct const *sr_B) const
compute b = b+f(a)
virtual bool is_accumulator() const
int64_t k() const
returns key of pair at head of ptr
#define SWITCH_ORD_CALL(F, act_ord,...)
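SWITCH_ORD_CALL bridges the runtime tensor order to the compile-time template parameter of spA_dnB_seq_sum_loop. A minimal illustrative sketch of the pattern follows; the name MY_SWITCH_ORD_CALL and the cap at order 3 are mine, whereas CTF's actual macro enumerates cases up to MAX_ORD:

// Illustrative sketch of the dispatch pattern behind SWITCH_ORD_CALL: a runtime
// order act_ord selects a compile-time template instantiation of F.
#define MY_SWITCH_ORD_CALL(F, act_ord, ...)  \
  do {                                       \
    switch (act_ord){                        \
      case 0: F<0>(__VA_ARGS__); break;      \
      case 1: F<1>(__VA_ARGS__); break;      \
      case 2: F<2>(__VA_ARGS__); break;      \
      default: F<3>(__VA_ARGS__); break;     \
    }                                        \
  } while (0)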
virtual void scal(int n, char const *alpha, char *X, int incX) const
X["i"]=alpha*X["i"];.
void read_val(char *buf) const
sets buf to the value pointed to by the iterator
virtual void add(char const *a, char const *b, char *c) const
c = a+b
int el_size
size of each element of algstrct in bytes
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
void write_val(char const *buf)
sets value of head pair to what is in buf
virtual void mul(char const *a, char const *b, char *c) const
c = a*b
template void spA_dnB_seq_sum_loop< MAX_ORD >(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
virtual char const * mulid() const
identity element for multiplication i.e. 1
void spA_spB_seq_sum(char const *alpha, char const *A, int64_t size_A, algstrct const *sr_A, char const *beta, char *B, int64_t size_B, char *&new_B, int64_t &new_size_B, algstrct const *sr_B, univar_function const *func, int64_t map_pfx)
performs summation between two sparse tensors; assumes A and B contain key-value pairs sorted by key...
void spspsum(algstrct const *sr_A, int64_t nA, ConstPairIterator prs_A, char const *beta, algstrct const *sr_B, int64_t nB, ConstPairIterator prs_B, char const *alpha, int64_t &nnew, char *&pprs_new, univar_function const *func, int64_t map_pfx)
adds pairs in the sparse set A to the sparse set of elements defining the tensor, resulting in a set of s...
void spA_dnB_seq_sum_loop< 0 >(char const *alpha, ConstPairIterator &A, int64_t &size_A, algstrct const *sr_A, char const *beta, char *&B, algstrct const *sr_B, int order_B, int64_t idx_B, int const *edge_len_B, int64_t const *lda_B, int const *sym_B, univar_function const *func)
int64_t sy_packed_size(int order, const int *len, const int *sym)
computes the size of a tensor in SY (NOT HOLLOW) packed symmetric layout
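As a sketch of what that layout implies: a group of m mutually symmetric (SY) indices of length n has C(n+m-1, m) packed entries (repeats allowed, hence "not hollow"), and the total size is the product of these counts over the groups delimited by NS. The following is a hedged reimplementation of that formula, not CTF's exact code; it assumes all edge lengths within one symmetric group are equal, as the layout requires:

#include <cstdint>
#include <cstdio>

enum { NS = 0, SY = 1 };  // illustrative subset of CTF's symmetry codes

// Count the entries of an order-`order` tensor packed in SY (non-hollow) layout;
// assumes all edge lengths within one symmetric group are equal.
int64_t my_sy_packed_size(int order, int const * len, int const * sym){
  int64_t size = 1;
  int g = 0;                              // indices accumulated in the current group
  for (int i = 0; i < order; i++){
    g++;
    size = size * (len[i] + g - 1) / g;   // running multiset coefficient C(len+g-1, g)
    if (sym[i] == NS) g = 0;              // NS terminates the symmetric group
  }
  return size;
}

int main(){
  int len[2] = {4, 4}, sym[2] = {SY, NS};
  std::printf("%lld\n", (long long)my_sy_packed_size(2, len, sym));  // 10 = C(5,2)
}

For order 2 with len = {4, 4} and sym = {SY, NS} this gives C(5,2) = 10 packed entries, i.e. the upper triangle of a 4x4 matrix including the diagonal; the integer division is exact at every step because the running product is always a whole binomial coefficient times the sizes of the completed groups.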
virtual void apply_f(char const *a, char *b) const
apply function f to the value stored at a and store the result in b
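Taken together, apply_f computes b = f(a), while acc_f folds into its destination, b = b + f(a), with the addition supplied by sr_B. A toy double-typed illustration of that contract (a hypothetical struct mimicking the shape of the interface, not a subclass of CTF's univar_function):

#include <cstdio>

// Toy illustration of the apply_f / acc_f contract, over plain doubles.
struct SquareFn {
  static void apply_f(double const * a, double * b){ *b = (*a) * (*a); }   // b = f(a)
  static void acc_f  (double const * a, double * b){ *b += (*a) * (*a); }  // b = b + f(a)
};

int main(){
  double a = 3.0, b = 1.0;
  SquareFn::apply_f(&a, &b);  std::printf("%g\n", b);  // 9
  SquareFn::acc_f(&a, &b);    std::printf("%g\n", b);  // 18
}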