1 #ifndef __FUNCTIONS_H__ 2 #define __FUNCTIONS_H__ 4 #include "../scaling/scaling.h" 5 #include "../summation/summation.h" 6 #include "../contraction/contraction.h" 22 template<
typename dtype=
double>
29 std::function<void(dtype&)>
f;
54 template<
typename dtype_A=
double,
typename dtype_B=dtype_A>
61 std::function<dtype_B(dtype_A)>
f;
75 void apply_f(
char const *
a,
char *
b)
const { ((dtype_B*)b)[0]=
f(((dtype_A*)a)[0]); }
84 dtype_B tb=
f(((dtype_A*)a)[0]);
85 sr_B->
add(b, (
char const *)&tb, b);
95 template<
typename dtype_A=
double,
typename dtype_B=dtype_A>
102 std::function<void(dtype_A, dtype_B &)>
f;
116 void apply_f(
char const *
a,
char *
b)
const { acc_f(a,b,NULL); }
125 f(((dtype_A*)a)[0], ((dtype_B*)b)[0]);
136 template<
typename dtype_A=
double,
typename dtype_B=dtype_A,
typename dtype_C=dtype_A>
143 std::function<dtype_C (dtype_A, dtype_B)>
f;
177 void apply_f(
char const *
a,
char const *
b,
char * c)
const {
178 ((dtype_C*)c)[0] =
f(((dtype_A
const*)a)[0],((dtype_B
const*)b)[0]);
190 tmp =
f(((dtype_A
const*)a)[0],((dtype_B
const*)b)[0]);
191 sr_C->
add(c, (
char const *)&tmp, c);
208 #pragma omp parallel for 210 for (
int row_A=0; row_A<m; row_A++){
212 #pragma omp parallel for 214 for (
int col_B=0; col_B<n; col_B++){
215 for (
int i_A=IA[row_A]-1; i_A<IA[row_A+1]-1; i_A++){
216 int col_A = JA[i_A]-1;
217 dtype_C tmp =
f(A[i_A],B[col_B*k+col_A]);
218 sr_C->
add((
char const *)&C[col_B*m+row_A],(
char const*)&tmp,(
char *)&C[col_B*m+row_A]);
242 #pragma omp parallel for 244 for (
int row_A=0; row_A<m; row_A++){
245 for (
int i_A=IA[row_A]-1; i_A<IA[row_A+1]-1; i_A++){
246 int row_B = JA[i_A]-1;
247 for (
int i_B=IB[row_B]-1; i_B<IB[row_B+1]-1; i_B++){
248 int col_B = JB[i_B]-1;
249 dtype_C tmp =
f(A[i_A],B[i_B]);
250 sr_C->
add((
char const*)&C[col_B*m+row_A],(
char const*)&tmp,(
char *)&C[col_B*m+row_A]);
274 for (
int i=0; i<m; i++){
275 memset(has_col, 0,
sizeof(
int)*n);
278 for (
int j=0; j<n; j++){
279 IC[i+1] += has_col[j];
283 dtype_C * vC = (dtype_C*)C.
vals();
285 memcpy(C.
IA(), IC,
sizeof(int)*(m+1));
289 for (
int i=0; i<m; i++){
290 memset(has_col, 0,
sizeof(
int)*n);
293 for (
int j=0; j<n; j++){
295 JC[IC[i]+vs-1] = j+1;
296 rev_col[j] = IC[i]+vs-1;
300 memset(has_col, 0,
sizeof(
int)*n);
301 for (
int j=0; j<IA[i+1]-IA[i]; j++){
302 int row_B = JA[IA[i]+j-1]-1;
303 int idx_A = IA[i]+j-1;
304 for (
int l=0; l<IB[row_B+1]-IB[row_B]; l++){
305 int idx_B = IB[row_B]+l-1;
306 if (has_col[JB[idx_B]-1]){
307 dtype_C tmp =
f(A[idx_A],B[idx_B]);
308 sr_C->
add((
char const *)&vC[rev_col[JB[idx_B]-1]], (
char const *)&tmp, (
char *)&vC[rev_col[JB[idx_B]-1]]);
310 vC[rev_col[JB[idx_B]-1]] =
f(A[idx_A],B[idx_B]);
312 has_col[JB[idx_B]-1] = 1;
317 if (C_CSR == NULL || C_in.
nnz() == 0){
338 csrmm(m,n,k,(dtype_A
const *)A,JA,IA,nnz_A,(dtype_B
const *)B, (dtype_C *)C, sr_C);
355 csrmultd(m,n,k,(dtype_A
const *)A,JA,IA,nnz_A,(dtype_B
const *)B,JB,IB,nnz_B,(dtype_C *)C,sr_C);
372 csrmultcsr(m,n,k,(dtype_A
const *)A,JA,IA,nnz_A,(dtype_B
const *)B, JB, IB, nnz_B, C_CSR, sr_C);
384 template<
typename dtype_A=
double,
typename dtype_B=dtype_A,
typename dtype_C=dtype_A>
391 std::function<void(dtype_A, dtype_B, dtype_C &)>
f;
421 f(((dtype_A*)a)[0], ((dtype_B*)b)[0], ((dtype_C*)c)[0]);
430 void apply_f(
char const *
a,
char const *
b,
char * c)
const { acc_f(a,b,c,NULL); }
440 template<
typename dtype_A=
double,
typename dtype_B=dtype_A,
typename dtype_C=dtype_A>
455 Function(std::function<dtype_C(dtype_A,dtype_B)> f_,
bool is_comm=
false){
463 return univar->operator()(A);
468 return bivar->operator()(A,B);
482 if (is_univar)
delete(univar);
483 if (is_bivar)
delete(bivar);
487 template<
typename dtype_A=
double,
typename dtype_B=dtype_A,
typename dtype_C=dtype_A>
511 Transform(std::function<
void(dtype_A, dtype_B, dtype_C&)> f_){
520 if (is_endo)
delete endo;
521 if (is_univar)
delete univar;
522 if (is_bivar)
delete bivar;
532 univar->operator()(A,B);
537 bivar->operator()(A,B,C);
CTF_int::Unifun_Term operator()(CTF_int::Term const &A) const
a term is an abstract object representing some expression of tensors
std::function< dtype_C(dtype_A, dtype_B)> f
function signature for element-wise multiplication, compute C=f(A,B)
Bivar_Function(std::function< dtype_C(dtype_A, dtype_B)> f_)
constructor takes function pointers to compute C=f(A,B);
custom scalar function on tensor: e.g. A["ij"] = f(A["ij"])
void csrmm(int m, int n, int k, dtype_A const *A, int const *JA, int const *IA, int64_t nnz_A, dtype_B const *B, dtype_C *C, CTF_int::algstrct const *sr_C) const
int * IA() const
retrieves prefix sum of number of nonzeros for each row (of size nrow()+1) out of all_data ...
Endomorphism()
default constructor
untyped internal class for singly-typed single variable function (Endomorphism)
static char * csr_add(char *cA, char *cB, accumulatable const *adder)
void apply_f(char *a) const
apply function f to value stored at a
int bivar_function(int n, World &dw)
void * alloc(int64_t len)
alloc abstraction
custom bivariate function on two tensors: e.g. C["ij"] = f(A["ik"],B["kj"])
untyped internal class for doubly-typed univariate function
std::function< void(dtype &)> f
function signature for element-wise operation a=f(a)
untyped internal class for triply-typed bivariate function
Bivar_Function(std::function< dtype_C(dtype_A, dtype_B)> f_, bool is_comm)
constructor takes function pointers to compute C=f(A,B);
Univar_Function< dtype_A, dtype_B > * univar
static void compute_has_col(int const *JA, int const *IA, int const *JB, int const *IB, int i, int *has_col)
custom function f : X -> Y to be applied to tensor elements: e.g. B["ij"] = f(A["ij"]) ...
Univar_Function(std::function< dtype_B(dtype_A)> f_)
constructor takes function pointers to compute B=f(A);
Endomorphism(std::function< void(dtype &)> f_)
constructor takes function pointer
int * JA() const
retrieves column indices of each value in vals stored in sorted form by row
void acc_f(char const *a, char const *b, char *c, CTF_int::algstrct const *sr_C) const
compute c = c+ f(a,b)
int64_t nnz() const
retrieves number of nonzeros out of all_data
void acc_f(char const *a, char *b, CTF_int::algstrct const *sr_B) const
compute b = b+f(a)
abstraction for a serialized sparse matrix stored in compressed-sparse-row (CSR) layout ...
std::function< dtype_B(dtype_A)> f
function signature for element-wise operation, compute b=f(a)
Bivar_Function< dtype_A, dtype_B, dtype_C > * bivar
Function(std::function< dtype_B(dtype_A)> f_)
char * all_data
serialized buffer containing all info, index, and values related to matrix
char * vals() const
retrieves array of values out of all_data
virtual void add(char const *a, char const *b, char *c) const
c = a+b
int cdealloc(void *ptr)
free abstraction
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
void ccsrmm(int m, int n, int k, char const *A, int const *JA, int const *IA, int64_t nnz_A, char const *B, char *C, CTF_int::algstrct const *sr_C) const
CTF_int::Bifun_Term operator()(CTF_int::Term const &A, CTF_int::Term const &B) const
void apply_f(char const *a, char const *b, char *c) const
compute c = f(a,b)
Function(std::function< dtype_C(dtype_A, dtype_B)> f_, bool is_comm=false)
void apply_f(char const *a, char *b) const
apply function f to value stored at a