3 #include "../shared/util.h" 6 #include "../interface/fun_term.h" 7 #include "../interface/idx_tensor.h" 8 #include "../tensor/untyped_tensor.h" 44 int const * phys_mapped,
60 for (i=0; i<nphys_dim; i++){
61 if (phys_mapped[3*i+0] == 0 &&
62 phys_mapped[3*i+1] == 0 &&
63 phys_mapped[3*i+2] == 0){
72 if (phys_mapped[3*i+0] == 0){
75 if (phys_mapped[3*i+1] == 0){
78 if (phys_mapped[3*i+2] == 0){
92 for (i=0; i<nphys_dim; i++){
93 if (!(phys_mapped[3*i+0] == 0 &&
94 phys_mapped[3*i+1] == 0 &&
95 phys_mapped[3*i+2] == 0)){
96 if (phys_mapped[3*i+0] == 0){
102 if (phys_mapped[3*i+1] == 0){
108 if (phys_mapped[3*i+2] == 0){
154 printf(
"ctr_replicate: \n");
155 printf(
"cdt_A = %p, size_A = %ld, ncdt_A = %d\n",
158 printf(
"cdt_A[%d] length = %d\n",i,
cdt_A[i]->
np);
160 printf(
"cdt_B = %p, size_B = %ld, ncdt_B = %d\n",
163 printf(
"cdt_B[%d] length = %d\n",i,
cdt_B[i]->
np);
165 printf(
"cdt_C = %p, size_C = %ld, ncdt_C = %d\n",
168 printf(
"cdt_C[%d] length = %d\n",i,
cdt_C[i]->
np);
206 int arank, brank, crank, i;
208 arank = 0, brank = 0, crank = 0;
258 if (arank != 0 && this->
sr_A->
addid() != NULL){
261 if (brank != 0 && this->
sr_B->
addid() != NULL){
a term is an abstract object representing some expression of tensors
virtual void execute(CTF::Idx_Tensor output) const =0
evaluates the expression, which just scales by default
double est_time_rec(int nlyr)
returns the execution time this kernel and its recursive calls are estimated to take ...
void red(void *inbuf, void *outbuf, int64_t count, MPI_Datatype mdtype, MPI_Op op, int root)
reduce, same interface as MPI_Reduce, but excluding the comm
virtual bool isequal(char const *a, char const *b) const
returns true if algstrct elements a and b are equal
virtual int64_t mem_rec()
int64_t mem_fp()
returns the number of bytes of buffer space we need
void operator()(Term const &A, Term const &B, Term const &C) const
evaluate C+=f(A,B) or f(A,B,C) if transform
virtual char const * addid() const
MPI datatype for pairs.
char const * beta
scaling of existing C
double estimate_red_time(int64_t msg_sz, MPI_Op op)
virtual void set(char *a, char const *b, int64_t n) const
sets n elements of array a to value b
virtual ~ctr()
deallocates generic ctr object
ctr(ctr *other)
copies generic ctr object
class for executing distributed contraction of tensors
void run(char *A, char *B, char *C)
virtual Term * clone(std::map< tensor *, tensor * > *remap=NULL) const =0
derived classes must implement this copy function to retrieve a pointer
int alloc_ptr(int64_t len, void **const ptr)
alloc abstraction
algstrct * sr
algstrct on which tensor elements and operations are defined
double estimate_bcast_time(int64_t msg_sz)
virtual MPI_Op addmop() const
MPI addition operation for reductions.
virtual void scal(int n, char const *alpha, char *X, int incX) const
X["i"]=alpha*X["i"];.
void bcast(void *buf, int64_t count, MPI_Datatype mdtype, int root)
broadcast, same interface as MPI_Bcast, but excluding the comm
void execute(CTF::Idx_Tensor output) const
evaluates the expression, which just scales by default
virtual double est_time_rec(int nlyr)
virtual std::vector< char > get_uniq_inds() const =0
find list of unique indices that are involved in this term
int64_t mem_rec()
returns the number of bytes needed by each processor in this kernel and its recursive calls ...
virtual void run(char *A, char *B, char *C)
int el_size
size of each element of algstrct in bytes
int cdealloc(void *ptr)
free abstraction
topology * topo
topology to which the tensor is mapped
ctr_replicate(ctr *other)
virtual char const * mulid() const
identity element for multiplication i.e. 1
virtual MPI_Datatype mdtype() const
MPI datatype.
double est_time_fp(int nlyr)
returns the execution time the local part of this kernel is estimated to take