3 #include "../shared/util.h"     6 #include "../interface/fun_term.h"     7 #include "../interface/idx_tensor.h"     8 #include "../tensor/untyped_tensor.h"    44                                int const *         phys_mapped,
    60     for (i=0; i<nphys_dim; i++){
    61       if (phys_mapped[3*i+0] == 0 &&
    62           phys_mapped[3*i+1] == 0 &&
    63           phys_mapped[3*i+2] == 0){
    72         if (phys_mapped[3*i+0] == 0){
    75         if (phys_mapped[3*i+1] == 0){
    78         if (phys_mapped[3*i+2] == 0){
    92     for (i=0; i<nphys_dim; i++){
    93       if (!(phys_mapped[3*i+0] == 0 &&
    94           phys_mapped[3*i+1] == 0 &&
    95           phys_mapped[3*i+2] == 0)){
    96         if (phys_mapped[3*i+0] == 0){
   102         if (phys_mapped[3*i+1] == 0){
   108         if (phys_mapped[3*i+2] == 0){
   154     printf(
"ctr_replicate: \n");
   155     printf(
"cdt_A = %p, size_A = %ld, ncdt_A = %d\n",
   158       printf(
"cdt_A[%d] length = %d\n",i,
cdt_A[i]->
np);
   160     printf(
"cdt_B = %p, size_B = %ld, ncdt_B = %d\n",
   163       printf(
"cdt_B[%d] length = %d\n",i,
cdt_B[i]->
np);
   165     printf(
"cdt_C = %p, size_C = %ld, ncdt_C = %d\n",
   168       printf(
"cdt_C[%d] length = %d\n",i,
cdt_C[i]->
np);
   206     int arank, brank, crank, i;
   208     arank = 0, brank = 0, crank = 0;
   258     if (arank != 0 && this->
sr_A->
addid() != NULL){
   261     if (brank != 0 && this->
sr_B->
addid() != NULL){
 
a term is an abstract object representing some expression of tensors 
virtual void execute(CTF::Idx_Tensor output) const  =0
evaluates the expression, which just scales by default 
double est_time_rec(int nlyr)
returns the execution time this kernel and its recursive calls are estimated to take ...
void red(void *inbuf, void *outbuf, int64_t count, MPI_Datatype mdtype, MPI_Op op, int root)
reduce, same interface as MPI_Reduce, but excluding the comm 
virtual bool isequal(char const *a, char const *b) const 
returns true if algstrct elements a and b are equal 
virtual int64_t mem_rec()
int64_t mem_fp()
returns the number of bytes of buffer space we need 
void operator()(Term const &A, Term const &B, Term const &C) const 
evaluate C+=f(A,B) or f(A,B,C) if transform 
virtual char const * addid() const 
identity element for addition i.e. 0 
char const * beta
scaling of existing C 
double estimate_red_time(int64_t msg_sz, MPI_Op op)
virtual void set(char *a, char const *b, int64_t n) const 
sets n elements of array a to value b 
virtual ~ctr()
deallocates generic ctr object 
ctr(ctr *other)
copies generic ctr object 
class for executing distributed contraction of tensors 
void run(char *A, char *B, char *C)
virtual Term * clone(std::map< tensor *, tensor * > *remap=NULL) const  =0
derived classes must implement this copy function to retrieve a pointer 
int alloc_ptr(int64_t len, void **const ptr)
alloc abstraction 
algstrct * sr
algstrct on which tensor elements and operations are defined 
double estimate_bcast_time(int64_t msg_sz)
virtual MPI_Op addmop() const 
MPI addition operation for reductions. 
virtual void scal(int n, char const *alpha, char *X, int incX) const 
X["i"]=alpha*X["i"];. 
void bcast(void *buf, int64_t count, MPI_Datatype mdtype, int root)
broadcast, same interface as MPI_Bcast, but excluding the comm 
void execute(CTF::Idx_Tensor output) const 
evaluates the expression, which just scales by default 
virtual double est_time_rec(int nlyr)
virtual std::vector< char > get_uniq_inds() const  =0
find list of unique indices that are involved in this term 
int64_t mem_rec()
returns the number of bytes needed by each processor in this kernel and its recursive calls ...
virtual void run(char *A, char *B, char *C)
int el_size
size of each element of algstrct in bytes 
int cdealloc(void *ptr)
free abstraction 
topology * topo
topology to which the tensor is mapped 
ctr_replicate(ctr *other)
virtual char const * mulid() const 
identity element for multiplication i.e. 1 
virtual MPI_Datatype mdtype() const 
MPI datatype. 
double est_time_fp(int nlyr)
returns the execution time the local part of this kernel is estimated to take