Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
spctr_comm.h
Go to the documentation of this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
2 
3 #ifndef __SPCTR_COMM_H__
4 #define __SPCTR_COMM_H__
5 
6 #include "spctr_tsr.h"
7 
8 namespace CTF_int{
/* Forward declaration only; the class body is not part of this header
   (the reference listing places its definition in contraction.h). */
9  class contraction;
10 
/* spctr subclass that stores, for each of the tensors A, B and C, the number
   of processor dimensions it is replicated along and the size of its blocks.
   Only declarations appear here; the reference listing places the member
   definitions in spctr_comm.cxx. */
11  class spctr_replicate : public spctr {
12  public:
13  int ncdt_A; /* number of processor dimensions to replicate A along */
14  int ncdt_B; /* number of processor dimensions to replicate B along */
15  int ncdt_C; /* number of processor dimensions to replicate C along */
16  int64_t size_A; /* size of A blocks */
17  int64_t size_B; /* size of B blocks */
18  int64_t size_C; /* size of C blocks */
19 
23  /* Class to be called on sub-blocks */
25  /* void set_size_blk_A(int new_nblk_A, int64_t const * nnbA){
26  spctr::set_size_blk_A(new_nblk_A, nnbA);
27  rec_ctr->set_size_blk_A(new_nblk_A, nnbA);
28  }*/
29 
/* Entry point taking, for each of A, B and C, a raw char buffer, a block
   count (nblk_*) and an array of per-block sizes (size_blk_*).
   A and B sizes are const; size_blk_C is non-const and new_C is passed by
   reference, so both may be written by the implementation.
   NOTE(review): presumably executes the kernel on these blocks; confirm
   against the definition in spctr_comm.cxx. */
30  void run(char * A, int nblk_A, int64_t const * size_blk_A,
31  char * B, int nblk_B, int64_t const * size_blk_B,
32  char * C, int nblk_C, int64_t * size_blk_C,
33  char *& new_C);
/* Returns the number of bytes of buffer space this kernel needs. */
43  int64_t spmem_fp(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C);
/* Returns the number of bytes needed by each processor in this kernel and
   its recursive calls. */
48  int64_t spmem_rec(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C);
/* Returns the execution time the local part of this kernel is estimated
   to take. */
49  double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C);
/* Returns the execution time this kernel and its recursive calls are
   estimated to take. */
50  double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C);
/* NOTE(review): output destination and format are not visible here. */
51  void print();
/* Returns a pointer to an spctr.
   NOTE(review): presumably a newly allocated copy of this object; confirm
   ownership against the definition. */
52  spctr * clone();
53 
/* Constructs from an existing spctr.
   NOTE(review): presumably copies the state of `other`; confirm. */
54  spctr_replicate(spctr * other);
/* Constructs from a contraction, an int array phys_mapped and one block size
   per tensor.
   NOTE(review): presumably blk_sz_A/B/C initialize size_A/B/C and phys_mapped
   determines ncdt_A/B/C; confirm against the definition. */
56  spctr_replicate(contraction const * c,
57  int const * phys_mapped,
58  int64_t blk_sz_A,
59  int64_t blk_sz_B,
60  int64_t blk_sz_C);
61  };
62 
63 }
64 #endif // __SPCTR_COMM_H__
double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the execution time this kernel and its recursive calls are estimated to take ...
Definition: spctr_comm.cxx:169
void run(char *A, int nblk_A, int64_t const *size_blk_A, char *B, int nblk_B, int64_t const *size_blk_B, char *C, int nblk_C, int64_t *size_blk_C, char *&new_C)
Definition: spctr_comm.cxx:186
spctr_replicate(spctr *other)
Definition: spctr_comm.cxx:104
int64_t spmem_fp(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the number of bytes of buffer space we need
Definition: spctr_comm.cxx:173
class for executing distributed contraction of tensors
Definition: contraction.h:16
char * new_C
Definition: spctr_tsr.h:15
double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the execution time the local part of this kernel is estimated to take
Definition: spctr_comm.cxx:140
int64_t spmem_rec(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C)
returns the number of bytes needed by each processor in this kernel and its recursive calls ...
Definition: spctr_comm.cxx:181