Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
spctr_2d_general.h
/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
#include "spctr_comm.h"

#ifndef __SPCTR_2D_GENERAL_H__
#define __SPCTR_2D_GENERAL_H__

namespace CTF_int{
  class tensor;
  int spctr_2d_gen_build(int           is_used,
                         CommData      global_comm,
                         int           i,
                         int *         virt_dim,
                         int &         cg_edge_len,
                         int &         total_iter,
                         tensor *      A,
                         int           i_A,
                         CommData *&   cg_cdt_A,
                         int64_t &     cg_spctr_lda_A,
                         int64_t &     cg_spctr_sub_lda_A,
                         bool &        cg_move_A,
                         int *         blk_len_A,
                         int64_t &     blk_sz_A,
                         int const *   virt_blk_len_A,
                         int &         load_phase_A,
                         tensor *      B,
                         int           i_B,
                         CommData *&   cg_cdt_B,
                         int64_t &     cg_spctr_lda_B,
                         int64_t &     cg_spctr_sub_lda_B,
                         bool &        cg_move_B,
                         int *         blk_len_B,
                         int64_t &     blk_sz_B,
                         int const *   virt_blk_len_B,
                         int &         load_phase_B,
                         tensor *      C,
                         int           i_C,
                         CommData *&   cg_cdt_C,
                         int64_t &     cg_spctr_lda_C,
                         int64_t &     cg_spctr_sub_lda_C,
                         bool &        cg_move_C,
                         int *         blk_len_C,
                         int64_t &     blk_sz_C,
                         int const *   virt_blk_len_C,
                         int &         load_phase_C);

  class spctr_2d_general : public spctr {
    public:
      int edge_len;

      int64_t ctr_lda_A;     /* local lda_A of contraction dimension 'k' */
      int64_t ctr_sub_lda_A; /* elements per local lda_A
                                of contraction dimension 'k' */
      int64_t ctr_lda_B;     /* local lda_B of contraction dimension 'k' */
      int64_t ctr_sub_lda_B; /* elements per local lda_B
                                of contraction dimension 'k' */
      int64_t ctr_lda_C;     /* local lda_C of contraction dimension 'k' */
      int64_t ctr_sub_lda_C; /* elements per local lda_C
                                of contraction dimension 'k' */
      int64_t dns_vrt_sz_A;  /* size of each block of A when stored as dense, needed for time estimation given nnz frac */
      int64_t dns_vrt_sz_B;  /* size of each block of B when stored as dense, needed for time estimation given nnz frac */
      int64_t dns_vrt_sz_C;  /* size of each block of C when stored as dense, needed for time estimation given nnz frac */
      #ifdef OFFLOAD
      bool alloc_host_buf;
      #endif

      bool move_A;
      bool move_B;
      bool move_C;

      /* Class to be called on sub-blocks */
      spctr * rec_ctr;

      /**
       * \brief print ctr object
       */
      void print();

      /**
       * \brief Basically doing SUMMA, except assumes equal block size on each
       *        processor. Performs rank-b updates ...
       */
      void run(char * A, int nblk_A, int64_t const * size_blk_A,
               char * B, int nblk_B, int64_t const * size_blk_B,
               char * C, int nblk_C, int64_t * size_blk_C,
               char *& new_C);

      /**
       * \brief returns the number of bytes of buffer space we need
       */
      int64_t spmem_fp(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C);

      /**
       * \brief returns the number of bytes of buffer space we need recursively
       */
      int64_t spmem_rec(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C);

      /**
       * \brief returns the time this kernel will take including calls to rec_ctr
       */
      double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C);

      /**
       * \brief returns the time this kernel will take including calls to rec_ctr
       */
      double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C);

      spctr * clone();
/*    void set_size_blk_A(int new_nblk_A, int64_t const * nnbA){
        spctr::set_size_blk_A(new_nblk_A, nnbA);
        rec_ctr->set_size_blk_A(new_nblk_A, nnbA);
      }*/

      /**
       * \brief determines buffer and block sizes needed for spctr_2d_general
       */
      void find_bsizes(int64_t & b_A,
                       int64_t & b_B,
                       int64_t & b_C,
                       int64_t & s_A,
                       int64_t & s_B,
                       int64_t & s_C,
                       int64_t & aux_size);

      /**
       * \brief copies spctr object
       */
      spctr_2d_general(spctr * other);

      /**
       * \brief deallocs spctr_2d_general object
       */
      ~spctr_2d_general();

      /**
       * \brief partial constructor, most of the logic is in the spctr_2d_gen_build function
       */
      spctr_2d_general(contraction * c) : spctr(c){ move_A=0; move_B=0; move_C=0; }
  };
}

#endif
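
A note on run(): its brief describes a SUMMA-style algorithm that assumes equal block sizes on each processor and performs rank-b updates. The fragment below is only a minimal sketch of that rank-b update pattern on dense, column-major buffers; the function and variable names are invented for illustration and are not part of the CTF_int API.

#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch: accumulate C (m x n) += A (m x k) * B (k x n), column-major,
// one b-wide panel of A and B at a time -- the "rank-b update" a SUMMA
// step performs once the matching panels have been communicated.
void rank_b_update_sketch(int64_t m, int64_t n, int64_t k, int64_t b,
                          std::vector<double> const & A,
                          std::vector<double> const & B,
                          std::vector<double>       & C){
  for (int64_t p = 0; p < k; p += b){             // one SUMMA step per panel
    int64_t pb = std::min(b, k - p);              // width of the current panel
    for (int64_t j = 0; j < n; j++)
      for (int64_t kk = p; kk < p + pb; kk++)
        for (int64_t i = 0; i < m; i++)
          C[i + j*m] += A[i + kk*m] * B[kk + j*k];
  }
}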
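
A note on find_bsizes(): it determines the buffer and block sizes that spctr_2d_general needs. The sketch below is a speculative illustration of how the s_* (per-step panel size) and b_* (communication buffer size) outputs could relate to the ctr_lda_*, ctr_sub_lda_*, edge_len, and move_* members documented in the class; the actual formulas live in the implementation and may differ, and the struct and np field here are hypothetical.

#include <cstdint>

// Illustration only: an assumed relation between the per-step panel size of A
// (s_A) and the buffer needed when A is the operand being moved (b_A).
struct bsize_sketch {
  int64_t edge_len;       // length of the contracted 2D edge
  int64_t ctr_lda_A;      // local lda of contraction dimension 'k' (as in the class)
  int64_t ctr_sub_lda_A;  // elements per local lda of dimension 'k'
  bool    move_A;         // whether panels of A are communicated
  int     np;             // processors along the moved edge (assumed)

  void find_bsizes_A(int64_t & b_A, int64_t & s_A) const {
    // one panel per SUMMA step: a sub-panel for every local lda stride
    s_A = ctr_sub_lda_A * ctr_lda_A;
    // if A moves, buffer this processor's share of the contracted edge
    b_A = move_A ? s_A * (edge_len / np) : 0;
  }
};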