Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
subworld_gemm.cxx
Go to the documentation of this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
9 #include <ctf.hpp>
10 using namespace CTF;
11 
12 
14  int m,
15  int k,
16  int div_,
17  World &dw){
18  int rank, num_pes;
19  int64_t i, np;
20  double * pairs, err;
21  int64_t * indices;
22 
23 
24  Matrix<> C(m, n, NS, dw, "C");
25  Matrix<> C_ans(m, n, NS, dw, "C_ans");
26  Matrix<> A(m, k, NS, dw, "A");
27  Matrix<> B(k, n, NS, dw);
28 
29  MPI_Comm pcomm = dw.comm;
30  MPI_Comm_rank(pcomm, &rank);
31  MPI_Comm_size(pcomm, &num_pes);
32 
33  int div = div_;
34  if (div > num_pes) div = num_pes;
35 
36 
37  srand48(13*rank);
38  A.get_local_data(&np, &indices, &pairs);
39  for (i=0; i<np; i++ ) pairs[i] = drand48()-.5;
40  A.write(np, indices, pairs);
41  delete [] pairs;
42  free(indices);
43  B.get_local_data(&np, &indices, &pairs);
44  for (i=0; i<np; i++ ) pairs[i] = drand48()-.5;
45  B.write(np, indices, pairs);
46  delete [] pairs;
47  free(indices);
48 
49 
50  int cnum_pes = num_pes / div;
51  int color = rank/cnum_pes;
52  int crank = rank%cnum_pes;
53 
54  MPI_Comm ccomm;
55  MPI_Comm_split(pcomm, color, crank, &ccomm);
56  World sworld(ccomm);
57 
58  C_ans["ij"] = ((double)div)*A["ik"]*B["kj"];
59 
60  Matrix<> subA(m, k, NS, sworld, "subA");
61  Matrix<> subB(k, n, NS, sworld);
62  Matrix<> subC(m, n, NS, sworld, "subC");
63 
64  for (int c=0; c<num_pes/cnum_pes; c++){
65  if (c==color){
66  A.add_to_subworld(&subA,1.0,0.0);
67  B.add_to_subworld(&subB,1.0,0.0);
68  } else {
69  A.add_to_subworld(NULL,1.0,0.0);
70  B.add_to_subworld(NULL,1.0,0.0);
71  }
72  }
73 
74  if (rank < cnum_pes*div)
75  subC["ij"] = subA["ik"]*subB["kj"];
76 
77 
78  for (int c=0; c<num_pes/cnum_pes; c++){
79  if (c==color){
80  C.add_from_subworld(&subC, 1.0, 1.0);
81  } else {
82  C.add_from_subworld(NULL, 1.0, 1.0);
83  }
84  }
85 
86  C_ans["ij"] -= C["ij"];
87 
88  err = C_ans.norm2();
89 
90  if (rank == 0){
91  if (err<1.E-9)
92  printf("{ GEMM on subworlds } passed\n");
93  else
94  printf("{ GEMM on subworlds } FAILED, error norm = %E\n",err);
95  }
96  MPI_Comm_free(&ccomm);
97  return err<1.E-9;
98 }
99 
100 
101 #ifndef TEST_SUITE
102 char* getCmdOption(char ** begin,
103  char ** end,
104  const std::string & option){
105  char ** itr = std::find(begin, end, option);
106  if (itr != end && ++itr != end){
107  return *itr;
108  }
109  return 0;
110 }
111 
112 int main(int argc, char ** argv){
113  int rank, np, n, m, k, div;
114  int const in_num = argc;
115  char ** input_str = argv;
116 
117  MPI_Init(&argc, &argv);
118  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
119  MPI_Comm_size(MPI_COMM_WORLD, &np);
120 
121  if (getCmdOption(input_str, input_str+in_num, "-n")){
122  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
123  if (n < 0) n = 23;
124  } else n = 23;
125  if (getCmdOption(input_str, input_str+in_num, "-m")){
126  m = atoi(getCmdOption(input_str, input_str+in_num, "-m"));
127  if (m < 0) m = 17;
128  } else m = 17;
129  if (getCmdOption(input_str, input_str+in_num, "-k")){
130  k = atoi(getCmdOption(input_str, input_str+in_num, "-k"));
131  if (k < 0) k = 31;
132  } else k = 31;
133  if (getCmdOption(input_str, input_str+in_num, "-div")){
134  div = atoi(getCmdOption(input_str, input_str+in_num, "-div"));
135  if (div < 0) div = 2;
136  } else div = 2;
137 
138  {
139  World dw(MPI_COMM_WORLD, argc, argv);
140  int pass;
141  if (rank == 0){
142  printf("Non-symmetric: NS = NS*NS test_subworld_gemm:\n");
143  }
144  pass = test_subworld_gemm(n, m, k, div, dw);
145  assert(pass);
146  }
147 
148  MPI_Finalize();
149  return 0;
150 }
155 #endif
156 
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18
void add_to_subworld(Tensor< dtype > *tsr, dtype alpha, dtype beta)
accumulates this tensor to a tensor object defined on a different world
Definition: tensor.cxx:540
def rank(self)
Definition: core.pyx:312
Definition: common.h:37
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
string
Definition: core.pyx:456
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
Definition: tensor.h:811
int test_subworld_gemm(int n, int m, int k, int div_, World &dw)
char * getCmdOption(char **begin, char **end, const std::string &option)
int main(int argc, char **argv)
void add_from_subworld(Tensor< dtype > *tsr, dtype alpha, dtype beta)
accumulates this tensor from a tensor object defined on a different world
Definition: tensor.cxx:560
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const
Gives the global indices and values associated with the local data.
Definition: tensor.cxx:159
Definition: apsp.cxx:17
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
Definition: tensor.cxx:264
MPI_Comm comm
set of processors making up this world
Definition: world.h:22
def np(self)
Definition: core.pyx:315