Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
permute_multiworld.cxx
Go to the documentation of this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
2 
#include <cmath>
#include <ctf.hpp>
using namespace CTF;
12 
14  int sym,
15  World & dw){
16  int np, rank, nprow, npcol, rrow, rcol, nrow, ncol, pass;
17  int64_t i, nvals, row_pfx, col_pfx;
18  int64_t * indices;
19  double * data;
20  int * perm_row, * perm_col;
21  int ** perms;
22 
23  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
24  MPI_Comm_size(MPI_COMM_WORLD, &np);
25 
26  nprow = 1;
27  for (i=1; i<np; i++){
28  if (np%i == 0 && i > nprow && i <= np/i){
29  nprow = i;
30  }
31  }
32  npcol = np/nprow;
33 
34  rrow = rank%nprow;
35  rcol = rank/nprow;
36 
37  nrow = n/nprow;
38  row_pfx = nrow*rrow;
39  row_pfx += std::min(n%nprow, rrow);
40  if (rrow < n%nprow) nrow++;
41  ncol = n/npcol;
42  col_pfx = ncol*rcol;
43  col_pfx += std::min(n%npcol, rcol);
44  if (rcol < n%npcol) ncol++;
45 
46  perms = (int**)malloc(sizeof(int*)*2);
47  perm_row = (int*)malloc(sizeof(int)*nrow);
48  perm_col = (int*)malloc(sizeof(int)*ncol);
49  perms[0] = perm_row;
50  perms[1] = perm_col;
51 
52  //permutation extracts blocked layout
53  for (i=0; i<nrow; i++){
54  perm_row[i] = row_pfx+i;
55  }
56  for (i=0; i<ncol; i++){
57  perm_col[i] = col_pfx+i;
58  }
59 
60  Matrix<> A(n, n, sym, dw);
61  A.get_local_data(&nvals, &indices, &data);
62 
63  for (i=0; i<nvals; i++){
64  data[i] = (double)indices[i];
65  }
66 
67  A.write(nvals, indices, data);
68  free(indices);
69  delete [] data;
70 
71  World id_world(MPI_COMM_SELF);
72 
73  int Bsym;
74  if (rrow == rcol) Bsym = sym;
75  else Bsym = NS;
76 
77  if (sym != NS && rrow > rcol){
78  Scalar<> B(id_world);
79  B.permute(perms, 1.0, A, 1.0);
80  nvals = 0;
81  } else {
82  Matrix<> B(nrow, ncol, Bsym, id_world);
83 
84  B.permute(perms, 1.0, A, 1.0);
85 
86  B.get_local_data(&nvals, &indices, &data);
87  }
88 
89 
90  pass = 1;
91  for (i=0; i<nvals; i++){
92  if (data[i] != (double)((indices[i]/nrow + col_pfx)*n + (indices[i]%nrow)+row_pfx)){
93  pass = 0;
94  }
95  }
96 
97  MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
98 
99  if (!pass){
100  if (rank == 0){
101  if (pass)
102  printf("{ permuted-read among multiple worlds } passed\n");
103  else
104  printf("{ permuted-read among multiple worlds } failed\n");
105  }
106  delete [] data;
107  free(indices);
108  return pass;
109  }
110 
111 
112  for (i=0; i<nvals; i++){
113  data[i] = n*n-((indices[i]/nrow + col_pfx)*n + (indices[i]%nrow)+row_pfx);
114  }
115 
116  A["ij"] = 0.0;
117 
118  if (sym != NS && rrow > rcol){
119  Scalar<> B(id_world);
120  A.permute(1.0, B, perms, 1.0);
121  nvals = 0;
122  } else {
123  Matrix<> B(nrow, ncol, Bsym, id_world);
124  B.write(nvals,indices,data);
125  A.permute(1.0, B, perms, 1.0);
126  }
127 
128  if (nvals > 0){
129  delete [] data;
130  free(indices);
131  }
132 
133  A.get_local_data(&nvals, &indices, &data);
134 
135  pass = 1;
136  for (i=0; i<nvals; i++){
137  if (abs(data[i] - (double)(n*n-indices[i])) >= 1.E-9){
138  pass = 0;
139  }
140  }
141  MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
142 
143  if (rank == 0){
144  if (pass)
145  printf("{ permuted read and write among multiple worlds } passed\n");
146  else
147  printf("{ permuted read and write among multiple worlds } failed\n");
148  }
149  free(indices);
150  delete [] data;
151 
152  return pass;
153 }
154 
155 
156 #ifndef TEST_SUITE
/**
 * \brief scans an argument array for an option flag and returns its value
 * \param[in] begin  start of the argument array
 * \param[in] end    one past the last argument
 * \param[in] option flag to search for (e.g. "-n")
 * \return pointer to the argument following the flag, or nullptr when the
 *         flag is absent or is itself the last argument
 */
char* getCmdOption(char ** begin,
                   char ** end,
                   const std::string & option){
  char ** itr = std::find(begin, end, option);
  if (itr != end && ++itr != end){
    return *itr;
  }
  /* use nullptr rather than the integer literal 0 for a pointer result */
  return nullptr;
}
166 
167 int main(int argc, char ** argv){
168  int rank, np, n;
169  int const in_num = argc;
170  char ** input_str = argv;
171 
172  MPI_Init(&argc, &argv);
173  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
174  MPI_Comm_size(MPI_COMM_WORLD, &np);
175 
176  if (getCmdOption(input_str, input_str+in_num, "-n")){
177  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
178  if (n < 0) n = 256;
179  } else n = 256;
180 
181  {
182  World dw(MPI_COMM_WORLD, argc, argv);
183  int pass;
184  if (rank == 0){
185  printf("Testing nonsymmetric multiworld permutation with n=%d\n",n);
186  }
187  pass = permute_multiworld(n, NS, dw);
188  assert(pass);
189  if (np == sqrt(np)*sqrt(np)){
190  if (rank == 0){
191  printf("Testing symmetric multiworld permutation with n=%d\n",n);
192  }
193  pass = permute_multiworld(n, SY, dw);
194  assert(pass);
195  if (rank == 0){
196  printf("Testing skew-symmetric multiworld permutation with n=%d\n",n);
197  }
198  pass = permute_multiworld(n, SH, dw);
199  assert(pass);
200  if (rank == 0){
201  printf("Testing asymmetric multiworld permutation with n=%d\n",n);
202  }
203  pass = permute_multiworld(n, AS, dw);
204  assert(pass);
205  }
206  }
207 
208  MPI_Finalize();
209  return 0;
210 }
211 
217 #endif
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18
def rank(self)
Definition: core.pyx:312
void permute(dtype beta, CTF_int::tensor &A, int *const *perms_A, dtype alpha)
Apply permutation to matrix, potentially extracting a slice B[i,j,...] = beta*B[...] + alpha*A[perms_A[0][i],perms_A[1][j],...].
Definition: tensor.cxx:429
Definition: common.h:37
int permute_multiworld(int n, int sym, World &dw)
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
string
Definition: core.pyx:456
Scalar class which encapsulates a 0D tensor.
Definition: scalar.h:13
def abs(initA)
Definition: core.pyx:5440
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const
Gives the global indices and values associated with the local data.
Definition: tensor.cxx:159
Definition: apsp.cxx:17
Definition: common.h:37
int main(int argc, char **argv)
char * getCmdOption(char **begin, char **end, const std::string &option)
Definition: common.h:37
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
Definition: tensor.cxx:264
Definition: common.h:37
def np(self)
Definition: core.pyx:315