Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
bench_nosym_transp.cxx
Go to the documentation of this file.
1 
#include <ctf.hpp>
#include "../src/redistribution/nosym_transp.h"
#ifdef USE_OMP
#include "omp.h"
#endif
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
15 
16 using namespace CTF;
17 
18 void bench_nosym_transp(int n,
19  int order,
20  int niter,
21  char const * iA,
22  char const * iB){
23 
24  printf("Performing transposes n=%d, order=%d, %s<->%s:\n",n,order,iA,iB);
25  Ring<> r;
26 
27  int edge_len[order];
28  int new_order[order];
29 
30  int64_t N=1;
31  for (int i=0; i<order; i++){
32  N*=n;
33  edge_len[i] = n;
34  new_order[i] = -1;
35  for (int j=0; j<order; j++){
36  if (iA[i] == iB[j]){
37  assert(new_order[i] == -1);
38  new_order[i] = j;
39  }
40  }
41  assert(new_order[i] != -1);
42  }
43 
44  double * data;
45  int pm = posix_memalign((void**)&data, 16, N*sizeof(double));
46  assert(pm==0);
47 
48  srand48(7);
49  for (int64_t i=0; i<N; i++){
50  data[i] = drand48()-.5;
51  }
52 
53  //check correctness of transpose
54  CTF_int::nosym_transpose(order, new_order, edge_len, (char*)data, 1, &r);
55  CTF_int::nosym_transpose(order, new_order, edge_len, (char*)data, 0, &r);
56 
57  srand48(7);
58  for (int64_t i=0; i<N; i++){
59  assert(data[i] == drand48()-.5);
60  }
61  printf("Passed correctness test\n");
62 
63  double * data2;
64  pm = posix_memalign((void**)&data2, 16, N*sizeof(double));
65  assert(pm==0);
66 
67  double t_cpy_st = MPI_Wtime();
68  memcpy(data2, data, N*sizeof(double));
69  double t_cpy = MPI_Wtime()-t_cpy_st;
70  printf("single-threaded memcpy %ld bandwidth is %lf sec %lf GB/sec\n",
71  N, t_cpy, 1.E-9*N*sizeof(double)/t_cpy);
72 
73 #ifdef USE_OMP
74  t_cpy_st = MPI_Wtime();
75  #pragma omp parallel
76  {
77  int ti = omp_get_thread_num();
78  int nt = omp_get_num_threads();
79  int64_t Nt = N/nt;
80  memcpy(data2+Nt*ti, data+Nt*ti, Nt*sizeof(double));
81  }
82  t_cpy = MPI_Wtime()-t_cpy_st;
83  printf("multi-threaded memcpy %ld bandwidth is %lf sec %lf GB/sec\n",
84  N, t_cpy, 1.E-9*N*sizeof(double)/t_cpy);
85 #endif
86  free(data2);
87 
88  double t_fwd = 0.0;
89  double t_min_fwd;
90  double t_max_fwd;
91  double t_bwd = 0.0;
92  double t_min_bwd;
93  double t_max_bwd;
94 
95 
96  for (int i=0; i<niter; i++){
97  double t_st_fwd = MPI_Wtime();
98 
99  CTF_int::nosym_transpose(order, new_order, edge_len, (char*)data, 1, &r);
100 
101  t_fwd += MPI_Wtime() - t_st_fwd;
102  if (i==0){
103  t_min_fwd = t_fwd;
104  t_max_fwd = t_fwd;
105  } else {
106  t_min_fwd = std::min(MPI_Wtime() - t_st_fwd, t_min_fwd);
107  t_max_fwd = std::max(MPI_Wtime() - t_st_fwd, t_max_fwd);
108  }
109 
110  double t_st_bwd = MPI_Wtime();
111 
112  CTF_int::nosym_transpose(order, new_order, edge_len, (char*)data, 0, &r);
113 
114  t_bwd += MPI_Wtime() - t_st_bwd;
115  if (i==0){
116  t_min_bwd = t_bwd;
117  t_max_bwd = t_bwd;
118  } else {
119  t_min_bwd = std::min(MPI_Wtime() - t_st_bwd, t_min_bwd);
120  t_max_bwd = std::max(MPI_Wtime() - t_st_bwd, t_max_bwd);
121  }
122 
123  }
124 
125  printf("Performed %d iteartions\n",niter);
126  printf("Forward sec/iter: average = %lf (GB/s = %lf), range = [%lf, %lf]\n",
127  t_fwd/niter, 1.E-9*N*sizeof(double)/(t_fwd/niter), t_min_fwd, t_max_fwd);
128  printf("Backward sec/iter: average = %lf (GB/s = %lf), range = [%lf, %lf]\n",
129  t_bwd/niter, 1.E-9*N*sizeof(double)/(t_bwd/niter), t_min_bwd, t_max_bwd);
130 
131  free(data);
132 }
133 
/**
 * \brief looks up the argument that follows a given command line flag
 * \param[in] begin pointer to the first argument string
 * \param[in] end pointer one past the last argument string
 * \param[in] option flag to search for, e.g. "-n"
 * \return the string following the first occurrence of option, or a null
 *         pointer if option is absent or is the last argument
 */
char* getCmdOption(char ** begin,
                   char ** end,
                   const std::string & option){
  for (char ** arg = begin; arg != end; ++arg){
    if (option.compare(*arg) != 0) continue;
    // only the first occurrence of the flag is considered
    char ** value = arg + 1;
    if (value == end) return NULL;
    return *value;
  }
  return NULL;
}
143 
144 
145 int main(int argc, char ** argv){
146  int niter, n;
147  int const in_num = argc;
148  char ** input_str = argv;
149  char const * A;
150  char const * B;
151  MPI_Init(NULL, NULL);
152  if (getCmdOption(input_str, input_str+in_num, "-n")){
153  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
154  if (n < 0) n = 10;
155  } else n = 10;
156 
157  if (getCmdOption(input_str, input_str+in_num, "-niter")){
158  niter = atoi(getCmdOption(input_str, input_str+in_num, "-niter"));
159  if (niter < 0) niter = 8;
160  } else niter = 8;
161 
162  if (getCmdOption(input_str, input_str+in_num, "-A")){
163  A = getCmdOption(input_str, input_str+in_num, "-A");
164  } else A = "ij";
165  if (getCmdOption(input_str, input_str+in_num, "-B")){
166  B = getCmdOption(input_str, input_str+in_num, "-B");
167  } else B = "ji";
168 
169 
170  bench_nosym_transp(n, strlen(A), niter, A, B);
171 
172  MPI_Finalize();
173  return 0;
174 }
Ring class defined by a datatype and addition and multiplication functions; addition must have an ident...
Definition: ring.h:18
int main(int argc, char **argv)
void bench_nosym_transp(int n, int order, int niter, char const *iA, char const *iB)
string
Definition: core.pyx:456
char * getCmdOption(char **begin, char **end, const std::string &option)
Definition: apsp.cxx:17
void nosym_transpose(tensor *A, int all_fdim_A, int const *all_flen_A, int const *new_order, int dir)