10 #include "../src/redistribution/nosym_transp.h" 24 printf(
"Performing transposes n=%d, order=%d, %s<->%s:\n",n,order,iA,iB);
31 for (
int i=0; i<order; i++){
35 for (
int j=0; j<order; j++){
37 assert(new_order[i] == -1);
41 assert(new_order[i] != -1);
45 int pm = posix_memalign((
void**)&data, 16, N*
sizeof(
double));
49 for (int64_t i=0; i<N; i++){
50 data[i] = drand48()-.5;
58 for (int64_t i=0; i<N; i++){
59 assert(data[i] == drand48()-.5);
61 printf(
"Passed correctness test\n");
64 pm = posix_memalign((
void**)&data2, 16, N*
sizeof(
double));
67 double t_cpy_st = MPI_Wtime();
68 memcpy(data2, data, N*
sizeof(
double));
69 double t_cpy = MPI_Wtime()-t_cpy_st;
70 printf(
"single-threaded memcpy %ld bandwidth is %lf sec %lf GB/sec\n",
71 N, t_cpy, 1.E-9*N*
sizeof(
double)/t_cpy);
74 t_cpy_st = MPI_Wtime();
77 int ti = omp_get_thread_num();
78 int nt = omp_get_num_threads();
80 memcpy(data2+Nt*ti, data+Nt*ti, Nt*
sizeof(
double));
82 t_cpy = MPI_Wtime()-t_cpy_st;
83 printf(
"multi-threaded memcpy %ld bandwidth is %lf sec %lf GB/sec\n",
84 N, t_cpy, 1.E-9*N*
sizeof(
double)/t_cpy);
96 for (
int i=0; i<niter; i++){
97 double t_st_fwd = MPI_Wtime();
101 t_fwd += MPI_Wtime() - t_st_fwd;
106 t_min_fwd = std::min(MPI_Wtime() - t_st_fwd, t_min_fwd);
107 t_max_fwd = std::max(MPI_Wtime() - t_st_fwd, t_max_fwd);
110 double t_st_bwd = MPI_Wtime();
114 t_bwd += MPI_Wtime() - t_st_bwd;
119 t_min_bwd = std::min(MPI_Wtime() - t_st_bwd, t_min_bwd);
120 t_max_bwd = std::max(MPI_Wtime() - t_st_bwd, t_max_bwd);
125 printf(
"Performed %d iteartions\n",niter);
126 printf(
"Forward sec/iter: average = %lf (GB/s = %lf), range = [%lf, %lf]\n",
127 t_fwd/niter, 1.E-9*N*
sizeof(
double)/(t_fwd/niter), t_min_fwd, t_max_fwd);
128 printf(
"Backward sec/iter: average = %lf (GB/s = %lf), range = [%lf, %lf]\n",
129 t_bwd/niter, 1.E-9*N*
sizeof(
double)/(t_bwd/niter), t_min_bwd, t_max_bwd);
137 char ** itr = std::find(begin, end, option);
138 if (itr != end && ++itr != end){
145 int main(
int argc,
char ** argv){
147 int const in_num = argc;
148 char ** input_str = argv;
151 MPI_Init(NULL, NULL);
153 n = atoi(
getCmdOption(input_str, input_str+in_num,
"-n"));
157 if (
getCmdOption(input_str, input_str+in_num,
"-niter")){
158 niter = atoi(
getCmdOption(input_str, input_str+in_num,
"-niter"));
159 if (niter < 0) niter = 8;
Ring class defined by a datatype and addition and multiplication functions; addition must have an ident...
int main(int argc, char **argv)
void bench_nosym_transp(int n, int order, int niter, char const *iA, char const *iB)
char * getCmdOption(char **begin, char **end, const std::string &option)
void nosym_transpose(tensor *A, int all_fdim_A, int const *all_flen_A, int const *new_order, int dir)