41 assert(test || bench);
43 int sA = sp_A < 1. ?
SP : 0;
44 int sB = sp_B < 1. ?
SP : 0;
45 int sC = sp_C < 1. ?
SP : 0;
77 ref_A[
"ij"] = A[
"ij"];
78 ref_B[
"ij"] = B[
"ij"];
79 ref_C[
"ij"] = C[
"ij"];
84 ref_C[
"ik"] += ref_A[
"ij"]*ref_B[
"jk"];
88 ref_C[
"ik"] += ref_A[
"ij"]*ref_B[
"jk"];
89 ref_C[
"ik"] += ref_A[
"kj"]*ref_B[
"ji"];
90 if (sym_C ==
SH) ref_C[
"ii"] = 0.0;
93 ref_C[
"ik"] += ref_A[
"ij"]*ref_B[
"jk"];
94 ref_C[
"ik"] -= ref_A[
"kj"]*ref_B[
"ji"];
98 C[
"ik"] += .5*A[
"ij"]*B[
"jk"];
99 C[
"ik"] += .5*B[
"jk"]*A[
"ij"];
103 ref_C[
"ik"] -= C[
"ik"];
105 pass = ref_C.
norm2() <= 1.E-6;
109 printf(
"{ C[\"ik\"] += A[\"ij\"]*B[\"jk\"] with A (%d*%d sym %d sp %lf), B (%d*%d sym %d sp %lf), C (%d*%d sym %d sp %lf) } passed \n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C);
111 printf(
"{ C[\"ik\"] += A[\"ij\"]*B[\"jk\"] with A (%d*%d sym %d sp %lf), B (%d*%d sym %d sp %lf), C (%d*%d sym %d sp %lf) } failed \n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C);
115 double min_time = DBL_MAX;
116 double max_time = 0.0;
117 double tot_time = 0.0;
121 printf(
"Starting %d benchmarking iterations of matrix multiplication with specified attributes...\n", niter);
128 for (
int i=0; i<niter; i++){
132 double start_time = MPI_Wtime();
133 C[
"ik"] = A[
"ij"]*B[
"jk"];
135 double end_time = MPI_Wtime();
136 double iter_time = end_time-start_time;
137 times[i] = iter_time;
138 tot_time += iter_time;
139 if (iter_time < min_time) min_time = iter_time;
140 if (iter_time > max_time) max_time = iter_time;
145 printf(
"iterations completed.\n");
146 printf(
"All iterations times: ");
147 for (
int i=0; i<niter; i++){
148 printf(
"%lf ", times[i]);
151 std::sort(times,times+niter);
152 printf(
"A (%d*%d sym %d sp %lf), B (%d*%d sym %d sp %lf), C (%d*%d sym %d sp %lf) Min time = %lf, Avg time = %lf, Med time = %lf, Max time = %lf\n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C,min_time,tot_time/niter, times[niter/2], max_time);
165 char ** itr = std::find(begin, end, option);
166 if (itr != end && ++itr != end){
173 int main(
int argc,
char ** argv){
174 int rank,
np, m, n, k, pass, niter, bench, sym_A, sym_B, sym_C, test;
175 double sp_A, sp_B, sp_C;
176 int const in_num = argc;
177 char ** input_str = argv;
179 MPI_Init(&argc, &argv);
180 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
181 MPI_Comm_size(MPI_COMM_WORLD, &np);
184 m = atoi(
getCmdOption(input_str, input_str+in_num,
"-m"));
189 n = atoi(
getCmdOption(input_str, input_str+in_num,
"-n"));
194 k = atoi(
getCmdOption(input_str, input_str+in_num,
"-k"));
198 if (
getCmdOption(input_str, input_str+in_num,
"-sym_A")){
199 sym_A = atoi(
getCmdOption(input_str, input_str+in_num,
"-sym_A"));
200 if (sym_A !=
AS && sym_A !=
SY && sym_A !=
SH) sym_A =
NS;
203 if (
getCmdOption(input_str, input_str+in_num,
"-sym_B")){
204 sym_B = atoi(
getCmdOption(input_str, input_str+in_num,
"-sym_B"));
205 if (sym_B !=
AS && sym_B !=
SY && sym_B !=
SH) sym_B =
NS;
208 if (
getCmdOption(input_str, input_str+in_num,
"-sym_C")){
209 sym_C = atoi(
getCmdOption(input_str, input_str+in_num,
"-sym_C"));
210 if (sym_C !=
AS && sym_C !=
SY && sym_C !=
SH) sym_C =
NS;
213 if (
getCmdOption(input_str, input_str+in_num,
"-sp_A")){
214 sp_A = atof(
getCmdOption(input_str, input_str+in_num,
"-sp_A"));
215 if (sp_A < 0.0 || sp_A > 1.0) sp_A = .2;
218 if (
getCmdOption(input_str, input_str+in_num,
"-sp_B")){
219 sp_B = atof(
getCmdOption(input_str, input_str+in_num,
"-sp_B"));
220 if (sp_B < 0.0 || sp_B > 1.0) sp_B = .2;
223 if (
getCmdOption(input_str, input_str+in_num,
"-sp_C")){
224 sp_C = atof(
getCmdOption(input_str, input_str+in_num,
"-sp_C"));
225 if (sp_C < 0.0 || sp_C > 1.0) sp_C = .2;
228 if (
getCmdOption(input_str, input_str+in_num,
"-niter")){
229 niter = atoi(
getCmdOption(input_str, input_str+in_num,
"-niter"));
230 if (niter < 0) niter = 10;
233 if (
getCmdOption(input_str, input_str+in_num,
"-bench")){
234 bench = atoi(
getCmdOption(input_str, input_str+in_num,
"-bench"));
235 if (bench != 0 && bench != 1) bench = 1;
238 if (
getCmdOption(input_str, input_str+in_num,
"-test")){
239 test = atoi(
getCmdOption(input_str, input_str+in_num,
"-test"));
240 if (test != 0 && test != 1) test = 1;
245 World dw(argc, argv);
248 printf(
"Multiplying A (%d*%d sym %d sp %lf) and B (%d*%d sym %d sp %lf) into C (%d*%d sym %d sp %lf) \n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C);
250 pass =
matmul(m, n, k, dw, sym_A, sym_B, sym_C, sp_A, sp_B, sp_C, test, bench, niter);
int main(int argc, char **argv)
char * getCmdOption(char **begin, char **end, const std::string &option)
Matrix class which encapsulates a 2D tensor.
an instance of the CTF library (world) on a MPI communicator
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
int rank
rank of local processor
int matmul(int m, int n, int k, World &dw, int sym_A=NS, int sym_B=NS, int sym_C=NS, double sp_A=1., double sp_B=1., double sp_C=1., bool test=true, bool bench=false, int niter=10)
(if test) tests and (if bench) benchmarks m*n*k matrix multiplication with matrices of specified symm...
epoch during which to measure timers
void fill_sp_random(dtype rmin, dtype rmax, double frac_sp)
generate roughly frac_sp*dense_tensor_size nonzeros between rmin and rmax, works only for dtype in {f...