41   assert(test || bench);
    43   int sA = sp_A < 1. ? 
SP : 0;
    44   int sB = sp_B < 1. ? 
SP : 0;
    45   int sC = sp_C < 1. ? 
SP : 0;
    77     ref_A[
"ij"] = A[
"ij"];
    78     ref_B[
"ij"] = B[
"ij"];
    79     ref_C[
"ij"] = C[
"ij"];
    84         ref_C[
"ik"] += ref_A[
"ij"]*ref_B[
"jk"];
    88         ref_C[
"ik"] += ref_A[
"ij"]*ref_B[
"jk"];
    89         ref_C[
"ik"] += ref_A[
"kj"]*ref_B[
"ji"];
    90         if (sym_C == 
SH) ref_C[
"ii"] = 0.0;
    93         ref_C[
"ik"] += ref_A[
"ij"]*ref_B[
"jk"];
    94         ref_C[
"ik"] -= ref_A[
"kj"]*ref_B[
"ji"];
    98     C[
"ik"] += .5*A[
"ij"]*B[
"jk"];
    99     C[
"ik"] += .5*B[
"jk"]*A[
"ij"];
   103     ref_C[
"ik"] -= C[
"ik"];
   105     pass = ref_C.
norm2() <= 1.E-6;
   109         printf(
"{ C[\"ik\"] += A[\"ij\"]*B[\"jk\"] with A (%d*%d sym %d sp %lf), B (%d*%d sym %d sp %lf), C (%d*%d sym %d sp %lf) } passed \n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C);
   111         printf(
"{ C[\"ik\"] += A[\"ij\"]*B[\"jk\"] with A (%d*%d sym %d sp %lf), B (%d*%d sym %d sp %lf), C (%d*%d sym %d sp %lf) } failed \n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C);
   115     double min_time = DBL_MAX;
   116     double max_time = 0.0;
   117     double tot_time = 0.0;
   121       printf(
"Starting %d benchmarking iterations of matrix multiplication with specified attributes...\n", niter);
   128     for (
int i=0; i<niter; i++){
   132       double start_time = MPI_Wtime();
   133       C[
"ik"] = A[
"ij"]*B[
"jk"];
   135       double end_time = MPI_Wtime();
   136       double iter_time = end_time-start_time;
   137       times[i] = iter_time;
   138       tot_time += iter_time;
   139       if (iter_time < min_time) min_time = iter_time;
   140       if (iter_time > max_time) max_time = iter_time;
   145       printf(
"iterations completed.\n");
   146       printf(
"All iterations times: ");
   147       for (
int i=0; i<niter; i++){
   148         printf(
"%lf ", times[i]);
   151       std::sort(times,times+niter);
   152       printf(
"A (%d*%d sym %d sp %lf), B (%d*%d sym %d sp %lf), C (%d*%d sym %d sp %lf) Min time = %lf, Avg time = %lf, Med time = %lf, Max time = %lf\n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C,min_time,tot_time/niter, times[niter/2], max_time);
   165   char ** itr = std::find(begin, end, option);
   166   if (itr != end && ++itr != end){
   173 int main(
int argc, 
char ** argv){
   174   int rank, 
np, m, n, k, pass, niter, bench, sym_A, sym_B, sym_C, test;
   175   double sp_A, sp_B, sp_C;
   176   int const in_num = argc;
   177   char ** input_str = argv;
   179   MPI_Init(&argc, &argv);
   180   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   181   MPI_Comm_size(MPI_COMM_WORLD, &np);
   184     m = atoi(
getCmdOption(input_str, input_str+in_num, 
"-m"));
   189     n = atoi(
getCmdOption(input_str, input_str+in_num, 
"-n"));
   194     k = atoi(
getCmdOption(input_str, input_str+in_num, 
"-k"));
   198   if (
getCmdOption(input_str, input_str+in_num, 
"-sym_A")){
   199     sym_A = atoi(
getCmdOption(input_str, input_str+in_num, 
"-sym_A"));
   200     if (sym_A != 
AS && sym_A != 
SY && sym_A != 
SH) sym_A = 
NS;
   203   if (
getCmdOption(input_str, input_str+in_num, 
"-sym_B")){
   204     sym_B = atoi(
getCmdOption(input_str, input_str+in_num, 
"-sym_B"));
   205     if (sym_B != 
AS && sym_B != 
SY && sym_B != 
SH) sym_B = 
NS;
   208   if (
getCmdOption(input_str, input_str+in_num, 
"-sym_C")){
   209     sym_C = atoi(
getCmdOption(input_str, input_str+in_num, 
"-sym_C"));
   210     if (sym_C != 
AS && sym_C != 
SY && sym_C != 
SH) sym_C = 
NS;
   213   if (
getCmdOption(input_str, input_str+in_num, 
"-sp_A")){
   214     sp_A = atof(
getCmdOption(input_str, input_str+in_num, 
"-sp_A"));
   215     if (sp_A < 0.0 || sp_A > 1.0) sp_A = .2;
   218   if (
getCmdOption(input_str, input_str+in_num, 
"-sp_B")){
   219     sp_B = atof(
getCmdOption(input_str, input_str+in_num, 
"-sp_B"));
   220     if (sp_B < 0.0 || sp_B > 1.0) sp_B = .2;
   223   if (
getCmdOption(input_str, input_str+in_num, 
"-sp_C")){
   224     sp_C = atof(
getCmdOption(input_str, input_str+in_num, 
"-sp_C"));
   225     if (sp_C < 0.0 || sp_C > 1.0) sp_C = .2;
   228   if (
getCmdOption(input_str, input_str+in_num, 
"-niter")){
   229     niter = atoi(
getCmdOption(input_str, input_str+in_num, 
"-niter"));
   230     if (niter < 0) niter = 10;
   233   if (
getCmdOption(input_str, input_str+in_num, 
"-bench")){
   234     bench = atoi(
getCmdOption(input_str, input_str+in_num, 
"-bench"));
   235     if (bench != 0 && bench != 1) bench = 1;
   238   if (
getCmdOption(input_str, input_str+in_num, 
"-test")){
   239     test = atoi(
getCmdOption(input_str, input_str+in_num, 
"-test"));
   240     if (test != 0 && test != 1) test = 1;
   245     World dw(argc, argv);
   248       printf(
"Multiplying A (%d*%d sym %d sp %lf) and B (%d*%d sym %d sp %lf) into C (%d*%d sym %d sp %lf) \n",m,k,sym_A,sp_A,k,n,sym_B,sp_B,m,n,sym_C,sp_C);
   250     pass = 
matmul(m, n, k, dw, sym_A, sym_B, sym_C, sp_A, sp_B, sp_C, test, bench, niter);
 
int main(int argc, char **argv)
char * getCmdOption(char **begin, char **end, const std::string &option)
Matrix class which encapsulates a 2D tensor. 
an instance of the CTF library (world) on a MPI communicator 
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!) 
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
int rank
rank of local processor 
int matmul(int m, int n, int k, World &dw, int sym_A=NS, int sym_B=NS, int sym_C=NS, double sp_A=1., double sp_B=1., double sp_C=1., bool test=true, bool bench=false, int niter=10)
(if test) tests and (if bench) benchmarks m*n*k matrix multiplication with matrices of specified symm...
epoch during which to measure timers 
void fill_sp_random(dtype rmin, dtype rmax, double frac_sp)
generate roughly frac_sp*dense_tensor_size nonzeros between rmin and rmax, works only for dtype in {f...