ctf/model__trainer_8cxx_source.html

 #include <ctf.hpp>
 #define TEST_SUITE
 #include "../examples/ccsd.cxx"
 #include "../examples/sparse_mp3.cxx"
 #undef TEST_SUITE
 using namespace CTF;

 // Forward declaration of a CTF-internal routine that is not declared by the
 // headers included above. train_all() calls it after each training round,
 // passing the communicator over which the models should be updated.
 namespace CTF_int{
   void update_all_models(MPI_Comm comm);
 }

 // Declared here so train_world() can call it; no definition is visible in this
 // file (presumably provided by a separate kernels translation unit -- confirm).
 void train_off_vec_mat(int64_t n, int64_t m, World & dw, bool sp_A, bool sp_B, bool sp_C);

 /**
  * \brief executes tensor-times-matrix style contractions for tensor orders 2 through 6
  *
  * For each order the edge length n is the smallest positive integer with
  * n^order >= sz, and an n-by-r matrix M is contracted into the trailing modes
  * of an order-dimensional tensor.
  *
  * \param[in] sz target element count used to pick the tensor edge length
  * \param[in] r number of columns of the matrix factor
  * \param[in] dw world on which all tensors are created
  */
 void train_ttm(int64_t sz, int64_t r, World & dw){
   Timer TTM("TTM");
   TTM.start();
   for (int order=2; order<7; order++){
     // grow the edge length until the dense tensor holds at least sz entries
     int64_t n = 1;
     for (; std::pow(n,order) < sz; n++){}
     int64_t m = r;

     Matrix<> M(n,m,dw);
     M.fill_random(-.5,.5);

     int * lens_n = (int*)malloc(order*sizeof(int));
     int * lens_nm = (int*)malloc(order*sizeof(int));
     int * lens_nmm = (int*)malloc(order*sizeof(int));
     // leading (order-2) mode labels 'a','b',... plus the terminator
     char * base_inds = (char*)malloc((order-1)*sizeof(char));

     for (int d=0; d<order; d++){
       lens_n[d] = n;                       // all modes of length n
       lens_nm[d] = (d<order-1) ? n : m;    // last mode of length m
       lens_nmm[d] = (d<order-2) ? n : m;   // last two modes of length m
     }
     for (int d=0; d<order-2; d++){
       base_inds[d] = 'a'+d;
     }
     base_inds[order-2] = '\0';

     // base labels + two trailing labels + terminator
     char * inds_C = (char*)malloc((order+1)*sizeof(char));
     char * inds_A = (char*)malloc((order+1)*sizeof(char));
     char const * inds_M = "xy";

     // writes base_inds followed by tail into dst
     auto label = [&](char * dst, char const * tail){
       strcpy(dst, base_inds);
       strcat(dst, tail);
     };

     Tensor<> T(order,lens_n,dw);
     Tensor<> U(order,lens_nm,dw);
     Tensor<> V(order,lens_nmm,dw);
     Tensor<> W(order-1,lens_nmm,dw);
     T.fill_random(-.2,.8);

     // contract the last mode of T with M
     label(inds_A, "zx");
     label(inds_C, "zy");
     U[inds_C] = T[inds_A]*M[inds_M];

     // contract the second-to-last mode of U with M
     label(inds_A, "xq");
     label(inds_C, "yq");
     V[inds_C] = U[inds_A]*M[inds_M];

     // index y appears in both operands and in the output
     label(inds_A, "xy");
     label(inds_C, "y");
     W[inds_C] = U[inds_A]*M[inds_M];

     free(inds_A);
     free(inds_C);
     free(base_inds);
     free(lens_nmm);
     free(lens_nm);
     free(lens_n);
   }
   TTM.stop();
 }

 /**
  * \brief executes a fixed sequence of dense vector/matrix operations under the
  *        "dns_vec_mat" timer
  *
  * Every operand is filled with random values in [-.5,.5] and then updated in
  * place, so each statement works on the result of the ones before it.
  *
  * \param[in] n length of vector b and number of columns of the m-by-n matrices
  * \param[in] m length of vector c and number of rows of the m-by-n matrices
  * \param[in] dw world on which all operands are created
  */
 void train_dns_vec_mat(int64_t n, int64_t m, World & dw){
   Timer dns_vec_mat("dns_vec_mat");
   dns_vec_mat.start();
   Vector<> b(n, dw);
   Vector<> c(m, dw);
   Matrix<> A(m, n, dw);
   Matrix<> A1(m, n, dw);
   Matrix<> A2(m, n, dw);
   // square operands created with the SY and AS attributes
   // (presumably symmetric / antisymmetric -- confirm in common.h)
   Matrix<> G(n, n, SY, dw);
   Matrix<> F(m, m, AS, dw);

   // seed the C random generator differently on every rank
   srand48(dw.rank);
   b.fill_random(-.5, .5);
   c.fill_random(-.5, .5);
   A.fill_random(-.5, .5);
   A1.fill_random(-.5, .5);
   A2.fill_random(-.5, .5);
   G.fill_random(-.5, .5);
   F.fill_random(-.5, .5);

   // contractions and index-matched products written with index strings
   A["ij"] += A["ik"]*G["kj"];
   A["ij"] += A["ij"]*A1["ij"];
   A["ij"] += F["ik"]*A["kj"];
   c["i"]  += A["ij"]*b["j"];
   b["j"]  += .2*A["ij"]*c["i"];
   b["i"]  += b["i"]*b["i"];

   // one-argument user function: a -> a*a
   Function<> f1([](double a){ return a*a; });

   A2["ij"] = f1(A["ij"]);

   // output carries only index i while the input carries i and j
   c["i"] += f1(A["ij"]);

   // two-argument user function: (a,b) -> a*a+b*b
   Function<> f2([](double a, double b){ return a*a+b*b; });

   A1["ij"] -= f2(A["kj"], F["ki"]);

   // one-argument in-place transform: a <- a*a
   Transform<> t1([](double & a){ a*=a; });

   t1(b["i"]);
   t1(A["ij"]);

   // two-argument transform: second operand updated as b <- b - b/a
   Transform<> t2([](double a, double & b){ b-=b/a; });

   t2(b["i"],b["i"]);
   t2(A["ij"],A2["ij"]);


   // three-argument transform left disabled
   /*Transform<> t3([](double a, double b, double & c){ c=c*c-b*a; });

   t3(c["i"],b["i"],b["i"]);
   t3(A["ij"],G["ij"],F["ij"]);*/
   dns_vec_mat.stop();
 }


 /**
  * \brief executes a fixed sequence of vector/matrix operations on operands
  *        filled via fill_sp_random, under the "sps_vec_mat" timer
  *
  * The sequence is repeated for a sparsity fraction sp that starts at .01 and
  * is doubled while it stays below .32; all operands are refilled each pass.
  *
  * \param[in] n length of vector b and number of columns of the m-by-n matrices
  * \param[in] m length of vector c and number of rows of the m-by-n matrices
  * \param[in] dw world on which all operands are created
  * \param[in] sp_A not read by this function
  * \param[in] sp_B not read by this function
  * \param[in] sp_C when set, the two index-matched products (B += A*A1 and
  *            b += b*b) are skipped
  */
 void train_sps_vec_mat(int64_t n, int64_t m, World & dw, bool sp_A, bool sp_B, bool sp_C){
   Timer sps_vec_mat("sps_vec_mat");
   sps_vec_mat.start();
   Vector<> b(n, dw);
   Vector<> c(m, dw);
   Matrix<> A(m, n, dw);
   Matrix<> B(m, n, dw);
   Matrix<> A1(m, n, dw);
   Matrix<> A2(m, n, dw);
   Matrix<> G(n, n, NS, dw);
   Matrix<> F(m, m, NS, dw);

   // seed the C random generator differently on every rank
   srand48(dw.rank);
   for (double sp = .01; sp<.32; sp*=2.){
     // refill every operand with values in [-.5,.5] at fraction sp
     b.fill_sp_random(-.5, .5, sp);
     c.fill_sp_random(-.5, .5, sp);
     A.fill_sp_random(-.5, .5, sp);
     B.fill_sp_random(-.5, .5, sp);
     A1.fill_sp_random(-.5, .5, sp);
     A2.fill_sp_random(-.5, .5, sp);
     G.fill_sp_random(-.5, .5, sp);
     F.fill_sp_random(-.5, .5, sp);

     B["ij"] += A["ik"]*G["kj"];
     if (!sp_C) B["ij"] += A["ij"]*A1["ij"];
     B["ij"] += F["ik"]*A["kj"];
     c["i"]  += A["ij"]*b["j"];
     b["j"]  += .2*A["ij"]*c["i"];
     if (!sp_C) b["i"]  += b["i"]*b["i"];

     // one-argument user function: a -> a*a
     Function<> f1([](double a){ return a*a; });

     A2["ij"] = f1(A["ij"]);

     c["i"] += f1(A["ij"]);

     // two-argument user function: (a,b) -> a*a+b*b
     Function<> f2([](double a, double b){ return a*a+b*b; });

     // note the output is addressed as "ji" here
     A2["ji"] -= f2(A1["ki"], F["kj"]);

     // one-argument in-place transform: a <- a*a
     Transform<> t1([](double & a){ a*=a; });

     t1(b["i"]);
     t1(A["ij"]);

     // two-argument transform: second operand updated as b <- b - b/a
     Transform<> t2([](double a, double & b){ b-=b/a; });

     t2(b["i"],b["i"]);
     t2(A["ij"],A2["ij"]);

     // three-argument transform left disabled
     /*Transform<> t3([](double a, double b, double & c){ c=c*c-b*a; });

     t3(c["i"],b["i"],b["i"]);
     t3(A["ij"],G["ij"],F["ij"]);*/
   }
   sps_vec_mat.stop();
 }

 /**
  * \brief runs one call of the CCSD example kernel under the "CCSD" timer and
  *        rescales the resulting amplitudes by the inverse of their norms
  * \param[in] n the first dimension passed on is floor(sqrt(n)) (used as nv)
  * \param[in] m the second dimension passed on is floor(sqrt(m)) (used as no)
  * \param[in] dw world on which integrals and amplitudes are created
  */
 void train_ccsd(int64_t n, int64_t m, World & dw){
   Timer ccsd_t("CCSD");
   ccsd_t.start();

   int const num_virt = (int)sqrt(n);
   int const num_occ  = (int)sqrt(m);

   Integrals V(num_occ, num_virt, dw);
   V.fill_rand();
   Amplitudes T(num_occ, num_virt, dw);
   T.fill_rand();

   ccsd(V,T,0);

   // scale each amplitude tensor by 1/norm2 of itself
   double const inv_norm_ai = 1./T.ai->norm2();
   T["ai"] = inv_norm_ai*T["ai"];
   double const inv_norm_abij = 1./T.abij->norm2();
   T["abij"] = inv_norm_abij*T["abij"];

   ccsd_t.stop();
 }


 /**
  * \brief runs the sparse MP3 example kernel in three configurations for a range
  *        of sparsity fractions, under the "sparse_mp3" timer
  *
  * The fraction sp starts at .001 and is multiplied by 4 while it stays below .2.
  * Each call passes test=0 and niter=1; the three calls differ in the trailing
  * (bnd, bns, sparse_T) flags: (1,0,0), (0,1,0) and (0,1,1).
  *
  * \param[in] n the first dimension passed on is floor(sqrt(n)) (used as nv)
  * \param[in] m the second dimension passed on is floor(sqrt(m)) (used as no)
  * \param[in] dw world on which the kernel runs
  */
 void train_sparse_mp3(int64_t n, int64_t m, World & dw){
   // timer label corrected from the misspelled "spoarse_mp3" so that it matches
   // the kernel name, like the labels of the other training timers do
   Timer sparse_mp3_t("sparse_mp3");
   sparse_mp3_t.start();
   int nv = sqrt(n);
   int no = sqrt(m);
   for (double sp = .001; sp<.2; sp*=4.){
     sparse_mp3(nv, no, dw, sp, 0, 1, 1, 0, 0);
     sparse_mp3(nv, no, dw, sp, 0, 1, 0, 1, 0);
     sparse_mp3(nv, no, dw, sp, 0, 1, 0, 1, 1);
   }
   sparse_mp3_t.stop();
 }


 /**
  * \brief repeatedly runs every training kernel on world dw with growing problem
  *        sizes until the per-sweep time slice is exhausted
  *
  * The outer loop starts a sweep with m reset to its initial value; the inner
  * loop runs all kernels, then scales m by step_size and adds 2 to n, until the
  * elapsed time (maximum over dw.comm) reaches the slice ddtime or m exceeds
  * 1000000. After a sweep n is scaled by step_size. Training stops once a
  * sweep completes in at most two inner iterations or n reaches 1000000.
  *
  * \param[in] dtime time budget in seconds; split into approx_niter slices
  * \param[in] dw world on which the kernels are executed
  * \param[in] step_size growth factor applied to m (inner loop) and n (outer loop)
  */
 void train_world(double dtime, World & dw, double step_size){
   int n0 = 19, m0 = 75;
   int64_t n = n0;

   // Number of slices the budget is divided into. The divisor grows only
   // logarithmically with dtime, so ddtime stays close to proportional to dtime.
   // log(dtime) is -inf for dtime == 0 (and NaN for dtime < 0); converting such
   // a value to int is undefined, so clamp to 1 while it is still a double.
   double niter_est = step_size*step_size*10*log(dtime);
   int64_t approx_niter = (niter_est >= 1.0) ? (int)niter_est : 1;
   double ddtime = dtime/approx_niter;

   for (;;){
     double t_st = MPI_Wtime();
     int niter = 0;
     int64_t m = m0;   // every sweep restarts from the initial m
     double ctime = 0.0;
     do {
       if (n<80){
         train_ttm(n*m+13,n,dw);
       }
       train_dns_vec_mat(n, m, dw);
       train_sps_vec_mat(n-2, m, dw, 0, 0, 0);
       train_sps_vec_mat(n-4, m-2, dw, 1, 0, 0);
       train_sps_vec_mat(n-1, m-4, dw, 1, 1, 0);
       train_sps_vec_mat(n-2, m-3, dw, 1, 1, 1);
       train_off_vec_mat(n+7, m-4, dw, 0, 0, 0);
       train_off_vec_mat(n-2, m+6, dw, 1, 0, 0);
       train_off_vec_mat(n-5, m+2, dw, 1, 1, 0);
       train_off_vec_mat(n-3, m-1, dw, 1, 1, 1);
       train_ccsd(n/2, m/2, dw);
       train_sparse_mp3(n,m,dw);
       niter++;
       // m *= 1.9;
       m *= step_size;
       n += 2;
       // use the slowest rank's elapsed time so that all ranks of dw take the
       // same branch in the loop conditions below
       ctime = MPI_Wtime() - t_st;
       MPI_Allreduce(MPI_IN_PLACE, &ctime, 1, MPI_DOUBLE, MPI_MAX, dw.comm);

       //printf("rank = %d executing p = %d n= %ld m = %ld ctime = %lf ddtime = %lf\n", dw.rank, dw.np, n, m, ctime, ddtime);

     } while (ctime < ddtime && m<= 1000000);

     if (niter <= 2 || n>=1000000) break;
     // n *= 1.7;
     n *= step_size;
     // The former `m += 3` here was a dead store: m is local to the loop body
     // and is re-initialised to m0 at the top of the next sweep.
   }
 }

 /**
  * \brief inserts p and every divisor of p lying strictly between 1 and p into ps
  * \param[in,out] ps set receiving the values; existing entries are kept
  * \param[in] p number to process; for p <= 2 only p itself is inserted
  */
 void frize(std::set<int> & ps, int p){
   ps.insert(p);
   // each quotient p/d with d in [2,p) dividing p is such a divisor, and together
   // they cover all of them, so one pass suffices
   for (int d=2; d<p; ++d){
     if (p%d == 0){
       ps.insert(p/d);
     }
   }
 }

 void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_file, std::string data_dir){
   World dw(MPI_COMM_WORLD);
   int np = dw.np;
   int rank;
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);

   /* compute membership for each process
   first process belongs to group 0
   next 2 belong to group 1
   next 4 belong to group 2
   next 8 belong to group 3
   and so on
   */

   int color = (int)log2(rank + 1);
   int end_color =  (int)log2(np + 1);
   int key = rank + 1 - (1<<color);

   // split out the communicator
   int cm;
   MPI_Comm_split(dw.comm, color, key, &cm);
   World w(cm);

   // number of iterations for training
   int num_iterations = 5;

   // control how much dtime should be increased upon each iteration
   // dtime = dtime * time_dump at the end of each iteration
   double time_jump = 1.5;

   double dtime = (time / (1- 1/time_jump)) / pow(time_jump, num_iterations - 1.0);
   for (int i=0; i<num_iterations; i++){
     // TODO probably need to adjust
     double step_size = 1.0 + 1.5 / pow(2.0, (double)i);
     if (rank == 0){
       printf("Starting iteration %d/%d with dimension increment factor %lf\n", i+1,num_iterations,step_size);
     }
     // discard the last process
     if (color != end_color){
       train_world(dtime/5, w, step_size);
       CTF_int::update_all_models(cm);
       if (rank == 0){
         printf("Completed training round 1/5\n");
       }
     }

     if (color != end_color)
       train_world(dtime/5, w, step_size);
     CTF_int::update_all_models(MPI_COMM_WORLD);
     if (rank == 0){
       printf("Completed training round 2/5\n");
     }
     if (color != end_color){
       train_world(dtime/5, w, step_size);
       CTF_int::update_all_models(cm);
       if (rank == 0){
         printf("Completed training round 3/5\n");
       }
     }

     if (color != end_color)
       train_world(dtime/5, w, step_size);
     CTF_int::update_all_models(MPI_COMM_WORLD);
     if (rank == 0){
       printf("Completed training round 4/5\n");
     }
     train_world(dtime/5, dw, step_size);
     CTF_int::update_all_models(MPI_COMM_WORLD);

     if (rank == 0){
       printf("Completed training round 5/5\n");
     }
     // double dtime for next iteration
     dtime *= time_jump;
   }


   if(write_coeff)
     CTF_int::write_all_models(coeff_file);
   if(dump_data){
     int rank, np;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &np);
     CTF_int::dump_all_models(data_dir);
   }
   MPI_Comm_free(&cm);
 }

 /**
  * \brief looks up the value that follows a command line option
  * \param[in] begin first element of the argument array
  * \param[in] end one past the last element of the argument array
  * \param[in] option option string to search for, e.g. "-write"
  * \return the argument right after the first occurrence of option, or a null
  *         pointer if option is absent or is the last argument
  */
 char* getCmdOption(char ** begin,
                    char ** end,
                    const   std::string & option){
   for (char ** arg = begin; arg != end; ++arg){
     if (option == *arg){
       // only the first match counts, even if it has no value after it
       char ** value = arg + 1;
       return (value != end) ? *value : nullptr;
     }
   }
   return nullptr;
 }


 /**
  * \brief entry point of the model trainer
  *
  * Recognised command line arguments:
  *   -write <file>  write the model coefficients to <file> at the end
  *   -time <sec>    time budget in seconds (default 5.0, also used if negative)
  *   -dump          dump the model data into the directory named by the
  *                  MODEL_DATA_DIR environment variable
  *                  (default ./src/shared/data)
  */
 int main(int argc, char ** argv){
   int rank, np;
   // remember the argument list as it was before MPI_Init gets to see it
   int const in_num = argc;
   char ** input_str = argv;
   char ** const input_end = input_str + in_num;

   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &np);

   // null when "-write" is absent or has no value
   char * file_path = getCmdOption(input_str, input_end, "-write");

   double budget = 5.0;
   char * budget_arg = getCmdOption(input_str, input_end, "-time");
   if (budget_arg){
     budget = atof(budget_arg);
     if (budget < 0) budget = 5.0;
   }

   // flags and paths forwarded to train_all
   bool const write_coeff = (file_path != NULL);
   std::string coeff_file;
   if (write_coeff){
     coeff_file = std::string(file_path);
   }

   char * data_dir = getenv("MODEL_DATA_DIR");
   std::string data_dir_str = data_dir ? std::string(data_dir)
                                       : std::string("./src/shared/data");

   bool const dump_data =
     std::find(input_str, input_end, std::string("-dump")) != input_end;

   {
     // scoped so that the world is destroyed before MPI_Finalize
     World dw(MPI_COMM_WORLD, argc, argv);

     if (rank == 0){
       printf("Executing a wide set of contractions to train model with time budget of %lf sec\n", budget);
       if (write_coeff) printf("At the end of execution write new coefficients will be written to model file %s\n",file_path);
     }
     train_all(budget, write_coeff, dump_data, coeff_file, data_dir_str);
   }


   MPI_Finalize();
   return 0;
 }

train_sps_vec_mat
void train_sps_vec_mat(int64_t n, int64_t m, World &dw, bool sp_A, bool sp_B, bool sp_C)
Definition: model_trainer.cxx:142

CTF::Transform
Definition: functions.h:488

CTF_int::update_all_models
void update_all_models(MPI_Comm comm)
Definition: model.cxx:15

Amplitudes::fill_rand
void fill_rand()
Definition: ccsd.cxx:179

CTF::comm
MPI_Comm comm
Definition: int_timer.cxx:22

CTF::Function
Definition: functions.h:441

ctf.hpp

train_ttm
void train_ttm(int64_t sz, int64_t r, World &dw)
Definition: model_trainer.cxx:22

CTF::Matrix
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18

CTF_int::write_all_models
void write_all_models(std::string file_name)
Definition: model.cxx:42

main
int main(int argc, char **argv)
Definition: model_trainer.cxx:388

ctf.core.rank
def rank(self)
Definition: core.pyx:312

Amplitudes::ai
Tensor * ai
Definition: ccsd.cxx:156

sparse_mp3
int sparse_mp3(int nv, int no, World &dw, double sp=.8, bool test=1, int niter=0, bool bnd=1, bool bns=1, bool sparse_T=1)
Definition: sparse_mp3.cxx:96

Integrals::fill_rand
void fill_rand()
Definition: ccsd.cxx:93

CTF::Timer::stop
void stop()
Definition: int_timer.cxx:151

ctf.core.b
b
Definition: core.pyx:386

CTF::Timer
local process walltime measurement
Definition: timer.h:50

CTF::Vector
Vector class which encapsulates a 1D tensor.
Definition: vector.h:14

NS
Definition: common.h:37

CTF::World
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19

Integrals
Definition: ccsd.cxx:18

f2
double f2(double a, double b)
Definition: bivar_function.cxx:13

ctf.core.string
string
Definition: core.pyx:456

CTF::Tensor::norm2
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
Definition: tensor.h:811

train_all
void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_file, std::string data_dir)
Definition: model_trainer.cxx:289

ctf.core.a
a
Definition: core.pyx:385

train_dns_vec_mat
void train_dns_vec_mat(int64_t n, int64_t m, World &dw)
Definition: model_trainer.cxx:86

CTF::Tensor::fill_random
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
Definition: tensor.cxx:928

CTF::World::rank
int rank
rank of local processor
Definition: world.h:24

train_off_vec_mat
void train_off_vec_mat(int64_t n, int64_t m, World &dw, bool sp_A, bool sp_B, bool sp_C)
Definition: model_trainer_kernels.cxx:23

getCmdOption
char * getCmdOption(char **begin, char **end, const std::string &option)
Definition: model_trainer.cxx:377

frize
void frize(std::set< int > &ps, int p)
Definition: model_trainer.cxx:280

Amplitudes::abij
Tensor * abij
Definition: ccsd.cxx:157

CTF_int::dump_all_models
void dump_all_models(std::string path)
Definition: model.cxx:50

CTF::Tensor::fill_sp_random
void fill_sp_random(dtype rmin, dtype rmax, double frac_sp)
generate roughly frac_sp*dense_tensor_size nonzeros between rmin and rmax, works only for dtype in {f...
Definition: tensor.cxx:969

Amplitudes
Definition: ccsd.cxx:154

CTF::Timer::start
void start()
Definition: int_timer.cxx:141

CTF
Definition: apsp.cxx:17

CTF::Tensor
an instance of a tensor within a CTF world
Definition: tensor.h:74

ctf.core.w
w
Definition: core.pyx:307

train_sparse_mp3
void train_sparse_mp3(int64_t n, int64_t m, World &dw)
Definition: model_trainer.cxx:216

key
int64_t key
Definition: back_comp.h:66

train_world
void train_world(double dtime, World &dw, double step_size)
Definition: model_trainer.cxx:230

SY
Definition: common.h:37

train_ccsd
void train_ccsd(int64_t n, int64_t m, World &dw)
Definition: model_trainer.cxx:200

CTF_int
Definition: model_trainer.cxx:16

AS
Definition: common.h:37

ccsd
void ccsd(Integrals &V, Amplitudes &T, int sched_nparts=0)
Definition: ccsd.cxx:200

CTF::World::np
int np
number of processors
Definition: world.h:26

CTF::World::comm
MPI_Comm comm
set of processors making up this world
Definition: world.h:22

ctf.core.np
def np(self)
Definition: core.pyx:315