11 #include "../examples/ccsd.cxx"    12 #include "../examples/sparse_mp3.cxx"    25   for (
int order=2; order<7; order++){
    27     while (std::pow(n,order) < sz){
    33     int * lens_n = (
int*)malloc(order*
sizeof(
int));
    34     int * lens_nm = (
int*)malloc(order*
sizeof(
int));
    35     int * lens_nmm = (
int*)malloc(order*
sizeof(
int));
    36     char * base_inds = (
char*)malloc((order-1)*
sizeof(char));
    37     for (
int i=0; i<order; i++){
    51     base_inds[order-2] = 
'\0';
    52     char * inds_C = (
char*)malloc((order+1)*
sizeof(char));
    53     char * inds_A = (
char*)malloc((order+1)*
sizeof(char));
    54     char const * inds_M = 
"xy";
    60     strcpy(inds_A, base_inds);
    61     strcpy(inds_C, base_inds);
    64     U[inds_C] = T[inds_A]*M[inds_M];
    65     strcpy(inds_A, base_inds);
    66     strcpy(inds_C, base_inds);
    69     V[inds_C] = U[inds_A]*M[inds_M];
    71     strcpy(inds_A, base_inds);
    72     strcpy(inds_C, base_inds);
    75     W[inds_C] = U[inds_A]*M[inds_M];
    87   Timer dns_vec_mat(
"dns_vec_mat");
   106   A[
"ij"] += A[
"ik"]*G[
"kj"];
   107   A[
"ij"] += A[
"ij"]*A1[
"ij"];
   108   A[
"ij"] += F[
"ik"]*A[
"kj"];
   109   c[
"i"]  += A[
"ij"]*b[
"j"];
   110   b[
"j"]  += .2*A[
"ij"]*c[
"i"];
   111   b[
"i"]  += b[
"i"]*b[
"i"];
   115   A2[
"ij"] = f1(A[
"ij"]);
   117   c[
"i"] += f1(A[
"ij"]);
   121   A1[
"ij"] -= 
f2(A[
"kj"], F[
"ki"]);
   131   t2(A[
"ij"],A2[
"ij"]);
   143   Timer sps_vec_mat(
"sps_vec_mat");
   155   for (
double sp = .01; sp<.32; sp*=2.){
   165     B[
"ij"] += A[
"ik"]*G[
"kj"];
   166     if (!sp_C) B[
"ij"] += A[
"ij"]*A1[
"ij"];
   167     B[
"ij"] += F[
"ik"]*A[
"kj"];
   168     c[
"i"]  += A[
"ij"]*b[
"j"];
   169     b[
"j"]  += .2*A[
"ij"]*c[
"i"];
   170     if (!sp_C) b[
"i"]  += b[
"i"]*b[
"i"];
   174     A2[
"ij"] = f1(A[
"ij"]);
   176     c[
"i"] += f1(A[
"ij"]);
   180     A2[
"ji"] -= 
f2(A1[
"ki"], F[
"kj"]);
   190     t2(A[
"ij"],A2[
"ij"]);
   201   Timer ccsd_t(
"CCSD");
   210   T[
"ai"] = (1./T.
ai->
norm2())*T[
"ai"];
   211   T[
"abij"] = (1./T.
abij->
norm2())*T[
"abij"];
   217   Timer sparse_mp3_t(
"spoarse_mp3");
   218   sparse_mp3_t.
start();
   221   for (
double sp = .001; sp<.2; sp*=4.){
   231   int n0 = 19, m0 = 75;
   233   int64_t approx_niter = std::max(1,(
int)(step_size*step_size*10*log(dtime))); 
   234   double ddtime = dtime/approx_niter;
   239   MPI_Comm_rank(MPI_COMM_WORLD, &rnk);
   241     double t_st = MPI_Wtime();
   244     volatile double ctime = 0.0;
   264       ctime = MPI_Wtime() - t_st;
   265       MPI_Allreduce(MPI_IN_PLACE, (
void*)&ctime, 1, MPI_DOUBLE, MPI_MAX, dw.
comm);
   269     } 
while (ctime < ddtime && m<= 1000000);
   271     if (niter <= 2 || n>=1000000) 
break;
   280 void frize(std::set<int> & ps, 
int p){
   283     for (
int i=2; i<p; i++){
   284       if (p%i == 0) 
frize(ps, p/i);
   290   World dw(MPI_COMM_WORLD);
   293   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   303   int color = (int)log2(rank + 1);
   304   int end_color =  (int)log2(np + 1);
   305   int key = rank + 1 - (1<<color);
   309   MPI_Comm_split(dw.
comm, color, key, &cm);
   313   int num_iterations = 5;
   317   double time_jump = 1.5;
   319   double dtime = (time / (1- 1/time_jump)) / pow(time_jump, num_iterations - 1.0);
   320   for (
int i=0; i<num_iterations; i++){
   322     double step_size = 1.0 + 1.5 / pow(2.0, (
double)i);
   324       printf(
"Starting iteration %d/%d with dimension increment factor %lf\n", i+1,num_iterations,step_size);
   327     if (color != end_color){
   331         printf(
"Completed training round 1/5\n");
   335     if (color != end_color)
   339       printf(
"Completed training round 2/5\n");
   341     if (color != end_color){
   345         printf(
"Completed training round 3/5\n");
   349     if (color != end_color)
   353       printf(
"Completed training round 4/5\n");
   359       printf(
"Completed training round 5/5\n");
   370     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   371     MPI_Comm_size(MPI_COMM_WORLD, &np);
   380   char ** itr = std::find(begin, end, option);
   381   if (itr != end && ++itr != end){
   388 int main(
int argc, 
char ** argv){
   392   int const in_num = argc;
   393   char ** input_str = argv;
   395   MPI_Init(&argc, &argv);
   396   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   397   MPI_Comm_size(MPI_COMM_WORLD, &np);
   399   if (
getCmdOption(input_str, input_str+in_num, 
"-write")){
   400     file_path = 
getCmdOption(input_str, input_str+in_num, 
"-write");
   401   } 
else file_path = NULL;
   403   if (
getCmdOption(input_str, input_str+in_num, 
"-time")){
   404     time = atof(
getCmdOption(input_str, input_str+in_num, 
"-time"));
   405     if (time < 0) time = 5.0;
   410   bool write_coeff = 
false;
   411   bool dump_data = 
false;
   415   if (file_path != NULL){
   420   char * data_dir = getenv(
"MODEL_DATA_DIR");
   429   if(std::find(input_str, input_str+in_num, 
std::string(
"-dump")) != input_str + in_num){
   434     World dw(MPI_COMM_WORLD, argc, argv);
   437       printf(
"Executing a wide set of contractions to train model with time budget of %lf sec\n", time);
   438       if (write_coeff) printf(
"At the end of execution write new coefficients will be written to model file %s\n",file_path);
   440     train_all(time, write_coeff, dump_data, coeff_file, data_dir_str);
 void train_sps_vec_mat(int64_t n, int64_t m, World &dw, bool sp_A, bool sp_B, bool sp_C)
void update_all_models(MPI_Comm comm)
void train_ttm(int64_t sz, int64_t r, World &dw)
Matrix class which encapsulates a 2D tensor. 
void write_all_models(std::string file_name)
int main(int argc, char **argv)
int sparse_mp3(int nv, int no, World &dw, double sp=.8, bool test=1, int niter=0, bool bnd=1, bool bns=1, bool sparse_T=1)
local process walltime measurement 
Vector class which encapsulates a 1D tensor. 
an instance of the CTF library (world) on an MPI communicator
double f2(double a, double b)
dtype norm2()
computes the Frobenius norm of the tensor (needs sqrt()!)
void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_file, std::string data_dir)
void train_dns_vec_mat(int64_t n, int64_t m, World &dw)
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements with random values in the range [rmin,rmax]; works only for dtype in {f...
int rank
rank of local processor 
void train_off_vec_mat(int64_t n, int64_t m, World &dw, bool sp_A, bool sp_B, bool sp_C)
char * getCmdOption(char **begin, char **end, const std::string &option)
void frize(std::set< int > &ps, int p)
void dump_all_models(std::string path)
void fill_sp_random(dtype rmin, dtype rmax, double frac_sp)
generates roughly frac_sp*dense_tensor_size nonzeros between rmin and rmax; works only for dtype in {f...
an instance of a tensor within a CTF world 
void train_sparse_mp3(int64_t n, int64_t m, World &dw)
void train_world(double dtime, World &dw, double step_size)
void train_ccsd(int64_t n, int64_t m, World &dw)
void ccsd(Integrals &V, Amplitudes &T, int sched_nparts=0)
int np
number of processors 
MPI_Comm comm
set of processors making up this world