11 #include "../examples/ccsd.cxx" 12 #include "../examples/sparse_mp3.cxx" 25 for (
int order=2; order<7; order++){
27 while (std::pow(n,order) < sz){
33 int * lens_n = (
int*)malloc(order*
sizeof(
int));
34 int * lens_nm = (
int*)malloc(order*
sizeof(
int));
35 int * lens_nmm = (
int*)malloc(order*
sizeof(
int));
36 char * base_inds = (
char*)malloc((order-1)*
sizeof(char));
37 for (
int i=0; i<order; i++){
51 base_inds[order-2] =
'\0';
52 char * inds_C = (
char*)malloc((order+1)*
sizeof(char));
53 char * inds_A = (
char*)malloc((order+1)*
sizeof(char));
54 char const * inds_M =
"xy";
60 strcpy(inds_A, base_inds);
61 strcpy(inds_C, base_inds);
64 U[inds_C] = T[inds_A]*M[inds_M];
65 strcpy(inds_A, base_inds);
66 strcpy(inds_C, base_inds);
69 V[inds_C] = U[inds_A]*M[inds_M];
71 strcpy(inds_A, base_inds);
72 strcpy(inds_C, base_inds);
75 W[inds_C] = U[inds_A]*M[inds_M];
87 Timer dns_vec_mat(
"dns_vec_mat");
106 A[
"ij"] += A[
"ik"]*G[
"kj"];
107 A[
"ij"] += A[
"ij"]*A1[
"ij"];
108 A[
"ij"] += F[
"ik"]*A[
"kj"];
109 c[
"i"] += A[
"ij"]*b[
"j"];
110 b[
"j"] += .2*A[
"ij"]*c[
"i"];
111 b[
"i"] += b[
"i"]*b[
"i"];
115 A2[
"ij"] = f1(A[
"ij"]);
117 c[
"i"] += f1(A[
"ij"]);
121 A1[
"ij"] -=
f2(A[
"kj"], F[
"ki"]);
131 t2(A[
"ij"],A2[
"ij"]);
143 Timer sps_vec_mat(
"sps_vec_mat");
155 for (
double sp = .01; sp<.32; sp*=2.){
165 B[
"ij"] += A[
"ik"]*G[
"kj"];
166 if (!sp_C) B[
"ij"] += A[
"ij"]*A1[
"ij"];
167 B[
"ij"] += F[
"ik"]*A[
"kj"];
168 c[
"i"] += A[
"ij"]*b[
"j"];
169 b[
"j"] += .2*A[
"ij"]*c[
"i"];
170 if (!sp_C) b[
"i"] += b[
"i"]*b[
"i"];
174 A2[
"ij"] = f1(A[
"ij"]);
176 c[
"i"] += f1(A[
"ij"]);
180 A2[
"ji"] -=
f2(A1[
"ki"], F[
"kj"]);
190 t2(A[
"ij"],A2[
"ij"]);
201 Timer ccsd_t(
"CCSD");
210 T[
"ai"] = (1./T.
ai->
norm2())*T[
"ai"];
211 T[
"abij"] = (1./T.
abij->
norm2())*T[
"abij"];
217 Timer sparse_mp3_t(
"spoarse_mp3");
218 sparse_mp3_t.
start();
221 for (
double sp = .001; sp<.2; sp*=4.){
231 int n0 = 19, m0 = 75;
233 int64_t approx_niter = std::max(1,(
int)(step_size*step_size*10*log(dtime)));
234 double ddtime = dtime/approx_niter;
239 MPI_Comm_rank(MPI_COMM_WORLD, &rnk);
241 double t_st = MPI_Wtime();
244 volatile double ctime = 0.0;
264 ctime = MPI_Wtime() - t_st;
265 MPI_Allreduce(MPI_IN_PLACE, (
void*)&ctime, 1, MPI_DOUBLE, MPI_MAX, dw.
comm);
269 }
while (ctime < ddtime && m<= 1000000);
271 if (niter <= 2 || n>=1000000)
break;
280 void frize(std::set<int> & ps,
int p){
283 for (
int i=2; i<p; i++){
284 if (p%i == 0)
frize(ps, p/i);
290 World dw(MPI_COMM_WORLD);
293 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
303 int color = (int)log2(rank + 1);
304 int end_color = (int)log2(np + 1);
305 int key = rank + 1 - (1<<color);
309 MPI_Comm_split(dw.
comm, color, key, &cm);
313 int num_iterations = 5;
317 double time_jump = 1.5;
319 double dtime = (time / (1- 1/time_jump)) / pow(time_jump, num_iterations - 1.0);
320 for (
int i=0; i<num_iterations; i++){
322 double step_size = 1.0 + 1.5 / pow(2.0, (
double)i);
324 printf(
"Starting iteration %d/%d with dimension increment factor %lf\n", i+1,num_iterations,step_size);
327 if (color != end_color){
331 printf(
"Completed training round 1/5\n");
335 if (color != end_color)
339 printf(
"Completed training round 2/5\n");
341 if (color != end_color){
345 printf(
"Completed training round 3/5\n");
349 if (color != end_color)
353 printf(
"Completed training round 4/5\n");
359 printf(
"Completed training round 5/5\n");
370 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
371 MPI_Comm_size(MPI_COMM_WORLD, &np);
380 char ** itr = std::find(begin, end, option);
381 if (itr != end && ++itr != end){
388 int main(
int argc,
char ** argv){
392 int const in_num = argc;
393 char ** input_str = argv;
395 MPI_Init(&argc, &argv);
396 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
397 MPI_Comm_size(MPI_COMM_WORLD, &np);
399 if (
getCmdOption(input_str, input_str+in_num,
"-write")){
400 file_path =
getCmdOption(input_str, input_str+in_num,
"-write");
401 }
else file_path = NULL;
403 if (
getCmdOption(input_str, input_str+in_num,
"-time")){
404 time = atof(
getCmdOption(input_str, input_str+in_num,
"-time"));
405 if (time < 0) time = 5.0;
410 bool write_coeff =
false;
411 bool dump_data =
false;
415 if (file_path != NULL){
420 char * data_dir = getenv(
"MODEL_DATA_DIR");
429 if(std::find(input_str, input_str+in_num,
std::string(
"-dump")) != input_str + in_num){
434 World dw(MPI_COMM_WORLD, argc, argv);
437 printf(
"Executing a wide set of contractions to train model with time budget of %lf sec\n", time);
438 if (write_coeff) printf(
"At the end of execution write new coefficients will be written to model file %s\n",file_path);
440 train_all(time, write_coeff, dump_data, coeff_file, data_dir_str);
void train_sps_vec_mat(int64_t n, int64_t m, World &dw, bool sp_A, bool sp_B, bool sp_C)
void update_all_models(MPI_Comm comm)
void train_ttm(int64_t sz, int64_t r, World &dw)
Matrix class which encapsulates a 2D tensor.
void write_all_models(std::string file_name)
int main(int argc, char **argv)
int sparse_mp3(int nv, int no, World &dw, double sp=.8, bool test=1, int niter=0, bool bnd=1, bool bns=1, bool sparse_T=1)
local process walltime measurement
Vector class which encapsulates a 1D tensor.
an instance of the CTF library (world) on an MPI communicator
double f2(double a, double b)
dtype norm2()
computes the Frobenius norm of the tensor (needs sqrt()!)
void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_file, std::string data_dir)
void train_dns_vec_mat(int64_t n, int64_t m, World &dw)
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements with random values in the range [rmin,rmax]; works only for dtype in {f...
int rank
rank of local processor
void train_off_vec_mat(int64_t n, int64_t m, World &dw, bool sp_A, bool sp_B, bool sp_C)
char * getCmdOption(char **begin, char **end, const std::string &option)
void frize(std::set< int > &ps, int p)
void dump_all_models(std::string path)
void fill_sp_random(dtype rmin, dtype rmax, double frac_sp)
generates roughly frac_sp*dense_tensor_size nonzeros between rmin and rmax; works only for dtype in {f...
an instance of a tensor within a CTF world
void train_sparse_mp3(int64_t n, int64_t m, World &dw)
void train_world(double dtime, World &dw, double step_size)
void train_ccsd(int64_t n, int64_t m, World &dw)
void ccsd(Integrals &V, Amplitudes &T, int sched_nparts=0)
int np
number of processors
MPI_Comm comm
set of processors making up this world