Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
ao_mo_transf.cxx
Go to the documentation of this file.
1 
7 #include <ctf.hpp>
8 #include <float.h>
9 using namespace CTF;
10 
20 template <typename dtype>
22  int n = C.nrow;
23  int m = C.ncol;
24 
25  int sym_aabb[] = {AS, NS, AS, NS};
26  int sym_acbb[] = {NS, NS, AS, NS};
27  int sym_aabc[] = {AS, NS, NS, NS};
28 
29  int len_U1[] = {m, n, n, n};
30  Tensor<dtype> U1(4, len_U1, sym_acbb, *U.wrld);
31  U1["ajkl"] += U["ijkl"]*C["ia"];
32 
33  int len_U2[] = {m, m, n, n};
34  Tensor<dtype> U2_NS(4, len_U2, sym_acbb, *U.wrld);
35 
36  U2_NS["abkl"] += U1["ajkl"]*C["jb"];
37 
38  U1.free_self();
39 
40  Tensor<dtype> U2(U2_NS, sym_aabb);
41 
42  U2_NS.free_self();
43 
44  int len_U3[] = {m, m, m, n};
45  Tensor<dtype> U3(4, len_U3, sym_aabc, *U.wrld);
46 
47  U3["abcl"] += U2["abkl"]*C["kc"];
48 
49  U2.free_self();
50 
51  int len_V[] = {m, m, m, m};
52  Tensor<dtype> V_NS(4, len_V, sym_aabc, *U.wrld);
53 
54  V_NS["abcd"] += U3["abcl"]*C["ld"];
55 
56  U3.free_self();
57 
58  Tensor<dtype> V(V_NS, sym_aabb);
59 
60  return V;
61 }
62 
69 template <typename dtype>
71  int n = C.nrow;
72  int m = C.ncol;
73  int k = U.lens[3];
74 
75  int sym_aabb[] = {AS, NS, NS, NS};
76  int sym_acbb[] = {NS, NS, NS, NS};
77  int sym_aabc[] = {AS, NS, NS, NS};
78 
79  int len_U1[] = {m, n, n, k};
80  Tensor<dtype> U1(4, len_U1, sym_acbb, *U.wrld);
81  U1["ajkl"] += U["ijkl"]*C["ia"];
82 
83  int len_U2[] = {m, m, n, k};
84  Tensor<dtype> U2_NS(4, len_U2, sym_acbb, *U.wrld);
85 
86  U2_NS["abkl"] += U1["ajkl"]*C["jb"];
87 
88  U1.free_self();
89 
90  Tensor<dtype> U2(U2_NS, sym_aabb);
91 
92  U2_NS.free_self();
93 
94  int len_U3[] = {m, m, m, k};
95  Tensor<dtype> U3(4, len_U3, sym_aabc, *U.wrld);
96 
97  U3["abcl"] += U2["abkl"]*C["kc"];
98  // do same contraction again as dummy, in reality would have to write to disk then read back and reorder
99  U3["abcl"] += U2["abkl"]*C["kc"];
100 
101  U2.free_self();
102 
103 /* int len_V[] = {m, m, m, k};
104 
105  Tensor<dtype> V_NS(4, len_V, sym_aabc, *U.wrld);
106  for (int i =0; i<m/k; i++){
107  Tensor<dtype> Cslice = C.slice(i*n*k,(i+1)*n*k+k-1-n);
108  printf("%d,%d\n",Cslice.lens[0],Cslice.lens[1]);
109  printf("%d,%d,%d,%d\n",U3.lens[0],U3.lens[1],U3.lens[2],U3.lens[3]);
110 
111  V_NS["abcd"] += U3["abcl"]*Cslice["ld"];
112  }
113 
114  U3.free_self();
115 
116  Tensor<dtype> V(V_NS, sym_aabb);*/
117 
118  return U2;
119 }
120 
121 
122 void test_ao_mo_transf(int n, int m, int k, MPI_Comm cm=MPI_COMM_WORLD, bool flt_test = true, bool ns_test = true){
123  World dw(cm);
124  int lens_U[] = {n, n, n, n};
125  int sym_aabb[] = {AS, NS, AS, NS};
126 
127  Tensor<> U(4, lens_U, sym_aabb, dw);
128  U.fill_random(-1., 1.);
129 
130  Matrix<> C(n, m, dw);
131  C.fill_random(-1., 1.);
132 
133  double t_st = MPI_Wtime();
134  Tensor<> V = ao_mo_transf_naive(U, C);
135  double t_end = MPI_Wtime();
136 
137  if (dw.rank == 0)
138  printf("AO to MO transformation with antisymmetry in double precision took %lf sec\n", t_end-t_st);
139 
140  if (flt_test){
141  Tensor<float> U_flt(4, lens_U, sym_aabb, dw);
142  Matrix<float> C_flt(n, m, dw);
143  U_flt["ijkl"] = Function<double,float>([](double a){ return (float)a; })(U["ijkl"]);
144  C_flt["ij"] = Function<double,float>([](double a){ return (float)a; })(C["ij"]);
145  t_st = MPI_Wtime();
146  Tensor<float> V_flt = ao_mo_transf_naive(U_flt, C_flt);
147  t_end = MPI_Wtime();
148  if (dw.rank == 0)
149  printf("AO to MO transformation with antisymmetry in single precision took %lf sec\n", t_end-t_st);
150  Tensor<> V2(V);
151  V2["ijkl"] += Function<float,double>([](float a){ return -(double)a; })(V_flt["ijkl"]);
152  double frob_norm = V2.norm2();
153  if (dw.rank == 0)
154  printf("Frobenius norm of error in float vs double is %E\n", frob_norm);
155  }
156 
157  if (ns_test){
158  int lens_V[] = {m, m, m, m};
159  Tensor<> U_NS(4, lens_U, dw);
160  Tensor<> V_NS(4, lens_V, dw);
161  U_NS["ijkl"] = U["ijkl"];
162  t_st = MPI_Wtime();
163  V_NS["abcd"] = C["ia"]*C["jb"]*C["kc"]*C["ld"]*U_NS["ijkl"];
164  t_end = MPI_Wtime();
165  if (dw.rank == 0)
166  printf("AO to MO transformation without symmetry and with dynamic intermediates in double precision took %lf sec\n", t_end-t_st);
167  V_NS["abcd"] -= V["abcd"];
168  double frob_norm = V_NS.norm2();
169  if (dw.rank == 0)
170  printf("Frobenius norm of error in nonsymmetric vs antisymmetric is %E\n", frob_norm);
171  }
172 }
173 
174 template <typename dtype>
175 void bench_ao_mo_transf(int n, int m, int k){
176  World dw(MPI_COMM_WORLD);
177  int lens_U[] = {n, n, n, k};
178  int sym_aabb[] = {AS, NS, AS, NS};
179  if (n!=k) sym_aabb[2] = NS;
180 
181  Tensor<dtype> U(4, lens_U, sym_aabb, dw);
182  U.fill_random(-1., 1.);
183 
184  Matrix<dtype> C(n, m, dw);
185  C.fill_random(-1., 1.);
186 
187  double t_st = MPI_Wtime();
188  if (n==k)
190  else
192  double t_end = MPI_Wtime();
193 
194  if (sizeof(dtype) == 4){
195  if (dw.rank == 0)
196  printf("AO to MO transformation n=%d m=%d k=%d with antisymmetry in single precision took %lf sec\n", n,m,k,t_end-t_st);
197  } else {
198  assert(sizeof(dtype) == 8);
199  if (dw.rank == 0){
200  printf("AO to MO transformation n=%d m=%d k=%d with antisymmetry in double precision took %lf sec\n", n,m,k,t_end-t_st);
201  printf("Overall AO to MO transformation n=%d m=%d k=%d with antisymmetry in double precision would take %lf sec\n", n,m,k,((t_end-t_st)*n)/k);
202  }
203  }
204 }
205 
206 #ifndef TEST_SUITE
207 char* getCmdOption(char ** begin,
208  char ** end,
209  const std::string & option){
210  char ** itr = std::find(begin, end, option);
211  if (itr != end && ++itr != end){
212  return *itr;
213  }
214  return 0;
215 }
216 
217 int main(int argc, char ** argv){
218  int m, n, k, bench;
219  int const in_num = argc;
220  char ** input_str = argv;
221 
222  MPI_Init(&argc, &argv);
223  if (getCmdOption(input_str, input_str+in_num, "-n")){
224  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
225  if (n < 0) n = 6;
226  } else n = 6;
227 
228  if (getCmdOption(input_str, input_str+in_num, "-m")){
229  m = atoi(getCmdOption(input_str, input_str+in_num, "-m"));
230  if (m < 0) m = 9;
231  } else m = 9;
232 
233  if (getCmdOption(input_str, input_str+in_num, "-k")){
234  k = atoi(getCmdOption(input_str, input_str+in_num, "-k"));
235  if (k < 0) k = 9;
236  } else k = 9;
237 
238  if (getCmdOption(input_str, input_str+in_num, "-bench")){
239  bench = atoi(getCmdOption(input_str, input_str+in_num, "-bench"));
240  if (bench < 0) bench = 0;
241  } else bench = 0;
242 
243 
244  if (bench == 0){
245  test_ao_mo_transf(n, m, k);
246  } else {
247  bench_ao_mo_transf<float>(n, m, k);
248  bench_ao_mo_transf<double>(n, m, k);
249  }
250 
251  MPI_Finalize();
252  return 0;
253 }
254 #endif
255 
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18
Tensor< dtype > ao_mo_transf_slice(Tensor< dtype > &U, Matrix< dtype > &C)
AO-MO orbital transformation applied to a slice.
Definition: common.h:37
void bench_ao_mo_transf(int n, int m, int k)
int main(int argc, char **argv)
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
string
Definition: core.pyx:456
int ncol
Definition: matrix.h:20
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
Definition: tensor.h:811
void test_ao_mo_transf(int n, int m, int k, MPI_Comm cm=MPI_COMM_WORLD, bool flt_test=true, bool ns_test=true)
CTF::World * wrld
distributed processor context on which tensor is defined
Tensor< dtype > ao_mo_transf_naive(Tensor< dtype > &U, Matrix< dtype > &C)
naive implementation of AO-MO orbital transformation LIMITATIONS: (1) does not exploit output (syrk-l...
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
Definition: tensor.cxx:928
int rank
rank of local processor
Definition: world.h:24
int * lens
unpadded tensor edge lengths
char * getCmdOption(char **begin, char **end, const std::string &option)
int nrow
Definition: matrix.h:20
Definition: apsp.cxx:17
an instance of a tensor within a CTF world
Definition: tensor.h:74
void free_self()
destructor
Definition: common.h:37