20 template <
typename dtype>
25 int sym_aabb[] = {
AS,
NS,
AS, NS};
26 int sym_acbb[] = {
NS,
NS,
AS, NS};
27 int sym_aabc[] = {
AS,
NS,
NS, NS};
29 int len_U1[] = {m, n, n, n};
31 U1[
"ajkl"] += U[
"ijkl"]*C[
"ia"];
33 int len_U2[] = {m, m, n, n};
36 U2_NS[
"abkl"] += U1[
"ajkl"]*C[
"jb"];
44 int len_U3[] = {m, m, m, n};
47 U3[
"abcl"] += U2[
"abkl"]*C[
"kc"];
51 int len_V[] = {m, m, m, m};
54 V_NS[
"abcd"] += U3[
"abcl"]*C[
"ld"];
69 template <
typename dtype>
75 int sym_aabb[] = {
AS,
NS,
NS, NS};
76 int sym_acbb[] = {
NS,
NS,
NS, NS};
77 int sym_aabc[] = {
AS,
NS,
NS, NS};
79 int len_U1[] = {m, n, n, k};
81 U1[
"ajkl"] += U[
"ijkl"]*C[
"ia"];
83 int len_U2[] = {m, m, n, k};
86 U2_NS[
"abkl"] += U1[
"ajkl"]*C[
"jb"];
94 int len_U3[] = {m, m, m, k};
97 U3[
"abcl"] += U2[
"abkl"]*C[
"kc"];
99 U3[
"abcl"] += U2[
"abkl"]*C[
"kc"];
122 void test_ao_mo_transf(
int n,
int m,
int k, MPI_Comm cm=MPI_COMM_WORLD,
bool flt_test =
true,
bool ns_test =
true){
124 int lens_U[] = {n, n, n, n};
125 int sym_aabb[] = {
AS,
NS,
AS, NS};
127 Tensor<> U(4, lens_U, sym_aabb, dw);
133 double t_st = MPI_Wtime();
135 double t_end = MPI_Wtime();
138 printf(
"AO to MO transformation with antisymmetry in double precision took %lf sec\n", t_end-t_st);
149 printf(
"AO to MO transformation with antisymmetry in single precision took %lf sec\n", t_end-t_st);
152 double frob_norm = V2.
norm2();
154 printf(
"Frobenius norm of error in float vs double is %E\n", frob_norm);
158 int lens_V[] = {m, m, m, m};
161 U_NS[
"ijkl"] = U[
"ijkl"];
163 V_NS[
"abcd"] = C[
"ia"]*C[
"jb"]*C[
"kc"]*C[
"ld"]*U_NS[
"ijkl"];
166 printf(
"AO to MO transformation without symmetry and with dynamic intermediates in double precision took %lf sec\n", t_end-t_st);
167 V_NS[
"abcd"] -= V[
"abcd"];
168 double frob_norm = V_NS.
norm2();
170 printf(
"Frobenius norm of error in nonsymmetric vs antisymmetric is %E\n", frob_norm);
174 template <
typename dtype>
176 World dw(MPI_COMM_WORLD);
177 int lens_U[] = {n, n, n, k};
178 int sym_aabb[] = {
AS,
NS,
AS, NS};
179 if (n!=k) sym_aabb[2] =
NS;
187 double t_st = MPI_Wtime();
192 double t_end = MPI_Wtime();
194 if (
sizeof(
dtype) == 4){
196 printf(
"AO to MO transformation n=%d m=%d k=%d with antisymmetry in single precision took %lf sec\n", n,m,k,t_end-t_st);
198 assert(
sizeof(
dtype) == 8);
200 printf(
"AO to MO transformation n=%d m=%d k=%d with antisymmetry in double precision took %lf sec\n", n,m,k,t_end-t_st);
201 printf(
"Overall AO to MO transformation n=%d m=%d k=%d with antisymmetry in double precision would take %lf sec\n", n,m,k,((t_end-t_st)*n)/k);
210 char ** itr = std::find(begin, end, option);
211 if (itr != end && ++itr != end){
217 int main(
int argc,
char ** argv){
219 int const in_num = argc;
220 char ** input_str = argv;
222 MPI_Init(&argc, &argv);
224 n = atoi(
getCmdOption(input_str, input_str+in_num,
"-n"));
229 m = atoi(
getCmdOption(input_str, input_str+in_num,
"-m"));
234 k = atoi(
getCmdOption(input_str, input_str+in_num,
"-k"));
238 if (
getCmdOption(input_str, input_str+in_num,
"-bench")){
239 bench = atoi(
getCmdOption(input_str, input_str+in_num,
"-bench"));
240 if (bench < 0) bench = 0;
247 bench_ao_mo_transf<float>(n, m, k);
248 bench_ao_mo_transf<double>(n, m, k);
Matrix class which encapsulates a 2D tensor.
Tensor< dtype > ao_mo_transf_slice(Tensor< dtype > &U, Matrix< dtype > &C)
AO-MO orbital transformation applied to a slice.
void bench_ao_mo_transf(int n, int m, int k)
int main(int argc, char **argv)
an instance of the CTF library (world) on a MPI communicator
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
void test_ao_mo_transf(int n, int m, int k, MPI_Comm cm=MPI_COMM_WORLD, bool flt_test=true, bool ns_test=true)
CTF::World * wrld
distributed processor context on which tensor is defined
Tensor< dtype > ao_mo_transf_naive(Tensor< dtype > &U, Matrix< dtype > &C)
naive implementation of AO-MO orbital transformation LIMITATIONS: (1) does not exploit output (syrk-l...
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
int rank
rank of local processor
int * lens
unpadded tensor edge lengths
char * getCmdOption(char **begin, char **end, const std::string &option)
an instance of a tensor within a CTF world
void free_self()
destructor