20 template <
typename dtype>
    25   int sym_aabb[] = {
AS, 
NS, 
AS, NS};
    26   int sym_acbb[] = {
NS, 
NS, 
AS, NS};
    27   int sym_aabc[] = {
AS, 
NS, 
NS, NS};
    29   int len_U1[] = {m, n, n, n};
    31   U1[
"ajkl"] += U[
"ijkl"]*C[
"ia"];
    33   int len_U2[] = {m, m, n, n};
    36   U2_NS[
"abkl"] += U1[
"ajkl"]*C[
"jb"];
    44   int len_U3[] = {m, m, m, n};
    47   U3[
"abcl"] += U2[
"abkl"]*C[
"kc"];
    51   int len_V[] = {m, m, m, m};
    54   V_NS[
"abcd"] += U3[
"abcl"]*C[
"ld"];
    69 template <
typename dtype>
    75   int sym_aabb[] = {
AS, 
NS, 
NS, NS};
    76   int sym_acbb[] = {
NS, 
NS, 
NS, NS};
    77   int sym_aabc[] = {
AS, 
NS, 
NS, NS};
    79   int len_U1[] = {m, n, n, k};
    81   U1[
"ajkl"] += U[
"ijkl"]*C[
"ia"];
    83   int len_U2[] = {m, m, n, k};
    86   U2_NS[
"abkl"] += U1[
"ajkl"]*C[
"jb"];
    94   int len_U3[] = {m, m, m, k};
    97   U3[
"abcl"] += U2[
"abkl"]*C[
"kc"];
    99   U3[
"abcl"] += U2[
"abkl"]*C[
"kc"];
   122 void test_ao_mo_transf(
int n, 
int m, 
int k, MPI_Comm cm=MPI_COMM_WORLD, 
bool flt_test = 
true, 
bool ns_test = 
true){
   124   int lens_U[] = {n, n, n, n};
   125   int sym_aabb[] = {
AS, 
NS, 
AS, NS};
   127   Tensor<> U(4, lens_U, sym_aabb, dw);
   133   double t_st = MPI_Wtime();
   135   double t_end = MPI_Wtime();
   138     printf(
"AO to MO transformation with antisymmetry in double precision took %lf sec\n", t_end-t_st);
   149       printf(
"AO to MO transformation with antisymmetry in single precision took %lf sec\n", t_end-t_st);
   152     double frob_norm = V2.
norm2();
   154       printf(
"Frobenius norm of error in float vs double is %E\n", frob_norm);
   158     int lens_V[] = {m, m, m, m};
   161     U_NS[
"ijkl"] = U[
"ijkl"];
   163     V_NS[
"abcd"] = C[
"ia"]*C[
"jb"]*C[
"kc"]*C[
"ld"]*U_NS[
"ijkl"];
   166       printf(
"AO to MO transformation without symmetry and with dynamic intermediates in double precision took %lf sec\n", t_end-t_st);
   167     V_NS[
"abcd"] -= V[
"abcd"];
   168     double frob_norm = V_NS.
norm2();
   170       printf(
"Frobenius norm of error in nonsymmetric vs antisymmetric is %E\n", frob_norm);
   174 template <
typename dtype>
   176   World dw(MPI_COMM_WORLD);
   177   int lens_U[] = {n, n, n, k};
   178   int sym_aabb[] = {
AS, 
NS, 
AS, NS};
   179   if (n!=k) sym_aabb[2] = 
NS;
   187   double t_st = MPI_Wtime();
   192   double t_end = MPI_Wtime();
   194   if (
sizeof(
dtype) == 4){
   196       printf(
"AO to MO transformation n=%d m=%d k=%d with antisymmetry in single precision took %lf sec\n", n,m,k,t_end-t_st);
   198     assert(
sizeof(
dtype) == 8);
   200       printf(
"AO to MO transformation n=%d m=%d k=%d with antisymmetry in double precision took %lf sec\n", n,m,k,t_end-t_st);
   201       printf(
"Overall AO to MO transformation n=%d m=%d k=%d with antisymmetry in double precision would take %lf sec\n", n,m,k,((t_end-t_st)*n)/k);
   210   char ** itr = std::find(begin, end, option);
   211   if (itr != end && ++itr != end){
   217 int main(
int argc, 
char ** argv){
   219   int const in_num = argc;
   220   char ** input_str = argv;
   222   MPI_Init(&argc, &argv);
   224     n = atoi(
getCmdOption(input_str, input_str+in_num, 
"-n"));
   229     m = atoi(
getCmdOption(input_str, input_str+in_num, 
"-m"));
   234     k = atoi(
getCmdOption(input_str, input_str+in_num, 
"-k"));
   238   if (
getCmdOption(input_str, input_str+in_num, 
"-bench")){
   239     bench = atoi(
getCmdOption(input_str, input_str+in_num, 
"-bench"));
   240     if (bench < 0) bench = 0;
   247     bench_ao_mo_transf<float>(n, m, k);
   248     bench_ao_mo_transf<double>(n, m, k);
 
Matrix class which encapsulates a 2D tensor. 
Tensor< dtype > ao_mo_transf_slice(Tensor< dtype > &U, Matrix< dtype > &C)
AO-MO orbital transformation applied to a slice. 
void bench_ao_mo_transf(int n, int m, int k)
int main(int argc, char **argv)
an instance of the CTF library (world) on a MPI communicator 
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!) 
void test_ao_mo_transf(int n, int m, int k, MPI_Comm cm=MPI_COMM_WORLD, bool flt_test=true, bool ns_test=true)
CTF::World * wrld
distributed processor context on which tensor is defined 
Tensor< dtype > ao_mo_transf_naive(Tensor< dtype > &U, Matrix< dtype > &C)
naive implementation of AO-MO orbital transformation LIMITATIONS: (1) does not exploit output (syrk-l...
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
int rank
rank of local processor 
int * lens
unpadded tensor edge lengths 
char * getCmdOption(char **begin, char **end, const std::string &option)
an instance of a tensor within a CTF world 
void free_self()
destructor