Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
block_sparse.cxx
#include <ctf.hpp>
#include <float.h>
using namespace CTF;

namespace CTF {
  // print specialization so that a Tensor<>-valued element of a Matrix< Tensor<> >
  // is displayed via Tensor::print()
  template <>
  inline void Set< Tensor<>, false >::print(char const * a, FILE * fp) const {
    ((Tensor<>*)a)->print(fp);
  }
}

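// assemble a block-sparse matrix, stored as a sparse Matrix whose nonzero elements are
// dense Tensor<> blocks, into a single flat sparse Matrix<> of scalars; ranges[i] gives
// the number of rows/columns contributed by block index i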
Matrix<> flatten_block_sparse_matrix(Matrix< Tensor<> > A, std::vector<int> ranges, World & dw){
  int64_t nrng = ranges.size();
  int64_t range_sum = 0;
  // prefix sums of the ranges give the offset of each block row/column in the flat matrix
  int64_t range_pfx[nrng];
  for (int64_t i=0; i<nrng; i++){
    if (i==0) range_pfx[i] = 0;
    else      range_pfx[i] = range_sum;
    range_sum += ranges[i];
  }

  Matrix<> flatA(range_sum, range_sum, SP, dw);

  // the block-sparse matrix A lives on a single-processor world, so every rank sees
  // all blocks via get_local_data(), while the blocks themselves are distributed over dw
  assert(A.wrld->np == 1);
  int64_t nblk;
  int64_t * blk_inds;
  Tensor<> * A_blks;

  A.get_local_data(&nblk, &blk_inds, &A_blks, true);

  int zeros[] = {0,0};
  int offs[2];
  int ends[2];
  for (int64_t b=0; b<nblk; b++){
    assert(A_blks[b].order == 2);
    // decode the global key (row + nrng*col) of block b within the nrng-by-nrng block grid
    int64_t i = blk_inds[b] / nrng;
    int64_t j = blk_inds[b] % nrng;
    offs[0] = range_pfx[j];
    offs[1] = range_pfx[i];
    ends[0] = range_pfx[j]+ranges[j];
    ends[1] = range_pfx[i]+ranges[i];
    // write the block with row index j and column index i into its slice of the flat matrix
    flatA.slice(offs,ends,0.0,A_blks[b],zeros,A_blks[b].lens,1.0);
  }
  free(blk_inds);
  delete [] A_blks;
  return flatA;
}

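/**
 * \brief perform block sparse matrix-matrix product
 * \param[in] ranges sizes of the blocks
 * \param[in] dw CTF world (set of processors) on which to execute
 */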
int block_sparse(std::vector<int> ranges, World & dw){

  // define a Monoid over tensors that understands how to add two tensor blocks:
  // the sum has the larger of the two orders, and each mode takes its length and
  // symmetry from whichever operand defines it
  Monoid< Tensor<>, false >
    tmon(Scalar<>(0.0,dw),
      [](Tensor<> A, Tensor<> B){
        int order_C = std::max(A.order,B.order);
        int lens_C[order_C];
        int sym_C[order_C];
        char idx_A[order_C];
        char idx_B[order_C];
        char idx_C[order_C];
        for (int i=0; i<order_C; i++){
          sym_C[i] = NS;
          lens_C[i] = -1;
          if (A.order > i){
            lens_C[i] = A.lens[i];
            sym_C[i] = A.sym[i];
            idx_A[i] = 'i'+i;
          }
          if (B.order > i){
            assert(lens_C[i] == B.lens[i]);
            if (B.sym[i] != NS){
              assert(sym_C[i] == B.sym[i]);
            } else {
              sym_C[i] = NS;
            }
            lens_C[i] = B.lens[i];
            idx_B[i] = 'i'+i;
          }
          idx_C[i] = 'i'+i;
        }
        // the sum is sparse if the higher-order operand is sparse
        int sp_C = (A.is_sparse && (A.order >= B.order)) || (B.is_sparse && (B.order >= A.order));
        Tensor<> C(order_C, sp_C, lens_C, sym_C);
        C[idx_C] += A[idx_A] + B[idx_B];
        return C;
      },
      MPI_SUM); // the MPI op is not really valid, but it is never used as long as the
                // Monoid is only employed on a world with a single processor

  int nblk = ranges.size();

  World self_world(MPI_COMM_SELF);
  Matrix< Tensor<> > A(nblk, nblk, SP, self_world, tmon);
  Matrix< Tensor<> > B(nblk, nblk, SP, self_world, tmon);

  // use the same integer random seed on all processors, so they generate the same set of blocks
  srand(1000);
  // use a different floating-point random seed on each processor, so the locally owned
  // elements of each block are filled with different values
  srand48(dw.rank);

  // create one random nonzero block per block column of A, writing (key, block) pairs
  CTF::Pair< Tensor<> > * A_blks = new CTF::Pair< Tensor<> >[nblk];
  for (int64_t i=0; i<nblk; i++){
    int64_t j = rand()%nblk;
    A_blks[i].k = j + nblk*i; // key encodes block row j and block column i
    A_blks[i].d = Matrix<>(ranges[j],ranges[i],dw);
    A_blks[i].d.fill_random(0,1);
  }
  A.write(nblk,A_blks);
  delete [] A_blks;
  // create one random nonzero block per block row of B, with keys and values in separate arrays
  int64_t * B_blk_inds = new int64_t[nblk];
  Tensor<> * B_blks = new Tensor<>[nblk];
  for (int64_t i=0; i<nblk; i++){
    int64_t j = rand()%nblk;
    B_blk_inds[i] = i + j*nblk; // key encodes block row i and block column j
    B_blks[i] = Matrix<>(ranges[i],ranges[j],dw);
    B_blks[i].fill_random(1.,1.); // fill the block with ones
  }
  B.write(nblk,B_blk_inds,B_blks);
  delete [] B_blks;
  delete [] B_blk_inds;
  Matrix< Tensor<> > C(nblk, nblk, SP, self_world, tmon);

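  // the block-sparse product: the CTF Function<> below is applied to each pair of
  // structurally matching blocks A(i,k) and B(k,j); the lambda multiplies the two dense
  // blocks, and contributions to the same output block C(i,j) are accumulated using the
  // Monoid tmon with which C was constructed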
  C["ij"] = Function< Tensor<> >(
              [](Tensor<> mA, Tensor<> mB){
                assert(mA.order == 2 && mB.order == 2);
                Matrix<> mC(mA.lens[0], mB.lens[1]);
                mC["ij"] += mA["ik"]*mB["kj"];
                return mC;
              }
            )(A["ik"],B["kj"]);

  // verify the result: flatten A, B, and C to ordinary sparse matrices, form the
  // reference product of the flattened operands, and compare against the flattened C
  Matrix<> refA = flatten_block_sparse_matrix(A,ranges,dw);
  Matrix<> refB = flatten_block_sparse_matrix(B,ranges,dw);
  Matrix<> cmpC = flatten_block_sparse_matrix(C,ranges,dw);

  Matrix<> refC(cmpC);

  refC["ij"] = refA["ik"]*refB["kj"];

  /*refA.print_matrix();
  refB.print_matrix();
  cmpC.print_matrix();
  refC.print_matrix();*/

  refC["ij"] -= cmpC["ij"];
  double err_nrm = refC.norm2();

  bool pass = err_nrm <= 1.e-4;

  return pass;
}

#ifndef TEST_SUITE
// return the argument following a command-line option, or NULL if the option is absent
char* getCmdOption(char ** begin,
                   char ** end,
                   const std::string & option){
  char ** itr = std::find(begin, end, option);
  if (itr != end && ++itr != end){
    return *itr;
  }
  return 0;
}

int main(int argc, char ** argv){
  int rank, np, n, pass, r;
  int const in_num = argc;
  char ** input_str = argv;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &np);

  // -n sets the size of each block range (default 7)
  if (getCmdOption(input_str, input_str+in_num, "-n")){
    n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
    if (n < 0) n = 7;
  } else n = 7;

  // -r sets the number of block ranges (default 10)
  if (getCmdOption(input_str, input_str+in_num, "-r")){
    r = atoi(getCmdOption(input_str, input_str+in_num, "-r"));
    if (r < 0) r = 10;
  } else r = 10;

  {
    World dw(argc, argv);

    if (rank == 0){
      printf("Computing block-sparse matrix product with %d block ranges, all of size %d... ",r,n);
    }
    std::vector<int> ranges;
    for (int i=0; i<r; i++){
      ranges.push_back(n);
    }
    pass = block_sparse(ranges, dw);
    if (rank == 0){
      if (pass){
        printf("successful, answer correct.\n");
      } else {
        printf("failed, answer wrong.\n");
      }
    }
    assert(pass);
  }

  MPI_Finalize();
  return 0;
}
#endif
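
The driver above exercises only uniform block sizes; block_sparse itself accepts non-uniform ranges. A minimal sketch of such a call (the block sizes and surrounding setup are illustrative assumptions, not part of this example):

  // hypothetical usage with unequal block ranges; assumes an initialized World dw
  std::vector<int> uneven_ranges = {4, 7, 5};
  int ok = block_sparse(uneven_ranges, dw); // returns 1 when the block-sparse product matches the flat reference
  assert(ok);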