20   int64_t nrng = ranges.
size();
    21   int64_t range_sum = 0;
    22   int64_t range_pfx[nrng];
    23   for (int64_t i=0; i<nrng; i++){
    24     if (i==0) range_pfx[i] = 0;
    25     else      range_pfx[i] = range_sum;
    26     range_sum += ranges[i];
    32   assert(A.wrld->np == 1);
    42   for (int64_t 
b=0; 
b<nblk; 
b++){
    43     assert(A_blks[
b].order == 2);
    44     int64_t i = blk_inds[
b] / nblk;
    45     int64_t j = blk_inds[
b] % nblk;
    46     offs[0] = range_pfx[j];
    47     offs[1] = range_pfx[i];
    48     ends[0] = range_pfx[j]+ranges[j];
    49     ends[1] = range_pfx[i]+ranges[i];
    50     flatA.
slice(offs,ends,0.0,A_blks[
b],zeros,A_blks[b].lens,1.0);
    76                     for (
int i=0; i<order_C; i++){
    80                         lens_C[i] = A.
lens[i];
    85                         assert(lens_C[i] == B.
lens[i]);
    87                           assert(sym_C[i] == B.
sym[i]);
    91                         lens_C[i] = B.
lens[i];
    97                     Tensor<> C(order_C, sp_C, lens_C, sym_C);
    98                     C[idx_C] += A[idx_A]+B[idx_B];
   103   int nblk = ranges.size();
   105   World self_world(MPI_COMM_SELF);
   116   for (int64_t i=0; i<nblk; i++){
   117     int64_t j = rand()%nblk;
   118     A_blks[i].
k = j + nblk*i;
   119     A_blks[i].
d = 
Matrix<>(ranges[j],ranges[i],dw);
   120     A_blks[i].
d.fill_random(0,1);
   122   A.
write(nblk,A_blks);
   123   int64_t * B_blk_inds = 
new int64_t[nblk];
   125   for (int64_t i=0; i<nblk; i++){
   126     int64_t j = rand()%nblk;
   127     B_blk_inds[i] = i + j*nblk;
   128     B_blks[i] = 
Matrix<>(ranges[i],ranges[j],dw);
   131   B.
write(nblk,B_blk_inds,B_blks);
   133   delete [] B_blk_inds;
   138                 assert(mA.order == 2 && mB.order == 2);
   139                 Matrix<> mC(mA.lens[0], mB.lens[1]);
   140                 mC[
"ij"] += mA[
"ik"]*mB[
"kj"];
   151   refC[
"ij"] = refA[
"ik"]*refB[
"kj"];
   158   refC[
"ij"] -= cmpC[
"ij"];
   159   double err_nrm = refC.
norm2();
   161   bool pass = err_nrm <= 1.e-4;
   171   char ** itr = std::find(begin, end, option);
   172   if (itr != end && ++itr != end){
   179 int main(
int argc, 
char ** argv){
   181   int const in_num = argc;
   182   char ** input_str = argv;
   184   MPI_Init(&argc, &argv);
   185   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   186   MPI_Comm_size(MPI_COMM_WORLD, &np);
   189     n = atoi(
getCmdOption(input_str, input_str+in_num, 
"-n"));
   194     r = atof(
getCmdOption(input_str, input_str+in_num, 
"-r"));
   199     World dw(argc, argv);
   202       printf(
"Computing block-sparse with %d block ranges, all of size %d... ",r,n);
   204     std::vector<int> ranges;
   205     for (
int i=0; i<r; i++){
   211         printf(
"successful, answer correct.\n");
   213         printf(
"failed, answer wrong.\n");
 Set class defined by a datatype and a min/max function (if it is partially ordered i...
int * sym
symmetries among tensor dimensions 
dtype d
tensor value associated with index 
Matrix class which encapsulates a 2D tensor. 
Tensor< dtype > slice(int const *offsets, int const *ends) const 
cuts out a slice (block) of this tensor A[offsets,ends) result will always be fully nonsymmetric ...
char * getCmdOption(char **begin, char **end, const std::string &option)
Matrix flatten_block_sparse_matrix(Matrix< Tensor<> > A, std::vector< int > ranges, World &dw)
int64_t size
current size of local tensor data chunk (mapping-dependent) 
an instance of the CTF library (world) on a MPI communicator 
bool is_sparse
whether only the non-zero elements of the tensor are stored 
int order
number of tensor dimensions 
int64_t k
key, global index [i1,i2,...] specified as i1+len[0]*i2+... 
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!) 
index-value pair used for tensor data input 
Scalar class which encapsulates a 0D tensor. 
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
int rank
rank of local processor 
int * lens
unpadded tensor edge lengths 
void print(char const *a, FILE *fp=stdout) const 
prints the value 
int main(int argc, char **argv)
int block_sparse(std::vector< int > ranges, World &dw)
perform block sparse matrix-matrix product 
def zeros(shape, dtype=np.float64, order='F')
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const 
Gives the global indices and values associated with the local data. 
A Monoid is a Set equipped with a binary addition operator '+' or a custom function addition must hav...
an instance of a tensor within a CTF world 
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...