Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
untyped_tensor.h
/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/

#ifndef __UNTYPED_TENSOR_H__
#define __UNTYPED_TENSOR_H__

#include "../mapping/mapping.h"
#include "../mapping/distribution.h"
#include "../interface/world.h"
#include "../interface/partition.h"
#include "algstrct.h"
#include <functional>

namespace CTF {
  class Idx_Tensor;
}

namespace CTF_int {

  class tensor {
    protected:
      void init(algstrct const * sr,
                int              order,
                int const *      edge_len,
                int const *      sym,
                CTF::World *     wrld,
                bool             alloc_data,
                char const *     name,
                bool             profile,
                bool             is_sparse);

      PairIterator read_all_pairs(int64_t * num_pair, bool unpack);

      void copy_tensor_data(tensor const * other);

      void set_distribution(char const *               idx,
                            CTF::Idx_Partition const & prl,
                            CTF::Idx_Partition const & blk);

    public:

      int * sym;
      int order;
      int * lens;
      int * pad_edge_len;
      int * padding;
      char * name;
      int * scp_padding;
      int * sym_table;
      bool is_mapped;
      int64_t size;
      bool is_folded;
      bool is_cyclic;
      char * data;
      bool has_home;
      char * home_buffer;
      int64_t home_size;
      bool is_home;
      bool profile;
      bool is_sparse;
      bool is_csr;
      int nrow_idx;
      int64_t nnz_loc;
      int64_t nnz_tot;
      int64_t * nnz_blk;

      CTF::Idx_Tensor operator[](char const * idx_map);

      tensor();

      ~tensor();

      void free_self();

      tensor(algstrct const * sr,
             int              order,
             int const *      edge_len,
             int const *      sym,
             CTF::World *     wrld,
             bool             alloc_data=true,
             char const *     name=NULL,
             bool             profile=1,
             bool             is_sparse=0);

      tensor(algstrct const *           sr,
             int                        order,
             bool                       is_sparse,
             int const *                edge_len,
             int const *                sym,
             CTF::World *               wrld,
             char const *               idx,
             CTF::Idx_Partition const & prl,
             CTF::Idx_Partition const & blk,
             char const *               name=NULL,
             bool                       profile=1);

      tensor(tensor const * other, bool copy = 1, bool alloc_data = 1);

      tensor(tensor * other, int const * new_sym);

      int * calc_phase() const;

      int calc_tot_phase() const;

      int64_t calc_nvirt() const;

      int64_t calc_npe() const;

      void set_padding();

      int set_zero();

      int set(char const * val);

      int zero_out_padding();

      void scale_diagonals(int const * sym_mask);

      // apply an additive inverse to all elements of the tensor
      void addinv();

      void print_map(FILE * stream=stdout, bool allcall=1) const;

      void set_name(char const * name);

      char const * get_name() const;

      void profile_on();

      void profile_off();

      void get_raw_data(char ** data, int64_t * size) const;

      int write(int64_t      num_pair,
                char const * alpha,
                char const * beta,
                char *       mapped_data,
                char const   rw='w');

      void write(int64_t         num_pair,
                 char const *    alpha,
                 char const *    beta,
                 int64_t const * inds,
                 char const *    data);

      void read(int64_t         num_pair,
                char const *    alpha,
                char const *    beta,
                int64_t const * inds,
                char *          data);

      int read(int64_t      num_pair,
               char const * alpha,
               char const * beta,
               char *       mapped_data);

      char * read(char const *               idx,
                  CTF::Idx_Partition const & prl,
                  CTF::Idx_Partition const & blk,
                  bool                       unpack);

      int read(int64_t num_pair,
               char *  mapped_data);

      int64_t get_tot_size(bool packed);

      int allread(int64_t * num_pair,
                  char **   all_data,
                  bool      unpack);

      int allread(int64_t * num_pair,
                  char *    all_data,
                  bool      unpack=true);

      void slice(int const *  offsets_B,
                 int const *  ends_B,
                 char const * beta,
                 tensor *     A,
                 int const *  offsets_A,
                 int const *  ends_A,
                 char const * alpha);

      /* Same as above, except tid_B lives on dt_other_B */
      /* int slice_tensor(int          tid_A,
                          int const *  offsets_A,
                          int const *  ends_A,
                          char const * alpha,
                          int          tid_B,
                          int const *  offsets_B,
                          int const *  ends_B,
                          char const * beta,
                          world *      dt_other_B);
      */

      int permute(tensor *      A,
                  int * const * permutation_A,
                  char const *  alpha,
                  int * const * permutation_B,
                  char const *  beta);

      int sparsify(char const * threshold=NULL,
                   bool         take_abs=true);

      int sparsify(std::function<bool(char const*)> f);

      int read_local(int64_t * num_pair,
                     char **   mapped_data,
                     bool      unpack_sym=false) const;

      int read_local_nnz(int64_t * num_pair,
                         char **   mapped_data,
                         bool      unpack_sym=false) const;

      void read_local(int64_t *  num_pair,
                      int64_t ** inds,
                      char **    data,
                      bool       unpack_sym=false) const;

      void read_local_nnz(int64_t *  num_pair,
                          int64_t ** inds,
                          char **    data,
                          bool       unpack_sym=false) const;

      //int copy(tensor * A);

      int align(tensor const * B);

      /* product will contain the dot product of tsr_A and tsr_B */
      //int dot_tensor(int tid_A, int tid_B, char *product);

      int reduce_sum(char * result);

      int reduce_sum(char * result, algstrct const * sr_other);

      int reduce_sumabs(char * result);

      int reduce_sumabs(char * result, algstrct const * sr_other);

      int reduce_sumsq(char * result);

      /* map data of tid_A with the given function */
      /* int map_tensor(int tid,
                        dtype (*map_func)(int order,
                                          int const * indices,
                                          dtype elem)); */

      int get_max_abs(int n, char * data) const;

      void print(FILE * fp = stdout, char const * cutoff = NULL) const;
      void prnt() const;

      void compare(const tensor * A, FILE * fp, char const * cutoff);

      void orient_subworld(CTF::World *    greater_world,
                           int &           bw_mirror_rank,
                           int &           fw_mirror_rank,
                           distribution *& odst,
                           char **         sub_buffer_);

      void add_to_subworld(tensor *     tsr_sub,
                           char const * alpha,
                           char const * beta);

      void add_from_subworld(tensor *     tsr_sub,
                             char const * alpha,
                             char const * beta);

      void unfold(bool was_mod=0);

      void remove_fold();

      double est_time_unfold();

      void fold(int         nfold,
                int const * fold_idx,
                int const * idx_map,
                int *       all_fdim,
                int **      all_flen);

      void pull_alias(tensor const * other);

      void clear_mapping();

      int redistribute(distribution const & old_dist,
                       int const *          old_offsets = NULL,
                       int * const *        old_permutation = NULL,
                       int const *          new_offsets = NULL,
                       int * const *        new_permutation = NULL);

      double est_redist_time(distribution const & old_dist, double nnz_frac);

      int64_t get_redist_mem(distribution const & old_dist, double nnz_frac);

      int map_tensor_rem(int        num_phys_dims,
                         CommData * phys_comm,
                         int        fill=0);

      int extract_diag(int const * idx_map,
                       int         rw,
                       tensor *&   new_tsr,
                       int **      idx_map_new);

      void set_sym(int const * sym);

      void set_new_nnz_glb(int64_t const * nnz_blk);

      void spmatricize(int m, int n, int nrow_idx, bool csr);

      void despmatricize(int nrow_idx, bool csr);

      void leave_home_with_buffer();

      void register_size(int64_t size);

      void deregister_size();

      void write_dense_to_file(MPI_File & file, int64_t offset=0);

      void write_dense_to_file(char const * filename);

      void read_dense_from_file(MPI_File & file, int64_t offset=0);

      void read_dense_from_file(char const * filename);

      template <typename dtype_A, typename dtype_B>
      void conv_type(tensor * B);

      template <typename dtype_A, typename dtype_B>
      void exp_helper(tensor * A);

      template <typename dtype>
      void compare_elementwise(tensor * A, tensor * B);

      template <typename dtype>
      void not_equals(tensor * A, tensor * B);

      template <typename dtype>
      void smaller_than(tensor * A, tensor * B);

      template <typename dtype>
      void smaller_equal_than(tensor * A, tensor * B);

      template <typename dtype>
      void larger_than(tensor * A, tensor * B);

      template <typename dtype>
      void larger_equal_than(tensor * A, tensor * B);

      template <typename dtype>
      void true_divide(tensor * A);

      tensor * self_reduce(int const * idx_A,
                           int **      new_idx_A,
                           int         order_B,
                           int const * idx_B,
                           int **      new_idx_B,
                           int         order_C=0,
                           int const * idx_C=NULL,
                           int **      new_idx_C=NULL);
  };
}
#endif // __UNTYPED_TENSOR_H__
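The class above is the internal, type-erased tensor that the typed public interface (CTF::Tensor in ctf.hpp) builds on; operator[](char const * idx_map) is what makes index-notation expressions possible. The following is a minimal usage sketch through that public interface, assuming a standard CTF installation with MPI; the CTF::Tensor constructor, fill_random, print, and the NS symmetry constant are taken from the public API and should be checked against the installed release.

#include <ctf.hpp>

int main(int argc, char ** argv){
  MPI_Init(&argc, &argv);
  {
    // distributed processor context (corresponds to the wrld member described below)
    CTF::World dw(MPI_COMM_WORLD);
    int lens[2] = {8, 8};
    int sym[2]  = {NS, NS};                       // no symmetry between the two modes
    CTF::Tensor<double> A(2, lens, sym, dw, "A");
    CTF::Tensor<double> B(2, lens, sym, dw, "B");
    CTF::Tensor<double> C(2, lens, sym, dw, "C");
    A.fill_random(-1.0, 1.0);
    B.fill_random(-1.0, 1.0);
    // operator[] yields an Idx_Tensor; the repeated index j is summed over
    C["ik"] = A["ij"] * B["jk"];
    C.print();
  }
  MPI_Finalize();
  return 0;
}

The extra scope ensures the World and tensors are destroyed before MPI_Finalize, since they hold MPI resources.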
CTF_int::tensor: internal distributed tensor class

Data members:

int * sym: symmetries among tensor dimensions
int order: number of tensor dimensions
int * lens: unpadded tensor edge lengths
int * pad_edge_len: padded tensor edge lengths
int * padding: padding along each edge length (less than distribution phase)
char * name: name given to tensor
int is_scp_padded: whether tensor data has additional padding
int * scp_padding: additional padding, may be greater than ScaLAPACK phase
int * sym_table: order-by-order table of dimensional symmetry relations
CTF::World * wrld: distributed processor context on which tensor is defined
algstrct * sr: algstrct on which tensor elements and operations are defined
topology * topo: topology to which the tensor is mapped
mapping * edge_map: mappings of each tensor dimension onto topology dimensions
bool is_mapped: whether a mapping has been selected
bool is_cyclic: whether the tensor data is cyclically distributed (blocked if false)
int64_t size: current size of local tensor data chunk (mapping-dependent)
int64_t registered_alloc_size: size CTF keeps track of for memory usage
char * data: tensor data, either the data or the key-value pairs should exist at any given time ...
bool is_folded: whether the data is folded/transposed into a (lower-order) tensor
int * inner_ordering: ordering of the dimensions according to which the tensor is folded
tensor * rec_tsr: representation of folded tensor (shares data pointer)
bool has_home: whether the tensor has a home mapping/buffer
char * home_buffer: buffer associated with home mapping of tensor, to which it is returned
int64_t home_size: size of home buffer
bool is_home: whether the latest tensor data is in the home buffer
bool left_home_transp: whether the tensor left home to transpose
bool is_data_aliased: whether the tensor data is an alias of another tensor object's data
tensor * slay: tensor object associated with tensor object whose data pointer needs to be preserved, needed for ScaLAPACK wrapper FIXME: home buffer should presumably take care of this...
bool has_zero_edge_len: if true tensor has a zero edge length, so is zero, which short-cuts stuff
bool is_sparse: whether only the non-zero elements of the tensor are stored
bool is_csr: whether CSR or COO if folded
int nrow_idx: how many modes are folded into matricized row
int64_t nnz_loc: number of local nonzero elements
int64_t * nnz_blk: nonzero elements in each block owned locally
int64_t nnz_tot: maximum number of local nonzero elements over all procs
bool profile: whether profiling should be done for contractions/sums involving this tensor

Referenced types:

CTF::World (world.h:19): an instance of the CTF library (world) on an MPI communicator
CTF::Idx_Tensor (idx_tensor.h:15): a tensor with an index map associated with it (necessary for overloaded operators) ...
CTF_int::algstrct (algstrct.h:34): algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
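The write/read/read_local declarations above, together with the nnz_* members, reflect CTF's bulk-synchronous model for element access: all processes call write and read collectively with (global index, value) pairs, and read_local returns only the entries a process owns under its current mapping. The following sketch goes through the public typed interface and assumes CTF::Tensor::write(npair, global_idx, data) and read_local(&npair, &global_idx, &data) as used in the CTF examples, and that the returned buffers are released with free(); both points should be checked against the installed version.

#include <ctf.hpp>
#include <cstdio>
#include <cstdlib>

void write_then_read_local(CTF::World & dw){
  int lens[2] = {100, 100};
  int sym[2]  = {NS, NS};
  CTF::Tensor<double> T(2, lens, sym, dw, "T");

  // write() is collective: rank 0 contributes two pairs, the others none;
  // the global index of element (i,j) is i + j*lens[0] (first mode fastest).
  if (dw.rank == 0){
    int64_t inds[2] = {0, 101};
    double  vals[2] = {1.0, 2.0};
    T.write(2, inds, vals);
  } else {
    T.write(0, NULL, NULL);
  }

  // Each process retrieves the (index, value) pairs stored locally.
  int64_t   npair;
  int64_t * ginds;
  double  * gvals;
  T.read_local(&npair, &ginds, &gvals);
  printf("rank %d owns %lld local entries\n", dw.rank, (long long)npair);
  free(ginds);   // deallocation convention assumed here; verify for the CTF version in use
  free(gvals);
}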