3 #include "../interface/common.h" 6 #include "../tensor/untyped_tensor.h" 11 template<
typename dtype>
15 template<
typename dtype>
23 :
CTF_int::
tensor(&sr, order, len, sym, &world, 1, name, profile) {
27 template<
typename dtype>
35 :
CTF_int::
tensor(&sr, order, len, sym, &world, 1, name, profile) {
40 template<
typename dtype>
49 :
CTF_int::
tensor(&sr, order, len, sym, &world, 1, name, profile, is_sparse) {
53 template<
typename dtype>
61 :
CTF_int::
tensor(&sr, order, len, NULL, &world, 1, name, profile, is_sparse) {
66 template<
typename dtype>
73 :
CTF_int::
tensor(&sr, order, len, NULL, &world, 1, name, profile) {
78 template<
typename dtype>
89 :
CTF_int::
tensor(&sr_, order, 0, len, sym, &world, idx, prl, blk, name, profile) {
93 template<
typename dtype>
105 :
CTF_int::
tensor(&sr_, order, is_sparse_, len, sym, &world, idx, prl, blk, name, profile) {
110 template<
typename dtype>
115 template<
typename dtype>
119 template<
typename dtype>
123 template<
typename dtype>
128 template<
typename dtype>
133 template<
typename dtype>
140 template<
typename dtype>
148 template<
typename dtype>
151 template<
typename dtype>
154 tensor::get_raw_data((
char**)&data, size);
158 template<
typename dtype>
160 int64_t ** global_idx,
163 bool unpack_sym)
const {
170 if (ret !=
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function read_local\n");
IASSERT(0);
return; }
174 for (i=0; i<(*npair); i++){
175 (*global_idx)[
i] = pairs[
i].
k();
181 template<
typename dtype>
183 int64_t ** global_idx,
185 bool unpack_sym)
const {
189 if (ret !=
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function read_local\n");
IASSERT(0);
return; }
193 for (i=0; i<(*npair); i++){
194 (*global_idx)[
i] = pairs[
i].
k();
200 template<
typename dtype>
204 bool unpack_sym)
const {
212 if (ret !=
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function read_local\n");
IASSERT(0);
return; }
215 template<
typename dtype>
218 bool unpack_sym)
const {
222 if (ret !=
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function read_local\n");
IASSERT(0);
return; }
225 template<
typename dtype>
227 int64_t
const * global_idx,
233 for (i=0; i<npair; i++){
234 pairs[
i].
k = global_idx[
i];
235 pairs[
i].
d = data[
i];
239 for (i=0; i<npair; i++){
240 data[
i] = pairs[
i].
d;
245 template<
typename dtype>
250 char * cpairs = (
char*)pairs;
252 IASSERT(cpairs == (
char*)pairs);
263 template<
typename dtype>
265 int64_t
const * global_idx,
270 for (i=0; i<npair; i++){
271 pairs[
i].
k = global_idx[
i];
272 pairs[
i].
d = data[
i];
285 template<
typename dtype>
290 char const * cpairs = (
char const*)pairs;
297 template<
typename dtype>
301 int64_t
const * global_idx,
307 for (i=0; i<npair; i++){
308 pairs[
i].
k = global_idx[
i];
309 pairs[
i].
d = data[
i];
323 template<
typename dtype>
328 char const * cpairs = (
char const*)pairs;
335 template<
typename dtype>
339 int64_t
const * global_idx,
344 for (i=0; i<npair; i++){
345 pairs[
i].
k = global_idx[
i];
346 pairs[
i].
d = data[
i];
350 for (i=0; i<npair; i++){
351 data[
i] = pairs[
i].
d;
356 template<
typename dtype>
361 char * cpairs = (
char*)pairs;
363 IASSERT(cpairs == (
char*)pairs);
376 template<
typename dtype>
380 if (ret !=
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function read_all\n");
IASSERT(0);
return; }
383 template<
typename dtype>
391 template<
typename dtype>
396 template<
typename dtype>
401 template<
typename dtype>
406 template<
typename dtype>
411 template<
typename dtype>
416 template<
typename dtype>
423 template<
typename dtype>
428 template<
typename dtype>
431 int *
const * perms_A,
438 template<
typename dtype>
444 perms_B, (
char*)&beta);
448 template<
typename dtype>
451 if (ret !=
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function sparsify\n");
IASSERT(0);
return; }
454 template<
typename dtype>
457 if (ret !=
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function sparsify\n");
IASSERT(0);
return; }
460 template<
typename dtype>
463 if (ret !=
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function sparisfy\n");
IASSERT(0);
return; }
466 template <
typename dtype>
469 int64_t my_nvals = CTF_int::read_data_mpiio<dtype>(T->
wrld, fpath, &datastr);
473 CTF_int::parse_sparse_tensor_data<dtype>(datastr, T->
order, (
dtype*)T->
sr->mulid(), T->
lens, my_nvals, pairs, with_vals);
479 T->
write(my_nvals,pairs);
481 T->
sr->pair_dealloc((
char*)pairs);
486 read_sparse_from_file_base<int>(fpath, with_vals,
this);
491 read_sparse_from_file_base<double>(fpath, with_vals,
this);
496 read_sparse_from_file_base<float>(fpath, with_vals,
this);
501 read_sparse_from_file_base<int64_t>(fpath, with_vals,
this);
505 template <
typename dtype>
512 char * datastr = CTF_int::serialize_sparse_tensor_data<dtype>(T->
order, T->
lens, my_nvals, pairs, with_vals, str_len);
513 CTF_int::write_data_mpiio<dtype>(T->
wrld, fpath, datastr, str_len);
515 T->
sr->pair_dealloc((
char*)pairs);
520 write_sparse_to_file_base<int>(fpath, with_vals,
this);
525 write_sparse_to_file_base<double>(fpath, with_vals,
this);
530 write_sparse_to_file_base<float>(fpath, with_vals,
this);
535 write_sparse_to_file_base<int64_t>(fpath, with_vals,
this);
539 template<
typename dtype>
553 template<
typename dtype>
559 template<
typename dtype>
573 template<
typename dtype>
585 template<
typename dtype>
590 int const * offsets_A,
598 printf(
"CTF ERROR: number of processors should not match in slice if worlds are different\n");
604 offsets, ends, (
char*)&beta, (
Tensor *)&A,
605 offsets_A, ends_A, (
char*)&alpha);
608 offsets, ends, (
char*)&beta, (
Tensor *)&A,
609 offsets_A, ends_A, (
char*)&alpha);
613 template<
typename dtype>
618 int64_t corner_off_A,
619 int64_t corner_end_A,
621 int * offsets, * ends, * offsets_A, * ends_A;
642 template<
typename dtype>
644 int const * ends)
const {
649 template<
typename dtype>
651 int64_t corner_end)
const {
653 return slice(corner_off, corner_end,
wrld);
656 template<
typename dtype>
659 World * owrld)
const {
663 for (i=0; i<
order; i++){
664 if (!(ends[i] - offsets[i] > 0 &&
666 ends[i] <=
lens[i])){
667 printf(
"CTF ERROR: invalid slice dimensions\n");
672 if (offsets[i] == offsets[i+1] && ends[i] == ends[i+1]){
675 if (!(ends[i+1] >= offsets[i])){
676 printf(
"CTF ERROR: slice dimensions don't respect tensor symmetry\n");
682 }
else new_sym[
i] =
NS;
683 new_lens[
i] = ends[
i] - offsets[
i];
688 std::fill(new_sym, new_sym+order, 0);
698 template<
typename dtype>
701 World * owrld)
const {
703 int * offsets, * ends;
719 template<
typename dtype>
722 printf(
"CTF ERROR: cannot align tensors on different CTF instances\n");
730 template<
typename dtype>
767 sr->
min((
char*)&minval);
775 sr->
max((
char*)&maxval);
783 sr->
min((
char*)&minval);
791 sr->
max((
char*)&maxval);
802 template<
typename dtype>
814 for (
int i=0;
i<A.order;
i++){
822 template<
typename dtype>
825 printf(
"CTF ERROR: norm not available for the type of tensor %s\n",
name);
829 #define NORM1_INST(dtype) \ 831 inline void Tensor<dtype>::norm1(double & nrm){ \ 832 real_norm1<dtype>(*this, nrm); \ 843 template<typename
dtype>
844 static
void real_norm2(
Tensor<
dtype> & A,
double & nrm){
846 for (
int i=0;
i<A.order;
i++){
850 nrm = std::sqrt((
double)Function<dtype,double>([](
dtype a){
return (
double)(a*a); })(A[inds]));
853 template<
typename dtype>
854 static void complex_norm2(Tensor<dtype> & A,
double & nrm){
856 for (
int i=0;
i<A.order;
i++){
859 nrm = std::sqrt((
double)Function<dtype,double>([](
dtype a){
return (
double)std::norm(a); })(A[inds]));
863 template<
typename dtype>
866 printf(
"CTF ERROR: norm not available for the type of tensor %s\n",
name);
870 #define NORM2_REAL_INST(dtype) \ 872 inline void Tensor<dtype>::norm2(double & nrm){ \ 873 real_norm2<dtype>(*this, nrm); \ 876 #define NORM2_COMPLEX_INST(dtype) \ 878 inline void Tensor< std::complex<dtype> >::norm2(double & nrm){ \ 879 complex_norm2< std::complex<dtype> >(*this, nrm); \ 893 template<typename
dtype>
896 printf(
"CTF ERROR: norm not available for the type of tensor %s\n",
name);
900 #define NORM_INFTY_INST(dtype) \ 902 inline void Tensor<dtype>::norm_infty(double & nrm){ \ 903 nrm = this->norm_infty(); \ 915 #undef NORM2_REAL_INST 916 #undef NORM2_COMPLEX_INST 917 #undef NORM_INFTY_INST 919 template<
typename dtype>
924 if (ret !=
CTF_int::SUCCESS){ printf(
"CTF ERROR: failed to execute function get_max_abs\n");
IASSERT(0);
return; }
927 template<
typename dtype>
930 printf(
"CTF ERROR: fill_random(rmin, rmax) not available for the type of tensor %s\n",
name);
934 template <
typename dtype>
937 printf(
"CTF ERROR: fill_random should not be called on a sparse tensor, use fill_random_sp instead\n");
941 for (int64_t
i=0;
i<T.
size;
i++){
949 fill_random_base<double>(rmin, rmax, *
this);
954 fill_random_base<float>(rmin, rmax, *
this);
959 fill_random_base<int64_t>(rmin, rmax, *
this);
964 fill_random_base<int>(rmin, rmax, *
this);
968 template<
typename dtype>
971 printf(
"CTF ERROR: fill_sp_random(rmin, rmax, frac_sp) not available for the type of tensor %s\n",
name);
975 template <
typename dtype>
977 int64_t tot_size = 1;
979 double sf = tot_size*frac_sp;
982 for (
int i=2;
i<20;
i++){
986 int64_t gen_size = (int64_t)(dg+.5);
987 int64_t my_gen_size = gen_size/T->
wrld->
np;
992 for (int64_t
i=0;
i<my_gen_size;
i++){
995 T->
write(my_gen_size,pairs);
996 T->
sr->pair_dealloc((
char*)pairs);
1019 fill_sp_random_base<double>(rmin, rmax, frac_sp,
this);
1024 fill_sp_random_base<float>(rmin, rmax, frac_sp,
this);
1029 fill_sp_random_base<int>(rmin, rmax, frac_sp,
this);
1034 fill_sp_random_base<int64_t>(rmin, rmax, frac_sp,
this);
1037 template<
typename dtype>
1044 const char * idx_C){
1046 printf(
"CTF ERROR: worlds of contracted tensors must match\n");
1055 template<
typename dtype>
1065 printf(
"CTF ERROR: worlds of contracted tensors must match\n");
1070 =
CTF_int::contraction(&A, idx_A, &B, idx_B, (
char const *)&alpha,
this, idx_C, (
char const *)&beta, &fseq);
1075 template<
typename dtype>
1080 const char * idx_B){
1082 printf(
"CTF ERROR: worlds of summed tensors must match\n");
1094 template<
typename dtype>
1102 printf(
"CTF ERROR: worlds of summed tensors must match\n");
1112 template<
typename dtype>
1114 const char * idx_A){
1120 template<
typename dtype>
1128 template<
typename dtype>
1138 template<
typename dtype>
1140 set((
char const*)&val);
1149 template<
typename dtype>
1155 const char * idx_C){
1161 template<
typename dtype>
1165 const char * idx_B){
1172 template<
typename dtype>
1198 template<
typename dtype>
void read(int64_t num_pair, char const *alpha, char const *beta, int64_t const *inds, char *data)
read tensor data with <key, value> pairs where key is the global index for the value, which gets filled in with beta times the old values plus alpha times the values read from the tensor.
int reduce_sumabs(char *result)
Performs an elementwise absolute value summation reduction on a tensor.
void profile_off()
turns off profiling for tensor
void contract(dtype alpha, CTF_int::tensor &A, char const *idx_A, CTF_int::tensor &B, char const *idx_B, dtype beta, char const *idx_C)
contracts C[idx_C] = beta*C[idx_C] + alpha*A[idx_A]*B[idx_B]
OP
reduction types for tensor data deprecated types: OP_NORM1=OP_SUMABS, OP_NORM2=call norm2()...
custom scalar function on tensor: e.g. A["ij"] = f(A["ij"])
CTF_int::CommData cdt
communicator data for MPI comm defining this world
int * sym
symmetries among tensor dimensions
void execute()
run contraction
virtual algstrct * clone() const =0
''copy constructor''
void slice(int const *offsets_B, int const *ends_B, char const *beta, tensor *A, int const *offsets_A, int const *ends_A, char const *alpha)
accumulates a slice (block) of A into a slice of this tensor (B): B[offsets,ends) = beta*B[offsets,ends) + alpha*A[offsets_A,ends_A)
int reduce_sum(char *result)
Performs an elementwise summation reduction on a tensor.
void profile_on()
turns on profiling for tensor
dtype d
tensor value associated with index
#define NORM1_INST(dtype)
Ring class defined by a datatype and addition and multiplication functions; addition must have an ident...
virtual char * pair_alloc(int64_t n) const
allocate space for n (int64_t,dtype) pairs, necessary for object types
void add_to_subworld(Tensor< dtype > *tsr, dtype alpha, dtype beta)
accumulates this tensor to a tensor object defined on a different world
Typ_Idx_Tensor< dtype > i(char const *idx_map)
void execute(bool run_diag=false)
run summation
void get_local_pairs(int64_t *npair, Pair< dtype > **pairs, bool nonzeros_only=false, bool unpack_sym=false) const
gives the global indices and values associated with the local data
Tensor< dtype > slice(int const *offsets, int const *ends) const
cuts out a slice (block) of this tensor A[offsets,ends) result will always be fully nonsymmetric ...
void real_norm1< bool >(Tensor< bool > &A, double &nrm)
void profile_off()
turn off profiling
virtual bool is_ordered() const =0
#define NORM2_COMPLEX_INST(dtype)
void permute(dtype beta, CTF_int::tensor &A, int *const *perms_A, dtype alpha)
Apply permutation to matrix, potentially extracting a slice B[i,j,...] = beta*B[...] + alpha*A[perms_A[0][i],perms_A[1][j],...].
double get_rand48()
returns new random number in [0,1)
int sparsify(char const *threshold=NULL, bool take_abs=true)
reduce tensor to sparse format, storing only nonzero data, or data above a specified threshold...
int64_t size
current size of local tensor data chunk (mapping-dependent)
Semiring is a Monoid with an additional multiplication function; addition must have an identity and be as...
void * alloc(int64_t len)
alloc abstraction
dtype reduce(OP op)
performs a reduction on the tensor
void read_all(int64_t *npair, dtype **data, bool unpack=false)
collects the entire tensor data on each process (not memory scalable)
custom bivariate function on two tensors: e.g. C["ij"] = f(A["ik"],B["kj"])
virtual char const * addid() const
MPI datatype for pairs.
an instance of the CTF library (world) on a MPI communicator
void set_name(char const *name)
sets tensor name
bool is_sparse
whether only the non-zero elements of the tensor are stored
int order
number of tensor dimensions
int64_t k
key, global index [i1,i2,...] specified as i1+len[0]*i2+...
void read_val(char *buf) const
sets external value to the value pointed by the iterator
void read_local(int64_t *npair, int64_t **global_idx, dtype **data, bool unpack_sym=false) const
Using get_local_data(), which returns an array that must be freed with delete [], is more efficient...
void compare(const Tensor< dtype > &A, FILE *fp=stdout, double cutoff=-1.0)
prints two sets of tensor data side-by-side to file using process 0
int permute(tensor *A, int *const *permutation_A, char const *alpha, int *const *permutation_B, char const *beta)
virtual char * alloc(int64_t n) const
allocate space for n items, necessary for object types
#define NORM_INFTY_INST(dtype)
double estimate_time()
predicts execution time in seconds using performance models
dtype norm2()
computes the Frobenius norm of the tensor (needs sqrt()!)
int align(tensor const *B)
align mapping of this tensor to that of B
index-value pair used for tensor data input
void read_sparse_from_file(const char *fpath, bool with_vals=true)
read sparse tensor from file, entries of tensor must be stored one per line, as i_1 ...
CTF::World * wrld
distributed processor context on which tensor is defined
custom function f : X -> Y to be applied to tensor elements: e.g. B["ij"] = f(A["ij"]) ...
class for execution distributed scaling of a tensor
a sparse subset of a tensor
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
int rank
rank of local processor
virtual void min(char const *a, char const *b, char *c) const
c = min(a,b)
int write(int64_t num_pair, char const *alpha, char const *beta, char *mapped_data, char const rw='w')
Add tensor data new=alpha*new+beta*old with <key, value> pairs where key is the global index for the ...
class for execution distributed contraction of tensors
void print(FILE *fp=stdout, char const *cutoff=NULL) const
prints tensor data to file using process 0
int zero_out_padding()
sets padded portion of tensor to zero (this should be maintained internally)
int * lens
unpadded tensor edge lengths
void add_from_subworld(tensor *tsr_sub, char const *alpha, char const *beta)
accumulates this tensor from a tensor object defined on a different world
dtype norm_infty()
finds the max absolute value element of the tensor
int allread(int64_t *num_pair, char **all_data, bool unpack)
read entire tensor with each processor (in packed layout). WARNING: will use an 'unscalable' amount o...
void sparsify()
reduce tensor to sparse format, storing only nonzero data, or data above a specified threshold...
~Tensor()
frees CTF tensor
void align(CTF_int::tensor const &A)
aligns data mapping with tensor A
Typ_Idx_Tensor< dtype > operator[](char const *idx_map)
associates an index map with the tensor for future operations
int64_t k() const
returns key of pair at head of ptr
void compare(const tensor *A, FILE *fp, char const *cutoff)
prints two sets of tensor data side-by-side to file using process 0
void print(FILE *fp, dtype cutoff) const
prints tensor data to file using process 0 (modify print(...) overload in set.h if you would like a d...
algstrct * sr
algstrct on which tensor elements and operations are defined
virtual void pair_dealloc(char *ptr) const
deallocate given pointer containing contiguous array of pairs
void read_sparse_from_file_base(const char *fpath, bool with_vals, Tensor< dtype > *T)
void fill_sp_random(dtype rmin, dtype rmax, double frac_sp)
generate roughly frac_sp*dense_tensor_size nonzeros between rmin and rmax, works only for dtype in {f...
double estimate_time()
predicts execution time in seconds using performance models
void add_from_subworld(Tensor< dtype > *tsr, dtype alpha, dtype beta)
accumulates this tensor from a tensor object defined on a different world
void fill_sp_random_base(dtype rmin, dtype rmax, double frac_sp, Tensor< dtype > *T)
void write_sparse_to_file(const char *fpath, bool with_vals=true)
write sparse tensor to file, entries of tensor will be stored one per line, as i_1 ...
void real_norm1(Tensor< dtype > &A, double &nrm)
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const
Gives the global indices and values associated with the local data.
Tensor()
default constructor
virtual void max(char const *a, char const *b, char *c) const
c = max(a,b)
void fill_random_base(dtype rmin, dtype rmax, Tensor< dtype > &T)
void add_to_subworld(tensor *tsr_sub, char const *alpha, char const *beta)
accumulates this tensor to a tensor object defined on a different world
int el_size
size of each element of algstrct in bytes
bool profile
whether profiling should be done for contractions/sums involving this tensor
int cdealloc(void *ptr)
free abstraction
dtype * get_raw_data(int64_t *size) const
gives the raw current local data with padding included
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
void sum(dtype alpha, CTF_int::tensor &A, char const *idx_A, dtype beta, char const *idx_B)
sums B[idx_B] = beta*B[idx_B] + alpha*A[idx_A]
char * data
tensor data, either the data or the key-value pairs should exist at any given time ...
A Monoid is a Set equipped with a binary addition operator '+' or a custom function addition must hav...
internal distributed tensor class
dtype norm1()
computes the entrywise 1-norm of the tensor
void profile_on()
turn on profiling
dtype * get_mapped_data(char const *idx, Idx_Partition const &prl, Idx_Partition const &blk=Idx_Partition(), bool unpack=true)
returns local data of tensor with parallel distribution prl and local blocking blk ...
an instance of a tensor within a CTF world
void read(int64_t npair, Pair< dtype > *pairs)
Gives the values associated with any set of indices.
void scale(dtype alpha, char const *idx_A)
scales A[idx_A] = alpha*A[idx_A]
Tensor< dtype > & operator=(dtype val)
sets all values in the tensor to val
#define NORM2_REAL_INST(dtype)
class for execution distributed summation of tensors
int read_local_nnz(int64_t *num_pair, char **mapped_data, bool unpack_sym=false) const
read tensor data pairs local to processor that have nonzero values
double estimate_time(CTF_int::tensor &A, char const *idx_A, CTF_int::tensor &B, char const *idx_B, char const *idx_C)
estimate the time of a contraction C[idx_C] = A[idx_A]*B[idx_B]
void free_self()
destructor
virtual char const * mulid() const
identity element for multiplication i.e. 1
int reduce_sumsq(char *result)
computes the sum of squares of the elements
void write_sparse_to_file_base(const char *fpath, bool with_vals, Tensor< dtype > *T)
void copy_tensor_data(tensor const *other)
copies all tensor data from other
void set_name(char const *name)
set the tensor name
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
char * name
name given to tensor
int np
number of processors
void init(algstrct const *sr, int order, int const *edge_len, int const *sym, CTF::World *wrld, bool alloc_data, char const *name, bool profile, bool is_sparse)
initializes tensor data
MPI_Comm comm
set of processors making up this world
void get_max_abs(int n, dtype *data) const
obtains a small number of the biggest elements of the tensor in sorted order (e.g. eigenvalues)
int read_local(int64_t *num_pair, char **mapped_data, bool unpack_sym=false) const
read tensor data pairs local to processor including those with zero values WARNING: for sparse tensor...
int get_max_abs(int n, char *data) const
obtains the largest n elements (in absolute value) of the tensor
void cvrt_idx(int order, int const *lens, int64_t idx, int *idx_arr)