2 #include "../tensor/untyped_tensor.h" 3 #include "../shared/util.h" 4 #include "../interface/timer.h" 6 #include "../scaling/scaling.h" 15 int i, is, j, sym_dim,
scal_diag, num_sy, num_sy_neg;
16 int * idx_map_A, * idx_map_B;
19 if (sym_tsr == nonsym_tsr)
return;
29 for (i=0; i<sym_tsr->
order; i++){
30 if (sym_tsr->
sym[i] != nonsym_tsr->
sym[i]){
32 if (sym_tsr->
sym[i] ==
AS) rev_sign = -1;
33 if (sym_tsr->
sym[i] ==
SY){
36 if (i>0 && sym_tsr->
sym[i-1] !=
NS) i++;
40 while (j<sym_tsr->order && sym_tsr->
sym[j] !=
NS){
46 while (j>=0 && sym_tsr->
sym[j] !=
NS){
81 strcpy(spf,
"desymmetrize_");
82 strcat(spf,sym_tsr->
name);
99 for (i=0; i<sym_tsr->
order; i++){
107 if (scal_diag && num_sy+num_sy_neg==1){
108 ctsr =
new tensor(sym_tsr);
113 for (i=-num_sy_neg-1; i<num_sy; i++){
115 idx_map_A[sym_dim] = sym_dim+i+1;
116 idx_map_A[sym_dim+i+1] = sym_dim;
117 char * mksign = NULL;
124 ksign = nonsym_tsr->
sr->
mulid();
126 nonsym_tsr, idx_map_B, nonsym_tsr->
sr->
mulid());
129 if (rev_sign == -1)
cdealloc(mksign);
130 idx_map_A[sym_dim] = sym_dim;
131 idx_map_A[sym_dim+i+1] = sym_dim+i+1;
133 if (scal_diag && num_sy+num_sy_neg==1)
delete ctsr;
149 if (num_sy+num_sy_neg>1){
152 if (!is_C && scal_diag){
153 for (i=-num_sy_neg-1; i<num_sy; i++){
155 for (j=0; j<sym_tsr->
order; j++){
158 idx_map_A[j] = sym_dim;
160 idx_map_A[j] = sym_dim-1;
161 }
else if (j<sym_dim+i+1) {
175 nonsym_tsr->
sr->
cast_double(((
double)(num_sy+num_sy_neg-1.))/(num_sy+num_sy_neg), scalf);
176 scaling sscl(nonsym_tsr, idx_map_A, scalf);
226 strcpy(spf,
"desymmetrize_");
227 strcat(spf,sym_tsr->
name);
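The loop over lines 113-131 sums the packed symmetric tensor into nonsym_tsr once per swapped index map, using mulid() as the coefficient and reversing the sign when the broken symmetry is antisymmetric (AS). As a standalone illustration of that unpacking step, here is a minimal sketch for a single SY/AS index pair of an order-2 tensor, using plain arrays rather than CTF's tensor and summation classes; the packed lower-triangular layout is an assumption of the sketch, not CTF's internal storage.

// Standalone sketch (not CTF code): unpack one symmetric (SY) or antisymmetric (AS)
// index pair stored as a lower triangle into a full dense array. For AS the mirrored
// entries pick up a factor of -1, mirroring rev_sign in the listing above.
#include <vector>
#include <cstdio>

// tri holds entries t(i,j) for i >= j (lower triangle), packed row by row.
static std::vector<double> unpack_pair(const std::vector<double>& tri, int n, bool antisym) {
  std::vector<double> full(n * n, 0.0);
  double sign = antisym ? -1.0 : 1.0;      // rev_sign in the listing above
  for (int i = 0; i < n; i++) {
    for (int j = 0; j <= i; j++) {
      double v = tri[i * (i + 1) / 2 + j];
      full[i * n + j] = v;                 // stored entry
      full[j * n + i] = sign * v;          // mirrored entry (an AS tensor stores 0 on the diagonal)
    }
  }
  return full;
}

int main() {
  // 3x3 lower triangle, rows: (a00), (a10 a11), (a20 a21 a22)
  std::vector<double> tri = {1, 2, 3, 4, 5, 6};
  std::vector<double> full = unpack_pair(tri, 3, /*antisym=*/false);
  for (int i = 0; i < 3; i++) {
    for (int j = 0; j < 3; j++) std::printf("%5.1f ", full[i * 3 + j]);
    std::printf("\n");
  }
  return 0;
}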
239    int i, j, is, sym_dim, scal_diag, num_sy, num_sy_neg;
240    int * idx_map_A, * idx_map_B;
248    strcpy(spf,"symmetrize_");
249    strcat(spf,sym_tsr->name);
263    for (i=0; i<sym_tsr->order; i++){
264      if (sym_tsr->sym[i] != nonsym_tsr->sym[i]){
266        if (sym_tsr->sym[i] == AS) rev_sign = -1;
267        if (sym_tsr->sym[i] == SY){
270        if (i>0 && sym_tsr->sym[i-1] != NS) i++;
274        while (j<sym_tsr->order && sym_tsr->sym[j] != NS){
280        while (j>=0 && sym_tsr->sym[j] != NS){
306    for (i=0; i<sym_tsr->order; i++){
313      for (i=-num_sy_neg-1; i<num_sy; i++){
315        idx_map_A[sym_dim] = sym_dim+i+1;
316        idx_map_A[sym_dim+i+1] = sym_dim;
325        summation csum(nonsym_tsr, idx_map_A, ksign,
326                       sym_tsr, idx_map_B, nonsym_tsr->sr->mulid());
331        idx_map_A[sym_dim] = sym_dim;
332        idx_map_A[sym_dim+i+1] = sym_dim+i+1;
334      if (scal_diag && num_sy+num_sy_neg == 1) {
340          sym_tsr->sym[is] = SH;
342          sym_tsr->sym[is] = SY;
352    if (num_sy+num_sy_neg > 1){
356      for (i=-num_sy_neg-1; i<num_sy; i++){
358        idx_map_B[sym_dim+i+1] = sym_dim-num_sy_neg;
359        idx_map_B[sym_dim] = sym_dim-num_sy_neg;
360        for (j=MAX(sym_dim+i+2,sym_dim+1); j<sym_tsr->order; j++){
361          idx_map_B[j] = j-i-num_sy_neg-1;
366        nonsym_tsr->sr->cast_double(((double)(num_sy-i-1.))/(num_sy-i), scalf);
376    tensor sym_tsr2(sym_tsr,1,0);
388    strcpy(spf,"symmetrize_");
389    strcat(spf,sym_tsr->name);
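symmetrize runs the mapping in the other direction: nonsym_tsr is summed into sym_tsr over the swapped index maps (lines 313-332), with the SH/SY toggle and the (num_sy-i-1)/(num_sy-i) factor correcting how often diagonal entries are accumulated. A minimal standalone sketch of folding one SY/AS index pair of a dense order-2 array into a packed triangle follows; plain arrays and the packed layout are assumptions of the sketch, not CTF's internal code.

// Standalone sketch (not CTF's summation class): fold a full n x n array into a
// lower-triangular packed layout for one SY or AS index pair by summing the two
// index orders, with a -1 factor on the swapped term for AS.
#include <vector>

static std::vector<double> fold_pair(const std::vector<double>& full, int n, bool antisym) {
  std::vector<double> tri(n * (n + 1) / 2, 0.0);
  double sign = antisym ? -1.0 : 1.0;      // rev_sign in the listing above
  for (int i = 0; i < n; i++) {
    for (int j = 0; j < i; j++)            // strictly-lower entries: both index orders contribute
      tri[i * (i + 1) / 2 + j] = full[i * n + j] + sign * full[j * n + i];
    // Diagonal: only one equivalent entry; AS forces it to zero (cf. the SH/SY toggle above).
    tri[i * (i + 1) / 2 + i] = antisym ? 0.0 : full[i * n + i];
  }
  return tri;
}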
414    for (i=0; i<ndim; i++){
426    if (np % 2 == 1) sgn = 1.0;
428    for (i=0; i<np; i++){
431    for (i=np; i<ndim; i++){
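The sgn logic above tracks the parity of the index permutation being applied: reordering an antisymmetric group contributes a factor of -1 per transposition. A standalone helper that computes that parity sign is shown below; it is an illustration of the sign rule, not cmp_sym_perms itself.

// Standalone sketch: the sign of a permutation is (-1)^(number of transpositions needed
// to sort it). Any sorting by swaps gives the same parity, so we count swaps directly.
#include <vector>
#include <utility>

static double perm_sign(std::vector<int> perm) {   // by value: we sort a copy
  int nswap = 0;
  for (int i = 0; i < (int)perm.size(); i++)
    for (int j = i + 1; j < (int)perm.size(); j++)
      if (perm[j] < perm[i]) { std::swap(perm[i], perm[j]); nswap++; }
  return (nswap % 2 == 0) ? 1.0 : -1.0;             // e.g. {1,0,2} -> -1, {1,2,0} -> +1
}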
449    int iA, jA, iB, jB, iiB, broken, tmp;
452    for (iA=0; iA<A->order; iA++){
453      if (A->sym[iA] != NS){
455        iB = idx_arr[2*idx_A[iA]+off_B];
456        while (A->sym[jA] != NS){
459          jB = idx_arr[2*idx_A[jA]+off_B];
460          if ((iB == -1) ^ (jB == -1)) broken = 1;
464          if (iB != -1 && jB != -1) {
466            for (iiB=MIN(iB,jB); iiB<MAX(iB,jB); iiB++){
467              ASSERT(iiB >= 0 && iiB <= B->order);
468              if (B->sym[iiB] == NS) broken = 1;
477          if (idx_A[iA] > idx_A[jA]){
478            idx_arr[2*idx_A[iA]+off_A] = jA;
479            idx_arr[2*idx_A[jA]+off_A] = iA;
481            idx_A[iA] = idx_A[jA];
483            if (A->sym[iA] == AS) add_sign *= -1.0;
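Lines 455-468 implement the broken-symmetry test for a summation: a symmetric pair of A is broken if exactly one of its two labels appears in B, or if the B dimensions spanned by the two labels are not themselves covered by a symmetric group; when the pair is then reordered, add_sign flips for AS. The contraction variant below (lines 505-540) applies the same test against both B and C. Here is a standalone sketch of the test on plain arrays; encoding NS as 0 is an assumption of the sketch, not CTF's enum value.

// Standalone sketch of the "broken symmetry" test used above (plain arrays, not the
// CTF tensor class). sym_B[d] is nonzero when dimension d of B is in a symmetric group
// with d+1 (0 plays the role of NS here). posB_i/posB_j are the positions in B of the
// two labels carried by A's symmetric pair, or -1 if a label does not appear in B.
#include <algorithm>

static bool pair_broken(const int* sym_B, int posB_i, int posB_j) {
  if ((posB_i == -1) ^ (posB_j == -1)) return true;   // only one of the two labels appears in B
  if (posB_i == -1 && posB_j == -1) return false;     // neither appears: nothing to break
  for (int d = std::min(posB_i, posB_j); d < std::max(posB_i, posB_j); d++)
    if (sym_B[d] == 0) return true;                   // gap in B's symmetric group
  return false;
}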
505    int iA, jA, iB, iC, jB, jC, iiB, iiC, broken, tmp;
508    for (iA=0; iA<A->order; iA++){
509      if (A->sym[iA] != NS){
511        iB = idx_arr[3*idx_A[iA]+off_B];
512        iC = idx_arr[3*idx_A[iA]+off_C];
513        while (A->sym[jA] != NS){
516          jB = idx_arr[3*idx_A[jA]+off_B];
518          if (iB != -1 && jB != -1){
519            for (iiB=MIN(iB,jB); iiB<MAX(iB,jB); iiB++){
520              if (B->sym[iiB] == NS) broken = 1;
523          if ((iB == -1) ^ (jB == -1)) broken = 1;
524          jC = idx_arr[3*idx_A[jA]+off_C];
526          if (iC != -1 && jC != -1){
527            for (iiC=MIN(iC,jC); iiC<MAX(iC,jC); iiC++){
528              if (C->sym[iiC] == NS) broken = 1;
531          if ((iC == -1) ^ (jC == -1)) broken = 1;
534          if (idx_A[iA] > idx_A[jA]){
535            idx_arr[3*idx_A[iA]+off_A] = jA;
536            idx_arr[3*idx_A[jA]+off_A] = iA;
538            idx_A[iA] = idx_A[jA];
540            if (A->sym[iA] == AS) add_sign *= -1.0;
550                    std::vector<int>& signs,
582    for (i=0; i<(int)perms.size(); i++){
583      if (perms[i].is_equal(norm_ord_perm)){
588    perms.push_back(norm_ord_perm);
589    signs.push_back(add_sign);
594                    std::vector<int>& signs,
600    tensor * tsr_A, * tsr_B, * tsr_C;
616    order_perm(tsr_A, tsr_B, tsr_C, idx_arr, 0, 1, 2,
619    order_perm(tsr_B, tsr_A, tsr_C, idx_arr, 1, 0, 2,
622    order_perm(tsr_C, tsr_B, tsr_A, idx_arr, 2, 1, 0,
636    for (i=0; i<(int)perms.size(); i++){
637      if (perms[i].is_equal(norm_ord_perm)){
642    perms.push_back(norm_ord_perm);
643    signs.push_back(add_sign);
648                     std::vector<summation>& perms,
649                     std::vector<int>& signs){
659    for (i=0; i<tsr_A->order; i++){
661      while (tsr_A->sym[j] != NS){
663        for (k=0; k<(int)perms.size(); k++){
666          if (tsr_A->sym[j-1] == AS) sign *= -1;
667          tmp = new_type1.idx_A[i];
669          new_type1.idx_A[j] = tmp;
674    for (i=0; i<tsr_B->order; i++){
676      while (tsr_B->sym[j] != NS){
678        for (k=0; k<(int)perms.size(); k++){
681          if (tsr_B->sym[j-1] == AS) sign *= -1;
682          tmp = new_type2.idx_B[i];
684          new_type2.idx_B[j] = tmp;
692                     std::vector<contraction>& perms,
693                     std::vector<int>& signs){
702    tensor * tsr_A, * tsr_B, * tsr_C;
711    for (i=0; i<tsr_A->order; i++){
713      while (tsr_A->sym[j] != NS){
715        for (k=0; k<(int)perms.size(); k++){
718          if (tsr_A->sym[j-1] == AS) sign *= -1;
719          tmp = ntype.idx_A[i];
721          ntype.idx_A[j] = tmp;
726    for (i=0; i<tsr_B->order; i++){
728      while (tsr_B->sym[j] != NS){
730        for (k=0; k<(int)perms.size(); k++){
733          if (tsr_B->sym[j-1] == AS) sign *= -1;
734          tmp = ntype.idx_B[i];
736          ntype.idx_B[j] = tmp;
742    for (i=0; i<tsr_C->order; i++){
744      while (tsr_C->sym[j] != NS){
746        for (k=0; k<(int)perms.size(); k++){
749          if (tsr_C->sym[j-1] == AS) sign *= -1;
750          tmp = ntype.idx_C[i];
752          ntype.idx_C[j] = tmp;
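Both get_sym_perms variants grow the perms/signs vectors by repeatedly swapping adjacent index labels inside each symmetric group, flipping the sign for AS and keeping only permutations not seen before. The following standalone sketch shows that pattern on a plain index array; the 0/1/2 encoding for NS/SY/AS is an assumption of the sketch, not CTF's enum values.

// Standalone sketch of the permutation-generation pattern above (plain index arrays,
// not CTF's summation/contraction objects). sym[d]: 0 = no symmetry between positions
// d and d+1, 1 = SY, 2 = AS; sym must have at least idx.size()-1 entries.
#include <vector>
#include <utility>

struct IdxPerm { std::vector<int> idx; int sign; };

static std::vector<IdxPerm> all_pair_perms(const std::vector<int>& idx, const std::vector<int>& sym) {
  std::vector<IdxPerm> out;
  out.push_back({idx, 1});                              // identity permutation, sign +1
  bool grew = true;
  while (grew) {                                        // repeat passes until closed under swaps
    grew = false;
    for (size_t j = 1; j < idx.size(); j++) {
      if (sym[j - 1] == 0) continue;                    // positions j-1, j not in one group
      size_t cur = out.size();                          // only extend permutations known so far
      for (size_t k = 0; k < cur; k++) {
        IdxPerm p = out[k];
        std::swap(p.idx[j - 1], p.idx[j]);              // exchange the two index labels
        if (sym[j - 1] == 2) p.sign = -p.sign;          // AS pair flips the sign
        bool seen = false;
        for (const IdxPerm& q : out) if (q.idx == p.idx) { seen = true; break; }
        if (!seen) { out.push_back(p); grew = true; }   // keep only distinct permutations
      }
    }
  }
  return out;
}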
void order_perm(tensor const *A, tensor const *B, tensor const *C, int *idx_arr, int off_A, int off_B, int off_C, int *idx_A, int *idx_B, int *idx_C, int &add_sign, int &mod)
orders the contraction indices of one tensor that don't break contraction symmetries ...
char * home_buffer
buffer associated with home mapping of tensor, to which it is returned
def sum(tensor, init_A, axis=None, dtype=None, out=None, keepdims=None)
bool is_home
whether the latest tensor data is in the home buffer
int64_t * nnz_blk
nonzero elements in each block owned locally
int * sym
symmetries among tensor dimensions
void get_sym_perms(contraction const &ctr, std::vector< contraction > &perms, std::vector< int > &signs)
finds all permutations of a contraction that must be done for a broken symmetry
int * idx_A
indices of left operand
void execute(bool run_diag=false)
run summation
void inv_idx(int order_A, int const *idx_A, int order_B, int const *idx_B, int order_C, int const *idx_C, int *order_tot, int **idx_arr)
invert index map
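The excerpts above index idx_arr as idx_arr[2*label+off] for summations and idx_arr[3*label+off] for contractions, with -1 marking labels absent from an operand. Below is a standalone sketch of building such an inverse map for two operands; the function name and the way the label count is returned are choices of the sketch, not CTF's inv_idx signature.

// Standalone sketch: for every global index label, record which dimension of A and of B
// carries it (or -1 if it does not appear there), two slots per label: [pos in A, pos in B].
#include <vector>
#include <algorithm>

static std::vector<int> build_inv_idx(int order_A, const int* idx_A,
                                      int order_B, const int* idx_B, int& num_labels) {
  num_labels = 0;
  for (int i = 0; i < order_A; i++) num_labels = std::max(num_labels, idx_A[i] + 1);
  for (int i = 0; i < order_B; i++) num_labels = std::max(num_labels, idx_B[i] + 1);
  std::vector<int> idx_arr(2 * num_labels, -1);
  for (int i = 0; i < order_A; i++) idx_arr[2 * idx_A[i] + 0] = i;
  for (int i = 0; i < order_B; i++) idx_arr[2 * idx_B[i] + 1] = i;
  return idx_arr;
}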
virtual void copy(char *a, char const *b) const
copies element b to element a
bool has_home
whether the tensor has a home mapping/buffer
local process walltime measurement
int64_t size
current size of local tensor data chunk (mapping-dependent)
void * alloc(int64_t len)
alloc abstraction
int64_t home_size
size of home buffer
void desymmetrize(tensor *sym_tsr, tensor *nonsym_tsr, bool is_C)
unfolds the data of a tensor
virtual char const * addid() const
identity element for addition i.e. 0
void set_new_nnz_glb(int64_t const *nnz_blk)
sets the number of nonzeros both locally (nnz_loc) and overall globally (nnz_tot) ...
void copy_mapping(int order, mapping const *mapping_A, mapping *mapping_B)
copies mapping A to B
bool is_sparse
whether only the non-zero elements of the tensor are stored
int order
number of tensor dimensions
virtual char * alloc(int64_t n) const
allocate space for n items, necessary for object types
void set_padding()
sets padding and local size of a tensor given a mapping
virtual void addinv(char const *a, char *b) const
b = -a
CTF::World * wrld
distributed processor context on which tensor is defined
class for executing distributed scaling of a tensor
int rank
rank of local processor
class for executing distributed contraction of tensors
int zero_out_padding()
sets padded portion of tensor to zero (this should be maintained internally)
int * idx_B
indices of right operand
int alloc_ptr(int64_t len, void **const ptr)
alloc abstraction
bool is_data_aliased
whether the tensor data is an alias of another tensor object's data
algstrct * sr
algstrct on which tensor elements and operations are defined
mapping * edge_map
mappings of each tensor dimension onto topology dimensions
int * idx_B
indices of output
bool is_mapped
whether a mapping has been selected
int set_zero()
selects a mapping and sets tensor data to zero
int * idx_C
indices of output
int64_t calc_nvirt() const
calculates the virtualization factor of the tensor and returns it
int el_size
size of each element of algstrct in bytes
bool profile
whether profiling should be done for contractions/sums involving this tensor
int cdealloc(void *ptr)
free abstraction
char * data
tensor data, either the data or the key-value pairs should exist at any given time ...
internal distributed tensor class
void cmp_sym_perms(int ndim, int const *sym, int *nperm, int **perm, double *sign)
finds all permutations of a tensor according to a symmetry
void scal_diag(int order, int64_t size, int nvirt, int const *edge_len, int const *sym, int const *padding, int const *phase, int const *phys_phase, int const *virt_phase, int const *cphase_rank, char *vdata, algstrct const *sr, int const *sym_mask)
scales each element by 1/(number of entries equivalent to it after permutation of indices for which s...
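A quick way to see the factor involved: within one symmetric group, an entry whose index values form a multiset with multiplicities m_r has k!/(m_1!*m_2!*...) equivalent permutations, and the rescaling divides by that count. The helper below computes the factor for a given index tuple; it is an arithmetic illustration only, not scal_diag's interface.

// Standalone sketch: reciprocal of the number of distinct permutations of the indices
// within one symmetric group, e.g. {1,2} -> 1/2, {1,1} -> 1, {1,1,2} -> 1/3.
#include <vector>
#include <map>

static double diag_scale(const std::vector<int>& group_idx) {
  auto fact = [](int n) { double f = 1; for (int i = 2; i <= n; i++) f *= i; return f; };
  std::map<int, int> mult;
  for (int v : group_idx) mult[v]++;                 // multiplicity of each index value
  double nperm = fact((int)group_idx.size());        // k!
  for (auto& kv : mult) nperm /= fact(kv.second);    // divide by prod(m_r!)
  return 1.0 / nperm;
}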
void add_sym_perm(std::vector< contraction > &perms, std::vector< int > &signs, contraction const &new_perm, int new_sign)
puts a contraction map into a nice ordering according to preserved symmetries, and adds it if it is d...
int align_symmetric_indices(int order_A, T &idx_A, const int *sym_A, int order_B, T &idx_B, const int *sym_B)
topology * topo
topology to which the tensor is mapped
class for executing distributed summation of tensors
virtual void cast_double(double d, char *c) const
casts the double d to an element of the algebraic structure and stores it at c
int * idx_A
indices of left operand
virtual char const * mulid() const
identity element for multiplication i.e. 1
void symmetrize(tensor *sym_tsr, tensor *nonsym_tsr)
folds the data of a tensor
char * name
name given to tensor
int sum_tensors(bool run_diag)
PDAXPY: a*idx_map_A(A) + b*idx_map_B(B) -> idx_map_B(B). Treats symmetric as lower triangular...
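A standalone sketch of that PDAXPY semantics on dense order-2 data, ignoring distribution, packing, and symmetry handling; the index map idx_map_A = {1,0} is chosen here only for illustration, so A enters transposed relative to B.

// Standalone sketch: B[i][j] = a*A[j][i] + b*B[i][j], i.e. a*idx_map_A(A) + b*idx_map_B(B)
// accumulated into idx_map_B(B), for n x n row-major arrays.
static void daxpy_transposed(int n, double a, const double* A, double b, double* B) {
  for (int i = 0; i < n; i++)
    for (int j = 0; j < n; j++)
      B[i * n + j] = a * A[j * n + i] + b * B[i * n + j];
}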
void clear_mapping()
zeros out mapping