Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
symmetrization.cxx
1 #include "symmetrization.h"
2 #include "../tensor/untyped_tensor.h"
3 #include "../shared/util.h"
4 #include "../interface/timer.h"
5 #include "sym_indices.h"
6 #include "../scaling/scaling.h"
7 
8 using namespace CTF;
9 
10 namespace CTF_int {
11 
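// desymmetrize: unfold the packed data of sym_tsr into nonsym_tsr, whose sym
// array differs from sym_tsr's at one broken symmetric group (e.g. a packed
// antisymmetric matrix is expanded into a full matrix whose (j,i) entry is the
// negative of its (i,j) entry). When is_C is true, nonsym_tsr is an output
// operand of the caller, so only its mapping and storage are set up and no
// values are summed into it.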
12  void desymmetrize(tensor * sym_tsr,
13  tensor * nonsym_tsr,
14  bool is_C){
15  int i, is, j, sym_dim, scal_diag, num_sy, num_sy_neg;
16  int * idx_map_A, * idx_map_B;
17  int rev_sign;
18 
19  if (sym_tsr == nonsym_tsr) return;
20 
20
21 TAU_FSTART(desymmetrize);
22
23  sym_dim = -1;
24  is = -1;
25  rev_sign = 1;
26  scal_diag = 0;
27  num_sy=0;
28  num_sy_neg=0;
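// find the first index whose symmetry flag differs between sym_tsr and
// nonsym_tsr: sym_dim marks the position at which the group is unfolded,
// rev_sign = -1 records an antisymmetric (AS) relation, scal_diag a symmetric
// (SY) one, and num_sy/num_sy_neg count the symmetric relations after/before it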
29  for (i=0; i<sym_tsr->order; i++){
30  if (sym_tsr->sym[i] != nonsym_tsr->sym[i]){
31  is = i;
32  if (sym_tsr->sym[i] == AS) rev_sign = -1;
33  if (sym_tsr->sym[i] == SY){
34  scal_diag = 1;
35  }
36  if (i>0 && sym_tsr->sym[i-1] != NS) i++;
37  sym_dim = i;
38  num_sy = 0;
39  j=i;
40  while (j<sym_tsr->order && sym_tsr->sym[j] != NS){
41  num_sy++;
42  j++;
43  }
44  num_sy_neg = 0;
45  j=i-1;
46  while (j>=0 && sym_tsr->sym[j] != NS){
47  num_sy_neg++;
48  j--;
49  }
50  break;
51  }
52  }
53  nonsym_tsr->clear_mapping();
54  nonsym_tsr->set_padding();
55  copy_mapping(sym_tsr->order, sym_tsr->edge_map, nonsym_tsr->edge_map);
56  if (nonsym_tsr->is_sparse){
57  nonsym_tsr->nnz_blk = (int64_t*)alloc(sizeof(int64_t)*nonsym_tsr->calc_nvirt());
58  std::fill(nonsym_tsr->nnz_blk, nonsym_tsr->nnz_blk+nonsym_tsr->calc_nvirt(), 0);
59  }
60  nonsym_tsr->is_mapped = 1;
61  nonsym_tsr->topo = sym_tsr->topo;
62  nonsym_tsr->set_padding();
63 
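// no symmetry flag differs: alias sym_tsr's data into nonsym_tsr instead of
// copying it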
64  if (sym_dim == -1) {
65  nonsym_tsr->size = sym_tsr->size;
66  nonsym_tsr->data = sym_tsr->data;
67  nonsym_tsr->home_buffer = sym_tsr->home_buffer;
68  nonsym_tsr->is_home = sym_tsr->is_home;
69  nonsym_tsr->has_home = sym_tsr->has_home;
70  nonsym_tsr->home_size = sym_tsr->home_size;
71  nonsym_tsr->is_data_aliased = 1;
72  nonsym_tsr->set_new_nnz_glb(sym_tsr->nnz_blk);
73 TAU_FSTOP(desymmetrize);
74 return;
75  }
76  #ifdef PROF_SYM
77  if (sym_tsr->wrld->rank == 0)
78  VPRINTF(2,"Desymmetrizing %s\n", sym_tsr->name);
79  if (sym_tsr->profile) {
80  char spf[80];
81  strcpy(spf,"desymmetrize_");
82  strcat(spf,sym_tsr->name);
83  CTF::Timer t_pf(spf);
84  if (sym_tsr->wrld->rank == 0)
85  VPRINTF(2,"Desymmetrizing %s\n", sym_tsr->name);
86  t_pf.start();
87  }
88  #endif
89 
90  if (!nonsym_tsr->is_sparse)
91  nonsym_tsr->data = nonsym_tsr->sr->alloc(nonsym_tsr->size);
92  //CTF_int::mst_alloc_ptr(nonsym_tsr->size*nonsym_tsr->sr->el_size, (void**)&nonsym_tsr->data);
93  nonsym_tsr->set_zero();
94  //nonsym_tsr->sr->set(nonsym_tsr->data, nonsym_tsr->sr->addid(), nonsym_tsr->size);
95 
96  CTF_int::alloc_ptr(sym_tsr->order*sizeof(int), (void**)&idx_map_A);
97  CTF_int::alloc_ptr(sym_tsr->order*sizeof(int), (void**)&idx_map_B);
98 
99  for (i=0; i<sym_tsr->order; i++){
100  idx_map_A[i] = i;
101  idx_map_B[i] = i;
102  }
103 
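// SY case: for an input operand, sum every transposed image of sym_tsr (with
// the sign given by rev_sign) into nonsym_tsr and then add sym_tsr itself;
// when the broken group is a single pair, a temporary copy is given SH
// symmetry so zero_out_padding() clears its diagonal, letting the diagonal
// enter only through the final, untransposed sum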
104  if (scal_diag){
105  if (!is_C){
106  tensor * ctsr = sym_tsr;
107  if (scal_diag && num_sy+num_sy_neg==1){
108  ctsr = new tensor(sym_tsr);
109  ctsr->sym[is] = SH;
110  ctsr->zero_out_padding();
111  ctsr->sym[is] = SY;
112  }
113  for (i=-num_sy_neg-1; i<num_sy; i++){
114  if (i==-1) continue;
115  idx_map_A[sym_dim] = sym_dim+i+1;
116  idx_map_A[sym_dim+i+1] = sym_dim;
117  char * mksign = NULL;
118  char const * ksign;
119  if (rev_sign == -1){
120  mksign = (char*)alloc(nonsym_tsr->sr->el_size);
121  nonsym_tsr->sr->addinv(nonsym_tsr->sr->mulid(), mksign);
122  ksign = mksign;
123  } else
124  ksign = nonsym_tsr->sr->mulid();
125  summation csum = summation(ctsr, idx_map_A, ksign,
126  nonsym_tsr, idx_map_B, nonsym_tsr->sr->mulid());
127 
128  csum.sum_tensors(0);
129  if (rev_sign == -1) cdealloc(mksign);
130  idx_map_A[sym_dim] = sym_dim;
131  idx_map_A[sym_dim+i+1] = sym_dim+i+1;
132  }
133  if (scal_diag && num_sy+num_sy_neg==1) delete ctsr;
134  summation ssum = summation(sym_tsr, idx_map_A, nonsym_tsr->sr->mulid(), nonsym_tsr, idx_map_B, nonsym_tsr->sr->mulid());
135  ssum.sum_tensors(0);
136  }
137  /*printf("DESYMMETRIZED:\n");
138  sym_tsr->print();
139  printf("TO:\n");
140  nonsym_tsr->print();*/
141 
142  /*printf("SYM %s\n",sym_tsr->name);
143  sym_tsr->print();
144  printf("NONSYM %s\n",sym_tsr->name);
145  nonsym_tsr->print();*/
146 
147 
148 /* Do not do diagonal rescaling since sum has beta=0 and overwrites diagonal */
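// for groups with more than one symmetric relation, entries where the two
// unfolded positions coincide are rescaled by (num_sy+num_sy_neg-1)/(num_sy+num_sy_neg);
// the repeated label in the scaling's index map restricts it to those entries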
149  if (num_sy+num_sy_neg>1){
150  // assert(0); //FIXME: zero_out_padding instead, fractional rescaling factor seems problematic?
151  // print_tsr(stdout, nonsym_tid);
152  if (!is_C && scal_diag){
153  for (i=-num_sy_neg-1; i<num_sy; i++){
154  if (i==-1) continue;
155  for (j=0; j<sym_tsr->order; j++){
156  if (j==sym_dim+i+1){
157  if (j>sym_dim)
158  idx_map_A[j] = sym_dim;
159  else
160  idx_map_A[j] = sym_dim-1;
161  } else if (j<sym_dim+i+1) {
162  idx_map_A[j] = j;
163  } else {
164  idx_map_A[j] = j-1;
165  }
166  }
167  /* idx_map_A[sym_dim+i+1] = sym_dim-num_sy_neg;
168  idx_map_A[sym_dim] = sym_dim-num_sy_neg;
169  for (j=MAX(sym_dim+i+2,sym_dim+1); j<sym_tsr->order; j++){
170  idx_map_A[j] = j-1;
171  }*/
172  /* printf("tid %d before scale\n", nonsym_tid);
173  print_tsr(stdout, nonsym_tid);*/
174  char * scalf = (char*)alloc(nonsym_tsr->sr->el_size);
175  nonsym_tsr->sr->cast_double(((double)(num_sy+num_sy_neg-1.))/(num_sy+num_sy_neg), scalf);
176  scaling sscl(nonsym_tsr, idx_map_A, scalf);
177  sscl.execute();
178  cdealloc(scalf);
179  /* printf("tid %d after scale\n", nonsym_tid);
180  print_tsr(stdout, nonsym_tid);*/
181  // if (ret != CTF_SUCCESS) ABORT;
182  }
183  }
184  // print_tsr(stdout, nonsym_tid);
185  }
186  } else if (!is_C) {
187  summation ssum = summation(sym_tsr, idx_map_A, nonsym_tsr->sr->mulid(), nonsym_tsr, idx_map_B, nonsym_tsr->sr->mulid());
188  ssum.execute();
189  }
190  CTF_int::cdealloc(idx_map_A);
191  CTF_int::cdealloc(idx_map_B);
192 
193  /* switch (sym_tsr->edge_map[sym_dim].type){
194  case NOT_MAPPED:
195  ASSERT(sym_tsr->edge_map[sym_dim+1].type == NOT_MAPPED);
196  rw_smtr<dtype>(sym_tsr->order, sym_tsr->edge_len, 1.0, 0,
197  sym_tsr->sym, nonsym_tsr->sym,
198  sym_tsr->data, nonsym_tsr->data);
199  rw_smtr<dtype>(sym_tsr->order, sym_tsr->edge_len, rev_sign, 1,
200  sym_tsr->sym, nonsym_tsr->sym,
201  sym_tsr->data, nonsym_tsr->data);
202  break;
203 
204  case VIRTUAL_MAP:
205  if (sym_tsr->edge_map[sym_dim+1].type == VIRTUAL_MAP){
206  nvirt =
207 
208  } else {
209  ASSERT(sym_tsr->edge_map[sym_dim+1].type == PHYSICAL_MAP);
210 
211  }
212  break;
213 
214  case PHYSICAL_MAP:
215  if (sym_tsr->edge_map[sym_dim+1].type == VIRTUAL_MAP){
216 
217  } else {
218  ASSERT(sym_tsr->edge_map[sym_dim+1].type == PHYSICAL_MAP);
219 
220  }
221  break;
222  }*/
223  #ifdef PROF_SYM
224  if (sym_tsr->profile) {
225  char spf[80];
226  strcpy(spf,"desymmetrize_");
227  strcat(spf,sym_tsr->name);
228  CTF::Timer t_pf(spf);
229  t_pf.stop();
230  }
231  #endif
232 
233 TAU_FSTOP(desymmetrize);
234
235  }
236 
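// symmetrize: fold the data of nonsym_tsr back into the packed symmetric
// layout of sym_tsr; the broken symmetric group is detected the same way as in
// desymmetrize()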
237  void symmetrize(tensor * sym_tsr,
238  tensor * nonsym_tsr){
239  int i, j, is, sym_dim, scal_diag, num_sy, num_sy_neg;
240  int * idx_map_A, * idx_map_B;
241  int rev_sign;
242 
243 TAU_FSTART(symmetrize);
244
245  #ifdef PROF_SYM
246  if (sym_tsr->profile) {
247  char spf[80];
248  strcpy(spf,"symmetrize_");
249  strcat(spf,sym_tsr->name);
250  CTF::Timer t_pf(spf);
251  if (sym_tsr->wrld->rank == 0)
252  VPRINTF(2,"Symmetrizing %s\n", sym_tsr->name);
253  t_pf.start();
254  }
255  #endif
256 
257  sym_dim = -1;
258  is = -1;
259  rev_sign = 1;
260  scal_diag = 0;
261  num_sy=0;
262  num_sy_neg=0;
263  for (i=0; i<sym_tsr->order; i++){
264  if (sym_tsr->sym[i] != nonsym_tsr->sym[i]){
265  is = i;
266  if (sym_tsr->sym[i] == AS) rev_sign = -1;
267  if (sym_tsr->sym[i] == SY){
268  scal_diag = 1;
269  }
270  if (i>0 && sym_tsr->sym[i-1] != NS) i++;
271  sym_dim = i;
272  num_sy = 0;
273  j=i;
274  while (j<sym_tsr->order && sym_tsr->sym[j] != NS){
275  num_sy++;
276  j++;
277  }
278  num_sy_neg = 0;
279  j=i-1;
280  while (j>=0 && sym_tsr->sym[j] != NS){
281  num_sy_neg++;
282  j--;
283  }
284  break;
285  }
286  }
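// no symmetry flag differs: adopt nonsym_tsr's mapping and alias its data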
287  if (sym_dim == -1) {
288  sym_tsr->topo = nonsym_tsr->topo;
289  copy_mapping(nonsym_tsr->order, nonsym_tsr->edge_map, sym_tsr->edge_map);
290  if (sym_tsr->is_sparse){
291  sym_tsr->nnz_blk = (int64_t*)alloc(sizeof(int64_t)*sym_tsr->calc_nvirt());
292  std::fill(sym_tsr->nnz_blk, sym_tsr->nnz_blk+sym_tsr->calc_nvirt(), 0);
293  }
294  sym_tsr->is_mapped = 1;
295  sym_tsr->set_padding();
296  sym_tsr->size = nonsym_tsr->size;
297  sym_tsr->data = nonsym_tsr->data;
298  sym_tsr->is_home = nonsym_tsr->is_home;
299  sym_tsr->set_new_nnz_glb(nonsym_tsr->nnz_blk);
300  } else {
301 
302  //sym_tsr->sr->set(sym_tsr->data, sym_tsr->sr->addid(), sym_tsr->size);
303  CTF_int::alloc_ptr(sym_tsr->order*sizeof(int), (void**)&idx_map_A);
304  CTF_int::alloc_ptr(sym_tsr->order*sizeof(int), (void**)&idx_map_B);
305 
306  for (i=0; i<sym_tsr->order; i++){
307  idx_map_A[i] = i;
308  idx_map_B[i] = i;
309  }
310 
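// the explicit pairwise accumulation below is disabled (if (0)); the active
// path is the else branch further down, which folds via an intermediate copy
// of sym_tsr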
311  if (0){
312  //FIXME: this is not robust when doing e.g. {SY, SY, SY, NS} -> {SY, NS, SY, NS}
313  for (i=-num_sy_neg-1; i<num_sy; i++){
314  if (i==-1) continue;
315  idx_map_A[sym_dim] = sym_dim+i+1;
316  idx_map_A[sym_dim+i+1] = sym_dim;
317  // printf("symmetrizing\n");
318  /* summation csum = summation(nonsym_tsr, idx_map_A, rev_sign,
319  sym_tsr, idx_map_B, 1.0);*/
320  char * ksign = (char*)alloc(nonsym_tsr->sr->el_size);
321  if (rev_sign == -1)
322  nonsym_tsr->sr->addinv(nonsym_tsr->sr->mulid(), ksign);
323  else
324  nonsym_tsr->sr->copy(ksign, nonsym_tsr->sr->mulid());
325  summation csum(nonsym_tsr, idx_map_A, ksign,
326  sym_tsr, idx_map_B, nonsym_tsr->sr->mulid());
327  csum.sum_tensors(0);
328  cdealloc(ksign);
329 
330  // print_tsr(stdout, sym_tid);
331  idx_map_A[sym_dim] = sym_dim;
332  idx_map_A[sym_dim+i+1] = sym_dim+i+1;
333  }
334  if (scal_diag && num_sy+num_sy_neg == 1) {
335  /* for (i=-num_sy_neg-1; i<num_sy-1; i++){
336  tensors[sym_tid]->sym[sym_dim+i+1] = SH;
337  zero_out_padding(sym_tid);
338  tensors[sym_tid]->sym[sym_dim+i+1] = SY;
339  }*/
340  sym_tsr->sym[is] = SH;
341  sym_tsr->zero_out_padding();
342  sym_tsr->sym[is] = SY;
343  /* for (i=-num_sy_neg-1; i<num_sy-1; i++){
344  tensors[sym_tid]->sym[sym_dim+i+1] = SY;
345  }*/
346  }
347 
348  summation ssum = summation(nonsym_tsr, idx_map_A, nonsym_tsr->sr->mulid(), sym_tsr, idx_map_B, nonsym_tsr->sr->mulid());
349  ssum.sum_tensors(0);
350 
351 
352  if (num_sy+num_sy_neg > 1){
353  //assert(0); //FIXME: use zero_out_padding?
354  if (scal_diag){
355  // printf("symmetrizing diagonal=%d\n",num_sy);
356  for (i=-num_sy_neg-1; i<num_sy; i++){
357  if (i==-1) continue;
358  idx_map_B[sym_dim+i+1] = sym_dim-num_sy_neg;
359  idx_map_B[sym_dim] = sym_dim-num_sy_neg;
360  for (j=MAX(sym_dim+i+2,sym_dim+1); j<sym_tsr->order; j++){
361  idx_map_B[j] = j-i-num_sy_neg-1;
362  }
363  /*printf("tid %d before scale\n", nonsym_tid);
364  print_tsr(stdout, sym_tid);*/
365  char * scalf = (char*)alloc(nonsym_tsr->sr->el_size);
366  nonsym_tsr->sr->cast_double(((double)(num_sy-i-1.))/(num_sy-i), scalf);
367  scaling sscl = scaling(sym_tsr, idx_map_B, scalf);
368  sscl.execute();
369  /* printf("tid %d after scale\n", sym_tid);
370  print_tsr(stdout, sym_tid);*/
371  //if (ret != CTF_SUCCESS) ABORT;
372  }
373  }
374  }
375  } else {
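// active path: sum the nonsymmetric data into a temporary tensor sym_tsr2
// (created from sym_tsr) with beta = addid(), then accumulate sym_tsr2 into
// sym_tsr with beta = mulid()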
376  tensor sym_tsr2(sym_tsr,1,0);
377  summation ssum = summation(nonsym_tsr, idx_map_A, nonsym_tsr->sr->mulid(), &sym_tsr2, idx_map_B, nonsym_tsr->sr->addid());
378  ssum.execute();
379  summation ssum2 = summation(&sym_tsr2, idx_map_A, nonsym_tsr->sr->mulid(), sym_tsr, idx_map_B, nonsym_tsr->sr->mulid());
380  ssum2.execute();
381  }
382  CTF_int::cdealloc(idx_map_A);
383  CTF_int::cdealloc(idx_map_B);
384  }
385  #ifdef PROF_SYM
386  if (sym_tsr->profile) {
387  char spf[80];
388  strcpy(spf,"symmetrize_");
389  strcat(spf,sym_tsr->name);
390  CTF::Timer t_pf(spf);
391  t_pf.stop();
392  }
393  #endif
394 
395 
396 TAU_FSTOP(symmetrize);
397 }
398 
399 
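// cmp_sym_perms: return (via nperm/perm/sign) the cyclic-shift permutation of
// the leading symmetric index group (sym[0] must not be NS) together with its
// sign; a cyclic shift of np indices is np-1 transpositions, hence the parity
// adjustment below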
400  void cmp_sym_perms(int ndim,
401  int const * sym,
402  int * nperm,
403  int ** perm,
404  double * sign){
405  int i, np;
406  int * pm;
407  double sgn;
408 
409  ASSERT(sym[0] != NS);
410  CTF_int::alloc_ptr(sizeof(int)*ndim, (void**)&pm);
411 
412  np=0;
413  sgn=1.0;
414  for (i=0; i<ndim; i++){
415  if (sym[i]==AS){
416  sgn=-1.0;
417  }
418  if (sym[i]!=NS){
419  np++;
420  } else {
421  np++;
422  break;
423  }
424  }
425  /* a circular shift of n indices requires n-1 swaps */
426  if (np % 2 == 1) sgn = 1.0;
427 
428  for (i=0; i<np; i++){
429  pm[i] = (i+1)%np;
430  }
431  for (i=np; i<ndim; i++){
432  pm[i] = i;
433  }
434 
435  *nperm = np;
436  *perm = pm;
437  *sign = sgn;
438  }
439 
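// order_perm (two-tensor form): within each symmetric group of A whose symmetry
// survives in B, put the index labels into ascending order; every AS swap flips
// add_sign, and any change sets mod so the caller keeps iterating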
440  void order_perm(tensor const * A,
441  tensor const * B,
442  int * idx_arr,
443  int off_A,
444  int off_B,
445  int * idx_A,
446  int * idx_B,
447  int & add_sign,
448  int & mod){
449  int iA, jA, iB, jB, iiB, broken, tmp;
450 
451  //find all symmetries in A
452  for (iA=0; iA<A->order; iA++){
453  if (A->sym[iA] != NS){
454  jA=iA;
455  iB = idx_arr[2*idx_A[iA]+off_B];
456  while (A->sym[jA] != NS){
457  broken = 0;
458  jA++;
459  jB = idx_arr[2*idx_A[jA]+off_B];
460  if ((iB == -1) ^ (jB == -1)) broken = 1;
461  /* if (iB != -1 && jB != -1) {
462  if (B->sym[iB] != A->sym[iA]) broken = 1;
463  }*/
464  if (iB != -1 && jB != -1) {
465  /* Do this because iB,jB can be in reversed order */
466  for (iiB=MIN(iB,jB); iiB<MAX(iB,jB); iiB++){
467  ASSERT(iiB >= 0 && iiB <= B->order);
468  if (B->sym[iiB] == NS) broken = 1;
469  }
470  }
471 
472 
473  /*//if (iB == jB) broken = 1;
474  } */
475  //if the symmetry is preserved, make sure index map is ordered
476  if (!broken){
477  if (idx_A[iA] > idx_A[jA]){
478  idx_arr[2*idx_A[iA]+off_A] = jA;
479  idx_arr[2*idx_A[jA]+off_A] = iA;
480  tmp = idx_A[iA];
481  idx_A[iA] = idx_A[jA];
482  idx_A[jA] = tmp;
483  if (A->sym[iA] == AS) add_sign *= -1.0;
484  mod = 1;
485  }
486  }
487  }
488  }
489  }
490  }
491 
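// three-tensor form of order_perm: a symmetry of A counts as preserved only if
// it is unbroken in both B and C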
492  void order_perm(tensor const * A,
493  tensor const * B,
494  tensor const * C,
495  int * idx_arr,
496  int off_A,
497  int off_B,
498  int off_C,
499  int * idx_A,
500  int * idx_B,
501  int * idx_C,
502  int & add_sign,
503  int & mod){
504 
505  int iA, jA, iB, iC, jB, jC, iiB, iiC, broken, tmp;
506 
507  //find all symmetries in A
508  for (iA=0; iA<A->order; iA++){
509  if (A->sym[iA] != NS){
510  jA=iA;
511  iB = idx_arr[3*idx_A[iA]+off_B];
512  iC = idx_arr[3*idx_A[iA]+off_C];
513  while (A->sym[jA] != NS){
514  broken = 0;
515  jA++;
516  jB = idx_arr[3*idx_A[jA]+off_B];
517  //if (iB == jB) broken = 1;
518  if (iB != -1 && jB != -1){
519  for (iiB=MIN(iB,jB); iiB<MAX(iB,jB); iiB++){
520  if (B->sym[iiB] == NS) broken = 1;
521  }
522  }
523  if ((iB == -1) ^ (jB == -1)) broken = 1;
524  jC = idx_arr[3*idx_A[jA]+off_C];
525  //if (iC == jC) broken = 1;
526  if (iC != -1 && jC != -1){
527  for (iiC=MIN(iC,jC); iiC<MAX(iC,jC); iiC++){
528  if (C->sym[iiC] == NS) broken = 1;
529  }
530  }
531  if ((iC == -1) ^ (jC == -1)) broken = 1;
532  //if the symmetry is preserved, make sure index map is ordered
533  if (!broken){
534  if (idx_A[iA] > idx_A[jA]){
535  idx_arr[3*idx_A[iA]+off_A] = jA;
536  idx_arr[3*idx_A[jA]+off_A] = iA;
537  tmp = idx_A[iA];
538  idx_A[iA] = idx_A[jA];
539  idx_A[jA] = tmp;
540  if (A->sym[iA] == AS) add_sign *= -1.0;
541  mod = 1;
542  }
543  }
544  }
545  }
546  }
547  }
548 
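// add_sym_perm: canonicalize the permuted summation by repeatedly applying
// order_perm to both operands and then aligning symmetric indices; append it
// and its accumulated sign unless an equivalent summation is already in perms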
549  void add_sym_perm(std::vector<summation>& perms,
550  std::vector<int>& signs,
551  summation const & new_perm,
552  int new_sign){
553 
554  int mod, num_tot, i;
555  int * idx_arr;
556  int add_sign;
557  tensor * tsr_A, * tsr_B;
558 
559  add_sign = new_sign;
560  summation norm_ord_perm(new_perm);
561 
562  tsr_A = new_perm.A;
563  tsr_B = new_perm.B;
564 
565  inv_idx(tsr_A->order, norm_ord_perm.idx_A,
566  tsr_B->order, norm_ord_perm.idx_B,
567  &num_tot, &idx_arr);
568  //keep permuting until we get to normal order (no permutations left)
569  do {
570  mod = 0;
571  order_perm(tsr_A, tsr_B, idx_arr, 0, 1,
572  norm_ord_perm.idx_A, norm_ord_perm.idx_B,
573  add_sign, mod);
574  order_perm(tsr_B, tsr_A, idx_arr, 1, 0,
575  norm_ord_perm.idx_B, norm_ord_perm.idx_A,
576  add_sign, mod);
577  } while (mod);
578  add_sign = add_sign*align_symmetric_indices(tsr_A->order, norm_ord_perm.idx_A, tsr_A->sym,
579  tsr_B->order, norm_ord_perm.idx_B, tsr_B->sym);
580 
581  // check if this summation is equivalent to one of the other permutations
582  for (i=0; i<(int)perms.size(); i++){
583  if (perms[i].is_equal(norm_ord_perm)){
584  CTF_int::cdealloc(idx_arr);
585  return;
586  }
587  }
588  perms.push_back(norm_ord_perm);
589  signs.push_back(add_sign);
590  CTF_int::cdealloc(idx_arr);
591  }
592 
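// contraction analogue of add_sym_perm above, canonicalizing idx_A, idx_B and idx_C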
593  void add_sym_perm(std::vector<contraction>& perms,
594  std::vector<int>& signs,
595  contraction const & new_perm,
596  int new_sign){
597  int mod, num_tot, i;
598  int * idx_arr;
599  int add_sign;
600  tensor * tsr_A, * tsr_B, * tsr_C;
601 
602  tsr_A = new_perm.A;
603  tsr_B = new_perm.B;
604  tsr_C = new_perm.C;
605 
606  add_sign = new_sign;
607  contraction norm_ord_perm(new_perm);
608 
609  inv_idx(tsr_A->order, norm_ord_perm.idx_A,
610  tsr_B->order, norm_ord_perm.idx_B,
611  tsr_C->order, norm_ord_perm.idx_C,
612  &num_tot, &idx_arr);
613  //keep permuting until we get to normal order (no permutations left)
614  do {
615  mod = 0;
616  order_perm(tsr_A, tsr_B, tsr_C, idx_arr, 0, 1, 2,
617  norm_ord_perm.idx_A, norm_ord_perm.idx_B, norm_ord_perm.idx_C,
618  add_sign, mod);
619  order_perm(tsr_B, tsr_A, tsr_C, idx_arr, 1, 0, 2,
620  norm_ord_perm.idx_B, norm_ord_perm.idx_A, norm_ord_perm.idx_C,
621  add_sign, mod);
622  order_perm(tsr_C, tsr_B, tsr_A, idx_arr, 2, 1, 0,
623  norm_ord_perm.idx_C, norm_ord_perm.idx_B, norm_ord_perm.idx_A,
624  add_sign, mod);
625  } while (mod);
626  add_sign *= align_symmetric_indices(tsr_A->order,
627  norm_ord_perm.idx_A,
628  tsr_A->sym,
629  tsr_B->order,
630  norm_ord_perm.idx_B,
631  tsr_B->sym,
632  tsr_C->order,
633  norm_ord_perm.idx_C,
634  tsr_C->sym);
635 
636  for (i=0; i<(int)perms.size(); i++){
637  if (perms[i].is_equal(norm_ord_perm)){
638  CTF_int::cdealloc(idx_arr);
639  return;
640  }
641  }
642  perms.push_back(norm_ord_perm);
643  signs.push_back(add_sign);
644  CTF_int::cdealloc(idx_arr);
645  }
646 
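// get_sym_perms: collect the permuted summations (and their signs) that must be
// carried out when a symmetry of A or B is broken; each candidate is
// canonicalized and de-duplicated by add_sym_perm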
647 void get_sym_perms(summation const & sum,
648 std::vector<summation>& perms,
649  std::vector<int>& signs){
650  int i, j, k, tmp;
651  int sign;
652  tensor * tsr_A, * tsr_B;
653  tsr_A = sum.A;
654  tsr_B = sum.B;
655 
656  summation new_type(sum);
657  add_sym_perm(perms, signs, new_type, 1);
658 
659  for (i=0; i<tsr_A->order; i++){
660  j=i;
661  while (tsr_A->sym[j] != NS){
662  j++;
663  for (k=0; k<(int)perms.size(); k++){
664  summation new_type1(perms[k]);
665  sign = signs[k];
666  if (tsr_A->sym[j-1] == AS) sign *= -1;
667  tmp = new_type1.idx_A[i];
668  new_type1.idx_A[i] = new_type1.idx_A[j];
669  new_type1.idx_A[j] = tmp;
670  add_sym_perm(perms, signs, new_type1, sign);
671  }
672  }
673  }
674  for (i=0; i<tsr_B->order; i++){
675  j=i;
676  while (tsr_B->sym[j] != NS){
677  j++;
678  for (k=0; k<(int)perms.size(); k++){
679  summation new_type2(perms[k]);
680  sign = signs[k];
681  if (tsr_B->sym[j-1] == AS) sign *= -1;
682  tmp = new_type2.idx_B[i];
683  new_type2.idx_B[i] = new_type2.idx_B[j];
684  new_type2.idx_B[j] = tmp;
685  add_sym_perm(perms, signs, new_type2, sign);
686  }
687  }
688  }
689  }
690 
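// get_sym_perms for contractions: same generation as above, also permuting
// indices within the symmetric groups of C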
691 void get_sym_perms(contraction const & ctr,
692 std::vector<contraction>& perms,
693  std::vector<int>& signs){
694  // dtype * scl_alpha_C;
695  // int ** scl_idx_maps_C;
696  // nscl_C = 0;
697  // CTF_int::alloc_ptr(sizeof(dtype)*order_C, (void**)&scl_alpha_C);
698  // CTF_int::alloc_ptr(sizeof(int*)*order_C, (void**)&scl_idx_maps_C);
699 
700  int i, j, k, tmp;
701  int sign;
702  tensor * tsr_A, * tsr_B, * tsr_C;
703  tsr_A = ctr.A;
704  tsr_B = ctr.B;
705  tsr_C = ctr.C;
706 
707  contraction new_type = contraction(ctr);
708 
709  add_sym_perm(perms, signs, new_type, 1);
710 
711  for (i=0; i<tsr_A->order; i++){
712  j=i;
713  while (tsr_A->sym[j] != NS){
714  j++;
715  for (k=0; k<(int)perms.size(); k++){
716  contraction ntype(perms[k]);
717  sign = signs[k];
718  if (tsr_A->sym[j-1] == AS) sign *= -1;
719  tmp = ntype.idx_A[i];
720  ntype.idx_A[i] = ntype.idx_A[j];
721  ntype.idx_A[j] = tmp;
722  add_sym_perm(perms, signs, ntype, sign);
723  }
724  }
725  }
726  for (i=0; i<tsr_B->order; i++){
727  j=i;
728  while (tsr_B->sym[j] != NS){
729  j++;
730  for (k=0; k<(int)perms.size(); k++){
731  contraction ntype(perms[k]);
732  sign = signs[k];
733  if (tsr_B->sym[j-1] == AS) sign *= -1;
734  tmp = ntype.idx_B[i];
735  ntype.idx_B[i] = ntype.idx_B[j];
736  ntype.idx_B[j] = tmp;
737  add_sym_perm(perms, signs, ntype, sign);
738  }
739  }
740  }
741 
742  for (i=0; i<tsr_C->order; i++){
743  j=i;
744  while (tsr_C->sym[j] != NS){
745  j++;
746  for (k=0; k<(int)perms.size(); k++){
747  contraction ntype(perms[k]);
748  sign = signs[k];
749  if (tsr_C->sym[j-1] == AS) sign *= -1;
750  tmp = ntype.idx_C[i];
751  ntype.idx_C[i] = ntype.idx_C[j];
752  ntype.idx_C[j] = tmp;
753  add_sym_perm(perms, signs, ntype, sign);
754  }
755  }
756  }
757  }
758 }