Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
topology.cxx
Go to the documentation of this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
2 
3 #include "topology.h"
4 #include "../shared/util.h"
5 #include "../mapping/mapping.h"
6 
7 #ifdef BGQ
8 #include "mpix.h"
9 #endif
10 
11 namespace CTF_int {
12 /*
13  topology::topology(){
14  order = 0;
15  lens = NULL;
16  lda = NULL;
17  is_activated = false;
18  dim_comm = NULL;
19  }*/
20 
22  deactivate();
26  }
27 
28  topology::topology(topology const & other) : glb_comm(other.glb_comm) {
29  order = other.order;
30 
31  lens = (int*)CTF_int::alloc(order*sizeof(int));
32  memcpy(lens, other.lens, order*sizeof(int));
33 
34  lda = (int*)CTF_int::alloc(order*sizeof(int));
35  memcpy(lda, other.lda, order*sizeof(int));
36 
38  for (int i=0; i<order; i++){
39  dim_comm[i] = CommData(other.dim_comm[i]);
40  }
41 
42  is_activated = other.is_activated;
43  }
44 
45  topology::topology(int order_,
46  int const * lens_,
47  CommData cdt,
48  bool activate) : glb_comm(cdt) {
49  order = order_;
50  lens = (int*)CTF_int::alloc(order_*sizeof(int));
51  lda = (int*)CTF_int::alloc(order_*sizeof(int));
52  dim_comm = (CommData*)CTF_int::alloc(order_*sizeof(CommData));
53  is_activated = false;
54 
55  memcpy(lens, lens_, order_*sizeof(int));
56  //reverse FIXME: this is assumed somewhere...
57 // for (int i=0; i<order; i++){
58 // lens[i] = lens_[order-i-1];
59 // }
60 
61  int stride = 1, cut = 0;
62  int rank = glb_comm.rank;
63  for (int i=0; i<order; i++){
64  lda[i] = stride;
65  dim_comm[i] = CommData(((rank/stride)%lens[i]),
66  (((rank/(stride*lens[i]))*stride)+cut),
67  lens[i]);
68 // SETUP_SUB_COMM_SHELL(cdt, dim_comm[i],
69  stride*=lens[i];
70  cut = (rank - (rank/stride)*stride);
71  }
72  if (activate)
73  this->activate();
74  }
75 
77  if (!is_activated){
78  for (int i=0; i<order; i++){
80  }
81  }
82  is_activated = true;
83  }
84 
86  if (is_activated){
87  for (int i=0; i<order; i++){
88  dim_comm[i].deactivate();
89  }
90  }
91  is_activated = false;
92  }
93 
95  TOPOLOGY mach){
96  int np = glb_comm.np;
97  int * dl;
98  int * dim_len;
99  topology * topo;
100  if (mach == NO_TOPOLOGY){
101  dl = (int*)CTF_int::alloc(sizeof(int));
102  dl[0] = np;
103  topo = new topology(1, dl, glb_comm, 1);
104  CTF_int::cdealloc(dl);
105  return topo;
106  }
107  if (mach == TOPOLOGY_GENERIC){
108  int order;
109  factorize(np, &order, &dim_len);
110  topo = new topology(order, dim_len, glb_comm, 1);
111  if (order>0) CTF_int::cdealloc(dim_len);
112  return topo;
113  } else if (mach == TOPOLOGY_BGQ) {
114  dl = (int*)CTF_int::alloc((7)*sizeof(int));
115  dim_len = dl;
116  #ifdef BGQ
117  if (np >= 512){
118  int i, dim;
119  MPIX_Hardware_t hw;
120  MPIX_Hardware(&hw);
121 
122  int * topo_dims = (int*)CTF_int::alloc(7*sizeof(int));
123  topo_dims[0] = hw.Size[0];
124  topo_dims[1] = hw.Size[1];
125  topo_dims[2] = hw.Size[2];
126  topo_dims[3] = hw.Size[3];
127  topo_dims[4] = hw.Size[4];
128  topo_dims[5] = MIN(4, np/(topo_dims[0]*topo_dims[1]*
129  topo_dims[2]*topo_dims[3]*
130  topo_dims[4]));
131  topo_dims[6] = (np/ (topo_dims[0]*topo_dims[1]*
132  topo_dims[2]*topo_dims[3]*
133  topo_dims[4])) / 4;
134  dim = 0;
135  for (i=0; i<7; i++){
136  if (topo_dims[i] > 1){
137  dl[dim] = topo_dims[i];
138  dim++;
139  }
140  }
141  topo = new topology(dim, topo_dims, glb_comm, 1);
142  CTF_int::cdealloc(topo_dims);
143  return topo;
144  } else
145  #endif
146  {
147  int order;
148  factorize(np, &order, &dim_len);
149  topo = new topology(order, dim_len, glb_comm, 1);
150  CTF_int::cdealloc(dim_len);
151  return topo;
152  }
153  } else if (mach == TOPOLOGY_BGP) {
154  int order;
155  if (1<<(int)log2(np) != np){
156  factorize(np, &order, &dim_len);
157  topo = new topology(order, dim_len, glb_comm, 1);
158  CTF_int::cdealloc(dim_len);
159  return topo;
160  }
161  if ((int)log2(np) == 0) order = 0;
162  else if ((int)log2(np) <= 2) order = 1;
163  else if ((int)log2(np) <= 4) order = 2;
164  else order = 3;
165  dim_len = (int*)CTF_int::alloc((order)*sizeof(int));
166  switch ((int)log2(np)){
167  case 0:
168  break;
169  case 1:
170  dim_len[0] = 2;
171  break;
172  case 2:
173  dim_len[0] = 4;
174  break;
175  case 3:
176  dim_len[0] = 4;
177  dim_len[1] = 2;
178  break;
179  case 4:
180  dim_len[0] = 4;
181  dim_len[1] = 4;
182  break;
183  case 5:
184  dim_len[0] = 4;
185  dim_len[1] = 4;
186  dim_len[2] = 2;
187  break;
188  case 6:
189  dim_len[0] = 4;
190  dim_len[1] = 4;
191  dim_len[2] = 4;
192  break;
193  case 7:
194  dim_len[0] = 8;
195  dim_len[1] = 4;
196  dim_len[2] = 4;
197  break;
198  case 8:
199  dim_len[0] = 8;
200  dim_len[1] = 8;
201  dim_len[2] = 4;
202  break;
203  case 9:
204  dim_len[0] = 8;
205  dim_len[1] = 8;
206  dim_len[2] = 8;
207  break;
208  case 10:
209  dim_len[0] = 16;
210  dim_len[1] = 8;
211  dim_len[2] = 8;
212  break;
213  case 11:
214  dim_len[0] = 32;
215  dim_len[1] = 8;
216  dim_len[2] = 8;
217  break;
218  case 12:
219  dim_len[0] = 32;
220  dim_len[1] = 16;
221  dim_len[2] = 8;
222  break;
223  case 13:
224  dim_len[0] = 32;
225  dim_len[1] = 32;
226  dim_len[2] = 8;
227  break;
228  case 14:
229  dim_len[0] = 32;
230  dim_len[1] = 32;
231  dim_len[2] = 16;
232  break;
233  case 15:
234  dim_len[0] = 32;
235  dim_len[1] = 32;
236  dim_len[2] = 32;
237  break;
238  default:
239  factorize(np, &order, &dim_len);
240  break;
241  }
242  topo = new topology(order, dim_len, glb_comm, 1);
243  CTF_int::cdealloc(dim_len);
244  return topo;
245  } else if (mach == TOPOLOGY_8D) {
246  int order;
247  int * dim_len;
248  if (1<<(int)log2(np) != np){
249  factorize(np, &order, &dim_len);
250  topo = new topology(order, dim_len, glb_comm, 1);
251  CTF_int::cdealloc(dim_len);
252  return topo;
253  }
254  order = MIN((int)log2(np),8);
255  if (order > 0)
256  dim_len = (int*)CTF_int::alloc((order)*sizeof(int));
257  else dim_len = NULL;
258  switch ((int)log2(np)){
259  case 0:
260  break;
261  case 1:
262  dim_len[0] = 2;
263  break;
264  case 2:
265  dim_len[0] = 2;
266  dim_len[1] = 2;
267  break;
268  case 3:
269  dim_len[0] = 2;
270  dim_len[1] = 2;
271  dim_len[2] = 2;
272  break;
273  case 4:
274  dim_len[0] = 2;
275  dim_len[1] = 2;
276  dim_len[2] = 2;
277  dim_len[3] = 2;
278  break;
279  case 5:
280  dim_len[0] = 2;
281  dim_len[1] = 2;
282  dim_len[2] = 2;
283  dim_len[3] = 2;
284  dim_len[4] = 2;
285  break;
286  case 6:
287  dim_len[0] = 2;
288  dim_len[1] = 2;
289  dim_len[2] = 2;
290  dim_len[3] = 2;
291  dim_len[4] = 2;
292  dim_len[5] = 2;
293  break;
294  case 7:
295  dim_len[0] = 2;
296  dim_len[1] = 2;
297  dim_len[2] = 2;
298  dim_len[3] = 2;
299  dim_len[4] = 2;
300  dim_len[5] = 2;
301  dim_len[6] = 2;
302  break;
303  case 8:
304  dim_len[0] = 2;
305  dim_len[1] = 2;
306  dim_len[2] = 2;
307  dim_len[3] = 2;
308  dim_len[4] = 2;
309  dim_len[5] = 2;
310  dim_len[6] = 2;
311  dim_len[7] = 2;
312  break;
313  case 9:
314  dim_len[0] = 4;
315  dim_len[1] = 2;
316  dim_len[2] = 2;
317  dim_len[3] = 2;
318  dim_len[4] = 2;
319  dim_len[5] = 2;
320  dim_len[6] = 2;
321  dim_len[7] = 2;
322  break;
323  case 10:
324  dim_len[0] = 4;
325  dim_len[1] = 4;
326  dim_len[2] = 2;
327  dim_len[3] = 2;
328  dim_len[4] = 2;
329  dim_len[5] = 2;
330  dim_len[6] = 2;
331  dim_len[7] = 2;
332  break;
333  case 11:
334  dim_len[0] = 4;
335  dim_len[1] = 4;
336  dim_len[2] = 4;
337  dim_len[3] = 2;
338  dim_len[4] = 2;
339  dim_len[5] = 2;
340  dim_len[6] = 2;
341  dim_len[7] = 2;
342  break;
343  case 12:
344  dim_len[0] = 4;
345  dim_len[1] = 4;
346  dim_len[2] = 4;
347  dim_len[3] = 4;
348  dim_len[4] = 2;
349  dim_len[5] = 2;
350  dim_len[6] = 2;
351  dim_len[7] = 2;
352  break;
353  case 13:
354  dim_len[0] = 4;
355  dim_len[1] = 4;
356  dim_len[2] = 4;
357  dim_len[3] = 4;
358  dim_len[4] = 4;
359  dim_len[5] = 2;
360  dim_len[6] = 2;
361  dim_len[7] = 2;
362  break;
363  case 14:
364  dim_len[0] = 4;
365  dim_len[1] = 4;
366  dim_len[2] = 4;
367  dim_len[3] = 4;
368  dim_len[4] = 4;
369  dim_len[5] = 4;
370  dim_len[6] = 2;
371  dim_len[7] = 2;
372  break;
373  case 15:
374  dim_len[0] = 4;
375  dim_len[1] = 4;
376  dim_len[2] = 4;
377  dim_len[3] = 4;
378  dim_len[4] = 4;
379  dim_len[5] = 4;
380  dim_len[6] = 4;
381  dim_len[7] = 2;
382  break;
383  default:
384  factorize(np, &order, &dim_len);
385  break;
386 
387  }
388  topo = new topology(order, dim_len, glb_comm, 1);
389  CTF_int::cdealloc(dim_len);
390  return topo;
391  } else {
392  int order;
393  dim_len = (int*)CTF_int::alloc((log2(np)+1)*sizeof(int));
394  factorize(np, &order, &dim_len);
395  topo = new topology(order, dim_len, glb_comm, 1);
396  return topo;
397  }
398  }
399 
410  std::vector< topology* > get_all_topos(CommData cdt, int n_uf, int const * uniq_fact, int const * mults, int n_prepend, int const * prelens){
411  std::vector<topology*> topos;
412 
413  int num_divisors = 1;
414  for (int i=0; i<n_uf; i++){
415  num_divisors *= (1+mults[i]);
416  ASSERT(num_divisors < 1E6);
417  }
418 
419  if (num_divisors == 1){
420  topos.push_back(new topology(n_prepend, prelens, cdt));
421  return topos;
422  }
423  int sub_mults[n_uf];
424  int new_prelens[n_prepend+1];
425  memcpy(new_prelens, prelens, n_prepend*sizeof(int));
426  //FIXME: load may be highly imbalanced
427  //for (int div=cdt.rank; div<num_divisors; div+=cdt.np)
428  for (int div=1; div<num_divisors; div++){
429  //memcpy(sub_mults, mults, n_uf*sizeof(int));
430  int dmults[n_uf];
431  int len0 = 1;
432  int idiv = div;
433  for (int i=0; i<n_uf; i++){
434  dmults[i] = idiv%(1+mults[i]);
435  sub_mults[i] = mults[i]-dmults[i];
436  idiv = idiv/(1+mults[i]);
437  len0 *= std::pow(uniq_fact[i], dmults[i]);
438  }
439  new_prelens[n_prepend] = len0;
440  std::vector< topology* > new_topos = get_all_topos(cdt, n_uf, uniq_fact, sub_mults, n_prepend+1, new_prelens);
441  //FIXME call some append function?
442  for (unsigned i=0; i<new_topos.size(); i++){
443  topos.push_back(new_topos[i]);
444  }
445  }
446  return topos;
447  }
448 
449  std::vector< topology* > get_generic_topovec(CommData cdt){
450  std::vector<topology*> topovec;
451 
452  int nfact, * factors;
453  factorize(cdt.np, &nfact, &factors);
454  if (nfact <= 1){
455  topovec.push_back(new topology(nfact, factors, cdt));
456  if (cdt.np >= 7 && cdt.rank == 0)
457  DPRINTF(1,"CTF WARNING: using a world with a prime number of processors may lead to very bad performance\n");
458  if (nfact > 0) cdealloc(factors);
459  return topovec;
460  }
461  std::sort(factors,factors+nfact);
462  int n_uf = 1;
463  assert(factors[0] != 1);
464  for (int i=1; i<nfact; i++){
465  if (factors[i] != factors[i-1]) n_uf++;
466  }
467  if (n_uf >= 3){
468  if (cdt.rank == 0)
469  DPRINTF(1,"CTF WARNING: using a world with a number of processors that contains 3 or more unique prime factors may lead to suboptimal performance, when possible use p=2^k3^l processors for some k,l\n");
470  }
471  int uniq_fact[n_uf];
472  int mults[n_uf];
473  int i_uf = 0;
474  uniq_fact[0] = factors[0];
475  mults[0] = 1;
476  for (int i=1; i<nfact; i++){
477  if (factors[i] != factors[i-1]){
478  i_uf++;
479  uniq_fact[i_uf] = factors[i];
480  mults[i_uf] = 1;
481  } else mults[i_uf]++;
482  }
483  cdealloc(factors);
484  return get_all_topos(cdt, n_uf, uniq_fact, mults, 0, NULL);
485  }
486 
487 
488  std::vector< topology* > peel_perm_torus(topology * phys_topology,
489  CommData cdt){
490  std::vector<topology*> topovec;
491  std::vector<topology*> perm_vec;
492  perm_vec.push_back(phys_topology);
493  bool changed;
494  /*int i=0;
495  do {
496  for (int j=0; j< perm_vec[i]->order;
497  } while(i<perm_vec.size();*/
498  do {
499 // printf("HERE %d %d %d %d\n",perm_vec[0]->order, perm_vec.size(), perm_vec[0]->lens[0], perm_vec[0]->lens[1]);
500  changed = false;
501  for (int i=0; i<(int)perm_vec.size(); i++){
502  for (int j=0; j<perm_vec[i]->order; j++){
503  if (perm_vec[i]->lens[j] != 2){
504  for (int k=0; k<perm_vec[i]->order; k++){
505  if (j!=k && perm_vec[i]->lens[j] != perm_vec[i]->lens[k]){
506  int new_lens[perm_vec[i]->order];
507  memcpy(new_lens,perm_vec[i]->lens,perm_vec[i]->order*sizeof(int));
508  new_lens[j] = perm_vec[i]->lens[k];
509  new_lens[k] = perm_vec[i]->lens[j];
510  topology * new_topo = new topology(perm_vec[i]->order, new_lens, cdt);
511  /*for (int z=0; z<new_topo->order; z++){
512  printf("%d %d %d adding topo %d with len[%d] = %d %d\n",i,j,k,perm_vec.size(),z,new_topo->lens[z], new_lens[z]);
513  }*/
514  if (find_topology(new_topo, perm_vec) == -1){
515  perm_vec.push_back(new_topo);
516  changed=true;
517  } else delete new_topo;
518  }
519  }
520  }
521  }
522  }
523  } while (changed);
524  topovec = peel_torus(perm_vec[0], cdt);
525  for (int i=1; i<(int)perm_vec.size(); i++){
526  std::vector<topology*> temp_vec = peel_torus(perm_vec[i], cdt);
527  for (int j=0; j<(int)temp_vec.size(); j++){
528  if (find_topology(temp_vec[j], topovec) == -1){
529  topovec.push_back(temp_vec[j]);
530  } else delete temp_vec[j];
531  }
532  delete perm_vec[i];
533  }
534  return topovec;
535  }
536 
537  std::vector< topology* > peel_torus(topology const * topo,
539  std::vector< topology* > topos;
540  topos.push_back(new topology(*topo));
541 
542  if (topo->order <= 1) return topos;
543 
544  int * new_lens = (int*)alloc(sizeof(int)*topo->order-1);
545 
546  for (int i=0; i<topo->order-1; i++){
547  for (int j=0; j<i; j++){
548  new_lens[j] = topo->lens[j];
549  }
550  new_lens[i] = topo->lens[i]*topo->lens[i+1];
551  for (int j=i+2; j<topo->order; j++){
552  new_lens[j-1] = topo->lens[j];
553  }
554  topology* new_topo = new topology(topo->order-1, new_lens, glb_comm);
555  topos.push_back(new_topo);
556  }
557  cdealloc(new_lens);
558  for (int i=1; i<(int)topos.size(); i++){
559  std::vector< topology* > more_topos = peel_torus(topos[i], glb_comm);
560  for (int j=0; j<(int)more_topos.size(); j++){
561  if (find_topology(more_topos[j], topos) == -1)
562  topos.push_back(more_topos[j]);
563  else
564  delete more_topos[j];
565  }
566  more_topos.clear();
567  }
568  return topos;
569  }
570 
571  int find_topology(topology const * topo,
572  std::vector< topology* > & topovec){
573  int i, j, found;
574  std::vector< topology* >::iterator iter;
575 
576  found = -1;
577  for (j=0, iter=topovec.begin(); iter!=topovec.end(); iter++, j++){
578  if ((*iter)->order == topo->order){
579  found = j;
580  for (i=0; i<(*iter)->order; i++) {
581  if ((*iter)->lens[i] != topo->lens[i]){
582  found = -1;
583  }
584  }
585  }
586  if (found != -1) return found;
587  }
588  return -1;
589  }
590 
591  int get_best_topo(int64_t nvirt,
592  int topo,
593  CommData global_comm,
594  int64_t bcomm_vol,
595  int64_t bmemuse){
596  int64_t gnvirt, nv, gcomm_vol, gmemuse, bv;
597  int btopo, gtopo;
598  nv = nvirt;
599  MPI_Allreduce(&nv, &gnvirt, 1, MPI_INT64_T, MPI_MIN, global_comm.cm);
600  ASSERT(gnvirt <= nvirt);
601 
602  nv = bcomm_vol;
603  bv = bmemuse;
604  if (nvirt == gnvirt){
605  btopo = topo;
606  } else {
607  btopo = INT_MAX;
608  nv = INT64_MAX;
609  bv = INT64_MAX;
610  }
611  MPI_Allreduce(&nv, &gcomm_vol, 1, MPI_INT64_T, MPI_MIN, global_comm.cm);
612  if (bcomm_vol != gcomm_vol){
613  btopo = INT_MAX;
614  bv = INT64_MAX;
615  }
616  MPI_Allreduce(&bv, &gmemuse, 1, MPI_INT64_T, MPI_MIN, global_comm.cm);
617  if (bmemuse != gmemuse){
618  btopo = INT_MAX;
619  }
620  MPI_Allreduce(&btopo, &gtopo, 1, MPI_INT, MPI_MIN, global_comm.cm);
621  /*printf("nvirt = " PRIu64 " bcomm_vol = " PRIu64 " bmemuse = " PRIu64 " topo = %d\n",
622  nvirt, bcomm_vol, bmemuse, topo);
623  printf("gnvirt = " PRIu64 " gcomm_vol = " PRIu64 " gmemuse = " PRIu64 " bv = " PRIu64 " nv = " PRIu64 " gtopo = %d\n",
624  gnvirt, gcomm_vol, gmemuse, bv, nv, gtopo);*/
625 
626  return gtopo;
627  }
628  void extract_free_comms(topology const * topo,
629  int order_A,
630  mapping const * edge_map_A,
631  int order_B,
632  mapping const * edge_map_B,
633  int & num_sub_phys_dims,
634  CommData * * psub_phys_comm,
635  int ** pcomm_idx){
636  int i;
637  int phys_mapped[topo->order];
638  CommData * sub_phys_comm;
639  int * comm_idx;
640  mapping const * map;
641  memset(phys_mapped, 0, topo->order*sizeof(int));
642 
643  num_sub_phys_dims = 0;
644 
645  for (i=0; i<order_A; i++){
646  map = &edge_map_A[i];
647  while (map->type == PHYSICAL_MAP){
648  phys_mapped[map->cdt] = 1;
649  if (map->has_child) map = map->child;
650  else break;
651  }
652  }
653  for (i=0; i<order_B; i++){
654  map = &edge_map_B[i];
655  while (map->type == PHYSICAL_MAP){
656  phys_mapped[map->cdt] = 1;
657  if (map->has_child) map = map->child;
658  else break;
659  }
660  }
661 
662  num_sub_phys_dims = 0;
663  for (i=0; i<topo->order; i++){
664  if (phys_mapped[i] == 0){
665  num_sub_phys_dims++;
666  }
667  }
668  CTF_int::alloc_ptr(num_sub_phys_dims*sizeof(CommData), (void**)&sub_phys_comm);
669  CTF_int::alloc_ptr(num_sub_phys_dims*sizeof(int), (void**)&comm_idx);
670  num_sub_phys_dims = 0;
671  for (i=0; i<topo->order; i++){
672  if (phys_mapped[i] == 0){
673  sub_phys_comm[num_sub_phys_dims] = topo->dim_comm[i];
674  comm_idx[num_sub_phys_dims] = i;
675  num_sub_phys_dims++;
676  }
677  }
678  *pcomm_idx = comm_idx;
679  *psub_phys_comm = sub_phys_comm;
680 
681  }
682 
683  int can_morph(topology const * topo_keep,
684  topology const * topo_change){
685  int i, j, lda;
686  lda = 1;
687  j = 0;
688  for (i=0; i<topo_keep->order; i++){
689  lda *= topo_keep->dim_comm[i].np;
690  if (lda == topo_change->dim_comm[j].np){
691  j++;
692  lda = 1;
693  } else if (lda > topo_change->dim_comm[j].np){
694  return 0;
695  }
696  }
697  return 1;
698  }
699 
700  void morph_topo(topology const * new_topo,
701  topology const * old_topo,
702  int order,
703  mapping * edge_map){
704  int i,j,old_lda,new_np;
705  mapping * old_map, * new_map, * new_rec_map;
706 
707  for (i=0; i<order; i++){
708  if (edge_map[i].type == PHYSICAL_MAP){
709  old_map = &edge_map[i];
710  CTF_int::alloc_ptr(sizeof(mapping), (void**)&new_map);
711  new_rec_map = new_map;
712  for (;;){
713  old_lda = old_topo->lda[old_map->cdt];
714  new_np = 1;
715  do {
716  for (j=0; j<new_topo->order; j++){
717  if (new_topo->lda[j] == old_lda) break;
718  }
719  ASSERT(j!=new_topo->order);
720  new_rec_map->type = PHYSICAL_MAP;
721  new_rec_map->cdt = j;
722  new_rec_map->np = new_topo->dim_comm[j].np;
723  new_np *= new_rec_map->np;
724  if (new_np<old_map->np) {
725  old_lda = old_lda * new_rec_map->np;
726  new_rec_map->has_child = 1;
727  CTF_int::alloc_ptr(sizeof(mapping), (void**)&new_rec_map->child);
728  new_rec_map = new_rec_map->child;
729  }
730  } while (new_np<old_map->np);
731 
732  if (old_map->has_child){
733  if (old_map->child->type == VIRTUAL_MAP){
734  new_rec_map->has_child = 1;
735  CTF_int::alloc_ptr(sizeof(mapping), (void**)&new_rec_map->child);
736  new_rec_map->child->type = VIRTUAL_MAP;
737  new_rec_map->child->np = old_map->child->np;
738  new_rec_map->child->has_child = 0;
739  break;
740  } else {
741  new_rec_map->has_child = 1;
742  CTF_int::alloc_ptr(sizeof(mapping), (void**)&new_rec_map->child);
743  new_rec_map = new_rec_map->child;
744  old_map = old_map->child;
745  //continue
746  }
747  } else {
748  new_rec_map->has_child = 0;
749  break;
750  }
751  }
752  edge_map[i].clear();
753  edge_map[i] = *new_map;
754  CTF_int::cdealloc(new_map);
755  }
756  }
757  }
758 }
map_type type
Definition: mapping.h:22
#define DPRINTF(...)
Definition: util.h:235
def rank(self)
Definition: core.pyx:312
std::vector< topology * > peel_torus(topology const *topo, CommData glb_comm)
folds specified topology into all configurations of lesser dimensionality
Definition: topology.cxx:537
#define ASSERT(...)
Definition: util.h:88
void * alloc(int64_t len)
alloc abstraction
Definition: memcontrol.cxx:365
int find_topology(topology const *topo, std::vector< topology * > &topovec)
searches for an equivalent topology in avector of topologies
Definition: topology.cxx:571
int get_best_topo(int64_t nvirt, int topo, CommData global_comm, int64_t bcomm_vol, int64_t bmemuse)
get the best topologoes (least nvirt) over all procs
Definition: topology.cxx:591
void deactivate()
Definition: common.cxx:283
std::vector< topology * > peel_perm_torus(topology *phys_topology, CommData cdt)
folds specified topology and all of its permutations into all configurations of lesser dimensionality...
Definition: topology.cxx:488
topology * get_phys_topo(CommData glb_comm, TOPOLOGY mach)
get dimension and torus lengths of specified topology
Definition: topology.cxx:94
int alloc_ptr(int64_t len, void **const ptr)
alloc abstraction
Definition: memcontrol.cxx:320
CommData * dim_comm
Definition: topology.h:20
mapping * child
Definition: mapping.h:26
MPI_Comm cm
Definition: common.h:129
std::vector< topology * > get_all_topos(CommData cdt, int n_uf, int const *uniq_fact, int const *mults, int n_prepend, int const *prelens)
computes all unique factorizations into non-primes each yielding a topology, prepending additional fa...
Definition: topology.cxx:410
int can_morph(topology const *topo_keep, topology const *topo_change)
determines if two topologies are compatible with each other
Definition: topology.cxx:683
CommData glb_comm
Definition: topology.h:21
int cdealloc(void *ptr)
free abstraction
Definition: memcontrol.cxx:480
topology(topology const &other)
copy constructor
Definition: topology.cxx:28
void activate(MPI_Comm parent)
activate this subcommunicator by splitting parent_comm
Definition: common.cxx:272
void factorize(int n, int *nfactor, int **factor)
computes the size of a tensor in packed symmetric layout
Definition: util.cxx:170
#define MIN(a, b)
Definition: util.h:176
void extract_free_comms(topology const *topo, int order_A, mapping const *edge_map_A, int order_B, mapping const *edge_map_B, int &num_sub_phys_dims, CommData **psub_phys_comm, int **pcomm_idx)
extracts the set of physical dimensions still available for mapping
Definition: topology.cxx:628
std::vector< topology * > get_generic_topovec(CommData cdt)
computes all topology configurations given undelying physical topology information ...
Definition: topology.cxx:449
TOPOLOGY
Definition: topology.h:10
void clear()
resets mapping to NOT_MAPPED
Definition: mapping.cxx:94
def np(self)
Definition: core.pyx:315
void morph_topo(topology const *new_topo, topology const *old_topo, int order, mapping *edge_map)
morphs a tensor topology into another
Definition: topology.cxx:700