Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
ctr_2d_general.cxx
/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/

#include "ctr_2d_general.h"
#include "../tensor/untyped_tensor.h"
#include "../mapping/mapping.h"
#include "../shared/util.h"
#include "../shared/offload.h"
#include <climits>

namespace CTF_int {

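  /**
   * \brief sets up a ctr_2d_general (2D SUMMA) level where A is not
   *        communicated; the function will be called with A/B/C permuted
   *        to cover the other cases
   * \return 1 if a communication level was set up along index i,
   *         0 if the index is mapped purely virtually and needs none
   */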
  int ctr_2d_gen_build(int         is_used,
                       CommData    global_comm,
                       int         i,
                       int *       virt_dim,
                       int &       cg_edge_len,
                       int &       total_iter,
                       tensor *    A,
                       int         i_A,
                       CommData *& cg_cdt_A,
                       int64_t &   cg_ctr_lda_A,
                       int64_t &   cg_ctr_sub_lda_A,
                       bool &      cg_move_A,
                       int *       blk_len_A,
                       int64_t &   blk_sz_A,
                       int const * virt_blk_len_A,
                       int &       load_phase_A,
                       tensor *    B,
                       int         i_B,
                       CommData *& cg_cdt_B,
                       int64_t &   cg_ctr_lda_B,
                       int64_t &   cg_ctr_sub_lda_B,
                       bool &      cg_move_B,
                       int *       blk_len_B,
                       int64_t &   blk_sz_B,
                       int const * virt_blk_len_B,
                       int &       load_phase_B,
                       tensor *    C,
                       int         i_C,
                       CommData *& cg_cdt_C,
                       int64_t &   cg_ctr_lda_C,
                       int64_t &   cg_ctr_sub_lda_C,
                       bool &      cg_move_C,
                       int *       blk_len_C,
                       int64_t &   blk_sz_C,
                       int const * virt_blk_len_C,
                       int &       load_phase_C){
    mapping * map;
    int j;
    int64_t nstep = 1;
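    /* if B and C are mapped the same way along this index, nothing needs to
       be communicated; just record the virtualization factor */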
    if (comp_dim_map(&C->edge_map[i_C], &B->edge_map[i_B])){
      map = &B->edge_map[i_B];
      while (map->has_child) map = map->child;
      if (map->type == VIRTUAL_MAP){
        virt_dim[i] = map->np;
      }
      return 0;
    } else {
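      /* if both B and C are virtually mapped along this index, the level is
         purely local as well */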
      if (B->edge_map[i_B].type == VIRTUAL_MAP &&
          C->edge_map[i_C].type == VIRTUAL_MAP){
        virt_dim[i] = B->edge_map[i_B].np;
        return 0;
      } else {
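        /* otherwise at least one of B/C is physically mapped here, and the
           communicated edge length is the lcm of their physical phases */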
        cg_edge_len = 1;
        if (B->edge_map[i_B].type == PHYSICAL_MAP){
          cg_edge_len = lcm(cg_edge_len, B->edge_map[i_B].calc_phase());
          cg_cdt_B = &B->topo->dim_comm[B->edge_map[i_B].cdt];
          /*if (is_used && cg_cdt_B.alive == 0)
            cg_cdt_B.activate(global_comm.cm);*/
          nstep = B->edge_map[i_B].calc_phase();
          cg_move_B = 1;
        } else
          cg_move_B = 0;
        if (C->edge_map[i_C].type == PHYSICAL_MAP){
          cg_edge_len = lcm(cg_edge_len, C->edge_map[i_C].calc_phase());
          cg_cdt_C = &C->topo->dim_comm[C->edge_map[i_C].cdt];
          /*if (is_used && cg_cdt_C.alive == 0)
            cg_cdt_C.activate(global_comm.cm);*/
          nstep = MAX(nstep, C->edge_map[i_C].calc_phase());
          cg_move_C = 1;
        } else
          cg_move_C = 0;
        cg_ctr_lda_A = 1;
        cg_ctr_sub_lda_A = 0;
        cg_move_A = 0;


        /* Adjust the block lengths, since this algorithm will cut
           the block into smaller ones of the min block length */
        /* Determine the LDA of this dimension, based on virtualization */
        cg_ctr_lda_B = 1;
        if (B->edge_map[i_B].type == PHYSICAL_MAP)
          cg_ctr_sub_lda_B = blk_sz_B*B->edge_map[i_B].np/cg_edge_len;
        else
          cg_ctr_sub_lda_B = blk_sz_B/cg_edge_len;
        for (j=i_B+1; j<B->order; j++) {
          cg_ctr_sub_lda_B = (cg_ctr_sub_lda_B *
                              virt_blk_len_B[j]) / blk_len_B[j];
          cg_ctr_lda_B = (cg_ctr_lda_B*blk_len_B[j])
                         /virt_blk_len_B[j];
        }
        cg_ctr_lda_C = 1;
        if (C->edge_map[i_C].type == PHYSICAL_MAP)
          cg_ctr_sub_lda_C = blk_sz_C*C->edge_map[i_C].np/cg_edge_len;
        else
          cg_ctr_sub_lda_C = blk_sz_C/cg_edge_len;
        for (j=i_C+1; j<C->order; j++) {
          cg_ctr_sub_lda_C = (cg_ctr_sub_lda_C *
                              virt_blk_len_C[j]) / blk_len_C[j];
          cg_ctr_lda_C = (cg_ctr_lda_C*blk_len_C[j])
                         /virt_blk_len_C[j];
        }
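        /* each block is split across the nstep steps of this level, so the
           per-step block sizes and lengths shrink accordingly (the printfs
           below only flag a degenerate split) */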
        if (B->edge_map[i_B].type != PHYSICAL_MAP){
          if (blk_sz_B / nstep == 0)
            printf("blk_len_B[%d] = %d, nstep = %ld blk_sz_B = %ld\n",i_B,blk_len_B[i_B],nstep,blk_sz_B);
          blk_sz_B = blk_sz_B / nstep;
          blk_len_B[i_B] = blk_len_B[i_B] / nstep;
        } else {
          if (blk_sz_B * B->edge_map[i_B].np / nstep == 0)
            printf("blk_len_B[%d] = %d B->edge_map[%d].np = %d, nstep = %ld blk_sz_B = %ld\n",i_B,blk_len_B[i_B],i_B,B->edge_map[i_B].np,nstep,blk_sz_B);
          blk_sz_B = blk_sz_B * B->edge_map[i_B].np / nstep;
          blk_len_B[i_B] = blk_len_B[i_B] * B->edge_map[i_B].np / nstep;
        }
        if (C->edge_map[i_C].type != PHYSICAL_MAP){
          if (blk_sz_C / nstep == 0)
            printf("blk_len_C[%d] = %d, nstep = %ld blk_sz_C = %ld\n",i_C,blk_len_C[i_C],nstep,blk_sz_C);
          blk_sz_C = blk_sz_C / nstep;
          blk_len_C[i_C] = blk_len_C[i_C] / nstep;
        } else {
          if (blk_sz_C * C->edge_map[i_C].np / nstep == 0)
            printf("blk_len_C[%d] = %d C->edge_map[%d].np = %d, nstep = %ld blk_sz_C = %ld\n",i_C,blk_len_C[i_C],i_C,C->edge_map[i_C].np,nstep,blk_sz_C);
          blk_sz_C = blk_sz_C * C->edge_map[i_C].np / nstep;
          blk_len_C[i_C] = blk_len_C[i_C] * C->edge_map[i_C].np / nstep;
        }

        if (B->edge_map[i_B].has_child){
          ASSERT(B->edge_map[i_B].child->type == VIRTUAL_MAP);
          virt_dim[i] = B->edge_map[i_B].np*B->edge_map[i_B].child->np/nstep;
        }
        if (C->edge_map[i_C].has_child) {
          ASSERT(C->edge_map[i_C].child->type == VIRTUAL_MAP);
          virt_dim[i] = C->edge_map[i_C].np*C->edge_map[i_C].child->np/nstep;
        }
        if (C->edge_map[i_C].type == VIRTUAL_MAP){
          virt_dim[i] = C->edge_map[i_C].np/nstep;
        }
        if (B->edge_map[i_B].type == VIRTUAL_MAP)
          virt_dim[i] = B->edge_map[i_B].np/nstep;
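        /* offload bookkeeping: an operand whose panel does not advance
           within this level (sub_lda == 0) picks up a factor of nstep in
           its load phase, otherwise the phase resets to 1 */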
#ifdef OFFLOAD
        total_iter *= nstep;
        if (cg_ctr_sub_lda_A == 0)
          load_phase_A *= nstep;
        else
          load_phase_A  = 1;
        if (cg_ctr_sub_lda_B == 0)
          load_phase_B *= nstep;
        else
          load_phase_B  = 1;
        if (cg_ctr_sub_lda_C == 0)
          load_phase_C *= nstep;
        else
          load_phase_C  = 1;
#endif
      }
    }
    return 1;
  }


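  /** \brief deallocates ctr_2d_general object */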
  ctr_2d_general::~ctr_2d_general() {
    /*if (move_A) cdt_A->deactivate();
    if (move_B) cdt_B->deactivate();
    if (move_C) cdt_C->deactivate();*/
    if (rec_ctr != NULL)
      delete rec_ctr;
  }

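  /** \brief copies ctr object */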
  ctr_2d_general::ctr_2d_general(ctr * other) : ctr(other) {
    ctr_2d_general * o = (ctr_2d_general*)other;
    rec_ctr       = o->rec_ctr->clone();
    edge_len      = o->edge_len;
    ctr_lda_A     = o->ctr_lda_A;
    ctr_sub_lda_A = o->ctr_sub_lda_A;
    cdt_A         = o->cdt_A;
    move_A        = o->move_A;
    ctr_lda_B     = o->ctr_lda_B;
    ctr_sub_lda_B = o->ctr_sub_lda_B;
    cdt_B         = o->cdt_B;
    move_B        = o->move_B;
    ctr_lda_C     = o->ctr_lda_C;
    ctr_sub_lda_C = o->ctr_sub_lda_C;
    cdt_C         = o->cdt_C;
    move_C        = o->move_C;
#ifdef OFFLOAD
    alloc_host_buf = o->alloc_host_buf;
#endif
  }

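  /** \brief prints ctr object */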
  void ctr_2d_general::print() {
    printf("ctr_2d_general: edge_len = %d\n", edge_len);
    printf("move_A = %d, ctr_lda_A = %ld, ctr_sub_lda_A = %ld\n",
           move_A, ctr_lda_A, ctr_sub_lda_A);
    if (move_A) printf("cdt_A length = %d\n",cdt_A->np);
    printf("move_B = %d, ctr_lda_B = %ld, ctr_sub_lda_B = %ld\n",
           move_B, ctr_lda_B, ctr_sub_lda_B);
    if (move_B) printf("cdt_B length = %d\n",cdt_B->np);
    printf("move_C = %d, ctr_lda_C = %ld, ctr_sub_lda_C = %ld\n",
           move_C, ctr_lda_C, ctr_sub_lda_C);
    if (move_C) printf("cdt_C length = %d\n",cdt_C->np);
#ifdef OFFLOAD
    if (alloc_host_buf)
      printf("alloc_host_buf is true\n");
    else
      printf("alloc_host_buf is false\n");
#endif
    rec_ctr->print();
  }

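  /** \brief returns a copy of this ctr object */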
  ctr * ctr_2d_general::clone() {
    return new ctr_2d_general(this);
  }

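  /** \brief determines buffer and block sizes needed for ctr_2d_general */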
  void ctr_2d_general::find_bsizes(int64_t & b_A,
                                   int64_t & b_B,
                                   int64_t & b_C,
                                   int64_t & s_A,
                                   int64_t & s_B,
                                   int64_t & s_C,
                                   int64_t & aux_size){
    b_A = 0, b_B = 0, b_C = 0;
    s_A = ctr_sub_lda_A*ctr_lda_A;
    s_B = ctr_sub_lda_B*ctr_lda_B;
    s_C = ctr_lda_C*ctr_sub_lda_C;
    if (move_A){
      b_A = edge_len/cdt_A->np;
    }
    if (move_B){
      b_B = edge_len/cdt_B->np;
    }
    if (move_C){
      b_C = edge_len/cdt_C->np;
    }

    aux_size = MAX(move_A*sr_A->el_size*s_A, MAX(move_B*sr_B->el_size*s_B, move_C*sr_C->el_size*s_C));
  }

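  /** \brief estimates the communication time of this level: broadcast
      costs for moved A/B panels and a reduction cost for C, scaled over
      the edge_len steps */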
  double ctr_2d_general::est_time_fp(int nlyr) {
    int64_t b_A, b_B, b_C, s_A, s_B, s_C, aux_size;
    find_bsizes(b_A, b_B, b_C, s_A, s_B, s_C, aux_size);
    double est_comm_time = 0.0;
    if (move_A)
      est_comm_time += cdt_A->estimate_bcast_time(sr_A->el_size*s_A);
    if (move_B)
      est_comm_time += cdt_B->estimate_bcast_time(sr_B->el_size*s_B);
    if (move_C)
      est_comm_time += cdt_C->estimate_red_time(sr_C->el_size*s_C, sr_C->addmop());
    return (est_comm_time*(double)edge_len)/MIN(nlyr,edge_len);
  }

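  /** \brief estimates the time of this level plus all recursive levels */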
  double ctr_2d_general::est_time_rec(int nlyr) {
    return rec_ctr->est_time_rec(1)*(double)edge_len/MIN(nlyr,edge_len) + est_time_fp(nlyr);
  }

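  /** \brief returns the number of bytes of buffer space we need */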
  int64_t ctr_2d_general::mem_fp() {
    int64_t b_A, b_B, b_C, s_A, s_B, s_C, aux_size;
    find_bsizes(b_A, b_B, b_C, s_A, s_B, s_C, aux_size);
    return sr_A->el_size*s_A+sr_B->el_size*s_B+sr_C->el_size*s_C+aux_size;
  }

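  /** \brief returns the number of bytes of buffer space we need recursively */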
  int64_t ctr_2d_general::mem_rec() {
    return rec_ctr->mem_rec() + mem_fp();
  }

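  /**
   * \brief Basically doing SUMMA, except assumes equal block size on each
   *        processor. Performs rank-b updates.
   */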
  void ctr_2d_general::run(char * A, char * B, char * C){
    int owner_A, owner_B, owner_C, ret;
    int64_t ib;
    char * buf_A, * buf_B, * buf_C;
    char * op_A, * op_B, * op_C;
    int rank_A, rank_B, rank_C;
    int64_t b_A, b_B, b_C, s_A, s_B, s_C, aux_size;
    if (move_A) rank_A = cdt_A->rank;
    else rank_A = -1;
    if (move_B) rank_B = cdt_B->rank;
    else rank_B = -1;
    if (move_C) rank_C = cdt_C->rank;
    else rank_C = -1;

    TAU_FSTART(ctr_2d_general);

    /* Must move at most two tensors */
    ASSERT(!(move_A && move_B && move_C));

    rec_ctr->beta = this->beta;

    int iidx_lyr, inum_lyr;
    if (edge_len >= num_lyr && edge_len % num_lyr == 0){
      inum_lyr         = num_lyr;
      iidx_lyr         = idx_lyr;
      rec_ctr->num_lyr = 1;
      rec_ctr->idx_lyr = 0;
    } else if (edge_len < num_lyr && num_lyr % edge_len == 0){
      inum_lyr         = edge_len;
      iidx_lyr         = idx_lyr%edge_len;
      rec_ctr->num_lyr = num_lyr/edge_len;
      rec_ctr->idx_lyr = idx_lyr/edge_len;
    } else {
      rec_ctr->num_lyr = num_lyr;
      rec_ctr->idx_lyr = idx_lyr;
      inum_lyr         = 1;
      iidx_lyr         = 0;
    }


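    /* size the broadcast/reduction buffers for this level; pinned host
       buffers are used when the recursive kernel offloads to a device */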
    find_bsizes(b_A, b_B, b_C, s_A, s_B, s_C, aux_size);

#ifdef OFFLOAD
    if (alloc_host_buf){
      if (s_A > 0) host_pinned_alloc((void**)&buf_A, s_A*sr_A->el_size);
      if (s_B > 0) host_pinned_alloc((void**)&buf_B, s_B*sr_B->el_size);
      if (s_C > 0) host_pinned_alloc((void**)&buf_C, s_C*sr_C->el_size);
    }
#else
    if (0){
    }
#endif
    else {
      ret = CTF_int::mst_alloc_ptr(s_A*sr_A->el_size, (void**)&buf_A);
      ASSERT(ret==0);
      ret = CTF_int::mst_alloc_ptr(s_B*sr_B->el_size, (void**)&buf_B);
      ASSERT(ret==0);
      ret = CTF_int::mst_alloc_ptr(s_C*sr_C->el_size, (void**)&buf_C);
      ASSERT(ret==0);
    }
    //ret = CTF_int::mst_alloc_ptr(aux_size, (void**)&buf_aux);
    //ASSERT(ret==0);

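    /* main SUMMA loop: step through the edge_len panels assigned to this
       processor layer; at each step the owner broadcasts its panel of A
       and/or B, the recursive kernel multiplies, and C is reduced back to
       its owner when it moves */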
    //for (ib=this->idx_lyr; ib<edge_len; ib+=this->num_lyr){
#ifdef MICROBENCH
    for (ib=iidx_lyr; ib<edge_len; ib+=edge_len)
#else
    for (ib=iidx_lyr; ib<edge_len; ib+=inum_lyr)
#endif
    {
      if (move_A){
        owner_A = ib % cdt_A->np;
        if (rank_A == owner_A){
          if (b_A == 1){
            op_A = A;
          } else {
            op_A = buf_A;
            sr_A->copy(ctr_sub_lda_A, ctr_lda_A,
                       A+sr_A->el_size*(ib/cdt_A->np)*ctr_sub_lda_A, ctr_sub_lda_A*b_A,
                       op_A, ctr_sub_lda_A);
          }
        } else
          op_A = buf_A;
        cdt_A->bcast(op_A, s_A, sr_A->mdtype(), owner_A);
      } else {
        if (ctr_sub_lda_A == 0)
          op_A = A;
        else {
          if (ctr_lda_A == 1)
            op_A = A+sr_A->el_size*ib*ctr_sub_lda_A;
          else {
            op_A = buf_A;
            sr_A->copy(ctr_sub_lda_A, ctr_lda_A,
                       A+sr_A->el_size*ib*ctr_sub_lda_A, ctr_sub_lda_A*edge_len,
                       buf_A, ctr_sub_lda_A);
          }
        }
      }
      if (move_B){
        owner_B = ib % cdt_B->np;
        if (rank_B == owner_B){
          if (b_B == 1){
            op_B = B;
          } else {
            op_B = buf_B;
            sr_B->copy(ctr_sub_lda_B, ctr_lda_B,
                       B+sr_B->el_size*(ib/cdt_B->np)*ctr_sub_lda_B, ctr_sub_lda_B*b_B,
                       op_B, ctr_sub_lda_B);
          }
        } else
          op_B = buf_B;
//        printf("c_B = %ld, s_B = %ld, d_B = %ld, b_B = %ld\n", c_B, s_B, db, b_B);
        cdt_B->bcast(op_B, s_B, sr_B->mdtype(), owner_B);
      } else {
        if (ctr_sub_lda_B == 0)
          op_B = B;
        else {
          if (ctr_lda_B == 1){
            op_B = B+sr_B->el_size*ib*ctr_sub_lda_B;
          } else {
            op_B = buf_B;
            sr_B->copy(ctr_sub_lda_B, ctr_lda_B,
                       B+sr_B->el_size*ib*ctr_sub_lda_B, ctr_sub_lda_B*edge_len,
                       buf_B, ctr_sub_lda_B);
          }
        }
      }
      if (move_C){
        op_C = buf_C;
        //sr_C->set(op_C, sr_C->addid(), s_C);
        //rec_ctr->beta = sr_C->mulid();
        rec_ctr->beta = sr_C->addid();
      } else {
        if (ctr_sub_lda_C == 0)
          op_C = C;
        else {
          if (ctr_lda_C == 1)
            op_C = C+sr_C->el_size*ib*ctr_sub_lda_C;
          else {
            op_C = buf_C;
            //sr_C->set(op_C, sr_C->addid(), s_C);
            //rec_ctr->beta = sr_C->mulid();
            rec_ctr->beta = sr_C->addid();
          }
        }
      }
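      /* multiply the selected panels recursively, then reduce the C panel
         back to its owner if C moves along this level */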

      rec_ctr->run(op_A, op_B, op_C);

      /*for (int i=0; i<ctr_sub_lda_C*ctr_lda_C; i++){
        printf("[%d] P%d op_C[%d] = %lf\n",ctr_lda_C,idx_lyr,i, ((double*)op_C)[i]);
      }*/
      if (move_C){
        /* FIXME: Won't work for single precision */
        owner_C = ib % cdt_C->np;
        if (cdt_C->rank == owner_C)
          cdt_C->red(MPI_IN_PLACE, op_C, s_C, sr_C->mdtype(), sr_C->addmop(), owner_C);
        else
          cdt_C->red(op_C, NULL, s_C, sr_C->mdtype(), sr_C->addmop(), owner_C);
        if (rank_C == owner_C){
          sr_C->copy(ctr_sub_lda_C, ctr_lda_C,
                     op_C, ctr_sub_lda_C, sr_C->mulid(),
                     C+sr_C->el_size*(ib/cdt_C->np)*ctr_sub_lda_C,
                     ctr_sub_lda_C*b_C, this->beta);
        }
      } else {
        if (ctr_lda_C != 1 && ctr_sub_lda_C != 0)
          sr_C->copy(ctr_sub_lda_C, ctr_lda_C,
                     buf_C, ctr_sub_lda_C, sr_C->mulid(),
                     C+sr_C->el_size*ib*ctr_sub_lda_C,
                     ctr_sub_lda_C*edge_len, this->beta);
        if (ctr_sub_lda_C == 0)
          rec_ctr->beta = sr_C->mulid();
      }
/*      for (int i=0; i<ctr_sub_lda_C*ctr_lda_C*edge_len; i++){
        printf("[%d] P%d C[%d] = %lf\n",ctr_lda_C,idx_lyr,i, ((double*)C)[i]);
      }*/
    }
    /* FIXME: reuse that */
#ifdef OFFLOAD
    if (alloc_host_buf){
      if (s_A > 0) host_pinned_free(buf_A);
      if (s_B > 0) host_pinned_free(buf_B);
      if (s_C > 0) host_pinned_free(buf_C);
    }
#else
    if (0){
    }
#endif
    else {
      CTF_int::cdealloc(buf_A);
      CTF_int::cdealloc(buf_B);
      CTF_int::cdealloc(buf_C);
    }
    TAU_FSTOP(ctr_2d_general);
  }
}