Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
dgtog_bucket.h
Go to the documentation of this file.
1 #ifndef __DGTOG_BUCKET_H__
2 #define __DGTOG_BUCKET_H__
3 
4 
5 namespace CTF_int {
6  template <int idim>
7  void redist_bucket(int * const * bucket_offset,
8  int64_t * const * data_offset,
9  int * const * ivmax_pre,
10  int rep_phase0,
11  int virt_dim0,
12  bool data_to_buckets,
13  char * __restrict__ data,
14  char ** __restrict__ buckets,
15  int64_t * counts,
16  algstrct const * sr,
17  int64_t data_off,
18  int bucket_off,
19  int prev_idx){
20  int ivmax = ivmax_pre[idim][prev_idx];
21  for (int iv=0; iv <= ivmax; iv++){
22  int rec_bucket_off = bucket_off + bucket_offset[idim][iv];
23  int64_t rec_data_off = data_off + data_offset[idim][iv];
24  redist_bucket<idim-1>(bucket_offset, data_offset, ivmax_pre, rep_phase0, virt_dim0, data_to_buckets, data, buckets, counts, sr, rec_data_off, rec_bucket_off, iv);
25  }
26  }
27 
28 
29  template <>
30  void redist_bucket<0>(int * const * bucket_offset,
31  int64_t * const * data_offset,
32  int * const * ivmax_pre,
33  int rep_phase0,
34  int virt_dim0,
35  bool data_to_buckets,
36  char * __restrict__ data,
37  char ** __restrict__ buckets,
38  int64_t * counts,
39  algstrct const * sr,
40  int64_t data_off,
41  int bucket_off,
42  int prev_idx){
43  int ivmax = ivmax_pre[0][prev_idx]+1;
44  if (virt_dim0 == 1){
45  if (data_to_buckets){
46  for (int i=0; i<rep_phase0; i++){
47  int n = (ivmax-i+rep_phase0-1)/rep_phase0;
48  if (n>0){
49  int bucket = bucket_off + bucket_offset[0][i];
50  //printf("ivmax = %d bucket_off = %d, bucket = %d, counts[bucket] = %ld, n= %d data_off = %ld, rep_phase=%d\n",
51  // ivmax, bucket_off, bucket, counts[bucket], n, data_off, rep_phase0);
52  sr->copy(n,
53  data + sr->el_size*(data_off+i), rep_phase0,
54  buckets[bucket] + sr->el_size*counts[bucket], 1);
55  counts[bucket] += n;
56  }
57  }
58  } else {
59  for (int i=0; i<rep_phase0; i++){
60  int n = (ivmax-i+rep_phase0-1)/rep_phase0;
61  if (n>0){
62  int bucket = bucket_off + bucket_offset[0][i];
63  sr->copy(n,
64  buckets[bucket] + sr->el_size*counts[bucket], 1,
65  data + sr->el_size*(data_off+i), rep_phase0);
66  counts[bucket] += n;
67  }
68  }
69  }
70  } else {
71  if (data_to_buckets){
72  for (int iv=0; iv < ivmax; iv++){
73  int bucket = bucket_off + bucket_offset[0][iv];
74  sr->copy(buckets[bucket] + sr->el_size*counts[bucket],
75  data + sr->el_size*(data_off+data_offset[0][iv]));
76  counts[bucket]++;
77  }
78  } else {
79  for (int iv=0; iv < ivmax; iv++){
80  int bucket = bucket_off + bucket_offset[0][iv];
81  sr->copy(data + sr->el_size*(data_off+data_offset[0][iv]),
82  buckets[bucket] + sr->el_size*counts[bucket]);
83  counts[bucket]++;
84  }
85  }
86  }
87  }
88 
89 
90  void redist_bucket_r0(int * const * bucket_offset,
91  int64_t * const * data_offset,
92  int * const * ivmax_pre,
93  int rep_phase0,
94  int rep_idx0,
95  int virt_dim0,
96  bool data_to_buckets,
97  char * __restrict__ data,
98  char ** __restrict__ buckets,
99  int64_t * counts,
100  algstrct const * sr,
101  int64_t data_off,
102  int bucket_off,
103  int prev_idx){
104  int ivmax = ivmax_pre[0][prev_idx]+1;
105  //printf("ivmax = %d, rep_phase0 = %d data_off = %ld\n",ivmax, rep_phase0, data_off);
106  if (virt_dim0 == 1){
107  if (data_to_buckets){
108  int i=rep_idx0;
109  {
110  int n = (ivmax-i+rep_phase0-1)/rep_phase0;
111  if (n>0){
112  int bucket = bucket_off;
113  //printf("ivmax = %d bucket_off = %d, bucket = %d, counts[bucket] = %ld, n= %d data_off = %ld, rep_phase=%d\n",
114  // ivmax, bucket_off, bucket, counts[bucket], n, data_off, rep_phase0);
115  sr->copy(n,
116  data + sr->el_size*(data_off+i), rep_phase0,
117  buckets[bucket] + sr->el_size*counts[bucket], 1);
118  counts[bucket] += n;
119  }
120  }
121  } else {
122  int i=rep_idx0;
123  {
124  int n = (ivmax-i+rep_phase0-1)/rep_phase0;
125  if (n>0){
126  int bucket = bucket_off;
127  sr->copy(n,
128  buckets[bucket] + sr->el_size*counts[bucket], 1,
129  data + sr->el_size*(data_off+i), rep_phase0);
130  counts[bucket] += n;
131  }
132  }
133  }
134  } else {
135  if (data_to_buckets){
136  for (int iv=rep_idx0; iv < ivmax; iv+=rep_phase0){
137  int bucket = bucket_off;
138  sr->copy(buckets[bucket] + sr->el_size*counts[bucket],
139  data + sr->el_size*(data_off+data_offset[0][iv]));
140  counts[bucket]++;
141  }
142  } else {
143  for (int iv=rep_idx0; iv < ivmax; iv+=rep_phase0){
144  int bucket = bucket_off;
145  sr->copy(data + sr->el_size*(data_off+data_offset[0][iv]),
146  buckets[bucket] + sr->el_size*counts[bucket]);
147  counts[bucket]++;
148  }
149  }
150  }
151  }
152 }
153 #endif
void redist_bucket(int *const *bucket_offset, int64_t *const *data_offset, int *const *ivmax_pre, int rep_phase0, int virt_dim0, bool data_to_buckets, char *__restrict__ data, char **__restrict__ buckets, int64_t *counts, algstrct const *sr, int64_t data_off, int bucket_off, int prev_idx)
Definition: dgtog_bucket.h:7
virtual void copy(char *a, char const *b) const
copies element b to element a
Definition: algstrct.cxx:538
void redist_bucket< 0 >(int *const *bucket_offset, int64_t *const *data_offset, int *const *ivmax_pre, int rep_phase0, int virt_dim0, bool data_to_buckets, char *__restrict__ data, char **__restrict__ buckets, int64_t *counts, algstrct const *sr, int64_t data_off, int bucket_off, int prev_idx)
Definition: dgtog_bucket.h:30
int el_size
size of each element of algstrct in bytes
Definition: algstrct.h:16
algstrct (algebraic structure) defines the elementwise operations computed in each tensor contraction...
Definition: algstrct.h:34
void redist_bucket_r0(int *const *bucket_offset, int64_t *const *data_offset, int *const *ivmax_pre, int rep_phase0, int rep_idx0, int virt_dim0, bool data_to_buckets, char *__restrict__ data, char **__restrict__ buckets, int64_t *counts, algstrct const *sr, int64_t data_off, int bucket_off, int prev_idx)
Definition: dgtog_bucket.h:90