#ifndef __DGTOG_BUCKET_H__
#define __DGTOG_BUCKET_H__

#include "../tensor/algstrct.h" // defines algstrct (include path assumed)

namespace CTF_int {
  /**
   * \brief Recursively buckets one dimension of a tensor block: for each index
   *        value of dimension idim, accumulate that dimension's bucket and
   *        data offsets, then recurse on dimension idim-1. The <0> base case
   *        below performs the actual element copies. sr is the algstrct
   *        (algebraic structure defining the elementwise operations computed
   *        in each tensor contraction): sr->el_size is the size of each
   *        element in bytes, and sr->copy(a,b) copies element b to element a.
   */
  template <int idim>
  void redist_bucket(int * const *        bucket_offset,
                     int64_t * const *    data_offset,
                     int * const *        ivmax_pre,
                     int                  rep_phase0,
                     int                  virt_dim0,
                     bool                 data_to_buckets,
                     char * __restrict__  data,
                     char ** __restrict__ buckets,
                     int64_t *            counts,
                     algstrct const *     sr,
                     int64_t              data_off,
                     int                  bucket_off,
                     int                  prev_idx){
    int ivmax = ivmax_pre[idim][prev_idx];
    for (int iv=0; iv <= ivmax; iv++){
      int rec_bucket_off = bucket_off + bucket_offset[idim][iv];
      int64_t rec_data_off = data_off + data_offset[idim][iv];
      redist_bucket<idim-1>(bucket_offset, data_offset, ivmax_pre, rep_phase0,
                            virt_dim0, data_to_buckets, data, buckets, counts,
                            sr, rec_data_off, rec_bucket_off, iv);
    }
  }
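  /* Illustrative sketch (added here, not part of the original header): for a
   * 3-index block, the recursion above unrolls to the loop nest
   *
   *   for (int iv2=0; iv2 <= ivmax_pre[2][prev_idx]; iv2++)
   *     for (int iv1=0; iv1 <= ivmax_pre[1][iv2]; iv1++)
   *       redist_bucket<0>(..., sr,
   *                        data_off   + data_offset[2][iv2]   + data_offset[1][iv1],
   *                        bucket_off + bucket_offset[2][iv2] + bucket_offset[1][iv1],
   *                        iv1);
   *
   * so per-dimension offset tables replace multidimensional index arithmetic,
   * and dimension 0 (the fastest-varying one) does the element copies.
   */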
  template <>
  void redist_bucket<0>(int * const *        bucket_offset,
                        int64_t * const *    data_offset,
                        int * const *        ivmax_pre,
                        int                  rep_phase0,
                        int                  virt_dim0,
                        bool                 data_to_buckets,
                        char * __restrict__  data,
                        char ** __restrict__ buckets,
                        int64_t *            counts,
                        algstrct const *     sr,
                        int64_t              data_off,
                        int                  bucket_off,
                        int                  prev_idx){
    int ivmax = ivmax_pre[0][prev_idx]+1;
    if (virt_dim0 == 1){
      // fast path: with a single virtual block, all elements of replication
      // class i form a stride-rep_phase0 subsequence of data headed for one
      // bucket, so each class moves with a single strided copy
      if (data_to_buckets){
        for (int i=0; i<rep_phase0; i++){
          // number of indices i, i+rep_phase0, i+2*rep_phase0, ... below ivmax
          int n = (ivmax-i+rep_phase0-1)/rep_phase0;
          if (n > 0){
            int bucket = bucket_off + bucket_offset[0][i];
            sr->copy(n,
                     data + sr->el_size*(data_off+i), rep_phase0,
                     buckets[bucket] + sr->el_size*counts[bucket], 1);
            counts[bucket] += n;
          }
        }
      } else {
        for (int i=0; i<rep_phase0; i++){
          int n = (ivmax-i+rep_phase0-1)/rep_phase0;
          if (n > 0){
            int bucket = bucket_off + bucket_offset[0][i];
            sr->copy(n,
                     buckets[bucket] + sr->el_size*counts[bucket], 1,
                     data + sr->el_size*(data_off+i), rep_phase0);
            counts[bucket] += n;
          }
        }
      }
    } else {
      // general path: move one element at a time, resolving each index's
      // bucket and data offset through the dimension-0 lookup tables
      if (data_to_buckets){
        for (int iv=0; iv < ivmax; iv++){
          int bucket = bucket_off + bucket_offset[0][iv];
          sr->copy(buckets[bucket] + sr->el_size*counts[bucket],
                   data + sr->el_size*(data_off+data_offset[0][iv]));
          counts[bucket]++;
        }
      } else {
        for (int iv=0; iv < ivmax; iv++){
          int bucket = bucket_off + bucket_offset[0][iv];
          sr->copy(data + sr->el_size*(data_off+data_offset[0][iv]),
                   buckets[bucket] + sr->el_size*counts[bucket]);
          counts[bucket]++;
        }
      }
    }
  }
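  /* Worked example (illustrative, not from the original source): with
   * rep_phase0 = 3 and ivmax = 8, replication class i = 1 owns indices
   * iv = 1, 4, 7, and indeed n = (8-1+3-1)/3 = 3 (ceiling division). The
   * strided overload sr->copy(n, src, inc_src, dst, inc_dst) used above
   * (source first, judging from the data_to_buckets case) behaves like the
   * following plain-memory sketch for a trivial algstrct:
   *
   *   for (int64_t k=0; k<n; k++)
   *     memcpy(dst + k*inc_dst*el_size,
   *            src + k*inc_src*el_size, el_size);
   *
   * so data_to_buckets gathers a stride-rep_phase0 subsequence of data into
   * contiguous bucket storage, and the reverse case scatters it back.
   */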
  // Non-recursive variant for when the replication index of dimension 0 has
  // already been fixed to rep_idx0: only the residue class
  // iv == rep_idx0 (mod rep_phase0) is bucketed, and everything targets the
  // single bucket bucket_off (bucket_offset[0] is not consulted).
  void redist_bucket_r0(int * const *        bucket_offset,
                        int64_t * const *    data_offset,
                        int * const *        ivmax_pre,
                        int                  rep_phase0,
                        int                  rep_idx0,
                        int                  virt_dim0,
                        bool                 data_to_buckets,
                        char * __restrict__  data,
                        char ** __restrict__ buckets,
                        int64_t *            counts,
                        algstrct const *     sr,
                        int64_t              data_off,
                        int                  bucket_off,
                        int                  prev_idx){
    int ivmax = ivmax_pre[0][prev_idx]+1;
    if (virt_dim0 == 1){
      if (data_to_buckets){
        int i = rep_idx0;
        int n = (ivmax-i+rep_phase0-1)/rep_phase0;
        if (n > 0){
          int bucket = bucket_off;
          sr->copy(n,
                   data + sr->el_size*(data_off+i), rep_phase0,
                   buckets[bucket] + sr->el_size*counts[bucket], 1);
          counts[bucket] += n;
        }
      } else {
        int i = rep_idx0;
        int n = (ivmax-i+rep_phase0-1)/rep_phase0;
        if (n > 0){
          int bucket = bucket_off;
          sr->copy(n,
                   buckets[bucket] + sr->el_size*counts[bucket], 1,
                   data + sr->el_size*(data_off+i), rep_phase0);
          counts[bucket] += n;
        }
      }
    } else {
      if (data_to_buckets){
        for (int iv=rep_idx0; iv < ivmax; iv+=rep_phase0){
          int bucket = bucket_off;
          sr->copy(buckets[bucket] + sr->el_size*counts[bucket],
                   data + sr->el_size*(data_off+data_offset[0][iv]));
          counts[bucket]++;
        }
      } else {
        for (int iv=rep_idx0; iv < ivmax; iv+=rep_phase0){
          int bucket = bucket_off;
          sr->copy(data + sr->el_size*(data_off+data_offset[0][iv]),
                   buckets[bucket] + sr->el_size*counts[bucket]);
          counts[bucket]++;
        }
      }
    }
  }
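  /* Illustrative note (not from the original source): redist_bucket_r0 is the
   * single-residue-class counterpart of redist_bucket<0>. With rep_phase0 = 3,
   * rep_idx0 = 1 and ivmax = 8, it touches only iv = 1, 4, 7, all landing in
   * bucket bucket_off, whereas redist_bucket<0> sweeps every class i = 0..2
   * and offsets each by bucket_offset[0][i]. A caller that has already fixed
   * dimension 0's replication index would invoke it roughly as
   *
   *   redist_bucket_r0(bucket_offset, data_offset, ivmax_pre,
   *                    rep_phase0, rep_idx0, virt_dim0, data_to_buckets,
   *                    data, buckets, counts, sr,
   *                    data_off, bucket_off, prev_idx);
   */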
} // end namespace CTF_int

#endif