Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
util.cxx
Go to the documentation of this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
2 
3 #include <stdio.h>
4 #include <stdint.h>
5 #include "string.h"
6 #include <assert.h>
7 #include "util.h"
8 
9 namespace CTF_int {
10  int64_t sy_packed_size(const int order, const int* len, const int* sym){
11  int i, k, mp;
12  int64_t size, tmp;
13 
14  if (order == 0) return 1;
15 
16  k = 1;
17  tmp = 1;
18  size = 1;
19  mp = len[0];
20  for (i = 0;i < order;i++){
21  tmp = (tmp * mp) / k;
22  k++;
23  mp ++;
24 
25  if (sym[i] == 0){
26  size *= tmp;
27  k = 1;
28  tmp = 1;
29  if (i < order - 1) mp = len[i + 1];
30  }
31  }
32  size *= tmp;
33 
34  return size;
35  }
36 
37 
38  int64_t packed_size(const int order, const int* len, const int* sym){
39 
40  int i, k, mp;
41  int64_t size, tmp;
42 
43  if (order == 0) return 1;
44 
45  k = 1;
46  tmp = 1;
47  size = 1;
48  if (order > 0)
49  mp = len[0];
50  else
51  mp = 1;
52  for (i = 0;i < order;i++){
53  tmp = (tmp * mp) / k;
54  k++;
55  if (sym[i] != 1)
56  mp--;
57  else
58  mp ++;
59 
60  if (sym[i] == 0){
61  size *= tmp;
62  k = 1;
63  tmp = 1;
64  if (i < order - 1) mp = len[i + 1];
65  }
66  }
67  size *= tmp;
68 
69  return size;
70  }
71 
72  void calc_idx_arr(int order,
73  int const * lens,
74  int const * sym,
75  int64_t idx,
76  int * idx_arr){
77  int64_t idx_rem = idx;
78  memset(idx_arr, 0, order*sizeof(int));
79  for (int dim=order-1; dim>=0; dim--){
80  if (idx_rem == 0) break;
81  if (dim == 0 || sym[dim-1] == NS){
82  int64_t lda = packed_size(dim, lens, sym);
83  idx_arr[dim] = idx_rem/lda;
84  idx_rem -= idx_arr[dim]*lda;
85  } else {
86  int plen[dim+1];
87  memcpy(plen, lens, (dim+1)*sizeof(int));
88  int sg = 2;
89  int fsg = 2;
90  while (dim >= sg && sym[dim-sg] != NS) { sg++; fsg*=sg; }
91  int64_t lda = packed_size(dim-sg+1, lens, sym);
92  double fsg_idx = (((double)idx_rem)*fsg)/lda;
93  int kidx = (int)pow(fsg_idx,1./sg);
94  //if (sym[dim-1] != SY)
95  kidx += sg+1;
96  int mkidx = kidx;
97  #if DEBUG >= 1
98  for (int idim=dim-sg+1; idim<=dim; idim++){
99  plen[idim] = mkidx+1;
100  }
101  int64_t smidx = packed_size(dim+1, plen, sym);
102  ASSERT(smidx > idx_rem);
103  #endif
104  int64_t midx = 0;
105  for (; mkidx >= 0; mkidx--){
106  for (int idim=dim-sg+1; idim<=dim; idim++){
107  plen[idim] = mkidx;
108  }
109  midx = packed_size(dim+1, plen, sym);
110  if (midx <= idx_rem) break;
111  }
112  if (midx == 0) mkidx = 0;
113  idx_arr[dim] = mkidx;
114  idx_rem -= midx;
115  }
116  }
117  ASSERT(idx_rem == 0);
118  }
119 
120 
121  void sy_calc_idx_arr(int order,
122  int const * lens,
123  int const * sym,
124  int64_t idx,
125  int * idx_arr){
126  int64_t idx_rem = idx;
127  memset(idx_arr, 0, order*sizeof(int));
128  for (int dim=order-1; dim>=0; dim--){
129  if (idx_rem == 0) break;
130  if (dim == 0 || sym[dim-1] == NS){
131  int64_t lda = sy_packed_size(dim, lens, sym);
132  idx_arr[dim] = idx_rem/lda;
133  idx_rem -= idx_arr[dim]*lda;
134  } else {
135  int plen[dim+1];
136  memcpy(plen, lens, (dim+1)*sizeof(int));
137  int sg = 2;
138  int fsg = 2;
139  while (dim >= sg && sym[dim-sg] != NS) { sg++; fsg*=sg; }
140  int64_t lda = sy_packed_size(dim-sg+1, lens, sym);
141  double fsg_idx = (((double)idx_rem)*fsg)/lda;
142  int kidx = (int)pow(fsg_idx,1./sg);
143  //if (sym[dim-1] != SY)
144  kidx += sg+1;
145  int mkidx = kidx;
146  #if DEBUG >= 1
147  for (int idim=dim-sg+1; idim<=dim; idim++){
148  plen[idim] = mkidx+1;
149  }
150  int64_t smidx = sy_packed_size(dim+1, plen, sym);
151  ASSERT(smidx > idx_rem);
152  #endif
153  int64_t midx = 0;
154  for (; mkidx >= 0; mkidx--){
155  for (int idim=dim-sg+1; idim<=dim; idim++){
156  plen[idim] = mkidx;
157  }
158  midx = sy_packed_size(dim+1, plen, sym);
159  if (midx <= idx_rem) break;
160  }
161  if (midx == 0) mkidx = 0;
162  idx_arr[dim] = mkidx;
163  idx_rem -= midx;
164  }
165  }
166  ASSERT(idx_rem == 0);
167  }
168 
169 
170  void factorize(int n, int *nfactor, int **factor){
171  int tmp, nf, i;
172  int * ff;
173  tmp = n;
174  nf = 0;
175  while (tmp > 1){
176  for (i=2; i<=n; i++){
177  if (tmp % i == 0){
178  nf++;
179  tmp = tmp/i;
180  break;
181  }
182  }
183  }
184  if (nf == 0){
185  *nfactor = nf;
186  } else {
187  ff = (int*)CTF_int::alloc(sizeof(int)*nf);
188  tmp = n;
189  nf = 0;
190  while (tmp > 1){
191  for (i=2; i<=n; i++){
192  if (tmp % i == 0){
193  ff[nf] = i;
194  nf++;
195  tmp = tmp/i;
196  break;
197  }
198  }
199  }
200  *factor = ff;
201  *nfactor = nf;
202  }
203  }
204 
205  void permute(int order,
206  int const * perm,
207  int * arr){
208  int i;
209  int * swap;
210  CTF_int::alloc_ptr(order*sizeof(int), (void**)&swap);
211 
212  for (i=0; i<order; i++){
213  swap[i] = arr[perm[i]];
214  }
215  for (i=0; i<order; i++){
216  arr[i] = swap[i];
217  }
218 
219  CTF_int::cdealloc(swap);
220  }
221 
222  void permute_target(int order,
223  int const * perm,
224  int * arr){
225  int i;
226  int * swap;
227  CTF_int::alloc_ptr(order*sizeof(int), (void**)&swap);
228 
229  for (i=0; i<order; i++){
230  swap[i] = arr[perm[i]];
231  }
232  for (i=0; i<order; i++){
233  arr[i] = swap[i];
234  }
235 
236  CTF_int::cdealloc(swap);
237  }
238 
239 
240  void socopy(int64_t m,
241  int64_t n,
242  int64_t lda_a,
243  int64_t lda_b,
244  int64_t const * sizes_a,
245  int64_t *& sizes_b,
246  int64_t *& offsets_b){
247  sizes_b = (int64_t*)alloc(sizeof(int64_t)*m*n);
248  offsets_b = (int64_t*)alloc(sizeof(int64_t)*m*n);
249 
250  int64_t last_offset = 0;
251  for (int i=0; i<n; i++){
252  for (int j=0; j<m; j++){
253  sizes_b[lda_b*i+j] = sizes_a[lda_a*i+j];
254  offsets_b[lda_b*i+j] = last_offset;
255  last_offset = last_offset+sizes_a[lda_a*i+j];
256  }
257  }
258  }
259 
260  void spcopy(int64_t m,
261  int64_t n,
262  int64_t lda_a,
263  int64_t lda_b,
264  int64_t const * sizes_a,
265  int64_t const * offsets_a,
266  char const * a,
267  int64_t const * sizes_b,
268  int64_t const * offsets_b,
269  char * b){
270  for (int i=0; i<n; i++){
271  for (int j=0; j<m; j++){
272  memcpy(b+offsets_b[lda_b*i+j],a+offsets_a[lda_a*i+j],sizes_a[lda_a*i+j]);
273  }
274  }
275  }
276 
277  int64_t fact(int64_t n){
278  int64_t f = 1;
279  for (int64_t i=1; i<=n; i++){
280  f*=i;
281  }
282  return f;
283  }
284 
285  int64_t choose(int64_t n, int64_t k){
286  return fact(n)/(fact(k)*fact(n-k));
287  }
288 
289  void get_choice(int64_t n, int64_t k, int64_t ch, int * chs){
290  if (k==0) return;
291  if (k==1){
292  chs[0] = ch;
293  return;
294  }
295  int lens[k];
296  std::fill(lens, lens+k, n);
297  int sym[k];
298  std::fill(sym, sym+k-1, SH);
299  sym[k-1] = NS;
300 
301  calc_idx_arr(k,lens,sym,ch,chs);
302  //FIXME add 1?
303  }
304 
305  int64_t chchoose(int64_t n, int64_t k){
306  return fact(n+k-1)/(fact(k)*fact(n-1));
307  }
308 
309 
310 
311 }
void calc_idx_arr(int order, int const *lens, int const *sym, int64_t idx, int *idx_arr)
Definition: util.cxx:72
void permute(int order, int const *perm, int *arr)
permute an array
Definition: util.cxx:205
void get_choice(int64_t n, int64_t k, int64_t ch, int *chs)
Definition: util.cxx:289
#define ASSERT(...)
Definition: util.h:88
void * alloc(int64_t len)
alloc abstraction
Definition: memcontrol.cxx:365
Definition: common.h:37
int64_t fact(int64_t n)
Definition: util.cxx:277
int alloc_ptr(int64_t len, void **const ptr)
alloc abstraction
Definition: memcontrol.cxx:320
void sy_calc_idx_arr(int order, int const *lens, int const *sym, int64_t idx, int *idx_arr)
same as above except assumes sym only NS or SY
Definition: util.cxx:121
void permute_target(int order, int const *perm, int *arr)
permutes a permutation array
Definition: util.cxx:222
void socopy(int64_t m, int64_t n, int64_t lda_a, int64_t lda_b, int64_t const *sizes_a, int64_t *&sizes_b, int64_t *&offsets_b)
Definition: util.cxx:240
int cdealloc(void *ptr)
free abstraction
Definition: memcontrol.cxx:480
int64_t choose(int64_t n, int64_t k)
Definition: util.cxx:285
void factorize(int n, int *nfactor, int **factor)
computes the size of a tensor in packed symmetric layout
Definition: util.cxx:170
int64_t packed_size(int order, const int *len, const int *sym)
computes the size of a tensor in packed symmetric (SY, SH, or AS) layout
Definition: util.cxx:38
Definition: common.h:37
int64_t chchoose(int64_t n, int64_t k)
Definition: util.cxx:305
int64_t sy_packed_size(int order, const int *len, const int *sym)
computes the size of a tensor in SY (NOT HOLLOW) packed symmetric layout
Definition: util.cxx:10
void spcopy(int64_t m, int64_t n, int64_t lda_a, int64_t lda_b, int64_t const *sizes_a, int64_t const *offsets_a, char const *a, int64_t const *sizes_b, int64_t const *offsets_b, char *b)
Definition: util.cxx:260