Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
scale_tsr.cxx

/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/

#include "../shared/util.h"
#include "scale_tsr.h"

namespace CTF_int {
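
  /* copies the generic scl fields (operand pointer and scaling factor) from another scl; the scratch buffer is not shared */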
  scl::scl(scl * other){
    A      = other->A;
    alpha  = other->alpha;
    buffer = NULL;
  }

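  /* deallocates scl_virt object */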
  scl_virt::~scl_virt() {
    CTF_int::cdealloc(virt_dim);
    delete rec_scl;
  }

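  /* copies a scl_virt object, cloning its recursive scaling kernel and virtual-block dimensions */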
  scl_virt::scl_virt(scl * other) : scl(other) {
    scl_virt * o = (scl_virt*)other;
    rec_scl   = o->rec_scl->clone();
    num_dim   = o->num_dim;
    virt_dim  = (int*)CTF_int::alloc(sizeof(int)*num_dim);
    memcpy(virt_dim, o->virt_dim, sizeof(int)*num_dim);

    order_A   = o->order_A;
    blk_sz_A  = o->blk_sz_A;
    idx_map_A = o->idx_map_A;
  }

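  /* returns a fresh copy of this scl object */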
  scl * scl_virt::clone() {
    return new scl_virt(this);
  }

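  /* returns the number of bytes of scratch buffer space needed by run() */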
  int64_t scl_virt::mem_fp(){
    return (order_A+2*num_dim)*sizeof(int);
  }

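  /* iterates over the dense grid of virtual blocks and applies the recursive scaling kernel to each block */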
  void scl_virt::run(){
    int * idx_arr, * lda_A;
    int * ilda_A;
    int i, off_A, nb_A, alloced, ret;
    TAU_FSTART(scl_virt);

    if (this->buffer != NULL){
      alloced = 0;
      idx_arr = (int*)this->buffer;
    } else {
      alloced = 1;
      ret = CTF_int::alloc_ptr(mem_fp(), (void**)&idx_arr);
      ASSERT(ret==0);
    }

    lda_A  = idx_arr + num_dim;
    ilda_A = lda_A + order_A;

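    /* SET_LDA_X computes the stride (lda) of each tensor dimension within the
       virtual block grid and accumulates per-index strides into ilda */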
#define SET_LDA_X(__X)                                    \
    do {                                                  \
      nb_##__X = 1;                                       \
      for (i=0; i<order_##__X; i++){                      \
        lda_##__X[i] = nb_##__X;                          \
        nb_##__X = nb_##__X*virt_dim[idx_map_##__X[i]];   \
      }                                                   \
      memset(ilda_##__X, 0, num_dim*sizeof(int));         \
      for (i=0; i<order_##__X; i++){                      \
        ilda_##__X[idx_map_##__X[i]] += lda_##__X[i];     \
      }                                                   \
    } while (0)
    SET_LDA_X(A);
#undef SET_LDA_X

    /* for (i=0; i<order_A; i++){
         printf("lda[%d] = %d idx_map_A[%d] = %d\n",i,lda_A[i],i,idx_map_A[i]);
       }
       for (i=0; i<num_dim; i++){
         printf("ilda[%d] = %d virt_dim[%d] = %d\n",i,ilda_A[i],i,virt_dim[i]);
       } */
    memset(idx_arr, 0, num_dim*sizeof(int));
    rec_scl->alpha = this->alpha;
    off_A = 0;
    for (;;){
      /* for (i=0; i<num_dim; i++){
           for (j=0; j<num_dim; j++){
             if (i!=j && idx_arr[i] != idx_arr[j] && idx_map[i] */
      rec_scl->A = this->A + off_A*blk_sz_A*sr_A->el_size;
      rec_scl->run();

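      /* odometer-style increment of the virtual block index, updating off_A incrementally */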
      for (i=0; i<num_dim; i++){
        off_A -= ilda_A[i]*idx_arr[i];
        idx_arr[i]++;
        if (idx_arr[i] >= virt_dim[i])
          idx_arr[i] = 0;
        off_A += ilda_A[i]*idx_arr[i];
        if (idx_arr[i] != 0) break;
      }
      if (i==num_dim) break;
    }
    if (alloced){
      CTF_int::cdealloc(idx_arr);
    }
    TAU_FSTOP(scl_virt);
  }

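  /* copies a seq_tsr_scl object, duplicating its edge-length array */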
  seq_tsr_scl::seq_tsr_scl(scl * other) : scl(other) {
    seq_tsr_scl * o = (seq_tsr_scl*)other;

    order     = o->order;
    idx_map   = o->idx_map;
    sym       = o->sym;
    edge_len  = (int*)CTF_int::alloc(sizeof(int)*order);
    memcpy(edge_len, o->edge_len, sizeof(int)*order);
    is_custom = o->is_custom;
    func      = o->func;
  }

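  /* returns a fresh copy of this seq_tsr_scl object */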
  scl * seq_tsr_scl::clone() {
    return new seq_tsr_scl(this);
  }

  int64_t seq_tsr_scl::mem_fp(){ return 0; }

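  /* dispatches to the custom (user endomorphism) or reference sequential symmetric scaling kernel */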
  void seq_tsr_scl::run(){
    if (is_custom)
      sym_seq_scl_cust(this->alpha,
                       this->A,
                       sr_A,
                       order,
                       edge_len,
                       sym,
                       idx_map,
                       func);
    else
      sym_seq_scl_ref(this->alpha,
                      this->A,
                      sr_A,
                      order,
                      edge_len,
                      sym,
                      idx_map);
  }

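  /* prints the seq_tsr_scl parameters */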
  void seq_tsr_scl::print(){
    int i;
    printf("seq_tsr_scl:\n");
    printf("is_custom = %d\n",is_custom);
    for (i=0; i<order; i++){
      printf("edge_len[%d]=%d\n",i,edge_len[i]);
    }
  }

}