Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
fft.cxx
Go to the documentation of this file.
1 
8 #include <ctf.hpp>
9 using namespace CTF;
10 
11 namespace CTF_int{
12  // factorizes n into nfactor factors
13  void factorize(int n, int *nfactor, int **factor);
14 }
15 
16 // gets ith power of nth imaginary root of the identity, w^i_n
17 std::complex<double> omega(int i, int n){
18  std::complex<double> imag(0,1);
19  return exp(-2.*i*(M_PI/n)*imag);
20 }
21 
22 // gets DFT matrix D(i,j) = w_n^(ij)
24  int64_t np;
25  int64_t * idx;
26  std::complex<double> * data;
27  Matrix < std::complex<double> >DFT(n, n, NS, wrld, "DFT");
28  DFT.get_local_data(&np, &idx, &data);
29 
30  for (int64_t i=0; i<np; i++){
31  data[i] = omega((idx[i]/n)*(idx[i]%n), n);
32  }
33  DFT.write(np, idx, data);
34  free(idx);
35  delete [] data;
36  return DFT;
37 }
38 
39 // gets twiddle factor matirx T = [1, 1; 1, w^1_n]
41  int64_t np;
42  int64_t * idx;
43  std::complex<double> * data;
44  Matrix < std::complex<double> >T(2, 2, NS, wrld, "T");
45  T.get_local_data(&np, &idx, &data);
46 
47  for (int64_t i=0; i<np; i++){
48  if (idx[i]<3) data[i] = std::complex<double>(1,0);
49  else data[i] = omega(1,n);
50  }
51  T.write(np, idx, data);
52  free(idx);
53  delete [] data;
54  return T;
55 
56 }
57 
58 void fft(Vector< std::complex<double> > & v, int n){
59  int64_t np;
60  int64_t * idx;
61  std::complex<double> * data;
62  // assert that n is a power of two
63  int logn=0;
64  while (1<<logn < n){
65  logn++;
66  }
67  assert(1<<logn == n);
68  // factorize n, factors will all just be 2 for now
69  int nfact;
70  int * factors;
71  CTF_int::factorize(n, &nfact, &factors);
72  assert(nfact == logn);
73 
74  // Fold v into log_2(n) tensor V
75  Tensor< std::complex<double> > V(nfact, factors, *v.wrld, *v.sr, "V");
76  v.get_local_data(&np, &idx, &data);
77  V.write(np, idx, data);
78  free(idx);
79  delete [] data;
80 
81  // define range of indices [a, b, c, ...]
82  char inds[nfact+1];
83  char rev_inds[nfact];
84  for (int i=0; i<nfact; i++){
85  inds[i]='a'+i;
86  rev_inds[i]='a'+nfact-i-1;
87  }
88  inds[nfact] = 'a';
89  // reverse tensor index order
90  // now that each FFT step transforms the lowest level modes, which should be faster
91  V[rev_inds] = V[inds];
92  for (int i=0; i<nfact; i++){
93  inds[nfact+i]='a'+i;
94  }
95 
96  Matrix< std::complex<double> > DFT = DFT_matrix(2,*v.wrld);
97  Tensor< std::complex<double> > S_old(1, factors, *v.wrld, *v.sr, "S");
98  S_old["i"] = 1;
99 
100  char inds_in[nfact];
101  char inds_DFT[2];
102  // m = 2^{i+1}
103  int m = 1;
104  // execute FFT from the bottom level of recursion upwards
105  for (int i=0; i<nfact; i++){
106  m*=2;
107  // S(m) when unfolded looks like [ 1 1 1 ... 1 ]
108  // [ w_m w_m^2 w_m^3 ... w_m^{m/2} ]
109  // it is used to scale the odd elements of the FFT, i.e., we compute recursively FFT(v) via
110  // for i=1 to m/2, A_i = sum_{j=0}^{m/2-1} v_{2j} *w_{m/2}^{ij}
111  // B_i = w_m^i sum_{j=0}^{m/2-1} v_{2j+1}*w_{m/2}^{ij}
112  // multiplying V by S will do the scaling of the elements of B_i by w_m^i
113  // we do it first, since we want to apply it to the sequences obtain recursively
114  // note that for i=0, m=2 the application of S(2) = [1; 1] is trivial
116  if (i==0){
117  S = S_old;
118  } else {
119  // we construct S(m) by the Kharti-Rao product of S(m/2) and the twiddle factor [1, 1; 1, w_m]
120  S = Tensor< std::complex<double> >(i+1, factors, *v.wrld, *v.sr, "S");
122  char inds_T[2] = {'a', inds[i]};
123  S[inds] = T[inds_T]*S_old[inds+1];
124  S_old = S;
125  }
126  V[inds] *= S[inds];
127 
128  // then we multiply by the 2-by-2 DFT matrix, which is just [1, 1; 1, -1]
129  // this gives FFT(v) = [A+B; A-B]
130  // this combines the recursive sequences or does the DFT at the base case when i=0
131  memcpy(inds_in, inds, nfact*sizeof(char));
132  inds_in[i] = 'a'-1;
133  inds_DFT[0] = inds[i];
134  inds_DFT[1] = 'a'-1;
135  V[inds] = DFT[inds_DFT]*V[inds_in];
136  }
137 
138  // we now unfold the tensor back into vector form
139  V.get_local_data(&np, &idx, &data);
140  v.write(np, idx, data);
141 
142  free(idx);
143  delete [] data;
144 
145 }
146 
147 
148 int fft(int n,
149  World & dw){
150 
151  Vector< std::complex<double> > a(n, dw, "a");
152  Vector< std::complex<double> > da(n, dw, "da");
153 
154  srand48(2*dw.rank);
155  Transform< std::complex<double> > set_rand([](std::complex<double> & d){ d=std::complex<double>(drand48(),drand48()); } );
156  set_rand(a["i"]);
157 
159 
160  da["i"] = DFT["ij"]*a["j"];
161 
162  Vector< std::complex<double> > fa(n, dw, "fa");
163  fa["i"] = a["i"];
164 
165  fft(fa, n);
166 
167  da["i"] -= fa["i"];
168 
169  std::complex<double> dnrm;
170  Scalar< std::complex<double> > s(dnrm, dw);
171  s[""] += da["i"]*da["i"];
172  dnrm = s;
173  bool pass = fabs(dnrm.real()) <= 1.E-6 && fabs(dnrm.imag()) <= 1.E-6;
174 
175  if (dw.rank == 0){
176  if (pass)
177  printf("{ FFT = DFT } passed \n");
178  else
179  printf("{ FFT = DFT } failed \n");
180  }
181  return pass;
182 }
183 
184 
185 #ifndef TEST_SUITE
186 char* getCmdOption(char ** begin,
187  char ** end,
188  const std::string & option){
189  char ** itr = std::find(begin, end, option);
190  if (itr != end && ++itr != end){
191  return *itr;
192  }
193  return 0;
194 }
195 
196 
197 int main(int argc, char ** argv){
198  int rank, np, n, pass;
199  int const in_num = argc;
200  char ** input_str = argv;
201 
202  MPI_Init(&argc, &argv);
203  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
204  MPI_Comm_size(MPI_COMM_WORLD, &np);
205 
206  if (getCmdOption(input_str, input_str+in_num, "-n")){
207  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
208  if (n < 0) n = 16;
209  } else n = 16;
210 
211  {
212  World dw(argc, argv);
213 
214  if (rank == 0){
215  printf("Running FFT on random dimension %d vector\n",n);
216  }
217  pass = fft(n, dw);
218  assert(pass);
219  }
220 
221  MPI_Finalize();
222  return 0;
223 }
229 #endif
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18
def rank(self)
Definition: core.pyx:312
Matrix< std::complex< double > > twiddle_matrix(int n, World &wrld)
Definition: fft.cxx:40
Vector class which encapsulates a 1D tensor.
Definition: vector.h:14
Definition: common.h:37
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
def imag(tensor, A)
Definition: core.pyx:3038
string
Definition: core.pyx:456
Scalar class which encapsulates a 0D tensor.
Definition: scalar.h:13
int rank
rank of local processor
Definition: world.h:24
Matrix< std::complex< double > > DFT_matrix(int n, World &wrld)
Definition: fft.cxx:23
def exp(init_x, out=None, where=True, casting='same_kind', order='F', dtype=None, subok=True)
Definition: core.pyx:3954
void fft(Vector< std::complex< double > > &v, int n)
Definition: fft.cxx:58
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const
Gives the global indices and values associated with the local data.
Definition: tensor.cxx:159
int main(int argc, char **argv)
Definition: fft.cxx:197
Definition: apsp.cxx:17
char * getCmdOption(char **begin, char **end, const std::string &option)
Definition: fft.cxx:186
an instance of a tensor within a CTF world
Definition: tensor.h:74
std::complex< double > omega(int i, int n)
Definition: fft.cxx:17
void factorize(int n, int *nfactor, int **factor)
computes the size of a tensor in packed symmetric layout
Definition: util.cxx:170
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
Definition: tensor.cxx:264
def np(self)
Definition: core.pyx:315