Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
algebraic_multigrid.cxx
Go to the documentation of this file.
1 
7 #include <ctf.hpp>
8 using namespace CTF;
9 //#define ERR_REPORT
10 
11 typedef float REAL;
12 
14  Timer jacobi("jacobi");
15  Timer jacobi_spmv("jacobi_spmv");
16 
17  jacobi.start();
18  Vector<REAL> d(x.len, *x.wrld);
19  d["i"] = A["ii"];
20  Transform<REAL>([](REAL & d){ d= fabs(d) > 0.0 ? 1./d : 0.0; })(d["i"]);
21  Matrix<REAL> R(A);
22  R["ii"] = 0.0;
23  Vector<REAL> x1(x.len, *x.wrld);
24 
25 /*
26  int64_t N = A.nrow;
27  Vector<REAL> Red(N, *A.wrld);
28  Vector<REAL> Blk(N, *A.wrld);
29  int64_t Np = N / A.wrld->np;
30  int64_t sNp = Np * A.wrld->rank;
31  if (A.wrld->rank < N % A.wrld->np) Np++;
32  sNp += std::min(A.wrld->rank,(int)( N % A.wrld->np));
33  int64_t * inds_R = (int64_t*)malloc(sizeof(int64_t)*Np);
34  REAL * vals_R = (REAL*)malloc(sizeof(REAL)*Np);
35  int64_t * inds_B = (int64_t*)malloc(sizeof(int64_t)*Np);
36  REAL * vals_B = (REAL*)malloc(sizeof(REAL)*Np);
37  int nR = 0;
38  int nB = 0;
39  int64_t n=1;
40  while (n*n*n < N) n++;
41  assert(n*n*n==N);
42  for (int64_t i=0; i<Np; i++){
43  bool p = 1;
44  if (((i+sNp)/(n*n)) % 2 == 1) p = !p;
45  if ((((i+sNp)/n)%n) % 2 == 1) p = !p;
46  if (((i+sNp)%n) % 2 == 1) p = !p;
47  if (p){
48  inds_B[nB] = i+sNp;
49  vals_B[nB] = 1.;
50  nB++;
51  } else {
52  inds_R[nR] = i+sNp;
53  vals_R[nR] = 1.;
54  nR++;
55  }
56  }
57  Red.write(nR, inds_R, vals_R);
58  Blk.write(nB, inds_B, vals_B);*/
59 
60  double omega = .333;
61  //20 iterations of Jacobi, should probably be a parameter or some convergence check instead
62  for (int i=0; i<nsm; i++){
63 /* jacobi_spmv.start();
64  x1["i"] = -omega*A["ij"]*x["j"];
65  jacobi_spmv.stop();
66  x1["i"] *= d["i"];
67  x1["i"] += x["i"];
68  x1["i"] += omega*d["i"]*b["i"];
69  x["i"] *= Red["i"];
70  x["i"] += Blk["i"]*x1["i"];
71 
72  jacobi_spmv.start();
73  x1["i"] = -omega*A["ij"]*x["j"];
74  jacobi_spmv.stop();
75  x1["i"] *= d["i"];
76  x1["i"] += x["i"];
77  x1["i"] += omega*d["i"]*b["i"];
78  x["i"] *= Blk["i"];
79  x["i"] += Red["i"]*x1["i"];*/
80 
81 // x["i"] *= .333;
82  jacobi_spmv.start();
83  x1["i"] = -1.*R["ij"]*x["j"];
84  jacobi_spmv.stop();
85  x1["i"] += b["i"];
86  x1["i"] *= d["i"];
87  x["i"] *= (1.-omega);
88  x["i"] += omega*x1["i"];
89  //x["i"] = x1["i"];
90 #ifdef ERR_REPORT
91  Vector<REAL> r(b);
92  r["i"] -= A["ij"]*x["j"];
93  r.print();
94  REAL rnorm = r.norm2();
95  if (A.wrld->rank == 0) printf("r norm is %E\n",rnorm);
96 #endif
97  }
98  jacobi.stop();
99 }
100 
101 void vcycle(Matrix<REAL> & A, Vector<REAL> & x, Vector<REAL> & b, Matrix<REAL> * P, Matrix<REAL> * PTAP, int64_t N, int nlevel, int * nsm){
102  //do smoothing using Jacobi
103  char tlvl_name[] = {'l','v','l',(char)('0'+nlevel),'\0'};
104  Timer tlvl(tlvl_name);
105  tlvl.start();
106  Vector<REAL> r(N,*A.wrld,"r");
107 #ifdef ERR_REPORT
108  r["i"] -= A["ij"]*x["j"];
109  r["i"] += b["i"];
110  REAL rnorm0 = r.norm2();
111 #endif
112 #ifdef ERR_REPORT
113  if (A.wrld->rank == 0) printf("At level %d residual norm was %1.2E initially\n",nlevel,rnorm0);
114 #endif
115  if (N==1){
116  x["i"] = Function<REAL>([](REAL a, REAL b){ return b/a; })(A["ij"],b["j"]);
117  } else {
118  smooth_jacobi(A,x,b,nsm[0]);
119  }
120  r["i"] = b["i"];
121  r["i"] -= A["ij"]*x["j"];
122 #ifdef ERR_REPORT
123  REAL rnorm = r.norm2();
124 #endif
125  if (nlevel == 0){
126 #ifdef ERR_REPORT
127  if (A.wrld->rank == 0) printf("At level %d (coarsest level), residual norm was %1.2E initially\n",nlevel,rnorm0);
128  if (A.wrld->rank == 0) printf("At level %d (coarsest level), residual norm was %1.2E after smooth\n",nlevel,rnorm);
129 #endif
130  return;
131  }
132  int64_t m = P[0].lens[1];
133 
134  //smooth the restriction/interpolation operator P = (I-omega*diag(A)^{-1}*A)T
135  Timer rstr("restriction");
136  rstr.start();
137 
138  //restrict residual vector
139  Vector<REAL> PTr(m, *x.wrld);
140  PTr["i"] += P[0]["ji"]*r["j"];
141 
142  //coarses initial guess should be zeros
143  Vector<REAL> zx(m, *b.wrld);
144  rstr.stop();
145  tlvl.stop();
146  //recurse into coarser level
147  vcycle(PTAP[0], zx, PTr, P+1, PTAP+1, m, nlevel-1, nsm+1);
148  tlvl.start();
149 
150  //interpolate solution to residual equation at coraser level back
151  x["i"] += P[0]["ij"]*zx["j"];
152 
153 #ifdef ERR_REPORT
154  r["i"] = b["i"];
155  r["i"] -= A["ij"]*x["j"];
156  REAL rnorm2 = r.norm2();
157 #endif
158  //smooth new solution
159  smooth_jacobi(A,x,b,nsm[0]);
160  tlvl.stop();
161 #ifdef ERR_REPORT
162  r["i"] = b["i"];
163  r["i"] -= A["ij"]*x["j"];
164  REAL rnorm3 = r.norm2();
165  if (A.wrld->rank == 0) printf("At level %d, residual norm was %1.2E initially\n",nlevel,rnorm0);
166  if (x.wrld->rank == 0) printf("At level %d, n=%ld residual norm was %1.2E after initial smooth\n",nlevel,N,rnorm);
167  if (A.wrld->rank == 0) printf("At level %d, residual norm was %1.2E after coarse recursion\n",nlevel,rnorm2);
168  if (A.wrld->rank == 0) printf("At level %d, residual norm was %1.2E after final smooth\n",nlevel,rnorm3);
169 #endif
170 }
171 
172 
173 void setup(Matrix<REAL> & A, Matrix<REAL> * T, int N, int nlevel, Matrix<REAL> * P, Matrix<REAL> * PTAP){
174  if (nlevel == 0) return;
175 
176  char slvl_name[] = {'s','l','v','l',(char)('0'+nlevel),'\0'};
177  Timer slvl(slvl_name);
178  slvl.start();
179  int64_t m = T[0].lens[1];
180  P[0] = Matrix<REAL>(N, m, SP, *T[0].wrld);
181  Matrix<REAL> D(N,N,SP,*A.wrld);
182  D["ii"] = A["ii"];
183  REAL omega=.333;
184  Transform<REAL>([=](REAL & d){ d= omega/d; })(D["ii"]);
185  Timer trip("triple_matrix_product_to_form_T");
186  trip.start();
187  Matrix<REAL> F(P[0]);
188  F["ik"] = A["ij"]*T[0]["jk"];
189  P[0]["ij"] = T[0]["ij"];
190  P[0]["ik"] -= D["il"]*F["lk"];
191  trip.stop();
192 
193  int atr = 0;
194  if (A.is_sparse){
195  atr = atr | SP;
196  }
197  Matrix<REAL> AP(N, m, atr, *A.wrld);
198  PTAP[0] = Matrix<REAL>(m, m, atr, *A.wrld);
199 
200  Timer trip2("triple_matrix_product_to_form_PTAP");
201  trip2.start();
202  //restrict A via triple matrix product, should probably be done outside v-cycle
203  AP["lj"] = A["lk"]*P[0]["kj"];
204  PTAP[0]["ij"] = P[0]["li"]*AP["lj"];
205 
206  trip2.stop();
207  slvl.stop();
208  setup(PTAP[0], T+1, m, nlevel-1, P+1, PTAP+1);
209 }
210 
214 int test_alg_multigrid(int64_t N,
215  int nlvl,
216  int * nsm,
217  Matrix<REAL> & A,
218  Vector<REAL> & b,
219  Vector<REAL> & x_init,
220  Matrix<REAL> * P,
221  Matrix<REAL> * PTAP){
222 
223  Vector<REAL> x2(x_init);
224  Timer_epoch vc("vcycle");
225  vc.begin();
226  double st_time = MPI_Wtime();
227  vcycle(A, x_init, b, P, PTAP, N, nlvl, nsm);
228  double vtime = MPI_Wtime()-st_time;
229  vc.end();
230 
231  smooth_jacobi(A,x2,b,2*nsm[0]);
232  Vector<REAL> r2(x2);
233  r2["i"] = b["i"];
234  r2["i"] -= A["ij"]*x2["j"];
235  REAL rnorm_alt = r2.norm2();
236 
237  Vector<REAL> r(x_init);
238  r["i"] = b["i"];
239  r["i"] -= A["ij"]*x_init["j"];
240  REAL rnorm = r.norm2();
241 
242  bool pass = rnorm < rnorm_alt;
243 
244  if (A.wrld->rank == 0){
245 #ifndef TEST_SUITE
246  printf("Algebraic multigrid with n %ld nlvl %d took %lf seconds, fine-grid only err = %E, multigrid err = %E\n",N,nlvl,vtime,rnorm_alt,rnorm);
247 #endif
248  if (pass)
249  printf("{ algebraic multigrid method } passed \n");
250  else
251  printf("{ algebraic multigrid method } failed \n");
252  }
253  return pass;
254 
255 }
256 
257 void setup_unstructured(int64_t n,
258  int nlvl,
259  REAL sp_frac,
260  int ndiv,
261  int decay_exp,
262  Matrix<REAL> & A,
263  Matrix<REAL> *& P,
264  Matrix<REAL> *& PTAP,
265  World & dw){
266  int64_t n3 = n*n*n;
267  Timer tct("initialization");
268  tct.start();
269  A = Matrix<REAL>(n3, n3, SP, dw);
270  srand48(dw.rank*12);
271  A.fill_sp_random(0.0, 1.0, sp_frac);
272 
273  A["ij"] += A["ji"];
274  REAL pn = sqrt((REAL)n);
275  A["ii"] += pn;
276 
277  if (dw.rank == 0){
278  printf("Generated matrix with dimension %1.2E and %1.2E nonzeros\n", (REAL)n3, (REAL)A.nnz_tot);
279  fflush(stdout);
280  }
281 
282  Matrix<std::pair<REAL, int64_t>> B(n3,n3,SP,dw,Set<std::pair<REAL, int64_t>>());
283 
284  int64_t * inds;
285  REAL * vals;
286  std::pair<REAL,int64_t> * new_vals;
287  int64_t nvals;
288  A.get_local_data(&nvals, &inds, &vals, true);
289 
290  new_vals = (std::pair<REAL,int64_t>*)malloc(sizeof(std::pair<REAL,int64_t>)*nvals);
291 
292  for (int64_t i=0; i<nvals; i++){
293  new_vals[i] = std::pair<REAL,int64_t>(vals[i],abs((inds[i]%n3) - (inds[i]/n3)));
294  }
295 
296  B.write(nvals,inds,new_vals);
297  delete [] vals;
298  free(new_vals);
299  free(inds);
300 
301  Transform< std::pair<REAL,int64_t> >([=](std::pair<REAL,int64_t> & d){
302  int64_t x = d.second % n;
303  int64_t y = (d.second / n) % n;
304  int64_t z = d.second / n / n;
305  if (x+y+z > 0)
306  d.first = d.first/pow((REAL)(x+y+z),decay_exp/2.);
307  }
308  )(B["ij"]);
309 
310  A["ij"] = Function< std::pair<REAL,int64_t>, REAL >([](std::pair<REAL,int64_t> p){ return p.first; })(B["ij"]);
311 
312  Matrix<REAL> * T = new Matrix<REAL>[nlvl];
313  int64_t m=n3;
314  int tot_ndiv = ndiv*ndiv*ndiv;
315  for (int i=0; i<nlvl; i++){
316  int64_t m2 = m/tot_ndiv;
317  T[i] = Matrix<REAL>(m, m2, SP, dw);
318  int64_t mmy = m2/dw.np;
319  if (dw.rank < m2%dw.np) mmy++;
320  Pair<REAL> * pairs = (Pair<REAL>*)malloc(sizeof(Pair<REAL>)*mmy*tot_ndiv);
321  int64_t nel = 0;
322  for (int64_t j=dw.rank; j<m2; j+=dw.np){
323  for (int k=0; k<tot_ndiv; k++){
324  pairs[nel] = Pair<REAL>(j*m+j*tot_ndiv+k, 1.0);
325  nel++;
326  }
327  }
328  T[i].write(nel, pairs);
329  delete [] pairs;
330  m = m2;
331  }
332  tct.stop();
333 
334  P = new Matrix<REAL>[nlvl];
335  PTAP = new Matrix<REAL>[nlvl];
336 
337  Timer_epoch ve("setup");
338  ve.begin();
339  setup(A, T, n3, nlvl, P, PTAP);
340  ve.end();
341 }
342 
343 
344 void setup_3d_Poisson(int64_t n,
345  int nlvl,
346  int ndiv,
347  Matrix<REAL> & A,
348  Matrix<REAL> *& P,
349  Matrix<REAL> *& PTAP,
350  World & dw){
351 
352  Timer tct("initialization");
353  tct.start();
354  int n3 =n*n*n;
355  A = Matrix<REAL>(n3, n3, SP, dw);
356  A["ii"] = 3.;
357 
358  int64_t my_col = n3/dw.np;
359  int64_t my_col_st = dw.rank*my_col;
360  if (dw.rank < n%dw.np) my_col++;
361  my_col_st += std::min((int)dw.rank, n3%dw.np);
362  int64_t my_tot_nnz = my_col*3;
363  int64_t * inds = (int64_t*)malloc(sizeof(int64_t)*my_tot_nnz);
364  REAL * vals = (REAL*)malloc(sizeof(REAL)*my_tot_nnz);
365 
366  int64_t act_tot_nnz = 0;
367  for (int64_t col=my_col_st; col<my_col_st+my_col; col++){
368  if ((col+1)%n != 0){
369  inds[act_tot_nnz] = col*n3 + col+1;
370  vals[act_tot_nnz] = -1.;
371  act_tot_nnz++;
372  }
373  if (col+n < n3 && (col/n+1)%n != 0){
374  inds[act_tot_nnz] = col*n3 + col+n;
375  vals[act_tot_nnz] = -1.;
376  act_tot_nnz++;
377  }
378  if (col+n*n < n3 && (col/(n*n)+1)%n != 0){
379  inds[act_tot_nnz] = col*n3 + col+n*n;
380  vals[act_tot_nnz] = -1.;
381  act_tot_nnz++;
382  }
383  }
384  A.write(act_tot_nnz, inds, vals);
385  free(inds);
386  free(vals);
387 
388  A["ij"] += A["ji"];
389 
390  if (dw.rank == 0){
391  printf("Generated matrix with dimension %1.2E and %1.2E nonzeros\n", (REAL)n3, (REAL)A.nnz_tot);
392  fflush(stdout);
393  }
394 
395  Matrix<REAL> * T = new Matrix<REAL>[nlvl];
396  int64_t m=n3;
397  int64_t nn=n;
398  int tot_ndiv = ndiv*ndiv*ndiv;
399  for (int i=0; i<nlvl; i++){
400  int64_t m2 = m/tot_ndiv;
401  T[i] = Matrix<REAL>(m, m2, SP, dw);
402  int64_t mmy = m2/dw.np;
403  if (dw.rank < m2%dw.np) mmy++;
404  Pair<REAL> * pairs = (Pair<REAL>*)malloc(sizeof(Pair<REAL>)*mmy*tot_ndiv);
405  //Pair<REAL> * pairs = new Pair<REAL>[mmy*tot_ndiv];
406  int64_t nel = 0;
407  for (int64_t j=dw.rank; j<m2; j+=dw.np){
408  int64_t j1 = j/(nn*nn);
409  int64_t j2 = (j/nn)%nn;
410  int64_t j3 = j%nn;
411  for (int k1=0; k1<ndiv; k1++){
412  for (int k2=0; k2<ndiv; k2++){
413  for (int k3=0; k3<ndiv; k3++){
414  //printf("i=%d, m= %ld m2=%ld key = %ld\n",i,m,m2,j*m+(j1*ndiv+k1)*nn*nn+(j2*ndiv+k2)*nn+j3*ndiv+k3);
415  pairs[nel] = Pair<REAL>(j*m+(j1*ndiv+k1)*nn*nn+(j2*ndiv+k2)*nn+j3*ndiv+k3, 1.0/tot_ndiv);
416  nel++;
417  }
418  }
419  }
420  }
421  T[i].write(nel, pairs);
422  free(pairs);
423  m = m2;
424  nn = n/ndiv;
425  }
426 
427  tct.stop();
428 
429  P = new Matrix<REAL>[nlvl];
430  PTAP = new Matrix<REAL>[nlvl];
431  Timer_epoch ve("setup");
432  ve.begin();
433  setup(A, T, n3, nlvl, P, PTAP);
434  ve.end();
435 }
436 
437 
438 
439 #ifndef TEST_SUITE
440 char* getCmdOption(char ** begin,
441  char ** end,
442  const std::string & option){
443  char ** itr = std::find(begin, end, option);
444  if (itr != end && ++itr != end){
445  return *itr;
446  }
447  return 0;
448 }
449 
450 
451 int main(int argc, char ** argv){
452  int rank, np, pass, nlvl, ndiv, decay_exp, nsmooth, poi;
453  int * nsm;
454  int64_t n;
455  REAL sp_frac;
456  int const in_num = argc;
457  char ** input_str = argv;
458 
459  MPI_Init(&argc, &argv);
460  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
461  MPI_Comm_size(MPI_COMM_WORLD, &np);
462 
463  if (getCmdOption(input_str, input_str+in_num, "-n")){
464  n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
465  if (n < 0) n = 16;
466  } else n = 16;
467 
468  if (getCmdOption(input_str, input_str+in_num, "-nlvl")){
469  nlvl = atoi(getCmdOption(input_str, input_str+in_num, "-nlvl"));
470  if (nlvl < 0) nlvl = 3;
471  } else nlvl = 3;
472 
473  if (getCmdOption(input_str, input_str+in_num, "-ndiv")){
474  ndiv = atoi(getCmdOption(input_str, input_str+in_num, "-ndiv"));
475  if (ndiv < 0) ndiv = 2;
476  } else ndiv = 2;
477 
478  if (getCmdOption(input_str, input_str+in_num, "-nsmooth")){
479  nsmooth = atoi(getCmdOption(input_str, input_str+in_num, "-nsmooth"));
480  if (nsmooth < 0) nsmooth = 3;
481  } else nsmooth = 3;
482 
483  nsm = (int*)malloc(sizeof(int)*nlvl);
484  std::fill(nsm, nsm+nlvl, nsmooth);
485 
486  char str[] = {'-','n','s','m','0','\0'};
487  for (int i=0; i<nlvl; i++){
488  str[4] = '0'+i;
489  if (getCmdOption(input_str, input_str+in_num, str)){
490  int insm = atoi(getCmdOption(input_str, input_str+in_num, str));
491  if (insm > 0) nsm[i] = insm;
492  }
493  }
494 
495  if (getCmdOption(input_str, input_str+in_num, "-poi")){
496  poi = atoi(getCmdOption(input_str, input_str+in_num, "-poi"));
497  if (poi < 0) poi = 0;
498  } else poi = 1;
499 
500 
501  if (getCmdOption(input_str, input_str+in_num, "-decay_exp")){
502  decay_exp = atoi(getCmdOption(input_str, input_str+in_num, "-decay_exp"));
503  if (decay_exp < 0) decay_exp = 3;
504  } else decay_exp = 3;
505 
506  if (getCmdOption(input_str, input_str+in_num, "-sp_frac")){
507  sp_frac = atof(getCmdOption(input_str, input_str+in_num, "-sp_frac"));
508  if (sp_frac < 0) sp_frac = .01;
509  } else sp_frac = .01;
510 
511  nlvl--;
512  int64_t all_lvl_ndiv=1;
513  for (int i=0; i<nlvl; i++){ all_lvl_ndiv *= ndiv; }
514 
515  assert(n%all_lvl_ndiv == 0);
516 
517  {
518  World dw(argc, argv);
519 
520  if (rank == 0){
521  printf("Running algebraic smoothed multigrid method with %d levels with divisor %d in V-cycle, matrix dimension %ld, %d smooth iterations, decayed based on 3D indexing with decay exponent of %d\n",nlvl,ndiv,n,nsmooth, decay_exp);
522  printf("number of smoothing iterations per level is ");
523  for (int i=0; i<nlvl+1; i++){ printf("%d ",nsm[i]); }
524  printf("\n");
525  }
526  Matrix<REAL> A;
527  Matrix<REAL> * P;
528  Matrix<REAL> * PTAP;
529  Vector<REAL> b(n*n*n,dw,"b");
530  Vector<REAL> x(n*n*n,dw,"x");
531  if (poi){
532  setup_3d_Poisson(n, nlvl, ndiv, A, P, PTAP, dw);
533  int64_t * inds;
534  int64_t nloc;
535  REAL * vals;
536  b.get_local_data(&nloc, &inds, &vals);
537  int n1 = n+1;
538  REAL h = 1./(n1);
539  for (int64_t i=0; i<nloc; i++){
540  vals[i] = (1./(n1))*(1./(n1))*sin(h*M_PI*(1+(inds[i]/(n*n))))*sin(h*M_PI*(1+((inds[i]/n)%n)))*sin(h*M_PI*(1+(inds[i]%n)));
541  }
542  b.write(nloc,inds,vals);
543  for (int64_t i=0; i<nloc; i++){
544  vals[i] = (1./(3.*M_PI*M_PI))*sin(h*M_PI*(1+(inds[i]/(n*n))))*sin(h*M_PI*(1+((inds[i]/n)%n)))*sin(h*M_PI*(1+(inds[i]%n)));
545  }
546  Vector<REAL> x_t(n*n*n,dw,"x_t");
547  Vector<REAL> r(n*n*n,dw,"r");
548  x_t.write(nloc,inds,vals);
549  r["i"] = A["ij"]*x_t["j"];
550  r["i"] -= b["i"];
551  REAL tnorm = r.norm2();
552  if (dw.rank == 0) printf("Truncation error norm is %1.2E\n",tnorm);
553  x["i"] = x_t["i"];
554  Vector<REAL> rand(n*n*n,dw,"rand");
555  REAL tot = x["i"];
556  tot = tot/(n*n*n);
557  rand.fill_random(-tot*.1,tot*.1);
558  x["i"]+=rand["i"];
559  } else {
560  setup_unstructured(n, nlvl, sp_frac, ndiv, decay_exp, A, P, PTAP, dw);
561  b.fill_random(-1.E-1, 1.E-1);
562  }
563  pass = test_alg_multigrid(n*n*n, nlvl, nsm, A, b, x, P, PTAP);
564  // assert(pass);
565  }
566 
567  MPI_Finalize();
568  return 0;
569 }
575 #endif
Set class defined by a datatype and a min/max function (if it is partially ordered i...
Definition: set.h:280
void smooth_jacobi(Matrix< REAL > &A, Vector< REAL > &x, Vector< REAL > &b, int nsm)
Matrix class which encapsulates a 2D tensor.
Definition: matrix.h:18
int jacobi(int n, World &dw)
Definition: jacobi.cxx:19
def rank(self)
Definition: core.pyx:312
int test_alg_multigrid(int64_t N, int nlvl, int *nsm, Matrix< REAL > &A, Vector< REAL > &b, Vector< REAL > &x_init, Matrix< REAL > *P, Matrix< REAL > *PTAP)
computes Multigrid for a 3D regular discretization
void setup(Matrix< REAL > &A, Matrix< REAL > *T, int N, int nlevel, Matrix< REAL > *P, Matrix< REAL > *PTAP)
Definition: common.h:37
void stop()
Definition: int_timer.cxx:151
local process walltime measurement
Definition: timer.h:50
Vector class which encapsulates a 1D tensor.
Definition: vector.h:14
float REAL
void setup_3d_Poisson(int64_t n, int nlvl, int ndiv, Matrix< REAL > &A, Matrix< REAL > *&P, Matrix< REAL > *&PTAP, World &dw)
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
bool is_sparse
whether only the non-zero elements of the tensor are stored
string
Definition: core.pyx:456
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
Definition: tensor.h:811
index-value pair used for tensor data input
Definition: tensor.h:28
CTF::World * wrld
distributed processor context on which tensor is defined
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
Definition: tensor.cxx:928
int rank
rank of local processor
Definition: world.h:24
int main(int argc, char **argv)
int * lens
unpadded tensor edge lengths
char * getCmdOption(char **begin, char **end, const std::string &option)
int64_t nnz_tot
maximum number of local nonzero elements over all procs
void print(FILE *fp, dtype cutoff) const
prints tensor data to file using process 0 (modify print(...) overload in set.h if you would like a d...
Definition: tensor.cxx:407
epoch during which to measure timers
Definition: timer.h:69
void fill_sp_random(dtype rmin, dtype rmax, double frac_sp)
generate roughly frac_sp*dense_tensor_size nonzeros between rmin and rmax, works only for dtype in {f...
Definition: tensor.cxx:969
def abs(initA)
Definition: core.pyx:5440
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const
Gives the global indices and values associated with the local data.
Definition: tensor.cxx:159
void setup_unstructured(int64_t n, int nlvl, REAL sp_frac, int ndiv, int decay_exp, Matrix< REAL > &A, Matrix< REAL > *&P, Matrix< REAL > *&PTAP, World &dw)
void start()
Definition: int_timer.cxx:141
Definition: apsp.cxx:17
std::complex< double > omega(int i, int n)
Definition: fft.cxx:17
void vcycle(Matrix< REAL > &A, Vector< REAL > &x, Vector< REAL > &b, Matrix< REAL > *P, Matrix< REAL > *PTAP, int64_t N, int nlevel, int *nsm)
int len
Definition: vector.h:16
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
Definition: tensor.cxx:264
int np
number of processors
Definition: world.h:26
def np(self)
Definition: core.pyx:315