15 Timer jacobi_spmv(
"jacobi_spmv");
62 for (
int i=0; i<nsm; i++){
83 x1[
"i"] = -1.*R[
"ij"]*x[
"j"];
88 x[
"i"] += omega*x1[
"i"];
92 r[
"i"] -= A[
"ij"]*x[
"j"];
95 if (A.
wrld->
rank == 0) printf(
"r norm is %E\n",rnorm);
103 char tlvl_name[] = {
'l',
'v',
'l',(char)(
'0'+nlevel),
'\0'};
104 Timer tlvl(tlvl_name);
108 r[
"i"] -= A[
"ij"]*x[
"j"];
113 if (A.
wrld->
rank == 0) printf(
"At level %d residual norm was %1.2E initially\n",nlevel,rnorm0);
121 r[
"i"] -= A[
"ij"]*x[
"j"];
127 if (A.
wrld->
rank == 0) printf(
"At level %d (coarsest level), residual norm was %1.2E initially\n",nlevel,rnorm0);
128 if (A.
wrld->
rank == 0) printf(
"At level %d (coarsest level), residual norm was %1.2E after smooth\n",nlevel,rnorm);
132 int64_t m = P[0].
lens[1];
135 Timer rstr(
"restriction");
140 PTr[
"i"] += P[0][
"ji"]*r[
"j"];
147 vcycle(PTAP[0], zx, PTr, P+1, PTAP+1, m, nlevel-1, nsm+1);
151 x[
"i"] += P[0][
"ij"]*zx[
"j"];
155 r[
"i"] -= A[
"ij"]*x[
"j"];
163 r[
"i"] -= A[
"ij"]*x[
"j"];
165 if (A.
wrld->
rank == 0) printf(
"At level %d, residual norm was %1.2E initially\n",nlevel,rnorm0);
166 if (x.wrld->rank == 0) printf(
"At level %d, n=%ld residual norm was %1.2E after initial smooth\n",nlevel,N,rnorm);
167 if (A.
wrld->
rank == 0) printf(
"At level %d, residual norm was %1.2E after coarse recursion\n",nlevel,rnorm2);
168 if (A.
wrld->
rank == 0) printf(
"At level %d, residual norm was %1.2E after final smooth\n",nlevel,rnorm3);
174 if (nlevel == 0)
return;
176 char slvl_name[] = {
's',
'l',
'v',
'l',(char)(
'0'+nlevel),
'\0'};
177 Timer slvl(slvl_name);
179 int64_t m = T[0].
lens[1];
185 Timer trip(
"triple_matrix_product_to_form_T");
188 F[
"ik"] = A[
"ij"]*T[0][
"jk"];
189 P[0][
"ij"] = T[0][
"ij"];
190 P[0][
"ik"] -= D[
"il"]*F[
"lk"];
200 Timer trip2(
"triple_matrix_product_to_form_PTAP");
203 AP[
"lj"] = A[
"lk"]*P[0][
"kj"];
204 PTAP[0][
"ij"] = P[0][
"li"]*AP[
"lj"];
208 setup(PTAP[0], T+1, m, nlevel-1, P+1, PTAP+1);
226 double st_time = MPI_Wtime();
227 vcycle(A, x_init, b, P, PTAP, N, nlvl, nsm);
228 double vtime = MPI_Wtime()-st_time;
234 r2[
"i"] -= A[
"ij"]*x2[
"j"];
239 r[
"i"] -= A[
"ij"]*x_init[
"j"];
242 bool pass = rnorm < rnorm_alt;
246 printf(
"Algebraic multigrid with n %ld nlvl %d took %lf seconds, fine-grid only err = %E, multigrid err = %E\n",N,nlvl,vtime,rnorm_alt,rnorm);
249 printf(
"{ algebraic multigrid method } passed \n");
251 printf(
"{ algebraic multigrid method } failed \n");
267 Timer tct(
"initialization");
278 printf(
"Generated matrix with dimension %1.2E and %1.2E nonzeros\n", (
REAL)n3, (
REAL)A.
nnz_tot);
286 std::pair<REAL,int64_t> * new_vals;
290 new_vals = (std::pair<REAL,int64_t>*)malloc(
sizeof(std::pair<REAL,int64_t>)*nvals);
292 for (int64_t i=0; i<nvals; i++){
293 new_vals[i] = std::pair<REAL,int64_t>(vals[i],
abs((inds[i]%n3) - (inds[i]/n3)));
296 B.
write(nvals,inds,new_vals);
302 int64_t x = d.second % n;
303 int64_t y = (d.second / n) % n;
304 int64_t z = d.second / n / n;
306 d.first = d.first/pow((
REAL)(x+y+z),decay_exp/2.);
314 int tot_ndiv = ndiv*ndiv*ndiv;
315 for (
int i=0; i<nlvl; i++){
316 int64_t m2 = m/tot_ndiv;
318 int64_t mmy = m2/dw.
np;
319 if (dw.
rank < m2%dw.
np) mmy++;
322 for (int64_t j=dw.
rank; j<m2; j+=dw.
np){
323 for (
int k=0; k<tot_ndiv; k++){
324 pairs[nel] =
Pair<REAL>(j*m+j*tot_ndiv+k, 1.0);
328 T[i].
write(nel, pairs);
339 setup(A, T, n3, nlvl, P, PTAP);
352 Timer tct(
"initialization");
358 int64_t my_col = n3/dw.
np;
359 int64_t my_col_st = dw.
rank*my_col;
360 if (dw.
rank < n%dw.
np) my_col++;
361 my_col_st += std::min((
int)dw.
rank, n3%dw.
np);
362 int64_t my_tot_nnz = my_col*3;
363 int64_t * inds = (int64_t*)malloc(
sizeof(int64_t)*my_tot_nnz);
366 int64_t act_tot_nnz = 0;
367 for (int64_t col=my_col_st; col<my_col_st+my_col; col++){
369 inds[act_tot_nnz] = col*n3 + col+1;
370 vals[act_tot_nnz] = -1.;
373 if (col+n < n3 && (col/n+1)%n != 0){
374 inds[act_tot_nnz] = col*n3 + col+n;
375 vals[act_tot_nnz] = -1.;
378 if (col+n*n < n3 && (col/(n*n)+1)%n != 0){
379 inds[act_tot_nnz] = col*n3 + col+n*n;
380 vals[act_tot_nnz] = -1.;
384 A.
write(act_tot_nnz, inds, vals);
391 printf(
"Generated matrix with dimension %1.2E and %1.2E nonzeros\n", (
REAL)n3, (
REAL)A.
nnz_tot);
398 int tot_ndiv = ndiv*ndiv*ndiv;
399 for (
int i=0; i<nlvl; i++){
400 int64_t m2 = m/tot_ndiv;
402 int64_t mmy = m2/dw.
np;
403 if (dw.
rank < m2%dw.
np) mmy++;
407 for (int64_t j=dw.
rank; j<m2; j+=dw.
np){
408 int64_t j1 = j/(nn*nn);
409 int64_t j2 = (j/nn)%nn;
411 for (
int k1=0; k1<ndiv; k1++){
412 for (
int k2=0; k2<ndiv; k2++){
413 for (
int k3=0; k3<ndiv; k3++){
415 pairs[nel] =
Pair<REAL>(j*m+(j1*ndiv+k1)*nn*nn+(j2*ndiv+k2)*nn+j3*ndiv+k3, 1.0/tot_ndiv);
421 T[i].
write(nel, pairs);
433 setup(A, T, n3, nlvl, P, PTAP);
443 char ** itr = std::find(begin, end, option);
444 if (itr != end && ++itr != end){
451 int main(
int argc,
char ** argv){
452 int rank,
np, pass, nlvl, ndiv, decay_exp, nsmooth, poi;
456 int const in_num = argc;
457 char ** input_str = argv;
459 MPI_Init(&argc, &argv);
460 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
461 MPI_Comm_size(MPI_COMM_WORLD, &np);
464 n = atoi(
getCmdOption(input_str, input_str+in_num,
"-n"));
468 if (
getCmdOption(input_str, input_str+in_num,
"-nlvl")){
469 nlvl = atoi(
getCmdOption(input_str, input_str+in_num,
"-nlvl"));
470 if (nlvl < 0) nlvl = 3;
473 if (
getCmdOption(input_str, input_str+in_num,
"-ndiv")){
474 ndiv = atoi(
getCmdOption(input_str, input_str+in_num,
"-ndiv"));
475 if (ndiv < 0) ndiv = 2;
478 if (
getCmdOption(input_str, input_str+in_num,
"-nsmooth")){
479 nsmooth = atoi(
getCmdOption(input_str, input_str+in_num,
"-nsmooth"));
480 if (nsmooth < 0) nsmooth = 3;
483 nsm = (
int*)malloc(
sizeof(
int)*nlvl);
484 std::fill(nsm, nsm+nlvl, nsmooth);
486 char str[] = {
'-',
'n',
's',
'm',
'0',
'\0'};
487 for (
int i=0; i<nlvl; i++){
490 int insm = atoi(
getCmdOption(input_str, input_str+in_num, str));
491 if (insm > 0) nsm[i] = insm;
496 poi = atoi(
getCmdOption(input_str, input_str+in_num,
"-poi"));
497 if (poi < 0) poi = 0;
501 if (
getCmdOption(input_str, input_str+in_num,
"-decay_exp")){
502 decay_exp = atoi(
getCmdOption(input_str, input_str+in_num,
"-decay_exp"));
503 if (decay_exp < 0) decay_exp = 3;
504 }
else decay_exp = 3;
506 if (
getCmdOption(input_str, input_str+in_num,
"-sp_frac")){
507 sp_frac = atof(
getCmdOption(input_str, input_str+in_num,
"-sp_frac"));
508 if (sp_frac < 0) sp_frac = .01;
509 }
else sp_frac = .01;
512 int64_t all_lvl_ndiv=1;
513 for (
int i=0; i<nlvl; i++){ all_lvl_ndiv *= ndiv; }
515 assert(n%all_lvl_ndiv == 0);
518 World dw(argc, argv);
521 printf(
"Running algebraic smoothed multigrid method with %d levels with divisor %d in V-cycle, matrix dimension %ld, %d smooth iterations, decayed based on 3D indexing with decay exponent of %d\n",nlvl,ndiv,n,nsmooth, decay_exp);
522 printf(
"number of smoothing iterations per level is ");
523 for (
int i=0; i<nlvl+1; i++){ printf(
"%d ",nsm[i]); }
539 for (int64_t i=0; i<nloc; i++){
540 vals[i] = (1./(n1))*(1./(n1))*sin(h*M_PI*(1+(inds[i]/(n*n))))*sin(h*M_PI*(1+((inds[i]/n)%n)))*sin(h*M_PI*(1+(inds[i]%n)));
542 b.
write(nloc,inds,vals);
543 for (int64_t i=0; i<nloc; i++){
544 vals[i] = (1./(3.*M_PI*M_PI))*sin(h*M_PI*(1+(inds[i]/(n*n))))*sin(h*M_PI*(1+((inds[i]/n)%n)))*sin(h*M_PI*(1+(inds[i]%n)));
548 x_t.
write(nloc,inds,vals);
549 r[
"i"] = A[
"ij"]*x_t[
"j"];
552 if (dw.
rank == 0) printf(
"Truncation error norm is %1.2E\n",tnorm);
Set class defined by a datatype and a min/max function (if it is partially ordered i...
void smooth_jacobi(Matrix< REAL > &A, Vector< REAL > &x, Vector< REAL > &b, int nsm)
Matrix class which encapsulates a 2D tensor.
int jacobi(int n, World &dw)
int test_alg_multigrid(int64_t N, int nlvl, int *nsm, Matrix< REAL > &A, Vector< REAL > &b, Vector< REAL > &x_init, Matrix< REAL > *P, Matrix< REAL > *PTAP)
computes Multigrid for a 3D regular discretization
void setup(Matrix< REAL > &A, Matrix< REAL > *T, int N, int nlevel, Matrix< REAL > *P, Matrix< REAL > *PTAP)
local process walltime measurement
Vector class which encapsulates a 1D tensor.
void setup_3d_Poisson(int64_t n, int nlvl, int ndiv, Matrix< REAL > &A, Matrix< REAL > *&P, Matrix< REAL > *&PTAP, World &dw)
an instance of the CTF library (world) on a MPI communicator
bool is_sparse
whether only the non-zero elements of the tensor are stored
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
index-value pair used for tensor data input
CTF::World * wrld
distributed processor context on which tensor is defined
void fill_random(dtype rmin, dtype rmax)
fills local unique tensor elements to random values in the range [min,max] works only for dtype in {f...
int rank
rank of local processor
int main(int argc, char **argv)
int * lens
unpadded tensor edge lengths
char * getCmdOption(char **begin, char **end, const std::string &option)
int64_t nnz_tot
maximum number of local nonzero elements over all procs
void print(FILE *fp, dtype cutoff) const
prints tensor data to file using process 0 (modify print(...) overload in set.h if you would like a d...
epoch during which to measure timers
void fill_sp_random(dtype rmin, dtype rmax, double frac_sp)
generate roughly frac_sp*dense_tensor_size nonzeros between rmin and rmax, works only for dtype in {f...
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const
Gives the global indices and values associated with the local data.
void setup_unstructured(int64_t n, int nlvl, REAL sp_frac, int ndiv, int decay_exp, Matrix< REAL > &A, Matrix< REAL > *&P, Matrix< REAL > *&PTAP, World &dw)
std::complex< double > omega(int i, int n)
void vcycle(Matrix< REAL > &A, Vector< REAL > &x, Vector< REAL > &b, Matrix< REAL > *P, Matrix< REAL > *PTAP, int64_t N, int nlevel, int *nsm)
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
int np
number of processors