29 MPI_Comm pcomm = dw.
comm;
30 MPI_Comm_rank(pcomm, &rank);
31 MPI_Comm_size(pcomm, &num_pes);
34 if (div > num_pes) div = num_pes;
39 for (i=0; i<
np; i++ ) pairs[i] = drand48()-.5;
40 A.
write(np, indices, pairs);
44 for (i=0; i<
np; i++ ) pairs[i] = drand48()-.5;
45 B.
write(np, indices, pairs);
50 int cnum_pes = num_pes / div;
51 int color = rank/cnum_pes;
52 int crank = rank%cnum_pes;
55 MPI_Comm_split(pcomm, color, crank, &ccomm);
58 C_ans[
"ij"] = ((double)div)*A[
"ik"]*B[
"kj"];
64 for (
int c=0; c<num_pes/cnum_pes; c++){
67 B.add_to_subworld(&subB,1.0,0.0);
70 B.add_to_subworld(NULL,1.0,0.0);
74 if (rank < cnum_pes*div)
75 subC[
"ij"] = subA[
"ik"]*subB[
"kj"];
78 for (
int c=0; c<num_pes/cnum_pes; c++){
86 C_ans[
"ij"] -= C[
"ij"];
92 printf(
"{ GEMM on subworlds } passed\n");
94 printf(
"{ GEMM on subworlds } FAILED, error norm = %E\n",err);
96 MPI_Comm_free(&ccomm);
105 char ** itr = std::find(begin, end, option);
106 if (itr != end && ++itr != end){
112 int main(
int argc,
char ** argv){
113 int rank,
np, n, m, k, div;
114 int const in_num = argc;
115 char ** input_str = argv;
117 MPI_Init(&argc, &argv);
118 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
119 MPI_Comm_size(MPI_COMM_WORLD, &np);
122 n = atoi(
getCmdOption(input_str, input_str+in_num,
"-n"));
126 m = atoi(
getCmdOption(input_str, input_str+in_num,
"-m"));
130 k = atoi(
getCmdOption(input_str, input_str+in_num,
"-k"));
134 div = atoi(
getCmdOption(input_str, input_str+in_num,
"-div"));
135 if (div < 0) div = 2;
139 World dw(MPI_COMM_WORLD, argc, argv);
142 printf(
"Non-symmetric: NS = NS*NS test_subworld_gemm:\n");
Matrix class which encapsulates a 2D tensor.
void add_to_subworld(Tensor< dtype > *tsr, dtype alpha, dtype beta)
accumulates this tensor to a tensor object defined on a different world
an instance of the CTF library (world) on an MPI communicator
dtype norm2()
computes the Frobenius norm of the tensor (needs sqrt()!)
int test_subworld_gemm(int n, int m, int k, int div_, World &dw)
char * getCmdOption(char **begin, char **end, const std::string &option)
int main(int argc, char **argv)
void add_from_subworld(Tensor< dtype > *tsr, dtype alpha, dtype beta)
accumulates this tensor from a tensor object defined on a different world
void get_local_data(int64_t *npair, int64_t **global_idx, dtype **data, bool nonzeros_only=false, bool unpack_sym=false) const
Gives the global indices and values associated with the local data.
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices. The sparse data is defined in coordinate format...
MPI_Comm comm
set of processors making up this world