17 for (
int i=0; i<
dim; i++){
21 for (
int i=0; i<
dim; i++){
22 for (
int j=i+1; j<
dim; j++){
25 ptsr[str] += tsr[str];
26 ptsr[str] -= tsr[pstr];
27 if (ptsr.
norm2() > 1.E-6)
return false;
37 for (int64_t i=1; i<=n; i++){
43 int64_t
choose(int64_t n, int64_t k){
51 void chi(
char const * idx,
62 if (p_len+q_len > idx_len){
66 if (idx_len == 0 || (p_len == 0 && q_len == 0)){
68 char ** ip = (
char**)malloc(
sizeof(
char*));
69 char ** iq = (
char**)malloc(
sizeof(
char*));
75 np =
choose(idx_len, p_len);
76 np *=
choose(idx_len-p_len, q_len);
80 char ** ip = (
char**)malloc(
sizeof(
char*)*
np);
81 char ** iq = (
char**)malloc(
sizeof(
char*)*
np);
86 for (
int i=0; i<
np; i++){
87 ip[i] = (
char*)malloc(
sizeof(
char)*p_len);
88 iq[i] = (
char*)malloc(
sizeof(
char)*q_len);
95 chi(idx, idx_len-1, p_len-1, 0, &n1_len, &n1_ip, &qnull);
97 for (
int i=0; i<n1_len; i++){
98 memcpy(ip[i], n1_ip[i],
sizeof(
char)*(p_len-1));
99 ip[i][p_len-1] = idx[idx_len-1];
104 chi(idx, idx_len-1, p_len, 0, &n2_len, &n2_ip, &qnull);
105 assert(n2_len + n1_len == np);
107 for (
int i=0; i<n2_len; i++){
108 memcpy(ip[i+n1_len], n2_ip[i],
sizeof(
char)*p_len);
110 }
else if (p_len == 0){
114 chi(idx, idx_len-1, 0, q_len-1, &n1_len, &pnull, &n1_iq);
116 for (
int i=0; i<n1_len; i++){
117 memcpy(iq[i], n1_iq[i],
sizeof(
char)*(q_len-1));
118 iq[i][q_len-1] = idx[idx_len-1];
123 chi(idx, idx_len-1, 0, q_len, &n2_len, &pnull, &n2_iq);
124 assert(n2_len + n1_len == np);
126 for (
int i=0; i<n2_len; i++){
127 memcpy(iq[i+n1_len], n2_iq[i],
sizeof(
char)*q_len);
133 chi(idx, idx_len-1, p_len-1, q_len, &n1_len, &n1_ip, &n1_iq);
136 for (
int i=0; i<n1_len; i++){
137 memcpy(ip[i], n1_ip[i],
sizeof(
char)*(p_len-1));
138 ip[i][p_len-1] = idx[idx_len-1];
139 memcpy(iq[i], n1_iq[i],
sizeof(
char)*q_len);
145 chi(idx, idx_len-1, p_len, q_len-1, &n2_len, &n2_ip, &n2_iq);
147 for (
int i=0; i<n2_len; i++){
148 memcpy(ip[i+n1_len], n2_ip[i],
sizeof(
char)*p_len);
149 memcpy(iq[i+n1_len], n2_iq[i],
sizeof(
char)*(q_len-1));
150 iq[i+n1_len][q_len-1] = idx[idx_len-1];
156 chi(idx, idx_len-1, p_len, q_len, &n3_len, &n3_ip, &n3_iq);
158 for (
int i=0; i<n3_len; i++){
159 memcpy(ip[i+n1_len+n2_len], n3_ip[i],
sizeof(
char)*p_len);
160 memcpy(iq[i+n1_len+n2_len], n3_iq[i],
sizeof(
char)*q_len);
163 assert(n1_len+n2_len+n3_len==np);
180 void chi(
char const * idx,
187 chi(idx, idx_len, p_len, idx_len-p_len, npair, idx_p, &idx_q);
197 int64_t * indices, size;
200 MPI_Comm_rank(ctf.
comm, &rank);
214 for (i=0; i<s+v; i++){
220 idx_A[i] =
'a'+(s+t)+(i-s);
222 for (i=0; i<t+v; i++){
228 idx_B[i] =
'a'+(s+t)+(i-t);
230 for (i=0; i<s+t; i++){
236 Tensor<> A(s+v, len_A, sym_A, ctf,
"A", 1);
237 Tensor<> B(t+v, len_B, sym_B, ctf,
"B", 1);
238 Tensor<> C(s+t, len_C, sym_C, ctf,
"C", 1);
239 Tensor<> C_int(s+t, len_C, sym_C, ctf,
"C_psym", 1);
240 Tensor<> C_ans(s+t, len_C, sym_C, ctf,
"C_ans", 1);
245 for (i=0; i<size; i++){
246 values[i] = drand48();
248 vec.
write(size, indices, values);
252 for (i=0; i<s+v; i++){
253 A[idx_A] += vec[idx_A+i];
257 for (i=0; i<size; i++){
258 values[i] = drand48();
260 vec.
write(size, indices, values);
264 for (i=0; i<t+v; i++){
265 B[idx_B] += vec[idx_B+i];
269 C_int[idx_C] += A[idx_A]*B[idx_B];
275 chi(idx_C, s+t, s, t, &ncperms, &idx_As, &idx_Bt);
277 for (i=0; i<ncperms; i++){
279 memcpy(idx_C_int, idx_As[i],
sizeof(
char)*s);
280 memcpy(idx_C_int+s, idx_Bt[i],
sizeof(
char)*t);
281 C_ans[idx_C] += C_int[idx_C_int];
284 if (is_C_sym) printf(
"C_ans is symmetric\n");
285 else printf(
"C_ans is NOT symmetric!!\n");
290 for (i=0; i<s+v+t; i++){
296 Tensor<> Z_A_ops(s+v+t, len_Z, sym_Z, ctf,
"Z_A", 1);
297 Tensor<> Z_B_ops(s+v+t, len_Z, sym_Z, ctf,
"Z_B", 1);
298 Tensor<> Z_mults(s+v+t, len_Z, sym_Z, ctf,
"Z", 1);
303 chi(idx_Z, s+t+v, s+v, &nAperms, &idx_Asv);
305 for (i=0; i<nAperms; i++){
306 Z_A_ops[idx_Z] += A[idx_Asv[i]];
312 chi(idx_Z, s+t+v, t+v, &nBperms, &idx_Btv);
314 for (i=0; i<nBperms; i++){
315 Z_B_ops[idx_Z] += B[idx_Btv[i]];
319 Z_mults[idx_Z] = Z_A_ops[idx_Z]*Z_B_ops[idx_Z];
322 if (is_Z_sym) printf(
"Z_mults is symmetric\n");
323 else printf(
"Z_mults is NOT symmetric!!\n");
325 memcpy(idx_Z,idx_C,(s+t)*
sizeof(
char));
326 for (i=s+t; i<s+t+v; i++){
327 idx_Z[i] = idx_Z[s+t-1]+(i-s-t+1);
330 C[idx_C]+=Z_mults[idx_Z];
332 Tensor<> V(s+t, len_C, sym_C, ctf,
"V");
333 for (
int r=0; r<v; r++){
334 for (
int p=std::max(v-t-r,0); p<=v-r; p++){
335 for (
int q=std::max(v-s-r,0); q<=v-p-r; q++){
339 idx_kr[i] =
'a'+s+t+i;
343 idx_kp[i] =
'a'+s+t+r+i;
347 idx_kq[i] =
'a'+s+t+r+p+i;
350 Tensor<> V_A_ops(s+t+r, len_Z, sym_Z, ctf,
"V_A_ops");
352 memcpy(idx_VA,idx_C,(s+t)*
sizeof(
char));
353 memcpy(idx_VA+s+t,idx_kr,r*
sizeof(
char));
357 chi(idx_C, s+t, s+v-p-r, &nvAperms, &idx_VAsvpr);
358 for (i=0; i<nvAperms; i++){
360 memcpy(idx_VAA, idx_VAsvpr[i], (s+v-p-r)*
sizeof(
char));
361 memcpy(idx_VAA+s+v-p-r, idx_kr, r*
sizeof(
char));
362 memcpy(idx_VAA+s+v-p, idx_kp, p*
sizeof(
char));
363 V_A_ops[idx_VA] += A[idx_VAA];
366 Tensor<> V_B_ops(s+t+r, len_Z, sym_Z, ctf,
"V_B_ops");
368 memcpy(idx_VB,idx_C,(s+t)*
sizeof(
char));
369 memcpy(idx_VB+s+t,idx_kr,r*
sizeof(
char));
373 chi(idx_C, s+t, t+v-q-r, &nvBperms, &idx_VBtvqr);
374 for (i=0; i<nvBperms; i++){
376 memcpy(idx_VBB, idx_VBtvqr[i], (t+v-q-r)*
sizeof(
char));
377 memcpy(idx_VBB+t+v-q-r, idx_kr, r*
sizeof(
char));
378 memcpy(idx_VBB+t+v-q, idx_kq, q*
sizeof(
char));
385 V_B_ops[idx_VB] += B[idx_VBB];
388 V[idx_C] += prefact*V_A_ops[idx_VA]*V_B_ops[idx_VB];
392 Tensor<> W(s+t, len_C, sym_C, ctf,
"W");
393 for (
int r=1; r<=std::min(s,t); r++){
395 for (
int i=0; i<r; i++){
396 idx_kr[i] =
'a'+s+t+i;
399 for (
int i=0; i<v; i++){
400 idx_kv[i] =
'a'+s+t+r+i;
402 Tensor<> U(s+t-r, len_C, sym_C, ctf,
"U");
406 memcpy(idx_U, idx_kr,
sizeof(
char)*r);
407 memcpy(idx_UA, idx_kr,
sizeof(
char)*r);
408 memcpy(idx_UB, idx_kr,
sizeof(
char)*r);
409 memcpy(idx_UA+r, idx_kv,
sizeof(
char)*v);
410 memcpy(idx_UB+r, idx_kv,
sizeof(
char)*v);
412 char ** idxj, ** idxl;
413 chi(idx_C, s+t-2*r, s-r, t-r, &npermU, &idxj, &idxl);
414 memcpy(idx_U+r,idx_C,s+t-2*r);
415 for (
int i=0; i<npermU; i++){
416 memcpy(idx_UA+r+v,idxj[i],s-r);
417 memcpy(idx_UB+r+v,idxl[i],t-r);
418 U[idx_U] += A[idx_UA]*B[idx_UB];
421 char ** idxh, ** idxr;
422 chi(idx_C, s+t, r, s+t-2*r, &npermW, &idxr, &idxh);
423 for (
int i=0; i<npermW; i++){
424 memcpy(idx_U,idxr[i],r);
425 memcpy(idx_U+r,idxh[i],s+t-2*r);
426 W[idx_C] += U[idx_U];
430 C[idx_C] -= V[idx_C];
431 C[idx_C] -= W[idx_C];
433 C[idx_C] -= C_ans[idx_C];
435 double nrm = C.
norm2();
437 printf(
"error 2-norm is %.4E\n",nrm);
439 int pass = (nrm <=1.E-3);
442 if (pass) printf(
"{ fast symmetric tensor contraction algorithm test } passed\n");
443 else printf(
"{ fast symmetric tensor contraction algorithm test } failed\n");
452 char ** itr = std::find(begin, end, option);
453 if (itr != end && ++itr != end){
460 int main(
int argc,
char ** argv){
462 int const in_num = argc;
463 char ** input_str = argv;
465 MPI_Init(&argc, &argv);
466 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
467 MPI_Comm_size(MPI_COMM_WORLD, &np);
470 n = atoi(
getCmdOption(input_str, input_str+in_num,
"-n"));
475 s = atoi(
getCmdOption(input_str, input_str+in_num,
"-s"));
480 t = atoi(
getCmdOption(input_str, input_str+in_num,
"-t"));
485 v = atoi(
getCmdOption(input_str, input_str+in_num,
"-v"));
490 World dw(MPI_COMM_WORLD, argc, argv);
492 printf(
"Contracting symmetric A of order %d with B of order %d into C of order %d, all with dimension %d\n",s+v,t+v,s+t,n);
int64_t choose(int64_t n, int64_t k)
int main(int argc, char **argv)
bool check_sym(Tensor<> &tsr)
int * sym
symmetries among tensor dimensions
Vector class which encapsulates a 1D tensor.
int64_t chchoose(int64_t n, int64_t k)
an instance of the CTF library (world) on an MPI communicator
int order
number of tensor dimensions
void read_local(int64_t *npair, int64_t **global_idx, dtype **data, bool unpack_sym=false) const
Using get_local_data(), which returns an array that must be freed with delete [], is more efficient...
dtype norm2()
computes the Frobenius norm of the tensor (needs sqrt()!)
CTF::World * wrld
distributed processor context on which tensor is defined
int * lens
unpadded tensor edge lengths
void chi(char const *idx, int idx_len, int p_len, int q_len, int *npair, char ***idx_p, char ***idx_q)
char * getCmdOption(char **begin, char **end, const std::string &option)
int fast_tensor_ctr(int n, int s, int t, int v, World &ctf)
an instance of a tensor within a CTF world
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices. The sparse data is defined in coordinate format...
MPI_Comm comm
set of processors making up this world