13 int parity(
char const *
a,
char const *
b,
char const * c,
int len_A,
int len_B){
15 for (
int i=0; i<len_A; i++){
18 for (
int i=0; i<len_B; i++){
30 for (
int i=0; i<len_A+len_B; i++){
33 for (j=i+1; j<len_A+len_B; j++){
41 assert(j<len_A+len_B);
49 int parity(
char const *
a,
char const * c,
int len_A,
int len_C){
50 if (len_A == len_C)
return parity(a, NULL, c, len_A, 0);
54 for (
int i=0; i<len_C; i++){
56 for (j=0; j<len_A; j++){
57 if (c[i] == a[j])
break;
64 assert(ib == len_C-len_A);
65 return parity(a, b, c, len_A, len_C-len_A);
71 if (par%2 == 1)
return -1.0;
77 int64_t * indices, size;
83 for (int64_t i=0; i<size; i++){
84 values[i] = drand48();
86 tsr.
write(size, indices, values);
95 for (int64_t i=0; i<size; i++){
96 values[i] = drand48();
98 v.
write(size, indices, values);
103 for (
int i=0; i<
dim; i++){
107 for (
int i=0; i<
dim; i++){
108 for (
int j=0; j<dim-1; j++){
109 if (j>=i) str_n1[j] = str[j+1];
110 else str_n1[j] =str[j];
113 assert(
sign(
parity(str+i, str_n1, str, 1, dim-1)) == sgn);
114 tsr[str] += sgn*v[str+i]*tsr_n1[str_n1];
125 for (
int i=0; i<
dim; i++){
129 for (
int i=0; i<
dim; i++){
130 for (
int j=i+1; j<
dim; j++){
133 ptsr[str] += tsr[str];
134 ptsr[str] += tsr[pstr];
135 if (ptsr.
norm2() > 1.E-6)
return false;
148 for (
int i=0; i<
dim; i++){
152 for (
int i=0; i<
dim; i++){
153 for (
int j=i+1; j<
dim; j++){
156 ptsr[str] += tsr[str];
157 ptsr[str] -= tsr[pstr];
158 if (ptsr.
norm2() > 1.E-6)
return false;
168 for (int64_t i=1; i<=n; i++){
182 void chi(
char const * idx,
193 if (p_len+q_len > idx_len){
197 if (idx_len == 0 || (p_len == 0 && q_len == 0)){
199 char ** ip = (
char**)malloc(
sizeof(
char*));
200 char ** iq = (
char**)malloc(
sizeof(
char*));
206 np =
choose(idx_len, p_len);
207 np *=
choose(idx_len-p_len, q_len);
211 char ** ip = (
char**)malloc(
sizeof(
char*)*
np);
212 char ** iq = (
char**)malloc(
sizeof(
char*)*
np);
217 for (
int i=0; i<
np; i++){
218 ip[i] = (
char*)malloc(
sizeof(
char)*p_len);
219 iq[i] = (
char*)malloc(
sizeof(
char)*q_len);
226 chi(idx, idx_len-1, p_len-1, 0, &n1_len, &n1_ip, &qnull);
228 for (
int i=0; i<n1_len; i++){
229 memcpy(ip[i], n1_ip[i],
sizeof(
char)*(p_len-1));
230 ip[i][p_len-1] = idx[idx_len-1];
235 chi(idx, idx_len-1, p_len, 0, &n2_len, &n2_ip, &qnull);
236 assert(n2_len + n1_len == np);
238 for (
int i=0; i<n2_len; i++){
239 memcpy(ip[i+n1_len], n2_ip[i],
sizeof(
char)*p_len);
241 }
else if (p_len == 0){
245 chi(idx, idx_len-1, 0, q_len-1, &n1_len, &pnull, &n1_iq);
247 for (
int i=0; i<n1_len; i++){
248 memcpy(iq[i], n1_iq[i],
sizeof(
char)*(q_len-1));
249 iq[i][q_len-1] = idx[idx_len-1];
254 chi(idx, idx_len-1, 0, q_len, &n2_len, &pnull, &n2_iq);
255 assert(n2_len + n1_len == np);
257 for (
int i=0; i<n2_len; i++){
258 memcpy(iq[i+n1_len], n2_iq[i],
sizeof(
char)*q_len);
264 chi(idx, idx_len-1, p_len-1, q_len, &n1_len, &n1_ip, &n1_iq);
267 for (
int i=0; i<n1_len; i++){
268 memcpy(ip[i], n1_ip[i],
sizeof(
char)*(p_len-1));
269 ip[i][p_len-1] = idx[idx_len-1];
270 memcpy(iq[i], n1_iq[i],
sizeof(
char)*q_len);
276 chi(idx, idx_len-1, p_len, q_len-1, &n2_len, &n2_ip, &n2_iq);
278 for (
int i=0; i<n2_len; i++){
279 memcpy(ip[i+n1_len], n2_ip[i],
sizeof(
char)*p_len);
280 memcpy(iq[i+n1_len], n2_iq[i],
sizeof(
char)*(q_len-1));
281 iq[i+n1_len][q_len-1] = idx[idx_len-1];
287 chi(idx, idx_len-1, p_len, q_len, &n3_len, &n3_ip, &n3_iq);
289 for (
int i=0; i<n3_len; i++){
290 memcpy(ip[i+n1_len+n2_len], n3_ip[i],
sizeof(
char)*p_len);
291 memcpy(iq[i+n1_len+n2_len], n3_iq[i],
sizeof(
char)*q_len);
294 assert(n1_len+n2_len+n3_len==np);
311 void chi(
char const * idx,
318 chi(idx, idx_len, p_len, idx_len-p_len, npair, idx_p, &idx_q);
328 int64_t * indices, size;
331 MPI_Comm_rank(ctf.
comm, &rank);
346 for (i=0; i<s+v; i++){
352 idx_A[i] =
'a'+(s+t)+(i-s);
354 for (i=0; i<t+v; i++){
362 idx_B[i] =
'a'+(s+t)+i;
364 idx_B[i] =
'a'+s+(i-v);
366 for (i=0; i<s+t; i++){
372 Tensor<> A(s+v, len_A, sym_A, ctf,
"A", 1);
373 Tensor<> B(t+v, len_B, sym_B, ctf,
"B", 1);
374 Tensor<> C(s+t, len_C, sym_C, ctf,
"C", 1);
375 Tensor<> C_int(s+t, len_C, sym_C, ctf,
"C_psym", 1);
376 Tensor<> C_ans(s+t, len_C, sym_C, ctf,
"C_ans", 1);
399 for (i=0; i<size; i++){
400 values[i] = drand48();
402 vec.
write(size, indices, values);
406 for (i=0; i<t+v; i++){
407 B[idx_B] += vec[idx_B+i];
414 C_int[idx_C] += A[idx_A]*B[idx_B];
420 chi(idx_C, s+t, s, t, &ncperms, &idx_As, &idx_Bt);
422 for (i=0; i<ncperms; i++){
424 memcpy(idx_C_int, idx_As[i],
sizeof(
char)*s);
425 memcpy(idx_C_int+s, idx_Bt[i],
sizeof(
char)*t);
426 C_ans[idx_C] +=
sign(
parity(idx_As[i], idx_Bt[i], idx_C, s, t))*C_int[idx_C_int];
429 if (is_C_asym) printf(
"C_ans is antisymmetric\n");
430 else printf(
"C_ans is NOT antisymmetric!!\n");
435 for (i=0; i<s+v+t; i++){
441 Tensor<> Z_A_ops(s+v+t, len_Z, sym_Z, ctf,
"Z_A", 1);
442 Tensor<> Z_B_ops(s+v+t, len_Z, sym_Z, ctf,
"Z_B", 1);
443 Tensor<> Z_mults(s+v+t, len_Z, sym_Z, ctf,
"Z", 1);
448 chi(idx_Z, s+t+v, s+v, &nAperms, &idx_Asv);
450 for (i=0; i<nAperms; i++){
451 Z_A_ops[idx_Z] +=
sign(
parity(idx_Asv[i], idx_Z, s+v, s+t+v))*A[idx_Asv[i]];
454 if (is_A_asym) printf(
"Z_A_ops is antisymmetric\n");
455 else printf(
"Z_A_ops is NOT antisymmetric!!\n");
460 chi(idx_Z, s+t+v, t+v, &nBperms, &idx_Btv);
462 for (i=0; i<nBperms; i++){
463 Z_B_ops[idx_Z] += B[idx_Btv[i]];
466 if (is_B_sym) printf(
"Z_B_ops is symmetric\n");
467 else printf(
"Z_B_ops is NOT symmetric!!\n");
470 Z_mults[idx_Z] = Z_A_ops[idx_Z]*Z_B_ops[idx_Z];
473 if (is_Z_asym) printf(
"Z_mults is antisymmetric\n");
474 else printf(
"Z_mults is NOT antisymmetric!!\n");
476 memcpy(idx_Z,idx_C,(s+t)*
sizeof(
char));
477 for (i=s+t; i<s+t+v; i++){
478 idx_Z[i] = idx_Z[s+t-1]+(i-s-t+1);
481 C[idx_C]+=
sign(t*v)*Z_mults[idx_Z];
483 Tensor<> V(s+t, len_C, sym_C, ctf,
"V");
484 for (
int r=0; r<v; r++){
485 for (
int p=std::max(v-t-r,0); p<=v-r; p++){
486 for (
int q=std::max(v-s-r,0); q<=v-p-r; q++){
488 double sgn_V =
sign(p+1);
494 idx_kr[i] =
'a'+s+t+i;
498 idx_kp[i] =
'a'+s+t+r+i;
502 idx_kq[i] =
'a'+s+t+r+p+i;
505 Tensor<> V_A_ops(s+t+r, len_Z, sym_Z, ctf,
"V_A_ops");
507 memcpy(idx_VA,idx_C,(s+t)*
sizeof(
char));
508 memcpy(idx_VA+s+t,idx_kr,r*
sizeof(
char));
512 chi(idx_C, s+t, s+v-p-r, &nvAperms, &idx_VAsvpr);
513 for (i=0; i<nvAperms; i++){
515 memcpy(idx_VAA, idx_VAsvpr[i], (s+v-p-r)*
sizeof(
char));
516 memcpy(idx_VAA+s+v-p-r, idx_kr, r*
sizeof(
char));
517 memcpy(idx_VAA+s+v-p, idx_kp, p*
sizeof(
char));
518 double sgn_VA =
sign(
parity(idx_VAsvpr[i], idx_C, s+v-p-r, s+t));
519 V_A_ops[idx_VA] += sgn_VA*A[idx_VAA];
522 Tensor<> V_B_ops(s+t+r, len_Z, sym_Z, ctf,
"V_B_ops");
524 memcpy(idx_VB,idx_C,(s+t)*
sizeof(
char));
525 memcpy(idx_VB+s+t,idx_kr,r*
sizeof(
char));
530 chi(idx_C, s+t, t+v-q-r, &nvBperms, &idx_VBtvqr);
531 for (i=0; i<nvBperms; i++){
533 memcpy(idx_VBB, idx_VBtvqr[i], (t+v-q-r)*
sizeof(
char));
534 memcpy(idx_VBB+t+v-q-r, idx_kr, r*
sizeof(
char));
535 memcpy(idx_VBB+t+v-q, idx_kq, q*
sizeof(
char));
542 V_B_ops[idx_VB] += B[idx_VBB];
545 V[idx_C] += prefact*V_A_ops[idx_VA]*V_B_ops[idx_VB];
549 Tensor<> W(s+t, len_C, sym_C, ctf,
"W");
550 for (
int r=1; r<=std::min(s,t); r++){
552 for (
int i=0; i<r; i++){
553 idx_kr[i] =
'a'+s+t+i;
556 for (
int i=0; i<v; i++){
557 idx_kv[i] =
'a'+s+t+r+i;
559 Tensor<> U(s+t-r, len_C, sym_C, ctf,
"U");
563 memcpy(idx_U, idx_kr,
sizeof(
char)*r);
564 memcpy(idx_UA, idx_kr,
sizeof(
char)*r);
566 memcpy(idx_UB+t+v-r, idx_kr,
sizeof(
char)*r);
568 char ** idxj, ** idxl;
569 chi(idx_C, s+t-2*r, s-r, t-r, &npermU, &idxj, &idxl);
570 memcpy(idx_U+r,idx_C,s+t-2*r);
571 for (
int i=0; i<npermU; i++){
572 memcpy(idx_UA+r,idxj[i],s-r);
574 memcpy(idx_UB+v,idxl[i],t-r);
575 memcpy(idx_UA+s, idx_kv,
sizeof(
char)*v);
577 memcpy(idx_UB, idx_kv,
sizeof(
char)*v);
579 U[idx_U] += A[idx_UA]*B[idx_UB];
583 chi(idx_C, s+t, s+t-r, &npermW1,&idxh1);
584 for (
int j=0; j<npermW1; j++){
585 double sgn1 =
sign(
parity(idxh1[j], idx_C, s+t-r, s+t));
587 char ** idxh, ** idxr;
588 chi(idxh1[j], s+t-r, r, s+t-2*r, &npermW, &idxr, &idxh);
589 printf(
"npermW = %d\n",npermW);
590 for (
int i=0; i<npermW; i++){
591 memcpy(idx_U,idxr[i],r);
592 memcpy(idx_U+r,idxh[i],s+t-2*r);
593 W[idx_C] += sgn1*
sign(
parity(idxr[i], idxh[i],idxh1[j], r,s+t-2*r))*U[idx_U];
601 C[idx_C] -= V[idx_C];
602 C[idx_C] -= W[idx_C];
604 C[idx_C] -= C_ans[idx_C];
606 double nrm = C.
norm2();
608 printf(
"error 2-norm is %.4E\n",nrm);
610 int pass = (nrm <=1.E-3);
613 if (pass) printf(
"{ fast symmetric tensor contraction algorithm test } passed\n");
614 else printf(
"{ fast symmetric tensor contraction algorithm test } failed\n");
623 char ** itr = std::find(begin, end, option);
624 if (itr != end && ++itr != end){
631 int main(
int argc,
char ** argv){
633 int const in_num = argc;
634 char ** input_str = argv;
636 MPI_Init(&argc, &argv);
637 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
638 MPI_Comm_size(MPI_COMM_WORLD, &np);
641 n = atoi(
getCmdOption(input_str, input_str+in_num,
"-n"));
646 s = atoi(
getCmdOption(input_str, input_str+in_num,
"-s"));
651 t = atoi(
getCmdOption(input_str, input_str+in_num,
"-t"));
656 v = atoi(
getCmdOption(input_str, input_str+in_num,
"-v"));
661 World dw(MPI_COMM_WORLD, argc, argv);
663 printf(
"Contracting symmetric A of order %d with B of order %d into C of order %d, all with dimension %d\n",s+v,t+v,s+t,n);
int * sym
symmetries among tensor dimensions
int64_t chchoose(int64_t n, int64_t k)
Vector class which encapsulates a 1D tensor.
an instance of the CTF library (world) on a MPI communicator
bool check_asym(Tensor<> &tsr)
int order
number of tensor dimensions
void read_local(int64_t *npair, int64_t **global_idx, dtype **data, bool unpack_sym=false) const
Using get_local_data(), which returns an array that must be freed with delete [], is more efficient...
dtype norm2()
computes the frobenius norm of the tensor (needs sqrt()!)
CTF::World * wrld
distributed processor context on which tensor is defined
int * lens
unpadded tensor edge lengths
bool check_sym(Tensor<> tsr)
void chi(char const *idx, int idx_len, int p_len, int q_len, int *npair, char ***idx_p, char ***idx_q)
int fast_tensor_ctr(int n, int s, int t, int v, World &ctf)
void get_rand_as_tsr(Tensor<> &tsr, int seed)
int parity(char const *a, char const *b, char const *c, int len_A, int len_B)
int main(int argc, char **argv)
char * getCmdOption(char **begin, char **end, const std::string &option)
int64_t choose(int64_t n, int64_t k)
an instance of a tensor within a CTF world
void write(int64_t npair, int64_t const *global_idx, dtype const *data)
writes in values associated with any set of indices The sparse data is defined in coordinate format...
MPI_Comm comm
set of processors making up this world