=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2018/engine/nd.c,v retrieving revision 1.1 retrieving revision 1.5 diff -u -p -r1.1 -r1.5 --- OpenXM_contrib2/asir2018/engine/nd.c 2018/09/19 05:45:07 1.1 +++ OpenXM_contrib2/asir2018/engine/nd.c 2018/09/27 02:39:37 1.5 @@ -1,4 +1,4 @@ -/* $OpenXM$ */ +/* $OpenXM: OpenXM_contrib2/asir2018/engine/nd.c,v 1.4 2018/09/25 07:36:01 noro Exp $ */ #include "nd.h" @@ -3837,7 +3837,7 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,int } get_eg(&eg1); init_eg(&eg_check); add_eg(&eg_check,&eg0,&eg1); if ( DP_Print ) - fprintf(asir_out,"check=%.3fsec,",eg_check.exectime+eg_check.gctime); + fprintf(asir_out,"check=%.3fsec,",eg_check.exectime); /* dp->p */ nd_bpe = cbpe; nd_setup_parameters(nd_nvar,0); @@ -4232,6 +4232,44 @@ void removecont_array_q(Z *c,int n) for ( i = 0; i < n; i++ ) c[i] = q[i]; } +void gcdv_mpz_estimate(mpz_t d0,mpz_t *c,int n); + +void mpz_removecont_array(mpz_t *c,int n) +{ + mpz_t d0,a,u,u1,gcd; + int i,j; + mpz_t *q,*r; + + for ( i = 0; i < n; i++ ) + if ( mpz_sgn(c[i]) ) break; + if ( i == n ) return; + gcdv_mpz_estimate(d0,c,n); + q = (mpz_t *)MALLOC(n*sizeof(mpz_t)); + r = (mpz_t *)MALLOC(n*sizeof(mpz_t)); + for ( i = 0; i < n; i++ ) { + mpz_init(q[i]); mpz_init(r[i]); + mpz_fdiv_qr(q[i],r[i],c[i],d0); + } + for ( i = 0; i < n; i++ ) + if ( mpz_sgn(r[i]) ) break; + mpz_init(gcd); mpz_init(a); mpz_init(u); mpz_init(u1); + if ( i < n ) { + mpz_gcd(gcd,d0,r[i]); + for ( j = i+1; j < n; j++ ) mpz_gcd(gcd,gcd,r[j]); + mpz_div(a,d0,gcd); + for ( i = 0; i < n; i++ ) { + mpz_mul(u,a,q[i]); + if ( mpz_sgn(r[i]) ) { + mpz_div(u1,r[i],gcd); + mpz_add(q[i],u,u1); + } else + mpz_set(q[i],u); + } + } + for ( i = 0; i < n; i++ ) + mpz_set(c[i],q[i]); +} + void nd_mul_c(int mod,ND p,int mul) { NM m; @@ -5796,7 +5834,7 @@ int nd_to_vect_q(UINT *s0,int n,ND d,Z *r) for ( i = 0, s = s0, m = BDY(d); m; m = NEXT(m) ) { t = DL(m); for ( ; !ndl_equal(t,s); s += nd_wpd, i++ ); - r[i] = CQ(m); + dupz(CQ(m),&r[i]); } for ( i = 0; !r[i]; i++ ); return i; @@ -5952,6 +5990,7 @@ void expand_array(Z *svect,Z *cvect,int n) if ( svect[i] ) svect[i] = cvect[j++]; } +#if 1 int ndv_reduce_vect_q(Z *svect,int trace,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev,nz; @@ -5966,6 +6005,7 @@ int ndv_reduce_vect_q(Z *svect,int trace,int col,IndAr int maxrs; double hmag; Z *cvect; + int l; maxrs = 0; for ( i = 0; i < col && !svect[i]; i++ ); @@ -5993,21 +6033,21 @@ int ndv_reduce_vect_q(Z *svect,int trace,int col,IndAr ivc = ivect->index.c; for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { pos = prev+ivc[j]; prev = pos; - mulz(CQ(mr),mcs,&c2); addz(svect[pos],c2,&t); svect[pos] = t; + muladdtoz(CQ(mr),mcs,&svect[pos]); } break; case 2: ivs = ivect->index.s; for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { pos = prev+ivs[j]; prev = pos; - mulz(CQ(mr),mcs,&c2); addz(svect[pos],c2,&t); svect[pos] = t; + muladdtoz(CQ(mr),mcs,&svect[pos]); } break; case 4: ivi = ivect->index.i; for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { pos = prev+ivi[j]; prev = pos; - mulz(CQ(mr),mcs,&c2); addz(svect[pos],c2,&t); svect[pos] = t; + muladdtoz(CQ(mr),mcs,&svect[pos]); } break; } @@ -6029,7 +6069,95 @@ int ndv_reduce_vect_q(Z *svect,int trace,int col,IndAr } return maxrs; } +#else +/* direct mpz version */ +int ndv_reduce_vect_q(Z *svect0,int trace,int col,IndArray *imat,NM_ind_pair *rp0,int nred) +{ + int i,j,k,len,pos,prev; + mpz_t *svect; + mpz_t cs,cr,gcd; + IndArray ivect; + unsigned char *ivc; + unsigned short *ivs; + unsigned int *ivi; + NDV redv; + NMV mr; + NODE rp; + int maxrs; + double hmag; + int l; + maxrs = 0; + for ( i = 0; i < col && !svect0[i]; i++ ); + if ( i == col ) return maxrs; + hmag = p_mag((P)svect0[i])*nd_scale; + svect = (mpz_t *)MALLOC(col*sizeof(mpz_t)); + for ( i = 0; i < col; i++ ) { + mpz_init(svect[i]); + if ( svect0[i] ) + mpz_set(svect[i],BDY(svect0[i])); + else + mpz_set_ui(svect[i],0); + } + mpz_init(gcd); mpz_init(cs); mpz_init(cr); + for ( i = 0; i < nred; i++ ) { + ivect = imat[i]; + k = ivect->head; + if ( mpz_sgn(svect[k]) ) { + maxrs = MAX(maxrs,rp0[i]->sugar); + redv = nd_demand?ndv_load(rp0[i]->index) + :(trace?nd_ps_trace[rp0[i]->index]:nd_ps[rp0[i]->index]); + len = LEN(redv); mr = BDY(redv); + mpz_gcd(gcd,svect[k],BDY(CQ(mr))); + mpz_div(cs,svect[k],gcd); + mpz_div(cr,BDY(CQ(mr)),gcd); + mpz_neg(cs,cs); + for ( j = 0; j < col; j++ ) + mpz_mul(svect[j],svect[j],cr); + mpz_set_ui(svect[k],0); + prev = k; + switch ( ivect->width ) { + case 1: + ivc = ivect->index.c; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivc[j]; prev = pos; + mpz_addmul(svect[pos],BDY(CQ(mr)),cs); + } + break; + case 2: + ivs = ivect->index.s; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivs[j]; prev = pos; + mpz_addmul(svect[pos],BDY(CQ(mr)),cs); + } + break; + case 4: + ivi = ivect->index.i; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivi[j]; prev = pos; + mpz_addmul(svect[pos],BDY(CQ(mr)),cs); + } + break; + } + for ( j = k+1; j < col && !svect[j]; j++ ); + if ( j == col ) break; + if ( hmag && ((double)mpz_sizeinbase(svect[j],2) > hmag) ) { + mpz_removecont_array(svect,col); + hmag = ((double)mpz_sizeinbase(svect[j],2))*nd_scale; + } + } + } + mpz_removecont_array(svect,col); + if ( DP_Print ) { + fprintf(asir_out,"-"); fflush(asir_out); + } + for ( i = 0; i < col; i++ ) + if ( mpz_sgn(svect[i]) ) MPZTOZ(svect[i],svect0[i]); + else svect0[i] = 0; + return maxrs; +} +#endif + int ndv_reduce_vect(int m,UINT *svect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev; @@ -6713,8 +6841,8 @@ NODE nd_f4(int m,int checkonly,int **indp) } get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); if ( DP_Print ) - fprintf(asir_out,"sugar=%d,symb=%.3fsec,", - sugar,eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"\nsugar=%d,symb=%.3fsec,", + sugar,eg_f4.exectime); nflist = nd_f4_red(m,nd_nzlist?lh:l,0,s0vect,col,rp0,nd_gentrace?&ll:0); if ( checkonly && nflist ) return 0; /* adding new bases */ @@ -6813,8 +6941,8 @@ NODE nd_f4_trace(int m,int **indp) } get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); if ( DP_Print ) - fprintf(asir_out,"sugar=%d,symb=%.3fsec,", - sugar,eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"\nsugar=%d,symb=%.3fsec,", + sugar,eg_f4.exectime); nflist = nd_f4_red(m,l,0,s0vect,col,rp0,&l0); if ( !l0 ) continue; l = l0; @@ -7028,7 +7156,7 @@ init_eg(&eg_search); init_eg(&eg_elim2); add_eg(&eg_elim2,&eg1,&eg2); if ( DP_Print ) { fprintf(asir_out,"elim1=%.3fsec,elim2=%.3fsec,", - eg_elim1.exectime+eg_elim1.gctime,eg_elim2.exectime+eg_elim2.gctime); + eg_elim1.exectime,eg_elim2.exectime); fflush(asir_out); } return r0; @@ -7134,7 +7262,7 @@ NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s } get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); if ( DP_Print ) { - fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime); fflush(asir_out); } /* free index arrays */ @@ -7159,10 +7287,10 @@ NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { - fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime); fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime); } if ( nz ) { for ( i = 0; i < rank-1; i++ ) NEXT(spactive[i]) = spactive[i+1]; @@ -7222,7 +7350,7 @@ NODE nd_f4_red_mod64_main(int m,ND_pairs sp0,int nsp,U } get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); if ( DP_Print ) { - fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime); fflush(asir_out); } /* free index arrays */ @@ -7244,10 +7372,10 @@ NODE nd_f4_red_mod64_main(int m,ND_pairs sp0,int nsp,U get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { - fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime); fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime); } if ( nz ) { for ( i = 0; i < rank-1; i++ ) NEXT(spactive[i]) = spactive[i+1]; @@ -7305,7 +7433,7 @@ NODE nd_f4_red_sf_main(int m,ND_pairs sp0,int nsp,UINT } get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); if ( DP_Print ) { - fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime); fflush(asir_out); } /* free index arrays */ @@ -7327,10 +7455,10 @@ NODE nd_f4_red_sf_main(int m,ND_pairs sp0,int nsp,UINT get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { - fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime); fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime); } if ( nz ) { for ( i = 0; i < rank-1; i++ ) NEXT(spactive[i]) = spactive[i+1]; @@ -7382,7 +7510,7 @@ NODE nd_f4_red_lf_main(int m,ND_pairs sp0,int nsp,int } get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); if ( DP_Print ) { - fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime); fflush(asir_out); } /* free index arrays */ @@ -7417,10 +7545,10 @@ NODE nd_f4_red_lf_main(int m,ND_pairs sp0,int nsp,int get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { - fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime); fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime); } return r0; } @@ -7464,7 +7592,7 @@ NODE nd_f4_red_q_main(ND_pairs sp0,int nsp,int trace,U } get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); if ( DP_Print ) { - fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime); fflush(asir_out); } /* free index arrays */ @@ -7498,10 +7626,10 @@ NODE nd_f4_red_q_main(ND_pairs sp0,int nsp,int trace,U get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { - fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime); fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime); } return r0; } @@ -9044,7 +9172,7 @@ void nd_f4_lf_trace(LIST f,LIST v,int trace,int homo,s } get_eg(&eg1); init_eg(&eg_check); add_eg(&eg_check,&eg0,&eg1); if ( DP_Print ) - fprintf(asir_out,"check=%.3fsec\n",eg_check.exectime+eg_check.gctime); + fprintf(asir_out,"check=%.3fsec\n",eg_check.exectime); /* dp->p */ nd_bpe = cbpe; nd_setup_parameters(nd_nvar,0); @@ -9104,8 +9232,7 @@ NODE nd_f4_lf_trace_main(int m,int **indp) } get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); if ( DP_Print ) - fprintf(asir_out,"sugar=%d,symb=%.3fsec,", - sugar,eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"\nsugar=%d,symb=%.3fsec,",sugar,eg_f4.exectime); nflist = nd_f4_red(m,l,0,s0vect,col,rp0,&l0); if ( !l0 ) continue; l = l0;