=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2018/engine/nd.c,v retrieving revision 1.8 retrieving revision 1.13 diff -u -p -r1.8 -r1.13 --- OpenXM_contrib2/asir2018/engine/nd.c 2018/10/01 07:48:01 1.8 +++ OpenXM_contrib2/asir2018/engine/nd.c 2019/01/14 09:17:34 1.13 @@ -1,8 +1,9 @@ -/* $OpenXM: OpenXM_contrib2/asir2018/engine/nd.c,v 1.7 2018/10/01 05:49:06 noro Exp $ */ +/* $OpenXM: OpenXM_contrib2/asir2018/engine/nd.c,v 1.12 2018/11/12 04:25:13 noro Exp $ */ #include "nd.h" -struct oEGT eg_search; +int Nnd_add,Nf4_red; +struct oEGT eg_search,f4_symb,f4_conv,f4_elim1,f4_elim2; int diag_period = 6; int weight_check = 1; @@ -1125,11 +1126,12 @@ int ndl_check_bound2(int index,UINT *d2) INLINE int ndl_hash_value(UINT *d) { int i; - int r; + UINT r; r = 0; for ( i = 0; i < nd_wpd; i++ ) - r = ((r<<16)+d[i])%REDTAB_LEN; + r = (r*1511+d[i]); + r %= REDTAB_LEN; return r; } @@ -1216,6 +1218,7 @@ ND nd_add(int mod,ND p1,ND p2) ND r; NM m1,m2,mr0,mr,s; + Nnd_add++; if ( !p1 ) return p2; else if ( !p2 ) return p1; else if ( mod == -1 ) return nd_add_sf(p1,p2); @@ -2081,6 +2084,7 @@ NODE nd_gb(int m,int ishomo,int checkonly,int gensyz,i P cont; LIST list; + Nnd_add = 0; g = 0; d = 0; for ( i = 0; i < nd_psn; i++ ) { d = update_pairs(d,g,i,gensyz); @@ -2170,7 +2174,7 @@ again: } } conv_ilist(nd_demand,0,g,indp); - if ( !checkonly && DP_Print ) { printf("nd_gb done.\n"); fflush(stdout); } + if ( !checkonly && DP_Print ) { printf("nd_gb done. Number of nd_add=%d\n",Nnd_add); fflush(stdout); } return g; } @@ -4231,14 +4235,18 @@ void mpz_removecont_array(mpz_t *c,int n) { mpz_t d0,a,u,u1,gcd; int i,j; - mpz_t *q,*r; + static mpz_t *q,*r; + static int c_len = 0; for ( i = 0; i < n; i++ ) if ( mpz_sgn(c[i]) ) break; if ( i == n ) return; gcdv_mpz_estimate(d0,c,n); - q = (mpz_t *)MALLOC(n*sizeof(mpz_t)); - r = (mpz_t *)MALLOC(n*sizeof(mpz_t)); + if ( n > c_len ) { + q = (mpz_t *)MALLOC(n*sizeof(mpz_t)); + r = (mpz_t *)MALLOC(n*sizeof(mpz_t)); + c_len = n; + } for ( i = 0; i < n; i++ ) { mpz_init(q[i]); mpz_init(r[i]); mpz_fdiv_qr(q[i],r[i],c[i],d0); @@ -5889,18 +5897,17 @@ Z *nm_ind_pair_to_vect(int mod,UINT *s0,int n,NM_ind_p return r; } -IndArray nm_ind_pair_to_vect_compress(int trace,UINT *s0,int n,int *s0hash,NM_ind_pair pair) +IndArray nm_ind_pair_to_vect_compress(int trace,UINT *s0,int n,NM_ind_pair pair,int start) { NM m; NMV mr; - UINT *d,*t,*s; + UINT *d,*t,*s,*u; NDV p; unsigned char *ivc; unsigned short *ivs; UINT *v,*ivi,*s0v; - int i,j,len,prev,diff,cdiff,h; + int i,j,len,prev,diff,cdiff,h,st,ed,md,c; IndArray r; -struct oEGT eg0,eg1; m = pair->mul; d = DL(m); @@ -5912,14 +5919,20 @@ struct oEGT eg0,eg1; len = LEN(p); t = (UINT *)MALLOC(nd_wpd*sizeof(UINT)); v = (unsigned int *)MALLOC(len*sizeof(unsigned int)); -get_eg(&eg0); - for ( i = j = 0, s = s0, mr = BDY(p); j < len; j++, NMV_ADV(mr) ) { - ndl_add(d,DL(mr),t); - h = ndl_hash_value(t); - for ( ; h != s0hash[i] || !ndl_equal(t,s); s += nd_wpd, i++ ); - v[j] = i; + for ( prev = start, mr = BDY(p), j = 0; j < len; j++, NMV_ADV(mr) ) { + ndl_add(d,DL(mr),t); + st = prev; + ed = n; + while ( ed > st ) { + md = (st+ed)/2; + u = s0+md*nd_wpd; + c = DL_COMPARE(u,t); + if ( c == 0 ) break; + else if ( c > 0 ) st = md; + else ed = md; + } + prev = v[j] = md; } -get_eg(&eg1); add_eg(&eg_search,&eg0,&eg1); r = (IndArray)MALLOC(sizeof(struct oIndArray)); r->head = v[0]; diff = 0; @@ -6042,11 +6055,11 @@ int ndv_reduce_vect_q(Z *svect,int trace,int col,IndAr return maxrs; } #else + /* direct mpz version */ int ndv_reduce_vect_q(Z *svect0,int trace,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev; - mpz_t *svect; mpz_t cs,cr,gcd; IndArray ivect; unsigned char *ivc; @@ -6058,12 +6071,17 @@ int ndv_reduce_vect_q(Z *svect0,int trace,int col,IndA int maxrs; double hmag; int l; + static mpz_t *svect; + static int svect_len=0; maxrs = 0; for ( i = 0; i < col && !svect0[i]; i++ ); if ( i == col ) return maxrs; hmag = p_mag((P)svect0[i])*nd_scale; - svect = (mpz_t *)MALLOC(col*sizeof(mpz_t)); + if ( col > svect_len ) { + svect = (mpz_t *)MALLOC(col*sizeof(mpz_t)); + svect_len = col; + } for ( i = 0; i < col; i++ ) { mpz_init(svect[i]); if ( svect0[i] ) @@ -6084,8 +6102,12 @@ int ndv_reduce_vect_q(Z *svect0,int trace,int col,IndA mpz_div(cs,svect[k],gcd); mpz_div(cr,BDY(CZ(mr)),gcd); mpz_neg(cs,cs); - for ( j = 0; j < col; j++ ) - mpz_mul(svect[j],svect[j],cr); + if ( MUNIMPZ(cr) ) + for ( j = 0; j < col; j++ ) mpz_neg(svect[j],svect[j]); + else if ( !UNIMPZ(cr) ) + for ( j = 0; j < col; j++ ) { + if ( mpz_sgn(svect[j]) ) mpz_mul(svect[j],svect[j],cr); + } mpz_set_ui(svect[k],0); prev = k; switch ( ivect->width ) { @@ -6665,9 +6687,12 @@ NODE nd_f4(int m,int checkonly,int **indp) PGeoBucket bucket; struct oEGT eg0,eg1,eg_f4; Z i1,i2,sugarq; + + init_eg(&f4_symb); init_eg(&f4_conv); init_eg(&f4_conv); init_eg(&f4_elim1); init_eg(&f4_elim2); #if 0 ndv_alloc = 0; #endif + Nf4_red=0; g = 0; d = 0; for ( i = 0; i < nd_psn; i++ ) { d = update_pairs(d,g,i,0); @@ -6709,7 +6734,7 @@ NODE nd_f4(int m,int checkonly,int **indp) d = nd_reconstruct(0,d); continue; } - get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); + get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); add_eg(&f4_symb,&eg0,&eg1); if ( DP_Print ) fprintf(asir_out,"sugar=%d,symb=%.3fsec,", sugar,eg_f4.exectime); @@ -6758,6 +6783,11 @@ NODE nd_f4(int m,int checkonly,int **indp) #if 0 fprintf(asir_out,"ndv_alloc=%d\n",ndv_alloc); #endif + if ( DP_Print ) { + fprintf(asir_out,"number of red=%d,",Nf4_red); + fprintf(asir_out,"symb=%.3fsec,conv=%.3fsec,elim1=%.3fsec,elim2=%.3fsec\n", + f4_symb.exectime,f4_conv.exectime,f4_elim1.exectime,f4_elim2.exectime); + } conv_ilist(nd_demand,0,g,indp); return g; } @@ -6981,7 +7011,6 @@ NODE nd_f4_red_2(ND_pairs sp0,UINT *s0vect,int col,NOD unsigned long *v; get_eg(&eg0); -init_eg(&eg_search); for ( sp = sp0, nsp = 0; sp; sp = NEXT(sp), nsp++ ); nred = length(rp0); mat = alloc_matrix(nsp,col); @@ -7036,18 +7065,18 @@ init_eg(&eg_search); NODE nd_f4_red(int m,ND_pairs sp0,int trace,UINT *s0vect,int col,NODE rp0,ND_pairs *nz) { IndArray *imat; - int nsp,nred,i; + int nsp,nred,i,start; int *rhead; NODE r0,rp; ND_pairs sp; NM_ind_pair *rvect; UINT *s; int *s0hash; + struct oEGT eg0,eg1,eg_conv; if ( m == 2 && nd_rref2 ) return nd_f4_red_2(sp0,s0vect,col,rp0,nz); -init_eg(&eg_search); for ( sp = sp0, nsp = 0; sp; sp = NEXT(sp), nsp++ ); nred = length(rp0); imat = (IndArray *)MALLOC(nred*sizeof(IndArray)); @@ -7055,18 +7084,23 @@ init_eg(&eg_search); for ( i = 0; i < col; i++ ) rhead[i] = 0; /* construction of index arrays */ + get_eg(&eg0); if ( DP_Print ) { - fprintf(asir_out,"%dx%d,",nsp+nred,col); + fprintf(asir_out,"%dx%d,",nsp+nred,col); + fflush(asir_out); } rvect = (NM_ind_pair *)MALLOC(nred*sizeof(NM_ind_pair)); - s0hash = (int *)MALLOC(col*sizeof(int)); - for ( i = 0, s = s0vect; i < col; i++, s += nd_wpd ) - s0hash[i] = ndl_hash_value(s); - for ( rp = rp0, i = 0; rp; i++, rp = NEXT(rp) ) { + for ( start = 0, rp = rp0, i = 0; rp; i++, rp = NEXT(rp) ) { rvect[i] = (NM_ind_pair)BDY(rp); - imat[i] = nm_ind_pair_to_vect_compress(trace,s0vect,col,s0hash,rvect[i]); + imat[i] = nm_ind_pair_to_vect_compress(trace,s0vect,col,rvect[i],start); rhead[imat[i]->head] = 1; + start = imat[i]->head; } + get_eg(&eg1); init_eg(&eg_conv); add_eg(&eg_conv,&eg0,&eg1); add_eg(&f4_conv,&eg0,&eg1); + if ( DP_Print ) { + fprintf(asir_out,"conv=%.3fsec,",eg_conv.exectime); + fflush(asir_out); + } if ( m > 0 ) #if SIZEOF_LONG==8 r0 = nd_f4_red_mod64_main(m,sp0,nsp,s0vect,col,rvect,rhead,imat,nred,nz); @@ -7079,9 +7113,6 @@ init_eg(&eg_search); r0 = nd_f4_red_lf_main(m,sp0,nsp,trace,s0vect,col,rvect,rhead,imat,nred); else r0 = nd_f4_red_q_main(sp0,nsp,trace,s0vect,col,rvect,rhead,imat,nred); -#if 0 - if ( DP_Print ) print_eg("search",&eg_search); -#endif return r0; } @@ -8196,7 +8227,7 @@ P ndc_div(int mod,union oNDC a,union oNDC b) int inv,t; if ( mod == -1 ) c.m = _mulsf(a.m,_invsf(b.m)); - else if ( mod == -2 ) divlf(a.gz,b.gz,&c.gz); + else if ( mod == -2 ) divlf(a.z,b.z,&c.z); else if ( mod ) { inv = invm(b.m,mod); DMAR(a.m,inv,0,mod,t); c.m = t; @@ -8216,7 +8247,7 @@ P ndctop(int mod,union oNDC c) if ( mod == -1 ) { e = IFTOF(c.m); MKGFS(e,gfs); return (P)gfs; } else if ( mod == -2 ) { - q = c.gz; return (P)q; + q = c.z; return (P)q; } else if ( mod > 0 ) { STOZ(c.m,q); return (P)q; } else @@ -9012,14 +9043,25 @@ NDV vect64_to_ndv(mp_limb_t *vect,int spcol,int col,in int nd_to_vect64(int mod,UINT *s0,int n,ND d,mp_limb_t *r) { NM m; - UINT *t,*s; - int i; + UINT *t,*s,*u; + int i,st,ed,md,prev,c; for ( i = 0; i < n; i++ ) r[i] = 0; - for ( i = 0, s = s0, m = BDY(d); m; m = NEXT(m) ) { - t = DL(m); - for ( ; !ndl_equal(t,s); s += nd_wpd, i++ ); - r[i] = (mp_limb_t)CM(m); + prev = 0; + for ( i = 0, m = BDY(d); m; m = NEXT(m) ) { + t = DL(m); + st = prev; + ed = n; + while ( ed > st ) { + md = (st+ed)/2; + u = s0+md*nd_wpd; + c = DL_COMPARE(u,t); + if ( c == 0 ) break; + else if ( c > 0 ) st = md; + else ed = md; + } + r[md] = (mp_limb_t)CM(m); + prev = md; } for ( i = 0; !r[i]; i++ ); return i; @@ -9049,6 +9091,7 @@ int ndv_reduce_vect64(int m,mp_limb_t *svect,mp_limb_t MOD128(a,c,m); svect[k] = a; cvect[k] = 0; if ( (c = svect[k]) != 0 ) { + Nf4_red++; maxrs = MAX(maxrs,rp0[i]->sugar); c = m-c; redv = nd_ps[rp0[i]->index]; len = LEN(redv); mr = BDY(redv); @@ -9058,33 +9101,27 @@ int ndv_reduce_vect64(int m,mp_limb_t *svect,mp_limb_t ivc = ivect->index.c; for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { pos = prev+ivc[j]; c1 = CM(mr); prev = pos; - if ( c1 ) { - c2 = svect[pos]+c1*c; - if ( c2 < svect[pos] ) cvect[pos]++; - svect[pos] = c2; - } + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; } break; case 2: ivs = ivect->index.s; for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { pos = prev+ivs[j]; c1 = CM(mr); prev = pos; - if ( c1 ) { - c2 = svect[pos]+c1*c; - if ( c2 < svect[pos] ) cvect[pos]++; - svect[pos] = c2; - } + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; } break; case 4: ivi = ivect->index.i; for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { pos = prev+ivi[j]; c1 = CM(mr); prev = pos; - if ( c1 ) { - c2 = svect[pos]+c1*c; - if ( c2 < svect[pos] ) cvect[pos]++; - svect[pos] = c2; - } + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; } break; } @@ -9140,7 +9177,7 @@ NODE nd_f4_red_mod64_main(int m,ND_pairs sp0,int nsp,U } nd_free(spol); } - get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); + get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); add_eg(&f4_elim1,&eg0,&eg1); if ( DP_Print ) { fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime); fflush(asir_out); @@ -9161,7 +9198,7 @@ NODE nd_f4_red_mod64_main(int m,ND_pairs sp0,int nsp,U if ( r0 ) NEXT(r) = 0; for ( ; i < sprow; i++ ) GCFREE(spmat[i]); - get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); + get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); add_eg(&f4_elim2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime); @@ -9232,6 +9269,7 @@ int nd_gauss_elim_mod64(mp_limb_t **mat,int *sugar,ND_ if ( (a = mat[i][j]) != 0 ) { sugar[i] = MAX(sugar[i],s); red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[rank]+j,(int)(md-a),col-j); + Nf4_red++; } } rank++; @@ -9247,6 +9285,7 @@ int nd_gauss_elim_mod64(mp_limb_t **mat,int *sugar,ND_ if ( a ) { sugar[i] = MAX(sugar[i],s); red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[l]+j,(int)(md-a),col-j); + Nf4_red++; } } l--;