=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2000/engine/nd.c,v retrieving revision 1.237 retrieving revision 1.244 diff -u -p -r1.237 -r1.244 --- OpenXM_contrib2/asir2000/engine/nd.c 2017/04/09 02:23:12 1.237 +++ OpenXM_contrib2/asir2000/engine/nd.c 2018/03/05 06:43:09 1.244 @@ -1,4 +1,4 @@ -/* $OpenXM: OpenXM_contrib2/asir2000/engine/nd.c,v 1.236 2017/03/27 09:05:46 noro Exp $ */ +/* $OpenXM: OpenXM_contrib2/asir2000/engine/nd.c,v 1.243 2018/03/05 01:56:17 noro Exp $ */ #include "nd.h" @@ -65,7 +65,7 @@ static int *nd_gbblock; static NODE nd_nzlist,nd_check_splist; static int nd_splist; static int *nd_sugarweight; -static int nd_f4red,nd_rank0; +static int nd_f4red,nd_rank0,nd_last_nonzero; NumberField get_numberfield(); UINT *nd_det_compute_bound(NDV **dm,int n,int j); @@ -98,6 +98,10 @@ void Pox_cmo_rpc(NODE,Obj *); ND nd_add_lf(ND p1,ND p2); void nd_mul_c_lf(ND p,GZ mul); void ndv_mul_c_lf(NDV p,GZ mul); +NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s0vect,int col, + NM_ind_pair *rvect,int *rhead,IndArray *imat,int nred,ND_pairs *nz); +NODE nd_f4_red_mod64_main(int m,ND_pairs sp0,int nsp,UINT *s0vect,int col, + NM_ind_pair *rvect,int *rhead,IndArray *imat,int nred,ND_pairs *nz); NODE nd_f4_red_lf_main(int m,ND_pairs sp0,int nsp,int trace,UINT *s0vect,int col, NM_ind_pair *rvect,int *rhead,IndArray *imat,int nred); int nd_gauss_elim_lf(mpz_t **mat0,int *sugar,int row,int col,int *colstat); @@ -2909,6 +2913,18 @@ ND_pairs nd_minsugarp( ND_pairs d, ND_pairs *prest ) return dm0; } +int nd_tdeg(NDV c) +{ + int wmax = 0; + int i,len; + NMV a; + + len = LEN(c); + for ( a = BDY(c), i = 0; i < len; i++, NMV_ADV(a) ) + wmax = MAX(TD(DL(a)),wmax); + return wmax; +} + int ndv_newps(int m,NDV a,NDV aq,int f4) { int len; @@ -2941,11 +2957,21 @@ int ndv_newps(int m,NDV a,NDV aq,int f4) } else error("ndv_newps : invalud modulus"); nd_bound[nd_psn] = ndv_compute_bound(aq); - SG(r) = SG(aq); ndl_copy(HDL(aq),DL(r)); +#if 1 + SG(r) = SG(aq); +#else + SG(r) = nd_tdeg(aq); +#endif + ndl_copy(HDL(aq),DL(r)); } else { if ( !m ) register_hcf(a); nd_bound[nd_psn] = ndv_compute_bound(a); - SG(r) = SG(a); ndl_copy(HDL(a),DL(r)); +#if 1 + SG(r) = SG(a); +#else + SG(r) = nd_tdeg(a); +#endif + ndl_copy(HDL(a),DL(r)); if ( !m && !nd_vc ) nd_ps_gz[nd_psn] = ndvtondvgz(a); } if ( nd_demand ) { @@ -3120,10 +3146,10 @@ void preprocess_algcoef(VL vv,VL av,struct order_spec if ( NID(hc) == N_DA ) { invdalg(hc,&inv); for ( m = BDY(d); m; m = NEXT(m) ) { - muldalg(inv,(DAlg)m->c,&da); m->c = (P)da; + muldalg(inv,(DAlg)m->c,&da); m->c = (Obj)da; } } - initd(ord); dtop(vv,vv,d,&poly); BDY(f) = (pointer)poly; + initd(ord); dtop(vv,vv,d,(Obj *)&poly); BDY(f) = (pointer)poly; } obj_dalgtoalg((Obj)f1,(Obj *)&f); @@ -3202,7 +3228,7 @@ void nd_gr(LIST f,LIST v,int m,int homo,int retdp,int NODE tr,tl1,tl2,tl3,tl4,nzlist; LIST l1,l2,l3,l4,l5; int j; - Q jq,bpe; + Q jq,bpe,last_nonzero; int *perm; EPOS oepos; int obpe,oadv,ompos,cbpe; @@ -3289,7 +3315,7 @@ void nd_gr(LIST f,LIST v,int m,int homo,int retdp,int } homogenize_order(ord,nvar,&ord1); nd_init_ord(ord1); - nd_setup_parameters(nvar+1,wmax); + nd_setup_parameters(nvar+1,nd_nzlist?0:wmax); for ( t = fd0; t; t = NEXT(t) ) ndv_homogenize((NDV)BDY(t),obpe,oadv,oepos,ompos); } @@ -3361,7 +3387,9 @@ FINAL: if ( nd_gentrace ) { if ( f4 ) { STOQ(16,bpe); - tr = mknode(4,*rp,(!ishomo&&homo)?ONE:0,BDY(nzlist),bpe); MKLIST(*rp,tr); + STOQ(nd_last_nonzero,last_nonzero); + tr = mknode(5,*rp,(!ishomo&&homo)?ONE:0,BDY(nzlist),bpe,last_nonzero); MKLIST(*rp,tr); + } else { tl1 = reverse_node(tl1); tl2 = reverse_node(tl2); tl3 = reverse_node(tl3); @@ -3814,7 +3842,7 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,int } get_eg(&eg1); init_eg(&eg_check); add_eg(&eg_check,&eg0,&eg1); if ( DP_Print ) - fprintf(asir_out,"check=%fsec\n",eg_check.exectime+eg_check.gctime); + fprintf(asir_out,"check=%.3fsec,",eg_check.exectime+eg_check.gctime); /* dp->p */ nd_bpe = cbpe; nd_setup_parameters(nd_nvar,0); @@ -3903,7 +3931,7 @@ void nmtodp(int mod,NM m,DP *r) NEWMP(mr); mr->dl = ndltodl(nd_nvar,DL(m)); - mr->c = ndctop(mod,m->c); + mr->c = (Obj)ndctop(mod,m->c); NEXT(mr) = 0; MKDP(nd_nvar,mr,dp); dp->sugar = mr->dl->td; *r = dp; } @@ -5317,7 +5345,7 @@ P ndvtop(int mod,VL vl,VL dvl,NDV p) if ( mod == -1 ) { e = IFTOF(CM(m)); MKGFS(e,gfs); c = (P)gfs; } else if ( mod == -2 ) { - c = gztoz(CZ(m)); + c = (P)gztoz(CZ(m)); } else if ( mod > 0 ) { STOQ(CM(m),q); c = (P)q; } else @@ -5436,7 +5464,7 @@ DP ndvtodp(int mod,NDV p) for ( t = BDY(p), i = 0; i < len; NMV_ADV(t), i++ ) { NEXTMP(m0,m); m->dl = ndltodl(nd_nvar,DL(t)); - m->c = ndctop(mod,t->c); + m->c = (Obj)ndctop(mod,t->c); } NEXT(m) = 0; MKDP(nd_nvar,m0,d); @@ -5457,7 +5485,7 @@ DP ndtodp(int mod,ND p) for ( t = BDY(p); t; t = NEXT(t) ) { NEXTMP(m0,m); m->dl = ndltodl(nd_nvar,DL(t)); - m->c = ndctop(mod,t->c); + m->c = (Obj)ndctop(mod,t->c); } NEXT(m) = 0; MKDP(nd_nvar,m0,d); @@ -5796,6 +5824,27 @@ int nd_to_vect(int mod,UINT *s0,int n,ND d,UINT *r) return i; } +#if defined(__GNUC__) && SIZEOF_LONG==8 + +#define MOD128(a,c,m) ((a)=(((c)!=0||((a)>=(m)))?(((((U128)(c))<<64)+(a))%(m)):(a))) + +int nd_to_vect64(int mod,UINT *s0,int n,ND d,U64 *r) +{ + NM m; + UINT *t,*s; + int i; + + for ( i = 0; i < n; i++ ) r[i] = 0; + for ( i = 0, s = s0, m = BDY(d); m; m = NEXT(m) ) { + t = DL(m); + for ( ; !ndl_equal(t,s); s += nd_wpd, i++ ); + r[i] = (U64)CM(m); + } + for ( i = 0; !r[i]; i++ ); + return i; +} +#endif + int nd_to_vect_q(UINT *s0,int n,ND d,Q *r) { NM m; @@ -6121,7 +6170,6 @@ int ndv_reduce_vect_gz(GZ *gvect,int trace,int col,Ind return maxrs; } - int ndv_reduce_vect(int m,UINT *svect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev; @@ -6191,6 +6239,78 @@ int ndv_reduce_vect(int m,UINT *svect,int col,IndArray return maxrs; } +#if defined(__GNUC__) && SIZEOF_LONG==8 + +int ndv_reduce_vect64(int m,U64 *svect,U64 *cvect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) +{ + int i,j,k,len,pos,prev; + U64 a,c,c1,c2; + IndArray ivect; + unsigned char *ivc; + unsigned short *ivs; + unsigned int *ivi; + NDV redv; + NMV mr; + NODE rp; + int maxrs; + + for ( i = 0; i < col; i++ ) cvect[i] = 0; + maxrs = 0; + for ( i = 0; i < nred; i++ ) { + ivect = imat[i]; + k = ivect->head; + a = svect[k]; c = cvect[k]; + MOD128(a,c,m); + svect[k] = a; cvect[k] = 0; + if ( c = svect[k] ) { + maxrs = MAX(maxrs,rp0[i]->sugar); + c = m-c; redv = nd_ps[rp0[i]->index]; + len = LEN(redv); mr = BDY(redv); + svect[k] = 0; prev = k; + switch ( ivect->width ) { + case 1: + ivc = ivect->index.c; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivc[j]; c1 = CM(mr); prev = pos; + if ( c1 ) { + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; + } + } + break; + case 2: + ivs = ivect->index.s; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivs[j]; c1 = CM(mr); prev = pos; + if ( c1 ) { + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; + } + } + break; + case 4: + ivi = ivect->index.i; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivi[j]; c1 = CM(mr); prev = pos; + if ( c1 ) { + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; + } + } + break; + } + } + } + for ( i = 0; i < col; i++ ) { + a = svect[i]; c = cvect[i]; MOD128(a,c,m); svect[i] = a; + } + return maxrs; +} +#endif + int ndv_reduce_vect_sf(int m,UINT *svect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev; @@ -6448,6 +6568,36 @@ NDV vect_to_ndv(UINT *vect,int spcol,int col,int *rhea } } +#if defined(__GNUC__) && SIZEOF_LONG==8 +NDV vect64_to_ndv(U64 *vect,int spcol,int col,int *rhead,UINT *s0vect) +{ + int j,k,len; + UINT *p; + UINT c; + NDV r; + NMV mr0,mr; + + for ( j = 0, len = 0; j < spcol; j++ ) if ( vect[j] ) len++; + if ( !len ) return 0; + else { + mr0 = (NMV)MALLOC_ATOMIC_IGNORE_OFF_PAGE(nmv_adv*len); +#if 0 + ndv_alloc += nmv_adv*len; +#endif + mr = mr0; + p = s0vect; + for ( j = k = 0; j < col; j++, p += nd_wpd ) + if ( !rhead[j] ) { + if ( c = (UINT)vect[k++] ) { + ndl_copy(p,DL(mr)); CM(mr) = c; NMV_ADV(mr); + } + } + MKNDV(nd_nvar,mr0,len,r); + return r; + } +} +#endif + NDV vect_to_ndv_2(unsigned long *vect,int col,UINT *s0vect) { int j,k,len; @@ -6654,11 +6804,20 @@ int nd_symbolic_preproc(PGeoBucket bucket,int trace,UI return col; } +void print_ndp(ND_pairs l) +{ + ND_pairs t; + + for ( t = l; t; t = NEXT(t) ) + printf("[%d,%d] ",t->i1,t->i2); + printf("\n"); +} + NODE nd_f4(int m,int checkonly,int **indp) { int i,nh,stat,index,f4red; NODE r,g,tn0,tn,node; - ND_pairs d,l,t,ll0,ll; + ND_pairs d,l,t,ll0,ll,lh; LIST l0,l1; ND spol,red; NDV nf,redv; @@ -6672,7 +6831,7 @@ NODE nd_f4(int m,int checkonly,int **indp) IndArray *imat; int *rhead; int spcol,sprow; - int sugar; + int sugar,sugarh; PGeoBucket bucket; struct oEGT eg0,eg1,eg_f4; Q i1,i2,sugarq; @@ -6681,37 +6840,35 @@ NODE nd_f4(int m,int checkonly,int **indp) #endif g = 0; d = 0; for ( i = 0; i < nd_psn; i++ ) { - if ( !nd_nzlist ) d = update_pairs(d,g,i,0); + d = update_pairs(d,g,i,0); g = update_base(g,i); } nzlist = 0; nzlist_t = nd_nzlist; - f4red = 0; - while ( d || nzlist_t ) { + f4red = 1; + nd_last_nonzero = 0; + while ( d ) { get_eg(&eg0); - if ( nd_nzlist ) { + l = nd_minsugarp(d,&d); + sugar = nd_sugarweight?l->sugar2:SG(l); + if ( MaxDeg > 0 && sugar > MaxDeg ) break; + if ( nzlist_t ) { node = BDY((LIST)BDY(nzlist_t)); - sugar = (int)ARG0(node); + sugarh = QTOS((Q)ARG0(node)); tn = BDY((LIST)ARG1(node)); if ( !tn ) { nzlist_t = NEXT(nzlist_t); continue; } /* tn = [[i1,i2],...] */ - l = nd_ipairtospair(tn); - } else { - l = nd_minsugarp(d,&d); - sugar = nd_sugarweight?l->sugar2:SG(l); - if ( MaxDeg > 0 && sugar > MaxDeg ) break; + lh = nd_ipairtospair(tn); } bucket = create_pbucket(); stat = nd_sp_f4(m,0,l,bucket); if ( !stat ) { - if ( !nd_nzlist ) { - for ( t = l; NEXT(t); t = NEXT(t) ); - NEXT(t) = d; d = l; - d = nd_reconstruct(0,d); - } + for ( t = l; NEXT(t); t = NEXT(t) ); + NEXT(t) = d; d = l; + d = nd_reconstruct(0,d); continue; } if ( bucket->m < 0 ) continue; @@ -6724,11 +6881,12 @@ NODE nd_f4(int m,int checkonly,int **indp) } get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); if ( DP_Print ) - fprintf(asir_out,"sugar=%d,symb=%fsec,", + fprintf(asir_out,"sugar=%d,symb=%.3fsec,", sugar,eg_f4.exectime+eg_f4.gctime); - nflist = nd_f4_red(m,l,0,s0vect,col,rp0,nd_gentrace?&ll:0); + nflist = nd_f4_red(m,nd_nzlist?lh:l,0,s0vect,col,rp0,nd_gentrace?&ll:0); if ( checkonly && nflist ) return 0; /* adding new bases */ + if ( nflist ) nd_last_nonzero = f4red; for ( r = nflist; r; r = NEXT(r) ) { nf = (NDV)BDY(r); ndv_removecont(m,nf); @@ -6741,11 +6899,11 @@ NODE nd_f4(int m,int checkonly,int **indp) nf = ndtondv(m,nf1); } nh = ndv_newps(m,nf,0,1); - if ( !nd_nzlist ) d = update_pairs(d,g,nh,0); + d = update_pairs(d,g,nh,0); g = update_base(g,nh); } if ( DP_Print ) { - fprintf(asir_out,"f4red=%d,gblen=%d,",f4red,length(g)); fflush(asir_out); + fprintf(asir_out,"f4red=%d,gblen=%d\n",f4red,length(g)); fflush(asir_out); } if ( nd_gentrace ) { for ( t = ll, tn0 = 0; t; t = NEXT(t) ) { @@ -6760,7 +6918,7 @@ NODE nd_f4(int m,int checkonly,int **indp) } if ( nd_nzlist ) nzlist_t = NEXT(nzlist_t); f4red++; - if ( nd_f4red && f4red >= nd_f4red ) break; + if ( nd_f4red && f4red > nd_f4red ) break; if ( nd_rank0 && !nflist ) break; } if ( nd_gentrace ) { @@ -6823,7 +6981,7 @@ NODE nd_f4_trace(int m,int **indp) } get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); if ( DP_Print ) - fprintf(asir_out,"sugar=%d,symb=%fsec,", + fprintf(asir_out,"sugar=%d,symb=%.3fsec,", sugar,eg_f4.exectime+eg_f4.gctime); nflist = nd_f4_red(m,l,0,s0vect,col,rp0,&l0); if ( !l0 ) continue; @@ -7037,7 +7195,7 @@ init_eg(&eg_search); get_eg(&eg2); init_eg(&eg_elim2); add_eg(&eg_elim2,&eg1,&eg2); if ( DP_Print ) { - fprintf(asir_out,"elim1=%fsec,elim2=%fsec\n", + fprintf(asir_out,"elim1=%.3fsec,elim2=%.3fsec,", eg_elim1.exectime+eg_elim1.gctime,eg_elim2.exectime+eg_elim2.gctime); fflush(asir_out); } @@ -7068,7 +7226,7 @@ init_eg(&eg_search); /* construction of index arrays */ if ( DP_Print ) { - fprintf(stderr,"%dx%d,",nsp+nred,col); + fprintf(asir_out,"%dx%d,",nsp+nred,col); } rvect = (NM_ind_pair *)MALLOC(nred*sizeof(NM_ind_pair)); s0hash = (int *)MALLOC(col*sizeof(int)); @@ -7079,8 +7237,14 @@ init_eg(&eg_search); imat[i] = nm_ind_pair_to_vect_compress(trace,s0vect,col,s0hash,rvect[i]); rhead[imat[i]->head] = 1; } - if ( m > 0 || m == -1 ) + if ( m > 0 ) +#if defined(__GNUC__) && SIZEOF_LONG==8 + r0 = nd_f4_red_mod64_main(m,sp0,nsp,s0vect,col,rvect,rhead,imat,nred,nz); +#else r0 = nd_f4_red_main(m,sp0,nsp,s0vect,col,rvect,rhead,imat,nred,nz); +#endif + else if ( m == -1 ) + r0 = nd_f4_red_sf_main(m,sp0,nsp,s0vect,col,rvect,rhead,imat,nred,nz); else if ( m == -2 ) r0 = nd_f4_red_lf_main(m,sp0,nsp,trace,s0vect,col,rvect,rhead,imat,nred); else @@ -7089,11 +7253,14 @@ init_eg(&eg_search); #else r0 = nd_f4_red_gz_main(sp0,nsp,trace,s0vect,col,rvect,rhead,imat,nred); #endif +#if 0 if ( DP_Print ) print_eg("search",&eg_search); +#endif return r0; } -/* for small finite fields */ +/* for Fp, 2<=p<2^16 */ + NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s0vect,int col, NM_ind_pair *rvect,int *rhead,IndArray *imat,int nred,ND_pairs *nz) { @@ -7139,7 +7306,7 @@ NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s } get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); if ( DP_Print ) { - fprintf(asir_out,"elim1=%fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime+eg_f4_1.gctime); fflush(asir_out); } /* free index arrays */ @@ -7164,10 +7331,10 @@ NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { - fprintf(asir_out,"elim2=%fsec\n",eg_f4_2.exectime+eg_f4_2.gctime); - fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%fsec\n",eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); } if ( nz ) { for ( i = 0; i < rank-1; i++ ) NEXT(spactive[i]) = spactive[i+1]; @@ -7180,6 +7347,174 @@ NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s return r0; } +#if defined(__GNUC__) && SIZEOF_LONG==8 +/* for Fp, 2^15=index.c); + + /* elimination (2nd step) */ + colstat = (int *)MALLOC(spcol*sizeof(int)); + rank = nd_gauss_elim_mod64(spmat,spsugar,spactive,sprow,spcol,m,colstat); + r0 = 0; + for ( i = 0; i < rank; i++ ) { + NEXTNODE(r0,r); BDY(r) = + (pointer)vect64_to_ndv(spmat[i],spcol,col,rhead,s0vect); + SG((NDV)BDY(r)) = spsugar[i]; + GCFREE(spmat[i]); + } + if ( r0 ) NEXT(r) = 0; + + for ( ; i < sprow; i++ ) GCFREE(spmat[i]); + get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); + init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); + if ( DP_Print ) { + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", + nsp,nred,sprow,spcol,rank); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); + } + if ( nz ) { + for ( i = 0; i < rank-1; i++ ) NEXT(spactive[i]) = spactive[i+1]; + if ( rank > 0 ) { + NEXT(spactive[rank-1]) = 0; + *nz = spactive[0]; + } else + *nz = 0; + } + return r0; +} +#endif + +/* for small finite fields */ + +NODE nd_f4_red_sf_main(int m,ND_pairs sp0,int nsp,UINT *s0vect,int col, + NM_ind_pair *rvect,int *rhead,IndArray *imat,int nred,ND_pairs *nz) +{ + int spcol,sprow,a; + int i,j,k,l,rank; + NODE r0,r; + ND_pairs sp; + ND spol; + int **spmat; + UINT *svect,*v; + int *colstat; + struct oEGT eg0,eg1,eg2,eg_f4,eg_f4_1,eg_f4_2; + int maxrs; + int *spsugar; + ND_pairs *spactive; + + spcol = col-nred; + get_eg(&eg0); + /* elimination (1st step) */ + spmat = (int **)MALLOC(nsp*sizeof(UINT *)); + svect = (UINT *)MALLOC(col*sizeof(UINT)); + spsugar = (int *)MALLOC(nsp*sizeof(int)); + spactive = !nz?0:(ND_pairs *)MALLOC(nsp*sizeof(ND_pairs)); + for ( a = sprow = 0, sp = sp0; a < nsp; a++, sp = NEXT(sp) ) { + nd_sp(m,0,sp,&spol); + if ( !spol ) continue; + nd_to_vect(m,s0vect,col,spol,svect); + maxrs = ndv_reduce_vect_sf(m,svect,col,imat,rvect,nred); + for ( i = 0; i < col; i++ ) if ( svect[i] ) break; + if ( i < col ) { + spmat[sprow] = v = (UINT *)MALLOC_ATOMIC(spcol*sizeof(UINT)); + for ( j = k = 0; j < col; j++ ) + if ( !rhead[j] ) v[k++] = svect[j]; + spsugar[sprow] = MAX(maxrs,SG(spol)); + if ( nz ) + spactive[sprow] = sp; + sprow++; + } + nd_free(spol); + } + get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); + if ( DP_Print ) { + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fflush(asir_out); + } + /* free index arrays */ + for ( i = 0; i < nred; i++ ) GCFREE(imat[i]->index.c); + + /* elimination (2nd step) */ + colstat = (int *)MALLOC(spcol*sizeof(int)); + rank = nd_gauss_elim_sf(spmat,spsugar,sprow,spcol,m,colstat); + r0 = 0; + for ( i = 0; i < rank; i++ ) { + NEXTNODE(r0,r); BDY(r) = + (pointer)vect_to_ndv(spmat[i],spcol,col,rhead,s0vect); + SG((NDV)BDY(r)) = spsugar[i]; + GCFREE(spmat[i]); + } + if ( r0 ) NEXT(r) = 0; + + for ( ; i < sprow; i++ ) GCFREE(spmat[i]); + get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); + init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); + if ( DP_Print ) { + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", + nsp,nred,sprow,spcol,rank); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); + } + if ( nz ) { + for ( i = 0; i < rank-1; i++ ) NEXT(spactive[i]) = spactive[i+1]; + if ( rank > 0 ) { + NEXT(spactive[rank-1]) = 0; + *nz = spactive[0]; + } else + *nz = 0; + } + return r0; +} + NODE nd_f4_red_lf_main(int m,ND_pairs sp0,int nsp,int trace,UINT *s0vect,int col, NM_ind_pair *rvect,int *rhead,IndArray *imat,int nred) { @@ -7219,7 +7554,7 @@ NODE nd_f4_red_lf_main(int m,ND_pairs sp0,int nsp,int } get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); if ( DP_Print ) { - fprintf(asir_out,"elim1=%fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime+eg_f4_1.gctime); fflush(asir_out); } /* free index arrays */ @@ -7254,10 +7589,10 @@ NODE nd_f4_red_lf_main(int m,ND_pairs sp0,int nsp,int get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { - fprintf(asir_out,"elim2=%fsec\n",eg_f4_2.exectime+eg_f4_2.gctime); - fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%fsec\n",eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); } return r0; } @@ -7302,7 +7637,7 @@ NODE nd_f4_red_q_main(ND_pairs sp0,int nsp,int trace,U } get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); if ( DP_Print ) { - fprintf(asir_out,"elim1=%fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime+eg_f4_1.gctime); fflush(asir_out); } /* free index arrays */ @@ -7336,10 +7671,10 @@ NODE nd_f4_red_q_main(ND_pairs sp0,int nsp,int trace,U get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { - fprintf(asir_out,"elim2=%fsec\n",eg_f4_2.exectime+eg_f4_2.gctime); - fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%fsec\n",eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); } return r0; } @@ -7384,7 +7719,7 @@ NODE nd_f4_red_gz_main(ND_pairs sp0,int nsp,int trace, } get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); if ( DP_Print ) { - fprintf(asir_out,"elim1=%fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fprintf(asir_out,"elim1=%.3fsec,",eg_f4_1.exectime+eg_f4_1.gctime); fflush(asir_out); } /* free index arrays */ @@ -7421,10 +7756,10 @@ NODE nd_f4_red_gz_main(ND_pairs sp0,int nsp,int trace, get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { - fprintf(asir_out,"elim2=%fsec\n",eg_f4_2.exectime+eg_f4_2.gctime); - fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%fsec\n",eg_f4.exectime+eg_f4.gctime); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime+eg_f4.gctime); } return r0; } @@ -7674,6 +8009,85 @@ int nd_gauss_elim_mod(int **mat0,int *sugar,ND_pairs * return rank; } +#if defined(__GNUC__) && SIZEOF_LONG==8 + +int nd_gauss_elim_mod64(U64 **mat,int *sugar,ND_pairs *spactive,int row,int col,int md,int *colstat) +{ + int i,j,k,l,rank,s; + U64 inv; + U64 a; + UINT c; + U64 *t,*pivot,*pk; + UINT *ck; + UINT **cmat; + UINT *ct; + ND_pairs pair; + + cmat = (UINT **)MALLOC(row*sizeof(UINT *)); + for ( i = 0; i < row; i++ ) { + cmat[i] = MALLOC_ATOMIC(col*sizeof(UINT)); + bzero(cmat[i],col*sizeof(UINT)); + } + + for ( rank = 0, j = 0; j < col; j++ ) { + for ( i = rank; i < row; i++ ) { + a = mat[i][j]; c = cmat[i][j]; + MOD128(a,c,md); + mat[i][j] = a; cmat[i][j] = 0; + } + for ( i = rank; i < row; i++ ) + if ( mat[i][j] ) + break; + if ( i == row ) { + colstat[j] = 0; + continue; + } else + colstat[j] = 1; + if ( i != rank ) { + t = mat[i]; mat[i] = mat[rank]; mat[rank] = t; + ct = cmat[i]; cmat[i] = cmat[rank]; cmat[rank] = ct; + s = sugar[i]; sugar[i] = sugar[rank]; sugar[rank] = s; + if ( spactive ) { + pair = spactive[i]; spactive[i] = spactive[rank]; + spactive[rank] = pair; + } + } + /* column j is normalized */ + s = sugar[rank]; + inv = invm((UINT)mat[rank][j],md); + /* normalize pivot row */ + for ( k = j, pk = mat[rank]+j, ck = cmat[rank]+j; k < col; k++, pk++, ck++ ) { + a = *pk; c = *ck; MOD128(a,c,md); *pk = (a*inv)%md; *ck = 0; + } + for ( i = rank+1; i < row; i++ ) { + if ( a = mat[i][j] ) { + sugar[i] = MAX(sugar[i],s); + red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[rank]+j,(int)(md-a),col-j); + } + } + rank++; + } + for ( j = col-1, l = rank-1; j >= 0; j-- ) + if ( colstat[j] ) { + for ( k = j, pk = mat[l]+j, ck = cmat[l]+j; k < col; k++, pk++, ck++ ) { + a = *pk; c = *ck; MOD128(a,c,md); *pk = a; *ck = 0; + } + s = sugar[l]; + for ( i = 0; i < l; i++ ) { + a = mat[i][j]; c = cmat[i][j]; MOD128(a,c,md); mat[i][j] = a; cmat[i][j] = 0; + if ( a ) { + sugar[i] = MAX(sugar[i],s); + red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[l]+j,(int)(md-a),col-j); + } + } + l--; + } + for ( i = 0; i < row; i++ ) GCFREE(cmat[i]); + GCFREE(cmat); + return rank; +} +#endif + int nd_gauss_elim_sf(int **mat0,int *sugar,int row,int col,int md,int *colstat) { int i,j,k,l,inv,a,rank,s; @@ -7963,7 +8377,7 @@ void nd_det(int mod,MAT f,P *rp) chsgnq(ONE,&mone); for ( j = 0, sgn = 1; j < n; j++ ) { if ( DP_Print ) { - fprintf(stderr,".",j); + fprintf(asir_out,".",j); } for ( i = j; i < n && !dm[i][j]; i++ ); if ( i == n ) { @@ -7993,14 +8407,14 @@ void nd_det(int mod,MAT f,P *rp) nd_det_reconstruct(dm,n,j,d); for ( i = j+1, mj = dm[j], mjj = mj[j]; i < n; i++ ) { -/* if ( DP_Print ) fprintf(stderr," i=%d\n ",i); */ +/* if ( DP_Print ) fprintf(asir_out," i=%d\n ",i); */ mi = dm[i]; mij = mi[j]; if ( mod ) ndv_mul_c(mod,mij,mod-1); else ndv_mul_c_q(mij,mone); for ( k = j+1; k < n; k++ ) { -/* if ( DP_Print ) fprintf(stderr,"k=%d ",k); */ +/* if ( DP_Print ) fprintf(asir_out,"k=%d ",k); */ bucket = create_pbucket(); if ( mi[k] ) { nmv = BDY(mjj); len = LEN(mjj); @@ -8019,12 +8433,12 @@ void nd_det(int mod,MAT f,P *rp) u = nd_quo(mod,bucket,d); mi[k] = ndtondv(mod,u); } -/* if ( DP_Print ) fprintf(stderr,"\n",k); */ +/* if ( DP_Print ) fprintf(asir_out,"\n",k); */ } d = mjj; } if ( DP_Print ) { - fprintf(stderr,"\n",k); + fprintf(asir_out,"\n",k); } if ( sgn < 0 ) if ( mod ) @@ -8200,14 +8614,14 @@ int nd_monic(int mod,ND *p) is_lc = 1; while ( 1 ) { NEWMP(mp0); mp = mp0; - mp->c = (P)CQ(m); + mp->c = (Obj)CQ(m); mp->dl = nd_separate_d(DL(m),DL(ma)); NEWNM(mb); for ( m = NEXT(m); m; m = NEXT(m) ) { alg = nd_separate_d(DL(m),DL(mb)); if ( !ndl_equal(DL(ma),DL(mb)) ) break; - NEXTMP(mp0,mp); mp->c = (P)CQ(m); mp->dl = alg; + NEXTMP(mp0,mp); mp->c = (Obj)CQ(m); mp->dl = alg; } NEXT(mp) = 0; MKDP(nd_nalg,mp0,nm); @@ -8386,9 +8800,11 @@ void parse_nd_option(NODE opt) else if ( !strcmp(key,"lf") ) nd_lf = value?1:0; else if ( !strcmp(key,"trace") ) { - u = BDY((LIST)value); - nd_nzlist = BDY((LIST)ARG2(u)); - nd_bpe = QTOS((Q)ARG3(u)); + if ( value ) { + u = BDY((LIST)value); + nd_nzlist = BDY((LIST)ARG2(u)); + nd_bpe = QTOS((Q)ARG3(u)); + } } else if ( !strcmp(key,"f4red") ) { nd_f4red = QTOS((Q)value); } else if ( !strcmp(key,"rank0") ) { @@ -8823,7 +9239,7 @@ void nd_f4_lf_trace(LIST f,LIST v,int trace,int homo,s } get_eg(&eg1); init_eg(&eg_check); add_eg(&eg_check,&eg0,&eg1); if ( DP_Print ) - fprintf(asir_out,"check=%fsec\n",eg_check.exectime+eg_check.gctime); + fprintf(asir_out,"check=%.3fsec\n",eg_check.exectime+eg_check.gctime); /* dp->p */ nd_bpe = cbpe; nd_setup_parameters(nd_nvar,0); @@ -8883,7 +9299,7 @@ NODE nd_f4_lf_trace_main(int m,int **indp) } get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); if ( DP_Print ) - fprintf(asir_out,"sugar=%d,symb=%fsec,", + fprintf(asir_out,"sugar=%d,symb=%.3fsec,", sugar,eg_f4.exectime+eg_f4.gctime); nflist = nd_f4_red(m,l,0,s0vect,col,rp0,&l0); if ( !l0 ) continue;