=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2018/engine/nd.c,v retrieving revision 1.6 retrieving revision 1.12 diff -u -p -r1.6 -r1.12 --- OpenXM_contrib2/asir2018/engine/nd.c 2018/09/28 08:20:28 1.6 +++ OpenXM_contrib2/asir2018/engine/nd.c 2018/11/12 04:25:13 1.12 @@ -1,8 +1,9 @@ -/* $OpenXM: OpenXM_contrib2/asir2018/engine/nd.c,v 1.5 2018/09/27 02:39:37 noro Exp $ */ +/* $OpenXM: OpenXM_contrib2/asir2018/engine/nd.c,v 1.11 2018/10/23 04:53:37 noro Exp $ */ #include "nd.h" -struct oEGT eg_search; +int Nnd_add,Nf4_red; +struct oEGT eg_search,f4_symb,f4_conv,f4_elim1,f4_elim2; int diag_period = 6; int weight_check = 1; @@ -1125,11 +1126,12 @@ int ndl_check_bound2(int index,UINT *d2) INLINE int ndl_hash_value(UINT *d) { int i; - int r; + UINT r; r = 0; for ( i = 0; i < nd_wpd; i++ ) - r = ((r<<16)+d[i])%REDTAB_LEN; + r = (r*1511+d[i]); + r %= REDTAB_LEN; return r; } @@ -1216,6 +1218,7 @@ ND nd_add(int mod,ND p1,ND p2) ND r; NM m1,m2,mr0,mr,s; + Nnd_add++; if ( !p1 ) return p2; else if ( !p2 ) return p1; else if ( mod == -1 ) return nd_add_sf(p1,p2); @@ -2081,6 +2084,7 @@ NODE nd_gb(int m,int ishomo,int checkonly,int gensyz,i P cont; LIST list; + Nnd_add = 0; g = 0; d = 0; for ( i = 0; i < nd_psn; i++ ) { d = update_pairs(d,g,i,gensyz); @@ -2170,7 +2174,7 @@ again: } } conv_ilist(nd_demand,0,g,indp); - if ( !checkonly && DP_Print ) { printf("nd_gb done.\n"); fflush(stdout); } + if ( !checkonly && DP_Print ) { printf("nd_gb done. Number of nd_add=%d\n",Nnd_add); fflush(stdout); } return g; } @@ -5796,27 +5800,6 @@ int nd_to_vect(int mod,UINT *s0,int n,ND d,UINT *r) return i; } -#if defined(__GNUC__) && SIZEOF_LONG==8 - -#define MOD128(a,c,m) ((a)=(((c)!=0||((a)>=(m)))?(((((U128)(c))<<64)+(a))%(m)):(a))) - -int nd_to_vect64(int mod,UINT *s0,int n,ND d,U64 *r) -{ - NM m; - UINT *t,*s; - int i; - - for ( i = 0; i < n; i++ ) r[i] = 0; - for ( i = 0, s = s0, m = BDY(d); m; m = NEXT(m) ) { - t = DL(m); - for ( ; !ndl_equal(t,s); s += nd_wpd, i++ ); - r[i] = (U64)CM(m); - } - for ( i = 0; !r[i]; i++ ); - return i; -} -#endif - int nd_to_vect_q(UINT *s0,int n,ND d,Z *r) { NM m; @@ -5910,18 +5893,17 @@ Z *nm_ind_pair_to_vect(int mod,UINT *s0,int n,NM_ind_p return r; } -IndArray nm_ind_pair_to_vect_compress(int trace,UINT *s0,int n,int *s0hash,NM_ind_pair pair) +IndArray nm_ind_pair_to_vect_compress(int trace,UINT *s0,int n,NM_ind_pair pair,int start) { NM m; NMV mr; - UINT *d,*t,*s; + UINT *d,*t,*s,*u; NDV p; unsigned char *ivc; unsigned short *ivs; UINT *v,*ivi,*s0v; - int i,j,len,prev,diff,cdiff,h; + int i,j,len,prev,diff,cdiff,h,st,ed,md,c; IndArray r; -struct oEGT eg0,eg1; m = pair->mul; d = DL(m); @@ -5933,14 +5915,20 @@ struct oEGT eg0,eg1; len = LEN(p); t = (UINT *)MALLOC(nd_wpd*sizeof(UINT)); v = (unsigned int *)MALLOC(len*sizeof(unsigned int)); -get_eg(&eg0); - for ( i = j = 0, s = s0, mr = BDY(p); j < len; j++, NMV_ADV(mr) ) { - ndl_add(d,DL(mr),t); - h = ndl_hash_value(t); - for ( ; h != s0hash[i] || !ndl_equal(t,s); s += nd_wpd, i++ ); - v[j] = i; + for ( prev = start, mr = BDY(p), j = 0; j < len; j++, NMV_ADV(mr) ) { + ndl_add(d,DL(mr),t); + st = prev; + ed = n; + while ( ed > st ) { + md = (st+ed)/2; + u = s0+md*nd_wpd; + c = DL_COMPARE(u,t); + if ( c == 0 ) break; + else if ( c > 0 ) st = md; + else ed = md; + } + prev = v[j] = md; } -get_eg(&eg1); add_eg(&eg_search,&eg0,&eg1); r = (IndArray)MALLOC(sizeof(struct oIndArray)); r->head = v[0]; diff = 0; @@ -5983,7 +5971,7 @@ void expand_array(Z *svect,Z *cvect,int n) if ( svect[i] ) svect[i] = cvect[j++]; } -#if 1 +#if 0 int ndv_reduce_vect_q(Z *svect,int trace,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev,nz; @@ -6063,6 +6051,7 @@ int ndv_reduce_vect_q(Z *svect,int trace,int col,IndAr return maxrs; } #else + /* direct mpz version */ int ndv_reduce_vect_q(Z *svect0,int trace,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { @@ -6105,8 +6094,12 @@ int ndv_reduce_vect_q(Z *svect0,int trace,int col,IndA mpz_div(cs,svect[k],gcd); mpz_div(cr,BDY(CZ(mr)),gcd); mpz_neg(cs,cs); - for ( j = 0; j < col; j++ ) - mpz_mul(svect[j],svect[j],cr); + if ( MUNIMPZ(cr) ) + for ( j = 0; j < col; j++ ) mpz_neg(svect[j],svect[j]); + else if ( !UNIMPZ(cr) ) + for ( j = 0; j < col; j++ ) { + if ( mpz_sgn(svect[j]) ) mpz_mul(svect[j],svect[j],cr); + } mpz_set_ui(svect[k],0); prev = k; switch ( ivect->width ) { @@ -6220,78 +6213,6 @@ int ndv_reduce_vect(int m,UINT *svect,int col,IndArray return maxrs; } -#if defined(__GNUC__) && SIZEOF_LONG==8 - -int ndv_reduce_vect64(int m,U64 *svect,U64 *cvect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) -{ - int i,j,k,len,pos,prev; - U64 a,c,c1,c2; - IndArray ivect; - unsigned char *ivc; - unsigned short *ivs; - unsigned int *ivi; - NDV redv; - NMV mr; - NODE rp; - int maxrs; - - for ( i = 0; i < col; i++ ) cvect[i] = 0; - maxrs = 0; - for ( i = 0; i < nred; i++ ) { - ivect = imat[i]; - k = ivect->head; - a = svect[k]; c = cvect[k]; - MOD128(a,c,m); - svect[k] = a; cvect[k] = 0; - if ( (c = svect[k]) != 0 ) { - maxrs = MAX(maxrs,rp0[i]->sugar); - c = m-c; redv = nd_ps[rp0[i]->index]; - len = LEN(redv); mr = BDY(redv); - svect[k] = 0; prev = k; - switch ( ivect->width ) { - case 1: - ivc = ivect->index.c; - for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { - pos = prev+ivc[j]; c1 = CM(mr); prev = pos; - if ( c1 ) { - c2 = svect[pos]+c1*c; - if ( c2 < svect[pos] ) cvect[pos]++; - svect[pos] = c2; - } - } - break; - case 2: - ivs = ivect->index.s; - for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { - pos = prev+ivs[j]; c1 = CM(mr); prev = pos; - if ( c1 ) { - c2 = svect[pos]+c1*c; - if ( c2 < svect[pos] ) cvect[pos]++; - svect[pos] = c2; - } - } - break; - case 4: - ivi = ivect->index.i; - for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { - pos = prev+ivi[j]; c1 = CM(mr); prev = pos; - if ( c1 ) { - c2 = svect[pos]+c1*c; - if ( c2 < svect[pos] ) cvect[pos]++; - svect[pos] = c2; - } - } - break; - } - } - } - for ( i = 0; i < col; i++ ) { - a = svect[i]; c = cvect[i]; MOD128(a,c,m); svect[i] = a; - } - return maxrs; -} -#endif - int ndv_reduce_vect_sf(int m,UINT *svect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev; @@ -6549,36 +6470,6 @@ NDV vect_to_ndv(UINT *vect,int spcol,int col,int *rhea } } -#if defined(__GNUC__) && SIZEOF_LONG==8 -NDV vect64_to_ndv(U64 *vect,int spcol,int col,int *rhead,UINT *s0vect) -{ - int j,k,len; - UINT *p; - UINT c; - NDV r; - NMV mr0,mr; - - for ( j = 0, len = 0; j < spcol; j++ ) if ( vect[j] ) len++; - if ( !len ) return 0; - else { - mr0 = (NMV)MALLOC_ATOMIC_IGNORE_OFF_PAGE(nmv_adv*len); -#if 0 - ndv_alloc += nmv_adv*len; -#endif - mr = mr0; - p = s0vect; - for ( j = k = 0; j < col; j++, p += nd_wpd ) - if ( !rhead[j] ) { - if ( (c = (UINT)vect[k++]) != 0 ) { - ndl_copy(p,DL(mr)); CM(mr) = c; NMV_ADV(mr); - } - } - MKNDV(nd_nvar,mr0,len,r); - return r; - } -} -#endif - NDV vect_to_ndv_2(unsigned long *vect,int col,UINT *s0vect) { int j,k,len; @@ -6788,9 +6679,12 @@ NODE nd_f4(int m,int checkonly,int **indp) PGeoBucket bucket; struct oEGT eg0,eg1,eg_f4; Z i1,i2,sugarq; + + init_eg(&f4_symb); init_eg(&f4_conv); init_eg(&f4_conv); init_eg(&f4_elim1); init_eg(&f4_elim2); #if 0 ndv_alloc = 0; #endif + Nf4_red=0; g = 0; d = 0; for ( i = 0; i < nd_psn; i++ ) { d = update_pairs(d,g,i,0); @@ -6832,7 +6726,7 @@ NODE nd_f4(int m,int checkonly,int **indp) d = nd_reconstruct(0,d); continue; } - get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); + get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); add_eg(&f4_symb,&eg0,&eg1); if ( DP_Print ) fprintf(asir_out,"sugar=%d,symb=%.3fsec,", sugar,eg_f4.exectime); @@ -6881,6 +6775,11 @@ NODE nd_f4(int m,int checkonly,int **indp) #if 0 fprintf(asir_out,"ndv_alloc=%d\n",ndv_alloc); #endif + if ( DP_Print ) { + fprintf(asir_out,"number of red=%d,",Nf4_red); + fprintf(asir_out,"symb=%.3fsec,conv=%.3fsec,elim1=%.3fsec,elim2=%.3fsec\n", + f4_symb.exectime,f4_conv.exectime,f4_elim1.exectime,f4_elim2.exectime); + } conv_ilist(nd_demand,0,g,indp); return g; } @@ -7104,7 +7003,6 @@ NODE nd_f4_red_2(ND_pairs sp0,UINT *s0vect,int col,NOD unsigned long *v; get_eg(&eg0); -init_eg(&eg_search); for ( sp = sp0, nsp = 0; sp; sp = NEXT(sp), nsp++ ); nred = length(rp0); mat = alloc_matrix(nsp,col); @@ -7159,18 +7057,18 @@ init_eg(&eg_search); NODE nd_f4_red(int m,ND_pairs sp0,int trace,UINT *s0vect,int col,NODE rp0,ND_pairs *nz) { IndArray *imat; - int nsp,nred,i; + int nsp,nred,i,start; int *rhead; NODE r0,rp; ND_pairs sp; NM_ind_pair *rvect; UINT *s; int *s0hash; + struct oEGT eg0,eg1,eg_conv; if ( m == 2 && nd_rref2 ) return nd_f4_red_2(sp0,s0vect,col,rp0,nz); -init_eg(&eg_search); for ( sp = sp0, nsp = 0; sp; sp = NEXT(sp), nsp++ ); nred = length(rp0); imat = (IndArray *)MALLOC(nred*sizeof(IndArray)); @@ -7178,20 +7076,25 @@ init_eg(&eg_search); for ( i = 0; i < col; i++ ) rhead[i] = 0; /* construction of index arrays */ + get_eg(&eg0); if ( DP_Print ) { - fprintf(asir_out,"%dx%d,",nsp+nred,col); + fprintf(asir_out,"%dx%d,",nsp+nred,col); + fflush(asir_out); } rvect = (NM_ind_pair *)MALLOC(nred*sizeof(NM_ind_pair)); - s0hash = (int *)MALLOC(col*sizeof(int)); - for ( i = 0, s = s0vect; i < col; i++, s += nd_wpd ) - s0hash[i] = ndl_hash_value(s); - for ( rp = rp0, i = 0; rp; i++, rp = NEXT(rp) ) { + for ( start = 0, rp = rp0, i = 0; rp; i++, rp = NEXT(rp) ) { rvect[i] = (NM_ind_pair)BDY(rp); - imat[i] = nm_ind_pair_to_vect_compress(trace,s0vect,col,s0hash,rvect[i]); + imat[i] = nm_ind_pair_to_vect_compress(trace,s0vect,col,rvect[i],start); rhead[imat[i]->head] = 1; + start = imat[i]->head; } + get_eg(&eg1); init_eg(&eg_conv); add_eg(&eg_conv,&eg0,&eg1); add_eg(&f4_conv,&eg0,&eg1); + if ( DP_Print ) { + fprintf(asir_out,"conv=%.3fsec,",eg_conv.exectime); + fflush(asir_out); + } if ( m > 0 ) -#if defined(__GNUC__) && SIZEOF_LONG==8 +#if SIZEOF_LONG==8 r0 = nd_f4_red_mod64_main(m,sp0,nsp,s0vect,col,rvect,rhead,imat,nred,nz); #else r0 = nd_f4_red_main(m,sp0,nsp,s0vect,col,rvect,rhead,imat,nred,nz); @@ -7202,9 +7105,6 @@ init_eg(&eg_search); r0 = nd_f4_red_lf_main(m,sp0,nsp,trace,s0vect,col,rvect,rhead,imat,nred); else r0 = nd_f4_red_q_main(sp0,nsp,trace,s0vect,col,rvect,rhead,imat,nred); -#if 0 - if ( DP_Print ) print_eg("search",&eg_search); -#endif return r0; } @@ -7296,92 +7196,7 @@ NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s return r0; } -#if defined(__GNUC__) && SIZEOF_LONG==8 -/* for Fp, 2^15=index.c); - - /* elimination (2nd step) */ - colstat = (int *)MALLOC(spcol*sizeof(int)); - rank = nd_gauss_elim_mod64(spmat,spsugar,spactive,sprow,spcol,m,colstat); - r0 = 0; - for ( i = 0; i < rank; i++ ) { - NEXTNODE(r0,r); BDY(r) = - (pointer)vect64_to_ndv(spmat[i],spcol,col,rhead,s0vect); - SG((NDV)BDY(r)) = spsugar[i]; - GCFREE(spmat[i]); - } - if ( r0 ) NEXT(r) = 0; - - for ( ; i < sprow; i++ ) GCFREE(spmat[i]); - get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); - init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); - if ( DP_Print ) { - fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime); - fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", - nsp,nred,sprow,spcol,rank); - fprintf(asir_out,"%.3fsec,",eg_f4.exectime); - } - if ( nz ) { - for ( i = 0; i < rank-1; i++ ) NEXT(spactive[i]) = spactive[i+1]; - if ( rank > 0 ) { - NEXT(spactive[rank-1]) = 0; - *nz = spactive[0]; - } else - *nz = 0; - } - return r0; -} -#endif - /* for small finite fields */ NODE nd_f4_red_sf_main(int m,ND_pairs sp0,int nsp,UINT *s0vect,int col, @@ -7793,85 +7608,7 @@ int nd_gauss_elim_mod(UINT **mat0,int *sugar,ND_pairs return rank; } -#if defined(__GNUC__) && SIZEOF_LONG==8 -int nd_gauss_elim_mod64(U64 **mat,int *sugar,ND_pairs *spactive,int row,int col,int md,int *colstat) -{ - int i,j,k,l,rank,s; - U64 inv; - U64 a; - UINT c; - U64 *t,*pivot,*pk; - UINT *ck; - UINT **cmat; - UINT *ct; - ND_pairs pair; - - cmat = (UINT **)MALLOC(row*sizeof(UINT *)); - for ( i = 0; i < row; i++ ) { - cmat[i] = MALLOC_ATOMIC(col*sizeof(UINT)); - bzero(cmat[i],col*sizeof(UINT)); - } - - for ( rank = 0, j = 0; j < col; j++ ) { - for ( i = rank; i < row; i++ ) { - a = mat[i][j]; c = cmat[i][j]; - MOD128(a,c,md); - mat[i][j] = a; cmat[i][j] = 0; - } - for ( i = rank; i < row; i++ ) - if ( mat[i][j] ) - break; - if ( i == row ) { - colstat[j] = 0; - continue; - } else - colstat[j] = 1; - if ( i != rank ) { - t = mat[i]; mat[i] = mat[rank]; mat[rank] = t; - ct = cmat[i]; cmat[i] = cmat[rank]; cmat[rank] = ct; - s = sugar[i]; sugar[i] = sugar[rank]; sugar[rank] = s; - if ( spactive ) { - pair = spactive[i]; spactive[i] = spactive[rank]; - spactive[rank] = pair; - } - } - /* column j is normalized */ - s = sugar[rank]; - inv = invm((UINT)mat[rank][j],md); - /* normalize pivot row */ - for ( k = j, pk = mat[rank]+j, ck = cmat[rank]+j; k < col; k++, pk++, ck++ ) { - a = *pk; c = *ck; MOD128(a,c,md); *pk = (a*inv)%md; *ck = 0; - } - for ( i = rank+1; i < row; i++ ) { - if ( (a = mat[i][j]) != 0 ) { - sugar[i] = MAX(sugar[i],s); - red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[rank]+j,(int)(md-a),col-j); - } - } - rank++; - } - for ( j = col-1, l = rank-1; j >= 0; j-- ) - if ( colstat[j] ) { - for ( k = j, pk = mat[l]+j, ck = cmat[l]+j; k < col; k++, pk++, ck++ ) { - a = *pk; c = *ck; MOD128(a,c,md); *pk = a; *ck = 0; - } - s = sugar[l]; - for ( i = 0; i < l; i++ ) { - a = mat[i][j]; c = cmat[i][j]; MOD128(a,c,md); mat[i][j] = a; cmat[i][j] = 0; - if ( a ) { - sugar[i] = MAX(sugar[i],s); - red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[l]+j,(int)(md-a),col-j); - } - } - l--; - } - for ( i = 0; i < row; i++ ) GCFREE(cmat[i]); - GCFREE(cmat); - return rank; -} -#endif - int nd_gauss_elim_sf(UINT **mat0,int *sugar,int row,int col,int md,int *colstat) { int i,j,k,l,inv,a,rank,s; @@ -8482,7 +8219,7 @@ P ndc_div(int mod,union oNDC a,union oNDC b) int inv,t; if ( mod == -1 ) c.m = _mulsf(a.m,_invsf(b.m)); - else if ( mod == -2 ) divlf(a.gz,b.gz,&c.gz); + else if ( mod == -2 ) divlf(a.z,b.z,&c.z); else if ( mod ) { inv = invm(b.m,mod); DMAR(a.m,inv,0,mod,t); c.m = t; @@ -8502,7 +8239,7 @@ P ndctop(int mod,union oNDC c) if ( mod == -1 ) { e = IFTOF(c.m); MKGFS(e,gfs); return (P)gfs; } else if ( mod == -2 ) { - q = c.gz; return (P)q; + q = c.z; return (P)q; } else if ( mod > 0 ) { STOZ(c.m,q); return (P)q; } else @@ -9264,4 +9001,290 @@ NODE nd_f4_lf_trace_main(int m,int **indp) conv_ilist(nd_demand,1,g,indp); return g; } + +#if SIZEOF_LONG==8 + +NDV vect64_to_ndv(mp_limb_t *vect,int spcol,int col,int *rhead,UINT *s0vect) +{ + int j,k,len; + UINT *p; + UINT c; + NDV r; + NMV mr0,mr; + + for ( j = 0, len = 0; j < spcol; j++ ) if ( vect[j] ) len++; + if ( !len ) return 0; + else { + mr0 = (NMV)MALLOC_ATOMIC_IGNORE_OFF_PAGE(nmv_adv*len); +#if 0 + ndv_alloc += nmv_adv*len; +#endif + mr = mr0; + p = s0vect; + for ( j = k = 0; j < col; j++, p += nd_wpd ) + if ( !rhead[j] ) { + if ( (c = (UINT)vect[k++]) != 0 ) { + ndl_copy(p,DL(mr)); CM(mr) = c; NMV_ADV(mr); + } + } + MKNDV(nd_nvar,mr0,len,r); + return r; + } +} + +int nd_to_vect64(int mod,UINT *s0,int n,ND d,mp_limb_t *r) +{ + NM m; + UINT *t,*s,*u; + int i,st,ed,md,prev,c; + + for ( i = 0; i < n; i++ ) r[i] = 0; + prev = 0; + for ( i = 0, m = BDY(d); m; m = NEXT(m) ) { + t = DL(m); + st = prev; + ed = n; + while ( ed > st ) { + md = (st+ed)/2; + u = s0+md*nd_wpd; + c = DL_COMPARE(u,t); + if ( c == 0 ) break; + else if ( c > 0 ) st = md; + else ed = md; + } + r[md] = (mp_limb_t)CM(m); + prev = md; + } + for ( i = 0; !r[i]; i++ ); + return i; +} + +#define MOD128(a,c,m) ((a)=(((c)!=0||((a)>=(m)))?(((((U128)(c))<<64)+(a))%(m)):(a))) + +int ndv_reduce_vect64(int m,mp_limb_t *svect,mp_limb_t *cvect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) +{ + int i,j,k,len,pos,prev; + mp_limb_t a,c,c1,c2; + IndArray ivect; + unsigned char *ivc; + unsigned short *ivs; + unsigned int *ivi; + NDV redv; + NMV mr; + NODE rp; + int maxrs; + + for ( i = 0; i < col; i++ ) cvect[i] = 0; + maxrs = 0; + for ( i = 0; i < nred; i++ ) { + ivect = imat[i]; + k = ivect->head; + a = svect[k]; c = cvect[k]; + MOD128(a,c,m); + svect[k] = a; cvect[k] = 0; + if ( (c = svect[k]) != 0 ) { + Nf4_red++; + maxrs = MAX(maxrs,rp0[i]->sugar); + c = m-c; redv = nd_ps[rp0[i]->index]; + len = LEN(redv); mr = BDY(redv); + svect[k] = 0; prev = k; + switch ( ivect->width ) { + case 1: + ivc = ivect->index.c; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivc[j]; c1 = CM(mr); prev = pos; + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; + } + break; + case 2: + ivs = ivect->index.s; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivs[j]; c1 = CM(mr); prev = pos; + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; + } + break; + case 4: + ivi = ivect->index.i; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivi[j]; c1 = CM(mr); prev = pos; + c2 = svect[pos]+c1*c; + if ( c2 < svect[pos] ) cvect[pos]++; + svect[pos] = c2; + } + break; + } + } + } + for ( i = 0; i < col; i++ ) { + a = svect[i]; c = cvect[i]; MOD128(a,c,m); svect[i] = a; + } + return maxrs; +} + +/* for Fp, 2^15=index.c); + + /* elimination (2nd step) */ + colstat = (int *)MALLOC(spcol*sizeof(int)); + rank = nd_gauss_elim_mod64(spmat,spsugar,spactive,sprow,spcol,m,colstat); + r0 = 0; + for ( i = 0; i < rank; i++ ) { + NEXTNODE(r0,r); BDY(r) = + (pointer)vect64_to_ndv(spmat[i],spcol,col,rhead,s0vect); + SG((NDV)BDY(r)) = spsugar[i]; + GCFREE(spmat[i]); + } + if ( r0 ) NEXT(r) = 0; + + for ( ; i < sprow; i++ ) GCFREE(spmat[i]); + get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); add_eg(&f4_elim2,&eg1,&eg2); + init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); + if ( DP_Print ) { + fprintf(asir_out,"elim2=%.3fsec,",eg_f4_2.exectime); + fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", + nsp,nred,sprow,spcol,rank); + fprintf(asir_out,"%.3fsec,",eg_f4.exectime); + } + if ( nz ) { + for ( i = 0; i < rank-1; i++ ) NEXT(spactive[i]) = spactive[i+1]; + if ( rank > 0 ) { + NEXT(spactive[rank-1]) = 0; + *nz = spactive[0]; + } else + *nz = 0; + } + return r0; +} + +int nd_gauss_elim_mod64(mp_limb_t **mat,int *sugar,ND_pairs *spactive,int row,int col,int md,int *colstat) +{ + int i,j,k,l,rank,s; + mp_limb_t inv; + mp_limb_t a; + UINT c; + mp_limb_t *t,*pivot,*pk; + UINT *ck; + UINT **cmat; + UINT *ct; + ND_pairs pair; + + cmat = (UINT **)MALLOC(row*sizeof(UINT *)); + for ( i = 0; i < row; i++ ) { + cmat[i] = MALLOC_ATOMIC(col*sizeof(UINT)); + bzero(cmat[i],col*sizeof(UINT)); + } + + for ( rank = 0, j = 0; j < col; j++ ) { + for ( i = rank; i < row; i++ ) { + a = mat[i][j]; c = cmat[i][j]; + MOD128(a,c,md); + mat[i][j] = a; cmat[i][j] = 0; + } + for ( i = rank; i < row; i++ ) + if ( mat[i][j] ) + break; + if ( i == row ) { + colstat[j] = 0; + continue; + } else + colstat[j] = 1; + if ( i != rank ) { + t = mat[i]; mat[i] = mat[rank]; mat[rank] = t; + ct = cmat[i]; cmat[i] = cmat[rank]; cmat[rank] = ct; + s = sugar[i]; sugar[i] = sugar[rank]; sugar[rank] = s; + if ( spactive ) { + pair = spactive[i]; spactive[i] = spactive[rank]; + spactive[rank] = pair; + } + } + /* column j is normalized */ + s = sugar[rank]; + inv = invm((UINT)mat[rank][j],md); + /* normalize pivot row */ + for ( k = j, pk = mat[rank]+j, ck = cmat[rank]+j; k < col; k++, pk++, ck++ ) { + a = *pk; c = *ck; MOD128(a,c,md); *pk = (a*inv)%md; *ck = 0; + } + for ( i = rank+1; i < row; i++ ) { + if ( (a = mat[i][j]) != 0 ) { + sugar[i] = MAX(sugar[i],s); + red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[rank]+j,(int)(md-a),col-j); + Nf4_red++; + } + } + rank++; + } + for ( j = col-1, l = rank-1; j >= 0; j-- ) + if ( colstat[j] ) { + for ( k = j, pk = mat[l]+j, ck = cmat[l]+j; k < col; k++, pk++, ck++ ) { + a = *pk; c = *ck; MOD128(a,c,md); *pk = a; *ck = 0; + } + s = sugar[l]; + for ( i = 0; i < l; i++ ) { + a = mat[i][j]; c = cmat[i][j]; MOD128(a,c,md); mat[i][j] = a; cmat[i][j] = 0; + if ( a ) { + sugar[i] = MAX(sugar[i],s); + red_by_vect64(md,mat[i]+j,cmat[i]+j,mat[l]+j,(int)(md-a),col-j); + Nf4_red++; + } + } + l--; + } + for ( i = 0; i < row; i++ ) GCFREE(cmat[i]); + GCFREE(cmat); + return rank; +} +#endif