=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2018/engine/Q.c,v retrieving revision 1.1 retrieving revision 1.7 diff -u -p -r1.1 -r1.7 --- OpenXM_contrib2/asir2018/engine/Q.c 2018/09/19 05:45:07 1.1 +++ OpenXM_contrib2/asir2018/engine/Q.c 2018/10/01 05:54:09 1.7 @@ -1,3 +1,4 @@ +/* $OpenXM: OpenXM_contrib2/asir2018/engine/Q.c,v 1.6 2018/10/01 05:49:06 noro Exp $ */ #include "ca.h" #include "gmp.h" #include "base.h" @@ -10,6 +11,10 @@ Z current_mod_lf; int current_mod_lf_size; gmp_randstate_t GMP_RAND; +#define F4_INTRAT_PERIOD 8 + +extern int DP_Print; + void isqrtz(Z a,Z *r); void bshiftz(Z a,int n,Z *r); @@ -31,6 +36,18 @@ void init_gmpq() gmp_randinit_default(GMP_RAND); } +void printexpr(VL,Obj); + +void pmat(Z **a,int row,int col) +{ + int i,j; + + for ( i = 0; i < row; i++, printf("\n") ) + for ( j = 0; j < col; j++, printf(" ") ) + printexpr(CO,(Obj)a[i][j]); + printf("\n"); +} + Z utoz(unsigned int u) { mpz_t z; @@ -161,6 +178,7 @@ void mulz(Z n1,Z n2,Z *nr) void muladdtoz(Z n1,Z n2,Z *nr) { +#if 0 Z t; if ( n1 && n2 ) { @@ -168,13 +186,21 @@ void muladdtoz(Z n1,Z n2,Z *nr) NEWZ(t); mpz_init(BDY(t)); *nr = t; } mpz_addmul(BDY(*nr),BDY(n1),BDY(n2)); - } + if ( !mpz_sgn(BDY(*nr)) ) + *nr = 0; + } +#else + Z t,s; + + mulz(n1,n2,&t); addz(*nr,t,&s); *nr = s; +#endif } /* nr += n1*u */ void mul1addtoz(Z n1,long u,Z *nr) { +#if 0 Z t; if ( n1 && u ) { @@ -185,7 +211,14 @@ void mul1addtoz(Z n1,long u,Z *nr) mpz_addmul_ui(BDY(*nr),BDY(n1),(unsigned long)u); else mpz_submul_ui(BDY(*nr),BDY(n1),(unsigned long)(-u)); + if ( !mpz_sgn(BDY(*nr)) ) + *nr = 0; } +#else + Z t,s; + + mul1z(n1,u,&t); addz(*nr,t,&s); *nr = s; +#endif } void mul1z(Z n1,long n2,Z *nr) @@ -327,7 +360,7 @@ void pwrz(Z n1,Z n,Z *nr) } else if ( !smallz(n) ) { error("exponent too big."); *nr = 0; } else if ( n1->z && mpz_sgn(BDY((Z)n))>0 ) { - mpz_init(z); mpz_pow_ui(z,BDY(n1),QTOS(n)); MPZTOZ(z,*nr); + mpz_init(z); mpz_pow_ui(z,BDY(n1),ZTOS(n)); MPZTOZ(z,*nr); } else { MPZTOMPQ(BDY(n1),q); MPQTOQ(q,r); pwrq(r,(Q)n,&p); *nr = (Z)p; @@ -427,7 +460,7 @@ void gcdvz_estimate(VECT v,Z *q) else addz(s,b[i],&u); s = u; } - for ( i = 0, t = 0; i < n; i++ ) { + for ( t = 0; i < n; i++ ) { if ( b[i] && mpz_sgn(BDY(b[i]))<0 ) subz(t,b[i],&u); else addz(t,b[i],&u); t = u; @@ -435,6 +468,49 @@ void gcdvz_estimate(VECT v,Z *q) gcdz(s,t,q); } +void gcdv_mpz_estimate(mpz_t g,mpz_t *b,int n) +{ + int m,m2,i,j; + mpz_t s,t; + + mpz_init(g); + for ( i = 0, m = 0; i < n; i++ ) + if ( mpz_sgn(b[i]) ) m++; + if ( !m ) { + mpz_set_ui(g,0); + return; + } + if ( m == 1 ) { + for ( i = 0, m = 0; i < n; i++ ) + if ( mpz_sgn(b[i]) ) break; + if ( mpz_sgn(b[i])<0 ) mpz_neg(g,b[i]); + else mpz_set(g,b[i]); + return ; + } + m2 = m/2; + mpz_init_set_ui(s,0); + for ( i = j = 0; j < m2; i++ ) { + if ( mpz_sgn(b[i]) ) { + if ( mpz_sgn(b[i])<0 ) + mpz_sub(s,s,b[i]); + else + mpz_add(s,s,b[i]); + j++; + } + } + mpz_init_set_ui(t,0); + for ( ; i < n; i++ ) { + if ( mpz_sgn(b[i]) ) { + if ( mpz_sgn(b[i])<0 ) + mpz_sub(t,t,b[i]); + else + mpz_add(t,t,b[i]); + } + } + mpz_gcd(g,s,t); +} + + void factorialz(unsigned int n,Z *nr) { mpz_t a; @@ -579,7 +655,7 @@ void pwrq(Q n1,Q n,Q *nr) } else if ( !smallz((Z)n) ) { error("exponent too big."); *nr = 0; } else { - e = QTOS(n); + e = ZTOS(n); if ( e < 0 ) { e = -e; if ( n1->z ) { @@ -636,8 +712,8 @@ void mkbc(int n,Z *t) Z c,d,iq; for ( t[0] = ONE, i = 1; i <= n/2; i++ ) { - STOQ(n-i+1,c); mulz(t[i-1],c,&d); - STOQ(i,iq); divsz(d,iq,&t[i]); + STOZ(n-i+1,c); mulz(t[i-1],c,&d); + STOZ(i,iq); divsz(d,iq,&t[i]); } for ( ; i <= n; i++ ) t[i] = t[n-i]; @@ -815,10 +891,6 @@ unsigned int remqi(Q a,unsigned int mod) return c; } -extern int DP_Print; - -#define F4_INTRAT_PERIOD 8 - int generic_gauss_elim(MAT mat,MAT *nm,Z *dn,int **rindp,int **cindp) { int **wmat; @@ -829,6 +901,9 @@ int generic_gauss_elim(MAT mat,MAT *nm,Z *dn,int **rin MAT r,crmat; int ret; +#if SIZEOF_LONG == 8 + return generic_gauss_elim64(mat,nm,dn,rindp,cindp); +#endif bmat = (Z **)mat->body; row = mat->row; col = mat->col; wmat = (int **)almat(row,col); @@ -1300,7 +1375,7 @@ void isqrtz(Z a,Z *r) else { k = z_bits((Q)a); /* a <= 2^k-1 */ bshiftz(ONE,-((k>>1)+(k&1)),&x); /* a <= x^2 */ - STOQ(2,two); + STOZ(2,two); while ( 1 ) { pwrz(x,two,&t); if ( cmpz(t,a) <= 0 ) { @@ -1411,19 +1486,20 @@ int generic_gauss_elim_hensel(MAT mat,MAT *nmmat,Z *dn int *rind,*cind; int count; int ret; - struct oEGT eg_mul,eg_inv,eg_intrat,eg_check,tmp0,tmp1; + struct oEGT eg_mul1,eg_mul2,tmp0,tmp1,tmp2; int period; int *wx,*ptr; int wxsize,nsize; Z wn; Z wq; +init_eg(&eg_mul1); init_eg(&eg_mul2); a0 = (Z **)mat->body; row = mat->row; col = mat->col; w = (int **)almat(row,col); for ( ind = 0; ; ind++ ) { md = get_lprime(ind); - STOQ(md,mdq); + STOZ(md,mdq); for ( i = 0; i < row; i++ ) for ( j = 0, ai = a0[i], wi = w[i]; j < col; j++ ) wi[j] = remqi((Q)ai[j],md); @@ -1468,40 +1544,54 @@ int generic_gauss_elim_hensel(MAT mat,MAT *nmmat,Z *dn MKMAT(xmat,rank,ri); x = (Z **)(xmat)->body; MKMAT(*nmmat,rank,ri); nm = (Z **)(*nmmat)->body; wc = (int **)almat(rank,ri); - for ( i = 0; i < rank; i++ ) - wc[i] = w[i]+rank; *rindp = rind = (int *)MALLOC_ATOMIC(rank*sizeof(int)); *cindp = cind = (int *)MALLOC_ATOMIC((ri)*sizeof(int)); period = F4_INTRAT_PERIOD; for ( q = ONE, count = 0; ; ) { + /* check Ax=B mod q */ if ( DP_Print > 3 ) fprintf(stderr,"o"); /* wc = b mod md */ for ( i = 0; i < rank; i++ ) - for ( j = 0, bi = b[i], wi = wc[i]; j < ri; j++ ) { + for ( j = 0, bi = b[i], wi = wc[i]; j < ri; j++ ) wi[j] = remqi((Q)bi[j],md); - if ( wi[j] && sgnz(bi[j]) < 0 ) - wi[j] = md-wi[j]; - } - /* wc = A^(-1)wc; wc is normalized */ - solve_by_lu_mod(w,rank,md,wc,ri,1); + /* wc = A^(-1)wc; wc is not normalized */ + solve_by_lu_mod(w,rank,md,wc,ri,0); /* x += q*wc */ +get_eg(&tmp0); for ( i = 0; i < rank; i++ ) for ( j = 0, wi = wc[i]; j < ri; j++ ) mul1addtoz(q,wi[j],&x[i][j]); - /* b =(A*wc+b)/md */ + /* b =(b-A*wc)/md */ +get_eg(&tmp1); add_eg(&eg_mul1,&tmp0,&tmp1); for ( i = 0; i < rank; i++ ) for ( j = 0; j < ri; j++ ) { - u = b[i][j]; - for ( k = 0; k < rank; k++ ) mul1addtoz(a[i][k],wc[k][j],&u); + mpz_t uz; + + if ( b[i][j] ) + mpz_init_set(uz,BDY(b[i][j])); + else + mpz_init_set_ui(uz,0); + for ( k = 0; k < rank; k++ ) { + if ( a[i][k] && wc[k][j] ) { + if ( wc[k][j] < 0 ) + mpz_addmul_ui(uz,BDY(a[i][k]),-wc[k][j]); + else + mpz_submul_ui(uz,BDY(a[i][k]),wc[k][j]); + } + } + MPZTOZ(uz,u); divsz(u,mdq,&b[i][j]); } +get_eg(&tmp2); add_eg(&eg_mul2,&tmp1,&tmp2); count++; /* q = q*md */ mulz(q,mdq,&u); q = u; if ( count == period ) { ret = intmtoratm(xmat,q,*nmmat,dn); if ( ret ) { + print_eg("MUL1",&eg_mul1); + print_eg("MUL2",&eg_mul2); for ( j = k = l = 0; j < col; j++ ) if ( cinfo[j] ) rind[k++] = j; @@ -1557,7 +1647,7 @@ int generic_gauss_elim_hensel_dalg(MAT mat,DP *mb,MAT w = (int **)almat(row,col); for ( ind = 0; ; ind++ ) { md = get_lprime(ind); - STOQ(md,mdq); + STOZ(md,mdq); for ( i = 0; i < row; i++ ) for ( j = 0, ai = a0[i], wi = w[i]; j < col; j++ ) wi[j] = remqi((Q)ai[j],md); @@ -1583,14 +1673,14 @@ int generic_gauss_elim_hensel_dalg(MAT mat,DP *mb,MAT a = (Z **)almat_pointer(rank,rank); /* lhs mat */ MKMAT(bmat,rank,col-rank); b = (Z **)bmat->body; /* lhs mat */ for ( j = li = ri = 0; j < col; j++ ) - if ( cinfo[j] ) { + if ( cinfo[j] > 0 ) { /* the column is in lhs */ for ( i = 0; i < rank; i++ ) { w[i][li] = w[i][j]; a[i][li] = a0[rinfo[i]][j]; } li++; - } else { + } else if ( !cinfo[j] ) { /* the column is in rhs */ for ( i = 0; i < rank; i++ ) b[i][ri] = a0[rinfo[i]][j]; @@ -1613,8 +1703,6 @@ int generic_gauss_elim_hensel_dalg(MAT mat,DP *mb,MAT MKMAT(xmat,rank,ri); x = (Z **)(xmat)->body; MKMAT(*nmmat,rank,ri); nm = (Z **)(*nmmat)->body; wc = (int **)almat(rank,ri); - for ( i = 0; i < rank; i++ ) - wc[i] = w[i]+rank; *rindp = rind = (int *)MALLOC_ATOMIC(rank*sizeof(int)); *cindp = cind = (int *)MALLOC_ATOMIC((ri)*sizeof(int)); @@ -1624,21 +1712,31 @@ int generic_gauss_elim_hensel_dalg(MAT mat,DP *mb,MAT fprintf(stderr,"o"); /* wc = b mod md */ for ( i = 0; i < rank; i++ ) - for ( j = 0, bi = b[i], wi = wc[i]; j < ri; j++ ) { + for ( j = 0, bi = b[i], wi = wc[i]; j < ri; j++ ) wi[j] = remqi((Q)bi[j],md); - if ( wi[j] && sgnz(bi[j]) < 0 ) - wi[j] = md-wi[j]; - } /* wc = A^(-1)wc; wc is normalized */ solve_by_lu_mod(w,rank,md,wc,ri,1); /* x += q*wc */ for ( i = 0; i < rank; i++ ) for ( j = 0, wi = wc[i]; j < ri; j++ ) mul1addtoz(q,wi[j],&x[i][j]); - /* b =(A*wc+b)/md */ + /* b =(b-A*wc)/md */ for ( i = 0; i < rank; i++ ) for ( j = 0; j < ri; j++ ) { - u = b[i][j]; - for ( k = 0; k < rank; k++ ) mul1addtoz(a[i][k],wc[k][j],&u); + mpz_t uz; + + if ( b[i][j] ) + mpz_init_set(uz,BDY(b[i][j])); + else + mpz_init_set_ui(uz,0); + for ( k = 0; k < rank; k++ ) { + if ( a[i][k] && wc[k][j] ) { + if ( wc[k][j] < 0 ) + mpz_addmul_ui(uz,BDY(a[i][k]),-wc[k][j]); + else + mpz_submul_ui(uz,BDY(a[i][k]),wc[k][j]); + } + } + MPZTOZ(uz,u); divsz(u,mdq,&b[i][j]); } count++; @@ -1669,3 +1767,126 @@ int generic_gauss_elim_hensel_dalg(MAT mat,DP *mb,MAT } } } + +#if SIZEOF_LONG == 8 +mp_limb_t remqi64(Q a,mp_limb_t mod) +{ + mp_limb_t c,nm,dn; + mpz_t r; + + if ( !a ) return 0; + else if ( a->z ) { + mpz_init(r); + c = mpz_fdiv_r_ui(r,BDY((Z)a),mod); + } else { + mpz_init(r); + nm = mpz_fdiv_r_ui(r,mpq_numref(BDY(a)),mod); + dn = mpz_fdiv_r_ui(r,mpq_denref(BDY(a)),mod); + dn = invmod64(dn,mod); + c = mulmod64(nm,dn,mod); + } + return c; +} + +int generic_gauss_elim_mod64(mp_limb_t **mat,int row,int col,mp_limb_t md,int *colstat); +mp_limb_t get_lprime64(int ind); + +int generic_gauss_elim64(MAT mat,MAT *nm,Z *dn,int **rindp,int **cindp) +{ + mp_limb_t **wmat; + mp_limb_t *wmi; + mp_limb_t md,inv,t,t1; + Z **bmat,**tmat,*bmi,*tmi; + Z q,m1,m2,m3,s,u; + int *colstat,*wcolstat,*rind,*cind; + int row,col,ind,i,j,k,l,rank,rank0; + MAT r,crmat; + int ret; + + bmat = (Z **)mat->body; + row = mat->row; col = mat->col; + wmat = (mp_limb_t **)almat64(row,col); + colstat = (int *)MALLOC_ATOMIC(col*sizeof(int)); + wcolstat = (int *)MALLOC_ATOMIC(col*sizeof(int)); + for ( ind = 0; ; ind++ ) { + if ( DP_Print ) { + fprintf(asir_out,"."); fflush(asir_out); + } + md = get_lprime64(ind); + for ( i = 0; i < row; i++ ) + for ( j = 0, bmi = bmat[i], wmi = wmat[i]; j < col; j++ ) + wmi[j] = remqi64((Q)bmi[j],md); + rank = generic_gauss_elim_mod64(wmat,row,col,md,wcolstat); + if ( !ind ) { +RESET: + UTOZ(md,m1); + rank0 = rank; + bcopy(wcolstat,colstat,col*sizeof(int)); + MKMAT(crmat,rank,col-rank); + MKMAT(r,rank,col-rank); *nm = r; + tmat = (Z **)crmat->body; + for ( i = 0; i < rank; i++ ) + for ( j = k = 0, tmi = tmat[i], wmi = wmat[i]; j < col; j++ ) + if ( !colstat[j] ) { UTOZ(wmi[j],tmi[k]); k++; } + } else { + if ( rank < rank0 ) { + if ( DP_Print ) { + fprintf(asir_out,"lower rank matrix; continuing...\n"); + fflush(asir_out); + } + continue; + } else if ( rank > rank0 ) { + if ( DP_Print ) { + fprintf(asir_out,"higher rank matrix; resetting...\n"); + fflush(asir_out); + } + goto RESET; + } else { + for ( j = 0; (j= t ) t = wmi[j]-t; + else t = md-(t-wmi[j]); + t1 = mulmod64(t,inv,md); + UTOZ(t1,u); mulz(m1,u,&s); + addz(tmi[k],s,&u); tmi[k] = u; + } else if ( wmi[j] ) { + /* f3 = m1*(m1 mod m2)^(-1)*f2 */ + t = mulmod64(wmi[j],inv,md); + UTOZ(t,u); mulz(m1,u,&s); tmi[k] = s; + } + k++; + } + m1 = m3; + if ( ind % F4_INTRAT_PERIOD ) + ret = 0; + else + ret = intmtoratm(crmat,m1,*nm,dn); + if ( ret ) { + *rindp = rind = (int *)MALLOC_ATOMIC(rank*sizeof(int)); + *cindp = cind = (int *)MALLOC_ATOMIC((col-rank)*sizeof(int)); + for ( j = k = l = 0; j < col; j++ ) + if ( colstat[j] ) rind[k++] = j; + else cind[l++] = j; + if ( gensolve_check(mat,*nm,*dn,rind,cind) ) + return rank; + } + } + } +} +#endif