=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2018/engine/Q.c,v retrieving revision 1.7 retrieving revision 1.19 diff -u -p -r1.7 -r1.19 --- OpenXM_contrib2/asir2018/engine/Q.c 2018/10/01 05:54:09 1.7 +++ OpenXM_contrib2/asir2018/engine/Q.c 2020/10/06 06:31:19 1.19 @@ -1,22 +1,26 @@ -/* $OpenXM: OpenXM_contrib2/asir2018/engine/Q.c,v 1.6 2018/10/01 05:49:06 noro Exp $ */ +/* $OpenXM: OpenXM_contrib2/asir2018/engine/Q.c,v 1.18 2020/10/04 03:14:09 noro Exp $ */ #include "ca.h" #include "gmp.h" #include "base.h" #include "inline.h" mpz_t ONEMPZ; -Z ONE; +extern Z ONE; int lf_lazy; Z current_mod_lf; int current_mod_lf_size; gmp_randstate_t GMP_RAND; -#define F4_INTRAT_PERIOD 8 +#define F4_INTRAT_PERIOD 4 extern int DP_Print; void isqrtz(Z a,Z *r); void bshiftz(Z a,int n,Z *r); +int mpz_inttorat(mpz_t c,mpz_t m,mpz_t b,mpz_t nm,mpz_t dn); +int generic_gauss_elim_hensel64(MAT mat,MAT *nmmat,Z *dn,int **rindp,int **cindp,DP *mb); +int find_lhs_and_lu_mod64(mp_limb_t **a,int row,int col,mp_limb_t md,int **rinfo,int **cinfo); +void solve_by_lu_mod64(mp_limb_t **a,int n,mp_limb_t md,mp_limb_signed_t **b,int l,int normalize); void *gc_realloc(void *p,size_t osize,size_t nsize) { @@ -30,7 +34,7 @@ void gc_free(void *p,size_t size) void init_gmpq() { - mp_set_memory_functions(Risa_GC_malloc_atomic,gc_realloc,gc_free); + mp_set_memory_functions(Risa_GC_malloc,gc_realloc,gc_free); mpz_init(ONEMPZ); mpz_set_ui(ONEMPZ,1); MPZTOZ(ONEMPZ,ONE); gmp_randinit_default(GMP_RAND); @@ -388,6 +392,8 @@ int cmpz(Z q1,Z q2) else if ( sgn < 0 ) return -1; else return 0; } + /* XXX */ + return 0; } void gcdz(Z n1,Z n2,Z *nq) @@ -515,7 +521,7 @@ void factorialz(unsigned int n,Z *nr) { mpz_t a; mpz_init(a); - mpz_fac_ui(a,n); + mpz_fac_ui(a,(unsigned long)n); MPZTOZ(a,*nr); } @@ -558,7 +564,7 @@ void subq(Q n1,Q n2,Q *nr) if ( !n1 ) { if ( !n2 ) *nr = 0; - else if ( n1->z ) chsgnz((Z)n1,(Z *)nr); + else if ( n2->z ) chsgnz((Z)n2,(Z *)nr); else { mpq_init(t); mpq_neg(t,BDY(n2)); MPQTOQ(t,*nr); } @@ -762,12 +768,15 @@ void lgp(P p,Z *g,Z *l); void ptozp(P p,int sgn,Q *c,P *pr) { - Z nm,dn; + Z nm,dn,nm1; if ( !p ) { *c = 0; *pr = 0; } else { lgp(p,&nm,&dn); + if ( sgn < 0 ) { + chsgnz(nm,&nm1); nm = nm1; + } divz(nm,dn,(Z *)c); divsp(CO,p,(P)*c,pr); } @@ -900,9 +909,14 @@ int generic_gauss_elim(MAT mat,MAT *nm,Z *dn,int **rin int row,col,ind,md,i,j,k,l,t,t1,rank,rank0,inv; MAT r,crmat; int ret; + MAT mat2,nm2; + Z dn2; + int *rind2,*cind2; + int ret2; #if SIZEOF_LONG == 8 - return generic_gauss_elim64(mat,nm,dn,rindp,cindp); + ret = generic_gauss_elim64(mat,nm,dn,rindp,cindp); + return ret; #endif bmat = (Z **)mat->body; row = mat->row; col = mat->col; @@ -1180,6 +1194,40 @@ int generic_gauss_elim_direct(MAT mat,MAT *nm,Z *dn,in return rank; } +int mpz_intmtoratm(mpz_t **mat,int row,int col,mpz_t md,mpz_t **nm,mpz_t dn) +{ + mpz_t t,s,b,u,nm1,dn1; + int i,j,k,l,ret; + mpz_t *mi,*nmk; + + if ( UNIMPZ(md) ) + return 0; + mpz_init(t); mpz_init(s); mpz_init(b); mpz_init(u); + mpz_init(nm1); mpz_init(dn1); + mpz_fdiv_q_2exp(t,md,1); mpz_sqrt(s,t); mpz_fdiv_q_2exp(b,s,64); + if ( !mpz_sgn(b) ) mpz_set_ui(b,1); + mpz_set_ui(dn,1); + for ( i = 0; i < row; i++ ) + for ( j = 0, mi = mat[i]; j < col; j++ ) + if ( mpz_sgn(mi[j]) ) { + mpz_mul(s,mi[j],dn); + mpz_mod(u,s,md); + ret = mpz_inttorat(u,md,b,nm1,dn1); + if ( !ret ) + return 0; + else { + if ( !UNIMPZ(dn1) ) { + for ( k = 0; k < i; k++ ) + for ( l = 0, nmk = nm[k]; l < col; l++ ) mpz_mul(nmk[l],nmk[l],dn1); + for ( l = 0, nmk = nm[i]; l < j; l++ ) mpz_mul(nmk[l],nmk[l],dn1); + } + mpz_set(nm[i][j],nm1); + mpz_mul(dn,dn,dn1); + } + } + return 1; +} + int intmtoratm(MAT mat,Z md,MAT nm,Z *dn) { Z t,s,b,dn0,dn1,nm1,q,u,unm,udn,dmy; @@ -1280,9 +1328,45 @@ int intvtoratv(Z *v,int n,Z md,Z b,Z *nm,Z *dn) /* assuming 0 < c < m */ +int mpz_inttorat(mpz_t c,mpz_t m,mpz_t b,mpz_t nm,mpz_t dn) +{ + mpz_t u1,v1,u2,v2,r1,r2; + mpz_t q,t; + + mpz_init_set_ui(u1,0); mpz_init_set_ui(v1,1); + mpz_init_set(u2,m); mpz_init_set(v2,c); + mpz_init(q); mpz_init(t); mpz_init(r1); mpz_init(r2); + while ( mpz_cmp(v2,b) >= 0 ) { + /* r2 = u2-q*v2 */ + mpz_fdiv_qr(q,r2,u2,v2); + mpz_set(u2,v2); mpz_set(v2,r2); + /* r1 = u1-q*v1 */ + mpz_mul(t,q,v1); mpz_sub(r1,u1,t); + mpz_set(u1,v1); mpz_set(v1,r1); + } + if ( mpz_cmp(v1,b) >= 0 ) return 0; + else { + mpz_gcd(t,v1,v2); + if ( UNIMPZ(t) ) + mpz_set_ui(r1,0); + else { + /* v1 /= t, v2 /= t, t=c*v1-v2, r1=t%m */ + mpz_divexact(v1,v1,t); mpz_divexact(v2,v2,t); + mpz_mul(t,c,v1); mpz_sub(t,t,v2); mpz_mod(r1,t,m); + } + if ( mpz_sgn(r1) ) return 0; + if ( mpz_sgn(v1)<0 ) { + mpz_neg(dn,v1); mpz_neg(nm,v2); + } else { + mpz_set(dn,v1); mpz_set(nm,v2); + } + return 1; + } +} + int inttorat(Z c,Z m,Z b,Z *nmp,Z *dnp) { - Z qq,t,u1,v1,r1; + Z qq,t,s,r,u1,v1,r1; Z q,u2,v2,r2; u1 = 0; v1 = ONE; u2 = m; v2 = c; @@ -1292,17 +1376,59 @@ int inttorat(Z c,Z m,Z b,Z *nmp,Z *dnp) } if ( cmpz(v1,b) >= 0 ) return 0; else { - if ( mpz_sgn(BDY(v1))<0 ) { - chsgnz(v1,dnp); chsgnz(v2,nmp); + /* reduction and check */ + /* v2/v1 = u2/u1, c*u1-u2 = 0 mod m? */ + gcdz(v1,v2,&t); + if ( UNIZ(t) ) { + u1 = v1; u2 = v2; r = 0; } else { - *dnp = v1; *nmp = v2; + divsz(v1,t,&u1); divsz(v2,t,&u2); + mulz(c,u1,&t); subz(t,u2,&s); remz(s,m,&r); } + if ( r ) return 0; + if ( mpz_sgn(BDY(u1))<0 ) { + chsgnz(u1,dnp); chsgnz(u2,nmp); + } else { + *dnp = u1; *nmp = u2; + } return 1; } } extern int f4_nocheck; +int mpz_gensolve_check(MAT mat,mpz_t **nm,mpz_t dn,int rank,int clen,int *rind,int *cind) +{ + int row,col,i,j,k,l; + mpz_t t; + mpz_t *w; + Z *mati; + mpz_t *nmk; + + if ( f4_nocheck ) return 1; + row = mat->row; col = mat->col; + w = (mpz_t *)MALLOC(clen*sizeof(mpz_t)); + mpz_init(t); + for ( i = 0; i < clen; i++ ) mpz_init(w[i]); + for ( i = 0; i < row; i++ ) { + mati = (Z *)mat->body[i]; + for ( l = 0; l < clen; l++ ) mpz_set_ui(w[l],0); + for ( k = 0; k < rank; k++ ) + for ( l = 0, nmk = (mpz_t *)nm[k]; l < clen; l++ ) { + /* w[l] += mati[rind[k]]*nmk[k] */ + if ( mati[rind[k]] ) mpz_addmul(w[l],BDY(mati[rind[k]]),nmk[l]); + } + for ( j = 0; j < clen; j++ ) { + if ( mati[cind[j]] ) mpz_mul(t,dn,BDY(mati[cind[j]])); + else mpz_set_ui(t,0); + if ( mpz_cmp(w[j],t) ) break; + } + if ( j != clen ) break; + } + if ( i != row ) return 0; + else return 1; +} + int gensolve_check(MAT mat,MAT nm,Z dn,int *rind,int *cind) { int row,col,rank,clen,i,j,k,l; @@ -1371,7 +1497,7 @@ void isqrtz(Z a,Z *r) Z two; if ( !a ) *r = 0; - else if ( UNIQ(a) ) *r = ONE; + else if ( UNIZ(a) ) *r = ONE; else { k = z_bits((Q)a); /* a <= 2^k-1 */ bshiftz(ONE,-((k>>1)+(k&1)),&x); /* a <= x^2 */ @@ -1493,6 +1619,9 @@ int generic_gauss_elim_hensel(MAT mat,MAT *nmmat,Z *dn Z wn; Z wq; +#if SIZEOF_LONG == 8 + return generic_gauss_elim_hensel64(mat,nmmat,dn,rindp,cindp,0); +#endif init_eg(&eg_mul1); init_eg(&eg_mul2); a0 = (Z **)mat->body; row = mat->row; col = mat->col; @@ -1605,8 +1734,10 @@ get_eg(&tmp2); add_eg(&eg_mul2,&tmp1,&tmp2); if ( !cinfo[j] ) cind[k++] = j; return rank; - } + } else + goto reset; } else { +reset: period = period*3/2; count = 0; } @@ -1642,6 +1773,9 @@ int generic_gauss_elim_hensel_dalg(MAT mat,DP *mb,MAT Z wq; DP m; +#if SIZEOF_LONG == 8 + return generic_gauss_elim_hensel64(mat,nmmat,dn,rindp,cindp,mb); +#endif a0 = (Z **)mat->body; row = mat->row; col = mat->col; w = (int **)almat(row,col); @@ -1714,8 +1848,8 @@ int generic_gauss_elim_hensel_dalg(MAT mat,DP *mb,MAT for ( i = 0; i < rank; i++ ) for ( j = 0, bi = b[i], wi = wc[i]; j < ri; j++ ) wi[j] = remqi((Q)bi[j],md); - /* wc = A^(-1)wc; wc is normalized */ - solve_by_lu_mod(w,rank,md,wc,ri,1); + /* wc = A^(-1)wc; wc is not normalized */ + solve_by_lu_mod(w,rank,md,wc,ri,0); /* x += q*wc */ for ( i = 0; i < rank; i++ ) for ( j = 0, wi = wc[i]; j < ri; j++ ) mul1addtoz(q,wi[j],&x[i][j]); @@ -1728,17 +1862,13 @@ int generic_gauss_elim_hensel_dalg(MAT mat,DP *mb,MAT mpz_init_set(uz,BDY(b[i][j])); else mpz_init_set_ui(uz,0); - for ( k = 0; k < rank; k++ ) { - if ( a[i][k] && wc[k][j] ) { - if ( wc[k][j] < 0 ) - mpz_addmul_ui(uz,BDY(a[i][k]),-wc[k][j]); - else + for ( k = 0; k < rank; k++ ) + if ( a[i][k] && wc[k][j] ) mpz_submul_ui(uz,BDY(a[i][k]),wc[k][j]); - } - } MPZTOZ(uz,u); divsz(u,mdq,&b[i][j]); } + count++; /* q = q*md */ mulz(q,mdq,&u); q = u; @@ -1758,8 +1888,10 @@ int generic_gauss_elim_hensel_dalg(MAT mat,DP *mb,MAT if ( !cinfo[j] ) cind[k++] = j; return rank; - } + } else + goto reset; } else { +reset: period = period*3/2; count = 0; } @@ -1791,11 +1923,150 @@ mp_limb_t remqi64(Q a,mp_limb_t mod) int generic_gauss_elim_mod64(mp_limb_t **mat,int row,int col,mp_limb_t md,int *colstat); mp_limb_t get_lprime64(int ind); +void mpz_print(mpz_t a) +{ + mpz_out_str(stdout,10,a); printf("\n"); +} + +void mpz_printmat(mpz_t **a,int row,int col) +{ + int i,j; + for ( i = 0; i < row; i++ ) { + for ( j = 0; j < col; j++ ) { + mpz_out_str(stdout,10,a[i][j]); printf(" "); + } + printf("\n"); + } +} + +mpz_t **mpz_allocmat(int row,int col) +{ + mpz_t **p; + int i,j; + + p = (mpz_t **)MALLOC(row*sizeof(mpz_t *)); + for ( i = 0; i < row; i++ ) { + p[i] = (mpz_t *)MALLOC(col*sizeof(mpz_t)); + for ( j = 0; j < col; j++ ) mpz_init(p[i][j]); + } + return p; +} + +#if 1 int generic_gauss_elim64(MAT mat,MAT *nm,Z *dn,int **rindp,int **cindp) { mp_limb_t **wmat; mp_limb_t *wmi; mp_limb_t md,inv,t,t1; + Z z; + Z **bmat,*bmi; + mpz_t **tmat,**num; + mpz_t *tmi; + mpz_t den; + mpz_t q,m1,m3,s,u; + int *colstat,*wcolstat,*rind,*cind; + int row,col,ind,i,j,k,l,rank,rank0; + MAT r; + int ret; + + bmat = (Z **)mat->body; + row = mat->row; col = mat->col; + wmat = (mp_limb_t **)almat64(row,col); + colstat = (int *)MALLOC_ATOMIC(col*sizeof(int)); + wcolstat = (int *)MALLOC_ATOMIC(col*sizeof(int)); + mpz_init(m1); mpz_init(m3); mpz_init(den); + for ( ind = 0; ; ind++ ) { + if ( DP_Print ) { + fprintf(asir_out,"."); fflush(asir_out); + } + md = get_lprime64(ind); + for ( i = 0; i < row; i++ ) + for ( j = 0, bmi = bmat[i], wmi = wmat[i]; j < col; j++ ) + wmi[j] = bmi[j]==0?0:mpz_fdiv_ui(BDY(bmi[j]),md); + rank = generic_gauss_elim_mod64(wmat,row,col,md,wcolstat); + if ( !ind ) { +RESET: + mpz_set_ui(m1,md); + rank0 = rank; + bcopy(wcolstat,colstat,col*sizeof(int)); + // crmat + tmat = mpz_allocmat(rank,col-rank); + // + num = mpz_allocmat(rank,col-rank); + for ( i = 0; i < rank; i++ ) + for ( j = k = 0, tmi = tmat[i], wmi = wmat[i]; j < col; j++ ) + if ( !colstat[j] ) { mpz_set_ui(tmi[k],wmi[j]); k++; } + } else { + if ( rank < rank0 ) { + if ( DP_Print ) { + fprintf(asir_out,"lower rank matrix; continuing...\n"); + fflush(asir_out); + } + continue; + } else if ( rank > rank0 ) { + if ( DP_Print ) { + fprintf(asir_out,"higher rank matrix; resetting...\n"); + fflush(asir_out); + } + goto RESET; + } else { + for ( j = 0; (j= t ) t = wmi[j]-t; + else t = md-(t-wmi[j]); + mpz_addmul_ui(tmi[k],m1,mulmod64(t,inv,md)); + } else if ( wmi[j] ) { + /* f3 = m1*(m1 mod m2)^(-1)*f2 */ + mpz_mul_ui(tmi[k],m1,mulmod64(wmi[j],inv,md)); + } + k++; + } + mpz_set(m1,m3); + if ( ind % F4_INTRAT_PERIOD ) + ret = 0; + else + ret = mpz_intmtoratm(tmat,rank,col-rank,m1,num,den); + if ( ret ) { + *rindp = rind = (int *)MALLOC_ATOMIC(rank*sizeof(int)); + *cindp = cind = (int *)MALLOC_ATOMIC((col-rank)*sizeof(int)); + for ( j = k = l = 0; j < col; j++ ) + if ( colstat[j] ) rind[k++] = j; + else cind[l++] = j; + if ( mpz_gensolve_check(mat,num,den,rank,col-rank,rind,cind) ) { + MKMAT(r,rank,col-rank); *nm = r; + for ( i = 0; i < rank; i++ ) + for ( j = 0; j < col-rank; j++ ) { + MPZTOZ(num[i][j],z); BDY(r)[i][j] = z; + } + MPZTOZ(den,*dn); + return rank; + } + } + } + } +} +#else +int generic_gauss_elim64(MAT mat,MAT *nm,Z *dn,int **rindp,int **cindp) +{ + mp_limb_t **wmat; + mp_limb_t *wmi; + mp_limb_t md,inv,t,t1; Z **bmat,**tmat,*bmi,*tmi; Z q,m1,m2,m3,s,u; int *colstat,*wcolstat,*rind,*cind; @@ -1889,4 +2160,173 @@ RESET: } } } +#endif + +int generic_gauss_elim_hensel64(MAT mat,MAT *nmmat,Z *dn,int **rindp,int **cindp,DP *mb) +{ + MAT r; + Z z; + Z **a0; + Z *ai; + mpz_t **a,**b,**x,**nm; + mpz_t *bi,*xi; + mpz_t q,u,den; + mp_limb_t **w; + mp_limb_t *wi; + mp_limb_t **wc; + mp_limb_t md; + int row,col; + int ind,i,j,k,l,li,ri,rank; + int *cinfo,*rinfo; + int *rind,*cind; + int count; + int ret; + int period; + DP m; + + a0 = (Z **)mat->body; + row = mat->row; col = mat->col; + w = (mp_limb_t **)almat64(row,col); + for ( ind = 0; ; ind++ ) { + md = get_lprime64(ind); + for ( i = 0; i < row; i++ ) + for ( j = 0, ai = a0[i], wi = w[i]; j < col; j++ ) + wi[j] = remqi64((Q)ai[j],md); + + if ( DP_Print > 3 ) { + fprintf(asir_out,"LU decomposition.."); fflush(asir_out); + } + rank = find_lhs_and_lu_mod64(w,row,col,md,&rinfo,&cinfo); + if ( DP_Print > 3 ) { + fprintf(asir_out,"done.\n"); fflush(asir_out); + } + + if ( mb ) { + /* this part is added for inv_or_split_dalg */ + for ( i = 0; i < col-1; i++ ) { + if ( !cinfo[i] ) { + m = mb[i]; + for ( j = i+1; j < col-1; j++ ) + if ( dp_redble(mb[j],m) ) + cinfo[j] = -1; + } + } + } + + a = (mpz_t **)mpz_allocmat(rank,rank); /* lhs mat */ + b = (mpz_t **)mpz_allocmat(rank,col-rank); + for ( j = li = ri = 0; j < col; j++ ) + if ( cinfo[j] > 0 ) { + /* the column is in lhs */ + for ( i = 0; i < rank; i++ ) { + w[i][li] = w[i][j]; + if ( a0[rinfo[i]][j] ) + mpz_set(a[i][li],BDY(a0[rinfo[i]][j])); + else + mpz_set_ui(a[i][li],0); + } + li++; + } else if ( !cinfo[j] ) { + /* the column is in rhs */ + for ( i = 0; i < rank; i++ ) { + if ( a0[rinfo[i]][j] ) + mpz_set(b[i][ri],BDY(a0[rinfo[i]][j])); + else + mpz_set_ui(b[i][ri],0); + } + ri++; + } + + /* solve Ax=B; A: rank x rank, B: rank x ri */ + /* algorithm + c <- B + x <- 0 + q <- 1 + do + t <- A^(-1)c mod p + x <- x+qt + c <- (c-At)/p + q <- qp + end do + then Ax-B=0 mod q and b=(B-Ax)/q hold after "do". + */ + x = (mpz_t **)mpz_allocmat(rank,ri); + nm = (mpz_t **)mpz_allocmat(rank,ri); + wc = (mp_limb_t **)almat64(rank,ri); + *rindp = rind = (int *)MALLOC_ATOMIC(rank*sizeof(int)); + *cindp = cind = (int *)MALLOC_ATOMIC((ri)*sizeof(int)); + + period = F4_INTRAT_PERIOD; + mpz_init_set_ui(q,1); + mpz_init(u); + mpz_init(den); + for ( count = 0; ; ) { + /* check Ax=B mod q */ + if ( DP_Print > 3 ) + fprintf(stderr,"o"); + /* wc = b mod md */ + for ( i = 0; i < rank; i++ ) + for ( j = 0, bi = b[i], wi = wc[i]; j < ri; j++ ) + wi[j] = mpz_fdiv_ui(bi[j],md); + /* wc = A^(-1)wc; wc is not normalized */ + solve_by_lu_mod64(w,rank,md,(mp_limb_signed_t **)wc,ri,0); + /* x += q*wc */ + for ( i = 0; i < rank; i++ ) + for ( j = 0, wi = wc[i]; j < ri; j++ ) + if ( wi[j] > 0 ) + mpz_addmul_ui(x[i][j],q,wi[j]); + else if ( wi[j] < 0 ) + mpz_submul_ui(x[i][j],q,-wi[j]); + /* b =(b-A*wc)/md */ + for ( i = 0; i < rank; i++ ) + for ( j = 0; j < ri; j++ ) { + mpz_set(u,b[i][j]); + for ( k = 0; k < rank; k++ ) { + if ( a[i][k] && wc[k][j] ) { + if ( wc[k][j] < 0 ) + mpz_addmul_ui(u,a[i][k],-wc[k][j]); + else + mpz_submul_ui(u,a[i][k],wc[k][j]); + } + } + mpz_divexact_ui(b[i][j],u,md); + } + count++; + /* q = q*md */ + mpz_mul_ui(q,q,md); + if ( count == period ) { + ret = mpz_intmtoratm(x,rank,ri,q,nm,den); + if ( ret ) { + for ( j = k = l = 0; j < col; j++ ) + if ( cinfo[j] > 0 ) + rind[k++] = j; + else if ( !cinfo[j] ) + cind[l++] = j; + ret = mpz_gensolve_check(mat,nm,den,rank,ri,rind,cind); + if ( ret ) { + *rindp = rind; + *cindp = cind; + for ( j = k = 0; j < col; j++ ) + if ( !cinfo[j] ) + cind[k++] = j; + MKMAT(r,rank,ri); *nmmat = r; + for ( i = 0; i < rank; i++ ) + for ( j = 0; j < ri; j++ ) { + MPZTOZ(nm[i][j],z); BDY(r)[i][j] = z; + } + MPZTOZ(den,*dn); + return rank; + } else + goto reset; + } else { +reset: + fprintf(stderr,"F"); + period = period*3/2; + count = 0; + } + } + } + } +} + #endif