=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2000/engine/nd.c,v retrieving revision 1.108 retrieving revision 1.116 diff -u -p -r1.108 -r1.116 --- OpenXM_contrib2/asir2000/engine/nd.c 2004/09/21 02:43:11 1.108 +++ OpenXM_contrib2/asir2000/engine/nd.c 2004/12/01 12:36:17 1.116 @@ -1,4 +1,4 @@ -/* $OpenXM: OpenXM_contrib2/asir2000/engine/nd.c,v 1.107 2004/09/21 02:34:12 noro Exp $ */ +/* $OpenXM: OpenXM_contrib2/asir2000/engine/nd.c,v 1.115 2004/11/18 08:29:11 noro Exp $ */ #include "nd.h" @@ -41,6 +41,9 @@ static int nd_found,nd_create,nd_notfirst; static int nmv_adv; static int nd_demand; +UINT *nd_det_compute_bound(NDV **dm,int n,int j); +void nd_det_reconstruct(NDV **dm,int n,int j,NDV d); + void nd_free_private_storage() { _nm_free_list = 0; @@ -724,13 +727,11 @@ int ndl_disjoint(UINT *d1,UINT *d2) #endif } -int ndl_check_bound2(int index,UINT *d2) +int ndl_check_bound(UINT *d1,UINT *d2) { UINT u2; - UINT *d1; int i,j,ind,k; - d1 = nd_bound[index]; ind = 0; #if USE_UNROLL switch ( nd_bpe ) { @@ -819,6 +820,11 @@ int ndl_check_bound2(int index,UINT *d2) #endif } +int ndl_check_bound2(int index,UINT *d2) +{ + return ndl_check_bound(nd_bound[index],d2); +} + INLINE int ndl_hash_value(UINT *d) { int i; @@ -2579,7 +2585,7 @@ void removecont_array(Q *c,int n) { struct oVECT v; Q d0,d1,a,u,u1,gcd; - int i; + int i,j; N qn,rn,gn; Q *q,*r; @@ -2616,6 +2622,7 @@ void nd_mul_c(int mod,ND p,int mul) int c,c1; if ( !p ) return; + if ( mul == 1 ) return; if ( mod == -1 ) for ( m = BDY(p); m; m = NEXT(m) ) CM(m) = _mulsf(CM(m),mul); @@ -2631,6 +2638,7 @@ void nd_mul_c_q(ND p,Q mul) Q c; if ( !p ) return; + if ( UNIQ(mul) ) return; for ( m = BDY(p); m; m = NEXT(m) ) { mulq(CQ(m),mul,&c); CQ(m) = c; } @@ -3709,6 +3717,10 @@ void nd_nf_p(P f,LIST g,LIST v,int m,struct order_spec int stat,nvar,max,e; union oNDC dn; + if ( !f ) { + *rp = 0; + return; + } pltovl(v,&vv); for ( nvar = 0, tv = vv; tv; tv = NEXT(tv), nvar++ ); @@ -3785,29 +3797,6 @@ int nd_to_vect_q(UINT *s0,int n,ND d,Q *r) return i; } -int nm_ind_pair_to_vect(int mod,UINT *s0,int n,NM_ind_pair pair,UINT *r) -{ - NM m; - NMV mr; - UINT *d,*t,*s; - NDV p; - int i,j,len; - - m = pair->mul; - d = DL(m); - p = nd_ps[pair->index]; - t = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); - for ( i = 0; i < n; i++ ) r[i] = 0; - len = LEN(p); - for ( i = j = 0, s = s0, mr = BDY(p); j < len; j++, NMV_ADV(mr) ) { - ndl_add(d,DL(mr),t); - for ( ; !ndl_equal(t,s); s += nd_wpd, i++ ); - r[i] = CM(mr); - } - for ( i = 0; !r[i]; i++ ); - return i; -} - IndArray nm_ind_pair_to_vect_compress(int mod,UINT *s0,int n,NM_ind_pair pair) { NM m; @@ -3860,7 +3849,7 @@ IndArray nm_ind_pair_to_vect_compress(int mod,UINT *s0 int ndv_reduce_vect_q(Q *svect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev; - Q cs,mcs,c1,c2,cr,gcd; + Q cs,mcs,c1,c2,cr,gcd,t; IndArray ivect; unsigned char *ivc; unsigned short *ivs; @@ -3880,27 +3869,32 @@ int ndv_reduce_vect_q(Q *svect,int col,IndArray *imat, len = LEN(redv); mr = BDY(redv); igcd_cofactor(svect[k],CQ(mr),&gcd,&cs,&cr); chsgnq(cs,&mcs); + if ( !UNIQ(cr) ) { + for ( j = 0; j < col; j++ ) { + mulq(svect[j],cr,&c1); svect[j] = c1; + } + } svect[k] = 0; prev = k; switch ( ivect->width ) { case 1: ivc = ivect->index.c; for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { pos = prev+ivc[j]; prev = pos; - mulq(svect[pos],cr,&c1); mulq(CQ(mr),mcs,&c2); addq(c1,c2,&svect[pos]); + mulq(CQ(mr),mcs,&c2); addq(svect[pos],c2,&t); svect[pos] = t; } break; case 2: ivs = ivect->index.s; for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { pos = prev+ivs[j]; prev = pos; - mulq(svect[pos],cr,&c1); mulq(CQ(mr),mcs,&c2); addq(c1,c2,&svect[pos]); + mulq(CQ(mr),mcs,&c2); addq(svect[pos],c2,&t); svect[pos] = t; } break; case 4: ivi = ivect->index.i; for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { pos = prev+ivi[j]; prev = pos; - mulq(svect[pos],cr,&c1); mulq(CQ(mr),mcs,&c2); addq(c1,c2,&svect[pos]); + mulq(CQ(mr),mcs,&c2); addq(svect[pos],c2,&t); svect[pos] = t; } break; } @@ -4059,7 +4053,7 @@ NDV vect_to_ndv_q(Q *vect,int spcol,int col,int *rhead for ( j = 0, len = 0; j < spcol; j++ ) if ( vect[j] ) len++; if ( !len ) return 0; else { - mr0 = (NMV)GC_malloc_atomic_ignore_off_page(nmv_adv*len); + mr0 = (NMV)GC_malloc(nmv_adv*len); #if 0 ndv_alloc += nmv_adv*len; #endif @@ -4068,6 +4062,8 @@ NDV vect_to_ndv_q(Q *vect,int spcol,int col,int *rhead for ( j = k = 0; j < col; j++, p += nd_wpd ) if ( !rhead[j] ) { if ( c = vect[k++] ) { + if ( DN(c) ) + error("afo"); ndl_copy(p,DL(mr)); CQ(mr) = c; NMV_ADV(mr); } } @@ -4155,8 +4151,6 @@ NODE nd_f4(int m) PGeoBucket bucket; struct oEGT eg0,eg1,eg_f4; - if ( !m ) - error("nd_f4 : not implemented"); #if 0 ndv_alloc = 0; #endif @@ -4298,6 +4292,7 @@ NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s SG((NDV)BDY(r)) = spsugar[i]; GC_free(spmat[i]); } + if ( r0 ) NEXT(r) = 0; for ( ; i < sprow; i++ ) GC_free(spmat[i]); get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); @@ -4328,9 +4323,9 @@ NODE nd_f4_red_q_main(ND_pairs sp0,int nsp,UINT *s0vec spcol = col-nred; get_eg(&eg0); /* elimination (1st step) */ - spmat = (Q **)ALLOCA(nsp*sizeof(UINT *)); - svect = (Q *)ALLOCA(col*sizeof(UINT)); - spsugar = (int *)ALLOCA(nsp*sizeof(UINT)); + spmat = (Q **)ALLOCA(nsp*sizeof(Q *)); + svect = (Q *)ALLOCA(col*sizeof(Q)); + spsugar = (int *)ALLOCA(nsp*sizeof(Q)); for ( a = sprow = 0, sp = sp0; a < nsp; a++, sp = NEXT(sp) ) { nd_sp(0,0,sp,&spol); if ( !spol ) continue; @@ -4338,13 +4333,13 @@ NODE nd_f4_red_q_main(ND_pairs sp0,int nsp,UINT *s0vec maxrs = ndv_reduce_vect_q(svect,col,imat,rvect,nred); for ( i = 0; i < col; i++ ) if ( svect[i] ) break; if ( i < col ) { - spmat[sprow] = v = (Q *)MALLOC_ATOMIC(spcol*sizeof(Q)); + spmat[sprow] = v = (Q *)MALLOC(spcol*sizeof(Q)); for ( j = k = 0; j < col; j++ ) if ( !rhead[j] ) v[k++] = svect[j]; spsugar[sprow] = MAX(maxrs,SG(spol)); sprow++; } - nd_free(spol); +/* nd_free(spol); */ } get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); if ( DP_Print ) { @@ -4352,7 +4347,7 @@ NODE nd_f4_red_q_main(ND_pairs sp0,int nsp,UINT *s0vec fflush(asir_out); } /* free index arrays */ - for ( i = 0; i < nred; i++ ) GC_free(imat[i]->index.c); +/* for ( i = 0; i < nred; i++ ) GC_free(imat[i]->index.c); */ /* elimination (2nd step) */ colstat = (int *)ALLOCA(spcol*sizeof(int)); @@ -4362,9 +4357,11 @@ NODE nd_f4_red_q_main(ND_pairs sp0,int nsp,UINT *s0vec NEXTNODE(r0,r); BDY(r) = (pointer)vect_to_ndv_q(spmat[i],spcol,col,rhead,s0vect); SG((NDV)BDY(r)) = spsugar[i]; - GC_free(spmat[i]); +/* GC_free(spmat[i]); */ } - for ( ; i < sprow; i++ ) GC_free(spmat[i]); + if ( r0 ) NEXT(r) = 0; + +/* for ( ; i < sprow; i++ ) GC_free(spmat[i]); */ get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { @@ -4634,7 +4631,10 @@ void nd_exec_f4_red_dist() int nd_gauss_elim_q(Q **mat0,int *sugar,int row,int col,int *colstat) { - int mod,i,j,t; + int mod,i,j,t,c,rank,rank0,inv; + int *ci,*ri; + Q dn; + MAT m,nm; int **wmat; /* XXX */ @@ -4651,7 +4651,34 @@ int nd_gauss_elim_q(Q **mat0,int *sugar,int row,int co wmat[i][j] = 0; } } - nd_gauss_elim_mod(wmat,sugar,row,col,mod,colstat); + rank0 = nd_gauss_elim_mod(wmat,sugar,row,col,mod,colstat); + NEWMAT(m); m->row = row; m->col = col; m->body = (pointer **)mat0; + rank = generic_gauss_elim(m,&nm,&dn,&ri,&ci); + if ( rank != rank0 ) + error("afo"); + for ( i = 0; i < row; i++ ) + for ( j = 0; j < col; j++ ) + mat0[i][j] = 0; + c = col-rank; + for ( i = 0; i < rank; i++ ) { + mat0[i][ri[i]] = dn; + for ( j = 0; j < c; j++ ) + mat0[i][ci[j]] = (Q)BDY(nm)[i][j]; + } + inv = invm(rem(NM(dn),mod),mod); + if ( SGN(dn) < 0 ) inv = mod-inv; + for ( i = 0; i < row; i++ ) + for ( j = 0; j < col; j++ ) { + if ( mat0[i][j] ) { + t = rem(NM(mat0[i][j]),mod); + if ( SGN(mat0[i][j]) < 0 ) t = mod-t; + } else + t = 0; + c = dmar(t,inv,0,mod); + if ( wmat[i][j] != c ) + error("afo"); + } + return rank; } int nd_gauss_elim_mod(int **mat0,int *sugar,int row,int col,int md,int *colstat) @@ -4872,6 +4899,7 @@ void nd_det(int mod,MAT f,P *rp) NDV d,s,mij,mjj; ND u; NMV nmv; + UINT *bound; PGeoBucket bucket; struct order_spec *ord; @@ -4889,7 +4917,7 @@ void nd_det(int mod,MAT f,P *rp) e = getdeg(tv->v,(P)m[i][j]); max = MAX(e,max); } - nd_setup_parameters(nvar,1024); + nd_setup_parameters(nvar,max); dm = (NDV **)almat_pointer(n,n); for ( i = 0, max = 0; i < n; i++ ) for ( j = 0; j < n; j++ ) { @@ -4923,15 +4951,19 @@ void nd_det(int mod,MAT f,P *rp) } sgn = -sgn; } + bound = nd_det_compute_bound(dm,n,j); + if ( ndl_check_bound(bound,bound) ) + nd_det_reconstruct(dm,n,j,d); + for ( i = j+1, mj = dm[j], mjj = mj[j]; i < n; i++ ) { - if ( DP_Print ) fprintf(stderr," i=%d\n ",i); +/* if ( DP_Print ) fprintf(stderr," i=%d\n ",i); */ mi = dm[i]; mij = mi[j]; if ( mod ) ndv_mul_c(mod,mij,mod-1); else ndv_mul_c_q(mij,mone); for ( k = j+1; k < n; k++ ) { - if ( DP_Print ) fprintf(stderr,"k=%d ",k); +/* if ( DP_Print ) fprintf(stderr,"k=%d ",k); */ bucket = create_pbucket(); if ( mi[k] ) { nmv = BDY(mjj); len = LEN(mjj); @@ -4950,7 +4982,7 @@ void nd_det(int mod,MAT f,P *rp) u = nd_quo(mod,bucket,d); mi[k] = ndtondv(mod,u); } - if ( DP_Print ) fprintf(stderr,"\n",k); +/* if ( DP_Print ) fprintf(stderr,"\n",k); */ } d = mjj; } @@ -5010,4 +5042,57 @@ ND ndv_mul_nmv_trunc(int mod,NMV m0,NDV p,UINT *d) return r; } } +} + +void nd_det_reconstruct(NDV **dm,int n,int j,NDV d) +{ + int i,obpe,oadv,h,k,l; + static NM prev_nm_free_list; + EPOS oepos; + + obpe = nd_bpe; + oadv = nmv_adv; + oepos = nd_epos; + if ( obpe < 2 ) nd_bpe = 2; + else if ( obpe < 3 ) nd_bpe = 3; + else if ( obpe < 4 ) nd_bpe = 4; + else if ( obpe < 5 ) nd_bpe = 5; + else if ( obpe < 6 ) nd_bpe = 6; + else if ( obpe < 8 ) nd_bpe = 8; + else if ( obpe < 10 ) nd_bpe = 10; + else if ( obpe < 16 ) nd_bpe = 16; + else if ( obpe < 32 ) nd_bpe = 32; + else error("nd_det_reconstruct : exponent too large"); + + nd_setup_parameters(nd_nvar,0); + prev_nm_free_list = _nm_free_list; + _nm_free_list = 0; + for ( k = j; k < n; k++ ) + for (l = j; l < n; l++ ) + ndv_realloc(dm[k][l],obpe,oadv,oepos); + ndv_realloc(d,obpe,oadv,oepos); + prev_nm_free_list = 0; +#if 0 + GC_gcollect(); +#endif +} + +UINT *nd_det_compute_bound(NDV **dm,int n,int j) +{ + UINT *d0,*d1,*d,*t,*r; + int k,l; + + d0 = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); + d1 = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); + for ( k = 0; k < nd_wpd; k++ ) d0[k] = 0; + for ( k = j; k < n; k++ ) + for ( l = j; l < n; l++ ) + if ( dm[k][l] ) { + d = ndv_compute_bound(dm[k][l]); + ndl_lcm(d,d0,d1); + t = d1; d1 = d0; d0 = t; + } + r = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); + for ( k = 0; k < nd_wpd; k++ ) r[k] = d0[k]; + return r; }