=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2000/engine/nd.c,v retrieving revision 1.67 retrieving revision 1.112 diff -u -p -r1.67 -r1.112 --- OpenXM_contrib2/asir2000/engine/nd.c 2003/09/12 14:51:27 1.67 +++ OpenXM_contrib2/asir2000/engine/nd.c 2004/10/06 11:58:52 1.112 @@ -1,121 +1,21 @@ -/* $OpenXM: OpenXM_contrib2/asir2000/engine/nd.c,v 1.66 2003/09/12 08:26:19 noro Exp $ */ +/* $OpenXM: OpenXM_contrib2/asir2000/engine/nd.c,v 1.111 2004/09/21 07:19:01 noro Exp $ */ -#include "ca.h" -#include "inline.h" -#include +#include "nd.h" -#if defined(__GNUC__) -#define INLINE inline -#elif defined(VISUAL) -#define INLINE __inline -#else -#define INLINE -#endif - -typedef unsigned int UINT; - -#define USE_GEOBUCKET 1 -#define USE_UNROLL 1 - -#define REDTAB_LEN 32003 - -/* GeoBucket for polynomial addition */ - -typedef struct oPGeoBucket { - int m; - struct oND *body[32]; -} *PGeoBucket; - -/* distributed polynomial; linked list rep. */ -typedef struct oND { - struct oNM *body; - int nv; - int len; - int sugar; -} *ND; - -/* distributed polynomial; array rep. */ -typedef struct oNDV { - struct oNMV *body; - int nv; - int len; - int sugar; -} *NDV; - -/* monomial; linked list rep. */ -typedef struct oNM { - struct oNM *next; - union { - int m; - Q z; - P p; - } c; - UINT dl[1]; -} *NM; - -/* monomial; array rep. */ -typedef struct oNMV { - union { - int m; - Q z; - P p; - } c; - UINT dl[1]; -} *NMV; - -/* history of reducer */ -typedef struct oRHist { - struct oRHist *next; - int index; - int sugar; - UINT dl[1]; -} *RHist; - -/* S-pair list */ -typedef struct oND_pairs { - struct oND_pairs *next; - int i1,i2; - int sugar; - UINT lcm[1]; -} *ND_pairs; - -/* index and shift count for each exponent */ -typedef struct oEPOS { - int i; /* index */ - int s; /* shift */ -} *EPOS; - -typedef struct oBlockMask { - int n; - struct order_pair *order_pair; - UINT **mask; -} *BlockMask; - -typedef struct oBaseSet { - int len; - NDV *ps; - UINT **bound; -} *BaseSet; - -typedef struct oNM_ind_pair -{ - NM mul; - int index; -} *NM_ind_pair; - -typedef struct oIndArray -{ - char width; - int head; - union { - unsigned char *c; - unsigned short *s; - unsigned int *i; - } index; -} *IndArray; - int (*ndl_compare_function)(UINT *a1,UINT *a2); +int nd_dcomp; +NM _nm_free_list; +ND _nd_free_list; +ND_pairs _ndp_free_list; +#if 0 +static int ndv_alloc; +#endif +#if 1 +static int nd_f4_nsp=0x7fffffff; +#else +static int nd_f4_nsp=50; +#endif static double nd_scale=2; static UINT **nd_bound; static struct order_spec *nd_ord; @@ -127,248 +27,27 @@ static int nd_epw,nd_bpe,nd_wpd,nd_exporigin; static UINT nd_mask[32]; static UINT nd_mask0,nd_mask1; -static NM _nm_free_list; -static ND _nd_free_list; -static ND_pairs _ndp_free_list; - static NDV *nd_ps; static NDV *nd_ps_trace; static RHist *nd_psh; static int nd_psn,nd_pslen; - static RHist *nd_red; - +static int *nd_work_vector; +static int **nd_matrix; +static int nd_matrix_len; +static struct weight_or_block *nd_worb; +static int nd_worb_len; static int nd_found,nd_create,nd_notfirst; -static int nm_adv; static int nmv_adv; -static int nd_dcomp; +static int nd_demand; -extern VL CO; -extern int Top,Reverse,dp_nelim,do_weyl; -extern int *current_weyl_weight_vector; - -/* fundamental macros */ -#define TD(d) (d[0]) -#define HDL(d) ((d)->body->dl) -#define HTD(d) (TD(HDL(d))) -#define HCM(d) ((d)->body->c.m) -#define HCQ(d) ((d)->body->c.z) -#define HCP(d) ((d)->body->c.p) -#define CM(a) ((a)->c.m) -#define CQ(a) ((a)->c.z) -#define CP(a) ((a)->c.p) -#define DL(a) ((a)->dl) -#define SG(a) ((a)->sugar) -#define LEN(a) ((a)->len) -#define LCM(a) ((a)->lcm) -#define GET_EXP(d,a) (((d)[nd_epos[a].i]>>nd_epos[a].s)&nd_mask0) -#define GET_EXP_MASK(d,a,m) ((((d)[nd_epos[a].i]&(m)[nd_epos[a].i])>>nd_epos[a].s)&nd_mask0) -#define PUT_EXP(r,a,e) ((r)[nd_epos[a].i] |= ((e)<>oepos[a].s)&omask0) -#define PUT_EXP_OLD(r,a,e) ((r)[oepos[a].i] |= ((e)<TD(d2)?1:(TD(d1)0?TD_DL_COMPARE(d1,d2)\ - :(nd_dcomp==0?ndl_lex_compare(d1,d2)\ - :(nd_blockmask?ndl_block_compare(d1,d2)\ - :(*ndl_compare_function)(d1,d2)))) -#else -#define DL_COMPARE(d1,d2)\ -(nd_dcomp>0?TD_DL_COMPARE(d1,d2):(*ndl_compare_function)(d1,d2)) -#endif - -/* allocators */ -#define NEWRHist(r) \ -((r)=(RHist)MALLOC(sizeof(struct oRHist)+(nd_wpd-1)*sizeof(UINT))) -#define NEWND_pairs(m) \ -if(!_ndp_free_list)_NDP_alloc();\ -(m)=_ndp_free_list; _ndp_free_list = NEXT(_ndp_free_list) -#define NEWNM(m)\ -if(!_nm_free_list)_NM_alloc();\ -(m)=_nm_free_list; _nm_free_list = NEXT(_nm_free_list) -#define MKND(n,m,len,d)\ -if(!_nd_free_list)_ND_alloc();\ -(d)=_nd_free_list; _nd_free_list = (ND)BDY(_nd_free_list);\ -NV(d)=(n); LEN(d)=(len); BDY(d)=(m) -#define NEWNDV(d) ((d)=(NDV)MALLOC(sizeof(struct oNDV))) -#define MKNDV(n,m,l,d) NEWNDV(d); NV(d)=(n); BDY(d)=(m); LEN(d) = l; -#define NEWNM_ind_pair(p)\ -((p)=(NM_ind_pair)MALLOC(sizeof(struct oNM_ind_pair))) - -/* allocate and link a new object */ -#define NEXTRHist(r,c) \ -if(!(r)){NEWRHist(r);(c)=(r);}else{NEWRHist(NEXT(c));(c)=NEXT(c);} -#define NEXTNM(r,c) \ -if(!(r)){NEWNM(r);(c)=(r);}else{NEWNM(NEXT(c));(c)=NEXT(c);} -#define NEXTNM2(r,c,s) \ -if(!(r)){(c)=(r)=(s);}else{NEXT(c)=(s);(c)=(s);} -#define NEXTND_pairs(r,c) \ -if(!(r)){NEWND_pairs(r);(c)=(r);}else{NEWND_pairs(NEXT(c));(c)=NEXT(c);} -#define MKNM_ind_pair(p,m,i) (NEWNM_ind_pair(p),(p)->mul=(m),(p)->index=(i)) - -/* deallocators */ -#define FREENM(m) NEXT(m)=_nm_free_list; _nm_free_list=(m) -#define FREENDP(m) NEXT(m)=_ndp_free_list; _ndp_free_list=(m) -#define FREEND(m) BDY(m)=(NM)_nd_free_list; _nd_free_list=(m) - -/* macro for increasing pointer to NMV */ -#define NMV_ADV(m) (m = (NMV)(((char *)m)+nmv_adv)) -#define NMV_OADV(m) (m = (NMV)(((char *)m)+oadv)) -#define NDV_NADV(m) (m = (NMV)(((char *)m)+newadv)) -#define NMV_PREV(m) (m = (NMV)(((char *)m)-nmv_adv)) -#define NMV_OPREV(m) (m = (NMV)(((char *)m)-oadv)) - -/* external functions */ -void GC_gcollect(); -NODE append_one(NODE,int); - -/* manipulation of coefficients */ -void nd_removecont(int mod,ND p); -void nd_removecont2(ND p1,ND p2); -void removecont_array(Q *c,int n); - -/* GeoBucket functions */ -ND normalize_pbucket(int mod,PGeoBucket g); -int head_pbucket(int mod,PGeoBucket g); -int head_pbucket_q(PGeoBucket g); -void add_pbucket_symbolic(PGeoBucket g,ND d); -void add_pbucket(int mod,PGeoBucket g,ND d); -void free_pbucket(PGeoBucket b); -void mulq_pbucket(PGeoBucket g,Q c); -NM remove_head_pbucket_symbolic(PGeoBucket g); -PGeoBucket create_pbucket(); - -/* manipulation of pairs and bases */ -int nd_newps(int mod,ND a,ND aq); -ND_pairs nd_newpairs( NODE g, int t ); -ND_pairs nd_minp( ND_pairs d, ND_pairs *prest ); -ND_pairs nd_minsugarp( ND_pairs d, ND_pairs *prest ); -NODE update_base(NODE nd,int ndp); -ND_pairs update_pairs( ND_pairs d, NODE /* of index */ g, int t); -ND_pairs equivalent_pairs( ND_pairs d1, ND_pairs *prest ); -ND_pairs crit_B( ND_pairs d, int s ); -ND_pairs crit_M( ND_pairs d1 ); -ND_pairs crit_F( ND_pairs d1 ); -int crit_2( int dp1, int dp2 ); -int ndv_newps(NDV a,NDV aq); - -/* top level functions */ -void nd_gr(LIST f,LIST v,int m,int f4,struct order_spec *ord,LIST *rp); -void nd_gr_trace(LIST f,LIST v,int trace,int homo,struct order_spec *ord,LIST *rp); -NODE nd_f4(int m); -NODE nd_gb(int m,int checkonly); -NODE nd_gb_trace(int m); - -/* ndl functions */ -int ndl_weight(UINT *d); -void ndl_weight_mask(UINT *d); -void ndl_homogenize(UINT *d,UINT *r,int obpe,EPOS oepos,int weight); -void ndl_dehomogenize(UINT *p); -void ndl_reconstruct(UINT *d,UINT *r,int obpe,EPOS oepos); -INLINE int ndl_reducible(UINT *d1,UINT *d2); -INLINE int ndl_lex_compare(UINT *d1,UINT *d2); -INLINE int ndl_block_compare(UINT *d1,UINT *d2); -INLINE int ndl_equal(UINT *d1,UINT *d2); -INLINE void ndl_copy(UINT *d1,UINT *d2); -INLINE void ndl_zero(UINT *d); -INLINE void ndl_add(UINT *d1,UINT *d2,UINT *d); -INLINE void ndl_addto(UINT *d1,UINT *d2); -INLINE void ndl_sub(UINT *d1,UINT *d2,UINT *d); -INLINE int ndl_hash_value(UINT *d); - -/* normal forms */ -INLINE int ndl_find_reducer(UINT *g); -INLINE int ndl_find_reducer_direct(UINT *g,NDV *ps,int len); -int nd_sp(int mod,int trace,ND_pairs p,ND *nf); -int nd_nf(int mod,ND g,NDV *ps,int full,ND *nf); -int nd_nf_pbucket(int mod,ND g,NDV *ps,int full,ND *nf); -int nd_nf_direct(int mod,ND g,BaseSet base,int full,ND *rp); - -/* finalizers */ -NODE ndv_reducebase(NODE x); -NODE ndv_reduceall(int m,NODE f); - -/* allocators */ -void nd_free_private_storage(); -void _NM_alloc(); -void _ND_alloc(); -void nd_free(ND p); -void nd_free_redlist(); - -/* printing */ -void ndl_print(UINT *dl); -void nd_print(ND p); -void nd_print_q(ND p); -void ndp_print(ND_pairs d); - - -/* setup, reconstruct */ -void nd_init_ord(struct order_spec *spec); -ND_pairs nd_reconstruct(int mod,int trace,ND_pairs ndp); -void nd_reconstruct_direct(int mod,NDV *ps,int len); -void ndv_setup(int mod,int trace,NODE f); -void nd_setup_parameters(int nvar,int max); -BlockMask nd_create_blockmask(struct order_spec *ord); -EPOS nd_create_epos(struct order_spec *ord); -int nd_get_exporigin(struct order_spec *ord); -void ndv_mod(int mod,NDV p); -NDV ndv_dup(int mod,NDV p); -ND nd_dup(ND p); - -/* ND functions */ -int ndv_check_candidate(NODE input,int obpe,int oadv,EPOS oepos,NODE cand); -void nd_mul_c(int mod,ND p,int mul); -void nd_mul_c_q(ND p,Q mul); -void nd_mul_c_p(VL vl,ND p,P mul); -ND nd_remove_head(ND p); -int nd_length(ND p); -void nd_append_red(UINT *d,int i); -UINT *ndv_compute_bound(NDV p); -ND nd_copy(ND p); -ND nd_merge(ND p1,ND p2); -ND nd_add(int mod,ND p1,ND p2); -ND nd_add_q(ND p1,ND p2); -INLINE int nd_length(ND p); - -/* NDV functions */ -ND weyl_ndv_mul_nm(int mod,NM m0,NDV p); -void weyl_mul_nm_nmv(int n,int mod,NM m0,NMV m1,NM *tab,int tlen); -void ndv_mul_c(int mod,NDV p,int mul); -void ndv_mul_c_q(NDV p,Q mul); -ND ndv_mul_nm_symbolic(NM m0,NDV p); -ND ndv_mul_nm(int mod,NM m0,NDV p); -void ndv_realloc(NDV p,int obpe,int oadv,EPOS oepos); -NDV ndv_dup_realloc(NDV p,int obpe,int oadv,EPOS oepos); -void ndv_homogenize(NDV p,int obpe,int oadv,EPOS eops); -void ndv_dehomogenize(NDV p,struct order_spec *spec); -void ndv_removecont(int mod,NDV p); -void ndv_print(NDV p); -void ndv_print_q(NDV p); -void ndv_free(NDV p); - -/* converters */ -ND ptond(VL vl,VL dvl,P p); -NDV ptondv(VL vl,VL dvl,P p); -P ndvtop(int mod,VL vl,VL dvl,NDV p); -NDV ndtondv(int mod,ND p); -ND ndvtond(int mod,NDV p); -int nm_ind_pair_to_vect(int m,UINT *s0,int n,NM_ind_pair pair,UINT *r); -IndArray nm_ind_pair_to_vect_compress(int m,UINT *s0,int n,NM_ind_pair pair); -int nd_to_vect(int mod,UINT *s0,int n,ND d,UINT *r); - void nd_free_private_storage() { - _nd_free_list = 0; _nm_free_list = 0; _ndp_free_list = 0; - bzero(nd_red,sizeof(REDTAB_LEN*sizeof(RHist))); +#if 0 GC_gcollect(); +#endif } void _NM_alloc() @@ -748,6 +427,84 @@ int ndl_block_compare(UINT *d1,UINT *d2) return 0; } +int ndl_matrix_compare(UINT *d1,UINT *d2) +{ + int i,j,s; + int *v; + + for ( j = 0; j < nd_nvar; j++ ) + nd_work_vector[j] = GET_EXP(d1,j)-GET_EXP(d2,j); + for ( i = 0; i < nd_matrix_len; i++ ) { + v = nd_matrix[i]; + for ( j = 0, s = 0; j < nd_nvar; j++ ) + s += v[j]*nd_work_vector[j]; + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + } + return 0; +} + +int ndl_composite_compare(UINT *d1,UINT *d2) +{ + int i,j,s,start,end,len,o; + int *v; + struct sparse_weight *sw; + + for ( j = 0; j < nd_nvar; j++ ) + nd_work_vector[j] = GET_EXP(d1,j)-GET_EXP(d2,j); + for ( i = 0; i < nd_worb_len; i++ ) { + len = nd_worb[i].length; + switch ( nd_worb[i].type ) { + case IS_DENSE_WEIGHT: + v = nd_worb[i].body.dense_weight; + for ( j = 0, s = 0; j < len; j++ ) + s += v[j]*nd_work_vector[j]; + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + break; + case IS_SPARSE_WEIGHT: + sw = nd_worb[i].body.sparse_weight; + for ( j = 0, s = 0; j < len; j++ ) + s += sw[j].value*nd_work_vector[sw[j].pos]; + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + break; + case IS_BLOCK: + o = nd_worb[i].body.block.order; + start = nd_worb[i].body.block.start; + switch ( o ) { + case 0: + end = start+len; + for ( j = start, s = 0; j < end; j++ ) + s += MUL_WEIGHT(nd_work_vector[j],j); + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + for ( j = end-1; j >= start; j-- ) + if ( nd_work_vector[j] < 0 ) return 1; + else if ( nd_work_vector[j] > 0 ) return -1; + break; + case 1: + end = start+len; + for ( j = start, s = 0; j < end; j++ ) + s += MUL_WEIGHT(nd_work_vector[j],j); + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + for ( j = start; j < end; j++ ) + if ( nd_work_vector[j] > 0 ) return 1; + else if ( nd_work_vector[j] < 0 ) return -1; + break; + case 2: + for ( j = start; j < end; j++ ) + if ( nd_work_vector[j] > 0 ) return 1; + else if ( nd_work_vector[j] < 0 ) return -1; + break; + } + break; + } + } + return 0; +} + /* TDH -> WW -> TD-> RL */ int ndl_ww_lex_compare(UINT *d1,UINT *d2) @@ -770,10 +527,24 @@ INLINE int ndl_equal(UINT *d1,UINT *d2) { int i; - for ( i = 0; i < nd_wpd; i++ ) - if ( *d1++ != *d2++ ) - return 0; - return 1; + switch ( nd_wpd ) { + case 2: + if ( TD(d2) != TD(d1) ) return 0; + if ( d2[1] != d1[1] ) return 0; + return 1; + break; + case 3: + if ( TD(d2) != TD(d1) ) return 0; + if ( d2[1] != d1[1] ) return 0; + if ( d2[2] != d1[2] ) return 0; + return 1; + break; + default: + for ( i = 0; i < nd_wpd; i++ ) + if ( *d1++ != *d2++ ) return 0; + return 1; + break; + } } INLINE void ndl_copy(UINT *d1,UINT *d2) @@ -1048,99 +819,6 @@ int ndl_check_bound2(int index,UINT *d2) #endif } -int ndl_check_bound2_direct(UINT *d1,UINT *d2) -{ - UINT u2; - int i,j,ind,k; - - ind = 0; -#if USE_UNROLL - switch ( nd_bpe ) { - case 3: - for ( i = nd_exporigin; i < nd_wpd; i++ ) { - u2 = d2[i]; - if ( d1[ind++]+((u2>>27)&0x7) >= 0x8 ) return 1; - if ( d1[ind++]+((u2>>24)&0x7) >= 0x8 ) return 1; - if ( d1[ind++]+((u2>>21)&0x7) >= 0x8 ) return 1; - if ( d1[ind++]+((u2>>18)&0x7) >= 0x8 ) return 1; - if ( d1[ind++]+((u2>>15)&0x7) >= 0x8 ) return 1; - if ( d1[ind++]+((u2>>12)&0x7) >= 0x8 ) return 1; - if ( d1[ind++]+((u2>>9)&0x7) >= 0x8 ) return 1; - if ( d1[ind++]+((u2>>6)&0x7) >= 0x8 ) return 1; - if ( d1[ind++]+((u2>>3)&0x7) >= 0x8 ) return 1; - if ( d1[ind++]+(u2&0x7) >= 0x8 ) return 1; - } - return 0; - break; - case 4: - for ( i = nd_exporigin; i < nd_wpd; i++ ) { - u2 = d2[i]; - if ( d1[ind++]+((u2>>28)&0xf) >= 0x10 ) return 1; - if ( d1[ind++]+((u2>>24)&0xf) >= 0x10 ) return 1; - if ( d1[ind++]+((u2>>20)&0xf) >= 0x10 ) return 1; - if ( d1[ind++]+((u2>>16)&0xf) >= 0x10 ) return 1; - if ( d1[ind++]+((u2>>12)&0xf) >= 0x10 ) return 1; - if ( d1[ind++]+((u2>>8)&0xf) >= 0x10 ) return 1; - if ( d1[ind++]+((u2>>4)&0xf) >= 0x10 ) return 1; - if ( d1[ind++]+(u2&0xf) >= 0x10 ) return 1; - } - return 0; - break; - case 6: - for ( i = nd_exporigin; i < nd_wpd; i++ ) { - u2 = d2[i]; - if ( d1[ind++]+((u2>>24)&0x3f) >= 0x40 ) return 1; - if ( d1[ind++]+((u2>>18)&0x3f) >= 0x40 ) return 1; - if ( d1[ind++]+((u2>>12)&0x3f) >= 0x40 ) return 1; - if ( d1[ind++]+((u2>>6)&0x3f) >= 0x40 ) return 1; - if ( d1[ind++]+(u2&0x3f) >= 0x40 ) return 1; - } - return 0; - break; - case 8: - for ( i = nd_exporigin; i < nd_wpd; i++ ) { - u2 = d2[i]; - if ( d1[ind++]+((u2>>24)&0xff) >= 0x100 ) return 1; - if ( d1[ind++]+((u2>>16)&0xff) >= 0x100 ) return 1; - if ( d1[ind++]+((u2>>8)&0xff) >= 0x100 ) return 1; - if ( d1[ind++]+(u2&0xff) >= 0x100 ) return 1; - } - return 0; - break; - case 16: - for ( i = nd_exporigin; i < nd_wpd; i++ ) { - u2 = d2[i]; - if ( d1[ind++]+((u2>>16)&0xffff) > 0x10000 ) return 1; - if ( d1[ind++]+(u2&0xffff) > 0x10000 ) return 1; - } - return 0; - break; - case 32: - for ( i = nd_exporigin; i < nd_wpd; i++ ) - if ( d1[i]+d2[i]>k)&nd_mask0) > nd_mask0 ) return 1; - } - return 0; - break; - } -#else - for ( i = nd_exporigin; i < nd_wpd; i++ ) { - u2 = d2[i]; - k = (nd_epw-1)*nd_bpe; - for ( j = 0; j < nd_epw; j++, k -= nd_bpe ) - if ( d1[ind++]+((u2>>k)&nd_mask0) > nd_mask0 ) return 1; - } - return 0; -#endif -} - INLINE int ndl_hash_value(UINT *d) { int i; @@ -1186,27 +864,6 @@ INLINE int ndl_find_reducer(UINT *dg) return -1; } -INLINE int ndl_find_reducer_direct(UINT *dg,NDV *ps,int len) -{ - NDV r; - RHist s; - int d,k,i; - - if ( Reverse ) - for ( i = len-1; i >= 0; i-- ) { - r = ps[i]; - if ( ndl_reducible(dg,HDL(r)) ) - return i; - } - else - for ( i = 0; i < len; i++ ) { - r = ps[i]; - if ( ndl_reducible(dg,HDL(r)) ) - return i; - } - return -1; -} - ND nd_merge(ND p1,ND p2) { int n,c; @@ -1258,7 +915,8 @@ ND nd_add(int mod,ND p1,ND p2) if ( !p1 ) return p2; else if ( !p2 ) return p1; - else if ( !mod ) return nd_add_q(p1,p2); + else if ( mod == -1 ) return nd_add_sf(p1,p2); + else if ( !mod ) return nd_add_z(p1,p2); else { can = 0; for ( n = NV(p1), m1 = BDY(p1), m2 = BDY(p2), mr0 = 0; m1 && m2; ) { @@ -1298,12 +956,34 @@ ND nd_add(int mod,ND p1,ND p2) } } -ND nd_add_q(ND p1,ND p2) +/* XXX on opteron, the inlined manipulation of destructive additon of + * two NM seems to make gcc optimizer get confused, so the part is + * done in a function. + */ + +int nm_destructive_add_z(NM *m1,NM *m2,NM *mr0,NM *mr) { + NM s; + Z t; + int can; + + t = addz(CZ(*m1),CZ(*m2)); + s = *m1; *m1 = NEXT(*m1); + if ( t ) { + can = 1; NEXTNM2(*mr0,*mr,s); CZ(*mr) = (t); + } else { + can = 2; FREENM(s); + } + s = *m2; *m2 = NEXT(*m2); FREENM(s); + return can; +} + +ND nd_add_z(ND p1,ND p2) +{ int n,c,can; ND r; NM m1,m2,mr0,mr,s; - Q t; + Z t; if ( !p1 ) return p2; else if ( !p2 ) return p1; @@ -1313,14 +993,18 @@ ND nd_add_q(ND p1,ND p2) c = DL_COMPARE(DL(m1),DL(m2)); switch ( c ) { case 0: - addq(CQ(m1),CQ(m2),&t); +#if defined(__x86_64__) + can += nm_destructive_add_z(&m1,&m2,&mr0,&mr); +#else + t = addz(CZ(m1),CZ(m2)); s = m1; m1 = NEXT(m1); if ( t ) { - can++; NEXTNM2(mr0,mr,s); CQ(mr) = (t); + can++; NEXTNM2(mr0,mr,s); CZ(mr) = (t); } else { can += 2; FREENM(s); } s = m2; m2 = NEXT(m2); FREENM(s); +#endif break; case 1: s = m1; m1 = NEXT(m1); NEXTNM2(mr0,mr,s); @@ -1345,8 +1029,55 @@ ND nd_add_q(ND p1,ND p2) } } +ND nd_add_sf(ND p1,ND p2) +{ + int n,c,can; + ND r; + NM m1,m2,mr0,mr,s; + int t; + + if ( !p1 ) return p2; + else if ( !p2 ) return p1; + else { + can = 0; + for ( n = NV(p1), m1 = BDY(p1), m2 = BDY(p2), mr0 = 0; m1 && m2; ) { + c = DL_COMPARE(DL(m1),DL(m2)); + switch ( c ) { + case 0: + t = _addsf(CM(m1),CM(m2)); + s = m1; m1 = NEXT(m1); + if ( t ) { + can++; NEXTNM2(mr0,mr,s); CM(mr) = (t); + } else { + can += 2; FREENM(s); + } + s = m2; m2 = NEXT(m2); FREENM(s); + break; + case 1: + s = m1; m1 = NEXT(m1); NEXTNM2(mr0,mr,s); + break; + case -1: + s = m2; m2 = NEXT(m2); NEXTNM2(mr0,mr,s); + break; + } + } + if ( !mr0 ) + if ( m1 ) mr0 = m1; + else if ( m2 ) mr0 = m2; + else return 0; + else if ( m1 ) NEXT(mr) = m1; + else if ( m2 ) NEXT(mr) = m2; + else NEXT(mr) = 0; + BDY(p1) = mr0; + SG(p1) = MAX(SG(p1),SG(p2)); + LEN(p1) = LEN(p1)+LEN(p2)-can; + FREEND(p2); + return p1; + } +} + /* ret=1 : success, ret=0 : overflow */ -int nd_nf(int mod,ND g,NDV *ps,int full,ND *rp) +int nd_nf(int mod,ND g,NDV *ps,int full,Q *dn,ND *rp) { ND d; NM m,mrd,tail; @@ -1355,14 +1086,17 @@ int nd_nf(int mod,ND g,NDV *ps,int full,ND *rp) int c,c1,c2,dummy; RHist h; NDV p,red; - Q cg,cred,gcd; + Z cg,cred,gcd; + Q tq,qq; double hmag; + if ( dn && !mod ) + *dn = ONE; if ( !g ) { *rp = 0; return 1; } - if ( !mod ) hmag = ((double)p_mag((P)HCQ(g)))*nd_scale; + if ( !mod ) hmag = ((double)z_mag(HCZ(g)))*nd_scale; sugar0 = sugar = SG(g); n = NV(g); @@ -1376,20 +1110,34 @@ int nd_nf(int mod,ND g,NDV *ps,int full,ND *rp) nd_free(g); nd_free(d); return 0; } - p = ps[index]; - if ( mod ) { + if ( nd_demand ) + p = ndv_load(index); + else + p = ps[index]; + if ( mod == -1 ) + CM(mul) = _mulsf(_invsf(HCM(p)),_chsgnsf(HCM(g))); + else if ( mod ) { c1 = invm(HCM(p),mod); c2 = mod-HCM(g); DMAR(c1,c2,0,mod,c); CM(mul) = c; } else { - igcd_cofactor(HCQ(g),HCQ(p),&gcd,&cg,&cred); - chsgnq(cg,&CQ(mul)); - nd_mul_c_q(d,cred); nd_mul_c_q(g,cred); + gcd = gcdz_cofactor(HCZ(g),HCZ(p),&cg,&cred); + CZ(mul) = chsgnz(cg); + nd_mul_c_z(d,cred); + nd_mul_c_z(g,cred); + if ( dn ) { + mulq(*dn,ztoq(cred),&tq); *dn = tq; + } } g = nd_add(mod,g,ndv_mul_nm(mod,mul,p)); + sugar = MAX(sugar,SG(p)+TD(DL(mul))); - if ( !mod && hmag && g && ((double)(p_mag((P)HCQ(g))) > hmag) ) { + if ( !mod && hmag && g && ((double)(z_mag(HCZ(g))) > hmag) ) { + tq = ztoq(HCZ(g)); nd_removecont2(d,g); - hmag = ((double)p_mag((P)HCQ(g)))*nd_scale; + if ( dn ) { + divq(tq,ztoq(HCZ(g)),&qq); divq(*dn,qq,&tq); *dn = tq; + } + hmag = ((double)z_mag((P)HCZ(g)))*nd_scale; } } else if ( !full ) { *rp = g; @@ -1423,9 +1171,11 @@ int nd_nf_pbucket(int mod,ND g,NDV *ps,int full,ND *rp int sugar,psugar,n,h_reducible; PGeoBucket bucket; int c,c1,c2; - Q cg,cred,gcd,zzz; + Z cg,cred,gcd,zzz; RHist h; double hmag,gmag; + int count = 0; + int hcount = 0; if ( !g ) { *rp = 0; @@ -1433,14 +1183,15 @@ int nd_nf_pbucket(int mod,ND g,NDV *ps,int full,ND *rp } sugar = SG(g); n = NV(g); - if ( !mod ) hmag = ((double)p_mag((P)HCQ(g)))*nd_scale; + if ( !mod ) hmag = ((double)z_mag(HCZ(g)))*nd_scale; bucket = create_pbucket(); add_pbucket(mod,bucket,g); d = 0; mul = (NM)ALLOCA(sizeof(struct oNM)+(nd_wpd-1)*sizeof(UINT)); while ( 1 ) { - hindex = mod?head_pbucket(mod,bucket):head_pbucket_q(bucket); + hindex = mod?head_pbucket(mod,bucket):head_pbucket_z(bucket); if ( hindex < 0 ) { + if ( DP_Print > 3 ) printf("(%d %d)",count,hcount); if ( d ) SG(d) = sugar; *rp = d; return 1; @@ -1448,6 +1199,8 @@ int nd_nf_pbucket(int mod,ND g,NDV *ps,int full,ND *rp g = bucket->body[hindex]; index = ndl_find_reducer(HDL(g)); if ( index >= 0 ) { + count++; + if ( !d ) hcount++; h = nd_psh[index]; ndl_sub(HDL(g),DL(h),DL(mul)); if ( ndl_check_bound2(index,DL(mul)) ) { @@ -1457,16 +1210,18 @@ int nd_nf_pbucket(int mod,ND g,NDV *ps,int full,ND *rp return 0; } p = ps[index]; - if ( mod ) { + if ( mod == -1 ) + CM(mul) = _mulsf(_invsf(HCM(p)),_chsgnsf(HCM(g))); + else if ( mod ) { c1 = invm(HCM(p),mod); c2 = mod-HCM(g); DMAR(c1,c2,0,mod,c); CM(mul) = c; } else { - igcd_cofactor(HCQ(g),HCQ(p),&gcd,&cg,&cred); - chsgnq(cg,&CQ(mul)); - nd_mul_c_q(d,cred); - mulq_pbucket(bucket,cred); + gcd = gcdz_cofactor(HCZ(g),HCZ(p),&cg,&cred); + CZ(mul) = chsgnz(cg); + nd_mul_c_z(d,cred); + mulz_pbucket(bucket,cred); g = bucket->body[hindex]; - gmag = (double)p_mag((P)HCQ(g)); + gmag = (double)z_mag(HCZ(g)); } red = ndv_mul_nm(mod,mul,p); bucket->body[hindex] = nd_remove_head(g); @@ -1482,7 +1237,7 @@ int nd_nf_pbucket(int mod,ND g,NDV *ps,int full,ND *rp return 1; } nd_removecont2(d,g); - hmag = ((double)p_mag((P)HCQ(g)))*nd_scale; + hmag = ((double)z_mag(HCZ(g)))*nd_scale; add_pbucket(mod,bucket,g); } } else if ( !full ) { @@ -1508,83 +1263,6 @@ int nd_nf_pbucket(int mod,ND g,NDV *ps,int full,ND *rp } } -int nd_nf_direct(int mod,ND g,BaseSet base,int full,ND *rp) -{ - ND d; - NM m,mrd,tail; - NM mul; - NDV *ps; - int n,sugar,psugar,sugar0,stat,index,len; - int c,c1,c2; - UINT **bound; - RHist h; - NDV p,red; - Q cg,cred,gcd; - double hmag; - - if ( !g ) { - *rp = 0; - return 1; - } -#if 0 - if ( !mod ) - hmag = ((double)p_mag((P)HCQ(g)))*nd_scale; -#else - /* XXX */ - hmag = 0; -#endif - - ps = base->ps; - bound = base->bound; - len = base->len; - sugar0 = sugar = SG(g); - n = NV(g); - mul = (NM)ALLOCA(sizeof(struct oNM)+(nd_wpd-1)*sizeof(UINT)); - for ( d = 0; g; ) { - index = ndl_find_reducer_direct(HDL(g),ps,len); - if ( index >= 0 ) { - p = ps[index]; - ndl_sub(HDL(g),HDL(p),DL(mul)); - if ( ndl_check_bound2_direct(bound[index],DL(mul)) ) { - nd_free(g); nd_free(d); - return 0; - } - if ( mod ) { - c1 = invm(HCM(p),mod); c2 = mod-HCM(g); - DMAR(c1,c2,0,mod,c); CM(mul) = c; - } else { - igcd_cofactor(HCQ(g),HCQ(p),&gcd,&cg,&cred); - chsgnq(cg,&CQ(mul)); - nd_mul_c_q(d,cred); nd_mul_c_q(g,cred); - } - g = nd_add(mod,g,ndv_mul_nm(mod,mul,p)); - sugar = MAX(sugar,SG(p)+TD(DL(mul))); - if ( !mod && hmag && g && ((double)(p_mag((P)HCQ(g))) > hmag) ) { - nd_removecont2(d,g); - hmag = ((double)p_mag((P)HCQ(g)))*nd_scale; - } - } else if ( !full ) { - *rp = g; - return 1; - } else { - m = BDY(g); - if ( NEXT(m) ) { - BDY(g) = NEXT(m); NEXT(m) = 0; LEN(g)--; - } else { - FREEND(g); g = 0; - } - if ( d ) { - NEXT(tail)=m; tail=m; LEN(d)++; - } else { - MKND(n,m,1,d); tail = BDY(d); - } - } - } - if ( d ) SG(d) = sugar; - *rp = d; - return 1; -} - /* input : list of NDV, cand : list of NDV */ int ndv_check_candidate(NODE input,int obpe,int oadv,EPOS oepos,NODE cand) @@ -1605,16 +1283,16 @@ again: else r = (NDV)BDY(t); d = ndvtond(0,r); - stat = nd_nf(0,d,nd_ps,0,&nf); + stat = nd_nf(0,d,nd_ps,0,0,&nf); if ( !stat ) { - nd_reconstruct(0,0,0); + nd_reconstruct(0,0); goto again; } else if ( nf ) return 0; - printf("."); fflush(stdout); + if ( DP_Print ) { printf("."); fflush(stdout); } } - printf("\n"); + if ( DP_Print ) { printf("\n"); } /* gbcheck : cand is a GB of Id(cand) ? */ - if ( !nd_gb(0,1) ) return 0; + if ( !nd_gb(0,0,1) ) return 0; /* XXX */ return 1; } @@ -1633,6 +1311,24 @@ ND nd_remove_head(ND p) return p; } +ND nd_separate_head(ND p,ND *head) +{ + NM m,m0; + ND r; + + m = BDY(p); + if ( !NEXT(m) ) { + *head = p; p = 0; + } else { + m0 = m; + BDY(p) = NEXT(m); LEN(p)--; + NEXT(m0) = 0; + MKND(NV(p),m0,1,r); + *head = r; + } + return p; +} + PGeoBucket create_pbucket() { PGeoBucket g; @@ -1689,12 +1385,12 @@ void add_pbucket(int mod,PGeoBucket g,ND d) g->m = MAX(g->m,k); } -void mulq_pbucket(PGeoBucket g,Q c) +void mulz_pbucket(PGeoBucket g,Z c) { int k; for ( k = 0; k <= g->m; k++ ) - nd_mul_c_q(g->body[k],c); + nd_mul_c_z(g->body[k],c); } NM remove_head_pbucket_symbolic(PGeoBucket g) @@ -1756,8 +1452,12 @@ int head_pbucket(int mod,PGeoBucket g) dj = HDL(gj); sum = HCM(gj); } else if ( c == 0 ) { - sum = sum+HCM(gi)-mod; - if ( sum < 0 ) sum += mod; + if ( mod == -1 ) + sum = _addsf(sum,HCM(gi)); + else { + sum = sum+HCM(gi)-mod; + if ( sum < 0 ) sum += mod; + } g->body[i] = nd_remove_head(gi); } } @@ -1771,10 +1471,10 @@ int head_pbucket(int mod,PGeoBucket g) } } -int head_pbucket_q(PGeoBucket g) +int head_pbucket_z(PGeoBucket g) { int j,i,c,k,nv; - Q sum,t; + Z sum,t; ND gi,gj; k = g->m; @@ -1785,26 +1485,25 @@ int head_pbucket_q(PGeoBucket g) if ( j < 0 ) { j = i; gj = g->body[j]; - sum = HCQ(gj); + sum = HCZ(gj); } else { nv = NV(gi); c = DL_COMPARE(HDL(gi),HDL(gj)); if ( c > 0 ) { - if ( sum ) HCQ(gj) = sum; + if ( sum ) HCZ(gj) = sum; else g->body[j] = nd_remove_head(gj); j = i; gj = g->body[j]; - sum = HCQ(gj); + sum = HCZ(gj); } else if ( c == 0 ) { - addq(sum,HCQ(gi),&t); - sum = t; + sum = addz(sum,HCZ(gi)); g->body[i] = nd_remove_head(gi); } } } if ( j < 0 ) return -1; else if ( sum ) { - HCQ(gj) = sum; + HCZ(gj) = sum; return j; } else g->body[j] = nd_remove_head(gj); @@ -1825,15 +1524,52 @@ ND normalize_pbucket(int mod,PGeoBucket g) return r; } +void do_diagonalize(int sugar,int m) +{ + int i,nh,stat; + NODE r,g,t; + ND h,nf,s,head; + NDV nfv; + Z q,num,den; + Q dn; + + for ( i = nd_psn-1; i >= 0 && SG(nd_psh[i]) == sugar; i-- ) { + if ( nd_demand ) + nfv = ndv_load(i); + else + nfv = nd_ps[i]; + s = ndvtond(m,nfv); + s = nd_separate_head(s,&head); + nd_nf(m,s,nd_ps,1,&dn,&nf); + if ( !m ) { + HCZ(head) = mulz(HCZ(head),simpz((Z)NM(dn))); + if ( SGN(dn) < 0 ) HCZ(head) = chsgnz(HCZ(head)); + if ( DN(dn) ) + nd_mul_c_z(nf,simpz((Z)DN(dn))); + } + nf = nd_add(m,head,nf); + ndv_free(nfv); + nd_removecont(m,nf); + nfv = ndtondv(m,nf); + nd_free(nf); + nd_bound[i] = ndv_compute_bound(nfv); + if ( nd_demand ) { + ndv_save(nfv,i); + ndv_free(nfv); + } else + nd_ps[i] = nfv; + } +} + /* return value = 0 => input is not a GB */ -NODE nd_gb(int m,int checkonly) +NODE nd_gb(int m,int ishomo,int checkonly) { int i,nh,sugar,stat; NODE r,g,t; ND_pairs d; ND_pairs l; - ND h,nf; + ND h,nf,s,head; NDV nfv; g = 0; d = 0; @@ -1846,49 +1582,100 @@ NODE nd_gb(int m,int checkonly) again: l = nd_minp(d,&d); if ( SG(l) != sugar ) { + if ( ishomo ) do_diagonalize(sugar,m); + sugar = SG(l); - fprintf(asir_out,"%d",sugar); + if ( DP_Print ) fprintf(asir_out,"%d",sugar); } stat = nd_sp(m,0,l,&h); if ( !stat ) { NEXT(l) = d; d = l; - d = nd_reconstruct(m,0,d); + d = nd_reconstruct(0,d); goto again; } #if USE_GEOBUCKET - stat = m?nd_nf_pbucket(m,h,nd_ps,!Top,&nf):nd_nf(m,h,nd_ps,!Top,&nf); + stat = m?nd_nf_pbucket(m,h,nd_ps,!Top,&nf):nd_nf(m,h,nd_ps,!Top,0,&nf); #else - stat = nd_nf(m,h,nd_ps,!Top,&nf); + stat = nd_nf(m,h,nd_ps,!Top,0,&nf); #endif if ( !stat ) { NEXT(l) = d; d = l; - d = nd_reconstruct(m,0,d); + d = nd_reconstruct(0,d); goto again; } else if ( nf ) { if ( checkonly ) return 0; - printf("+"); fflush(stdout); + if ( DP_Print ) { printf("+"); fflush(stdout); } nd_removecont(m,nf); nfv = ndtondv(m,nf); nd_free(nf); - nh = ndv_newps(nfv,0); + nh = ndv_newps(m,nfv,0); d = update_pairs(d,g,nh); g = update_base(g,nh); FREENDP(l); } else { - printf("."); fflush(stdout); + if ( DP_Print ) { printf("."); fflush(stdout); } FREENDP(l); } } - for ( t = g; t; t = NEXT(t) ) BDY(t) = (pointer)nd_ps[(int)BDY(t)]; + if ( nd_demand ) + for ( t = g; t; t = NEXT(t) ) + BDY(t) = (pointer)ndv_load((int)BDY(t)); + else + for ( t = g; t; t = NEXT(t) ) + BDY(t) = (pointer)nd_ps[(int)BDY(t)]; return g; } -NODE nd_gb_trace(int m) +void do_diagonalize_trace(int sugar,int m) { + int i,nh,stat; + NODE r,g,t; + ND h,nf,nfq,s,head; + NDV nfv,nfqv; + Q dn; + + for ( i = nd_psn-1; i >= 0 && SG(nd_psh[i]) == sugar; i-- ) { + /* for nd_ps */ + s = ndvtond(m,nd_ps[i]); + s = nd_separate_head(s,&head); + nd_nf_pbucket(m,s,nd_ps,1,&nf); + nf = nd_add(m,head,nf); + ndv_free(nd_ps[i]); + nd_ps[i] = ndtondv(m,nf); + nd_free(nf); + + /* for nd_ps_trace */ + if ( nd_demand ) + nfv = ndv_load(i); + else + nfv = nd_ps_trace[i]; + s = ndvtond(0,nfv); + s = nd_separate_head(s,&head); + nd_nf(0,s,nd_ps_trace,1,&dn,&nf); + HCZ(head) = mulz(HCZ(head),(Z)NM(dn)); + if ( SGN(dn) < 0 ) HCZ(head) = chsgnz(HCZ(head)); + if ( DN(dn) ) + nd_mul_c_z(nf,(Z)DN(dn)); + nf = nd_add(0,head,nf); + ndv_free(nfv); + nd_removecont(0,nf); + nfv = ndtondv(0,nf); + nd_free(nf); + nd_bound[i] = ndv_compute_bound(nfv); + if ( nd_demand ) { + ndv_save(nfv,i); + ndv_free(nfv); + } else + nd_ps_trace[i] = nfv; + } +} + +NODE nd_gb_trace(int m,int ishomo) +{ int i,nh,sugar,stat; NODE r,g,t; ND_pairs d; ND_pairs l; - ND h,nf,nfq; + ND h,nf,nfq,s,head; NDV nfv,nfqv; g = 0; d = 0; @@ -1901,48 +1688,62 @@ NODE nd_gb_trace(int m) again: l = nd_minp(d,&d); if ( SG(l) != sugar ) { + if ( ishomo ) do_diagonalize_trace(sugar,m); sugar = SG(l); - fprintf(asir_out,"%d",sugar); + if ( DP_Print ) fprintf(asir_out,"%d",sugar); } stat = nd_sp(m,0,l,&h); if ( !stat ) { NEXT(l) = d; d = l; - d = nd_reconstruct(m,1,d); + d = nd_reconstruct(1,d); goto again; } #if USE_GEOBUCKET stat = nd_nf_pbucket(m,h,nd_ps,!Top,&nf); #else - stat = nd_nf(m,h,nd_ps,!Top,&nf); + stat = nd_nf(m,h,nd_ps,!Top,0,&nf); #endif if ( !stat ) { NEXT(l) = d; d = l; - d = nd_reconstruct(m,1,d); + d = nd_reconstruct(1,d); goto again; } else if ( nf ) { - /* overflow does not occur */ - nd_sp(0,1,l,&h); - nd_nf(0,h,nd_ps_trace,!Top,&nfq); + if ( nd_demand ) { + nfqv = ndv_load(nd_psn); + nfq = ndvtond(0,nfqv); + } else + nfq = 0; + if ( !nfq ) { + if ( !nd_sp(0,1,l,&h) || !nd_nf(0,h,nd_ps_trace,!Top,0,&nfq) ) { + NEXT(l) = d; d = l; + d = nd_reconstruct(1,d); + goto again; + } + } if ( nfq ) { - /* failure; m|HC(nfq) */ - if ( !rem(NM(HCQ(nfq)),m) ) return 0; + /* m|HC(nfq) => failure */ + if ( !remzi(HCZ(nfq),m) ) return 0; - printf("+"); fflush(stdout); + if ( DP_Print ) { printf("+"); fflush(stdout); } nd_removecont(m,nf); nfv = ndtondv(m,nf); nd_free(nf); nd_removecont(0,nfq); nfqv = ndtondv(0,nfq); nd_free(nfq); - nh = ndv_newps(nfv,nfqv); + nh = ndv_newps(0,nfv,nfqv); d = update_pairs(d,g,nh); g = update_base(g,nh); } else { - printf("*"); fflush(stdout); + if ( DP_Print ) { printf("*"); fflush(stdout); } } } else { - printf("."); fflush(stdout); + if ( DP_Print ) { printf("."); fflush(stdout); } } FREENDP(l); } - for ( t = g; t; t = NEXT(t) ) - BDY(t) = (pointer)nd_ps_trace[(int)BDY(t)]; + if ( nd_demand ) + for ( t = g; t; t = NEXT(t) ) + BDY(t) = (pointer)ndv_load((int)BDY(t)); + else + for ( t = g; t; t = NEXT(t) ) + BDY(t) = (pointer)nd_ps_trace[(int)BDY(t)]; return g; } @@ -1958,56 +1759,47 @@ int ndv_compare_rev(NDV *p1,NDV *p2) NODE ndv_reduceall(int m,NODE f) { - int i,j,n,stat; - NDV *w,*ps; - ND nf,g; + int i,n,stat; + ND nf,g,head; NODE t,a0,a; - struct oBaseSet base; - UINT **bound; + Q dn; + NDV *w; - for ( n = 0, t = f; t; t = NEXT(t), n++ ); - ps = (NDV *)ALLOCA(n*sizeof(NDV)); - bound = (UINT **)ALLOCA(n*sizeof(UINT *)); - for ( i = 0, t = f; i < n; i++, t = NEXT(t) ) ps[i] = (NDV)BDY(t); - qsort(ps,n,sizeof(NDV),(int (*)(const void *,const void *))ndv_compare); - for ( i = 0; i < n; i++ ) bound[i] = ndv_compute_bound(ps[i]); - base.ps = (NDV *)ALLOCA((n-1)*sizeof(NDV)); - base.bound = (UINT **)ALLOCA((n-1)*sizeof(UINT *)); - base.len = n-1; - i = 0; - while ( i < n ) { - for ( j = 0; j < i; j++ ) { - base.ps[j] = ps[j]; base.bound[j] = bound[j]; - } - for ( j = i+1; j < n; j++ ) { - base.ps[j-1] = ps[j]; base.bound[j-1] = bound[j]; - } - g = ndvtond(m,ps[i]); - stat = nd_nf_direct(m,g,&base,1,&nf); + n = length(f); +#if 0 + w = (NDV *)ALLOCA(n*sizeof(NDV)); + for ( i = 0, t = f; i < n; i++, t = NEXT(t) ) w[i] = (NDV)BDY(t); + qsort(w,n,sizeof(NDV), + (int (*)(const void *,const void *))ndv_compare); + for ( t = f, i = 0; t; i++, t = NEXT(t) ) BDY(t) = (pointer)w[i]; +#endif + ndv_setup(m,0,f); + for ( i = 0; i < n; ) { + g = ndvtond(m,nd_ps[i]); + g = nd_separate_head(g,&head); + stat = nd_nf(m,g,nd_ps,1,&dn,&nf); if ( !stat ) - nd_reconstruct_direct(m,ps,n); - else if ( !nf ) { - printf("."); fflush(stdout); - ndv_free(ps[i]); - for ( j = i+1; j < n; j++ ) { - ps[j-1] = ps[j]; bound[j-1] = bound[j]; + nd_reconstruct(0,0); + else { + if ( DP_Print ) { printf("."); fflush(stdout); } + if ( !m ) { + HCZ(head) = mulz(HCZ(head),simpz((Z)NM(dn))); + if ( SGN(dn) < 0 ) HCZ(head) = chsgnz(HCZ(head)); + if ( DN(dn) ) + nd_mul_c_z(nf,simpz((Z)DN(dn))); } - n--; - base.len = n-1; - } else { - printf("."); fflush(stdout); - ndv_free(ps[i]); + nf = nd_add(m,head,nf); + ndv_free(nd_ps[i]); nd_removecont(m,nf); - ps[i] = ndtondv(m,nf); - bound[i] = ndv_compute_bound(ps[i]); - nd_free(nf); + nd_ps[i] = ndtondv(m,nf); nd_free(nf); + nd_bound[i] = ndv_compute_bound(nd_ps[i]); i++; } } - printf("\n"); + if ( DP_Print ) { printf("\n"); } for ( a0 = 0, i = 0; i < n; i++ ) { NEXTNODE(a0,a); - BDY(a) = (pointer)ps[i]; + BDY(a) = (pointer)nd_ps[i]; } NEXT(a) = 0; return a0; @@ -2237,11 +2029,18 @@ ND_pairs nd_minp( ND_pairs d, ND_pairs *prest ) return m; } s = SG(m); - for ( ml = 0, l = m; p; p = NEXT(l = p) ) - if ( (SG(p) < s) - || ((SG(p) == s) && (DL_COMPARE(LCM(p),LCM(m)) < 0)) ) { - ml = l; m = p; s = SG(m); - } + if ( !NoSugar ) { + for ( ml = 0, l = m; p; p = NEXT(l = p) ) + if ( (SG(p) < s) + || ((SG(p) == s) && (DL_COMPARE(LCM(p),LCM(m)) < 0)) ) { + ml = l; m = p; s = SG(m); + } + } else { + for ( ml = 0, l = m; p; p = NEXT(l = p) ) + if ( DL_COMPARE(LCM(p),LCM(m)) < 0 ) { + ml = l; m = p; s = SG(m); + } + } if ( !ml ) *prest = NEXT(m); else { NEXT(ml) = NEXT(m); @@ -2253,17 +2052,18 @@ ND_pairs nd_minp( ND_pairs d, ND_pairs *prest ) ND_pairs nd_minsugarp( ND_pairs d, ND_pairs *prest ) { - int msugar; + int msugar,i; ND_pairs t,dm0,dm,dr0,dr; for ( msugar = SG(d), t = NEXT(d); t; t = NEXT(t) ) if ( SG(t) < msugar ) msugar = SG(t); dm0 = 0; dr0 = 0; - for ( t = d; t; t = NEXT(t) ) - if ( SG(t) == msugar ) { + for ( i = 0, t = d; t; t = NEXT(t) ) + if ( i < nd_f4_nsp && SG(t) == msugar ) { if ( dm0 ) NEXT(dm) = t; else dm0 = t; dm = t; + i++; } else { if ( dr0 ) NEXT(dr) = t; else dr0 = t; @@ -2275,7 +2075,7 @@ ND_pairs nd_minsugarp( ND_pairs d, ND_pairs *prest ) return dm0; } -int ndv_newps(NDV a,NDV aq) +int ndv_newps(int m,NDV a,NDV aq) { int len; RHist r; @@ -2299,6 +2099,15 @@ int ndv_newps(NDV a,NDV aq) nd_bound[nd_psn] = ndv_compute_bound(a); SG(r) = SG(a); ndl_copy(HDL(a),DL(r)); } + if ( nd_demand ) { + if ( aq ) { + ndv_save(nd_ps_trace[nd_psn],nd_psn); + nd_ps_trace[nd_psn] = 0; + } else { + ndv_save(nd_ps[nd_psn],nd_psn); + nd_ps[nd_psn] = 0; + } + } return nd_psn++; } @@ -2308,11 +2117,17 @@ void ndv_setup(int mod,int trace,NODE f) NODE s,s0,f0; UINT *d; RHist r; + NDV *w; NDV a,am; nd_found = 0; nd_notfirst = 0; nd_create = 0; - nd_psn = length(f); nd_pslen = 2*nd_psn; + for ( nd_psn = 0, s = f; s; s = NEXT(s) ) if ( BDY(s) ) nd_psn++; + w = (NDV *)ALLOCA(nd_psn*sizeof(NDV)); + for ( i = 0, s = f; s; s = NEXT(s) ) if ( BDY(s) ) w[i++] = BDY(s); + qsort(w,nd_psn,sizeof(NDV), + (int (*)(const void *,const void *))ndv_compare); + nd_pslen = 2*nd_psn; nd_ps = (NDV *)MALLOC(nd_pslen*sizeof(NDV)); nd_ps_trace = (NDV *)MALLOC(nd_pslen*sizeof(NDV)); nd_psh = (RHist *)MALLOC(nd_pslen*sizeof(RHist)); @@ -2320,23 +2135,30 @@ void ndv_setup(int mod,int trace,NODE f) if ( !nd_red ) nd_red = (RHist *)MALLOC(REDTAB_LEN*sizeof(RHist)); - bzero(nd_red,REDTAB_LEN*sizeof(RHist)); - nd_free_private_storage(); - for ( i = 0; i < nd_psn; i++, f = NEXT(f) ) { + for ( i = 0; i < REDTAB_LEN; i++ ) nd_red[i] = 0; + for ( i = 0; i < nd_psn; i++ ) { if ( trace ) { - a = nd_ps_trace[i] = ndv_dup(0,(NDV)BDY(f)); + a = nd_ps_trace[i] = ndv_dup(0,w[i]); ndv_removecont(0,a); am = nd_ps[i] = ndv_dup(mod,a); ndv_mod(mod,am); ndv_removecont(mod,am); } else { - a = nd_ps[i] = ndv_dup(mod,(NDV)BDY(f)); - if ( mod ) ndv_mod(mod,a); + a = nd_ps[i] = ndv_dup(mod,w[i]); ndv_removecont(mod,a); } NEWRHist(r); SG(r) = HTD(a); ndl_copy(HDL(a),DL(r)); nd_bound[i] = ndv_compute_bound(a); nd_psh[i] = r; + if ( nd_demand ) { + if ( trace ) { + ndv_save(nd_ps_trace[i],i); + nd_ps_trace[i] = 0; + } else { + ndv_save(nd_ps[i],i); + nd_ps[i] = 0; + } + } } } @@ -2345,10 +2167,25 @@ void nd_gr(LIST f,LIST v,int m,int f4,struct order_spe VL tv,fv,vv,vc; NODE fd,fd0,r,r0,t,x,s,xx; int e,max,nvar; - ND b; + NDV b; + int ishomo; + if ( !m && Demand ) nd_demand = 1; + else nd_demand = 0; + +#if 0 + ndv_alloc = 0; +#endif get_vars((Obj)f,&fv); pltovl(v,&vv); - nvar = length(vv); + for ( nvar = 0, tv = vv; tv; tv = NEXT(tv), nvar++ ); + switch ( ord->id ) { + case 1: + if ( ord->nv != nvar ) + error("nd_{gr,f4} : invalid order specification"); + break; + default: + break; + } nd_init_ord(ord); for ( t = BDY(f), max = 0; t; t = NEXT(t) ) for ( tv = vv; tv; tv = NEXT(tv) ) { @@ -2356,13 +2193,18 @@ void nd_gr(LIST f,LIST v,int m,int f4,struct order_spe max = MAX(e,max); } nd_setup_parameters(nvar,max); + ishomo = 1; for ( fd0 = 0, t = BDY(f); t; t = NEXT(t) ) { b = (pointer)ptondv(CO,vv,(P)BDY(t)); + if ( ishomo ) + ishomo = ishomo && ndv_ishomo(b); + if ( m ) ndv_mod(m,b); if ( b ) { NEXTNODE(fd0,fd); BDY(fd) = (pointer)b; } } if ( fd0 ) NEXT(fd) = 0; ndv_setup(m,0,fd0); - x = f4?nd_f4(m):nd_gb(m,0); + x = f4?nd_f4(m):nd_gb(m,ishomo,0); + nd_demand = 0; x = ndv_reducebase(x); x = ndv_reduceall(m,x); for ( r0 = 0, t = x; t; t = NEXT(t) ) { @@ -2371,11 +2213,14 @@ void nd_gr(LIST f,LIST v,int m,int f4,struct order_spe } if ( r0 ) NEXT(r) = 0; MKLIST(*rp,r0); +#if 0 + fprintf(asir_out,"ndv_alloc=%d\n",ndv_alloc); +#endif } void nd_gr_trace(LIST f,LIST v,int trace,int homo,struct order_spec *ord,LIST *rp) { - struct order_spec ord1; + struct order_spec *ord1; VL tv,fv,vv,vc; NODE fd,fd0,in0,in,r,r0,t,s,cand; int m,nocheck,nvar,mindex,e,max; @@ -2383,13 +2228,24 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,stru NMV a; P p; EPOS oepos; - int obpe,oadv,wmax,i,len,cbpe; + int obpe,oadv,wmax,i,len,cbpe,ishomo; get_vars((Obj)f,&fv); pltovl(v,&vv); - nvar = length(vv); + for ( nvar = 0, tv = vv; tv; tv = NEXT(tv), nvar++ ); + switch ( ord->id ) { + case 1: + if ( ord->nv != nvar ) + error("nd_gr_trace : invalid order specification"); + break; + default: + break; + } nocheck = 0; mindex = 0; + if ( Demand ) nd_demand = 1; + else nd_demand = 0; + /* setup modulus */ if ( trace < 0 ) { trace = -trace; @@ -2404,8 +2260,11 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,stru nd_init_ord(ord); nd_setup_parameters(nvar,max); obpe = nd_bpe; oadv = nmv_adv; oepos = nd_epos; + ishomo = 1; for ( in0 = 0, fd0 = 0, t = BDY(f); t; t = NEXT(t) ) { c = ptondv(CO,vv,(P)BDY(t)); + if ( ishomo ) + ishomo = ishomo && ndv_ishomo(c); if ( c ) { NEXTNODE(in0,in); BDY(in) = (pointer)c; NEXTNODE(fd0,fd); BDY(fd) = (pointer)ndv_dup(0,c); @@ -2413,38 +2272,41 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,stru } if ( in0 ) NEXT(in) = 0; if ( fd0 ) NEXT(fd) = 0; - if ( homo ) { + if ( !ishomo && homo ) { for ( t = in0, wmax = 0; t; t = NEXT(t) ) { c = (NDV)BDY(t); len = LEN(c); for ( a = BDY(c), i = 0; i < len; i++, NMV_ADV(a) ) wmax = MAX(TD(DL(a)),wmax); } homogenize_order(ord,nvar,&ord1); - nd_init_ord(&ord1); + nd_init_ord(ord1); nd_setup_parameters(nvar+1,wmax); for ( t = fd0; t; t = NEXT(t) ) ndv_homogenize((NDV)BDY(t),obpe,oadv,oepos); } while ( 1 ) { + if ( Demand ) + nd_demand = 1; ndv_setup(m,1,fd0); - cand = nd_gb_trace(m); + cand = nd_gb_trace(m,ishomo || homo); if ( !cand ) { /* failure */ if ( trace > 1 ) { *rp = 0; return; } else m = get_lprime(++mindex); continue; } - if ( homo ) { + if ( !ishomo && homo ) { /* dehomogenization */ for ( t = cand; t; t = NEXT(t) ) ndv_dehomogenize((NDV)BDY(t),ord); nd_init_ord(ord); nd_setup_parameters(nvar,0); } + nd_demand = 0; cand = ndv_reducebase(cand); cand = ndv_reduceall(0,cand); + cbpe = nd_bpe; if ( nocheck ) break; - cbpe = nd_bpe; if ( ndv_check_candidate(in0,obpe,oadv,oepos,cand) ) /* success */ break; @@ -2455,8 +2317,8 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,stru /* try the next modulus */ m = get_lprime(++mindex); /* reset the parameters */ - if ( homo ) { - nd_init_ord(&ord1); + if ( !ishomo && homo ) { + nd_init_ord(ord1); nd_setup_parameters(nvar+1,wmax); } else { nd_init_ord(ord); @@ -2466,7 +2328,7 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,stru } /* dp->p */ nd_bpe = cbpe; - nd_setup_parameters(0,0); + nd_setup_parameters(nd_nvar,0); for ( r = cand; r; r = NEXT(r) ) BDY(r) = (pointer)ndvtop(0,CO,vv,BDY(r)); MKLIST(*rp,cand); } @@ -2547,14 +2409,15 @@ void nd_print(ND p) printf("0\n"); else { for ( m = BDY(p); m; m = NEXT(m) ) { - printf("+%d*",CM(m)); + if ( CM(m) & 0x80000000 ) printf("+@_%d*",IFTOF(CM(m))); + else printf("+%d*",CM(m)); ndl_print(DL(m)); } printf("\n"); } } -void nd_print_q(ND p) +void nd_print_z(ND p) { NM m; @@ -2563,7 +2426,7 @@ void nd_print_q(ND p) else { for ( m = BDY(p); m; m = NEXT(m) ) { printf("+"); - printexpr(CO,CQ(m)); + printz(CZ(m)); printf("*"); ndl_print(DL(m)); } @@ -2582,32 +2445,30 @@ void ndp_print(ND_pairs d) void nd_removecont(int mod,ND p) { int i,n; - Q *w; - Q dvr,t; + Z *w; + Z dvr,t; NM m; struct oVECT v; - N q,r; - if ( mod ) nd_mul_c(mod,p,invm(HCM(p),mod)); + if ( mod == -1 ) nd_mul_c(mod,p,_invsf(HCM(p))); + else if ( mod ) nd_mul_c(mod,p,invm(HCM(p),mod)); else { for ( m = BDY(p), n = 0; m; m = NEXT(m), n++ ); - w = (Q *)ALLOCA(n*sizeof(Q)); + w = (Z *)ALLOCA(n*sizeof(Z)); v.len = n; v.body = (pointer *)w; - for ( m = BDY(p), i = 0; i < n; m = NEXT(m), i++ ) w[i] = CQ(m); + for ( m = BDY(p), i = 0; i < n; m = NEXT(m), i++ ) w[i] = CZ(m); removecont_array(w,n); - for ( m = BDY(p), i = 0; i < n; m = NEXT(m), i++ ) CQ(m) = w[i]; + for ( m = BDY(p), i = 0; i < n; m = NEXT(m), i++ ) CZ(m) = w[i]; } } void nd_removecont2(ND p1,ND p2) { int i,n1,n2,n; - Q *w; - Q dvr,t; + Z *w; NM m; struct oVECT v; - N q,r; if ( !p1 ) { nd_removecont(0,p2); return; @@ -2617,34 +2478,33 @@ void nd_removecont2(ND p1,ND p2) n1 = nd_length(p1); n2 = nd_length(p2); n = n1+n2; - w = (Q *)ALLOCA(n*sizeof(Q)); + w = (Z *)ALLOCA(n*sizeof(Z)); v.len = n; v.body = (pointer *)w; - for ( m = BDY(p1), i = 0; i < n1; m = NEXT(m), i++ ) w[i] = CQ(m); - for ( m = BDY(p2); i < n; m = NEXT(m), i++ ) w[i] = CQ(m); + for ( m = BDY(p1), i = 0; i < n1; m = NEXT(m), i++ ) w[i] = CZ(m); + for ( m = BDY(p2); i < n; m = NEXT(m), i++ ) w[i] = CZ(m); removecont_array(w,n); - for ( m = BDY(p1), i = 0; i < n1; m = NEXT(m), i++ ) CQ(m) = w[i]; - for ( m = BDY(p2); i < n; m = NEXT(m), i++ ) CQ(m) = w[i]; + for ( m = BDY(p1), i = 0; i < n1; m = NEXT(m), i++ ) CZ(m) = w[i]; + for ( m = BDY(p2); i < n; m = NEXT(m), i++ ) CZ(m) = w[i]; } void ndv_removecont(int mod,NDV p) { int i,len; - Q *w; - Q dvr,t; + Z *w; + Z dvr,t; NMV m; - if ( mod ) + if ( mod == -1 ) + ndv_mul_c(mod,p,_invsf(HCM(p))); + else if ( mod ) ndv_mul_c(mod,p,invm(HCM(p),mod)); else { len = p->len; - w = (Q *)ALLOCA(len*sizeof(Q)); - for ( m = BDY(p), i = 0; i < len; NMV_ADV(m), i++ ) w[i] = CQ(m); - sortbynm(w,len); - qltozl(w,len,&dvr); - for ( m = BDY(p), i = 0; i < len; NMV_ADV(m), i++ ) { - divq(CQ(m),dvr,&t); CQ(m) = t; - } + w = (Z *)ALLOCA(len*sizeof(Z)); + for ( m = BDY(p), i = 0; i < len; NMV_ADV(m), i++ ) w[i] = CZ(m); + removecont_array(w,len); + for ( m = BDY(p), i = 0; i < len; NMV_ADV(m), i++ ) CZ(m) = w[i]; } } @@ -2662,7 +2522,7 @@ void ndv_homogenize(NDV p,int obpe,int oadv,EPOS oepos t = (NMV)ALLOCA(nmv_adv); for ( i = 0; i < len; i++, NMV_OPREV(m), NMV_PREV(mr) ) { ndl_homogenize(DL(m),DL(t),obpe,oepos,max); - CQ(mr) = CQ(m); + CZ(mr) = CZ(m); ndl_copy(DL(t),DL(mr)); } NV(p)++; @@ -2672,8 +2532,8 @@ void ndv_homogenize(NDV p,int obpe,int oadv,EPOS oepos void ndv_dehomogenize(NDV p,struct order_spec *ord) { int i,j,adj,len,newnvar,newwpd,newadv,newexporigin; - Q *w; - Q dvr,t; + Z *w; + Z dvr,t; NMV m,r; len = p->len; @@ -2683,9 +2543,9 @@ void ndv_dehomogenize(NDV p,struct order_spec *ord) for ( m = BDY(p), i = 0; i < len; NMV_ADV(m), i++ ) ndl_dehomogenize(DL(m)); if ( newwpd != nd_wpd ) { - newadv = sizeof(struct oNMV)+(newwpd-1)*sizeof(UINT); + newadv = ROUND_FOR_ALIGN(sizeof(struct oNMV)+(newwpd-1)*sizeof(UINT)); for ( m = r = BDY(p), i = 0; i < len; NMV_ADV(m), NDV_NADV(r), i++ ) { - CQ(r) = CQ(m); + CZ(r) = CZ(m); for ( j = 0; j < newexporigin; j++ ) DL(r)[j] = DL(m)[j]; adj = nd_exporigin-newexporigin; for ( ; j < newwpd; j++ ) DL(r)[j] = DL(m)[j+adj]; @@ -2694,36 +2554,87 @@ void ndv_dehomogenize(NDV p,struct order_spec *ord) NV(p)--; } -void removecont_array(Q *c,int n) +int comp_z(Z *a,Z *b) { + return cmpz(*a,*b); +} + +Z zgcdv(VECT a) +{ + int i,j,n,nz; + Z g,gt,q,r; + Z *c; + + n = a->len; + c = (Z *)ALLOCA(n*sizeof(Z)); + for ( i = 0; i < n; i++ ) + c[i] = (Z)a->body[i]; + qsort(c,n,sizeof(Z),(int (*) (const void *,const void *))comp_z); + for ( ; n && ! *c; n--, c++ ); + + if ( !n ) + return 0; + else if ( n == 1 ) + return absz(c[0]); + + g = gcdz(c[0],c[1]); + for ( i = 2; i < n; i++ ) { + q = divz(c[i],g,&r); + gt = gcdz(g,r); + if ( !cmpz(g,gt) ) { + for ( j = i+1; j < n; j++ ) { + q = divz(c[j],g,&r); c[j] = r; + } + } else + g = gt; + } + return absz(g); +} + +Z zgcdv_estimate(VECT a) +{ + int n,i,m; + Z s,t,u,g; + Z *q; + + n = a->len; q = (Z *)a->body; + if ( n == 1 ) + return absz(q[0]); + + m = n/2; + for ( i = 0 , s = 0; i < m; i++ ) + s = sgnz(q[i])>=0 ? addz(s,q[i]) : subz(s,q[i]); + for ( t = 0; i < n; i++ ) + t = sgnz(q[i])>=0 ? addz(t,q[i]) : subz(t,q[i]); + g = gcdz(s,t); + return absz(g); +} + +void removecont_array(Z *c,int n) +{ struct oVECT v; - Q d0,d1,a,u,u1,gcd; - int i; - N qn,rn,gn; - Q *q,*r; + Z d0,d1,a,u,u1,gcd; + int i,j; + Z q0,q1; + Z *q,*r; - q = (Q *)ALLOCA(n*sizeof(Q)); - r = (Q *)ALLOCA(n*sizeof(Q)); + q = (Z *)ALLOCA(n*sizeof(Z)); + r = (Z *)ALLOCA(n*sizeof(Z)); v.id = O_VECT; v.len = n; v.body = (pointer *)c; - igcdv_estimate(&v,&d0); - for ( i = 0; i < n; i++ ) { - divn(NM(c[i]),NM(d0),&qn,&rn); - NTOQ(qn,SGN(c[i])*SGN(d0),q[i]); - NTOQ(rn,SGN(c[i]),r[i]); - } + d0 = zgcdv_estimate(&v); + /* c[i] = d0*q[i]+r[i] */ + for ( i = 0; i < n; i++ ) + q[i] = divz(c[i],d0,&r[i]); for ( i = 0; i < n; i++ ) if ( r[i] ) break; if ( i < n ) { v.id = O_VECT; v.len = n; v.body = (pointer *)r; - igcdv(&v,&d1); - gcdn(NM(d0),NM(d1),&gn); NTOQ(gn,1,gcd); - divsn(NM(d0),gn,&qn); NTOQ(qn,1,a); + /* d1 = GCD(r[0],...,r[n-1]) */ + d1 = zgcdv(&v); + /* q0 = d0/GCD(d0,d1) */ + gcd = gcdz_cofactor(d0,d1,&q0,&q1); for ( i = 0; i < n; i++ ) { - mulq(a,q[i],&u); - if ( r[i] ) { - divsn(NM(r[i]),gn,&qn); NTOQ(qn,SGN(r[i]),u1); - addq(u,u1,&q[i]); - } else - q[i] = u; + /* q[i] = d0/gcd*q[i]+r[i]/gcd */ + q[i] = addz(mulz(q0,q[i]),divsz(r[i],gcd)); } } for ( i = 0; i < n; i++ ) c[i] = q[i]; @@ -2735,21 +2646,23 @@ void nd_mul_c(int mod,ND p,int mul) int c,c1; if ( !p ) return; - for ( m = BDY(p); m; m = NEXT(m) ) { - c1 = CM(m); - DMAR(c1,mul,0,mod,c); - CM(m) = c; - } + if ( mod == -1 ) + for ( m = BDY(p); m; m = NEXT(m) ) + CM(m) = _mulsf(CM(m),mul); + else + for ( m = BDY(p); m; m = NEXT(m) ) { + c1 = CM(m); DMAR(c1,mul,0,mod,c); CM(m) = c; + } } -void nd_mul_c_q(ND p,Q mul) +void nd_mul_c_z(ND p,Z mul) { NM m; - Q c; + Z c; - if ( !p ) return; + if ( !p || uniz(mul) ) return; for ( m = BDY(p); m; m = NEXT(m) ) { - mulq(CQ(m),mul,&c); CQ(m) = c; + CZ(m) = mulz(CZ(m),mul); } } @@ -2825,28 +2738,59 @@ UINT *ndv_compute_bound(NDV p) return t; } +UINT *nd_compute_bound(ND p) +{ + UINT *d1,*d2,*t; + UINT u; + int i,j,k,l,len,ind; + NM m; + + if ( !p ) + return 0; + d1 = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); + d2 = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); + len = LEN(p); + m = BDY(p); ndl_copy(DL(m),d1); m = NEXT(m); + for ( m = NEXT(m); m; m = NEXT(m) ) { + ndl_lcm(DL(m),d1,d2); + t = d1; d1 = d2; d2 = t; + } + l = nd_nvar+31; + t = (UINT *)MALLOC_ATOMIC(l*sizeof(UINT)); + for ( i = nd_exporigin, ind = 0; i < nd_wpd; i++ ) { + u = d1[i]; + k = (nd_epw-1)*nd_bpe; + for ( j = 0; j < nd_epw; j++, k -= nd_bpe, ind++ ) + t[ind] = (u>>k)&nd_mask0; + } + for ( ; ind < l; ind++ ) t[ind] = 0; + return t; +} + int nd_get_exporigin(struct order_spec *ord) { switch ( ord->id ) { - case 0: + case 0: case 2: return 1; case 1: /* block order */ /* d[0]:weight d[1]:w0,...,d[nd_exporigin-1]:w(n-1) */ return ord->ord.block.length+1; - case 2: - error("nd_get_exporigin : matrix order is not supported yet."); + case 3: + error("nd_get_exporigin : composite order is not supported yet."); } } void nd_setup_parameters(int nvar,int max) { - int i,j,n,elen,ord_o,ord_l,l,s; + int i,j,n,elen,ord_o,ord_l,l,s,wpd; struct order_pair *op; - /* if max == 0, don't touch nd_bpe */ - if ( max > 0 ) { - if ( max < 2 ) nd_bpe = 1; - if ( max < 4 ) nd_bpe = 2; + nd_nvar = nvar; + if ( max ) { + /* XXX */ + if ( do_weyl ) nd_bpe = 32; + else if ( max < 2 ) nd_bpe = 1; + else if ( max < 4 ) nd_bpe = 2; else if ( max < 8 ) nd_bpe = 3; else if ( max < 16 ) nd_bpe = 4; else if ( max < 32 ) nd_bpe = 5; @@ -2856,15 +2800,14 @@ void nd_setup_parameters(int nvar,int max) { else if ( max < 65536 ) nd_bpe = 16; else nd_bpe = 32; } - /* nvar == 0, don't touch nd_nvar */ - if ( nvar > 0 ) nd_nvar = nvar; - nd_epw = (sizeof(UINT)*8)/nd_bpe; elen = nd_nvar/nd_epw+(nd_nvar%nd_epw?1:0); - nd_exporigin = nd_get_exporigin(nd_ord); - nd_wpd = nd_exporigin+elen; - + wpd = nd_exporigin+elen; + if ( wpd != nd_wpd ) { + nd_free_private_storage(); + nd_wpd = wpd; + } if ( nd_bpe < 32 ) { nd_mask0 = (1<= 0; i-- ) ndv_realloc(ps[i],obpe,oadv,oepos); - prev_nm_free_list = 0; - prev_ndp_free_list = 0; - GC_gcollect(); -} - void ndl_reconstruct(UINT *d,UINT *r,int obpe,EPOS oepos) { int n,i,ei,oepw,omask0,j,s,ord_l,l; @@ -3047,20 +2958,25 @@ int nd_sp(int mod,int trace,ND_pairs p,ND *rp) UINT *lcm; int td; - if ( trace ) { - p1 = nd_ps_trace[p->i1]; p2 = nd_ps_trace[p->i2]; + if ( !mod && nd_demand ) { + p1 = ndv_load(p->i1); p2 = ndv_load(p->i2); } else { - p1 = nd_ps[p->i1]; p2 = nd_ps[p->i2]; + if ( trace ) { + p1 = nd_ps_trace[p->i1]; p2 = nd_ps_trace[p->i2]; + } else { + p1 = nd_ps[p->i1]; p2 = nd_ps[p->i2]; + } } lcm = LCM(p); NEWNM(m); - CQ(m) = HCQ(p2); + CZ(m) = HCZ(p2); ndl_sub(lcm,HDL(p1),DL(m)); if ( ndl_check_bound2(p->i1,DL(m)) ) return 0; t1 = ndv_mul_nm(mod,m,p1); - if ( mod ) CM(m) = mod-HCM(p1); - else chsgnq(HCQ(p1),&CQ(m)); + if ( mod == -1 ) CM(m) = _chsgnsf(HCM(p1)); + else if ( mod ) CM(m) = mod-HCM(p1); + else CZ(m) = chsgnz(HCZ(p1)); ndl_sub(lcm,HDL(p2),DL(m)); if ( ndl_check_bound2(p->i2,DL(m)) ) { nd_free(t1); @@ -3079,21 +2995,25 @@ void ndv_mul_c(int mod,NDV p,int mul) if ( !p ) return; len = LEN(p); - for ( m = BDY(p), i = 0; i < len; i++, NMV_ADV(m) ) { - c1 = CM(m); DMAR(c1,mul,0,mod,c); CM(m) = c; - } + if ( mod == -1 ) + for ( m = BDY(p), i = 0; i < len; i++, NMV_ADV(m) ) + CM(m) = _mulsf(CM(m),mul); + else + for ( m = BDY(p), i = 0; i < len; i++, NMV_ADV(m) ) { + c1 = CM(m); DMAR(c1,mul,0,mod,c); CM(m) = c; + } } -void ndv_mul_c_q(NDV p,Q mul) +void ndv_mul_c_z(NDV p,Z mul) { NMV m; - Q c; + Z c; int len,i; if ( !p ) return; len = LEN(p); for ( m = BDY(p), i = 0; i < len; i++, NMV_ADV(m) ) { - mulq(CQ(m),mul,&c); CQ(m) = c; + CZ(m) = mulz(CZ(m),mul); } } @@ -3140,8 +3060,8 @@ void weyl_mul_nm_nmv(int n,int mod,NM m0,NMV m1,NM *ta { int i,n2,j,s,curlen,homo,h,a,b,k,l,u,min; UINT *d0,*d1,*d,*dt,*ctab; - Q *ctab_q; - Q q,q1; + Z *ctab_z; + Z q,q1; UINT c0,c1,c; NM *p; NM m,t; @@ -3153,7 +3073,7 @@ void weyl_mul_nm_nmv(int n,int mod,NM m0,NMV m1,NM *ta if ( mod ) { c0 = CM(m0); c1 = CM(m1); DMAR(c0,c1,0,mod,c); CM(m) = c; } else - mulq(CQ(m0),CQ(m1),&CQ(m)); + CZ(m) = mulz(CZ(m0),CZ(m1)); for ( i = 0; i < nd_wpd; i++ ) d[i] = 0; homo = n&1 ? 1 : 0; if ( homo ) { @@ -3186,8 +3106,8 @@ void weyl_mul_nm_nmv(int n,int mod,NM m0,NMV m1,NM *ta ctab = (UINT *)ALLOCA((min+1)*sizeof(UINT)); mkwcm(k,l,mod,ctab); } else { - ctab_q = (Q *)ALLOCA((min+1)*sizeof(Q)); - mkwc(k,l,ctab_q); + ctab_z = (Z *)ALLOCA((min+1)*sizeof(Z)); + mkwcz(k,l,ctab_z); } for ( j = min; j >= 0; j-- ) { for ( u = 0; u < nd_wpd; u++ ) d[u] = 0; @@ -3199,7 +3119,7 @@ void weyl_mul_nm_nmv(int n,int mod,NM m0,NMV m1,NM *ta } else TD(d) = h; if ( nd_blockmask ) ndl_weight_mask(d); if ( mod ) c = ctab[j]; - else q = ctab_q[j]; + else q = ctab_z[j]; p = tab+curlen*j; if ( j == 0 ) { for ( u = 0; u < curlen; u++, p++ ) { @@ -3208,7 +3128,7 @@ void weyl_mul_nm_nmv(int n,int mod,NM m0,NMV m1,NM *ta if ( mod ) { c0 = CM(tab[u]); DMAR(c0,c,0,mod,c1); CM(tab[u]) = c1; } else { - mulq(CQ(tab[u]),q,&q1); CQ(tab[u]) = q1; + CZ(tab[u]) = mulz(CZ(tab[u]),q); } } } @@ -3220,7 +3140,7 @@ void weyl_mul_nm_nmv(int n,int mod,NM m0,NMV m1,NM *ta if ( mod ) { c0 = CM(tab[u]); DMAR(c0,c,0,mod,c1); CM(t) = c1; } else - mulq(CQ(tab[u]),q,&CQ(t)); + CZ(t) = mulz(CZ(tab[u]),q); *p = t; } } @@ -3237,7 +3157,6 @@ ND ndv_mul_nm_symbolic(NM m0,NDV p) NMV m; UINT *d,*dt,*dm; int c,n,td,i,c1,c2,len; - Q q; ND r; if ( !p ) return 0; @@ -3266,32 +3185,42 @@ ND ndv_mul_nm(int mod,NM m0,NDV p) NMV m; UINT *d,*dt,*dm; int c,n,td,i,c1,c2,len; - Q q; + Z q; ND r; if ( !p ) return 0; else if ( do_weyl ) - return weyl_ndv_mul_nm(mod,m0,p); + if ( mod == -1 ) + error("ndv_mul_nm : not implemented (weyl)"); + else + return weyl_ndv_mul_nm(mod,m0,p); else { n = NV(p); m = BDY(p); d = DL(m0); len = LEN(p); mr0 = 0; td = TD(d); - if ( mod ) { + if ( mod == -1 ) { c = CM(m0); for ( i = 0; i < len; i++, NMV_ADV(m) ) { NEXTNM(mr0,mr); + CM(mr) = _mulsf(CM(m),c); + ndl_add(DL(m),d,DL(mr)); + } + } else if ( mod ) { + c = CM(m0); + for ( i = 0; i < len; i++, NMV_ADV(m) ) { + NEXTNM(mr0,mr); c1 = CM(m); DMAR(c1,c,0,mod,c2); CM(mr) = c2; ndl_add(DL(m),d,DL(mr)); } } else { - q = CQ(m0); + q = CZ(m0); for ( i = 0; i < len; i++, NMV_ADV(m) ) { NEXTNM(mr0,mr); - mulq(CQ(m),q,&CQ(mr)); + CZ(mr) = mulz(CZ(m),q); ndl_add(DL(m),d,DL(mr)); } } @@ -3302,6 +3231,52 @@ ND ndv_mul_nm(int mod,NM m0,NDV p) } } +ND nd_quo(int mod,PGeoBucket bucket,NDV d) +{ + NM mq0,mq; + NMV tm; + int i,nv,sg,c,c1,c2,hindex; + ND p,t,r; + N tnm; + + if ( !p ) return 0; + else { + nv = NV(d); + mq0 = 0; + tm = (NMV)ALLOCA(nmv_adv); + while ( 1 ) { + hindex = mod?head_pbucket(mod,bucket):head_pbucket_z(bucket); + if ( hindex < 0 ) break; + p = bucket->body[hindex]; + NEXTNM(mq0,mq); + ndl_sub(HDL(p),HDL(d),DL(mq)); + ndl_copy(DL(mq),DL(tm)); + if ( mod ) { + c1 = invm(HCM(d),mod); c2 = HCM(p); + DMAR(c1,c2,0,mod,c); CM(mq) = c; + CM(tm) = mod-c; + } else { + CZ(mq) = divsz(HCZ(p),HCZ(d)); + CZ(tm) = chsgnz(CZ(mq)); + } + t = ndv_mul_nmv_trunc(mod,tm,d,HDL(d)); + bucket->body[hindex] = nd_remove_head(p); + t = nd_remove_head(t); + add_pbucket(mod,bucket,t); + } + if ( !mq0 ) + r = 0; + else { + NEXT(mq) = 0; + for ( i = 0, mq = mq0; mq; mq = NEXT(mq), i++ ); + MKND(nv,mq0,i,r); + /* XXX */ + SG(r) = HTD(r); + } + return r; + } +} + void ndv_realloc(NDV p,int obpe,int oadv,EPOS oepos) { NMV m,mr,mr0,t; @@ -3314,10 +3289,10 @@ void ndv_realloc(NDV p,int obpe,int oadv,EPOS oepos) mr = (NMV)((char *)mr0+(len-1)*nmv_adv); t = (NMV)ALLOCA(nmv_adv); for ( i = 0; i < len; i++, NMV_OPREV(m), NMV_PREV(mr) ) { - CQ(t) = CQ(m); + CZ(t) = CZ(m); for ( k = 0; k < nd_wpd; k++ ) DL(t)[k] = 0; ndl_reconstruct(DL(m),DL(t),obpe,oepos); - CQ(mr) = CQ(t); + CZ(mr) = CZ(t); ndl_copy(DL(t),DL(mr)); } BDY(p) = mr0; @@ -3335,7 +3310,7 @@ NDV ndv_dup_realloc(NDV p,int obpe,int oadv,EPOS oepos for ( i = 0; i < len; i++, NMV_OADV(m), NMV_ADV(mr) ) { ndl_zero(DL(mr)); ndl_reconstruct(DL(m),DL(mr),obpe,oepos); - CQ(mr) = CQ(m); + CZ(mr) = CZ(m); } MKNDV(NV(p),mr0,len,r); SG(r) = SG(p); @@ -3355,7 +3330,7 @@ NDV ndv_dup(int mod,NDV p) m0 = m = (NMV)(mod?MALLOC_ATOMIC(len*nmv_adv):MALLOC(len*nmv_adv)); for ( t = BDY(p), i = 0; i < len; i++, NMV_ADV(t), NMV_ADV(m) ) { ndl_copy(DL(t),DL(m)); - CQ(m) = CQ(t); + CZ(m) = CZ(t); } MKNDV(NV(p),m0,len,d); SG(d) = SG(p); @@ -3371,7 +3346,7 @@ ND nd_dup(ND p) for ( m0 = 0, t = BDY(p); t; t = NEXT(t) ) { NEXTNM(m0,m); ndl_copy(DL(t),DL(m)); - CQ(m) = CQ(t); + CZ(m) = CZ(t); } if ( m0 ) NEXT(m) = 0; MKND(NV(p),m0,LEN(p),d); @@ -3386,21 +3361,30 @@ void ndv_mod(int mod,NDV p) NMV t,d; int r; int i,len,dlen; + Obj gfs; if ( !p ) return; len = LEN(p); dlen = 0; - for ( t = d = BDY(p), i = 0; i < len; i++, NMV_ADV(t) ) { - r = rem(NM(CQ(t)),mod); - if ( r ) { - if ( SGN(CQ(t)) < 0 ) - r = mod-r; + if ( mod == -1 ) + for ( t = d = BDY(p), i = 0; i < len; i++, NMV_ADV(t) ) { + simp_ff((Obj)CP(t),&gfs); + r = FTOIF(CONT((GFS)gfs)); CM(d) = r; ndl_copy(DL(t),DL(d)); NMV_ADV(d); dlen++; } - } + else + for ( t = d = BDY(p), i = 0; i < len; i++, NMV_ADV(t) ) { + r = remzi(CZ(t),mod); + if ( r ) { + CM(d) = r; + ndl_copy(DL(t),DL(d)); + NMV_ADV(d); + dlen++; + } + } LEN(p) = dlen; } @@ -3430,7 +3414,7 @@ ND ptond(VL vl,VL dvl,P p) else if ( NUM(p) ) { NEWNM(m); ndl_zero(DL(m)); - CQ(m) = (Q)p; + CZ(m) = qtoz((Q)p); NEXT(m) = 0; MKND(nd_nvar,m,1,r); SG(r) = 0; @@ -3440,7 +3424,7 @@ ND ptond(VL vl,VL dvl,P p) w = (DCP *)ALLOCA(k*sizeof(DCP)); for ( dc = DC(p), j = 0; j < k; dc = NEXT(dc), j++ ) w[j] = dc; for ( i = 0, tvl = dvl, v = VR(p); - vl && tvl->v != v; tvl = NEXT(tvl), i++ ); + tvl && tvl->v != v; tvl = NEXT(tvl), i++ ); if ( !tvl ) { for ( j = k-1, s = 0, MKV(v,x); j >= 0; j-- ) { t = ptond(vl,dvl,COEF(w[j])); @@ -3475,6 +3459,7 @@ P ndvtop(int mod,VL vl,VL dvl,NDV p) P c; UINT *d; P s,r,u,t,w; + GFS gfs; if ( !p ) return 0; else { @@ -3482,10 +3467,12 @@ P ndvtop(int mod,VL vl,VL dvl,NDV p) n = NV(p); m = (NMV)(((char *)BDY(p))+nmv_adv*(len-1)); for ( j = len-1, s = 0; j >= 0; j--, NMV_PREV(m) ) { - if ( mod ) { + if ( mod == -1 ) { + e = IFTOF(CM(m)); MKGFS(e,gfs); c = (P)gfs; + } else if ( mod ) { STOQ(CM(m),q); c = (P)q; } else - c = CP(m); + c = (P)ztoq(CZ(m)); d = DL(m); for ( i = 0, t = c, tvl = dvl; i < n; tvl = NEXT(tvl), i++ ) { MKV(tvl->v,r); e = GET_EXP(d,i); STOQ(e,q); @@ -3506,10 +3493,16 @@ NDV ndtondv(int mod,ND p) if ( !p ) return 0; len = LEN(p); - m0 = m = (NMV)(mod?MALLOC_ATOMIC(len*nmv_adv):MALLOC(len*nmv_adv)); + if ( mod ) + m0 = m = (NMV)GC_malloc_atomic_ignore_off_page(len*nmv_adv); + else + m0 = m = MALLOC(len*nmv_adv); +#if 0 + ndv_alloc += nmv_adv*len; +#endif for ( t = BDY(p), i = 0; t; t = NEXT(t), i++, NMV_ADV(m) ) { ndl_copy(DL(t),DL(m)); - CQ(m) = CQ(t); + CZ(m) = CZ(t); } MKNDV(NV(p),m0,len,d); SG(d) = SG(p); @@ -3529,7 +3522,7 @@ ND ndvtond(int mod,NDV p) for ( t = BDY(p), i = 0; i < len; NMV_ADV(t), i++ ) { NEXTNM(m0,m); ndl_copy(DL(t),DL(m)); - CQ(m) = CQ(t); + CZ(m) = CZ(t); } NEXT(m) = 0; MKND(NV(p),m0,len,d); @@ -3546,14 +3539,15 @@ void ndv_print(NDV p) else { len = LEN(p); for ( m = BDY(p), i = 0; i < len; i++, NMV_ADV(m) ) { - printf("+%d*",CM(m)); + if ( CM(m) & 0x80000000 ) printf("+@_%d*",IFTOF(CM(m))); + else printf("+%d*",CM(m)); ndl_print(DL(m)); } printf("\n"); } } -void ndv_print_q(NDV p) +void ndv_print_z(NDV p) { NMV m; int i,len; @@ -3563,7 +3557,7 @@ void ndv_print_q(NDV p) len = LEN(p); for ( m = BDY(p), i = 0; i < len; i++, NMV_ADV(m) ) { printf("+"); - printexpr(CO,CQ(m)); + printz(CZ(m)); printf("*"); ndl_print(DL(m)); } @@ -3625,14 +3619,29 @@ void nd_init_ord(struct order_spec *ord) } break; case 1: + /* block order */ /* XXX */ nd_dcomp = -1; nd_isrlex = 0; ndl_compare_function = ndl_block_compare; break; case 2: - error("nd_init_ord : matrix order is not supported yet."); + /* matrix order */ + /* XXX */ + nd_dcomp = -1; + nd_isrlex = 0; + nd_matrix_len = ord->ord.matrix.row; + nd_matrix = ord->ord.matrix.matrix; + ndl_compare_function = ndl_matrix_compare; break; + case 3: + /* composite order */ + nd_dcomp = -1; + nd_isrlex = 0; + nd_worb_len = ord->ord.composite.length; + nd_worb = ord->ord.composite.w_or_b; + ndl_compare_function = ndl_composite_compare; + break; } nd_ord = ord; } @@ -3643,7 +3652,8 @@ BlockMask nd_create_blockmask(struct order_spec *ord) UINT *t; BlockMask bm; - if ( !ord->id ) + /* we only create mask table for block order */ + if ( ord->id != 1 ) return 0; n = ord->ord.block.length; bm = (BlockMask)MALLOC(sizeof(struct oBlockMask)); @@ -3701,7 +3711,14 @@ EPOS nd_create_epos(struct order_spec *ord) } break; case 2: - error("nd_create_epos : matrix order is not supported yet."); + /* matrix order */ + case 3: + /* composite order */ + for ( i = 0; i < nd_nvar; i++ ) { + epos[i].i = nd_exporigin + i/nd_epw; + epos[i].s = (nd_epw-(i%nd_epw)-1)*nd_bpe; + } + break; } return epos; } @@ -3717,7 +3734,7 @@ void nd_nf_p(P f,LIST g,LIST v,int m,struct order_spec int stat,nvar,max,e; pltovl(v,&vv); - nvar = length(vv); + for ( nvar = 0, tv = vv; tv; tv = NEXT(tv), nvar++ ); /* get the degree bound */ for ( t = BDY(g), max = 0; t; t = NEXT(t) ) @@ -3737,9 +3754,11 @@ void nd_nf_p(P f,LIST g,LIST v,int m,struct order_spec for ( in0 = 0, t = BDY(g); t; t = NEXT(t) ) { NEXTNODE(in0,in); BDY(in) = (pointer)ptondv(CO,vv,(P)BDY(t)); + if ( m ) ndv_mod(m,(NDV)BDY(in)); } NEXTNODE(in0,in); BDY(in) = (pointer)ptondv(CO,vv,f); + if ( m ) ndv_mod(m,(NDV)BDY(in)); NEXT(in) = 0; ndv_setup(m,0,in0); @@ -3747,10 +3766,10 @@ void nd_nf_p(P f,LIST g,LIST v,int m,struct order_spec nd_scale=2; while ( 1 ) { nd = (pointer)ndvtond(m,nd_ps[nd_psn]); - stat = nd_nf(m,nd,nd_ps,1,&nf); + stat = nd_nf(m,nd,nd_ps,1,0,&nf); if ( !stat ) { nd_psn++; - nd_reconstruct(m,0,0); + nd_reconstruct(0,0); nd_psn--; } else break; @@ -3774,24 +3793,17 @@ int nd_to_vect(int mod,UINT *s0,int n,ND d,UINT *r) return i; } -int nm_ind_pair_to_vect(int mod,UINT *s0,int n,NM_ind_pair pair,UINT *r) +int nd_to_vect_z(UINT *s0,int n,ND d,Z *r) { NM m; - NMV mr; - UINT *d,*t,*s; - NDV p; - int i,j,len; + UINT *t,*s; + int i; - m = pair->mul; - d = DL(m); - p = nd_ps[pair->index]; - t = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); for ( i = 0; i < n; i++ ) r[i] = 0; - len = LEN(p); - for ( i = j = 0, s = s0, mr = BDY(p); j < len; j++, NMV_ADV(mr) ) { - ndl_add(d,DL(mr),t); + for ( i = 0, s = s0, m = BDY(d); m; m = NEXT(m) ) { + t = DL(m); for ( ; !ndl_equal(t,s); s += nd_wpd, i++ ); - r[i] = CM(mr); + r[i] = CZ(m); } for ( i = 0; !r[i]; i++ ); return i; @@ -3805,7 +3817,7 @@ IndArray nm_ind_pair_to_vect_compress(int mod,UINT *s0 NDV p; unsigned char *ivc; unsigned short *ivs; - UINT *v,*ivi; + UINT *v,*ivi,*s0v; int i,j,len,prev,diff,cdiff; IndArray r; @@ -3814,13 +3826,13 @@ IndArray nm_ind_pair_to_vect_compress(int mod,UINT *s0 p = nd_ps[pair->index]; len = LEN(p); t = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); - r = (IndArray)MALLOC(sizeof(struct oIndArray)); v = (unsigned int *)ALLOCA(len*sizeof(unsigned int)); for ( i = j = 0, s = s0, mr = BDY(p); j < len; j++, NMV_ADV(mr) ) { ndl_add(d,DL(mr),t); for ( ; !ndl_equal(t,s); s += nd_wpd, i++ ); v[j] = i; } + r = (IndArray)MALLOC(sizeof(struct oIndArray)); r->head = v[0]; diff = 0; for ( i = 1; i < len; i++ ) { @@ -3846,9 +3858,64 @@ IndArray nm_ind_pair_to_vect_compress(int mod,UINT *s0 } -void ndv_reduce_vect(int m,UINT *svect,int col,IndArray *imat,NODE rp0) +int ndv_reduce_vect_z(Z *svect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) { int i,j,k,len,pos,prev; + Z cs,mcs,c1,c2,cr,gcd,t; + IndArray ivect; + unsigned char *ivc; + unsigned short *ivs; + unsigned int *ivi; + NDV redv; + NMV mr; + NODE rp; + int maxrs; + + maxrs = 0; + for ( i = 0; i < nred; i++ ) { + ivect = imat[i]; + k = ivect->head; + if ( svect[k] ) { + maxrs = MAX(maxrs,rp0[i]->sugar); + redv = nd_ps[rp0[i]->index]; + len = LEN(redv); mr = BDY(redv); + gcd = gcdz_cofactor(svect[k],CZ(mr),&cs,&cr); + mcs = chsgnz(cs); + if ( !uniz(cr) ) + for ( j = 0; j < col; j++ ) + svect[j] = mulz(svect[j],cr); + svect[k] = 0; prev = k; + switch ( ivect->width ) { + case 1: + ivc = ivect->index.c; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivc[j]; prev = pos; + svect[pos] = addz(svect[pos],mulz(CZ(mr),mcs)); + } + break; + case 2: + ivs = ivect->index.s; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivs[j]; prev = pos; + svect[pos] = addz(svect[pos],mulz(CZ(mr),mcs)); + } + break; + case 4: + ivi = ivect->index.i; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivi[j]; prev = pos; + svect[pos] = addz(svect[pos],mulz(CZ(mr),mcs)); + } + break; + } + } + } + return maxrs; +} + +int ndv_reduce_vect(int m,UINT *svect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) +{ + int i,j,k,len,pos,prev; UINT c,c1,c2,c3,up,lo,dmy; IndArray ivect; unsigned char *ivc; @@ -3857,12 +3924,15 @@ void ndv_reduce_vect(int m,UINT *svect,int col,IndArra NDV redv; NMV mr; NODE rp; + int maxrs; - for ( rp = rp0, i = 0; rp; i++, rp = NEXT(rp) ) { + maxrs = 0; + for ( i = 0; i < nred; i++ ) { ivect = imat[i]; k = ivect->head; svect[k] %= m; if ( c = svect[k] ) { - c = m-c; redv = nd_ps[((NM_ind_pair)BDY(rp))->index]; + maxrs = MAX(maxrs,rp0[i]->sugar); + c = m-c; redv = nd_ps[rp0[i]->index]; len = LEN(redv); mr = BDY(redv); svect[k] = 0; prev = k; switch ( ivect->width ) { @@ -3901,8 +3971,59 @@ void ndv_reduce_vect(int m,UINT *svect,int col,IndArra } for ( i = 0; i < col; i++ ) if ( svect[i] >= (UINT)m ) svect[i] %= m; + return maxrs; } +int ndv_reduce_vect_sf(int m,UINT *svect,int col,IndArray *imat,NM_ind_pair *rp0,int nred) +{ + int i,j,k,len,pos,prev; + UINT c,c1,c2,c3,up,lo,dmy; + IndArray ivect; + unsigned char *ivc; + unsigned short *ivs; + unsigned int *ivi; + NDV redv; + NMV mr; + NODE rp; + int maxrs; + + maxrs = 0; + for ( i = 0; i < nred; i++ ) { + ivect = imat[i]; + k = ivect->head; svect[k] %= m; + if ( c = svect[k] ) { + maxrs = MAX(maxrs,rp0[i]->sugar); + c = _chsgnsf(c); redv = nd_ps[rp0[i]->index]; + len = LEN(redv); mr = BDY(redv); + svect[k] = 0; prev = k; + switch ( ivect->width ) { + case 1: + ivc = ivect->index.c; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivc[j]; prev = pos; + svect[pos] = _addsf(_mulsf(CM(mr),c),svect[pos]); + } + break; + case 2: + ivs = ivect->index.s; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivs[j]; prev = pos; + svect[pos] = _addsf(_mulsf(CM(mr),c),svect[pos]); + } + break; + case 4: + ivi = ivect->index.i; + for ( j = 1, NMV_ADV(mr); j < len; j++, NMV_ADV(mr) ) { + pos = prev+ivi[j]; prev = pos; + svect[pos] = _addsf(_mulsf(CM(mr),c),svect[pos]); + } + break; + } + } + } + return maxrs; +} + NDV vect_to_ndv(UINT *vect,int spcol,int col,int *rhead,UINT *s0vect) { int j,k,len; @@ -3914,7 +4035,10 @@ NDV vect_to_ndv(UINT *vect,int spcol,int col,int *rhea for ( j = 0, len = 0; j < spcol; j++ ) if ( vect[j] ) len++; if ( !len ) return 0; else { - mr0 = (NMV)MALLOC_ATOMIC(nmv_adv*len); + mr0 = (NMV)GC_malloc_atomic_ignore_off_page(nmv_adv*len); +#if 0 + ndv_alloc += nmv_adv*len; +#endif mr = mr0; p = s0vect; for ( j = k = 0; j < col; j++, p += nd_wpd ) @@ -3928,30 +4052,56 @@ NDV vect_to_ndv(UINT *vect,int spcol,int col,int *rhea } } -NODE nd_sp_f4(int m,ND_pairs l,PGeoBucket bucket) +NDV vect_to_ndv_z(Z *vect,int spcol,int col,int *rhead,UINT *s0vect) { + int j,k,len; + UINT *p; + Z c; + NDV r; + NMV mr0,mr; + + for ( j = 0, len = 0; j < spcol; j++ ) if ( vect[j] ) len++; + if ( !len ) return 0; + else { + mr0 = (NMV)GC_malloc(nmv_adv*len); +#if 0 + ndv_alloc += nmv_adv*len; +#endif + mr = mr0; + p = s0vect; + for ( j = k = 0; j < col; j++, p += nd_wpd ) + if ( !rhead[j] ) { + if ( c = vect[k++] ) { + ndl_copy(p,DL(mr)); CZ(mr) = c; NMV_ADV(mr); + } + } + MKNDV(nd_nvar,mr0,len,r); + return r; + } +} + +int nd_sp_f4(int m,ND_pairs l,PGeoBucket bucket) +{ ND_pairs t; NODE sp0,sp; int stat; ND spol; - sp0 = 0; for ( t = l; t; t = NEXT(t) ) { stat = nd_sp(m,0,t,&spol); if ( !stat ) return 0; if ( spol ) { - NEXTNODE(sp0,sp); BDY(sp) = (pointer)nd_dup(spol); add_pbucket_symbolic(bucket,spol); } } - return sp0; + return 1; } int nd_symbolic_preproc(PGeoBucket bucket,UINT **s0vect,NODE *r) { NODE rp0,rp; NM mul,head,s0,s; - int index,col,i; + int index,col,i,sugar; RHist h; UINT *s0v,*p; NM_ind_pair pair; @@ -3970,14 +4120,16 @@ int nd_symbolic_preproc(PGeoBucket bucket,UINT **s0vec NEWNM(mul); ndl_sub(DL(head),DL(h),DL(mul)); if ( ndl_check_bound2(index,DL(mul)) ) return 0; - MKNM_ind_pair(pair,mul,index); + sugar = TD(DL(mul))+SG(nd_ps[index]); + MKNM_ind_pair(pair,mul,index,sugar); red = ndv_mul_nm_symbolic(mul,nd_ps[index]); add_pbucket_symbolic(bucket,nd_remove_head(red)); NEXTNODE(rp0,rp); BDY(rp) = (pointer)pair; } col++; } - NEXT(rp) = 0; NEXT(s) = 0; + if ( rp0 ) NEXT(rp) = 0; + NEXT(s) = 0; s0v = (UINT *)MALLOC_ATOMIC(col*nd_wpd*sizeof(UINT)); for ( i = 0, p = s0v, s = s0; i < col; i++, p += nd_wpd, s = NEXT(s) ) ndl_copy(DL(s),p); @@ -3994,7 +4146,7 @@ NODE nd_f4(int m) ND spol,red; NDV nf,redv; NM s0,s; - NODE sp0,sp,rp0,rp; + NODE rp0,srp0,nflist; int nsp,nred,col,rank,len,k,j,a; UINT c; UINT **spmat; @@ -4007,9 +4159,9 @@ NODE nd_f4(int m) PGeoBucket bucket; struct oEGT eg0,eg1,eg_f4; - if ( !m ) - error("nd_f4 : not implemented"); - +#if 0 + ndv_alloc = 0; +#endif g = 0; d = 0; for ( i = 0; i < nd_psn; i++ ) { d = update_pairs(d,g,i); @@ -4020,63 +4172,877 @@ NODE nd_f4(int m) l = nd_minsugarp(d,&d); sugar = SG(l); bucket = create_pbucket(); - if ( !(sp0 = nd_sp_f4(m,l,bucket)) - || !(col = nd_symbolic_preproc(bucket,&s0vect,&rp0)) ) { + stat = nd_sp_f4(m,l,bucket); + if ( !stat ) { for ( t = l; NEXT(t); t = NEXT(t) ); NEXT(t) = d; d = l; - d = nd_reconstruct(m,0,d); + d = nd_reconstruct(0,d); continue; } + if ( bucket->m < 0 ) continue; + col = nd_symbolic_preproc(bucket,&s0vect,&rp0); + if ( !col ) { + for ( t = l; NEXT(t); t = NEXT(t) ); + NEXT(t) = d; d = l; + d = nd_reconstruct(0,d); + continue; + } + get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); + if ( DP_Print ) + fprintf(asir_out,"sugar=%d,symb=%fsec,", + sugar,eg_f4.exectime+eg_f4.gctime); + if ( 1 ) + nflist = nd_f4_red(m,l,s0vect,col,rp0); + else + nflist = nd_f4_red_dist(m,l,s0vect,col,rp0); + /* adding new bases */ + for ( r = nflist; r; r = NEXT(r) ) { + nf = (NDV)BDY(r); + ndv_removecont(m,nf); + nh = ndv_newps(m,nf,0); + d = update_pairs(d,g,nh); + g = update_base(g,nh); + } + } + for ( r = g; r; r = NEXT(r) ) BDY(r) = (pointer)nd_ps[(int)BDY(r)]; +#if 0 + fprintf(asir_out,"ndv_alloc=%d\n",ndv_alloc); +#endif + return g; +} - nsp = length(sp0); nred = length(rp0); spcol = col-nred; - imat = (IndArray *)MALLOC(nred*sizeof(IndArray)); - rhead = (int *)MALLOC_ATOMIC(col*sizeof(int)); - for ( i = 0; i < col; i++ ) rhead[i] = 0; +NODE nd_f4_red(int m,ND_pairs sp0,UINT *s0vect,int col,NODE rp0) +{ + IndArray *imat; + int nsp,nred,i; + int *rhead; + NODE r0,rp; + ND_pairs sp; + NM_ind_pair *rvect; - /* construction of index arrays */ - for ( rp = rp0, i = 0; rp; i++, rp = NEXT(rp) ) { - imat[i] = nm_ind_pair_to_vect_compress(m,s0vect,col,(NM_ind_pair)BDY(rp)); - rhead[imat[i]->head] = 1; + for ( sp = sp0, nsp = 0; sp; sp = NEXT(sp), nsp++ ); + nred = length(rp0); + imat = (IndArray *)ALLOCA(nred*sizeof(IndArray)); + rhead = (int *)ALLOCA(col*sizeof(int)); + for ( i = 0; i < col; i++ ) rhead[i] = 0; + + /* construction of index arrays */ + rvect = (NM_ind_pair *)ALLOCA(nred*sizeof(NM_ind_pair)); + for ( rp = rp0, i = 0; rp; i++, rp = NEXT(rp) ) { + rvect[i] = (NM_ind_pair)BDY(rp); + imat[i] = nm_ind_pair_to_vect_compress(m,s0vect,col,rvect[i]); + rhead[imat[i]->head] = 1; + } + if ( m ) + r0 = nd_f4_red_main(m,sp0,nsp,s0vect,col,rvect,rhead,imat,nred); + else + r0 = nd_f4_red_z_main(sp0,nsp,s0vect,col,rvect,rhead,imat,nred); + return r0; +} + +NODE nd_f4_red_main(int m,ND_pairs sp0,int nsp,UINT *s0vect,int col, + NM_ind_pair *rvect,int *rhead,IndArray *imat,int nred) +{ + int spcol,sprow,a; + int i,j,k,l,rank; + NODE r0,r; + ND_pairs sp; + ND spol; + int **spmat; + UINT *svect,*v; + int *colstat; + struct oEGT eg0,eg1,eg2,eg_f4,eg_f4_1,eg_f4_2; + int maxrs; + int *spsugar; + + spcol = col-nred; + get_eg(&eg0); + /* elimination (1st step) */ + spmat = (int **)ALLOCA(nsp*sizeof(UINT *)); + svect = (UINT *)ALLOCA(col*sizeof(UINT)); + spsugar = (int *)ALLOCA(nsp*sizeof(UINT)); + for ( a = sprow = 0, sp = sp0; a < nsp; a++, sp = NEXT(sp) ) { + nd_sp(m,0,sp,&spol); + if ( !spol ) continue; + nd_to_vect(m,s0vect,col,spol,svect); + if ( m == -1 ) + maxrs = ndv_reduce_vect_sf(m,svect,col,imat,rvect,nred); + else + maxrs = ndv_reduce_vect(m,svect,col,imat,rvect,nred); + for ( i = 0; i < col; i++ ) if ( svect[i] ) break; + if ( i < col ) { + spmat[sprow] = v = (UINT *)MALLOC_ATOMIC(spcol*sizeof(UINT)); + for ( j = k = 0; j < col; j++ ) + if ( !rhead[j] ) v[k++] = svect[j]; + spsugar[sprow] = MAX(maxrs,SG(spol)); + sprow++; } + nd_free(spol); + } + get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); + if ( DP_Print ) { + fprintf(asir_out,"elim1=%fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fflush(asir_out); + } + /* free index arrays */ + for ( i = 0; i < nred; i++ ) GC_free(imat[i]->index.c); - /* elimination (1st step) */ - spmat = (UINT **)MALLOC(nsp*sizeof(UINT *)); - svect = (UINT *)MALLOC_ATOMIC(col*sizeof(UINT)); - for ( a = sprow = 0, sp = sp0; a < nsp; a++, sp = NEXT(sp) ) { - nd_to_vect(m,s0vect,col,BDY(sp),svect); - ndv_reduce_vect(m,svect,col,imat,rp0); - for ( i = 0; i < col; i++ ) if ( svect[i] ) break; - if ( i < col ) { - spmat[sprow] = v = (UINT *)MALLOC_ATOMIC(spcol*sizeof(UINT)); - for ( j = k = 0; j < col; j++ ) - if ( !rhead[j] ) v[k++] = svect[j]; - sprow++; - } + /* elimination (2nd step) */ + colstat = (int *)ALLOCA(spcol*sizeof(int)); + if ( m == -1 ) + rank = nd_gauss_elim_sf(spmat,spsugar,sprow,spcol,m,colstat); + else + rank = nd_gauss_elim_mod(spmat,spsugar,sprow,spcol,m,colstat); + r0 = 0; + for ( i = 0; i < rank; i++ ) { + NEXTNODE(r0,r); BDY(r) = + (pointer)vect_to_ndv(spmat[i],spcol,col,rhead,s0vect); + SG((NDV)BDY(r)) = spsugar[i]; + GC_free(spmat[i]); + } + if ( r0 ) NEXT(r) = 0; + for ( ; i < sprow; i++ ) GC_free(spmat[i]); + get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); + init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); + if ( DP_Print ) { + fprintf(asir_out,"elim2=%fsec\n",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", + nsp,nred,sprow,spcol,rank); + fprintf(asir_out,"%fsec\n",eg_f4.exectime+eg_f4.gctime); + } + return r0; +} + +NODE nd_f4_red_z_main(ND_pairs sp0,int nsp,UINT *s0vect,int col, + NM_ind_pair *rvect,int *rhead,IndArray *imat,int nred) +{ + int spcol,sprow,a; + int i,j,k,l,rank; + NODE r0,r; + ND_pairs sp; + ND spol; + Z **spmat; + Z *svect,*v; + int *colstat; + struct oEGT eg0,eg1,eg2,eg_f4,eg_f4_1,eg_f4_2; + int maxrs; + int *spsugar; + + spcol = col-nred; + get_eg(&eg0); + /* elimination (1st step) */ + spmat = (Z **)ALLOCA(nsp*sizeof(Z *)); + svect = (Z *)ALLOCA(col*sizeof(Z)); + spsugar = (int *)ALLOCA(nsp*sizeof(Z)); + for ( a = sprow = 0, sp = sp0; a < nsp; a++, sp = NEXT(sp) ) { + nd_sp(0,0,sp,&spol); + if ( !spol ) continue; + nd_to_vect_z(s0vect,col,spol,svect); + maxrs = ndv_reduce_vect_z(svect,col,imat,rvect,nred); + for ( i = 0; i < col; i++ ) if ( svect[i] ) break; + if ( i < col ) { + spmat[sprow] = v = (Z *)MALLOC(spcol*sizeof(Z)); + for ( j = k = 0; j < col; j++ ) + if ( !rhead[j] ) v[k++] = svect[j]; + spsugar[sprow] = MAX(maxrs,SG(spol)); + sprow++; } - /* free index arrays */ - for ( i = 0; i < nred; i++ ) GC_free(imat[i]->index.c); +/* nd_free(spol); */ + } + get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); + if ( DP_Print ) { + fprintf(asir_out,"elim1=%fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fflush(asir_out); + } + /* free index arrays */ +/* for ( i = 0; i < nred; i++ ) GC_free(imat[i]->index.c); */ - /* elimination (2nd step) */ - colstat = (int *)ALLOCA(spcol*sizeof(int)); - rank = generic_gauss_elim_mod(spmat,sprow,spcol,m,colstat); + /* elimination (2nd step) */ + colstat = (int *)ALLOCA(spcol*sizeof(int)); + rank = nd_gauss_elim_z(spmat,spsugar,sprow,spcol,colstat); + r0 = 0; + for ( i = 0; i < rank; i++ ) { + NEXTNODE(r0,r); BDY(r) = + (pointer)vect_to_ndv_z(spmat[i],spcol,col,rhead,s0vect); + SG((NDV)BDY(r)) = spsugar[i]; +/* GC_free(spmat[i]); */ + } + if ( r0 ) NEXT(r) = 0; - get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); - fprintf(asir_out,"sugar=%d,nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", - sugar,nsp,nred,sprow,spcol,rank); +/* for ( ; i < sprow; i++ ) GC_free(spmat[i]); */ + get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); + init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); + if ( DP_Print ) { + fprintf(asir_out,"elim2=%fsec\n",eg_f4_2.exectime+eg_f4_2.gctime); + fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", + nsp,nred,sprow,spcol,rank); fprintf(asir_out,"%fsec\n",eg_f4.exectime+eg_f4.gctime); + } + return r0; +} - /* adding new bases */ - for ( i = 0; i < rank; i++ ) { - nf = vect_to_ndv(spmat[i],spcol,col,rhead,s0vect); - SG(nf) = sugar; - ndv_removecont(m,nf); - nh = ndv_newps(nf,0); - d = update_pairs(d,g,nh); - g = update_base(g,nh); - GC_free(spmat[i]); +FILE *nd_write,*nd_read; + +void nd_send_int(int a) { + write_int(nd_write,&a); +} + +void nd_send_intarray(int *p,int len) { + write_intarray(nd_write,p,len); +} + +int nd_recv_int() { + int a; + + read_int(nd_read,&a); + return a; +} + +void nd_recv_intarray(int *p,int len) { + read_intarray(nd_read,p,len); +} + +void nd_send_ndv(NDV p) { + int len,i; + NMV m; + + if ( !p ) nd_send_int(0); + else { + len = LEN(p); + nd_send_int(len); + m = BDY(p); + for ( i = 0; i < len; i++, NMV_ADV(m) ) { + nd_send_int(CM(m)); + nd_send_intarray(DL(m),nd_wpd); } - for ( ; i < sprow; i++ ) GC_free(spmat[i]); } - for ( r = g; r; r = NEXT(r) ) BDY(r) = (pointer)nd_ps[(int)BDY(r)]; - return g; +} + +void nd_send_nd(ND p) { + int len,i; + NM m; + + if ( !p ) nd_send_int(0); + else { + len = LEN(p); + nd_send_int(len); + m = BDY(p); + for ( i = 0; i < len; i++, m = NEXT(m) ) { + nd_send_int(CM(m)); + nd_send_intarray(DL(m),nd_wpd); + } + } +} + +NDV nd_recv_ndv() +{ + int len,i; + NMV m,m0; + NDV r; + + len = nd_recv_int(); + if ( !len ) return 0; + else { + m0 = m = (NMV)GC_malloc_atomic_ignore_off_page(nmv_adv*len); +#if 0 + ndv_alloc += len*nmv_adv; +#endif + for ( i = 0; i < len; i++, NMV_ADV(m) ) { + CM(m) = nd_recv_int(); + nd_recv_intarray(DL(m),nd_wpd); + } + MKNDV(nd_nvar,m0,len,r); + return r; + } +} + +int ox_exec_f4_red(Q proc) +{ + Obj obj; + STRING fname; + NODE arg; + int s; + extern int ox_need_conv,ox_file_io; + + MKSTR(fname,"nd_exec_f4_red"); + arg = mknode(2,proc,fname); + Pox_cmo_rpc(arg,&obj); + s = get_ox_server_id(QTOS(proc)); + nd_write = iofp[s].out; + nd_read = iofp[s].in; + ox_need_conv = ox_file_io = 0; + return s; +} + +NODE nd_f4_red_dist(int m,ND_pairs sp0,UINT *s0vect,int col,NODE rp0) +{ + int nsp,nred; + int i,rank,s; + NODE rp,r0,r; + ND_pairs sp; + NM_ind_pair pair; + NMV nmv; + NM nm; + NDV nf; + Obj proc,dmy; + + ox_launch_main(0,0,&proc); + s = ox_exec_f4_red((Q)proc); + + nd_send_int(m); + nd_send_int(nd_nvar); + nd_send_int(nd_bpe); + nd_send_int(nd_wpd); + nd_send_int(nmv_adv); + + saveobj(nd_write,dp_current_spec->obj); fflush(nd_write); + + nd_send_int(nd_psn); + for ( i = 0; i < nd_psn; i++ ) nd_send_ndv(nd_ps[i]); + + for ( sp = sp0, nsp = 0; sp; sp = NEXT(sp), nsp++ ); + nd_send_int(nsp); + for ( i = 0, sp = sp0; i < nsp; i++, sp = NEXT(sp) ) { + nd_send_int(sp->i1); nd_send_int(sp->i2); + } + + nd_send_int(col); nd_send_intarray(s0vect,col*nd_wpd); + + nred = length(rp0); nd_send_int(nred); + for ( i = 0, rp = rp0; i < nred; i++, rp = NEXT(rp) ) { + pair = (NM_ind_pair)BDY(rp); + nd_send_int(pair->index); + nd_send_intarray(pair->mul->dl,nd_wpd); + } + fflush(nd_write); + rank = nd_recv_int(); + fprintf(asir_out,"rank=%d\n",rank); + r0 = 0; + for ( i = 0; i < rank; i++ ) { + nf = nd_recv_ndv(); + NEXTNODE(r0,r); BDY(r) = (pointer)nf; + } + Pox_shutdown(mknode(1,proc),&dmy); + return r0; +} + +/* server side */ + +void nd_exec_f4_red_dist() +{ + int m,i,nsp,col,s0size,nred,spcol,j,k; + NM_ind_pair *rp0; + NDV nf; + UINT *s0vect; + IndArray *imat; + int *rhead; + int **spmat; + UINT *svect,*v; + ND_pairs *sp0; + int *colstat; + int a,sprow,rank; + struct order_spec *ord; + Obj ordspec; + ND spol; + int maxrs; + int *spsugar; + + nd_read = iofp[0].in; + nd_write = iofp[0].out; + m = nd_recv_int(); + nd_nvar = nd_recv_int(); + nd_bpe = nd_recv_int(); + nd_wpd = nd_recv_int(); + nmv_adv = nd_recv_int(); + + loadobj(nd_read,&ordspec); + create_order_spec(0,ordspec,&ord); + nd_init_ord(ord); + nd_setup_parameters(nd_nvar,0); + + nd_psn = nd_recv_int(); + nd_ps = (NDV *)MALLOC(nd_psn*sizeof(NDV)); + nd_bound = (UINT **)MALLOC(nd_psn*sizeof(UINT *)); + for ( i = 0; i < nd_psn; i++ ) { + nd_ps[i] = nd_recv_ndv(); + nd_bound[i] = ndv_compute_bound(nd_ps[i]); + } + + nsp = nd_recv_int(); + sp0 = (ND_pairs *)MALLOC(nsp*sizeof(ND_pairs)); + for ( i = 0; i < nsp; i++ ) { + NEWND_pairs(sp0[i]); + sp0[i]->i1 = nd_recv_int(); sp0[i]->i2 = nd_recv_int(); + ndl_lcm(HDL(nd_ps[sp0[i]->i1]),HDL(nd_ps[sp0[i]->i2]),LCM(sp0[i])); + } + + col = nd_recv_int(); + s0size = col*nd_wpd; + s0vect = (UINT *)MALLOC(s0size*sizeof(UINT)); + nd_recv_intarray(s0vect,s0size); + + nred = nd_recv_int(); + rp0 = (NM_ind_pair *)MALLOC(nred*sizeof(NM_ind_pair)); + for ( i = 0; i < nred; i++ ) { + rp0[i] = (NM_ind_pair)MALLOC(sizeof(struct oNM_ind_pair)); + rp0[i]->index = nd_recv_int(); + rp0[i]->mul = (NM)MALLOC(sizeof(struct oNM)+(nd_wpd-1)*sizeof(UINT)); + nd_recv_intarray(rp0[i]->mul->dl,nd_wpd); + } + + spcol = col-nred; + imat = (IndArray *)MALLOC(nred*sizeof(IndArray)); + rhead = (int *)MALLOC(col*sizeof(int)); + for ( i = 0; i < col; i++ ) rhead[i] = 0; + + /* construction of index arrays */ + for ( i = 0; i < nred; i++ ) { + imat[i] = nm_ind_pair_to_vect_compress(m,s0vect,col,rp0[i]); + rhead[imat[i]->head] = 1; + } + + /* elimination (1st step) */ + spmat = (int **)MALLOC(nsp*sizeof(UINT *)); + svect = (UINT *)MALLOC(col*sizeof(UINT)); + spsugar = (int *)ALLOCA(nsp*sizeof(UINT)); + for ( a = sprow = 0; a < nsp; a++ ) { + nd_sp(m,0,sp0[a],&spol); + if ( !spol ) continue; + nd_to_vect(m,s0vect,col,spol,svect); + if ( m == -1 ) + maxrs = ndv_reduce_vect_sf(m,svect,col,imat,rp0,nred); + else + maxrs = ndv_reduce_vect(m,svect,col,imat,rp0,nred); + for ( i = 0; i < col; i++ ) if ( svect[i] ) break; + if ( i < col ) { + spmat[sprow] = v = (UINT *)MALLOC(spcol*sizeof(UINT)); + for ( j = k = 0; j < col; j++ ) + if ( !rhead[j] ) v[k++] = svect[j]; + spsugar[sprow] = MAX(maxrs,SG(spol)); + sprow++; + } + nd_free(spol); + } + /* elimination (2nd step) */ + colstat = (int *)ALLOCA(spcol*sizeof(int)); + if ( m == -1 ) + rank = nd_gauss_elim_sf(spmat,spsugar,sprow,spcol,m,colstat); + else + rank = nd_gauss_elim_mod(spmat,spsugar,sprow,spcol,m,colstat); + nd_send_int(rank); + for ( i = 0; i < rank; i++ ) { + nf = vect_to_ndv(spmat[i],spcol,col,rhead,s0vect); + nd_send_ndv(nf); + } + fflush(nd_write); +} + +/* XXX */ +int generic_gauss_elim_z(MAT m,MAT *nm,Z *dn,int **ri,int **ci) +{} + +int nd_gauss_elim_z(Z **mat0,int *sugar,int row,int col,int *colstat) +{ + int mod,i,j,t,c,rank,rank0,inv; + int *ci,*ri; + Z dn; + MAT m,nm; + int **wmat; + + /* XXX */ + mod = 99999989; + wmat = (int **)ALLOCA(row*sizeof(int *)); + for ( i = 0; i < row; i++ ) { + wmat[i] = (int *)ALLOCA(col*sizeof(int)); + for ( j = 0; j < col; j++ ) { + if ( mat0[i][j] ) + wmat[i][j] = remzi(mat0[i][j],mod); + else + wmat[i][j] = 0; + } + } + rank0 = nd_gauss_elim_mod(wmat,sugar,row,col,mod,colstat); + NEWMAT(m); m->row = row; m->col = col; m->body = (pointer **)mat0; + rank = generic_gauss_elim_z(m,&nm,&dn,&ri,&ci); + if ( rank != rank0 ) + error("afo"); + for ( i = 0; i < row; i++ ) + for ( j = 0; j < col; j++ ) + mat0[i][j] = 0; + c = col-rank; + for ( i = 0; i < rank; i++ ) { + mat0[i][ri[i]] = dn; + for ( j = 0; j < c; j++ ) + mat0[i][ci[j]] = (Z)BDY(nm)[i][j]; + } + inv = invm(remzi(dn,mod),mod); + for ( i = 0; i < row; i++ ) + for ( j = 0; j < col; j++ ) { + if ( mat0[i][j] ) + t = remzi(mat0[i][j],mod); + else + t = 0; + c = dmar(t,inv,0,mod); + if ( wmat[i][j] != c ) + error("afo"); + } + return rank; +} + +int nd_gauss_elim_mod(int **mat0,int *sugar,int row,int col,int md,int *colstat) +{ + int i,j,k,l,inv,a,rank,s; + unsigned int *t,*pivot,*pk; + unsigned int **mat; + + mat = (unsigned int **)mat0; + for ( rank = 0, j = 0; j < col; j++ ) { + for ( i = rank; i < row; i++ ) + mat[i][j] %= md; + for ( i = rank; i < row; i++ ) + if ( mat[i][j] ) + break; + if ( i == row ) { + colstat[j] = 0; + continue; + } else + colstat[j] = 1; + if ( i != rank ) { + t = mat[i]; mat[i] = mat[rank]; mat[rank] = t; + s = sugar[i]; sugar[i] = sugar[rank]; sugar[rank] = s; + } + pivot = mat[rank]; + s = sugar[rank]; + inv = invm(pivot[j],md); + for ( k = j, pk = pivot+k; k < col; k++, pk++ ) + if ( *pk ) { + if ( *pk >= (unsigned int)md ) + *pk %= md; + DMAR(*pk,inv,0,md,*pk) + } + for ( i = rank+1; i < row; i++ ) { + t = mat[i]; + if ( a = t[j] ) { + sugar[i] = MAX(sugar[i],s); + red_by_vect(md,t+j,pivot+j,md-a,col-j); + } + } + rank++; + } + for ( j = col-1, l = rank-1; j >= 0; j-- ) + if ( colstat[j] ) { + pivot = mat[l]; + s = sugar[l]; + for ( i = 0; i < l; i++ ) { + t = mat[i]; + t[j] %= md; + if ( a = t[j] ) { + sugar[i] = MAX(sugar[i],s); + red_by_vect(md,t+j,pivot+j,md-a,col-j); + } + } + l--; + } + for ( j = 0, l = 0; l < rank; j++ ) + if ( colstat[j] ) { + t = mat[l]; + for ( k = j; k < col; k++ ) + if ( t[k] >= (unsigned int)md ) + t[k] %= md; + l++; + } + return rank; +} + +int nd_gauss_elim_sf(int **mat0,int *sugar,int row,int col,int md,int *colstat) +{ + int i,j,k,l,inv,a,rank,s; + unsigned int *t,*pivot,*pk; + unsigned int **mat; + + mat = (unsigned int **)mat0; + for ( rank = 0, j = 0; j < col; j++ ) { + for ( i = rank; i < row; i++ ) + if ( mat[i][j] ) + break; + if ( i == row ) { + colstat[j] = 0; + continue; + } else + colstat[j] = 1; + if ( i != rank ) { + t = mat[i]; mat[i] = mat[rank]; mat[rank] = t; + s = sugar[i]; sugar[i] = sugar[rank]; sugar[rank] = s; + } + pivot = mat[rank]; + s = sugar[rank]; + inv = _invsf(pivot[j]); + for ( k = j, pk = pivot+k; k < col; k++, pk++ ) + if ( *pk ) + *pk = _mulsf(*pk,inv); + for ( i = rank+1; i < row; i++ ) { + t = mat[i]; + if ( a = t[j] ) { + sugar[i] = MAX(sugar[i],s); + red_by_vect_sf(md,t+j,pivot+j,_chsgnsf(a),col-j); + } + } + rank++; + } + for ( j = col-1, l = rank-1; j >= 0; j-- ) + if ( colstat[j] ) { + pivot = mat[l]; + s = sugar[l]; + for ( i = 0; i < l; i++ ) { + t = mat[i]; + if ( a = t[j] ) { + sugar[i] = MAX(sugar[i],s); + red_by_vect_sf(md,t+j,pivot+j,_chsgnsf(a),col-j); + } + } + l--; + } + return rank; +} + +int ndv_ishomo(NDV p) +{ + NMV m; + int len,h; + + if ( !p ) return 1; + len = LEN(p); + m = BDY(p); + h = TD(DL(m)); + NMV_ADV(m); + for ( len--; len; len--, NMV_ADV(m) ) + if ( TD(DL(m)) != h ) return 0; + return 1; +} + +void ndv_save(NDV p,int index) +{ + FILE *s; + char name[BUFSIZ]; + short id; + int nv,sugar,len,n,i,td,e,j; + NMV m; + unsigned int *dl; + + sprintf(name,"%s/%d",Demand,index); + s = fopen(name,"w"); + savevl(s,0); + if ( !p ) { + saveobj(s,0); + return; + } + id = O_DP; + nv = NV(p); + sugar = SG(p); + len = LEN(p); + write_short(s,&id); write_int(s,&nv); write_int(s,&sugar); + write_int(s,&len); + + for ( m = BDY(p), i = 0; i < len; i++, NMV_ADV(m) ) { + saveobj(s,(Obj)ztoq(CZ(m))); + dl = DL(m); + td = TD(dl); + write_int(s,&td); + for ( j = 0; j < nv; j++ ) { + e = GET_EXP(dl,j); + write_int(s,&e); + } + } + fclose(s); +} + +NDV ndv_load(int index) +{ + FILE *s; + char name[BUFSIZ]; + short id; + int nv,sugar,len,n,i,td,e,j; + NDV d; + NMV m0,m; + unsigned int *dl; + Obj obj; + + sprintf(name,"%s/%d",Demand,index); + s = fopen(name,"r"); + if ( !s ) return 0; + + skipvl(s); + read_short(s,&id); + if ( !id ) return 0; + read_int(s,&nv); + read_int(s,&sugar); + read_int(s,&len); + + m0 = m = MALLOC(len*nmv_adv); + for ( i = 0; i < len; i++, NMV_ADV(m) ) { + loadobj(s,&obj); CZ(m) = qtoz((Q)obj); + dl = DL(m); + ndl_zero(dl); + read_int(s,&td); TD(dl) = td; + for ( j = 0; j < nv; j++ ) { + read_int(s,&e); + PUT_EXP(dl,j,e); + } + if ( nd_blockmask ) ndl_weight_mask(dl); + } + fclose(s); + MKNDV(nv,m0,len,d); + SG(d) = sugar; + return d; +} + +void nd_det(int mod,MAT f,P *rp) +{ + VL fv,tv; + int n,i,j,max,e,nvar,sgn,k0,l0,len0,len,k,l,a; + pointer **m; + Z mone; + NDV **dm; + NDV *t,*mi,*mj; + NDV d,s,mij,mjj; + ND u; + NMV nmv; + PGeoBucket bucket; + struct order_spec *ord; + + create_order_spec(0,0,&ord); + nd_init_ord(ord); + get_vars((Obj)f,&fv); + if ( f->row != f->col ) + error("nd_det : non-square matrix"); + n = f->row; + for ( nvar = 0, tv = fv; tv; tv = NEXT(tv), nvar++ ); + m = f->body; + for ( i = 0, max = 0; i < n; i++ ) + for ( j = 0; j < n; j++ ) + for ( tv = fv; tv; tv = NEXT(tv) ) { + e = getdeg(tv->v,(P)m[i][j]); + max = MAX(e,max); + } + nd_setup_parameters(nvar,1024); + dm = (NDV **)almat_pointer(n,n); + for ( i = 0, max = 0; i < n; i++ ) + for ( j = 0; j < n; j++ ) { + dm[i][j] = ptondv(CO,fv,m[i][j]); + if ( mod ) ndv_mod(mod,dm[i][j]); + if ( dm[i][j] && !LEN(dm[i][j]) ) dm[i][j] = 0; + } + d = ptondv(CO,fv,(P)ONE); + if ( mod ) ndv_mod(mod,d); + mone = chsgnz(qtoz(ONE)); + for ( j = 0, sgn = 1; j < n; j++ ) { + if ( DP_Print ) fprintf(stderr,"j=%d\n",j); + for ( i = j; i < n && !dm[i][j]; i++ ); + if ( i == n ) { + *rp = 0; + return; + } + k0 = i; l0 = j; len0 = LEN(dm[k0][l0]); + for ( k = j; k < n; k++ ) + for ( l = j; l < n; l++ ) + if ( dm[k][l] && LEN(dm[k][l]) < len0 ) { + k0 = k; l0 = l; len0 = LEN(dm[k][l]); + } + if ( k0 != j ) { + t = dm[j]; dm[j] = dm[k0]; dm[k0] = t; + sgn = -sgn; + } + if ( l0 != j ) { + for ( k = j; k < n; k++ ) { + s = dm[k][j]; dm[k][j] = dm[k][l0]; dm[k][l0] = s; + } + sgn = -sgn; + } + for ( i = j+1, mj = dm[j], mjj = mj[j]; i < n; i++ ) { + if ( DP_Print ) fprintf(stderr," i=%d\n ",i); + mi = dm[i]; mij = mi[j]; + if ( mod ) + ndv_mul_c(mod,mij,mod-1); + else + ndv_mul_c_z(mij,mone); + for ( k = j+1; k < n; k++ ) { + if ( DP_Print ) fprintf(stderr,"k=%d ",k); + bucket = create_pbucket(); + if ( mi[k] ) { + nmv = BDY(mjj); len = LEN(mjj); + for ( a = 0; a < len; a++, NMV_ADV(nmv) ) { + u = ndv_mul_nmv_trunc(mod,nmv,mi[k],DL(BDY(d))); + add_pbucket(mod,bucket,u); + } + } + if ( mj[k] && mij ) { + nmv = BDY(mij); len = LEN(mij); + for ( a = 0; a < len; a++, NMV_ADV(nmv) ) { + u = ndv_mul_nmv_trunc(mod,nmv,mj[k],DL(BDY(d))); + add_pbucket(mod,bucket,u); + } + } + u = nd_quo(mod,bucket,d); + mi[k] = ndtondv(mod,u); + } + if ( DP_Print ) fprintf(stderr,"\n",k); + } + d = mjj; + } + if ( sgn < 0 ) + if ( mod ) + ndv_mul_c(mod,d,mod-1); + else + ndv_mul_c_z(d,mone); + *rp = ndvtop(mod,CO,fv,d); +} + +ND ndv_mul_nmv_trunc(int mod,NMV m0,NDV p,UINT *d) +{ + NM mr,mr0; + NM tnm; + NMV m; + UINT *d0,*dt,*dm; + int c,n,td,i,c1,c2,len; + Z q; + ND r; + + if ( !p ) return 0; + else { + n = NV(p); m = BDY(p); len = LEN(p); + d0 = DL(m0); + td = TD(d); + mr0 = 0; + NEWNM(tnm); + if ( mod ) { + c = CM(m0); + for ( i = 0; i < len; i++, NMV_ADV(m) ) { + ndl_add(DL(m),d0,DL(tnm)); + if ( ndl_reducible(DL(tnm),d) ) { + NEXTNM(mr0,mr); + c1 = CM(m); DMAR(c1,c,0,mod,c2); CM(mr) = c2; + ndl_copy(DL(tnm),DL(mr)); + } + } + } else { + q = CZ(m0); + for ( i = 0; i < len; i++, NMV_ADV(m) ) { + ndl_add(DL(m),d0,DL(tnm)); + if ( ndl_reducible(DL(tnm),d) ) { + NEXTNM(mr0,mr); + CZ(mr) = mulz(CZ(m),q); + ndl_copy(DL(tnm),DL(mr)); + } + } + } + if ( !mr0 ) + return 0; + else { + NEXT(mr) = 0; + for ( len = 0, mr = mr0; mr; mr = NEXT(mr), len++ ); + MKND(NV(p),mr0,len,r); + SG(r) = SG(p) + TD(d0); + return r; + } + } }