=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2000/engine/nd.c,v retrieving revision 1.79 retrieving revision 1.101 diff -u -p -r1.79 -r1.101 --- OpenXM_contrib2/asir2000/engine/nd.c 2003/10/10 07:18:12 1.79 +++ OpenXM_contrib2/asir2000/engine/nd.c 2004/09/14 10:00:26 1.101 @@ -1,123 +1,19 @@ -/* $OpenXM: OpenXM_contrib2/asir2000/engine/nd.c,v 1.78 2003/10/10 01:51:09 noro Exp $ */ +/* $OpenXM: OpenXM_contrib2/asir2000/engine/nd.c,v 1.100 2004/09/14 09:25:48 noro Exp $ */ -#include "ca.h" -#include "parse.h" -#include "ox.h" -#include "inline.h" -#include +#include "nd.h" -#if defined(__GNUC__) -#define INLINE inline -#elif defined(VISUAL) -#define INLINE __inline -#else -#define INLINE -#endif - -typedef unsigned int UINT; - -#define USE_GEOBUCKET 1 -#define USE_UNROLL 1 - -#define REDTAB_LEN 32003 - -/* GeoBucket for polynomial addition */ - -typedef struct oPGeoBucket { - int m; - struct oND *body[32]; -} *PGeoBucket; - -/* distributed polynomial; linked list rep. */ -typedef struct oND { - struct oNM *body; - int nv; - int len; - int sugar; -} *ND; - -/* distributed polynomial; array rep. */ -typedef struct oNDV { - struct oNMV *body; - int nv; - int len; - int sugar; -} *NDV; - -typedef union oNDC { - int m; - Q z; - P p; -} *NDC; - -/* monomial; linked list rep. */ -typedef struct oNM { - struct oNM *next; - union oNDC c; - UINT dl[1]; -} *NM; - -/* monomial; array rep. */ -typedef struct oNMV { - union oNDC c; - UINT dl[1]; -} *NMV; - -/* history of reducer */ -typedef struct oRHist { - struct oRHist *next; - int index; - int sugar; - UINT dl[1]; -} *RHist; - -/* S-pair list */ -typedef struct oND_pairs { - struct oND_pairs *next; - int i1,i2; - int sugar; - UINT lcm[1]; -} *ND_pairs; - -/* index and shift count for each exponent */ -typedef struct oEPOS { - int i; /* index */ - int s; /* shift */ -} *EPOS; - -typedef struct oBlockMask { - int n; - struct order_pair *order_pair; - UINT **mask; -} *BlockMask; - -typedef struct oBaseSet { - int len; - NDV *ps; - UINT **bound; -} *BaseSet; - -typedef struct oNM_ind_pair -{ - NM mul; - int index,sugar; -} *NM_ind_pair; - -typedef struct oIndArray -{ - char width; - int head; - union { - unsigned char *c; - unsigned short *s; - unsigned int *i; - } index; -} *IndArray; - int (*ndl_compare_function)(UINT *a1,UINT *a2); +int nd_dcomp; +NM _nm_free_list; +ND _nd_free_list; +ND_pairs _ndp_free_list; static int ndv_alloc; +#if 1 static int nd_f4_nsp=0x7fffffff; +#else +static int nd_f4_nsp=50; +#endif static double nd_scale=2; static UINT **nd_bound; static struct order_spec *nd_ord; @@ -129,253 +25,20 @@ static int nd_epw,nd_bpe,nd_wpd,nd_exporigin; static UINT nd_mask[32]; static UINT nd_mask0,nd_mask1; -static NM _nm_free_list; -static ND _nd_free_list; -static ND_pairs _ndp_free_list; - static NDV *nd_ps; static NDV *nd_ps_trace; static RHist *nd_psh; static int nd_psn,nd_pslen; - static RHist *nd_red; - +static int *nd_work_vector; +static int **nd_matrix; +static int nd_matrix_len; +static struct weight_or_block *nd_worb; +static int nd_worb_len; static int nd_found,nd_create,nd_notfirst; -static int nm_adv; static int nmv_adv; -static int nd_dcomp; static int nd_demand; -extern struct order_spec dp_current_spec; -extern char *Demand; -extern VL CO; -extern int Top,Reverse,DP_Print,dp_nelim,do_weyl,NoSugar; -extern int *current_weyl_weight_vector; - -/* fundamental macros */ -#define TD(d) (d[0]) -#define HDL(d) ((d)->body->dl) -#define HTD(d) (TD(HDL(d))) -#define HCM(d) ((d)->body->c.m) -#define HCQ(d) ((d)->body->c.z) -#define HCP(d) ((d)->body->c.p) -#define CM(a) ((a)->c.m) -#define CQ(a) ((a)->c.z) -#define CP(a) ((a)->c.p) -#define DL(a) ((a)->dl) -#define SG(a) ((a)->sugar) -#define LEN(a) ((a)->len) -#define LCM(a) ((a)->lcm) -#define GET_EXP(d,a) (((d)[nd_epos[a].i]>>nd_epos[a].s)&nd_mask0) -#define GET_EXP_MASK(d,a,m) ((((d)[nd_epos[a].i]&(m)[nd_epos[a].i])>>nd_epos[a].s)&nd_mask0) -#define PUT_EXP(r,a,e) ((r)[nd_epos[a].i] |= ((e)<>oepos[a].s)&omask0) -#define PUT_EXP_OLD(r,a,e) ((r)[oepos[a].i] |= ((e)<TD(d2)?1:(TD(d1)0?TD_DL_COMPARE(d1,d2)\ - :(nd_dcomp==0?ndl_lex_compare(d1,d2)\ - :(nd_blockmask?ndl_block_compare(d1,d2)\ - :(*ndl_compare_function)(d1,d2)))) -#else -#define DL_COMPARE(d1,d2)\ -(nd_dcomp>0?TD_DL_COMPARE(d1,d2):(*ndl_compare_function)(d1,d2)) -#endif - -/* allocators */ -#define NEWRHist(r) \ -((r)=(RHist)MALLOC(sizeof(struct oRHist)+(nd_wpd-1)*sizeof(UINT))) -#define NEWND_pairs(m) \ -if(!_ndp_free_list)_NDP_alloc();\ -(m)=_ndp_free_list; _ndp_free_list = NEXT(_ndp_free_list) -#define NEWNM(m)\ -if(!_nm_free_list)_NM_alloc();\ -(m)=_nm_free_list; _nm_free_list = NEXT(_nm_free_list) -#define MKND(n,m,len,d)\ -if(!_nd_free_list)_ND_alloc();\ -(d)=_nd_free_list; _nd_free_list = (ND)BDY(_nd_free_list);\ -NV(d)=(n); LEN(d)=(len); BDY(d)=(m) -#define NEWNDV(d) ((d)=(NDV)MALLOC(sizeof(struct oNDV))) -#define MKNDV(n,m,l,d) NEWNDV(d); NV(d)=(n); BDY(d)=(m); LEN(d) = l; -#define NEWNM_ind_pair(p)\ -((p)=(NM_ind_pair)MALLOC(sizeof(struct oNM_ind_pair))) - -/* allocate and link a new object */ -#define NEXTRHist(r,c) \ -if(!(r)){NEWRHist(r);(c)=(r);}else{NEWRHist(NEXT(c));(c)=NEXT(c);} -#define NEXTNM(r,c) \ -if(!(r)){NEWNM(r);(c)=(r);}else{NEWNM(NEXT(c));(c)=NEXT(c);} -#define NEXTNM2(r,c,s) \ -if(!(r)){(c)=(r)=(s);}else{NEXT(c)=(s);(c)=(s);} -#define NEXTND_pairs(r,c) \ -if(!(r)){NEWND_pairs(r);(c)=(r);}else{NEWND_pairs(NEXT(c));(c)=NEXT(c);} -#define MKNM_ind_pair(p,m,i,s) (NEWNM_ind_pair(p),(p)->mul=(m),(p)->index=(i),(p)->sugar = (s)) - -/* deallocators */ -#define FREENM(m) NEXT(m)=_nm_free_list; _nm_free_list=(m) -#define FREENDP(m) NEXT(m)=_ndp_free_list; _ndp_free_list=(m) -#define FREEND(m) BDY(m)=(NM)_nd_free_list; _nd_free_list=(m) - -/* macro for increasing pointer to NMV */ -#define NMV_ADV(m) (m = (NMV)(((char *)m)+nmv_adv)) -#define NMV_OADV(m) (m = (NMV)(((char *)m)+oadv)) -#define NDV_NADV(m) (m = (NMV)(((char *)m)+newadv)) -#define NMV_PREV(m) (m = (NMV)(((char *)m)-nmv_adv)) -#define NMV_OPREV(m) (m = (NMV)(((char *)m)-oadv)) - -/* external functions */ -#if 1 -void GC_gcollect(); -#endif -NODE append_one(NODE,int); - -/* manipulation of coefficients */ -void nd_removecont(int mod,ND p); -void nd_removecont2(ND p1,ND p2); -void removecont_array(Q *c,int n); - -/* GeoBucket functions */ -ND normalize_pbucket(int mod,PGeoBucket g); -int head_pbucket(int mod,PGeoBucket g); -int head_pbucket_q(PGeoBucket g); -void add_pbucket_symbolic(PGeoBucket g,ND d); -void add_pbucket(int mod,PGeoBucket g,ND d); -void free_pbucket(PGeoBucket b); -void mulq_pbucket(PGeoBucket g,Q c); -NM remove_head_pbucket_symbolic(PGeoBucket g); -PGeoBucket create_pbucket(); - -/* manipulation of pairs and bases */ -int nd_newps(int mod,ND a,ND aq); -ND_pairs nd_newpairs( NODE g, int t ); -ND_pairs nd_minp( ND_pairs d, ND_pairs *prest ); -ND_pairs nd_minsugarp( ND_pairs d, ND_pairs *prest ); -NODE update_base(NODE nd,int ndp); -ND_pairs update_pairs( ND_pairs d, NODE /* of index */ g, int t); -ND_pairs equivalent_pairs( ND_pairs d1, ND_pairs *prest ); -ND_pairs crit_B( ND_pairs d, int s ); -ND_pairs crit_M( ND_pairs d1 ); -ND_pairs crit_F( ND_pairs d1 ); -int crit_2( int dp1, int dp2 ); -int ndv_newps(int m,NDV a,NDV aq); - -/* top level functions */ -void nd_gr(LIST f,LIST v,int m,int f4,struct order_spec *ord,LIST *rp); -void nd_gr_trace(LIST f,LIST v,int trace,int homo,struct order_spec *ord,LIST *rp); -NODE nd_f4(int m); -NODE nd_gb(int m,int ishomo,int checkonly); -NODE nd_gb_trace(int m); - -/* ndl functions */ -int ndl_weight(UINT *d); -void ndl_weight_mask(UINT *d); -void ndl_homogenize(UINT *d,UINT *r,int obpe,EPOS oepos,int weight); -void ndl_dehomogenize(UINT *p); -void ndl_reconstruct(UINT *d,UINT *r,int obpe,EPOS oepos); -INLINE int ndl_reducible(UINT *d1,UINT *d2); -INLINE int ndl_lex_compare(UINT *d1,UINT *d2); -INLINE int ndl_block_compare(UINT *d1,UINT *d2); -INLINE int ndl_equal(UINT *d1,UINT *d2); -INLINE void ndl_copy(UINT *d1,UINT *d2); -INLINE void ndl_zero(UINT *d); -INLINE void ndl_add(UINT *d1,UINT *d2,UINT *d); -INLINE void ndl_addto(UINT *d1,UINT *d2); -INLINE void ndl_sub(UINT *d1,UINT *d2,UINT *d); -INLINE int ndl_hash_value(UINT *d); - -/* normal forms */ -INLINE int ndl_find_reducer(UINT *g); -int nd_sp(int mod,int trace,ND_pairs p,ND *nf); -int nd_nf(int mod,ND g,NDV *ps,int full,NDC dn,ND *nf); -int nd_nf_pbucket(int mod,ND g,NDV *ps,int full,ND *nf); - -/* finalizers */ -NODE ndv_reducebase(NODE x); -NODE ndv_reduceall(int m,NODE f); - -/* allocators */ -void nd_free_private_storage(); -void _NM_alloc(); -void _ND_alloc(); -void nd_free(ND p); -void nd_free_redlist(); - -/* printing */ -void ndl_print(UINT *dl); -void nd_print(ND p); -void nd_print_q(ND p); -void ndp_print(ND_pairs d); - - -/* setup, reconstruct */ -void nd_init_ord(struct order_spec *spec); -ND_pairs nd_reconstruct(int mod,int trace,ND_pairs ndp); -void ndv_setup(int mod,int trace,NODE f); -void nd_setup_parameters(int nvar,int max); -BlockMask nd_create_blockmask(struct order_spec *ord); -EPOS nd_create_epos(struct order_spec *ord); -int nd_get_exporigin(struct order_spec *ord); -void ndv_mod(int mod,NDV p); -NDV ndv_dup(int mod,NDV p); -ND nd_dup(ND p); - -/* ND functions */ -int ndv_check_candidate(NODE input,int obpe,int oadv,EPOS oepos,NODE cand); -void nd_mul_c(int mod,ND p,int mul); -void nd_mul_c_q(ND p,Q mul); -void nd_mul_c_p(VL vl,ND p,P mul); -ND nd_remove_head(ND p); -ND nd_separate_head(ND p,ND *head); -int nd_length(ND p); -void nd_append_red(UINT *d,int i); -UINT *ndv_compute_bound(NDV p); -ND nd_copy(ND p); -ND nd_merge(ND p1,ND p2); -ND nd_add(int mod,ND p1,ND p2); -ND nd_add_q(ND p1,ND p2); -ND nd_add_sf(ND p1,ND p2); -INLINE int nd_length(ND p); -NODE nd_f4_red(int m,ND_pairs sp0,UINT *s0vect,int col,NODE rp0); -NODE nd_f4_red_dist(int m,ND_pairs sp0,UINT *s0vect,int col,NODE rp0); - -/* NDV functions */ -ND weyl_ndv_mul_nm(int mod,NM m0,NDV p); -void weyl_mul_nm_nmv(int n,int mod,NM m0,NMV m1,NM *tab,int tlen); -void ndv_mul_c(int mod,NDV p,int mul); -void ndv_mul_c_q(NDV p,Q mul); -ND ndv_mul_nm_symbolic(NM m0,NDV p); -ND ndv_mul_nm(int mod,NM m0,NDV p); -void ndv_realloc(NDV p,int obpe,int oadv,EPOS oepos); -NDV ndv_dup_realloc(NDV p,int obpe,int oadv,EPOS oepos); -void ndv_homogenize(NDV p,int obpe,int oadv,EPOS eops); -void ndv_dehomogenize(NDV p,struct order_spec *spec); -void ndv_removecont(int mod,NDV p); -void ndv_print(NDV p); -void ndv_print_q(NDV p); -void ndv_free(NDV p); -void ndv_save(NDV p,int index); -NDV ndv_load(int index); - -/* converters */ -ND ptond(VL vl,VL dvl,P p); -NDV ptondv(VL vl,VL dvl,P p); -P ndvtop(int mod,VL vl,VL dvl,NDV p); -NDV ndtondv(int mod,ND p); -ND ndvtond(int mod,NDV p); -int nm_ind_pair_to_vect(int m,UINT *s0,int n,NM_ind_pair pair,UINT *r); -IndArray nm_ind_pair_to_vect_compress(int m,UINT *s0,int n,NM_ind_pair pair); -int nd_to_vect(int mod,UINT *s0,int n,ND d,UINT *r); - -/* elimination */ -int nd_gauss_elim_mod(int **mat0,int *sugar,int row,int col,int md,int *colstat); -int nd_gauss_elim_sf(int **mat0,int *sugar,int row,int col,int md,int *colstat); - void nd_free_private_storage() { _nm_free_list = 0; @@ -762,6 +425,84 @@ int ndl_block_compare(UINT *d1,UINT *d2) return 0; } +int ndl_matrix_compare(UINT *d1,UINT *d2) +{ + int i,j,s; + int *v; + + for ( j = 0; j < nd_nvar; j++ ) + nd_work_vector[j] = GET_EXP(d1,j)-GET_EXP(d2,j); + for ( i = 0; i < nd_matrix_len; i++ ) { + v = nd_matrix[i]; + for ( j = 0, s = 0; j < nd_nvar; j++ ) + s += v[j]*nd_work_vector[j]; + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + } + return 0; +} + +int ndl_composite_compare(UINT *d1,UINT *d2) +{ + int i,j,s,start,end,len,o; + int *v; + struct sparse_weight *sw; + + for ( j = 0; j < nd_nvar; j++ ) + nd_work_vector[j] = GET_EXP(d1,j)-GET_EXP(d2,j); + for ( i = 0; i < nd_worb_len; i++ ) { + len = nd_worb[i].length; + switch ( nd_worb[i].type ) { + case IS_DENSE_WEIGHT: + v = nd_worb[i].body.dense_weight; + for ( j = 0, s = 0; j < len; j++ ) + s += v[j]*nd_work_vector[j]; + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + break; + case IS_SPARSE_WEIGHT: + sw = nd_worb[i].body.sparse_weight; + for ( j = 0, s = 0; j < len; j++ ) + s += sw[j].value*nd_work_vector[sw[j].pos]; + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + break; + case IS_BLOCK: + o = nd_worb[i].body.block.order; + start = nd_worb[i].body.block.start; + switch ( o ) { + case 0: + end = start+len; + for ( j = start, s = 0; j < end; j++ ) + s += MUL_WEIGHT(nd_work_vector[j],j); + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + for ( j = end-1; j >= start; j-- ) + if ( nd_work_vector[j] < 0 ) return 1; + else if ( nd_work_vector[j] > 0 ) return -1; + break; + case 1: + end = start+len; + for ( j = start, s = 0; j < end; j++ ) + s += MUL_WEIGHT(nd_work_vector[j],j); + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + for ( j = start; j < end; j++ ) + if ( nd_work_vector[j] > 0 ) return 1; + else if ( nd_work_vector[j] < 0 ) return -1; + break; + case 2: + for ( j = start; j < end; j++ ) + if ( nd_work_vector[j] > 0 ) return 1; + else if ( nd_work_vector[j] < 0 ) return -1; + break; + } + break; + } + } + return 0; +} + /* TDH -> WW -> TD-> RL */ int ndl_ww_lex_compare(UINT *d1,UINT *d2) @@ -784,10 +525,24 @@ INLINE int ndl_equal(UINT *d1,UINT *d2) { int i; - for ( i = 0; i < nd_wpd; i++ ) - if ( *d1++ != *d2++ ) - return 0; - return 1; + switch ( nd_wpd ) { + case 2: + if ( TD(d2) != TD(d1) ) return 0; + if ( d2[1] != d1[1] ) return 0; + return 1; + break; + case 3: + if ( TD(d2) != TD(d1) ) return 0; + if ( d2[1] != d1[1] ) return 0; + if ( d2[2] != d1[2] ) return 0; + return 1; + break; + default: + for ( i = 0; i < nd_wpd; i++ ) + if ( *d1++ != *d2++ ) return 0; + return 1; + break; + } } INLINE void ndl_copy(UINT *d1,UINT *d2) @@ -1199,6 +954,28 @@ ND nd_add(int mod,ND p1,ND p2) } } +/* XXX on opteron, the inlined manipulation of destructive additon of + * two NM seems to make gcc optimizer get confused, so the part is + * done in a function. + */ + +int nm_destructive_add_q(NM *m1,NM *m2,NM *mr0,NM *mr) +{ + NM s; + Q t; + int can; + + addq(CQ(*m1),CQ(*m2),&t); + s = *m1; *m1 = NEXT(*m1); + if ( t ) { + can = 1; NEXTNM2(*mr0,*mr,s); CQ(*mr) = (t); + } else { + can = 2; FREENM(s); + } + s = *m2; *m2 = NEXT(*m2); FREENM(s); + return can; +} + ND nd_add_q(ND p1,ND p2) { int n,c,can; @@ -1214,6 +991,9 @@ ND nd_add_q(ND p1,ND p2) c = DL_COMPARE(DL(m1),DL(m2)); switch ( c ) { case 0: +#if defined(__x86_64__) + can += nm_destructive_add_q(&m1,&m2,&mr0,&mr); +#else addq(CQ(m1),CQ(m2),&t); s = m1; m1 = NEXT(m1); if ( t ) { @@ -1222,6 +1002,7 @@ ND nd_add_q(ND p1,ND p2) can += 2; FREENM(s); } s = m2; m2 = NEXT(m2); FREENM(s); +#endif break; case 1: s = m1; m1 = NEXT(m1); NEXTNM2(mr0,mr,s); @@ -1744,6 +1525,45 @@ ND normalize_pbucket(int mod,PGeoBucket g) return r; } +void do_diagonalize(int sugar,int m) +{ + int i,nh,stat; + NODE r,g,t; + ND h,nf,s,head; + NDV nfv; + Q q,num,den; + union oNDC dn; + + for ( i = nd_psn-1; i >= 0 && SG(nd_psh[i]) == sugar; i-- ) { + if ( nd_demand ) + nfv = ndv_load(i); + else + nfv = nd_ps[i]; + s = ndvtond(m,nfv); + s = nd_separate_head(s,&head); + nd_nf(m,s,nd_ps,1,&dn,&nf); + if ( !m ) { + NTOQ(NM(dn.z),SGN(dn.z),num); + mulq(HCQ(head),num,&q); HCQ(head) = q; + if ( DN(dn.z) ) { + NTOQ(DN(dn.z),1,den); + nd_mul_c_q(nf,den); + } + } + nf = nd_add(m,head,nf); + ndv_free(nfv); + nd_removecont(m,nf); + nfv = ndtondv(m,nf); + nd_free(nf); + nd_bound[i] = ndv_compute_bound(nfv); + if ( nd_demand ) { + ndv_save(nfv,i); + ndv_free(nfv); + } else + nd_ps[i] = nfv; + } +} + /* return value = 0 => input is not a GB */ NODE nd_gb(int m,int ishomo,int checkonly) @@ -1754,7 +1574,7 @@ NODE nd_gb(int m,int ishomo,int checkonly) ND_pairs l; ND h,nf,s,head; NDV nfv; - Q q; + Q q,num,den; union oNDC dn; g = 0; d = 0; @@ -1767,19 +1587,8 @@ NODE nd_gb(int m,int ishomo,int checkonly) again: l = nd_minp(d,&d); if ( SG(l) != sugar ) { - if ( ishomo ) { - for ( i = nd_psn-1; SG(nd_ps[i]) == sugar; i-- ) { - s = ndvtond(m,nd_ps[i]); - s = nd_separate_head(s,&head); - nd_nf(m,s,nd_ps,1,&dn,&nf); - if ( !m ) { mulq(HCQ(head),dn.z,&q); HCQ(head) = q; } - nf = nd_add(m,head,nf); - ndv_free(nd_ps[i]); - nd_removecont(m,nf); - nd_ps[i] = ndtondv(m,nf); nd_free(nf); - nd_bound[i] = ndv_compute_bound(nd_ps[i]); - } - } + if ( ishomo ) do_diagonalize(sugar,m); + sugar = SG(l); if ( DP_Print ) fprintf(asir_out,"%d",sugar); } @@ -1821,14 +1630,62 @@ again: return g; } -NODE nd_gb_trace(int m) +void do_diagonalize_trace(int sugar,int m) { + int i,nh,stat; + NODE r,g,t; + ND h,nf,nfq,s,head; + NDV nfv,nfqv; + Q q,den,num; + union oNDC dn; + + for ( i = nd_psn-1; i >= 0 && SG(nd_psh[i]) == sugar; i-- ) { + /* for nd_ps */ + s = ndvtond(m,nd_ps[i]); + s = nd_separate_head(s,&head); + nd_nf_pbucket(m,s,nd_ps,1,&nf); + nf = nd_add(m,head,nf); + ndv_free(nd_ps[i]); + nd_ps[i] = ndtondv(m,nf); + nd_free(nf); + + /* for nd_ps_trace */ + if ( nd_demand ) + nfv = ndv_load(i); + else + nfv = nd_ps_trace[i]; + s = ndvtond(0,nfv); + s = nd_separate_head(s,&head); + nd_nf(0,s,nd_ps_trace,1,&dn,&nf); + NTOQ(NM(dn.z),SGN(dn.z),num); + mulq(HCQ(head),num,&q); HCQ(head) = q; + if ( DN(dn.z) ) { + NTOQ(DN(dn.z),1,den); + nd_mul_c_q(nf,den); + } + nf = nd_add(0,head,nf); + ndv_free(nfv); + nd_removecont(0,nf); + nfv = ndtondv(0,nf); + nd_free(nf); + nd_bound[i] = ndv_compute_bound(nfv); + if ( nd_demand ) { + ndv_save(nfv,i); + ndv_free(nfv); + } else + nd_ps_trace[i] = nfv; + } +} + +NODE nd_gb_trace(int m,int ishomo) +{ int i,nh,sugar,stat; NODE r,g,t; ND_pairs d; ND_pairs l; - ND h,nf,nfq; + ND h,nf,nfq,s,head; NDV nfv,nfqv; + Q q,den,num; union oNDC dn; g = 0; d = 0; @@ -1841,6 +1698,7 @@ NODE nd_gb_trace(int m) again: l = nd_minp(d,&d); if ( SG(l) != sugar ) { + if ( ishomo ) do_diagonalize_trace(sugar,m); sugar = SG(l); if ( DP_Print ) fprintf(asir_out,"%d",sugar); } @@ -1916,7 +1774,7 @@ NODE ndv_reduceall(int m,NODE f) NODE t,a0,a; union oNDC dn; NDV *w; - Q q; + Q q,num,den; n = length(f); #if 0 @@ -1935,7 +1793,14 @@ NODE ndv_reduceall(int m,NODE f) nd_reconstruct(m,0,0); else { if ( DP_Print ) { printf("."); fflush(stdout); } - if ( !m ) { mulq(HCQ(head),dn.z,&q); HCQ(head) = q; } + if ( !m ) { + NTOQ(NM(dn.z),SGN(dn.z),num); + mulq(HCQ(head),num,&q); HCQ(head) = q; + if ( DN(dn.z) ) { + NTOQ(DN(dn.z),1,den); + nd_mul_c_q(nf,den); + } + } nf = nd_add(m,head,nf); ndv_free(nd_ps[i]); nd_removecont(m,nf); @@ -2324,6 +2189,14 @@ void nd_gr(LIST f,LIST v,int m,int f4,struct order_spe ndv_alloc = 0; get_vars((Obj)f,&fv); pltovl(v,&vv); for ( nvar = 0, tv = vv; tv; tv = NEXT(tv), nvar++ ); + switch ( ord->id ) { + case 1: + if ( ord->nv != nvar ) + error("nd_{gr,f4} : invalid order specification"); + break; + default: + break; + } nd_init_ord(ord); for ( t = BDY(f), max = 0; t; t = NEXT(t) ) for ( tv = vv; tv; tv = NEXT(tv) ) { @@ -2342,6 +2215,7 @@ void nd_gr(LIST f,LIST v,int m,int f4,struct order_spe if ( fd0 ) NEXT(fd) = 0; ndv_setup(m,0,fd0); x = f4?nd_f4(m):nd_gb(m,ishomo,0); + nd_demand = 0; x = ndv_reducebase(x); x = ndv_reduceall(m,x); for ( r0 = 0, t = x; t; t = NEXT(t) ) { @@ -2355,7 +2229,7 @@ void nd_gr(LIST f,LIST v,int m,int f4,struct order_spe void nd_gr_trace(LIST f,LIST v,int trace,int homo,struct order_spec *ord,LIST *rp) { - struct order_spec ord1; + struct order_spec *ord1; VL tv,fv,vv,vc; NODE fd,fd0,in0,in,r,r0,t,s,cand; int m,nocheck,nvar,mindex,e,max; @@ -2363,10 +2237,18 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,stru NMV a; P p; EPOS oepos; - int obpe,oadv,wmax,i,len,cbpe; + int obpe,oadv,wmax,i,len,cbpe,ishomo; get_vars((Obj)f,&fv); pltovl(v,&vv); for ( nvar = 0, tv = vv; tv; tv = NEXT(tv), nvar++ ); + switch ( ord->id ) { + case 1: + if ( ord->nv != nvar ) + error("nd_gr_trace : invalid order specification"); + break; + default: + break; + } nocheck = 0; mindex = 0; @@ -2387,8 +2269,11 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,stru nd_init_ord(ord); nd_setup_parameters(nvar,max); obpe = nd_bpe; oadv = nmv_adv; oepos = nd_epos; + ishomo = 1; for ( in0 = 0, fd0 = 0, t = BDY(f); t; t = NEXT(t) ) { c = ptondv(CO,vv,(P)BDY(t)); + if ( ishomo ) + ishomo = ishomo && ndv_ishomo(c); if ( c ) { NEXTNODE(in0,in); BDY(in) = (pointer)c; NEXTNODE(fd0,fd); BDY(fd) = (pointer)ndv_dup(0,c); @@ -2396,14 +2281,14 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,stru } if ( in0 ) NEXT(in) = 0; if ( fd0 ) NEXT(fd) = 0; - if ( homo ) { + if ( !ishomo && homo ) { for ( t = in0, wmax = 0; t; t = NEXT(t) ) { c = (NDV)BDY(t); len = LEN(c); for ( a = BDY(c), i = 0; i < len; i++, NMV_ADV(a) ) wmax = MAX(TD(DL(a)),wmax); } homogenize_order(ord,nvar,&ord1); - nd_init_ord(&ord1); + nd_init_ord(ord1); nd_setup_parameters(nvar+1,wmax); for ( t = fd0; t; t = NEXT(t) ) ndv_homogenize((NDV)BDY(t),obpe,oadv,oepos); @@ -2412,14 +2297,14 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,stru if ( Demand ) nd_demand = 1; ndv_setup(m,1,fd0); - cand = nd_gb_trace(m); + cand = nd_gb_trace(m,ishomo || homo); if ( !cand ) { /* failure */ if ( trace > 1 ) { *rp = 0; return; } else m = get_lprime(++mindex); continue; } - if ( homo ) { + if ( !ishomo && homo ) { /* dehomogenization */ for ( t = cand; t; t = NEXT(t) ) ndv_dehomogenize((NDV)BDY(t),ord); nd_init_ord(ord); @@ -2441,8 +2326,8 @@ void nd_gr_trace(LIST f,LIST v,int trace,int homo,stru /* try the next modulus */ m = get_lprime(++mindex); /* reset the parameters */ - if ( homo ) { - nd_init_ord(&ord1); + if ( !ishomo && homo ) { + nd_init_ord(ord1); nd_setup_parameters(nvar+1,wmax); } else { nd_init_ord(ord); @@ -2673,7 +2558,7 @@ void ndv_dehomogenize(NDV p,struct order_spec *ord) for ( m = BDY(p), i = 0; i < len; NMV_ADV(m), i++ ) ndl_dehomogenize(DL(m)); if ( newwpd != nd_wpd ) { - newadv = sizeof(struct oNMV)+(newwpd-1)*sizeof(UINT); + newadv = ROUND_FOR_ALIGN(sizeof(struct oNMV)+(newwpd-1)*sizeof(UINT)); for ( m = r = BDY(p), i = 0; i < len; NMV_ADV(m), NDV_NADV(r), i++ ) { CQ(r) = CQ(m); for ( j = 0; j < newexporigin; j++ ) DL(r)[j] = DL(m)[j]; @@ -2817,17 +2702,46 @@ UINT *ndv_compute_bound(NDV p) return t; } +UINT *nd_compute_bound(ND p) +{ + UINT *d1,*d2,*t; + UINT u; + int i,j,k,l,len,ind; + NM m; + + if ( !p ) + return 0; + d1 = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); + d2 = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); + len = LEN(p); + m = BDY(p); ndl_copy(DL(m),d1); m = NEXT(m); + for ( m = NEXT(m); m; m = NEXT(m) ) { + ndl_lcm(DL(m),d1,d2); + t = d1; d1 = d2; d2 = t; + } + l = nd_nvar+31; + t = (UINT *)MALLOC_ATOMIC(l*sizeof(UINT)); + for ( i = nd_exporigin, ind = 0; i < nd_wpd; i++ ) { + u = d1[i]; + k = (nd_epw-1)*nd_bpe; + for ( j = 0; j < nd_epw; j++, k -= nd_bpe, ind++ ) + t[ind] = (u>>k)&nd_mask0; + } + for ( ; ind < l; ind++ ) t[ind] = 0; + return t; +} + int nd_get_exporigin(struct order_spec *ord) { switch ( ord->id ) { - case 0: + case 0: case 2: return 1; case 1: /* block order */ /* d[0]:weight d[1]:w0,...,d[nd_exporigin-1]:w(n-1) */ return ord->ord.block.length+1; - case 2: - error("nd_get_exporigin : matrix order is not supported yet."); + case 3: + error("nd_get_exporigin : composite order is not supported yet."); } } @@ -2837,7 +2751,9 @@ void nd_setup_parameters(int nvar,int max) { nd_nvar = nvar; if ( max ) { - if ( max < 2 ) nd_bpe = 1; + /* XXX */ + if ( do_weyl ) nd_bpe = 32; + else if ( max < 2 ) nd_bpe = 1; else if ( max < 4 ) nd_bpe = 2; else if ( max < 8 ) nd_bpe = 3; else if ( max < 16 ) nd_bpe = 4; @@ -2867,10 +2783,10 @@ void nd_setup_parameters(int nvar,int max) { nd_mask[nd_epw-i-1] = (nd_mask0<<(i*nd_bpe)); nd_mask1 |= (1<<(nd_bpe-1))<<(i*nd_bpe); } - nm_adv = sizeof(struct oNM)+(nd_wpd-1)*sizeof(UINT); - nmv_adv = sizeof(struct oNMV)+(nd_wpd-1)*sizeof(UINT); + nmv_adv = ROUND_FOR_ALIGN(sizeof(struct oNMV)+(nd_wpd-1)*sizeof(UINT)); nd_epos = nd_create_epos(nd_ord); nd_blockmask = nd_create_blockmask(nd_ord); + nd_work_vector = (int *)REALLOC(nd_work_vector,nd_nvar*sizeof(int)); } ND_pairs nd_reconstruct(int mod,int trace,ND_pairs d) @@ -3280,6 +3196,36 @@ ND ndv_mul_nm(int mod,NM m0,NDV p) } } +ND nd_quo(ND p,ND d) +{ + NM mq0,mq; + Q q; + int i,nv,sg; + ND t,r; + + if ( !p ) return 0; + else { + nv = NV(p); + sg = SG(p); + mq0 = 0; + while ( p ) { + NEXTNM(mq0,mq); + ndl_sub(HDL(p),HDL(d),DL(mq)); + divq(HCQ(p),HCQ(d),&q); + chsgnq(q,&CQ(mq)); + t = nd_mul_nm_trunc(mq,d,HDL(d)); + CQ(mq) = q; + p = nd_add(0,p,t); + } + NEXT(mq) = 0; + for ( i = 0, mq = mq0; mq; mq = NEXT(mq), i++ ); + MKND(nv,mq0,i,r); + /* XXX */ + SG(r) = sg-SG(d); + return r; + } +} + void ndv_realloc(NDV p,int obpe,int oadv,EPOS oepos) { NMV m,mr,mr0,t; @@ -3429,7 +3375,7 @@ ND ptond(VL vl,VL dvl,P p) w = (DCP *)ALLOCA(k*sizeof(DCP)); for ( dc = DC(p), j = 0; j < k; dc = NEXT(dc), j++ ) w[j] = dc; for ( i = 0, tvl = dvl, v = VR(p); - vl && tvl->v != v; tvl = NEXT(tvl), i++ ); + tvl && tvl->v != v; tvl = NEXT(tvl), i++ ); if ( !tvl ) { for ( j = k-1, s = 0, MKV(v,x); j >= 0; j-- ) { t = ptond(vl,dvl,COEF(w[j])); @@ -3622,14 +3568,29 @@ void nd_init_ord(struct order_spec *ord) } break; case 1: + /* block order */ /* XXX */ nd_dcomp = -1; nd_isrlex = 0; ndl_compare_function = ndl_block_compare; break; case 2: - error("nd_init_ord : matrix order is not supported yet."); + /* matrix order */ + /* XXX */ + nd_dcomp = -1; + nd_isrlex = 0; + nd_matrix_len = ord->ord.matrix.row; + nd_matrix = ord->ord.matrix.matrix; + ndl_compare_function = ndl_matrix_compare; break; + case 3: + /* composite order */ + nd_dcomp = -1; + nd_isrlex = 0; + nd_worb_len = ord->ord.composite.length; + nd_worb = ord->ord.composite.w_or_b; + ndl_compare_function = ndl_composite_compare; + break; } nd_ord = ord; } @@ -3640,7 +3601,8 @@ BlockMask nd_create_blockmask(struct order_spec *ord) UINT *t; BlockMask bm; - if ( !ord->id ) + /* we only create mask table for block order */ + if ( ord->id != 1 ) return 0; n = ord->ord.block.length; bm = (BlockMask)MALLOC(sizeof(struct oBlockMask)); @@ -3698,7 +3660,14 @@ EPOS nd_create_epos(struct order_spec *ord) } break; case 2: - error("nd_create_epos : matrix order is not supported yet."); + /* matrix order */ + case 3: + /* composite order */ + for ( i = 0; i < nd_nvar; i++ ) { + epos[i].i = nd_exporigin + i/nd_epw; + epos[i].s = (nd_epw-(i%nd_epw)-1)*nd_bpe; + } + break; } return epos; } @@ -3823,7 +3792,7 @@ IndArray nm_ind_pair_to_vect_compress(int mod,UINT *s0 NDV p; unsigned char *ivc; unsigned short *ivs; - UINT *v,*ivi; + UINT *v,*ivi,*s0v; int i,j,len,prev,diff,cdiff; IndArray r; @@ -3832,13 +3801,13 @@ IndArray nm_ind_pair_to_vect_compress(int mod,UINT *s0 p = nd_ps[pair->index]; len = LEN(p); t = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); - r = (IndArray)MALLOC(sizeof(struct oIndArray)); v = (unsigned int *)ALLOCA(len*sizeof(unsigned int)); for ( i = j = 0, s = s0, mr = BDY(p); j < len; j++, NMV_ADV(mr) ) { ndl_add(d,DL(mr),t); for ( ; !ndl_equal(t,s); s += nd_wpd, i++ ); v[j] = i; } + r = (IndArray)MALLOC(sizeof(struct oIndArray)); r->head = v[0]; diff = 0; for ( i = 1; i < len; i++ ) { @@ -4067,7 +4036,7 @@ NODE nd_f4(int m) ND spol,red; NDV nf,redv; NM s0,s; - NODE rp0,sp0,srp0,nflist; + NODE rp0,srp0,nflist; int nsp,nred,col,rank,len,k,j,a; UINT c; UINT **spmat; @@ -4100,7 +4069,7 @@ NODE nd_f4(int m) d = nd_reconstruct(m,0,d); continue; } - if ( !sp0 ) continue; + if ( bucket->m < 0 ) continue; col = nd_symbolic_preproc(bucket,&s0vect,&rp0); if ( !col ) { for ( t = l; NEXT(t); t = NEXT(t) ); @@ -4142,7 +4111,7 @@ NODE nd_f4_red(int m,ND_pairs sp0,UINT *s0vect,int col int **spmat; UINT *svect,*v; int *colstat; - struct oEGT eg0,eg1,eg_f4; + struct oEGT eg0,eg1,eg2,eg_f4,eg_f4_1,eg_f4_2; NM_ind_pair *rvect; int maxrs; int *spsugar; @@ -4184,6 +4153,11 @@ NODE nd_f4_red(int m,ND_pairs sp0,UINT *s0vect,int col } nd_free(spol); } + get_eg(&eg1); init_eg(&eg_f4_1); add_eg(&eg_f4_1,&eg0,&eg1); + if ( DP_Print ) { + fprintf(asir_out,"elim1=%fsec,",eg_f4_1.exectime+eg_f4_1.gctime); + fflush(asir_out); + } /* free index arrays */ for ( i = 0; i < nred; i++ ) GC_free(imat[i]->index.c); @@ -4201,8 +4175,10 @@ NODE nd_f4_red(int m,ND_pairs sp0,UINT *s0vect,int col GC_free(spmat[i]); } for ( ; i < sprow; i++ ) GC_free(spmat[i]); - get_eg(&eg1); init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg1); + get_eg(&eg2); init_eg(&eg_f4_2); add_eg(&eg_f4_2,&eg1,&eg2); + init_eg(&eg_f4); add_eg(&eg_f4,&eg0,&eg2); if ( DP_Print ) { + fprintf(asir_out,"elim2=%fsec\n",eg_f4_2.exectime+eg_f4_2.gctime); fprintf(asir_out,"nsp=%d,nred=%d,spmat=(%d,%d),rank=%d ", nsp,nred,sprow,spcol,rank); fprintf(asir_out,"%fsec\n",eg_f4.exectime+eg_f4.gctime); @@ -4322,7 +4298,7 @@ NODE nd_f4_red_dist(int m,ND_pairs sp0,UINT *s0vect,in nd_send_int(nd_wpd); nd_send_int(nmv_adv); - saveobj(nd_write,dp_current_spec.obj); fflush(nd_write); + saveobj(nd_write,dp_current_spec->obj); fflush(nd_write); nd_send_int(nd_psn); for ( i = 0; i < nd_psn; i++ ) nd_send_ndv(nd_ps[i]); @@ -4368,7 +4344,7 @@ void nd_exec_f4_red_dist() ND_pairs *sp0; int *colstat; int a,sprow,rank; - struct order_spec ord; + struct order_spec *ord; Obj ordspec; ND spol; int maxrs; @@ -4383,8 +4359,8 @@ void nd_exec_f4_red_dist() nmv_adv = nd_recv_int(); loadobj(nd_read,&ordspec); - create_order_spec(ordspec,&ord); - nd_init_ord(&ord); + create_order_spec(0,ordspec,&ord); + nd_init_ord(ord); nd_setup_parameters(nd_nvar,0); nd_psn = nd_recv_int(); @@ -4669,4 +4645,126 @@ NDV ndv_load(int index) MKNDV(nv,m0,len,d); SG(d) = sugar; return d; +} + +void nd_det(MAT f,P *rp) +{ + VL fv,tv; + int n,i,j,max,e,nvar,sgn,k0,l0,len0,len,k,l; + pointer **m; + Q mone; + ND **dm; + ND *t,*mi,*mj; + ND d,s,mij,mjj,m1,m2,u; + NDV dv; + PGeoBucket bucket; + NM nm; + struct order_spec *ord; + + create_order_spec(0,0,&ord); + nd_init_ord(ord); + get_vars((Obj)f,&fv); + if ( f->row != f->col ) + error("nd_det : non-square matrix"); + n = f->row; + for ( nvar = 0, tv = fv; tv; tv = NEXT(tv), nvar++ ); + m = f->body; + for ( i = 0, max = 0; i < n; i++ ) + for ( j = 0; j < n; j++ ) + for ( tv = fv; tv; tv = NEXT(tv) ) { + e = getdeg(tv->v,(P)m[i][j]); + max = MAX(e,max); + } + nd_setup_parameters(nvar,1024); + dm = (ND **)almat_pointer(n,n); + for ( i = 0, max = 0; i < n; i++ ) + for ( j = 0; j < n; j++ ) + dm[i][j] = ptond(CO,fv,m[i][j]); + d = ptond(CO,fv,(P)ONE); + chsgnq(ONE,&mone); + for ( j = 0, sgn = 1; j < n; j++ ) { + for ( i = j; i < n && !dm[i][j]; i++ ); + if ( i == n ) { + *rp = 0; + return; + } + k0 = i; l0 = j; len0 = nd_length(dm[k0][l0]); + for ( k = j; k < n; k++ ) + for ( l = j; l < n; l++ ) + if ( dm[k][l] && (len = nd_length(dm[k][l])) < len0 ) { + k0 = k; l0 = l; len0 = len; + } + if ( k0 != j ) { + t = dm[j]; dm[j] = dm[k0]; dm[k0] = t; + sgn = -sgn; + } + if ( l0 != j ) { + for ( k = j; k < n; k++ ) { + s = dm[k][j]; dm[k][j] = dm[k][l0]; dm[k][l0] = s; + } + sgn = -sgn; + } + for ( i = j+1, mj = dm[j], mjj = mj[j]; i < n; i++ ) { + mi = dm[i]; mij = mi[j]; + nd_mul_c_q(mij,mone); + for ( k = j+1; k < n; k++ ) { + bucket = create_pbucket(); + if ( mi[k] ) + for ( nm = BDY(mjj); nm; nm = NEXT(nm) ) { + u = nd_mul_nm_trunc(nm,mi[k],DL(BDY(d))); + add_pbucket(0,bucket,u); + } + if ( mj[k] && mij ) { + for ( nm = BDY(mij); nm; nm = NEXT(nm) ) { + u = nd_mul_nm_trunc(nm,mj[k],DL(BDY(d))); + add_pbucket(0,bucket,u); + } + } + s = normalize_pbucket(0,bucket); + mi[k] = nd_quo(s,d); + } + } + d = mjj; + } + if ( sgn < 0 ) + nd_mul_c_q(d,mone); + dv = ndtondv(0,d); + *rp = ndvtop(0,CO,fv,dv); +} + +ND nd_mul_nm_trunc(NM m0,ND p,UINT *d) +{ + NM mr,mr0; + NM m,tnm; + UINT *d0,*dt,*dm; + int c,n,td,i,c1,c2,len; + Q q; + ND r; + + if ( !p ) return 0; + else { + n = NV(p); m = BDY(p); + d0 = DL(m0); + len = LEN(p); + mr0 = 0; + td = TD(d); + q = CQ(m0); + NEWNM(tnm); + for ( ; m;m = NEXT(m) ) { + ndl_add(DL(m),d0,DL(tnm)); + if ( ndl_reducible(DL(tnm),d) ) { + NEXTNM(mr0,mr); + mulq(CQ(m),q,&CQ(mr)); + ndl_add(DL(m),d0,DL(mr)); + } + } + if ( !mr0 ) + return 0; + else { + NEXT(mr) = 0; + MKND(NV(p),mr0,len,r); + SG(r) = SG(p) + TD(d0); + return r; + } + } }