=================================================================== RCS file: /home/cvs/OpenXM_contrib2/asir2000/engine/nd.c,v retrieving revision 1.94 retrieving revision 1.100 diff -u -p -r1.94 -r1.100 --- OpenXM_contrib2/asir2000/engine/nd.c 2004/03/15 07:30:44 1.94 +++ OpenXM_contrib2/asir2000/engine/nd.c 2004/09/14 09:25:48 1.100 @@ -1,4 +1,4 @@ -/* $OpenXM: OpenXM_contrib2/asir2000/engine/nd.c,v 1.93 2004/03/13 07:48:30 noro Exp $ */ +/* $OpenXM: OpenXM_contrib2/asir2000/engine/nd.c,v 1.99 2004/09/14 07:23:34 noro Exp $ */ #include "nd.h" @@ -29,9 +29,12 @@ static NDV *nd_ps; static NDV *nd_ps_trace; static RHist *nd_psh; static int nd_psn,nd_pslen; - static RHist *nd_red; - +static int *nd_work_vector; +static int **nd_matrix; +static int nd_matrix_len; +static struct weight_or_block *nd_worb; +static int nd_worb_len; static int nd_found,nd_create,nd_notfirst; static int nmv_adv; static int nd_demand; @@ -422,6 +425,84 @@ int ndl_block_compare(UINT *d1,UINT *d2) return 0; } +int ndl_matrix_compare(UINT *d1,UINT *d2) +{ + int i,j,s; + int *v; + + for ( j = 0; j < nd_nvar; j++ ) + nd_work_vector[j] = GET_EXP(d1,j)-GET_EXP(d2,j); + for ( i = 0; i < nd_matrix_len; i++ ) { + v = nd_matrix[i]; + for ( j = 0, s = 0; j < nd_nvar; j++ ) + s += v[j]*nd_work_vector[j]; + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + } + return 0; +} + +int ndl_composite_compare(UINT *d1,UINT *d2) +{ + int i,j,s,start,end,len,o; + int *v; + struct sparse_weight *sw; + + for ( j = 0; j < nd_nvar; j++ ) + nd_work_vector[j] = GET_EXP(d1,j)-GET_EXP(d2,j); + for ( i = 0; i < nd_worb_len; i++ ) { + len = nd_worb[i].length; + switch ( nd_worb[i].type ) { + case IS_DENSE_WEIGHT: + v = nd_worb[i].body.dense_weight; + for ( j = 0, s = 0; j < len; j++ ) + s += v[j]*nd_work_vector[j]; + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + break; + case IS_SPARSE_WEIGHT: + sw = nd_worb[i].body.sparse_weight; + for ( j = 0, s = 0; j < len; j++ ) + s += sw[j].value*nd_work_vector[sw[j].pos]; + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + break; + case IS_BLOCK: + o = nd_worb[i].body.block.order; + start = nd_worb[i].body.block.start; + switch ( o ) { + case 0: + end = start+len; + for ( j = start, s = 0; j < end; j++ ) + s += MUL_WEIGHT(nd_work_vector[j],j); + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + for ( j = end-1; j >= start; j-- ) + if ( nd_work_vector[j] < 0 ) return 1; + else if ( nd_work_vector[j] > 0 ) return -1; + break; + case 1: + end = start+len; + for ( j = start, s = 0; j < end; j++ ) + s += MUL_WEIGHT(nd_work_vector[j],j); + if ( s > 0 ) return 1; + else if ( s < 0 ) return -1; + for ( j = start; j < end; j++ ) + if ( nd_work_vector[j] > 0 ) return 1; + else if ( nd_work_vector[j] < 0 ) return -1; + break; + case 2: + for ( j = start; j < end; j++ ) + if ( nd_work_vector[j] > 0 ) return 1; + else if ( nd_work_vector[j] < 0 ) return -1; + break; + } + break; + } + } + return 0; +} + /* TDH -> WW -> TD-> RL */ int ndl_ww_lex_compare(UINT *d1,UINT *d2) @@ -873,6 +954,79 @@ ND nd_add(int mod,ND p1,ND p2) } } +/* XXX on opteron, the inlined manipulation of destructive additon of + * two NM seems to make gcc optimizer get confused, so the part is + * done in a function. + */ + +int nm_destructive_add_q(NM *m1,NM *m2,NM *mr0,NM *mr) +{ + NM s; + Q t; + int can; + + addq(CQ(*m1),CQ(*m2),&t); + s = *m1; *m1 = NEXT(*m1); + if ( t ) { + can = 1; NEXTNM2(*mr0,*mr,s); CQ(*mr) = (t); + } else { + can = 2; FREENM(s); + } + s = *m2; *m2 = NEXT(*m2); FREENM(s); + return can; +} + +ND nd_add_q(ND p1,ND p2) +{ + int n,c,can; + ND r; + NM m1,m2,mr0,mr,s; + Q t; + + if ( !p1 ) return p2; + else if ( !p2 ) return p1; + else { + can = 0; + for ( n = NV(p1), m1 = BDY(p1), m2 = BDY(p2), mr0 = 0; m1 && m2; ) { + c = DL_COMPARE(DL(m1),DL(m2)); + switch ( c ) { + case 0: +#if defined(__x86_64__) + can += nm_destructive_add_q(&m1,&m2,&mr0,&mr); +#else + addq(CQ(m1),CQ(m2),&t); + s = m1; m1 = NEXT(m1); + if ( t ) { + can++; NEXTNM2(mr0,mr,s); CQ(mr) = (t); + } else { + can += 2; FREENM(s); + } + s = m2; m2 = NEXT(m2); FREENM(s); +#endif + break; + case 1: + s = m1; m1 = NEXT(m1); NEXTNM2(mr0,mr,s); + break; + case -1: + s = m2; m2 = NEXT(m2); NEXTNM2(mr0,mr,s); + break; + } + } + if ( !mr0 ) + if ( m1 ) mr0 = m1; + else if ( m2 ) mr0 = m2; + else return 0; + else if ( m1 ) NEXT(mr) = m1; + else if ( m2 ) NEXT(mr) = m2; + else NEXT(mr) = 0; + BDY(p1) = mr0; + SG(p1) = MAX(SG(p1),SG(p2)); + LEN(p1) = LEN(p1)+LEN(p2)-can; + FREEND(p2); + return p1; + } +} + ND nd_add_sf(ND p1,ND p2) { int n,c,can; @@ -2548,17 +2702,46 @@ UINT *ndv_compute_bound(NDV p) return t; } +UINT *nd_compute_bound(ND p) +{ + UINT *d1,*d2,*t; + UINT u; + int i,j,k,l,len,ind; + NM m; + + if ( !p ) + return 0; + d1 = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); + d2 = (UINT *)ALLOCA(nd_wpd*sizeof(UINT)); + len = LEN(p); + m = BDY(p); ndl_copy(DL(m),d1); m = NEXT(m); + for ( m = NEXT(m); m; m = NEXT(m) ) { + ndl_lcm(DL(m),d1,d2); + t = d1; d1 = d2; d2 = t; + } + l = nd_nvar+31; + t = (UINT *)MALLOC_ATOMIC(l*sizeof(UINT)); + for ( i = nd_exporigin, ind = 0; i < nd_wpd; i++ ) { + u = d1[i]; + k = (nd_epw-1)*nd_bpe; + for ( j = 0; j < nd_epw; j++, k -= nd_bpe, ind++ ) + t[ind] = (u>>k)&nd_mask0; + } + for ( ; ind < l; ind++ ) t[ind] = 0; + return t; +} + int nd_get_exporigin(struct order_spec *ord) { switch ( ord->id ) { - case 0: + case 0: case 2: return 1; case 1: /* block order */ /* d[0]:weight d[1]:w0,...,d[nd_exporigin-1]:w(n-1) */ return ord->ord.block.length+1; - case 2: - error("nd_get_exporigin : matrix order is not supported yet."); + case 3: + error("nd_get_exporigin : composite order is not supported yet."); } } @@ -2603,6 +2786,7 @@ void nd_setup_parameters(int nvar,int max) { nmv_adv = ROUND_FOR_ALIGN(sizeof(struct oNMV)+(nd_wpd-1)*sizeof(UINT)); nd_epos = nd_create_epos(nd_ord); nd_blockmask = nd_create_blockmask(nd_ord); + nd_work_vector = (int *)REALLOC(nd_work_vector,nd_nvar*sizeof(int)); } ND_pairs nd_reconstruct(int mod,int trace,ND_pairs d) @@ -3012,6 +3196,35 @@ ND ndv_mul_nm(int mod,NM m0,NDV p) } } +ND nd_quo(ND p,ND d) +{ + NM mq0,mq; + Q q; + int i,nv,sg; + ND t,r; + + if ( !p ) return 0; + else { + nv = NV(p); + sg = SG(p); + mq0 = 0; + while ( p ) { + NEXTNM(mq0,mq); + ndl_sub(HDL(p),HDL(d),DL(mq)); + divq(HCQ(p),HCQ(d),&q); + chsgnq(q,&CQ(mq)); + t = nd_mul_nm_trunc(mq,d,HDL(d)); + p = nd_add(0,p,t); + } + NEXT(mq) = 0; + for ( i = 0, mq = mq0; mq; mq = NEXT(mq), i++ ); + MKND(nv,mq0,i,r); + /* XXX */ + SG(r) = sg-SG(d); + return r; + } +} + void ndv_realloc(NDV p,int obpe,int oadv,EPOS oepos) { NMV m,mr,mr0,t; @@ -3161,7 +3374,7 @@ ND ptond(VL vl,VL dvl,P p) w = (DCP *)ALLOCA(k*sizeof(DCP)); for ( dc = DC(p), j = 0; j < k; dc = NEXT(dc), j++ ) w[j] = dc; for ( i = 0, tvl = dvl, v = VR(p); - vl && tvl->v != v; tvl = NEXT(tvl), i++ ); + tvl && tvl->v != v; tvl = NEXT(tvl), i++ ); if ( !tvl ) { for ( j = k-1, s = 0, MKV(v,x); j >= 0; j-- ) { t = ptond(vl,dvl,COEF(w[j])); @@ -3354,14 +3567,29 @@ void nd_init_ord(struct order_spec *ord) } break; case 1: + /* block order */ /* XXX */ nd_dcomp = -1; nd_isrlex = 0; ndl_compare_function = ndl_block_compare; break; case 2: - error("nd_init_ord : matrix order is not supported yet."); + /* matrix order */ + /* XXX */ + nd_dcomp = -1; + nd_isrlex = 0; + nd_matrix_len = ord->ord.matrix.row; + nd_matrix = ord->ord.matrix.matrix; + ndl_compare_function = ndl_matrix_compare; break; + case 3: + /* composite order */ + nd_dcomp = -1; + nd_isrlex = 0; + nd_worb_len = ord->ord.composite.length; + nd_worb = ord->ord.composite.w_or_b; + ndl_compare_function = ndl_composite_compare; + break; } nd_ord = ord; } @@ -3372,7 +3600,8 @@ BlockMask nd_create_blockmask(struct order_spec *ord) UINT *t; BlockMask bm; - if ( !ord->id ) + /* we only create mask table for block order */ + if ( ord->id != 1 ) return 0; n = ord->ord.block.length; bm = (BlockMask)MALLOC(sizeof(struct oBlockMask)); @@ -3430,7 +3659,14 @@ EPOS nd_create_epos(struct order_spec *ord) } break; case 2: - error("nd_create_epos : matrix order is not supported yet."); + /* matrix order */ + case 3: + /* composite order */ + for ( i = 0; i < nd_nvar; i++ ) { + epos[i].i = nd_exporigin + i/nd_epw; + epos[i].s = (nd_epw-(i%nd_epw)-1)*nd_bpe; + } + break; } return epos; } @@ -4408,4 +4644,126 @@ NDV ndv_load(int index) MKNDV(nv,m0,len,d); SG(d) = sugar; return d; +} + +void nd_det(MAT f,P *rp) +{ + VL fv,tv; + int n,i,j,max,e,nvar,sgn,k0,l0,len0,len,k,l; + pointer **m; + Q mone; + ND **dm; + ND *t,*mi,*mj; + ND d,s,mij,mjj,m1,m2,u; + NDV dv; + PGeoBucket bucket; + NM nm; + struct order_spec *ord; + + create_order_spec(0,0,&ord); + nd_init_ord(ord); + get_vars((Obj)f,&fv); + if ( f->row != f->col ) + error("nd_det : non-square matrix"); + n = f->row; + for ( nvar = 0, tv = fv; tv; tv = NEXT(tv), nvar++ ); + m = f->body; + for ( i = 0, max = 0; i < n; i++ ) + for ( j = 0; j < n; j++ ) + for ( tv = fv; tv; tv = NEXT(tv) ) { + e = getdeg(tv->v,(P)m[i][j]); + max = MAX(e,max); + } + nd_setup_parameters(nvar,1024); + dm = (ND **)almat_pointer(n,n); + for ( i = 0, max = 0; i < n; i++ ) + for ( j = 0; j < n; j++ ) + dm[i][j] = ptond(CO,fv,m[i][j]); + d = ptond(CO,fv,(P)ONE); + chsgnq(ONE,&mone); + for ( j = 0, sgn = 1; j < n; j++ ) { + for ( i = j; i < n && !dm[i][j]; i++ ); + if ( i == n ) { + *rp = 0; + return; + } + k0 = i; l0 = j; len0 = nd_length(dm[k0][l0]); + for ( k = j; k < n; k++ ) + for ( l = j; l < n; l++ ) + if ( dm[k][l] && (len = nd_length(dm[k][l])) < len0 ) { + k0 = k; l0 = l; len0 = len; + } + if ( k0 != j ) { + t = dm[j]; dm[j] = dm[k0]; dm[k0] = t; + sgn = -sgn; + } + if ( l0 != j ) { + for ( k = j; k < n; k++ ) { + s = dm[k][j]; dm[k][j] = dm[k][l0]; dm[k][l0] = s; + } + sgn = -sgn; + } + for ( i = j+1, mj = dm[j], mjj = mj[j]; i < n; i++ ) { + mi = dm[i]; mij = mi[j]; + nd_mul_c_q(mij,mone); + for ( k = j+1; k < n; k++ ) { + bucket = create_pbucket(); + if ( mi[k] ) + for ( nm = BDY(mjj); nm; nm = NEXT(nm) ) { + u = nd_mul_nm_trunc(nm,mi[k],DL(BDY(d))); + add_pbucket(0,bucket,u); + } + if ( mj[k] && mij ) { + for ( nm = BDY(mij); nm; nm = NEXT(nm) ) { + u = nd_mul_nm_trunc(nm,mj[k],DL(BDY(d))); + add_pbucket(0,bucket,u); + } + } + s = normalize_pbucket(0,bucket); + mi[k] = nd_quo(s,d); + } + } + d = mjj; + } + if ( sgn < 0 ) + nd_mul_c_q(d,mone); + dv = ndtondv(0,d); + *rp = ndvtop(0,CO,fv,dv); +} + +ND nd_mul_nm_trunc(NM m0,ND p,UINT *d) +{ + NM mr,mr0; + NM m,tnm; + UINT *d0,*dt,*dm; + int c,n,td,i,c1,c2,len; + Q q; + ND r; + + if ( !p ) return 0; + else { + n = NV(p); m = BDY(p); + d0 = DL(m0); + len = LEN(p); + mr0 = 0; + td = TD(d); + q = CQ(m0); + NEWNM(tnm); + for ( ; m;m = NEXT(m) ) { + ndl_add(DL(m),d0,DL(tnm)); + if ( ndl_reducible(DL(tnm),d) ) { + NEXTNM(mr0,mr); + mulq(CQ(m),q,&CQ(mr)); + ndl_add(DL(m),d0,DL(mr)); + } + } + if ( !mr0 ) + return 0; + else { + NEXT(mr) = 0; + MKND(NV(p),mr0,len,r); + SG(r) = SG(p) + TD(d0); + return r; + } + } }