Annotation of OpenXM_contrib2/asir2000/asm/ddN.c, Revision 1.1.1.1
1.1 noro 1: /* $OpenXM: OpenXM/src/asir99/asm/ddN.c,v 1.1.1.1 1999/11/10 08:12:25 noro Exp $ */
2: #ifndef FBASE
3: #define FBASE
4: #endif
5:
6: #include "ca.h"
7: #include "base.h"
8: #include "inline.h"
9:
10: void bxprintn(N),bprintn(N);
11: void divnmain_special(int,int,unsigned int *,unsigned int *,unsigned int *);
12: void dupn(N,N);
13:
14: void divn(n1,n2,nq,nr)
15: N n1,n2,*nq,*nr;
16: {
17: int tmp,b;
18: int i,j,d1,d2,dd;
19: unsigned int *m1,*m2;
20: unsigned int uq,ur,msw;
21: N q,r,t1,t2;
22:
23: if ( !n2 )
24: error("divn: divsion by 0");
25: else if ( !n1 ) {
26: *nq = 0; *nr = 0;
27: } else if ( UNIN(n2) ) {
28: COPY(n1,*nq); *nr = 0;
29: } else if ( (d1 = PL(n1)) < (d2 = PL(n2)) ) {
30: COPY(n1,*nr); *nq = 0;
31: } else if ( d2 == 1 ) {
32: if ( d1 == 1 ) {
33: DQR(BD(n1)[0],BD(n2)[0],uq,ur)
34: STON(uq,*nq); STON(ur,*nr);
35: } else {
36: ur = divin(n1,BD(n2)[0],nq); STON(ur,*nr);
37: }
38: return;
39: } else if ( (d1 == d2) && ((tmp = cmpn(n1,n2)) <= 0) )
40: if ( !tmp ) {
41: *nr = 0; COPY(ONEN,*nq);
42: } else {
43: COPY(n1,*nr); *nq = 0;
44: }
45: else {
46: msw = BD(n2)[d2-1];
47: for ( i = BSH-1; !(msw&(((unsigned int)1)<<i)); i-- ); i++;
48: b = BSH-i;
49: if ( b ) {
50: bshiftn(n1,-b,&t1); bshiftn(n2,-b,&t2);
51: } else {
52: COPY(n1,t1); COPY(n2,t2);
53: }
54: m1 = (unsigned int *)ALLOCA((PL(t1)+1)*sizeof(unsigned int));
55: bcopy(BD(t1),m1,PL(t1)*sizeof(unsigned int)); m1[PL(t1)] = 0;
56: m2 = BD(t2); dd = d1-d2;
57: *nq = q = NALLOC(dd+1); INITRC(q);
58: divnmain(d1,d2,m1,m2,BD(q)); FREEN(t1); FREEN(t2);
59: PL(q) = (BD(q)[dd]?dd+1:dd);
60: if ( !PL(q) ) {
61: FREEN(q);
62: }
63: for ( j = d2-1; (j >= 0) && (m1[j] == 0); j--);
64: if ( j == -1 )
65: *nr = 0;
66: else {
67: r = NALLOC(j+1); INITRC(r); PL(r) = j+1;
68: bcopy(m1,BD(r),(j+1)*sizeof(unsigned int));
69: if ( b ) {
70: bshiftn(r,b,nr); FREEN(r);
71: } else
72: *nr = r;
73: }
74: }
75: }
76:
77: void divsn(n1,n2,nq)
78: N n1,n2,*nq;
79: {
80: int d1,d2,dd,i,b;
81: unsigned int *m1,*m2;
82: unsigned int uq,msw;
83: N q,t1,t2;
84:
85: if ( !n1 )
86: *nq = 0;
87: else if ( UNIN(n2) ) {
88: COPY(n1,*nq);
89: } else if ( (d1 = PL(n1)) < (d2 = PL(n2)) )
90: error("divsn: cannot happen");
91: else if ( d2 == 1 ) {
92: if ( d1 == 1 ) {
93: uq = BD(n1)[0] / BD(n2)[0]; STON(uq,*nq);
94: } else
95: divin(n1,BD(n2)[0],nq);
96: } else {
97: msw = BD(n2)[d2-1];
98: for ( i = BSH-1; !(msw&(((unsigned int)1)<<i)); i-- ); i++;
99: b = BSH-i;
100: if ( b ) {
101: bshiftn(n1,-b,&t1); bshiftn(n2,-b,&t2);
102: } else {
103: COPY(n1,t1); COPY(n2,t2);
104: }
105: m1 = (unsigned int *)ALLOCA((PL(t1)+1)*sizeof(unsigned int));
106: bcopy(BD(t1),m1,PL(t1)*sizeof(unsigned int)); m1[PL(t1)] = 0;
107: m2 = BD(t2); dd = d1-d2;
108: *nq = q = NALLOC(dd+1); INITRC(q);
109: divnmain(d1,d2,m1,m2,BD(q));
110: FREEN(t1); FREEN(t2);
111: PL(q) = (BD(q)[dd]?dd+1:dd);
112: if ( !PL(q) )
113: FREEN(q);
114: }
115: }
116:
117: void remn(n1,n2,nr)
118: N n1,n2,*nr;
119: {
120: int d1,d2,tmp;
121: unsigned int uq,ur;
122: N q;
123:
124: if ( !n2 )
125: error("remn: divsion by 0");
126: else if ( !n1 || UNIN(n2) )
127: *nr = 0;
128: else if ( (d1 = PL(n1)) < (d2 = PL(n2)) ) {
129: COPY(n1,*nr);
130: } else if ( d2 == 1 ) {
131: if ( d1 == 1 ) {
132: DQR(BD(n1)[0],BD(n2)[0],uq,ur)
133: STON(ur,*nr);
134: } else {
135: ur = rem(n1,BD(n2)[0]); UTON(ur,*nr);
136: }
137: return;
138: } else if ( (d1 == d2) && ((tmp = cmpn(n1,n2)) <= 0) )
139: if ( !tmp ) {
140: *nr = 0;
141: } else {
142: COPY(n1,*nr);
143: }
144: else
145: divn(n1,n2,&q,nr);
146: }
147:
148: /* d = 2^(32*words)-lower */
149:
150: void remn_special(a,d,bits,lower,b)
151: N a,d;
152: int bits;
153: unsigned int lower;
154: N *b;
155: {
156: int words;
157: N r;
158: int rl,i;
159: unsigned int c,tmp;
160: unsigned int *rb,*src,*dst;
161:
162: if ( cmpn(a,d) < 0 ) {
163: *b = a;
164: return;
165: }
166: words = bits>>5;
167: r = NALLOC(PL(a)); dupn(a,r);
168: rl = PL(r); rb = BD(r);
169: while ( rl > words ) {
170: src = rb+words;
171: dst = rb;
172: i = rl-words;
173: for ( c = 0; i > 0; i--, src++, dst++ ) {
174: DMA2(*src,lower,c,*dst,c,*dst)
175: *src = 0;
176: }
177: for ( ; c; dst++ ) {
178: tmp = *dst + c;
179: c = tmp < c ? 1 : 0;
180: *dst = tmp;
181: }
182: rl = dst-rb;
183: }
184: for ( i = words-1; i >= 0 && !rb[i]; i-- );
185: PL(r) = i + 1;
186: if ( cmpn(r,d) >= 0 ) {
187: tmp = rb[0]-BD(d)[0];
188: UTON(tmp,*b);
189: } else
190: *b = r;
191: }
192: void mulin(n,d,p)
193: N n;
194: unsigned int d;
195: unsigned int *p;
196: {
197: unsigned int carry;
198: unsigned int *m,*me;
199:
200: bzero((char *)p,(int)((PL(n)+1)*sizeof(int)));
201: for ( carry = 0, m = BD(n), me = m+PL(n); m < me; p++, m++ ) {
202: DMA2(*m,d,*p,carry,carry,*p)
203: }
204: *p = carry;
205: }
206:
207: unsigned int divin(n,dvr,q)
208: N n;
209: unsigned int dvr;
210: N *q;
211: {
212: int d;
213: unsigned int up;
214: unsigned int *m,*mq,*md;
215: N nq;
216:
217: d = PL(n); m = BD(n);
218: if ( ( d == 1 ) && ( dvr > *m ) ) {
219: *q = 0;
220: return *m;
221: }
222: *q = nq = NALLOC(d); INITRC(nq);
223: for ( md = m+d-1, mq = BD(nq)+d-1, up = 0; md >= m; md--, mq-- ) {
224: DSAB(dvr,up,*md,*mq,up)
225: }
226: PL(nq) = (BD(nq)[d-1]?d:d-1);
227: if ( !PL(nq) )
228: FREEN(nq);
229: return ( up );
230: }
231:
232: void bprintn(n)
233: N n;
234: {
235: int l,i;
236: unsigned int *b;
237:
238: if ( !n )
239: printf("0");
240: else {
241: l = PL(n); b = BD(n);
242: for ( i = l-1; i >= 0; i-- )
243: printf("%010u|",b[i]);
244: }
245: }
246:
247: void bxprintn(n)
248: N n;
249: {
250: int l,i;
251: unsigned int *b;
252:
253: if ( !n )
254: printf("0");
255: else {
256: l = PL(n); b = BD(n);
257: for ( i = l-1; i >= 0; i-- )
258: printf("%08x|",b[i]);
259: }
260: }
261:
262: #if defined(VISUAL) || defined(i386)
263: void muln(n1,n2,nr)
264: N n1,n2,*nr;
265: {
266: unsigned int tmp,carry,mul;
267: unsigned int *p1,*m1,*m2;
268: int i,d1,d2,d;
269: N r;
270:
271: if ( !n1 || !n2 )
272: *nr = 0;
273: else if ( UNIN(n1) )
274: COPY(n2,*nr);
275: else if ( UNIN(n2) )
276: COPY(n1,*nr);
277: else if ( (PL(n1) == 1) && (PL(n2) == 1) ) {
278: DM(*BD(n1),*BD(n2),carry,tmp)
279: if ( carry ) {
280: *nr = r = NALLOC(2); INITRC(r);
281: PL(r) = 2; p1 = BD(r); *p1++ = tmp; *p1 = carry;
282: } else
283: STON(tmp,*nr);
284: } else {
285: d1 = PL(n1); d2 = PL(n2);
286: d = d1+d2;
287: *nr = r = NALLOC(d); INITRC(r);
288: bzero((char *)BD(r),(int)((d1+d2)*sizeof(int)));
289: for ( i = 0, m1 = BD(n1), m2 = BD(n2); i < d2; i++, m2++ )
290: if ( mul = *m2 )
291: muln_1(m1,d1,mul,BD(r)+i);
292: PL(r) = (BD(r)[d-1]?d:d-1);
293: }
294: }
295:
296: void _muln(n1,n2,nr)
297: N n1,n2,nr;
298: {
299: unsigned int mul;
300: unsigned int *m1,*m2;
301: int i,d1,d2,d;
302:
303: if ( !n1 || !PL(n1) || !n2 || !PL(n2) )
304: PL(nr) = 0;
305: else if ( UNIN(n1) )
306: dupn(n2,nr);
307: else if ( UNIN(n2) )
308: dupn(n1,nr);
309: else {
310: d1 = PL(n1); d2 = PL(n2);
311: d = d1+d2;
312: bzero((char *)BD(nr),(int)((d1+d2)*sizeof(int)));
313: for ( i = 0, m1 = BD(n1), m2 = BD(n2); i < d2; i++, m2++ )
314: if ( mul = *m2 )
315: muln_1(m1,d1,mul,BD(nr)+i);
316: PL(nr) = (BD(nr)[d-1]?d:d-1);
317: }
318: }
319:
320: void muln_1(p,s,d,r)
321: unsigned int *p;
322: int s;
323: unsigned int d;
324: unsigned int *r;
325: {
326: /* esi : p, edi : r, carry : ebx, s : ecx */
327: #if defined(VISUAL)
328: __asm {
329: push esi
330: push edi
331: mov esi,p
332: mov edi,r
333: mov ecx,s
334: xor ebx,ebx
335: Lstart_muln:
336: mov eax,DWORD PTR [esi]
337: mul d
338: add eax,DWORD PTR [edi]
339: adc edx,0
340: add eax,ebx
341: adc edx,0
342: mov DWORD PTR[edi],eax
343: mov ebx,edx
344: lea esi,DWORD PTR [esi+4]
345: lea edi,DWORD PTR [edi+4]
346: dec ecx
347: jnz Lstart_muln
348: mov DWORD PTR [edi],ebx
349: pop edi
350: pop esi
351: }
352: #else
353: asm volatile("\
354: movl %0,%%esi;\
355: movl %1,%%edi;\
356: movl $0,%%ebx;\
357: Lstart_muln:;\
358: movl (%%esi),%%eax;\
359: mull %2;\
360: addl (%%edi),%%eax;\
361: adcl $0,%%edx;\
362: addl %%ebx,%%eax;\
363: adcl $0,%%edx;\
364: movl %%eax,(%%edi);\
365: movl %%edx,%%ebx;\
366: leal 4(%%esi),%%esi;\
367: leal 4(%%edi),%%edi;\
368: decl %3;\
369: jnz Lstart_muln;\
370: movl %%ebx,(%%edi)"\
371: :\
372: :"m"(p),"m"(r),"m"(d),"m"(s)\
373: :"eax","ebx","edx","esi","edi");
374: #endif
375: }
376:
377: void divnmain(d1,d2,m1,m2,q)
378: int d1,d2;
379: unsigned int *m1,*m2,*q;
380: {
381: int i,j;
382: UL r,ltmp;
383: unsigned int l,ur;
384: unsigned int *n1,*n2;
385: unsigned int u,qhat;
386: unsigned int v1,v2;
387:
388: v1 = m2[d2-1]; v2 = m2[d2-2];
389: #if 0
390: if ( v1 == (1<<31) && !v2 ) {
391: divnmain_special(d1,d2,m1,m2,q);
392: return;
393: }
394: #endif
395: for ( j = d1-d2, m1 += j, q += j; j >= 0; j--, q--, m1-- ) {
396: n1 = m1+d2; n2 = m1+d2-1;
397: if ( *n1 == v1 ) {
398: qhat = ULBASE - 1;
399: r = (UL)*n1 + (UL)*n2;
400: } else {
401: DSAB(v1,*n1,*n2,qhat,ur)
402: r = (UL)ur;
403: }
404: DM(v2,qhat,u,l)
405: while ( 1 ) {
406: if ((r > (UL)u) || ((r == (UL)u) && (*(n1-2) >= l)))
407: break;
408: if ( l >= v2 )
409: l -= v2;
410: else {
411: l = (unsigned int)((UL)l+(ULBASE-(UL)v2)); u--;
412: }
413: r += v1; qhat--;
414: }
415: if ( qhat ) {
416: u = divn_1(m2,d2,qhat,m1);
417: if ( *(m1+d2) < u ) {
418: for ( i = d2, qhat--, ur = 0, n1 = m1, n2 = m2;
419: i > 0; i--, n1++, n2++ ) {
420: ltmp = (UL)*n1 + (UL)*n2 + (UL)ur;
421: *n1 = (unsigned int)(ltmp & BMASK);
422: ur = (unsigned int)(ltmp >> BSH);
423: }
424: }
425: *n1 = 0;
426: }
427: *q = qhat;
428: }
429: }
430:
431: void divnmain_special(d1,d2,m1,m2,q)
432: int d1,d2;
433: unsigned int *m1,*m2,*q;
434: {
435: int i,j;
436: UL ltmp;
437: unsigned int ur;
438: unsigned int *n1,*n2;
439: unsigned int u,qhat;
440: unsigned int v1,v2;
441:
442: v1 = m2[d2-1]; v2 = 0;
443: for ( j = d1-d2, m1 += j, q += j; j >= 0; j--, q--, m1-- ) {
444: n1 = m1+d2; n2 = m1+d2-1;
445: qhat = ((*n1)<<1)|((*n2)>>31);
446: if ( qhat ) {
447: u = divn_1(m2,d2,qhat,m1);
448: if ( *(m1+d2) < u ) {
449: for ( i = d2, qhat--, ur = 0, n1 = m1, n2 = m2;
450: i > 0; i--, n1++, n2++ ) {
451: ltmp = (UL)*n1 + (UL)*n2 + (UL)ur;
452: *n1 = (unsigned int)(ltmp & BMASK);
453: ur = (unsigned int)(ltmp >> BSH);
454: }
455: }
456: *n1 = 0;
457: }
458: *q = qhat;
459: }
460: }
461:
462: unsigned int divn_1(p,s,d,r)
463: unsigned int *p;
464: int s;
465: unsigned int d;
466: unsigned int *r;
467: {
468: /*
469: unsigned int borrow,l;
470:
471: for ( borrow = 0; s--; p++, r++ ) {
472: DMA(*p,d,borrow,borrow,l)
473: if ( *r >= l )
474: *r -= l;
475: else {
476: *r = (unsigned int)((UL)*r+(ULBASE-(UL)l)); borrow++;
477: }
478: }
479: return borrow;
480: */
481: /* esi : p, edi : r, borrow : ebx, s : ecx */
482: #if defined(VISUAL)
483: __asm {
484: push esi
485: push edi
486: mov esi,p
487: mov edi,r
488: mov ecx,s
489: xor ebx,ebx
490: Lstart_divn:
491: mov eax,DWORD PTR [esi]
492: mul d
493: add eax,ebx
494: adc edx,0
495: sub DWORD PTR [edi],eax
496: adc edx,0
497: mov ebx,edx
498: lea esi,DWORD PTR [esi+4]
499: lea edi,DWORD PTR [edi+4]
500: dec ecx
501: jnz Lstart_divn
502: mov eax,ebx
503: pop edi
504: pop esi
505: }
506: /* return value is in eax. */
507: #else
508: unsigned int borrow;
509:
510: asm volatile("\
511: movl %1,%%esi;\
512: movl %2,%%edi;\
513: movl $0,%%ebx;\
514: Lstart_divn:;\
515: movl (%%esi),%%eax;\
516: mull %3;\
517: addl %%ebx,%%eax;\
518: adcl $0,%%edx;\
519: subl %%eax,(%%edi);\
520: adcl $0,%%edx;\
521: movl %%edx,%%ebx;\
522: leal 4(%%esi),%%esi;\
523: leal 4(%%edi),%%edi;\
524: decl %4;\
525: jnz Lstart_divn;\
526: movl %%ebx,%0"\
527: :"=m"(borrow)\
528: :"m"(p),"m"(r),"m"(d),"m"(s)\
529: :"eax","ebx","edx","esi","edi");
530:
531: return borrow;
532: #endif
533: }
534:
535: #else
536:
537: void muln(n1,n2,nr)
538: N n1,n2,*nr;
539: {
540: unsigned int tmp,carry,mul;
541: unsigned int *p1,*pp,*m1,*m2;
542: int i,j,d1,d2;
543: N r;
544:
545: if ( !n1 || !n2 )
546: *nr = 0;
547: else if ( UNIN(n1) )
548: COPY(n2,*nr);
549: else if ( UNIN(n2) )
550: COPY(n1,*nr);
551: else if ( (PL(n1) == 1) && (PL(n2) == 1) ) {
552: DM(*BD(n1),*BD(n2),carry,tmp)
553: if ( carry ) {
554: *nr = r = NALLOC(2); INITRC(r);
555: PL(r) = 2; p1 = BD(r); *p1++ = tmp; *p1 = carry;
556: } else
557: STON(tmp,*nr);
558: } else {
559: d1 = PL(n1); d2 = PL(n2);
560: *nr = r = NALLOC(d1+d2); INITRC(r);
561: bzero((char *)BD(r),(int)((d1+d2)*sizeof(int)));
562: for ( i = 0, m1 = BD(n1), m2 = BD(n2); i < d2; i++, m2++ )
563: if ( mul = *m2 ) {
564: for ( j = d1, carry = 0, p1 = m1, pp = BD(r)+i;
565: j--; pp++ ) {
566: DMA2(*p1++,mul,*pp,carry,carry,*pp)
567: }
568: *pp = carry;
569: }
570: PL(r) = (carry?d1+d2:d1+d2-1);
571: }
572: }
573:
574: void _muln(n1,n2,nr)
575: N n1,n2,nr;
576: {
577: unsigned int tmp,carry,mul;
578: unsigned int *p1,*pp,*m1,*m2;
579: int i,j,d1,d2;
580:
581: if ( !n1 || !PL(n1) || !n2 || !PL(n2) )
582: PL(nr) = 0;
583: else if ( UNIN(n1) )
584: dupn(n2,nr);
585: else if ( UNIN(n2) )
586: dupn(n1,nr);
587: else {
588: d1 = PL(n1); d2 = PL(n2);
589: bzero((char *)BD(nr),(int)((d1+d2)*sizeof(int)));
590: for ( i = 0, m1 = BD(n1), m2 = BD(n2); i < d2; i++, m2++ )
591: if ( mul = *m2 ) {
592: for ( j = d1, carry = 0, p1 = m1, pp = BD(nr)+i;
593: j--; pp++ ) {
594: DMA2(*p1++,mul,*pp,carry,carry,*pp)
595: }
596: *pp = carry;
597: }
598: PL(nr) = (carry?d1+d2:d1+d2-1);
599: }
600: }
601:
602: /* r[0...s] = p[0...s-1]*d */
603:
604: void muln_1(p,s,d,r)
605: unsigned int *p;
606: int s;
607: unsigned int d;
608: unsigned int *r;
609: {
610: unsigned int carry;
611:
612: for ( carry = 0; s--; p++, r++ ) {
613: DMA2(*p,d,carry,*r,carry,*r)
614: }
615: *r = carry;
616: }
617:
618: void divnmain(d1,d2,m1,m2,q)
619: int d1,d2;
620: unsigned int *m1,*m2,*q;
621: {
622: int i,j;
623: UL r,ltmp;
624: unsigned int l,ur,tmp;
625: unsigned int *n1,*n2;
626: unsigned int u,qhat;
627: unsigned int v1,v2;
628:
629: v1 = m2[d2-1]; v2 = m2[d2-2];
630: for ( j = d1-d2, m1 += j, q += j; j >= 0; j--, q--, m1-- ) {
631: n1 = m1+d2; n2 = m1+d2-1;
632: if ( *n1 == v1 ) {
633: qhat = ULBASE - 1;
634: r = (UL)*n1 + (UL)*n2;
635: } else {
636: DSAB(v1,*n1,*n2,qhat,ur)
637: r = (UL)ur;
638: }
639: DM(v2,qhat,u,l)
640: while ( 1 ) {
641: if ((r > (UL)u) || ((r == (UL)u) && (*(n1-2) >= l)))
642: break;
643: if ( l >= v2 )
644: l -= v2;
645: else {
646: l = (unsigned int)((UL)l+(ULBASE-(UL)v2)); u--;
647: }
648: r += v1; qhat--;
649: }
650: if ( qhat ) {
651: u = divn_1(m2,d2,qhat,m1);
652: if ( *(m1+d2) < u ) {
653: for ( i = d2, qhat--, ur = 0, n1 = m1, n2 = m2;
654: i > 0; i--, n1++, n2++ ) {
655: ltmp = (UL)*n1 + (UL)*n2 + (UL)ur;
656: *n1 = (unsigned int)(ltmp & BMASK);
657: ur = (unsigned int)(ltmp >> BSH);
658: }
659: }
660: *n1 = 0;
661: }
662: *q = qhat;
663: }
664: }
665:
666: unsigned int divn_1(p,s,d,r)
667: unsigned int *p;
668: int s;
669: unsigned int d;
670: unsigned int *r;
671: {
672: unsigned int borrow,l;
673:
674: for ( borrow = 0; s--; p++, r++ ) {
675: DMA(*p,d,borrow,borrow,l)
676: if ( *r >= l )
677: *r -= l;
678: else {
679: *r = (unsigned int)((UL)*r+(ULBASE-(UL)l)); borrow++;
680: }
681: }
682: return borrow;
683: }
684: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>