Annotation of OpenXM_contrib/pari/src/kernel/none/level0.h, Revision 1.1.1.1
1.1 maekawa 1: /* $Id: level0.h,v 1.1.1.1 1999/09/16 13:47:55 karim Exp $ */
2:
3: /* This file defines some "level 0" kernel functions */
4: /* These functions can be inline, with gcc */
5: /* If not gcc, they are defined externally with "level0.c" */
6: /* NB: Those functions are not defined in mp.s */
7:
8: /* level0.c includes this file and never needs to be changed */
9: /* The following seven lines are necessary for level0.c and level1.c */
10: #ifdef LEVEL0
11: #undef INLINE
12: #define INLINE
13: #endif
14: #ifdef LEVEL1
15: #undef INLINE
16: #endif
17:
18: #define LOCAL_OVERFLOW
19: #define SAVE_OVERFLOW
20: #define LOCAL_HIREMAINDER
21: #define SAVE_HIREMAINDER
22:
23: #ifndef INLINE
24: BEGINEXTERN
25: extern ulong overflow;
26: extern ulong hiremainder;
27: extern long addll(ulong x, ulong y);
28: extern long addllx(ulong x, ulong y);
29: extern long subll(ulong x, ulong y);
30: extern long subllx(ulong x, ulong y);
31: extern long shiftl(ulong x, ulong y);
32: extern long shiftlr(ulong x, ulong y);
33: extern long mulll(ulong x, ulong y);
34: extern long addmul(ulong x, ulong y);
35: extern long divll(ulong x, ulong y);
36: extern int bfffo(ulong x);
37: ENDEXTERN
38:
39: #else
40:
41: ulong overflow;
42: ulong hiremainder;
43:
44: INLINE long
45: addll(ulong x, ulong y)
46: {
47: const ulong z = x+y;
48: overflow=(z<x);
49: return (long) z;
50: }
51:
52: INLINE long
53: addllx(ulong x, ulong y)
54: {
55: const ulong z = x+y+overflow;
56: overflow = (z<x || (z==x && overflow));
57: return (long) z;
58: }
59:
60: INLINE long
61: subll(ulong x, ulong y)
62: {
63: const ulong z = x-y;
64: overflow = (z>x);
65: return (long) z;
66: }
67:
68: INLINE long
69: subllx(ulong x, ulong y)
70: {
71: const ulong z = x-y-overflow;
72: overflow = (z>x || (z==x && overflow));
73: return (long) z;
74: }
75:
76: INLINE long
77: shiftl(ulong x, ulong y)
78: {
79: hiremainder=x>>(BITS_IN_LONG-y);
80: return (x<<y);
81: }
82:
83: INLINE long
84: shiftlr(ulong x, ulong y)
85: {
86: hiremainder=x<<(BITS_IN_LONG-y);
87: return (x>>y);
88: }
89:
90: #define HIGHWORD(a) ((a) >> BITS_IN_HALFULONG)
91: #define LOWWORD(a) ((a) & LOWMASK)
92:
93: /* Version Peter Montgomery */
94: /*
95: * Assume (for presentation) that BITS_IN_LONG = 32.
96: * Then 0 <= xhi, xlo, yhi, ylo <= 2^16 - 1. Hence
97: *
98: * -2^31 + 2^16 <= (xhi-2^15)*(ylo-2^15) + (xlo-2^15)*(yhi-2^15) <= 2^31.
99: *
100: * If xhi*ylo + xlo*yhi = 2^32*overflow + xymid, then
101: *
102: * -2^32 + 2^16 <= 2^32*overflow + xymid - 2^15*(xhi + ylo + xlo + yhi) <= 0.
103: *
104: * 2^16*overflow <= (xhi+xlo+yhi+ylo)/2 - xymid/2^16 <= 2^16*overflow + 2^16-1
105: *
106: * This inequality was derived using exact (rational) arithmetic;
107: * it remains valid when we truncate the two middle terms.
108: */
109: INLINE long
110: mulll(ulong x, ulong y)
111: {
112: const ulong xlo = LOWWORD(x), xhi = HIGHWORD(x);
113: const ulong ylo = LOWWORD(y), yhi = HIGHWORD(y);
114: ulong xylo,xymid,xyhi,xymidhi,xymidlo;
115:
116: xylo = xlo*ylo; xyhi = xhi*yhi;
117: xymid = (xhi+xlo)*(yhi+ylo) - (xyhi+xylo);
118:
119: xymidhi = HIGHWORD(xymid);
120: xymidlo = xymid << BITS_IN_HALFULONG;
121:
122: xylo += xymidlo;
123: hiremainder = xyhi + xymidhi + (xylo < xymidlo)
124: + (((((xhi+xlo) + (yhi+ylo)) >> 1) - xymidhi) & HIGHMASK);
125:
126: return xylo;
127: }
128:
129: INLINE long
130: addmul(ulong x, ulong y)
131: {
132: const ulong xlo = LOWWORD(x), xhi = HIGHWORD(x);
133: const ulong ylo = LOWWORD(y), yhi = HIGHWORD(y);
134: ulong xylo,xymid,xyhi,xymidhi,xymidlo;
135:
136: xylo = xlo*ylo; xyhi = xhi*yhi;
137: xymid = (xhi+xlo)*(yhi+ylo) - (xyhi+xylo);
138:
139: xylo += hiremainder; xyhi += (xylo < hiremainder);
140:
141: xymidhi = HIGHWORD(xymid);
142: xymidlo = xymid << BITS_IN_HALFULONG;
143:
144: xylo += xymidlo;
145: hiremainder = xyhi + xymidhi + (xylo < xymidlo)
146: + (((((xhi+xlo) + (yhi+ylo)) >> 1) - xymidhi) & HIGHMASK);
147:
148: return xylo;
149: }
150:
151: /* version Peter Montgomery */
152:
153: INLINE int
154: bfffo(ulong x)
155: {
156: int sc;
157: static int tabshi[16]={4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0};
158:
159: sc = BITS_IN_LONG - 4;
160: #ifdef LONG_IS_64BIT
161: if (x & 0xffffffff00000000) {sc -= 32; x >>= 32;}
162: #endif
163: if (x > 0xffffUL) {sc -= 16; x >>= 16;}
164: if (x > 0x00ffUL) {sc -= 8; x >>= 8;}
165: if (x > 0x000fUL) {sc -= 4; x >>= 4;}
166: return sc + tabshi[x];
167: }
168:
169: /* Version Peter Montgomery */
170:
171: INLINE long
172: divll(ulong x, ulong y)
173: {
174: #define GLUE(hi, lo) (((hi) << BITS_IN_HALFULONG) + (lo))
175: #define SPLIT(a, b, c) b = HIGHWORD(a); c = LOWWORD(a)
176:
177: ulong v1, v2, u3, u4, q1, q2, aux, aux1, aux2;
178: int k;
179: if (hiremainder >= y) err(talker, "Invalid arguments to divll");
180: if (hiremainder == 0)
181: { /* Only one division needed */
182: hiremainder = x % y;
183: return x/y;
184: }
185: if (y <= LOWMASK)
186: { /* Use two half-word divisions */
187: hiremainder = GLUE(hiremainder, HIGHWORD(x));
188: q1 = hiremainder / y;
189: hiremainder = GLUE(hiremainder % y, LOWWORD(x));
190: q2 = hiremainder / y;
191: hiremainder = hiremainder % y;
192: return GLUE(q1, q2);
193: }
194: if (y & HIGHBIT) k = 0;
195: else
196: {
197: k = bfffo(y);
198: hiremainder = (hiremainder << k) + (x >> (BITS_IN_LONG - k));
199: x <<= k; y <<= k;
200: }
201: SPLIT(y, v1, v2);
202: SPLIT(x, u3, u4);
203: q1 = hiremainder / v1; if (q1 > LOWMASK) q1 = LOWMASK;
204: hiremainder -= q1 * v1;
205: aux = v2 * q1;
206: again:
207: SPLIT(aux, aux1, aux2);
208: if (aux2 > u3) aux1++;
209: if (aux1 > hiremainder) {q1--; hiremainder += v1; aux -= v2; goto again;}
210:
211: hiremainder = GLUE(hiremainder - aux1, LOWWORD(u3 - aux2));
212: q2 = hiremainder / v1; if (q2 > LOWMASK) q2 = LOWMASK;
213: hiremainder -= q2 * v1;
214: aux = v2 * q2;
215: again2:
216: SPLIT(aux, aux1, aux2);
217: if (aux2 > u4) aux1++;
218: if (aux1 > hiremainder) {q2--; hiremainder += v1; aux -= v2; goto again2;}
219: hiremainder = GLUE(hiremainder - aux1, LOWWORD(u4 - aux2)) >> k;
220: return GLUE(q1, q2);
221: }
222:
223: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>