Annotation of OpenXM_contrib/pari-2.2/src/kernel/none/level0.h, Revision 1.1.1.1
1.1 noro 1: /* $Id: level0.h,v 1.3 2000/11/03 21:00:26 karim Exp $
2:
3: Copyright (C) 2000 The PARI group.
4:
5: This file is part of the PARI/GP package.
6:
7: PARI/GP is free software; you can redistribute it and/or modify it under the
8: terms of the GNU General Public License as published by the Free Software
9: Foundation. It is distributed in the hope that it will be useful, but WITHOUT
10: ANY WARRANTY WHATSOEVER.
11:
12: Check the License for details. You should have received a copy of it, along
13: with the package; see the file 'COPYING'. If not, write to the Free Software
14: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
15:
16: /* This file defines some "level 0" kernel functions */
17: /* These functions can be inline, with gcc */
18: /* If not gcc, they are defined externally with "level0.c" */
19: /* NB: Those functions are not defined in mp.s */
20:
21: /* level0.c includes this file and never needs to be changed */
22: /* The following seven lines are necessary for level0.c and level1.c */
23: #ifdef LEVEL0
24: #undef INLINE
25: #define INLINE
26: #endif
27: #ifdef LEVEL1
28: #undef INLINE
29: #endif
30:
31: #define LOCAL_OVERFLOW
32: #define SAVE_OVERFLOW
33: #define LOCAL_HIREMAINDER
34: #define SAVE_HIREMAINDER
35:
36: #ifndef INLINE
37: BEGINEXTERN
38: extern ulong overflow, hiremainder;
39: extern long addll(ulong x, ulong y);
40: extern long addllx(ulong x, ulong y);
41: extern long subll(ulong x, ulong y);
42: extern long subllx(ulong x, ulong y);
43: extern long shiftl(ulong x, ulong y);
44: extern long shiftlr(ulong x, ulong y);
45: extern long mulll(ulong x, ulong y);
46: extern long addmul(ulong x, ulong y);
47: extern long divll(ulong x, ulong y);
48: extern int bfffo(ulong x);
49: ENDEXTERN
50:
51: #else
52:
53: ulong overflow;
54: ulong hiremainder;
55:
56: INLINE long
57: addll(ulong x, ulong y)
58: {
59: const ulong z = x+y;
60: overflow=(z<x);
61: return (long) z;
62: }
63:
64: INLINE long
65: addllx(ulong x, ulong y)
66: {
67: const ulong z = x+y+overflow;
68: overflow = (z<x || (z==x && overflow));
69: return (long) z;
70: }
71:
72: INLINE long
73: subll(ulong x, ulong y)
74: {
75: const ulong z = x-y;
76: overflow = (z>x);
77: return (long) z;
78: }
79:
80: INLINE long
81: subllx(ulong x, ulong y)
82: {
83: const ulong z = x-y-overflow;
84: overflow = (z>x || (z==x && overflow));
85: return (long) z;
86: }
87:
88: INLINE long
89: shiftl(ulong x, ulong y)
90: {
91: hiremainder=x>>(BITS_IN_LONG-y);
92: return (x<<y);
93: }
94:
95: INLINE long
96: shiftlr(ulong x, ulong y)
97: {
98: hiremainder=x<<(BITS_IN_LONG-y);
99: return (x>>y);
100: }
101:
102: #define HIGHWORD(a) ((a) >> BITS_IN_HALFULONG)
103: #define LOWWORD(a) ((a) & LOWMASK)
104:
105: /* Version Peter Montgomery */
106: /*
107: * Assume (for presentation) that BITS_IN_LONG = 32.
108: * Then 0 <= xhi, xlo, yhi, ylo <= 2^16 - 1. Hence
109: *
110: * -2^31 + 2^16 <= (xhi-2^15)*(ylo-2^15) + (xlo-2^15)*(yhi-2^15) <= 2^31.
111: *
112: * If xhi*ylo + xlo*yhi = 2^32*overflow + xymid, then
113: *
114: * -2^32 + 2^16 <= 2^32*overflow + xymid - 2^15*(xhi + ylo + xlo + yhi) <= 0.
115: *
116: * 2^16*overflow <= (xhi+xlo+yhi+ylo)/2 - xymid/2^16 <= 2^16*overflow + 2^16-1
117: *
118: * This inequality was derived using exact (rational) arithmetic;
119: * it remains valid when we truncate the two middle terms.
120: */
121: INLINE long
122: mulll(ulong x, ulong y)
123: {
124: const ulong xlo = LOWWORD(x), xhi = HIGHWORD(x);
125: const ulong ylo = LOWWORD(y), yhi = HIGHWORD(y);
126: ulong xylo,xymid,xyhi,xymidhi,xymidlo;
127:
128: xylo = xlo*ylo; xyhi = xhi*yhi;
129: xymid = (xhi+xlo)*(yhi+ylo) - (xyhi+xylo);
130:
131: xymidhi = HIGHWORD(xymid);
132: xymidlo = xymid << BITS_IN_HALFULONG;
133:
134: xylo += xymidlo;
135: hiremainder = xyhi + xymidhi + (xylo < xymidlo)
136: + (((((xhi+xlo) + (yhi+ylo)) >> 1) - xymidhi) & HIGHMASK);
137:
138: return xylo;
139: }
140:
141: INLINE long
142: addmul(ulong x, ulong y)
143: {
144: const ulong xlo = LOWWORD(x), xhi = HIGHWORD(x);
145: const ulong ylo = LOWWORD(y), yhi = HIGHWORD(y);
146: ulong xylo,xymid,xyhi,xymidhi,xymidlo;
147:
148: xylo = xlo*ylo; xyhi = xhi*yhi;
149: xymid = (xhi+xlo)*(yhi+ylo) - (xyhi+xylo);
150:
151: xylo += hiremainder; xyhi += (xylo < hiremainder);
152:
153: xymidhi = HIGHWORD(xymid);
154: xymidlo = xymid << BITS_IN_HALFULONG;
155:
156: xylo += xymidlo;
157: hiremainder = xyhi + xymidhi + (xylo < xymidlo)
158: + (((((xhi+xlo) + (yhi+ylo)) >> 1) - xymidhi) & HIGHMASK);
159:
160: return xylo;
161: }
162:
163: /* version Peter Montgomery */
164:
165: INLINE int
166: bfffo(ulong x)
167: {
168: int sc;
169: static int tabshi[16]={4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0};
170:
171: sc = BITS_IN_LONG - 4;
172: #ifdef LONG_IS_64BIT
173: if (x & 0xffffffff00000000) {sc -= 32; x >>= 32;}
174: #endif
175: if (x > 0xffffUL) {sc -= 16; x >>= 16;}
176: if (x > 0x00ffUL) {sc -= 8; x >>= 8;}
177: if (x > 0x000fUL) {sc -= 4; x >>= 4;}
178: return sc + tabshi[x];
179: }
180:
181: /* Version Peter Montgomery */
182:
183: INLINE long
184: divll(ulong x, ulong y)
185: {
186: #define GLUE(hi, lo) (((hi) << BITS_IN_HALFULONG) + (lo))
187: #define SPLIT(a, b, c) b = HIGHWORD(a); c = LOWWORD(a)
188:
189: ulong v1, v2, u3, u4, q1, q2, aux, aux1, aux2;
190: int k;
191: if (hiremainder >= y) err(talker, "Invalid arguments to divll");
192: if (hiremainder == 0)
193: { /* Only one division needed */
194: hiremainder = x % y;
195: return x/y;
196: }
197: if (y <= LOWMASK)
198: { /* Use two half-word divisions */
199: hiremainder = GLUE(hiremainder, HIGHWORD(x));
200: q1 = hiremainder / y;
201: hiremainder = GLUE(hiremainder % y, LOWWORD(x));
202: q2 = hiremainder / y;
203: hiremainder = hiremainder % y;
204: return GLUE(q1, q2);
205: }
206: if (y & HIGHBIT) k = 0;
207: else
208: {
209: k = bfffo(y);
210: hiremainder = (hiremainder << k) + (x >> (BITS_IN_LONG - k));
211: x <<= k; y <<= k;
212: }
213: SPLIT(y, v1, v2);
214: SPLIT(x, u3, u4);
215: q1 = hiremainder / v1; if (q1 > LOWMASK) q1 = LOWMASK;
216: hiremainder -= q1 * v1;
217: aux = v2 * q1;
218: again:
219: SPLIT(aux, aux1, aux2);
220: if (aux2 > u3) aux1++;
221: if (aux1 > hiremainder) {q1--; hiremainder += v1; aux -= v2; goto again;}
222:
223: hiremainder = GLUE(hiremainder - aux1, LOWWORD(u3 - aux2));
224: q2 = hiremainder / v1; if (q2 > LOWMASK) q2 = LOWMASK;
225: hiremainder -= q2 * v1;
226: aux = v2 * q2;
227: again2:
228: SPLIT(aux, aux1, aux2);
229: if (aux2 > u4) aux1++;
230: if (aux1 > hiremainder) {q2--; hiremainder += v1; aux -= v2; goto again2;}
231: hiremainder = GLUE(hiremainder - aux1, LOWWORD(u4 - aux2)) >> k;
232: return GLUE(q1, q2);
233: }
234:
235: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>