Annotation of OpenXM_contrib/gmp/longlong.h, Revision 1.1.1.3
1.1 maekawa 1: /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2:
1.1.1.3 ! ohara 3: Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002 Free
! 4: Software Foundation, Inc.
1.1 maekawa 5:
6: This file is free software; you can redistribute it and/or modify
1.1.1.2 maekawa 7: it under the terms of the GNU Lesser General Public License as published by
8: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 9: option) any later version.
10:
11: This file is distributed in the hope that it will be useful, but
12: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 maekawa 13: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 14: License for more details.
15:
1.1.1.2 maekawa 16: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 17: along with this file; see the file COPYING.LIB. If not, write to
18: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
19: MA 02111-1307, USA. */
20:
21: /* You have to define the following before including this file:
22:
23: UWtype -- An unsigned type, default type for operations (typically a "word")
24: UHWtype -- An unsigned type, at least half the size of UWtype.
25: UDWtype -- An unsigned type, at least twice as large a UWtype
26: W_TYPE_SIZE -- size in bits of UWtype
27:
28: SItype, USItype -- Signed and unsigned 32 bit types.
29: DItype, UDItype -- Signed and unsigned 64 bit types.
30:
31: On a 32 bit machine UWtype should typically be USItype;
32: on a 64 bit machine, UWtype should typically be UDItype.
33: */
34:
/* Half-word decomposition helpers: __ll_B is the half-word radix
   (2^(W_TYPE_SIZE/2)); __ll_lowpart/__ll_highpart extract the low and
   high halves of a full UWtype word.  __BITS4 is a quarter-word bit
   count, used by some count_leading_zeros implementations.  */
35: #define __BITS4 (W_TYPE_SIZE / 4)
36: #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
37: #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
38: #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
39:
40: /* This is used to make sure no undesirable sharing between different libraries
41: that use this file takes place.  __MPN(x) prefixes the external helper
routine names (e.g. __MPN(invert_limb)) referenced by the macros below. */
42: #ifndef __MPN
43: #define __MPN(x) __##x
44: #endif
45:
/* _PROTO(x): wrap a prototype parameter list so the same declaration works
   under both ANSI C / C++ (keep the list) and pre-ANSI K&R compilers (empty
   parentheses).  The (__STDC__-0) form tolerates compilers that leave
   __STDC__ undefined.  */
1.1.1.2 maekawa 46: #ifndef _PROTO
47: #if (__STDC__-0) || defined (__cplusplus)
48: #define _PROTO(x) x
49: #else
50: #define _PROTO(x) ()
51: #endif
52: #endif
53:
1.1 maekawa 54: /* Define auxiliary asm macros.
55:
56: 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two
57: UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
58: word product in HIGH_PROD and LOW_PROD.
59:
60: 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
61: UDWtype product. This is just a variant of umul_ppmm.
62:
63: 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
64: denominator) divides a UDWtype, composed by the UWtype integers
65: HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
66: in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
67: than DENOMINATOR for correct operation. If, in addition, the most
68: significant bit of DENOMINATOR must be 1, then the pre-processor symbol
69: UDIV_NEEDS_NORMALIZATION is defined to 1.
70:
71: 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
72: denominator). Like udiv_qrnnd but the numbers are signed. The quotient
73: is rounded towards 0.
74:
75: 5) count_leading_zeros(count, x) counts the number of zero-bits from the
76: msb to the first non-zero bit in the UWtype X. This is the number of
77: steps X needs to be shifted left to set the msb. Undefined for X == 0,
78: unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
79:
80: 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
81: from the least significant end.
82:
83: 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
84: high_addend_2, low_addend_2) adds two UWtype integers, composed by
85: HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
86: respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
87: (i.e. carry out) is not stored anywhere, and is lost.
88:
89: 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
90: high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
91: composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
92: LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE
93: and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
94: and is lost.
95:
96: If any of these macros are left undefined for a particular CPU,
97: C macros are used. */
98:
99: /* The CPUs come in alphabetical order below.
100:
101: Please add support for more CPUs here, or improve the current support
102: for the CPUs below! */
103:
1.1.1.3 ! ohara 104: /* FIXME: The macros using external routines like __MPN(count_leading_zeros)
! 105: don't need to be under !NO_ASM */
! 106: #if ! defined (NO_ASM)
! 107:
1.1.1.2 maekawa 108: #if defined (__alpha) && W_TYPE_SIZE == 64
1.1.1.3 ! ohara 109: /* Most alpha-based machines, except Cray systems. */
1.1.1.2 maekawa 110: #if defined (__GNUC__)
111: #define umul_ppmm(ph, pl, m0, m1) \
112: do { \
113: UDItype __m0 = (m0), __m1 = (m1); \
114: __asm__ ("umulh %r1,%2,%0" \
115: : "=r" (ph) \
116: : "%rJ" (m0), "rI" (m1)); \
117: (pl) = __m0 * __m1; \
118: } while (0)
119: #define UMUL_TIME 18
1.1.1.3 ! ohara 120: #else /* ! __GNUC__ */
! 121: #include <machine/builtins.h>
! 122: #define umul_ppmm(ph, pl, m0, m1) \
! 123: do { \
! 124: UDItype __m0 = (m0), __m1 = (m1); \
! 125: (ph) = __UMULH (m0, m1); \
! 126: (pl) = __m0 * __m1; \
! 127: } while (0)
! 128: #endif
1.1.1.2 maekawa 129: #ifndef LONGLONG_STANDALONE
130: #define udiv_qrnnd(q, r, n1, n0, d) \
1.1.1.3 ! ohara 131: do { UWtype __di; \
1.1.1.2 maekawa 132: __di = __MPN(invert_limb) (d); \
133: udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
134: } while (0)
1.1.1.3 ! ohara 135: #define UDIV_PREINV_ALWAYS 1
1.1.1.2 maekawa 136: #define UDIV_NEEDS_NORMALIZATION 1
137: #define UDIV_TIME 220
1.1.1.3 ! ohara 138: #endif /* LONGLONG_STANDALONE */
! 139: /* clz_tab is required by mpn/alpha/cntlz.asm, and that file is built for
! 140: all alphas, even though ev67 and ev68 don't need it. */
! 141: #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
! 142: #if defined (__GNUC__) && (HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68)
! 143: #define count_leading_zeros(COUNT,X) \
! 144: __asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X))
! 145: #define count_trailing_zeros(COUNT,X) \
! 146: __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
! 147: #else /* ! (ev67 || ev68) */
! 148: #ifndef LONGLONG_STANDALONE
! 149: #if HAVE_ATTRIBUTE_CONST
! 150: long __MPN(count_leading_zeros) _PROTO ((UDItype)) __attribute__ ((const));
! 151: #else
! 152: long __MPN(count_leading_zeros) _PROTO ((UDItype));
! 153: #endif
1.1.1.2 maekawa 154: #define count_leading_zeros(count, x) \
155: ((count) = __MPN(count_leading_zeros) (x))
156: #endif /* LONGLONG_STANDALONE */
1.1.1.3 ! ohara 157: #endif /* ! (ev67 || ev68) */
! 158: #endif /* __alpha */
! 159:
! 160: #if defined (_CRAY) && W_TYPE_SIZE == 64
! 161: #include <intrinsics.h>
! 162: #define UDIV_PREINV_ALWAYS 1
! 163: #define UDIV_NEEDS_NORMALIZATION 1
! 164: #define UDIV_TIME 220
! 165: long __MPN(count_leading_zeros) _PROTO ((UDItype));
! 166: #define count_leading_zeros(count, x) \
! 167: ((count) = _leadz ((UWtype) (x)))
! 168: #if defined (_CRAYIEEE) /* I.e., Cray T90/ieee, T3D, and T3E */
1.1.1.2 maekawa 169: #define umul_ppmm(ph, pl, m0, m1) \
170: do { \
171: UDItype __m0 = (m0), __m1 = (m1); \
1.1.1.3 ! ohara 172: (ph) = _int_mult_upper (m0, m1); \
1.1.1.2 maekawa 173: (pl) = __m0 * __m1; \
174: } while (0)
1.1.1.3 ! ohara 175: #ifndef LONGLONG_STANDALONE
! 176: #define udiv_qrnnd(q, r, n1, n0, d) \
! 177: do { UWtype __di; \
! 178: __di = __MPN(invert_limb) (d); \
! 179: udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
! 180: } while (0)
! 181: #endif /* LONGLONG_STANDALONE */
! 182: #endif /* _CRAYIEEE */
! 183: #endif /* _CRAY */
1.1.1.2 maekawa 184:
185: #if defined (__hppa) && W_TYPE_SIZE == 64
1.1.1.3 ! ohara 186: #if defined (__GNUC__)
! 187: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
! 188: __asm__ ("add %4,%5,%1\n\tadd,dc %2,%3,%0" \
! 189: : "=r" (sh), "=&r" (sl) \
! 190: : "%rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl))
! 191: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
! 192: __asm__ ("sub %4,%5,%1\n\tsub,db %2,%3,%0" \
! 193: : "=r" (sh), "=&r" (sl) \
! 194: : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl))
! 195: #endif
1.1.1.2 maekawa 196: /* We put the result pointer parameter last here, since it makes passing
197: of the other parameters more efficient. */
198: #ifndef LONGLONG_STANDALONE
199: #define umul_ppmm(wh, wl, u, v) \
200: do { \
1.1.1.3 ! ohara 201: UWtype __p0; \
1.1.1.2 maekawa 202: (wh) = __MPN(umul_ppmm) (u, v, &__p0); \
203: (wl) = __p0; \
204: } while (0)
1.1.1.3 ! ohara 205: extern UWtype __MPN(umul_ppmm) _PROTO ((UWtype, UWtype, UWtype *));
1.1.1.2 maekawa 206: #define udiv_qrnnd(q, r, n1, n0, d) \
1.1.1.3 ! ohara 207: do { UWtype __r; \
1.1.1.2 maekawa 208: (q) = __MPN(udiv_qrnnd) (n1, n0, d, &__r); \
209: (r) = __r; \
210: } while (0)
1.1.1.3 ! ohara 211: extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype, UWtype, UWtype, UWtype *));
1.1.1.2 maekawa 212: #define UMUL_TIME 8
213: #define UDIV_TIME 60
214: #endif /* LONGLONG_STANDALONE */
215: #endif /* hppa */
216:
217: #if defined (__ia64) && W_TYPE_SIZE == 64
218: #if defined (__GNUC__)
219: #define umul_ppmm(ph, pl, m0, m1) \
220: do { \
221: UDItype __m0 = (m0), __m1 = (m1); \
222: __asm__ ("xma.hu %0 = %1, %2, f0" \
1.1.1.3 ! ohara 223: : "=f" (ph) \
! 224: : "f" (m0), "f" (m1)); \
1.1.1.2 maekawa 225: (pl) = __m0 * __m1; \
226: } while (0)
1.1.1.3 ! ohara 227: #define UMUL_TIME 14
! 228: #define count_leading_zeros(count, x) \
! 229: do { \
! 230: UWtype _x = (x), _y, _a, _c; \
! 231: __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
! 232: __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
! 233: _c = (_a - 1) << 3; \
! 234: _x >>= _c; \
! 235: if (_x >= 1 << 4) \
! 236: _x >>= 4, _c += 4; \
! 237: if (_x >= 1 << 2) \
! 238: _x >>= 2, _c += 2; \
! 239: _c += _x >> 1; \
! 240: (count) = W_TYPE_SIZE - 1 - _c; \
! 241: } while (0)
! 242: #endif
! 243: #ifndef LONGLONG_STANDALONE
! 244: #define udiv_qrnnd(q, r, n1, n0, d) \
! 245: do { UWtype __di; \
! 246: __di = __MPN(invert_limb) (d); \
! 247: udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
! 248: } while (0)
! 249: #define UDIV_PREINV_ALWAYS 1
! 250: #define UDIV_NEEDS_NORMALIZATION 1
1.1.1.2 maekawa 251: #endif
1.1.1.3 ! ohara 252: #define UDIV_TIME 220
1.1.1.2 maekawa 253: #endif
254:
255:
1.1.1.3 ! ohara 256: #if defined (__GNUC__)
1.1 maekawa 257:
258: /* We sometimes need to clobber "cc" with gcc2, but that would not be
259: understood by gcc1. Use cpp to avoid major code duplication. */
260: #if __GNUC__ < 2
261: #define __CLOBBER_CC
262: #define __AND_CLOBBER_CC
263: #else /* __GNUC__ >= 2 */
264: #define __CLOBBER_CC : "cc"
265: #define __AND_CLOBBER_CC , "cc"
266: #endif /* __GNUC__ < 2 */
267:
268: #if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
269: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 270: __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3" \
271: : "=r" (sh), "=&r" (sl) \
272: : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
1.1 maekawa 273: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 274: __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3" \
1.1.1.3 ! ohara 275: : "=r" (sh), "=&r" (sl) \
1.1.1.2 maekawa 276: : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
1.1 maekawa 277: #define umul_ppmm(xh, xl, m0, m1) \
278: do { \
279: USItype __m0 = (m0), __m1 = (m1); \
280: __asm__ ("multiplu %0,%1,%2" \
1.1.1.2 maekawa 281: : "=r" (xl) \
282: : "r" (__m0), "r" (__m1)); \
1.1 maekawa 283: __asm__ ("multmu %0,%1,%2" \
1.1.1.2 maekawa 284: : "=r" (xh) \
285: : "r" (__m0), "r" (__m1)); \
1.1 maekawa 286: } while (0)
287: #define udiv_qrnnd(q, r, n1, n0, d) \
288: __asm__ ("dividu %0,%3,%4" \
1.1.1.2 maekawa 289: : "=r" (q), "=q" (r) \
290: : "1" (n1), "r" (n0), "r" (d))
1.1 maekawa 291: #define count_leading_zeros(count, x) \
292: __asm__ ("clz %0,%1" \
1.1.1.2 maekawa 293: : "=r" (count) \
294: : "r" (x))
1.1 maekawa 295: #define COUNT_LEADING_ZEROS_0 32
296: #endif /* __a29k__ */
297:
1.1.1.3 ! ohara 298: #if defined (__arc__)
! 299: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
! 300: __asm__ ("add.f\t%1, %4, %5\n\tadc\t%0, %2, %3" \
! 301: : "=r" ((USItype) (sh)), \
! 302: "=&r" ((USItype) (sl)) \
! 303: : "%r" ((USItype) (ah)), \
! 304: "rIJ" ((USItype) (bh)), \
! 305: "%r" ((USItype) (al)), \
! 306: "rIJ" ((USItype) (bl)))
! 307: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
! 308: __asm__ ("sub.f\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
! 309: : "=r" ((USItype) (sh)), \
! 310: "=&r" ((USItype) (sl)) \
! 311: : "r" ((USItype) (ah)), \
! 312: "rIJ" ((USItype) (bh)), \
! 313: "r" ((USItype) (al)), \
! 314: "rIJ" ((USItype) (bl)))
! 315: #endif
! 316:
1.1 maekawa 317: #if defined (__arm__) && W_TYPE_SIZE == 32
318: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 319: __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
320: : "=r" (sh), "=&r" (sl) \
1.1.1.3 ! ohara 321: : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
1.1 maekawa 322: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.3 ! ohara 323: do { \
! 324: if (__builtin_constant_p (al)) \
! 325: { \
! 326: if (__builtin_constant_p (ah)) \
! 327: __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
! 328: : "=r" (sh), "=&r" (sl) \
! 329: : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
! 330: else \
! 331: __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \
! 332: : "=r" (sh), "=&r" (sl) \
! 333: : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
! 334: } \
! 335: else if (__builtin_constant_p (ah)) \
! 336: { \
! 337: if (__builtin_constant_p (bl)) \
! 338: __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
! 339: : "=r" (sh), "=&r" (sl) \
! 340: : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
! 341: else \
! 342: __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
! 343: : "=r" (sh), "=&r" (sl) \
! 344: : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
! 345: } \
! 346: else if (__builtin_constant_p (bl)) \
! 347: { \
! 348: if (__builtin_constant_p (bh)) \
! 349: __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
! 350: : "=r" (sh), "=&r" (sl) \
! 351: : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
! 352: else \
! 353: __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
! 354: : "=r" (sh), "=&r" (sl) \
! 355: : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
! 356: } \
! 357: else /* only bh might be a constant */ \
! 358: __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
! 359: : "=r" (sh), "=&r" (sl) \
! 360: : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
! 361: } while (0)
! 362: #if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
1.1.1.2 maekawa 363: #define umul_ppmm(xh, xl, a, b) \
364: __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
1.1.1.3 ! ohara 365: #define UMUL_TIME 5
1.1.1.2 maekawa 366: #define smul_ppmm(xh, xl, a, b) \
367: __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
1.1.1.3 ! ohara 368: #ifndef LONGLONG_STANDALONE
! 369: #define udiv_qrnnd(q, r, n1, n0, d) \
! 370: do { UWtype __di; \
! 371: __di = __MPN(invert_limb) (d); \
! 372: udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
! 373: } while (0)
! 374: #define UDIV_PREINV_ALWAYS 1
! 375: #define UDIV_NEEDS_NORMALIZATION 1
! 376: #define UDIV_TIME 70
! 377: #endif /* LONGLONG_STANDALONE */
1.1.1.2 maekawa 378: #else
1.1 maekawa 379: #define umul_ppmm(xh, xl, a, b) \
1.1.1.3 ! ohara 380: __asm__ ("%@ Inlined umul_ppmm\n" \
! 381: " mov %|r0, %2, lsr #16\n" \
! 382: " mov %|r2, %3, lsr #16\n" \
! 383: " bic %|r1, %2, %|r0, lsl #16\n" \
! 384: " bic %|r2, %3, %|r2, lsl #16\n" \
! 385: " mul %1, %|r1, %|r2\n" \
! 386: " mul %|r2, %|r0, %|r2\n" \
! 387: " mul %|r1, %0, %|r1\n" \
! 388: " mul %0, %|r0, %0\n" \
! 389: " adds %|r1, %|r2, %|r1\n" \
! 390: " addcs %0, %0, #65536\n" \
! 391: " adds %1, %1, %|r1, lsl #16\n" \
! 392: " adc %0, %0, %|r1, lsr #16" \
1.1.1.2 maekawa 393: : "=&r" (xh), "=r" (xl) \
394: : "r" (a), "r" (b) \
1.1 maekawa 395: : "r0", "r1", "r2")
396: #define UMUL_TIME 20
1.1.1.3 ! ohara 397: #ifndef LONGLONG_STANDALONE
! 398: #define udiv_qrnnd(q, r, n1, n0, d) \
! 399: do { UWtype __r; \
! 400: (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
! 401: (r) = __r; \
! 402: } while (0)
! 403: extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
! 404: #define UDIV_TIME 200
! 405: #endif /* LONGLONG_STANDALONE */
1.1.1.2 maekawa 406: #endif
1.1 maekawa 407: #endif /* __arm__ */
408:
409: #if defined (__clipper__) && W_TYPE_SIZE == 32
410: #define umul_ppmm(w1, w0, u, v) \
411: ({union {UDItype __ll; \
412: struct {USItype __l, __h;} __i; \
1.1.1.2 maekawa 413: } __x; \
1.1 maekawa 414: __asm__ ("mulwux %2,%0" \
1.1.1.2 maekawa 415: : "=r" (__x.__ll) \
416: : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
417: (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
1.1 maekawa 418: #define smul_ppmm(w1, w0, u, v) \
419: ({union {DItype __ll; \
420: struct {SItype __l, __h;} __i; \
1.1.1.2 maekawa 421: } __x; \
1.1 maekawa 422: __asm__ ("mulwx %2,%0" \
1.1.1.2 maekawa 423: : "=r" (__x.__ll) \
424: : "%0" ((SItype)(u)), "r" ((SItype)(v))); \
425: (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
1.1 maekawa 426: #define __umulsidi3(u, v) \
427: ({UDItype __w; \
428: __asm__ ("mulwux %2,%0" \
1.1.1.2 maekawa 429: : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
1.1 maekawa 430: __w; })
431: #endif /* __clipper__ */
432:
1.1.1.2 maekawa 433: /* Fujitsu vector computers. */
434: #if defined (__uxp__) && W_TYPE_SIZE == 32
435: #define umul_ppmm(ph, pl, u, v) \
436: do { \
437: union {UDItype __ll; \
438: struct {USItype __h, __l;} __i; \
439: } __x; \
440: __asm__ ("mult.lu %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));\
441: (ph) = __x.__i.__h; \
442: (pl) = __x.__i.__l; \
443: } while (0)
444: #define smul_ppmm(ph, pl, u, v) \
445: do { \
446: union {UDItype __ll; \
447: struct {USItype __h, __l;} __i; \
448: } __x; \
449: __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \
450: (ph) = __x.__i.__h; \
451: (pl) = __x.__i.__l; \
452: } while (0)
453: #endif
454:
1.1 maekawa 455: #if defined (__gmicro__) && W_TYPE_SIZE == 32
456: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 457: __asm__ ("add.w %5,%1\n\taddx %3,%0" \
458: : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
459: : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
460: "%1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 461: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 462: __asm__ ("sub.w %5,%1\n\tsubx %3,%0" \
463: : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
464: : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
465: "1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 466: #define umul_ppmm(ph, pl, m0, m1) \
467: __asm__ ("mulx %3,%0,%1" \
1.1.1.2 maekawa 468: : "=g" ((USItype)(ph)), "=r" ((USItype)(pl)) \
469: : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
1.1 maekawa 470: #define udiv_qrnnd(q, r, nh, nl, d) \
471: __asm__ ("divx %4,%0,%1" \
1.1.1.2 maekawa 472: : "=g" ((USItype)(q)), "=r" ((USItype)(r)) \
473: : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
1.1 maekawa 474: #define count_leading_zeros(count, x) \
475: __asm__ ("bsch/1 %1,%0" \
1.1.1.2 maekawa 476: : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0))
1.1 maekawa 477: #endif
478:
479: #if defined (__hppa) && W_TYPE_SIZE == 32
480: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 481: __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
482: : "=r" (sh), "=&r" (sl) \
483: : "%rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl))
1.1 maekawa 484: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 485: __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
486: : "=r" (sh), "=&r" (sl) \
487: : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl))
1.1 maekawa 488: #if defined (_PA_RISC1_1)
489: #define umul_ppmm(wh, wl, u, v) \
490: do { \
491: union {UDItype __ll; \
492: struct {USItype __h, __l;} __i; \
1.1.1.2 maekawa 493: } __x; \
494: __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v)); \
495: (wh) = __x.__i.__h; \
496: (wl) = __x.__i.__l; \
1.1 maekawa 497: } while (0)
498: #define UMUL_TIME 8
499: #define UDIV_TIME 60
500: #else
501: #define UMUL_TIME 40
502: #define UDIV_TIME 80
503: #endif
504: #ifndef LONGLONG_STANDALONE
505: #define udiv_qrnnd(q, r, n1, n0, d) \
1.1.1.3 ! ohara 506: do { UWtype __r; \
1.1.1.2 maekawa 507: (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
1.1 maekawa 508: (r) = __r; \
509: } while (0)
1.1.1.3 ! ohara 510: extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
1.1 maekawa 511: #endif /* LONGLONG_STANDALONE */
512: #define count_leading_zeros(count, x) \
513: do { \
514: USItype __tmp; \
515: __asm__ ( \
1.1.1.3 ! ohara 516: "ldi 1,%0\n" \
! 517: " extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
! 518: " extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \
! 519: " ldo 16(%0),%0 ; Yes. Perform add.\n" \
! 520: " extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
! 521: " extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \
! 522: " ldo 8(%0),%0 ; Yes. Perform add.\n" \
! 523: " extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
! 524: " extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \
! 525: " ldo 4(%0),%0 ; Yes. Perform add.\n" \
! 526: " extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
! 527: " extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \
! 528: " ldo 2(%0),%0 ; Yes. Perform add.\n" \
! 529: " extru %1,30,1,%1 ; Extract bit 1.\n" \
! 530: " sub %0,%1,%0 ; Subtract it.\n" \
! 531: : "=r" (count), "=r" (__tmp) : "1" (x)); \
1.1 maekawa 532: } while (0)
533: #endif /* hppa */
534:
1.1.1.3 ! ohara 535: #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
1.1 maekawa 536: #define smul_ppmm(xh, xl, m0, m1) \
537: do { \
538: union {DItype __ll; \
539: struct {USItype __h, __l;} __i; \
1.1.1.2 maekawa 540: } __x; \
1.1.1.3 ! ohara 541: __asm__ ("lr %N0,%1\n\tmr %0,%2" \
! 542: : "=&r" (__x.__ll) \
! 543: : "r" (m0), "r" (m1)); \
1.1.1.2 maekawa 544: (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
1.1 maekawa 545: } while (0)
546: #define sdiv_qrnnd(q, r, n1, n0, d) \
547: do { \
548: union {DItype __ll; \
549: struct {USItype __h, __l;} __i; \
1.1.1.2 maekawa 550: } __x; \
551: __x.__i.__h = n1; __x.__i.__l = n0; \
1.1 maekawa 552: __asm__ ("dr %0,%2" \
1.1.1.2 maekawa 553: : "=r" (__x.__ll) \
554: : "0" (__x.__ll), "r" (d)); \
555: (q) = __x.__i.__l; (r) = __x.__i.__h; \
1.1 maekawa 556: } while (0)
557: #endif
558:
559: #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
560: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 561: __asm__ ("addl %5,%1\n\tadcl %3,%0" \
562: : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
563: : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
564: "%1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 565: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 566: __asm__ ("subl %5,%1\n\tsbbl %3,%0" \
567: : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
568: : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
569: "1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 570: #define umul_ppmm(w1, w0, u, v) \
571: __asm__ ("mull %3" \
1.1.1.2 maekawa 572: : "=a" (w0), "=d" (w1) \
573: : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
1.1.1.3 ! ohara 574: #define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
! 575: __asm__ ("divl %4" /* stringification in K&R C */ \
1.1.1.2 maekawa 576: : "=a" (q), "=d" (r) \
1.1.1.3 ! ohara 577: : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx)))
! 578:
! 579: /* P5 bsrl takes between 10 and 72 cycles depending where the most
! 580: significant 1 bit is, hence the use of the alternatives below. bsfl is
! 581: slow too, between 18 and 42 depending where the least significant 1 bit
! 582: is. The faster count_leading_zeros are pressed into service via the
! 583: generic count_trailing_zeros at the end of the file. */
! 584:
! 585: #if HAVE_HOST_CPU_i586 || HAVE_HOST_CPU_pentium
! 586:
! 587: /* The following should be a fixed 14 cycles or so. Some scheduling
! 588: opportunities should be available between the float load/store too. This
! 589: is used (with "n&-n" to get trailing zeros) in gcc 3 for __builtin_ffs
! 590: and is apparently suggested by the Intel optimizing manual (don't know
! 591: exactly where). gcc 2.95 or up will be best for this, so the "double" is
! 592: correctly aligned on the stack. */
! 593:
! 594: #define count_leading_zeros(c,n) \
! 595: do { \
! 596: union { \
! 597: double d; \
! 598: unsigned a[2]; \
! 599: } __u; \
! 600: ASSERT ((n) != 0); \
! 601: __u.d = (UWtype) (n); \
! 602: (c) = 0x3FF + 31 - (__u.a[1] >> 20); \
! 603: } while (0)
! 604: #define COUNT_LEADING_ZEROS_0 (0x3FF + 31)
! 605:
! 606: #else /* ! pentium */
! 607: #if HAVE_HOST_CPU_pentiummmx
! 608:
! 609: /* The following should be a fixed 14 or 15 cycles, but possibly plus an L1
! 610: cache miss reading from __clz_tab. It's favoured over the float above so
! 611: as to avoid mixing MMX and x87, since the penalty for switching between
! 612: the two is about 100 cycles.
! 613:
! 614: The asm block sets __shift to -3 if the high 24 bits are clear, -2 for
! 615: 16, -1 for 8, or 0 otherwise. This could be written equivalently as
! 616: follows, but as of gcc 2.95.2 it results in conditional jumps.
! 617:
! 618: __shift = -(__n < 0x1000000);
! 619: __shift -= (__n < 0x10000);
! 620: __shift -= (__n < 0x100);
! 621:
! 622: The middle two sbbl and cmpl's pair, and with luck something gcc
! 623: generates might pair with the first cmpl and the last sbbl. The "32+1"
! 624: constant could be folded into __clz_tab[], but it doesn't seem worth
! 625: making a different table just for that. */
! 626:
! 627: #define count_leading_zeros(c,n) \
! 628: do { \
! 629: USItype __n = (n); \
! 630: USItype __shift; \
! 631: __asm__ ("cmpl $0x1000000, %1\n" \
! 632: "sbbl %0, %0\n" \
! 633: "cmpl $0x10000, %1\n" \
! 634: "sbbl $0, %0\n" \
! 635: "cmpl $0x100, %1\n" \
! 636: "sbbl $0, %0\n" \
! 637: : "=&r" (__shift) : "r" (__n)); \
! 638: __shift = __shift*8 + 24 + 1; \
! 639: (c) = 32 + 1 - __shift - __clz_tab[__n >> __shift]; \
! 640: } while (0)
! 641:
! 642: #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
! 643: #define COUNT_LEADING_ZEROS_0 31 /* n==0 indistinguishable from n==1 */
! 644:
! 645: #else /* !pentiummmx */
! 646: /* On P6, gcc prior to 3.0 generates a partial register stall for
! 647: __cbtmp^31, due to using "xorb $31" instead of "xorl $31", the former
! 648: being 1 code byte smaller. "31-__cbtmp" is a workaround, probably at the
! 649: cost of one extra instruction. Do this for "i386" too, since that means
! 650: generic x86. */
! 651: #if __GNUC__ < 3 \
! 652: && (HAVE_HOST_CPU_i386 \
! 653: || HAVE_HOST_CPU_i686 \
! 654: || HAVE_HOST_CPU_pentiumpro \
! 655: || HAVE_HOST_CPU_pentium2 \
! 656: || HAVE_HOST_CPU_pentium3)
! 657: #define count_leading_zeros(count, x) \
1.1 maekawa 658: do { \
659: USItype __cbtmp; \
1.1.1.3 ! ohara 660: ASSERT ((x) != 0); \
! 661: __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
! 662: (count) = 31 - __cbtmp; \
! 663: } while (0)
! 664: #else
! 665: #define count_leading_zeros(count, x) \
! 666: do { \
! 667: USItype __cbtmp; \
! 668: ASSERT ((x) != 0); \
1.1.1.2 maekawa 669: __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
1.1 maekawa 670: (count) = __cbtmp ^ 31; \
671: } while (0)
1.1.1.3 ! ohara 672: #endif
! 673:
! 674: #define count_trailing_zeros(count, x) \
! 675: do { \
! 676: ASSERT ((x) != 0); \
! 677: __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))); \
! 678: } while (0)
! 679: #endif /* ! pentiummmx */
! 680: #endif /* ! pentium */
! 681:
1.1 maekawa 682: #ifndef UMUL_TIME
1.1.1.2 maekawa 683: #define UMUL_TIME 10
1.1 maekawa 684: #endif
685: #ifndef UDIV_TIME
686: #define UDIV_TIME 40
687: #endif
688: #endif /* 80x86 */
689:
1.1.1.3 ! ohara 690: #if defined (__x86_64__) && W_TYPE_SIZE == 64
! 691: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
! 692: __asm__ ("addq %5,%1\n\tadcq %3,%0" \
! 693: : "=r" ((UDItype)(sh)), "=&r" ((UDItype)(sl)) \
! 694: : "%0" ((UDItype)(ah)), "g" ((UDItype)(bh)), \
! 695: "%1" ((UDItype)(al)), "g" ((UDItype)(bl)))
! 696: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
! 697: __asm__ ("subq %5,%1\n\tsbbq %3,%0" \
! 698: : "=r" ((UDItype)(sh)), "=&r" ((UDItype)(sl)) \
! 699: : "0" ((UDItype)(ah)), "g" ((UDItype)(bh)), \
! 700: "1" ((UDItype)(al)), "g" ((UDItype)(bl)))
! 701: #define umul_ppmm(w1, w0, u, v) \
! 702: __asm__ ("mulq %3" \
! 703: : "=a" (w0), "=d" (w1) \
! 704: : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
! 705: #define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
! 706: __asm__ ("divq %4" /* stringification in K&R C */ \
! 707: : "=a" (q), "=d" (r) \
! 708: : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
! 709: #define count_leading_zeros(count, x) \
! 710: do { \
! 711: UDItype __cbtmp; \
! 712: ASSERT ((x) != 0); \
! 713: __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
! 714: (count) = __cbtmp ^ 63; \
! 715: } while (0)
! 716: #define count_trailing_zeros(count, x) \
! 717: do { \
! 718: ASSERT ((x) != 0); \
! 719: __asm__ ("bsfq %1,%0" : "=r" (count) : "rm" ((UDItype)(x))); \
! 720: } while (0)
! 721: #endif /* x86_64 */
! 722:
#if defined (__i860__) && W_TYPE_SIZE == 32
/* rshift_rhlc(r,h,l,c): r = (h:l) >> c, i.e. a double-word funnel shift
   right by c bits, low word of the result only.
   BUG FIX: the original was missing the ":" separating the asm template
   from its output operand, so "=r" (r) was string-concatenated into the
   template and any expansion of this macro was a syntax error.  */
#define rshift_rhlc(r,h,l,c) \
  __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \
	   : "=r" (r) : "r" (h), "r" (l), "rn" (c))
#endif /* i860 */
728:
#if defined (__i960__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "%dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl))
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
	   struct {USItype __l, __h;} __i; \
	  } __x; \
  __asm__ ("emul %2,%1,%0" \
	   : "=d" (__x.__ll) : "%dI" (u), "dI" (v)); \
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#define __umulsidi3(u, v) \
  ({UDItype __w; \
    __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v)); \
    __w; })
/* BUG FIX: the result union __rq was used below but never declared (only
   __nn was), so any expansion of udiv_qrnnd failed to compile.  Declare
   __rq alongside __nn.  */
#define udiv_qrnnd(q, r, nh, nl, d) \
  do { \
    union {UDItype __ll; \
	   struct {USItype __l, __h;} __i; \
	  } __nn, __rq; \
    __nn.__i.__h = (nh); __nn.__i.__l = (nl); \
    __asm__ ("ediv %d,%n,%0" \
	     : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \
    (r) = __rq.__i.__l; (q) = __rq.__i.__h; \
  } while (0)
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x)); \
    (count) = __cbtmp ^ 31; \
  } while (0)
#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
#if defined (__i960mx)  /* what is the proper symbol to test??? */
/* BUG FIX: this do { ... } block lacked the closing "while (0)", breaking
   the statement-macro idiom (a trailing ";" after an expansion inside
   if/else would misparse).  */
#define rshift_rhlc(r,h,l,c) \
  do { \
    union {UDItype __ll; \
	   struct {USItype __l, __h;} __i; \
	  } __nn; \
    __nn.__i.__h = (h); __nn.__i.__l = (l); \
    __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
  } while (0)
#endif /* i960mx */
#endif /* i960 */
777:
1.1.1.2 maekawa 778: #if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
779: || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
780: || defined (__mc5307__)) && W_TYPE_SIZE == 32
1.1 maekawa 781: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 782: __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
783: : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
784: : "%0" ((USItype)(ah)), "d" ((USItype)(bh)), \
785: "%1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 786: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 787: __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
788: : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
789: : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \
790: "1" ((USItype)(al)), "g" ((USItype)(bl)))
791: /* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r. */
792: #if defined (__mc68020__) || defined(mc68020) \
793: || defined (__mc68030__) || defined (mc68030) \
794: || defined (__mc68040__) || defined (mc68040) \
1.1.1.3 ! ohara 795: || defined (__mcpu32__) || defined (mcpu32) \
1.1.1.2 maekawa 796: || defined (__NeXT__)
1.1 maekawa 797: #define umul_ppmm(w1, w0, u, v) \
798: __asm__ ("mulu%.l %3,%1:%0" \
1.1.1.2 maekawa 799: : "=d" ((USItype)(w0)), "=d" ((USItype)(w1)) \
800: : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
1.1 maekawa 801: #define UMUL_TIME 45
802: #define udiv_qrnnd(q, r, n1, n0, d) \
803: __asm__ ("divu%.l %4,%1:%0" \
1.1.1.2 maekawa 804: : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
805: : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
1.1 maekawa 806: #define UDIV_TIME 90
807: #define sdiv_qrnnd(q, r, n1, n0, d) \
808: __asm__ ("divs%.l %4,%1:%0" \
1.1.1.2 maekawa 809: : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
810: : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
811: #else /* for other 68k family members use 16x16->32 multiplication */
1.1 maekawa 812: #define umul_ppmm(xh, xl, a, b) \
813: do { USItype __umul_tmp1, __umul_tmp2; \
1.1.1.3 ! ohara 814: __asm__ ("| Inlined umul_ppmm\n" \
! 815: " move%.l %5,%3\n" \
! 816: " move%.l %2,%0\n" \
! 817: " move%.w %3,%1\n" \
! 818: " swap %3\n" \
! 819: " swap %0\n" \
! 820: " mulu%.w %2,%1\n" \
! 821: " mulu%.w %3,%0\n" \
! 822: " mulu%.w %2,%3\n" \
! 823: " swap %2\n" \
! 824: " mulu%.w %5,%2\n" \
! 825: " add%.l %3,%2\n" \
! 826: " jcc 1f\n" \
! 827: " add%.l %#0x10000,%0\n" \
! 828: "1: move%.l %2,%3\n" \
! 829: " clr%.w %2\n" \
! 830: " swap %2\n" \
! 831: " swap %3\n" \
! 832: " clr%.w %3\n" \
! 833: " add%.l %3,%1\n" \
! 834: " addx%.l %2,%0\n" \
! 835: " | End inlined umul_ppmm" \
1.1 maekawa 836: : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
1.1.1.3 ! ohara 837: "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
1.1 maekawa 838: : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
839: } while (0)
840: #define UMUL_TIME 100
841: #define UDIV_TIME 400
842: #endif /* not mc68020 */
1.1.1.2 maekawa 843: /* The '020, '030, '040 and '060 have bitfield insns. */
844: #if defined (__mc68020__) || defined (mc68020) \
845: || defined (__mc68030__) || defined (mc68030) \
846: || defined (__mc68040__) || defined (mc68040) \
847: || defined (__mc68060__) || defined (mc68060) \
848: || defined (__NeXT__)
849: #define count_leading_zeros(count, x) \
850: __asm__ ("bfffo %1{%b2:%b2},%0" \
851: : "=d" ((USItype) (count)) \
852: : "od" ((USItype) (x)), "n" (0))
853: #define COUNT_LEADING_ZEROS_0 32
854: #endif
1.1 maekawa 855: #endif /* mc68000 */
856:
#if defined (__m88000__) && W_TYPE_SIZE == 32
/* Two-word add/sub: .co generates carry-out from the low words,
   .ci consumes it in the high-word operation.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
	   : "=r" (sh), "=&r" (sl) \
	   : "%rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
	   : "=r" (sh), "=&r" (sl) \
	   : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl))
/* ff1 yields the bit number of the highest set bit; XOR with 31 turns
   that into a leading-zero count.  */
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x)); \
    (count) = __cbtmp ^ 31; \
  } while (0)
/* 63 follows from ff1's all-ones result for a zero input (32 ^ 31
   pattern does not apply) -- kept "sic" from the original.  */
#define COUNT_LEADING_ZEROS_0 63 /* sic */
#if defined (__m88110__)
/* The 88110 has a full 64-bit multiply (mulu.d); split the product
   back into words through a union.  */
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union {UDItype __ll; \
	   struct {USItype __h, __l;} __i; \
	  } __x; \
    __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
    (wh) = __x.__i.__h; \
    (wl) = __x.__i.__l; \
  } while (0)
883: #define udiv_qrnnd(q, r, n1, n0, d) \
884: ({union {UDItype __ll; \
885: struct {USItype __h, __l;} __i; \
1.1.1.2 maekawa 886: } __x, __q; \
887: __x.__i.__h = (n1); __x.__i.__l = (n0); \
1.1 maekawa 888: __asm__ ("divu.d %0,%1,%2" \
1.1.1.2 maekawa 889: : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
890: (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
1.1 maekawa 891: #define UMUL_TIME 5
892: #define UDIV_TIME 25
893: #else
894: #define UMUL_TIME 17
895: #define UDIV_TIME 150
896: #endif /* __m88110__ */
897: #endif /* __m88000__ */
898:
1.1.1.2 maekawa 899: #if defined (__mips) && W_TYPE_SIZE == 32
1.1 maekawa 900: #if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
901: #define umul_ppmm(w1, w0, u, v) \
1.1.1.2 maekawa 902: __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
1.1 maekawa 903: #else
904: #define umul_ppmm(w1, w0, u, v) \
1.1.1.2 maekawa 905: __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \
906: : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
1.1 maekawa 907: #endif
908: #define UMUL_TIME 10
909: #define UDIV_TIME 100
1.1.1.2 maekawa 910: #endif /* __mips */
1.1 maekawa 911:
#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
/* 64-bit variant of the block above, using dmultu.  */
#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmultu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
#else
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1" \
	   : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
#endif
#define UMUL_TIME 20
#define UDIV_TIME 140
#endif /* __mips */
1.1 maekawa 924:
#if defined (__ns32000__) && W_TYPE_SIZE == 32
/* meid: extended 32x32->64 multiply into a register pair.  */
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
	   struct {USItype __l, __h;} __i; \
	  } __x; \
  __asm__ ("meid %2,%0" \
	   : "=g" (__x.__ll) \
	   : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#define __umulsidi3(u, v) \
  ({UDItype __w; \
  __asm__ ("meid %2,%0" \
	   : "=g" (__w) \
	   : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
  __w; })
/* deid: extended 64/32 divide; per the reads below it leaves the
   remainder in the low half and the quotient in the high half.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
	   struct {USItype __l, __h;} __i; \
	  } __x; \
  __x.__i.__h = (n1); __x.__i.__l = (n0); \
  __asm__ ("deid %2,%0" \
	   : "=g" (__x.__ll) \
	   : "0" (__x.__ll), "g" ((USItype)(d))); \
  (r) = __x.__i.__l; (q) = __x.__i.__h; })
/* ffsd: find first set bit; the "0" input seeds the search offset.  */
#define count_trailing_zeros(count,x) \
  do { \
    __asm__ ("ffsd %2,%0" \
	     : "=r" ((USItype) (count)) \
	     : "0" ((USItype) 0), "r" ((USItype) (x))); \
  } while (0)
#endif /* __ns32000__ */
956:
1.1.1.3 ! ohara 957: /* FIXME: We should test _IBMR2 here when we add assembly support for the
! 958: system vendor compilers.
! 959: FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good
! 960: enough, since that hits ARM and m68k too. */
! 961: #if (defined (_ARCH_PPC) /* AIX */ \
! 962: || defined (_ARCH_PWR) /* AIX */ \
! 963: || defined (__powerpc__) /* gcc */ \
! 964: || defined (__POWERPC__) /* BEOS */ \
! 965: || defined (__ppc__) /* Darwin */ \
! 966: || defined (PPC) /* GNU/Linux, SysV */ \
! 967: ) && W_TYPE_SIZE == 32
1.1 maekawa 968: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
969: do { \
970: if (__builtin_constant_p (bh) && (bh) == 0) \
971: __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
1.1.1.3 ! ohara 972: : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
1.1.1.2 maekawa 973: else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
1.1 maekawa 974: __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
1.1.1.3 ! ohara 975: : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
1.1 maekawa 976: else \
977: __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
1.1.1.2 maekawa 978: : "=r" (sh), "=&r" (sl) \
979: : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
1.1 maekawa 980: } while (0)
981: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
982: do { \
983: if (__builtin_constant_p (ah) && (ah) == 0) \
984: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
1.1.1.2 maekawa 985: : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
986: else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
1.1 maekawa 987: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
1.1.1.2 maekawa 988: : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
1.1 maekawa 989: else if (__builtin_constant_p (bh) && (bh) == 0) \
990: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
1.1.1.2 maekawa 991: : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
992: else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
1.1 maekawa 993: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
1.1.1.2 maekawa 994: : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
1.1 maekawa 995: else \
996: __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
1.1.1.2 maekawa 997: : "=r" (sh), "=&r" (sl) \
998: : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
1.1 maekawa 999: } while (0)
1000: #define count_leading_zeros(count, x) \
1.1.1.2 maekawa 1001: __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
1.1 maekawa 1002: #define COUNT_LEADING_ZEROS_0 32
1.1.1.3 ! ohara 1003: #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
! 1004: || defined (__ppc__) || defined (PPC) || defined (__vxworks__)
1.1 maekawa 1005: #define umul_ppmm(ph, pl, m0, m1) \
1006: do { \
1007: USItype __m0 = (m0), __m1 = (m1); \
1.1.1.2 maekawa 1008: __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
1.1 maekawa 1009: (pl) = __m0 * __m1; \
1010: } while (0)
1011: #define UMUL_TIME 15
/* Signed 32x32->64: high half via mulhw.
   Bug fix: use the local copies __m0/__m1 inside the asm so m0/m1 are
   evaluated exactly once (previously they were re-evaluated).  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    SItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
#else
/* Plain POWER (not PowerPC): mul/div put half the result in the "q"
   register class (presumably MQ -- confirm against the rs6000
   backend).  */
#define UMUL_TIME 8
#define smul_ppmm(xh, xl, m0, m1) \
  __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
#define SMUL_TIME 4
#define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
#define UDIV_TIME 100
#endif
#endif /* 32-bit POWER architecture variants.  */
1030:
/* We should test _IBMR2 here when we add assembly support for the system
   vendor compilers.  */
#if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64
/* 64-bit twins of the 32-bit versions above: same addze/addme/adde and
   subfze/subfme/subfe special-casing on constant high words, with
   UDItype instead of USItype in the ~0 tests.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else \
      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
	     : "=r" (sh), "=&r" (sl) \
	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else \
      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
	       : "=r" (sh), "=&r" (sl) \
	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 64
/* 64x64->128: high limb via mulhdu, low limb in plain C.
   Bug fix: use the local copies __m0/__m1 inside the asm so the macro
   arguments are evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
1076: #define smul_ppmm(ph, pl, m0, m1) \
1077: do { \
1078: DItype __m0 = (m0), __m1 = (m1); \
1079: __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
1080: (pl) = __m0 * __m1; \
1081: } while (0)
1082: #define SMUL_TIME 14 /* ??? */
1083: #define UDIV_TIME 120 /* ??? */
1084: #endif /* 64-bit PowerPC. */
1.1 maekawa 1085:
#if defined (__pyr__) && W_TYPE_SIZE == 32
/* Pyramid: addw/addwc and subw/subwb form the usual carry/borrow
   chains for two-word add and subtract.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addw %5,%1\n\taddwc %3,%0" \
	   : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
	   : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subw %5,%1\n\tsubwb %3,%0" \
	   : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
	   struct {USItype __h, __l;} __i; \
	  } __x; \
  __asm__ ("movw %1,%R0\n\tuemul %2,%0" \
	   : "=&r" (__x.__ll) \
	   : "g" ((USItype) (u)), "g" ((USItype)(v))); \
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#endif /* __pyr__ */
1107:
#if defined (__ibm032__) /* RT/ROMP */  && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("a %1,%5\n\tae %0,%3" \
	   : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
	   : "%0" ((USItype)(ah)), "r" ((USItype)(bh)), \
	     "%1" ((USItype)(al)), "r" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("s %1,%5\n\tse %0,%3" \
	   : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
	   : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \
	     "1" ((USItype)(al)), "r" ((USItype)(bl)))
/* Sixteen multiply-step "m" instructions develop the signed product
   using the MQ register r10 (loaded/read via mts/mfs); r2 is clobbered
   as the working register.  NOTE(review): see the ROMP manual for the
   per-step semantics.  */
#define smul_ppmm(ph, pl, m0, m1) \
  __asm__ ( \
       "s	r2,r2\n" \
"	mts r10,%2\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	m	r2,%3\n" \
"	cas	%0,r2,r0\n" \
"	mfs	r10,%1" \
	   : "=r" ((USItype)(ph)), "=r" ((USItype)(pl)) \
	   : "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \
	   : "r2")
#define UMUL_TIME 20
#define UDIV_TIME 200
/* clz operates on 16-bit halves: count the high half directly, or the
   low half plus 16 when the high half is zero.  */
#define count_leading_zeros(count, x) \
  do { \
    if ((x) >= 0x10000) \
      __asm__ ("clz %0,%1" \
	       : "=r" ((USItype)(count)) : "r" ((USItype)(x) >> 16)); \
    else \
      { \
	__asm__ ("clz %0,%1" \
		 : "=r" ((USItype)(count)) : "r" ((USItype)(x))); \
	(count) += 16; \
      } \
  } while (0)
#endif /* RT/ROMP */
1159:
#if defined (__sh2__) && W_TYPE_SIZE == 32
/* dmulu.l leaves the 64-bit product in the mach/macl register pair,
   which must be declared clobbered.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \
	   : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
#define UMUL_TIME 5
#endif
1166:
#if defined (__sparc__) && W_TYPE_SIZE == 32
/* addcc/subcc set the carry consumed by addx/subx; __CLOBBER_CC marks
   the condition codes as clobbered.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "%rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
	   __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \
	   __CLOBBER_CC)
1.1.1.3 ! ohara 1178: /* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h
! 1179: doesn't define anything to indicate that to us, it only sets __sparcv8. */
1.1.1.2 maekawa 1180: #if defined (__sparc_v9__) || defined (__sparcv9)
1181: /* Perhaps we should use floating-point operations here? */
1182: #if 0
1183: /* Triggers a bug making mpz/tests/t-gcd.c fail.
1184: Perhaps we simply need explicitly zero-extend the inputs? */
1185: #define umul_ppmm(w1, w0, u, v) \
1186: __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \
1187: "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1")
1188: #else
1189: /* Use v8 umul until above bug is fixed. */
1190: #define umul_ppmm(w1, w0, u, v) \
1191: __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
1192: #endif
1193: /* Use a plain v8 divide for v9. */
1194: #define udiv_qrnnd(q, r, n1, n0, d) \
1195: do { \
1196: USItype __q; \
1197: __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
1198: : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
1199: (r) = (n0) - __q * (d); \
1200: (q) = __q; \
1201: } while (0)
1202: #else
1.1.1.3 ! ohara 1203: #if defined (__sparc_v8__) /* gcc normal */ \
! 1204: || defined (__sparcv8) /* gcc solaris */
1.1 maekawa 1205: /* Don't match immediate range because, 1) it is not often useful,
1206: 2) the 'I' flag thinks of the range as a 13 bit signed interval,
1207: while we want to match a 13 bit interval, sign extended to 32 bits,
1208: but INTERPRETED AS UNSIGNED. */
1209: #define umul_ppmm(w1, w0, u, v) \
1.1.1.2 maekawa 1210: __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
1.1 maekawa 1211: #define UMUL_TIME 5
1.1.1.3 ! ohara 1212:
! 1213: #if HAVE_HOST_CPU_supersparc
! 1214: #define UDIV_TIME 60 /* SuperSPARC timing */
! 1215: #else
! 1216: /* Don't use this on SuperSPARC because its udiv only handles 53 bit
! 1217: dividends and will trap to the kernel for the rest. */
1.1 maekawa 1218: #define udiv_qrnnd(q, r, n1, n0, d) \
1219: do { \
1220: USItype __q; \
1221: __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
1.1.1.2 maekawa 1222: : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
1.1 maekawa 1223: (r) = (n0) - __q * (d); \
1224: (q) = __q; \
1225: } while (0)
1226: #define UDIV_TIME 25
1.1.1.3 ! ohara 1227: #endif /* HAVE_HOST_CPU_supersparc */
! 1228:
1.1 maekawa 1229: #else /* ! __sparc_v8__ */
#if defined (__sparclite__)
/* This has hardware multiply but not divide.  It also has two additional
   instructions scan (ffs from high bit) and divscc.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
#define UMUL_TIME 5
/* One divscc step per quotient bit (32 steps); the remainder is read
   from %y, and the trailing annulled branch/add fixes up a negative
   partial remainder.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n" \
"	wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n" \
"	tst	%%g0\n" \
"	divscc	%3,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%%g1\n" \
"	divscc	%%g1,%4,%0\n" \
"	rd	%%y,%1\n" \
"	bl,a 1f\n" \
"	add	%1,%4,%1\n" \
"1:	! End of inline udiv_qrnnd" \
	   : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \
	   : "%g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
#define count_leading_zeros(count, x) \
  __asm__ ("scan %1,1,%0" : "=r" (count) : "r" (x))
/* Early sparclites return 63 for an argument of 0, but they warn that future
   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
   undefined.  */
#endif /* __sparclite__ */
#endif /* __sparc_v8__ */
#endif /* __sparc_v9__ */
1.1 maekawa 1287: /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
1288: #ifndef umul_ppmm
1289: #define umul_ppmm(w1, w0, u, v) \
1.1.1.3 ! ohara 1290: __asm__ ("! Inlined umul_ppmm\n" \
! 1291: " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
! 1292: " sra %3,31,%%g2 ! Don't move this insn\n" \
! 1293: " and %2,%%g2,%%g2 ! Don't move this insn\n" \
! 1294: " andcc %%g0,0,%%g1 ! Don't move this insn\n" \
! 1295: " mulscc %%g1,%3,%%g1\n" \
! 1296: " mulscc %%g1,%3,%%g1\n" \
! 1297: " mulscc %%g1,%3,%%g1\n" \
! 1298: " mulscc %%g1,%3,%%g1\n" \
! 1299: " mulscc %%g1,%3,%%g1\n" \
! 1300: " mulscc %%g1,%3,%%g1\n" \
! 1301: " mulscc %%g1,%3,%%g1\n" \
! 1302: " mulscc %%g1,%3,%%g1\n" \
! 1303: " mulscc %%g1,%3,%%g1\n" \
! 1304: " mulscc %%g1,%3,%%g1\n" \
! 1305: " mulscc %%g1,%3,%%g1\n" \
! 1306: " mulscc %%g1,%3,%%g1\n" \
! 1307: " mulscc %%g1,%3,%%g1\n" \
! 1308: " mulscc %%g1,%3,%%g1\n" \
! 1309: " mulscc %%g1,%3,%%g1\n" \
! 1310: " mulscc %%g1,%3,%%g1\n" \
! 1311: " mulscc %%g1,%3,%%g1\n" \
! 1312: " mulscc %%g1,%3,%%g1\n" \
! 1313: " mulscc %%g1,%3,%%g1\n" \
! 1314: " mulscc %%g1,%3,%%g1\n" \
! 1315: " mulscc %%g1,%3,%%g1\n" \
! 1316: " mulscc %%g1,%3,%%g1\n" \
! 1317: " mulscc %%g1,%3,%%g1\n" \
! 1318: " mulscc %%g1,%3,%%g1\n" \
! 1319: " mulscc %%g1,%3,%%g1\n" \
! 1320: " mulscc %%g1,%3,%%g1\n" \
! 1321: " mulscc %%g1,%3,%%g1\n" \
! 1322: " mulscc %%g1,%3,%%g1\n" \
! 1323: " mulscc %%g1,%3,%%g1\n" \
! 1324: " mulscc %%g1,%3,%%g1\n" \
! 1325: " mulscc %%g1,%3,%%g1\n" \
! 1326: " mulscc %%g1,%3,%%g1\n" \
! 1327: " mulscc %%g1,0,%%g1\n" \
! 1328: " add %%g1,%%g2,%0\n" \
! 1329: " rd %%y,%1" \
1.1.1.2 maekawa 1330: : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \
1.1 maekawa 1331: : "%g1", "%g2" __AND_CLOBBER_CC)
1332: #define UMUL_TIME 39 /* 39 instructions */
1333: #endif
#ifndef udiv_qrnnd
#ifndef LONGLONG_STANDALONE
/* No usable divide instruction: call the out-of-line __MPN(udiv_qrnnd)
   routine, which returns the quotient and stores the remainder through
   the pointer argument.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __r; \
    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
    (r) = __r; \
  } while (0)
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
#ifndef UDIV_TIME
#define UDIV_TIME 140
#endif
#endif /* LONGLONG_STANDALONE */
#endif /* udiv_qrnnd */
#endif /* __sparc__ */
1348:
1.1.1.3 ! ohara 1349: #if defined (__sparc__) && W_TYPE_SIZE == 64
! 1350: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
! 1351: __asm__ ( \
! 1352: "addcc %r4,%5,%1\n" \
! 1353: " addccc %r6,%7,%%g0\n" \
! 1354: " addc %r2,%3,%0" \
! 1355: : "=r" (sh), "=&r" (sl) \
! 1356: : "%rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \
! 1357: "%rJ" ((al) >> 32), "rI" ((bl) >> 32) \
! 1358: __CLOBBER_CC)
! 1359: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
! 1360: __asm__ ( \
! 1361: "subcc %r4,%5,%1\n" \
! 1362: " subccc %r6,%7,%%g0\n" \
! 1363: " subc %r2,%3,%0" \
! 1364: : "=r" (sh), "=&r" (sl) \
! 1365: : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl), \
! 1366: "rJ" ((al) >> 32), "rI" ((bl) >> 32) \
! 1367: __CLOBBER_CC)
! 1368: #endif
! 1369:
1.1 maekawa 1370: #if defined (__vax__) && W_TYPE_SIZE == 32
1371: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 1372: __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
1373: : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
1374: : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
1375: "%1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 1376: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 maekawa 1377: __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
1378: : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
1379: : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
1380: "1" ((USItype)(al)), "g" ((USItype)(bl)))
1381: #define smul_ppmm(xh, xl, m0, m1) \
1.1 maekawa 1382: do { \
1383: union {UDItype __ll; \
1384: struct {USItype __l, __h;} __i; \
1.1.1.2 maekawa 1385: } __x; \
1.1 maekawa 1386: USItype __m0 = (m0), __m1 = (m1); \
1387: __asm__ ("emul %1,%2,$0,%0" \
1.1.1.2 maekawa 1388: : "=g" (__x.__ll) : "g" (__m0), "g" (__m1)); \
1389: (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
1.1 maekawa 1390: } while (0)
1391: #define sdiv_qrnnd(q, r, n1, n0, d) \
1392: do { \
1393: union {DItype __ll; \
1394: struct {SItype __l, __h;} __i; \
1.1.1.2 maekawa 1395: } __x; \
1396: __x.__i.__h = n1; __x.__i.__l = n0; \
1.1 maekawa 1397: __asm__ ("ediv %3,%2,%0,%1" \
1.1.1.2 maekawa 1398: : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \
1.1 maekawa 1399: } while (0)
1.1.1.3 ! ohara 1400: #if 0
! 1401: /* FIXME: This instruction appears to be unimplemented on some systems (vax
! 1402: 8800 maybe). */
! 1403: #define count_trailing_zeros(count,x) \
! 1404: do { \
! 1405: __asm__ ("ffs 0, 31, %1, %0" \
! 1406: : "=g" ((USItype) (count)) \
! 1407: : "g" ((USItype) (x))); \
! 1408: } while (0)
! 1409: #endif
1.1 maekawa 1410: #endif /* __vax__ */
1411:
#if defined (__z8000__) && W_TYPE_SIZE == 16
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add	%H1,%H5\n\tadc	%H0,%H3" \
	   : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \
	   : "%0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
	     "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub	%H1,%H5\n\tsbc	%H0,%H3" \
	   : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \
	   : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
	     "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
/* "mult" multiplies signed; the trailing additions are the standard
   signed->unsigned correction: whenever an operand's sign bit is set,
   add the other operand back into the high word.  */
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    union {long int __ll; \
	   struct {unsigned int __h, __l;} __i; \
	  } __x; \
    unsigned int __m0 = (m0), __m1 = (m1); \
    __asm__ ("mult	%S0,%H3" \
	     : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \
	     : "%1" (m0), "rQR" (m1)); \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
    (xh) += ((((signed int) __m0 >> 15) & __m1) \
	     + (((signed int) __m1 >> 15) & __m0)); \
  } while (0)
#endif /* __z8000__ */
1437:
1438: #endif /* __GNUC__ */
1439:
1.1.1.3 ! ohara 1440: #endif /* NO_ASM */
! 1441:
1.1 maekawa 1442:
/* If umul_ppmm is missing but __umulsidi3 exists, build umul_ppmm by
   splitting the double-word product.
   Improvement: wrap the body in do { } while (0) like every other
   statement macro in this file; a bare { } breaks expansion in
   "if (c) umul_ppmm (...); else ..." contexts.  */
#if !defined (umul_ppmm) && defined (__umulsidi3)
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDWtype __ll = __umulsidi3 (m0, m1); \
    ph = (UWtype) (__ll >> W_TYPE_SIZE); \
    pl = (UWtype) __ll; \
  } while (0)
#endif
1451:
/* Fallback __umulsidi3: paste the two umul_ppmm halves back into one
   double word.  */
#if !defined (__umulsidi3)
#define __umulsidi3(u, v) \
  ({UWtype __hi, __lo; \
    umul_ppmm (__hi, __lo, u, v); \
    ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
#endif
1458:
1.1.1.2 maekawa 1459:
1460: /* Note the prototypes are under !define(umul_ppmm) etc too, since the HPPA
1461: versions above are different and we don't want to conflict. */
1462:
/* Use the native assembler mpn_umul_ppmm when available: it returns
   the high limb and stores the low limb through the pointer.  */
#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm
#define mpn_umul_ppmm  __MPN(umul_ppmm)
extern mp_limb_t mpn_umul_ppmm _PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
#define umul_ppmm(wh, wl, u, v) \
  do { \
    mp_limb_t __umul_ppmm__p0; \
    (wh) = __MPN(umul_ppmm) (&__umul_ppmm__p0, \
			     (mp_limb_t) (u), (mp_limb_t) (v)); \
    (wl) = __umul_ppmm__p0; \
  } while (0)
#endif
1474:
/* Likewise for the native mpn_udiv_qrnnd: quotient returned, remainder
   stored through the pointer.  */
#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd
#define mpn_udiv_qrnnd  __MPN(udiv_qrnnd)
extern mp_limb_t mpn_udiv_qrnnd _PROTO ((mp_limb_t *,
					 mp_limb_t, mp_limb_t, mp_limb_t));
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    mp_limb_t __udiv_qrnnd__r; \
    (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \
			  (mp_limb_t) (n1), (mp_limb_t) (n0), (mp_limb_t) d); \
    (r) = __udiv_qrnnd__r; \
  } while (0)
#endif
1487:
1488:
1.1 maekawa 1489: /* If this machine has no inline assembler, use C macros. */
1490:
#if !defined (add_ssaaaa)
/* Plain C double-word add: (sh,sl) = (ah,al) + (bh,bl).  The carry out
   of the low word is recovered with the standard unsigned wrap-around
   test (sum < addend).  (sh) is assigned before (sl) so the macro
   stays correct when (sl) aliases ah or bh.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __lo_sum; \
    __lo_sum = (al) + (bl); \
    (sh) = (ah) + (bh) + (__lo_sum < (al)); \
    (sl) = __lo_sum; \
  } while (0)
#endif
1500:
#if !defined (sub_ddmmss)
/* Plain C double-word subtract: (sh,sl) = (ah,al) - (bh,bl).  A borrow
   out of the low word shows up as the difference exceeding the minuend
   (unsigned wrap) and is subtracted from the high word.  (sh) is
   assigned before (sl) so the macro stays correct when (sl) aliases
   ah or bh.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __lo_diff; \
    __lo_diff = (al) - (bl); \
    (sh) = (ah) - (bh) - (__lo_diff > (al)); \
    (sl) = __lo_diff; \
  } while (0)
#endif
1510:
1.1.1.2 maekawa 1511: /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1512: smul_ppmm. */
1513: #if !defined (umul_ppmm) && defined (smul_ppmm)
1514: #define umul_ppmm(w1, w0, u, v) \
1515: do { \
1516: UWtype __w1; \
1517: UWtype __xm0 = (u), __xm1 = (v); \
1518: smul_ppmm (__w1, w0, __xm0, __xm1); \
1519: (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
1520: + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
1521: } while (0)
1522: #endif
1523:
/* If we still don't have umul_ppmm, define it using plain C.
   Schoolbook multiply on half words: split u and v into high/low
   halves, form the four partial products, and fold the middle terms
   with explicit handling of the one possible carry between them.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
    UWtype __u = (u), __v = (v); \
\
    __ul = __ll_lowpart (__u); \
    __uh = __ll_highpart (__u); \
    __vl = __ll_lowpart (__v); \
    __vh = __ll_highpart (__v); \
\
    __x0 = (UWtype) __ul * __vl; \
    __x1 = (UWtype) __ul * __vh; \
    __x2 = (UWtype) __uh * __vl; \
    __x3 = (UWtype) __uh * __vh; \
\
    __x1 += __ll_highpart (__x0);/* this can't give carry */ \
    __x1 += __x2;		/* but this indeed can */ \
    if (__x1 < __x2)		/* did we get it? */ \
      __x3 += __ll_B;		/* yes, add it in the proper pos.  */ \
\
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \
  } while (0)
#endif
1551:
/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
   exist in one form or another).  */
#if !defined (smul_ppmm)
/* Signed two-word product from the unsigned one: the inverse of the
   adjustment used when deriving umul_ppmm from smul_ppmm.  For each
   operand whose top (sign) bit is set, the other operand is subtracted
   from the high word; -(x >> (W_TYPE_SIZE - 1)) is an all-ones mask
   exactly when the sign bit of x is set.  The low word is the same in
   both interpretations and is stored directly by umul_ppmm.  */
#define smul_ppmm(w1, w0, u, v)						\
  do {									\
    UWtype __hi_prod;							\
    UWtype __su = (u), __sv = (v);					\
    umul_ppmm (__hi_prod, w0, __su, __sv);				\
    (w1) = __hi_prod - (-(__su >> (W_TYPE_SIZE - 1)) & __sv)		\
		     - (-(__sv >> (W_TYPE_SIZE - 1)) & __su);		\
  } while (0)
#endif
1564:
/* Define this unconditionally, so it can be used for debugging.  */
/* Divide the two-word number {n1,n0} by d, storing the one-word
   quotient in q and the remainder in r.  Requires n1 < d so the
   quotient fits in a single word.  The quotient is built from two
   half-word "digits": each digit is first estimated from the high half
   of the divisor alone, then corrected downward (at most twice) when
   the contribution of the low half of the divisor makes the estimate
   too large.  NOTE(review): when this is used as udiv_qrnnd,
   UDIV_NEEDS_NORMALIZATION is set to 1, so callers presumably pass d
   with its high bit set -- confirm correctness relies on that.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {									\
    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;			\
									\
    ASSERT ((d) != 0);							\
    ASSERT ((n1) < (d));						\
									\
    __d1 = __ll_highpart (d);						\
    __d0 = __ll_lowpart (d);						\
									\
    /* High quotient digit: divide {n1, highpart(n0)} by d.  */		\
    __q1 = (n1) / __d1;							\
    __r1 = (n1) - __q1 * __d1;						\
    __m = (UWtype) __q1 * __d0;						\
    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
    if (__r1 < __m)							\
      {									\
	__q1--, __r1 += (d);						\
	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
	  if (__r1 < __m)						\
	    __q1--, __r1 += (d);					\
      }									\
    __r1 -= __m;							\
									\
    /* Low quotient digit: divide {__r1, lowpart(n0)} by d.  */		\
    __q0 = __r1 / __d1;							\
    __r0 = __r1 - __q0 * __d1;						\
    __m = (UWtype) __q0 * __d0;						\
    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
    if (__r0 < __m)							\
      {									\
	__q0--, __r0 += (d);						\
	if (__r0 >= (d))						\
	  if (__r0 < __m)						\
	    __q0--, __r0 += (d);					\
      }									\
    __r0 -= __m;							\
									\
    (q) = (UWtype) __q1 * __ll_B | __q0;				\
    (r) = __r0;								\
  } while (0)
1605:
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  The helper returns
   the quotient and stores the remainder through its first argument.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d)					\
  do {									\
    UWtype __remainder;							\
    (q) = __MPN(udiv_w_sdiv) (&__remainder, nh, nl, d);			\
    (r) = __remainder;							\
  } while (0)
#endif
1616:
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
#if !defined (udiv_qrnnd)
/* The generic C routine wants the divisor normalized, as the flag's
   name says -- callers must pre-shift d (and the dividend) so d's high
   bit is set.  */
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
1622:
#if !defined (count_leading_zeros)
/* Count the leading zero bits of x, storing the result in count.
   Strategy: find a shift __a such that __xr >> __a fits the lookup
   table, then let the table supply the bit position within that chunk.
   For W_TYPE_SIZE == 32 the shift is chosen by three compares (the
   compiler folds the branch at compile time); otherwise a byte-wise
   scan from the top is used.  The table is expected to map i to
   (number of significant bits of i) + 1, with __clz_tab[0] == 1 --
   TODO confirm against the __clz_tab definition.  With that contract,
   x == 0 falls through to index 0 and yields W_TYPE_SIZE - 1, matching
   COUNT_LEADING_ZEROS_0 below.  */
#define count_leading_zeros(count, x) \
  do {									\
    UWtype __xr = (x);							\
    UWtype __a;								\
									\
    if (W_TYPE_SIZE == 32)						\
      {									\
	__a = __xr < ((UWtype) 1 << 2*__BITS4)				\
	  ? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1)		\
	  : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 + 1		\
	  : 3*__BITS4 + 1);						\
      }									\
    else								\
      {									\
	for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)			\
	  if (((__xr >> __a) & 0xff) != 0)				\
	    break;							\
	++__a;								\
      }									\
									\
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];		\
  } while (0)
/* This version gives a well-defined value for zero.  */
#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#endif
1650:
/* Lookup table used by the generic count_leading_zeros above; the
   definition lives elsewhere in the library (presumably mp_clz_tab.c
   -- confirm).  */
#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
extern const unsigned char __GMP_DECLSPEC __clz_tab[128];
#endif
! 1654:
#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.
   x & -x isolates the lowest set bit of x, so the trailing-zero count
   is that bit's position: W_TYPE_SIZE - 1 minus its leading-zero
   count.  x must be nonzero.  */
#define count_trailing_zeros(count, x)					\
  do {									\
    UWtype __tz_val = (x);						\
    UWtype __tz_lz;							\
    ASSERT (__tz_val != 0);						\
    count_leading_zeros (__tz_lz, __tz_val & -__tz_val);		\
    (count) = W_TYPE_SIZE - 1 - __tz_lz;				\
  } while (0)
#endif
1667:
/* By default, assume udiv_qrnnd copes with an unnormalized divisor;
   the generic C fallback above overrides this to 1.  */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif

/* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv,
   in which case the latter should always be used.  */
#ifndef UDIV_PREINV_ALWAYS
#define UDIV_PREINV_ALWAYS 0
#endif

/* Give defaults for UMUL_TIME and UDIV_TIME.  These look like relative
   cost estimates for umul_ppmm/udiv_qrnnd used for tuning -- confirm
   the unit (cycles?) against the per-CPU sections.  */
#ifndef UMUL_TIME
#define UMUL_TIME 1
#endif

#ifndef UDIV_TIME
#define UDIV_TIME UMUL_TIME
#endif