Annotation of OpenXM_contrib/gmp/longlong.h, Revision 1.1.1.2
1.1 maekawa 1: /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2:
1.1.1.2 ! maekawa 3: Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000 Free Software
! 4: Foundation, Inc.
1.1 maekawa 5:
6: This file is free software; you can redistribute it and/or modify
1.1.1.2 ! maekawa 7: it under the terms of the GNU Lesser General Public License as published by
! 8: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 9: option) any later version.
10:
11: This file is distributed in the hope that it will be useful, but
12: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 ! maekawa 13: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 14: License for more details.
15:
1.1.1.2 ! maekawa 16: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 17: along with this file; see the file COPYING.LIB. If not, write to
18: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
19: MA 02111-1307, USA. */
20:
21: /* You have to define the following before including this file:
22:
23: UWtype -- An unsigned type, default type for operations (typically a "word")
24: UHWtype -- An unsigned type, at least half the size of UWtype.
25: UDWtype -- An unsigned type, at least twice as large as UWtype.
26: W_TYPE_SIZE -- size in bits of UWtype
27:
28: SItype, USItype -- Signed and unsigned 32 bit types.
29: DItype, UDItype -- Signed and unsigned 64 bit types.
30:
31: On a 32 bit machine UWtype should typically be USItype;
32: on a 64 bit machine, UWtype should typically be UDItype.
33: */
34:
35: #define __BITS4 (W_TYPE_SIZE / 4)
36: #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
37: #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
38: #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
39:
40: /* This is used to make sure no undesirable sharing between different libraries
41: that use this file takes place. */
42: #ifndef __MPN
43: #define __MPN(x) __##x
44: #endif
45:
1.1.1.2 ! maekawa 46: #ifndef _PROTO
! 47: #if (__STDC__-0) || defined (__cplusplus)
! 48: #define _PROTO(x) x
! 49: #else
! 50: #define _PROTO(x) ()
! 51: #endif
! 52: #endif
! 53:
1.1 maekawa 54: /* Define auxiliary asm macros.
55:
56: 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
57: UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
58: word product in HIGH_PROD and LOW_PROD.
59:
60: 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
61: UDWtype product. This is just a variant of umul_ppmm.
62:
63: 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
64: denominator) divides a UDWtype, composed by the UWtype integers
65: HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
66: in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
67: than DENOMINATOR for correct operation. If the implementation also
68: requires the most significant bit of DENOMINATOR to be 1, the pre-processor
69: symbol UDIV_NEEDS_NORMALIZATION is defined to 1.
70:
71: 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
72: denominator). Like udiv_qrnnd but the numbers are signed. The quotient
73: is rounded towards 0.
74:
75: 5) count_leading_zeros(count, x) counts the number of zero-bits from the
76: msb to the first non-zero bit in the UWtype X. This is the number of
77: steps X needs to be shifted left to set the msb. Undefined for X == 0,
78: unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
79:
80: 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
81: from the least significant end.
82:
83: 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
84: high_addend_2, low_addend_2) adds two two-word UWtype integers, composed of
85: HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
86: respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
87: (i.e. carry out) is not stored anywhere, and is lost.
88:
89: 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
90: high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
91: composed of HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
92: LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
93: and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
94: and is lost.
95:
96: If any of these macros are left undefined for a particular CPU,
97: C macros are used. */
98:
99: /* The CPUs come in alphabetical order below.
100:
101: Please add support for more CPUs here, or improve the current support
102: for the CPUs below! */
103:
1.1.1.2 ! maekawa 104: #if defined (__alpha) && W_TYPE_SIZE == 64
! 105: #if defined (__GNUC__)
/* umul_ppmm (ph, pl, m0, m1) -- unsigned multiply of M0 by M1 (Alpha, GCC).
   PH receives the result of the umulh instruction and PL the result of the
   plain C multiplication of the same two values.  M0 and M1 are copied into
   locals once and only the copies are used afterwards, so an argument with
   side effects is evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("umulh %r1,%2,%0"                                          \
             : "=r" (ph)                                                \
             : "%rJ" (__m0), "rI" (__m1));                              \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
! 114: #define UMUL_TIME 18
! 115: #ifndef LONGLONG_STANDALONE
! 116: #define udiv_qrnnd(q, r, n1, n0, d) \
! 117: do { UDItype __di; \
! 118: __di = __MPN(invert_limb) (d); \
! 119: udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
! 120: } while (0)
! 121: #define UDIV_NEEDS_NORMALIZATION 1
! 122: #define UDIV_TIME 220
! 123: long __MPN(count_leading_zeros) ();
! 124: #define count_leading_zeros(count, x) \
! 125: ((count) = __MPN(count_leading_zeros) (x))
! 126: #endif /* LONGLONG_STANDALONE */
! 127: #else /* ! __GNUC__ */
! 128: #include <machine/builtins.h>
/* umul_ppmm (ph, pl, m0, m1) -- unsigned multiply of M0 by M1 for compilers
   other than GCC on Alpha.  PH receives __UMULH of the two values and PL
   their plain C product.  Both results are computed from the local copies
   __m0/__m1, so each argument expression is evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    (ph) = __UMULH (__m0, __m1);                                        \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
! 135: #endif
! 136: #endif /* __alpha */
! 137:
! 138: #if defined (__hppa) && W_TYPE_SIZE == 64
! 139: /* We put the result pointer parameter last here, since it makes passing
! 140: of the other parameters more efficient. */
! 141: #ifndef LONGLONG_STANDALONE
! 142: #define umul_ppmm(wh, wl, u, v) \
! 143: do { \
! 144: UDItype __p0; \
! 145: (wh) = __MPN(umul_ppmm) (u, v, &__p0); \
! 146: (wl) = __p0; \
! 147: } while (0)
! 148: extern UDItype __MPN(umul_ppmm) _PROTO ((UDItype, UDItype, UDItype *));
! 149: #define udiv_qrnnd(q, r, n1, n0, d) \
! 150: do { UDItype __r; \
! 151: (q) = __MPN(udiv_qrnnd) (n1, n0, d, &__r); \
! 152: (r) = __r; \
! 153: } while (0)
! 154: extern UDItype __MPN(udiv_qrnnd) _PROTO ((UDItype, UDItype, UDItype, UDItype *));
! 155: #define UMUL_TIME 8
! 156: #define UDIV_TIME 60
! 157: #endif /* LONGLONG_STANDALONE */
! 158: #endif /* hppa */
! 159:
! 160: #if defined (__ia64) && W_TYPE_SIZE == 64
! 161: #if defined (__GNUC__)
/* umul_ppmm (ph, pl, m0, m1) -- unsigned multiply of M0 by M1 (IA-64, GCC).
   PH receives the result of the xma.hu instruction and PL the plain C
   product.  The asm reads the local copies __m0/__m1 rather than the macro
   arguments, so each argument expression is evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("xma.hu %0 = %1, %2, f0"                                   \
             : "=e" (ph)                                                \
             : "e" (__m0), "e" (__m1));                                 \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
! 170: #endif
! 171: #endif
! 172:
! 173:
1.1 maekawa 174: #if defined (__GNUC__) && !defined (NO_ASM)
175:
176: /* We sometimes need to clobber "cc" with gcc2, but that would not be
177: understood by gcc1. Use cpp to avoid major code duplication. */
178: #if __GNUC__ < 2
179: #define __CLOBBER_CC
180: #define __AND_CLOBBER_CC
181: #else /* __GNUC__ >= 2 */
182: #define __CLOBBER_CC : "cc"
183: #define __AND_CLOBBER_CC , "cc"
184: #endif /* __GNUC__ < 2 */
185:
186: #if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
187: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 188: __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3" \
! 189: : "=r" (sh), "=&r" (sl) \
! 190: : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
1.1 maekawa 191: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 192: __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3" \
! 193: : "=r" (sh), "=&r" (sl) \
! 194: : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
1.1 maekawa 195: #define umul_ppmm(xh, xl, m0, m1) \
196: do { \
197: USItype __m0 = (m0), __m1 = (m1); \
198: __asm__ ("multiplu %0,%1,%2" \
1.1.1.2 ! maekawa 199: : "=r" (xl) \
! 200: : "r" (__m0), "r" (__m1)); \
1.1 maekawa 201: __asm__ ("multmu %0,%1,%2" \
1.1.1.2 ! maekawa 202: : "=r" (xh) \
! 203: : "r" (__m0), "r" (__m1)); \
1.1 maekawa 204: } while (0)
205: #define udiv_qrnnd(q, r, n1, n0, d) \
206: __asm__ ("dividu %0,%3,%4" \
1.1.1.2 ! maekawa 207: : "=r" (q), "=q" (r) \
! 208: : "1" (n1), "r" (n0), "r" (d))
1.1 maekawa 209: #define count_leading_zeros(count, x) \
210: __asm__ ("clz %0,%1" \
1.1.1.2 ! maekawa 211: : "=r" (count) \
! 212: : "r" (x))
1.1 maekawa 213: #define COUNT_LEADING_ZEROS_0 32
214: #endif /* __a29k__ */
215:
216: #if defined (__arm__) && W_TYPE_SIZE == 32
217: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 218: __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
! 219: : "=r" (sh), "=&r" (sl) \
! 220: : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
1.1 maekawa 221: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 222: __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
! 223: : "=r" (sh), "=&r" (sl) \
! 224: : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
! 225: #if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
! 226: #define umul_ppmm(xh, xl, a, b) \
! 227: __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
! 228: #define smul_ppmm(xh, xl, a, b) \
! 229: __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
! 230: #define UMUL_TIME 5
! 231: #else
1.1 maekawa 232: #define umul_ppmm(xh, xl, a, b) \
233: __asm__ ("%@ Inlined umul_ppmm
234: mov %|r0, %2, lsr #16
235: mov %|r2, %3, lsr #16
236: bic %|r1, %2, %|r0, lsl #16
237: bic %|r2, %3, %|r2, lsl #16
238: mul %1, %|r1, %|r2
239: mul %|r2, %|r0, %|r2
240: mul %|r1, %0, %|r1
241: mul %0, %|r0, %0
242: adds %|r1, %|r2, %|r1
243: addcs %0, %0, #65536
244: adds %1, %1, %|r1, lsl #16
245: adc %0, %0, %|r1, lsr #16" \
1.1.1.2 ! maekawa 246: : "=&r" (xh), "=r" (xl) \
! 247: : "r" (a), "r" (b) \
1.1 maekawa 248: : "r0", "r1", "r2")
249: #define UMUL_TIME 20
1.1.1.2 ! maekawa 250: #endif
1.1 maekawa 251: #define UDIV_TIME 100
252: #endif /* __arm__ */
253:
254: #if defined (__clipper__) && W_TYPE_SIZE == 32
255: #define umul_ppmm(w1, w0, u, v) \
256: ({union {UDItype __ll; \
257: struct {USItype __l, __h;} __i; \
1.1.1.2 ! maekawa 258: } __x; \
1.1 maekawa 259: __asm__ ("mulwux %2,%0" \
1.1.1.2 ! maekawa 260: : "=r" (__x.__ll) \
! 261: : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
! 262: (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
1.1 maekawa 263: #define smul_ppmm(w1, w0, u, v) \
264: ({union {DItype __ll; \
265: struct {SItype __l, __h;} __i; \
1.1.1.2 ! maekawa 266: } __x; \
1.1 maekawa 267: __asm__ ("mulwx %2,%0" \
1.1.1.2 ! maekawa 268: : "=r" (__x.__ll) \
! 269: : "%0" ((SItype)(u)), "r" ((SItype)(v))); \
! 270: (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
1.1 maekawa 271: #define __umulsidi3(u, v) \
272: ({UDItype __w; \
273: __asm__ ("mulwux %2,%0" \
1.1.1.2 ! maekawa 274: : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
1.1 maekawa 275: __w; })
276: #endif /* __clipper__ */
277:
1.1.1.2 ! maekawa 278: /* Fujitsu vector computers. */
! 279: #if defined (__uxp__) && W_TYPE_SIZE == 32
! 280: #define umul_ppmm(ph, pl, u, v) \
! 281: do { \
! 282: union {UDItype __ll; \
! 283: struct {USItype __h, __l;} __i; \
! 284: } __x; \
! 285: __asm__ ("mult.lu %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));\
! 286: (ph) = __x.__i.__h; \
! 287: (pl) = __x.__i.__l; \
! 288: } while (0)
! 289: #define smul_ppmm(ph, pl, u, v) \
! 290: do { \
! 291: union {UDItype __ll; \
! 292: struct {USItype __h, __l;} __i; \
! 293: } __x; \
! 294: __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \
! 295: (ph) = __x.__i.__h; \
! 296: (pl) = __x.__i.__l; \
! 297: } while (0)
! 298: #endif
! 299:
1.1 maekawa 300: #if defined (__gmicro__) && W_TYPE_SIZE == 32
301: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 302: __asm__ ("add.w %5,%1\n\taddx %3,%0" \
! 303: : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
! 304: : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
! 305: "%1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 306: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 307: __asm__ ("sub.w %5,%1\n\tsubx %3,%0" \
! 308: : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
! 309: : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
! 310: "1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 311: #define umul_ppmm(ph, pl, m0, m1) \
312: __asm__ ("mulx %3,%0,%1" \
1.1.1.2 ! maekawa 313: : "=g" ((USItype)(ph)), "=r" ((USItype)(pl)) \
! 314: : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
1.1 maekawa 315: #define udiv_qrnnd(q, r, nh, nl, d) \
316: __asm__ ("divx %4,%0,%1" \
1.1.1.2 ! maekawa 317: : "=g" ((USItype)(q)), "=r" ((USItype)(r)) \
! 318: : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
1.1 maekawa 319: #define count_leading_zeros(count, x) \
320: __asm__ ("bsch/1 %1,%0" \
1.1.1.2 ! maekawa 321: : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0))
1.1 maekawa 322: #endif
323:
324: #if defined (__hppa) && W_TYPE_SIZE == 32
325: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 326: __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
! 327: : "=r" (sh), "=&r" (sl) \
! 328: : "%rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl))
1.1 maekawa 329: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 330: __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
! 331: : "=r" (sh), "=&r" (sl) \
! 332: : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl))
1.1 maekawa 333: #if defined (_PA_RISC1_1)
334: #define umul_ppmm(wh, wl, u, v) \
335: do { \
336: union {UDItype __ll; \
337: struct {USItype __h, __l;} __i; \
1.1.1.2 ! maekawa 338: } __x; \
! 339: __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v)); \
! 340: (wh) = __x.__i.__h; \
! 341: (wl) = __x.__i.__l; \
1.1 maekawa 342: } while (0)
343: #define UMUL_TIME 8
344: #define UDIV_TIME 60
345: #else
346: #define UMUL_TIME 40
347: #define UDIV_TIME 80
348: #endif
349: #ifndef LONGLONG_STANDALONE
350: #define udiv_qrnnd(q, r, n1, n0, d) \
351: do { USItype __r; \
1.1.1.2 ! maekawa 352: (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
1.1 maekawa 353: (r) = __r; \
354: } while (0)
1.1.1.2 ! maekawa 355: extern USItype __MPN(udiv_qrnnd) _PROTO ((USItype *, USItype, USItype, USItype));
1.1 maekawa 356: #endif /* LONGLONG_STANDALONE */
357: #define count_leading_zeros(count, x) \
358: do { \
359: USItype __tmp; \
360: __asm__ ( \
361: "ldi 1,%0
362: extru,= %1,15,16,%%r0 ; Bits 31..16 zero?
363: extru,tr %1,15,16,%1 ; No. Shift down, skip add.
364: ldo 16(%0),%0 ; Yes. Perform add.
365: extru,= %1,23,8,%%r0 ; Bits 15..8 zero?
366: extru,tr %1,23,8,%1 ; No. Shift down, skip add.
367: ldo 8(%0),%0 ; Yes. Perform add.
368: extru,= %1,27,4,%%r0 ; Bits 7..4 zero?
369: extru,tr %1,27,4,%1 ; No. Shift down, skip add.
370: ldo 4(%0),%0 ; Yes. Perform add.
371: extru,= %1,29,2,%%r0 ; Bits 3..2 zero?
372: extru,tr %1,29,2,%1 ; No. Shift down, skip add.
373: ldo 2(%0),%0 ; Yes. Perform add.
374: extru %1,30,1,%1 ; Extract bit 1.
375: sub %0,%1,%0 ; Subtract it.
376: " : "=r" (count), "=r" (__tmp) : "1" (x)); \
377: } while (0)
378: #endif /* hppa */
379:
380: #if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32
381: #define smul_ppmm(xh, xl, m0, m1) \
382: do { \
383: union {DItype __ll; \
384: struct {USItype __h, __l;} __i; \
1.1.1.2 ! maekawa 385: } __x; \
1.1 maekawa 386: __asm__ ("mr %0,%3" \
1.1.1.2 ! maekawa 387: : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \
! 388: : "%1" (m0), "r" (m1)); \
! 389: (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
1.1 maekawa 390: } while (0)
391: #define sdiv_qrnnd(q, r, n1, n0, d) \
392: do { \
393: union {DItype __ll; \
394: struct {USItype __h, __l;} __i; \
1.1.1.2 ! maekawa 395: } __x; \
! 396: __x.__i.__h = n1; __x.__i.__l = n0; \
1.1 maekawa 397: __asm__ ("dr %0,%2" \
1.1.1.2 ! maekawa 398: : "=r" (__x.__ll) \
! 399: : "0" (__x.__ll), "r" (d)); \
! 400: (q) = __x.__i.__l; (r) = __x.__i.__h; \
1.1 maekawa 401: } while (0)
402: #endif
403:
404: #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
405: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 406: __asm__ ("addl %5,%1\n\tadcl %3,%0" \
! 407: : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
! 408: : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
! 409: "%1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 410: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 411: __asm__ ("subl %5,%1\n\tsbbl %3,%0" \
! 412: : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
! 413: : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
! 414: "1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 415: #define umul_ppmm(w1, w0, u, v) \
416: __asm__ ("mull %3" \
1.1.1.2 ! maekawa 417: : "=a" (w0), "=d" (w1) \
! 418: : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
1.1 maekawa 419: #define udiv_qrnnd(q, r, n1, n0, d) \
420: __asm__ ("divl %4" \
1.1.1.2 ! maekawa 421: : "=a" (q), "=d" (r) \
! 422: : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(d)))
1.1 maekawa 423: #define count_leading_zeros(count, x) \
424: do { \
425: USItype __cbtmp; \
1.1.1.2 ! maekawa 426: __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
1.1 maekawa 427: (count) = __cbtmp ^ 31; \
428: } while (0)
429: #define count_trailing_zeros(count, x) \
430: __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
431: #ifndef UMUL_TIME
1.1.1.2 ! maekawa 432: #define UMUL_TIME 10
1.1 maekawa 433: #endif
434: #ifndef UDIV_TIME
435: #define UDIV_TIME 40
436: #endif
437: #endif /* 80x86 */
438:
439: #if defined (__i860__) && W_TYPE_SIZE == 32
/* rshift_rhlc (r, h, l, c) -- emits the i860 "shr"/"shrd" pair with H, L and
   C as inputs and R as the single output.  The ':' that introduces the
   output operand list must be present; without it the "=r" constraint
   string is concatenated onto the instruction template.  */
#define rshift_rhlc(r,h,l,c) \
  __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0"                                \
           : "=r" (r) : "r" (h), "r" (l), "rn" (c))
443: #endif /* i860 */
444:
445: #if defined (__i960__) && W_TYPE_SIZE == 32
446: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
447: __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \
1.1.1.2 ! maekawa 448: : "=r" (sh), "=&r" (sl) \
! 449: : "%dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl))
1.1 maekawa 450: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
451: __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \
1.1.1.2 ! maekawa 452: : "=r" (sh), "=&r" (sl) \
! 453: : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl))
1.1 maekawa 454: #define umul_ppmm(w1, w0, u, v) \
455: ({union {UDItype __ll; \
456: struct {USItype __l, __h;} __i; \
1.1.1.2 ! maekawa 457: } __x; \
! 458: __asm__ ("emul %2,%1,%0" \
! 459: : "=d" (__x.__ll) : "%dI" (u), "dI" (v)); \
! 460: (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
1.1 maekawa 461: #define __umulsidi3(u, v) \
462: ({UDItype __w; \
1.1.1.2 ! maekawa 463: __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v)); \
1.1 maekawa 464: __w; })
/* udiv_qrnnd (q, r, nh, nl, d) -- divide the two-word value NH:NL by D with
   the i960 ediv instruction.  The numerator is assembled in __nn; the
   instruction's double-word result lands in __rq, whose low half is stored
   in R and whose high half is stored in Q.  __rq has to be declared next to
   __nn, otherwise the macro does not compile.
   NOTE(review): the "%d,%n" operand references in the template are kept
   unchanged but do not look like numbered operand references -- confirm
   against the i960 back end before relying on this macro.  */
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    union {UDItype __ll;                                                \
           struct {USItype __l, __h;} __i;                              \
          } __nn, __rq;                                                 \
    __nn.__i.__h = (nh); __nn.__i.__l = (nl);                           \
    __asm__ ("ediv %d,%n,%0"                                            \
             : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d));          \
    (r) = __rq.__i.__l; (q) = __rq.__i.__h;                             \
  } while (0)
475: #define count_leading_zeros(count, x) \
476: do { \
477: USItype __cbtmp; \
1.1.1.2 ! maekawa 478: __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x)); \
1.1 maekawa 479: (count) = __cbtmp ^ 31; \
480: } while (0)
481: #define COUNT_LEADING_ZEROS_0 (-32) /* sic */
482: #if defined (__i960mx) /* what is the proper symbol to test??? */
/* rshift_rhlc (r, h, l, c) -- packs H (high) and L (low) into one double
   word and passes it, together with C, to the i960 "shre" instruction; the
   result is stored in R.  The body is a complete do { ... } while (0)
   statement so the macro can be followed by ';' like a function call.  */
#define rshift_rhlc(r,h,l,c) \
  do {                                                                  \
    union {UDItype __ll;                                                \
           struct {USItype __l, __h;} __i;                              \
          } __nn;                                                       \
    __nn.__i.__h = (h); __nn.__i.__l = (l);                             \
    __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c));  \
  } while (0)
491: #endif /* i960mx */
492: #endif /* i960 */
493:
1.1.1.2 ! maekawa 494: #if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
! 495: || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
! 496: || defined (__mc5307__)) && W_TYPE_SIZE == 32
1.1 maekawa 497: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 498: __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
! 499: : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
! 500: : "%0" ((USItype)(ah)), "d" ((USItype)(bh)), \
! 501: "%1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 502: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 503: __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
! 504: : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
! 505: : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \
! 506: "1" ((USItype)(al)), "g" ((USItype)(bl)))
! 507: /* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r. */
! 508: #if defined (__mc68020__) || defined(mc68020) \
! 509: || defined (__mc68030__) || defined (mc68030) \
! 510: || defined (__mc68040__) || defined (mc68040) \
! 511: || defined (__mc68332__) || defined (mc68332) \
! 512: || defined (__NeXT__)
1.1 maekawa 513: #define umul_ppmm(w1, w0, u, v) \
514: __asm__ ("mulu%.l %3,%1:%0" \
1.1.1.2 ! maekawa 515: : "=d" ((USItype)(w0)), "=d" ((USItype)(w1)) \
! 516: : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
1.1 maekawa 517: #define UMUL_TIME 45
518: #define udiv_qrnnd(q, r, n1, n0, d) \
519: __asm__ ("divu%.l %4,%1:%0" \
1.1.1.2 ! maekawa 520: : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
! 521: : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
1.1 maekawa 522: #define UDIV_TIME 90
523: #define sdiv_qrnnd(q, r, n1, n0, d) \
524: __asm__ ("divs%.l %4,%1:%0" \
1.1.1.2 ! maekawa 525: : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
! 526: : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
! 527: #else /* for other 68k family members use 16x16->32 multiplication */
1.1 maekawa 528: #define umul_ppmm(xh, xl, a, b) \
529: do { USItype __umul_tmp1, __umul_tmp2; \
530: __asm__ ("| Inlined umul_ppmm
531: move%.l %5,%3
532: move%.l %2,%0
533: move%.w %3,%1
534: swap %3
535: swap %0
1.1.1.2 ! maekawa 536: mulu%.w %2,%1
! 537: mulu%.w %3,%0
! 538: mulu%.w %2,%3
1.1 maekawa 539: swap %2
1.1.1.2 ! maekawa 540: mulu%.w %5,%2
1.1 maekawa 541: add%.l %3,%2
542: jcc 1f
543: add%.l %#0x10000,%0
544: 1: move%.l %2,%3
545: clr%.w %2
546: swap %2
547: swap %3
548: clr%.w %3
549: add%.l %3,%1
550: addx%.l %2,%0
551: | End inlined umul_ppmm" \
552: : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
553: "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
554: : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
555: } while (0)
556: #define UMUL_TIME 100
557: #define UDIV_TIME 400
558: #endif /* not mc68020 */
1.1.1.2 ! maekawa 559: /* The '020, '030, '040 and '060 have bitfield insns. */
! 560: #if defined (__mc68020__) || defined (mc68020) \
! 561: || defined (__mc68030__) || defined (mc68030) \
! 562: || defined (__mc68040__) || defined (mc68040) \
! 563: || defined (__mc68060__) || defined (mc68060) \
! 564: || defined (__NeXT__)
! 565: #define count_leading_zeros(count, x) \
! 566: __asm__ ("bfffo %1{%b2:%b2},%0" \
! 567: : "=d" ((USItype) (count)) \
! 568: : "od" ((USItype) (x)), "n" (0))
! 569: #define COUNT_LEADING_ZEROS_0 32
! 570: #endif
1.1 maekawa 571: #endif /* mc68000 */
572:
573: #if defined (__m88000__) && W_TYPE_SIZE == 32
574: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 575: __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
! 576: : "=r" (sh), "=&r" (sl) \
! 577: : "%rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl))
1.1 maekawa 578: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 579: __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
! 580: : "=r" (sh), "=&r" (sl) \
! 581: : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl))
1.1 maekawa 582: #define count_leading_zeros(count, x) \
583: do { \
584: USItype __cbtmp; \
1.1.1.2 ! maekawa 585: __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x)); \
1.1 maekawa 586: (count) = __cbtmp ^ 31; \
587: } while (0)
588: #define COUNT_LEADING_ZEROS_0 63 /* sic */
589: #if defined (__m88110__)
590: #define umul_ppmm(wh, wl, u, v) \
591: do { \
592: union {UDItype __ll; \
593: struct {USItype __h, __l;} __i; \
1.1.1.2 ! maekawa 594: } __x; \
! 595: __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
! 596: (wh) = __x.__i.__h; \
! 597: (wl) = __x.__i.__l; \
1.1 maekawa 598: } while (0)
/* udiv_qrnnd (q, r, n1, n0, d) -- divide the two-word value N1:N0 by D with
   the m88110 divu.d instruction.  Q is taken from the low word of the
   instruction's result and R is recomputed as N0 - Q * D.  The result words
   are reached through the union's __i member, and N0 and D are read from
   local copies so each argument expression is evaluated exactly once.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll;                                                \
           struct {USItype __h, __l;} __i;                              \
          } __x, __q;                                                   \
    USItype __udiv_d = (d);                                             \
    __x.__i.__h = (n1); __x.__i.__l = (n0);                             \
    __asm__ ("divu.d %0,%1,%2"                                          \
             : "=r" (__q.__ll) : "r" (__x.__ll), "r" (__udiv_d));       \
    (r) = __x.__i.__l - __q.__i.__l * __udiv_d; (q) = __q.__i.__l; })
1.1 maekawa 607: #define UMUL_TIME 5
608: #define UDIV_TIME 25
609: #else
610: #define UMUL_TIME 17
611: #define UDIV_TIME 150
612: #endif /* __m88110__ */
613: #endif /* __m88000__ */
614:
1.1.1.2 ! maekawa 615: #if defined (__mips) && W_TYPE_SIZE == 32
1.1 maekawa 616: #if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
617: #define umul_ppmm(w1, w0, u, v) \
1.1.1.2 ! maekawa 618: __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
1.1 maekawa 619: #else
620: #define umul_ppmm(w1, w0, u, v) \
1.1.1.2 ! maekawa 621: __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \
! 622: : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
1.1 maekawa 623: #endif
624: #define UMUL_TIME 10
625: #define UDIV_TIME 100
1.1.1.2 ! maekawa 626: #endif /* __mips */
1.1 maekawa 627:
628: #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
629: #if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
630: #define umul_ppmm(w1, w0, u, v) \
1.1.1.2 ! maekawa 631: __asm__ ("dmultu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
1.1 maekawa 632: #else
633: #define umul_ppmm(w1, w0, u, v) \
1.1.1.2 ! maekawa 634: __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1" \
! 635: : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
1.1 maekawa 636: #endif
637: #define UMUL_TIME 20
638: #define UDIV_TIME 140
1.1.1.2 ! maekawa 639: #endif /* __mips */
1.1 maekawa 640:
641: #if defined (__ns32000__) && W_TYPE_SIZE == 32
642: #define umul_ppmm(w1, w0, u, v) \
643: ({union {UDItype __ll; \
644: struct {USItype __l, __h;} __i; \
1.1.1.2 ! maekawa 645: } __x; \
1.1 maekawa 646: __asm__ ("meid %2,%0" \
1.1.1.2 ! maekawa 647: : "=g" (__x.__ll) \
! 648: : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
! 649: (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
1.1 maekawa 650: #define __umulsidi3(u, v) \
651: ({UDItype __w; \
652: __asm__ ("meid %2,%0" \
653: : "=g" (__w) \
1.1.1.2 ! maekawa 654: : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
1.1 maekawa 655: __w; })
656: #define udiv_qrnnd(q, r, n1, n0, d) \
657: ({union {UDItype __ll; \
658: struct {USItype __l, __h;} __i; \
1.1.1.2 ! maekawa 659: } __x; \
! 660: __x.__i.__h = (n1); __x.__i.__l = (n0); \
1.1 maekawa 661: __asm__ ("deid %2,%0" \
1.1.1.2 ! maekawa 662: : "=g" (__x.__ll) \
! 663: : "0" (__x.__ll), "g" ((USItype)(d))); \
! 664: (r) = __x.__i.__l; (q) = __x.__i.__h; })
1.1 maekawa 665: #define count_trailing_zeros(count,x) \
1.1.1.2 ! maekawa 666: do { \
1.1 maekawa 667: __asm__ ("ffsd %2,%0" \
668: : "=r" ((USItype) (count)) \
1.1.1.2 ! maekawa 669: : "0" ((USItype) 0), "r" ((USItype) (x))); \
1.1 maekawa 670: } while (0)
671: #endif /* __ns32000__ */
672:
1.1.1.2 ! maekawa 673: /* We should test _IBMR2 here when we add assembly support for the system
! 674: vendor compilers. */
! 675: #if (defined (_ARCH_PPC) || defined (_ARCH_PWR) || defined (__powerpc__)) && W_TYPE_SIZE == 32
1.1 maekawa 676: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
677: do { \
678: if (__builtin_constant_p (bh) && (bh) == 0) \
679: __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
1.1.1.2 ! maekawa 680: : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
! 681: else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
1.1 maekawa 682: __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
1.1.1.2 ! maekawa 683: : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
1.1 maekawa 684: else \
685: __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
1.1.1.2 ! maekawa 686: : "=r" (sh), "=&r" (sl) \
! 687: : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
1.1 maekawa 688: } while (0)
689: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
690: do { \
691: if (__builtin_constant_p (ah) && (ah) == 0) \
692: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
1.1.1.2 ! maekawa 693: : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
! 694: else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
1.1 maekawa 695: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
1.1.1.2 ! maekawa 696: : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
1.1 maekawa 697: else if (__builtin_constant_p (bh) && (bh) == 0) \
698: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
1.1.1.2 ! maekawa 699: : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
! 700: else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
1.1 maekawa 701: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
1.1.1.2 ! maekawa 702: : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
1.1 maekawa 703: else \
704: __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
1.1.1.2 ! maekawa 705: : "=r" (sh), "=&r" (sl) \
! 706: : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
1.1 maekawa 707: } while (0)
708: #define count_leading_zeros(count, x) \
1.1.1.2 ! maekawa 709: __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
1.1 maekawa 710: #define COUNT_LEADING_ZEROS_0 32
1.1.1.2 ! maekawa 711: #if defined (_ARCH_PPC) || defined (__powerpc__)
1.1 maekawa 712: #define umul_ppmm(ph, pl, m0, m1) \
713: do { \
714: USItype __m0 = (m0), __m1 = (m1); \
1.1.1.2 ! maekawa 715: __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
1.1 maekawa 716: (pl) = __m0 * __m1; \
717: } while (0)
718: #define UMUL_TIME 15
/* smul_ppmm (ph, pl, m0, m1) -- signed multiply of M0 by M1 (32-bit
   PowerPC).  PH receives the result of the mulhw instruction.  PL is
   computed with an unsigned multiplication of the same values: a signed C
   multiplication that overflows is undefined behaviour, whereas the unsigned
   product wraps.  The asm reads the local copies __m0/__m1, so each argument
   expression is evaluated exactly once.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    SItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
    (pl) = (USItype) __m0 * (USItype) __m1;                             \
  } while (0)
725: #define SMUL_TIME 14
726: #define UDIV_TIME 120
727: #else
728: #define UMUL_TIME 8
729: #define smul_ppmm(xh, xl, m0, m1) \
1.1.1.2 ! maekawa 730: __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
1.1 maekawa 731: #define SMUL_TIME 4
732: #define sdiv_qrnnd(q, r, nh, nl, d) \
1.1.1.2 ! maekawa 733: __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
1.1 maekawa 734: #define UDIV_TIME 100
735: #endif
1.1.1.2 ! maekawa 736: #endif /* 32-bit POWER architecture variants. */
! 737:
! 738: /* We should test _IBMR2 here when we add assembly support for the system
! 739: vendor compilers. */
! 740: #if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64
! 741: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
! 742: do { \
! 743: if (__builtin_constant_p (bh) && (bh) == 0) \
! 744: __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
! 745: : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
! 746: else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
! 747: __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
! 748: : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
! 749: else \
! 750: __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
! 751: : "=r" (sh), "=&r" (sl) \
! 752: : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
! 753: } while (0)
/* sub_ddmmss(sh, sl, ah, al, bh, bl) -- 64-bit PowerPC.
   Two-instruction subtract: a carry-setting subf on the low words (result
   in sl) followed by a carry-consuming instruction for the high word
   (result in sh).  Five asm templates are selected with
   __builtin_constant_p: ah == 0 (subfze on bh), ah == all-ones (subfme on
   bh), bh == 0 (addme on ah), bh == all-ones (addze on ah), and the general
   subfe form taking both ah and bh in registers. */
! 754: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
! 755: do { \
! 756: if (__builtin_constant_p (ah) && (ah) == 0) \
! 757: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
! 758: : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
! 759: else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
! 760: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
! 761: : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
! 762: else if (__builtin_constant_p (bh) && (bh) == 0) \
! 763: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
! 764: : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
! 765: else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
! 766: __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
! 767: : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
! 768: else \
! 769: __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
! 770: : "=r" (sh), "=&r" (sl) \
! 771: : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
! 772: } while (0)
/* count_leading_zeros(count, x) -- 64-bit PowerPC: a single cntlzd
   instruction, x in a register, result written to count.
   COUNT_LEADING_ZEROS_0 is defined as 64; presumably that is the value the
   instruction yields for x == 0 -- confirm against the ISA manual. */
! 773: #define count_leading_zeros(count, x) \
! 774: __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
! 775: #define COUNT_LEADING_ZEROS_0 64
/* umul_ppmm(ph, pl, m0, m1) -- 64-bit PowerPC.
   Unsigned 64x64 multiply: ph receives the result of mulhdu, pl the C
   product of the two operands truncated to UDItype.
   m0 and m1 are copied into locals exactly once and both the asm and the C
   multiply use those copies, so arguments with side effects are evaluated
   a single time.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
! 782: #define UMUL_TIME 15
/* smul_ppmm(ph, pl, m0, m1) -- 64-bit PowerPC.
   Signed 64x64 multiply: ph receives the result of mulhd, pl the C product
   of the two operands as DItype.
   m0 and m1 are copied into locals exactly once and both the asm and the C
   multiply use those copies, so arguments with side effects are evaluated
   a single time.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    DItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
! 789: #define SMUL_TIME 14 /* ??? */
! 790: #define UDIV_TIME 120 /* ??? */
! 791: #endif /* 64-bit PowerPC. */
1.1 maekawa 792:
793: #if defined (__pyr__) && W_TYPE_SIZE == 32
/* add_ssaaaa(sh, sl, ah, al, bh, bl) -- Pyramid.
   addw adds bl into the register holding al (result sl); addwc then adds
   bh into the register holding ah (result sh).  ah and al are tied to the
   output registers through the "0"/"1" matching constraints.
   The outputs are passed as plain lvalues: a cast expression is not an
   lvalue and cannot be an asm output operand.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addw %5,%1\n\taddwc %3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "%1" ((USItype)(al)), "g" ((USItype)(bl)))
/* sub_ddmmss(sh, sl, ah, al, bh, bl) -- Pyramid.
   subw subtracts bl from the register holding al (result sl); subwb then
   subtracts bh from the register holding ah (result sh).  ah and al are
   tied to the output registers through the "0"/"1" matching constraints.
   The outputs are passed as plain lvalues: a cast expression is not an
   lvalue and cannot be an asm output operand.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subw %5,%1\n\tsubwb %3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "1" ((USItype)(al)), "g" ((USItype)(bl)))
1.1 maekawa 804: /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
/* umul_ppmm(w1, w0, u, v) -- Pyramid.  A GNU statement expression: the asm
   writes one UDItype operand (__x.__ll, early-clobber), after which w1 is
   read from the union's first USItype member (__h) and w0 from the second
   (__l).  The asm moves u into the %R0 half of the operand and then issues
   uemul with v. */
805: #define umul_ppmm(w1, w0, u, v) \
806: ({union {UDItype __ll; \
807: struct {USItype __h, __l;} __i; \
1.1.1.2 ! maekawa 808: } __x; \
! 809: __asm__ ("movw %1,%R0\n\tuemul %2,%0" \
! 810: : "=&r" (__x.__ll) \
! 811: : "g" ((USItype) (u)), "g" ((USItype)(v))); \
! 812: (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
1.1 maekawa 813: #endif /* __pyr__ */
814:
815: #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
/* add_ssaaaa(sh, sl, ah, al, bh, bl) -- IBM RT/ROMP.
   "a" adds bl into the register holding al (result sl); "ae" then adds bh
   into the register holding ah (result sh).  ah and al are tied to the
   output registers through the "0"/"1" matching constraints.
   The outputs are passed as plain lvalues: a cast expression is not an
   lvalue and cannot be an asm output operand.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("a %1,%5\n\tae %0,%3" \
           : "=r" (sh), "=&r" (sl) \
           : "%0" ((USItype)(ah)), "r" ((USItype)(bh)), \
             "%1" ((USItype)(al)), "r" ((USItype)(bl)))
/* sub_ddmmss(sh, sl, ah, al, bh, bl) -- IBM RT/ROMP.
   "s" subtracts bl from the register holding al (result sl); "se" then
   subtracts bh from the register holding ah (result sh).  ah and al are
   tied to the output registers through the "0"/"1" matching constraints.
   The outputs are passed as plain lvalues: a cast expression is not an
   lvalue and cannot be an asm output operand.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("s %1,%5\n\tse %0,%3" \
           : "=r" (sh), "=&r" (sl) \
           : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \
             "1" ((USItype)(al)), "r" ((USItype)(bl)))
/* smul_ppmm(ph, pl, m0, m1) -- IBM RT/ROMP.
   Clears r2, loads m0 into special register r10 (mts), issues sixteen
   multiply-step instructions against m1, then copies r2 into ph (cas) and
   reads r10 back into pl (mfs).  r2 is declared clobbered.
   Each assembler line is its own string literal ending in "\n", and every
   line of the macro ends in a continuation, so the definition does not
   depend on newlines embedded in a string literal.  The macro also ends
   without a trailing semicolon or continuation, so the #define that
   follows it is no longer spliced into this macro's body.  */
#define smul_ppmm(ph, pl, m0, m1) \
  __asm__ ( \
       "s\tr2,r2\n" \
       "\tmts\tr10,%2\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tm\tr2,%3\n" \
       "\tcas\t%0,r2,r0\n" \
       "\tmfs\tr10,%1" \
           : "=r" (ph), "=r" (pl) \
           : "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \
           : "r2")
1.1 maekawa 851: #define UMUL_TIME 20
852: #define UDIV_TIME 200
/* count_leading_zeros(count, x) -- IBM RT/ROMP.
   The clz instruction is applied to x >> 16 when x >= 0x10000; otherwise it
   is applied to x itself and 16 is added to the result.
   NOTE(review): this implies clz examines only a 16-bit quantity --
   confirm against the ROMP instruction reference.
   count is passed to the asm as a plain lvalue: a cast expression is not an
   lvalue and cannot be an asm output operand.  x is evaluated more than
   once, so it must be free of side effects.  */
#define count_leading_zeros(count, x) \
  do { \
    if ((x) >= 0x10000) \
      __asm__ ("clz %0,%1" \
               : "=r" (count) : "r" ((USItype)(x) >> 16)); \
    else \
      { \
        __asm__ ("clz %0,%1" \
                 : "=r" (count) : "r" ((USItype)(x))); \
        (count) += 16; \
      } \
  } while (0)
865: #endif /* RT/ROMP */
866:
867: #if defined (__sh2__) && W_TYPE_SIZE == 32
/* umul_ppmm(w1, w0, u, v) -- SH2.  dmulu.l multiplies u by v; the two
   "sts" instructions then copy macl into w0 and mach into w1.  Both macl
   and mach are listed as clobbered. */
868: #define umul_ppmm(w1, w0, u, v) \
1.1.1.2 ! maekawa 869: __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \
! 870: : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
1.1 maekawa 871: #define UMUL_TIME 5
872: #endif
873:
874: #if defined (__sparc__) && W_TYPE_SIZE == 32
/* add_ssaaaa(sh, sl, ah, al, bh, bl) -- 32-bit SPARC.  addcc adds al and bl
   into sl and sets the condition codes; addx then adds ah and bh into sh.
   bh/bl may be registers or "I" immediates, ah/al registers or "J"
   constants (printed through the %r operand modifier).  The clobber list
   is supplied by the __CLOBBER_CC macro defined elsewhere in this file. */
875: #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 876: __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
! 877: : "=r" (sh), "=&r" (sl) \
! 878: : "%rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl) \
1.1 maekawa 879: __CLOBBER_CC)
/* sub_ddmmss(sh, sl, ah, al, bh, bl) -- 32-bit SPARC.  subcc computes
   al - bl into sl and sets the condition codes; subx then computes ah - bh
   into sh.  Operand classes match add_ssaaaa except that no operand is
   marked commutative.  The clobber list is supplied by the __CLOBBER_CC
   macro defined elsewhere in this file. */
880: #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1.1.1.2 ! maekawa 881: __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
! 882: : "=r" (sh), "=&r" (sl) \
! 883: : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \
1.1 maekawa 884: __CLOBBER_CC)
1.1.1.2 ! maekawa 885: #if defined (__sparc_v9__) || defined (__sparcv9)
! 886: /* Perhaps we should use floating-point operations here? */
/* umul_ppmm(w1, w0, u, v) for SPARC v9 built with 32-bit words.  The
   mulx-based variant is compiled out with "#if 0"; the active definition
   is the umul form, which puts the umul result in w0 and reads the %y
   register into w1. */
! 887: #if 0
! 888: /* Triggers a bug making mpz/tests/t-gcd.c fail.
! 889: Perhaps we simply need to explicitly zero-extend the inputs? */
! 890: #define umul_ppmm(w1, w0, u, v) \
! 891: __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \
! 892: "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1")
! 893: #else
! 894: /* Use v8 umul until above bug is fixed. */
! 895: #define umul_ppmm(w1, w0, u, v) \
! 896: __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
! 897: #endif
! 898: /* Use a plain v8 divide for v9. */
/* udiv_qrnnd(q, r, n1, n0, d) -- SPARC v9 using the v8 divide.  n1 is
   moved into %y, three nops follow, and udiv divides by d with n0 as the
   register operand, leaving the quotient in __q.  The remainder is then
   computed in C as n0 - __q * d.  n0 and d are each evaluated twice, so
   they must be free of side effects. */
! 899: #define udiv_qrnnd(q, r, n1, n0, d) \
! 900: do { \
! 901: USItype __q; \
! 902: __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
! 903: : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
! 904: (r) = (n0) - __q * (d); \
! 905: (q) = __q; \
! 906: } while (0)
! 907: #else
1.1 maekawa 908: #if defined (__sparc_v8__)
909: /* Don't match immediate range because, 1) it is not often useful,
910: 2) the 'I' flag thinks of the range as a 13 bit signed interval,
911: while we want to match a 13 bit interval, sign extended to 32 bits,
912: but INTERPRETED AS UNSIGNED. */
/* umul_ppmm(w1, w0, u, v) -- SPARC v8: umul of u and v (registers only, see
   the note above on immediates) writes w0; "rd %y" then supplies w1. */
913: #define umul_ppmm(w1, w0, u, v) \
1.1.1.2 ! maekawa 914: __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
1.1 maekawa 915: #define UMUL_TIME 5
916: #ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
/* udiv_qrnnd(q, r, n1, n0, d) -- SPARC v8 (not SuperSPARC).  n1 is moved
   into %y, three nops follow, and udiv divides by d with n0 as the
   register operand, leaving the quotient in __q.  The remainder is then
   computed in C as n0 - __q * d.  n0 and d are each evaluated twice, so
   they must be free of side effects. */
917: #define udiv_qrnnd(q, r, n1, n0, d) \
918: do { \
919: USItype __q; \
920: __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
1.1.1.2 ! maekawa 921: : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
1.1 maekawa 922: (r) = (n0) - __q * (d); \
923: (q) = __q; \
924: } while (0)
925: #define UDIV_TIME 25
1.1.1.2 ! maekawa 926: #else
! 927: #define UDIV_TIME 60 /* SuperSPARC timing */
1.1 maekawa 928: #endif /* SUPERSPARC */
929: #else /* ! __sparc_v8__ */
930: #if defined (__sparclite__)
931: /* This has hardware multiply but not divide. It also has two additional
932: instructions scan (ffs from high bit) and divscc. */
/* umul_ppmm(w1, w0, u, v) -- SPARClite: umul of u and v (both in
   registers) writes w0; "rd %y" then supplies w1. */
933: #define umul_ppmm(w1, w0, u, v) \
1.1.1.2 ! maekawa 934: __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
1.1 maekawa 935: #define UMUL_TIME 5
/* udiv_qrnnd(q, r, n1, n0, d) -- SPARClite, built from divscc steps.
   n1 is written to %y, the condition codes are set with "tst %g0", and 32
   divscc steps are issued: the first takes n0, the next 30 accumulate in
   %g1, and the last writes q.  r is then read from %y and, if the "bl"
   branch is taken, corrected by adding d.  %g1 and the condition codes are
   clobbered (the latter via __AND_CLOBBER_CC, defined elsewhere in this
   file).
   Each assembler line is its own string literal ending in "\n", and every
   line of the macro ends in a continuation, so the definition does not
   depend on newlines embedded in a string literal.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n" \
           "\twr\t%%g0,%2,%%y\t! Not a delayed write for sparclite\n" \
           "\ttst\t%%g0\n" \
           "\tdivscc\t%3,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%%g1\n" \
           "\tdivscc\t%%g1,%4,%0\n" \
           "\trd\t%%y,%1\n" \
           "\tbl,a 1f\n" \
           "\tadd\t%1,%4,%1\n" \
           "1:\t! End of inline udiv_qrnnd" \
           : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \
           : "%g1" __AND_CLOBBER_CC)
978: #define UDIV_TIME 37
/* count_leading_zeros(count, x) -- SPARClite "scan" instruction.
   x is the input register operand and count receives the result; the
   operands were previously bound the other way round, which overwrote x
   and left count unset.
   NOTE(review): the immediate second operand of scan is kept as 0; verify
   it against the SPARClite manual.  */
#define count_leading_zeros(count, x) \
  __asm__ ("scan %1,0,%0" : "=r" (count) : "r" (x))
1.1 maekawa 981: /* Early sparclites return 63 for an argument of 0, but they warn that future
982: implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
983: undefined. */
984: #endif /* __sparclite__ */
985: #endif /* __sparc_v8__ */
1.1.1.2 ! maekawa 986: #endif /* __sparc_v9__ */
1.1 maekawa 987: /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
988: #ifndef umul_ppmm
/* umul_ppmm(w1, w0, u, v) -- SPARC v7, built from multiply steps.
   u is written to %y; %g2 is set to u masked by the sign of v (sra/and);
   %g1 is cleared together with the condition codes; 32 mulscc steps
   against v and a final step against 0 accumulate in %g1.  w1 is
   %g1 + %g2 and w0 is read back from %y.  %g1, %g2 and the condition codes
   are clobbered (the latter via __AND_CLOBBER_CC, defined elsewhere in
   this file).
   Each assembler line is its own string literal ending in "\n", and every
   line of the macro ends in a continuation, so the definition does not
   depend on newlines embedded in a string literal.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("! Inlined umul_ppmm\n" \
           "\twr\t%%g0,%2,%%y\t! SPARC has 0-3 delay insn after a wr\n" \
           "\tsra\t%3,31,%%g2\t! Don't move this insn\n" \
           "\tand\t%2,%%g2,%%g2\t! Don't move this insn\n" \
           "\tandcc\t%%g0,0,%%g1\t! Don't move this insn\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,%3,%%g1\n" \
           "\tmulscc\t%%g1,0,%%g1\n" \
           "\tadd\t%%g1,%%g2,%0\n" \
           "\trd\t%%y,%1" \
           : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \
           : "%g1", "%g2" __AND_CLOBBER_CC)
1032: #define UMUL_TIME 39 /* 39 instructions */
1033: #endif
1034: #ifndef udiv_qrnnd
1035: #ifndef LONGLONG_STANDALONE
/* udiv_qrnnd(q, r, n1, n0, d) -- SPARC fallback used when no inline
   version was selected above and LONGLONG_STANDALONE is not defined.
   Calls the out-of-line function __MPN(udiv_qrnnd), which returns the
   quotient and stores the remainder through its first (pointer) argument;
   the prototype follows the macro. */
1036: #define udiv_qrnnd(q, r, n1, n0, d) \
1037: do { USItype __r; \
1.1.1.2 ! maekawa 1038: (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
1.1 maekawa 1039: (r) = __r; \
1040: } while (0)
1.1.1.2 ! maekawa 1041: extern USItype __MPN(udiv_qrnnd) _PROTO ((USItype *, USItype, USItype, USItype));
! 1042: #ifndef UDIV_TIME
1.1 maekawa 1043: #define UDIV_TIME 140
1.1.1.2 ! maekawa 1044: #endif
1.1 maekawa 1045: #endif /* LONGLONG_STANDALONE */
1046: #endif /* udiv_qrnnd */
1047: #endif /* __sparc__ */
1048:
1049: #if defined (__vax__) && W_TYPE_SIZE == 32
/* add_ssaaaa(sh, sl, ah, al, bh, bl) -- VAX.
   addl2 adds bl into the operand holding al (result sl); adwc then adds bh
   into the operand holding ah (result sh).  ah and al are tied to the
   outputs through the "0"/"1" matching constraints.
   The outputs are passed as plain lvalues: a cast expression is not an
   lvalue and cannot be an asm output operand.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
           : "=g" (sh), "=&g" (sl) \
           : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "%1" ((USItype)(al)), "g" ((USItype)(bl)))
/* sub_ddmmss(sh, sl, ah, al, bh, bl) -- VAX.
   subl2 subtracts bl from the operand holding al (result sl); sbwc then
   subtracts bh from the operand holding ah (result sh).  ah and al are
   tied to the outputs through the "0"/"1" matching constraints.
   The outputs are passed as plain lvalues: a cast expression is not an
   lvalue and cannot be an asm output operand.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
           : "=g" (sh), "=&g" (sl) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "1" ((USItype)(al)), "g" ((USItype)(bl)))
/* smul_ppmm(xh, xl, m0, m1) -- VAX.  m0 and m1 are copied once into
   USItype locals; emul multiplies them (with the literal $0 as its third
   operand) into the UDItype member of a union.  xh and xl are then read
   from the union's __h and __l members, where __l is declared first. */
! 1060: #define smul_ppmm(xh, xl, m0, m1) \
1.1 maekawa 1061: do { \
1062: union {UDItype __ll; \
1063: struct {USItype __l, __h;} __i; \
1.1.1.2 ! maekawa 1064: } __x; \
1.1 maekawa 1065: USItype __m0 = (m0), __m1 = (m1); \
1066: __asm__ ("emul %1,%2,$0,%0" \
1.1.1.2 ! maekawa 1067: : "=g" (__x.__ll) : "g" (__m0), "g" (__m1)); \
! 1068: (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
1.1 maekawa 1069: } while (0)
/* sdiv_qrnnd(q, r, n1, n0, d) -- VAX.  n1 and n0 are stored into the __h
   and __l members of a union (signed SItype fields, __l declared first) so
   that the pair can be handed to ediv as one DItype operand; ediv takes d
   and that operand and writes q and r. */
1070: #define sdiv_qrnnd(q, r, n1, n0, d) \
1071: do { \
1072: union {DItype __ll; \
1073: struct {SItype __l, __h;} __i; \
1.1.1.2 ! maekawa 1074: } __x; \
! 1075: __x.__i.__h = n1; __x.__i.__l = n0; \
1.1 maekawa 1076: __asm__ ("ediv %3,%2,%0,%1" \
1.1.1.2 ! maekawa 1077: : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \
1.1 maekawa 1078: } while (0)
1079: #endif /* __vax__ */
1080:
1081: #if defined (__z8000__) && W_TYPE_SIZE == 16
/* add_ssaaaa(sh, sl, ah, al, bh, bl) -- Z8000, 16-bit words.
   "add" adds bl into the register holding al (result sl); "adc" then adds
   bh into the register holding ah (result sh).  ah and al are tied to the
   output registers through the "0"/"1" matching constraints.
   The outputs are passed as plain lvalues: a cast expression is not an
   lvalue and cannot be an asm output operand.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
           : "=r" (sh), "=&r" (sl) \
           : "%0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
             "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
/* sub_ddmmss(sh, sl, ah, al, bh, bl) -- Z8000, 16-bit words.
   "sub" subtracts bl from the register holding al (result sl); "sbc" then
   subtracts bh from the register holding ah (result sh).  ah and al are
   tied to the output registers through the "0"/"1" matching constraints.
   The outputs are passed as plain lvalues: a cast expression is not an
   lvalue and cannot be an asm output operand.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
           : "=r" (sh), "=&r" (sl) \
           : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
             "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
/* umul_ppmm(xh, xl, m0, m1) -- Z8000, 16-bit words.
   "mult" is issued on the operands and its two result halves are copied to
   xh and xl.  xh is then adjusted in C by adding, for each operand whose
   top bit (bit 15) is set, the other operand -- the usual correction for
   obtaining an unsigned high word from a signed multiply.
   m0 and m1 are captured once in __m0/__m1 and the asm now uses those
   copies too, so arguments with side effects are evaluated only once.  */
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    union {long int __ll; \
           struct {unsigned int __h, __l;} __i; \
          } __x; \
    unsigned int __m0 = (m0), __m1 = (m1); \
    __asm__ ("mult %S0,%H3" \
             : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \
             : "%1" (__m0), "rQR" (__m1)); \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
    (xh) += ((((signed int) __m0 >> 15) & __m1) \
             + (((signed int) __m1 >> 15) & __m0)); \
  } while (0)
1105: #endif /* __z8000__ */
1106:
1107: #endif /* __GNUC__ */
1108:
1109:
1110: #if !defined (umul_ppmm) && defined (__umulsidi3)
/* umul_ppmm(ph, pl, m0, m1) in terms of __umulsidi3.
   Calls __umulsidi3 (m0, m1) once to get the double-word product, then
   stores its upper W_TYPE_SIZE bits in ph and its low word in pl.
   Wrapped in do { } while (0) so that "umul_ppmm (...);" is a single
   statement (safe as an unbraced if/else body), and the output arguments
   are parenthesized.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDWtype __ll = __umulsidi3 (m0, m1); \
    (ph) = (UWtype) (__ll >> W_TYPE_SIZE); \
    (pl) = (UWtype) __ll; \
  } while (0)
1117: #endif
1118:
1119: #if !defined (__umulsidi3)
/* __umulsidi3(u, v): GNU statement expression that calls umul_ppmm to get
   the high and low product words and yields them combined as a UDWtype,
   with the high word shifted up by W_TYPE_SIZE bits. */
1120: #define __umulsidi3(u, v) \
1121: ({UWtype __hi, __lo; \
1122: umul_ppmm (__hi, __lo, u, v); \
1123: ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
1124: #endif
1125:
1.1.1.2 ! maekawa 1126:
! 1127: /* Note the prototypes are under !defined (umul_ppmm) etc. too, since the HPPA
! 1128: versions above are different and we don't want to conflict. */
! 1129:
! 1130: #if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm
/* umul_ppmm(wh, wl, u, v) via the out-of-line routine __MPN(umul_ppmm)
   (also reachable under the alias mpn_umul_ppmm defined here).  The routine
   is called with a pointer to a temporary limb plus u and v cast to
   mp_limb_t; its return value becomes wh and the limb it stored through
   the pointer becomes wl. */
! 1131: #define mpn_umul_ppmm __MPN(umul_ppmm)
! 1132: extern mp_limb_t mpn_umul_ppmm _PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
! 1133: #define umul_ppmm(wh, wl, u, v) \
! 1134: do { \
! 1135: mp_limb_t __umul_ppmm__p0; \
! 1136: (wh) = __MPN(umul_ppmm) (&__umul_ppmm__p0, \
! 1137: (mp_limb_t) (u), (mp_limb_t) (v)); \
! 1138: (wl) = __umul_ppmm__p0; \
! 1139: } while (0)
! 1140: #endif
! 1141:
! 1142: #if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd
! 1143: #define mpn_udiv_qrnnd __MPN(udiv_qrnnd)
! 1144: extern mp_limb_t mpn_udiv_qrnnd _PROTO ((mp_limb_t *,
! 1145: mp_limb_t, mp_limb_t, mp_limb_t));
/* udiv_qrnnd(q, r, n1, n0, d) via the out-of-line routine mpn_udiv_qrnnd.
   The routine is called with a pointer to a temporary limb plus n1, n0 and
   d cast to mp_limb_t; its return value becomes q and the limb it stored
   through the pointer becomes r.
   d is parenthesized before the cast, like n1 and n0, so the cast applies
   to the whole argument expression rather than to its first operand.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    mp_limb_t __udiv_qrnnd__r; \
    (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \
                          (mp_limb_t) (n1), (mp_limb_t) (n0), \
                          (mp_limb_t) (d)); \
    (r) = __udiv_qrnnd__r; \
  } while (0)
! 1153: #endif
! 1154:
! 1155:
1.1 maekawa 1156: /* If this machine has no inline assembler, use C macros. */
1157:
1158: #if !defined (add_ssaaaa)
/* add_ssaaaa(sh, sl, ah, al, bh, bl) in plain C.
   Computes the two-word sum (ah,al) + (bh,bl): sl receives al + bl
   (wrapping in UWtype) and sh receives ah + bh plus a carry of 1 when the
   low sum wrapped, detected by the sum being smaller than al.
   al is captured once in a local, so every argument is evaluated exactly
   once; sl is assigned last so it may name the same object as an input.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __add_al = (al); \
    UWtype __x = __add_al + (bl); \
    (sh) = (ah) + (bh) + (__x < __add_al); \
    (sl) = __x; \
  } while (0)
1166: #endif
1167:
1168: #if !defined (sub_ddmmss)
/* sub_ddmmss(sh, sl, ah, al, bh, bl) in plain C.
   Computes the two-word difference (ah,al) - (bh,bl): sl receives al - bl
   (wrapping in UWtype) and sh receives ah - bh minus a borrow of 1 when
   the low difference wrapped, detected by the difference exceeding al.
   al is captured once in a local, so every argument is evaluated exactly
   once; sl is assigned last so it may name the same object as an input.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __sub_al = (al); \
    UWtype __x = __sub_al - (bl); \
    (sh) = (ah) - (bh) - (__x > __sub_al); \
    (sl) = __x; \
  } while (0)
1176: #endif
1177:
1.1.1.2 ! maekawa 1178: /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
! 1179: smul_ppmm. */
! 1180: #if !defined (umul_ppmm) && defined (smul_ppmm)
/* umul_ppmm(w1, w0, u, v) built on smul_ppmm.  u and v are captured once
   as UWtype; smul_ppmm supplies the low word directly in w0 and a high
   word in __w1.  w1 is __w1 plus v when the top bit of u is set, plus u
   when the top bit of v is set (each mask is 0 or all-ones, formed by
   negating the shifted-down top bit). */
! 1181: #define umul_ppmm(w1, w0, u, v) \
! 1182: do { \
! 1183: UWtype __w1; \
! 1184: UWtype __xm0 = (u), __xm1 = (v); \
! 1185: smul_ppmm (__w1, w0, __xm0, __xm1); \
! 1186: (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
! 1187: + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
! 1188: } while (0)
! 1189: #endif
! 1190:
! 1191: /* If we still don't have umul_ppmm, define it using plain C. */
1.1 maekawa 1192: #if !defined (umul_ppmm)
/* umul_ppmm(w1, w0, u, v) in plain C.  u and v are captured once and each
   split into a low and a high half-word with __ll_lowpart/__ll_highpart.
   The four half-word products __x0..__x3 are formed in UWtype; the two
   cross products are summed together with the high half of __x0, and a
   carry out of that sum is added into __x3 at weight __ll_B.  w1 is __x3
   plus the high half of the cross sum; w0 is the cross sum shifted up by
   half a word plus the low half of __x0. */
1193: #define umul_ppmm(w1, w0, u, v) \
1194: do { \
1195: UWtype __x0, __x1, __x2, __x3; \
1196: UHWtype __ul, __vl, __uh, __vh; \
1197: UWtype __u = (u), __v = (v); \
1198: \
1199: __ul = __ll_lowpart (__u); \
1200: __uh = __ll_highpart (__u); \
1201: __vl = __ll_lowpart (__v); \
1202: __vh = __ll_highpart (__v); \
1203: \
1204: __x0 = (UWtype) __ul * __vl; \
1205: __x1 = (UWtype) __ul * __vh; \
1206: __x2 = (UWtype) __uh * __vl; \
1207: __x3 = (UWtype) __uh * __vh; \
1208: \
1209: __x1 += __ll_highpart (__x0);/* this can't give carry */ \
1210: __x1 += __x2; /* but this indeed can */ \
1211: if (__x1 < __x2) /* did we get it? */ \
1212: __x3 += __ll_B; /* yes, add it in the proper pos. */ \
1213: \
1214: (w1) = __x3 + __ll_highpart (__x1); \
1.1.1.2 ! maekawa 1215: (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \
1.1 maekawa 1216: } while (0)
1217: #endif
1218:
1.1.1.2 ! maekawa 1219: /* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
! 1220: exist in one form or another). */
! 1221: #if !defined (smul_ppmm)
/* smul_ppmm(w1, w0, u, v) built on umul_ppmm.  u and v are captured once
   as UWtype; umul_ppmm supplies the low word directly in w0 and a high
   word in __w1.  w1 is __w1 minus v when the top bit of u is set, minus u
   when the top bit of v is set (each mask is 0 or all-ones, formed by
   negating the shifted-down top bit). */
1.1 maekawa 1222: #define smul_ppmm(w1, w0, u, v) \
1223: do { \
1224: UWtype __w1; \
1.1.1.2 ! maekawa 1225: UWtype __xm0 = (u), __xm1 = (v); \
! 1226: umul_ppmm (__w1, w0, __xm0, __xm1); \
! 1227: (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
! 1228: - (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
1.1 maekawa 1229: } while (0)
1230: #endif
1231:
1232: /* Define this unconditionally, so it can be used for debugging. */
/* __udiv_qrnnd_c(q, r, n1, n0, d) -- two-word by one-word division in C.
   Divides the two-word value (n1,n0) by d, storing the quotient in q and
   the remainder in r.  The quotient is produced as two half-word digits:
   each digit is first estimated by dividing by the high half of d, then
   decremented at most twice while the partial remainder is smaller than
   digit * (low half of d).
   Preconditions implied by the surrounding file: udiv_qrnnd is mapped to
   this macro together with UDIV_NEEDS_NORMALIZATION 1, i.e. callers are
   expected to pass d with its most significant bit set.
   NOTE(review): n1 < d is also assumed so the quotient fits in one word --
   confirm against the udiv_qrnnd contract documented earlier in the file.
   n1, n0 and d are each captured once in a UWtype local, so arguments with
   side effects are evaluated exactly once.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do { \
    UWtype __udiv_n1 = (n1), __udiv_n0 = (n0), __udiv_d = (d); \
    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
    __d1 = __ll_highpart (__udiv_d); \
    __d0 = __ll_lowpart (__udiv_d); \
 \
    /* High quotient digit. */ \
    __q1 = __udiv_n1 / __d1; \
    __r1 = __udiv_n1 - __q1 * __d1; \
    __m = (UWtype) __q1 * __d0; \
    __r1 = __r1 * __ll_B | __ll_highpart (__udiv_n0); \
    if (__r1 < __m) \
      { \
        __q1--, __r1 += __udiv_d; \
        if (__r1 >= __udiv_d) /* i.e. we didn't get carry when adding to __r1 */ \
          if (__r1 < __m) \
            __q1--, __r1 += __udiv_d; \
      } \
    __r1 -= __m; \
 \
    /* Low quotient digit. */ \
    __q0 = __r1 / __d1; \
    __r0 = __r1 - __q0 * __d1; \
    __m = (UWtype) __q0 * __d0; \
    __r0 = __r0 * __ll_B | __ll_lowpart (__udiv_n0); \
    if (__r0 < __m) \
      { \
        __q0--, __r0 += __udiv_d; \
        if (__r0 >= __udiv_d) \
          if (__r0 < __m) \
            __q0--, __r0 += __udiv_d; \
      } \
    __r0 -= __m; \
 \
    (q) = (UWtype) __q1 * __ll_B | __q0; \
    (r) = __r0; \
  } while (0)
1268:
1269: /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1270: __udiv_w_sdiv (defined in libgcc or elsewhere). */
1271: #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
/* udiv_qrnnd(q, r, nh, nl, d) for targets that only provide sdiv_qrnnd.
   Calls the out-of-line helper __MPN(udiv_w_sdiv) with a pointer to a
   temporary word plus nh, nl and d; the return value becomes q and the
   word stored through the pointer becomes r. */
1272: #define udiv_qrnnd(q, r, nh, nl, d) \
1273: do { \
1274: UWtype __r; \
1275: (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
1276: (r) = __r; \
1277: } while (0)
1278: #endif
1279:
1280: /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
1281: #if !defined (udiv_qrnnd)
1282: #define UDIV_NEEDS_NORMALIZATION 1
1283: #define udiv_qrnnd __udiv_qrnnd_c
1284: #endif
1285:
1286: #if !defined (count_leading_zeros)
1287: extern
1.1.1.2 ! maekawa 1288: #if __STDC__
1.1 maekawa 1289: const
1290: #endif
1291: unsigned char __clz_tab[];
/* count_leading_zeros(count, x) in plain C, using the __clz_tab lookup
   table declared just above.  x is captured once in __xr.  A shift amount
   __a is chosen first: for W_TYPE_SIZE <= 32 by comparing __xr against
   1 << __BITS4, 1 << 2*__BITS4 and 1 << 3*__BITS4 (giving 0, __BITS4,
   2*__BITS4 or 3*__BITS4); for wider words by scanning down from the top
   in 8-bit steps until a non-zero byte is found (or __a reaches 0).  The
   result is W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a). */
1292: #define count_leading_zeros(count, x) \
1293: do { \
1294: UWtype __xr = (x); \
1295: UWtype __a; \
1296: \
1297: if (W_TYPE_SIZE <= 32) \
1298: { \
1299: __a = __xr < ((UWtype) 1 << 2*__BITS4) \
1300: ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
1301: : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4);\
1302: } \
1303: else \
1304: { \
1305: for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
1306: if (((__xr >> __a) & 0xff) != 0) \
1307: break; \
1308: } \
1309: \
1310: (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
1311: } while (0)
1312: /* This version gives a well-defined value for zero. */
1313: #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1.1.1.2 ! maekawa 1314: #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
1.1 maekawa 1315: #endif
1316:
1317: #if !defined (count_trailing_zeros)
1318: /* Define count_trailing_zeros using count_leading_zeros. The latter might be
1319: defined in asm, but if it is not, the C version above is good enough. */
/* count_trailing_zeros(count, x): x is captured once; __ctz_x & -__ctz_x
   isolates its lowest set bit, count_leading_zeros is applied to that
   value, and the result is converted with W_TYPE_SIZE - 1 - __ctz_c.
   NOTE(review): for x == 0 the result depends on what count_leading_zeros
   returns for 0, which is not defined on every target above. */
1320: #define count_trailing_zeros(count, x) \
1321: do { \
1322: UWtype __ctz_x = (x); \
1323: UWtype __ctz_c; \
1324: count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
1325: (count) = W_TYPE_SIZE - 1 - __ctz_c; \
1326: } while (0)
1327: #endif
1328:
1329: #ifndef UDIV_NEEDS_NORMALIZATION
1330: #define UDIV_NEEDS_NORMALIZATION 0
1331: #endif
1.1.1.2 ! maekawa 1332:
! 1333: /* Give defaults for UMUL_TIME and UDIV_TIME. */
! 1334: #ifndef UMUL_TIME
! 1335: #define UMUL_TIME 1
! 1336: #endif
! 1337:
! 1338: #ifndef UDIV_TIME
! 1339: #define UDIV_TIME UMUL_TIME
! 1340: #endif
! 1341:
! 1342: /* count_trailing_zeros is often on the slow side, so make that the default */
! 1343: #ifndef COUNT_TRAILING_ZEROS_TIME
! 1344: #define COUNT_TRAILING_ZEROS_TIME 15 /* cycles */
! 1345: #endif
! 1346:
! 1347:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>