Annotation of OpenXM_contrib/gmp/tune/modlinv.c, Revision 1.1
1.1 ! ohara 1: /* Alternate implementations of modlimb_invert to compare speeds. */
! 2:
! 3: /*
! 4: Copyright 2000, 2002 Free Software Foundation, Inc.
! 5:
! 6: This file is part of the GNU MP Library.
! 7:
! 8: The GNU MP Library is free software; you can redistribute it and/or modify
! 9: it under the terms of the GNU Lesser General Public License as published by
! 10: the Free Software Foundation; either version 2.1 of the License, or (at your
! 11: option) any later version.
! 12:
! 13: The GNU MP Library is distributed in the hope that it will be useful, but
! 14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: License for more details.
! 17:
! 18: You should have received a copy of the GNU Lesser General Public License
! 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: MA 02111-1307, USA.
! 22: */
! 23:
! 24: #include <stdio.h>
! 25: #include "gmp.h"
! 26: #include "gmp-impl.h"
! 27: #include "longlong.h"
! 28: #include "speed.h"
! 29:
! 30:
! 31: /* Like the standard version in gmp-impl.h, but with the expressions using a
! 32: "1-" form. This has the same number of steps, but "1-" is on the
! 33: dependent chain, whereas the "2*" in the standard version isn't.
! 34: Depending on the CPU this should be the same or a touch slower. */
! 35:
! 36: #if BITS_PER_MP_LIMB <= 32
! 37: #define modlimb_invert_mul1(inv,n) \
! 38: do { \
! 39: mp_limb_t __n = (n); \
! 40: mp_limb_t __inv; \
! 41: ASSERT ((__n & 1) == 1); \
! 42: __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
! 43: __inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \
! 44: __inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \
! 45: ASSERT (__inv * __n == 1); \
! 46: (inv) = __inv; \
! 47: } while (0)
! 48: #endif
! 49:
! 50: #if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
! 51: #define modlimb_invert_mul1(inv,n) \
! 52: do { \
! 53: mp_limb_t __n = (n); \
! 54: mp_limb_t __inv; \
! 55: ASSERT ((__n & 1) == 1); \
! 56: __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
! 57: __inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \
! 58: __inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \
! 59: __inv = (1 - __n * __inv) * __inv + __inv; /* 64 */ \
! 60: ASSERT (__inv * __n == 1); \
! 61: (inv) = __inv; \
! 62: } while (0)
! 63: #endif
! 64:
! 65:
! 66: /* The loop based version used in GMP 3.0 and earlier. Usually slower than
! 67: multiplying, due to the number of steps that must be performed. Much
! 68: slower when the processor has a good multiply. */
! 69:
! 70: #define modlimb_invert_loop(inv,n) \
! 71: do { \
! 72: mp_limb_t __v = (n); \
! 73: mp_limb_t __v_orig = __v; \
! 74: mp_limb_t __make_zero = 1; \
! 75: mp_limb_t __two_i = 1; \
! 76: mp_limb_t __v_inv = 0; \
! 77: \
! 78: ASSERT ((__v & 1) == 1); \
! 79: \
! 80: do \
! 81: { \
! 82: while ((__two_i & __make_zero) == 0) \
! 83: __two_i <<= 1, __v <<= 1; \
! 84: __v_inv += __two_i; \
! 85: __make_zero -= __v; \
! 86: } \
! 87: while (__make_zero); \
! 88: \
! 89: ASSERT (__v_orig * __v_inv == 1); \
! 90: (inv) = __v_inv; \
! 91: } while (0)
! 92:
! 93:
! 94: /* Another loop based version with conditionals, but doing a fixed number of
! 95: steps. */
! 96:
! 97: #define modlimb_invert_cond(inv,n) \
! 98: do { \
! 99: mp_limb_t __n = (n); \
! 100: mp_limb_t __rem = (1 - __n) >> 1; \
! 101: mp_limb_t __inv = GMP_LIMB_HIGHBIT; \
! 102: int __count; \
! 103: \
! 104: ASSERT ((__n & 1) == 1); \
! 105: \
! 106: __count = BITS_PER_MP_LIMB-1; \
! 107: do \
! 108: { \
! 109: __inv >>= 1; \
! 110: if (__rem & 1) \
! 111: { \
! 112: __inv |= GMP_LIMB_HIGHBIT; \
! 113: __rem -= __n; \
! 114: } \
! 115: __rem >>= 1; \
! 116: } \
! 117: while (-- __count); \
! 118: \
! 119: ASSERT (__inv * __n == 1); \
! 120: (inv) = __inv; \
! 121: } while (0)
! 122:
! 123:
! 124: /* Another loop based bitwise version, but purely arithmetic, no
! 125: conditionals. */
! 126:
! 127: #define modlimb_invert_arith(inv,n) \
! 128: do { \
! 129: mp_limb_t __n = (n); \
! 130: mp_limb_t __rem = (1 - __n) >> 1; \
! 131: mp_limb_t __inv = GMP_LIMB_HIGHBIT; \
! 132: mp_limb_t __lowbit; \
! 133: int __count; \
! 134: \
! 135: ASSERT ((__n & 1) == 1); \
! 136: \
! 137: __count = BITS_PER_MP_LIMB-1; \
! 138: do \
! 139: { \
! 140: __lowbit = __rem & 1; \
! 141: __inv = (__inv >> 1) | (__lowbit << (BITS_PER_MP_LIMB-1)); \
! 142: __rem = (__rem - (__n & -__lowbit)) >> 1; \
! 143: } \
! 144: while (-- __count); \
! 145: \
! 146: ASSERT (__inv * __n == 1); \
! 147: (inv) = __inv; \
! 148: } while (0)
! 149:
! 150:
! 151: double
! 152: speed_modlimb_invert_mul1 (struct speed_params *s)
! 153: {
! 154: SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_mul1);
! 155: }
! 156: double
! 157: speed_modlimb_invert_loop (struct speed_params *s)
! 158: {
! 159: SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_loop);
! 160: }
! 161: double
! 162: speed_modlimb_invert_cond (struct speed_params *s)
! 163: {
! 164: SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_cond);
! 165: }
! 166: double
! 167: speed_modlimb_invert_arith (struct speed_params *s)
! 168: {
! 169: SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_arith);
! 170: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>