Annotation of OpenXM_contrib/gmp/tune/modlinv.c, Revision 1.1.1.1
1.1 ohara 1: /* Alternate implementations of modlimb_invert to compare speeds. */
2:
3: /*
4: Copyright 2000, 2002 Free Software Foundation, Inc.
5:
6: This file is part of the GNU MP Library.
7:
8: The GNU MP Library is free software; you can redistribute it and/or modify
9: it under the terms of the GNU Lesser General Public License as published by
10: the Free Software Foundation; either version 2.1 of the License, or (at your
11: option) any later version.
12:
13: The GNU MP Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: License for more details.
17:
18: You should have received a copy of the GNU Lesser General Public License
19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: MA 02111-1307, USA.
22: */
23:
24: #include <stdio.h>
25: #include "gmp.h"
26: #include "gmp-impl.h"
27: #include "longlong.h"
28: #include "speed.h"
29:
30:
31: /* Like the standard version in gmp-impl.h, but with the expressions using a
32: "1-" form. This has the same number of steps, but "1-" is on the
33: dependent chain, whereas the "2*" in the standard version isn't.
34: Depending on the CPU this should be the same or a touch slower. */
35:
36: #if BITS_PER_MP_LIMB <= 32
37: #define modlimb_invert_mul1(inv,n) \
38: do { \
39: mp_limb_t __n = (n); \
40: mp_limb_t __inv; \
41: ASSERT ((__n & 1) == 1); \
42: __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
43: __inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \
44: __inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \
45: ASSERT (__inv * __n == 1); \
46: (inv) = __inv; \
47: } while (0)
48: #endif
49:
50: #if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
51: #define modlimb_invert_mul1(inv,n) \
52: do { \
53: mp_limb_t __n = (n); \
54: mp_limb_t __inv; \
55: ASSERT ((__n & 1) == 1); \
56: __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
57: __inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \
58: __inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \
59: __inv = (1 - __n * __inv) * __inv + __inv; /* 64 */ \
60: ASSERT (__inv * __n == 1); \
61: (inv) = __inv; \
62: } while (0)
63: #endif
64:
65:
66: /* The loop based version used in GMP 3.0 and earlier. Usually slower than
67: multiplying, due to the number of steps that must be performed. Much
68: slower when the processor has a good multiply. */
69:
70: #define modlimb_invert_loop(inv,n) \
71: do { \
72: mp_limb_t __v = (n); \
73: mp_limb_t __v_orig = __v; \
74: mp_limb_t __make_zero = 1; \
75: mp_limb_t __two_i = 1; \
76: mp_limb_t __v_inv = 0; \
77: \
78: ASSERT ((__v & 1) == 1); \
79: \
80: do \
81: { \
82: while ((__two_i & __make_zero) == 0) \
83: __two_i <<= 1, __v <<= 1; \
84: __v_inv += __two_i; \
85: __make_zero -= __v; \
86: } \
87: while (__make_zero); \
88: \
89: ASSERT (__v_orig * __v_inv == 1); \
90: (inv) = __v_inv; \
91: } while (0)
92:
93:
94: /* Another loop based version with conditionals, but doing a fixed number of
95: steps. */
96:
97: #define modlimb_invert_cond(inv,n) \
98: do { \
99: mp_limb_t __n = (n); \
100: mp_limb_t __rem = (1 - __n) >> 1; \
101: mp_limb_t __inv = GMP_LIMB_HIGHBIT; \
102: int __count; \
103: \
104: ASSERT ((__n & 1) == 1); \
105: \
106: __count = BITS_PER_MP_LIMB-1; \
107: do \
108: { \
109: __inv >>= 1; \
110: if (__rem & 1) \
111: { \
112: __inv |= GMP_LIMB_HIGHBIT; \
113: __rem -= __n; \
114: } \
115: __rem >>= 1; \
116: } \
117: while (-- __count); \
118: \
119: ASSERT (__inv * __n == 1); \
120: (inv) = __inv; \
121: } while (0)
122:
123:
124: /* Another loop based bitwise version, but purely arithmetic, no
125: conditionals. */
126:
127: #define modlimb_invert_arith(inv,n) \
128: do { \
129: mp_limb_t __n = (n); \
130: mp_limb_t __rem = (1 - __n) >> 1; \
131: mp_limb_t __inv = GMP_LIMB_HIGHBIT; \
132: mp_limb_t __lowbit; \
133: int __count; \
134: \
135: ASSERT ((__n & 1) == 1); \
136: \
137: __count = BITS_PER_MP_LIMB-1; \
138: do \
139: { \
140: __lowbit = __rem & 1; \
141: __inv = (__inv >> 1) | (__lowbit << (BITS_PER_MP_LIMB-1)); \
142: __rem = (__rem - (__n & -__lowbit)) >> 1; \
143: } \
144: while (-- __count); \
145: \
146: ASSERT (__inv * __n == 1); \
147: (inv) = __inv; \
148: } while (0)
149:
150:
151: double
152: speed_modlimb_invert_mul1 (struct speed_params *s)
153: {
154: SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_mul1);
155: }
156: double
157: speed_modlimb_invert_loop (struct speed_params *s)
158: {
159: SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_loop);
160: }
161: double
162: speed_modlimb_invert_cond (struct speed_params *s)
163: {
164: SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_cond);
165: }
166: double
167: speed_modlimb_invert_arith (struct speed_params *s)
168: {
169: SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_arith);
170: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>