Annotation of OpenXM_contrib/gmp/mpn/generic/dive_1.c, Revision 1.1.1.1
1.1 ohara 1: /* mpn_divexact_1 -- mpn by limb exact division.
2:
3: THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
4: CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5: FUTURE GNU MP RELEASES.
6:
7: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
8:
9: This file is part of the GNU MP Library.
10:
11: The GNU MP Library is free software; you can redistribute it and/or modify
12: it under the terms of the GNU Lesser General Public License as published by
13: the Free Software Foundation; either version 2.1 of the License, or (at your
14: option) any later version.
15:
16: The GNU MP Library is distributed in the hope that it will be useful, but
17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19: License for more details.
20:
21: You should have received a copy of the GNU Lesser General Public License
22: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
24: MA 02111-1307, USA. */
25:
26: #include "gmp.h"
27: #include "gmp-impl.h"
28: #include "longlong.h"
29:
30:
31:
32: /* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
33: q will only be correct if d divides a exactly.
34:
35: A separate loop is used for shift==0 because n<<BITS_PER_MP_LIMB doesn't
36: give zero on all CPUs (for instance it doesn't on the x86s). This
37: separate loop might run faster too, helping odd divisors.
38:
39: Possibilities:
40:
41: mpn_divexact_1c could be created, accepting and returning c. This would
42: let a long calculation be done piece by piece. Currently there's no
43: particular need for that, and not returning c means that a final umul can
44: be skipped.
45:
46: Another use for returning c would be letting the caller know whether the
47: division was in fact exact. It would work just to return the carry bit
48: "c=(l>s)" and let the caller do a final umul if interested.
49:
50: When the divisor is even, the factors of two could be handled with a
51: separate mpn_rshift, instead of shifting on the fly. That might be
52: faster on some CPUs and would mean just the shift==0 style loop would be
53: needed.
54:
55: If n<<BITS_PER_MP_LIMB gives zero on a particular CPU then the separate
56: shift==0 loop is unnecessary, and could be eliminated if there's no great
57: speed difference.
58:
59: It's not clear whether "/" is the best way to handle size==1. Alpha gcc
60: 2.95 for instance has a poor "/" and might prefer the modular method.
61: Perhaps a tuned parameter should control this.
62:
63: If src[size-1] < divisor then dst[size-1] will be zero, and one divide
64: step could be skipped. A test at last step for s<divisor (or ls in the
65: even case) might be a good way to do that. But if this code is often
66: used with small divisors then it might not be worth bothering */
67:
68: void
69: mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
70: {
71: mp_size_t i;
72: mp_limb_t c, l, ls, s, s_next, inverse, dummy;
73: unsigned shift;
74:
75: ASSERT (size >= 1);
76: ASSERT (divisor != 0);
77: ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
78: ASSERT_MPN (src, size);
79: ASSERT_LIMB (divisor);
80:
81: if (size == 1)
82: {
83: dst[0] = src[0] / divisor;
84: return;
85: }
86:
87: if ((divisor & 1) == 0)
88: {
89: count_trailing_zeros (shift, divisor);
90: divisor >>= shift;
91: }
92: else
93: shift = 0;
94:
95: modlimb_invert (inverse, divisor);
96: divisor <<= GMP_NAIL_BITS;
97:
98: if (shift != 0)
99: {
100: s = src[0];
101: c = 0;
102: i = 0;
103: size--;
104: goto even_entry;
105:
106: do
107: {
108: umul_ppmm (l, dummy, l, divisor);
109: c += l;
110:
111: even_entry:
112: s_next = src[i+1];
113: ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
114: s = s_next;
115:
116: SUBC_LIMB (c, l, ls, c);
117:
118: l = (l * inverse) & GMP_NUMB_MASK;
119: dst[i] = l;
120: i++;
121: }
122: while (i < size);
123:
124: umul_ppmm (l, dummy, l, divisor);
125: c += l;
126: ls = s >> shift;
127: l = ls - c;
128: l = (l * inverse) & GMP_NUMB_MASK;
129: dst[i] = l;
130: }
131: else
132: {
133: l = src[0];
134: l = (l * inverse) & GMP_NUMB_MASK;
135: dst[0] = l;
136: i = 1;
137: c = 0;
138:
139: do
140: {
141: umul_ppmm (l, dummy, l, divisor);
142: c += l;
143:
144: s = src[i];
145: SUBC_LIMB (c, l, s, c);
146:
147: l = (l * inverse) & GMP_NUMB_MASK;
148: dst[i] = l;
149: i++;
150: }
151: while (i < size);
152: }
153: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>