Annotation of OpenXM_contrib/gmp/mpn/generic/dive_1.c, Revision 1.1
1.1 ! ohara 1: /* mpn_divexact_1 -- mpn by limb exact division.
! 2:
! 3: THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
! 4: CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
! 5: FUTURE GNU MP RELEASES.
! 6:
! 7: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
! 8:
! 9: This file is part of the GNU MP Library.
! 10:
! 11: The GNU MP Library is free software; you can redistribute it and/or modify
! 12: it under the terms of the GNU Lesser General Public License as published by
! 13: the Free Software Foundation; either version 2.1 of the License, or (at your
! 14: option) any later version.
! 15:
! 16: The GNU MP Library is distributed in the hope that it will be useful, but
! 17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 18: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 19: License for more details.
! 20:
! 21: You should have received a copy of the GNU Lesser General Public License
! 22: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 24: MA 02111-1307, USA. */
! 25:
! 26: #include "gmp.h"
! 27: #include "gmp-impl.h"
! 28: #include "longlong.h"
! 29:
! 30:
! 31:
! 32: /* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
! 33: q will only be correct if d divides a exactly.
! 34:
! 35: A separate loop is used for shift==0 because n<<BITS_PER_MP_LIMB doesn't
! 36: give zero on all CPUs (for instance it doesn't on the x86s). This
! 37: separate loop might run faster too, helping odd divisors.
! 38:
! 39: Possibilities:
! 40:
! 41: mpn_divexact_1c could be created, accepting and returning c. This would
! 42: let a long calculation be done piece by piece. Currently there's no
! 43: particular need for that, and not returning c means that a final umul can
! 44: be skipped.
! 45:
! 46: Another use for returning c would be letting the caller know whether the
! 47: division was in fact exact. It would work just to return the carry bit
! 48: "c=(l>s)" and let the caller do a final umul if interested.
! 49:
! 50: When the divisor is even, the factors of two could be handled with a
! 51: separate mpn_rshift, instead of shifting on the fly. That might be
! 52: faster on some CPUs and would mean just the shift==0 style loop would be
! 53: needed.
! 54:
! 55: If n<<BITS_PER_MP_LIMB gives zero on a particular CPU then the separate
! 56: shift==0 loop is unnecessary, and could be eliminated if there's no great
! 57: speed difference.
! 58:
! 59: It's not clear whether "/" is the best way to handle size==1. Alpha gcc
! 60: 2.95 for instance has a poor "/" and might prefer the modular method.
! 61: Perhaps a tuned parameter should control this.
! 62:
! 63: If src[size-1] < divisor then dst[size-1] will be zero, and one divide
! 64: step could be skipped. A test at last step for s<divisor (or ls in the
! 65: even case) might be a good way to do that. But if this code is often
! 66: used with small divisors then it might not be worth bothering */
! 67:
! 68: void
! 69: mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
! 70: {
! 71: mp_size_t i;
! 72: mp_limb_t c, l, ls, s, s_next, inverse, dummy;
! 73: unsigned shift;
! 74:
! 75: ASSERT (size >= 1);
! 76: ASSERT (divisor != 0);
! 77: ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
! 78: ASSERT_MPN (src, size);
! 79: ASSERT_LIMB (divisor);
! 80:
! 81: if (size == 1)
! 82: {
! 83: dst[0] = src[0] / divisor;
! 84: return;
! 85: }
! 86:
! 87: if ((divisor & 1) == 0)
! 88: {
! 89: count_trailing_zeros (shift, divisor);
! 90: divisor >>= shift;
! 91: }
! 92: else
! 93: shift = 0;
! 94:
! 95: modlimb_invert (inverse, divisor);
! 96: divisor <<= GMP_NAIL_BITS;
! 97:
! 98: if (shift != 0)
! 99: {
! 100: s = src[0];
! 101: c = 0;
! 102: i = 0;
! 103: size--;
! 104: goto even_entry;
! 105:
! 106: do
! 107: {
! 108: umul_ppmm (l, dummy, l, divisor);
! 109: c += l;
! 110:
! 111: even_entry:
! 112: s_next = src[i+1];
! 113: ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
! 114: s = s_next;
! 115:
! 116: SUBC_LIMB (c, l, ls, c);
! 117:
! 118: l = (l * inverse) & GMP_NUMB_MASK;
! 119: dst[i] = l;
! 120: i++;
! 121: }
! 122: while (i < size);
! 123:
! 124: umul_ppmm (l, dummy, l, divisor);
! 125: c += l;
! 126: ls = s >> shift;
! 127: l = ls - c;
! 128: l = (l * inverse) & GMP_NUMB_MASK;
! 129: dst[i] = l;
! 130: }
! 131: else
! 132: {
! 133: l = src[0];
! 134: l = (l * inverse) & GMP_NUMB_MASK;
! 135: dst[0] = l;
! 136: i = 1;
! 137: c = 0;
! 138:
! 139: do
! 140: {
! 141: umul_ppmm (l, dummy, l, divisor);
! 142: c += l;
! 143:
! 144: s = src[i];
! 145: SUBC_LIMB (c, l, s, c);
! 146:
! 147: l = (l * inverse) & GMP_NUMB_MASK;
! 148: dst[i] = l;
! 149: i++;
! 150: }
! 151: while (i < size);
! 152: }
! 153: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>