[BACK]Return to dive_1.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / generic

Annotation of OpenXM_contrib/gmp/mpn/generic/dive_1.c, Revision 1.1.1.1

1.1       ohara       1: /* mpn_divexact_1 -- mpn by limb exact division.
                      2:
                      3:    THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
                      4:    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
                      5:    FUTURE GNU MP RELEASES.
                      6:
                      7: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
                      8:
                      9: This file is part of the GNU MP Library.
                     10:
                     11: The GNU MP Library is free software; you can redistribute it and/or modify
                     12: it under the terms of the GNU Lesser General Public License as published by
                     13: the Free Software Foundation; either version 2.1 of the License, or (at your
                     14: option) any later version.
                     15:
                     16: The GNU MP Library is distributed in the hope that it will be useful, but
                     17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     18: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     19: License for more details.
                     20:
                     21: You should have received a copy of the GNU Lesser General Public License
                     22: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     24: MA 02111-1307, USA. */
                     25:
                     26: #include "gmp.h"
                     27: #include "gmp-impl.h"
                     28: #include "longlong.h"
                     29:
                     30:
                     31:
                     32: /* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
                     33:    q will only be correct if d divides a exactly.
                     34:
                     35:    A separate loop is used for shift==0 because n<<BITS_PER_MP_LIMB doesn't
                     36:    give zero on all CPUs (for instance it doesn't on the x86s).  This
                     37:    separate loop might run faster too, helping odd divisors.
                     38:
                     39:    Possibilities:
                     40:
                     41:    mpn_divexact_1c could be created, accepting and returning c.  This would
                     42:    let a long calculation be done piece by piece.  Currently there's no
                     43:    particular need for that, and not returning c means that a final umul can
                     44:    be skipped.
                     45:
                     46:    Another use for returning c would be letting the caller know whether the
                     47:    division was in fact exact.  It would work just to return the carry bit
                     48:    "c=(l>s)" and let the caller do a final umul if interested.
                     49:
                     50:    When the divisor is even, the factors of two could be handled with a
                     51:    separate mpn_rshift, instead of shifting on the fly.  That might be
                     52:    faster on some CPUs and would mean just the shift==0 style loop would be
                     53:    needed.
                     54:
                     55:    If n<<BITS_PER_MP_LIMB gives zero on a particular CPU then the separate
                     56:    shift==0 loop is unnecessary, and could be eliminated if there's no great
                     57:    speed difference.
                     58:
                     59:    It's not clear whether "/" is the best way to handle size==1.  Alpha gcc
                     60:    2.95 for instance has a poor "/" and might prefer the modular method.
                     61:    Perhaps a tuned parameter should control this.
                     62:
                     63:    If src[size-1] < divisor then dst[size-1] will be zero, and one divide
                     64:    step could be skipped.  A test at last step for s<divisor (or ls in the
                     65:    even case) might be a good way to do that.  But if this code is often
                     66:    used with small divisors then it might not be worth bothering  */
                     67:
                     68: void
                     69: mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
                     70: {
                     71:   mp_size_t  i;
                     72:   mp_limb_t  c, l, ls, s, s_next, inverse, dummy;
                     73:   unsigned   shift;
                     74:
                     75:   ASSERT (size >= 1);
                     76:   ASSERT (divisor != 0);
                     77:   ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
                     78:   ASSERT_MPN (src, size);
                     79:   ASSERT_LIMB (divisor);
                     80:
                     81:   if (size == 1)
                     82:     {
                     83:       dst[0] = src[0] / divisor;
                     84:       return;
                     85:     }
                     86:
                     87:   if ((divisor & 1) == 0)
                     88:     {
                     89:       count_trailing_zeros (shift, divisor);
                     90:       divisor >>= shift;
                     91:     }
                     92:   else
                     93:     shift = 0;
                     94:
                     95:   modlimb_invert (inverse, divisor);
                     96:   divisor <<= GMP_NAIL_BITS;
                     97:
                     98:   if (shift != 0)
                     99:     {
                    100:       s = src[0];
                    101:       c = 0;
                    102:       i = 0;
                    103:       size--;
                    104:       goto even_entry;
                    105:
                    106:       do
                    107:        {
                    108:          umul_ppmm (l, dummy, l, divisor);
                    109:          c += l;
                    110:
                    111:        even_entry:
                    112:          s_next = src[i+1];
                    113:          ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
                    114:          s = s_next;
                    115:
                    116:           SUBC_LIMB (c, l, ls, c);
                    117:
                    118:          l = (l * inverse) & GMP_NUMB_MASK;
                    119:          dst[i] = l;
                    120:          i++;
                    121:        }
                    122:       while (i < size);
                    123:
                    124:       umul_ppmm (l, dummy, l, divisor);
                    125:       c += l;
                    126:       ls = s >> shift;
                    127:       l = ls - c;
                    128:       l = (l * inverse) & GMP_NUMB_MASK;
                    129:       dst[i] = l;
                    130:     }
                    131:   else
                    132:     {
                    133:       l = src[0];
                    134:       l = (l * inverse) & GMP_NUMB_MASK;
                    135:       dst[0] = l;
                    136:       i = 1;
                    137:       c = 0;
                    138:
                    139:       do
                    140:        {
                    141:          umul_ppmm (l, dummy, l, divisor);
                    142:          c += l;
                    143:
                    144:          s = src[i];
                    145:           SUBC_LIMB (c, l, s, c);
                    146:
                    147:           l = (l * inverse) & GMP_NUMB_MASK;
                    148:          dst[i] = l;
                    149:          i++;
                    150:        }
                    151:       while (i < size);
                    152:     }
                    153: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>