[BACK]Return to gcdext.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / generic

Annotation of OpenXM_contrib/gmp/mpn/generic/gcdext.c, Revision 1.1.1.3

1.1       maekawa     1: /* mpn_gcdext -- Extended Greatest Common Divisor.
                      2:
1.1.1.3 ! ohara       3: Copyright 1996, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
1.1       maekawa     4:
                      5: This file is part of the GNU MP Library.
                      6:
                      7: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2   maekawa     8: it under the terms of the GNU Lesser General Public License as published by
                      9: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1       maekawa    10: option) any later version.
                     11:
                     12: The GNU MP Library is distributed in the hope that it will be useful, but
                     13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2   maekawa    14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1.1       maekawa    15: License for more details.
                     16:
1.1.1.2   maekawa    17: You should have received a copy of the GNU Lesser General Public License
1.1       maekawa    18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: MA 02111-1307, USA. */
                     21:
                     22: #include "gmp.h"
                     23: #include "gmp-impl.h"
                     24: #include "longlong.h"
                     25:
1.1.1.2   maekawa    26: #ifndef GCDEXT_THRESHOLD
                     27: #define GCDEXT_THRESHOLD 17
                     28: #endif
                     29:
1.1       maekawa    30: #ifndef EXTEND
                     31: #define EXTEND 1
                     32: #endif
                     33:
                     34: #if STAT
1.1.1.3 ! ohara      35: int arr[GMP_LIMB_BITS + 1];
1.1       maekawa    36: #endif
                     37:
                     38:
1.1.1.2   maekawa    39: /* mpn_gcdext (GP, SP, SSIZE, UP, USIZE, VP, VSIZE)
                     40:
                     41:    Compute the extended GCD of {UP,USIZE} and {VP,VSIZE} and store the
                     42:    greatest common divisor at GP (unless it is 0), and the first cofactor at
                     43:    SP.  Write the size of the cofactor through the pointer SSIZE.  Return the
                     44:    size of the value at GP.  Note that SP might be a negative number; this is
                     45:    denoted by storing the negative of the size through SSIZE.
                     46:
                     47:    {UP,USIZE} and {VP,VSIZE} are both clobbered.
                     48:
                     49:    The space allocation for all four areas needs to be USIZE+1.
                     50:
                     51:    Preconditions: 1) U >= V.
                     52:                  2) V > 0.  */
                     53:
                     54: /* We use Lehmer's algorithm.  The idea is to extract the most significant
                     55:    bits of the operands, and compute the continued fraction for them.  We then
                     56:    apply the gathered cofactors to the full operands.
                     57:
                     58:    Idea 1: After we have performed a full division, don't shift operands back,
1.1       maekawa    59:           but instead account for the extra factors-of-2 thus introduced.
                     60:    Idea 2: Simple generalization to use divide-and-conquer would give us an
                     61:           algorithm that runs faster than O(n^2).
                     62:    Idea 3: The input numbers need less space as the computation progresses,
1.1.1.2   maekawa    63:           while the s0 and s1 variables need more space.  To save memory, we
1.1       maekawa    64:           could make them share space, and have the latter variables grow
1.1.1.2   maekawa    65:           into the former.
                     66:    Idea 4: We should not do double-limb arithmetic from the start.  Instead,
                     67:           do things in single-limb arithmetic until the quotients differ,
                     68:           and then switch to double-limb arithmetic.  */
                     69:
1.1       maekawa    70:
1.1.1.3 ! ohara      71: /* One-limb division optimized for small quotients.  */
1.1.1.2   maekawa    72: static mp_limb_t
1.1.1.3 ! ohara      73: div1 (mp_limb_t n0, mp_limb_t d0)
1.1.1.2   maekawa    74: {
1.1.1.3 ! ohara      75:   if ((mp_limb_signed_t) n0 < 0)
1.1.1.2   maekawa    76:     {
1.1.1.3 ! ohara      77:       mp_limb_t q;
        !            78:       int cnt;
        !            79:       for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
        !            80:        {
        !            81:          d0 = d0 << 1;
        !            82:        }
        !            83:
        !            84:       q = 0;
        !            85:       while (cnt)
        !            86:        {
        !            87:          q <<= 1;
        !            88:          if (n0 >= d0)
        !            89:            {
        !            90:              n0 = n0 - d0;
        !            91:              q |= 1;
        !            92:            }
        !            93:          d0 = d0 >> 1;
        !            94:          cnt--;
        !            95:        }
        !            96:
        !            97:       return q;
        !            98:     }
        !            99:   else
        !           100:     {
        !           101:       mp_limb_t q;
        !           102:       int cnt;
        !           103:       for (cnt = 0; n0 >= d0; cnt++)
        !           104:        {
        !           105:          d0 = d0 << 1;
        !           106:        }
        !           107:
        !           108:       q = 0;
        !           109:       while (cnt)
        !           110:        {
        !           111:          d0 = d0 >> 1;
        !           112:          q <<= 1;
        !           113:          if (n0 >= d0)
        !           114:            {
        !           115:              n0 = n0 - d0;
        !           116:              q |= 1;
        !           117:            }
        !           118:          cnt--;
        !           119:        }
        !           120:
        !           121:       return q;
1.1.1.2   maekawa   122:     }
1.1.1.3 ! ohara     123: }
1.1.1.2   maekawa   124:
1.1.1.3 ! ohara     125: /* Two-limb division optimized for small quotients.  */
        !           126: static mp_limb_t
        !           127: div2 (mp_limb_t n1, mp_limb_t n0, mp_limb_t d1, mp_limb_t d0)
        !           128: {
1.1.1.2   maekawa   129:   if ((mp_limb_signed_t) n1 < 0)
                    130:     {
                    131:       mp_limb_t q;
                    132:       int cnt;
                    133:       for (cnt = 1; (mp_limb_signed_t) d1 >= 0; cnt++)
                    134:        {
1.1.1.3 ! ohara     135:          d1 = (d1 << 1) | (d0 >> (GMP_LIMB_BITS - 1));
1.1.1.2   maekawa   136:          d0 = d0 << 1;
                    137:        }
                    138:
                    139:       q = 0;
                    140:       while (cnt)
                    141:        {
                    142:          q <<= 1;
                    143:          if (n1 > d1 || (n1 == d1 && n0 >= d0))
                    144:            {
                    145:              sub_ddmmss (n1, n0, n1, n0, d1, d0);
                    146:              q |= 1;
                    147:            }
1.1.1.3 ! ohara     148:          d0 = (d1 << (GMP_LIMB_BITS - 1)) | (d0 >> 1);
1.1.1.2   maekawa   149:          d1 = d1 >> 1;
                    150:          cnt--;
                    151:        }
                    152:
                    153:       return q;
                    154:     }
                    155:   else
                    156:     {
                    157:       mp_limb_t q;
                    158:       int cnt;
                    159:       for (cnt = 0; n1 > d1 || (n1 == d1 && n0 >= d0); cnt++)
                    160:        {
1.1.1.3 ! ohara     161:          d1 = (d1 << 1) | (d0 >> (GMP_LIMB_BITS - 1));
1.1.1.2   maekawa   162:          d0 = d0 << 1;
                    163:        }
                    164:
                    165:       q = 0;
                    166:       while (cnt)
                    167:        {
1.1.1.3 ! ohara     168:          d0 = (d1 << (GMP_LIMB_BITS - 1)) | (d0 >> 1);
1.1.1.2   maekawa   169:          d1 = d1 >> 1;
                    170:          q <<= 1;
                    171:          if (n1 > d1 || (n1 == d1 && n0 >= d0))
                    172:            {
                    173:              sub_ddmmss (n1, n0, n1, n0, d1, d0);
                    174:              q |= 1;
                    175:            }
                    176:          cnt--;
                    177:        }
                    178:
                    179:       return q;
                    180:     }
                    181: }
1.1       maekawa   182:
                    183: mp_size_t
                    184: #if EXTEND
1.1.1.2   maekawa   185: mpn_gcdext (mp_ptr gp, mp_ptr s0p, mp_size_t *s0size,
1.1       maekawa   186:            mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
                    187: #else
                    188: mpn_gcd (mp_ptr gp,
                    189:         mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
                    190: #endif
                    191: {
1.1.1.2   maekawa   192:   mp_limb_t A, B, C, D;
1.1       maekawa   193:   int cnt;
                    194:   mp_ptr tp, wp;
                    195: #if RECORD
1.1.1.2   maekawa   196:   mp_limb_t max = 0;
1.1       maekawa   197: #endif
                    198: #if EXTEND
                    199:   mp_ptr s1p;
                    200:   mp_ptr orig_s0p = s0p;
1.1.1.2   maekawa   201:   mp_size_t ssize;
                    202:   int sign = 1;
                    203: #endif
                    204:   int use_double_flag;
1.1       maekawa   205:   TMP_DECL (mark);
                    206:
1.1.1.3 ! ohara     207:   ASSERT (size >= vsize);
        !           208:   ASSERT (vsize >= 1);
        !           209:   ASSERT (up[size-1] != 0);
        !           210:   ASSERT (vp[vsize-1] != 0);
        !           211:   ASSERT (! MPN_OVERLAP_P (up, size+1, vp, vsize+1));
        !           212: #if EXTEND
        !           213:   ASSERT (! MPN_OVERLAP_P (s0p, size, up, size+1));
        !           214:   ASSERT (! MPN_OVERLAP_P (s0p, size, vp, vsize+1));
        !           215: #endif
        !           216:   ASSERT (MPN_SAME_OR_SEPARATE_P (gp, up, size));
        !           217:   ASSERT (MPN_SAME_OR_SEPARATE2_P (gp, size, vp, vsize));
1.1       maekawa   218:
1.1.1.3 ! ohara     219:   TMP_MARK (mark);
1.1.1.2   maekawa   220:
1.1       maekawa   221:   tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
                    222:   wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
1.1.1.2   maekawa   223: #if EXTEND
                    224:   s1p = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
1.1       maekawa   225:
1.1.1.3 ! ohara     226: #if ! WANT_GCDEXT_ONE_STEP
1.1       maekawa   227:   MPN_ZERO (s0p, size);
                    228:   MPN_ZERO (s1p, size);
1.1.1.3 ! ohara     229: #endif
1.1       maekawa   230:
                    231:   s0p[0] = 1;
                    232:   s1p[0] = 0;
                    233:   ssize = 1;
                    234: #endif
                    235:
                    236:   if (size > vsize)
                    237:     {
1.1.1.3 ! ohara     238:       mpn_tdiv_qr (tp, up, (mp_size_t) 0, up, size, vp, vsize);
1.1       maekawa   239:
                    240: #if EXTEND
                    241:       /* This is really what it boils down to in this case... */
                    242:       s0p[0] = 0;
                    243:       s1p[0] = 1;
1.1.1.2   maekawa   244:       sign = -sign;
1.1       maekawa   245: #endif
                    246:       size = vsize;
1.1.1.2   maekawa   247:       MP_PTR_SWAP (up, vp);
1.1       maekawa   248:     }
                    249:
1.1.1.3 ! ohara     250:   use_double_flag = ABOVE_THRESHOLD (size, GCDEXT_THRESHOLD);
        !           251:
1.1       maekawa   252:   for (;;)
                    253:     {
1.1.1.2   maekawa   254:       mp_limb_t asign;
1.1       maekawa   255:       /* Figure out exact size of V.  */
                    256:       vsize = size;
                    257:       MPN_NORMALIZE (vp, vsize);
                    258:       if (vsize <= 1)
                    259:        break;
                    260:
1.1.1.2   maekawa   261:       if (use_double_flag)
1.1       maekawa   262:        {
1.1.1.2   maekawa   263:          mp_limb_t uh, vh, ul, vl;
                    264:          /* Let UH,UL be the most significant limbs of U, and let VH,VL be
                    265:             the corresponding bits from V.  */
                    266:          uh = up[size - 1];
                    267:          vh = vp[size - 1];
                    268:          ul = up[size - 2];
                    269:          vl = vp[size - 2];
                    270:          count_leading_zeros (cnt, uh);
1.1.1.3 ! ohara     271: #if GMP_NAIL_BITS == 0
1.1.1.2   maekawa   272:          if (cnt != 0)
                    273:            {
1.1.1.3 ! ohara     274:              uh = (uh << cnt) | (ul >> (GMP_LIMB_BITS - cnt));
        !           275:              vh = (vh << cnt) | (vl >> (GMP_LIMB_BITS - cnt));
1.1.1.2   maekawa   276:              vl <<= cnt;
                    277:              ul <<= cnt;
                    278:              if (size >= 3)
                    279:                {
1.1.1.3 ! ohara     280:                  ul |= (up[size - 3] >> (GMP_LIMB_BITS - cnt));
        !           281:                  vl |= (vp[size - 3] >> (GMP_LIMB_BITS - cnt));
1.1.1.2   maekawa   282:                }
                    283:            }
1.1.1.3 ! ohara     284: #else
        !           285:          uh = uh << cnt;
        !           286:          vh = vh << cnt;
        !           287:          if (cnt < GMP_NUMB_BITS)
        !           288:            {                        /* GMP_NAIL_BITS <= cnt < GMP_NUMB_BITS */
        !           289:              uh |= ul >> (GMP_NUMB_BITS - cnt);
        !           290:              vh |= vl >> (GMP_NUMB_BITS - cnt);
        !           291:              ul <<= cnt + GMP_NAIL_BITS;
        !           292:              vl <<= cnt + GMP_NAIL_BITS;
        !           293:              if (size >= 3)
        !           294:                {
        !           295:                  if (cnt + GMP_NAIL_BITS > GMP_NUMB_BITS)
        !           296:                    {
        !           297:                      ul |= up[size - 3] << cnt + GMP_NAIL_BITS - GMP_NUMB_BITS;
        !           298:                      vl |= vp[size - 3] << cnt + GMP_NAIL_BITS - GMP_NUMB_BITS;
        !           299:                      if (size >= 4)
        !           300:                        {
        !           301:                          ul |= up[size - 4] >> 2 * GMP_NUMB_BITS - GMP_NAIL_BITS - cnt;
        !           302:                          vl |= vp[size - 4] >> 2 * GMP_NUMB_BITS - GMP_NAIL_BITS - cnt;
        !           303:                        }
        !           304:                    }
        !           305:                  else
        !           306:                    {
        !           307:                      ul |= up[size - 3] >> (GMP_LIMB_BITS - cnt - 2 * GMP_NAIL_BITS);
        !           308:                      vl |= vp[size - 3] >> (GMP_LIMB_BITS - cnt - 2 * GMP_NAIL_BITS);
        !           309:                    }
        !           310:                }
        !           311:            }
        !           312:          else
        !           313:            {                     /* GMP_NUMB_BITS <= cnt <= GMP_LIMB_BITS-1 */
        !           314:              uh |= ul << cnt - GMP_NUMB_BITS;  /* 0 <= c <= GMP_NAIL_BITS-1 */
        !           315:              vh |= vl << cnt - GMP_NUMB_BITS;
        !           316:              if (size >= 3)
        !           317:                {
        !           318:                  if (cnt - GMP_NUMB_BITS != 0)
        !           319:                    {                           /* uh/vh need yet more bits! */
        !           320:                      uh |= up[size - 3] >> 2 * GMP_NUMB_BITS - cnt;
        !           321:                      vh |= vp[size - 3] >> 2 * GMP_NUMB_BITS - cnt;
        !           322:                      ul = up[size - 3] << cnt + GMP_NAIL_BITS - GMP_NUMB_BITS;
        !           323:                      vl = vp[size - 3] << cnt + GMP_NAIL_BITS - GMP_NUMB_BITS;
        !           324:                      if (size >= 4)
        !           325:                        {
        !           326:                          ul |= up[size - 4] >> 2 * GMP_NUMB_BITS - GMP_NAIL_BITS - cnt;
        !           327:                          vl |= vp[size - 4] >> 2 * GMP_NUMB_BITS - GMP_NAIL_BITS - cnt;
        !           328:                        }
        !           329:                    }
        !           330:                  else
        !           331:                    {
        !           332:                      ul = up[size - 3] << GMP_LIMB_BITS - cnt;
        !           333:                      vl = vp[size - 3] << GMP_LIMB_BITS - cnt;
        !           334:                      if (size >= 4)
        !           335:                        {
        !           336:                          ul |= up[size - 4] >> GMP_NUMB_BITS - (GMP_LIMB_BITS - cnt);
        !           337:                          vl |= vp[size - 4] >> GMP_NUMB_BITS - (GMP_LIMB_BITS - cnt);
        !           338:                        }
        !           339:                    }
        !           340:                }
        !           341:              else
        !           342:                {
        !           343:                  ul = 0;
        !           344:                  vl = 0;
        !           345:                }
        !           346:            }
        !           347: #endif
1.1.1.2   maekawa   348:
                    349:          A = 1;
                    350:          B = 0;
                    351:          C = 0;
                    352:          D = 1;
                    353:
                    354:          asign = 0;
                    355:          for (;;)
                    356:            {
1.1.1.3 ! ohara     357:              mp_limb_t Tac, Tbd;
        !           358:              mp_limb_t q1, q2;
1.1.1.2   maekawa   359:              mp_limb_t nh, nl, dh, dl;
                    360:              mp_limb_t t1, t0;
                    361:              mp_limb_t Th, Tl;
                    362:
                    363:              sub_ddmmss (dh, dl, vh, vl, 0, C);
1.1.1.3 ! ohara     364:              if (dh == 0)
1.1.1.2   maekawa   365:                break;
                    366:              add_ssaaaa (nh, nl, uh, ul, 0, A);
1.1.1.3 ! ohara     367:              q1 = div2 (nh, nl, dh, dl);
1.1.1.2   maekawa   368:
                    369:              add_ssaaaa (dh, dl, vh, vl, 0, D);
1.1.1.3 ! ohara     370:              if (dh == 0)
1.1.1.2   maekawa   371:                break;
                    372:              sub_ddmmss (nh, nl, uh, ul, 0, B);
1.1.1.3 ! ohara     373:              q2 = div2 (nh, nl, dh, dl);
1.1.1.2   maekawa   374:
                    375:              if (q1 != q2)
                    376:                break;
                    377:
1.1.1.3 ! ohara     378:              Tac = A + q1 * C;
        !           379:              if (GMP_NAIL_BITS != 0 && Tac > GMP_NUMB_MAX)
        !           380:                break;
        !           381:              Tbd = B + q1 * D;
        !           382:              if (GMP_NAIL_BITS != 0 && Tbd > GMP_NUMB_MAX)
        !           383:                break;
1.1.1.2   maekawa   384:              A = C;
1.1.1.3 ! ohara     385:              C = Tac;
1.1.1.2   maekawa   386:              B = D;
1.1.1.3 ! ohara     387:              D = Tbd;
1.1.1.2   maekawa   388:              umul_ppmm (t1, t0, q1, vl);
                    389:              t1 += q1 * vh;
                    390:              sub_ddmmss (Th, Tl, uh, ul, t1, t0);
                    391:              uh = vh, ul = vl;
                    392:              vh = Th, vl = Tl;
                    393:
1.1.1.3 ! ohara     394:              asign = ~asign;
        !           395:
1.1.1.2   maekawa   396:              add_ssaaaa (dh, dl, vh, vl, 0, C);
1.1.1.3 ! ohara     397: /*           if (dh == 0)      should never happen
        !           398:                break;         */
1.1.1.2   maekawa   399:              sub_ddmmss (nh, nl, uh, ul, 0, A);
1.1.1.3 ! ohara     400:              q1 = div2 (nh, nl, dh, dl);
1.1.1.2   maekawa   401:
                    402:              sub_ddmmss (dh, dl, vh, vl, 0, D);
1.1.1.3 ! ohara     403:              if (dh == 0)
1.1.1.2   maekawa   404:                break;
                    405:              add_ssaaaa (nh, nl, uh, ul, 0, B);
1.1.1.3 ! ohara     406:              q2 = div2 (nh, nl, dh, dl);
1.1.1.2   maekawa   407:
                    408:              if (q1 != q2)
                    409:                break;
                    410:
1.1.1.3 ! ohara     411:              Tac = A + q1 * C;
        !           412:              if (GMP_NAIL_BITS != 0 && Tac > GMP_NUMB_MAX)
        !           413:                break;
        !           414:              Tbd = B + q1 * D;
        !           415:              if (GMP_NAIL_BITS != 0 && Tbd > GMP_NUMB_MAX)
        !           416:                break;
1.1.1.2   maekawa   417:              A = C;
1.1.1.3 ! ohara     418:              C = Tac;
1.1.1.2   maekawa   419:              B = D;
1.1.1.3 ! ohara     420:              D = Tbd;
1.1.1.2   maekawa   421:              umul_ppmm (t1, t0, q1, vl);
                    422:              t1 += q1 * vh;
                    423:              sub_ddmmss (Th, Tl, uh, ul, t1, t0);
                    424:              uh = vh, ul = vl;
                    425:              vh = Th, vl = Tl;
1.1.1.3 ! ohara     426:
        !           427:              asign = ~asign;
1.1.1.2   maekawa   428:            }
                    429: #if EXTEND
                    430:          if (asign)
                    431:            sign = -sign;
                    432: #endif
1.1       maekawa   433:        }
1.1.1.2   maekawa   434:       else /* Same, but using single-limb calculations.  */
                    435:        {
                    436:          mp_limb_t uh, vh;
                    437:          /* Make UH be the most significant limb of U, and make VH be
                    438:             corresponding bits from V.  */
                    439:          uh = up[size - 1];
                    440:          vh = vp[size - 1];
                    441:          count_leading_zeros (cnt, uh);
1.1.1.3 ! ohara     442: #if GMP_NAIL_BITS == 0
1.1.1.2   maekawa   443:          if (cnt != 0)
                    444:            {
1.1.1.3 ! ohara     445:              uh = (uh << cnt) | (up[size - 2] >> (GMP_LIMB_BITS - cnt));
        !           446:              vh = (vh << cnt) | (vp[size - 2] >> (GMP_LIMB_BITS - cnt));
1.1.1.2   maekawa   447:            }
1.1.1.3 ! ohara     448: #else
        !           449:          uh <<= cnt;
        !           450:          vh <<= cnt;
        !           451:          if (cnt < GMP_NUMB_BITS)
        !           452:            {
        !           453:              uh |= up[size - 2] >> (GMP_NUMB_BITS - cnt);
        !           454:              vh |= vp[size - 2] >> (GMP_NUMB_BITS - cnt);
        !           455:            }
        !           456:          else
        !           457:            {
        !           458:              uh |= up[size - 2] << cnt - GMP_NUMB_BITS;
        !           459:              vh |= vp[size - 2] << cnt - GMP_NUMB_BITS;
        !           460:              if (size >= 3)
        !           461:                {
        !           462:                  uh |= up[size - 3] >> 2 * GMP_NUMB_BITS - cnt;
        !           463:                  vh |= vp[size - 3] >> 2 * GMP_NUMB_BITS - cnt;
        !           464:                }
        !           465:            }
        !           466: #endif
1.1       maekawa   467:
1.1.1.2   maekawa   468:          A = 1;
                    469:          B = 0;
                    470:          C = 0;
                    471:          D = 1;
                    472:
                    473:          asign = 0;
                    474:          for (;;)
                    475:            {
                    476:              mp_limb_t q, T;
                    477:              if (vh - C == 0 || vh + D == 0)
                    478:                break;
                    479:
                    480:              q = (uh + A) / (vh - C);
                    481:              if (q != (uh - B) / (vh + D))
                    482:                break;
                    483:
                    484:              T = A + q * C;
                    485:              A = C;
                    486:              C = T;
                    487:              T = B + q * D;
                    488:              B = D;
                    489:              D = T;
                    490:              T = uh - q * vh;
                    491:              uh = vh;
                    492:              vh = T;
                    493:
1.1.1.3 ! ohara     494:              asign = ~asign;
        !           495:
1.1.1.2   maekawa   496:              if (vh - D == 0)
                    497:                break;
                    498:
                    499:              q = (uh - A) / (vh + C);
                    500:              if (q != (uh + B) / (vh - D))
                    501:                break;
                    502:
                    503:              T = A + q * C;
                    504:              A = C;
                    505:              C = T;
                    506:              T = B + q * D;
                    507:              B = D;
                    508:              D = T;
                    509:              T = uh - q * vh;
                    510:              uh = vh;
                    511:              vh = T;
1.1.1.3 ! ohara     512:
        !           513:              asign = ~asign;
1.1.1.2   maekawa   514:            }
                    515: #if EXTEND
                    516:          if (asign)
                    517:            sign = -sign;
                    518: #endif
1.1       maekawa   519:        }
                    520:
                    521: #if RECORD
                    522:       max = MAX (A, max);  max = MAX (B, max);
                    523:       max = MAX (C, max);  max = MAX (D, max);
                    524: #endif
                    525:
                    526:       if (B == 0)
                    527:        {
                    528:          /* This is quite rare.  I.e., optimize something else!  */
                    529:
1.1.1.3 ! ohara     530:          mpn_tdiv_qr (wp, up, (mp_size_t) 0, up, size, vp, vsize);
1.1       maekawa   531:
                    532: #if EXTEND
                    533:          MPN_COPY (tp, s0p, ssize);
                    534:          {
1.1.1.2   maekawa   535:            mp_size_t qsize;
1.1.1.3 ! ohara     536:            mp_size_t i;
1.1.1.2   maekawa   537:
1.1.1.3 ! ohara     538:            qsize = size - vsize + 1; /* size of stored quotient from division */
        !           539:            MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
        !           540:
        !           541:            for (i = 0; i < qsize; i++)
1.1.1.2   maekawa   542:              {
1.1.1.3 ! ohara     543:                mp_limb_t cy;
        !           544:                cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
        !           545:                tp[ssize + i] = cy;
1.1.1.2   maekawa   546:              }
1.1.1.3 ! ohara     547:
1.1.1.2   maekawa   548:            ssize += qsize;
                    549:            ssize -= tp[ssize - 1] == 0;
1.1       maekawa   550:          }
1.1.1.2   maekawa   551:
                    552:          sign = -sign;
                    553:          MP_PTR_SWAP (s0p, s1p);
                    554:          MP_PTR_SWAP (s1p, tp);
1.1       maekawa   555: #endif
                    556:          size = vsize;
1.1.1.2   maekawa   557:          MP_PTR_SWAP (up, vp);
1.1       maekawa   558:        }
                    559:       else
                    560:        {
1.1.1.2   maekawa   561: #if EXTEND
                    562:          mp_size_t tsize, wsize;
                    563: #endif
1.1       maekawa   564:          /* T = U*A + V*B
                    565:             W = U*C + V*D
                    566:             U = T
                    567:             V = W         */
                    568:
                    569: #if STAT
1.1.1.2   maekawa   570:          { mp_limb_t x; x = A | B | C | D; count_leading_zeros (cnt, x);
1.1.1.3 ! ohara     571:          arr[GMP_LIMB_BITS - cnt]++; }
1.1       maekawa   572: #endif
                    573:          if (A == 0)
                    574:            {
1.1.1.2   maekawa   575:              /* B == 1 and C == 1 (D is arbitrary) */
                    576:              mp_limb_t cy;
1.1       maekawa   577:              MPN_COPY (tp, vp, size);
1.1.1.2   maekawa   578:              MPN_COPY (wp, up, size);
                    579:              mpn_submul_1 (wp, vp, size, D);
                    580:              MP_PTR_SWAP (tp, up);
                    581:              MP_PTR_SWAP (wp, vp);
1.1       maekawa   582: #if EXTEND
                    583:              MPN_COPY (tp, s1p, ssize);
1.1.1.2   maekawa   584:              tsize = ssize;
                    585:              tp[ssize] = 0;    /* must zero since wp might spill below */
                    586:              MPN_COPY (wp, s0p, ssize);
                    587:              cy = mpn_addmul_1 (wp, s1p, ssize, D);
                    588:              wp[ssize] = cy;
                    589:              wsize = ssize + (cy != 0);
                    590:              MP_PTR_SWAP (tp, s0p);
                    591:              MP_PTR_SWAP (wp, s1p);
                    592:              ssize = MAX (wsize, tsize);
                    593: #endif
1.1       maekawa   594:            }
                    595:          else
                    596:            {
1.1.1.3 ! ohara     597:              mp_limb_t cy, cy1, cy2;
        !           598:
1.1.1.2   maekawa   599:              if (asign)
1.1       maekawa   600:                {
1.1.1.2   maekawa   601:                  mpn_mul_1 (tp, vp, size, B);
                    602:                  mpn_submul_1 (tp, up, size, A);
                    603:                  mpn_mul_1 (wp, up, size, C);
                    604:                  mpn_submul_1 (wp, vp, size, D);
1.1       maekawa   605:                }
                    606:              else
                    607:                {
1.1.1.2   maekawa   608:                  mpn_mul_1 (tp, up, size, A);
                    609:                  mpn_submul_1 (tp, vp, size, B);
                    610:                  mpn_mul_1 (wp, vp, size, D);
                    611:                  mpn_submul_1 (wp, up, size, C);
1.1.1.3 ! ohara     612:                }
        !           613:              MP_PTR_SWAP (tp, up);
        !           614:              MP_PTR_SWAP (wp, vp);
1.1.1.2   maekawa   615: #if EXTEND
1.1.1.3 ! ohara     616:              /* Compute new s0 */
        !           617:              cy1 = mpn_mul_1 (tp, s0p, ssize, A);
        !           618:              cy2 = mpn_addmul_1 (tp, s1p, ssize, B);
        !           619:              cy = cy1 + cy2;
        !           620:              tp[ssize] = cy & GMP_NUMB_MASK;
        !           621:              tsize = ssize + (cy != 0);
        !           622: #if GMP_NAIL_BITS == 0
        !           623:              if (cy < cy1)
        !           624: #else
        !           625:              if (cy > GMP_NUMB_MAX)
1.1.1.2   maekawa   626: #endif
1.1.1.3 ! ohara     627:                {
        !           628:                  tp[tsize] = 1;
        !           629:                  wp[tsize] = 0;
        !           630:                  tsize++;
        !           631:                  /* This happens just for nails, since we get more work done
        !           632:                     per numb there.  */
1.1       maekawa   633:                }
1.1.1.2   maekawa   634:
1.1.1.3 ! ohara     635:              /* Compute new s1 */
        !           636:              cy1 = mpn_mul_1 (wp, s1p, ssize, D);
        !           637:              cy2 = mpn_addmul_1 (wp, s0p, ssize, C);
        !           638:              cy = cy1 + cy2;
        !           639:              wp[ssize] = cy & GMP_NUMB_MASK;
        !           640:              wsize = ssize + (cy != 0);
        !           641: #if GMP_NAIL_BITS == 0
        !           642:              if (cy < cy1)
        !           643: #else
        !           644:              if (cy > GMP_NUMB_MAX)
        !           645: #endif
        !           646:                {
        !           647:                  wp[wsize] = 1;
        !           648:                  if (wsize >= tsize)
        !           649:                    tp[wsize] = 0;
        !           650:                  wsize++;
        !           651:                }
        !           652:
        !           653:              MP_PTR_SWAP (tp, s0p);
        !           654:              MP_PTR_SWAP (wp, s1p);
        !           655:              ssize = MAX (wsize, tsize);
        !           656: #endif
        !           657:            }
        !           658:          size -= up[size - 1] == 0;
        !           659: #if GMP_NAIL_BITS != 0
1.1.1.2   maekawa   660:          size -= up[size - 1] == 0;
1.1.1.3 ! ohara     661: #endif
1.1       maekawa   662:        }
1.1.1.3 ! ohara     663:
        !           664: #if WANT_GCDEXT_ONE_STEP
        !           665:       TMP_FREE (mark);
        !           666:       return 0;
        !           667: #endif
1.1       maekawa   668:     }
                    669:
                    670: #if RECORD
1.1.1.2   maekawa   671:   printf ("max: %lx\n", max);
                    672: #endif
                    673:
                    674: #if STAT
1.1.1.3 ! ohara     675:  {int i; for (i = 0; i <= GMP_LIMB_BITS; i++) printf ("%d:%d\n", i, arr[i]);}
1.1       maekawa   676: #endif
                    677:
                    678:   if (vsize == 0)
                    679:     {
1.1.1.2   maekawa   680:       if (gp != up && gp != 0)
1.1       maekawa   681:        MPN_COPY (gp, up, size);
                    682: #if EXTEND
1.1.1.2   maekawa   683:       MPN_NORMALIZE (s0p, ssize);
1.1       maekawa   684:       if (orig_s0p != s0p)
                    685:        MPN_COPY (orig_s0p, s0p, ssize);
1.1.1.2   maekawa   686:       *s0size = sign >= 0 ? ssize : -ssize;
1.1       maekawa   687: #endif
                    688:       TMP_FREE (mark);
                    689:       return size;
                    690:     }
                    691:   else
                    692:     {
                    693:       mp_limb_t vl, ul, t;
                    694: #if EXTEND
1.1.1.2   maekawa   695:       mp_size_t qsize, i;
1.1       maekawa   696: #endif
                    697:       vl = vp[0];
                    698: #if EXTEND
                    699:       t = mpn_divmod_1 (wp, up, size, vl);
1.1.1.2   maekawa   700:
1.1       maekawa   701:       MPN_COPY (tp, s0p, ssize);
1.1.1.2   maekawa   702:
                    703:       qsize = size - (wp[size - 1] == 0); /* size of quotient from division */
                    704:       if (ssize < qsize)
1.1       maekawa   705:        {
1.1.1.2   maekawa   706:          MPN_ZERO (tp + ssize, qsize - ssize);
                    707:          MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
                    708:          for (i = 0; i < ssize; i++)
                    709:            {
                    710:              mp_limb_t cy;
                    711:              cy = mpn_addmul_1 (tp + i, wp, qsize, s1p[i]);
                    712:              tp[qsize + i] = cy;
                    713:            }
                    714:        }
                    715:       else
                    716:        {
                    717:          MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
                    718:          for (i = 0; i < qsize; i++)
                    719:            {
                    720:              mp_limb_t cy;
                    721:              cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
                    722:              tp[ssize + i] = cy;
                    723:            }
                    724:        }
                    725:       ssize += qsize;
                    726:       ssize -= tp[ssize - 1] == 0;
                    727:
                    728:       sign = -sign;
                    729:       MP_PTR_SWAP (s0p, s1p);
                    730:       MP_PTR_SWAP (s1p, tp);
1.1       maekawa   731: #else
                    732:       t = mpn_mod_1 (up, size, vl);
                    733: #endif
                    734:       ul = vl;
                    735:       vl = t;
                    736:       while (vl != 0)
                    737:        {
                    738:          mp_limb_t t;
                    739: #if EXTEND
1.1.1.2   maekawa   740:          mp_limb_t q;
1.1       maekawa   741:          q = ul / vl;
1.1.1.2   maekawa   742:          t = ul - q * vl;
1.1       maekawa   743:
                    744:          MPN_COPY (tp, s0p, ssize);
1.1.1.2   maekawa   745:
                    746:          MPN_ZERO (s1p + ssize, 1); /* zero s1 too */
                    747:
1.1       maekawa   748:          {
1.1.1.2   maekawa   749:            mp_limb_t cy;
                    750:            cy = mpn_addmul_1 (tp, s1p, ssize, q);
                    751:            tp[ssize] = cy;
1.1       maekawa   752:          }
                    753:
1.1.1.2   maekawa   754:          ssize += 1;
                    755:          ssize -= tp[ssize - 1] == 0;
                    756:
                    757:          sign = -sign;
                    758:          MP_PTR_SWAP (s0p, s1p);
                    759:          MP_PTR_SWAP (s1p, tp);
1.1       maekawa   760: #else
                    761:          t = ul % vl;
                    762: #endif
                    763:          ul = vl;
                    764:          vl = t;
                    765:        }
1.1.1.2   maekawa   766:       if (gp != 0)
                    767:        gp[0] = ul;
1.1       maekawa   768: #if EXTEND
1.1.1.2   maekawa   769:       MPN_NORMALIZE (s0p, ssize);
1.1       maekawa   770:       if (orig_s0p != s0p)
                    771:        MPN_COPY (orig_s0p, s0p, ssize);
1.1.1.2   maekawa   772:       *s0size = sign >= 0 ? ssize : -ssize;
1.1       maekawa   773: #endif
                    774:       TMP_FREE (mark);
                    775:       return 1;
                    776:     }
                    777: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>