[BACK]Return to gcdext.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / generic

Annotation of OpenXM_contrib/gmp/mpn/generic/gcdext.c, Revision 1.1.1.2

1.1       maekawa     1: /* mpn_gcdext -- Extended Greatest Common Divisor.
                      2:
1.1.1.2 ! maekawa     3: Copyright (C) 1996, 1998, 2000 Free Software Foundation, Inc.
1.1       maekawa     4:
                      5: This file is part of the GNU MP Library.
                      6:
                      7: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 ! maekawa     8: it under the terms of the GNU Lesser General Public License as published by
        !             9: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1       maekawa    10: option) any later version.
                     11:
                     12: The GNU MP Library is distributed in the hope that it will be useful, but
                     13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 ! maekawa    14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1.1       maekawa    15: License for more details.
                     16:
1.1.1.2 ! maekawa    17: You should have received a copy of the GNU Lesser General Public License
1.1       maekawa    18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: MA 02111-1307, USA. */
                     21:
                     22: #include "gmp.h"
                     23: #include "gmp-impl.h"
                     24: #include "longlong.h"
                     25:
1.1.1.2 ! maekawa    26: #ifndef GCDEXT_THRESHOLD
        !            27: #define GCDEXT_THRESHOLD 17
        !            28: #endif
        !            29:
1.1       maekawa    30: #ifndef EXTEND
                     31: #define EXTEND 1
                     32: #endif
                     33:
                     34: #if STAT
                     35: int arr[BITS_PER_MP_LIMB];
                     36: #endif
                     37:
                     38:
1.1.1.2 ! maekawa    39: /* mpn_gcdext (GP, SP, SSIZE, UP, USIZE, VP, VSIZE)
        !            40:
        !            41:    Compute the extended GCD of {UP,USIZE} and {VP,VSIZE} and store the
        !            42:    greatest common divisor at GP (unless it is 0), and the first cofactor at
        !            43:    SP.  Write the size of the cofactor through the pointer SSIZE.  Return the
        !            44:    size of the value at GP.  Note that SP might be a negative number; this is
        !            45:    denoted by storing the negative of the size through SSIZE.
        !            46:
        !            47:    {UP,USIZE} and {VP,VSIZE} are both clobbered.
        !            48:
        !            49:    The space allocation for all four areas needs to be USIZE+1.
        !            50:
        !            51:    Preconditions: 1) U >= V.
        !            52:                  2) V > 0.  */
        !            53:
        !            54: /* We use Lehmer's algorithm.  The idea is to extract the most significant
        !            55:    bits of the operands, and compute the continued fraction for them.  We then
        !            56:    apply the gathered cofactors to the full operands.
        !            57:
        !            58:    Idea 1: After we have performed a full division, don't shift operands back,
1.1       maekawa    59:           but instead account for the extra factors-of-2 thus introduced.
                     60:    Idea 2: Simple generalization to use divide-and-conquer would give us an
                     61:           algorithm that runs faster than O(n^2).
                     62:    Idea 3: The input numbers need less space as the computation progresses,
1.1.1.2 ! maekawa    63:           while the s0 and s1 variables need more space.  To save memory, we
1.1       maekawa    64:           could make them share space, and have the latter variables grow
1.1.1.2 ! maekawa    65:           into the former.
        !            66:    Idea 4: We should not do double-limb arithmetic from the start.  Instead,
        !            67:           do things in single-limb arithmetic until the quotients differ,
        !            68:           and then switch to double-limb arithmetic.  */
        !            69:
1.1       maekawa    70:
1.1.1.2 ! maekawa    71: /* Division optimized for small quotients.  If the quotient is more than one limb,
        !            72:    store 1 in *qh and return 0.  */
        !            73: static mp_limb_t
        !            74: #if __STDC__
        !            75: div2 (mp_limb_t *qh, mp_limb_t n1, mp_limb_t n0, mp_limb_t d1, mp_limb_t d0)
        !            76: #else
        !            77: div2 (qh, n1, n0, d1, d0)
        !            78:      mp_limb_t *qh;
        !            79:      mp_limb_t n1;
        !            80:      mp_limb_t n0;
        !            81:      mp_limb_t d1;
        !            82:      mp_limb_t d0;
        !            83: #endif
        !            84: {
        !            85:   if (d1 == 0)
        !            86:     {
        !            87:       *qh = 1;
        !            88:       return 0;
        !            89:     }
        !            90:
        !            91:   if ((mp_limb_signed_t) n1 < 0)
        !            92:     {
        !            93:       mp_limb_t q;
        !            94:       int cnt;
        !            95:       for (cnt = 1; (mp_limb_signed_t) d1 >= 0; cnt++)
        !            96:        {
        !            97:          d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1));
        !            98:          d0 = d0 << 1;
        !            99:        }
        !           100:
        !           101:       q = 0;
        !           102:       while (cnt)
        !           103:        {
        !           104:          q <<= 1;
        !           105:          if (n1 > d1 || (n1 == d1 && n0 >= d0))
        !           106:            {
        !           107:              sub_ddmmss (n1, n0, n1, n0, d1, d0);
        !           108:              q |= 1;
        !           109:            }
        !           110:          d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1);
        !           111:          d1 = d1 >> 1;
        !           112:          cnt--;
        !           113:        }
        !           114:
        !           115:       *qh = 0;
        !           116:       return q;
        !           117:     }
        !           118:   else
        !           119:     {
        !           120:       mp_limb_t q;
        !           121:       int cnt;
        !           122:       for (cnt = 0; n1 > d1 || (n1 == d1 && n0 >= d0); cnt++)
        !           123:        {
        !           124:          d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1));
        !           125:          d0 = d0 << 1;
        !           126:        }
        !           127:
        !           128:       q = 0;
        !           129:       while (cnt)
        !           130:        {
        !           131:          d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1);
        !           132:          d1 = d1 >> 1;
        !           133:          q <<= 1;
        !           134:          if (n1 > d1 || (n1 == d1 && n0 >= d0))
        !           135:            {
        !           136:              sub_ddmmss (n1, n0, n1, n0, d1, d0);
        !           137:              q |= 1;
        !           138:            }
        !           139:          cnt--;
        !           140:        }
        !           141:
        !           142:       *qh = 0;
        !           143:       return q;
        !           144:     }
        !           145: }
1.1       maekawa   146:
                    147: mp_size_t
                    148: #if EXTEND
                    149: #if __STDC__
1.1.1.2 ! maekawa   150: mpn_gcdext (mp_ptr gp, mp_ptr s0p, mp_size_t *s0size,
1.1       maekawa   151:            mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
                    152: #else
1.1.1.2 ! maekawa   153: mpn_gcdext (gp, s0p, s0size, up, size, vp, vsize)
1.1       maekawa   154:      mp_ptr gp;
                    155:      mp_ptr s0p;
1.1.1.2 ! maekawa   156:      mp_size_t *s0size;
1.1       maekawa   157:      mp_ptr up;
                    158:      mp_size_t size;
                    159:      mp_ptr vp;
                    160:      mp_size_t vsize;
                    161: #endif
                    162: #else
                    163: #if __STDC__
                    164: mpn_gcd (mp_ptr gp,
                    165:         mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
                    166: #else
                    167: mpn_gcd (gp, up, size, vp, vsize)
                    168:      mp_ptr gp;
                    169:      mp_ptr up;
                    170:      mp_size_t size;
                    171:      mp_ptr vp;
                    172:      mp_size_t vsize;
                    173: #endif
                    174: #endif
                    175: {
1.1.1.2 ! maekawa   176:   mp_limb_t A, B, C, D;
1.1       maekawa   177:   int cnt;
                    178:   mp_ptr tp, wp;
                    179: #if RECORD
1.1.1.2 ! maekawa   180:   mp_limb_t max = 0;
1.1       maekawa   181: #endif
                    182: #if EXTEND
                    183:   mp_ptr s1p;
                    184:   mp_ptr orig_s0p = s0p;
1.1.1.2 ! maekawa   185:   mp_size_t ssize;
        !           186:   int sign = 1;
        !           187: #endif
        !           188:   int use_double_flag;
1.1       maekawa   189:   TMP_DECL (mark);
                    190:
                    191:   TMP_MARK (mark);
                    192:
1.1.1.2 ! maekawa   193:   use_double_flag = (size >= GCDEXT_THRESHOLD);
        !           194:
1.1       maekawa   195:   tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
                    196:   wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
1.1.1.2 ! maekawa   197: #if EXTEND
        !           198:   s1p = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
1.1       maekawa   199:
                    200:   MPN_ZERO (s0p, size);
                    201:   MPN_ZERO (s1p, size);
                    202:
                    203:   s0p[0] = 1;
                    204:   s1p[0] = 0;
                    205:   ssize = 1;
                    206: #endif
                    207:
                    208:   if (size > vsize)
                    209:     {
                    210:       /* Normalize V (and shift up U the same amount).  */
                    211:       count_leading_zeros (cnt, vp[vsize - 1]);
                    212:       if (cnt != 0)
                    213:        {
                    214:          mp_limb_t cy;
                    215:          mpn_lshift (vp, vp, vsize, cnt);
                    216:          cy = mpn_lshift (up, up, size, cnt);
                    217:          up[size] = cy;
                    218:          size += cy != 0;
                    219:        }
                    220:
                    221:       mpn_divmod (up + vsize, up, size, vp, vsize);
                    222: #if EXTEND
                    223:       /* This is really what it boils down to in this case... */
                    224:       s0p[0] = 0;
                    225:       s1p[0] = 1;
1.1.1.2 ! maekawa   226:       sign = -sign;
1.1       maekawa   227: #endif
                    228:       size = vsize;
                    229:       if (cnt != 0)
                    230:        {
                    231:          mpn_rshift (up, up, size, cnt);
                    232:          mpn_rshift (vp, vp, size, cnt);
                    233:        }
1.1.1.2 ! maekawa   234:       MP_PTR_SWAP (up, vp);
1.1       maekawa   235:     }
                    236:
                    237:   for (;;)
                    238:     {
1.1.1.2 ! maekawa   239:       mp_limb_t asign;
1.1       maekawa   240:       /* Figure out exact size of V.  */
                    241:       vsize = size;
                    242:       MPN_NORMALIZE (vp, vsize);
                    243:       if (vsize <= 1)
                    244:        break;
                    245:
1.1.1.2 ! maekawa   246:       if (use_double_flag)
1.1       maekawa   247:        {
1.1.1.2 ! maekawa   248:          mp_limb_t uh, vh, ul, vl;
        !           249:          /* Let UH,UL be the most significant limbs of U, and let VH,VL be
        !           250:             the corresponding bits from V.  */
        !           251:          uh = up[size - 1];
        !           252:          vh = vp[size - 1];
        !           253:          ul = up[size - 2];
        !           254:          vl = vp[size - 2];
        !           255:          count_leading_zeros (cnt, uh);
        !           256:          if (cnt != 0)
        !           257:            {
        !           258:              uh = (uh << cnt) | (ul >> (BITS_PER_MP_LIMB - cnt));
        !           259:              vh = (vh << cnt) | (vl >> (BITS_PER_MP_LIMB - cnt));
        !           260:              vl <<= cnt;
        !           261:              ul <<= cnt;
        !           262:              if (size >= 3)
        !           263:                {
        !           264:                  ul |= (up[size - 3] >> (BITS_PER_MP_LIMB - cnt));
        !           265:                  vl |= (vp[size - 3] >> (BITS_PER_MP_LIMB - cnt));
        !           266:                }
        !           267:            }
        !           268:
        !           269:          A = 1;
        !           270:          B = 0;
        !           271:          C = 0;
        !           272:          D = 1;
        !           273:
        !           274:          asign = 0;
        !           275:          for (;;)
        !           276:            {
        !           277:              mp_limb_t T;
        !           278:              mp_limb_t qh, q1, q2;
        !           279:              mp_limb_t nh, nl, dh, dl;
        !           280:              mp_limb_t t1, t0;
        !           281:              mp_limb_t Th, Tl;
        !           282:
        !           283:              sub_ddmmss (dh, dl, vh, vl, 0, C);
        !           284:              if ((dl | dh) == 0)
        !           285:                break;
        !           286:              add_ssaaaa (nh, nl, uh, ul, 0, A);
        !           287:              q1 = div2 (&qh, nh, nl, dh, dl);
        !           288:              if (qh != 0)
        !           289:                break;          /* could handle this */
        !           290:
        !           291:              add_ssaaaa (dh, dl, vh, vl, 0, D);
        !           292:              if ((dl | dh) == 0)
        !           293:                break;
        !           294:              sub_ddmmss (nh, nl, uh, ul, 0, B);
        !           295:              q2 = div2 (&qh, nh, nl, dh, dl);
        !           296:              if (qh != 0)
        !           297:                break;          /* could handle this */
        !           298:
        !           299:              if (q1 != q2)
        !           300:                break;
        !           301:
        !           302:              asign = ~asign;
        !           303:
        !           304:              T = A + q1 * C;
        !           305:              A = C;
        !           306:              C = T;
        !           307:              T = B + q1 * D;
        !           308:              B = D;
        !           309:              D = T;
        !           310:              umul_ppmm (t1, t0, q1, vl);
        !           311:              t1 += q1 * vh;
        !           312:              sub_ddmmss (Th, Tl, uh, ul, t1, t0);
        !           313:              uh = vh, ul = vl;
        !           314:              vh = Th, vl = Tl;
        !           315:
        !           316:              add_ssaaaa (dh, dl, vh, vl, 0, C);
        !           317:              sub_ddmmss (nh, nl, uh, ul, 0, A);
        !           318:              q1 = div2 (&qh, nh, nl, dh, dl);
        !           319:              if (qh != 0)
        !           320:                break;          /* could handle this */
        !           321:
        !           322:              sub_ddmmss (dh, dl, vh, vl, 0, D);
        !           323:              if ((dl | dh) == 0)
        !           324:                break;
        !           325:              add_ssaaaa (nh, nl, uh, ul, 0, B);
        !           326:              q2 = div2 (&qh, nh, nl, dh, dl);
        !           327:              if (qh != 0)
        !           328:                break;          /* could handle this */
        !           329:
        !           330:              if (q1 != q2)
        !           331:                break;
        !           332:
        !           333:              asign = ~asign;
        !           334:
        !           335:              T = A + q1 * C;
        !           336:              A = C;
        !           337:              C = T;
        !           338:              T = B + q1 * D;
        !           339:              B = D;
        !           340:              D = T;
        !           341:              umul_ppmm (t1, t0, q1, vl);
        !           342:              t1 += q1 * vh;
        !           343:              sub_ddmmss (Th, Tl, uh, ul, t1, t0);
        !           344:              uh = vh, ul = vl;
        !           345:              vh = Th, vl = Tl;
        !           346:            }
        !           347: #if EXTEND
        !           348:          if (asign)
        !           349:            sign = -sign;
        !           350: #endif
1.1       maekawa   351:        }
1.1.1.2 ! maekawa   352:       else /* Same, but using single-limb calculations.  */
        !           353:        {
        !           354:          mp_limb_t uh, vh;
        !           355:          /* Make UH be the most significant limb of U, and make VH be
        !           356:             corresponding bits from V.  */
        !           357:          uh = up[size - 1];
        !           358:          vh = vp[size - 1];
        !           359:          count_leading_zeros (cnt, uh);
        !           360:          if (cnt != 0)
        !           361:            {
        !           362:              uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
        !           363:              vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
        !           364:            }
1.1       maekawa   365:
1.1.1.2 ! maekawa   366:          A = 1;
        !           367:          B = 0;
        !           368:          C = 0;
        !           369:          D = 1;
        !           370:
        !           371:          asign = 0;
        !           372:          for (;;)
        !           373:            {
        !           374:              mp_limb_t q, T;
        !           375:              if (vh - C == 0 || vh + D == 0)
        !           376:                break;
        !           377:
        !           378:              q = (uh + A) / (vh - C);
        !           379:              if (q != (uh - B) / (vh + D))
        !           380:                break;
        !           381:
        !           382:              asign = ~asign;
        !           383:
        !           384:              T = A + q * C;
        !           385:              A = C;
        !           386:              C = T;
        !           387:              T = B + q * D;
        !           388:              B = D;
        !           389:              D = T;
        !           390:              T = uh - q * vh;
        !           391:              uh = vh;
        !           392:              vh = T;
        !           393:
        !           394:              if (vh - D == 0)
        !           395:                break;
        !           396:
        !           397:              q = (uh - A) / (vh + C);
        !           398:              if (q != (uh + B) / (vh - D))
        !           399:                break;
        !           400:
        !           401:              asign = ~asign;
        !           402:
        !           403:              T = A + q * C;
        !           404:              A = C;
        !           405:              C = T;
        !           406:              T = B + q * D;
        !           407:              B = D;
        !           408:              D = T;
        !           409:              T = uh - q * vh;
        !           410:              uh = vh;
        !           411:              vh = T;
        !           412:            }
        !           413: #if EXTEND
        !           414:          if (asign)
        !           415:            sign = -sign;
        !           416: #endif
1.1       maekawa   417:        }
                    418:
                    419: #if RECORD
                    420:       max = MAX (A, max);  max = MAX (B, max);
                    421:       max = MAX (C, max);  max = MAX (D, max);
                    422: #endif
                    423:
                    424:       if (B == 0)
                    425:        {
                    426:          mp_limb_t qh;
                    427:          mp_size_t i;
                    428:          /* This is quite rare.  I.e., optimize something else!  */
                    429:
                    430:          /* Normalize V (and shift up U the same amount).  */
                    431:          count_leading_zeros (cnt, vp[vsize - 1]);
                    432:          if (cnt != 0)
                    433:            {
                    434:              mp_limb_t cy;
                    435:              mpn_lshift (vp, vp, vsize, cnt);
                    436:              cy = mpn_lshift (up, up, size, cnt);
                    437:              up[size] = cy;
                    438:              size += cy != 0;
                    439:            }
                    440:
                    441:          qh = mpn_divmod (up + vsize, up, size, vp, vsize);
                    442: #if EXTEND
                    443:          MPN_COPY (tp, s0p, ssize);
                    444:          {
1.1.1.2 ! maekawa   445:            mp_size_t qsize;
        !           446:
        !           447:            qsize = size - vsize; /* size of stored quotient from division */
        !           448:            if (ssize < qsize)
        !           449:              {
        !           450:                MPN_ZERO (tp + ssize, qsize - ssize);
        !           451:                MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
        !           452:                for (i = 0; i < ssize; i++)
        !           453:                  {
        !           454:                    mp_limb_t cy;
        !           455:                    cy = mpn_addmul_1 (tp + i, up + vsize, qsize, s1p[i]);
        !           456:                    tp[qsize + i] = cy;
        !           457:                  }
        !           458:                if (qh != 0)
        !           459:                  {
        !           460:                    mp_limb_t cy;
        !           461:                    cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize);
        !           462:                    if (cy != 0)
        !           463:                      abort ();
        !           464:                  }
        !           465:              }
        !           466:            else
        !           467:              {
        !           468:                MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
        !           469:                for (i = 0; i < qsize; i++)
        !           470:                  {
        !           471:                    mp_limb_t cy;
        !           472:                    cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
        !           473:                    tp[ssize + i] = cy;
        !           474:                  }
        !           475:                if (qh != 0)
        !           476:                  {
        !           477:                    mp_limb_t cy;
        !           478:                    cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize);
        !           479:                    if (cy != 0)
        !           480:                      {
        !           481:                        tp[qsize + ssize] = cy;
        !           482:                        s1p[qsize + ssize] = 0;
        !           483:                        ssize++;
        !           484:                      }
        !           485:                  }
        !           486:              }
        !           487:            ssize += qsize;
        !           488:            ssize -= tp[ssize - 1] == 0;
1.1       maekawa   489:          }
1.1.1.2 ! maekawa   490:
        !           491:          sign = -sign;
        !           492:          MP_PTR_SWAP (s0p, s1p);
        !           493:          MP_PTR_SWAP (s1p, tp);
1.1       maekawa   494: #endif
                    495:          size = vsize;
                    496:          if (cnt != 0)
                    497:            {
                    498:              mpn_rshift (up, up, size, cnt);
                    499:              mpn_rshift (vp, vp, size, cnt);
                    500:            }
1.1.1.2 ! maekawa   501:          MP_PTR_SWAP (up, vp);
1.1       maekawa   502:        }
                    503:       else
                    504:        {
1.1.1.2 ! maekawa   505: #if EXTEND
        !           506:          mp_size_t tsize, wsize;
        !           507: #endif
1.1       maekawa   508:          /* T = U*A + V*B
                    509:             W = U*C + V*D
                    510:             U = T
                    511:             V = W         */
                    512:
                    513: #if STAT
1.1.1.2 ! maekawa   514:          { mp_limb_t x; x = A | B | C | D; count_leading_zeros (cnt, x);
        !           515:          arr[BITS_PER_MP_LIMB - cnt]++; }
1.1       maekawa   516: #endif
                    517:          if (A == 0)
                    518:            {
1.1.1.2 ! maekawa   519:              /* B == 1 and C == 1 (D is arbitrary) */
        !           520:              mp_limb_t cy;
1.1       maekawa   521:              MPN_COPY (tp, vp, size);
1.1.1.2 ! maekawa   522:              MPN_COPY (wp, up, size);
        !           523:              mpn_submul_1 (wp, vp, size, D);
        !           524:              MP_PTR_SWAP (tp, up);
        !           525:              MP_PTR_SWAP (wp, vp);
1.1       maekawa   526: #if EXTEND
                    527:              MPN_COPY (tp, s1p, ssize);
1.1.1.2 ! maekawa   528:              tsize = ssize;
        !           529:              tp[ssize] = 0;    /* must zero since wp might spill below */
        !           530:              MPN_COPY (wp, s0p, ssize);
        !           531:              cy = mpn_addmul_1 (wp, s1p, ssize, D);
        !           532:              wp[ssize] = cy;
        !           533:              wsize = ssize + (cy != 0);
        !           534:              MP_PTR_SWAP (tp, s0p);
        !           535:              MP_PTR_SWAP (wp, s1p);
        !           536:              ssize = MAX (wsize, tsize);
        !           537: #endif
1.1       maekawa   538:            }
                    539:          else
                    540:            {
1.1.1.2 ! maekawa   541:              if (asign)
1.1       maekawa   542:                {
1.1.1.2 ! maekawa   543:                  mp_limb_t cy;
        !           544:                  mpn_mul_1 (tp, vp, size, B);
        !           545:                  mpn_submul_1 (tp, up, size, A);
        !           546:                  mpn_mul_1 (wp, up, size, C);
        !           547:                  mpn_submul_1 (wp, vp, size, D);
        !           548:                  MP_PTR_SWAP (tp, up);
        !           549:                  MP_PTR_SWAP (wp, vp);
        !           550: #if EXTEND
1.1       maekawa   551:                  cy = mpn_mul_1 (tp, s1p, ssize, B);
1.1.1.2 ! maekawa   552:                  cy += mpn_addmul_1 (tp, s0p, ssize, A);
        !           553:                  tp[ssize] = cy;
        !           554:                  tsize = ssize + (cy != 0);
        !           555:                  cy = mpn_mul_1 (wp, s0p, ssize, C);
        !           556:                  cy += mpn_addmul_1 (wp, s1p, ssize, D);
        !           557:                  wp[ssize] = cy;
        !           558:                  wsize = ssize + (cy != 0);
        !           559:                  MP_PTR_SWAP (tp, s0p);
        !           560:                  MP_PTR_SWAP (wp, s1p);
        !           561:                  ssize = MAX (wsize, tsize);
        !           562: #endif
1.1       maekawa   563:                }
                    564:              else
                    565:                {
1.1.1.2 ! maekawa   566:                  mp_limb_t cy;
        !           567:                  mpn_mul_1 (tp, up, size, A);
        !           568:                  mpn_submul_1 (tp, vp, size, B);
        !           569:                  mpn_mul_1 (wp, vp, size, D);
        !           570:                  mpn_submul_1 (wp, up, size, C);
        !           571:                  MP_PTR_SWAP (tp, up);
        !           572:                  MP_PTR_SWAP (wp, vp);
        !           573: #if EXTEND
1.1       maekawa   574:                  cy = mpn_mul_1 (tp, s0p, ssize, A);
1.1.1.2 ! maekawa   575:                  cy += mpn_addmul_1 (tp, s1p, ssize, B);
        !           576:                  tp[ssize] = cy;
        !           577:                  tsize = ssize + (cy != 0);
        !           578:                  cy = mpn_mul_1 (wp, s1p, ssize, D);
        !           579:                  cy += mpn_addmul_1 (wp, s0p, ssize, C);
        !           580:                  wp[ssize] = cy;
        !           581:                  wsize = ssize + (cy != 0);
        !           582:                  MP_PTR_SWAP (tp, s0p);
        !           583:                  MP_PTR_SWAP (wp, s1p);
        !           584:                  ssize = MAX (wsize, tsize);
        !           585: #endif
1.1       maekawa   586:                }
                    587:            }
1.1.1.2 ! maekawa   588:
        !           589:          size -= up[size - 1] == 0;
1.1       maekawa   590:        }
                    591:     }
                    592:
                    593: #if RECORD
1.1.1.2 ! maekawa   594:   printf ("max: %lx\n", max);
        !           595: #endif
        !           596:
        !           597: #if STAT
        !           598:  {int i; for (i = 0; i < BITS_PER_MP_LIMB; i++) printf ("%d:%d\n", i, arr[i]);}
1.1       maekawa   599: #endif
                    600:
                    601:   if (vsize == 0)
                    602:     {
1.1.1.2 ! maekawa   603:       if (gp != up && gp != 0)
1.1       maekawa   604:        MPN_COPY (gp, up, size);
                    605: #if EXTEND
1.1.1.2 ! maekawa   606:       MPN_NORMALIZE (s0p, ssize);
1.1       maekawa   607:       if (orig_s0p != s0p)
                    608:        MPN_COPY (orig_s0p, s0p, ssize);
1.1.1.2 ! maekawa   609:       *s0size = sign >= 0 ? ssize : -ssize;
1.1       maekawa   610: #endif
                    611:       TMP_FREE (mark);
                    612:       return size;
                    613:     }
                    614:   else
                    615:     {
                    616:       mp_limb_t vl, ul, t;
                    617: #if EXTEND
1.1.1.2 ! maekawa   618:       mp_size_t qsize, i;
1.1       maekawa   619: #endif
                    620:       vl = vp[0];
                    621: #if EXTEND
                    622:       t = mpn_divmod_1 (wp, up, size, vl);
1.1.1.2 ! maekawa   623:
1.1       maekawa   624:       MPN_COPY (tp, s0p, ssize);
1.1.1.2 ! maekawa   625:
        !           626:       qsize = size - (wp[size - 1] == 0); /* size of quotient from division */
        !           627:       if (ssize < qsize)
1.1       maekawa   628:        {
1.1.1.2 ! maekawa   629:          MPN_ZERO (tp + ssize, qsize - ssize);
        !           630:          MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
        !           631:          for (i = 0; i < ssize; i++)
        !           632:            {
        !           633:              mp_limb_t cy;
        !           634:              cy = mpn_addmul_1 (tp + i, wp, qsize, s1p[i]);
        !           635:              tp[qsize + i] = cy;
        !           636:            }
        !           637:        }
        !           638:       else
        !           639:        {
        !           640:          MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
        !           641:          for (i = 0; i < qsize; i++)
        !           642:            {
        !           643:              mp_limb_t cy;
        !           644:              cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
        !           645:              tp[ssize + i] = cy;
        !           646:            }
        !           647:        }
        !           648:       ssize += qsize;
        !           649:       ssize -= tp[ssize - 1] == 0;
        !           650:
        !           651:       sign = -sign;
        !           652:       MP_PTR_SWAP (s0p, s1p);
        !           653:       MP_PTR_SWAP (s1p, tp);
1.1       maekawa   654: #else
                    655:       t = mpn_mod_1 (up, size, vl);
                    656: #endif
                    657:       ul = vl;
                    658:       vl = t;
                    659:       while (vl != 0)
                    660:        {
                    661:          mp_limb_t t;
                    662: #if EXTEND
1.1.1.2 ! maekawa   663:          mp_limb_t q;
1.1       maekawa   664:          q = ul / vl;
1.1.1.2 ! maekawa   665:          t = ul - q * vl;
1.1       maekawa   666:
                    667:          MPN_COPY (tp, s0p, ssize);
1.1.1.2 ! maekawa   668:
        !           669:          MPN_ZERO (s1p + ssize, 1); /* zero s1 too */
        !           670:
1.1       maekawa   671:          {
1.1.1.2 ! maekawa   672:            mp_limb_t cy;
        !           673:            cy = mpn_addmul_1 (tp, s1p, ssize, q);
        !           674:            tp[ssize] = cy;
1.1       maekawa   675:          }
                    676:
1.1.1.2 ! maekawa   677:          ssize += 1;
        !           678:          ssize -= tp[ssize - 1] == 0;
        !           679:
        !           680:          sign = -sign;
        !           681:          MP_PTR_SWAP (s0p, s1p);
        !           682:          MP_PTR_SWAP (s1p, tp);
1.1       maekawa   683: #else
                    684:          t = ul % vl;
                    685: #endif
                    686:          ul = vl;
                    687:          vl = t;
                    688:        }
1.1.1.2 ! maekawa   689:       if (gp != 0)
        !           690:        gp[0] = ul;
1.1       maekawa   691: #if EXTEND
1.1.1.2 ! maekawa   692:       MPN_NORMALIZE (s0p, ssize);
1.1       maekawa   693:       if (orig_s0p != s0p)
                    694:        MPN_COPY (orig_s0p, s0p, ssize);
1.1.1.2 ! maekawa   695:       *s0size = sign >= 0 ? ssize : -ssize;
1.1       maekawa   696: #endif
                    697:       TMP_FREE (mark);
                    698:       return 1;
                    699:     }
                    700: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>