[BACK]Return to gcdext.c CVS log [TXT][DIR] Up to [local] / OpenXM / src / kan96xx / gmp-2.0.2 / mpn / generic

Annotation of OpenXM/src/kan96xx/gmp-2.0.2/mpn/generic/gcdext.c, Revision 1.1

1.1     ! maekawa     1: /* mpn_gcdext -- Extended Greatest Common Divisor.
        !             2:
        !             3: Copyright (C) 1996 Free Software Foundation, Inc.
        !             4:
        !             5: This file is part of the GNU MP Library.
        !             6:
        !             7: The GNU MP Library is free software; you can redistribute it and/or modify
        !             8: it under the terms of the GNU Library General Public License as published by
        !             9: the Free Software Foundation; either version 2 of the License, or (at your
        !            10: option) any later version.
        !            11:
        !            12: The GNU MP Library is distributed in the hope that it will be useful, but
        !            13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
        !            15: License for more details.
        !            16:
        !            17: You should have received a copy of the GNU Library General Public License
        !            18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            20: MA 02111-1307, USA. */
        !            21:
        !            22: #include "gmp.h"
        !            23: #include "gmp-impl.h"
        !            24: #include "longlong.h"
        !            25:
        !            26: #ifndef EXTEND
        !            27: #define EXTEND 1
        !            28: #endif
        !            29:
        !            30: #if STAT
        !            31: int arr[BITS_PER_MP_LIMB];
        !            32: #endif
        !            33:
        !            34: #define SGN(A) (((A) < 0) ? -1 : ((A) > 0))
        !            35:
        !            36: /* Idea 1: After we have performed a full division, don't shift operands back,
        !            37:           but instead account for the extra factors-of-2 thus introduced.
        !            38:    Idea 2: Simple generalization to use divide-and-conquer would give us an
        !            39:           algorithm that runs faster than O(n^2).
        !            40:    Idea 3: The input numbers need less space as the computation progresses,
        !            41:           while the s0 and s1 variables need more space.  To save space, we
        !            42:           could make them share space, and have the latter variables grow
        !            43:           into the former.  */
        !            44:
        !            45: /* Precondition: U >= V.  */
        !            46:
        !            47: mp_size_t
        !            48: #if EXTEND
        !            49: #if __STDC__
        !            50: mpn_gcdext (mp_ptr gp, mp_ptr s0p,
        !            51:            mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
        !            52: #else
        !            53: mpn_gcdext (gp, s0p, up, size, vp, vsize)
        !            54:      mp_ptr gp;
        !            55:      mp_ptr s0p;
        !            56:      mp_ptr up;
        !            57:      mp_size_t size;
        !            58:      mp_ptr vp;
        !            59:      mp_size_t vsize;
        !            60: #endif
        !            61: #else
        !            62: #if __STDC__
        !            63: mpn_gcd (mp_ptr gp,
        !            64:         mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
        !            65: #else
        !            66: mpn_gcd (gp, up, size, vp, vsize)
        !            67:      mp_ptr gp;
        !            68:      mp_ptr up;
        !            69:      mp_size_t size;
        !            70:      mp_ptr vp;
        !            71:      mp_size_t vsize;
        !            72: #endif
        !            73: #endif
        !            74: {
        !            75:   mp_limb_t uh, vh;
        !            76:   mp_limb_signed_t A, B, C, D;
        !            77:   int cnt;
        !            78:   mp_ptr tp, wp;
        !            79: #if RECORD
        !            80:   mp_limb_signed_t min = 0, max = 0;
        !            81: #endif
        !            82: #if EXTEND
        !            83:   mp_ptr s1p;
        !            84:   mp_ptr orig_s0p = s0p;
        !            85:   mp_size_t ssize, orig_size = size;
        !            86:   TMP_DECL (mark);
        !            87:
        !            88:   TMP_MARK (mark);
        !            89:
        !            90:   tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
        !            91:   wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
        !            92:   s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB);
        !            93:
        !            94:   MPN_ZERO (s0p, size);
        !            95:   MPN_ZERO (s1p, size);
        !            96:
        !            97:   s0p[0] = 1;
        !            98:   s1p[0] = 0;
        !            99:   ssize = 1;
        !           100: #endif
        !           101:
        !           102:   if (size > vsize)
        !           103:     {
        !           104:       /* Normalize V (and shift up U the same amount).  */
        !           105:       count_leading_zeros (cnt, vp[vsize - 1]);
        !           106:       if (cnt != 0)
        !           107:        {
        !           108:          mp_limb_t cy;
        !           109:          mpn_lshift (vp, vp, vsize, cnt);
        !           110:          cy = mpn_lshift (up, up, size, cnt);
        !           111:          up[size] = cy;
        !           112:          size += cy != 0;
        !           113:        }
        !           114:
        !           115:       mpn_divmod (up + vsize, up, size, vp, vsize);
        !           116: #if EXTEND
        !           117:       /* This is really what it boils down to in this case... */
        !           118:       s0p[0] = 0;
        !           119:       s1p[0] = 1;
        !           120: #endif
        !           121:       size = vsize;
        !           122:       if (cnt != 0)
        !           123:        {
        !           124:          mpn_rshift (up, up, size, cnt);
        !           125:          mpn_rshift (vp, vp, size, cnt);
        !           126:        }
        !           127:       {
        !           128:        mp_ptr xp;
        !           129:        xp = up; up = vp; vp = xp;
        !           130:       }
        !           131:     }
        !           132:
        !           133:   for (;;)
        !           134:     {
        !           135:       /* Figure out exact size of V.  */
        !           136:       vsize = size;
        !           137:       MPN_NORMALIZE (vp, vsize);
        !           138:       if (vsize <= 1)
        !           139:        break;
        !           140:
        !           141:       /* Make UH be the most significant limb of U, and make VH be
        !           142:         corresponding bits from V.  */
        !           143:       uh = up[size - 1];
        !           144:       vh = vp[size - 1];
        !           145:       count_leading_zeros (cnt, uh);
        !           146:       if (cnt != 0)
        !           147:        {
        !           148:          uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
        !           149:          vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
        !           150:        }
        !           151:
        !           152: #if 0
        !           153:       /* For now, only handle BITS_PER_MP_LIMB-1 bits.  This makes
        !           154:         room for sign bit.  */
        !           155:       uh >>= 1;
        !           156:       vh >>= 1;
        !           157: #endif
        !           158:       A = 1;
        !           159:       B = 0;
        !           160:       C = 0;
        !           161:       D = 1;
        !           162:
        !           163:       for (;;)
        !           164:        {
        !           165:          mp_limb_signed_t q, T;
        !           166:          if (vh + C == 0 || vh + D == 0)
        !           167:            break;
        !           168:
        !           169:          q = (uh + A) / (vh + C);
        !           170:          if (q != (uh + B) / (vh + D))
        !           171:            break;
        !           172:
        !           173:          T = A - q * C;
        !           174:          A = C;
        !           175:          C = T;
        !           176:          T = B - q * D;
        !           177:          B = D;
        !           178:          D = T;
        !           179:          T = uh - q * vh;
        !           180:          uh = vh;
        !           181:          vh = T;
        !           182:        }
        !           183:
        !           184: #if RECORD
        !           185:       min = MIN (A, min);  min = MIN (B, min);
        !           186:       min = MIN (C, min);  min = MIN (D, min);
        !           187:       max = MAX (A, max);  max = MAX (B, max);
        !           188:       max = MAX (C, max);  max = MAX (D, max);
        !           189: #endif
        !           190:
        !           191:       if (B == 0)
        !           192:        {
        !           193:          mp_limb_t qh;
        !           194:          mp_size_t i;
        !           195:
        !           196:          /* This is quite rare.  I.e., optimize something else!  */
        !           197:
        !           198:          /* Normalize V (and shift up U the same amount).  */
        !           199:          count_leading_zeros (cnt, vp[vsize - 1]);
        !           200:          if (cnt != 0)
        !           201:            {
        !           202:              mp_limb_t cy;
        !           203:              mpn_lshift (vp, vp, vsize, cnt);
        !           204:              cy = mpn_lshift (up, up, size, cnt);
        !           205:              up[size] = cy;
        !           206:              size += cy != 0;
        !           207:            }
        !           208:
        !           209:          qh = mpn_divmod (up + vsize, up, size, vp, vsize);
        !           210: #if EXTEND
        !           211:          MPN_COPY (tp, s0p, ssize);
        !           212:          for (i = 0; i < size - vsize; i++)
        !           213:            {
        !           214:              mp_limb_t cy;
        !           215:              cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
        !           216:              if (cy != 0)
        !           217:                tp[ssize++] = cy;
        !           218:            }
        !           219:          if (qh != 0)
        !           220:            {
        !           221:              mp_limb_t cy;
        !           222:              abort ();
        !           223:              /* XXX since qh == 1, mpn_addmul_1 is overkill */
        !           224:              cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh);
        !           225:              if (cy != 0)
        !           226:                tp[ssize++] = cy;
        !           227:            }
        !           228: #if 0
        !           229:          MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */
        !           230:          MPN_COPY (s1p, tp, ssize);
        !           231: #else
        !           232:          {
        !           233:            mp_ptr xp;
        !           234:            xp = s0p; s0p = s1p; s1p = xp;
        !           235:            xp = s1p; s1p = tp; tp = xp;
        !           236:          }
        !           237: #endif
        !           238: #endif
        !           239:          size = vsize;
        !           240:          if (cnt != 0)
        !           241:            {
        !           242:              mpn_rshift (up, up, size, cnt);
        !           243:              mpn_rshift (vp, vp, size, cnt);
        !           244:            }
        !           245:
        !           246:          {
        !           247:            mp_ptr xp;
        !           248:            xp = up; up = vp; vp = xp;
        !           249:          }
        !           250:          MPN_NORMALIZE (up, size);
        !           251:        }
        !           252:       else
        !           253:        {
        !           254:          /* T = U*A + V*B
        !           255:             W = U*C + V*D
        !           256:             U = T
        !           257:             V = W         */
        !           258:
        !           259:          if (SGN(A) == SGN(B)) /* should be different sign */
        !           260:            abort ();
        !           261:          if (SGN(C) == SGN(D)) /* should be different sign */
        !           262:            abort ();
        !           263: #if STAT
        !           264:          { mp_limb_t x;
        !           265:            x = ABS (A) | ABS (B) | ABS (C) | ABS (D);
        !           266:            count_leading_zeros (cnt, x);
        !           267:            arr[BITS_PER_MP_LIMB - cnt]++; }
        !           268: #endif
        !           269:          if (A == 0)
        !           270:            {
        !           271:              if (B != 1) abort ();
        !           272:              MPN_COPY (tp, vp, size);
        !           273:            }
        !           274:          else
        !           275:            {
        !           276:              if (A < 0)
        !           277:                {
        !           278:                  mpn_mul_1 (tp, vp, size, B);
        !           279:                  mpn_submul_1 (tp, up, size, -A);
        !           280:                }
        !           281:              else
        !           282:                {
        !           283:                  mpn_mul_1 (tp, up, size, A);
        !           284:                  mpn_submul_1 (tp, vp, size, -B);
        !           285:                }
        !           286:            }
        !           287:          if (C < 0)
        !           288:            {
        !           289:              mpn_mul_1 (wp, vp, size, D);
        !           290:              mpn_submul_1 (wp, up, size, -C);
        !           291:            }
        !           292:          else
        !           293:            {
        !           294:              mpn_mul_1 (wp, up, size, C);
        !           295:              mpn_submul_1 (wp, vp, size, -D);
        !           296:            }
        !           297:
        !           298:          {
        !           299:            mp_ptr xp;
        !           300:            xp = tp; tp = up; up = xp;
        !           301:            xp = wp; wp = vp; vp = xp;
        !           302:          }
        !           303:
        !           304: #if EXTEND
        !           305:          { mp_limb_t cy;
        !           306:          MPN_ZERO (tp, orig_size);
        !           307:          if (A == 0)
        !           308:            {
        !           309:              if (B != 1) abort ();
        !           310:              MPN_COPY (tp, s1p, ssize);
        !           311:            }
        !           312:          else
        !           313:            {
        !           314:              if (A < 0)
        !           315:                {
        !           316:                  cy = mpn_mul_1 (tp, s1p, ssize, B);
        !           317:                  cy += mpn_addmul_1 (tp, s0p, ssize, -A);
        !           318:                }
        !           319:              else
        !           320:                {
        !           321:                  cy = mpn_mul_1 (tp, s0p, ssize, A);
        !           322:                  cy += mpn_addmul_1 (tp, s1p, ssize, -B);
        !           323:                }
        !           324:              if (cy != 0)
        !           325:                tp[ssize++] = cy;
        !           326:            }
        !           327:          MPN_ZERO (wp, orig_size);
        !           328:          if (C < 0)
        !           329:            {
        !           330:              cy = mpn_mul_1 (wp, s1p, ssize, D);
        !           331:              cy += mpn_addmul_1 (wp, s0p, ssize, -C);
        !           332:            }
        !           333:          else
        !           334:            {
        !           335:              cy = mpn_mul_1 (wp, s0p, ssize, C);
        !           336:              cy += mpn_addmul_1 (wp, s1p, ssize, -D);
        !           337:            }
        !           338:          if (cy != 0)
        !           339:            wp[ssize++] = cy;
        !           340:          }
        !           341:          {
        !           342:            mp_ptr xp;
        !           343:            xp = tp; tp = s0p; s0p = xp;
        !           344:            xp = wp; wp = s1p; s1p = xp;
        !           345:          }
        !           346: #endif
        !           347: #if 0  /* Is it a win to remove multiple zeros here? */
        !           348:          MPN_NORMALIZE (up, size);
        !           349: #else
        !           350:          if (up[size - 1] == 0)
        !           351:            size--;
        !           352: #endif
        !           353:        }
        !           354:     }
        !           355:
        !           356: #if RECORD
        !           357:   printf ("min: %ld\n", min);
        !           358:   printf ("max: %ld\n", max);
        !           359: #endif
        !           360:
        !           361:   if (vsize == 0)
        !           362:     {
        !           363:       if (gp != up)
        !           364:        MPN_COPY (gp, up, size);
        !           365: #if EXTEND
        !           366:       if (orig_s0p != s0p)
        !           367:        MPN_COPY (orig_s0p, s0p, ssize);
        !           368: #endif
        !           369:       TMP_FREE (mark);
        !           370:       return size;
        !           371:     }
        !           372:   else
        !           373:     {
        !           374:       mp_limb_t vl, ul, t;
        !           375: #if EXTEND
        !           376:       mp_limb_t cy;
        !           377:       mp_size_t i;
        !           378: #endif
        !           379:       vl = vp[0];
        !           380: #if EXTEND
        !           381:       t = mpn_divmod_1 (wp, up, size, vl);
        !           382:       MPN_COPY (tp, s0p, ssize);
        !           383:       for (i = 0; i < size; i++)
        !           384:        {
        !           385:          cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
        !           386:          if (cy != 0)
        !           387:            tp[ssize++] = cy;
        !           388:        }
        !           389: #if 0
        !           390:       MPN_COPY (s0p, s1p, ssize);
        !           391:       MPN_COPY (s1p, tp, ssize);
        !           392: #else
        !           393:       {
        !           394:        mp_ptr xp;
        !           395:        xp = s0p; s0p = s1p; s1p = xp;
        !           396:        xp = s1p; s1p = tp; tp = xp;
        !           397:       }
        !           398: #endif
        !           399: #else
        !           400:       t = mpn_mod_1 (up, size, vl);
        !           401: #endif
        !           402:       ul = vl;
        !           403:       vl = t;
        !           404:       while (vl != 0)
        !           405:        {
        !           406:          mp_limb_t t;
        !           407: #if EXTEND
        !           408:          mp_limb_t q, cy;
        !           409:          q = ul / vl;
        !           410:          t = ul - q*vl;
        !           411:
        !           412:          MPN_COPY (tp, s0p, ssize);
        !           413:          cy = mpn_addmul_1 (tp, s1p, ssize, q);
        !           414:          if (cy != 0)
        !           415:            tp[ssize++] = cy;
        !           416: #if 0
        !           417:          MPN_COPY (s0p, s1p, ssize);
        !           418:          MPN_COPY (s1p, tp, ssize);
        !           419: #else
        !           420:          {
        !           421:            mp_ptr xp;
        !           422:            xp = s0p; s0p = s1p; s1p = xp;
        !           423:            xp = s1p; s1p = tp; tp = xp;
        !           424:          }
        !           425: #endif
        !           426:
        !           427: #else
        !           428:          t = ul % vl;
        !           429: #endif
        !           430:          ul = vl;
        !           431:          vl = t;
        !           432:        }
        !           433:       gp[0] = ul;
        !           434: #if EXTEND
        !           435:       if (orig_s0p != s0p)
        !           436:        MPN_COPY (orig_s0p, s0p, ssize);
        !           437: #endif
        !           438:       TMP_FREE (mark);
        !           439:       return 1;
        !           440:     }
        !           441: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>