[BACK]Return to gcdext.c CVS log [TXT][DIR] Up to [local] / OpenXM / src / kan96xx / gmp-2.0.2-ssh-2 / mpn / generic

Annotation of OpenXM/src/kan96xx/gmp-2.0.2-ssh-2/mpn/generic/gcdext.c, Revision 1.1.1.1

1.1       takayama    1: /* mpn_gcdext -- Extended Greatest Common Divisor.
                      2:
                      3: Copyright (C) 1996 Free Software Foundation, Inc.
                      4:
                      5: This file is part of the GNU MP Library.
                      6:
                      7: The GNU MP Library is free software; you can redistribute it and/or modify
                      8: it under the terms of the GNU Library General Public License as published by
                      9: the Free Software Foundation; either version 2 of the License, or (at your
                     10: option) any later version.
                     11:
                     12: The GNU MP Library is distributed in the hope that it will be useful, but
                     13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
                     15: License for more details.
                     16:
                     17: You should have received a copy of the GNU Library General Public License
                     18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: MA 02111-1307, USA. */
                     21:
                     22: #include "gmp.h"
                     23: #include "gmp-impl.h"
                     24: #include "longlong.h"
                     25:
                     26: #ifndef EXTEND
                     27: #define EXTEND 1
                     28: #endif
                     29:
                     30: #if STAT
                     31: int arr[BITS_PER_MP_LIMB];
                     32: #endif
                     33:
                     34: #define SGN(A) (((A) < 0) ? -1 : ((A) > 0))
                     35:
                     36: /* Idea 1: After we have performed a full division, don't shift operands back,
                     37:           but instead account for the extra factors-of-2 thus introduced.
                     38:    Idea 2: Simple generalization to use divide-and-conquer would give us an
                     39:           algorithm that runs faster than O(n^2).
                     40:    Idea 3: The input numbers need less space as the computation progresses,
                     41:           while the s0 and s1 variables need more space.  To save space, we
                     42:           could make them share space, and have the latter variables grow
                     43:           into the former.  */
                     44:
                     45: /* Precondition: U >= V.  */
                     46:
                     47: mp_size_t
                     48: #if EXTEND
                     49: #if __STDC__
                     50: mpn_gcdext (mp_ptr gp, mp_ptr s0p,
                     51:            mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
                     52: #else
                     53: mpn_gcdext (gp, s0p, up, size, vp, vsize)
                     54:      mp_ptr gp;
                     55:      mp_ptr s0p;
                     56:      mp_ptr up;
                     57:      mp_size_t size;
                     58:      mp_ptr vp;
                     59:      mp_size_t vsize;
                     60: #endif
                     61: #else
                     62: #if __STDC__
                     63: mpn_gcd (mp_ptr gp,
                     64:         mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
                     65: #else
                     66: mpn_gcd (gp, up, size, vp, vsize)
                     67:      mp_ptr gp;
                     68:      mp_ptr up;
                     69:      mp_size_t size;
                     70:      mp_ptr vp;
                     71:      mp_size_t vsize;
                     72: #endif
                     73: #endif
                     74: {
                     75:   mp_limb_t uh, vh;
                     76:   mp_limb_signed_t A, B, C, D;
                     77:   int cnt;
                     78:   mp_ptr tp, wp;
                     79: #if RECORD
                     80:   mp_limb_signed_t min = 0, max = 0;
                     81: #endif
                     82: #if EXTEND
                     83:   mp_ptr s1p;
                     84:   mp_ptr orig_s0p = s0p;
                     85:   mp_size_t ssize, orig_size = size;
                     86:   TMP_DECL (mark);
                     87:
                     88:   TMP_MARK (mark);
                     89:
                     90:   tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
                     91:   wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
                     92:   s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB);
                     93:
                     94:   MPN_ZERO (s0p, size);
                     95:   MPN_ZERO (s1p, size);
                     96:
                     97:   s0p[0] = 1;
                     98:   s1p[0] = 0;
                     99:   ssize = 1;
                    100: #endif
                    101:
                    102:   if (size > vsize)
                    103:     {
                    104:       /* Normalize V (and shift up U the same amount).  */
                    105:       count_leading_zeros (cnt, vp[vsize - 1]);
                    106:       if (cnt != 0)
                    107:        {
                    108:          mp_limb_t cy;
                    109:          mpn_lshift (vp, vp, vsize, cnt);
                    110:          cy = mpn_lshift (up, up, size, cnt);
                    111:          up[size] = cy;
                    112:          size += cy != 0;
                    113:        }
                    114:
                    115:       mpn_divmod (up + vsize, up, size, vp, vsize);
                    116: #if EXTEND
                    117:       /* This is really what it boils down to in this case... */
                    118:       s0p[0] = 0;
                    119:       s1p[0] = 1;
                    120: #endif
                    121:       size = vsize;
                    122:       if (cnt != 0)
                    123:        {
                    124:          mpn_rshift (up, up, size, cnt);
                    125:          mpn_rshift (vp, vp, size, cnt);
                    126:        }
                    127:       {
                    128:        mp_ptr xp;
                    129:        xp = up; up = vp; vp = xp;
                    130:       }
                    131:     }
                    132:
                    133:   for (;;)
                    134:     {
                    135:       /* Figure out exact size of V.  */
                    136:       vsize = size;
                    137:       MPN_NORMALIZE (vp, vsize);
                    138:       if (vsize <= 1)
                    139:        break;
                    140:
                    141:       /* Make UH be the most significant limb of U, and make VH be
                    142:         corresponding bits from V.  */
                    143:       uh = up[size - 1];
                    144:       vh = vp[size - 1];
                    145:       count_leading_zeros (cnt, uh);
                    146:       if (cnt != 0)
                    147:        {
                    148:          uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
                    149:          vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
                    150:        }
                    151:
                    152: #if 0
                    153:       /* For now, only handle BITS_PER_MP_LIMB-1 bits.  This makes
                    154:         room for sign bit.  */
                    155:       uh >>= 1;
                    156:       vh >>= 1;
                    157: #endif
                    158:       A = 1;
                    159:       B = 0;
                    160:       C = 0;
                    161:       D = 1;
                    162:
                    163:       for (;;)
                    164:        {
                    165:          mp_limb_signed_t q, T;
                    166:          if (vh + C == 0 || vh + D == 0)
                    167:            break;
                    168:
                    169:          q = (uh + A) / (vh + C);
                    170:          if (q != (uh + B) / (vh + D))
                    171:            break;
                    172:
                    173:          T = A - q * C;
                    174:          A = C;
                    175:          C = T;
                    176:          T = B - q * D;
                    177:          B = D;
                    178:          D = T;
                    179:          T = uh - q * vh;
                    180:          uh = vh;
                    181:          vh = T;
                    182:        }
                    183:
                    184: #if RECORD
                    185:       min = MIN (A, min);  min = MIN (B, min);
                    186:       min = MIN (C, min);  min = MIN (D, min);
                    187:       max = MAX (A, max);  max = MAX (B, max);
                    188:       max = MAX (C, max);  max = MAX (D, max);
                    189: #endif
                    190:
                    191:       if (B == 0)
                    192:        {
                    193:          mp_limb_t qh;
                    194:          mp_size_t i;
                    195:
                    196:          /* This is quite rare.  I.e., optimize something else!  */
                    197:
                    198:          /* Normalize V (and shift up U the same amount).  */
                    199:          count_leading_zeros (cnt, vp[vsize - 1]);
                    200:          if (cnt != 0)
                    201:            {
                    202:              mp_limb_t cy;
                    203:              mpn_lshift (vp, vp, vsize, cnt);
                    204:              cy = mpn_lshift (up, up, size, cnt);
                    205:              up[size] = cy;
                    206:              size += cy != 0;
                    207:            }
                    208:
                    209:          qh = mpn_divmod (up + vsize, up, size, vp, vsize);
                    210: #if EXTEND
                    211:          MPN_COPY (tp, s0p, ssize);
                    212:          for (i = 0; i < size - vsize; i++)
                    213:            {
                    214:              mp_limb_t cy;
                    215:              cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
                    216:              if (cy != 0)
                    217:                tp[ssize++] = cy;
                    218:            }
                    219:          if (qh != 0)
                    220:            {
                    221:              mp_limb_t cy;
                    222:              abort ();
                    223:              /* XXX since qh == 1, mpn_addmul_1 is overkill */
                    224:              cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh);
                    225:              if (cy != 0)
                    226:                tp[ssize++] = cy;
                    227:            }
                    228: #if 0
                    229:          MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */
                    230:          MPN_COPY (s1p, tp, ssize);
                    231: #else
                    232:          {
                    233:            mp_ptr xp;
                    234:            xp = s0p; s0p = s1p; s1p = xp;
                    235:            xp = s1p; s1p = tp; tp = xp;
                    236:          }
                    237: #endif
                    238: #endif
                    239:          size = vsize;
                    240:          if (cnt != 0)
                    241:            {
                    242:              mpn_rshift (up, up, size, cnt);
                    243:              mpn_rshift (vp, vp, size, cnt);
                    244:            }
                    245:
                    246:          {
                    247:            mp_ptr xp;
                    248:            xp = up; up = vp; vp = xp;
                    249:          }
                    250:          MPN_NORMALIZE (up, size);
                    251:        }
                    252:       else
                    253:        {
                    254:          /* T = U*A + V*B
                    255:             W = U*C + V*D
                    256:             U = T
                    257:             V = W         */
                    258:
                    259:          if (SGN(A) == SGN(B)) /* should be different sign */
                    260:            abort ();
                    261:          if (SGN(C) == SGN(D)) /* should be different sign */
                    262:            abort ();
                    263: #if STAT
                    264:          { mp_limb_t x;
                    265:            x = ABS (A) | ABS (B) | ABS (C) | ABS (D);
                    266:            count_leading_zeros (cnt, x);
                    267:            arr[BITS_PER_MP_LIMB - cnt]++; }
                    268: #endif
                    269:          if (A == 0)
                    270:            {
                    271:              if (B != 1) abort ();
                    272:              MPN_COPY (tp, vp, size);
                    273:            }
                    274:          else
                    275:            {
                    276:              if (A < 0)
                    277:                {
                    278:                  mpn_mul_1 (tp, vp, size, B);
                    279:                  mpn_submul_1 (tp, up, size, -A);
                    280:                }
                    281:              else
                    282:                {
                    283:                  mpn_mul_1 (tp, up, size, A);
                    284:                  mpn_submul_1 (tp, vp, size, -B);
                    285:                }
                    286:            }
                    287:          if (C < 0)
                    288:            {
                    289:              mpn_mul_1 (wp, vp, size, D);
                    290:              mpn_submul_1 (wp, up, size, -C);
                    291:            }
                    292:          else
                    293:            {
                    294:              mpn_mul_1 (wp, up, size, C);
                    295:              mpn_submul_1 (wp, vp, size, -D);
                    296:            }
                    297:
                    298:          {
                    299:            mp_ptr xp;
                    300:            xp = tp; tp = up; up = xp;
                    301:            xp = wp; wp = vp; vp = xp;
                    302:          }
                    303:
                    304: #if EXTEND
                    305:          { mp_limb_t cy;
                    306:          MPN_ZERO (tp, orig_size);
                    307:          if (A == 0)
                    308:            {
                    309:              if (B != 1) abort ();
                    310:              MPN_COPY (tp, s1p, ssize);
                    311:            }
                    312:          else
                    313:            {
                    314:              if (A < 0)
                    315:                {
                    316:                  cy = mpn_mul_1 (tp, s1p, ssize, B);
                    317:                  cy += mpn_addmul_1 (tp, s0p, ssize, -A);
                    318:                }
                    319:              else
                    320:                {
                    321:                  cy = mpn_mul_1 (tp, s0p, ssize, A);
                    322:                  cy += mpn_addmul_1 (tp, s1p, ssize, -B);
                    323:                }
                    324:              if (cy != 0)
                    325:                tp[ssize++] = cy;
                    326:            }
                    327:          MPN_ZERO (wp, orig_size);
                    328:          if (C < 0)
                    329:            {
                    330:              cy = mpn_mul_1 (wp, s1p, ssize, D);
                    331:              cy += mpn_addmul_1 (wp, s0p, ssize, -C);
                    332:            }
                    333:          else
                    334:            {
                    335:              cy = mpn_mul_1 (wp, s0p, ssize, C);
                    336:              cy += mpn_addmul_1 (wp, s1p, ssize, -D);
                    337:            }
                    338:          if (cy != 0)
                    339:            wp[ssize++] = cy;
                    340:          }
                    341:          {
                    342:            mp_ptr xp;
                    343:            xp = tp; tp = s0p; s0p = xp;
                    344:            xp = wp; wp = s1p; s1p = xp;
                    345:          }
                    346: #endif
                    347: #if 0  /* Is it a win to remove multiple zeros here? */
                    348:          MPN_NORMALIZE (up, size);
                    349: #else
                    350:          if (up[size - 1] == 0)
                    351:            size--;
                    352: #endif
                    353:        }
                    354:     }
                    355:
                    356: #if RECORD
                    357:   printf ("min: %ld\n", min);
                    358:   printf ("max: %ld\n", max);
                    359: #endif
                    360:
                    361:   if (vsize == 0)
                    362:     {
                    363:       if (gp != up)
                    364:        MPN_COPY (gp, up, size);
                    365: #if EXTEND
                    366:       if (orig_s0p != s0p)
                    367:        MPN_COPY (orig_s0p, s0p, ssize);
                    368: #endif
                    369:       TMP_FREE (mark);
                    370:       return size;
                    371:     }
                    372:   else
                    373:     {
                    374:       mp_limb_t vl, ul, t;
                    375: #if EXTEND
                    376:       mp_limb_t cy;
                    377:       mp_size_t i;
                    378: #endif
                    379:       vl = vp[0];
                    380: #if EXTEND
                    381:       t = mpn_divmod_1 (wp, up, size, vl);
                    382:       MPN_COPY (tp, s0p, ssize);
                    383:       for (i = 0; i < size; i++)
                    384:        {
                    385:          cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
                    386:          if (cy != 0)
                    387:            tp[ssize++] = cy;
                    388:        }
                    389: #if 0
                    390:       MPN_COPY (s0p, s1p, ssize);
                    391:       MPN_COPY (s1p, tp, ssize);
                    392: #else
                    393:       {
                    394:        mp_ptr xp;
                    395:        xp = s0p; s0p = s1p; s1p = xp;
                    396:        xp = s1p; s1p = tp; tp = xp;
                    397:       }
                    398: #endif
                    399: #else
                    400:       t = mpn_mod_1 (up, size, vl);
                    401: #endif
                    402:       ul = vl;
                    403:       vl = t;
                    404:       while (vl != 0)
                    405:        {
                    406:          mp_limb_t t;
                    407: #if EXTEND
                    408:          mp_limb_t q, cy;
                    409:          q = ul / vl;
                    410:          t = ul - q*vl;
                    411:
                    412:          MPN_COPY (tp, s0p, ssize);
                    413:          cy = mpn_addmul_1 (tp, s1p, ssize, q);
                    414:          if (cy != 0)
                    415:            tp[ssize++] = cy;
                    416: #if 0
                    417:          MPN_COPY (s0p, s1p, ssize);
                    418:          MPN_COPY (s1p, tp, ssize);
                    419: #else
                    420:          {
                    421:            mp_ptr xp;
                    422:            xp = s0p; s0p = s1p; s1p = xp;
                    423:            xp = s1p; s1p = tp; tp = xp;
                    424:          }
                    425: #endif
                    426:
                    427: #else
                    428:          t = ul % vl;
                    429: #endif
                    430:          ul = vl;
                    431:          vl = t;
                    432:        }
                    433:       gp[0] = ul;
                    434: #if EXTEND
                    435:       if (orig_s0p != s0p)
                    436:        MPN_COPY (orig_s0p, s0p, ssize);
                    437: #endif
                    438:       TMP_FREE (mark);
                    439:       return 1;
                    440:     }
                    441: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>