OpenXM_contrib/gmp/mpn/generic/fib2_ui.c - annotate

Return to fib2_ui.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / generic
Annotation of OpenXM_contrib/gmp/mpn/generic/fib2_ui.c, Revision 1.1

1.1     ! ohara       1: /* mpn_fib2_ui -- calculate Fibonacci numbers.
        !             2:
        !             3: Copyright 2001, 2002 Free Software Foundation, Inc.
        !             4:
        !             5: This file is part of the GNU MP Library.
        !             6:
        !             7: The GNU MP Library is free software; you can redistribute it and/or modify
        !             8: it under the terms of the GNU Lesser General Public License as published by
        !             9: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            10: option) any later version.
        !            11:
        !            12: The GNU MP Library is distributed in the hope that it will be useful, but
        !            13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            15: License for more details.
        !            16:
        !            17: You should have received a copy of the GNU Lesser General Public License
        !            18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            20: MA 02111-1307, USA. */
        !            21:
        !            22: #include <stdio.h>
        !            23: #include "gmp.h"
        !            24: #include "gmp-impl.h"
        !            25: #include "longlong.h"
        !            26:
        !            27:
        !            28: /* change this to "#define TRACE(x) x" for diagnostics */
        !            29: #define TRACE(x)
        !            30:
        !            31:
        !            32: /* The following table was generated by code at the end of this file. */
        !            33:
        !            34: const mp_limb_t
        !            35: __gmp_fib_table[FIB_TABLE_LIMIT+2] = {
        !            36:
        !            37: #if GMP_NUMB_BITS >= 4
        !            38:   CNST_LIMB (0x1),  /* -1 */
        !            39:   CNST_LIMB (0x0),  /* 0 */
        !            40:   CNST_LIMB (0x1),  /* 1 */
        !            41:   CNST_LIMB (0x1),  /* 2 */
        !            42:   CNST_LIMB (0x2),  /* 3 */
        !            43:   CNST_LIMB (0x3),  /* 4 */
        !            44:   CNST_LIMB (0x5),  /* 5 */
        !            45:   CNST_LIMB (0x8),  /* 6 */
        !            46:   CNST_LIMB (0xD),  /* 7 */
        !            47: #endif
        !            48: #if GMP_NUMB_BITS >= 8
        !            49:   CNST_LIMB (0x15),  /* 8 */
        !            50:   CNST_LIMB (0x22),  /* 9 */
        !            51:   CNST_LIMB (0x37),  /* 10 */
        !            52:   CNST_LIMB (0x59),  /* 11 */
        !            53:   CNST_LIMB (0x90),  /* 12 */
        !            54:   CNST_LIMB (0xE9),  /* 13 */
        !            55: #endif
        !            56: #if GMP_NUMB_BITS >= 16
        !            57:   CNST_LIMB (0x179),  /* 14 */
        !            58:   CNST_LIMB (0x262),  /* 15 */
        !            59:   CNST_LIMB (0x3DB),  /* 16 */
        !            60:   CNST_LIMB (0x63D),  /* 17 */
        !            61:   CNST_LIMB (0xA18),  /* 18 */
        !            62:   CNST_LIMB (0x1055),  /* 19 */
        !            63:   CNST_LIMB (0x1A6D),  /* 20 */
        !            64:   CNST_LIMB (0x2AC2),  /* 21 */
        !            65:   CNST_LIMB (0x452F),  /* 22 */
        !            66:   CNST_LIMB (0x6FF1),  /* 23 */
        !            67:   CNST_LIMB (0xB520),  /* 24 */
        !            68: #endif
        !            69: #if GMP_NUMB_BITS >= 32
        !            70:   CNST_LIMB (0x12511),  /* 25 */
        !            71:   CNST_LIMB (0x1DA31),  /* 26 */
        !            72:   CNST_LIMB (0x2FF42),  /* 27 */
        !            73:   CNST_LIMB (0x4D973),  /* 28 */
        !            74:   CNST_LIMB (0x7D8B5),  /* 29 */
        !            75:   CNST_LIMB (0xCB228),  /* 30 */
        !            76:   CNST_LIMB (0x148ADD),  /* 31 */
        !            77:   CNST_LIMB (0x213D05),  /* 32 */
        !            78:   CNST_LIMB (0x35C7E2),  /* 33 */
        !            79:   CNST_LIMB (0x5704E7),  /* 34 */
        !            80:   CNST_LIMB (0x8CCCC9),  /* 35 */
        !            81:   CNST_LIMB (0xE3D1B0),  /* 36 */
        !            82:   CNST_LIMB (0x1709E79),  /* 37 */
        !            83:   CNST_LIMB (0x2547029),  /* 38 */
        !            84:   CNST_LIMB (0x3C50EA2),  /* 39 */
        !            85:   CNST_LIMB (0x6197ECB),  /* 40 */
        !            86:   CNST_LIMB (0x9DE8D6D),  /* 41 */
        !            87:   CNST_LIMB (0xFF80C38),  /* 42 */
        !            88:   CNST_LIMB (0x19D699A5),  /* 43 */
        !            89:   CNST_LIMB (0x29CEA5DD),  /* 44 */
        !            90:   CNST_LIMB (0x43A53F82),  /* 45 */
        !            91:   CNST_LIMB (0x6D73E55F),  /* 46 */
        !            92:   CNST_LIMB (0xB11924E1),  /* 47 */
        !            93: #endif
        !            94: #if GMP_NUMB_BITS >= 64
        !            95:   CNST_LIMB (0x11E8D0A40),  /* 48 */
        !            96:   CNST_LIMB (0x1CFA62F21),  /* 49 */
        !            97:   CNST_LIMB (0x2EE333961),  /* 50 */
        !            98:   CNST_LIMB (0x4BDD96882),  /* 51 */
        !            99:   CNST_LIMB (0x7AC0CA1E3),  /* 52 */
        !           100:   CNST_LIMB (0xC69E60A65),  /* 53 */
        !           101:   CNST_LIMB (0x1415F2AC48),  /* 54 */
        !           102:   CNST_LIMB (0x207FD8B6AD),  /* 55 */
        !           103:   CNST_LIMB (0x3495CB62F5),  /* 56 */
        !           104:   CNST_LIMB (0x5515A419A2),  /* 57 */
        !           105:   CNST_LIMB (0x89AB6F7C97),  /* 58 */
        !           106:   CNST_LIMB (0xDEC1139639),  /* 59 */
        !           107:   CNST_LIMB (0x1686C8312D0),  /* 60 */
        !           108:   CNST_LIMB (0x2472D96A909),  /* 61 */
        !           109:   CNST_LIMB (0x3AF9A19BBD9),  /* 62 */
        !           110:   CNST_LIMB (0x5F6C7B064E2),  /* 63 */
        !           111:   CNST_LIMB (0x9A661CA20BB),  /* 64 */
        !           112:   CNST_LIMB (0xF9D297A859D),  /* 65 */
        !           113:   CNST_LIMB (0x19438B44A658),  /* 66 */
        !           114:   CNST_LIMB (0x28E0B4BF2BF5),  /* 67 */
        !           115:   CNST_LIMB (0x42244003D24D),  /* 68 */
        !           116:   CNST_LIMB (0x6B04F4C2FE42),  /* 69 */
        !           117:   CNST_LIMB (0xAD2934C6D08F),  /* 70 */
        !           118:   CNST_LIMB (0x1182E2989CED1),  /* 71 */
        !           119:   CNST_LIMB (0x1C5575E509F60),  /* 72 */
        !           120:   CNST_LIMB (0x2DD8587DA6E31),  /* 73 */
        !           121:   CNST_LIMB (0x4A2DCE62B0D91),  /* 74 */
        !           122:   CNST_LIMB (0x780626E057BC2),  /* 75 */
        !           123:   CNST_LIMB (0xC233F54308953),  /* 76 */
        !           124:   CNST_LIMB (0x13A3A1C2360515),  /* 77 */
        !           125:   CNST_LIMB (0x1FC6E116668E68),  /* 78 */
        !           126:   CNST_LIMB (0x336A82D89C937D),  /* 79 */
        !           127:   CNST_LIMB (0x533163EF0321E5),  /* 80 */
        !           128:   CNST_LIMB (0x869BE6C79FB562),  /* 81 */
        !           129:   CNST_LIMB (0xD9CD4AB6A2D747),  /* 82 */
        !           130:   CNST_LIMB (0x16069317E428CA9),  /* 83 */
        !           131:   CNST_LIMB (0x23A367C34E563F0),  /* 84 */
        !           132:   CNST_LIMB (0x39A9FADB327F099),  /* 85 */
        !           133:   CNST_LIMB (0x5D4D629E80D5489),  /* 86 */
        !           134:   CNST_LIMB (0x96F75D79B354522),  /* 87 */
        !           135:   CNST_LIMB (0xF444C01834299AB),  /* 88 */
        !           136:   CNST_LIMB (0x18B3C1D91E77DECD),  /* 89 */
        !           137:   CNST_LIMB (0x27F80DDAA1BA7878),  /* 90 */
        !           138:   CNST_LIMB (0x40ABCFB3C0325745),  /* 91 */
        !           139:   CNST_LIMB (0x68A3DD8E61ECCFBD),  /* 92 */
        !           140:   CNST_LIMB (0xA94FAD42221F2702),  /* 93 */
        !           141: #endif
        !           142: };
        !           143:
        !           144:
        !           145: /* Store F[n] at fp and F[n-1] at f1p.  fp and f1p should have room for
        !           146:    MPN_FIB2_SIZE(n) limbs.
        !           147:
        !           148:    The return value is the actual number of limbs stored, this will be at
        !           149:    least 1.  fp[size-1] will be non-zero, except when n==0, in which case
        !           150:    fp[0] is 0 and f1p[0] is 1.  f1p[size-1] can be zero, since F[n-1]<F[n]
        !           151:    (for n>0).
        !           152:
        !           153:    Notes:
        !           154:
        !           155:    In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
        !           156:    low limb.
        !           157:
        !           158:    In F[2k+1] with k odd, -2 is applied to the low limb of 4*F[k]^2 -
        !           159:    F[k-1]^2.  This F[2k+1] is an F[4m+3] and such numbers are congruent to
        !           160:    1, 2 or 5 mod 8, which means no underflow reaching it with a -2 (since
        !           161:    that would leave 6 or 7 mod 8).
        !           162:
        !           163:    This property of F[4m+3] can be verified by induction on F[4m+3] =
        !           164:    7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence
        !           165:    identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.
        !           166:
        !           167:    Enhancements:
        !           168:
        !           169:    If there was an mpn_addlshift, it'd be possible to eliminate the yp
        !           170:    temporary, using xp=F[k]^2, fp=F[k-1]^2, f1p=xp+fp, fp+=4*fp, fp-=f1p,
        !           171:    fp+=2*(-1)^n, etc.  */
        !           172:
        !           173: mp_size_t
        !           174: mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
        !           175: {
        !           176:   mp_ptr         xp, yp;
        !           177:   mp_size_t      size;
        !           178:   unsigned long  nfirst, mask;
        !           179:   TMP_DECL (marker);
        !           180:
        !           181:   TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
        !           182:
        !           183:   ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n)));
        !           184:
        !           185:   /* Take a starting pair from the table. */
        !           186:   mask = 1;
        !           187:   for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2)
        !           188:     mask <<= 1;
        !           189:   TRACE (printf ("nfirst=%lu mask=0x%lX\n", nfirst, mask));
        !           190:
        !           191:   f1p[0] = FIB_TABLE ((int) nfirst - 1);
        !           192:   fp[0]  = FIB_TABLE (nfirst);
        !           193:   size = 1;
        !           194:
        !           195:   /* Skip to the end if the table lookup gives the final answer. */
        !           196:   if (mask != 1)
        !           197:     {
        !           198:       mp_size_t  alloc;
        !           199:
        !           200:       TMP_MARK (marker);
        !           201:       alloc = MPN_FIB2_SIZE (n);
        !           202:       TMP_ALLOC_LIMBS_2 (xp,alloc, yp,alloc);
        !           203:
        !           204:       do
        !           205:        {
        !           206:          mp_limb_t  c;
        !           207:
        !           208:          /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
        !           209:             n&mask upwards.
        !           210:
        !           211:             The next bit of n is n&(mask>>1) and we'll double to the pair
        !           212:             fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
        !           213:             that bit is 0 or 1 respectively.  */
        !           214:
        !           215:          TRACE (printf ("k=%lu mask=0x%lX size=%ld alloc=%ld\n",
        !           216:                         n >> refmpn_count_trailing_zeros(mask),
        !           217:                         mask, size, alloc);
        !           218:                 mpn_trace ("fp ", fp, size);
        !           219:                 mpn_trace ("f1p", f1p, size));
        !           220:
        !           221:          /* fp normalized, f1p at most one high zero */
        !           222:          ASSERT (fp[size-1] != 0);
        !           223:          ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0);
        !           224:
        !           225:          /* f1p[size-1] might be zero, but this occurs rarely, so it's not
        !           226:             worth bothering checking for it */
        !           227:          ASSERT (alloc >= 2*size);
        !           228:          mpn_sqr_n (xp, fp,  size);
        !           229:          mpn_sqr_n (yp, f1p, size);
        !           230:          size *= 2;
        !           231:
        !           232:          /* Shrink if possible.  Since fp was normalized there'll be at
        !           233:             most one high zero on xp (and if there is then there's one on
        !           234:             yp too).  */
        !           235:          ASSERT (xp[size-1] != 0 || yp[size-1] == 0);
        !           236:          size -= (xp[size-1] == 0);
        !           237:          ASSERT (xp[size-1] != 0);  /* only one xp high zero */
        !           238:
        !           239:          /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
        !           240:             n&mask is the low bit of our implied k.  */
        !           241:          c = mpn_lshift (fp, xp, size, 2);
        !           242:          fp[0] |= (n & mask ? 0 : 2);   /* possible +2 */
        !           243:          c -= mpn_sub_n (fp, fp, yp, size);
        !           244:          ASSERT (n & (mask << 1) ? fp[0] != 0 && fp[0] != 1 : 1);
        !           245:          fp[0] -= (n & mask ? 2 : 0);   /* possible -2 */
        !           246:          ASSERT (alloc >= size+1);
        !           247:          xp[size] = 0;
        !           248:          yp[size] = 0;
        !           249:          fp[size] = c;
        !           250:          size += (c != 0);
        !           251:
        !           252:          /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2.
        !           253:             F[2k-1]<F[2k+1] so no carry out of "size" limbs. */
        !           254:          ASSERT_NOCARRY (mpn_add_n (f1p, xp, yp, size));
        !           255:
        !           256:          /* now n&mask is the new bit of n being considered */
        !           257:          mask >>= 1;
        !           258:
        !           259:          /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
        !           260:             F[2k+1] and F[2k-1].  */
        !           261:          ASSERT_NOCARRY (mpn_sub_n ((n & mask ? f1p : fp), fp, f1p, size));
        !           262:
        !           263:          /* Can have a high zero after replacing F[2k+1] with F[2k].
        !           264:             f1p will have a high zero if fp does. */
        !           265:          ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
        !           266:          size -= (fp[size-1] == 0);
        !           267:        }
        !           268:       while (mask != 1);
        !           269:
        !           270:       TMP_FREE (marker);
        !           271:     }
        !           272:
        !           273:   TRACE (printf ("done size=%ld\n", size);
        !           274:         mpn_trace ("fp ", fp, size);
        !           275:         mpn_trace ("f1p", f1p, size));
        !           276:
        !           277:   return size;
        !           278: }
        !           279:
        !           280:
        !           281:
        !           282:
        !           283:
        !           284: /* ------------------------------------------------------------------------- */
        !           285:
        !           286: #if GENERATE_FIB_TABLE
        !           287: /* Generate the tables of fibonacci data.  This doesn't depend on the limb
        !           288:    size of the host, and doesn't need mpz_fib_ui working.
        !           289:
        !           290:    The bit sizes in the table[] below will get specific setups so that a
        !           291:    build with GMP_NUMB_BITS equal to one of those values has as much data in
        !           292:    __gmp_fib_table as will fit that number of bits.
        !           293:
        !           294:    A build with GMP_NUMB_BITS equal to some other value will effectively
        !           295:    fall back to the previous set of generated data.  For instance if 8 and
        !           296:    16 bits have been generated, but a build with 13 bits is done then
        !           297:    __gmp_fib_table will only contain 8 bit values, whereas it could probably
        !           298:    fit a few more.  Everything still works, it's just that the table scheme
        !           299:    is not fully exploited.  */
        !           300:
        !           301: int
        !           302: main (void)
        !           303: {
        !           304:   static struct {
        !           305:     int  bits;
        !           306:     int  fib_limit;
        !           307:     int  luc_limit;
        !           308:   } table[] = {
        !           309:     { 4 },
        !           310:     { 8 },
        !           311:     { 16 },
        !           312:     { 32 },
        !           313:     { 64 },
        !           314:   };
        !           315:
        !           316:   int    i, t;
        !           317:   mpz_t  f[500];
        !           318:   mpz_t  l;
        !           319:
        !           320:   mpz_init (l);
        !           321:   mpz_init_set_si (f[0], 1L);  /* F[-1] */
        !           322:   mpz_init_set_si (f[1], 0L);  /* F[0] */
        !           323:   for (i = 2; i < numberof(f); i++)
        !           324:     {
        !           325:       mpz_init (f[i]);
        !           326:       mpz_add (f[i], f[i-1], f[i-2]);
        !           327:     }
        !           328:
        !           329:   for (i = 1; i < numberof (f); i++)
        !           330:     {
        !           331:       /* L[n] = F[n]+2*F[n-1] */
        !           332:       mpz_add (l, f[i], f[i-1]);
        !           333:       mpz_add (l, l, f[i-1]);
        !           334:
        !           335:       for (t = 0; t < numberof (table); t++)
        !           336:        {
        !           337:          if (mpz_sizeinbase (f[i], 2) <= table[t].bits)
        !           338:            table[t].fib_limit = i-1;
        !           339:          if (mpz_sizeinbase (l, 2) <= table[t].bits)
        !           340:            table[t].luc_limit = i-1;
        !           341:        }
        !           342:     }
        !           343:   if (table[t].fib_limit == numberof (f) + 1)
        !           344:     {
        !           345:       printf ("Oops, need bigger f[] array\n");
        !           346:       abort ();
        !           347:     }
        !           348:
        !           349:   for (t = numberof (table) - 1; t >= 0; t--)
        !           350:     {
        !           351:       printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
        !           352:       printf ("#define FIB_TABLE_LIMIT         %d\n", table[t].fib_limit);
        !           353:       printf ("#define FIB_TABLE_LUCNUM_LIMIT  %d\n", table[t].luc_limit);
        !           354:       if (t != 0)
        !           355:        printf ("#else\n");
        !           356:     }
        !           357:   for (t = 0; t < numberof (table); t++)
        !           358:     printf ("#endif /* %d */\n", table[t].bits);
        !           359:   printf ("\n");
        !           360:   printf ("\n");
        !           361:
        !           362:   printf ("const mp_limb_t\n");
        !           363:   printf ("__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\n");
        !           364:   printf ("\n");
        !           365:   t = 0;
        !           366:   i = 0;
        !           367:   printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
        !           368:   for (;;)
        !           369:     {
        !           370:       gmp_printf ("  CNST_LIMB (0x%ZX),  /* %d */\n", f[i], i-1);
        !           371:
        !           372:       if (i-1 == table[t].fib_limit)
        !           373:        {
        !           374:          printf ("#endif\n");
        !           375:          do
        !           376:            {
        !           377:              t++;
        !           378:              if (t >= numberof (table))
        !           379:                goto done;
        !           380:            }
        !           381:          while (i-1 == table[t].fib_limit);
        !           382:          printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
        !           383:        }
        !           384:       i++;
        !           385:     }
        !           386:  done:
        !           387:   printf ("};\n");
        !           388:
        !           389:   return 0;
        !           390: }
        !           391:
        !           392: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>