OpenXM_contrib/gmp/mpn/generic/fib2_ui.c - annotate

Return to fib2_ui.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / generic
Annotation of OpenXM_contrib/gmp/mpn/generic/fib2_ui.c, Revision 1.1.1.1

1.1       ohara       1: /* mpn_fib2_ui -- calculate Fibonacci numbers.
                      2:
                      3: Copyright 2001, 2002 Free Software Foundation, Inc.
                      4:
                      5: This file is part of the GNU MP Library.
                      6:
                      7: The GNU MP Library is free software; you can redistribute it and/or modify
                      8: it under the terms of the GNU Lesser General Public License as published by
                      9: the Free Software Foundation; either version 2.1 of the License, or (at your
                     10: option) any later version.
                     11:
                     12: The GNU MP Library is distributed in the hope that it will be useful, but
                     13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     15: License for more details.
                     16:
                     17: You should have received a copy of the GNU Lesser General Public License
                     18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: MA 02111-1307, USA. */
                     21:
                     22: #include <stdio.h>
                     23: #include "gmp.h"
                     24: #include "gmp-impl.h"
                     25: #include "longlong.h"
                     26:
                     27:
                     28: /* change this to "#define TRACE(x) x" for diagnostics */
                     29: #define TRACE(x)
                     30:
                     31:
                     32: /* The following table was generated by code at the end of this file. */
                     33:
                     34: const mp_limb_t
                     35: __gmp_fib_table[FIB_TABLE_LIMIT+2] = {
                     36:
                     37: #if GMP_NUMB_BITS >= 4
                     38:   CNST_LIMB (0x1),  /* -1 */
                     39:   CNST_LIMB (0x0),  /* 0 */
                     40:   CNST_LIMB (0x1),  /* 1 */
                     41:   CNST_LIMB (0x1),  /* 2 */
                     42:   CNST_LIMB (0x2),  /* 3 */
                     43:   CNST_LIMB (0x3),  /* 4 */
                     44:   CNST_LIMB (0x5),  /* 5 */
                     45:   CNST_LIMB (0x8),  /* 6 */
                     46:   CNST_LIMB (0xD),  /* 7 */
                     47: #endif
                     48: #if GMP_NUMB_BITS >= 8
                     49:   CNST_LIMB (0x15),  /* 8 */
                     50:   CNST_LIMB (0x22),  /* 9 */
                     51:   CNST_LIMB (0x37),  /* 10 */
                     52:   CNST_LIMB (0x59),  /* 11 */
                     53:   CNST_LIMB (0x90),  /* 12 */
                     54:   CNST_LIMB (0xE9),  /* 13 */
                     55: #endif
                     56: #if GMP_NUMB_BITS >= 16
                     57:   CNST_LIMB (0x179),  /* 14 */
                     58:   CNST_LIMB (0x262),  /* 15 */
                     59:   CNST_LIMB (0x3DB),  /* 16 */
                     60:   CNST_LIMB (0x63D),  /* 17 */
                     61:   CNST_LIMB (0xA18),  /* 18 */
                     62:   CNST_LIMB (0x1055),  /* 19 */
                     63:   CNST_LIMB (0x1A6D),  /* 20 */
                     64:   CNST_LIMB (0x2AC2),  /* 21 */
                     65:   CNST_LIMB (0x452F),  /* 22 */
                     66:   CNST_LIMB (0x6FF1),  /* 23 */
                     67:   CNST_LIMB (0xB520),  /* 24 */
                     68: #endif
                     69: #if GMP_NUMB_BITS >= 32
                     70:   CNST_LIMB (0x12511),  /* 25 */
                     71:   CNST_LIMB (0x1DA31),  /* 26 */
                     72:   CNST_LIMB (0x2FF42),  /* 27 */
                     73:   CNST_LIMB (0x4D973),  /* 28 */
                     74:   CNST_LIMB (0x7D8B5),  /* 29 */
                     75:   CNST_LIMB (0xCB228),  /* 30 */
                     76:   CNST_LIMB (0x148ADD),  /* 31 */
                     77:   CNST_LIMB (0x213D05),  /* 32 */
                     78:   CNST_LIMB (0x35C7E2),  /* 33 */
                     79:   CNST_LIMB (0x5704E7),  /* 34 */
                     80:   CNST_LIMB (0x8CCCC9),  /* 35 */
                     81:   CNST_LIMB (0xE3D1B0),  /* 36 */
                     82:   CNST_LIMB (0x1709E79),  /* 37 */
                     83:   CNST_LIMB (0x2547029),  /* 38 */
                     84:   CNST_LIMB (0x3C50EA2),  /* 39 */
                     85:   CNST_LIMB (0x6197ECB),  /* 40 */
                     86:   CNST_LIMB (0x9DE8D6D),  /* 41 */
                     87:   CNST_LIMB (0xFF80C38),  /* 42 */
                     88:   CNST_LIMB (0x19D699A5),  /* 43 */
                     89:   CNST_LIMB (0x29CEA5DD),  /* 44 */
                     90:   CNST_LIMB (0x43A53F82),  /* 45 */
                     91:   CNST_LIMB (0x6D73E55F),  /* 46 */
                     92:   CNST_LIMB (0xB11924E1),  /* 47 */
                     93: #endif
                     94: #if GMP_NUMB_BITS >= 64
                     95:   CNST_LIMB (0x11E8D0A40),  /* 48 */
                     96:   CNST_LIMB (0x1CFA62F21),  /* 49 */
                     97:   CNST_LIMB (0x2EE333961),  /* 50 */
                     98:   CNST_LIMB (0x4BDD96882),  /* 51 */
                     99:   CNST_LIMB (0x7AC0CA1E3),  /* 52 */
                    100:   CNST_LIMB (0xC69E60A65),  /* 53 */
                    101:   CNST_LIMB (0x1415F2AC48),  /* 54 */
                    102:   CNST_LIMB (0x207FD8B6AD),  /* 55 */
                    103:   CNST_LIMB (0x3495CB62F5),  /* 56 */
                    104:   CNST_LIMB (0x5515A419A2),  /* 57 */
                    105:   CNST_LIMB (0x89AB6F7C97),  /* 58 */
                    106:   CNST_LIMB (0xDEC1139639),  /* 59 */
                    107:   CNST_LIMB (0x1686C8312D0),  /* 60 */
                    108:   CNST_LIMB (0x2472D96A909),  /* 61 */
                    109:   CNST_LIMB (0x3AF9A19BBD9),  /* 62 */
                    110:   CNST_LIMB (0x5F6C7B064E2),  /* 63 */
                    111:   CNST_LIMB (0x9A661CA20BB),  /* 64 */
                    112:   CNST_LIMB (0xF9D297A859D),  /* 65 */
                    113:   CNST_LIMB (0x19438B44A658),  /* 66 */
                    114:   CNST_LIMB (0x28E0B4BF2BF5),  /* 67 */
                    115:   CNST_LIMB (0x42244003D24D),  /* 68 */
                    116:   CNST_LIMB (0x6B04F4C2FE42),  /* 69 */
                    117:   CNST_LIMB (0xAD2934C6D08F),  /* 70 */
                    118:   CNST_LIMB (0x1182E2989CED1),  /* 71 */
                    119:   CNST_LIMB (0x1C5575E509F60),  /* 72 */
                    120:   CNST_LIMB (0x2DD8587DA6E31),  /* 73 */
                    121:   CNST_LIMB (0x4A2DCE62B0D91),  /* 74 */
                    122:   CNST_LIMB (0x780626E057BC2),  /* 75 */
                    123:   CNST_LIMB (0xC233F54308953),  /* 76 */
                    124:   CNST_LIMB (0x13A3A1C2360515),  /* 77 */
                    125:   CNST_LIMB (0x1FC6E116668E68),  /* 78 */
                    126:   CNST_LIMB (0x336A82D89C937D),  /* 79 */
                    127:   CNST_LIMB (0x533163EF0321E5),  /* 80 */
                    128:   CNST_LIMB (0x869BE6C79FB562),  /* 81 */
                    129:   CNST_LIMB (0xD9CD4AB6A2D747),  /* 82 */
                    130:   CNST_LIMB (0x16069317E428CA9),  /* 83 */
                    131:   CNST_LIMB (0x23A367C34E563F0),  /* 84 */
                    132:   CNST_LIMB (0x39A9FADB327F099),  /* 85 */
                    133:   CNST_LIMB (0x5D4D629E80D5489),  /* 86 */
                    134:   CNST_LIMB (0x96F75D79B354522),  /* 87 */
                    135:   CNST_LIMB (0xF444C01834299AB),  /* 88 */
                    136:   CNST_LIMB (0x18B3C1D91E77DECD),  /* 89 */
                    137:   CNST_LIMB (0x27F80DDAA1BA7878),  /* 90 */
                    138:   CNST_LIMB (0x40ABCFB3C0325745),  /* 91 */
                    139:   CNST_LIMB (0x68A3DD8E61ECCFBD),  /* 92 */
                    140:   CNST_LIMB (0xA94FAD42221F2702),  /* 93 */
                    141: #endif
                    142: };
                    143:
                    144:
                    145: /* Store F[n] at fp and F[n-1] at f1p.  fp and f1p should have room for
                    146:    MPN_FIB2_SIZE(n) limbs.
                    147:
                    148:    The return value is the actual number of limbs stored, this will be at
                    149:    least 1.  fp[size-1] will be non-zero, except when n==0, in which case
                    150:    fp[0] is 0 and f1p[0] is 1.  f1p[size-1] can be zero, since F[n-1]<F[n]
                    151:    (for n>0).
                    152:
                    153:    Notes:
                    154:
                    155:    In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
                    156:    low limb.
                    157:
                    158:    In F[2k+1] with k odd, -2 is applied to the low limb of 4*F[k]^2 -
                    159:    F[k-1]^2.  This F[2k+1] is an F[4m+3] and such numbers are congruent to
                    160:    1, 2 or 5 mod 8, which means no underflow reaching it with a -2 (since
                    161:    that would leave 6 or 7 mod 8).
                    162:
                    163:    This property of F[4m+3] can be verified by induction on F[4m+3] =
                    164:    7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence
                    165:    identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.
                    166:
                    167:    Enhancements:
                    168:
                    169:    If there was an mpn_addlshift, it'd be possible to eliminate the yp
                    170:    temporary, using xp=F[k]^2, fp=F[k-1]^2, f1p=xp+fp, fp+=4*fp, fp-=f1p,
                    171:    fp+=2*(-1)^n, etc.  */
                    172:
                    173: mp_size_t
                    174: mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
                    175: {
                    176:   mp_ptr         xp, yp;
                    177:   mp_size_t      size;
                    178:   unsigned long  nfirst, mask;
                    179:   TMP_DECL (marker);
                    180:
                    181:   TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
                    182:
                    183:   ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n)));
                    184:
                    185:   /* Take a starting pair from the table. */
                    186:   mask = 1;
                    187:   for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2)
                    188:     mask <<= 1;
                    189:   TRACE (printf ("nfirst=%lu mask=0x%lX\n", nfirst, mask));
                    190:
                    191:   f1p[0] = FIB_TABLE ((int) nfirst - 1);
                    192:   fp[0]  = FIB_TABLE (nfirst);
                    193:   size = 1;
                    194:
                    195:   /* Skip to the end if the table lookup gives the final answer. */
                    196:   if (mask != 1)
                    197:     {
                    198:       mp_size_t  alloc;
                    199:
                    200:       TMP_MARK (marker);
                    201:       alloc = MPN_FIB2_SIZE (n);
                    202:       TMP_ALLOC_LIMBS_2 (xp,alloc, yp,alloc);
                    203:
                    204:       do
                    205:        {
                    206:          mp_limb_t  c;
                    207:
                    208:          /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
                    209:             n&mask upwards.
                    210:
                    211:             The next bit of n is n&(mask>>1) and we'll double to the pair
                    212:             fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
                    213:             that bit is 0 or 1 respectively.  */
                    214:
                    215:          TRACE (printf ("k=%lu mask=0x%lX size=%ld alloc=%ld\n",
                    216:                         n >> refmpn_count_trailing_zeros(mask),
                    217:                         mask, size, alloc);
                    218:                 mpn_trace ("fp ", fp, size);
                    219:                 mpn_trace ("f1p", f1p, size));
                    220:
                    221:          /* fp normalized, f1p at most one high zero */
                    222:          ASSERT (fp[size-1] != 0);
                    223:          ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0);
                    224:
                    225:          /* f1p[size-1] might be zero, but this occurs rarely, so it's not
                    226:             worth bothering checking for it */
                    227:          ASSERT (alloc >= 2*size);
                    228:          mpn_sqr_n (xp, fp,  size);
                    229:          mpn_sqr_n (yp, f1p, size);
                    230:          size *= 2;
                    231:
                    232:          /* Shrink if possible.  Since fp was normalized there'll be at
                    233:             most one high zero on xp (and if there is then there's one on
                    234:             yp too).  */
                    235:          ASSERT (xp[size-1] != 0 || yp[size-1] == 0);
                    236:          size -= (xp[size-1] == 0);
                    237:          ASSERT (xp[size-1] != 0);  /* only one xp high zero */
                    238:
                    239:          /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
                    240:             n&mask is the low bit of our implied k.  */
                    241:          c = mpn_lshift (fp, xp, size, 2);
                    242:          fp[0] |= (n & mask ? 0 : 2);   /* possible +2 */
                    243:          c -= mpn_sub_n (fp, fp, yp, size);
                    244:          ASSERT (n & (mask << 1) ? fp[0] != 0 && fp[0] != 1 : 1);
                    245:          fp[0] -= (n & mask ? 2 : 0);   /* possible -2 */
                    246:          ASSERT (alloc >= size+1);
                    247:          xp[size] = 0;
                    248:          yp[size] = 0;
                    249:          fp[size] = c;
                    250:          size += (c != 0);
                    251:
                    252:          /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2.
                    253:             F[2k-1]<F[2k+1] so no carry out of "size" limbs. */
                    254:          ASSERT_NOCARRY (mpn_add_n (f1p, xp, yp, size));
                    255:
                    256:          /* now n&mask is the new bit of n being considered */
                    257:          mask >>= 1;
                    258:
                    259:          /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
                    260:             F[2k+1] and F[2k-1].  */
                    261:          ASSERT_NOCARRY (mpn_sub_n ((n & mask ? f1p : fp), fp, f1p, size));
                    262:
                    263:          /* Can have a high zero after replacing F[2k+1] with F[2k].
                    264:             f1p will have a high zero if fp does. */
                    265:          ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
                    266:          size -= (fp[size-1] == 0);
                    267:        }
                    268:       while (mask != 1);
                    269:
                    270:       TMP_FREE (marker);
                    271:     }
                    272:
                    273:   TRACE (printf ("done size=%ld\n", size);
                    274:         mpn_trace ("fp ", fp, size);
                    275:         mpn_trace ("f1p", f1p, size));
                    276:
                    277:   return size;
                    278: }
                    279:
                    280:
                    281:
                    282:
                    283:
                    284: /* ------------------------------------------------------------------------- */
                    285:
                    286: #if GENERATE_FIB_TABLE
                    287: /* Generate the tables of fibonacci data.  This doesn't depend on the limb
                    288:    size of the host, and doesn't need mpz_fib_ui working.
                    289:
                    290:    The bit sizes in the table[] below will get specific setups so that a
                    291:    build with GMP_NUMB_BITS equal to one of those values has as much data in
                    292:    __gmp_fib_table as will fit that number of bits.
                    293:
                    294:    A build with GMP_NUMB_BITS equal to some other value will effectively
                    295:    fall back to the previous set of generated data.  For instance if 8 and
                    296:    16 bits have been generated, but a build with 13 bits is done then
                    297:    __gmp_fib_table will only contain 8 bit values, whereas it could probably
                    298:    fit a few more.  Everything still works, it's just that the table scheme
                    299:    is not fully exploited.  */
                    300:
                    301: int
                    302: main (void)
                    303: {
                    304:   static struct {
                    305:     int  bits;
                    306:     int  fib_limit;
                    307:     int  luc_limit;
                    308:   } table[] = {
                    309:     { 4 },
                    310:     { 8 },
                    311:     { 16 },
                    312:     { 32 },
                    313:     { 64 },
                    314:   };
                    315:
                    316:   int    i, t;
                    317:   mpz_t  f[500];
                    318:   mpz_t  l;
                    319:
                    320:   mpz_init (l);
                    321:   mpz_init_set_si (f[0], 1L);  /* F[-1] */
                    322:   mpz_init_set_si (f[1], 0L);  /* F[0] */
                    323:   for (i = 2; i < numberof(f); i++)
                    324:     {
                    325:       mpz_init (f[i]);
                    326:       mpz_add (f[i], f[i-1], f[i-2]);
                    327:     }
                    328:
                    329:   for (i = 1; i < numberof (f); i++)
                    330:     {
                    331:       /* L[n] = F[n]+2*F[n-1] */
                    332:       mpz_add (l, f[i], f[i-1]);
                    333:       mpz_add (l, l, f[i-1]);
                    334:
                    335:       for (t = 0; t < numberof (table); t++)
                    336:        {
                    337:          if (mpz_sizeinbase (f[i], 2) <= table[t].bits)
                    338:            table[t].fib_limit = i-1;
                    339:          if (mpz_sizeinbase (l, 2) <= table[t].bits)
                    340:            table[t].luc_limit = i-1;
                    341:        }
                    342:     }
                    343:   if (table[t].fib_limit == numberof (f) + 1)
                    344:     {
                    345:       printf ("Oops, need bigger f[] array\n");
                    346:       abort ();
                    347:     }
                    348:
                    349:   for (t = numberof (table) - 1; t >= 0; t--)
                    350:     {
                    351:       printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
                    352:       printf ("#define FIB_TABLE_LIMIT         %d\n", table[t].fib_limit);
                    353:       printf ("#define FIB_TABLE_LUCNUM_LIMIT  %d\n", table[t].luc_limit);
                    354:       if (t != 0)
                    355:        printf ("#else\n");
                    356:     }
                    357:   for (t = 0; t < numberof (table); t++)
                    358:     printf ("#endif /* %d */\n", table[t].bits);
                    359:   printf ("\n");
                    360:   printf ("\n");
                    361:
                    362:   printf ("const mp_limb_t\n");
                    363:   printf ("__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\n");
                    364:   printf ("\n");
                    365:   t = 0;
                    366:   i = 0;
                    367:   printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
                    368:   for (;;)
                    369:     {
                    370:       gmp_printf ("  CNST_LIMB (0x%ZX),  /* %d */\n", f[i], i-1);
                    371:
                    372:       if (i-1 == table[t].fib_limit)
                    373:        {
                    374:          printf ("#endif\n");
                    375:          do
                    376:            {
                    377:              t++;
                    378:              if (t >= numberof (table))
                    379:                goto done;
                    380:            }
                    381:          while (i-1 == table[t].fib_limit);
                    382:          printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
                    383:        }
                    384:       i++;
                    385:     }
                    386:  done:
                    387:   printf ("};\n");
                    388:
                    389:   return 0;
                    390: }
                    391:
                    392: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>