/* $Id: level0.h,v 1.3 2000/11/03 21:00:26 karim Exp $ Copyright (C) 2000 The PARI group. This file is part of the PARI/GP package. PARI/GP is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation. It is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY WHATSOEVER. Check the License for details. You should have received a copy of it, along with the package; see the file 'COPYING'. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* This file defines some "level 0" kernel functions */ /* These functions can be inline, with gcc */ /* If not gcc, they are defined externally with "level0.c" */ /* NB: Those functions are not defined in mp.s */ /* level0.c includes this file and never needs to be changed */ /* The following seven lines are necessary for level0.c and level1.c */ #ifdef LEVEL0 #undef INLINE #define INLINE #endif #ifdef LEVEL1 #undef INLINE #endif #define LOCAL_OVERFLOW #define SAVE_OVERFLOW #define LOCAL_HIREMAINDER #define SAVE_HIREMAINDER #ifndef INLINE BEGINEXTERN extern ulong overflow, hiremainder; extern long addll(ulong x, ulong y); extern long addllx(ulong x, ulong y); extern long subll(ulong x, ulong y); extern long subllx(ulong x, ulong y); extern long shiftl(ulong x, ulong y); extern long shiftlr(ulong x, ulong y); extern long mulll(ulong x, ulong y); extern long addmul(ulong x, ulong y); extern long divll(ulong x, ulong y); extern int bfffo(ulong x); ENDEXTERN #else ulong overflow; ulong hiremainder; INLINE long addll(ulong x, ulong y) { const ulong z = x+y; overflow=(zx); return (long) z; } INLINE long subllx(ulong x, ulong y) { const ulong z = x-y-overflow; overflow = (z>x || (z==x && overflow)); return (long) z; } INLINE long shiftl(ulong x, ulong y) { hiremainder=x>>(BITS_IN_LONG-y); return (x<>y); } #define HIGHWORD(a) ((a) >> BITS_IN_HALFULONG) #define LOWWORD(a) ((a) & LOWMASK) /* Version Peter Montgomery */ /* * Assume (for presentation) that BITS_IN_LONG = 32. * Then 0 <= xhi, xlo, yhi, ylo <= 2^16 - 1. Hence * * -2^31 + 2^16 <= (xhi-2^15)*(ylo-2^15) + (xlo-2^15)*(yhi-2^15) <= 2^31. * * If xhi*ylo + xlo*yhi = 2^32*overflow + xymid, then * * -2^32 + 2^16 <= 2^32*overflow + xymid - 2^15*(xhi + ylo + xlo + yhi) <= 0. * * 2^16*overflow <= (xhi+xlo+yhi+ylo)/2 - xymid/2^16 <= 2^16*overflow + 2^16-1 * * This inequality was derived using exact (rational) arithmetic; * it remains valid when we truncate the two middle terms. */ INLINE long mulll(ulong x, ulong y) { const ulong xlo = LOWWORD(x), xhi = HIGHWORD(x); const ulong ylo = LOWWORD(y), yhi = HIGHWORD(y); ulong xylo,xymid,xyhi,xymidhi,xymidlo; xylo = xlo*ylo; xyhi = xhi*yhi; xymid = (xhi+xlo)*(yhi+ylo) - (xyhi+xylo); xymidhi = HIGHWORD(xymid); xymidlo = xymid << BITS_IN_HALFULONG; xylo += xymidlo; hiremainder = xyhi + xymidhi + (xylo < xymidlo) + (((((xhi+xlo) + (yhi+ylo)) >> 1) - xymidhi) & HIGHMASK); return xylo; } INLINE long addmul(ulong x, ulong y) { const ulong xlo = LOWWORD(x), xhi = HIGHWORD(x); const ulong ylo = LOWWORD(y), yhi = HIGHWORD(y); ulong xylo,xymid,xyhi,xymidhi,xymidlo; xylo = xlo*ylo; xyhi = xhi*yhi; xymid = (xhi+xlo)*(yhi+ylo) - (xyhi+xylo); xylo += hiremainder; xyhi += (xylo < hiremainder); xymidhi = HIGHWORD(xymid); xymidlo = xymid << BITS_IN_HALFULONG; xylo += xymidlo; hiremainder = xyhi + xymidhi + (xylo < xymidlo) + (((((xhi+xlo) + (yhi+ylo)) >> 1) - xymidhi) & HIGHMASK); return xylo; } /* version Peter Montgomery */ INLINE int bfffo(ulong x) { int sc; static int tabshi[16]={4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0}; sc = BITS_IN_LONG - 4; #ifdef LONG_IS_64BIT if (x & 0xffffffff00000000) {sc -= 32; x >>= 32;} #endif if (x > 0xffffUL) {sc -= 16; x >>= 16;} if (x > 0x00ffUL) {sc -= 8; x >>= 8;} if (x > 0x000fUL) {sc -= 4; x >>= 4;} return sc + tabshi[x]; } /* Version Peter Montgomery */ INLINE long divll(ulong x, ulong y) { #define GLUE(hi, lo) (((hi) << BITS_IN_HALFULONG) + (lo)) #define SPLIT(a, b, c) b = HIGHWORD(a); c = LOWWORD(a) ulong v1, v2, u3, u4, q1, q2, aux, aux1, aux2; int k; if (hiremainder >= y) err(talker, "Invalid arguments to divll"); if (hiremainder == 0) { /* Only one division needed */ hiremainder = x % y; return x/y; } if (y <= LOWMASK) { /* Use two half-word divisions */ hiremainder = GLUE(hiremainder, HIGHWORD(x)); q1 = hiremainder / y; hiremainder = GLUE(hiremainder % y, LOWWORD(x)); q2 = hiremainder / y; hiremainder = hiremainder % y; return GLUE(q1, q2); } if (y & HIGHBIT) k = 0; else { k = bfffo(y); hiremainder = (hiremainder << k) + (x >> (BITS_IN_LONG - k)); x <<= k; y <<= k; } SPLIT(y, v1, v2); SPLIT(x, u3, u4); q1 = hiremainder / v1; if (q1 > LOWMASK) q1 = LOWMASK; hiremainder -= q1 * v1; aux = v2 * q1; again: SPLIT(aux, aux1, aux2); if (aux2 > u3) aux1++; if (aux1 > hiremainder) {q1--; hiremainder += v1; aux -= v2; goto again;} hiremainder = GLUE(hiremainder - aux1, LOWWORD(u3 - aux2)); q2 = hiremainder / v1; if (q2 > LOWMASK) q2 = LOWMASK; hiremainder -= q2 * v1; aux = v2 * q2; again2: SPLIT(aux, aux1, aux2); if (aux2 > u4) aux1++; if (aux1 > hiremainder) {q2--; hiremainder += v1; aux -= v2; goto again2;} hiremainder = GLUE(hiremainder - aux1, LOWWORD(u4 - aux2)) >> k; return GLUE(q1, q2); } #endif