[BACK]Return to divrem_1.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86 / k7 / mmx

Diff for /OpenXM_contrib/gmp/mpn/x86/k7/mmx/Attic/divrem_1.asm between version 1.1.1.1 and 1.1.1.2

version 1.1.1.1, 2000/09/09 14:12:42 version 1.1.1.2, 2003/08/25 16:06:29
Line 1 
Line 1 
 dnl  AMD K7 mpn_divrem_1 -- mpn by limb division.  dnl  AMD K7 mpn_divrem_1, mpn_divrem_1c, mpn_preinv_divrem_1 -- mpn by limb
 dnl  dnl  division.
 dnl  K7: 17.0 cycles/limb integer part, 15.0 cycles/limb fraction part.  
   
   dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl  Copyright (C) 1999, 2000 Free Software Foundation, Inc.  dnl
 dnl  
 dnl  This file is part of the GNU MP Library.  dnl  This file is part of the GNU MP Library.
 dnl  dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or  dnl  The GNU MP Library is free software; you can redistribute it and/or
 dnl  modify it under the terms of the GNU Lesser General Public License as  dnl  modify it under the terms of the GNU Lesser General Public License as
 dnl  published by the Free Software Foundation; either version 2.1 of the  dnl  published by the Free Software Foundation; either version 2.1 of the
 dnl  License, or (at your option) any later version.  dnl  License, or (at your option) any later version.
 dnl  dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful,  dnl  The GNU MP Library is distributed in the hope that it will be useful,
 dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of  dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
 dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU  dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 dnl  Lesser General Public License for more details.  dnl  Lesser General Public License for more details.
 dnl  dnl
 dnl  You should have received a copy of the GNU Lesser General Public  dnl  You should have received a copy of the GNU Lesser General Public
 dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If  dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
 dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -  dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
 dnl  Suite 330, Boston, MA 02111-1307, USA.  dnl  Suite 330, Boston, MA 02111-1307, USA.
   
   
 include(`../config.m4')  include(`../config.m4')
   
   
   C K7: 17.0 cycles/limb integer part, 15.0 cycles/limb fraction part.
   
   
 C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,  C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
 C                         mp_srcptr src, mp_size_t size,  C                         mp_srcptr src, mp_size_t size,
 C                         mp_limb_t divisor);  C                         mp_limb_t divisor);
 C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,  C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
 C                          mp_srcptr src, mp_size_t size,  C                          mp_srcptr src, mp_size_t size,
 C                          mp_limb_t divisor, mp_limb_t carry);  C                          mp_limb_t divisor, mp_limb_t carry);
   C mp_limb_t mpn_preinv_divrem_1 (mp_ptr dst, mp_size_t xsize,
   C                                mp_srcptr src, mp_size_t size,
   C                                mp_limb_t divisor, mp_limb_t inverse,
   C                                unsigned shift);
 C  C
 C The method and nomenclature follow part 8 of "Division by Invariant  C The method and nomenclature follow part 8 of "Division by Invariant
 C Integers using Multiplication" by Granlund and Montgomery, reference in  C Integers using Multiplication" by Granlund and Montgomery, reference in
Line 42  C for m', and "d" for d_norm, which won't cause any co
Line 46  C for m', and "d" for d_norm, which won't cause any co
 C only the normalized divisor that's of any use in the code.  "b" is written  C only the normalized divisor that's of any use in the code.  "b" is written
 C for 2^N, the size of a limb, N being 32 here.  C for 2^N, the size of a limb, N being 32 here.
 C  C
 C mpn_divrem_1 avoids one division if the src high limb is less than the  C mpn_divrem_1 and mpn_preinv_divrem_1 avoid one division if the src high
 C divisor.  mpn_divrem_1c doesn't check for a zero carry, since in normal  C limb is less than the divisor.  mpn_divrem_1c doesn't check for a zero
 C circumstances that will be a very rare event.  C carry, since in normal circumstances that will be a very rare event.
 C  C
   C The test for skipping a division is branch free (once size>=1 is tested).
   C The store to the destination high limb is 0 when a divide is skipped, or
   C if it's not skipped then a copy of the src high limb is used.  The latter
   C is in case src==dst.
   C
 C There's a small bias towards expecting xsize==0, by having code for  C There's a small bias towards expecting xsize==0, by having code for
 C xsize==0 in a straight line and xsize!=0 under forward jumps.  C xsize==0 in a straight line and xsize!=0 under forward jumps.
   C
   C Alternatives:
   C
   C If the divisor is normalized (high bit set) then a division step can
   C always be skipped, since the high destination limb is always 0 or 1 in
   C that case.  It doesn't seem worth checking for this though, since it
   C probably occurs infrequently, in particular note that big_base for a
   C decimal mpn_get_str is not normalized in a 32-bit limb.
   
   
 dnl  MUL_THRESHOLD is the value of xsize+size at which the multiply by  dnl  MUL_THRESHOLD is the value of xsize+size at which the multiply by
Line 62  dnl  even more so on the fractional part.
Line 79  dnl  even more so on the fractional part.
 deflit(MUL_THRESHOLD, 3)  deflit(MUL_THRESHOLD, 3)
   
   
 defframe(PARAM_CARRY,  24)  defframe(PARAM_PREINV_SHIFT,   28)  dnl mpn_preinv_divrem_1
   defframe(PARAM_PREINV_INVERSE, 24)  dnl mpn_preinv_divrem_1
   defframe(PARAM_CARRY,  24)          dnl mpn_divrem_1c
 defframe(PARAM_DIVISOR,20)  defframe(PARAM_DIVISOR,20)
 defframe(PARAM_SIZE,   16)  defframe(PARAM_SIZE,   16)
 defframe(PARAM_SRC,    12)  defframe(PARAM_SRC,    12)
Line 82  defframe(VAR_DST_STOP,-36)
Line 101  defframe(VAR_DST_STOP,-36)
   
 deflit(STACK_SPACE, 36)  deflit(STACK_SPACE, 36)
   
         .text          TEXT
         ALIGN(32)          ALIGN(32)
   
   PROLOGUE(mpn_preinv_divrem_1)
   deflit(`FRAME',0)
           movl    PARAM_XSIZE, %ecx
           movl    PARAM_DST, %edx
           subl    $STACK_SPACE, %esp      FRAME_subl_esp(STACK_SPACE)
   
           movl    %esi, SAVE_ESI
           movl    PARAM_SRC, %esi
   
           movl    %ebx, SAVE_EBX
           movl    PARAM_SIZE, %ebx
   
           leal    8(%edx,%ecx,4), %edx    C &dst[xsize+2]
           movl    %ebp, SAVE_EBP
           movl    PARAM_DIVISOR, %ebp
   
           movl    %edx, VAR_DST_STOP      C &dst[xsize+2]
           movl    %edi, SAVE_EDI
           xorl    %edi, %edi              C carry
   
           movl    -4(%esi,%ebx,4), %eax   C src high limb
           xor     %ecx, %ecx
   
           C
   
           C
   
           cmpl    %ebp, %eax              C high cmp divisor
   
           cmovc(  %eax, %edi)             C high is carry if high<divisor
           cmovnc( %eax, %ecx)             C 0 if skip div, src high if not
                                           C (the latter in case src==dst)
   
           movl    %ecx, -12(%edx,%ebx,4)  C dst high limb
           sbbl    $0, %ebx                C skip one division if high<divisor
           movl    PARAM_PREINV_SHIFT, %ecx
   
           leal    -8(%edx,%ebx,4), %edx   C &dst[xsize+size]
           movl    $32, %eax
   
           movl    %edx, VAR_DST           C &dst[xsize+size]
   
           shll    %cl, %ebp               C d normalized
           subl    %ecx, %eax
           movl    %ecx, VAR_NORM
   
           movd    %eax, %mm7              C rshift
           movl    PARAM_PREINV_INVERSE, %eax
           jmp     L(start_preinv)
   
   EPILOGUE()
   
   
           ALIGN(16)
   
 PROLOGUE(mpn_divrem_1c)  PROLOGUE(mpn_divrem_1c)
 deflit(`FRAME',0)  deflit(`FRAME',0)
         movl    PARAM_CARRY, %edx          movl    PARAM_CARRY, %edx
Line 104  deflit(`FRAME',STACK_SPACE)
Line 178  deflit(`FRAME',STACK_SPACE)
         movl    %esi, SAVE_ESI          movl    %esi, SAVE_ESI
         movl    PARAM_SRC, %esi          movl    PARAM_SRC, %esi
   
         leal    -4(%edi,%ebx,4), %edi          leal    -4(%edi,%ebx,4), %edi   C &dst[xsize-1]
         jmp     LF(mpn_divrem_1,start_1c)          jmp     L(start_1c)
   
 EPILOGUE()  EPILOGUE()
   
   
         C offset 0x31, close enough to aligned          C offset 0xa1, close enough to aligned
 PROLOGUE(mpn_divrem_1)  PROLOGUE(mpn_divrem_1)
 deflit(`FRAME',0)  deflit(`FRAME',0)
   
Line 119  deflit(`FRAME',0)
Line 193  deflit(`FRAME',0)
         subl    $STACK_SPACE, %esp          subl    $STACK_SPACE, %esp
 deflit(`FRAME',STACK_SPACE)  deflit(`FRAME',STACK_SPACE)
   
         movl    %ebp, SAVE_EBP          movl    %esi, SAVE_ESI
         movl    PARAM_DIVISOR, %ebp          movl    PARAM_SRC, %esi
   
         movl    %ebx, SAVE_EBX          movl    %ebx, SAVE_EBX
         movl    PARAM_XSIZE, %ebx          movl    PARAM_XSIZE, %ebx
   
         movl    %esi, SAVE_ESI          movl    %ebp, SAVE_EBP
         movl    PARAM_SRC, %esi          movl    PARAM_DIVISOR, %ebp
         orl     %ecx, %ecx          orl     %ecx, %ecx              C size
   
         movl    %edi, SAVE_EDI          movl    %edi, SAVE_EDI
         movl    PARAM_DST, %edi          movl    PARAM_DST, %edi
         leal    -4(%edi,%ebx,4), %edi   C &dst[xsize-1]          leal    -4(%edi,%ebx,4), %edi   C &dst[xsize-1]
   
         jz      L(no_skip_div)          jz      L(no_skip_div)          C if size==0
         movl    -4(%esi,%ecx,4), %eax   C src high limb          movl    -4(%esi,%ecx,4), %eax   C src high limb
           xorl    %esi, %esi
   
         cmpl    %ebp, %eax              C one less div if high<divisor          cmpl    %ebp, %eax              C high cmp divisor
         jnb     L(no_skip_div)  
   
         movl    $0, (%edi,%ecx,4)       C dst high limb          cmovc(  %eax, %edx)             C high is carry if high<divisor
         decl    %ecx                    C size-1          cmovnc( %eax, %esi)             C 0 if skip div, src high if not
         movl    %eax, %edx              C src high limb as initial carry  
           movl    %esi, (%edi,%ecx,4)     C dst high limb
           sbbl    $0, %ecx                C size-1 if high<divisor
           movl    PARAM_SRC, %esi         C reload
 L(no_skip_div):  L(no_skip_div):
   
   
 L(start_1c):  L(start_1c):
         C eax          C eax
         C ebx   xsize          C ebx   xsize
         C ecx   size          C ecx   size
         C edx   carry          C edx   carry
Line 239  L(mul_by_inverse):
Line 316  L(mul_by_inverse):
   
         bsrl    %ebp, %eax              C 31-l          bsrl    %ebp, %eax              C 31-l
   
         leal    12(%edi), %ebx          leal    12(%edi), %ebx          C &dst[xsize+2], loop dst stop
         leal    4(%edi,%ecx,4), %edi    C &dst[xsize+size]          leal    4(%edi,%ecx,4), %edi    C &dst[xsize+size]
   
         movl    %edi, VAR_DST          movl    %edi, VAR_DST
Line 266  L(mul_by_inverse):
Line 343  L(mul_by_inverse):
   
         divl    %ebp                    C floor (b*(b-d)-1) / d          divl    %ebp                    C floor (b*(b-d)-1) / d
   
   L(start_preinv):
           C eax   inverse
           C ebx   size
           C ecx   shift
           C edx
           C esi   src
           C edi   carry
           C ebp   divisor
           C
           C mm7   rshift
   
         orl     %ebx, %ebx              C size          orl     %ebx, %ebx              C size
         movl    %eax, VAR_INVERSE          movl    %eax, VAR_INVERSE
         leal    -12(%esi,%ebx,4), %eax  C &src[size-3]          leal    -12(%esi,%ebx,4), %eax  C &src[size-3]
Line 298  L(start_one):
Line 386  L(start_one):
   
   
 L(start_zero):  L(start_zero):
           C Can be here with xsize==0 if mpn_preinv_divrem_1 had size==1 and
           C skipped a division.
   
         shll    %cl, %edi               C n2 = carry << l          shll    %cl, %edi               C n2 = carry << l
         movl    $0, %esi                C n10 = 0          movl    %edi, %eax              C return value for zero_done
           cmpl    $0, PARAM_XSIZE
   
         C we're here because xsize+size>=MUL_THRESHOLD, so with size==0 then          je      L(zero_done)
         C must have xsize!=0  
         jmp     L(fraction_some)          jmp     L(fraction_some)
   
   
Line 383  L(integer_top):
Line 474  L(integer_top):
         C          C
   
         addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag          addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
         leal    1(%edi), %ebx      C n2<<32 + m*(n2+n1))          leal    1(%edi), %ebx      C n2+1
         movl    %ebp, %eax         C d          movl    %ebp, %eax         C d
   
         C          C
Line 438  L(integer_two_left):
Line 529  L(integer_two_left):
         C edi   n2          C edi   n2
         C ebp   divisor          C ebp   divisor
         C          C
         C mm0   src limb, shifted  
         C mm7   rshift          C mm7   rshift
   
         cmpl    $0x80000000, %esi  C n1 as 0=c, 1=nc          cmpl    $0x80000000, %esi  C n1 as 0=c, 1=nc
Line 458  L(integer_two_left):
Line 548  L(integer_two_left):
         C          C
   
         addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag          addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
         leal    1(%edi), %ebx      C n2<<32 + m*(n2+n1))          leal    1(%edi), %ebx      C n2+1
         movl    %ebp, %eax         C d          movl    %ebp, %eax         C d
   
         adcl    %edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1          adcl    %edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
Line 499  L(integer_one_left):
Line 589  L(integer_one_left):
         C edi   n2          C edi   n2
         C ebp   divisor          C ebp   divisor
         C          C
         C mm0   src limb, shifted  
         C mm7   rshift          C mm7   rshift
   
         movl    VAR_DST_STOP, %ecx          movl    VAR_DST_STOP, %ecx
Line 519  L(integer_one_left):
Line 608  L(integer_one_left):
         C          C
   
         addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag          addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
         leal    1(%edi), %ebx      C n2<<32 + m*(n2+n1))          leal    1(%edi), %ebx      C n2+1
         movl    %ebp, %eax         C d          movl    %ebp, %eax         C d
   
         C          C
Line 559  L(integer_none):
Line 648  L(integer_none):
         movl    %edi, %eax          movl    %edi, %eax
 L(fraction_done):  L(fraction_done):
         movl    VAR_NORM, %ecx          movl    VAR_NORM, %ecx
   L(zero_done):
         movl    SAVE_EBP, %ebp          movl    SAVE_EBP, %ebp
   
         movl    SAVE_EDI, %edi          movl    SAVE_EDI, %edi
Line 652  L(fraction_some):
Line 742  L(fraction_some):
         C ebp   divisor          C ebp   divisor
   
         movl    PARAM_DST, %esi          movl    PARAM_DST, %esi
         movl    VAR_DST_STOP, %ecx          movl    VAR_DST_STOP, %ecx      C &dst[xsize+2]
         movl    %edi, %eax          movl    %edi, %eax
   
         subl    $8, %ecx          subl    $8, %ecx                C &dst[xsize]
   
         jmp     L(fraction_entry)          jmp     L(fraction_entry)
   
   

Legend:
Removed from v.1.1.1.1  
changed lines
  Added in v.1.1.1.2

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>