=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/x86/pentium/Attic/mul_1.asm,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1.1.1 -r1.1.1.2 --- OpenXM_contrib/gmp/mpn/x86/pentium/Attic/mul_1.asm 2000/09/09 14:12:44 1.1.1.1 +++ OpenXM_contrib/gmp/mpn/x86/pentium/Attic/mul_1.asm 2003/08/25 16:06:30 1.1.1.2 @@ -1,8 +1,6 @@ dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication. -dnl -dnl P5: 13.0 cycles/limb -dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, +dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation, dnl Inc. dnl dnl This file is part of the GNU MP Library. @@ -22,58 +20,149 @@ dnl License along with the GNU MP Library; see the fi dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. */ - include(`../config.m4') +C P5: 12.0 cycles/limb + + C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, C mp_limb_t multiplier); +C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t multiplier, mp_limb_t carry); +C +defframe(PARAM_CARRY, 20) defframe(PARAM_MULTIPLIER,16) defframe(PARAM_SIZE, 12) defframe(PARAM_SRC, 8) defframe(PARAM_DST, 4) - .text + TEXT ALIGN(8) +PROLOGUE(mpn_mul_1c) +deflit(`FRAME',0) + + movl PARAM_CARRY, %ecx + pushl %esi FRAME_pushl() + + jmp L(start_1c) + +EPILOGUE() + + + ALIGN(8) PROLOGUE(mpn_mul_1) +deflit(`FRAME',0) - pushl %edi - pushl %esi - pushl %ebx - pushl %ebp -deflit(`FRAME',16) + xorl %ecx, %ecx + pushl %esi FRAME_pushl() - movl PARAM_DST, %edi +L(start_1c): movl PARAM_SRC, %esi - movl PARAM_SIZE, %ecx - movl PARAM_MULTIPLIER, %ebp + movl PARAM_SIZE, %eax - leal (%edi,%ecx,4), %edi - leal (%esi,%ecx,4), %esi - negl %ecx - xorl %ebx, %ebx - ALIGN(8) + shrl %eax + jnz L(two_or_more) -L(oop): adcl $0, %ebx - movl (%esi,%ecx,4), %eax - mull %ebp + C one limb only - addl %eax, %ebx + movl (%esi), %eax - movl %ebx, (%edi,%ecx,4) - incl %ecx + mull PARAM_MULTIPLIER - movl %edx, %ebx - jnz L(oop) + addl %eax, %ecx + movl PARAM_DST, %eax - adcl $0, %ebx - movl %ebx, %eax + adcl $0, %edx + popl %esi + + movl %ecx, (%eax) + movl %edx, %eax + + ret + + +L(two_or_more): + C eax size/2 + C ebx + C ecx carry + C edx + C esi src + C edi + C ebp + + pushl %edi FRAME_pushl() + pushl %ebx FRAME_pushl() + + movl PARAM_DST, %edi + leal -1(%eax), %ebx C size/2-1 + + notl %ebx C -size, preserve carry + + leal (%esi,%eax,8), %esi C src end + leal (%edi,%eax,8), %edi C dst end + + pushl %ebp FRAME_pushl() + jnc L(top) + + + C size was odd, process one limb separately + + movl (%esi,%ebx,8), %eax + addl $4, %esi + + mull PARAM_MULTIPLIER + + addl %ecx, %eax + movl %edx, %ecx + + movl %eax, (%edi,%ebx,8) + leal 4(%edi), %edi + + +L(top): + C eax + C ebx counter, negative + C ecx carry + C edx + C esi src end + C edi dst end + C ebp + + adcl $0, %ecx + movl (%esi,%ebx,8), %eax + + mull PARAM_MULTIPLIER + + movl %edx, %ebp + addl %eax, %ecx + + adcl $0, %ebp + movl 4(%esi,%ebx,8), %eax + + mull PARAM_MULTIPLIER + + movl %ecx, (%edi,%ebx,8) + addl %ebp, %eax + + movl %eax, 4(%edi,%ebx,8) + incl %ebx + + movl %edx, %ecx + jnz L(top) + + + adcl $0, %ecx popl %ebp + + movl %ecx, %eax popl %ebx - popl %esi + popl %edi + popl %esi + ret EPILOGUE()