=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/x86/pentium/Attic/aorsmul_1.asm,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1.1.1 -r1.1.1.2 --- OpenXM_contrib/gmp/mpn/x86/pentium/Attic/aorsmul_1.asm 2000/09/09 14:12:44 1.1.1.1 +++ OpenXM_contrib/gmp/mpn/x86/pentium/Attic/aorsmul_1.asm 2003/08/25 16:06:29 1.1.1.2 @@ -1,9 +1,6 @@ dnl Intel Pentium mpn_addmul_1 -- mpn by limb multiplication. -dnl -dnl P5: 14.0 cycles/limb - -dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, +dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation, dnl Inc. dnl dnl This file is part of the GNU MP Library. @@ -23,77 +20,116 @@ dnl License along with the GNU MP Library; see the fi dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. */ - include(`../config.m4') +C P5: 14.0 cycles/limb + + ifdef(`OPERATION_addmul_1', ` define(M4_inst, addl) define(M4_function_1, mpn_addmul_1) + define(M4_function_1c, mpn_addmul_1c) ',`ifdef(`OPERATION_submul_1', ` define(M4_inst, subl) define(M4_function_1, mpn_submul_1) + define(M4_function_1c, mpn_submul_1c) ',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1 ')')') -MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1) +MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c) -C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, -C mp_limb_t mult); +C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult); +C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult, mp_limb_t carry); +C +C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult); +C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult, mp_limb_t carry); +C +defframe(PARAM_CARRY, 20) defframe(PARAM_MULTIPLIER,16) defframe(PARAM_SIZE, 12) defframe(PARAM_SRC, 8) defframe(PARAM_DST, 4) - .text + TEXT + ALIGN(8) +PROLOGUE(M4_function_1c) +deflit(`FRAME',0) + movl PARAM_CARRY, %ecx + pushl %esi FRAME_pushl() + + jmp L(start_1c) + +EPILOGUE() + + + ALIGN(8) PROLOGUE(M4_function_1) +deflit(`FRAME',0) - pushl %edi - pushl %esi - pushl %ebx - pushl %ebp -deflit(`FRAME',16) + xorl %ecx, %ecx + pushl %esi FRAME_pushl() - movl PARAM_DST, %edi +L(start_1c): movl PARAM_SRC, %esi - movl PARAM_SIZE, %ecx - movl PARAM_MULTIPLIER, %ebp + movl PARAM_SIZE, %eax - leal (%edi,%ecx,4), %edi - leal (%esi,%ecx,4), %esi - negl %ecx - xorl %ebx, %ebx - ALIGN(8) + pushl %edi FRAME_pushl() + pushl %ebx FRAME_pushl() -L(oop): adcl $0, %ebx - movl (%esi,%ecx,4), %eax + movl PARAM_DST, %edi + leal -1(%eax), %ebx C size-1 - mull %ebp + leal (%esi,%eax,4), %esi + xorl $-1, %ebx C -size, and clear carry - addl %ebx, %eax - movl (%edi,%ecx,4), %ebx + leal (%edi,%eax,4), %edi +L(top): + C eax + C ebx counter, negative + C ecx carry + C edx + C esi src end + C edi dst end + C ebp + + adcl $0, %ecx + movl (%esi,%ebx,4), %eax + + mull PARAM_MULTIPLIER + + addl %ecx, %eax + movl (%edi,%ebx,4), %ecx + adcl $0, %edx - M4_inst %eax, %ebx + M4_inst %eax, %ecx - movl %ebx, (%edi,%ecx,4) - incl %ecx + movl %ecx, (%edi,%ebx,4) + incl %ebx - movl %edx, %ebx - jnz L(oop) + movl %edx, %ecx + jnz L(top) - adcl $0, %ebx - movl %ebx, %eax - popl %ebp + + adcl $0, %ecx popl %ebx - popl %esi + + movl %ecx, %eax popl %edi + + popl %esi + ret EPILOGUE()