version 1.1.1.1, 2000/09/09 14:12:44 |
version 1.1.1.2, 2003/08/25 16:06:30 |
|
|
dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication. |
dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication. |
dnl |
|
dnl P5: 13.0 cycles/limb |
|
|
|
dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, |
dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation, |
dnl Inc. |
dnl Inc. |
dnl |
dnl |
dnl This file is part of the GNU MP Library. |
dnl This file is part of the GNU MP Library. |
Line 22 dnl License along with the GNU MP Library; see the file COPYING.LIB. If |

Line 20 dnl License along with the GNU MP Library; see the file COPYING.LIB. If |
|
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl Suite 330, Boston, MA 02111-1307, USA. */ |
dnl Suite 330, Boston, MA 02111-1307, USA. */ |
|
|
|
|
include(`../config.m4') |
include(`../config.m4') |
|
|
|
|
|
C P5: 12.0 cycles/limb |
|
|
|
|
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, |
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, |
C mp_limb_t multiplier); |
C mp_limb_t multiplier); |
|
C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, |
|
C mp_limb_t multiplier, mp_limb_t carry); |
|
C |
|
|
|
C Argument frame offsets, one declaration per parameter.  defframe is
C defined outside this file; presumably each value is the byte offset of
C the argument from %esp at function entry, with later pushes accounted
C for through FRAME -- confirm against the macro definition.
C Within the code shown, PARAM_CARRY is read only by mpn_mul_1c.
defframe(PARAM_CARRY,     20)
defframe(PARAM_MULTIPLIER,16)
defframe(PARAM_SIZE,      12)
defframe(PARAM_SRC,       8)
defframe(PARAM_DST,       4)
|
|
C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                       mp_limb_t multiplier, mp_limb_t carry);
C
C Carry-in entry point.  It loads the initial carry limb into %ecx, saves
C %esi, and then joins the mpn_mul_1 code at L(start_1c).  The state handed
C over at that label is therefore: carry in %ecx, exactly one register
C (%esi) pushed, FRAME advanced by one push.
C
C The section is selected once through the TEXT macro (defined outside
C this file, presumably by config.m4) and the entry is aligned once.

	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_1c)
deflit(`FRAME',0)

	movl	PARAM_CARRY, %ecx	C read before the push, while FRAME is 0
	pushl	%esi		FRAME_pushl()

	jmp	L(start_1c)		C shared body lives in mpn_mul_1

EPILOGUE()
|
|
|
|
|
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                      mp_limb_t multiplier);
C
C Multiplies each limb at src by multiplier, adds the running carry limb,
C stores the low 32 bits at the matching dst position and returns the last
C carry limb in %eax.  mpn_mul_1 starts from a zero carry; mpn_mul_1c jumps
C in at L(start_1c) with its carry already in %ecx and %esi already pushed,
C so the code ahead of that label must leave exactly that state.
C
C size/2 is computed with shrl, so sizes 0 and 1 both take the one-limb
C path; size >= 1 is presumably a caller requirement.
C
C Carry flag discipline: several adcl instructions consume a CF produced
C many instructions earlier.  Only movl, leal, notl, pushl, popl, incl and
C jumps sit in between, none of which modify CF.

	ALIGN(8)
PROLOGUE(mpn_mul_1)
deflit(`FRAME',0)

	xorl	%ecx, %ecx		C carry-in = 0
	pushl	%esi		FRAME_pushl()

L(start_1c):
	movl	PARAM_SRC, %esi
	movl	PARAM_SIZE, %eax

	shrl	%eax			C eax = size/2, CF = low bit of size
	jnz	L(two_or_more)


	C one limb only

	movl	(%esi), %eax

	mull	PARAM_MULTIPLIER

	addl	%eax, %ecx		C low limb + carry-in, sets CF
	movl	PARAM_DST, %eax		C movl leaves CF alone

	adcl	$0, %edx		C fold CF into the high limb
	popl	%esi

	movl	%ecx, (%eax)		C dst[0]
	movl	%edx, %eax		C return carry-out

	ret


L(two_or_more):
	C eax	size/2
	C ebx
	C ecx	carry
	C edx
	C esi	src
	C edi
	C ebp

	pushl	%edi		FRAME_pushl()
	pushl	%ebx		FRAME_pushl()

	movl	PARAM_DST, %edi
	leal	-1(%eax), %ebx		C size/2-1

	notl	%ebx			C -(size/2); CF from shrl is preserved

	leal	(%esi,%eax,8), %esi	C src + 8*(size/2)
	leal	(%edi,%eax,8), %edi	C dst + 8*(size/2)

	pushl	%ebp		FRAME_pushl()
	jnc	L(top)			C CF clear: size even, no odd limb


	C size was odd, process one limb separately

	movl	(%esi,%ebx,8), %eax	C src[0]
	addl	$4, %esi		C end pointers move up one limb

	mull	PARAM_MULTIPLIER

	addl	%ecx, %eax		C CF from here is consumed at L(top)
	movl	%edx, %ecx

	movl	%eax, (%edi,%ebx,8)	C dst[0]
	leal	4(%edi), %edi		C leal rather than addl: keeps CF


L(top):
	C eax
	C ebx	counter, negative
	C ecx	carry
	C edx
	C esi	src end
	C edi	dst end
	C ebp

	adcl	$0, %ecx		C pending CF into the carry limb
	movl	(%esi,%ebx,8), %eax

	mull	PARAM_MULTIPLIER

	movl	%edx, %ebp
	addl	%eax, %ecx		C first result limb

	adcl	$0, %ebp		C ebp = carry into the second limb
	movl	4(%esi,%ebx,8), %eax

	mull	PARAM_MULTIPLIER

	movl	%ecx, (%edi,%ebx,8)
	addl	%ebp, %eax		C second result limb, CF pending

	movl	%eax, 4(%edi,%ebx,8)
	incl	%ebx			C two limbs per pass; incl keeps CF

	movl	%edx, %ecx
	jnz	L(top)


	adcl	$0, %ecx		C last pending CF
	popl	%ebp

	movl	%ecx, %eax		C return carry-out
	popl	%ebx

	popl	%edi
	popl	%esi

	ret

EPILOGUE()