version 1.1.1.1, 2000/09/09 14:12:44 |
version 1.1.1.2, 2003/08/25 16:06:29 |
|
|
dnl Intel Pentium mpn_addmul_1 -- mpn by limb multiplication. |
dnl Intel Pentium mpn_addmul_1 -- mpn by limb multiplication. |
dnl |
|
dnl P5: 14.0 cycles/limb |
|
|
|
|
dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation, |
dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, |
|
dnl Inc. |
dnl Inc. |
dnl |
dnl |
dnl This file is part of the GNU MP Library. |
dnl This file is part of the GNU MP Library. |
dnl License along with the GNU MP Library; see the file COPYING.LIB. If |

dnl License along with the GNU MP Library; see the file COPYING.LIB. If |
|
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl Suite 330, Boston, MA 02111-1307, USA. */ |
dnl Suite 330, Boston, MA 02111-1307, USA. */ |
|
|
|
|
include(`../config.m4') |
include(`../config.m4') |
|
|
|
|
|
C P5: 14.0 cycles/limb |
|
|
|
|
dnl Choose the operation at m4 time.
dnl With OPERATION_addmul_1 defined, M4_inst becomes addl and the function
dnl names become mpn_addmul_1 and mpn_addmul_1c. With OPERATION_submul_1
dnl defined, M4_inst becomes subl and the names become mpn_submul_1 and
dnl mpn_submul_1c. If neither symbol is defined, m4_error is invoked.
dnl NOTE(review): nearly every line below appears twice and the file opens
dnl with two version lines, so this looks like two revisions interleaved.
dnl Only one copy of each M4_function_1c define is present. Confirm against
dnl the real source before running this through m4.
ifdef(`OPERATION_addmul_1', `
ifdef(`OPERATION_addmul_1', `
define(M4_inst, addl)
define(M4_inst, addl)
define(M4_function_1, mpn_addmul_1)
define(M4_function_1, mpn_addmul_1)

define(M4_function_1c, mpn_addmul_1c)


',`ifdef(`OPERATION_submul_1', `
',`ifdef(`OPERATION_submul_1', `
define(M4_inst, subl)
define(M4_inst, subl)
define(M4_function_1, mpn_submul_1)
define(M4_function_1, mpn_submul_1)

define(M4_function_1c, mpn_submul_1c)


',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
')')')
')')')
|
|
dnl Names the functions this file can provide. MULFUNC_PROLOGUE is defined
dnl elsewhere, so its exact effect is not visible here.
dnl NOTE(review): two variants follow, one listing only the plain names and
dnl one that also lists the _1c names; presumably only one belongs in the
dnl real file -- confirm which.
MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
|
|
|
|
C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, |
C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, |
C mp_limb_t mult); |
C mp_limb_t mult); |
|
C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, |
|
C mp_limb_t mult, mp_limb_t carry); |
|
C |
|
C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, |
|
C mp_limb_t mult); |
|
C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, |
|
C mp_limb_t mult, mp_limb_t carry); |
|
C |
|
|
|
C Parameter names used by the code below, each tied to a numeric offset:
C DST=4, SRC=8, SIZE=12, MULTIPLIER=16, CARRY=20.
C The order matches the prototypes in the comments above, with carry as the
C extra fifth argument of the _1c functions.
C defframe is defined elsewhere; presumably the numbers are byte offsets
C from the stack pointer at function entry -- TODO confirm.
C NOTE(review): PARAM_CARRY appears once while the other four appear twice,
C consistent with two interleaved revisions of which only one has a carry
C parameter.
defframe(PARAM_CARRY, 20)
defframe(PARAM_MULTIPLIER,16)
defframe(PARAM_MULTIPLIER,16)
defframe(PARAM_SIZE, 12)
defframe(PARAM_SIZE, 12)
defframe(PARAM_SRC, 8)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)
defframe(PARAM_DST, 4)


C Section selection and alignment for the code that follows.
C NOTE(review): both a literal .text directive and a TEXT macro are
C present, and ALIGN(8) is repeated; likely one of each per revision.
.text
TEXT


ALIGN(8)
ALIGN(8)
|
C Entry point for the variant that takes an initial carry argument
C (mpn_addmul_1c or mpn_submul_1c, depending on the M4_function_1c define).
C It loads the carry argument into ecx, saves esi, and jumps to
C L(start_1c), a label located inside the M4_function_1 body, so the two
C entry points share all code from that label onward.
PROLOGUE(M4_function_1c)

C FRAME starts at 0 here. FRAME_pushl is defined elsewhere; presumably it
C adjusts FRAME after each push so the PARAM_ names keep resolving
C correctly -- TODO confirm.
deflit(`FRAME',0)



C ecx = initial carry. PARAM_CARRY is read before the push below.
movl PARAM_CARRY, %ecx

pushl %esi FRAME_pushl()



C Continue in the shared code. Any register or stack state expected at
C L(start_1c) must already be established at this point.
jmp L(start_1c)



EPILOGUE()
|
|
|
|
|
ALIGN(8) |
C Main entry point (mpn_addmul_1 or mpn_submul_1, depending on the
C M4_function_1 define). Per the prototype comments earlier in the file
C the arguments are dst, src, size and mult, and a limb is returned in eax.
C
C NOTE(review): this body appears to interleave two revisions line by
C line. Two loop variants are visible:
C   L(oop): index in ecx, running carry in ebx, multiplier held in ebp,
C           four registers saved on entry (edi, esi, ebx, ebp).
C   L(top): index in ebx, running carry in ecx, multiplier read from
C           PARAM_MULTIPLIER, registers saved with FRAME_pushl tracking.
C As written the push and pop counts do not match and both loops write
C the same registers, so this text must be separated into its two
C revisions before it can be assembled -- confirm against the real source.
PROLOGUE(M4_function_1)
PROLOGUE(M4_function_1)

deflit(`FRAME',0)


C Register saves. The xorl clears ecx, which is the carry register of the
C L(top) variant, so the plain entry point starts with a zero carry.
pushl %edi
xorl %ecx, %ecx
pushl %esi
pushl %esi FRAME_pushl()
pushl %ebx

pushl %ebp

C Four plain pushes above are accounted for by setting FRAME to 16.
deflit(`FRAME',16)



movl PARAM_DST, %edi
C Shared continuation point: the _1c entry jumps here after loading its
C carry argument into ecx and pushing esi.
L(start_1c):
movl PARAM_SRC, %esi
movl PARAM_SRC, %esi
movl PARAM_SIZE, %ecx
movl PARAM_SIZE, %eax
movl PARAM_MULTIPLIER, %ebp



C Pointer setup for the L(oop) variant: advance edi and esi by 4*ecx bytes
C and negate ecx, so that indexing with ecx walks upward until ecx
C reaches zero.
leal (%edi,%ecx,4), %edi
pushl %edi FRAME_pushl()
leal (%esi,%ecx,4), %esi
pushl %ebx FRAME_pushl()
negl %ecx

C Clears ebx, the carry register of the L(oop) variant. xorl also clears
C the carry flag, so the first adcl at the loop head adds nothing.
xorl %ebx, %ebx

ALIGN(8)



C Loop head of the L(oop) variant: fold the carry flag left by the
C previous M4_inst into the running carry limb in ebx.
L(oop): adcl $0, %ebx
movl PARAM_DST, %edi
movl (%esi,%ecx,4), %eax
leal -1(%eax), %ebx C size-1


C mull leaves the 64-bit product in edx:eax.
mull %ebp
leal (%esi,%eax,4), %esi

xorl $-1, %ebx C -size, and clear carry


addl %ebx, %eax
leal (%edi,%eax,4), %edi
movl (%edi,%ecx,4), %ebx




C Loop head of the L(top) variant. Register roles as documented by the
C original author:
L(top):

C eax

C ebx counter, negative

C ecx carry

C edx

C esi src end

C edi dst end

C ebp



C Fold the carry flag left by the previous M4_inst into the carry limb.
adcl $0, %ecx

movl (%esi,%ebx,4), %eax



mull PARAM_MULTIPLIER



C Add the incoming carry limb to the low product word, then propagate the
C resulting carry flag into the high word with the adcl below.
addl %ecx, %eax

movl (%edi,%ebx,4), %ecx


adcl $0, %edx
adcl $0, %edx
C M4_inst is addl or subl: combine the low product word with the
C destination limb that was just loaded.
M4_inst %eax, %ebx
M4_inst %eax, %ecx


C Store the result limb, then step the index. incl leaves the carry flag
C untouched, so the carry or borrow from M4_inst survives until the adcl
C at the loop head or after the loop, while the zero flag set by incl
C drives the jnz.
movl %ebx, (%edi,%ecx,4)
movl %ecx, (%edi,%ebx,4)
incl %ecx
incl %ebx


C The high product word becomes the carry limb for the next iteration.
movl %edx, %ebx
movl %edx, %ecx
jnz L(oop)
jnz L(top)


C After the loop: fold the final carry flag into the carry limb and move
C it to eax as the return value, ebx in one variant and ecx in the other.
adcl $0, %ebx

movl %ebx, %eax
adcl $0, %ecx
popl %ebp

popl %ebx
popl %ebx
popl %esi


movl %ecx, %eax
popl %edi
popl %edi



popl %esi


ret
ret


EPILOGUE()
EPILOGUE()