version 1.1.1.1, 2000/09/09 14:12:42 |
version 1.1.1.2, 2003/08/25 16:06:27 |
|
|
dnl AMD K6 mpn_add/sub_n -- mpn addition or subtraction. |
dnl AMD K6 mpn_add/sub_n -- mpn addition or subtraction. |
dnl |
|
dnl K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb. |
|
|
|
|
dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. |
|
dnl |
dnl |
dnl This file is part of the GNU MP Library. |
dnl This file is part of the GNU MP Library. |
dnl |
dnl |
Line 22 dnl License along with the GNU MP Library; see the fi |
|
Line 19 dnl License along with the GNU MP Library; see the fi |
|
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl Suite 330, Boston, MA 02111-1307, USA. |
dnl Suite 330, Boston, MA 02111-1307, USA. |
|
|
|
|
include(`../config.m4') |
include(`../config.m4') |
|
|
|
|
|
C K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb. |
|
|
|
|
ifdef(`OPERATION_add_n', ` |
ifdef(`OPERATION_add_n', ` |
define(M4_inst, adcl) |
define(M4_inst, adcl) |
define(M4_function_n, mpn_add_n) |
define(M4_function_n, mpn_add_n) |
Line 69 deflit(`FRAME',0) |
|
Line 68 deflit(`FRAME',0) |
|
dnl minimum 5 because the unrolled code can't handle less |
dnl minimum 5 because the unrolled code can't handle less |
deflit(UNROLL_THRESHOLD, 5) |
deflit(UNROLL_THRESHOLD, 5) |
|
|
.text |
TEXT |
ALIGN(32) |
ALIGN(32) |
|
|
PROLOGUE(M4_function_nc) |
PROLOGUE(M4_function_nc) |
movl PARAM_CARRY, %eax |
movl PARAM_CARRY, %eax |
jmp LF(M4_function_n,start) |
jmp L(start) |
EPILOGUE() |
EPILOGUE() |
|
|
|
|
|
|
C |
C |
C The store to (%edi) could be done with a stosl; it'd be smaller |
C The store to (%edi) could be done with a stosl; it'd be smaller |
C code, but there's no speed gain and a cld would have to be added |
C code, but there's no speed gain and a cld would have to be added |
C (per mpn/x86/README.family). |
C (per mpn/x86/README). |
|
|
movl (%ebx), %eax |
movl (%ebx), %eax |
leal 4(%ebx), %ebx |
leal 4(%ebx), %ebx |