version 1.1.1.1, 2000/09/09 14:12:42 |
version 1.1.1.2, 2003/08/25 16:06:29 |
|
|
dnl AMD K7 mpn_sqr_basecase -- square an mpn number. |
dnl AMD K7 mpn_sqr_basecase -- square an mpn number. |
dnl |
|
dnl K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product |
|
dnl (measured on the speed difference between 25 and 50 limbs, which is |
|
dnl roughly the Karatsuba recursing range). |
|
|
|
|
dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. |
|
dnl |
dnl |
dnl This file is part of the GNU MP Library. |
dnl This file is part of the GNU MP Library. |
dnl |
dnl |
Line 24 dnl License along with the GNU MP Library; see the file COPYING.LIB. If |
|
Line 19 dnl License along with the GNU MP Library; see the file COPYING.LIB. If |
|
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl Suite 330, Boston, MA 02111-1307, USA. |
dnl Suite 330, Boston, MA 02111-1307, USA. |
|
|
|
|
include(`../config.m4') |
include(`../config.m4') |
|
|
|
|
|
C K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product |
|
C (measured on the speed difference between 25 and 50 limbs, which is |
|
C roughly the Karatsuba recursing range). |
|
|
|
|
dnl These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for |
dnl These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for |
dnl some comments. |
dnl some comments. |
|
|
deflit(KARATSUBA_SQR_THRESHOLD_MAX, 66) |
deflit(SQR_KARATSUBA_THRESHOLD_MAX, 66) |
|
|
ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE', |
ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE', |
`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)') |
`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)') |
|
|
m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD') |
m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD') |
deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3)) |
deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3)) |
|
|
|
|
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); |
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); |
C |
C |
C With a KARATSUBA_SQR_THRESHOLD around 50 this code is about 1500 bytes, |
C With a SQR_KARATSUBA_THRESHOLD around 50 this code is about 1500 bytes, |
C which is quite a bit, but is considered good value since squares big |
C which is quite a bit, but is considered good value since squares big |
C enough to use most of the code will be spending quite a few cycles in it. |
C enough to use most of the code will be spending quite a few cycles in it. |
|
|
Line 51 defframe(PARAM_SIZE,12) |
|
Line 50 defframe(PARAM_SIZE,12) |
|
defframe(PARAM_SRC, 8) |
defframe(PARAM_SRC, 8) |
defframe(PARAM_DST, 4) |
defframe(PARAM_DST, 4) |
|
|
.text |
TEXT |
ALIGN(32) |
ALIGN(32) |
PROLOGUE(mpn_sqr_basecase) |
PROLOGUE(mpn_sqr_basecase) |
deflit(`FRAME',0) |
deflit(`FRAME',0) |
Line 468 Zdisp( movl, disp_src,(%esi), %eax) |
|
Line 467 Zdisp( movl, disp_src,(%esi), %eax) |
|
|
|
mull %ebp |
mull %ebp |
|
|
dnl Zdisp( addl %ebx, disp_src,(%edi)) |
Zdisp( addl, %ebx, disp_dst,(%edi)) |
addl %ebx, disp_dst(%edi) |
|
ifelse(forloop_last,0, |
ifelse(forloop_last,0, |
` movl $0, %ebx') |
` movl $0, %ebx') |
|
|