version 1.1.1.1, 2000/09/09 14:12:44 |
version 1.1.1.2, 2003/08/25 16:06:29 |
|
|
dnl Intel P6 mpn_sqr_basecase -- square an mpn number. |
dnl Intel P6 mpn_sqr_basecase -- square an mpn number. |
dnl |
|
dnl P6: approx 4.0 cycles per cross product, or 7.75 cycles per triangular |
|
dnl product (measured on the speed difference between 20 and 40 limbs, |
|
dnl which is the Karatsuba recursing range). |
|
|
|
|
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. |
dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. |
|
dnl |
dnl |
dnl This file is part of the GNU MP Library. |
dnl This file is part of the GNU MP Library. |
dnl |
dnl |
Line 24 dnl License along with the GNU MP Library; see the fi |
|
Line 19 dnl License along with the GNU MP Library; see the fi |
|
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl Suite 330, Boston, MA 02111-1307, USA. |
dnl Suite 330, Boston, MA 02111-1307, USA. |
|
|
|
|
include(`../config.m4') |
include(`../config.m4') |
|
|
|
|
|
C P6: approx 4.0 cycles per cross product, or 7.75 cycles per triangular |
|
C product (measured on the speed difference between 20 and 40 limbs, |
|
C which is the Karatsuba recursing range). |
|
|
|
|
dnl These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for |
dnl These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for |
dnl a description. The only difference here is that UNROLL_COUNT can go up |
dnl a description. The only difference here is that UNROLL_COUNT can go up |
dnl to 64 (not 63) making KARATSUBA_SQR_THRESHOLD_MAX 67. |
dnl to 64 (not 63) making SQR_KARATSUBA_THRESHOLD_MAX 67. |
|
|
deflit(KARATSUBA_SQR_THRESHOLD_MAX, 67) |
deflit(SQR_KARATSUBA_THRESHOLD_MAX, 67) |
|
|
ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE', |
ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE', |
`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)') |
`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)') |
|
|
m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD') |
m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD') |
deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3)) |
deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3)) |
|
|
|
|
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); |
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); |
Line 58 defframe(PARAM_SIZE,12) |
|
Line 57 defframe(PARAM_SIZE,12) |
|
defframe(PARAM_SRC, 8) |
defframe(PARAM_SRC, 8) |
defframe(PARAM_DST, 4) |
defframe(PARAM_DST, 4) |
|
|
.text |
TEXT |
ALIGN(32) |
ALIGN(32) |
PROLOGUE(mpn_sqr_basecase) |
PROLOGUE(mpn_sqr_basecase) |
deflit(`FRAME',0) |
deflit(`FRAME',0) |