=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/x86/k6/Attic/sqr_basecase.asm,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1.1.1 -r1.1.1.2 --- OpenXM_contrib/gmp/mpn/x86/k6/Attic/sqr_basecase.asm 2000/09/09 14:12:42 1.1.1.1 +++ OpenXM_contrib/gmp/mpn/x86/k6/Attic/sqr_basecase.asm 2003/08/25 16:06:28 1.1.1.2 @@ -1,11 +1,6 @@ dnl AMD K6 mpn_sqr_basecase -- square an mpn number. -dnl -dnl K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular -dnl product (measured on the speed difference between 17 and 33 limbs, -dnl which is roughly the Karatsuba recursing range). - -dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -24,39 +19,43 @@ dnl License along with the GNU MP Library; see the fi dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') -dnl KARATSUBA_SQR_THRESHOLD_MAX is the maximum KARATSUBA_SQR_THRESHOLD this +C K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular +C product (measured on the speed difference between 17 and 33 limbs, +C which is roughly the Karatsuba recursing range). + + +dnl SQR_KARATSUBA_THRESHOLD_MAX is the maximum SQR_KARATSUBA_THRESHOLD this dnl code supports. This value is used only by the tune program to know dnl what it can go up to. (An attempt to compile with a bigger value will dnl trigger some m4_assert()s in the code, making the build fail.) dnl dnl The value is determined by requiring the displacements in the unrolled dnl addmul to fit in single bytes. This means a maximum UNROLL_COUNT of -dnl 63, giving a maximum KARATSUBA_SQR_THRESHOLD of 66. +dnl 63, giving a maximum SQR_KARATSUBA_THRESHOLD of 66. -deflit(KARATSUBA_SQR_THRESHOLD_MAX, 66) +deflit(SQR_KARATSUBA_THRESHOLD_MAX, 66) dnl Allow a value from the tune program to override config.m4. -ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE', -`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)') +ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE', +`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)') dnl UNROLL_COUNT is the number of code chunks in the unrolled addmul. The -dnl number required is determined by KARATSUBA_SQR_THRESHOLD, since -dnl mpn_sqr_basecase only needs to handle sizes < KARATSUBA_SQR_THRESHOLD. +dnl number required is determined by SQR_KARATSUBA_THRESHOLD, since +dnl mpn_sqr_basecase only needs to handle sizes < SQR_KARATSUBA_THRESHOLD. dnl dnl The first addmul is the biggest, and this takes the second least dnl significant limb and multiplies it by the third least significant and -dnl up. Hence for a maximum operand size of KARATSUBA_SQR_THRESHOLD-1 -dnl limbs, UNROLL_COUNT needs to be KARATSUBA_SQR_THRESHOLD-3. +dnl up. Hence for a maximum operand size of SQR_KARATSUBA_THRESHOLD-1 +dnl limbs, UNROLL_COUNT needs to be SQR_KARATSUBA_THRESHOLD-3. -m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD') -deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3)) +m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD') +deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3)) C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); @@ -81,7 +80,7 @@ defframe(PARAM_SIZE,12) defframe(PARAM_SRC, 8) defframe(PARAM_DST, 4) - .text + TEXT ALIGN(32) PROLOGUE(mpn_sqr_basecase) deflit(`FRAME',0) @@ -661,7 +660,7 @@ L(diag): C ----------------------------------------------------------------------------- ifdef(`PIC',` L(pic_calc): - C See README.family about old gas bugs + C See mpn/x86/README about old gas bugs addl (%esp), %ecx addl $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx addl %edx, %ecx