[BACK]Return to sqr_basecase.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86 / k6

Diff for /OpenXM_contrib/gmp/mpn/x86/k6/Attic/sqr_basecase.asm between version 1.1.1.1 and 1.1.1.2

version 1.1.1.1, 2000/09/09 14:12:42 version 1.1.1.2, 2003/08/25 16:06:28
Line 1 
Line 1 
 dnl  AMD K6 mpn_sqr_basecase -- square an mpn number.  dnl  AMD K6 mpn_sqr_basecase -- square an mpn number.
 dnl  
 dnl  K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular  
 dnl  product (measured on the speed difference between 17 and 33 limbs,  
 dnl  which is roughly the Karatsuba recursing range).  
   
   dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl  Copyright (C) 1999, 2000 Free Software Foundation, Inc.  
 dnl  dnl
 dnl  This file is part of the GNU MP Library.  dnl  This file is part of the GNU MP Library.
 dnl  dnl
Line 24  dnl  License along with the GNU MP Library; see the fi
Line 19  dnl  License along with the GNU MP Library; see the fi
 dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -  dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
 dnl  Suite 330, Boston, MA 02111-1307, USA.  dnl  Suite 330, Boston, MA 02111-1307, USA.
   
   
 include(`../config.m4')  include(`../config.m4')
   
   
 dnl  KARATSUBA_SQR_THRESHOLD_MAX is the maximum KARATSUBA_SQR_THRESHOLD this  C K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular
   C     product (measured on the speed difference between 17 and 33 limbs,
   C     which is roughly the Karatsuba recursing range).
   
   
   dnl  SQR_KARATSUBA_THRESHOLD_MAX is the maximum SQR_KARATSUBA_THRESHOLD this
 dnl  code supports.  This value is used only by the tune program to know  dnl  code supports.  This value is used only by the tune program to know
 dnl  what it can go up to.  (An attempt to compile with a bigger value will  dnl  what it can go up to.  (An attempt to compile with a bigger value will
 dnl  trigger some m4_assert()s in the code, making the build fail.)  dnl  trigger some m4_assert()s in the code, making the build fail.)
 dnl  dnl
 dnl  The value is determined by requiring the displacements in the unrolled  dnl  The value is determined by requiring the displacements in the unrolled
 dnl  addmul to fit in single bytes.  This means a maximum UNROLL_COUNT of  dnl  addmul to fit in single bytes.  This means a maximum UNROLL_COUNT of
 dnl  63, giving a maximum KARATSUBA_SQR_THRESHOLD of 66.  dnl  63, giving a maximum SQR_KARATSUBA_THRESHOLD of 66.
   
 deflit(KARATSUBA_SQR_THRESHOLD_MAX, 66)  deflit(SQR_KARATSUBA_THRESHOLD_MAX, 66)
   
   
 dnl  Allow a value from the tune program to override config.m4.  dnl  Allow a value from the tune program to override config.m4.
   
 ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE',  ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE',
 `define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)')  `define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)')
   
   
 dnl  UNROLL_COUNT is the number of code chunks in the unrolled addmul.  The  dnl  UNROLL_COUNT is the number of code chunks in the unrolled addmul.  The
 dnl  number required is determined by KARATSUBA_SQR_THRESHOLD, since  dnl  number required is determined by SQR_KARATSUBA_THRESHOLD, since
 dnl  mpn_sqr_basecase only needs to handle sizes < KARATSUBA_SQR_THRESHOLD.  dnl  mpn_sqr_basecase only needs to handle sizes < SQR_KARATSUBA_THRESHOLD.
 dnl  dnl
 dnl  The first addmul is the biggest, and this takes the second least  dnl  The first addmul is the biggest, and this takes the second least
 dnl  significant limb and multiplies it by the third least significant and  dnl  significant limb and multiplies it by the third least significant and
 dnl  up.  Hence for a maximum operand size of KARATSUBA_SQR_THRESHOLD-1  dnl  up.  Hence for a maximum operand size of SQR_KARATSUBA_THRESHOLD-1
 dnl  limbs, UNROLL_COUNT needs to be KARATSUBA_SQR_THRESHOLD-3.  dnl  limbs, UNROLL_COUNT needs to be SQR_KARATSUBA_THRESHOLD-3.
   
 m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD')  m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD')
 deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3))  deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3))
   
   
 C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);  C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
Line 81  defframe(PARAM_SIZE,12)
Line 80  defframe(PARAM_SIZE,12)
 defframe(PARAM_SRC, 8)  defframe(PARAM_SRC, 8)
 defframe(PARAM_DST, 4)  defframe(PARAM_DST, 4)
   
         .text          TEXT
         ALIGN(32)          ALIGN(32)
 PROLOGUE(mpn_sqr_basecase)  PROLOGUE(mpn_sqr_basecase)
 deflit(`FRAME',0)  deflit(`FRAME',0)
Line 661  L(diag):
Line 660  L(diag):
 C -----------------------------------------------------------------------------  C -----------------------------------------------------------------------------
 ifdef(`PIC',`  ifdef(`PIC',`
 L(pic_calc):  L(pic_calc):
         C See README.family about old gas bugs          C See mpn/x86/README about old gas bugs
         addl    (%esp), %ecx          addl    (%esp), %ecx
         addl    $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx          addl    $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx
         addl    %edx, %ecx          addl    %edx, %ecx

Legend:
Removed from v.1.1.1.1  
changed lines
  Added in v.1.1.1.2

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>