[BACK]Return to sqr_basecase.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86 / k7

Diff for /OpenXM_contrib/gmp/mpn/x86/k7/Attic/sqr_basecase.asm between version 1.1.1.1 and 1.1.1.2

version 1.1.1.1, 2000/09/09 14:12:42 version 1.1.1.2, 2003/08/25 16:06:29
Line 1 
Line 1 
 dnl  AMD K7 mpn_sqr_basecase -- square an mpn number.  dnl  AMD K7 mpn_sqr_basecase -- square an mpn number.
 dnl  
 dnl  K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product  
 dnl  (measured on the speed difference between 25 and 50 limbs, which is  
 dnl  roughly the Karatsuba recursing range).  
   
   dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl  Copyright (C) 1999, 2000 Free Software Foundation, Inc.  
 dnl  dnl
 dnl  This file is part of the GNU MP Library.  dnl  This file is part of the GNU MP Library.
 dnl  dnl
Line 24  dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
Line 19  dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
 dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -  dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
 dnl  Suite 330, Boston, MA 02111-1307, USA.  dnl  Suite 330, Boston, MA 02111-1307, USA.
   
   
 include(`../config.m4')  include(`../config.m4')
   
   
   C K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product
   C     (measured on the speed difference between 25 and 50 limbs, which is
   C     roughly the Karatsuba recursing range).
   
   
 dnl  These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for  dnl  These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for
 dnl  some comments.  dnl  some comments.
   
 deflit(KARATSUBA_SQR_THRESHOLD_MAX, 66)  deflit(SQR_KARATSUBA_THRESHOLD_MAX, 66)
   
 ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE',  ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE',
 `define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)')  `define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)')
   
 m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD')  m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD')
 deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3))  deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3))
   
   
 C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);  C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
 C  C
 C With a KARATSUBA_SQR_THRESHOLD around 50 this code is about 1500 bytes,  C With a SQR_KARATSUBA_THRESHOLD around 50 this code is about 1500 bytes,
 C which is quite a bit, but is considered good value since squares big  C which is quite a bit, but is considered good value since squares big
 C enough to use most of the code will be spending quite a few cycles in it.  C enough to use most of the code will be spending quite a few cycles in it.
   
Line 51  defframe(PARAM_SIZE,12)
Line 50  defframe(PARAM_SIZE,12)
 defframe(PARAM_SRC, 8)  defframe(PARAM_SRC, 8)
 defframe(PARAM_DST, 4)  defframe(PARAM_DST, 4)
   
         .text          TEXT
         ALIGN(32)          ALIGN(32)
 PROLOGUE(mpn_sqr_basecase)  PROLOGUE(mpn_sqr_basecase)
 deflit(`FRAME',0)  deflit(`FRAME',0)
Line 468  Zdisp(  movl, disp_src,(%esi), %eax)
Line 467  Zdisp(  movl, disp_src,(%esi), %eax)
   
         mull    %ebp          mull    %ebp
   
 dnl Zdisp(      addl    %ebx, disp_src,(%edi))  Zdisp(  addl,   %ebx, disp_dst,(%edi))
         addl    %ebx, disp_dst(%edi)  
 ifelse(forloop_last,0,  ifelse(forloop_last,0,
 `       movl    $0, %ebx')  `       movl    $0, %ebx')
   

Legend:
Removed from v.1.1.1.1  
changed lines
  Added in v.1.1.1.2

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>