[BACK]Return to copyi.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86

Diff for /OpenXM_contrib/gmp/mpn/x86/Attic/copyi.asm between version 1.1.1.1 and 1.1.1.2

version 1.1.1.1, 2000/09/09 14:12:42 version 1.1.1.2, 2003/08/25 16:06:27
Line 1 
Line 1 
 dnl  x86 mpn_copyi -- copy limb vector, incrementing.  dnl  x86 mpn_copyi -- copy limb vector, incrementing.
   
   dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl  Copyright (C) 1999, 2000 Free Software Foundation, Inc.  
 dnl  dnl
 dnl  This file is part of the GNU MP Library.  dnl  This file is part of the GNU MP Library.
 dnl  dnl
Line 20  dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
Line 19  dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
 dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -  dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
 dnl  Suite 330, Boston, MA 02111-1307, USA.  dnl  Suite 330, Boston, MA 02111-1307, USA.
   
   
 include(`../config.m4')  include(`../config.m4')
   
   
   C     cycles/limb  startup (approx)
   C P5:     1.0         35
   C P6:     0.75        45
   C K6:     1.0         30
   C K7:     1.3         65
   C P4:     1.0        120
   C
   C (Startup time includes some function call overheads.)
   
   
 C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);  C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
 C  C
 C Copy src,size to dst,size, working from low to high addresses.  C Copy src,size to dst,size, working from low to high addresses.
Line 31  C
Line 39  C
 C The code here is very generic and can be expected to be reasonable on all  C The code here is very generic and can be expected to be reasonable on all
 C the x86 family.  C the x86 family.
 C  C
 C P5 - 1.0 cycles/limb.  C P6 -  An MMX based copy was tried, but was found to be slower than a rep
   C       movs in all cases.  The fastest MMX found was 0.8 cycles/limb (when
   C       fully aligned).  A rep movs seems to have a startup time of about 15
   C       cycles, but doing something special for small sizes could lead to a
   C       branch misprediction that would destroy any saving.  For now a plain
   C       rep movs seems ok.
 C  C
 C P6 - 0.75 cycles/limb.  An MMX based copy was tried, but was found to be  C K62 - We used to have a big chunk of code doing an MMX copy at 0.56 c/l if
 C      slower than a rep movs in all cases.  The fastest MMX found was 0.8  C       aligned or a 1.0 rep movs if not.  But that seemed excessive since
 C      cycles/limb (when fully aligned).  A rep movs seems to have a startup  C       it only got an advantage half the time, and even then only showed it
 C      time of about 15 cycles, but doing something special for small sizes  C       above 50 limbs or so.
 C      could lead to a branch misprediction that would destroy any saving.  
 C      For now a plain rep movs seems ok for P6.  
   
 defframe(PARAM_SIZE,12)  defframe(PARAM_SIZE,12)
 defframe(PARAM_SRC, 8)  defframe(PARAM_SRC, 8)
 defframe(PARAM_DST, 4)  defframe(PARAM_DST, 4)
 deflit(`FRAME',0)  deflit(`FRAME',0)
   
         .text          TEXT
         ALIGN(32)          ALIGN(32)
   
         C eax   saved esi          C eax   saved esi
Line 66  PROLOGUE(mpn_copyi)
Line 77  PROLOGUE(mpn_copyi)
   
         movl    PARAM_DST, %edi          movl    PARAM_DST, %edi
   
         cld     C better safe than sorry, see mpn/x86/README.family          cld     C better safe than sorry, see mpn/x86/README
   
         rep          rep
         movsl          movsl

Legend:
Removed from v.1.1.1.1  
changed lines
  Added in v.1.1.1.2

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>