Annotation of OpenXM_contrib/gmp/mpn/x86/pentium4/copyi.asm, Revision 1.1
1.1 ! ohara 1: dnl Pentium-4 mpn_copyi -- copy limb vector, incrementing.
! 2: dnl
! 3:
! 4: dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
! 5: dnl
! 6: dnl This file is part of the GNU MP Library.
! 7: dnl
! 8: dnl The GNU MP Library is free software; you can redistribute it and/or
! 9: dnl modify it under the terms of the GNU Lesser General Public License as
! 10: dnl published by the Free Software Foundation; either version 2.1 of the
! 11: dnl License, or (at your option) any later version.
! 12: dnl
! 13: dnl The GNU MP Library is distributed in the hope that it will be useful,
! 14: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
! 15: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 16: dnl Lesser General Public License for more details.
! 17: dnl
! 18: dnl You should have received a copy of the GNU Lesser General Public
! 19: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
! 20: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
! 21: dnl Suite 330, Boston, MA 02111-1307, USA.
! 22:
! 23:
! 24: dnl The rep/movsl is very slow for small blocks on pentium4. Its startup
! 25: dnl time seems to be about 110 cycles. It then copies at a rate of one
! 26: dnl limb per cycle. We therefore fall back to an open-coded 2 c/l copying
! 27: dnl loop for smaller sizes.
! 28:
! 29: dnl Ultimately, we may want to use 64-bit movq or 128-bit movdqu in some
! 30: dnl nifty unrolled arrangement. Clearly, that could reach much higher
! 31: dnl speeds, at least for large blocks.
! 32:
! 33: include(`../config.m4')
! 34:
! 35:
! 36: defframe(PARAM_SIZE, 12) C limb count; offsets presumably relative to the stack pointer at entry (defframe is defined outside this file)
! 37: defframe(PARAM_SRC, 8) C address of the first source limb
! 38: defframe(PARAM_DST, 4) C address of the first destination limb
! 39:
! 40: TEXT
! 41: ALIGN(8)
! 42: C mpn_copyi: copy PARAM_SIZE 4-byte limbs from PARAM_SRC to PARAM_DST, walking upwards in memory.
! 43: PROLOGUE(mpn_copyi)
! 44: deflit(`FRAME',0)
! 45: C Sizes above 150 limbs take the rep movsl path; everything else uses the open-coded loop.
! 46: movl PARAM_SIZE, %ecx C ecx = limb count, used by both paths
! 47: cmpl $150, %ecx
! 48: jg L(replmovs) C signed test: size > 150
! 49: C Small-size path: eax = source pointer, edx = destination pointer, ebx = limb in transit.
! 50: movl PARAM_SRC, %eax
! 51: movl PARAM_DST, %edx
! 52: movl %ebx, PARAM_SIZE C stash ebx in the size slot, whose value is already in ecx
! 53: testl %ecx, %ecx
! 54: jz L(end) C size == 0: skip the loop, which would otherwise run before testing the count
! 55:
! 56: L(loop):
! 57: movl (%eax), %ebx C load one limb
! 58: leal 4(%eax), %eax C advance source by one limb; leal does not modify flags
! 59: addl $-1, %ecx C count down; the zero flag set here is consumed by the jnz below
! 60: movl %ebx, (%edx) C store the limb; movl does not modify flags
! 61: leal 4(%edx), %edx C advance destination by one limb, flags still those of the addl
! 62:
! 63: jnz L(loop) C repeat until the count has reached zero
! 64:
! 65: L(end):
! 66: movl PARAM_SIZE, %ebx C restore ebx from the slot it was stashed in
! 67: ret
! 68:
! 69: L(replmovs):
! 70: cld C better safe than sorry, see mpn/x86/README
! 71: C Large-size path: park esi/edi in eax/edx, because rep movsl uses esi/edi as its pointers.
! 72: movl %esi, %eax
! 73: movl PARAM_SRC, %esi
! 74: movl %edi, %edx
! 75: movl PARAM_DST, %edi
! 76: C ecx still holds the limb count loaded at entry; rep movsl copies that many 4-byte words.
! 77: rep
! 78: movsl
! 79: C Put the caller's esi/edi values back before returning.
! 80: movl %eax, %esi
! 81: movl %edx, %edi
! 82:
! 83: ret
! 84:
! 85: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>