Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/rshift.asm, Revision 1.1
1.1 ! maekawa 1: dnl Intel Pentium mpn_rshift -- mpn right shift.
! 2: dnl
! 3: dnl cycles/limb
! 4: dnl P5,P54: 6.0
! 5: dnl P55: 5.375
! 6:
! 7:
! 8: dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
! 9: dnl Foundation, Inc.
! 10: dnl
! 11: dnl This file is part of the GNU MP Library.
! 12: dnl
! 13: dnl The GNU MP Library is free software; you can redistribute it and/or
! 14: dnl modify it under the terms of the GNU Lesser General Public License as
! 15: dnl published by the Free Software Foundation; either version 2.1 of the
! 16: dnl License, or (at your option) any later version.
! 17: dnl
! 18: dnl The GNU MP Library is distributed in the hope that it will be useful,
! 19: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
! 20: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 21: dnl Lesser General Public License for more details.
! 22: dnl
! 23: dnl You should have received a copy of the GNU Lesser General Public
! 24: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
! 25: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
! 26: dnl Suite 330, Boston, MA 02111-1307, USA.
! 27:
! 28:
! 29: include(`../config.m4')
! 30:
! 31:
! 32: C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
! 33: C unsigned shift);
! 34: C
! 35: C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does,
! 36: C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere.
! 37: C dst receives src shifted right by shift bits; on return %eax holds the bits shifted out of the lowest source limb, placed at the top of the limb.
! 38: defframe(PARAM_SHIFT,16)
! 39: defframe(PARAM_SIZE, 12)
! 40: defframe(PARAM_SRC, 8)
! 41: defframe(PARAM_DST, 4)
! 42:
! 43: .text
! 44: ALIGN(8)
! 45: PROLOGUE(mpn_rshift)
! 46: C Save %edi, %esi, %ebx and %ebp; both exit paths pop them again before ret.
! 47: pushl %edi
! 48: pushl %esi
! 49: pushl %ebx
! 50: pushl %ebp
! 51: deflit(`FRAME',16)
! 52: C Load the arguments: %edi = dst, %esi = src, %ebp = size, %ecx = shift.
! 53: movl PARAM_DST,%edi
! 54: movl PARAM_SRC,%esi
! 55: movl PARAM_SIZE,%ebp
! 56: movl PARAM_SHIFT,%ecx
! 57:
! 58: C We can use faster code for shift-by-1 under certain conditions.
! 59: cmp $1,%ecx
! 60: jne L(normal) C any other shift count takes the general path
! 61: leal 4(%edi),%eax C %eax = address of dst limb 1
! 62: cmpl %esi,%eax
! 63: jnc L(special) C jump if res_ptr + 1 >= s_ptr
! 64: leal (%edi,%ebp,4),%eax C %eax = address just past the last dst limb
! 65: cmpl %eax,%esi
! 66: jnc L(special) C jump if s_ptr >= res_ptr + size
! 67: C Fall through when res_ptr + 1 < s_ptr < res_ptr + size, in limb units.
! 68: L(normal):
! 69: movl (%esi),%edx C %edx = lowest source limb
! 70: addl $4,%esi
! 71: xorl %eax,%eax
! 72: shrdl( %cl, %edx, %eax) C compute carry limb
! 73: pushl %eax C push carry limb onto stack
! 74:
! 75: decl %ebp C size-1 limbs are produced by the two loops below
! 76: pushl %ebp C keep size-1 for the remainder count
! 77: shrl $3,%ebp C number of 8-limb iterations
! 78: jz L(end)
! 79:
! 80: movl (%edi),%eax C fetch destination cache line
! 81:
! 82: ALIGN(4)
! 83: L(oop): movl 28(%edi),%eax C fetch destination cache line
! 84: movl %edx,%ebx C %ebx = limb carried over from the previous iteration
! 85: C Eight limbs per iteration: each shrdl step shifts a limb right by %cl and fills its top bits from the next higher limb.
! 86: movl (%esi),%eax
! 87: movl 4(%esi),%edx
! 88: shrdl( %cl, %eax, %ebx)
! 89: shrdl( %cl, %edx, %eax)
! 90: movl %ebx,(%edi)
! 91: movl %eax,4(%edi)
! 92:
! 93: movl 8(%esi),%ebx
! 94: movl 12(%esi),%eax
! 95: shrdl( %cl, %ebx, %edx)
! 96: shrdl( %cl, %eax, %ebx)
! 97: movl %edx,8(%edi)
! 98: movl %ebx,12(%edi)
! 99:
! 100: movl 16(%esi),%edx
! 101: movl 20(%esi),%ebx
! 102: shrdl( %cl, %edx, %eax)
! 103: shrdl( %cl, %ebx, %edx)
! 104: movl %eax,16(%edi)
! 105: movl %edx,20(%edi)
! 106:
! 107: movl 24(%esi),%eax
! 108: movl 28(%esi),%edx
! 109: shrdl( %cl, %eax, %ebx)
! 110: shrdl( %cl, %edx, %eax)
! 111: movl %ebx,24(%edi)
! 112: movl %eax,28(%edi)
! 113:
! 114: addl $32,%esi C advance both pointers by 8 limbs
! 115: addl $32,%edi
! 116: decl %ebp
! 117: jnz L(oop)
! 118:
! 119: L(end): popl %ebp C recover size-1
! 120: andl $7,%ebp C limbs left over after the 8-limb loop
! 121: jz L(end2)
! 122: L(oop2):
! 123: movl (%esi),%eax
! 124: shrdl( %cl,%eax,%edx) C compute result limb
! 125: movl %edx,(%edi)
! 126: movl %eax,%edx C the limb just read is the one shifted next time round
! 127: addl $4,%esi
! 128: addl $4,%edi
! 129: decl %ebp
! 130: jnz L(oop2)
! 131:
! 132: L(end2):
! 133: shrl %cl,%edx C compute most significant limb
! 134: movl %edx,(%edi) C store it
! 135:
! 136: popl %eax C pop carry limb
! 137: C Restore the saved registers in reverse push order.
! 138: popl %ebp
! 139: popl %ebx
! 140: popl %esi
! 141: popl %edi
! 142: ret
! 143:
! 144:
! 145: C We loop from the most significant end of the arrays, which is only
! 146: C permissible if the source and destination don't overlap or if dst + 1 >= src, since the
! 147: C function is documented to work for overlapping source and destination.
! 148:
! 149: L(special):
! 150: leal -4(%edi,%ebp,4),%edi C %edi = address of the highest dst limb
! 151: leal -4(%esi,%ebp,4),%esi C %esi = address of the highest src limb
! 152:
! 153: movl (%esi),%edx C %edx = highest source limb
! 154: subl $4,%esi
! 155:
! 156: decl %ebp
! 157: pushl %ebp C keep size-1 for the remainder count
! 158: shrl $3,%ebp C number of 8-limb iterations
! 159:
! 160: shrl %edx C shift the top limb by one; its low bit goes to the carry flag
! 161: incl %ebp C incl then decl sets the zero flag from %ebp and leaves the carry flag alone
! 162: decl %ebp
! 163: jz L(Lend)
! 164:
! 165: movl (%edi),%eax C fetch destination cache line
! 166:
! 167: ALIGN(4)
! 168: L(Loop):
! 169: movl -28(%edi),%eax C fetch destination cache line
! 170: movl %edx,%ebx C %ebx = finished limb carried over from the previous iteration
! 171: C Eight limbs per iteration, highest first; each rcrl shifts a limb right by one, taking its top bit from the carry flag and leaving its low bit there.
! 172: movl (%esi),%eax
! 173: movl -4(%esi),%edx
! 174: rcrl %eax
! 175: movl %ebx,(%edi)
! 176: rcrl %edx
! 177: movl %eax,-4(%edi)
! 178:
! 179: movl -8(%esi),%ebx
! 180: movl -12(%esi),%eax
! 181: rcrl %ebx
! 182: movl %edx,-8(%edi)
! 183: rcrl %eax
! 184: movl %ebx,-12(%edi)
! 185:
! 186: movl -16(%esi),%edx
! 187: movl -20(%esi),%ebx
! 188: rcrl %edx
! 189: movl %eax,-16(%edi)
! 190: rcrl %ebx
! 191: movl %edx,-20(%edi)
! 192:
! 193: movl -24(%esi),%eax
! 194: movl -28(%esi),%edx
! 195: rcrl %eax
! 196: movl %ebx,-24(%edi)
! 197: rcrl %edx
! 198: movl %eax,-28(%edi)
! 199:
! 200: leal -32(%esi),%esi C use leal not to clobber carry
! 201: leal -32(%edi),%edi
! 202: decl %ebp
! 203: jnz L(Loop)
! 204:
! 205: L(Lend):
! 206: popl %ebp C recover size-1
! 207: sbbl %eax,%eax C save carry in %eax
! 208: andl $7,%ebp C limbs left over; andl also clears the carry flag, hence the save above
! 209: jz L(Lend2)
! 210: addl %eax,%eax C restore carry from eax
! 211: L(Loop2):
! 212: movl %edx,%ebx C %ebx = finished limb, stored below
! 213: movl (%esi),%edx
! 214: rcrl %edx
! 215: movl %ebx,(%edi)
! 216:
! 217: leal -4(%esi),%esi C use leal not to clobber carry
! 218: leal -4(%edi),%edi
! 219: decl %ebp
! 220: jnz L(Loop2)
! 221:
! 222: jmp L(L1) C carry flag is already live here, skip the restore
! 223: L(Lend2):
! 224: addl %eax,%eax C restore carry from eax
! 225: L(L1): movl %edx,(%edi) C store last limb
! 226:
! 227: movl $0,%eax C movl is used to clear %eax because it leaves the carry flag intact
! 228: rcrl %eax C return value: the bit shifted out of the lowest limb, in bit 31
! 229: C Restore the registers saved at function entry, in reverse push order.
! 230: popl %ebp
! 231: popl %ebx
! 232: popl %esi
! 233: popl %edi
! 234: ret
! 235:
! 236: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>