Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/lshift.asm, Revision 1.1
1.1 ! maekawa 1: dnl Intel Pentium mpn_lshift -- mpn left shift.
! 2: dnl
! 3: dnl cycles/limb
! 4: dnl P5,P54: 6.0
! 5: dnl P55: 5.375
! 6:
! 7:
! 8: dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
! 9: dnl Foundation, Inc.
! 10: dnl
! 11: dnl This file is part of the GNU MP Library.
! 12: dnl
! 13: dnl The GNU MP Library is free software; you can redistribute it and/or
! 14: dnl modify it under the terms of the GNU Lesser General Public License as
! 15: dnl published by the Free Software Foundation; either version 2.1 of the
! 16: dnl License, or (at your option) any later version.
! 17: dnl
! 18: dnl The GNU MP Library is distributed in the hope that it will be useful,
! 19: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
! 20: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 21: dnl Lesser General Public License for more details.
! 22: dnl
! 23: dnl You should have received a copy of the GNU Lesser General Public
! 24: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
! 25: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
! 26: dnl Suite 330, Boston, MA 02111-1307, USA.
! 27:
! 28:
! 29: include(`../config.m4')
! 30:
! 31:
! 32: C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
! 33: C unsigned shift);
! 34: C
! 35: C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does,
! 36: C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere.
! 37:
! 38: defframe(PARAM_SHIFT,16) C shift: 4th argument of the prototype above
! 39: defframe(PARAM_SIZE, 12) C size: 3rd argument
! 40: defframe(PARAM_SRC, 8) C src: 2nd argument
! 41: defframe(PARAM_DST, 4) C dst: 1st argument; offsets step by 4, presumably stack offsets at entry (defframe is defined elsewhere)
! 42:
! 43: .text
! 44: ALIGN(8)
! 45: PROLOGUE(mpn_lshift)
! 46: C Left-shift the PARAM_SIZE limbs at PARAM_SRC by PARAM_SHIFT bits into PARAM_DST; %eax returns the shifted-out bits.
! 47: pushl %edi
! 48: pushl %esi
! 49: pushl %ebx
! 50: pushl %ebp
! 51: deflit(`FRAME',16) C matches the four pushl above (4 x 4 bytes); presumably used by the PARAM_* accessors
! 52:
! 53: movl PARAM_DST,%edi C %edi = dst pointer
! 54: movl PARAM_SRC,%esi C %esi = src pointer
! 55: movl PARAM_SIZE,%ebp C %ebp = limb count
! 56: movl PARAM_SHIFT,%ecx C %ecx = shift count, consumed through %cl
! 57:
! 58: C We can use faster code for shift-by-1 under certain conditions.
! 59: cmp $1,%ecx
! 60: jne L(normal)
! 61: leal 4(%esi),%eax
! 62: cmpl %edi,%eax
! 63: jnc L(special) C jump if s_ptr + 1 >= res_ptr
! 64: leal (%esi,%ebp,4),%eax
! 65: cmpl %eax,%edi
! 66: jnc L(special) C jump if res_ptr >= s_ptr + size
! 67:
! 68: L(normal):
! 69: leal -4(%edi,%ebp,4),%edi C %edi = address of the highest-addressed dst limb
! 70: leal -4(%esi,%ebp,4),%esi C %esi = address of the highest-addressed src limb
! 71:
! 72: movl (%esi),%edx C %edx = top src limb
! 73: subl $4,%esi
! 74: xorl %eax,%eax
! 75: shldl( %cl, %edx, %eax) C compute carry limb
! 76: pushl %eax C push carry limb onto stack
! 77:
! 78: decl %ebp C size-1: the lowest limb is handled separately at L(end2)
! 79: pushl %ebp C keep size-1 so the remainder can be recovered at L(end)
! 80: shrl $3,%ebp C %ebp = number of 8-limb unrolled passes
! 81: jz L(end)
! 82:
! 83: movl (%edi),%eax C fetch destination cache line
! 84:
! 85: ALIGN(4)
! 86: L(oop): movl -28(%edi),%eax C fetch destination cache line
! 87: movl %edx,%ebx C loaded %eax value is discarded; %ebx takes over the pending high limb
! 88: C shldl( %cl, lo, hi) is an m4 macro defined elsewhere; presumably hi = (hi << cl) | top cl bits of lo
! 89: movl (%esi),%eax
! 90: movl -4(%esi),%edx
! 91: shldl( %cl, %eax, %ebx)
! 92: shldl( %cl, %edx, %eax)
! 93: movl %ebx,(%edi)
! 94: movl %eax,-4(%edi)
! 95:
! 96: movl -8(%esi),%ebx
! 97: movl -12(%esi),%eax
! 98: shldl( %cl, %ebx, %edx)
! 99: shldl( %cl, %eax, %ebx)
! 100: movl %edx,-8(%edi)
! 101: movl %ebx,-12(%edi)
! 102:
! 103: movl -16(%esi),%edx
! 104: movl -20(%esi),%ebx
! 105: shldl( %cl, %edx, %eax)
! 106: shldl( %cl, %ebx, %edx)
! 107: movl %eax,-16(%edi)
! 108: movl %edx,-20(%edi)
! 109:
! 110: movl -24(%esi),%eax
! 111: movl -28(%esi),%edx
! 112: shldl( %cl, %eax, %ebx)
! 113: shldl( %cl, %edx, %eax)
! 114: movl %ebx,-24(%edi)
! 115: movl %eax,-28(%edi)
! 116:
! 117: subl $32,%esi C step both pointers down by the 8 limbs just processed
! 118: subl $32,%edi
! 119: decl %ebp
! 120: jnz L(oop)
! 121:
! 122: L(end): popl %ebp C recover size-1 pushed before the unrolled loop
! 123: andl $7,%ebp C limbs left over after the 8-limb passes
! 124: jz L(end2)
! 125: L(oop2):
! 126: movl (%esi),%eax
! 127: shldl( %cl,%eax,%edx)
! 128: movl %edx,(%edi)
! 129: movl %eax,%edx C the limb just read becomes the pending high limb
! 130: subl $4,%esi
! 131: subl $4,%edi
! 132: decl %ebp
! 133: jnz L(oop2)
! 134:
! 135: L(end2):
! 136: shll %cl,%edx C compute least significant limb
! 137: movl %edx,(%edi) C store it
! 138:
! 139: popl %eax C pop carry limb
! 140: C Restore the four registers pushed at entry, in reverse order.
! 141: popl %ebp
! 142: popl %ebx
! 143: popl %esi
! 144: popl %edi
! 145: ret
! 146:
! 147:
! 148: C We loop from least significant end of the arrays, which is only
! 149: C permissible if the source and destination don't overlap, since the
! 150: C function is documented to work for overlapping source and destination.
! 151: C Reached only when shift == 1 and one of the two pointer tests before L(normal) succeeded.
! 152: L(special):
! 153: movl (%esi),%edx C %edx = lowest src limb
! 154: addl $4,%esi
! 155:
! 156: decl %ebp
! 157: pushl %ebp C keep size-1 so the remainder can be recovered at L(Lend)
! 158: shrl $3,%ebp C %ebp = number of 8-limb unrolled passes
! 159:
! 160: addl %edx,%edx C double the lowest limb; its top bit goes to the carry flag
! 161: incl %ebp C incl/decl pair sets ZF for %ebp == 0 while leaving the carry flag untouched
! 162: decl %ebp
! 163: jz L(Lend)
! 164:
! 165: movl (%edi),%eax C fetch destination cache line
! 166:
! 167: ALIGN(4)
! 168: L(Loop):
! 169: movl 28(%edi),%eax C fetch destination cache line
! 170: movl %edx,%ebx
! 171: C Each adcl r,r below computes 2*r + carry-in; the limb's top bit becomes the next carry.
! 172: movl (%esi),%eax
! 173: movl 4(%esi),%edx
! 174: adcl %eax,%eax
! 175: movl %ebx,(%edi)
! 176: adcl %edx,%edx
! 177: movl %eax,4(%edi)
! 178:
! 179: movl 8(%esi),%ebx
! 180: movl 12(%esi),%eax
! 181: adcl %ebx,%ebx
! 182: movl %edx,8(%edi)
! 183: adcl %eax,%eax
! 184: movl %ebx,12(%edi)
! 185:
! 186: movl 16(%esi),%edx
! 187: movl 20(%esi),%ebx
! 188: adcl %edx,%edx
! 189: movl %eax,16(%edi)
! 190: adcl %ebx,%ebx
! 191: movl %edx,20(%edi)
! 192:
! 193: movl 24(%esi),%eax
! 194: movl 28(%esi),%edx
! 195: adcl %eax,%eax
! 196: movl %ebx,24(%edi)
! 197: adcl %edx,%edx
! 198: movl %eax,28(%edi)
! 199:
! 200: leal 32(%esi),%esi C use leal not to clobber carry
! 201: leal 32(%edi),%edi
! 202: decl %ebp
! 203: jnz L(Loop)
! 204:
! 205: L(Lend):
! 206: popl %ebp C recover size-1 pushed at L(special)
! 207: sbbl %eax,%eax C save carry in %eax
! 208: andl $7,%ebp C leftover limb count; andl alters the carry flag, hence the save above
! 209: jz L(Lend2)
! 210: addl %eax,%eax C restore carry from eax
! 211: L(Loop2):
! 212: movl %edx,%ebx
! 213: movl (%esi),%edx
! 214: adcl %edx,%edx
! 215: movl %ebx,(%edi)
! 216:
! 217: leal 4(%esi),%esi C use leal not to clobber carry
! 218: leal 4(%edi),%edi
! 219: decl %ebp
! 220: jnz L(Loop2)
! 221:
! 222: jmp L(L1)
! 223: L(Lend2):
! 224: addl %eax,%eax C restore carry from eax
! 225: L(L1): movl %edx,(%edi) C store last limb
! 226:
! 227: sbbl %eax,%eax C %eax = 0 or -1 according to the carry flag
! 228: negl %eax C %eax = 1 if the top bit was shifted out, else 0
! 229:
! 230: popl %ebp
! 231: popl %ebx
! 232: popl %esi
! 233: popl %edi
! 234: ret
! 235:
! 236: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>