Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/rshift.S, Revision 1.1
1.1 ! maekawa 1: /* Pentium optimized __mpn_rshift --
! 2:
! 3: Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
! 4:
! 5: This file is part of the GNU MP Library.
! 6:
! 7: The GNU MP Library is free software; you can redistribute it and/or modify
! 8: it under the terms of the GNU Library General Public License as published by
! 9: the Free Software Foundation; either version 2 of the License, or (at your
! 10: option) any later version.
! 11:
! 12: The GNU MP Library is distributed in the hope that it will be useful, but
! 13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 15: License for more details.
! 16:
! 17: You should have received a copy of the GNU Library General Public License
! 18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: MA 02111-1307, USA. */
! 21:
! 22: /*
! 23: INPUT PARAMETERS
! 24: res_ptr (sp + 4)
! 25: s_ptr (sp + 8)
! 26: size (sp + 12)
! 27: cnt (sp + 16)
! 28: */
! 29:
! 30: #include "sysdep.h"
! 31: #include "asm-syntax.h"
! 32:
! 33: .text
! 34: ALIGN (3)
! 35: .globl C_SYMBOL_NAME(__mpn_rshift)
! 36: C_SYMBOL_NAME(__mpn_rshift:)
! 37: pushl %edi /* save the four registers used below; they are popped again before each ret */
! 38: pushl %esi
! 39: pushl %ebx
! 40: pushl %ebp
! 41:
! 42: movl 20(%esp),%edi /* res_ptr (sp + 4 at entry, plus 16 bytes for the four pushes) */
! 43: movl 24(%esp),%esi /* s_ptr */
! 44: movl 28(%esp),%ebp /* size */
! 45: movl 32(%esp),%ecx /* cnt */
! 46:
! 47: /* We can use faster code for shift-by-1 under certain conditions. */
! 48: cmp $1,%ecx
! 49: jne Lnormal /* cnt != 1: use the general shrdl loop */
! 50: leal 4(%edi),%eax /* eax = res_ptr + 1 limb (limbs are 4 bytes) */
! 51: cmpl %esi,%eax
! 52: jnc Lspecial /* jump if res_ptr + 1 >= s_ptr */
! 53: leal (%edi,%ebp,4),%eax /* eax = res_ptr + size limbs */
! 54: cmpl %eax,%esi
! 55: jnc Lspecial /* jump if s_ptr >= res_ptr + size; otherwise fall through to Lnormal */
! 56:
! 57: Lnormal: /* general path: walk upward from the lowest-addressed limb using shrdl */
! 58: movl (%esi),%edx /* edx = first source limb, kept unshifted */
! 59: addl $4,%esi
! 60: xorl %eax,%eax
! 61: shrdl %cl,%edx,%eax /* compute carry limb: low cnt bits of edx land in the top of eax */
! 62: pushl %eax /* push carry limb onto stack */
! 63:
! 64: decl %ebp /* size - 1 limbs are stored by the loops; the final one is stored at Lend2 */
! 65: pushl %ebp /* keep size - 1 so Lend can derive the leftover count */
! 66: shrl $3,%ebp /* ebp = number of 8-limb blocks */
! 67: jz Lend /* no full block: go straight to the one-limb loop */
! 68:
! 69: movl (%edi),%eax /* fetch destination cache line */
! 70:
! 71: ALIGN (2)
! 72: Loop: movl 28(%edi),%eax /* fetch destination cache line */
! 73: movl %edx,%ebx /* ebx = unshifted limb carried in from the previous block */
! 74:
! 75: movl (%esi),%eax
! 76: movl 4(%esi),%edx
! 77: shrdl %cl,%eax,%ebx /* ebx = (ebx >> cnt) with the low bits of eax shifted in on top */
! 78: shrdl %cl,%edx,%eax
! 79: movl %ebx,(%edi) /* store result limbs 0 and 1 of this block */
! 80: movl %eax,4(%edi)
! 81:
! 82: movl 8(%esi),%ebx
! 83: movl 12(%esi),%eax
! 84: shrdl %cl,%ebx,%edx
! 85: shrdl %cl,%eax,%ebx
! 86: movl %edx,8(%edi) /* store result limbs 2 and 3 */
! 87: movl %ebx,12(%edi)
! 88:
! 89: movl 16(%esi),%edx
! 90: movl 20(%esi),%ebx
! 91: shrdl %cl,%edx,%eax
! 92: shrdl %cl,%ebx,%edx
! 93: movl %eax,16(%edi) /* store result limbs 4 and 5 */
! 94: movl %edx,20(%edi)
! 95:
! 96: movl 24(%esi),%eax
! 97: movl 28(%esi),%edx /* edx stays unshifted: it is the carry-in limb for the next block */
! 98: shrdl %cl,%eax,%ebx
! 99: shrdl %cl,%edx,%eax
! 100: movl %ebx,24(%edi) /* store result limbs 6 and 7 */
! 101: movl %eax,28(%edi)
! 102:
! 103: addl $32,%esi /* advance both pointers by 8 limbs */
! 104: addl $32,%edi
! 105: decl %ebp
! 106: jnz Loop
! 107:
! 108: Lend: popl %ebp /* ebp = size - 1 (pushed before the block count was computed) */
! 109: andl $7,%ebp /* limbs left over after the 8-limb blocks */
! 110: jz Lend2
! 111: Loop2: movl (%esi),%eax
! 112: shrdl %cl,%eax,%edx /* compute result limb */
! 113: movl %edx,(%edi)
! 114: movl %eax,%edx /* the limb just read becomes the low part of the next result */
! 115: addl $4,%esi
! 116: addl $4,%edi
! 117: decl %ebp
! 118: jnz Loop2
! 119:
! 120: Lend2: shrl %cl,%edx /* compute most significant limb */
! 121: movl %edx,(%edi) /* store it */
! 122:
! 123: popl %eax /* pop carry limb; it is left in eax at the ret below */
! 124:
! 125: popl %ebp /* restore the registers saved at entry, in reverse push order */
! 126: popl %ebx
! 127: popl %esi
! 128: popl %edi
! 129: ret
! 130:
! 131: /* We loop from the most significant end of the arrays, which is only
! 132: permissible if res_ptr + 1 >= s_ptr or s_ptr >= res_ptr + size, since the
! 133: function is documented to work for overlapping source and destination.
! 134: */
! 135:
! 136: Lspecial: /* shift-by-1 path: walk downward from limb size-1, chaining bits through CF with rcrl */
! 137: leal -4(%edi,%ebp,4),%edi /* edi = address of res_ptr[size-1] */
! 138: leal -4(%esi,%ebp,4),%esi /* esi = address of s_ptr[size-1] */
! 139:
! 140: movl (%esi),%edx /* edx = highest source limb */
! 141: subl $4,%esi
! 142:
! 143: decl %ebp
! 144: pushl %ebp /* keep size - 1 so LLend can derive the leftover count */
! 145: shrl $3,%ebp /* ebp = number of 8-limb blocks */
! 146:
! 147: shrl $1,%edx /* shift the highest limb; its bit 0 goes to CF for the next limb */
! 148: incl %ebp /* inc/dec pair sets ZF from ebp; neither instruction modifies CF */
! 149: decl %ebp
! 150: jz LLend
! 151:
! 152: movl (%edi),%eax /* fetch destination cache line */
! 153:
! 154: ALIGN (2)
! 155: LLoop: movl -28(%edi),%eax /* fetch destination cache line */
! 156: movl %edx,%ebx /* ebx = already-shifted limb still waiting to be stored */
! 157:
! 158: movl (%esi),%eax
! 159: movl -4(%esi),%edx
! 160: rcrl $1,%eax /* CF enters at bit 31; old bit 0 becomes the new CF */
! 161: movl %ebx,(%edi) /* movl does not change flags, so CF stays chained between the rcrl's */
! 162: rcrl $1,%edx
! 163: movl %eax,-4(%edi)
! 164:
! 165: movl -8(%esi),%ebx
! 166: movl -12(%esi),%eax
! 167: rcrl $1,%ebx
! 168: movl %edx,-8(%edi)
! 169: rcrl $1,%eax
! 170: movl %ebx,-12(%edi)
! 171:
! 172: movl -16(%esi),%edx
! 173: movl -20(%esi),%ebx
! 174: rcrl $1,%edx
! 175: movl %eax,-16(%edi)
! 176: rcrl $1,%ebx
! 177: movl %edx,-20(%edi)
! 178:
! 179: movl -24(%esi),%eax
! 180: movl -28(%esi),%edx
! 181: rcrl $1,%eax
! 182: movl %ebx,-24(%edi)
! 183: rcrl $1,%edx /* edx is shifted but not yet stored; the next pass or the tail stores it */
! 184: movl %eax,-28(%edi)
! 185:
! 186: leal -32(%esi),%esi /* use leal not to clobber carry */
! 187: leal -32(%edi),%edi
! 188: decl %ebp /* decl leaves CF untouched, so the chain survives the loop test */
! 189: jnz LLoop
! 190:
! 191: LLend: popl %ebp /* ebp = size - 1 (pushed before the block count was computed) */
! 192: sbbl %eax,%eax /* save carry in %eax */
! 193: andl $7,%ebp /* limbs left over after the 8-limb blocks; andl clobbers CF, hence the save */
! 194: jz LLend2
! 195: addl %eax,%eax /* restore carry from eax */
! 196: LLoop2: movl %edx,%ebx /* ebx = shifted limb still waiting to be stored */
! 197: movl (%esi),%edx
! 198: rcrl $1,%edx
! 199: movl %ebx,(%edi)
! 200:
! 201: leal -4(%esi),%esi /* use leal not to clobber carry */
! 202: leal -4(%edi),%edi
! 203: decl %ebp
! 204: jnz LLoop2
! 205:
! 206: jmp LL1 /* CF is still live here, so skip the restore at LLend2 */
! 207: LLend2: addl %eax,%eax /* restore carry from eax */
! 208: LL1: movl %edx,(%edi) /* store last limb */
! 209:
! 210: movl $0,%eax /* movl leaves the flags alone, so CF is still available to rcrl */
! 211: rcrl $1,%eax /* eax = final shifted-out bit placed in bit 31; left in eax at ret */
! 212:
! 213: popl %ebp /* restore the registers saved at entry, in reverse push order */
! 214: popl %ebx
! 215: popl %esi
! 216: popl %edi
! 217: ret
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>