Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/lshift.S, Revision 1.1
1.1 ! maekawa 1: /* Pentium optimized __mpn_lshift --
! 2:
! 3: Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
! 4:
! 5: This file is part of the GNU MP Library.
! 6:
! 7: The GNU MP Library is free software; you can redistribute it and/or modify
! 8: it under the terms of the GNU Library General Public License as published by
! 9: the Free Software Foundation; either version 2 of the License, or (at your
! 10: option) any later version.
! 11:
! 12: The GNU MP Library is distributed in the hope that it will be useful, but
! 13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 15: License for more details.
! 16:
! 17: You should have received a copy of the GNU Library General Public License
! 18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: MA 02111-1307, USA. */
! 21:
! 22: /*
! 23: INPUT PARAMETERS
! 24: res_ptr (sp + 4)
! 25: s_ptr (sp + 8)
! 26: size (sp + 12)
! 27: cnt (sp + 16)
! 28: */
! 29:
! 30: #include "sysdep.h"
! 31: #include "asm-syntax.h"
! 32:
/* __mpn_lshift: shift the size-limb number at s_ptr left by cnt bits and
   store the size result limbs at res_ptr.  Limbs are 4 bytes each.  The bits
   shifted out of the most significant limb are in %eax when the routine
   returns.  %edi, %esi, %ebx and %ebp are pushed on entry and popped again
   before each ret.  This part holds the entry code, the test that selects
   the shift-by-1 path (Lspecial) and the general path (Lnormal), which walks
   from the most significant limb down to the least significant one.
   NOTE(review): presumably callers pass size >= 1 and 1 <= cnt <= 31;
   nothing here checks either -- confirm against the callers.  */
! 33: .text
! 34: ALIGN (3)
! 35: .globl C_SYMBOL_NAME(__mpn_lshift)
! 36: C_SYMBOL_NAME(__mpn_lshift:)
! 37: pushl %edi /* save the four registers used as work registers below */
! 38: pushl %esi
! 39: pushl %ebx
! 40: pushl %ebp
! 41: /* the four pushes moved each argument 16 bytes further from %esp */
! 42: movl 20(%esp),%edi /* res_ptr */
! 43: movl 24(%esp),%esi /* s_ptr */
! 44: movl 28(%esp),%ebp /* size */
! 45: movl 32(%esp),%ecx /* cnt */
! 46:
! 47: /* We can use faster code for shift-by-1 under certain conditions. */
! 48: cmp $1,%ecx /* cnt == 1 ? */
! 49: jne Lnormal /* no: always take the general path */
! 50: leal 4(%esi),%eax /* %eax = s_ptr + 4 bytes (one limb) */
! 51: cmpl %edi,%eax
! 52: jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */
! 53: leal (%esi,%ebp,4),%eax /* %eax = s_ptr + 4*size bytes (just past the source) */
! 54: cmpl %eax,%edi
! 55: jnc Lspecial /* jump if res_ptr >= s_ptr + size */
! 56: /* neither test passed: fall through to the general path */
! 57: Lnormal:
! 58: leal -4(%edi,%ebp,4),%edi /* %edi -> most significant result limb */
! 59: leal -4(%esi,%ebp,4),%esi /* %esi -> most significant source limb */
! 60:
! 61: movl (%esi),%edx /* %edx = most significant source limb */
! 62: subl $4,%esi /* %esi -> next lower source limb */
! 63: xorl %eax,%eax
! 64: shldl %cl,%edx,%eax /* compute carry limb */
! 65: pushl %eax /* push carry limb onto stack */
! 66:
! 67: decl %ebp /* size - 1 limbs go through the loops; the last store is at Lend2 */
! 68: pushl %ebp /* keep size - 1 so Lend can derive the leftover count */
! 69: shrl $3,%ebp /* number of full 8-limb iterations of Loop */
! 70: jz Lend /* fewer than 8: skip the unrolled loop */
! 71:
! 72: movl (%edi),%eax /* fetch destination cache line */
! 73:
! 74: ALIGN (2)
! 75: Loop: movl -28(%edi),%eax /* fetch destination cache line */
! 76: movl %edx,%ebx /* %edx holds the limb loaded earlier but not yet shifted out */
! 77: /* each group below loads two lower limbs, shld's them into the limbs above, stores two results */
! 78: movl (%esi),%eax
! 79: movl -4(%esi),%edx
! 80: shldl %cl,%eax,%ebx /* %ebx = (%ebx << cnt) with the top cnt bits of %eax shifted in */
! 81: shldl %cl,%edx,%eax
! 82: movl %ebx,(%edi)
! 83: movl %eax,-4(%edi)
! 84:
! 85: movl -8(%esi),%ebx
! 86: movl -12(%esi),%eax
! 87: shldl %cl,%ebx,%edx
! 88: shldl %cl,%eax,%ebx
! 89: movl %edx,-8(%edi)
! 90: movl %ebx,-12(%edi)
! 91:
! 92: movl -16(%esi),%edx
! 93: movl -20(%esi),%ebx
! 94: shldl %cl,%edx,%eax
! 95: shldl %cl,%ebx,%edx
! 96: movl %eax,-16(%edi)
! 97: movl %edx,-20(%edi)
! 98:
! 99: movl -24(%esi),%eax
! 100: movl -28(%esi),%edx /* this limb stays in %edx for the next iteration or for Lend */
! 101: shldl %cl,%eax,%ebx
! 102: shldl %cl,%edx,%eax
! 103: movl %ebx,-24(%edi)
! 104: movl %eax,-28(%edi)
! 105:
! 106: subl $32,%esi /* step both pointers down by 8 limbs */
! 107: subl $32,%edi
! 108: decl %ebp
! 109: jnz Loop
! 110:
! 111: Lend: popl %ebp /* recover size - 1 */
! 112: andl $7,%ebp /* limbs left over after the 8-limb loop */
! 113: jz Lend2
! 114: Loop2: movl (%esi),%eax /* one limb per iteration */
! 115: shldl %cl,%eax,%edx
! 116: movl %edx,(%edi)
! 117: movl %eax,%edx /* the limb just loaded becomes the upper limb of the next step */
! 118: subl $4,%esi
! 119: subl $4,%edi
! 120: decl %ebp
! 121: jnz Loop2
! 122:
! 123: Lend2: shll %cl,%edx /* compute least significant limb */
! 124: movl %edx,(%edi) /* store it */
! 125:
! 126: popl %eax /* pop carry limb */
! 127:
! 128: popl %ebp /* restore the registers saved at entry, in reverse order */
! 129: popl %ebx
! 130: popl %esi
! 131: popl %edi
! 132: ret
! 133:
! 134: /* We loop from least significant end of the arrays, which is only
! 135: permissible if the source and destination don't overlap, since the
! 136: function is documented to work for overlapping source and destination.
! 137: */
! 138:
/* Lspecial: shift-by-1 path, entered from the tests at the top of
   __mpn_lshift with %edi = res_ptr, %esi = s_ptr, %ebp = size.  Each limb is
   doubled with add/adc, starting at the least significant limb, and the
   carry flag passes the shifted-out top bit of one limb into the next.
   Pointer and counter updates use leal, incl and decl, which leave the
   carry flag unchanged.  At return %eax is 0 or 1: the bit shifted out of
   the most significant limb.  */
! 139: Lspecial:
! 140: movl (%esi),%edx /* %edx = least significant source limb */
! 141: addl $4,%esi /* %esi -> next higher source limb */
! 142:
! 143: decl %ebp /* size - 1 limbs go through the loops; the last store is at LL1 */
! 144: pushl %ebp /* keep size - 1 so LLend can derive the leftover count */
! 145: shrl $3,%ebp /* number of full 8-limb iterations of LLoop */
! 146:
! 147: addl %edx,%edx /* shift the first limb left by 1; carry flag = bit shifted out */
! 148: incl %ebp /* inc/dec pair: sets the zero flag from %ebp ... */
! 149: decl %ebp /* ... without changing the carry flag produced by the addl */
! 150: jz LLend /* fewer than 8: skip the unrolled loop */
! 151:
! 152: movl (%edi),%eax /* fetch destination cache line */
! 153:
! 154: ALIGN (2)
! 155: LLoop: movl 28(%edi),%eax /* fetch destination cache line */
! 156: movl %edx,%ebx /* %edx holds an already shifted limb waiting to be stored */
! 157:
! 158: movl (%esi),%eax
! 159: movl 4(%esi),%edx
! 160: adcl %eax,%eax /* %eax = 2*%eax + carry in; carry out = old top bit of %eax */
! 161: movl %ebx,(%edi) /* movl does not change the carry flag between the adcl's */
! 162: adcl %edx,%edx
! 163: movl %eax,4(%edi)
! 164:
! 165: movl 8(%esi),%ebx
! 166: movl 12(%esi),%eax
! 167: adcl %ebx,%ebx
! 168: movl %edx,8(%edi)
! 169: adcl %eax,%eax
! 170: movl %ebx,12(%edi)
! 171:
! 172: movl 16(%esi),%edx
! 173: movl 20(%esi),%ebx
! 174: adcl %edx,%edx
! 175: movl %eax,16(%edi)
! 176: adcl %ebx,%ebx
! 177: movl %edx,20(%edi)
! 178:
! 179: movl 24(%esi),%eax
! 180: movl 28(%esi),%edx /* shifted below and left in %edx for the next iteration or for LLend */
! 181: adcl %eax,%eax
! 182: movl %ebx,24(%edi)
! 183: adcl %edx,%edx
! 184: movl %eax,28(%edi)
! 185:
! 186: leal 32(%esi),%esi /* use leal not to clobber carry */
! 187: leal 32(%edi),%edi
! 188: decl %ebp /* decl leaves the carry flag unchanged */
! 189: jnz LLoop
! 190:
! 191: LLend: popl %ebp /* recover size - 1 */
! 192: sbbl %eax,%eax /* save carry in %eax */
! 193: andl $7,%ebp /* limbs left over; andl rewrites the flags, hence the save above */
! 194: jz LLend2
! 195: addl %eax,%eax /* restore carry from eax */
! 196: LLoop2: movl %edx,%ebx /* one limb per iteration */
! 197: movl (%esi),%edx
! 198: adcl %edx,%edx
! 199: movl %ebx,(%edi)
! 200:
! 201: leal 4(%esi),%esi /* use leal not to clobber carry */
! 202: leal 4(%edi),%edi
! 203: decl %ebp
! 204: jnz LLoop2
! 205:
! 206: jmp LL1 /* carry flag is already valid here, skip the restore */
! 207: LLend2: addl %eax,%eax /* restore carry from eax */
! 208: LL1: movl %edx,(%edi) /* store last limb */
! 209:
! 210: sbbl %eax,%eax /* %eax = 0 or -1 according to the carry flag */
! 211: negl %eax /* %eax = 0 or 1: the bit shifted out of the top limb */
! 212:
! 213: popl %ebp /* restore the registers saved at entry, in reverse order */
! 214: popl %ebx
! 215: popl %esi
! 216: popl %edi
! 217: ret
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>