Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/mod_34lsub1.asm, Revision 1.1
1.1 ! ohara 1: dnl Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
! 2:
! 3: dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
! 4: dnl
! 5: dnl This file is part of the GNU MP Library.
! 6: dnl
! 7: dnl The GNU MP Library is free software; you can redistribute it and/or
! 8: dnl modify it under the terms of the GNU Lesser General Public License as
! 9: dnl published by the Free Software Foundation; either version 2.1 of the
! 10: dnl License, or (at your option) any later version.
! 11: dnl
! 12: dnl The GNU MP Library is distributed in the hope that it will be useful,
! 13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
! 14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 15: dnl Lesser General Public License for more details.
! 16: dnl
! 17: dnl You should have received a copy of the GNU Lesser General Public
! 18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
! 19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
! 20: dnl Suite 330, Boston, MA 02111-1307, USA.
! 21:
! 22: include(`../config.m4')
! 23:
! 24:
! 25: C P5: 1.66 cycles/limb
! 26:
! 27:
! 28: C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
! 29: C Returns in eax a sum of pieces of the size limbs at src, congruent to that operand modulo 2**24-1. The sum is not reduced below the modulus, and a single limb is returned unchanged.
! 30: C Limbs are read as 32-bit words. The three_or_more path saves and restores ebx, esi, edi and ebp. eax, ecx and edx are overwritten. src[0] is always read, so size is assumed to be at least 1 -- TODO confirm against callers.
! 31: defframe(PARAM_SIZE, 8)
! 32: defframe(PARAM_SRC, 4)
! 33: C NOTE(review): defframe and FRAME_pushl come from the included m4 definitions, presumably giving stack offsets of the two arguments -- confirm there.
! 34: TEXT
! 35: ALIGN(16)
! 36: PROLOGUE(mpn_mod_34lsub1)
! 37: deflit(`FRAME',0)
! 38: C Sizes 1 and 2 are handled inline below, 3 or more limbs go to the loop.
! 39: movl PARAM_SIZE, %ecx C ecx = size
! 40: movl PARAM_SRC, %edx C edx = src
! 41:
! 42: subl $2, %ecx C ecx = size-2, these flags feed both branches below
! 43: ja L(three_or_more) C taken when size is above 2, unsigned
! 44:
! 45: movl (%edx), %eax C eax = src[0], movl leaves the subl flags intact
! 46: jne L(one) C size is not 2, so one limb, returned as is
! 47:
! 48: C Two limbs, src[0] + src[1]*2**32, folded using 2**24 == 1 mod 2**24-1.
! 49: movl 4(%edx), %ecx C ecx = src[1]
! 50: movl %eax, %edx C copy of src[0], the src pointer is no longer needed
! 51:
! 52: shrl $24, %edx C high 8 bits of src[0], weight 2**24 == 1
! 53: andl $0xFFFFFF, %eax C low 24 bits of src[0]
! 54:
! 55: addl %edx, %eax
! 56: movl %ecx, %edx C copy of src[1]
! 57:
! 58: shrl $16, %ecx C high 16 bits of src[1], weight 2**48 == 1
! 59: andl $0xFFFF, %edx C low 16 bits of src[1]
! 60:
! 61: shll $8, %edx C weight 2**32 == 2**8
! 62: addl %ecx, %eax
! 63:
! 64: addl %edx, %eax
! 65:
! 66: L(one):
! 67: ret C result in eax
! 68:
! 69:
! 70: L(three_or_more):
! 71: C eax
! 72: C ebx
! 73: C ecx size-2
! 74: C edx src
! 75: C esi
! 76: C edi
! 77: C ebp
! 78: C Save the four registers used below as scratch and accumulators, they are popped before the final ret.
! 79: pushl %ebx FRAME_pushl()
! 80: pushl %esi FRAME_pushl()
! 81:
! 82: pushl %edi FRAME_pushl()
! 83: pushl %ebp FRAME_pushl()
! 84:
! 85: xorl %esi, %esi C 0mod3, accumulates limbs at index 0 mod 3
! 86: xorl %edi, %edi C 1mod3, accumulates limbs at index 1 mod 3
! 87:
! 88: xorl %ebp, %ebp C 2mod3, and clear carry for the first adcl
! 89: C Each pass adds three limbs into the three accumulators as one adc chain. The carry out of 2mod3 re-enters 0mod3 on the next pass, since 2**96 == 1 mod 2**24-1.
! 90: L(top):
! 91: C eax scratch
! 92: C ebx scratch
! 93: C ecx counter, limbs
! 94: C edx src
! 95: C esi 0mod3
! 96: C edi 1mod3
! 97: C ebp 2mod3
! 98:
! 99: movl (%edx), %eax
! 100: movl 4(%edx), %ebx
! 101:
! 102: adcl %eax, %esi C 0mod3 += limb + carry in
! 103: movl 8(%edx), %eax
! 104:
! 105: adcl %ebx, %edi C 1mod3 += limb + carry from 0mod3
! 106: leal 12(%edx), %edx C src += 3 limbs, leal leaves the carry flag alone
! 107:
! 108: adcl %eax, %ebp C 2mod3 += limb + carry from 1mod3
! 109: leal -2(%ecx), %ecx C with the decl below, counter -= 3 without touching carry
! 110:
! 111: decl %ecx C sets the flags tested by jg, carry flag preserved
! 112: jg L(top) C loop while counter is above 0
! 113:
! 114: C Tail: add the 0, 1 or 2 leftover limbs. ebx becomes a mask giving the pending carry its worth: 1 after 2mod3, 2**8 after 0mod3, 2**16 after 1mod3.
! 115: C ecx is -2, -1 or 0, representing 0, 1 or 2 more limbs, respectively
! 116:
! 117: movl $0xFFFFFFFF, %ebx C mask, carry out of 2mod3 is worth 1
! 118: incl %ecx C now -1, 0 or 1, carry flag preserved
! 119:
! 120: js L(combine) C 0 more
! 121:
! 122: movl (%edx), %eax
! 123: movl $0xFFFFFF00, %ebx C mask, carry out of 0mod3 is worth 2**8
! 124:
! 125: adcl %eax, %esi
! 126: decl %ecx C now -1 or 0, carry flag preserved
! 127:
! 128: js L(combine) C 1 more
! 129:
! 130: movl 4(%edx), %eax
! 131: movl $0xFFFF0000, %ebx C mask, carry out of 1mod3 is worth 2**16
! 132:
! 133: adcl %eax, %edi
! 134:
! 135: C Fold the three accumulators and the pending carry into eax, using 2**24 == 1 mod 2**24-1.
! 136:
! 137: L(combine):
! 138: C eax
! 139: C ebx mask
! 140: C ecx
! 141: C edx
! 142: C esi 0mod3
! 143: C edi 1mod3
! 144: C ebp 2mod3
! 145:
! 146: sbbl %ecx, %ecx C carry, ecx = -1 if the carry flag is set, else 0
! 147: movl %esi, %eax C 0mod3
! 148:
! 149: andl %ebx, %ecx C masked for position
! 150: andl $0xFFFFFF, %eax C 0mod3 low
! 151:
! 152: shrl $24, %esi C 0mod3 high
! 153: subl %ecx, %eax C apply carry, subtracting the negative mask adds 1, 2**8 or 2**16
! 154:
! 155: addl %esi, %eax C apply 0mod3
! 156: movl %edi, %ebx C 1mod3
! 157:
! 158: shrl $16, %edi C 1mod3 high
! 159: andl $0x0000FFFF, %ebx C 1mod3 low 16 bits
! 160:
! 161: shll $8, %ebx C 1mod3 low, weight 2**32 == 2**8
! 162: addl %edi, %eax C apply 1mod3 high
! 163:
! 164: addl %ebx, %eax C apply 1mod3 low
! 165: movl %ebp, %ebx C 2mod3
! 166:
! 167: shrl $8, %ebp C 2mod3 high
! 168: andl $0xFF, %ebx C 2mod3 low 8 bits
! 169:
! 170: shll $16, %ebx C 2mod3 low, weight 2**64 == 2**16
! 171: addl %ebp, %eax C apply 2mod3 high
! 172:
! 173: addl %ebx, %eax C apply 2mod3 low
! 174: C Restore the saved registers in reverse push order.
! 175: popl %ebp
! 176: popl %edi
! 177:
! 178: popl %esi
! 179: popl %ebx
! 180:
! 181: ret C result in eax
! 182:
! 183: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>