Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/logops_n.asm, Revision 1.1
1.1 ! ohara 1: dnl Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
! 2:
! 3: dnl Copyright 2001, 2002 Free Software Foundation, Inc.
! 4: dnl
! 5: dnl This file is part of the GNU MP Library.
! 6: dnl
! 7: dnl The GNU MP Library is free software; you can redistribute it and/or
! 8: dnl modify it under the terms of the GNU Lesser General Public License as
! 9: dnl published by the Free Software Foundation; either version 2.1 of the
! 10: dnl License, or (at your option) any later version.
! 11: dnl
! 12: dnl The GNU MP Library is distributed in the hope that it will be useful,
! 13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
! 14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 15: dnl Lesser General Public License for more details.
! 16: dnl
! 17: dnl You should have received a copy of the GNU Lesser General Public
! 18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
! 19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
! 20: dnl Suite 330, Boston, MA 02111-1307, USA.
! 21:
! 22: include(`../config.m4')
! 23:
! 24:
! 25: C P5: 3.0 c/l and, ior, xor
! 26: C 3.5 c/l andn, iorn, nand, nior, xnor
! 27:
! 28: C M4_choose_op(name, want_post, insn, want_pre): when OPERATION_name is defined this sets M4_function to mpn_name, M4op to insn, and records the two flags. M4pre(text) and M4post(text) expand to text only when the matching flag is yes.
! 29: define(M4_choose_op,
! 30: `ifdef(`OPERATION_$1',`
! 31: define(`M4_function', `mpn_$1')
! 32: define(`M4_want_pre', `$4')
! 33: define(`M4op', `$3')
! 34: define(`M4_want_post',`$2')
! 35: ')')
! 36: define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
! 37: define(M4post,`ifelse(M4_want_post,yes,`$1')')
! 38: C Operation table. In the code below the M4pre lines complement (xor with GMP_NUMB_MASK) the limbs loaded from yp before M4op, and the M4post lines complement the result, so for example andn_n is x AND NOT y and nand_n is NOT (x AND y).
! 39: M4_choose_op( and_n, , andl, )
! 40: M4_choose_op( andn_n, , andl, yes)
! 41: M4_choose_op( nand_n, yes, andl, )
! 42: M4_choose_op( ior_n, , orl, )
! 43: M4_choose_op( iorn_n, , orl, yes)
! 44: M4_choose_op( nior_n, yes, orl, )
! 45: M4_choose_op( xor_n, , xorl, )
! 46: M4_choose_op( xnor_n, yes, xorl, )
! 47: C If none of the table entries matched, M4_function is still undefined: report it through m4_error.
! 48: ifdef(`M4_function',,
! 49: `m4_error(`Unrecognised or undefined OPERATION symbol
! 50: ')')
! 51: C The names listed next are the eight functions selectable above. NOTE(review): MULFUNC_PROLOGUE is defined elsewhere, confirm its exact effect.
! 52: MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
! 53: C NOTE(review): presumably declares that nail widths of 0 to 31 bits are handled; NAILS_SUPPORT is defined elsewhere, confirm.
! 54: NAILS_SUPPORT(0-31)
! 55:
! 56:
! 57: C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
! 58: C
! 59: C Nothing complicated here, just some care to avoid data cache bank clashes
! 60: C and AGIs.
! 61: C
! 62: C We're one register short of being able to do a simple 4 loads, 2 ops, 2
! 63: C stores. Instead %ebp is juggled a bit and nops are introduced to keep the
! 64: C pairings as intended. An in-place operation would free up a register, for
! 65: C a 0.5 c/l speedup, if that's worth bothering with.
! 66: C
! 67: C This code seems best for P55 too. Data alignment is a big problem for MMX
! 68: C and the pairing restrictions on movq and integer instructions make life
! 69: C difficult.
! 70:
! 71: defframe(PARAM_SIZE,16) C 4th argument in the prototype above: size, counted in 4-byte limbs
! 72: defframe(PARAM_YP, 12) C 3rd argument: yp, second source operand
! 73: defframe(PARAM_XP, 8) C 2nd argument: xp, first source operand
! 74: defframe(PARAM_WP, 4) C 1st argument: wp, destination
! 75:
! 76: TEXT
! 77: ALIGN(8)
! 78:
! 79: PROLOGUE(M4_function)
! 80: deflit(`FRAME',0)
! 81: C Save %ebx, %esi, %edi and %ebp; they are popped in reverse order at L(done). NOTE(review): FRAME_pushl is defined elsewhere, presumably it bumps FRAME so the PARAM_ offsets stay valid after each push.
! 82: pushl %ebx FRAME_pushl()
! 83: pushl %esi FRAME_pushl()
! 84:
! 85: pushl %edi FRAME_pushl()
! 86: pushl %ebp FRAME_pushl()
! 87: C Load the arguments: ecx = size, ebx = xp, esi = yp, edi = wp.
! 88: movl PARAM_SIZE, %ecx
! 89: movl PARAM_XP, %ebx
! 90:
! 91: movl PARAM_YP, %esi
! 92: movl PARAM_WP, %edi
! 93: C ecx = size/2, the number of limb pairs. The carry flag receives the low bit of size, so an even size jumps straight into the paired loop.
! 94: shrl %ecx
! 95: jnc L(entry)
! 96: C Odd size: process the top limb (index 2*ecx, byte offset 8*ecx) on its own first.
! 97: movl (%ebx,%ecx,8), %eax C eax = xp top limb; risk of data cache bank clash here
! 98: movl (%esi,%ecx,8), %edx C edx = yp top limb
! 99:
! 100: M4pre(` notl_or_xorl_GMP_NUMB_MASK(%edx)')
! 101: C NOTE(review): notl_or_xorl_GMP_NUMB_MASK is defined elsewhere; presumably it complements %edx within the numb bits, matching the xorl used in the loop.
! 102: M4op %edx, %eax
! 103:
! 104: M4post(`xorl $GMP_NUMB_MASK, %eax')
! 105: orl %ecx, %ecx C set ZF when no limb pairs remain (size was 1); placed after the M4post xorl so the flags reflect ecx, and the movl below leaves flags alone
! 106:
! 107: movl %eax, (%edi,%ecx,8) C store the top result limb
! 108: jz L(done)
! 109:
! 110: jmp L(entry)
! 111:
! 112:
! 113: L(top):
! 114: C eax high limb of the pair just loaded, M4op already applied
! 115: C ebx xp
! 116: C ecx counter, limb pairs, decrementing (already decremented for this pair)
! 117: C edx yp low limb of the pair (after M4pre), M4op still pending
! 118: C esi yp
! 119: C edi wp
! 120: C ebp xp low limb of the pair
! 121:
! 122: M4op %ebp, %edx
! 123: nop C filler, see the header note about nops keeping the intended pairings
! 124:
! 125: M4post(`xorl $GMP_NUMB_MASK, %eax')
! 126: M4post(`xorl $GMP_NUMB_MASK, %edx')
! 127:
! 128: movl %eax, 4(%edi,%ecx,8) C store high limb of the pair
! 129: movl %edx, (%edi,%ecx,8) C store low limb of the pair
! 130: C Loop entry: ecx pairs remain, so the next pair to process sits at limb indices 2*ecx-1 (high) and 2*ecx-2 (low).
! 131: L(entry):
! 132: movl -4(%ebx,%ecx,8), %ebp C ebp = xp high limb
! 133: nop
! 134:
! 135: movl -4(%esi,%ecx,8), %eax C eax = yp high limb
! 136: movl -8(%esi,%ecx,8), %edx C edx = yp low limb
! 137:
! 138: M4pre(` xorl $GMP_NUMB_MASK, %eax')
! 139: M4pre(` xorl $GMP_NUMB_MASK, %edx')
! 140:
! 141: M4op %ebp, %eax C high limb result (before any M4post)
! 142: movl -8(%ebx,%ecx,8), %ebp C ebp reused for the xp low limb, since no spare register is available
! 143:
! 144: decl %ecx
! 145: jnz L(top)
! 146:
! 147: C Final pair: same finish as at L(top), with ecx now zero so the stores go to wp limbs 1 and 0.
! 148: M4op %ebp, %edx
! 149: nop
! 150:
! 151: M4post(`xorl $GMP_NUMB_MASK, %eax')
! 152: M4post(`xorl $GMP_NUMB_MASK, %edx')
! 153:
! 154: movl %eax, 4(%edi,%ecx,8)
! 155: movl %edx, (%edi,%ecx,8)
! 156:
! 157: C Common exit: restore the registers pushed at entry and return.
! 158: L(done):
! 159: popl %ebp
! 160: popl %edi
! 161:
! 162: popl %esi
! 163: popl %ebx
! 164:
! 165: ret
! 166:
! 167: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>