Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/logops_n.asm, Revision 1.1.1.1
1.1 ohara 1: dnl Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
2:
3: dnl Copyright 2001, 2002 Free Software Foundation, Inc.
4: dnl
5: dnl This file is part of the GNU MP Library.
6: dnl
7: dnl The GNU MP Library is free software; you can redistribute it and/or
8: dnl modify it under the terms of the GNU Lesser General Public License as
9: dnl published by the Free Software Foundation; either version 2.1 of the
10: dnl License, or (at your option) any later version.
11: dnl
12: dnl The GNU MP Library is distributed in the hope that it will be useful,
13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15: dnl Lesser General Public License for more details.
16: dnl
17: dnl You should have received a copy of the GNU Lesser General Public
18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
20: dnl Suite 330, Boston, MA 02111-1307, USA.
21:
22: include(`../config.m4')
23:
24:
25: C P5: 3.0 c/l and, ior, xor
26: C 3.5 c/l andn, iorn, nand, nior, xnor
27:
28:
C M4_choose_op(name, want_post, insn, want_pre)
C
C If OPERATION_<name> is defined, select that operation for this build:
C M4_function becomes mpn_<name>, M4op becomes the instruction <insn>,
C and M4_want_post / M4_want_pre record the second / fourth argument
C (yes or empty).  Note the argument order: $2 is the post flag and $4
C is the pre flag.  If OPERATION_<name> is not defined nothing is set.
29: define(M4_choose_op,
30: `ifdef(`OPERATION_$1',`
31: define(`M4_function', `mpn_$1')
32: define(`M4_want_pre', `$4')
33: define(`M4op', `$3')
34: define(`M4_want_post',`$2')
35: ')')
C M4pre(text) expands to text only when M4_want_pre is yes, and
C M4post(text) only when M4_want_post is yes; otherwise each expands to
C nothing.  They wrap the optional complement instructions in the code.
36: define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
37: define(M4post,`ifelse(M4_want_post,yes,`$1')')
38:
C Operation table.  Columns: name, complement result (post), instruction,
C complement yp limbs before the instruction (pre).  With ~ meaning the
C xor with GMP_NUMB_MASK used at the M4pre/M4post sites in the code
C (the odd-limb path uses notl_or_xorl_GMP_NUMB_MASK for the pre step),
C the rows compute, limb by limb:
C
C   and_n   x & y       andn_n  x & ~y      nand_n  ~(x & y)
C   ior_n   x | y       iorn_n  x | ~y      nior_n  ~(x | y)
C   xor_n   x ^ y                           xnor_n  ~(x ^ y)
C
C Only the row whose OPERATION_<name> symbol is defined has any effect.
39: M4_choose_op( and_n, , andl, )
40: M4_choose_op( andn_n, , andl, yes)
41: M4_choose_op( nand_n, yes, andl, )
42: M4_choose_op( ior_n, , orl, )
43: M4_choose_op( iorn_n, , orl, yes)
44: M4_choose_op( nior_n, yes, orl, )
45: M4_choose_op( xor_n, , xorl, )
46: M4_choose_op( xnor_n, yes, xorl, )
47:
C If no row matched, M4_function was never defined; m4_error is invoked
C with the message below instead of assembling a function with no name.
48: ifdef(`M4_function',,
49: `m4_error(`Unrecognised or undefined OPERATION symbol
50: ')')
51:
C The eight names listed here are the mpn_<name> functions that the
C M4_choose_op table above can select, one per build of this file.
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably
C it is what tells the build which functions this source provides --
C confirm.
52: MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
53:
C NOTE(review): NAILS_SUPPORT is defined outside this file; presumably it
C declares the nail-bit counts this code handles (complements below xor
C with GMP_NUMB_MASK rather than inverting all 32 bits) -- confirm.
54: NAILS_SUPPORT(0-31)
55:
56:
57: C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
58: C
59: C Nothing complicated here, just some care to avoid data cache bank clashes
60: C and AGIs.
61: C
62: C We're one register short of being able to do a simple 4 loads, 2 ops, 2
63: C stores. Instead %ebp is juggled a bit and nops are introduced to keep the
64: C pairings as intended. An in-place operation would free up a register, for
65: C an 0.5 c/l speedup, if that's worth bothering with.
66: C
67: C This code seems best for P55 too. Data alignment is a big problem for MMX
68: C and the pairing restrictions on movq and integer instructions make life
69: C difficult.
70:
C Argument slots, 4 bytes apart in the order of the C prototype
C (wp, xp, yp, size).
C NOTE(review): defframe is defined outside this file; presumably the
C offsets are relative to the stack pointer at function entry and are
C adjusted by FRAME as registers are pushed -- confirm.
71: defframe(PARAM_SIZE,16)
72: defframe(PARAM_YP, 12)
73: defframe(PARAM_XP, 8)
74: defframe(PARAM_WP, 4)
75:
C NOTE(review): TEXT and ALIGN are defined outside this file; presumably
C they select the code section and align the entry point to 8 bytes --
C confirm.
76: TEXT
77: ALIGN(8)
78:
C Body of the selected mpn_<op> function: wp[i] = xp[i] <op> yp[i] for
C size limbs.  An odd top limb is handled first, then limbs are processed
C two at a time from the high end down, with the low limb of each pair
C finished one step later than the high limb (at L(top) or in the tail).
C
C NOTE(review): a size of 0 would take the jnc below to L(entry) with
C ecx == 0, load from below xp/yp and wrap the counter at decl; presumably
C callers guarantee size >= 1 -- confirm.
79: PROLOGUE(M4_function)
80: deflit(`FRAME',0)
81:
82: pushl %ebx FRAME_pushl()
83: pushl %esi FRAME_pushl()
84:
85: pushl %edi FRAME_pushl()
86: pushl %ebp FRAME_pushl()
87:
88: movl PARAM_SIZE, %ecx
89: movl PARAM_XP, %ebx
90:
91: movl PARAM_YP, %esi
92: movl PARAM_WP, %edi
93:
94: shrl %ecx C ecx = size/2 (limb pairs), carry = low bit of size
95: jnc L(entry) C even size: straight to the paired loop
96:
C Odd size: do the top limb, index 2*ecx = size-1, on its own.
97: movl (%ebx,%ecx,8), %eax C risk of data cache bank clash here
98: movl (%esi,%ecx,8), %edx C edx = yp[size-1]
99:
100: M4pre(` notl_or_xorl_GMP_NUMB_MASK(%edx)')
101:
102: M4op %edx, %eax C eax = xp limb <op> (possibly complemented) yp limb
103:
104: M4post(`xorl $GMP_NUMB_MASK, %eax')
105: orl %ecx, %ecx C set ZF if no limb pairs remain; the movl below keeps flags
106:
107: movl %eax, (%edi,%ecx,8) C wp[size-1] = result
108: jz L(done) C size was 1
109:
110: jmp L(entry)
111:
112:
113: L(top):
114: C eax high limb of the current pair, M4op done, M4post still to apply
115: C ebx xp
116: C ecx counter, limb pairs, decrementing
117: C edx low limb of the current pair from yp (after M4pre), M4op pending
118: C esi yp
119: C edi wp
120: C ebp low limb of the current pair from xp
121:
122: M4op %ebp, %edx C finish the low limb of the current pair
123: nop C filler to keep the intended pairing
124:
125: M4post(`xorl $GMP_NUMB_MASK, %eax')
126: M4post(`xorl $GMP_NUMB_MASK, %edx')
127:
128: movl %eax, 4(%edi,%ecx,8) C ecx already decremented: wp[2*ecx+1] = high limb
129: movl %edx, (%edi,%ecx,8) C wp[2*ecx] = low limb
130:
131: L(entry):
132: movl -4(%ebx,%ecx,8), %ebp C ebp = xp[2*ecx-1], high limb of the next pair
133: nop C filler to keep the intended pairing
134:
135: movl -4(%esi,%ecx,8), %eax C eax = yp[2*ecx-1]
136: movl -8(%esi,%ecx,8), %edx C edx = yp[2*ecx-2]
137:
138: M4pre(` xorl $GMP_NUMB_MASK, %eax')
139: M4pre(` xorl $GMP_NUMB_MASK, %edx')
140:
141: M4op %ebp, %eax C high limb done, apart from M4post
142: movl -8(%ebx,%ecx,8), %ebp C ebp = xp[2*ecx-2]; ebp is reused, low op is deferred
143:
144: decl %ecx
145: jnz L(top) C more pairs: the low limb is finished at L(top)
146:
147:
C Loop exit with ecx == 0: finish and store the last pair, wp[1] and wp[0].
148: M4op %ebp, %edx
149: nop
150:
151: M4post(`xorl $GMP_NUMB_MASK, %eax')
152: M4post(`xorl $GMP_NUMB_MASK, %edx')
153:
154: movl %eax, 4(%edi,%ecx,8)
155: movl %edx, (%edi,%ecx,8)
156:
157:
158: L(done):
C Restore the saved registers in the reverse of the push order above.
159: popl %ebp
160: popl %edi
161:
162: popl %esi
163: popl %ebx
164:
165: ret
166:
167: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>