Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/aors_n.asm, Revision 1.1.1.1
1.1 maekawa 1: dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
2: dnl
3: dnl P5: 2.375 cycles/limb
4:
5:
6: dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
7: dnl Foundation, Inc.
8: dnl
9: dnl This file is part of the GNU MP Library.
10: dnl
11: dnl The GNU MP Library is free software; you can redistribute it and/or
12: dnl modify it under the terms of the GNU Lesser General Public License as
13: dnl published by the Free Software Foundation; either version 2.1 of the
14: dnl License, or (at your option) any later version.
15: dnl
16: dnl The GNU MP Library is distributed in the hope that it will be useful,
17: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
18: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19: dnl Lesser General Public License for more details.
20: dnl
21: dnl You should have received a copy of the GNU Lesser General Public
22: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
23: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
24: dnl Suite 330, Boston, MA 02111-1307, USA.
25:
26:
27: include(`../config.m4')
28:
29: dnl Pick the limb instruction and the two exported names from whichever OPERATION_ symbol is defined: adcl with mpn_add_n/mpn_add_nc, or sbbl with mpn_sub_n/mpn_sub_nc. If neither symbol is defined, m4_error is invoked with the message below.
30: ifdef(`OPERATION_add_n',`
31: define(M4_inst, adcl)
32: define(M4_function_n, mpn_add_n)
33: define(M4_function_nc, mpn_add_nc)
34:
35: ',`ifdef(`OPERATION_sub_n',`
36: define(M4_inst, sbbl)
37: define(M4_function_n, mpn_sub_n)
38: define(M4_function_nc, mpn_sub_nc)
39:
40: ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
41: ')')')
42:
43: MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
44: C The list above names all four functions this source can provide; a single build defines only the pair chosen by the OPERATION_ symbol.
45: C Prototypes of the two entry points (limbs are handled as 32-bit words by the code below):
46: C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
47: C mp_size_t size);
48: C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
49: C mp_size_t size, mp_limb_t carry);
50: C Argument slots, stepping by 4 from dst (4) up to carry (20). NOTE(review): presumably byte offsets from the stack pointer at function entry, with FRAME accounting for later pushes -- confirm against the defframe definition.
51: defframe(PARAM_CARRY,20)
52: defframe(PARAM_SIZE, 16)
53: defframe(PARAM_SRC2, 12)
54: defframe(PARAM_SRC1, 8)
55: defframe(PARAM_DST, 4)
56:
57: .text
58: ALIGN(8)
59: PROLOGUE(M4_function_nc)
60: C Carry-in entry point. Repeats the setup of M4_function_n, moves bit 0 of the carry argument into the carry flag, then jumps into the shared code of M4_function_n (label oop or label end), which also performs the register restore and the ret.
61: pushl %edi C these four registers are restored by the popl sequence at the end of M4_function_n
62: pushl %esi
63: pushl %ebx
64: pushl %ebp
65: deflit(`FRAME',16) C four 4-byte pushes so far
66: C Register roles from here on: edi = dst, esi = src1, ebp = src2, ecx = size
67: movl PARAM_DST,%edi
68: movl PARAM_SRC1,%esi
69: movl PARAM_SRC2,%ebp
70: movl PARAM_SIZE,%ecx
71: C The first src2 limb is loaded before size is examined, and size-1 drives the counts below, so size must be at least 1
72: movl (%ebp),%ebx
73:
74: decl %ecx C ecx = size-1; the final limb is handled separately at end2
75: movl %ecx,%edx
76: shrl $3,%ecx C ecx = number of full 8-limb passes
77: andl $7,%edx C edx = limbs left for the one-at-a-time loop
78: testl %ecx,%ecx C zero carry flag
79: jz L(endgo) C no full 8-limb pass needed
80:
81: pushl %edx C keep the leftover count on the stack; the unrolled loop uses edx for data
82: FRAME_pushl() C NOTE(review): presumably records the extra push so PARAM_CARRY below still resolves -- confirm
83: movl PARAM_CARRY,%eax
84: shrl $1,%eax C shift bit 0 into carry
85: jmp LF(M4_function_n,oop) C enter the unrolled loop with the stack laid out as that loop expects (edx pushed)
86:
87: L(endgo):
88: deflit(`FRAME',16) C this path is reached before the pushl %edx above, so only four pushes are outstanding
89: movl PARAM_CARRY,%eax
90: shrl $1,%eax C shift bit 0 into carry
91: jmp LF(M4_function_n,end) C go straight to the leftover-limb handling
92:
93: EPILOGUE()
94:
95:
96: ALIGN(8)
97: PROLOGUE(M4_function_n)
98: C Computes dst = src1 + src2 (adcl build) or dst = src1 - src2 (sbbl build) over size 32-bit limbs, lowest address first, and returns the final carry or borrow (0 or 1) in %eax. size must be at least 1: the first src2 limb is loaded before size is examined and size-1 drives the loop counts.
99: pushl %edi C saved here, restored by the popl sequence before ret
100: pushl %esi
101: pushl %ebx
102: pushl %ebp
103: deflit(`FRAME',16) C four 4-byte pushes so far
104: C Register roles from here on: edi = dst, esi = src1, ebp = src2, ecx = size
105: movl PARAM_DST,%edi
106: movl PARAM_SRC1,%esi
107: movl PARAM_SRC2,%ebp
108: movl PARAM_SIZE,%ecx
109: C ebx always holds the next src2 limb, fetched one step ahead of its use
110: movl (%ebp),%ebx
111:
112: decl %ecx C ecx = size-1; the final limb is handled separately at end2
113: movl %ecx,%edx
114: shrl $3,%ecx C ecx = number of full 8-limb passes
115: andl $7,%edx C edx = limbs left for the one-at-a-time loop
116: testl %ecx,%ecx C zero carry flag
117: jz L(end) C no full 8-limb pass needed
118: pushl %edx C keep the leftover count on the stack; the unrolled loop uses edx for data
119: FRAME_pushl()
120: C Unrolled loop: 8 limbs per pass as four two-limb groups. Only mov, lea and dec sit between the M4_inst instructions; none of them changes the carry flag, so the carry chain runs unbroken across groups and passes.
121: ALIGN(8)
122: L(oop): movl 28(%edi),%eax C fetch destination cache line
123: leal 32(%edi),%edi C advance dst first; the stores below use negative offsets
124:
125: L(1): movl (%esi),%eax
126: movl 4(%esi),%edx
127: M4_inst %ebx,%eax C limb 0: src1 limb in eax combined with src2 limb in ebx
128: movl 4(%ebp),%ebx
129: M4_inst %ebx,%edx C limb 1
130: movl 8(%ebp),%ebx C read ahead the src2 limb for the next group
131: movl %eax,-32(%edi)
132: movl %edx,-28(%edi)
133:
134: L(2): movl 8(%esi),%eax
135: movl 12(%esi),%edx
136: M4_inst %ebx,%eax C limb 2
137: movl 12(%ebp),%ebx
138: M4_inst %ebx,%edx C limb 3
139: movl 16(%ebp),%ebx
140: movl %eax,-24(%edi)
141: movl %edx,-20(%edi)
142:
143: L(3): movl 16(%esi),%eax
144: movl 20(%esi),%edx
145: M4_inst %ebx,%eax C limb 4
146: movl 20(%ebp),%ebx
147: M4_inst %ebx,%edx C limb 5
148: movl 24(%ebp),%ebx
149: movl %eax,-16(%edi)
150: movl %edx,-12(%edi)
151:
152: L(4): movl 24(%esi),%eax
153: movl 28(%esi),%edx
154: M4_inst %ebx,%eax C limb 6
155: movl 28(%ebp),%ebx
156: M4_inst %ebx,%edx C limb 7
157: movl 32(%ebp),%ebx C first src2 limb of the next pass or of the tail; at least one limb always remains after a full pass
158: movl %eax,-8(%edi)
159: movl %edx,-4(%edi)
160: C Advance both source pointers by 8 limbs and count the pass
161: leal 32(%esi),%esi
162: leal 32(%ebp),%ebp
163: decl %ecx
164: jnz L(oop)
165:
166: popl %edx C recover the leftover limb count pushed before the loop
167: FRAME_popl()
168: L(end):
169: decl %edx C test %edx w/o clobbering carry
170: js L(end2) C edx was 0: only the final limb remains
171: incl %edx C undo the test decrement; edx = number of single-limb iterations
172: L(oop2):
173: leal 4(%edi),%edi
174: movl (%esi),%eax
175: M4_inst %ebx,%eax
176: movl 4(%ebp),%ebx C read ahead the next src2 limb
177: movl %eax,-4(%edi)
178: leal 4(%esi),%esi
179: leal 4(%ebp),%ebp
180: decl %edx
181: jnz L(oop2)
182: L(end2):
183: movl (%esi),%eax C final limb; its src2 partner is already in ebx
184: M4_inst %ebx,%eax
185: movl %eax,(%edi)
186: C Turn the carry flag into the return value
187: sbbl %eax,%eax C eax = 0 if carry clear, -1 if carry set
188: negl %eax C eax = 0 or 1
189:
190: popl %ebp
191: popl %ebx
192: popl %esi
193: popl %edi
194: ret
195:
196: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>