Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/aors_n.asm, Revision 1.1.1.2
1.1 maekawa 1: dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
2:
1.1.1.2 ! ohara 3: dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
1.1 maekawa 4: dnl Foundation, Inc.
5: dnl
6: dnl This file is part of the GNU MP Library.
7: dnl
8: dnl The GNU MP Library is free software; you can redistribute it and/or
9: dnl modify it under the terms of the GNU Lesser General Public License as
10: dnl published by the Free Software Foundation; either version 2.1 of the
11: dnl License, or (at your option) any later version.
12: dnl
13: dnl The GNU MP Library is distributed in the hope that it will be useful,
14: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
15: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16: dnl Lesser General Public License for more details.
17: dnl
18: dnl You should have received a copy of the GNU Lesser General Public
19: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
20: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
21: dnl Suite 330, Boston, MA 02111-1307, USA.
22:
23: include(`../config.m4')
24:
25:
1.1.1.2 ! ohara 26: C P5: 2.375 cycles/limb
! 27:
! 28:
C Operation selection, done at m4 time.  OPERATION_add_n is tested first,
C then OPERATION_sub_n:
C   OPERATION_add_n: M4_inst is adcl, entry points mpn_add_n and mpn_add_nc
C   OPERATION_sub_n: M4_inst is sbbl, entry points mpn_sub_n and mpn_sub_nc
C If neither symbol is defined, m4_error is invoked with a diagnostic.
C M4_inst is the only instruction that differs between the two variants;
C all other code below is shared.
1.1 maekawa 29: ifdef(`OPERATION_add_n',`
30: define(M4_inst, adcl)
31: define(M4_function_n, mpn_add_n)
32: define(M4_function_nc, mpn_add_nc)
33:
34: ',`ifdef(`OPERATION_sub_n',`
35: define(M4_inst, sbbl)
36: define(M4_function_n, mpn_sub_n)
37: define(M4_function_nc, mpn_sub_nc)
38:
39: ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
40: ')')')
41:
C Lists the four function names this one source file can provide.
C NOTE(review): presumably read by the build machinery to decide which
C OPERATION_ symbol to define for each object -- confirm in config.m4.
42: MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
43:
44:
45: C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
46: C mp_size_t size);
47: C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
48: C mp_size_t size, mp_limb_t carry);
49:
C Both functions combine src1 and src2 limb by limb with M4_inst, store the
C result at dst, and return the final carry or borrow as 0 or 1 in eax.
C The code moves in 4 byte steps, so a limb is 32 bits here.  The nc
C variant takes its initial carry from bit 0 of the carry argument.
C
C Parameter names and their byte offsets, in the same order as the
C prototype arguments above: dst at 4 up to carry at 20.
C NOTE(review): the offsets look like they are relative to the stack
C pointer at function entry, with the return address at 0, and corrected
C by the current FRAME value when used -- confirm against defframe in
C the included m4 definitions.
50: defframe(PARAM_CARRY,20)
51: defframe(PARAM_SIZE, 16)
52: defframe(PARAM_SRC2, 12)
53: defframe(PARAM_SRC1, 8)
54: defframe(PARAM_DST, 4)
55:
C M4_function_nc: add or subtract with an explicit carry-in.
C
C The set-up is the same as in M4_function_n.  The only difference is that
C bit 0 of the carry parameter is moved into the carry flag before control
C is transferred into the shared code of M4_function_n, at L(oop) when there
C is at least one block of 8 limbs, otherwise at L(end).
C
C Registers after set-up:
C   edi  dst pointer
C   esi  src1 pointer
C   ebp  src2 pointer
C   ebx  current src2 limb, loaded one step ahead
C   ecx  number of 8 limb blocks, that is (size-1)/8
C   edx  leftover limbs before the final one, that is (size-1) mod 8
C   eax  scratch, and the return value at the end
1.1.1.2 ! ohara 56: TEXT
1.1 maekawa 57: ALIGN(8)
58: PROLOGUE(M4_function_nc)
59:
C Four registers are saved, 16 bytes in total, which is what the FRAME
C value of 16 below records.
60: pushl %edi
61: pushl %esi
62: pushl %ebx
63: pushl %ebp
64: deflit(`FRAME',16)
65:
66: movl PARAM_DST,%edi
67: movl PARAM_SRC1,%esi
68: movl PARAM_SRC2,%ebp
69: movl PARAM_SIZE,%ecx
70:
C Preload the first src2 limb.  This read is unconditional.
71: movl (%ebp),%ebx
72:
C Split size-1 into a block count in ecx and a remainder in edx.  The last
C limb is always handled separately at L(end2), hence size-1 and not size.
73: decl %ecx
74: movl %ecx,%edx
75: shrl $3,%ecx
76: andl $7,%edx
C testl sets the zero flag for the branch below.  It also clears the carry
C flag, but in this function the carry is then replaced by the shrl.
77: testl %ecx,%ecx C zero carry flag
78: jz L(endgo)
79:
C edx is used as a data register inside the unrolled loop, so the remainder
C is kept on the stack; the shared code pops it after the loop.
C NOTE(review): FRAME_pushl presumably adds 4 to FRAME so that PARAM_CARRY
C below still refers to the right stack slot -- confirm in the m4 macros.
80: pushl %edx
81: FRAME_pushl()
82: movl PARAM_CARRY,%eax
83: shrl $1,%eax C shift bit 0 into carry
C jmp leaves the flags alone, so the carry set by shrl reaches L(oop).
1.1.1.2 ! ohara 84: jmp L(oop)
1.1 maekawa 85:
86: L(endgo):
C This path did not push edx, so FRAME is set back to 16 before the
C carry parameter is read.
87: deflit(`FRAME',16)
88: movl PARAM_CARRY,%eax
89: shrl $1,%eax C shift bit 0 into carry
1.1.1.2 ! ohara 90: jmp L(end)
1.1 maekawa 91:
92: EPILOGUE()
93:
94:
C M4_function_n: dst = src1 op src2 over size limbs, lowest limb first,
C where op is add with carry or subtract with borrow as chosen by M4_inst.
C Returns the carry or borrow out of the top limb, 0 or 1, in eax.
C
C Structure:
C   L(oop)   unrolled loop, 8 limbs per pass, run (size-1)/8 times
C   L(oop2)  one limb per pass, run (size-1) mod 8 times
C   L(end2)  the final limb, then the return value
C
C The carry flag is live from the testl in the set-up until the sbbl after
C L(end2).  Everything in between is movl, leal, incl, decl, pushl, popl or
C a jump, none of which change the carry flag, plus M4_inst itself.
C
C Registers:
C   edi  dst pointer
C   esi  src1 pointer
C   ebp  src2 pointer
C   ebx  current src2 limb, always loaded one step ahead of its use
C   ecx  count of 8 limb blocks
C   edx  leftover count outside the unrolled loop, data inside it
C   eax  data, and the return value at the end
C
C NOTE(review): src2[0] is read unconditionally and one limb is always
C processed at L(end2), so the code appears to require size >= 1 --
C confirm against the callers.
95: ALIGN(8)
96: PROLOGUE(M4_function_n)
97:
C Four registers are saved, 16 bytes in total, matching FRAME below.
98: pushl %edi
99: pushl %esi
100: pushl %ebx
101: pushl %ebp
102: deflit(`FRAME',16)
103:
104: movl PARAM_DST,%edi
105: movl PARAM_SRC1,%esi
106: movl PARAM_SRC2,%ebp
107: movl PARAM_SIZE,%ecx
108:
C Preload the first src2 limb.
109: movl (%ebp),%ebx
110:
C ecx = (size-1)/8 blocks, edx = (size-1) mod 8 leftover limbs.
111: decl %ecx
112: movl %ecx,%edx
113: shrl $3,%ecx
114: andl $7,%edx
C testl sets the zero flag for the branch and clears the carry flag, which
C gives this entry point a carry-in of zero.
115: testl %ecx,%ecx C zero carry flag
116: jz L(end)
C Save the leftover count; edx is overwritten with data inside the loop.
117: pushl %edx
118: FRAME_pushl()
119:
120: ALIGN(8)
C The value loaded into eax here is overwritten at L(1); the load is done
C only for its memory access to the destination.
121: L(oop): movl 28(%edi),%eax C fetch destination cache line
C dst is advanced first, so the stores below use offsets -32 to -4.
122: leal 32(%edi),%edi
123:
C Limbs 0 and 1 of the block.  Each M4_inst consumes the src2 limb in ebx,
C and the next src2 limb is loaded into ebx right after it.
124: L(1): movl (%esi),%eax
125: movl 4(%esi),%edx
126: M4_inst %ebx,%eax
127: movl 4(%ebp),%ebx
128: M4_inst %ebx,%edx
129: movl 8(%ebp),%ebx
130: movl %eax,-32(%edi)
131: movl %edx,-28(%edi)
132:
C Limbs 2 and 3.
133: L(2): movl 8(%esi),%eax
134: movl 12(%esi),%edx
135: M4_inst %ebx,%eax
136: movl 12(%ebp),%ebx
137: M4_inst %ebx,%edx
138: movl 16(%ebp),%ebx
139: movl %eax,-24(%edi)
140: movl %edx,-20(%edi)
141:
C Limbs 4 and 5.
142: L(3): movl 16(%esi),%eax
143: movl 20(%esi),%edx
144: M4_inst %ebx,%eax
145: movl 20(%ebp),%ebx
146: M4_inst %ebx,%edx
147: movl 24(%ebp),%ebx
148: movl %eax,-16(%edi)
149: movl %edx,-12(%edi)
150:
C Limbs 6 and 7.  The load from 32(%ebp) fetches the first src2 limb of
C whatever follows.  That limb exists because at least the final limb
C handled at L(end2) still remains after the last block.
151: L(4): movl 24(%esi),%eax
152: movl 28(%esi),%edx
153: M4_inst %ebx,%eax
154: movl 28(%ebp),%ebx
155: M4_inst %ebx,%edx
156: movl 32(%ebp),%ebx
157: movl %eax,-8(%edi)
158: movl %edx,-4(%edi)
159:
C Advance both sources by one block and count down.  leal and decl are
C used so the carry flag is carried into the next pass.
160: leal 32(%esi),%esi
161: leal 32(%ebp),%ebp
162: decl %ecx
163: jnz L(oop)
164:
C Recover the leftover count saved before the loop.
165: popl %edx
166: FRAME_popl()
167: L(end):
C decl then js detects edx equal to zero through the sign flag, and incl
C restores the count otherwise.  None of the three touches the carry flag.
168: decl %edx C test %edx w/o clobbering carry
169: js L(end2)
170: incl %edx
C One limb per pass, with the same load-ahead of the next src2 limb.
171: L(oop2):
172: leal 4(%edi),%edi
173: movl (%esi),%eax
174: M4_inst %ebx,%eax
175: movl 4(%ebp),%ebx
176: movl %eax,-4(%edi)
177: leal 4(%esi),%esi
178: leal 4(%ebp),%ebp
179: decl %edx
180: jnz L(oop2)
C Final limb.  ebx already holds its src2 value and no further limb is
C loaded ahead.
181: L(end2):
182: movl (%esi),%eax
183: M4_inst %ebx,%eax
184: movl %eax,(%edi)
185:
C Turn the carry flag into the return value: sbbl gives 0 or -1, and negl
C makes that 0 or 1.
186: sbbl %eax,%eax
187: negl %eax
188:
C Restore the saved registers in reverse order of the pushes.
189: popl %ebp
190: popl %ebx
191: popl %esi
192: popl %edi
193: ret
194:
195: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>