Annotation of OpenXM_contrib/gmp/mpn/x86/aors_n.asm, Revision 1.1.1.2
1.1 maekawa 1: dnl x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
2:
1.1.1.2 ! ohara 3: dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
1.1 maekawa 4: dnl Foundation, Inc.
5: dnl
6: dnl This file is part of the GNU MP Library.
7: dnl
8: dnl The GNU MP Library is free software; you can redistribute it and/or
9: dnl modify it under the terms of the GNU Lesser General Public License as
10: dnl published by the Free Software Foundation; either version 2.1 of the
11: dnl License, or (at your option) any later version.
12: dnl
13: dnl The GNU MP Library is distributed in the hope that it will be useful,
14: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
15: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16: dnl Lesser General Public License for more details.
17: dnl
18: dnl You should have received a copy of the GNU Lesser General Public
19: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
20: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
21: dnl Suite 330, Boston, MA 02111-1307, USA.
22:
23: include(`../config.m4')
24:
25:
1.1.1.2 ! ohara 26: C cycles/limb
! 27: C P5: 3.375
! 28: C P6: 3.7
! 29: C K6: 3.5
! 30: C K7: 2.25
! 31: C P4: 8.75
! 32:
! 33:
C Choose the arithmetic instruction and the two entry point names from
C whichever of OPERATION_add_n or OPERATION_sub_n is defined.  If neither
C is defined the m4_error branch below is taken.
1.1 maekawa 34: ifdef(`OPERATION_add_n',`
35: define(M4_inst, adcl) C add with carry
36: define(M4_function_n, mpn_add_n)
37: define(M4_function_nc, mpn_add_nc)
38:
39: ',`ifdef(`OPERATION_sub_n',`
40: define(M4_inst, sbbl) C subtract with borrow
41: define(M4_function_n, mpn_sub_n)
42: define(M4_function_nc, mpn_sub_nc)
43:
44: ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
45: ')')')
46:
C NOTE(review): MULFUNC_PROLOGUE presumably declares the entry points this
C one source file can provide - confirm against the build macros.
47: MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
48:
49:
50: C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
51: C mp_size_t size);
52: C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
53: C mp_size_t size, mp_limb_t carry);
54:
C Parameter names used by the code below.  NOTE(review): the numbers are
C presumably byte offsets from esp at function entry - confirm against the
C defframe definition.
55: defframe(PARAM_CARRY,20) C read only on the M4_function_nc paths
56: defframe(PARAM_SIZE, 16)
57: defframe(PARAM_SRC2, 12)
58: defframe(PARAM_SRC1, 8)
59: defframe(PARAM_DST, 4)
60:
1.1.1.2 ! ohara 61: TEXT
1.1 maekawa 62: ALIGN(8)
63:
C M4_function_nc: set up the pointers and the loop count, load the carry
C flag from bit 0 of the carry parameter, then jump into the unrolled loop
C at L(oop).  There is no return sequence here: the code after the loop
C pops esi and edi and returns.
C
C NOTE: for size 0 the count in ecx is 0 on loop entry and the decl at the
C end of the loop wraps around, so size is presumably required to be at
C least 1 - confirm with callers.
64: PROLOGUE(M4_function_nc)
65: deflit(`FRAME',0)
66:
67: pushl %edi FRAME_pushl()
68: pushl %esi FRAME_pushl()
69:
70: movl PARAM_DST,%edi C edi = dst
71: movl PARAM_SRC1,%esi C esi = src1
72: movl PARAM_SRC2,%edx C edx = src2
73: movl PARAM_SIZE,%ecx C ecx = size
74:
75: movl %ecx,%eax C eax = size
76: shrl $3,%ecx C compute count for unrolled loop
77: negl %eax C eax = -size, masked next to the limb slots skipped in the first pass
78: andl $7,%eax C get index where to start loop
1.1.1.2 ! ohara 79: jz L(oopgo) C necessary special case for 0
1.1 maekawa 80: incl %ecx C adjust loop count
81: shll $2,%eax C adjustment for pointers...
82: subl %eax,%edi C ... since they are offset ...
83: subl %eax,%esi C ... by a constant when we ...
84: subl %eax,%edx C ... enter the loop
85: shrl $2,%eax C restore previous value
86:
C Both variants below compute eax = L(oop) - 3 + 9 * index.
C NOTE(review): this presumably relies on the first limb group at L(oop)
C assembling to 6 bytes and every later group to 9 bytes - confirm against
C the assembler output.
87: ifdef(`PIC',`
88: C Calculate start address in loop for PIC. Due to limitations in
89: C old gas, L(oop)-L(0a)-3 cannot be put into the leal
90: call L(0a) C pushes the address of L(0a)
91: L(0a): leal (%eax,%eax,8),%eax C eax = 9 * index
92: addl (%esp),%eax C add the address of L(0a)
1.1.1.2 ! ohara 93: addl $L(oop)-L(0a)-3,%eax
1.1 maekawa 94: addl $4,%esp C drop the pushed address
95: ',`
96: C Calculate start address in loop for non-PIC.
1.1.1.2 ! ohara 97: leal L(oop)-3(%eax,%eax,8),%eax
1.1 maekawa 98: ')
99:
100: C These lines initialize carry from the 5th parameter. Should be
101: C possible to simplify.
C This is done after the address calculation because the addl instructions
C on the PIC path rewrite the flags.
102: pushl %ebp FRAME_pushl()
103: movl PARAM_CARRY,%ebp
104: shrl $1,%ebp C shift bit 0 into carry
105: popl %ebp FRAME_popl()
106:
107: jmp *%eax C jump into loop
108:
109: EPILOGUE()
110:
111:
112: ALIGN(8)
C M4_function_n: same setup as M4_function_nc but with no carry parameter,
C followed by the unrolled loop and the return sequence.  The labels
C L(oopgo) and L(oop) are also entered from M4_function_nc.
C
C The loop handles eight limbs per pass, with M4_inst chaining the carry
C flag from one limb to the next.  The final carry or borrow is returned
C in eax as 0 or 1.
C
C NOTE: for size 0 the count in ecx is 0 on loop entry and the decl at the
C end of the loop wraps around, so size is presumably required to be at
C least 1 - confirm with callers.
113: PROLOGUE(M4_function_n)
114: deflit(`FRAME',0)
115:
116: pushl %edi FRAME_pushl()
117: pushl %esi FRAME_pushl()
118:
119: movl PARAM_DST,%edi C edi = dst
120: movl PARAM_SRC1,%esi C esi = src1
121: movl PARAM_SRC2,%edx C edx = src2
122: movl PARAM_SIZE,%ecx C ecx = size
123:
124: movl %ecx,%eax C eax = size
125: shrl $3,%ecx C compute count for unrolled loop
126: negl %eax C eax = -size, masked next to the limb slots skipped in the first pass
127: andl $7,%eax C get index where to start loop
C andl leaves the carry flag clear, so a direct entry at L(oop) starts
C with no carry in.
128: jz L(oop) C necessary special case for 0
129: incl %ecx C adjust loop count
130: shll $2,%eax C adjustment for pointers...
131: subl %eax,%edi C ... since they are offset ...
132: subl %eax,%esi C ... by a constant when we ...
133: subl %eax,%edx C ... enter the loop
134: shrl $2,%eax C restore previous value
135:
C The shrl above shifts out the two zero bits put in by the shll, so the
C carry flag is clear at this point.  Both variants below compute
C eax = L(oop) - 3 + 9 * index.
C NOTE(review): the PIC variant ends with addl instructions, which rewrite
C the carry flag; presumably it is clear again after the addl to esp -
C confirm.  The 9 byte stride presumably relies on the first limb group at
C L(oop) assembling to 6 bytes and every later group to 9 bytes - confirm
C against the assembler output.
136: ifdef(`PIC',`
137: C Calculate start address in loop for PIC. Due to limitations in
138: C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
139: call L(0b) C pushes the address of L(0b)
140: L(0b): leal (%eax,%eax,8),%eax C eax = 9 * index
141: addl (%esp),%eax C add the address of L(0b)
142: addl $L(oop)-L(0b)-3,%eax
143: addl $4,%esp C drop the pushed address
144: ',`
145: C Calculate start address in loop for non-PIC.
146: leal L(oop)-3(%eax,%eax,8),%eax
147: ')
148: jmp *%eax C jump into loop
149:
C Entered from M4_function_nc when the start index is 0: load the carry
C flag from bit 0 of PARAM_CARRY, then continue past the ALIGN into L(oop).
150: L(oopgo):
151: pushl %ebp FRAME_pushl()
152: movl PARAM_CARRY,%ebp
153: shrl $1,%ebp C shift bit 0 into carry
154: popl %ebp FRAME_popl()
155:
156: ALIGN(8)
C Each group of three instructions below handles one limb: load from src1,
C apply M4_inst with the src2 limb and the carry flag, store to dst.
157: L(oop): movl (%esi),%eax
158: M4_inst (%edx),%eax
159: movl %eax,(%edi)
160: movl 4(%esi),%eax
161: M4_inst 4(%edx),%eax
162: movl %eax,4(%edi)
163: movl 8(%esi),%eax
164: M4_inst 8(%edx),%eax
165: movl %eax,8(%edi)
166: movl 12(%esi),%eax
167: M4_inst 12(%edx),%eax
168: movl %eax,12(%edi)
169: movl 16(%esi),%eax
170: M4_inst 16(%edx),%eax
171: movl %eax,16(%edi)
172: movl 20(%esi),%eax
173: M4_inst 20(%edx),%eax
174: movl %eax,20(%edi)
175: movl 24(%esi),%eax
176: M4_inst 24(%edx),%eax
177: movl %eax,24(%edi)
178: movl 28(%esi),%eax
179: M4_inst 28(%edx),%eax
180: movl %eax,28(%edi)
181: leal 32(%edi),%edi C advance by eight limbs; leal leaves the flags alone
182: leal 32(%esi),%esi
183: leal 32(%edx),%edx
184: decl %ecx C decl does not change the carry flag
185: jnz L(oop)
186:
187: sbbl %eax,%eax C eax = 0 or -1 according to the carry flag
188: negl %eax C eax = 0 or 1
189:
190: popl %esi C restore the registers pushed on entry
191: popl %edi
192: ret
193:
194: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>