Annotation of OpenXM_contrib/gmp/mpn/x86/aors_n.asm, Revision 1.1.1.1
1.1 maekawa 1: dnl x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
2:
3: dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
4: dnl Foundation, Inc.
5: dnl
6: dnl This file is part of the GNU MP Library.
7: dnl
8: dnl The GNU MP Library is free software; you can redistribute it and/or
9: dnl modify it under the terms of the GNU Lesser General Public License as
10: dnl published by the Free Software Foundation; either version 2.1 of the
11: dnl License, or (at your option) any later version.
12: dnl
13: dnl The GNU MP Library is distributed in the hope that it will be useful,
14: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
15: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16: dnl Lesser General Public License for more details.
17: dnl
18: dnl You should have received a copy of the GNU Lesser General Public
19: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
20: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
21: dnl Suite 330, Boston, MA 02111-1307, USA.
22:
23:
24: include(`../config.m4')
25:
26:
C Assemble-time selection of the operation.  One of OPERATION_add_n or
C OPERATION_sub_n is expected to be defined when m4 processes this file.
C The chosen branch defines three names used by the code further down:
C   M4_inst         the carry-propagating instruction placed in the loop
C                   (adcl in the add branch, sbbl in the sub branch)
C   M4_function_n   name of the entry point without a carry-in parameter
C   M4_function_nc  name of the entry point with a carry-in parameter
C If neither symbol is defined, m4_error is invoked with a diagnostic text.
27: ifdef(`OPERATION_add_n',`
28: define(M4_inst, adcl)
29: define(M4_function_n, mpn_add_n)
30: define(M4_function_nc, mpn_add_nc)
31:
32: ',`ifdef(`OPERATION_sub_n',`
33: define(M4_inst, sbbl)
34: define(M4_function_n, mpn_sub_n)
35: define(M4_function_nc, mpn_sub_nc)
36:
37: ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
38: ')')')
39:
C MULFUNC_PROLOGUE is defined outside this file.  Presumably it announces the
C set of functions that this one source can be built into -- confirm in the
C m4 support files.
40: MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
41:
42:
43: C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
44: C mp_size_t size);
45: C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
46: C mp_size_t size, mp_limb_t carry);
47:
C Stack offsets of the five arguments.  The values 4 to 20 match a layout
C with the return address at offset 0 followed by 4-byte arguments in the
C order given in the prototypes above.  defframe is defined outside this
C file; presumably it folds the current FRAME value into each reference so
C that the names stay valid after pushes -- confirm in the m4 support files.
C PARAM_CARRY is read only on paths that start in M4_function_nc.
48: defframe(PARAM_CARRY,20)
49: defframe(PARAM_SIZE, 16)
50: defframe(PARAM_SRC2, 12)
51: defframe(PARAM_SRC1, 8)
52: defframe(PARAM_DST, 4)
53:
54: .text
55: ALIGN(8)
56:
57: PROLOGUE(M4_function_nc)
C Entry point with an explicit carry-in, the fifth argument PARAM_CARRY.
C This function has no loop and no ret of its own.  It loads the registers,
C works out where to enter the 8-way unrolled loop located in
C M4_function_n, moves bit 0 of the carry argument into CF and transfers
C control there.  The return value is produced by the code after that loop.
C
C State when control reaches the loop:
C   %edi = dst, %esi = src1, %edx = src2 (backed up when the first pass
C          is partial, see below)
C   %ecx = number of passes through the loop
C   CF   = bit 0 of the carry argument
58: deflit(`FRAME',0)
59:
C %edi and %esi are saved here; the matching popl pair is at the end of
C M4_function_n.
60: pushl %edi FRAME_pushl()
61: pushl %esi FRAME_pushl()
62:
63: movl PARAM_DST,%edi
64: movl PARAM_SRC1,%esi
65: movl PARAM_SRC2,%edx
66: movl PARAM_SIZE,%ecx
67:
C %ecx = size / 8 is the number of full passes.  %eax = (-size) & 7 is the
C number of limb slots skipped in the first pass, so that a partial first
C pass handles the size mod 8 leftover limbs.
C When %eax is zero the size is a multiple of 8 and the loop is entered at
C its top through the oopgo label in M4_function_n, which loads the carry
C before running into the loop.
C NOTE(review): with size == 0 that path reaches the loop with %ecx == 0, so
C decl/jnz would not stop after zero passes; callers presumably guarantee
C size >= 1 -- confirm.
68: movl %ecx,%eax
69: shrl $3,%ecx C compute count for unrolled loop
70: negl %eax
71: andl $7,%eax C get index where to start loop
72: jz LF(M4_function_n,oopgo) C necessary special case for 0
C The partial first pass still counts as one pass, hence the incl.  Each
C pointer is moved back by 4 * %eax bytes so that the displacement used by
C the slot where execution starts refers to the first limb.
73: incl %ecx C adjust loop count
74: shll $2,%eax C adjustment for pointers...
75: subl %eax,%edi C ... since they are offset ...
76: subl %eax,%esi C ... by a constant when we ...
77: subl %eax,%edx C ... enter the loop
78: shrl $2,%eax C restore previous value
79:
C Entry address = oop - 3 + 9 * %eax.  The arithmetic implies that the first
C load/M4_inst/store group of the loop occupies 6 bytes (no displacement
C bytes) and every later group 9 bytes, so any change to the loop
C instructions or their operands has to preserve those sizes.
C The PIC variant gets a runtime address by calling the next instruction,
C adding the pushed return address, and then discarding it from the stack.
80: ifdef(`PIC',`
81: C Calculate start address in loop for PIC. Due to limitations in
82: C old gas, LF(M4_function_n,oop)-L(0a)-3 cannot be put into the leal
83: call L(0a)
84: L(0a): leal (%eax,%eax,8),%eax
85: addl (%esp),%eax
86: addl $LF(M4_function_n,oop)-L(0a)-3,%eax
87: addl $4,%esp
88: ',`
89: C Calculate start address in loop for non-PIC.
90: leal LF(M4_function_n,oop)-3(%eax,%eax,8),%eax
91: ')
92:
C The carry is loaded only after the address calculation because the addl
C instructions of the PIC variant modify CF, whereas the popl and jmp that
C follow the shrl leave it alone.  %ebp is borrowed as a scratch register
C and restored by the popl.  FRAME_pushl presumably keeps PARAM_CARRY
C pointing at the argument while %ebp is on the stack -- confirm.
93: C These lines initialize carry from the 5th parameter. Should be
94: C possible to simplify.
95: pushl %ebp FRAME_pushl()
96: movl PARAM_CARRY,%ebp
97: shrl $1,%ebp C shift bit 0 into carry
98: popl %ebp FRAME_popl()
99:
100: jmp *%eax C jump into loop
101:
C No ret in this function: execution continues inside M4_function_n.
102: EPILOGUE()
103:
104:
105: ALIGN(8)
C Entry point without a carry-in.  Applies M4_inst limb by limb to the
C size-limb operands at src1 and src2, stores the results at dst and returns
C the final carry or borrow as 0 or 1 in %eax.
C This function also hosts the unrolled loop, the oopgo entry and the
C return sequence that are shared with M4_function_nc.
C
C Register use:
C   %edi = dst, %esi = src1, %edx = src2
C   %ecx = number of passes through the loop
C   %eax = start index, then jump target, then scratch limb inside the loop
106: PROLOGUE(M4_function_n)
107: deflit(`FRAME',0)
108:
C %edi and %esi are saved here and restored just before the ret.
109: pushl %edi FRAME_pushl()
110: pushl %esi FRAME_pushl()
111:
112: movl PARAM_DST,%edi
113: movl PARAM_SRC1,%esi
114: movl PARAM_SRC2,%edx
115: movl PARAM_SIZE,%ecx
116:
C %ecx = size / 8 is the number of full passes.  %eax = (-size) & 7 is the
C number of limb slots skipped in the first pass, so that a partial first
C pass handles the size mod 8 leftover limbs.  When %eax is zero the size is
C a multiple of 8 and the loop is entered at its top.
C NOTE(review): with size == 0 the loop is reached with %ecx == 0, so
C decl/jnz would not stop after zero passes; callers presumably guarantee
C size >= 1 -- confirm.
117: movl %ecx,%eax
118: shrl $3,%ecx C compute count for unrolled loop
119: negl %eax
120: andl $7,%eax C get index where to start loop
121: jz L(oop) C necessary special case for 0
C The partial first pass still counts as one pass, hence the incl.  Each
C pointer is moved back by 4 * %eax bytes so that the displacement used by
C the slot where execution starts refers to the first limb.
122: incl %ecx C adjust loop count
123: shll $2,%eax C adjustment for pointers...
124: subl %eax,%edi C ... since they are offset ...
125: subl %eax,%esi C ... by a constant when we ...
126: subl %eax,%edx C ... enter the loop
127: shrl $2,%eax C restore previous value
128:
C Entry address = oop - 3 + 9 * %eax, the same scheme as in M4_function_nc.
C The arithmetic implies that the first load/M4_inst/store group of the loop
C occupies 6 bytes and every later group 9 bytes.
C No carry-in is loaded on this path, so the loop must be reached with CF
C clear.  On the jz path the andl has just cleared it.  Otherwise shrl $2
C shifts out two zero bits, since %eax was a multiple of 4 after the shll,
C and leal and jmp do not write flags.
C NOTE(review): in the PIC variant the last flag writer is addl $4,%esp,
C which leaves CF clear unless %esp wraps around -- worth confirming.
129: ifdef(`PIC',`
130: C Calculate start address in loop for PIC. Due to limitations in
131: C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
132: call L(0b)
133: L(0b): leal (%eax,%eax,8),%eax
134: addl (%esp),%eax
135: addl $L(oop)-L(0b)-3,%eax
136: addl $4,%esp
137: ',`
138: C Calculate start address in loop for non-PIC.
139: leal L(oop)-3(%eax,%eax,8),%eax
140: ')
141: jmp *%eax C jump into loop
142:
C Entry used by M4_function_nc when its start index is zero.  It is reached
C only by that jump, never by falling through, because the jmp above is
C unconditional.  It moves bit 0 of the carry argument into CF, restores
C %ebp, and then runs on into the top of the loop.  The padding emitted by
C ALIGN lies between the popl and the loop top and is presumably flag
C neutral -- confirm.  FRAME at this point reflects the two pushes above,
C which matches the stack state M4_function_nc has when it jumps here.
143: L(oopgo):
144: pushl %ebp FRAME_pushl()
145: movl PARAM_CARRY,%ebp
146: shrl $1,%ebp C shift bit 0 into carry
147: popl %ebp FRAME_popl()
148:
C Main loop, eight limbs per pass: load a limb of src1, apply M4_inst with
C the matching limb of src2 (consuming and producing CF), store to dst.
C leal advances the pointers and decl counts the passes; neither writes CF,
C so the carry chain survives from one pass to the next.
C Both entry points jump into the middle of this sequence using computed
C addresses, so the encoded size of every instruction here is significant.
149: ALIGN(8)
150: L(oop): movl (%esi),%eax
151: M4_inst (%edx),%eax
152: movl %eax,(%edi)
153: movl 4(%esi),%eax
154: M4_inst 4(%edx),%eax
155: movl %eax,4(%edi)
156: movl 8(%esi),%eax
157: M4_inst 8(%edx),%eax
158: movl %eax,8(%edi)
159: movl 12(%esi),%eax
160: M4_inst 12(%edx),%eax
161: movl %eax,12(%edi)
162: movl 16(%esi),%eax
163: M4_inst 16(%edx),%eax
164: movl %eax,16(%edi)
165: movl 20(%esi),%eax
166: M4_inst 20(%edx),%eax
167: movl %eax,20(%edi)
168: movl 24(%esi),%eax
169: M4_inst 24(%edx),%eax
170: movl %eax,24(%edi)
171: movl 28(%esi),%eax
172: M4_inst 28(%edx),%eax
173: movl %eax,28(%edi)
174: leal 32(%edi),%edi
175: leal 32(%esi),%esi
176: leal 32(%edx),%edx
177: decl %ecx
178: jnz L(oop)
179:
C Return value: sbbl yields 0 or -1 according to CF and negl turns that into
C 0 or 1, i.e. the carry or borrow out of the last limb processed.
180: sbbl %eax,%eax
181: negl %eax
182:
C Restore the registers saved on entry, by either entry point, and return.
183: popl %esi
184: popl %edi
185: ret
186:
187: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>