Annotation of OpenXM_contrib/gmp/mpn/x86/addsub_n.S, Revision 1.1.1.1
1.1 maekawa 1: /* Currently not working and not used. */
2:
3: /*
4: Copyright (C) 1999 Free Software Foundation, Inc.
5:
6: This file is part of the GNU MP Library.
7:
8: The GNU MP Library is free software; you can redistribute it and/or modify
9: it under the terms of the GNU Lesser General Public License as published by
10: the Free Software Foundation; either version 2.1 of the License, or (at your
11: option) any later version.
12:
13: The GNU MP Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: License for more details.
17:
18: You should have received a copy of the GNU Lesser General Public License
19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: MA 02111-1307, USA.
22: */
23:
24:
/* Carry/borrow juggling for interleaved adcl/sbbl chains.  r is a register
   that holds whichever of the two flags is not currently in CF.

   SAVE_BORROW_RESTORE_CARRY(r): adcl r,r computes r = 2*r + CF, so the
   incoming CF lands in bit 0 and the old bit 0 moves up to bit 1; the
   following shll $31,r leaves the new bit 0 in bit 31 and shifts the old
   bit 0 out into CF.

   SAVE_CARRY_RESTORE_BORROW(r): adcl r,r computes r = 2*r + CF, so the
   incoming CF lands in bit 0 and the old bit 31 is carried out into CF.  */
25: #define SAVE_BORROW_RESTORE_CARRY(r) adcl r,r; shll $31,r
26: #define SAVE_CARRY_RESTORE_BORROW(r) adcl r,r
27:
/* Export the two addsub entry points defined below.  */
28: .globl mpn_addsub_n_0
29: .globl mpn_addsub_n_1
30:
/* mpn_addsub_n_0 -- i386/i486/p6 addsub loop for the "full overlap" case
   r1==s2, r2==s1:

	r1 = r2 + r1		edi = esi + edi
	r2 = r2 - r1		esi = esi - edi

   Subtraction and addition alternate in being two limbs ahead of the
   other, so the carry and the borrow only have to be swapped twice per
   group of four limbs.

   Arguments (read from the stack after the four register saves):
	20(%esp) -> %edi	pointer receiving the sum (res_ptr)
	24(%esp) -> %esi	pointer receiving the difference (s1_ptr)
	36(%esp) -> %ebp	size in limbs

   Registers: %eax scratch limb, %ebx/%ecx limbs of the chain that is
   running ahead, %edx the parked carry or borrow, %ebp number of 4-limb
   groups still to do.

   Only size/4 complete groups of four limbs are processed; the size%4
   trailing limbs are left untouched and nothing is done when size < 4.
   NOTE(review): no carry/borrow result is placed in %eax -- confirm what
   callers expect before putting this routine into use.  */
mpn_addsub_n_0:
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

	movl	20(%esp),%edi		/* res_ptr */
	movl	24(%esp),%esi		/* s1_ptr */
	movl	36(%esp),%ebp		/* size */

	shrl	$2,%ebp			/* ebp = number of 4-limb groups */
	jz	Ldone0			/* no full group: decl/jnz would wrap */
	xorl	%edx,%edx		/* no parked carry, and CF = 0 (no borrow) */
	.align 4
Loop0:					/* L=load E=execute S=store */
	/* On entry CF = borrow, bit 0 of %edx = add carry.  */
	movl	(%esi),%ebx		/* sub 0 L */
	movl	4(%esi),%ecx		/* sub 1 L */
	sbbl	(%edi),%ebx		/* sub 0 LE */
	sbbl	4(%edi),%ecx		/* sub 1 LE */
	/* SAVE_BORROW_RESTORE_CARRY(%edx): park the borrow in bit 31 of
	   %edx and bring the add carry back into CF.  */
	adcl	%edx,%edx
	shll	$31,%edx
	movl	(%esi),%eax		/* add 0 L */
	adcl	%eax,(%edi)		/* add 0 LES */
	movl	4(%esi),%eax		/* add 1 L */
	adcl	%eax,4(%edi)		/* add 1 LES */
	movl	%ebx,(%esi)		/* sub 0 S */
	movl	%ecx,4(%esi)		/* sub 1 S */
	movl	8(%esi),%ebx		/* add 2 L */
	adcl	8(%edi),%ebx		/* add 2 LE */
	movl	12(%esi),%ecx		/* add 3 L */
	adcl	12(%edi),%ecx		/* add 3 LE */
	/* SAVE_CARRY_RESTORE_BORROW(%edx): park the add carry in bit 0 of
	   %edx and bring the borrow back into CF.  */
	adcl	%edx,%edx
	movl	8(%edi),%eax		/* sub 2 L */
	sbbl	%eax,8(%esi)		/* sub 2 LES */
	movl	12(%edi),%eax		/* sub 3 L */
	sbbl	%eax,12(%esi)		/* sub 3 LES */
	movl	%ebx,8(%edi)		/* add 2 S */
	movl	%ecx,12(%edi)		/* add 3 S */
	/* leal and decl leave CF alone, so the borrow survives into the
	   next iteration.  */
	leal	16(%esi),%esi
	leal	16(%edi),%edi
	decl	%ebp
	jnz	Loop0

Ldone0:
	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
85:
/* mpn_addsub_n_1 -- i386/i486/p6 addsub loop for the "full overlap" case
   r1==s1, r2==s2:

	r1 = r1 + r2		edi = edi + esi
	r2 = r1 - r2		esi = edi - esi

   Subtraction and addition alternate in being two limbs ahead of the
   other, so the carry and the borrow only have to be swapped twice per
   group of four limbs.

   Arguments (read from the stack after the four register saves):
	20(%esp) -> %edi	pointer receiving the sum (res_ptr)
	24(%esp) -> %esi	pointer receiving the difference (s1_ptr)
	36(%esp) -> %ebp	size in limbs

   Registers: %eax scratch limb, %ebx/%ecx limbs of the chain that is
   running ahead, %edx the parked carry or borrow, %ebp number of 4-limb
   groups still to do.

   Only size/4 complete groups of four limbs are processed; the size%4
   trailing limbs are left untouched and nothing is done when size < 4.
   NOTE(review): no carry/borrow result is placed in %eax -- confirm what
   callers expect before putting this routine into use.  */
mpn_addsub_n_1:
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

	movl	20(%esp),%edi		/* res_ptr */
	movl	24(%esp),%esi		/* s1_ptr */
	movl	36(%esp),%ebp		/* size */

	shrl	$2,%ebp			/* ebp = number of 4-limb groups */
	jz	Ldone1			/* no full group: decl/jnz would wrap */
	xorl	%edx,%edx		/* no parked carry, and CF = 0 (no borrow) */
	.align 4
Loop1:					/* L=load E=execute S=store */
	/* On entry CF = borrow, bit 0 of %edx = add carry.  */
	movl	(%edi),%ebx		/* sub 0 L */
	sbbl	(%esi),%ebx		/* sub 0 LE */
	movl	4(%edi),%ecx		/* sub 1 L */
	sbbl	4(%esi),%ecx		/* sub 1 LE */
	/* SAVE_BORROW_RESTORE_CARRY(%edx): park the borrow in bit 31 of
	   %edx and bring the add carry back into CF.  */
	adcl	%edx,%edx
	shll	$31,%edx
	movl	(%esi),%eax		/* add 0 L */
	adcl	%eax,(%edi)		/* add 0 LES */
	movl	4(%esi),%eax		/* add 1 L */
	adcl	%eax,4(%edi)		/* add 1 LES */
	movl	%ebx,(%esi)		/* sub 0 S */
	movl	%ecx,4(%esi)		/* sub 1 S */
	movl	8(%esi),%ebx		/* add 2 L */
	adcl	8(%edi),%ebx		/* add 2 LE */
	movl	12(%esi),%ecx		/* add 3 L */
	adcl	12(%edi),%ecx		/* add 3 LE */
	/* SAVE_CARRY_RESTORE_BORROW(%edx): park the add carry in bit 0 of
	   %edx and bring the borrow back into CF.  */
	adcl	%edx,%edx
	movl	8(%edi),%eax		/* sub 2 L */
	sbbl	8(%esi),%eax		/* sub 2 LE */
	movl	%eax,8(%esi)		/* sub 2 S */
	movl	12(%edi),%eax		/* sub 3 L */
	sbbl	12(%esi),%eax		/* sub 3 LE */
	movl	%eax,12(%esi)		/* sub 3 S */
	movl	%ebx,8(%edi)		/* add 2 S */
	movl	%ecx,12(%edi)		/* add 3 S */
	/* leal and decl leave CF alone, so the borrow survives into the
	   next iteration.  */
	leal	16(%esi),%esi
	leal	16(%edi),%edi
	decl	%ebp
	jnz	Loop1

Ldone1:
	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
142:
/* mpn_copy -- copy size limbs from s1_ptr to res_ptr, lowest address first.

   Arguments (read from the stack after the four register saves):
	20(%esp) -> %edi	res_ptr (destination)
	24(%esp) -> %esi	s1_ptr (source)
	28(%esp) -> %ebp	size in limbs

   The bulk is moved four limbs per iteration; the size%4 remaining limbs
   are then moved one at a time.  size == 0 copies nothing.
   %eax is used as scratch; %ebx, %esi, %edi and %ebp are saved and
   restored.  */
	.globl	mpn_copy
mpn_copy:
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

	movl	20(%esp),%edi		/* res_ptr */
	movl	24(%esp),%esi		/* s1_ptr */
	movl	28(%esp),%ebp		/* size */

	shrl	$2,%ebp			/* ebp = number of 4-limb groups */
	jz	Lcopy_tail		/* fewer than 4 limbs: skip the unrolled loop */
	.align 4
Loop2:
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	8(%esi),%eax
	movl	12(%esi),%ebx
	movl	%eax,8(%edi)
	movl	%ebx,12(%edi)
	leal	16(%esi),%esi
	leal	16(%edi),%edi
	decl	%ebp
	jnz	Loop2

Lcopy_tail:
	movl	28(%esp),%ebp		/* reload size from the stack */
	andl	$3,%ebp			/* ebp = size % 4 leftover limbs */
	jz	Lcopy_done
Lcopy_tail_loop:
	movl	(%esi),%eax
	movl	%eax,(%edi)
	leal	4(%esi),%esi
	leal	4(%edi),%edi
	decl	%ebp
	jnz	Lcopy_tail_loop

Lcopy_done:
	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>