Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/lshift.S, Revision 1.1.1.1
1.1 maekawa 1: /* Pentium optimized __mpn_lshift --
2:
3: Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
4:
5: This file is part of the GNU MP Library.
6:
7: The GNU MP Library is free software; you can redistribute it and/or modify
8: it under the terms of the GNU Library General Public License as published by
9: the Free Software Foundation; either version 2 of the License, or (at your
10: option) any later version.
11:
12: The GNU MP Library is distributed in the hope that it will be useful, but
13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15: License for more details.
16:
17: You should have received a copy of the GNU Library General Public License
18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: MA 02111-1307, USA. */
21:
22: /*
23: INPUT PARAMETERS
24: res_ptr (sp + 4)
25: s_ptr (sp + 8)
26: size (sp + 12)
27: cnt (sp + 16)
28: */
29:
30: #include "sysdep.h"
31: #include "asm-syntax.h"
32:
33: .text
34: ALIGN (3)
35: .globl C_SYMBOL_NAME(__mpn_lshift)
36: C_SYMBOL_NAME(__mpn_lshift:) /* shift the size-limb (32-bit words) operand at s_ptr left by cnt bits into res_ptr; the bits shifted out of the top limb are left in %eax */
37: pushl %edi
38: pushl %esi
39: pushl %ebx
40: pushl %ebp
41: /* Four registers were pushed above, so the arguments now start at 20(%esp). */
42: movl 20(%esp),%edi /* res_ptr */
43: movl 24(%esp),%esi /* s_ptr */
44: movl 28(%esp),%ebp /* size */
45: movl 32(%esp),%ecx /* cnt */
46:
47: /* We can use faster code for shift-by-1 under certain conditions. */
48: cmp $1,%ecx
49: jne Lnormal /* cnt != 1: use the general shld code */
50: leal 4(%esi),%eax /* eax = s_ptr + 1 limb */
51: cmpl %edi,%eax
52: jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */
53: leal (%esi,%ebp,4),%eax /* eax = s_ptr + size limbs */
54: cmpl %eax,%edi
55: jnc Lspecial /* jump if res_ptr >= s_ptr + size */
56: /* Neither test passed: fall through to the general code. */
57: Lnormal: /* general path: walks both arrays from the most significant limb downwards */
58: leal -4(%edi,%ebp,4),%edi /* edi = address of res_ptr[size-1] */
59: leal -4(%esi,%ebp,4),%esi /* esi = address of s_ptr[size-1] */
60:
61: movl (%esi),%edx /* edx = most significant source limb */
62: subl $4,%esi
63: xorl %eax,%eax
64: shldl %cl,%edx,%eax /* compute carry limb */
65: pushl %eax /* push carry limb onto stack */
66:
67: decl %ebp /* ebp = size-1, the limbs still to be read */
68: pushl %ebp /* keep size-1 for the tail loop at Lend */
69: shrl $3,%ebp /* ebp = number of 8-limb blocks */
70: jz Lend
71:
72: movl (%edi),%eax /* fetch destination cache line */
73: /* Unrolled loop, 8 limbs per pass; on entry edx holds the limb that was read but not yet shifted and stored. */
74: ALIGN (2)
75: Loop: movl -28(%edi),%eax /* fetch destination cache line (the loaded value is overwritten below) */
76: movl %edx,%ebx
77:
78: movl (%esi),%eax
79: movl -4(%esi),%edx
80: shldl %cl,%eax,%ebx /* ebx = (ebx << cnt) | top cnt bits of eax */
81: shldl %cl,%edx,%eax /* eax = (eax << cnt) | top cnt bits of edx */
82: movl %ebx,(%edi)
83: movl %eax,-4(%edi)
84:
85: movl -8(%esi),%ebx
86: movl -12(%esi),%eax
87: shldl %cl,%ebx,%edx
88: shldl %cl,%eax,%ebx
89: movl %edx,-8(%edi)
90: movl %ebx,-12(%edi)
91:
92: movl -16(%esi),%edx
93: movl -20(%esi),%ebx
94: shldl %cl,%edx,%eax
95: shldl %cl,%ebx,%edx
96: movl %eax,-16(%edi)
97: movl %edx,-20(%edi)
98:
99: movl -24(%esi),%eax
100: movl -28(%esi),%edx /* edx is left unshifted for the next pass or the tail */
101: shldl %cl,%eax,%ebx
102: shldl %cl,%edx,%eax
103: movl %ebx,-24(%edi)
104: movl %eax,-28(%edi)
105:
106: subl $32,%esi /* step both pointers down by 8 limbs */
107: subl $32,%edi
108: decl %ebp
109: jnz Loop
110:
111: Lend: popl %ebp /* ebp = size-1 saved above */
112: andl $7,%ebp /* limbs left over after the 8-limb blocks */
113: jz Lend2
114: Loop2: movl (%esi),%eax /* tail loop: one limb per pass */
115: shldl %cl,%eax,%edx /* edx = (edx << cnt) | top cnt bits of eax */
116: movl %edx,(%edi)
117: movl %eax,%edx /* the limb just read becomes the pending limb */
118: subl $4,%esi
119: subl $4,%edi
120: decl %ebp
121: jnz Loop2
122:
123: Lend2: shll %cl,%edx /* compute least significant limb */
124: movl %edx,(%edi) /* store it */
125:
126: popl %eax /* pop carry limb */
127: /* Restore the registers saved on entry, in reverse push order. */
128: popl %ebp
129: popl %ebx
130: popl %esi
131: popl %edi
132: ret
133:
134: /* We loop from the least significant end of the arrays, which is only
135: permissible if the source and destination don't overlap, since the
136: function is documented to work for overlapping source and destination.
137: */
138:
139: Lspecial: /* shift-by-1 path: doubles each limb with add/adc, least significant limb first */
140: movl (%esi),%edx /* edx = least significant source limb */
141: addl $4,%esi
142:
143: decl %ebp /* ebp = size-1, the limbs still to be read */
144: pushl %ebp /* keep size-1 for the tail loop at LLend */
145: shrl $3,%ebp /* ebp = number of 8-limb blocks */
146:
147: addl %edx,%edx /* edx = limb << 1; CF = the bit shifted out */
148: incl %ebp
149: decl %ebp /* inc/dec pair sets ZF from ebp without touching CF */
150: jz LLend
151:
152: movl (%edi),%eax /* fetch destination cache line */
153: /* Unrolled loop, 8 limbs per pass; edx holds a doubled limb awaiting its store and CF holds the bit to feed into the next limb. */
154: ALIGN (2)
155: LLoop: movl 28(%edi),%eax /* fetch destination cache line (the loaded value is overwritten below) */
156: movl %edx,%ebx
157:
158: movl (%esi),%eax
159: movl 4(%esi),%edx
160: adcl %eax,%eax /* eax = 2*eax + CF; CF = old top bit of eax */
161: movl %ebx,(%edi)
162: adcl %edx,%edx
163: movl %eax,4(%edi)
164:
165: movl 8(%esi),%ebx
166: movl 12(%esi),%eax
167: adcl %ebx,%ebx
168: movl %edx,8(%edi)
169: adcl %eax,%eax
170: movl %ebx,12(%edi)
171:
172: movl 16(%esi),%edx
173: movl 20(%esi),%ebx
174: adcl %edx,%edx
175: movl %eax,16(%edi)
176: adcl %ebx,%ebx
177: movl %edx,20(%edi)
178:
179: movl 24(%esi),%eax
180: movl 28(%esi),%edx
181: adcl %eax,%eax
182: movl %ebx,24(%edi)
183: adcl %edx,%edx /* this doubled limb is stored by the next pass or the tail */
184: movl %eax,28(%edi)
185:
186: leal 32(%esi),%esi /* use leal not to clobber carry */
187: leal 32(%edi),%edi
188: decl %ebp /* dec leaves CF untouched */
189: jnz LLoop
190:
191: LLend: popl %ebp /* ebp = size-1 saved above */
192: sbbl %eax,%eax /* save carry in %eax */
193: andl $7,%ebp /* limbs left over after the 8-limb blocks (and clobbers CF, hence the save) */
194: jz LLend2
195: addl %eax,%eax /* restore carry from eax */
196: LLoop2: movl %edx,%ebx /* tail loop: one limb per pass */
197: movl (%esi),%edx
198: adcl %edx,%edx
199: movl %ebx,(%edi)
200:
201: leal 4(%esi),%esi /* use leal not to clobber carry */
202: leal 4(%edi),%edi
203: decl %ebp
204: jnz LLoop2
205:
206: jmp LL1
207: LLend2: addl %eax,%eax /* restore carry from eax */
208: LL1: movl %edx,(%edi) /* store last limb */
209:
210: sbbl %eax,%eax /* eax = -CF (0 or -1) */
211: negl %eax /* eax = 0 or 1: the bit shifted out of the top limb */
212: /* Restore the registers saved on entry, in reverse push order. */
213: popl %ebp
214: popl %ebx
215: popl %esi
216: popl %edi
217: ret
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>