Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/rshift.S, Revision 1.1.1.1
1.1 maekawa 1: /* Pentium optimized __mpn_rshift --
2:
3: Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
4:
5: This file is part of the GNU MP Library.
6:
7: The GNU MP Library is free software; you can redistribute it and/or modify
8: it under the terms of the GNU Library General Public License as published by
9: the Free Software Foundation; either version 2 of the License, or (at your
10: option) any later version.
11:
12: The GNU MP Library is distributed in the hope that it will be useful, but
13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15: License for more details.
16:
17: You should have received a copy of the GNU Library General Public License
18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: MA 02111-1307, USA. */
21:
22: /*
23: INPUT PARAMETERS
24: res_ptr (sp + 4)
25: s_ptr (sp + 8)
26: size (sp + 12)
27: cnt (sp + 16)
28: */
29:
30: #include "sysdep.h"
31: #include "asm-syntax.h"
32:
33: .text
34: ALIGN (3)
35: .globl C_SYMBOL_NAME(__mpn_rshift)
/* __mpn_rshift (res_ptr, s_ptr, size, cnt)

   Shift the `size'-limb number at s_ptr right by `cnt' bits and store the
   `size' result limbs at res_ptr.  A limb is one 32-bit word; the least
   significant limb is at the lowest address.  On return %eax holds the bits
   shifted out at the bottom, placed in the most significant bits of the
   word (the "carry limb").

   Register roles after the prologue:
     %edi  res_ptr (store pointer)
     %esi  s_ptr   (load pointer)
     %ebp  limb counter
     %ecx  cnt (only %cl is used by the shifts)
     %eax, %ebx, %edx  rotating limb registers

   Two code paths:
     Lnormal   any cnt; walks from low to high addresses using shrdl.
     Lspecial  cnt == 1 only; walks from high to low addresses using rcrl,
               passing the shifted-out bit from limb to limb in the carry
               flag.  Taken only when the two address checks below succeed.

   NOTE(review): size == 0 is not handled (the first source limb is loaded
   unconditionally and size - 1 would wrap), and cnt is not range-checked;
   presumably callers guarantee size >= 1 and 1 <= cnt <= 31 -- confirm.  */
36: C_SYMBOL_NAME(__mpn_rshift:)
37: pushl %edi /* save the four registers this routine overwrites */
38: pushl %esi
39: pushl %ebx
40: pushl %ebp
41:
42: movl 20(%esp),%edi /* res_ptr (sp + 4 before the four pushes above) */
43: movl 24(%esp),%esi /* s_ptr */
44: movl 28(%esp),%ebp /* size */
45: movl 32(%esp),%ecx /* cnt */
46:
47: /* We can use faster code for shift-by-1 under certain conditions. */
48: cmp $1,%ecx /* the fast path only handles cnt == 1 */
49: jne Lnormal
50: leal 4(%edi),%eax /* eax = address of res_ptr[1] */
51: cmpl %esi,%eax /* unsigned compare: eax - s_ptr */
52: jnc Lspecial /* jump if res_ptr + 1 >= s_ptr (pointer arithmetic in limbs) */
53: leal (%edi,%ebp,4),%eax /* eax = address just past the destination */
54: cmpl %eax,%esi /* unsigned compare: s_ptr - eax */
55: jnc Lspecial /* jump if s_ptr >= res_ptr + size */
56:
57: Lnormal:
58: movl (%esi),%edx /* edx = least significant source limb */
59: addl $4,%esi
60: xorl %eax,%eax
61: shrdl %cl,%edx,%eax /* compute carry limb: low cnt bits of edx land in the top of eax */
62: pushl %eax /* push carry limb onto stack */
63:
64: decl %ebp /* one limb already loaded; size - 1 left to load */
65: pushl %ebp /* keep the count for the remainder loop at Lend */
66: shrl $3,%ebp /* number of full 8-limb iterations */
67: jz Lend
68:
69: movl (%edi),%eax /* fetch destination cache line (loaded value is discarded) */
70:
71: ALIGN (2)
72: Loop: movl 28(%edi),%eax /* fetch destination cache line (loaded value is discarded) */
73: movl %edx,%ebx /* ebx = pending limb carried in from the previous step */
74:
75: movl (%esi),%eax /* each pair below: load two limbs, shift two, store two */
76: movl 4(%esi),%edx
77: shrdl %cl,%eax,%ebx /* ebx = (ebx >> cnt) with the low bits of eax shifted in on top */
78: shrdl %cl,%edx,%eax
79: movl %ebx,(%edi)
80: movl %eax,4(%edi)
81:
82: movl 8(%esi),%ebx
83: movl 12(%esi),%eax
84: shrdl %cl,%ebx,%edx
85: shrdl %cl,%eax,%ebx
86: movl %edx,8(%edi)
87: movl %ebx,12(%edi)
88:
89: movl 16(%esi),%edx
90: movl 20(%esi),%ebx
91: shrdl %cl,%edx,%eax
92: shrdl %cl,%ebx,%edx
93: movl %eax,16(%edi)
94: movl %edx,20(%edi)
95:
96: movl 24(%esi),%eax
97: movl 28(%esi),%edx /* edx leaves the iteration as the new pending limb */
98: shrdl %cl,%eax,%ebx
99: shrdl %cl,%edx,%eax
100: movl %ebx,24(%edi)
101: movl %eax,28(%edi)
102:
103: addl $32,%esi /* advance both pointers by 8 limbs */
104: addl $32,%edi
105: decl %ebp
106: jnz Loop
107:
108: Lend: popl %ebp /* recover size - 1 pushed above */
109: andl $7,%ebp /* limbs left over after the 8-limb iterations */
110: jz Lend2
111: Loop2: movl (%esi),%eax /* one limb per iteration: eax = next source limb */
112: shrdl %cl,%eax,%edx /* compute result limb */
113: movl %edx,(%edi)
114: movl %eax,%edx /* the limb just loaded becomes the pending limb */
115: addl $4,%esi
116: addl $4,%edi
117: decl %ebp
118: jnz Loop2
119:
120: Lend2: shrl %cl,%edx /* compute most significant limb */
121: movl %edx,(%edi) /* store it */
122:
123: popl %eax /* pop carry limb; it is the value left in eax at return */
124:
125: popl %ebp /* restore the registers saved on entry */
126: popl %ebx
127: popl %esi
128: popl %edi
129: ret
130:
131: /* We loop from the most significant end of the arrays, which is only
132: permissible if that direction cannot overwrite source limbs that are still
133: unread (hence the two address checks at entry), since the function is
documented to work for overlapping source and destination.
134: */
135:
136: Lspecial:
137: leal -4(%edi,%ebp,4),%edi /* edi = address of the highest destination limb */
138: leal -4(%esi,%ebp,4),%esi /* esi = address of the highest source limb */
139:
140: movl (%esi),%edx /* edx = most significant source limb */
141: subl $4,%esi
142:
143: decl %ebp /* one limb already loaded; size - 1 left to load */
144: pushl %ebp /* keep the count for the remainder loop at LLend */
145: shrl $3,%ebp /* number of full 8-limb iterations */
146:
147: shrl $1,%edx /* shift the top limb; its low bit goes to the carry flag */
148: incl %ebp /* incl + decl leave ebp unchanged and the carry flag intact, */
149: decl %ebp /* but set the zero flag from ebp for the jz below */
150: jz LLend
151:
152: movl (%edi),%eax /* fetch destination cache line (loaded value is discarded) */
153:
154: ALIGN (2)
155: LLoop: movl -28(%edi),%eax /* fetch destination cache line (loaded value is discarded) */
156: movl %edx,%ebx /* ebx = pending, already shifted limb */
157:
158: movl (%esi),%eax /* loads and stores below do not alter the carry flag */
159: movl -4(%esi),%edx
160: rcrl $1,%eax /* carry enters at the top bit; the low bit becomes the new carry */
161: movl %ebx,(%edi)
162: rcrl $1,%edx
163: movl %eax,-4(%edi)
164:
165: movl -8(%esi),%ebx
166: movl -12(%esi),%eax
167: rcrl $1,%ebx
168: movl %edx,-8(%edi)
169: rcrl $1,%eax
170: movl %ebx,-12(%edi)
171:
172: movl -16(%esi),%edx
173: movl -20(%esi),%ebx
174: rcrl $1,%edx
175: movl %eax,-16(%edi)
176: rcrl $1,%ebx
177: movl %edx,-20(%edi)
178:
179: movl -24(%esi),%eax
180: movl -28(%esi),%edx
181: rcrl $1,%eax
182: movl %ebx,-24(%edi)
183: rcrl $1,%edx /* edx leaves the iteration as the new pending limb */
184: movl %eax,-28(%edi)
185:
186: leal -32(%esi),%esi /* use leal not to clobber carry */
187: leal -32(%edi),%edi
188: decl %ebp /* decl leaves the carry flag untouched */
189: jnz LLoop
190:
191: LLend: popl %ebp /* recover size - 1 pushed above */
192: sbbl %eax,%eax /* save carry in %eax (0 if clear, -1 if set) */
193: andl $7,%ebp /* limbs left over; this overwrites the flags, hence the save */
194: jz LLend2
195: addl %eax,%eax /* restore carry from eax */
196: LLoop2: movl %edx,%ebx /* ebx = pending, already shifted limb */
197: movl (%esi),%edx
198: rcrl $1,%edx
199: movl %ebx,(%edi)
200:
201: leal -4(%esi),%esi /* use leal not to clobber carry */
202: leal -4(%edi),%edi
203: decl %ebp
204: jnz LLoop2
205:
206: jmp LL1 /* carry is already live; skip the restore at LLend2 */
207: LLend2: addl %eax,%eax /* restore carry from eax */
208: LL1: movl %edx,(%edi) /* store last limb */
209:
210: movl $0,%eax /* zero eax with movl, which leaves the carry flag alone */
211: rcrl $1,%eax /* eax = final shifted-out bit, placed in the top bit */
212:
213: popl %ebp /* restore the registers saved on entry */
214: popl %ebx
215: popl %esi
216: popl %edi
217: ret
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>