Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/mod_34lsub1.asm, Revision 1.1.1.1
1.1 ohara 1: dnl Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
2:
3: dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
4: dnl
5: dnl This file is part of the GNU MP Library.
6: dnl
7: dnl The GNU MP Library is free software; you can redistribute it and/or
8: dnl modify it under the terms of the GNU Lesser General Public License as
9: dnl published by the Free Software Foundation; either version 2.1 of the
10: dnl License, or (at your option) any later version.
11: dnl
12: dnl The GNU MP Library is distributed in the hope that it will be useful,
13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15: dnl Lesser General Public License for more details.
16: dnl
17: dnl You should have received a copy of the GNU Lesser General Public
18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
20: dnl Suite 330, Boston, MA 02111-1307, USA.
21:
22: include(`../config.m4')
23:
24:
25: C P5: 1.66 cycles/limb
26:
27:
28: C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
29: C Returns a value congruent to the size-limb number at src modulo 2^24-1, not necessarily fully reduced.
30:
31: defframe(PARAM_SIZE, 8)
32: defframe(PARAM_SRC, 4)
33:
34: TEXT
35: ALIGN(16)
36: PROLOGUE(mpn_mod_34lsub1)
37: deflit(`FRAME',0)
38:
39: movl PARAM_SIZE, %ecx C ecx = size
40: movl PARAM_SRC, %edx C edx = src
41:
42: subl $2, %ecx C ecx = size-2, flags from here feed both branches below
43: ja L(three_or_more) C taken when size > 2 as unsigned
44:
45: movl (%edx), %eax C eax = src[0], movl leaves the subl flags intact
46: jne L(one) C size != 2, return src[0] unchanged
47: C size == 2, fold src[0] and src[1] into pieces weighted modulo 2^24-1
48:
49: movl 4(%edx), %ecx C ecx = src[1]
50: movl %eax, %edx
51:
52: shrl $24, %edx C src[0] high 8 bits, weight 2^24 == 1
53: andl $0xFFFFFF, %eax C src[0] low 24 bits
54:
55: addl %edx, %eax
56: movl %ecx, %edx
57:
58: shrl $16, %ecx C src[1] high 16 bits, weight 2^48 == 1
59: andl $0xFFFF, %edx C src[1] low 16 bits
60:
61: shll $8, %edx C src[1] low part, weight 2^32 == 2^8
62: addl %ecx, %eax
63:
64: addl %edx, %eax
65:
66: L(one):
67: ret
68:
69:
70: L(three_or_more):
71: C eax
72: C ebx
73: C ecx size-2
74: C edx src
75: C esi
76: C edi
77: C ebp
78:
79: pushl %ebx FRAME_pushl() C ebx esi edi ebp are saved here and restored before the final ret
80: pushl %esi FRAME_pushl()
81:
82: pushl %edi FRAME_pushl()
83: pushl %ebp FRAME_pushl()
84:
85: xorl %esi, %esi C 0mod3
86: xorl %edi, %edi C 1mod3
87:
88: xorl %ebp, %ebp C 2mod3, and clear carry
89:
90: L(top):
91: C eax scratch
92: C ebx scratch
93: C ecx counter, limbs
94: C edx src
95: C esi 0mod3
96: C edi 1mod3
97: C ebp 2mod3
98:
99: movl (%edx), %eax
100: movl 4(%edx), %ebx
101:
102: adcl %eax, %esi C carry in comes from the previous 2mod3 add, or is clear on entry
103: movl 8(%edx), %eax
104:
105: adcl %ebx, %edi
106: leal 12(%edx), %edx C advance src by three limbs, leal does not touch carry
107:
108: adcl %eax, %ebp
109: leal -2(%ecx), %ecx C leal plus decl subtract 3 from the counter while keeping carry
110:
111: decl %ecx
112: jg L(top)
113:
114:
115: C ecx is -2, -1 or 0, representing 0, 1 or 2 more limbs, respectively
116:
117: movl $0xFFFFFFFF, %ebx C mask, a carry out of 2mod3 is worth 2^96 == 1
118: incl %ecx C incl and decl leave the pending carry alone
119:
120: js L(combine) C 0 more
121:
122: movl (%edx), %eax
123: movl $0xFFFFFF00, %ebx C a carry out of 0mod3 is worth 2^32 == 2^8
124:
125: adcl %eax, %esi
126: decl %ecx
127:
128: js L(combine) C 1 more
129:
130: movl 4(%edx), %eax
131: movl $0xFFFF0000, %ebx C a carry out of 1mod3 is worth 2^64 == 2^16
132:
133: adcl %eax, %edi
134:
135:
136:
137: L(combine):
138: C eax
139: C ebx mask
140: C ecx
141: C edx
142: C esi 0mod3
143: C edi 1mod3
144: C ebp 2mod3
145:
146: sbbl %ecx, %ecx C carry, ecx = -1 if the last adcl carried out, else 0
147: movl %esi, %eax C 0mod3
148:
149: andl %ebx, %ecx C masked for position, ecx = minus the carry weight, or 0
150: andl $0xFFFFFF, %eax C 0mod3 low
151:
152: shrl $24, %esi C 0mod3 high
153: subl %ecx, %eax C apply carry, subtracting the negative adds the weight
154:
155: addl %esi, %eax C apply 0mod3
156: movl %edi, %ebx C 1mod3
157:
158: shrl $16, %edi C 1mod3 high
159: andl $0x0000FFFF, %ebx
160:
161: shll $8, %ebx C 1mod3 low
162: addl %edi, %eax C apply 1mod3 high
163:
164: addl %ebx, %eax C apply 1mod3 low
165: movl %ebp, %ebx C 2mod3
166:
167: shrl $8, %ebp C 2mod3 high
168: andl $0xFF, %ebx
169:
170: shll $16, %ebx C 2mod3 low
171: addl %ebp, %eax C apply 2mod3 high
172:
173: addl %ebx, %eax C apply 2mod3 low
174:
175: popl %ebp
176: popl %edi
177:
178: popl %esi
179: popl %ebx
180:
181: ret
182:
183: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>