Annotation of OpenXM_contrib/gmp/mpn/x86/dive_1.asm, Revision 1.1.1.1
1.1 ohara 1: dnl x86 mpn_divexact_1 -- mpn by limb exact division.
2:
3: dnl Copyright 2001, 2002 Free Software Foundation, Inc.
4: dnl
5: dnl This file is part of the GNU MP Library.
6: dnl
7: dnl The GNU MP Library is free software; you can redistribute it and/or
8: dnl modify it under the terms of the GNU Lesser General Public License as
9: dnl published by the Free Software Foundation; either version 2.1 of the
10: dnl License, or (at your option) any later version.
11: dnl
12: dnl The GNU MP Library is distributed in the hope that it will be useful,
13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15: dnl Lesser General Public License for more details.
16: dnl
17: dnl You should have received a copy of the GNU Lesser General Public
18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
20: dnl Suite 330, Boston, MA 02111-1307, USA.
21:
22: include(`../config.m4')
23:
24:
25: C cycles/limb
26: C P54 30.0
27: C P55 29.0
28: C P6 13.0 odd divisor, 12.0 even (strangely)
29: C K6 14.0
30: C K7 12.0
31: C P4 42.0
32:
33:
34: C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
35: C mp_limb_t divisor);
36: C
37:
38: defframe(PARAM_DIVISOR,16)
39: defframe(PARAM_SIZE, 12)
40: defframe(PARAM_SRC, 8)
41: defframe(PARAM_DST, 4)
42:
43: dnl re-use parameter space
44: define(VAR_INVERSE,`PARAM_SRC')
45:
46: TEXT
47:
48: ALIGN(16)
49: PROLOGUE(mpn_divexact_1)
50: deflit(`FRAME',0)
51:
52: movl PARAM_DIVISOR, %eax
53: pushl %ebp FRAME_pushl()
54:
55: movl PARAM_SIZE, %ebp
56: pushl %edi FRAME_pushl()
57:
58: pushl %ebx FRAME_pushl()
59: movl $-1, %ecx C shift count
60:
61: pushl %esi FRAME_pushl()
62:
63: L(strip_twos):
64: incl %ecx
65:
66: shrl %eax
67: jnc L(strip_twos)
68:
69: leal 1(%eax,%eax), %ebx C d without twos
70: andl $127, %eax C d/2, 7 bits
71:
72: ifdef(`PIC',`
73: call L(movl_eip_edx)
74: addl $_GLOBAL_OFFSET_TABLE_, %edx
75: movl modlimb_invert_table@GOT(%edx), %edx
76: movzbl (%eax,%edx), %eax C inv 8 bits
77: ',`
78: dnl non-PIC
79: movzbl modlimb_invert_table(%eax), %eax C inv 8 bits
80: ')
81:
82: leal (%eax,%eax), %edx C 2*inv
83: movl %ebx, PARAM_DIVISOR C d without twos
84:
85: imull %eax, %eax C inv*inv
86:
87: movl PARAM_SRC, %esi
88: movl PARAM_DST, %edi
89:
90: imull %ebx, %eax C inv*inv*d
91:
92: subl %eax, %edx C inv = 2*inv - inv*inv*d
93: leal (%edx,%edx), %eax C 2*inv
94:
95: imull %edx, %edx C inv*inv
96:
97: leal (%esi,%ebp,4), %esi C src end
98: leal (%edi,%ebp,4), %edi C dst end
99: negl %ebp C -size
100:
101: imull %ebx, %edx C inv*inv*d
102:
103: subl %edx, %eax C inv = 2*inv - inv*inv*d
104:
105: ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
106: pushl %eax FRAME_pushl()
107: imull PARAM_DIVISOR, %eax
108: cmpl $1, %eax
109: popl %eax FRAME_popl()')
110:
111: movl %eax, VAR_INVERSE
112: movl (%esi,%ebp,4), %eax C src[0]
113:
114: xorl %ebx, %ebx
115: xorl %edx, %edx
116:
117: incl %ebp
118: jz L(one)
119:
120: movl (%esi,%ebp,4), %edx C src[1]
121:
122: shrdl( %cl, %edx, %eax)
123:
124: movl VAR_INVERSE, %edx
125: jmp L(entry)
126:
127:
128: ALIGN(8)
129: nop C k6 code alignment
130: nop
131: L(top):
132: C eax q
133: C ebx carry bit, 0 or -1
134: C ecx shift
135: C edx carry limb
136: C esi src end
137: C edi dst end
138: C ebp counter, limbs, negative
139:
140: movl -4(%esi,%ebp,4), %eax
141: subl %ebx, %edx C accumulate carry bit
142:
143: movl (%esi,%ebp,4), %ebx
144:
145: shrdl( %cl, %ebx, %eax)
146:
147: subl %edx, %eax C apply carry limb
148: movl VAR_INVERSE, %edx
149:
150: sbbl %ebx, %ebx
151:
152: L(entry):
153: imull %edx, %eax
154:
155: movl %eax, -4(%edi,%ebp,4)
156: movl PARAM_DIVISOR, %edx
157:
158: mull %edx
159:
160: incl %ebp
161: jnz L(top)
162:
163:
164: movl -4(%esi), %eax C src high limb
165: L(one):
166: shrl %cl, %eax
167: popl %esi FRAME_popl()
168:
169: addl %ebx, %eax C apply carry bit
170: popl %ebx FRAME_popl()
171:
172: subl %edx, %eax C apply carry limb
173:
174: imull VAR_INVERSE, %eax
175:
176: movl %eax, -4(%edi)
177:
178: popl %edi
179: popl %ebp
180:
181: ret
182:
183:
184: ifdef(`PIC',`
185: L(movl_eip_edx):
186: movl (%esp), %edx
187: ret
188: ')
189:
190: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>