Annotation of OpenXM_contrib/gmp/mpn/x86/diveby3.asm, Revision 1.1.1.1
1.1 maekawa 1: dnl x86 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
2:
3:
4: dnl Copyright (C) 2000 Free Software Foundation, Inc.
5: dnl
6: dnl This file is part of the GNU MP Library.
7: dnl
8: dnl The GNU MP Library is free software; you can redistribute it and/or
9: dnl modify it under the terms of the GNU Lesser General Public License as
10: dnl published by the Free Software Foundation; either version 2.1 of the
11: dnl License, or (at your option) any later version.
12: dnl
13: dnl The GNU MP Library is distributed in the hope that it will be useful,
14: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
15: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16: dnl Lesser General Public License for more details.
17: dnl
18: dnl You should have received a copy of the GNU Lesser General Public
19: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
20: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
21: dnl Suite 330, Boston, MA 02111-1307, USA.
22:
23:
24: dnl The following all have their own optimized versions of this routine,
25: dnl but for reference the code here runs as follows.
26: dnl
27: dnl cycles/limb
28: dnl P54 18.0
29: dnl P55 17.0
30: dnl P6 14.5
31: dnl K6 14.0
32: dnl K7 10.0
33:
34:
35: include(`../config.m4')
36:
37:
38: C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
39: C mp_limb_t carry);
40: dnl Stack argument offsets, listed in reverse of the prototype order (dst=4, src=8, size=12, carry=16), 4 bytes apart. defframe is defined outside this file; presumably it turns each name into a stack-pointer-relative operand adjusted by FRAME -- confirm.
41: defframe(PARAM_CARRY,16)
42: defframe(PARAM_SIZE, 12)
43: defframe(PARAM_SRC, 8)
44: defframe(PARAM_DST, 4)
45:
46: dnl multiplicative inverse of 3, modulo 2^32
47: deflit(INVERSE_3, 0xAAAAAAAB)
48: dnl Check: 3 * 0xAAAAAAAB = 0x200000001, which is 1 modulo 2^32. The value happens to equal TWO_THIRDS_CEIL below.
49: dnl ceil(b/3) and ceil(b*2/3) where b=2^32
50: deflit(ONE_THIRD_CEIL, 0x55555556)
51: deflit(TWO_THIRDS_CEIL, 0xAAAAAAAB)
52:
53: .text
54: ALIGN(8)
55: C Entry point: divide the size limbs at src by 3 into dst, working from index 0 upward, and return the final carry limb in eax.
56: PROLOGUE(mpn_divexact_by3c)
57: deflit(`FRAME',0)
58: C Argument loads are interleaved with saving ebp, edi, esi and ebx. FRAME_pushl() is defined outside this file; presumably it keeps the PARAM_ offsets correct after each push -- confirm.
59: movl PARAM_SRC, %ecx C ecx = src
60: pushl %ebp FRAME_pushl()
61:
62: movl PARAM_SIZE, %ebp C ebp = size
63: pushl %edi FRAME_pushl()
64:
65: movl PARAM_DST, %edi C edi = dst
66: pushl %esi FRAME_pushl()
67:
68: movl $INVERSE_3, %esi C esi = multiplier, constant for the whole loop
69: pushl %ebx FRAME_pushl()
70:
71: leal (%ecx,%ebp,4), %ecx C ecx = &src[size], 4 bytes per limb
72: movl PARAM_CARRY, %ebx C ebx = incoming carry
73:
74: leal (%edi,%ebp,4), %edi C edi = &dst[size]
75: negl %ebp C ebp = -size, counts up to zero so (base,ebp,4) walks index 0 to size-1
76: C The counter is tested only at the bottom of the loop, so the body always runs at least once; size is presumably required to be nonzero -- confirm against callers.
77:
78: ALIGN(8)
79: L(top):
80: C eax scratch, low product
81: C ebx carry limb (0 to 3)
82: C ecx &src[size]
83: C edx scratch, high product
84: C esi multiplier
85: C edi &dst[size]
86: C ebp counter, limbs, negative
87: C Per limb: q = (src[i] - carry) * INVERSE_3 mod b, stored to dst[i]; next carry = borrow from the subtract, +1 if q>=ceil(b/3), +1 if q>=ceil(b*2/3).
88: movl (%ecx,%ebp,4), %eax C eax = src[i]
89:
90: subl %ebx, %eax C eax = src[i] - carry, CF = borrow
91:
92: setc %bl C bl = borrow (0 or 1); only the low byte of ebx is written, so the old carry is fully replaced only while it fits in bl
93:
94: imull %esi C edx:eax = eax * esi; only the low half in eax is used, edx is clobbered
95:
96: cmpl $ONE_THIRD_CEIL, %eax C CF set when eax is below ceil(b/3), unsigned compare
97: movl %eax, (%edi,%ebp,4) C dst[i] = q; movl leaves the flags from cmpl intact for the sbbl that follows
98:
99: sbbl $-1, %ebx C +1 if eax>=ceil(b/3)
100: cmpl $TWO_THIRDS_CEIL, %eax
101:
102: sbbl $-1, %ebx C +1 if eax>=ceil(b*2/3)
103: incl %ebp C step to the next limb; ZF is set once the counter reaches zero
104:
105: jnz L(top)
106:
107: C Loop finished: hand back the carry and restore the saved registers in reverse push order.
108: movl %ebx, %eax C return value = final carry limb
109: popl %ebx
110: popl %esi
111: popl %edi
112: popl %ebp
113: ret
114:
115: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>