Annotation of OpenXM_contrib/gmp/mpn/x86/k7/diveby3.asm, Revision 1.1.1.2
1.1 maekawa 1: dnl AMD K7 mpn_divexact_by3c -- mpn division by 3 with carry-in, expecting no remainder.
2:
1.1.1.2 ! ohara 3: dnl Copyright 2000, 2002 Free Software Foundation, Inc.
1.1 maekawa 4: dnl
5: dnl This file is part of the GNU MP Library.
6: dnl
7: dnl The GNU MP Library is free software; you can redistribute it and/or
8: dnl modify it under the terms of the GNU Lesser General Public License as
9: dnl published by the Free Software Foundation; either version 2.1 of the
10: dnl License, or (at your option) any later version.
11: dnl
12: dnl The GNU MP Library is distributed in the hope that it will be useful,
13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15: dnl Lesser General Public License for more details.
16: dnl
17: dnl You should have received a copy of the GNU Lesser General Public
18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
20: dnl Suite 330, Boston, MA 02111-1307, USA.
21:
22: include(`../config.m4')
23:
24:
1.1.1.2 ! ohara 25: C K7: 8.0 cycles/limb
! 26:
! 27:
1.1 maekawa 28: C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
29: C mp_limb_t carry);
30:
31: defframe(PARAM_CARRY,16) C carry argument (see prototype above)
32: defframe(PARAM_SIZE, 12) C size argument, counted in limbs
33: defframe(PARAM_SRC, 8) C src argument
34: defframe(PARAM_DST, 4) C dst argument
35: dnl NOTE(review): defframe is supplied by config.m4 (not shown here); the
dnl numbers above look like byte offsets of the stack arguments from the
dnl stack pointer at entry -- confirm against the macro definition.
36: dnl multiplicative inverse of 3, modulo 2^32
dnl (3 * 0xAAAAAAAB = 0x200000001, which is 1 modulo 2^32, so multiplying an
dnl exact multiple of 3 by this value modulo 2^32 gives the quotient)
37: deflit(INVERSE_3, 0xAAAAAAAB)
38:
39: dnl ceil(b/3) and floor(b*2/3) where b=2^32
dnl The code below compares each quotient limb against these two thresholds
dnl to decide how much carry to pass to the next limb.
40: deflit(ONE_THIRD_CEIL, 0x55555556)
41: deflit(TWO_THIRDS_FLOOR, 0xAAAAAAAA)
42:
1.1.1.2 ! ohara 43: TEXT
1.1 maekawa 44: ALIGN(32)
45:
46: PROLOGUE(mpn_divexact_by3c)
47: deflit(`FRAME',0)
48: C Divide the size-limb value at src by 3, one 32-bit limb at a time from
C src[0] up to src[size-1], storing each quotient limb to dst.  A quotient
C limb is (src limb - carry) * INVERSE_3 mod 2^32.  The carry into the next
C limb is the borrow from that subtraction, plus 1 if the quotient limb is
C >= ONE_THIRD_CEIL, plus 1 more if it is > TWO_THIRDS_FLOOR.  The carry
C left over after the last limb is returned in eax.
C
C NOTE(review): src[0] is loaded before size is examined, so size >= 1 is
C assumed -- confirm against the caller contract.
C NOTE(review): PARAM_* are read between the pushes; presumably
C defframe_pushl (from config.m4, not shown) updates FRAME so the offsets
C stay correct -- confirm.
49: movl PARAM_SRC, %ecx
50: pushl %ebx defframe_pushl(SAVE_EBX)
51:
52: movl PARAM_CARRY, %ebx
53: pushl %ebp defframe_pushl(SAVE_EBP)
54:
55: movl PARAM_SIZE, %ebp
56: pushl %edi defframe_pushl(SAVE_EDI)
57:
58: movl (%ecx), %eax C src low limb
59: pushl %esi defframe_pushl(SAVE_ESI)
60:
61: movl PARAM_DST, %edi
62: movl $TWO_THIRDS_FLOOR, %esi C kept in a register for the cmpl against the quotient
63: leal -4(%ecx,%ebp,4), %ecx C &src[size-1]
64:
65: subl %ebx, %eax C low limb minus the incoming carry
66:
67: setc %bl C borrow from that subtraction; only the low byte of ebx is written
C NOTE(review): the upper 24 bits of ebx still hold those of PARAM_CARRY,
C so the carry argument is presumably small enough for them to be zero
C -- confirm.
68: decl %ebp C size-1 limbs remain after the low limb
69: jz L(last) C size==1: edi is still &dst[0], which is &dst[size-1]
70:
71: leal (%edi,%ebp,4), %edi C &dst[size-1]
72: negl %ebp C -(size-1), counts up to zero
73:
74:
75: ALIGN(16)
76: L(top):
77: C eax src limb, carry subtracted
78: C ebx carry limb (0 or 1)
79: C ecx &src[size-1]
80: C edx scratch
81: C esi TWO_THIRDS_FLOOR
82: C edi &dst[size-1]
83: C ebp counter, limbs, negative
84:
85: imull $INVERSE_3, %eax, %edx C edx = quotient limb
86:
87: movl 4(%ecx,%ebp,4), %eax C next src limb
88: cmpl $ONE_THIRD_CEIL, %edx C CF set if quotient < ceil(b/3)
89:
90: sbbl $-1, %ebx C +1 if result>=ceil(b/3)
91: cmpl %edx, %esi C CF set if quotient > floor(b*2/3)
92:
93: sbbl %ebx, %eax C and further 1 if result>=ceil(b*2/3)
94: movl %edx, (%edi,%ebp,4) C store quotient limb; mov leaves the flags alone
95: incl %ebp C sets ZF for the jnz but leaves CF from the sbbl intact
96:
97: setc %bl C new carry
98: jnz L(top) C ZF is still the one from incl, setc does not change flags
99:
100:
101:
102: L(last):
103: C eax src limb, carry subtracted
104: C ebx carry limb (0 or 1)
105: C ecx &src[size-1]
106: C edx scratch
107: C esi TWO_THIRDS_FLOOR
108: C edi &dst[size-1]
109: C ebp zero on both paths into here, no longer needed
110:
1.1.1.2 ! ohara 111: imull $INVERSE_3, %eax, %eax C eax = most significant quotient limb
1.1 maekawa 112:
113: cmpl $ONE_THIRD_CEIL, %eax C CF set if quotient < ceil(b/3)
114: movl %eax, (%edi) C store dst[size-1]; flags from the cmpl survive the movs
115: movl SAVE_EBP, %ebp
116:
117: sbbl $-1, %ebx C +1 if eax>=ceil(b/3)
118: cmpl %eax, %esi C CF set if quotient > floor(b*2/3)
119: movl $0, %eax C movl rather than xorl so CF from the cmpl is preserved
120:
121: adcl %ebx, %eax C further +1 if eax>=ceil(b*2/3)
C eax now holds the return value: the carry out of the top limb
122: movl SAVE_EDI, %edi
123: movl SAVE_ESI, %esi
124:
125: movl SAVE_EBX, %ebx
126: addl $FRAME, %esp C drop the four saved-register slots (registers were reloaded, not popped)
127:
128: ret
129:
130: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>