Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/popcount.asm, Revision 1.1.1.1
1.1 ohara 1: dnl Intel P5 mpn_popcount -- mpn bit population count.
2:
3: dnl Copyright 2001, 2002 Free Software Foundation, Inc.
4: dnl
5: dnl This file is part of the GNU MP Library.
6: dnl
7: dnl The GNU MP Library is free software; you can redistribute it and/or
8: dnl modify it under the terms of the GNU Lesser General Public License as
9: dnl published by the Free Software Foundation; either version 2.1 of the
10: dnl License, or (at your option) any later version.
11: dnl
12: dnl The GNU MP Library is distributed in the hope that it will be useful,
13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15: dnl Lesser General Public License for more details.
16: dnl
17: dnl You should have received a copy of the GNU Lesser General Public
18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
20: dnl Suite 330, Boston, MA 02111-1307, USA.
21:
22: include(`../config.m4')
23:
24:
25: C P5: 8.0 cycles/limb
26:
27:
28: C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
29: C
30: C An arithmetic approach has been found to be slower than the table lookup,
31: C due to needing too many instructions.
32:
33: C The slightly strange quoting here helps the renaming done by tune/many.pl.
C TABLE_NAME is the symbol of the byte lookup table used by mpn_popcount:
C GSYM_PREFIX followed by mpn_popcount_table.  m4_assert_defined presumably
C stops the build if GSYM_PREFIX has not been defined -- TODO confirm.
34: deflit(TABLE_NAME,
35: m4_assert_defined(`GSYM_PREFIX')
36: GSYM_PREFIX`'mpn_popcount``'_table')
37:
C The table holds 256 one-byte entries, entry i being m4_popcount(i)
C (presumably the number of 1 bits in i -- TODO confirm against the macro).
C It is indexed by a single source byte in the mpn_popcount loop.
38: RODATA
39: ALIGN(8)
40: GLOBL TABLE_NAME C make the table symbol global
41: TABLE_NAME:
42: forloop(i,0,255,
43: ` .byte m4_popcount(i)
44: ')
45:
C mpn_popcount: return in eax the sum of the table entries for every byte
C of the size limbs starting at src.
C
C defframe gives src the offset 4 and size the offset 8 (presumably stack
C offsets at function entry), matching the argument order of the prototype.
C
C Each loop pass handles one pair of bytes, walking from the highest address
C down to src.  The loop is skewed by one pass: the two adds at the top
C accumulate the table values fetched on the previous pass (zero on the
C first pass, since ebx and edx are cleared beforehand), and the two adds
C after the loop pick up the values of the final pair.
C
C esi and ebx (and ebp in the PIC case) are pushed on entry and popped
C before returning.
C
C NOTE(review): size is assumed to be at least 1.  With size zero the decl
C in the loop would take ecx from 0 to -1, jnz would be taken, and the loop
C would not stop at the end of the operand -- confirm callers never pass 0.
46: defframe(PARAM_SIZE,8)
47: defframe(PARAM_SRC, 4)
48:
49: TEXT
50: ALIGN(8)
51:
52: PROLOGUE(mpn_popcount)
53: deflit(`FRAME',0)
54:
55: movl PARAM_SIZE, %ecx C ecx = size in limbs
56: pushl %esi FRAME_pushl() C save esi, used below as the src pointer
57:
58: ifdef(`PIC',`
59: pushl %ebx FRAME_pushl()
60: pushl %ebp FRAME_pushl()
61:
62: call L(here) C pushes the address of L(here)
63: L(here):
64: popl %ebp C ebp = address of L(here)
65: shll %ecx C size in byte pairs
66:
67: addl $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp C ebp = GOT address
68: movl PARAM_SRC, %esi
69:
70: xorl %eax, %eax C total
71: xorl %ebx, %ebx C byte
72:
73: movl TABLE_NAME@GOT(%ebp), %ebp C ebp = table address, from the GOT
74: xorl %edx, %edx C byte
75: define(TABLE,`(%ebp,$1)')
76: ',`
77: dnl non-PIC
78: shll %ecx C size in byte pairs
79: movl PARAM_SRC, %esi
80:
81: pushl %ebx FRAME_pushl()
82: xorl %eax, %eax C total
83:
84: xorl %ebx, %ebx C byte
85: xorl %edx, %edx C byte
86:
87: define(TABLE,`TABLE_NAME`'($1)')
88: ')
89:
90:
91: ALIGN(8) C necessary on P55 for claimed speed
92: L(top):
93: C eax total
94: C ebx byte, then its table entry; only bl is written so the rest stays 0
95: C ecx counter in byte pairs, 2*size down to 1
96: C edx byte, then its table entry; only dl is written so the rest stays 0
97: C esi src
98: C edi
99: C ebp [PIC] table
100:
101: addl %ebx, %eax C add table value fetched on the previous pass
102: movb -1(%esi,%ecx,2), %bl C byte at src + 2*ecx - 1
103:
104: addl %edx, %eax C add table value fetched on the previous pass
105: movb -2(%esi,%ecx,2), %dl C byte at src + 2*ecx - 2
106:
107: movb TABLE(%ebx), %bl C replace the byte by its table entry
108: decl %ecx
109:
110: movb TABLE(%edx), %dl C replace the byte by its table entry
111: jnz L(top) C tests the decl result, movb does not change flags
112:
113:
114: ifdef(`PIC',`
115: popl %ebp
116: ')
117: addl %ebx, %eax C table value of the final pair, not yet added
118: popl %ebx
119:
120: addl %edx, %eax C table value of the final pair, not yet added
121: popl %esi
122:
123: ret
124:
125: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>