Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/popcount.asm, Revision 1.1
1.1 ! ohara 1: dnl Intel P5 mpn_popcount -- mpn bit population count.
! 2:
! 3: dnl Copyright 2001, 2002 Free Software Foundation, Inc.
! 4: dnl
! 5: dnl This file is part of the GNU MP Library.
! 6: dnl
! 7: dnl The GNU MP Library is free software; you can redistribute it and/or
! 8: dnl modify it under the terms of the GNU Lesser General Public License as
! 9: dnl published by the Free Software Foundation; either version 2.1 of the
! 10: dnl License, or (at your option) any later version.
! 11: dnl
! 12: dnl The GNU MP Library is distributed in the hope that it will be useful,
! 13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
! 14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 15: dnl Lesser General Public License for more details.
! 16: dnl
! 17: dnl You should have received a copy of the GNU Lesser General Public
! 18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
! 19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
! 20: dnl Suite 330, Boston, MA 02111-1307, USA.
! 21:
! 22: include(`../config.m4')
! 23:
! 24:
! 25: C P5: 8.0 cycles/limb
! 26:
! 27:
! 28: C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
! 29: C
! 30: C An arithmetic approach has been found to be slower than the table lookup,
! 31: C due to needing too many instructions.
! 32:
! 33: C The slightly strange quoting here helps the renaming done by tune/many.pl.
! 34: deflit(TABLE_NAME,
! 35: m4_assert_defined(`GSYM_PREFIX')
! 36: GSYM_PREFIX`'mpn_popcount``'_table')
! 37: C Lookup table used by mpn_popcount below: one byte per possible byte value.
! 38: RODATA
! 39: ALIGN(8)
! 40: GLOBL TABLE_NAME
! 41: TABLE_NAME: C entry i is m4_popcount(i) for i = 0..255
! 42: forloop(i,0,255,
! 43: ` .byte m4_popcount(i)
! 44: ')
! 45:
! 46: defframe(PARAM_SIZE,8) C second argument: size
! 47: defframe(PARAM_SRC, 4) C first argument: src
! 48:
! 49: TEXT
! 50: ALIGN(8)
! 51: C Adds up one table lookup per byte read from src, two bytes per loop iteration.
! 52: PROLOGUE(mpn_popcount)
! 53: deflit(`FRAME',0)
! 54:
! 55: movl PARAM_SIZE, %ecx C ecx = size
! 56: pushl %esi FRAME_pushl() C save esi, it becomes the src pointer
! 57:
! 58: ifdef(`PIC',`
! 59: pushl %ebx FRAME_pushl() C save ebx
! 60: pushl %ebp FRAME_pushl() C save ebp which will hold the table address
! 61:
! 62: call L(here) C pushes the address of L(here)
! 63: L(here):
! 64: popl %ebp C ebp = address of L(here)
! 65: shll %ecx C size in byte pairs
! 66:
! 67: addl $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp C ebp = address of the GOT
! 68: movl PARAM_SRC, %esi C esi = src
! 69:
! 70: xorl %eax, %eax C total
! 71: xorl %ebx, %ebx C byte
! 72:
! 73: movl TABLE_NAME@GOT(%ebp), %ebp C ebp = table address read from its GOT entry
! 74: xorl %edx, %edx C byte
! 75: define(TABLE,`(%ebp,$1)') C table entry indexed by the given register
! 76: ',`
! 77: dnl non-PIC
! 78: shll %ecx C size in byte pairs
! 79: movl PARAM_SRC, %esi C esi = src
! 80:
! 81: pushl %ebx FRAME_pushl() C save ebx
! 82: xorl %eax, %eax C total
! 83:
! 84: xorl %ebx, %ebx C byte
! 85: xorl %edx, %edx C byte
! 86:
! 87: define(TABLE,`TABLE_NAME`'($1)') C table entry indexed by the given register
! 88: ')
! 89:
! 90:
! 91: ALIGN(8) C necessary on P55 for claimed speed
! 92: L(top):
! 93: C eax total
! 94: C ebx byte
! 95: C ecx counter, 2*size down to 1 (the body runs before the counter is tested, so size = 0 is not handled)
! 96: C edx byte
! 97: C esi src
! 98: C edi
! 99: C ebp [PIC] table
! 100:
! 101: addl %ebx, %eax C add the count looked up on the previous pass (0 on the first pass)
! 102: movb -1(%esi,%ecx,2), %bl C bl = byte at src + 2*ecx - 1
! 103:
! 104: addl %edx, %eax C add the count looked up on the previous pass (0 on the first pass)
! 105: movb -2(%esi,%ecx,2), %dl C dl = byte at src + 2*ecx - 2
! 106:
! 107: movb TABLE(%ebx), %bl C bl = table entry for that byte, upper bits of ebx stay zero
! 108: decl %ecx C sets the zero flag tested by the jnz below
! 109:
! 110: movb TABLE(%edx), %dl C dl = table entry for that byte, movb leaves the flags alone
! 111: jnz L(top) C loop until the counter reaches zero
! 112:
! 113:
! 114: ifdef(`PIC',`
! 115: popl %ebp C restore ebp
! 116: ')
! 117: addl %ebx, %eax C add the count from the final pass
! 118: popl %ebx C restore ebx
! 119:
! 120: addl %edx, %eax C add the count from the final pass
! 121: popl %esi C restore esi
! 122:
! 123: ret C total is returned in eax
! 124:
! 125: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>