version 1.1.1.2, 2000/12/01 05:45:02 |
version 1.1.1.3, 2003/08/25 16:06:29 |
|
|
dnl AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming |
dnl AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming |
dnl distance. |
dnl distance. |
dnl |
|
dnl K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb |
|
|
|
|
dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. |
dnl Copyright (C) 2000 Free Software Foundation, Inc. |
|
dnl |
dnl |
dnl This file is part of the GNU MP Library. |
dnl This file is part of the GNU MP Library. |
dnl |
dnl |
Line 23 dnl License along with the GNU MP Library; see the fi |
|
Line 20 dnl License along with the GNU MP Library; see the fi |
|
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl Suite 330, Boston, MA 02111-1307, USA. |
dnl Suite 330, Boston, MA 02111-1307, USA. |
|
|
|
|
include(`../config.m4') |
include(`../config.m4') |
|
|
|
|
dnl Only recent versions of gas know psadbw, in particular gas 2.9.1 on |
C K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb |
dnl FreeBSD 3.3 and 3.4 doesn't recognise it. |
|
|
|
define(psadbw_mm4_mm0, |
|
`ifelse(m4_ifdef_anyof_p(`HAVE_TARGET_CPU_athlon', |
|
`HAVE_TARGET_CPU_pentium3'),1, |
|
`.byte 0x0f,0xf6,0xc4 C psadbw %mm4, %mm0', |
|
|
|
`m4_warning(`warning, using simulated and only partly functional psadbw, use for testing only |
|
') C this works enough for the sum of bytes done below, making it |
|
C possible to test on an older cpu |
|
leal -8(%esp), %esp |
|
movq %mm4, (%esp) |
|
movq %mm0, %mm4 |
|
forloop(i,1,7, |
|
` psrlq $ 8, %mm4 |
|
paddb %mm4, %mm0 |
|
') |
|
pushl $ 0 |
|
pushl $ 0xFF |
|
pand (%esp), %mm0 |
|
movq 8(%esp), %mm4 |
|
leal 16(%esp), %esp |
|
')') |
|
|
|
|
|
C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size); |
C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size); |
C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size); |
C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size); |
C |
C |
Line 96 MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) |
|
Line 69 MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) |
|
ifdef(`PIC',,` |
ifdef(`PIC',,` |
dnl non-PIC |
dnl non-PIC |
|
|
DATA |
RODATA |
ALIGN(8) |
ALIGN(8) |
|
|
define(LS, |
L(rodata_AAAAAAAAAAAAAAAA): |
m4_assert_numargs(1) |
|
`LF(M4_function,`$1')') |
|
|
|
LS(rodata_AAAAAAAAAAAAAAAA): |
|
.long 0xAAAAAAAA |
.long 0xAAAAAAAA |
.long 0xAAAAAAAA |
.long 0xAAAAAAAA |
|
|
LS(rodata_3333333333333333): |
L(rodata_3333333333333333): |
.long 0x33333333 |
.long 0x33333333 |
.long 0x33333333 |
.long 0x33333333 |
|
|
LS(rodata_0F0F0F0F0F0F0F0F): |
L(rodata_0F0F0F0F0F0F0F0F): |
.long 0x0F0F0F0F |
.long 0x0F0F0F0F |
.long 0x0F0F0F0F |
.long 0x0F0F0F0F |
') |
') |
|
|
.text |
TEXT |
ALIGN(32) |
ALIGN(32) |
|
|
PROLOGUE(M4_function) |
PROLOGUE(M4_function) |
deflit(`FRAME',0) |
deflit(`FRAME',0) |
|
|
movl PARAM_SIZE, %ecx |
movl PARAM_SIZE, %ecx |
orl %ecx, %ecx |
|
jz L(zero) |
|
|
|
ifdef(`PIC',` |
ifdef(`PIC',` |
movl $0xAAAAAAAA, %eax |
movl $0xAAAAAAAA, %eax |
|
|
punpckldq %mm5, %mm5 |
punpckldq %mm5, %mm5 |
|
|
',` |
',` |
movq LS(rodata_AAAAAAAAAAAAAAAA), %mm7 |
movq L(rodata_AAAAAAAAAAAAAAAA), %mm7 |
movq LS(rodata_3333333333333333), %mm6 |
movq L(rodata_3333333333333333), %mm6 |
movq LS(rodata_0F0F0F0F0F0F0F0F), %mm5 |
movq L(rodata_0F0F0F0F0F0F0F0F), %mm5 |
') |
') |
pxor %mm4, %mm4 |
pxor %mm4, %mm4 |
|
|
Line 166 HAM(` movl PARAM_SRC2, %edx') |
|
Line 133 HAM(` movl PARAM_SRC2, %edx') |
|
|
|
movd (%eax,%ecx,8), %mm1 |
movd (%eax,%ecx,8), %mm1 |
|
|
HAM(` movd 0(%edx,%ecx,8), %mm0 |
HAM(` movd (%edx,%ecx,8), %mm0 |
pxor %mm0, %mm1 |
pxor %mm0, %mm1 |
') |
') |
orl %ecx, %ecx |
orl %ecx, %ecx |
|
|
paddd %mm1, %mm0 C bytes |
paddd %mm1, %mm0 C bytes |
|
|
|
|
psadbw_mm4_mm0 |
psadbw( %mm4, %mm0) |
|
|
paddd %mm0, %mm2 C add to total |
paddd %mm0, %mm2 C add to total |
jnz L(top) |
jnz L(top) |
|
|
|
|
movd %mm2, %eax |
movd %mm2, %eax |
emms |
emms |
ret |
|
|
|
|
|
L(zero): |
|
movl $0, %eax |
|
ret |
ret |
|
|
EPILOGUE() |
EPILOGUE() |