version 1.1, 2000/09/09 14:12:42 |
version 1.1.1.2, 2003/08/25 16:06:29 |
|
|
dnl AMD K7 mpn_lshift -- mpn left shift. |
dnl AMD K7 mpn_lshift -- mpn left shift. |
dnl |
|
dnl K7: 1.21 cycles/limb (at 16 limbs/loop). |
|
|
|
|
dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. |
|
dnl |
dnl |
dnl This file is part of the GNU MP Library. |
dnl This file is part of the GNU MP Library. |
dnl |
dnl |
Line 22 dnl License along with the GNU MP Library; see the fi |
|
Line 19 dnl License along with the GNU MP Library; see the fi |
|
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - |
dnl Suite 330, Boston, MA 02111-1307, USA. |
dnl Suite 330, Boston, MA 02111-1307, USA. |
|
|
|
|
include(`../config.m4') |
include(`../config.m4') |
|
|
|
|
|
C K7: 1.21 cycles/limb (at 16 limbs/loop). |
|
|
|
|
|
|
dnl K7: UNROLL_COUNT cycles/limb |
dnl K7: UNROLL_COUNT cycles/limb |
dnl 4 1.51 |
dnl 4 1.51 |
dnl 8 1.26 |
dnl 8 1.26 |
Line 61 defframe(SAVE_ESI, -8) |
|
Line 61 defframe(SAVE_ESI, -8) |
|
defframe(SAVE_EBX, -12) |
defframe(SAVE_EBX, -12) |
deflit(SAVE_SIZE, 12) |
deflit(SAVE_SIZE, 12) |
|
|
.text |
TEXT |
ALIGN(32) |
ALIGN(32) |
|
|
PROLOGUE(mpn_lshift) |
PROLOGUE(mpn_lshift) |
|
|
|
|
ifdef(`PIC',` |
ifdef(`PIC',` |
L(pic_calc): |
L(pic_calc): |
C See README.family about old gas bugs |
C See mpn/x86/README about old gas bugs |
leal (%eax,%eax,4), %esi |
leal (%eax,%eax,4), %esi |
addl $L(entry)-L(here), %esi |
addl $L(entry)-L(here), %esi |
addl (%esp), %esi |
addl (%esp), %esi |
Line 313 forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, ` |
|
Line 313 forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, ` |
|
deflit(`disp0', eval(-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128))) |
deflit(`disp0', eval(-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128))) |
deflit(`disp1', eval(disp0 - 8)) |
deflit(`disp1', eval(disp0 - 8)) |
|
|
movq disp0(%edx), %mm0 |
Zdisp( movq, disp0,(%edx), %mm0) |
psllq %mm6, %mm2 |
psllq %mm6, %mm2 |
|
|
movq %mm0, %mm1 |
movq %mm0, %mm1 |
psrlq %mm7, %mm0 |
psrlq %mm7, %mm0 |
|
|
por %mm2, %mm0 |
por %mm2, %mm0 |
movq %mm0, disp0(%edi) |
Zdisp( movq, %mm0, disp0,(%edi)) |
|
|
|
|
movq disp1(%edx), %mm0 |
Zdisp( movq, disp1,(%edx), %mm0) |
psllq %mm6, %mm1 |
psllq %mm6, %mm1 |
|
|
movq %mm0, %mm2 |
movq %mm0, %mm2 |
psrlq %mm7, %mm0 |
psrlq %mm7, %mm0 |
|
|
por %mm1, %mm0 |
por %mm1, %mm0 |
movq %mm0, disp1(%edi) |
Zdisp( movq, %mm0, disp1,(%edi)) |
') |
') |
|
|
subl $UNROLL_BYTES, %edx |
subl $UNROLL_BYTES, %edx |