version 1.1.1.1, 2000/09/09 14:12:41 |
version 1.1.1.2, 2003/08/25 16:06:26 |
|
|
dnl SPARC 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and |
dnl SPARC v9 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and |
dnl subtract the result from a second limb vector. |
dnl subtract the result from a second limb vector. |
|
|
dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. |
dnl Copyright 2001, 2002 Free Software Foundation, Inc. |
|
|
dnl This file is part of the GNU MP Library. |
dnl This file is part of the GNU MP Library. |
|
|
Line 23 dnl MA 02111-1307, USA. |
|
Line 23 dnl MA 02111-1307, USA. |
|
include(`../config.m4') |
include(`../config.m4') |
|
|
C INPUT PARAMETERS |
C INPUT PARAMETERS |
C res_ptr i0 |
C rp i0 |
C s1_ptr i1 |
C up i1 |
C size i2 |
C n i2 |
C s2_limb i3 |
C v i3 |
|
|
ASM_START() |
ASM_START() |
.register %g2,#scratch |
REGISTER(%g2,#scratch) |
.register %g3,#scratch |
|
|
|
PROLOGUE(mpn_submul_1) |
PROLOGUE(mpn_submul_1) |
save %sp,-256,%sp |
save %sp,-176,%sp |
|
|
C We store 0.0 in f10 and keep it invariant across the two |
sllx %i2, 3, %g2 |
C function calls below. Note that this is not ABI conformant, |
or %g0, %i1, %o1 |
C but since the functions are local, that's acceptable. |
add %g2, 15, %o0 |
ifdef(`PIC', |
or %g0, %i2, %o2 |
`L(pc): rd %pc,%o7 |
and %o0, -16, %o0 |
ld [%o7+L(noll)-L(pc)],%f10', |
sub %sp, %o0, %sp |
` sethi %hh(L(noll)),%g2 |
add %sp, 2223, %o0 |
sethi %lm(L(noll)),%g1 |
or %g0, %o0, %l0 |
or %g2,%hm(L(noll)),%g2 |
call mpn_mul_1 |
or %g1,%lo(L(noll)),%g1 |
or %g0, %i3, %o3 |
sllx %g2,32,%g2 |
or %g0, %o0, %l1 C preserve carry value from mpn_mul_1 |
ld [%g1+%g2],%f10') |
or %g0, %i0, %o0 |
|
or %g0, %i0, %o1 |
sub %i1,%i0,%g1 |
or %g0, %l0, %o2 |
srlx %g1,3,%g1 |
call mpn_sub_n |
cmp %g1,%i2 |
or %g0, %i2, %o3 |
bcc,pt %xcc,L(nooverlap) |
|
nop |
|
|
|
sllx %i2,3,%g2 C compute stack allocation byte count |
|
add %g2,15,%o0 |
|
and %o0,-16,%o0 |
|
sub %sp,%o0,%sp |
|
add %sp,2223,%o0 |
|
|
|
mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp |
|
call mpn_copyi |
|
mov %i2,%o2 C copy n to mpn_copyi's count parameter |
|
|
|
add %sp,2223,%i1 |
|
|
|
L(nooverlap): |
|
C First multiply-subtract with low 32 bits of s2_limb |
|
mov %i0,%o0 |
|
mov %i1,%o1 |
|
add %i2,%i2,%o2 |
|
call submull |
|
srl %i3,0,%o3 |
|
|
|
mov %o0,%l0 C keep carry-out from submull |
|
|
|
C Now multiply-subtract with high 32 bits of s2_limb, unless it is zero. |
|
srlx %i3,32,%o3 |
|
brz,a,pn %o3,L(small) |
|
mov %o0,%i0 |
|
mov %i1,%o1 |
|
add %i2,%i2,%o2 |
|
call submulu |
|
add %i0,4,%o0 |
|
|
|
add %l0,%o0,%i0 |
|
L(small): |
|
ret |
ret |
restore %g0,%g0,%g0 |
restore %l1, %o0, %o0 C sum carry values |
EPILOGUE(mpn_submul_1) |
EPILOGUE(mpn_submul_1) |
|
|
C Put a zero in the text segment to allow us to get the address |
|
C quickly when compiling for PIC |
|
TEXT |
|
ALIGN(4) |
|
L(noll): |
|
.word 0 |
|
|
|
define(`LO',`(+4)') |
|
define(`HI',`(-4)') |
|
|
|
define(`DLO',`(+4)') |
|
define(`DHI',`(-4)') |
|
define(`LOWPART') |
|
define(`E',`L(l.$1)') |
|
include_mpn(`sparc64/submul1h.asm') |
|
|
|
define(`DLO',`(-4)') |
|
define(`DHI',`(+4)') |
|
undefine(`LOWPART') |
|
define(`E',`L(u.$1)') |
|
include_mpn(`sparc64/submul1h.asm') |
|