=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/sparc64/Attic/submul_1.asm,v retrieving revision 1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1 -r1.1.1.2 --- OpenXM_contrib/gmp/mpn/sparc64/Attic/submul_1.asm 2000/09/09 14:12:41 1.1 +++ OpenXM_contrib/gmp/mpn/sparc64/Attic/submul_1.asm 2003/08/25 16:06:26 1.1.1.2 @@ -1,7 +1,7 @@ -dnl SPARC 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and +dnl SPARC v9 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and dnl subtract the result from a second limb vector. -dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. +dnl Copyright 2001, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -23,92 +23,33 @@ dnl MA 02111-1307, USA. include(`../config.m4') C INPUT PARAMETERS -C res_ptr i0 -C s1_ptr i1 -C size i2 -C s2_limb i3 +C rp i0 +C up i1 +C n i2 +C v i3 ASM_START() - .register %g2,#scratch - .register %g3,#scratch + REGISTER(%g2,#scratch) PROLOGUE(mpn_submul_1) - save %sp,-256,%sp + save %sp,-176,%sp -C We store 0.0 in f10 and keep it invariant accross thw two -C function calls below. Note that this is not ABI conformant, -C but since the functions are local, that's acceptable. -ifdef(`PIC', -`L(pc): rd %pc,%o7 - ld [%o7+L(noll)-L(pc)],%f10', -` sethi %hh(L(noll)),%g2 - sethi %lm(L(noll)),%g1 - or %g2,%hm(L(noll)),%g2 - or %g1,%lo(L(noll)),%g1 - sllx %g2,32,%g2 - ld [%g1+%g2],%f10') - - sub %i1,%i0,%g1 - srlx %g1,3,%g1 - cmp %g1,%i2 - bcc,pt %xcc,L(nooverlap) - nop - - sllx %i2,3,%g2 C compute stack allocation byte count - add %g2,15,%o0 - and %o0,-16,%o0 - sub %sp,%o0,%sp - add %sp,2223,%o0 - - mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp - call mpn_copyi - mov %i2,%o2 C copy n to mpn_copyi's count parameter - - add %sp,2223,%i1 - -L(nooverlap): -C First multiply-add with low 32 bits of s2_limb - mov %i0,%o0 - mov %i1,%o1 - add %i2,%i2,%o2 - call submull - srl %i3,0,%o3 - - mov %o0,%l0 C keep carry-out from accmull - -C Now multiply-add with high 32 bits of s2_limb, unless it is zero. - srlx %i3,32,%o3 - brz,a,pn %o3,L(small) - mov %o0,%i0 - mov %i1,%o1 - add %i2,%i2,%o2 - call submulu - add %i0,4,%o0 - - add %l0,%o0,%i0 -L(small): + sllx %i2, 3, %g2 + or %g0, %i1, %o1 + add %g2, 15, %o0 + or %g0, %i2, %o2 + and %o0, -16, %o0 + sub %sp, %o0, %sp + add %sp, 2223, %o0 + or %g0, %o0, %l0 + call mpn_mul_1 + or %g0, %i3, %o3 + or %g0, %o0, %l1 C preserve carry value from mpn_mul_1 + or %g0, %i0, %o0 + or %g0, %i0, %o1 + or %g0, %l0, %o2 + call mpn_sub_n + or %g0, %i2, %o3 ret - restore %g0,%g0,%g0 + restore %l1, %o0, %o0 C sum carry values EPILOGUE(mpn_submul_1) - -C Put a zero in the text segment to allow us to t the address -C quickly when compiling for PIC - TEXT - ALIGN(4) -L(noll): - .word 0 - -define(`LO',`(+4)') -define(`HI',`(-4)') - -define(`DLO',`(+4)') -define(`DHI',`(-4)') -define(`LOWPART') -define(`E',`L(l.$1)') -include_mpn(`sparc64/submul1h.asm') - -define(`DLO',`(-4)') -define(`DHI',`(+4)') -undefine(`LOWPART') -define(`E',`L(u.$1)') -include_mpn(`sparc64/submul1h.asm')