=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/sparc64/Attic/rshift.asm,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1.1.1 -r1.1.1.2 --- OpenXM_contrib/gmp/mpn/sparc64/Attic/rshift.asm 2000/09/09 14:12:41 1.1.1.1 +++ OpenXM_contrib/gmp/mpn/sparc64/Attic/rshift.asm 2003/08/25 16:06:26 1.1.1.2 @@ -1,94 +1,146 @@ -! SPARC v9 __gmpn_rshift -- +dnl SPARC v9 mpn_rshift -! Copyright (C) 1996, 2000 Free Software Foundation, Inc. +dnl Copyright 1996, 2000, 2001, 2002 Free Software Foundation, Inc. -! This file is part of the GNU MP Library. +dnl This file is part of the GNU MP Library. -! The GNU MP Library is free software; you can redistribute it and/or modify -! it under the terms of the GNU Lesser General Public License as published by -! the Free Software Foundation; either version 2.1 of the License, or (at your -! option) any later version. +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. -! The GNU MP Library is distributed in the hope that it will be useful, but -! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -! License for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. -! You should have received a copy of the GNU Lesser General Public License -! along with the GNU MP Library; see the file COPYING.LIB. If not, write to -! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -! MA 02111-1307, USA. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. - -! INPUT PARAMETERS -! res_ptr %o0 -! src_ptr %o1 -! size %o2 -! cnt %o3 - include(`../config.m4') -ASM_START() - .register %g2,#scratch - .register %g3,#scratch -PROLOGUE(mpn_rshift) - ldx [%o1],%g2 ! load first limb - sub %g0,%o3,%o5 ! negate shift count - add %o2,-1,%o2 - and %o2,4-1,%g4 ! number of limbs in first loop - sllx %g2,%o5,%g1 ! compute function result - brz,pn %g4,L(0) ! if multiple of 4 limbs, skip first loop - mov %g1,%g5 +C INPUT PARAMETERS +define(`rp',`%i0') +define(`up',`%i1') +define(`n',`%i2') +define(`cnt',`%i3') - sub %o2,%g4,%o2 ! adjust count for main loop +define(`u0',`%l0') +define(`u1',`%l2') +define(`u2',`%l4') +define(`u3',`%l6') -L(loop0): - ldx [%o1+8],%g3 - add %o0,8,%o0 - add %o1,8,%o1 - add %g4,-1,%g4 - srlx %g2,%o3,%o4 - sllx %g3,%o5,%g1 - mov %g3,%g2 - or %o4,%g1,%o4 - brnz,pt %g4,L(loop0) - stx %o4,[%o0-8] +define(`tnc',`%i4') -L(0): brz,pn %o2,L(end) - nop +define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe +define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe -L(loop1): - ldx [%o1+8],%g3 - add %o0,32,%o0 - add %o2,-4,%o2 - srlx %g2,%o3,%o4 - sllx %g3,%o5,%g1 +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(mpn_rshift) + save %sp,-160,%sp - ldx [%o1+16],%g2 - srlx %g3,%o3,%g4 - or %o4,%g1,%o4 - stx %o4,[%o0-32] - sllx %g2,%o5,%g1 + sub %g0,cnt,tnc C negate shift count + ldx [up],u3 C load first limb + subcc n,5,n + sllx u3,tnc,%i5 C compute function result + srlx u3,cnt,%g3 + bl,pn %icc,.Lend1234 + fanop - ldx [%o1+24],%g3 - srlx %g2,%o3,%o4 - or %g4,%g1,%g4 - stx %g4,[%o0-24] - sllx %g3,%o5,%g1 + subcc n,4,n + ldx [up+8],u0 + ldx [up+16],u1 + add up,32,up + ldx [up-8],u2 + ldx [up+0],u3 + sllx u0,tnc,%g2 - ldx [%o1+32],%g2 - srlx %g3,%o3,%g4 - or %o4,%g1,%o4 - stx %o4,[%o0-16] - sllx %g2,%o5,%g1 + bl,pn %icc,.Lend5678 + fanop - add %o1,32,%o1 - or %g4,%g1,%g4 - brnz %o2,L(loop1) - stx %g4,[%o0-8] + b,a .Loop + .align 16 +.Loop: + srlx u0,cnt,%g1 + or %g3,%g2,%g3 + ldx [up+8],u0 + fanop +C -- + sllx u1,tnc,%g2 + subcc n,4,n + stx %g3,[rp+0] + fanop +C -- + srlx u1,cnt,%g3 + or %g1,%g2,%g1 + ldx [up+16],u1 + fanop +C -- + sllx u2,tnc,%g2 + stx %g1,[rp+8] + add up,32,up + fanop +C -- + srlx u2,cnt,%g1 + or %g3,%g2,%g3 + ldx [up-8],u2 + fanop +C -- + sllx u3,tnc,%g2 + stx %g3,[rp+16] + add rp,32,rp + fanop +C -- + srlx u3,cnt,%g3 + or %g1,%g2,%g1 + ldx [up+0],u3 + fanop +C -- + sllx u0,tnc,%g2 + stx %g1,[rp-8] + bge,pt %icc,.Loop + fanop +C -- +.Lend5678: + srlx u0,cnt,%g1 + or %g3,%g2,%g3 + sllx u1,tnc,%g2 + stx %g3,[rp+0] + srlx u1,cnt,%g3 + or %g1,%g2,%g1 + sllx u2,tnc,%g2 + stx %g1,[rp+8] + srlx u2,cnt,%g1 + or %g3,%g2,%g3 + sllx u3,tnc,%g2 + stx %g3,[rp+16] + add rp,32,rp + srlx u3,cnt,%g3 C carry... + or %g1,%g2,%g1 + stx %g1,[rp-8] -L(end): srlx %g2,%o3,%g2 - stx %g2,[%o0-0] - retl - mov %g5,%o0 +.Lend1234: + addcc n,4,n + bz,pn %icc,.Lret + fanop +.Loop0: + add rp,8,rp + subcc n,1,n + ldx [up+8],u3 + add up,8,up + sllx u3,tnc,%g2 + or %g3,%g2,%g3 + stx %g3,[rp-8] + srlx u3,cnt,%g3 + bnz,pt %icc,.Loop0 + fanop +.Lret: + stx %g3,[rp+0] + mov %i5,%i0 + ret + restore EPILOGUE(mpn_rshift)