Annotation of OpenXM_contrib/gmp/mpn/sparc64/sub_n.asm, Revision 1.1
1.1 ! maekawa 1: ! SPARC v9 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! 2: ! store difference in a third limb vector.
! 3:
! 4: ! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
! 5:
! 6: ! This file is part of the GNU MP Library.
! 7:
! 8: ! The GNU MP Library is free software; you can redistribute it and/or modify
! 9: ! it under the terms of the GNU Lesser General Public License as published by
! 10: ! the Free Software Foundation; either version 2.1 of the License, or (at your
! 11: ! option) any later version.
! 12:
! 13: ! The GNU MP Library is distributed in the hope that it will be useful, but
! 14: ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: ! License for more details.
! 17:
! 18: ! You should have received a copy of the GNU Lesser General Public License
! 19: ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: ! MA 02111-1307, USA.
! 22:
! 23:
! 24: ! INPUT PARAMETERS
! 25: ! res_ptr %o0
! 26: ! s1_ptr %o1
! 27: ! s2_ptr %o2
! 28: ! size %o3
! 29:
! 30: include(`../config.m4')
! 31:
! 32: ASM_START()
! 33: .register %g2,#scratch ! declare %g2 as a scratch register to the assembler (it is written in the loops below)
! 34: .register %g3,#scratch ! likewise for %g3
! 35: PROLOGUE(mpn_sub_n)
! 36: ! mpn_sub_n: res[i] = s1[i] - s2[i] over size 64-bit limbs, borrow chained limb to limb; the final borrow (0 or 1) is returned.
! 37: ! 12 mem ops >= 12 cycles
! 38: ! 8 shift insn >= 8 cycles
! 39: ! 8 subccc, executing alone, +8 cycles
! 40: ! Unrolling not mandatory...perhaps 2-way is best?
! 41: ! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl
! 42: ! All in all, it runs at 5 cycles/limb
! 43:
! 44: save %sp,-160,%sp ! new register window: the %o0-%o3 arguments are addressed as %i0-%i3 from here on
! 45:
! 46: addcc %g0,%g0,%g0 ! 0+0 with condition-code update: start with the carry (borrow) flag clear
! 47:
! 48: add %i3,-4,%i3 ! %i3 = size - 4
! 49: brlz,pn %i3,L(there) ! fewer than 4 limbs: skip the unrolled code, use only the one-limb loop
! 50: nop ! branch delay slot
! 51: ! Preload the first four limbs: s1 into %l0-%l3, s2 into %l4-%l7
! 52: ldx [%i1+0],%l0
! 53: ldx [%i2+0],%l4
! 54: ldx [%i1+8],%l1
! 55: ldx [%i2+8],%l5
! 56: ldx [%i1+16],%l2
! 57: ldx [%i2+16],%l6
! 58: ldx [%i1+24],%l3
! 59: ldx [%i2+24],%l7
! 60: add %i1,32,%i1 ! advance s1_ptr past the 4 preloaded limbs (32 bytes)
! 61: add %i2,32,%i2 ! advance s2_ptr likewise
! 62:
! 63: add %i3,-4,%i3 ! %i3 >= 0 only if at least 4 more limbs follow the preloaded ones
! 64: brlz,pn %i3,L(skip) ! not enough for another preload: just finish the 4 limbs already in registers
! 65: nop ! branch delay slot
! 66: b L(loop1) ! jump instead of executing many NOPs
! 67: nop ! branch delay slot
! 68: ALIGN(32)
! 69: !--------- Start main loop ---------
! 70: L(loop1): ! each pass subtracts the 4 limb pairs in %l0-%l7 and loads the next 4 pairs
! 71: subccc %l0,%l4,%g1 ! %g1 = s1 limb 0 - s2 limb 0 - borrow (64-bit result)
! 72: !-
! 73: srlx %l0,32,%o0 ! high 32 bits of the s1 limb
! 74: ldx [%i1+0],%l0 ! reload %l0 for the next pass
! 75: !-
! 76: srlx %l4,32,%o4 ! high 32 bits of the s2 limb
! 77: ldx [%i2+0],%l4 ! reload %l4 for the next pass
! 78: !-
! 79: subccc %o0,%o4,%g0 ! subccc takes its carry-in from the 32-bit flags, so redo the high halves (result discarded): carry now = borrow out of the full 64-bit limb
! 80: !-
! 81: subccc %l1,%l5,%g2 ! limb 1, same three-step pattern
! 82: !-
! 83: srlx %l1,32,%o1
! 84: ldx [%i1+8],%l1
! 85: !-
! 86: srlx %l5,32,%o5
! 87: ldx [%i2+8],%l5
! 88: !-
! 89: subccc %o1,%o5,%g0 ! carry = borrow out of limb 1
! 90: !-
! 91: subccc %l2,%l6,%g3 ! limb 2
! 92: !-
! 93: srlx %l2,32,%o2
! 94: ldx [%i1+16],%l2
! 95: !-
! 96: srlx %l6,32,%g5 ! asymmetry: %g5 rather than %o6 (presumably because %o6 is the stack pointer)
! 97: ldx [%i2+16],%l6
! 98: !-
! 99: subccc %o2,%g5,%g0 ! carry = borrow out of limb 2
! 100: !-
! 101: subccc %l3,%l7,%g4 ! limb 3
! 102: !-
! 103: srlx %l3,32,%o3
! 104: ldx [%i1+24],%l3
! 105: add %i1,32,%i1 ! s1_ptr += 4 limbs (plain add leaves the borrow flag untouched)
! 106: !-
! 107: srlx %l7,32,%o7
! 108: ldx [%i2+24],%l7
! 109: add %i2,32,%i2 ! s2_ptr += 4 limbs
! 110: !-
! 111: subccc %o3,%o7,%g0 ! carry = borrow out of limb 3, carried into the next pass or L(skip)
! 112: !-
! 113: stx %g1,[%i0+0] ! store the four difference limbs
! 114: !-
! 115: stx %g2,[%i0+8]
! 116: !-
! 117: stx %g3,[%i0+16]
! 118: add %i3,-4,%i3 ! count down by the 4 limbs just loaded
! 119: !-
! 120: stx %g4,[%i0+24]
! 121: add %i0,32,%i0 ! res_ptr += 4 limbs
! 122:
! 123: brgez,pt %i3,L(loop1) ! register-value branch (flags not involved): repeat while %i3 >= 0
! 124: nop ! branch delay slot
! 125: !--------- End main loop ---------
! 126: L(skip): ! same arithmetic as the loop body but without loads: finishes the 4 limb pairs still held in %l0-%l7
! 127: subccc %l0,%l4,%g1
! 128: srlx %l0,32,%o0
! 129: srlx %l4,32,%o4
! 130: subccc %o0,%o4,%g0 ! carry = borrow out of limb 0
! 131: subccc %l1,%l5,%g2
! 132: srlx %l1,32,%o1
! 133: srlx %l5,32,%o5
! 134: subccc %o1,%o5,%g0 ! carry = borrow out of limb 1
! 135: subccc %l2,%l6,%g3
! 136: srlx %l2,32,%o2
! 137: srlx %l6,32,%g5 ! asymmetry
! 138: subccc %o2,%g5,%g0 ! carry = borrow out of limb 2
! 139: subccc %l3,%l7,%g4
! 140: srlx %l3,32,%o3
! 141: srlx %l7,32,%o7
! 142: subccc %o3,%o7,%g0 ! carry = borrow out of limb 3
! 143: stx %g1,[%i0+0]
! 144: stx %g2,[%i0+8]
! 145: stx %g3,[%i0+16]
! 146: stx %g4,[%i0+24]
! 147: add %i0,32,%i0 ! res_ptr += 4 limbs
! 148:
! 149: L(there):
! 150: add %i3,4,%i3 ! undo the -4 bias: %i3 = limbs still to process (0..3 for size >= 0)
! 151: brz,pt %i3,L(end) ! nothing left over
! 152: nop ! branch delay slot
! 153:
! 154: L(loop2): ! one limb per pass for the leftovers
! 155: ldx [%i1+0],%l0 ! next s1 limb
! 156: add %i1,8,%i1
! 157: ldx [%i2+0],%l4 ! next s2 limb
! 158: add %i2,8,%i2
! 159: srlx %l0,32,%g2 ! high halves for the borrow fix-up
! 160: srlx %l4,32,%g3
! 161: subccc %l0,%l4,%g1 ! difference limb, with borrow-in
! 162: subccc %g2,%g3,%g0 ! high-half redo, result discarded: carry = borrow out of the 64-bit limb
! 163: stx %g1,[%i0+0]
! 164: add %i0,8,%i0
! 165: add %i3,-1,%i3
! 166: brgz,pt %i3,L(loop2) ! repeat while limbs remain
! 167: nop ! branch delay slot
! 168:
! 169: L(end): addc %g0,%g0,%i0 ! %i0 = 0 + 0 + carry: the final borrow, 0 or 1
! 170: ret
! 171: restore ! delay slot of ret: pops the register window, so %i0 becomes the caller's %o0
! 172: EPILOGUE(mpn_sub_n)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>