Annotation of OpenXM_contrib/gmp/mpn/sparc64/sub_n.asm, Revision 1.1.1.2
1.1.1.2 ! ohara 1: dnl SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! 2: dnl store difference in a third limb vector.
1.1 maekawa 3:
1.1.1.2 ! ohara 4: dnl Copyright 2001, 2002 Free Software Foundation, Inc.
1.1 maekawa 5:
1.1.1.2 ! ohara 6: dnl This file is part of the GNU MP Library.
1.1 maekawa 7:
1.1.1.2 ! ohara 8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
! 9: dnl it under the terms of the GNU Lesser General Public License as published
! 10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
! 11: dnl your option) any later version.
! 12:
! 13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
! 14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: dnl License for more details.
! 17:
! 18: dnl You should have received a copy of the GNU Lesser General Public License
! 19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: dnl MA 02111-1307, USA.
1.1 maekawa 22:
23: include(`../config.m4')
24:
1.1.1.2 ! ohara 25: C Compute carry-out from the most significant bits of u,v, and r, where
! 26: C r=u-v-carry_in, using logic operations.
1.1 maekawa 27:
1.1.1.2 ! ohara 28: C This code runs at 4 cycles/limb on UltraSPARC 1 and 2. It has a 4 insn
! 29: C recurrence, and on the UltraSPARC 1 and 2 the IE units are 100% saturated.
! 30: C Therefore, it seems futile to try to optimize this any further...
! 31:
C Incoming arguments
define(`rp',`%i0')
define(`up',`%i1')
define(`vp',`%i2')
define(`n',`%i3')

C Running borrow between limbs
define(`cy',`%i4')

C Limb pairs held in the local registers: uK is loaded through up,
C vK through vp, for K = 0..3
define(`u0',`%l0')
define(`v0',`%l1')
define(`u1',`%l2')
define(`v1',`%l3')
define(`u2',`%l4')
define(`v2',`%l5')
define(`u3',`%l6')
define(`v3',`%l7')

C Filler instructions used to pad issue groups
dnl fanop: a quasi nop running in the FA pipe
define(`fanop',`fitod %f0,%f2')
dnl fmnop: a quasi nop running in the FM pipe
define(`fmnop',`fmuld %f0,%f0,%f4')
1.1 maekawa 51:
ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)

C mpn_sub_n -- rp[0..n-1] = up[0..n-1] - vp[0..n-1], returning the borrow out.
C
C In:   rp (%i0)  destination limb vector
C       up (%i1)  minuend limb vector
C       vp (%i2)  subtrahend limb vector
C       n  (%i3)  number of 64-bit limbs, must be > 0
C Out:  %i0 = borrow out of the most significant limb, 0 or 1
C       (the caller sees it in %o0 once the restore has executed)
C Uses: %l0-%l7 (limb pairs), %i4 (cy), %g1-%g4, %f0 %f2 %f4, condition codes
C
C For every limb r = u - v - cy is formed with two plain subtracts, and the
C borrow is then rebuilt from the top bits with logic operations:
C     cy = msb of ((r | ~(u | ~v)) & ~(u & ~v))
C
C Every branch on n tests %xcc rather than %icc.  n is kept and updated in a
C 64-bit register, so the 32-bit condition codes give the wrong answer for
C counts with bit 31 set (needlessly sent to the one-limb loop) and for
C counts whose low 32 bits are zero or wrap (loops end at the wrong time).
C
C Structure: a 4-way unrolled, software pipelined main loop (.Loop), a
C wind-down copy of it without loads (.Lend4567), and a one-limb loop
C (.Loop0) for n < 4 and for the 0..3 limbs left after the unrolled code.
C Within .Loop the "C --" lines separate groups of four instructions; the
C fanop/fmnop fillers pad each group to four.
PROLOGUE(mpn_sub_n)
	save	%sp,-160,%sp

	fitod	%f0,%f0		C make sure f0 contains small, quiet number
	subcc	n,4,%g0
	bl,pn	%xcc,.Loop0	C n < 4: only the one-limb loop is needed
	mov	0,cy		C delay slot: no borrow into the first limb

C Preload four limb pairs and start the subtract of the first one.
	ldx	[up+0],u0
	ldx	[vp+0],v0
	add	up,32,up
	ldx	[up-24],u1
	ldx	[vp+8],v1
	add	vp,32,vp
	ldx	[up-16],u2
	ldx	[vp-16],v2
	ldx	[up-8],u3
	ldx	[vp-8],v3
	subcc	n,8,n		C negative: no second group of four to load
	sub	u0,v0,%g1	C main sub
	sub	%g1,cy,%g4	C carry sub
	orn	u0,v0,%g2
	bl,pn	%xcc,.Lend4567
	fanop
	b,a	.Loop

	.align	16
C START MAIN LOOP
C On entry %g4 holds the finished difference of limb 0 and %g2 holds
C u0 | ~v0.  Each limb takes four groups: finish the borrow logic while
C reloading that limb pair for the next pass, extract cy and store the
C difference, then start the next limb.
.Loop:	orn	%g4,%g2,%g2
	andn	u0,v0,%g3
	ldx	[up+0],u0
	fanop
C --
	andn	%g2,%g3,%g2
	ldx	[vp+0],v0
	add	up,32,up
	fanop
C --
	srlx	%g2,63,cy	C cy = borrow out of limb 0
	sub	u1,v1,%g1
	stx	%g4,[rp+0]
	fanop
C --
	sub	%g1,cy,%g4
	orn	u1,v1,%g2
	fmnop
	fanop
C --
	orn	%g4,%g2,%g2
	andn	u1,v1,%g3
	ldx	[up-24],u1
	fanop
C --
	andn	%g2,%g3,%g2
	ldx	[vp+8],v1
	add	vp,32,vp
	fanop
C --
	srlx	%g2,63,cy	C cy = borrow out of limb 1
	sub	u2,v2,%g1
	stx	%g4,[rp+8]
	fanop
C --
	sub	%g1,cy,%g4
	orn	u2,v2,%g2
	fmnop
	fanop
C --
	orn	%g4,%g2,%g2
	andn	u2,v2,%g3
	ldx	[up-16],u2
	fanop
C --
	andn	%g2,%g3,%g2
	ldx	[vp-16],v2
	add	rp,32,rp
	fanop
C --
	srlx	%g2,63,cy	C cy = borrow out of limb 2
	sub	u3,v3,%g1
	stx	%g4,[rp-16]
	fanop
C --
	sub	%g1,cy,%g4
	orn	u3,v3,%g2
	fmnop
	fanop
C --
	orn	%g4,%g2,%g2
	andn	u3,v3,%g3
	ldx	[up-8],u3
	fanop
C --
	andn	%g2,%g3,%g2
	subcc	n,4,n		C the four limbs just loaded are accounted for
	ldx	[vp-8],v3
	fanop
C --
	srlx	%g2,63,cy	C cy = borrow out of limb 3
	sub	u0,v0,%g1
	stx	%g4,[rp-8]
	fanop
C --
	sub	%g1,cy,%g4
	orn	u0,v0,%g2
	bge,pt	%xcc,.Loop	C flags still come from the subcc above
	fanop
C END MAIN LOOP

C Wind down: the same four-limb sequence as the main loop, without loads.
C It finishes the four limb pairs that are already in registers.
.Lend4567:
	orn	%g4,%g2,%g2
	andn	u0,v0,%g3
	andn	%g2,%g3,%g2
	srlx	%g2,63,cy
	sub	u1,v1,%g1
	stx	%g4,[rp+0]
	sub	%g1,cy,%g4
	orn	u1,v1,%g2
	orn	%g4,%g2,%g2
	andn	u1,v1,%g3
	andn	%g2,%g3,%g2
	srlx	%g2,63,cy
	sub	u2,v2,%g1
	stx	%g4,[rp+8]
	sub	%g1,cy,%g4
	orn	u2,v2,%g2
	orn	%g4,%g2,%g2
	andn	u2,v2,%g3
	andn	%g2,%g3,%g2
	add	rp,32,rp
	srlx	%g2,63,cy
	sub	u3,v3,%g1
	stx	%g4,[rp-16]
	sub	%g1,cy,%g4
	orn	u3,v3,%g2
	orn	%g4,%g2,%g2
	andn	u3,v3,%g3
	andn	%g2,%g3,%g2
	srlx	%g2,63,cy
	stx	%g4,[rp-8]

	addcc	n,4,n		C n is -4..-1 here; n + 4 = limbs still to do
	bz,pn	%xcc,.Lret
	fanop

C One limb per iteration: used for n < 4 and for the 0..3 leftover limbs.
.Loop0:	ldx	[up],u0
	add	up,8,up
	ldx	[vp],v0
	add	vp,8,vp
	add	rp,8,rp
	subcc	n,1,n
	sub	u0,v0,%g1
	orn	u0,v0,%g2
	sub	%g1,cy,%g4
	andn	u0,v0,%g3
	orn	%g4,%g2,%g2
	stx	%g4,[rp-8]
	andn	%g2,%g3,%g2
	bnz,pt	%xcc,.Loop0
	srlx	%g2,63,cy	C delay slot: runs on the last pass too

.Lret:	mov	cy,%i0		C return value: the final borrow
	ret
	restore
EPILOGUE(mpn_sub_n)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>