Annotation of OpenXM_contrib/gmp/mpn/sparc64/add_n.asm, Revision 1.1.1.2
1.1.1.2 ! ohara 1: dnl SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and
! 2: dnl store sum in a third limb vector.
1.1 maekawa 3:
1.1.1.2 ! ohara 4: dnl Copyright 2001, 2002 Free Software Foundation, Inc.
1.1 maekawa 5:
1.1.1.2 ! ohara 6: dnl This file is part of the GNU MP Library.
1.1 maekawa 7:
1.1.1.2 ! ohara 8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
! 9: dnl it under the terms of the GNU Lesser General Public License as published
! 10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
! 11: dnl your option) any later version.
! 12:
! 13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
! 14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: dnl License for more details.
! 17:
! 18: dnl You should have received a copy of the GNU Lesser General Public License
! 19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: dnl MA 02111-1307, USA.
1.1 maekawa 22:
23: include(`../config.m4')
24:
1.1.1.2 ! ohara 25: C Compute carry-out from the most significant bits of u,v, and r, where
! 26: C r=u+v+carry_in, using logic operations.
1.1 maekawa 27:
1.1.1.2 ! ohara 28: C This code runs at 4 cycles/limb on UltraSPARC 1 and 2. It has a 4 insn
! 29: C recurrency, and on the UltraSPARC 1 and 2 the IE units are 100% saturated.
! 30: C Therefore, it seems futile to try to optimize this any further...
! 31:
! 32: C INPUT PARAMETERS
! 33: define(`rp',`%i0') dnl destination pointer: each result limb is stored through it
! 34: define(`up',`%i1') dnl first source pointer: limbs are loaded into u0-u3
! 35: define(`vp',`%i2') dnl second source pointer: limbs are loaded into v0-v3
! 36: define(`n',`%i3') dnl limb count, decremented as limbs are consumed
! 37:
! 38: define(`u0',`%l0') dnl u0-u3: the four limbs of the current block read from up
! 39: define(`u1',`%l2')
! 40: define(`u2',`%l4')
! 41: define(`u3',`%l6')
! 42: define(`v0',`%l1') dnl v0-v3: the four limbs of the current block read from vp
! 43: define(`v1',`%l3')
! 44: define(`v2',`%l5')
! 45: define(`v3',`%l7')
1.1 maekawa 46:
1.1.1.2 ! ohara 47: define(`cy',`%i4') dnl carry (0 or 1) passed from one limb to the next
! 48:
! 49: define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
! 50: define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe
1.1 maekawa 51:
ASM_START()
        REGISTER(%g2,#scratch)
        REGISTER(%g3,#scratch)

C mpn_add_n (rp, up, vp, n)
C
C Add the n-limb vectors at up and vp, store the n-limb sum at rp, and
C return the carry out of the most significant limb (0 or 1).  n must be
C greater than 0.
C
C The carry out of each limb is derived with logic operations rather than
C from the condition codes.  With r = u + v + cy, the carry out is the top
C bit of (u & v) | ((u | v) & ~r).
C
C Register use inside the function:
C   %g1  u + v
C   %g4  u + v + cy, the result limb that gets stored
C   %g2  u | v, then the combined carry expression
C   %g3  u & v
C
C n lives in a 64-bit register, so every branch that tests it uses %xcc.
C Testing %icc would examine only the low 32 bits of the count, which
C mishandles counts that do not fit in 32 bits and treats counts with
C bit 31 set as negative.
PROLOGUE(mpn_add_n)
        save    %sp,-160,%sp

        fitod   %f0,%f0                 C make sure f0 contains small, quiet number
        subcc   n,4,%g0                 C fewer than 4 limbs?
        bl,pn   %xcc,.Loop0             C yes: only the one-limb loop is needed
        mov     0,cy                    C delay slot, runs on both paths: no carry in

C Preload the first block of four limbs from each source.
        ldx     [up+0],u0
        ldx     [vp+0],v0
        add     up,32,up
        ldx     [up-24],u1
        ldx     [vp+8],v1
        add     vp,32,vp
        ldx     [up-16],u2
        ldx     [vp-16],v2
        ldx     [up-8],u3
        ldx     [vp-8],v3
        subcc   n,8,n                   C is a second full block available to preload?
        add     u0,v0,%g1               C main add
        add     %g1,cy,%g4              C carry add
        or      u0,v0,%g2
        bl,pn   %xcc,.Lend4567          C no: just finish the block already loaded
        fanop
        b,a     .Loop

        .align  16
C START MAIN LOOP
C Each pass finishes the four limbs loaded earlier while loading the next
C four.  On entry %g4 and %g2 already hold the sum and the OR for limb 0.
.Loop:  andn    %g2,%g4,%g2
        and     u0,v0,%g3
        ldx     [up+0],u0
        fanop
C --
        or      %g3,%g2,%g2
        ldx     [vp+0],v0
        add     up,32,up
        fanop
C --
        srlx    %g2,63,cy
        add     u1,v1,%g1
        stx     %g4,[rp+0]
        fanop
C --
        add     %g1,cy,%g4
        or      u1,v1,%g2
        fmnop
        fanop
C --
        andn    %g2,%g4,%g2
        and     u1,v1,%g3
        ldx     [up-24],u1
        fanop
C --
        or      %g3,%g2,%g2
        ldx     [vp+8],v1
        add     vp,32,vp
        fanop
C --
        srlx    %g2,63,cy
        add     u2,v2,%g1
        stx     %g4,[rp+8]
        fanop
C --
        add     %g1,cy,%g4
        or      u2,v2,%g2
        fmnop
        fanop
C --
        andn    %g2,%g4,%g2
        and     u2,v2,%g3
        ldx     [up-16],u2
        fanop
C --
        or      %g3,%g2,%g2
        ldx     [vp-16],v2
        add     rp,32,rp
        fanop
C --
        srlx    %g2,63,cy
        add     u3,v3,%g1
        stx     %g4,[rp-16]
        fanop
C --
        add     %g1,cy,%g4
        or      u3,v3,%g2
        fmnop
        fanop
C --
        andn    %g2,%g4,%g2
        and     u3,v3,%g3
        ldx     [up-8],u3
        fanop
C --
        or      %g3,%g2,%g2
        subcc   n,4,n                   C is there yet another full block to preload?
        ldx     [vp-8],v3
        fanop
C --
        srlx    %g2,63,cy
        add     u0,v0,%g1
        stx     %g4,[rp-8]
        fanop
C --
        add     %g1,cy,%g4
        or      u0,v0,%g2
        bge,pt  %xcc,.Loop
        fanop
C END MAIN LOOP

C Wind down: finish the four limbs that are already in registers, without
C loading any further data.
.Lend4567:
        andn    %g2,%g4,%g2
        and     u0,v0,%g3
        or      %g3,%g2,%g2
        srlx    %g2,63,cy
        add     u1,v1,%g1
        stx     %g4,[rp+0]
        add     %g1,cy,%g4
        or      u1,v1,%g2
        andn    %g2,%g4,%g2
        and     u1,v1,%g3
        or      %g3,%g2,%g2
        srlx    %g2,63,cy
        add     u2,v2,%g1
        stx     %g4,[rp+8]
        add     %g1,cy,%g4
        or      u2,v2,%g2
        andn    %g2,%g4,%g2
        and     u2,v2,%g3
        or      %g3,%g2,%g2
        add     rp,32,rp
        srlx    %g2,63,cy
        add     u3,v3,%g1
        stx     %g4,[rp-16]
        add     %g1,cy,%g4
        or      u3,v3,%g2
        andn    %g2,%g4,%g2
        and     u3,v3,%g3
        or      %g3,%g2,%g2
        srlx    %g2,63,cy
        stx     %g4,[rp-8]

        addcc   n,4,n                   C n = limbs left over after the unrolled blocks
        bz,pn   %xcc,.Lret
        fanop

C One limb per pass: handles the leftover limbs, and all of the work when
C the function was entered with fewer than 4 limbs.
.Loop0: ldx     [up],u0
        add     up,8,up
        ldx     [vp],v0
        add     vp,8,vp
        add     rp,8,rp
        subcc   n,1,n
        add     u0,v0,%g1
        or      u0,v0,%g2
        add     %g1,cy,%g4
        and     u0,v0,%g3
        andn    %g2,%g4,%g2
        stx     %g4,[rp-8]
        or      %g3,%g2,%g2
        bnz,pt  %xcc,.Loop0
        srlx    %g2,63,cy               C delay slot: carry out of this limb

.Lret:  mov     cy,%i0                  C return value: the final carry
        ret
        restore
EPILOGUE(mpn_add_n)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>