Annotation of OpenXM_contrib/gmp/mpn/sparc32/v9/submul_1.asm, Revision 1.1
1.1 ! maekawa 1: dnl SPARC v9 32-bit mpn_submul_1 -- Multiply a limb vector with a limb and
! 2: dnl subtract the result from a second limb vector.
! 3:
! 4: dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
! 5:
! 6: dnl This file is part of the GNU MP Library.
! 7:
! 8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
! 9: dnl it under the terms of the GNU Lesser General Public License as published
! 10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
! 11: dnl your option) any later version.
! 12:
! 13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
! 14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: dnl License for more details.
! 17:
! 18: dnl You should have received a copy of the GNU Lesser General Public License
! 19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: dnl MA 02111-1307, USA.
! 22:
! 23:
! 24: include(`../config.m4')
! 25:
! 26: C INPUT PARAMETERS
! 27: C res_ptr i0
! 28: C s1_ptr i1
! 29: C size i2
! 30: C s2_limb i3
! 31:
! 32: ASM_START()
! 33: C The word at noll below is a 32-bit zero that mpn_submul_1 loads into %f10
! 34: TEXT
! 35: ALIGN(4)
! 36: L(noll):
! 37: .word 0
! 38: C mpn_submul_1: subtract s1_ptr[0..size-1] * s2_limb from res_ptr[0..size-1]; the borrow-out limb is returned in the caller %o0
! 39: PROLOGUE(mpn_submul_1)
! 40: save %sp,-256,%sp C new register window; 256-byte frame holds the fp/int transfer slots
! 41: C Load the zero word at noll into %f10 so that f10:f11 is a zero-extended 64-bit integer
! 42: ifdef(`PIC',
! 43: `L(pc): rd %pc,%o7
! 44: ld [%o7+L(noll)-L(pc)],%f10',
! 45: ` sethi %hi(L(noll)),%g1
! 46: ld [%g1+%lo(L(noll))],%f10')
! 47: C %f6 = low 16 bits of s2_limb as a double; moved to the FPU through stack slot fp-16
! 48: sethi %hi(0xffff0000),%o0
! 49: andn %i3,%o0,%o0 C o0 = s2_limb & 0xffff
! 50: st %o0,[%fp-16]
! 51: ld [%fp-16],%f11
! 52: fxtod %f10,%f6
! 53: C %f8 = high 16 bits of s2_limb as a double
! 54: srl %i3,16,%o0
! 55: st %o0,[%fp-16]
! 56: ld [%fp-16],%f11
! 57: fxtod %f10,%f8
! 58:
! 59: mov 0,%g3 C cy = 0
! 60: C Wind-up: start the FP multiplies for the first limbs; sizes 1 to 4 exit to end1 to end4
! 61: ld [%i1],%f11 C f10:f11 = first s1 limb
! 62: subcc %i2,1,%i2 C size--
! 63: be,pn %icc,L(end1)
! 64: add %i1,4,%i1 C s1_ptr++
! 65: C Each product is limb (below 2^32) times a 16-bit half so it is below 2^48 and exact in a double
! 66: fxtod %f10,%f2 C f2 = limb as a double
! 67: ld [%i1],%f11 C fetch the next limb
! 68: add %i1,4,%i1 C s1_ptr++
! 69: fmuld %f2,%f8,%f16 C limb * high half (p16)
! 70: fmuld %f2,%f6,%f4 C limb * low half (p0)
! 71: fdtox %f16,%f14 C p16 as a 64-bit integer
! 72: std %f14,[%fp-24] C spill p16
! 73: fdtox %f4,%f12 C p0 as a 64-bit integer
! 74: subcc %i2,1,%i2
! 75: be,pn %icc,L(end2)
! 76: std %f12,[%fp-16] C spill p0 (branch delay slot)
! 77: C Second limb: same steps but spilling to the other slot pair fp-40 and fp-32
! 78: fxtod %f10,%f2
! 79: ld [%i1],%f11
! 80: add %i1,4,%i1 C s1_ptr++
! 81: fmuld %f2,%f8,%f16
! 82: fmuld %f2,%f6,%f4
! 83: fdtox %f16,%f14
! 84: std %f14,[%fp-40]
! 85: fdtox %f4,%f12
! 86: subcc %i2,1,%i2
! 87: be,pn %icc,L(end3)
! 88: std %f12,[%fp-32]
! 89: C Third limb; the integer side also starts combining the first limb product
! 90: fxtod %f10,%f2
! 91: ld [%i1],%f11
! 92: add %i1,4,%i1 C s1_ptr++
! 93: ld [%i0],%g5 C g5 = *res_ptr
! 94: ldx [%fp-24],%g2 C p16
! 95: fmuld %f2,%f8,%f16
! 96: ldx [%fp-16],%g1 C p0
! 97: fmuld %f2,%f6,%f4
! 98: sllx %g2,16,%g2 C align p16
! 99: fdtox %f16,%f14
! 100: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
! 101: std %f14,[%fp-24]
! 102: fdtox %f4,%f12
! 103: add %i0,4,%i0 C res_ptr++
! 104: subcc %i2,1,%i2
! 105: be,pn %icc,L(end4)
! 106: std %f12,[%fp-16]
! 107:
! 108: b,a L(loopm) C enter the loop at its second half
! 109: C Loop is unrolled twice: the loop half uses slots fp-24 and fp-16 and the loopm half uses fp-40 and fp-32
! 110: .align 16
! 111: C BEGIN LOOP
! 112: L(loop):
! 113: fxtod %f10,%f2
! 114: ld [%i1],%f11
! 115: add %i1,4,%i1 C s1_ptr++
! 116: add %g3,%g1,%g4 C p += cy
! 117: subcc %g5,%g4,%l2 C subtract p from *res_ptr (ADD2)
! 118: ld [%i0],%g5 C g5 = next *res_ptr
! 119: srlx %g4,32,%g3 C cy = high part of p
! 120: ldx [%fp-24],%g2 C p16
! 121: fmuld %f2,%f8,%f16
! 122: ldx [%fp-16],%g1 C p0
! 123: fmuld %f2,%f6,%f4
! 124: sllx %g2,16,%g2 C align p16
! 125: st %l2,[%i0-4] C store result limb (res_ptr was already advanced)
! 126: addx %g3,0,%g3 C cy += borrow from the subcc above
! 127: fdtox %f16,%f14
! 128: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
! 129: std %f14,[%fp-24]
! 130: fdtox %f4,%f12
! 131: std %f12,[%fp-16]
! 132: subcc %i2,1,%i2
! 133: be,pn %icc,L(loope)
! 134: add %i0,4,%i0 C res_ptr++
! 135: L(loopm):
! 136: fxtod %f10,%f2
! 137: ld [%i1],%f11
! 138: add %i1,4,%i1 C s1_ptr++
! 139: add %g3,%g1,%g4 C p += cy
! 140: subcc %g5,%g4,%l2 C subtract p from *res_ptr (ADD2)
! 141: ld [%i0],%g5 C g5 = next *res_ptr
! 142: srlx %g4,32,%g3 C cy = high part of p
! 143: ldx [%fp-40],%g2 C p16
! 144: fmuld %f2,%f8,%f16
! 145: ldx [%fp-32],%g1 C p0
! 146: fmuld %f2,%f6,%f4
! 147: sllx %g2,16,%g2 C align p16
! 148: st %l2,[%i0-4] C store result limb (res_ptr was already advanced)
! 149: addx %g3,0,%g3 C cy += borrow from the subcc above
! 150: fdtox %f16,%f14
! 151: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
! 152: std %f14,[%fp-40]
! 153: fdtox %f4,%f12
! 154: std %f12,[%fp-32]
! 155: subcc %i2,1,%i2
! 156: bne,pt %icc,L(loop)
! 157: add %i0,4,%i0 C res_ptr++
! 158: C END LOOP
! 159: C Wind-down after the loopm half: no more limbs are loaded and each borrow now stays in the carry flag for the next subxcc
! 160: fxtod %f10,%f2
! 161: add %g3,%g1,%g4 C p += cy
! 162: subcc %g5,%g4,%l2 C subtract p from *res_ptr (ADD2)
! 163: ld [%i0],%g5
! 164: srlx %g4,32,%g3
! 165: ldx [%fp-24],%g2 C p16
! 166: fmuld %f2,%f8,%f16
! 167: ldx [%fp-16],%g1 C p0
! 168: fmuld %f2,%f6,%f4
! 169: sllx %g2,16,%g2 C align p16
! 170: st %l2,[%i0-4]
! 171: b,a L(xxx)
! 172: L(loope): C wind-down after the loop half
! 173: L(end4): C also the entry when size is 4
! 174: fxtod %f10,%f2
! 175: add %g3,%g1,%g4 C p += cy
! 176: subcc %g5,%g4,%l2 C subtract p from *res_ptr (ADD2)
! 177: ld [%i0],%g5
! 178: srlx %g4,32,%g3
! 179: ldx [%fp-40],%g2 C p16
! 180: fmuld %f2,%f8,%f16
! 181: ldx [%fp-32],%g1 C p0
! 182: fmuld %f2,%f6,%f4
! 183: sllx %g2,16,%g2 C align p16
! 184: st %l2,[%i0-4]
! 185: fdtox %f16,%f14
! 186: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
! 187: std %f14,[%fp-40]
! 188: fdtox %f4,%f12
! 189: std %f12,[%fp-32]
! 190: add %i0,4,%i0 C res_ptr++
! 191:
! 192: add %g3,%g1,%g4 C p += cy
! 193: subxcc %g5,%g4,%l2 C subtract p and the pending borrow from *res_ptr (ADD2)
! 194: ld [%i0],%g5
! 195: srlx %g4,32,%g3
! 196: ldx [%fp-24],%g2 C p16
! 197: ldx [%fp-16],%g1 C p0
! 198: sllx %g2,16,%g2 C align p16
! 199: st %l2,[%i0-4]
! 200: b,a L(yyy)
! 201: C end3: entered directly when size is 3; the wind-down after the loopm half joins at xxx
! 202: L(end3):
! 203: fxtod %f10,%f2
! 204: ld [%i0],%g5
! 205: ldx [%fp-24],%g2 C p16
! 206: fmuld %f2,%f8,%f16
! 207: ldx [%fp-16],%g1 C p0
! 208: fmuld %f2,%f6,%f4
! 209: sllx %g2,16,%g2 C align p16
! 210: L(xxx): fdtox %f16,%f14
! 211: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
! 212: std %f14,[%fp-24]
! 213: fdtox %f4,%f12
! 214: std %f12,[%fp-16]
! 215: add %i0,4,%i0 C res_ptr++
! 216:
! 217: add %g3,%g1,%g4 C p += cy
! 218: subxcc %g5,%g4,%l2 C subtract p and the pending borrow from *res_ptr (ADD2)
! 219: ld [%i0],%g5
! 220: srlx %g4,32,%g3
! 221: ldx [%fp-40],%g2 C p16
! 222: ldx [%fp-32],%g1 C p0
! 223: sllx %g2,16,%g2 C align p16
! 224: st %l2,[%i0-4]
! 225: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
! 226: add %i0,4,%i0 C res_ptr++
! 227:
! 228: add %g3,%g1,%g4 C p += cy
! 229: subxcc %g5,%g4,%l2 C subtract p and the pending borrow from *res_ptr (ADD2)
! 230: ld [%i0],%g5
! 231: srlx %g4,32,%g3
! 232: ldx [%fp-24],%g2 C p16
! 233: ldx [%fp-16],%g1 C p0
! 234: sllx %g2,16,%g2 C align p16
! 235: st %l2,[%i0-4]
! 236: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
! 237: add %i0,4,%i0 C res_ptr++
! 238: b,a L(ret)
! 239: C end2: entered directly when size is 2; the loope path joins at yyy
! 240: L(end2):
! 241: fxtod %f10,%f2
! 242: fmuld %f2,%f8,%f16
! 243: fmuld %f2,%f6,%f4
! 244: fdtox %f16,%f14
! 245: std %f14,[%fp-40]
! 246: fdtox %f4,%f12
! 247: std %f12,[%fp-32]
! 248: ld [%i0],%g5
! 249: ldx [%fp-24],%g2 C p16
! 250: ldx [%fp-16],%g1 C p0
! 251: sllx %g2,16,%g2 C align p16
! 252: L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
! 253: add %i0,4,%i0 C res_ptr++
! 254:
! 255: add %g3,%g1,%g4 C p += cy
! 256: subxcc %g5,%g4,%l2 C subtract p and the pending borrow from *res_ptr (ADD2)
! 257: ld [%i0],%g5
! 258: srlx %g4,32,%g3
! 259: ldx [%fp-40],%g2 C p16
! 260: ldx [%fp-32],%g1 C p0
! 261: sllx %g2,16,%g2 C align p16
! 262: st %l2,[%i0-4]
! 263: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
! 264: add %i0,4,%i0 C res_ptr++
! 265: b,a L(ret)
! 266: C end1: entered directly when size is 1; falls through into ret
! 267: L(end1):
! 268: fxtod %f10,%f2
! 269: fmuld %f2,%f8,%f16
! 270: fmuld %f2,%f6,%f4
! 271: fdtox %f16,%f14
! 272: std %f14,[%fp-24]
! 273: fdtox %f4,%f12
! 274: std %f12,[%fp-16]
! 275:
! 276: ld [%i0],%g5
! 277: ldx [%fp-24],%g2 C p16
! 278: ldx [%fp-16],%g1 C p0
! 279: sllx %g2,16,%g2 C align p16
! 280: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
! 281: add %i0,4,%i0 C res_ptr++
! 282:
! 283: L(ret): add %g3,%g1,%g4 C p += cy
! 284: subxcc %g5,%g4,%l2 C subtract p and the pending borrow from *res_ptr (ADD2)
! 285: srlx %g4,32,%g3 C cy = high part of p
! 286: st %l2,[%i0-4] C store the last result limb
! 287:
! 288: addx %g3,%g0,%g3 C cy += final borrow
! 289: ret
! 290: restore %g0,%g3,%o0 C sideeffect: put cy in retreg
! 291: EPILOGUE(mpn_submul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>