Annotation of OpenXM_contrib/gmp/mpn/pa64/sqr_diagonal.asm, Revision 1.1
1.1 ! ohara 1: dnl HP-PA 2.0 64-bit mpn_sqr_diagonal.
! 2:
! 3: dnl Copyright 2001, 2002 Free Software Foundation, Inc.
! 4:
! 5: dnl This file is part of the GNU MP Library.
! 6:
! 7: dnl The GNU MP Library is free software; you can redistribute it and/or modify
! 8: dnl it under the terms of the GNU Lesser General Public License as published
! 9: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
! 10: dnl your option) any later version.
! 11:
! 12: dnl The GNU MP Library is distributed in the hope that it will be useful, but
! 13: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 15: dnl License for more details.
! 16:
! 17: dnl You should have received a copy of the GNU Lesser General Public License
! 18: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: dnl MA 02111-1307, USA.
! 21:
! 22:
! 23: dnl This code runs at 7.25 cycles/limb on PA8000 and 7.75 cycles/limb on
! 24: dnl PA8500. The cache would saturate at 5 cycles/limb, so there is some room
! 25: dnl for optimization.
! 26:
! 27: include(`../config.m4')
! 28:
! 29: C INPUT PARAMETERS
! 30: define(`rp',`%r26') C result pointer; two 64-bit words are stored per source limb
! 31: define(`up',`%r25') C source pointer; limbs are loaded through it with fldds,ma 8(up)
! 32: define(`n',`%r24') C limb count; counted down to zero by the addib instructions
! 33: C INTEGER WORK REGISTERS used by the accumulation steps of mpn_sqr_diagonal
! 34: define(`p00',`%r28') C right-half square, reloaded from the low result word
! 35: define(`p32',`%r29') C left-half times right-half cross product, reloaded from the scratch slots at %r30
! 36: define(`p64',`%r31') C left-half square, reloaded from the high result word
! 37: define(`t0',`%r19') C portion of the doubled cross product that is added into p00
! 38: define(`t1',`%r20') C portion of the doubled cross product that is added into p64 with carry
! 39:
! 40: ifdef(`HAVE_ABI_2_0w',
! 41: ` .level 2.0W
! 42: ',` .level 2.0N
! 43: ')
! 44: PROLOGUE(mpn_sqr_diagonal) C for each of the n limbs at up, store its 128-bit square as two words at rp
! 45: .proc
! 46: .entry
! 47: ldo 128(%r30),%r30 C reserve 128 bytes at %r30; the words at -128 and -120 are used as scratch slots
! 48: C Start-up: square the first limb. NOTE(review): n = 0 is not special-cased, the first limb is loaded before n is tested; callers presumably pass n >= 1
! 49: fldds,ma 8(up),%fr8 C fr8 = first limb, up advances by 8
! 50: addib,= -1,n,L(end1) C n -= 1; a zero result means there was only one limb
! 51: nop C branch delay slot
! 52: fldds,ma 8(up),%fr4 C fr4 = second limb
! 53: xmpyu %fr8l,%fr8r,%fr10 C cross product of the two 32-bit halves of fr8
! 54: fstd %fr10,-120(%r30) C park the cross product in the -120 scratch slot
! 55: xmpyu %fr8r,%fr8r,%fr9 C right half squared
! 56: fstd %fr9,0(rp) C stored as the low result word
! 57: xmpyu %fr8l,%fr8l,%fr11 C left half squared
! 58: fstd %fr11,8(rp) C stored as the high result word
! 59: addib,= -1,n,L(end2) C n -= 1; a zero result means there were exactly two limbs
! 60: ldo 16(rp),rp C delay slot: step rp to the next result pair
! 61: C Main loop, two limbs per pass. Each half multiplies one limb in the fp unit and folds in the cross product parked by the other half, so the -120 and -128 slots alternate
! 62: L(loop) fldds,ma 8(up),%fr8 C load next up limb
! 63: xmpyu %fr4l,%fr4r,%fr6 C cross product of the fr4 limb
! 64: fstd %fr6,-128(%r30) C parked in the -128 scratch slot
! 65: xmpyu %fr4r,%fr4r,%fr5 C multiply in fp regs
! 66: fstd %fr5,0(rp)
! 67: xmpyu %fr4l,%fr4l,%fr7
! 68: fstd %fr7,8(rp)
! 69: ldd -120(%r30),p32 C cross product of the previous limb
! 70: ldd -16(rp),p00 C accumulate in int regs
! 71: ldd -8(rp),p64
! 72: depd,z p32,30,31,t0 C t0 = low 31 bits of p32 shifted left by 33
! 73: add t0,p00,p00
! 74: std p00,-16(rp)
! 75: extrd,u p32,32,33,t1 C t1 = p32 shifted right by 31
! 76: add,dc t1,p64,p64 C add including the carry out of the low word
! 77: std p64,-8(rp)
! 78: addib,= -1,n,L(exit) C n -= 1; on zero the limb in fr8 is the last one
! 79: ldo 16(rp),rp C delay slot: step rp to the next result pair
! 80: C Second half of the loop body: same steps with the roles of fr8 and fr4 and of the two scratch slots swapped
! 81: fldds,ma 8(up),%fr4 C load the limb for the next first half
! 82: xmpyu %fr8l,%fr8r,%fr10 C cross product of the fr8 limb
! 83: fstd %fr10,-120(%r30) C parked in the -120 scratch slot
! 84: xmpyu %fr8r,%fr8r,%fr9
! 85: fstd %fr9,0(rp)
! 86: xmpyu %fr8l,%fr8l,%fr11
! 87: fstd %fr11,8(rp)
! 88: ldd -128(%r30),p32 C cross product of the previous limb
! 89: ldd -16(rp),p00
! 90: ldd -8(rp),p64
! 91: depd,z p32,30,31,t0 C t0 = low 31 bits of p32 shifted left by 33
! 92: add t0,p00,p00
! 93: std p00,-16(rp)
! 94: extrd,u p32,32,33,t1 C t1 = p32 shifted right by 31
! 95: add,dc t1,p64,p64 C add including the carry out of the low word
! 96: std p64,-8(rp)
! 97: addib,<> -1,n,L(loop) C n -= 1; loop again while the result is nonzero
! 98: ldo 16(rp),rp C delay slot: step rp to the next result pair
! 99: C Wind-down with the last limb in fr4; reached from the start-up code or by falling out of the loop
! 100: L(end2) xmpyu %fr4l,%fr4r,%fr6 C cross product of the last limb
! 101: fstd %fr6,-128(%r30)
! 102: xmpyu %fr4r,%fr4r,%fr5
! 103: fstd %fr5,0(rp)
! 104: xmpyu %fr4l,%fr4l,%fr7
! 105: fstd %fr7,8(rp)
! 106: ldd -120(%r30),p32 C finish the previous limb first
! 107: ldd -16(rp),p00
! 108: ldd -8(rp),p64
! 109: depd,z p32,30,31,t0
! 110: add t0,p00,p00
! 111: std p00,-16(rp)
! 112: extrd,u p32,32,33,t1
! 113: add,dc t1,p64,p64
! 114: std p64,-8(rp)
! 115: ldo 16(rp),rp C step past the last result pair so the -16 and -8 offsets address it
! 116: ldd -128(%r30),p32 C then finish the last limb
! 117: ldd -16(rp),p00
! 118: ldd -8(rp),p64
! 119: depd,z p32,30,31,t0
! 120: add t0,p00,p00
! 121: std p00,-16(rp)
! 122: extrd,u p32,32,33,t1
! 123: add,dc t1,p64,p64
! 124: std p64,-8(rp)
! 125: bve (%r2) C branch to the address in %r2
! 126: ldo -128(%r30),%r30 C delay slot: release the 128 bytes reserved at entry
! 127: C Wind-down with the last limb in fr8; reached from the middle of the loop body
! 128: L(exit) xmpyu %fr8l,%fr8r,%fr10 C cross product of the last limb
! 129: fstd %fr10,-120(%r30)
! 130: xmpyu %fr8r,%fr8r,%fr9
! 131: fstd %fr9,0(rp)
! 132: xmpyu %fr8l,%fr8l,%fr11
! 133: fstd %fr11,8(rp)
! 134: ldd -128(%r30),p32 C finish the previous limb first
! 135: ldd -16(rp),p00
! 136: ldd -8(rp),p64
! 137: depd,z p32,31,32,t0 C t0 = low 32 bits of p32 shifted left by 32
! 138: add t0,p00,p00
! 139: extrd,u p32,31,32,t1 C t1 = p32 shifted right by 32
! 140: add,dc t1,p64,p64
! 141: add t0,p00,p00 C second add of the same pair doubles the cross product; same total as the shift by 33 used in the loop
! 142: add,dc t1,p64,p64
! 143: std p00,-16(rp)
! 144: std p64,-8(rp)
! 145: ldo 16(rp),rp C step past the last result pair so the -16 and -8 offsets address it
! 146: ldd -120(%r30),p32 C then finish the last limb
! 147: ldd -16(rp),p00
! 148: ldd -8(rp),p64
! 149: depd,z p32,31,32,t0
! 150: add t0,p00,p00
! 151: extrd,u p32,31,32,t1
! 152: add,dc t1,p64,p64
! 153: add t0,p00,p00
! 154: add,dc t1,p64,p64
! 155: std p00,-16(rp)
! 156: std p64,-8(rp)
! 157: bve (%r2) C branch to the address in %r2
! 158: ldo -128(%r30),%r30 C delay slot: release the 128 bytes reserved at entry
! 159: C Single-limb case: multiply and accumulate the only limb with no overlap
! 160: L(end1) xmpyu %fr8l,%fr8r,%fr10 C cross product of the two 32-bit halves
! 161: fstd %fr10,-128(%r30)
! 162: xmpyu %fr8r,%fr8r,%fr9
! 163: fstd %fr9,0(rp)
! 164: xmpyu %fr8l,%fr8l,%fr11
! 165: fstd %fr11,8(rp)
! 166: ldo 16(rp),rp C the accumulation below addresses the pair at -16 and -8
! 167: ldd -128(%r30),p32
! 168: ldd -16(rp),p00
! 169: ldd -8(rp),p64
! 170: depd,z p32,31,32,t0 C t0 = low 32 bits of p32 shifted left by 32
! 171: add t0,p00,p00
! 172: extrd,u p32,31,32,t1 C t1 = p32 shifted right by 32
! 173: add,dc t1,p64,p64
! 174: add t0,p00,p00 C added twice to double the cross product
! 175: add,dc t1,p64,p64
! 176: std p00,-16(rp)
! 177: std p64,-8(rp)
! 178: bve (%r2) C branch to the address in %r2
! 179: ldo -128(%r30),%r30 C delay slot: release the 128 bytes reserved at entry
! 180: .procend
! 181: EPILOGUE(mpn_sqr_diagonal)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>