Annotation of OpenXM_contrib/gmp/mpn/pa32/hppa1_1/pa7100/addmul_1.asm, Revision 1.1
1.1 ! ohara 1: dnl HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
! 2: dnl add the result to a second limb vector.
! 3:
! 4: dnl Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
! 5:
! 6: dnl This file is part of the GNU MP Library.
! 7:
! 8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
! 9: dnl it under the terms of the GNU Lesser General Public License as published
! 10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
! 11: dnl your option) any later version.
! 12:
! 13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
! 14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: dnl License for more details.
! 17:
! 18: dnl You should have received a copy of the GNU Lesser General Public License
! 19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: dnl MA 02111-1307, USA.
! 22:
! 23: include(`../config.m4')
! 24:
! 25: C INPUT PARAMETERS
! 26: define(`res_ptr',`%r26') C limb vector that is read, accumulated into and written back
! 27: define(`s1_ptr',`%r25') C limb vector that is multiplied; only read
! 28: define(`size_param',`%r24') C limb count; decremented in place and used as the loop counter
! 29: define(`s2_limb',`%r23') C the single multiplier limb
! 30: C WORKING REGISTERS
! 31: define(`cylimb',`%r28') C carry limb handed from one product to the next; holds the final carry at return
! 32: define(`s0',`%r19') C s0-s3: limbs loaded from res_ptr and stored back after the addition
! 33: define(`s1',`%r20')
! 34: define(`s2',`%r3') C %r3 is saved and restored by the routine
! 35: define(`s3',`%r4') C %r4 is saved and restored by the routine
! 36: define(`lo0',`%r21') C lo0-lo3: second word of each 64-bit product, added into the res limb
! 37: define(`lo1',`%r5') C %r5 is saved and restored by the routine
! 38: define(`lo2',`%r6') C %r6 is saved and restored by the routine
! 39: define(`lo3',`%r7') C %r7 is saved and restored by the routine
! 40: define(`hi0',`%r22') C hi0-hi3: first word of each 64-bit product, carried into the next limb
! 41: define(`hi1',`%r23') C safe to reuse: s2_limb is copied to %fr31R before hi1 is first written
! 42: define(`hi2',`%r29')
! 43: define(`hi3',`%r1')
! 44:
! 45: ASM_START()
! 46: PROLOGUE(mpn_addmul_1) C res_ptr[i] += s1_ptr[i] * s2_limb for size_param limbs; final carry limb is left in cylimb
! 47: C .callinfo frame=128,no_calls
! 48: C Set up: allocate a 128-byte frame and move s2_limb into %fr31R by way of memory
! 49: ldo 128(%r30),%r30
! 50: stws s2_limb,-16(%r30) C spill the multiplier so it can be loaded into an FP register
! 51: add %r0,%r0,cylimb C clear cy and cylimb
! 52: addib,< -4,size_param,L(few_limbs) C size_param -= 4; fewer than 4 limbs goes to the one-limb loop
! 53: fldws -16(%r30),%fr31R C %fr31R = s2_limb; sits in the branch delay slot, the one-limb loop reads it too
! 54: C Point %r31 at the product scratch area and save %r3-%r7, which back s2, s3 and lo1-lo3
! 55: ldo -112(%r30),%r31 C product slots used below are -16(%r31) through 15(%r31)
! 56: stw %r3,-96(%r30)
! 57: stw %r4,-92(%r30)
! 58: stw %r5,-88(%r30)
! 59: stw %r6,-84(%r30)
! 60: stw %r7,-80(%r30)
! 61: C NOTE(review): bit 29 looks like the 4-byte address bit (MSB-first numbering), i.e. an 8-byte alignment test for the fldds loads -- confirm
! 62: bb,>=,n s1_ptr,29,L(0) C bit clear: skip the single-limb step; ,n nullifies the next instruction when taken
! 63: C Single-limb step: consume one limb so that s1_ptr advances by 4 before the doubleword loads
! 64: fldws,ma 4(s1_ptr),%fr4 C load one limb, s1_ptr += 4
! 65: ldws 0(res_ptr),s0
! 66: xmpyu %fr4,%fr31R,%fr5 C 64-bit product of the limb and s2_limb
! 67: fstds %fr5,-16(%r31) C products reach the integer registers through the scratch area
! 68: ldws -16(%r31),cylimb C first word of the product seeds cylimb
! 69: ldws -12(%r31),lo0
! 70: add s0,lo0,s0 C carry out is consumed by the first addc after label 0 or in loop2
! 71: addib,< -1,size_param,L(few_limbs) C fewer than 4 limbs remain: finish in the one-limb loop
! 72: stws,ma s0,4(res_ptr) C store the result limb, res_ptr += 4
! 73:
! 74: C start software pipeline ----------------------------------------------------
! 75: .label L(0)
! 76: fldds,ma 8(s1_ptr),%fr4 C each doubleword load brings in two limbs, s1_ptr += 8
! 77: fldds,ma 8(s1_ptr),%fr8
! 78:
! 79: xmpyu %fr4L,%fr31R,%fr5
! 80: xmpyu %fr4R,%fr31R,%fr6
! 81: xmpyu %fr8L,%fr31R,%fr9
! 82: xmpyu %fr8R,%fr31R,%fr10
! 83:
! 84: fstds %fr5,-16(%r31)
! 85: fstds %fr6,-8(%r31)
! 86: fstds %fr9,0(%r31)
! 87: fstds %fr10,8(%r31)
! 88:
! 89: ldws -16(%r31),hi0
! 90: ldws -12(%r31),lo0
! 91: ldws -8(%r31),hi1
! 92: ldws -4(%r31),lo1
! 93: ldws 0(%r31),hi2
! 94: ldws 4(%r31),lo2
! 95: ldws 8(%r31),hi3
! 96: ldws 12(%r31),lo3
! 97: C Carry chain: each lo word absorbs the hi word of the preceding product plus the pending carry
! 98: addc lo0,cylimb,lo0
! 99: addc lo1,hi0,lo1
! 100: addc lo2,hi1,lo2
! 101: addc lo3,hi2,lo3
! 102:
! 103: addib,< -4,size_param,L(end) C fewer than 4 further limbs: drain the pipeline
! 104: addc %r0,hi3,cylimb C propagate carry into cylimb
! 105: C main loop ------------------------------------------------------------------
! 106: .label L(loop) C each pass adds the 4 lo words from the previous pass into res and multiplies the next 4 limbs
! 107: fldds,ma 8(s1_ptr),%fr4
! 108: fldds,ma 8(s1_ptr),%fr8
! 109: C Loads of 4 res limbs are interleaved with the 4 multiplies
! 110: ldws 0(res_ptr),s0
! 111: xmpyu %fr4L,%fr31R,%fr5
! 112: ldws 4(res_ptr),s1
! 113: xmpyu %fr4R,%fr31R,%fr6
! 114: ldws 8(res_ptr),s2
! 115: xmpyu %fr8L,%fr31R,%fr9
! 116: ldws 12(res_ptr),s3
! 117: xmpyu %fr8R,%fr31R,%fr10
! 118: C Add the previous lo0-lo3 into the res limbs while the new products are stored to scratch
! 119: fstds %fr5,-16(%r31)
! 120: add s0,lo0,s0 C plain add starts a fresh carry chain
! 121: fstds %fr6,-8(%r31)
! 122: addc s1,lo1,s1
! 123: fstds %fr9,0(%r31)
! 124: addc s2,lo2,s2
! 125: fstds %fr10,8(%r31)
! 126: addc s3,lo3,s3 C carry out of this add feeds the addc into lo0 below
! 127: C Reload the new products as separate hi and lo words
! 128: ldws -16(%r31),hi0
! 129: ldws -12(%r31),lo0
! 130: ldws -8(%r31),hi1
! 131: ldws -4(%r31),lo1
! 132: ldws 0(%r31),hi2
! 133: ldws 4(%r31),lo2
! 134: ldws 8(%r31),hi3
! 135: ldws 12(%r31),lo3
! 136: C Fold the hi words into the following lo words, interleaved with the stores of s0-s3
! 137: addc lo0,cylimb,lo0
! 138: stws,ma s0,4(res_ptr)
! 139: addc lo1,hi0,lo1
! 140: stws,ma s1,4(res_ptr)
! 141: addc lo2,hi1,lo2
! 142: stws,ma s2,4(res_ptr)
! 143: addc lo3,hi2,lo3
! 144: stws,ma s3,4(res_ptr)
! 145:
! 146: addib,>= -4,size_param,L(loop) C at least 4 more limbs: go round again
! 147: addc %r0,hi3,cylimb C propagate carry into cylimb
! 148: C finish software pipeline ---------------------------------------------------
! 149: .label L(end)
! 150: ldws 0(res_ptr),s0
! 151: ldws 4(res_ptr),s1
! 152: ldws 8(res_ptr),s2
! 153: ldws 12(res_ptr),s3
! 154: C Add the last pipelined lo words into res; the carry out of the final addc stays pending
! 155: add s0,lo0,s0
! 156: stws,ma s0,4(res_ptr)
! 157: addc s1,lo1,s1
! 158: stws,ma s1,4(res_ptr)
! 159: addc s2,lo2,s2
! 160: stws,ma s2,4(res_ptr)
! 161: addc s3,lo3,s3
! 162: stws,ma s3,4(res_ptr)
! 163:
! 164: C restore callee-saves registers ---------------------------------------------
! 165: ldw -96(%r30),%r3
! 166: ldw -92(%r30),%r4
! 167: ldw -88(%r30),%r5
! 168: ldw -84(%r30),%r6
! 169: ldw -80(%r30),%r7
! 170: C One-limb tail: the code below uses only s0, lo0, hi0 and cylimb, none of which live in %r3-%r7
! 171: .label L(few_limbs)
! 172: addib,=,n 4,size_param,L(ret) C undo the -4 bias; nothing left means return
! 173:
! 174: .label L(loop2)
! 175: fldws,ma 4(s1_ptr),%fr4
! 176: ldws 0(res_ptr),s0
! 177: xmpyu %fr4,%fr31R,%fr5
! 178: fstds %fr5,-16(%r30) C scratch here is the frame slot that earlier held s2_limb
! 179: ldws -16(%r30),hi0
! 180: ldws -12(%r30),lo0
! 181: addc lo0,cylimb,lo0 C lo0 += cylimb + pending carry
! 182: addc %r0,hi0,cylimb C cylimb = hi0 + carry
! 183: add s0,lo0,s0 C carry out is consumed by the next pass or at label ret
! 184: stws,ma s0,4(res_ptr)
! 185: addib,<> -1,size_param,L(loop2) C NOTE(review): the chain relies on addib leaving the carry bit intact -- confirm in the PA-RISC manual
! 186: nop C fills the branch delay slot
! 187:
! 188: .label L(ret)
! 189: addc %r0,cylimb,cylimb C fold the last pending carry into the carry limb
! 190: bv 0(%r2) C return through %r2
! 191: ldo -128(%r30),%r30 C release the 128-byte frame; sits in the delay slot of the return
! 192: EPILOGUE(mpn_addmul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>