Annotation of OpenXM_contrib/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S, Revision 1.1
1.1 ! maekawa 1: ; HP-PA 7100/7200 __mpn_submul_1 -- Multiply a limb vector with a limb and
! 2: ; subtract the result from a second limb vector.
! 3:
! 4: ; Copyright (C) 1995 Free Software Foundation, Inc.
! 5:
! 6: ; This file is part of the GNU MP Library.
! 7:
! 8: ; The GNU MP Library is free software; you can redistribute it and/or modify
! 9: ; it under the terms of the GNU Library General Public License as published by
! 10: ; the Free Software Foundation; either version 2 of the License, or (at your
! 11: ; option) any later version.
! 12:
! 13: ; The GNU MP Library is distributed in the hope that it will be useful, but
! 14: ; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: ; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 16: ; License for more details.
! 17:
! 18: ; You should have received a copy of the GNU Library General Public License
! 19: ; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: ; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: ; MA 02111-1307, USA.
! 22:
! 23: ; INPUT PARAMETERS
; Incoming arguments, named after the registers they are read from.
; size is counted in limbs: the code consumes one 4-byte word per count.
! 24: #define res_ptr %r26
! 25: #define s1_ptr %r25
! 26: #define size %r24
! 27: #define s2_limb %r23
! 28:
; Working registers.
;   cylimb      high product limb carried from one step into the next
;   s0-s3       limbs loaded from res_ptr and written back after subtraction
;   loN / hiN   low / high word of the N-th product of an unrolled group of four
; s2, s3, lo1, lo2 and lo3 are placed in %r3-%r7, which the routine stores to
; its frame before the unrolled path uses them and reloads afterwards.
! 29: #define cylimb %r28
! 30: #define s0 %r19
! 31: #define s1 %r20
! 32: #define s2 %r3
! 33: #define s3 %r4
! 34: #define lo0 %r21
! 35: #define lo1 %r5
! 36: #define lo2 %r6
! 37: #define lo3 %r7
! 38: #define hi0 %r22
; hi1 aliases s2_limb (%r23): the multiplier is copied through the stack into
; %fr31R at entry, before hi1 is written for the first time.
! 39: #define hi1 %r23 /* safe to reuse */
! 40: #define hi2 %r29
! 41: #define hi3 %r1
! 42:
! 43: .code
! 44: .export __mpn_submul_1
; ----------------------------------------------------------------------------
; __mpn_submul_1
;   Multiply the limb vector at s1_ptr (size limbs) by s2_limb and subtract
;   the product from the limb vector at res_ptr, writing the difference back
;   through res_ptr.
;   In:    res_ptr (%r26), s1_ptr (%r25), size (%r24), s2_limb (%r23)
;   Out:   cylimb (%r28) = last high product limb plus the pending carry bit;
;          presumably the function result (confirm against the PA-RISC
;          calling convention).
;   Frame: 128 bytes.  -16(%r30) is an 8-byte scratch slot, the 32 bytes
;          addressed through %r31 hold four 64-bit products, and
;          -96..-80(%r30) hold the saved %r3-%r7.
;   Shape: sizes below 4 use the one-limb loop at L$loop2.  Otherwise an
;          optional single-limb step lines s1_ptr up for the doubleword loads,
;          then a 4-way unrolled software pipeline runs, and L$loop2 mops up
;          the 0-3 remaining limbs.
;   The carry bit is kept live across steps: after each subtraction it is
;   flipped so that a borrow is added into the next product (see the existing
;   "invert cy" comments).  The code relies on the add-and-branch, load and
;   store instructions in between leaving that bit untouched.
; ----------------------------------------------------------------------------
! 45: __mpn_submul_1
! 46: .proc
! 47: .callinfo frame=128,no_calls
! 48: .entry
! 49:
! 50: ldo 128(%r30),%r30 ; allocate the 128-byte frame declared in .callinfo
! 51: stws s2_limb,-16(%r30) ; spill the multiplier so it can be loaded into an FP register
! 52: add %r0,%r0,cylimb ; clear cy and cylimb
! 53: addib,< -4,size,L$few_limbs ; size -= 4; fewer than 4 limbs -> one-limb loop
! 54: fldws -16(%r30),%fr31R ; (branch delay slot) multiplier -> %fr31R
! 55:
! 56: ldo -112(%r30),%r31 ; %r31 = base for the product scratch area (-16..+15 off %r31)
; Save %r3-%r7; they back s2, s3, lo1, lo2 and lo3 on the unrolled path.
! 57: stw %r3,-96(%r30)
! 58: stw %r4,-92(%r30)
! 59: stw %r5,-88(%r30)
! 60: stw %r6,-84(%r30)
! 61: stw %r7,-80(%r30)
! 62:
; Bit 29 of s1_ptr (the 4s bit in PA-RISC bit numbering) clear means s1_ptr is
; already doubleword aligned: branch to L$0, and the ,n completer nullifies
; the following instruction.  Otherwise fall through and process one limb.
! 63: bb,>=,n s1_ptr,29,L$0
! 64:
! 65: fldws,ma 4(s1_ptr),%fr4 ; load one source limb, s1_ptr += 4
! 66: ldws 0(res_ptr),s0 ; load the matching destination limb
! 67: xmpyu %fr4,%fr31R,%fr5 ; 32x32 -> 64 bit unsigned product
! 68: fstds %fr5,-16(%r31) ; move the product to integer registers via memory
! 69: ldws -16(%r31),cylimb ; high word becomes the carry limb
! 70: ldws -12(%r31),lo0 ; low word
! 71: sub s0,lo0,s0
! 72: add s0,lo0,%r0 ; invert cy
! 73: addib,< -1,size,L$few_limbs ; size -= 1; fewer than 4 limbs left -> one-limb loop
! 74: stws,ma s0,4(res_ptr) ; (branch delay slot) store the result limb, res_ptr += 4
; Both branches to L$few_limbs above are taken before %r3-%r7 have been
; written, so skipping the reload of those registers is harmless.
! 75:
! 76: ; start software pipeline ----------------------------------------------------
; Prologue: compute the first four products and fold the carry chain into
; lo0-lo3, without touching res_ptr yet.
! 77: L$0 fldds,ma 8(s1_ptr),%fr4 ; two source limbs, s1_ptr += 8
! 78: fldds,ma 8(s1_ptr),%fr8 ; two more source limbs, s1_ptr += 8
! 79:
! 80: xmpyu %fr4L,%fr31R,%fr5
! 81: xmpyu %fr4R,%fr31R,%fr6
! 82: xmpyu %fr8L,%fr31R,%fr9
! 83: xmpyu %fr8R,%fr31R,%fr10
! 84:
; Spill the four 64-bit products so the integer unit can read them.
! 85: fstds %fr5,-16(%r31)
! 86: fstds %fr6,-8(%r31)
! 87: fstds %fr9,0(%r31)
! 88: fstds %fr10,8(%r31)
! 89:
; Reload as (high, low) word pairs; the high word sits at the lower address.
! 90: ldws -16(%r31),hi0
! 91: ldws -12(%r31),lo0
! 92: ldws -8(%r31),hi1
! 93: ldws -4(%r31),lo1
! 94: ldws 0(%r31),hi2
! 95: ldws 4(%r31),lo2
! 96: ldws 8(%r31),hi3
! 97: ldws 12(%r31),lo3
! 98:
; Each low word absorbs the previous high word plus the running carry bit.
! 99: addc lo0,cylimb,lo0
! 100: addc lo1,hi0,lo1
! 101: addc lo2,hi1,lo2
! 102: addc lo3,hi2,lo3
! 103:
! 104: addib,< -4,size,L$end ; size -= 4; no further full group of 4 -> drain the pipeline
! 105: addc %r0,hi3,cylimb ; propagate carry into cylimb
! 106: ; main loop ------------------------------------------------------------------
; Each pass multiplies the next four source limbs while subtracting the four
; products prepared by the previous pass (lo0-lo3) from the destination.
! 107: L$loop fldds,ma 8(s1_ptr),%fr4
! 108: fldds,ma 8(s1_ptr),%fr8
! 109:
! 110: ldws 0(res_ptr),s0
! 111: xmpyu %fr4L,%fr31R,%fr5
! 112: ldws 4(res_ptr),s1
! 113: xmpyu %fr4R,%fr31R,%fr6
! 114: ldws 8(res_ptr),s2
! 115: xmpyu %fr8L,%fr31R,%fr9
! 116: ldws 12(res_ptr),s3
! 117: xmpyu %fr8R,%fr31R,%fr10
! 118:
! 119: fstds %fr5,-16(%r31)
! 120: sub s0,lo0,s0 ; start of the 4-limb borrow chain
! 121: fstds %fr6,-8(%r31)
! 122: subb s1,lo1,s1
! 123: fstds %fr9,0(%r31)
! 124: subb s2,lo2,s2
! 125: fstds %fr10,8(%r31)
! 126: subb s3,lo3,s3
! 127: subb %r0,%r0,lo0 ; these two insns ...
! 128: add lo0,lo0,%r0 ; ... just invert cy
! 129:
; lo0 was only a temporary above; it and the other lo/hi registers are now
; reloaded with the products just stored.
! 130: ldws -16(%r31),hi0
! 131: ldws -12(%r31),lo0
! 132: ldws -8(%r31),hi1
! 133: ldws -4(%r31),lo1
! 134: ldws 0(%r31),hi2
! 135: ldws 4(%r31),lo2
! 136: ldws 8(%r31),hi3
! 137: ldws 12(%r31),lo3
! 138:
; Fold the carry chain for the new products, interleaved with the stores of
; the four finished result limbs (each store advances res_ptr by 4).
! 139: addc lo0,cylimb,lo0
! 140: stws,ma s0,4(res_ptr)
! 141: addc lo1,hi0,lo1
! 142: stws,ma s1,4(res_ptr)
! 143: addc lo2,hi1,lo2
! 144: stws,ma s2,4(res_ptr)
! 145: addc lo3,hi2,lo3
! 146: stws,ma s3,4(res_ptr)
! 147:
! 148: addib,>= -4,size,L$loop ; size -= 4; loop while another full group of 4 remains
! 149: addc %r0,hi3,cylimb ; propagate carry into cylimb
! 150: ; finish software pipeline ---------------------------------------------------
; Epilogue: subtract the last prepared group of four products; no new
; multiplications are started here.
! 151: L$end ldws 0(res_ptr),s0
! 152: ldws 4(res_ptr),s1
! 153: ldws 8(res_ptr),s2
! 154: ldws 12(res_ptr),s3
! 155:
! 156: sub s0,lo0,s0
! 157: stws,ma s0,4(res_ptr)
! 158: subb s1,lo1,s1
! 159: stws,ma s1,4(res_ptr)
! 160: subb s2,lo2,s2
! 161: stws,ma s2,4(res_ptr)
! 162: subb s3,lo3,s3
! 163: stws,ma s3,4(res_ptr)
! 164: subb %r0,%r0,lo0 ; these two insns ...
! 165: add lo0,lo0,%r0 ; ... invert cy
! 166:
! 167: ; restore callee-saves registers ---------------------------------------------
! 168: ldw -96(%r30),%r3
! 169: ldw -92(%r30),%r4
! 170: ldw -88(%r30),%r5
! 171: ldw -84(%r30),%r6
! 172: ldw -80(%r30),%r7
! 173:
; One-limb-at-a-time path.  Reached by falling through from the unrolled path
; (0-3 limbs left) or directly from the early exits at the top.  size is
; still biased by -4 on arrival; the addib removes the bias and returns at
; once when nothing remains (,n nullifies the following instruction).
! 174: L$few_limbs
! 175: addib,=,n 4,size,L$ret
! 176: L$loop2 fldws,ma 4(s1_ptr),%fr4 ; load one source limb, s1_ptr += 4
! 177: ldws 0(res_ptr),s0 ; load the matching destination limb
! 178: xmpyu %fr4,%fr31R,%fr5
! 179: fstds %fr5,-16(%r30) ; scratch reuses the slot the multiplier was spilled to
! 180: ldws -16(%r30),hi0
! 181: ldws -12(%r30),lo0
! 182: addc lo0,cylimb,lo0 ; low word + previous carry limb + carry bit
! 183: addc %r0,hi0,cylimb ; new carry limb = high word + carry out
! 184: sub s0,lo0,s0
! 185: add s0,lo0,%r0 ; invert cy
! 186: stws,ma s0,4(res_ptr) ; store the result limb, res_ptr += 4
! 187: addib,<> -1,size,L$loop2 ; size -= 1; repeat until it reaches zero
! 188: nop
! 189:
! 190: L$ret addc %r0,cylimb,cylimb ; fold the pending carry bit into the carry limb
! 191: bv 0(%r2) ; return through %r2
! 192: ldo -128(%r30),%r30 ; (branch delay slot) release the 128-byte frame
! 193:
! 194: .exit
! 195: .procend
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>