File: [local] / OpenXM / src / kan96xx / gmp-2.0.2-ssh-2 / mpn / hppa / hppa1_1 / pa7100 / Attic / addmul_1.S (download)
Revision 1.1.1.1 (vendor branch), Mon Oct 25 08:53:35 1999 UTC (24 years, 10 months ago) by takayama
Branch: TAKAYAMA
CVS Tags: RELEASE_20000124, RELEASE_1_1_2, GMP_2_0_2_SSH_2
Changes since 1.1: +0 -0 lines
gmp-2.0.2-ssh-2 is added.
It is a patched version of gmp-2.0.2 to compile on FreeBSD 3.3.
; HP-PA 7100/7200 __mpn_addmul_1 -- Multiply a limb vector with a limb and
; add the result to a second limb vector.
; Copyright (C) 1995 Free Software Foundation, Inc.
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
; License for more details.
; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
; MA 02111-1307, USA.
; INPUT PARAMETERS
; Roles as used by the procedure below:
;   res_ptr - address of the limb vector that is read, updated and written back
;   s1_ptr  - address of the limb vector that is multiplied
;   size    - count of limbs still to process (kept biased by -4 in the main path)
;   s2_limb - the single multiplier limb; copied into %fr31R on entry
#define res_ptr %r26
#define s1_ptr %r25
#define size %r24
#define s2_limb %r23
; WORKING REGISTERS
; cylimb holds the running carry limb and is the value left behind at return.
; NOTE(review): %r28 is presumably the function result register of the
; PA-RISC calling convention -- confirm against the ABI document.
#define cylimb %r28
; s0..s3: limbs loaded through res_ptr.  s2 and s3 live in %r3/%r4, which the
; procedure saves to and restores from its stack frame.
#define s0 %r19
#define s1 %r20
#define s2 %r3
#define s3 %r4
; lo0..lo3: low words of the four 64-bit products.  lo1..lo3 live in %r5..%r7,
; which the procedure also saves and restores.
#define lo0 %r21
#define lo1 %r5
#define lo2 %r6
#define lo3 %r7
; hi0..hi3: high words of the four 64-bit products.  hi1 shares %r23 with
; s2_limb; the multiplier has already been moved to %fr31R before hi1 is
; first written.
#define hi0 %r22
#define hi1 %r23 /* safe to reuse */
#define hi2 %r29
#define hi3 %r1
; ----------------------------------------------------------------------------
; __mpn_addmul_1
;
; For every limb of the source vector: multiply the limb read through s1_ptr by
; the multiplier limb, add the 64-bit product and the running carry limb to the
; limb at res_ptr, and store the low word back through res_ptr.  The carry limb
; that remains after the last limb is left in cylimb at return.
;
; Structure:
;   * fewer than 4 limbs: one limb per iteration in L$loop2
;   * otherwise: an optional single-limb step selected by a bit test on s1_ptr,
;     then a 4-limb unrolled, software-pipelined loop (L$0 / L$loop / L$end),
;     then the remaining 0..3 limbs in L$loop2
;
; Stack frame (128 bytes, offsets relative to %r30 after the first ldo):
;   -128 .. -97   product scratch of the pipelined path (addressed via %r31)
;    -96 .. -77   saved %r3..%r7
;    -16 ..  -9   multiplier transfer slot, later the product scratch of L$loop2
;
; The code depends on the carry bit produced by one add/addc still being intact
; when the next addc runs, across the loads, stores, floating-point operations
; and addib branches placed in between.
; The instruction written directly after each branch is in its delay slot.
; ----------------------------------------------------------------------------
.code
.export __mpn_addmul_1
__mpn_addmul_1
.proc
.callinfo frame=128,no_calls
.entry
; Allocate the frame, then move the multiplier into %fr31R by way of a stack
; slot (store as integer, reload as floating-point word).
ldo 128(%r30),%r30
stws s2_limb,-16(%r30)
add %r0,%r0,cylimb ; clear cy and cylimb
; size -= 4; a negative result means fewer than 4 limbs.  The fldws in the
; delay slot is needed on both paths, since L$loop2 also multiplies by %fr31R.
addib,< -4,size,L$few_limbs
fldws -16(%r30),%fr31R
; %r31 = %r30 - 112 is the base of the 32-byte product scratch area
; (-16(%r31) .. 15(%r31)).  Save %r3..%r7 before they are used as
; s2, s3, lo1, lo2 and lo3.
ldo -112(%r30),%r31
stw %r3,-96(%r30)
stw %r4,-92(%r30)
stw %r5,-88(%r30)
stw %r6,-84(%r30)
stw %r7,-80(%r30)
; Test bit 29 of s1_ptr and skip the single-limb step when the branch is taken.
; NOTE(review): presumably this checks whether s1_ptr is already 8-byte aligned
; for the doubleword fldds loads below -- confirm the bit numbering against the
; architecture manual.
bb,>=,n s1_ptr,29,L$0
; Single-limb step: multiply one source limb, take the high word of the product
; as the initial cylimb and add the low word to the limb at res_ptr.  The carry
; of this add is consumed by the first addc after L$0 (or in L$loop2).
fldws,ma 4(s1_ptr),%fr4
ldws 0(res_ptr),s0
xmpyu %fr4,%fr31R,%fr5
fstds %fr5,-16(%r31)
ldws -16(%r31),cylimb
ldws -12(%r31),lo0
add s0,lo0,s0
; size -= 1; if fewer than 4 limbs remain, continue in the one-at-a-time loop.
; %r3..%r7 have not been modified yet, so skipping the restore code is harmless.
; The store in the delay slot writes the updated limb and advances res_ptr.
addib,< -1,size,L$few_limbs
stws,ma s0,4(res_ptr)
; start software pipeline ----------------------------------------------------
; Load four source limbs as two doublewords and form the four 64-bit products.
L$0 fldds,ma 8(s1_ptr),%fr4
fldds,ma 8(s1_ptr),%fr8
xmpyu %fr4L,%fr31R,%fr5
xmpyu %fr4R,%fr31R,%fr6
xmpyu %fr8L,%fr31R,%fr9
xmpyu %fr8R,%fr31R,%fr10
; Spill the products to the scratch area and reload them as separate high and
; low words in general registers.
fstds %fr5,-16(%r31)
fstds %fr6,-8(%r31)
fstds %fr9,0(%r31)
fstds %fr10,8(%r31)
ldws -16(%r31),hi0
ldws -12(%r31),lo0
ldws -8(%r31),hi1
ldws -4(%r31),lo1
ldws 0(%r31),hi2
ldws 4(%r31),lo2
ldws 8(%r31),hi3
ldws 12(%r31),lo3
; Fold each high word into the next low word: lo0 takes the incoming cylimb,
; lo1..lo3 take hi0..hi2, with the carry bit chained through the addc sequence.
addc lo0,cylimb,lo0
addc lo1,hi0,lo1
addc lo2,hi1,lo2
addc lo3,hi2,lo3
; size -= 4; if negative, no further group of four source limbs exists, so go
; drain the pipeline.  The delay-slot addc runs on both paths.
addib,< -4,size,L$end
addc %r0,hi3,cylimb ; propagate carry into cylimb
; main loop ------------------------------------------------------------------
; Each iteration loads and multiplies the next four source limbs while adding
; the lo0..lo3 values of the previous group to four result limbs.
L$loop fldds,ma 8(s1_ptr),%fr4
fldds,ma 8(s1_ptr),%fr8
ldws 0(res_ptr),s0
xmpyu %fr4L,%fr31R,%fr5
ldws 4(res_ptr),s1
xmpyu %fr4R,%fr31R,%fr6
ldws 8(res_ptr),s2
xmpyu %fr8L,%fr31R,%fr9
ldws 12(res_ptr),s3
xmpyu %fr8R,%fr31R,%fr10
; Interleave spilling the new products with adding the previous lo words to
; the result limbs.  The plain add starts a fresh carry chain.
fstds %fr5,-16(%r31)
add s0,lo0,s0
fstds %fr6,-8(%r31)
addc s1,lo1,s1
fstds %fr9,0(%r31)
addc s2,lo2,s2
fstds %fr10,8(%r31)
addc s3,lo3,s3
; Reload the new products; the old lo values have been consumed above.
ldws -16(%r31),hi0
ldws -12(%r31),lo0
ldws -8(%r31),hi1
ldws -4(%r31),lo1
ldws 0(%r31),hi2
ldws 4(%r31),lo2
ldws 8(%r31),hi3
ldws 12(%r31),lo3
; Continue the carry chain from the s3 addition into the new lo words while
; storing the four finished result limbs.
addc lo0,cylimb,lo0
stws,ma s0,4(res_ptr)
addc lo1,hi0,lo1
stws,ma s1,4(res_ptr)
addc lo2,hi1,lo2
stws,ma s2,4(res_ptr)
addc lo3,hi2,lo3
stws,ma s3,4(res_ptr)
; size -= 4; loop while the result is not negative.
addib,>= -4,size,L$loop
addc %r0,hi3,cylimb ; propagate carry into cylimb
; finish software pipeline ---------------------------------------------------
; Drain: add the four pending lo words to the next four result limbs.  The
; carry of the last addc is picked up in L$loop2 or at L$ret.
L$end ldws 0(res_ptr),s0
ldws 4(res_ptr),s1
ldws 8(res_ptr),s2
ldws 12(res_ptr),s3
add s0,lo0,s0
stws,ma s0,4(res_ptr)
addc s1,lo1,s1
stws,ma s1,4(res_ptr)
addc s2,lo2,s2
stws,ma s2,4(res_ptr)
addc s3,lo3,s3
stws,ma s3,4(res_ptr)
; restore callee-saves registers ---------------------------------------------
ldw -96(%r30),%r3
ldw -92(%r30),%r4
ldw -88(%r30),%r5
ldw -84(%r30),%r6
ldw -80(%r30),%r7
; size += 4 removes the -4 bias, leaving the number of limbs still to do;
; when that is zero, go directly to L$ret.
L$few_limbs
addib,=,n 4,size,L$ret
; One limb per iteration.  The product is spilled to the slot at -16(%r30),
; which is free because the multiplier already sits in %fr31R.
L$loop2 fldws,ma 4(s1_ptr),%fr4
ldws 0(res_ptr),s0
xmpyu %fr4,%fr31R,%fr5
fstds %fr5,-16(%r30)
ldws -16(%r30),hi0
ldws -12(%r30),lo0
; lo0 += cylimb + pending carry bit; cylimb = hi0 + carry of that addition.
addc lo0,cylimb,lo0
addc %r0,hi0,cylimb
; Add to the result limb; the carry of this add is consumed by the next
; iteration or by the addc at L$ret.
add s0,lo0,s0
stws,ma s0,4(res_ptr)
; size -= 1; loop until it reaches zero.
addib,<> -1,size,L$loop2
nop
; Fold the last pending carry bit into cylimb and return through %r2; the ldo
; in the delay slot of bv releases the 128-byte frame.
L$ret addc %r0,cylimb,cylimb
bv 0(%r2)
ldo -128(%r30),%r30
.exit
.procend