; [BACK] Return to submul_1.S CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / hppa / hppa1_1 / pa7100
;
; File: [local] / OpenXM_contrib / gmp / mpn / hppa / hppa1_1 / pa7100 / Attic / submul_1.S (download)
;
; Revision 1.1.1.1 (vendor branch), Mon Jan 10 15:35:24 2000 UTC (24 years, 6 months ago) by maekawa
; Branch: GMP
; CVS Tags: VERSION_2_0_2, RELEASE_20000124, RELEASE_1_1_2
; Changes since 1.1: +0 -0 lines
;
; Import gmp 2.0.2

; HP-PA 7100/7200 __mpn_submul_1 -- Multiply a limb vector with a limb and
; subtract the result from a second limb vector.

; Copyright (C) 1995 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.

; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Library General Public License as published by
; the Free Software Foundation; either version 2 of the License, or (at your
; option) any later version.

; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
; License for more details.

; You should have received a copy of the GNU Library General Public License
; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
; MA 02111-1307, USA.

; INPUT PARAMETERS
;   res_ptr  limb vector that is read, reduced by the product and written back
;   s1_ptr   limb vector that is multiplied by s2_limb
;   size     number of limbs to process; doubles as the loop counter
;   s2_limb  the single multiplier limb; copied to %fr31R on entry
#define res_ptr	%r26
#define s1_ptr	%r25
#define size	%r24
#define s2_limb	%r23

; WORKING REGISTERS
;   cylimb   high product word carried into the next limb position; this is
;            the value left in %r28 when the routine returns
;   s0-s3    limbs loaded from res_ptr, overwritten with the differences
;   loN/hiN  low and high words of the products currently in flight
; %r3-%r7 (s2, s3, lo1, lo2, lo3) are stored to the frame before they are
; used and reloaded before the routine returns.
; hi1 shares %r23 with s2_limb; s2_limb has already been moved to %fr31R by
; the time hi1 is first written.
; NOTE: do not append ; comments to the #define lines; the preprocessor would
; paste them into every use of the name.
#define cylimb	%r28
#define s0	%r19
#define s1	%r20
#define s2	%r3
#define s3	%r4
#define lo0	%r21
#define lo1	%r5
#define lo2	%r6
#define lo3	%r7
#define hi0	%r22
#define hi1	%r23				/* safe to reuse */
#define hi2	%r29
#define hi3	%r1

	.code
	.export		__mpn_submul_1
; ----------------------------------------------------------------------------
; __mpn_submul_1
;
; Multiplies the limbs at s1_ptr (size of them) by s2_limb and subtracts the
; products, limb by limb with borrow propagation, from the limbs at res_ptr,
; storing the differences back through res_ptr.  At the return cylimb holds
; the last high product word plus the final borrow.
;
; Structure:
;   1. Fewer than 4 limbs: everything is done by the one-limb loop L$loop2.
;   2. Otherwise, if bit 29 of s1_ptr is set (pointer not doubleword
;      aligned), one limb is done singly before the fldds doubleword loads.
;   3. A software-pipelined loop handles 4 limbs per pass: the products for
;      one group are formed while the previous group is subtracted.
;   4. The leftover limbs (0 to 3) go through L$loop2.
;
; Carry handling: every sub/subb chain is followed by an add that turns the
; borrow into a carry (the invert cy comments), and the next addc chain
; folds that carry into the following product.  The code relies on the carry
; bit surviving the loads, stores and addib instructions in between.
;
; Branches: the instruction written after a branch is its delay slot and is
; executed as part of the branch.  The two forward branches written with the
; ,n completer cancel that instruction when they are taken.
;
; Frame (after %r30 has been advanced by 128):
;   -128(%r30) .. -97(%r30)   four doubleword product slots, via %r31
;    -96(%r30) .. -80(%r30)   saved %r3-%r7
;    -16(%r30)                s2_limb transfer slot, and the product slot
;                             of L$loop2
; ----------------------------------------------------------------------------
__mpn_submul_1
	.proc
	.callinfo	frame=128,no_calls
	.entry

; Allocate the frame and move s2_limb into %fr31R by way of memory.
	ldo	128(%r30),%r30
	stws	s2_limb,-16(%r30)
	add	 %r0,%r0,cylimb			; clear cy and cylimb
; size -= 4; a negative result means fewer than 4 limbs in total.  The fldws
; below is in the delay slot, so %fr31R is loaded on both paths.
	addib,<	-4,size,L$few_limbs
	fldws	-16(%r30),%fr31R

; %r31 = %r30 - 112 is the base of the product spill area.  Save %r3-%r7,
; which the 4-limb code uses as s2, s3, lo1, lo2 and lo3.
	ldo	-112(%r30),%r31
	stw	%r3,-96(%r30)
	stw	%r4,-92(%r30)
	stw	%r5,-88(%r30)
	stw	%r6,-84(%r30)
	stw	%r7,-80(%r30)

; Branch to L$0 when bit 29 of s1_ptr is clear.  PA numbers bits from the
; most significant end, so bit 29 has weight 4: clear means s1_ptr is
; doubleword aligned.  When the branch is taken the ,n completer cancels the
; fldws,ma that follows.
	bb,>=,n	 s1_ptr,29,L$0

; Unaligned s1_ptr: process one limb alone.  cylimb and cy are both zero
; here, so the low product word is subtracted as is and the high word is
; loaded straight into cylimb.
	fldws,ma 4(s1_ptr),%fr4
	ldws	 0(res_ptr),s0
	xmpyu	 %fr4,%fr31R,%fr5
	fstds	 %fr5,-16(%r31)
	ldws	-16(%r31),cylimb
	ldws	-12(%r31),lo0
	sub	 s0,lo0,s0
; The sum is discarded in %r0; only the carry out, which equals the borrow
; of the sub above, is wanted.
	add	 s0,lo0,%r0			; invert cy
; size -= 1; if fewer than 4 limbs remain go to the one-limb loop.  On that
; path %r3-%r7 have been saved but not yet written, so skipping the restore
; is harmless.  The store in the delay slot is executed on both paths.
	addib,< -1,size,L$few_limbs
	stws,ma	 s0,4(res_ptr)

; start software pipeline ----------------------------------------------------
; Prime the pipeline: load four limbs as two doublewords, form the four
; products, spill them to the scratch area and read them back as separate
; high and low words.
L$0	fldds,ma 8(s1_ptr),%fr4
	fldds,ma 8(s1_ptr),%fr8

	xmpyu	 %fr4L,%fr31R,%fr5
	xmpyu	 %fr4R,%fr31R,%fr6
	xmpyu	 %fr8L,%fr31R,%fr9
	xmpyu	 %fr8R,%fr31R,%fr10

	fstds	 %fr5,-16(%r31)
	fstds	 %fr6,-8(%r31)
	fstds	 %fr9,0(%r31)
	fstds	 %fr10,8(%r31)

; The load into hi1 overwrites %r23 (s2_limb), which is no longer needed in
; a general register.
	ldws   -16(%r31),hi0
	ldws   -12(%r31),lo0
	ldws	-8(%r31),hi1
	ldws	-4(%r31),lo1
	ldws	 0(%r31),hi2
	ldws	 4(%r31),lo2
	ldws	 8(%r31),hi3
	ldws	12(%r31),lo3

; Chain the products: each low word absorbs the high word of the product
; below it (cylimb for the first one) plus the incoming carry.
	addc	 lo0,cylimb,lo0
	addc	 lo1,hi0,lo1
	addc	 lo2,hi1,lo2
	addc	 lo3,hi2,lo3

; size -= 4; if no further full group of 4 exists, drain the pipeline at
; L$end.  The addc in the delay slot is executed on both paths.
	addib,<	 -4,size,L$end
	addc	 %r0,hi3,cylimb			; propagate carry into cylimb
; main loop ------------------------------------------------------------------
; Each pass starts the products for the next four limbs while the adjusted
; low words lo0-lo3 of the previous group are subtracted from four result
; limbs.  The instruction order is hand scheduled (integer and floating
; point instructions alternate, presumably for dual issue on the PA7100);
; keep it as it is.
L$loop	fldds,ma 8(s1_ptr),%fr4
	fldds,ma 8(s1_ptr),%fr8

	ldws	 0(res_ptr),s0
	xmpyu	 %fr4L,%fr31R,%fr5
	ldws	 4(res_ptr),s1
	xmpyu	 %fr4R,%fr31R,%fr6
	ldws	 8(res_ptr),s2
	xmpyu	 %fr8L,%fr31R,%fr9
	ldws	12(res_ptr),s3
	xmpyu	 %fr8R,%fr31R,%fr10

; Subtract the previous group while the new products are spilled.
	fstds	 %fr5,-16(%r31)
	sub	 s0,lo0,s0
	fstds	 %fr6,-8(%r31)
	subb	 s1,lo1,s1
	fstds	 %fr9,0(%r31)
	subb	 s2,lo2,s2
	fstds	 %fr10,8(%r31)
	subb	 s3,lo3,s3
; lo0 is free at this point (it is reloaded just below) and serves as
; scratch for the inversion.
	subb	 %r0,%r0,lo0			; these two insns ...
	add	 lo0,lo0,%r0			; ... just invert cy

	ldws   -16(%r31),hi0
	ldws   -12(%r31),lo0
	ldws	-8(%r31),hi1
	ldws	-4(%r31),lo1
	ldws	 0(%r31),hi2
	ldws	 4(%r31),lo2
	ldws	 8(%r31),hi3
	ldws	12(%r31),lo3

; Chain the new products (the carry in is the borrow of the group just
; subtracted) and store the finished differences in between.
	addc	 lo0,cylimb,lo0
	stws,ma	 s0,4(res_ptr)
	addc	 lo1,hi0,lo1
	stws,ma	 s1,4(res_ptr)
	addc	 lo2,hi1,lo2
	stws,ma	 s2,4(res_ptr)
	addc	 lo3,hi2,lo3
	stws,ma	 s3,4(res_ptr)

; size -= 4; loop while another full group of 4 limbs remains.  The addc is
; in the delay slot.
	addib,>= -4,size,L$loop
	addc	 %r0,hi3,cylimb			; propagate carry into cylimb
; finish software pipeline ---------------------------------------------------
; Drain: subtract the last group of adjusted low words; no new products are
; started.
L$end	ldws	 0(res_ptr),s0
	ldws	 4(res_ptr),s1
	ldws	 8(res_ptr),s2
	ldws	12(res_ptr),s3

	sub	 s0,lo0,s0
	stws,ma	 s0,4(res_ptr)
	subb	 s1,lo1,s1
	stws,ma	 s1,4(res_ptr)
	subb	 s2,lo2,s2
	stws,ma	 s2,4(res_ptr)
	subb	 s3,lo3,s3
	stws,ma	 s3,4(res_ptr)
	subb	 %r0,%r0,lo0			; these two insns ...
	add	 lo0,lo0,%r0			; ... invert cy

; restore callee-saved registers ---------------------------------------------
; The pending borrow stays in the carry bit across these loads.
	ldw	-96(%r30),%r3
	ldw	-92(%r30),%r4
	ldw	-88(%r30),%r5
	ldw	-84(%r30),%r6
	ldw	-80(%r30),%r7

; One limb at a time.  size is negative on every path into this label;
; adding 4 back gives the number of limbs still to do.  If that is zero,
; return at once (the ,n completer cancels the fldws,ma when the branch is
; taken).
L$few_limbs
	addib,=,n 4,size,L$ret
L$loop2	fldws,ma 4(s1_ptr),%fr4
	ldws	 0(res_ptr),s0
	xmpyu	 %fr4,%fr31R,%fr5
	fstds	 %fr5,-16(%r30)
	ldws	-16(%r30),hi0
	ldws	-12(%r30),lo0
; Fold in the previous high word and borrow, then form the new cylimb.
	addc	 lo0,cylimb,lo0
	addc	 %r0,hi0,cylimb
	sub	 s0,lo0,s0
	add	 s0,lo0,%r0			; invert cy
	stws,ma	 s0,4(res_ptr)
; size -= 1; loop until it reaches zero.  The delay slot is an explicit nop.
	addib,<> -1,size,L$loop2
	nop

; Add the last borrow into cylimb, branch to the address in %r2 and release
; the frame in the delay slot.
L$ret	addc	 %r0,cylimb,cylimb
	bv	 0(%r2)
	ldo	 -128(%r30),%r30

	.exit
	.procend