Annotation of OpenXM/src/kan96xx/gmp-2.0.2/mpn/hppa/hppa1_1/pa7100/submul_1.S, Revision 1.1.1.1
1.1 maekawa 1: ; HP-PA 7100/7200 __mpn_submul_1 -- Multiply a limb vector with a limb and
2: ; subtract the result from a second limb vector.
3:
4: ; Copyright (C) 1995 Free Software Foundation, Inc.
5:
6: ; This file is part of the GNU MP Library.
7:
8: ; The GNU MP Library is free software; you can redistribute it and/or modify
9: ; it under the terms of the GNU Library General Public License as published by
10: ; the Free Software Foundation; either version 2 of the License, or (at your
11: ; option) any later version.
12:
13: ; The GNU MP Library is distributed in the hope that it will be useful, but
14: ; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: ; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
16: ; License for more details.
17:
18: ; You should have received a copy of the GNU Library General Public License
19: ; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: ; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: ; MA 02111-1307, USA.
22:
23: ; INPUT PARAMETERS
; res_ptr : limb vector that is loaded, reduced by the product, and stored back
; s1_ptr  : limb vector whose limbs are multiplied
; size    : limbs still to process; counted down by the addib instructions
; s2_limb : the single multiplier limb; copied through the stack into %fr31R
24: #define res_ptr %r26
25: #define s1_ptr %r25
26: #define size %r24
27: #define s2_limb %r23
28:
; WORKING REGISTERS
; cylimb  : carry limb passed from one limb to the next; its final value is
;           left in %r28 at return (presumably the function result register
;           -- confirm against the PA-RISC calling convention)
; s0..s3  : words loaded from res_ptr and stored back after the subtraction
; loN/hiN : low/high 32-bit halves of the 64-bit xmpyu products
; NOTE: %r3-%r7 (s2, s3, lo1, lo2, lo3) are saved to the frame before the
;       unrolled path uses them and are reloaded afterwards.
; NOTE: hi1 shares %r23 with s2_limb; by the time hi1 is first written the
;       multiplier has already been copied into %fr31R.
29: #define cylimb %r28
30: #define s0 %r19
31: #define s1 %r20
32: #define s2 %r3
33: #define s3 %r4
34: #define lo0 %r21
35: #define lo1 %r5
36: #define lo2 %r6
37: #define lo3 %r7
38: #define hi0 %r22
39: #define hi1 %r23 /* safe to reuse */
40: #define hi2 %r29
41: #define hi3 %r1
42:
43: .code
44: .export __mpn_submul_1
; ----------------------------------------------------------------------------
; __mpn_submul_1
;   For each limb: multiply the limb at s1_ptr by the multiplier limb, add the
;   running carry limb, subtract the low word of that sum from the limb at
;   res_ptr and store the difference back.  The high word plus any borrow
;   becomes the carry limb for the next position.  The final carry limb is
;   left in cylimb when the routine returns.
;
;   Control flow:
;     - fewer than 4 limbs: processed one at a time in L$loop2
;     - otherwise: an optional one-limb step (taken when bit 29 of s1_ptr is
;       set), then a 4-way unrolled software-pipelined loop, then L$loop2 for
;       whatever limbs remain
;
;   The processor carry bit is kept live across loads, stores and the addib
;   branches throughout this routine; the sequences marked "invert cy" turn
;   the result of a subtract into a carry usable by the following addc.
; ----------------------------------------------------------------------------
45: __mpn_submul_1
46: .proc
47: .callinfo frame=128,no_calls
48: .entry
49:
50: ldo 128(%r30),%r30 ; allocate the 128-byte frame (released before return)
51: stws s2_limb,-16(%r30) ; spill the multiplier so it can be loaded into an FP register
52: add %r0,%r0,cylimb ; clear cy and cylimb
53: addib,< -4,size,L$few_limbs ; size -= 4; negative means fewer than 4 limbs
54: fldws -16(%r30),%fr31R ; (delay slot) multiplier -> %fr31R, needed on both paths
55:
56: ldo -112(%r30),%r31 ; %r31 = base for the product scratch slots at -16..+15
57: stw %r3,-96(%r30) ; save %r3-%r7, which the unrolled path uses as s2, s3, lo1-lo3
58: stw %r4,-92(%r30)
59: stw %r5,-88(%r30)
60: stw %r6,-84(%r30)
61: stw %r7,-80(%r30)
62:
; NOTE(review): the bit test below presumably checks whether s1_ptr is already
; 8-byte aligned for the doubleword fldds loads -- confirm against the PA-RISC
; bit numbering.  When the branch is not taken, one limb is processed here and
; s1_ptr advances by 4.
63: bb,>=,n s1_ptr,29,L$0
64:
65: fldws,ma 4(s1_ptr),%fr4 ; load one source limb, s1_ptr += 4
66: ldws 0(res_ptr),s0 ; load the matching result limb
67: xmpyu %fr4,%fr31R,%fr5 ; 64-bit product of limb and multiplier
68: fstds %fr5,-16(%r31) ; move the product to the integer side through memory
69: ldws -16(%r31),cylimb ; high word becomes the carry limb
70: ldws -12(%r31),lo0 ; low word
71: sub s0,lo0,s0 ; subtract low word from the result limb
72: add s0,lo0,%r0 ; invert cy
73: addib,< -1,size,L$few_limbs ; size -= 1; negative means fewer than 4 limbs remain
74: stws,ma s0,4(res_ptr) ; (delay slot) store the difference, res_ptr += 4
75:
76: ; start software pipeline ----------------------------------------------------
77: L$0 fldds,ma 8(s1_ptr),%fr4 ; load source limbs 0 and 1, s1_ptr += 8
78: fldds,ma 8(s1_ptr),%fr8 ; load source limbs 2 and 3, s1_ptr += 8
79:
80: xmpyu %fr4L,%fr31R,%fr5 ; four 32x32->64 products
81: xmpyu %fr4R,%fr31R,%fr6
82: xmpyu %fr8L,%fr31R,%fr9
83: xmpyu %fr8R,%fr31R,%fr10
84:
85: fstds %fr5,-16(%r31) ; spill the four products to the scratch slots
86: fstds %fr6,-8(%r31)
87: fstds %fr9,0(%r31)
88: fstds %fr10,8(%r31)
89:
90: ldws -16(%r31),hi0 ; reload each product as a high word / low word pair
91: ldws -12(%r31),lo0
92: ldws -8(%r31),hi1 ; overwrites %r23; the multiplier now lives only in %fr31R
93: ldws -4(%r31),lo1
94: ldws 0(%r31),hi2
95: ldws 4(%r31),lo2
96: ldws 8(%r31),hi3
97: ldws 12(%r31),lo3
98:
99: addc lo0,cylimb,lo0 ; lo0 += incoming carry limb + cy
100: addc lo1,hi0,lo1 ; each low word absorbs the previous high word + cy
101: addc lo2,hi1,lo2
102: addc lo3,hi2,lo3
103:
104: addib,< -4,size,L$end ; size -= 4; negative means no further full group of 4
105: addc %r0,hi3,cylimb ; propagate carry into cylimb
106: ; main loop ------------------------------------------------------------------
; Each iteration subtracts the lo0-lo3 values prepared by the previous stage
; from four result limbs while computing the products for the next four.
107: L$loop fldds,ma 8(s1_ptr),%fr4 ; next source limbs 0 and 1
108: fldds,ma 8(s1_ptr),%fr8 ; next source limbs 2 and 3
109:
110: ldws 0(res_ptr),s0 ; result limb loads interleaved with the multiplies
111: xmpyu %fr4L,%fr31R,%fr5
112: ldws 4(res_ptr),s1
113: xmpyu %fr4R,%fr31R,%fr6
114: ldws 8(res_ptr),s2
115: xmpyu %fr8L,%fr31R,%fr9
116: ldws 12(res_ptr),s3
117: xmpyu %fr8R,%fr31R,%fr10
118:
119: fstds %fr5,-16(%r31) ; product spills interleaved with the borrow chain
120: sub s0,lo0,s0 ; first subtract of the chain
121: fstds %fr6,-8(%r31)
122: subb s1,lo1,s1 ; subtract with borrow
123: fstds %fr9,0(%r31)
124: subb s2,lo2,s2
125: fstds %fr10,8(%r31)
126: subb s3,lo3,s3
127: subb %r0,%r0,lo0 ; these two insns ...
128: add lo0,lo0,%r0 ; ... just invert cy
129:
130: ldws -16(%r31),hi0 ; reload the new products as high/low words
131: ldws -12(%r31),lo0
132: ldws -8(%r31),hi1
133: ldws -4(%r31),lo1
134: ldws 0(%r31),hi2
135: ldws 4(%r31),lo2
136: ldws 8(%r31),hi3
137: ldws 12(%r31),lo3
138:
139: addc lo0,cylimb,lo0 ; carry chain for the new group, interleaved with ...
140: stws,ma s0,4(res_ptr) ; ... stores of the finished group (res_ptr += 4 each)
141: addc lo1,hi0,lo1
142: stws,ma s1,4(res_ptr)
143: addc lo2,hi1,lo2
144: stws,ma s2,4(res_ptr)
145: addc lo3,hi2,lo3
146: stws,ma s3,4(res_ptr)
147:
148: addib,>= -4,size,L$loop ; size -= 4; loop while another full group of 4 remains
149: addc %r0,hi3,cylimb ; propagate carry into cylimb
150: ; finish software pipeline ---------------------------------------------------
; Drain: lo0-lo3 for the last group are ready; subtract them and store.
151: L$end ldws 0(res_ptr),s0 ; load the last four result limbs of the unrolled path
152: ldws 4(res_ptr),s1
153: ldws 8(res_ptr),s2
154: ldws 12(res_ptr),s3
155:
156: sub s0,lo0,s0 ; borrow chain, each difference stored as soon as it is ready
157: stws,ma s0,4(res_ptr)
158: subb s1,lo1,s1
159: stws,ma s1,4(res_ptr)
160: subb s2,lo2,s2
161: stws,ma s2,4(res_ptr)
162: subb s3,lo3,s3
163: stws,ma s3,4(res_ptr)
164: subb %r0,%r0,lo0 ; these two insns ...
165: add lo0,lo0,%r0 ; ... invert cy
166:
167: ; restore callee-saves registers ---------------------------------------------
168: ldw -96(%r30),%r3 ; reload %r3-%r7 from the slots written on entry
169: ldw -92(%r30),%r4
170: ldw -88(%r30),%r5
171: ldw -84(%r30),%r6
172: ldw -80(%r30),%r7
173:
; Tail: one limb per iteration.  Reached directly when fewer than 4 limbs are
; left on entry or after the one-limb step, and by fall-through after the drain.
174: L$few_limbs
175: addib,=,n 4,size,L$ret ; size += 4 to undo the earlier bias; zero means done
176: L$loop2 fldws,ma 4(s1_ptr),%fr4 ; load one source limb, s1_ptr += 4
177: ldws 0(res_ptr),s0 ; load the matching result limb
178: xmpyu %fr4,%fr31R,%fr5 ; 64-bit product
179: fstds %fr5,-16(%r30) ; spill the product; reuses the slot that held the multiplier
180: ldws -16(%r30),hi0 ; high word
181: ldws -12(%r30),lo0 ; low word
182: addc lo0,cylimb,lo0 ; lo0 += carry limb + cy
183: addc %r0,hi0,cylimb ; new carry limb = high word + cy
184: sub s0,lo0,s0 ; subtract from the result limb
185: add s0,lo0,%r0 ; invert cy
186: stws,ma s0,4(res_ptr) ; store the difference, res_ptr += 4
187: addib,<> -1,size,L$loop2 ; size -= 1; loop until it reaches zero
188: nop
189:
190: L$ret addc %r0,cylimb,cylimb ; fold the pending carry bit into the carry limb
191: bv 0(%r2) ; return through %r2
192: ldo -128(%r30),%r30 ; (delay slot) release the 128-byte frame
193:
194: .exit
195: .procend
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>