Annotation of OpenXM_contrib/gmp/mpn/m88k/mc88110/sub_n.S, Revision 1.1.1.2
1.1.1.2 ! maekawa 1: ; mc88110 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
1.1 maekawa 2: ; store difference in a third limb vector.
3:
1.1.1.2 ! maekawa 4: ; Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
1.1 maekawa 5:
6: ; This file is part of the GNU MP Library.
7:
8: ; The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 ! maekawa 9: ; it under the terms of the GNU Lesser General Public License as published by
! 10: ; the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 11: ; option) any later version.
12:
13: ; The GNU MP Library is distributed in the hope that it will be useful, but
14: ; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 ! maekawa 15: ; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 16: ; License for more details.
17:
1.1.1.2 ! maekawa 18: ; You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 19: ; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: ; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: ; MA 02111-1307, USA.
22:
23:
24: ; INPUT PARAMETERS
25: #define res_ptr r2
26: #define s1_ptr r3
27: #define s2_ptr r4
28: #define size r5
29:
30: #include "sysdep.h"
31:
32: text
33: align 16
1.1.1.2 ! maekawa 34: global C_SYMBOL_NAME(__gmpn_sub_n)
! 35: C_SYMBOL_NAME(__gmpn_sub_n):
1.1 maekawa 36: subu.co r0,r0,r0 ; set cy flag (after subu, cy set means "no borrow")
37: xor r12,s2_ptr,res_ptr
38: bb1 2,r12,L1 ; branch if s2_ptr and res_ptr have different word alignment
39: ; ** V1a ** ; s2_ptr and res_ptr are mutually aligned
40: L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned
41: /* Subtract least significant limb separately to align res_ptr and s2_ptr */
42: ld r10,s1_ptr,0
43: addu s1_ptr,s1_ptr,4
44: ld r8,s2_ptr,0
45: addu s2_ptr,s2_ptr,4
46: subu size,size,1
47: subu.co r6,r10,r8
48: st r6,res_ptr,0
49: addu res_ptr,res_ptr,4
50: L_v1: cmp r12,size,2
51: bb1 lt,r12,Lend2
52:
53: ld r10,s1_ptr,0
54: ld r12,s1_ptr,4
55: ld.d r8,s2_ptr,0
56: subu size,size,10 ; 8 for the unrolled loop + 2 limbs already loaded
57: bcnd lt0,size,Lfin1
58: /* Subtract blocks of 8 limbs until less than 8 limbs remain */
59: align 8
60: Loop1: subu size,size,8
61: subu.cio r6,r10,r8
62: ld r10,s1_ptr,8
63: subu.cio r7,r12,r9
64: ld r12,s1_ptr,12
65: ld.d r8,s2_ptr,8
66: st.d r6,res_ptr,0
67: subu.cio r6,r10,r8
68: ld r10,s1_ptr,16
69: subu.cio r7,r12,r9
70: ld r12,s1_ptr,20
71: ld.d r8,s2_ptr,16
72: st.d r6,res_ptr,8
73: subu.cio r6,r10,r8
74: ld r10,s1_ptr,24
75: subu.cio r7,r12,r9
76: ld r12,s1_ptr,28
77: ld.d r8,s2_ptr,24
78: st.d r6,res_ptr,16
79: subu.cio r6,r10,r8
80: ld r10,s1_ptr,32
81: subu.cio r7,r12,r9
82: ld r12,s1_ptr,36
83: addu s1_ptr,s1_ptr,32
84: ld.d r8,s2_ptr,32
85: addu s2_ptr,s2_ptr,32
86: st.d r6,res_ptr,24
87: addu res_ptr,res_ptr,32
88: bcnd ge0,size,Loop1
89:
90: Lfin1: addu size,size,8-2
91: bcnd lt0,size,Lend1
92: /* Subtract blocks of 2 limbs until less than 2 limbs remain */
93: Loope1: subu.cio r6,r10,r8
94: ld r10,s1_ptr,8
95: subu.cio r7,r12,r9
96: ld r12,s1_ptr,12
97: ld.d r8,s2_ptr,8
98: st.d r6,res_ptr,0
99: subu size,size,2
100: addu s1_ptr,s1_ptr,8
101: addu s2_ptr,s2_ptr,8
102: addu res_ptr,res_ptr,8
103: bcnd ge0,size,Loope1
104: Lend1: subu.cio r6,r10,r8
105: subu.cio r7,r12,r9
106: st.d r6,res_ptr,0
107:
108: bb0 0,size,Lret1
109: /* Subtract last limb */
110: ld r10,s1_ptr,8
111: ld r8,s2_ptr,8
112: subu.cio r6,r10,r8
113: st r6,res_ptr,8
114:
115: Lret1: addu.ci r2,r0,r0 ; cy out of most sign. limb (cy = no borrow)
116: jmp.n r1
117: xor r2,r2,1 ; delay slot: invert cy so r2 = borrow-out
118:
119: L1: xor r12,s1_ptr,res_ptr
120: bb1 2,r12,L2 ; branch if s1_ptr and res_ptr have different word alignment
121: ; ** V1b ** ; s1_ptr and res_ptr are mutually aligned
122: bb0 2,res_ptr,L_v1b ; branch if res_ptr is aligned
123: /* Subtract least significant limb separately to align res_ptr and s1_ptr */
124: ld r10,s2_ptr,0
125: addu s2_ptr,s2_ptr,4
126: ld r8,s1_ptr,0
127: addu s1_ptr,s1_ptr,4
128: subu size,size,1
129: subu.co r6,r8,r10
130: st r6,res_ptr,0
131: addu res_ptr,res_ptr,4
132: L_v1b: cmp r12,size,2
133: bb1 lt,r12,Lend2
134:
135: ld r10,s2_ptr,0
136: ld r12,s2_ptr,4
137: ld.d r8,s1_ptr,0
138: subu size,size,10 ; 8 for the unrolled loop + 2 limbs already loaded
139: bcnd lt0,size,Lfin1b
140: /* Subtract blocks of 8 limbs until less than 8 limbs remain */
141: align 8
142: Loop1b: subu size,size,8
143: subu.cio r6,r8,r10
144: ld r10,s2_ptr,8
145: subu.cio r7,r9,r12
146: ld r12,s2_ptr,12
147: ld.d r8,s1_ptr,8
148: st.d r6,res_ptr,0
149: subu.cio r6,r8,r10
150: ld r10,s2_ptr,16
151: subu.cio r7,r9,r12
152: ld r12,s2_ptr,20
153: ld.d r8,s1_ptr,16
154: st.d r6,res_ptr,8
155: subu.cio r6,r8,r10
156: ld r10,s2_ptr,24
157: subu.cio r7,r9,r12
158: ld r12,s2_ptr,28
159: ld.d r8,s1_ptr,24
160: st.d r6,res_ptr,16
161: subu.cio r6,r8,r10
162: ld r10,s2_ptr,32
163: subu.cio r7,r9,r12
164: ld r12,s2_ptr,36
165: addu s2_ptr,s2_ptr,32
166: ld.d r8,s1_ptr,32
167: addu s1_ptr,s1_ptr,32
168: st.d r6,res_ptr,24
169: addu res_ptr,res_ptr,32
170: bcnd ge0,size,Loop1b
171:
172: Lfin1b: addu size,size,8-2
173: bcnd lt0,size,Lend1b
174: /* Subtract blocks of 2 limbs until less than 2 limbs remain */
175: Loope1b:subu.cio r6,r8,r10
176: ld r10,s2_ptr,8
177: subu.cio r7,r9,r12
178: ld r12,s2_ptr,12
179: ld.d r8,s1_ptr,8
180: st.d r6,res_ptr,0
181: subu size,size,2
182: addu s1_ptr,s1_ptr,8
183: addu s2_ptr,s2_ptr,8
184: addu res_ptr,res_ptr,8
185: bcnd ge0,size,Loope1b
186: Lend1b: subu.cio r6,r8,r10
187: subu.cio r7,r9,r12
188: st.d r6,res_ptr,0
189:
190: bb0 0,size,Lret1b
191: /* Subtract last limb */
192: ld r10,s2_ptr,8
193: ld r8,s1_ptr,8
194: subu.cio r6,r8,r10
195: st r6,res_ptr,8
196:
197: Lret1b: addu.ci r2,r0,r0 ; cy out of most sign. limb (cy = no borrow)
198: jmp.n r1
199: xor r2,r2,1 ; delay slot: invert cy so r2 = borrow-out
200:
201: ; ** V2 **
202: /* If we come here, the alignment of s1_ptr and res_ptr as well as the
203: alignment of s2_ptr and res_ptr differ. Since there are only two ways
204: things can be aligned (that we care about) we now know that the alignment
205: of s1_ptr and s2_ptr are the same. */
206:
207: L2: cmp r12,size,1
208: bb1 eq,r12,Ljone
209: bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
210: /* Subtract least significant limb separately to align s1_ptr and s2_ptr */
211: ld r10,s1_ptr,0
212: addu s1_ptr,s1_ptr,4
213: ld r8,s2_ptr,0
214: addu s2_ptr,s2_ptr,4
215: subu size,size,1
216: subu.co r6,r10,r8
217: st r6,res_ptr,0
218: addu res_ptr,res_ptr,4
219:
220: L_v2: subu size,size,8
221: bcnd lt0,size,Lfin2
222: /* Subtract blocks of 8 limbs until less than 8 limbs remain */
223: align 8
224: Loop2: subu size,size,8
225: ld.d r8,s1_ptr,0
226: ld.d r6,s2_ptr,0
227: subu.cio r8,r8,r6
228: st r8,res_ptr,0
229: subu.cio r9,r9,r7
230: st r9,res_ptr,4
231: ld.d r8,s1_ptr,8
232: ld.d r6,s2_ptr,8
233: subu.cio r8,r8,r6
234: st r8,res_ptr,8
235: subu.cio r9,r9,r7
236: st r9,res_ptr,12
237: ld.d r8,s1_ptr,16
238: ld.d r6,s2_ptr,16
239: subu.cio r8,r8,r6
240: st r8,res_ptr,16
241: subu.cio r9,r9,r7
242: st r9,res_ptr,20
243: ld.d r8,s1_ptr,24
244: ld.d r6,s2_ptr,24
245: subu.cio r8,r8,r6
246: st r8,res_ptr,24
247: subu.cio r9,r9,r7
248: st r9,res_ptr,28
249: addu s1_ptr,s1_ptr,32
250: addu s2_ptr,s2_ptr,32
251: addu res_ptr,res_ptr,32
252: bcnd ge0,size,Loop2
253:
254: Lfin2: addu size,size,8-2
255: bcnd lt0,size,Lend2
256: Loope2: ld.d r8,s1_ptr,0
257: ld.d r6,s2_ptr,0
258: subu.cio r8,r8,r6
259: st r8,res_ptr,0
260: subu.cio r9,r9,r7
261: st r9,res_ptr,4
262: subu size,size,2
263: addu s1_ptr,s1_ptr,8
264: addu s2_ptr,s2_ptr,8
265: addu res_ptr,res_ptr,8
266: bcnd ge0,size,Loope2
267: Lend2: bb0 0,size,Lret2
268: /* Subtract last limb */
269: Ljone: ld r10,s1_ptr,0
270: ld r8,s2_ptr,0
271: subu.cio r6,r10,r8
272: st r6,res_ptr,0
273:
274: Lret2: addu.ci r2,r0,r0 ; cy out of most sign. limb (cy = no borrow)
275: jmp.n r1
276: xor r2,r2,1 ; delay slot: invert cy so r2 = borrow-out
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>