Annotation of OpenXM/src/kan96xx/gmp-2.0.2-ssh-2/mpn/alpha/ev5/sub_n.s, Revision 1.1.1.1
1.1 takayama 1: # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
2: # store difference in a third limb vector.
3:
4: # Copyright (C) 1995 Free Software Foundation, Inc.
5:
6: # This file is part of the GNU MP Library.
7:
8: # The GNU MP Library is free software; you can redistribute it and/or modify
9: # it under the terms of the GNU Library General Public License as published by
10: # the Free Software Foundation; either version 2 of the License, or (at your
11: # option) any later version.
12:
13: # The GNU MP Library is distributed in the hope that it will be useful, but
14: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
16: # License for more details.
17:
18: # You should have received a copy of the GNU Library General Public License
19: # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: # MA 02111-1307, USA.
22:
23:
24: # INPUT PARAMETERS (argument registers as used by the code below)
25: # res_ptr $16 -- destination vector, difference limbs are written here with stq
26: # s1_ptr $17 -- minuend vector, limbs are read with ldq
27: # s2_ptr $18 -- subtrahend vector, limbs are read with ldq
28: # size $19 -- number of limbs to process (the file header states length > 0)
29: # RETURN VALUE: $0 = borrow out of the last limb processed (copy of cy in $25)
30: .set noreorder # keep the hand-scheduled instruction order below
31: .set noat # $28 is used explicitly as a scratch register in this routine
32: .text
33: .align 3
34: .globl __mpn_sub_n
35: .ent __mpn_sub_n
36: __mpn_sub_n:
37: .frame $30,0,$26,0 # frame size 0, return address in $26
38: # Per limb: $28 = s2 limb + cy, diff = s1 limb - $28, new cy = (carry out of the add) or (borrow out of the sub). $0-$3 hold s2 limbs, $4-$7 s1 limbs, $20-$23 difference limbs, $8 the add carry, $25 cy.
39: or $31,$31,$25 # clear cy ($31 reads as zero)
40: subq $19,4,$19 # decr loop cnt by one group of 4 limbs
41: blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
42: # Start software pipeline for 1st loop: load one group of 4 limbs and compute limbs 0 and 1
43: ldq $0,0($18) # s2 limb 0
44: ldq $1,8($18) # s2 limb 1
45: ldq $4,0($17) # s1 limb 0
46: ldq $5,8($17) # s1 limb 1
47: addq $17,32,$17 # update s1_ptr (later s1 loads in this group use negative offsets)
48: ldq $2,16($18) # s2 limb 2
49: subq $4,$0,$20 # 1st main sub (no cy add needed, cy is still 0 here)
50: ldq $3,24($18) # s2 limb 3
51: subq $19,4,$19 # decr loop cnt, now negative if no further full group exists
52: ldq $6,-16($17) # s1 limb 2 (s1_ptr was already advanced by 32)
53: cmpult $4,$20,$25 # cy = borrow from last sub (s1 limb < difference)
54: ldq $7,-8($17) # s1 limb 3
55: addq $1,$25,$28 # cy add: $28 = s2 limb 1 + cy
56: addq $18,32,$18 # update s2_ptr
57: subq $5,$28,$21 # 2nd main sub
58: cmpult $28,$25,$8 # $8 = carry out of last cy add (sum wrapped below cy)
59: blt $19,.Lend1 # if less than 4 limbs remain, jump
60: # 1st loop handles groups of 4 limbs in a software pipeline. On entry $20 and $21 hold the not yet stored differences of limbs 0 and 1, $2, $3, $6, $7 hold limbs 2 and 3, and both source pointers already address the next group.
61: .Loop: cmpult $5,$21,$25 # borrow from last sub (limb 1)
62: ldq $0,0($18) # prefetch s2 limb 0 of the next group
63: or $8,$25,$25 # cy = add carry or sub borrow
64: ldq $1,8($18) # prefetch s2 limb 1 of the next group
65: addq $2,$25,$28 # cy add
66: ldq $4,0($17) # prefetch s1 limb 0 of the next group
67: subq $6,$28,$22 # 3rd main sub
68: ldq $5,8($17) # prefetch s1 limb 1 of the next group
69: cmpult $28,$25,$8 # carry out of last cy add
70: cmpult $6,$22,$25 # borrow from last sub
71: stq $20,0($16) # store difference limb 0
72: or $8,$25,$25 # cy = add carry or sub borrow
73: stq $21,8($16) # store difference limb 1
74: addq $3,$25,$28 # cy add
75: subq $7,$28,$23 # 4th main sub
76: cmpult $28,$25,$8 # carry out of last cy add
77: cmpult $7,$23,$25 # borrow from last sub
78: addq $17,32,$17 # update s1_ptr
79: or $8,$25,$25 # cy = add carry or sub borrow
80: addq $16,32,$16 # update res_ptr (limbs 2 and 3 are stored below at negative offsets)
81: addq $0,$25,$28 # cy add for limb 0 of the next group
82: ldq $2,16($18) # s2 limb 2 of the next group
83: subq $4,$28,$20 # 1st main sub
84: ldq $3,24($18) # s2 limb 3 of the next group
85: cmpult $28,$25,$8 # carry out of last cy add
86: ldq $6,-16($17) # s1 limb 2 of the next group (s1_ptr already advanced)
87: cmpult $4,$20,$25 # borrow from last sub
88: ldq $7,-8($17) # s1 limb 3 of the next group
89: or $8,$25,$25 # cy = add carry or sub borrow
90: subq $19,4,$19 # decr loop cnt
91: stq $22,-16($16) # store difference limb 2 of the group just finished
92: addq $1,$25,$28 # cy add
93: stq $23,-8($16) # store difference limb 3 of the group just finished
94: subq $5,$28,$21 # 2nd main sub
95: addq $18,32,$18 # update s2_ptr
96: cmpult $28,$25,$8 # carry out of last cy add
97: bge $19,.Loop # another full group was loaded, keep looping
98: # Finish software pipeline for 1st loop: complete limbs 2 and 3 of the last loaded group and store all four differences, without loading anything further
99: .Lend1: cmpult $5,$21,$25 # borrow from last sub (limb 1)
100: or $8,$25,$25 # cy = add carry or sub borrow
101: addq $2,$25,$28 # cy add
102: subq $6,$28,$22 # 3rd main sub
103: cmpult $28,$25,$8 # carry out of last cy add
104: cmpult $6,$22,$25 # borrow from last sub
105: stq $20,0($16) # store difference limb 0
106: or $8,$25,$25 # cy = add carry or sub borrow
107: stq $21,8($16) # store difference limb 1
108: addq $3,$25,$28 # cy add
109: subq $7,$28,$23 # 4th main sub
110: cmpult $28,$25,$8 # carry out of last cy add
111: cmpult $7,$23,$25 # borrow from last sub
112: or $8,$25,$25 # cy = add carry or sub borrow
113: addq $16,32,$16 # update res_ptr
114: stq $22,-16($16) # store difference limb 2
115: stq $23,-8($16) # store difference limb 3
116: .Lend2: addq $19,4,$19 # restore loop cnt: $19 = limbs still to do (0 to 3)
117: beq $19,.Lret # nothing left, return cy
118: # Start software pipeline for 2nd loop: load the first remaining limb pair
119: ldq $0,0($18) # s2 limb
120: ldq $4,0($17) # s1 limb
121: subq $19,1,$19 # count limbs remaining after the one just loaded
122: beq $19,.Lend0 # only one limb left, skip the loop
123: # 2nd loop handles remaining 1-3 limbs, one limb per iteration, loading the next limb pair while the current one is subtracted
124: .align 4
125: .Loop0: addq $0,$25,$28 # cy add
126: ldq $0,8($18) # next s2 limb ($0 was consumed by the add above)
127: subq $4,$28,$20 # main sub
128: ldq $1,8($17) # next s1 limb, held in $1 because $4 is still needed below
129: addq $18,8,$18 # update s2_ptr
130: cmpult $28,$25,$8 # carry out of last cy add
131: addq $17,8,$17 # update s1_ptr
132: stq $20,0($16) # store difference limb
133: cmpult $4,$20,$25 # borrow from last sub
134: subq $19,1,$19 # decr loop cnt
135: or $8,$25,$25 # cy = add carry or sub borrow
136: addq $16,8,$16 # update res_ptr
137: or $1,$31,$4 # move the prefetched s1 limb into $4 for the next sub
138: bne $19,.Loop0 # loop while another limb remains after the loaded one
139: .Lend0: addq $0,$25,$28 # cy add for the final limb
140: subq $4,$28,$20 # main sub
141: cmpult $28,$25,$8 # carry out of last cy add
142: cmpult $4,$20,$25 # borrow from last sub
143: stq $20,0($16) # store the final difference limb
144: or $8,$25,$25 # cy = add carry or sub borrow
145: # Common exit: $25 holds the final borrow on every path that reaches here
146: .Lret: or $25,$31,$0 # return cy in $0
147: ret $31,($26),1 # return through the address in $26
148: .end __mpn_sub_n
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>