Annotation of OpenXM_contrib/gmp/mpn/alpha/ev5/rshift.s, Revision 1.1.1.1
1.1 maekawa 1: # Alpha EV5 __mpn_rshift --
2:
3: # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
4:
5: # This file is part of the GNU MP Library.
6:
7: # The GNU MP Library is free software; you can redistribute it and/or modify
8: # it under the terms of the GNU Library General Public License as published by
9: # the Free Software Foundation; either version 2 of the License, or (at your
10: # option) any later version.
11:
12: # The GNU MP Library is distributed in the hope that it will be useful, but
13: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15: # License for more details.
16:
17: # You should have received a copy of the GNU Library General Public License
18: # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: # MA 02111-1307, USA.
21:
22:
23: # INPUT PARAMETERS
24: # res_ptr r16 (destination; result limbs are stored at increasing addresses)
25: # s1_ptr r17 (source; limbs are loaded at increasing addresses)
26: # size r18 (limb count; the first limb is loaded unconditionally, so size >= 1 is assumed)
27: # cnt r19 (right-shift count in bits; presumably 1..63 -- TODO confirm against callers)
28: # RESULT in $0: the first source limb shifted left by 64-cnt, i.e. the bits shifted out at the low end.
29: # This code runs at 3.25 cycles/limb on the EV5.
30: # Registers written: $0-$8, $16-$18, $20-$24, $28. No stack is used.
31: .set noreorder # the instruction order below is hand-scheduled; the assembler must keep it
32: .set noat # $28 is used explicitly as a scratch counter below
33: .text
34: .align 3
35: .globl __mpn_rshift
36: .ent __mpn_rshift
37: __mpn_rshift:
38: .frame $30,0,$26,0 # zero-size frame; return address is in $26
39: # Each result limb is (limb[i] >> cnt) | (limb[i+1] << (64-cnt)); the top limb is just limb[size-1] >> cnt.
40: ldq $4,0($17) # load first limb
41: subq $31,$19,$20 # $20 = 0 - cnt; shifts use only the low 6 bits of the count, so this acts as 64-cnt
42: subq $18,1,$18 # $18 = size-1 = limbs that still have to be loaded
43: and $18,4-1,$28 # number of limbs in first loop
44: sll $4,$20,$0 # compute function result
45:
46: beq $28,.L0 # size-1 is already a multiple of 4: skip the one-limb-at-a-time loop
47: subq $18,$28,$18 # $18 = limbs left for the 4-way code, now a multiple of 4
48: # First loop: one result limb per pass, $28 passes. $4 always holds the current source limb.
49: .Loop0: ldq $3,8($17) # $3 = next source limb
50: addq $16,8,$16
51: srl $4,$19,$5 # $5 = current limb >> cnt
52: addq $17,8,$17
53: subq $28,1,$28
54: sll $3,$20,$6 # $6 = next limb << (64-cnt)
55: or $3,$3,$4 # register move: the next limb becomes the current limb
56: or $5,$6,$8
57: stq $8,-8($16) # offset -8 because $16 was already advanced above
58: bne $28,.Loop0
59:
60: .L0: srl $4,$19,$24 # $24 = current limb >> cnt, to be merged into the next result limb
61: beq $18,.Lend # nothing left to load: $24 alone is the top result limb
62: # warm up phase 1: load the next four source limbs into $1-$4
63: ldq $1,8($17)
64: subq $18,4,$18
65: ldq $2,16($17)
66: ldq $3,24($17)
67: ldq $4,32($17)
68: beq $18,.Lend1 # those were the last four limbs
69: # warm up phase 2: shift the first group of four while loading the second group
70: sll $1,$20,$7
71: srl $1,$19,$21
72: sll $2,$20,$8
73: ldq $1,40($17)
74: srl $2,$19,$22
75: ldq $2,48($17)
76: sll $3,$20,$5
77: or $7,$24,$7 # uses the old $24 (previous limb >> cnt) before it is overwritten below
78: srl $3,$19,$23
79: or $8,$21,$8
80: sll $4,$20,$6
81: ldq $3,56($17)
82: srl $4,$19,$24 # new carry into the next group
83: ldq $4,64($17)
84: subq $18,4,$18
85: beq $18,.Lend2 # the second group was the last one
86: .align 4
87: # main loop: each pass stores four result limbs, shifts four limbs and loads four more
88: .Loop: stq $7,0($16)
89: or $5,$22,$5
90: stq $8,8($16)
91: or $6,$23,$6
92:
93: sll $1,$20,$7
94: subq $18,4,$18
95: srl $1,$19,$21
96: unop # ldq $31,-96($17)
97:
98: sll $2,$20,$8
99: ldq $1,72($17)
100: srl $2,$19,$22
101: ldq $2,80($17)
102:
103: stq $5,16($16)
104: or $7,$24,$7 # uses the $24 from the previous group; $24 is rewritten further down
105: stq $6,24($16)
106: or $8,$21,$8
107:
108: sll $3,$20,$5
109: unop # ldq $31,-96($17)
110: srl $3,$19,$23
111: addq $16,32,$16
112:
113: sll $4,$20,$6
114: ldq $3,88($17)
115: srl $4,$19,$24
116: ldq $4,96($17)
117:
118: addq $17,32,$17
119: bne $18,.Loop
120: # cool down phase 2/1: store the pending group and shift the last four loaded limbs
121: .Lend2: stq $7,0($16)
122: or $5,$22,$5
123: stq $8,8($16)
124: or $6,$23,$6
125: sll $1,$20,$7
126: srl $1,$19,$21
127: sll $2,$20,$8
128: srl $2,$19,$22
129: stq $5,16($16)
130: or $7,$24,$7
131: stq $6,24($16)
132: or $8,$21,$8
133: sll $3,$20,$5
134: srl $3,$19,$23
135: sll $4,$20,$6
136: srl $4,$19,$24
137: # cool down phase 2/2: store the last four full result limbs ($16 was not advanced, hence offsets 32..56)
138: stq $7,32($16)
139: or $5,$22,$5
140: stq $8,40($16)
141: or $6,$23,$6
142: stq $5,48($16)
143: stq $6,56($16)
144: # cool down phase 2/3: top result limb = last source limb >> cnt
145: stq $24,64($16)
146: ret $31,($26),1
147:
148: # cool down phase 1/1: only one group of four was loaded; shift it
149: .Lend1: sll $1,$20,$7
150: srl $1,$19,$21
151: sll $2,$20,$8
152: srl $2,$19,$22
153: sll $3,$20,$5
154: or $7,$24,$7
155: srl $3,$19,$23
156: or $8,$21,$8
157: sll $4,$20,$6
158: srl $4,$19,$24
159: # cool down phase 1/2: store the four result limbs, then the top limb
160: stq $7,0($16)
161: or $5,$22,$5
162: stq $8,8($16)
163: or $6,$23,$6
164: stq $5,16($16)
165: stq $6,24($16)
166: stq $24,32($16)
167: ret $31,($26),1
168:
169: .Lend: stq $24,0($16) # top result limb = last source limb >> cnt
170: ret $31,($26),1
171: .end __mpn_rshift
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>