Annotation of OpenXM_contrib/gmp/mpn/powerpc64/lshift.asm, Revision 1.1.1.1
1.1 maekawa 1: # PowerPC-64 mpn_lshift -- Shift a number left.
2:
3: # Copyright (C) 1999, 2000 Free Software Foundation, Inc.
4:
5: # This file is part of the GNU MP Library.
6:
7: # The GNU MP Library is free software; you can redistribute it and/or modify
8: # it under the terms of the GNU Lesser General Public License as published by
9: # the Free Software Foundation; either version 2.1 of the License, or (at your
10: # option) any later version.
11:
12: # The GNU MP Library is distributed in the hope that it will be useful, but
13: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15: # License for more details.
16:
17: # You should have received a copy of the GNU Lesser General Public License
18: # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: # MA 02111-1307, USA.
21:
22:
23: # INPUT PARAMETERS
24: # res_ptr r3
25: # s1_ptr r4
26: # size r5
27: # cnt r6
28:
29: include(`../config.m4')
30:
31: ASM_START()
# mpn_lshift: move the bits of the size-limb number at s1_ptr left by cnt
# bits and store the size-limb result at res_ptr.  Limbs are 64 bits wide and
# are processed from the highest address down to the lowest.
#
# Returns in r3 the highest-addressed source limb shifted right by 64-cnt,
# i.e. the bits pushed out at the top.
#
# Register roles: r4 = descending source pointer, r7 = descending destination
# pointer, r6 = cnt, r8 = 64-cnt, CTR = loop counter.
#
# Two code paths: a 2-way unrolled loop for size <= 20 and a 4-way unrolled
# loop for size > 20.  Neither path checks its arguments.
# NOTE(review): presumably callers guarantee size >= 1 -- confirm.
32: PROLOGUE(mpn_lshift)
33: cmpdi cr0,r5,20 # more than 20 limbs?
34: sldi r0,r5,3 # r0 = size * 8 = operand size in bytes
35: add r4,r4,r0 # make r4 point at end of s1
36: add r7,r3,r0 # make r7 point at end of res
37: bgt .LBIG # branch if more than 20 limbs
38: # Small case (size <= 20): 2-way unrolled loop, alternating r11 and r10.
39: mtctr r5 # copy size into CTR
40: subfic r8,r6,64 # r8 = 64 - cnt
41: ldu r11,-8(r4) # load first s1 limb
42: srd r3,r11,r8 # compute function return value
43: bdz .Lend1 # only one limb: just store its low part
44: # Each half: result = (previous limb << cnt) | (next limb >> (64-cnt)).
45: .Loop: ldu r10,-8(r4) # next limb down
46: sld r9,r11,r6 # high part from previous limb
47: srd r12,r10,r8 # low part from next limb
48: or r9,r9,r12
49: stdu r9,-8(r7) # store result limb, r7 steps down
50: bdz .Lend2 # source exhausted, last limb is in r10
51: ldu r11,-8(r4) # same step with r10 and r11 swapped
52: sld r9,r10,r6
53: srd r12,r11,r8
54: or r9,r9,r12
55: stdu r9,-8(r7)
56: bdnz .Loop # fall through when last limb is in r11
57: # Lowest-addressed result limb is the last source limb << cnt.
58: .Lend1: sld r0,r11,r6
59: std r0,-8(r7)
60: blr
61: .Lend2: sld r0,r10,r6
62: std r0,-8(r7)
63: blr
64: # Large case (size > 20): r24-r31 are saved below r1 without moving r1.
65: .LBIG: # NOTE(review): relies on the ABI permitting leaf-routine stores just below r1 -- confirm.
66: std r24,-64(1)
67: std r25,-56(1)
68: std r26,-48(1)
69: std r27,-40(1)
70: std r28,-32(1)
71: std r29,-24(1)
72: std r30,-16(1)
73: std r31,-8(1)
74: ldu r9,-8(r4) # load first (highest-addressed) s1 limb
75: subfic r8,r6,64 # r8 = 64 - cnt
76: srd r3,r9,r8 # compute function return value
77: sld r0,r9,r6 # r0 = pending high part for the next result limb
78: addi r5,r5,-1 # r5 = source limbs still to load
79: # Handle (size-1) mod 4 limbs one at a time so the rest is a multiple of 4.
80: andi. r10,r5,3 # count for spill loop
81: beq .Le # nothing left over, go straight to the unrolled loop
82: mtctr r10
83: ldu r28,-8(r4)
84: bdz .Lxe0 # exactly one leftover limb
85: # Process the limb in r28 while loading the next; r0 carries the high part.
86: .Loop0: sld r12,r28,r6
87: srd r24,r28,r8
88: ldu r28,-8(r4)
89: or r24,r0,r24
90: stdu r24,-8(r7)
91: mr r0,r12
92: bdnz .Loop0 # taken at most once!
93: # Last leftover limb: same step, no further load.
94: .Lxe0: sld r12,r28,r6
95: srd r24,r28,r8
96: or r24,r0,r24
97: stdu r24,-8(r7)
98: mr r0,r12
99: # 4-way unrolled loop.  A group of four limbs is preloaded into r28-r31.
100: .Le: srdi r5,r5,2 # count for unrolled loop
101: addi r5,r5,-1 # last group is handled by the tail after the loop
102: mtctr r5 # nonzero, since this path is only entered with size > 20
103: ld r28,-8(r4)
104: ld r29,-16(r4)
105: ld r30,-24(r4)
106: ldu r31,-32(r4) # r4 steps down by four limbs
107: # Per iteration: split the four held limbs, load the next four, store four.
108: .LoopU: sld r9,r28,r6
109: srd r24,r28,r8
110: ld r28,-8(r4)
111: sld r10,r29,r6
112: srd r25,r29,r8
113: ld r29,-16(r4)
114: sld r11,r30,r6
115: srd r26,r30,r8
116: ld r30,-24(r4)
117: sld r12,r31,r6
118: srd r27,r31,r8
119: ldu r31,-32(r4)
120: or r24,r0,r24 # r0 = high part carried in from the previous limb
121: std r24,-8(r7)
122: or r25,r9,r25
123: std r25,-16(r7)
124: or r26,r10,r26
125: std r26,-24(r7)
126: or r27,r11,r27
127: stdu r27,-32(r7) # r7 steps down by four limbs
128: mr r0,r12 # carry high part of the lowest limb to the next group
129: bdnz .LoopU
130: # Tail: same computation for the last loaded group, without further loads.
131: sld r9,r28,r6
132: srd r24,r28,r8
133: sld r10,r29,r6
134: srd r25,r29,r8
135: sld r11,r30,r6
136: srd r26,r30,r8
137: sld r12,r31,r6
138: srd r27,r31,r8
139: or r24,r0,r24
140: std r24,-8(r7)
141: or r25,r9,r25
142: std r25,-16(r7)
143: or r26,r10,r26
144: std r26,-24(r7)
145: or r27,r11,r27
146: stdu r27,-32(r7)
147: mr r0,r12
148: # Lowest-addressed result limb is the last source limb << cnt; then restore r24-r31.
149: std r0,-8(r7)
150: ld r24,-64(1)
151: ld r25,-56(1)
152: ld r26,-48(1)
153: ld r27,-40(1)
154: ld r28,-32(1)
155: ld r29,-24(1)
156: ld r30,-16(1)
157: ld r31,-8(1)
158: blr
159: EPILOGUE(mpn_lshift)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>