Annotation of OpenXM_contrib/gmp/mpn/powerpc64/lshift.asm, Revision 1.1.1.2
1.1.1.2 ! ohara 1: # PowerPC-64 mpn_lshift -- Shift a number left.
1.1 maekawa 2:
1.1.1.2 ! ohara 3: # Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
1.1 maekawa 4:
5: # This file is part of the GNU MP Library.
6:
7: # The GNU MP Library is free software; you can redistribute it and/or modify
8: # it under the terms of the GNU Lesser General Public License as published by
9: # the Free Software Foundation; either version 2.1 of the License, or (at your
10: # option) any later version.
11:
12: # The GNU MP Library is distributed in the hope that it will be useful, but
13: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15: # License for more details.
16:
17: # You should have received a copy of the GNU Lesser General Public License
18: # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: # MA 02111-1307, USA.
21:
1.1.1.2 ! ohara 22: include(`../config.m4')
! 23:
! 24:
! 25: # ppc630: 1.6375 cycles/limb
! 26:
1.1 maekawa 27:
28: # INPUT PARAMETERS
29: # res_ptr r3
30: # s1_ptr r4
31: # size r5
32: # cnt r6
33:
34: ASM_START()
# mpn_lshift: shift the size-limb number at s1_ptr (r4) left by cnt (r6) bits
# and store the size result limbs at res_ptr (r3).  Limbs are 8 bytes wide and
# are processed from the highest address downwards.  The value returned in r3
# is the top source limb shifted right by (64 - cnt), i.e. the bits shifted
# out of the most significant limb.
#
# Two code paths: a simple 2-way unrolled CTR loop for size <= 20, and a
# 4-way unrolled loop (.LBIG) for larger operands.
#
# NOTE(review): both paths load one limb before testing any count, so size is
# presumably required to be >= 1 -- confirm against callers.
35: PROLOGUE(mpn_lshift)
36: cmpdi cr0,r5,20 # more than 20 limbs?
37: sldi r0,r5,3 # r0 = size * 8 = operand length in bytes
38: add r4,r4,r0 # make r4 point at end of s1
39: add r7,r3,r0 # make r7 point at end of res
40: bgt .LBIG # branch if more than 20 limbs
41:
42: mtctr r5 # copy size into CTR
43: subfic r8,r6,64 # r8 = 64 - cnt, the complementary right-shift count
44: ldu r11,-8(r4) # load first s1 limb
45: srd r3,r11,r8 # compute function return value
46: bdz .Lend1 # size == 1: only the final store remains
47:
# Small-operand loop.  Each half combines (current limb << cnt) with
# (next lower limb >> (64 - cnt)); r11 and r10 alternate as the current limb.
48: .Loop: ldu r10,-8(r4)
49: sld r9,r11,r6
50: srd r12,r10,r8
51: or r9,r9,r12
52: stdu r9,-8(r7)
53: bdz .Lend2 # last limb is in r10
54: ldu r11,-8(r4)
55: sld r9,r10,r6
56: srd r12,r11,r8
57: or r9,r9,r12
58: stdu r9,-8(r7)
59: bdnz .Loop # falls through to .Lend1 with the last limb in r11
60:
# Store the lowest result limb: last source limb << cnt, zeros shifted in.
61: .Lend1: sld r0,r11,r6
62: std r0,-8(r7)
63: blr
64: .Lend2: sld r0,r10,r6
65: std r0,-8(r7)
66: blr
67:
# Large-operand path.  r24-r31 are overwritten below, so they are saved first
# and reloaded before the final blr.  They are stored at negative offsets from
# r1 without adjusting r1.
# NOTE(review): presumably relies on the ABI guaranteeing that the area just
# below the stack pointer is preserved for leaf routines -- confirm for the
# target ABI.
68: .LBIG:
1.1.1.2 ! ohara 69: std r24,-64(r1)
! 70: std r25,-56(r1)
! 71: std r26,-48(r1)
! 72: std r27,-40(r1)
! 73: std r28,-32(r1)
! 74: std r29,-24(r1)
! 75: std r30,-16(r1)
! 76: std r31,-8(r1)
1.1 maekawa 77: ldu r9,-8(r4) # load most significant s1 limb
78: subfic r8,r6,64 # r8 = 64 - cnt
79: srd r3,r9,r8 # compute function return value
80: sld r0,r9,r6 # r0 = high part of the next result limb (carried between steps)
81: addi r5,r5,-1 # r5 = limbs still to be read
82:
# Process (size - 1) mod 4 limbs one at a time so that the count left for the
# unrolled loop is a multiple of 4.
83: andi. r10,r5,3 # count for spill loop
84: beq .Le # already a multiple of 4
85: mtctr r10
86: ldu r28,-8(r4)
87: bdz .Lxe0 # only one leftover limb: skip the loop
88:
89: .Loop0: sld r12,r28,r6
90: srd r24,r28,r8
91: ldu r28,-8(r4)
92: or r24,r0,r24
93: stdu r24,-8(r7)
94: mr r0,r12
95: bdnz .Loop0 # taken at most once!
96:
# Last leftover limb: same step as .Loop0 but without loading another limb.
97: .Lxe0: sld r12,r28,r6
98: srd r24,r28,r8
99: or r24,r0,r24
100: stdu r24,-8(r7)
101: mr r0,r12
102:
103: .Le: srdi r5,r5,2 # count for unrolled loop
104: addi r5,r5,-1 # one group fewer: the last group is handled after the loop
105: mtctr r5
106: ld r28,-8(r4) # preload the first group of four limbs
107: ld r29,-16(r4)
108: ld r30,-24(r4)
109: ldu r31,-32(r4)
110:
# Main loop: shift the four limbs held in r28-r31 while loading the next four
# into the same registers, then store four result limbs.  r0 enters holding the
# left-shifted part of the previous limb and leaves holding that of r31.
111: .LoopU: sld r9,r28,r6
112: srd r24,r28,r8
113: ld r28,-8(r4)
114: sld r10,r29,r6
115: srd r25,r29,r8
116: ld r29,-16(r4)
117: sld r11,r30,r6
118: srd r26,r30,r8
119: ld r30,-24(r4)
120: sld r12,r31,r6
121: srd r27,r31,r8
122: ldu r31,-32(r4)
123: or r24,r0,r24
124: std r24,-8(r7)
125: or r25,r9,r25
126: std r25,-16(r7)
127: or r26,r10,r26
128: std r26,-24(r7)
129: or r27,r11,r27
130: stdu r27,-32(r7)
131: mr r0,r12
132: bdnz .LoopU
133:
# Tail: process the final group already held in r28-r31; no further loads.
134: sld r9,r28,r6
135: srd r24,r28,r8
136: sld r10,r29,r6
137: srd r25,r29,r8
138: sld r11,r30,r6
139: srd r26,r30,r8
140: sld r12,r31,r6
141: srd r27,r31,r8
142: or r24,r0,r24
143: std r24,-8(r7)
144: or r25,r9,r25
145: std r25,-16(r7)
146: or r26,r10,r26
147: std r26,-24(r7)
148: or r27,r11,r27
149: stdu r27,-32(r7)
150: mr r0,r12
151:
152: std r0,-8(r7) # lowest result limb: last source limb << cnt
1.1.1.2 ! ohara 153: ld r24,-64(r1)
! 154: ld r25,-56(r1)
! 155: ld r26,-48(r1)
! 156: ld r27,-40(r1)
! 157: ld r28,-32(r1)
! 158: ld r29,-24(r1)
! 159: ld r30,-16(r1)
! 160: ld r31,-8(r1)
1.1 maekawa 161: blr
162: EPILOGUE(mpn_lshift)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>