version 1.1.1.1, 2000/09/09 14:12:38 |
version 1.1.1.2, 2003/08/25 16:06:24 |
|
|
dnl PowerPC-32 mpn_lshift -- Shift a number left. |
dnl PowerPC-32 mpn_lshift -- Shift a number left. |
|
|
dnl Copyright (C) 1995, 1998, 2000 Free Software Foundation, Inc. |
dnl Copyright 1995, 1998, 2000, 2002 Free Software Foundation, Inc. |
|
|
dnl This file is part of the GNU MP Library. |
dnl This file is part of the GNU MP Library. |
|
|
Line 19 dnl along with the GNU MP Library; see the file COPYING.LIB.  If not, write to |
|
Line 19 dnl along with the GNU MP Library; see the file COPYING.LIB.  If not, write to |
|
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
dnl MA 02111-1307, USA. |
dnl MA 02111-1307, USA. |
|
|
|
include(`../config.m4') |
|
|
|
|
|
C cycles/limb |
|
C 604e: 3.0 |
|
C 750: 3.0 |
|
|
|
|
dnl INPUT PARAMETERS |
dnl INPUT PARAMETERS |
dnl res_ptr r3 |
dnl res_ptr r3 |
dnl s1_ptr r4 |
dnl s1_ptr r4 |
dnl size r5 |
dnl size r5 |
dnl cnt r6 |
dnl cnt r6 |
|
|
include(`../config.m4') |
|
|
|
ASM_START() |
ASM_START() |
PROLOGUE(mpn_lshift) |
PROLOGUE(mpn_lshift) |
cmpi cr0,r5,12 C more than 12 limbs? |
cmpi cr0,r5,12 C more than 12 limbs? |
slwi r0,r5,2 |
slwi r0,r5,2 |
add r4,r4,r0 C make r4 point at end of s1 |
add r4,r4,r0 C make r4 point at end of s1 |
add r7,r3,r0 C make r7 point at end of res |
add r7,r3,r0 C make r7 point at end of res |
bgt .LBIG C branch if more than 12 limbs |
bgt L(BIG) C branch if more than 12 limbs |
|
|
mtctr r5 C copy size into CTR |
mtctr r5 C copy size into CTR |
subfic r8,r6,32 |
subfic r8,r6,32 |
lwzu r11,-4(r4) C load first s1 limb |
lwzu r11,-4(r4) C load first s1 limb |
srw r3,r11,r8 C compute function return value |
srw r3,r11,r8 C compute function return value |
bdz .Lend1 |
bdz L(end1) |
|
|
.Loop: lwzu r10,-4(r4) |
L(oop): lwzu r10,-4(r4) |
slw r9,r11,r6 |
slw r9,r11,r6 |
srw r12,r10,r8 |
srw r12,r10,r8 |
or r9,r9,r12 |
or r9,r9,r12 |
stwu r9,-4(r7) |
stwu r9,-4(r7) |
bdz .Lend2 |
bdz L(end2) |
lwzu r11,-4(r4) |
lwzu r11,-4(r4) |
slw r9,r10,r6 |
slw r9,r10,r6 |
srw r12,r11,r8 |
srw r12,r11,r8 |
or r9,r9,r12 |
or r9,r9,r12 |
stwu r9,-4(r7) |
stwu r9,-4(r7) |
bdnz .Loop |
bdnz L(oop) |
|
|
.Lend1: slw r0,r11,r6 |
L(end1): |
|
slw r0,r11,r6 |
stw r0,-4(r7) |
stw r0,-4(r7) |
blr |
blr |
.Lend2: slw r0,r10,r6 |
L(end2): |
|
slw r0,r10,r6 |
stw r0,-4(r7) |
stw r0,-4(r7) |
blr |
blr |
|
|
.LBIG: |
L(BIG): |
stmw r24,-32(r1) C save registers we are supposed to preserve |
stmw r24,-32(r1) C save registers we are supposed to preserve |
lwzu r9,-4(r4) |
lwzu r9,-4(r4) |
subfic r8,r6,32 |
subfic r8,r6,32 |
Line 71 PROLOGUE(mpn_lshift) |
|
Line 78 PROLOGUE(mpn_lshift) |
|
addi r5,r5,-1 |
addi r5,r5,-1 |
|
|
andi. r10,r5,3 C count for spill loop |
andi. r10,r5,3 C count for spill loop |
beq .Le |
beq L(e) |
mtctr r10 |
mtctr r10 |
lwzu r28,-4(r4) |
lwzu r28,-4(r4) |
bdz .Lxe0 |
bdz L(xe0) |
|
|
.Loop0: slw r12,r28,r6 |
L(oop0): slw r12,r28,r6 |
srw r24,r28,r8 |
srw r24,r28,r8 |
lwzu r28,-4(r4) |
lwzu r28,-4(r4) |
or r24,r0,r24 |
or r24,r0,r24 |
stwu r24,-4(r7) |
stwu r24,-4(r7) |
mr r0,r12 |
mr r0,r12 |
bdnz .Loop0 C taken at most once! |
bdnz L(oop0) C taken at most once! |
|
|
.Lxe0: slw r12,r28,r6 |
L(xe0): slw r12,r28,r6 |
srw r24,r28,r8 |
srw r24,r28,r8 |
or r24,r0,r24 |
or r24,r0,r24 |
stwu r24,-4(r7) |
stwu r24,-4(r7) |
mr r0,r12 |
mr r0,r12 |
|
|
.Le: srwi r5,r5,2 C count for unrolled loop |
L(e): srwi r5,r5,2 C count for unrolled loop |
addi r5,r5,-1 |
addi r5,r5,-1 |
mtctr r5 |
mtctr r5 |
lwz r28,-4(r4) |
lwz r28,-4(r4) |
Line 98 PROLOGUE(mpn_lshift) |
|
Line 105 PROLOGUE(mpn_lshift) |
|
lwz r30,-12(r4) |
lwz r30,-12(r4) |
lwzu r31,-16(r4) |
lwzu r31,-16(r4) |
|
|
.LoopU: slw r9,r28,r6 |
L(oopU): slw r9,r28,r6 |
srw r24,r28,r8 |
srw r24,r28,r8 |
lwz r28,-4(r4) |
lwz r28,-4(r4) |
slw r10,r29,r6 |
slw r10,r29,r6 |
Line 119 PROLOGUE(mpn_lshift) |
|
Line 126 PROLOGUE(mpn_lshift) |
|
or r27,r11,r27 |
or r27,r11,r27 |
stwu r27,-16(r7) |
stwu r27,-16(r7) |
mr r0,r12 |
mr r0,r12 |
bdnz .LoopU |
bdnz L(oopU) |
|
|
slw r9,r28,r6 |
slw r9,r28,r6 |
srw r24,r28,r8 |
srw r24,r28,r8 |