Annotation of OpenXM/src/kan96xx/gmp-2.0.2-ssh-2/mpn/alpha/ev5/lshift.s, Revision 1.1
1.1 ! takayama 1: # Alpha EV5 __mpn_lshift -- Shift a limb vector left by cnt bits.
! 2:
! 3: # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
! 4:
! 5: # This file is part of the GNU MP Library.
! 6:
! 7: # The GNU MP Library is free software; you can redistribute it and/or modify
! 8: # it under the terms of the GNU Library General Public License as published by
! 9: # the Free Software Foundation; either version 2 of the License, or (at your
! 10: # option) any later version.
! 11:
! 12: # The GNU MP Library is distributed in the hope that it will be useful, but
! 13: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 15: # License for more details.
! 16:
! 17: # You should have received a copy of the GNU Library General Public License
! 18: # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: # MA 02111-1307, USA.
! 21:
! 22:
! 23: # INPUT PARAMETERS
! 24: # res_ptr r16 (destination limb array; written from its high end downwards)
! 25: # s1_ptr r17 (source limb array; read from its high end downwards)
! 26: # size r18 (number of 64-bit limbs)
! 27: # cnt r19 (left-shift distance in bits)
! 28: # RETURN VALUE r0 = most significant source limb shifted right by -cnt, i.e. the bits shifted out
! 29: # This code runs at 3.25 cycles/limb on the EV5.
! 30: # NOTE(review): the top limb is loaded unconditionally and -cnt is used as a right-shift count, so this presumably requires size >= 1 and 0 < cnt < 64 -- confirm against callers
! 31: .set noreorder # keep the hand-scheduled instruction order below
! 32: .set noat # $28 is used explicitly as a scratch counter below
! 33: .text
! 34: .align 3
! 35: .globl __mpn_lshift
! 36: .ent __mpn_lshift
! 37: __mpn_lshift:
! 38: .frame $30,0,$26,0 # frame size 0; every exit returns through $26
! 39: # setup: point both arrays at their ends, compute the return value and the peel count
! 40: s8addq $18,$17,$17 # r17 = s1_ptr + 8*size: make r17 point at end of s1
! 41: ldq $4,-8($17) # load first limb to process: the highest-addressed one
! 42: subq $31,$19,$20 # r20 = 0 - cnt, the right-shift count (shifts use the count mod 64, so effectively 64-cnt)
! 43: s8addq $18,$16,$16 # make r16 point at end of RES
! 44: subq $18,1,$18 # r18 = size-1 = limbs not yet loaded
! 45: and $18,4-1,$28 # number of limbs in first loop: (size-1) mod 4
! 46: srl $4,$20,$0 # compute function result: bits shifted out of the top limb
! 47:
! 48: beq $28,.L0 # nothing to peel, skip the one-limb-at-a-time loop
! 49: subq $18,$28,$18 # r18 = limbs left for the 4-way unrolled code (a multiple of 4)
! 50: # Loop0: one result limb per iteration; r4 = current limb, r3 = next lower limb
! 51: .align 3
! 52: .Loop0: ldq $3,-16($17) # r3 = next lower source limb
! 53: subq $16,8,$16 # step result pointer down one limb
! 54: sll $4,$19,$5 # high part: current limb << cnt
! 55: subq $17,8,$17 # step source pointer down one limb
! 56: subq $28,1,$28 # one fewer limb to peel
! 57: srl $3,$20,$6 # low part: bits of the next limb that cross into this result limb
! 58: or $3,$3,$4 # r4 = r3: the next limb becomes the current one
! 59: or $5,$6,$8 # combine high and low parts
! 60: stq $8,0($16) # store the result limb
! 61: bne $28,.Loop0 # until the peel count reaches zero
! 62:
! 63: .L0: sll $4,$19,$24 # r24 = current limb << cnt, still missing the low bits from the limb below
! 64: beq $18,.Lend # no limbs below: r24 is already the lowest result limb
! 65: # warm up phase 1: load the next four limbs into r1..r4
! 66: ldq $1,-16($17)
! 67: subq $18,4,$18 # four limbs consumed
! 68: ldq $2,-24($17)
! 69: ldq $3,-32($17)
! 70: ldq $4,-40($17)
! 71: beq $18,.Lend1 # those were the last four limbs
! 72: # warm up phase 2: shift the four loaded limbs while loading the four below them
! 73: srl $1,$20,$7 # r7/r8/r5/r6 collect the srl (low) parts of four result limbs
! 74: sll $1,$19,$21 # r21/r22/r23/r24 hold the sll (high) parts for the next-lower result limbs
! 75: srl $2,$20,$8
! 76: ldq $1,-48($17) # r1 is free again: reload it with the next group's first limb
! 77: sll $2,$19,$22
! 78: ldq $2,-56($17)
! 79: srl $3,$20,$5
! 80: or $7,$24,$7 # finish result limb using the r24 carried in from above
! 81: sll $3,$19,$23
! 82: or $8,$21,$8 # finish the second result limb
! 83: srl $4,$20,$6
! 84: ldq $3,-64($17)
! 85: sll $4,$19,$24 # new carry-out for the group below (the old r24 was consumed above)
! 86: ldq $4,-72($17)
! 87: subq $18,4,$18 # four more limbs consumed
! 88: beq $18,.Lend2 # the group just loaded is the last one
! 89: .align 4
! 90: # main loop: per iteration, store the previous group's four results, shift the loaded group, load the next group
! 91: .Loop: stq $7,-8($16) # store the two results finished in the previous stage
! 92: or $5,$22,$5 # finish the third pending result limb
! 93: stq $8,-16($16)
! 94: or $6,$23,$6 # finish the fourth pending result limb
! 95:
! 96: srl $1,$20,$7 # start on the current group (r1..r4)
! 97: subq $18,4,$18 # count the group being loaded in this iteration
! 98: sll $1,$19,$21
! 99: unop # ldq $31,-96($17) -- no-op; the load into $31 is left commented out
! 100:
! 101: srl $2,$20,$8
! 102: ldq $1,-80($17) # next group, first limb (r17 is only adjusted at the loop bottom)
! 103: sll $2,$19,$22
! 104: ldq $2,-88($17)
! 105:
! 106: stq $5,-24($16) # store the third and fourth results of the previous group
! 107: or $7,$24,$7 # join with the carry from the previous group
! 108: stq $6,-32($16)
! 109: or $8,$21,$8
! 110:
! 111: srl $3,$20,$5
! 112: unop # ldq $31,-96($17) -- no-op; the load into $31 is left commented out
! 113: sll $3,$19,$23
! 114: subq $16,32,$16 # result pointer down by one group (4 limbs)
! 115:
! 116: srl $4,$20,$6
! 117: ldq $3,-96($17)
! 118: sll $4,$19,$24 # carry for the next group
! 119: ldq $4,-104($17)
! 120:
! 121: subq $17,32,$17 # source pointer down by one group (4 limbs)
! 122: bne $18,.Loop # loop while limbs remain beyond the group just loaded
! 123: # cool down phase 2/1: store the pending group's results and shift the last loaded group
! 124: .Lend2: stq $7,-8($16)
! 125: or $5,$22,$5 # finish the third pending result before r22 is overwritten below
! 126: stq $8,-16($16)
! 127: or $6,$23,$6 # finish the fourth pending result before r23 is overwritten below
! 128: srl $1,$20,$7
! 129: sll $1,$19,$21
! 130: srl $2,$20,$8
! 131: sll $2,$19,$22
! 132: stq $5,-24($16)
! 133: or $7,$24,$7 # uses the old r24 before it is overwritten below
! 134: stq $6,-32($16)
! 135: or $8,$21,$8
! 136: srl $3,$20,$5
! 137: sll $3,$19,$23
! 138: srl $4,$20,$6
! 139: sll $4,$19,$24 # high part of the lowest result limb
! 140: # cool down phase 2/2: store the last group's results (r16 is not adjusted here, so offsets continue from -40)
! 141: stq $7,-40($16)
! 142: or $5,$22,$5
! 143: stq $8,-48($16)
! 144: or $6,$23,$6
! 145: stq $5,-56($16)
! 146: stq $6,-64($16)
! 147: # cool down phase 2/3: lowest result limb is the last source limb << cnt, with nothing ORed in
! 148: stq $24,-72($16)
! 149: ret $31,($26),1 # return to the address in r26; result is in r0
! 150:
! 151: # cool down phase 1/1: exactly four limbs (r1..r4) remain and nothing else is pending
! 152: .Lend1: srl $1,$20,$7
! 153: sll $1,$19,$21
! 154: srl $2,$20,$8
! 155: sll $2,$19,$22
! 156: srl $3,$20,$5
! 157: or $7,$24,$7 # join with the carry left in r24 at .L0
! 158: sll $3,$19,$23
! 159: or $8,$21,$8
! 160: srl $4,$20,$6
! 161: sll $4,$19,$24 # high part of the lowest result limb
! 162: # cool down phase 1/2: store the four results, then the lowest limb
! 163: stq $7,-8($16)
! 164: or $5,$22,$5
! 165: stq $8,-16($16)
! 166: or $6,$23,$6
! 167: stq $5,-24($16)
! 168: stq $6,-32($16)
! 169: stq $24,-40($16) # lowest result limb: last source limb << cnt
! 170: ret $31,($26),1 # return to the address in r26; result is in r0
! 171:
! 172: .Lend: stq $24,-8($16) # no unrolled work: store the pending r24 as the lowest result limb
! 173: ret $31,($26),1 # return to the address in r26; result is in r0
! 174: .end __mpn_lshift
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>