[BACK]Return to lshift.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / alpha / ev5

Annotation of OpenXM_contrib/gmp/mpn/alpha/ev5/lshift.asm, Revision 1.1.1.1

1.1       maekawa     1: dnl  Alpha EV5 __gmpn_lshift -- Shift a number left.
                      2:
                      3: dnl  Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
                      4:
                      5: dnl  This file is part of the GNU MP Library.
                      6:
                      7: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
                      8: dnl  it under the terms of the GNU Lesser General Public License as published by
                      9: dnl  the Free Software Foundation; either version 2.1 of the License, or (at your
                     10: dnl  option) any later version.
                     11:
                     12: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
                     13: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     15: dnl  License for more details.
                     16:
                     17: dnl  You should have received a copy of the GNU Lesser General Public License
                     18: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: dnl  MA 02111-1307, USA.
                     21:
                     22: include(`../config.m4')
                     23:
                     24: dnl  INPUT PARAMETERS
                     25: dnl  res_ptr   r16
                     26: dnl  s1_ptr    r17
                     27: dnl  size      r18
                     28: dnl  cnt       r19
                     29:
                     30: dnl  This code runs at 3.25 cycles/limb on the EV5.
                     31:
                     32: ASM_START()
                     33: PROLOGUE(mpn_lshift)
                     34:        s8addq  r18,r17,r17     C make r17 point at end of s1
                     35:        ldq     r4,-8(r17)      C load first limb (the one at the highest address)
                     36:        subq    r31,r19,r20     C r20 = 0 - cnt; shifts use only the low 6 bits, so this acts as 64-cnt
                     37:        s8addq  r18,r16,r16     C make r16 point at end of RES
                     38:        subq    r18,1,r18       C r18 = size-1, the first limb is already loaded
                     39:        and     r18,4-1,r28     C number of limbs in first loop
                     40:        srl     r4,r20,r0       C compute function result (first limb shifted right by r20)
                     41:
                     42:        beq     r28,$L0         C size-1 is a multiple of 4: skip the first loop
                     43:        subq    r18,r28,r18     C r18 = limbs left for the 4-way code, a multiple of 4
                     44:
                     45:        ALIGN(8)
                     46: $Loop0:        ldq     r3,-16(r17)     C r3 = next lower limb
                     47:        subq    r16,8,r16       C step result pointer down one limb
                     48:        sll     r4,r19,r5       C r5 = current limb shifted left by cnt
                     49:        subq    r17,8,r17       C step source pointer down one limb
                     50:        subq    r28,1,r28       C one fewer limb for this loop
                     51:        srl     r3,r20,r6       C r6 = lower limb shifted right by r20
                     52:        bis     r3,r3,r4        C r4 = r3, the lower limb becomes the current one
                     53:        bis     r5,r6,r8        C merge both parts into the result limb
                     54:        stq     r8,0(r16)       C store the result limb
                     55:        bne     r28,$Loop0      C repeat until r28 reaches zero
                     56:
                     57: $L0:   sll     r4,r19,r24      C r24 = current limb shifted left, still missing its low bits
                     58:        beq     r18,$Lend       C no limbs left: just store r24
                     59: C warm up phase 1: load the next four limbs into r1-r4
                     60:        ldq     r1,-16(r17)
                     61:        subq    r18,4,r18       C account for the four limbs being loaded
                     62:        ldq     r2,-24(r17)
                     63:        ldq     r3,-32(r17)
                     64:        ldq     r4,-40(r17)
                     65:        beq     r18,$Lend1      C those were the last four limbs
                     66: C warm up phase 2: shift the first group while loading the next four limbs
                     67:        srl     r1,r20,r7       C r7 = low bits for the pending limb in r24
                     68:        sll     r1,r19,r21      C r21 = high bits of the following result limb
                     69:        srl     r2,r20,r8
                     70:        ldq     r1,-48(r17)     C r1 has been consumed, reload it from the second group
                     71:        sll     r2,r19,r22
                     72:        ldq     r2,-56(r17)
                     73:        srl     r3,r20,r5
                     74:        bis     r7,r24,r7       C r7 = first result limb of the group
                     75:        sll     r3,r19,r23
                     76:        bis     r8,r21,r8       C r8 = second result limb of the group
                     77:        srl     r4,r20,r6
                     78:        ldq     r3,-64(r17)
                     79:        sll     r4,r19,r24      C r24 = new pending high bits
                     80:        ldq     r4,-72(r17)
                     81:        subq    r18,4,r18       C account for the second group
                     82:        beq     r18,$Lend2      C no limbs left to load: drain what is in flight
                     83:        ALIGN(16)
                     84: C main loop: each pass stores four result limbs and loads four source limbs
                     85: $Loop: stq     r7,-8(r16)
                     86:        bis     r5,r22,r5       C third result limb of the previous group
                     87:        stq     r8,-16(r16)
                     88:        bis     r6,r23,r6       C fourth result limb of the previous group
                     89:
                     90:        srl     r1,r20,r7
                     91:        subq    r18,4,r18       C four more limbs are loaded during this pass
                     92:        sll     r1,r19,r21
                     93:        unop    C ldq   r31,-96(r17)
                     94:
                     95:        srl     r2,r20,r8
                     96:        ldq     r1,-80(r17)
                     97:        sll     r2,r19,r22
                     98:        ldq     r2,-88(r17)
                     99:
                    100:        stq     r5,-24(r16)
                    101:        bis     r7,r24,r7       C first result limb of the current group
                    102:        stq     r6,-32(r16)
                    103:        bis     r8,r21,r8       C second result limb of the current group
                    104:
                    105:        srl     r3,r20,r5
                    106:        unop    C ldq   r31,-96(r17)
                    107:        sll     r3,r19,r23
                    108:        subq    r16,32,r16      C result pointer moves down four limbs
                    109:
                    110:        srl     r4,r20,r6
                    111:        ldq     r3,-96(r17)
                    112:        sll     r4,r19,r24      C r24 = new pending high bits
                    113:        ldq     r4,-104(r17)
                    114:
                    115:        subq    r17,32,r17      C source pointer moves down four limbs
                    116:        bne     r18,$Loop       C loop while limbs remain to be loaded
                    117: C cool down phase 2/1: store the finished group and shift the last loaded group
                    118: $Lend2:        stq     r7,-8(r16)
                    119:        bis     r5,r22,r5
                    120:        stq     r8,-16(r16)
                    121:        bis     r6,r23,r6
                    122:        srl     r1,r20,r7
                    123:        sll     r1,r19,r21
                    124:        srl     r2,r20,r8
                    125:        sll     r2,r19,r22
                    126:        stq     r5,-24(r16)
                    127:        bis     r7,r24,r7
                    128:        stq     r6,-32(r16)
                    129:        bis     r8,r21,r8
                    130:        srl     r3,r20,r5
                    131:        sll     r3,r19,r23
                    132:        srl     r4,r20,r6
                    133:        sll     r4,r19,r24
                    134: C cool down phase 2/2: store the last group
                    135:        stq     r7,-40(r16)
                    136:        bis     r5,r22,r5
                    137:        stq     r8,-48(r16)
                    138:        bis     r6,r23,r6
                    139:        stq     r5,-56(r16)
                    140:        stq     r6,-64(r16)
                    141: C cool down phase 2/3: store the final limb, which is r4 shifted left by cnt
                    142:        stq     r24,-72(r16)
                    143:        ret     r31,(r26),1     C return to the address in r26, result is in r0
                    144:
                    145: C cool down phase 1/1: only one group was loaded, shift it
                    146: $Lend1:        srl     r1,r20,r7
                    147:        sll     r1,r19,r21
                    148:        srl     r2,r20,r8
                    149:        sll     r2,r19,r22
                    150:        srl     r3,r20,r5
                    151:        bis     r7,r24,r7
                    152:        sll     r3,r19,r23
                    153:        bis     r8,r21,r8
                    154:        srl     r4,r20,r6
                    155:        sll     r4,r19,r24
                    156: C cool down phase 1/2: store the group, then the final limb held in r24
                    157:        stq     r7,-8(r16)
                    158:        bis     r5,r22,r5
                    159:        stq     r8,-16(r16)
                    160:        bis     r6,r23,r6
                    161:        stq     r5,-24(r16)
                    162:        stq     r6,-32(r16)
                    163:        stq     r24,-40(r16)
                    164:        ret     r31,(r26),1     C return to the address in r26, result is in r0
                    165:
                    166: $Lend: stq     r24,-8(r16)     C only the final limb remains, store it
                    167:        ret     r31,(r26),1     C return to the address in r26, result is in r0
                    168: EPILOGUE(mpn_lshift)
                    169: ASM_END()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>