[BACK]Return to rshift.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / alpha / ev5

Annotation of OpenXM_contrib/gmp/mpn/alpha/ev5/rshift.asm, Revision 1.1.1.1

1.1       maekawa     1: dnl  Alpha EV5 __gmpn_rshift -- Shift a number right.
                      2:
                      3: dnl  Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
                      4:
                      5: dnl  This file is part of the GNU MP Library.
                      6:
                      7: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
                      8: dnl  it under the terms of the GNU Lesser General Public License as published by
                      9: dnl  the Free Software Foundation; either version 2.1 of the License, or (at your
                     10: dnl  option) any later version.
                     11:
                     12: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
                     13: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     15: dnl  License for more details.
                     16:
                     17: dnl  You should have received a copy of the GNU Lesser General Public License
                     18: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: dnl  MA 02111-1307, USA.
                     21:
                     22: include(`../config.m4')
                     23:
                     24: dnl  INPUT PARAMETERS
                     25: dnl  res_ptr   r16
                     26: dnl  s1_ptr    r17
                     27: dnl  size      r18
                     28: dnl  cnt       r19
                     29:
                     30: dnl  This code runs at 3.25 cycles/limb on the EV5.
                     31:
                     32: ASM_START()
                     33: PROLOGUE(mpn_rshift)
                         C Shift the size-limb number at s1_ptr right by cnt bits and store the
                         C size result limbs at res_ptr, walking from the lowest address upward.
                         C Each result limb is (limb[i] >> cnt) | (limb[i+1] << (64-cnt)); the
                         C last result limb is just (top limb >> cnt).  r0 receives the bits
                         C shifted out of the first limb, placed at the high end of the word.
                         C
                         C Register roles as used below:
                         C   r16 res_ptr, r17 s1_ptr, r18 remaining limb count, r19 cnt
                         C   r20 = -cnt, used as the count for the left shifts
                         C   r24 = (most recently consumed limb >> cnt), carried forward
                         C   r1-r4 loaded source limbs, r5-r8 result limbs being assembled
                         C   r21-r23 right-shifted parts waiting to be merged
                         C
                         C Structure: a one-limb-at-a-time loop peels (size-1) mod 4 limbs, then
                         C a software pipelined loop handles the rest four limbs per iteration.
                         C NOTE(review): the code appears to require size >= 1 and 0 < cnt < 64
                         C (size = 0 would make the peel count wrong) -- confirm with callers.
                     34:        ldq     r4,0(r17)       C load first limb
                     35:        subq    r31,r19,r20     C r20 = 0 - cnt; shifts use only the low 6 bits, so this acts as 64-cnt
                     36:        subq    r18,1,r18       C r18 = size - 1 = limbs that follow the first one
                     37:        and     r18,4-1,r28     C number of limbs in first loop
                     38:        sll     r4,r20,r0       C compute function result
                     39:
                     40:        beq     r28,$L0         C nothing to peel when size-1 is a multiple of 4
                     41:        subq    r18,r28,r18     C r18 = limbs left for the unrolled code, a multiple of 4
                     42:
                     43:        ALIGN(8)
                     44: $Loop0:        ldq     r3,8(r17)       C r3 = next source limb
                     45:        addq    r16,8,r16       C advance res_ptr early; the store below uses offset -8
                     46:        srl     r4,r19,r5       C r5 = current limb >> cnt
                     47:        addq    r17,8,r17       C advance s1_ptr
                     48:        subq    r28,1,r28       C one fewer limb to peel
                     49:        sll     r3,r20,r6       C r6 = next limb << (64-cnt)
                     50:        bis     r3,r3,r4        C next limb becomes the current limb (register move)
                     51:        bis     r5,r6,r8        C merge the two parts into one result limb
                     52:        stq     r8,-8(r16)      C store it at the slot res_ptr pointed to on loop entry
                     53:        bne     r28,$Loop0
                     54:
                     55: $L0:   srl     r4,r19,r24      C r24 = current limb >> cnt, low part of the next result limb
                     56:        beq     r18,$Lend       C no limbs left: r24 alone is the top result limb
                     57: C warm up phase 1
                     58:        ldq     r1,8(r17)       C load the next group of four source limbs
                     59:        subq    r18,4,r18
                     60:        ldq     r2,16(r17)
                     61:        ldq     r3,24(r17)
                     62:        ldq     r4,32(r17)
                     63:        beq     r18,$Lend1      C that was the last group: finish without further loads
                     64: C warm up phase 2
                     65:        sll     r1,r20,r7       C start assembling four result limbs in r7, r8, r5, r6
                     66:        srl     r1,r19,r21
                     67:        sll     r2,r20,r8
                     68:        ldq     r1,40(r17)      C r1-r4 are reloaded with the following group as each is consumed
                     69:        srl     r2,r19,r22
                     70:        ldq     r2,48(r17)
                     71:        sll     r3,r20,r5       C r5 still lacks its r22 part; merged in the next stage
                     72:        bis     r7,r24,r7       C r7 complete: merge the low part carried in r24
                     73:        srl     r3,r19,r23
                     74:        bis     r8,r21,r8       C r8 complete
                     75:        sll     r4,r20,r6       C r6 still lacks its r23 part; merged in the next stage
                     76:        ldq     r3,56(r17)
                     77:        srl     r4,r19,r24      C new carry for the first limb of the next group
                     78:        ldq     r4,64(r17)
                     79:        subq    r18,4,r18
                     80:        beq     r18,$Lend2      C the group just loaded is the last one: drain the pipeline
                     81:        ALIGN(16)
                     82: C main loop
                     83: $Loop: stq     r7,0(r16)       C store the two limbs completed in the previous stage
                     84:        bis     r5,r22,r5       C complete the third limb of the previous group
                     85:        stq     r8,8(r16)
                     86:        bis     r6,r23,r6       C complete the fourth limb of the previous group
                     87:
                     88:        sll     r1,r20,r7       C begin the current group from the limbs loaded one stage ago
                     89:        subq    r18,4,r18
                     90:        srl     r1,r19,r21
                     91:        unop    C ldq   r31,-96(r17)
                     92:
                     93:        sll     r2,r20,r8
                     94:        ldq     r1,72(r17)      C load ahead for the following stage; s1_ptr is advanced at the loop end
                     95:        srl     r2,r19,r22
                     96:        ldq     r2,80(r17)
                     97:
                     98:        stq     r5,16(r16)      C store the third and fourth limbs of the previous group
                     99:        bis     r7,r24,r7       C uses r24 from the previous group, before it is recomputed below
                    100:        stq     r6,24(r16)
                    101:        bis     r8,r21,r8
                    102:
                    103:        sll     r3,r20,r5
                    104:        unop    C ldq   r31,-96(r17)
                    105:        srl     r3,r19,r23
                    106:        addq    r16,32,r16      C res_ptr += 4 limbs
                    107:
                    108:        sll     r4,r20,r6
                    109:        ldq     r3,88(r17)
                    110:        srl     r4,r19,r24      C carry for the first limb of the next group
                    111:        ldq     r4,96(r17)
                    112:
                    113:        addq    r17,32,r17      C s1_ptr += 4 limbs
                    114:        bne     r18,$Loop
                    115: C cool down phase 2/1
                    116: $Lend2:        stq     r7,0(r16)       C drain: store the group in flight while computing the last group
                    117:        bis     r5,r22,r5
                    118:        stq     r8,8(r16)
                    119:        bis     r6,r23,r6
                    120:        sll     r1,r20,r7       C r1-r4 hold the final four source limbs; no more loads
                    121:        srl     r1,r19,r21
                    122:        sll     r2,r20,r8
                    123:        srl     r2,r19,r22
                    124:        stq     r5,16(r16)
                    125:        bis     r7,r24,r7       C uses r24 from the previous group, before it is recomputed below
                    126:        stq     r6,24(r16)
                    127:        bis     r8,r21,r8
                    128:        sll     r3,r20,r5
                    129:        srl     r3,r19,r23
                    130:        sll     r4,r20,r6
                    131:        srl     r4,r19,r24      C r24 = top source limb >> cnt
                    132: C cool down phase 2/2
                    133:        stq     r7,32(r16)      C store the last group at offsets 32-56; res_ptr is not advanced here
                    134:        bis     r5,r22,r5
                    135:        stq     r8,40(r16)
                    136:        bis     r6,r23,r6
                    137:        stq     r5,48(r16)
                    138:        stq     r6,56(r16)
                    139: C cool down phase 2/3
                    140:        stq     r24,64(r16)     C top result limb has no higher limb to merge
                    141:        ret     r31,(r26),1     C return through r26; result is in r0
                    142:
                    143: C cool down phase 1/1
                    144: $Lend1:        sll     r1,r20,r7       C only one group of four was loaded: compute and store it directly
                    145:        srl     r1,r19,r21
                    146:        sll     r2,r20,r8
                    147:        srl     r2,r19,r22
                    148:        sll     r3,r20,r5
                    149:        bis     r7,r24,r7       C merge the carry left in r24 at $L0
                    150:        srl     r3,r19,r23
                    151:        bis     r8,r21,r8
                    152:        sll     r4,r20,r6
                    153:        srl     r4,r19,r24      C r24 = top source limb >> cnt
                    154: C cool down phase 1/2
                    155:        stq     r7,0(r16)
                    156:        bis     r5,r22,r5
                    157:        stq     r8,8(r16)
                    158:        bis     r6,r23,r6
                    159:        stq     r5,16(r16)
                    160:        stq     r6,24(r16)
                    161:        stq     r24,32(r16)     C top result limb
                    162:        ret     r31,(r26),1     C return through r26; result is in r0
                    163:
                    164: $Lend: stq     r24,0(r16)      C no unrolled work: store the top result limb computed at $L0
                    165:        ret     r31,(r26),1     C return through r26; result is in r0
                    166: EPILOGUE(mpn_rshift)
                    167: ASM_END()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>