[BACK]Return to lshift.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / powerpc64

Annotation of OpenXM_contrib/gmp/mpn/powerpc64/lshift.asm, Revision 1.1.1.2

1.1.1.2 ! ohara       1: # PowerPC-64 mpn_lshift -- Shift a number left.
1.1       maekawa     2:
1.1.1.2 ! ohara       3: # Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
1.1       maekawa     4:
                      5: # This file is part of the GNU MP Library.
                      6:
                      7: # The GNU MP Library is free software; you can redistribute it and/or modify
                      8: # it under the terms of the GNU Lesser General Public License as published by
                      9: # the Free Software Foundation; either version 2.1 of the License, or (at your
                     10: # option) any later version.
                     11:
                     12: # The GNU MP Library is distributed in the hope that it will be useful, but
                     13: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     15: # License for more details.
                     16:
                     17: # You should have received a copy of the GNU Lesser General Public License
                     18: # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: # MA 02111-1307, USA.
                     21:
1.1.1.2 ! ohara      22: include(`../config.m4')
        !            23:
        !            24:
        !            25: # ppc630: 1.6375 cycles/limb
        !            26:
1.1       maekawa    27:
                     28: # INPUT PARAMETERS
                     29: # res_ptr      r3
                     30: # s1_ptr       r4
                     31: # size         r5
                     32: # cnt          r6
                     33:
                     34: ASM_START()
                     35: PROLOGUE(mpn_lshift)   # shift the size-limb number at s1_ptr left by cnt bits into res_ptr; r3 returns the bits shifted out of the top limb
                     36:        cmpdi   cr0,r5,20       # more than 20 limbs?
                     37:        sldi    r0,r5,3         # r0 = size * 8 (operand length in bytes, 8-byte limbs)
                     38:        add     r4,r4,r0        # make r4 point at end of s1
                     39:        add     r7,r3,r0        # make r7 point at end of res
                     40:        bgt     .LBIG           # branch if more than 20 limbs
                     41: # Small case (size <= 20): simple loop walking from the top limb downwards, two limbs per pass.
                     42:        mtctr   r5              # copy size into CTR
                     43:        subfic  r8,r6,64        # r8 = 64 - cnt, the complementary right-shift count
                     44:        ldu     r11,-8(r4)      # load first s1 limb
                     45:        srd     r3,r11,r8       # compute function return value
                     46:        bdz     .Lend1          # size == 1: only the final store remains
                     47:
                     48: .Loop: ldu     r10,-8(r4)      # next lower s1 limb
                     49:        sld     r9,r11,r6       # high part: previous limb shifted left by cnt
                     50:        srd     r12,r10,r8      # low part: top cnt bits of the new limb
                     51:        or      r9,r9,r12
                     52:        stdu    r9,-8(r7)       # store result limb, moving r7 down
                     53:        bdz     .Lend2          # source exhausted; last loaded limb is in r10
                     54:        ldu     r11,-8(r4)      # same step again with the roles of r10 and r11 swapped
                     55:        sld     r9,r10,r6
                     56:        srd     r12,r11,r8
                     57:        or      r9,r9,r12
                     58:        stdu    r9,-8(r7)
                     59:        bdnz    .Loop           # falls through when the last loaded limb is in r11
                     60:
                     61: .Lend1:        sld     r0,r11,r6       # lowest result limb: low cnt bits are zero-filled
                     62:        std     r0,-8(r7)
                     63:        blr
                     64: .Lend2:        sld     r0,r10,r6       # as .Lend1, but the last limb is in r10
                     65:        std     r0,-8(r7)
                     66:        blr
                     67:
                     68: .LBIG: # Large case (size > 20): 4-way unrolled loop that needs r24-r31 as extra registers.
1.1.1.2 ! ohara      69:        std     r24,-64(r1)     # save r24-r31 at negative offsets from r1 (r1 itself is not adjusted)
        !            70:        std     r25,-56(r1)
        !            71:        std     r26,-48(r1)
        !            72:        std     r27,-40(r1)
        !            73:        std     r28,-32(r1)
        !            74:        std     r29,-24(r1)
        !            75:        std     r30,-16(r1)
        !            76:        std     r31,-8(r1)
1.1       maekawa    77:        ldu     r9,-8(r4)       # load the top s1 limb
                     78:        subfic  r8,r6,64        # r8 = 64 - cnt, the complementary right-shift count
                     79:        srd     r3,r9,r8        # compute function return value
                     80:        sld     r0,r9,r6        # r0 = pending high part, OR-ed into the next stored limb
                     81:        addi    r5,r5,-1        # r5 = number of limbs still to be loaded
                     82:
                     83:        andi.   r10,r5,3        # count for spill loop
                     84:        beq     .Le             # remaining count is already a multiple of 4
                     85:        mtctr   r10
                     86:        ldu     r28,-8(r4)
                     87:        bdz     .Lxe0           # only one spill limb: skip the loop
                     88:
                     89: .Loop0:        sld     r12,r28,r6      # high part to carry into the following limb
                     90:        srd     r24,r28,r8      # low part completing the pending limb in r0
                     91:        ldu     r28,-8(r4)
                     92:        or      r24,r0,r24
                     93:        stdu    r24,-8(r7)
                     94:        mr      r0,r12          # new pending high part
                     95:        bdnz    .Loop0          # taken at most once!
                     96:
                     97: .Lxe0: sld     r12,r28,r6      # last spill limb: same step without a further load
                     98:        srd     r24,r28,r8
                     99:        or      r24,r0,r24
                     100:        stdu    r24,-8(r7)
                     101:        mr      r0,r12
                     102:
                     103: .Le:   srdi    r5,r5,2         # count for unrolled loop
                     104:        addi    r5,r5,-1        # one group fewer: the last group is handled after the loop
                     105:        mtctr   r5
                     106:        ld      r28,-8(r4)      # preload the first group of four limbs
                     107:        ld      r29,-16(r4)
                     108:        ld      r30,-24(r4)
                     109:        ldu     r31,-32(r4)     # ...and move r4 down by four limbs
                     110: # Each pass shifts the four preloaded limbs, reloads r28-r31 with the next group, then stores four result limbs.
                     111: .LoopU:        sld     r9,r28,r6
                     112:        srd     r24,r28,r8
                     113:        ld      r28,-8(r4)
                     114:        sld     r10,r29,r6
                     115:        srd     r25,r29,r8
                     116:        ld      r29,-16(r4)
                     117:        sld     r11,r30,r6
                     118:        srd     r26,r30,r8
                     119:        ld      r30,-24(r4)
                     120:        sld     r12,r31,r6
                     121:        srd     r27,r31,r8
                     122:        ldu     r31,-32(r4)
                     123:        or      r24,r0,r24      # pending high part from the previous limb completes the first result limb
                     124:        std     r24,-8(r7)
                     125:        or      r25,r9,r25
                     126:        std     r25,-16(r7)
                     127:        or      r26,r10,r26
                     128:        std     r26,-24(r7)
                     129:        or      r27,r11,r27
                     130:        stdu    r27,-32(r7)     # ...and move r7 down by four limbs
                     131:        mr      r0,r12          # high part of the lowest limb becomes the new pending value
                     132:        bdnz    .LoopU
                     133: # Final group: same computation as the loop body, but with no further loads.
                     134:        sld     r9,r28,r6
                     135:        srd     r24,r28,r8
                     136:        sld     r10,r29,r6
                     137:        srd     r25,r29,r8
                     138:        sld     r11,r30,r6
                     139:        srd     r26,r30,r8
                     140:        sld     r12,r31,r6
                     141:        srd     r27,r31,r8
                     142:        or      r24,r0,r24
                     143:        std     r24,-8(r7)
                     144:        or      r25,r9,r25
                     145:        std     r25,-16(r7)
                     146:        or      r26,r10,r26
                     147:        std     r26,-24(r7)
                     148:        or      r27,r11,r27
                     149:        stdu    r27,-32(r7)
                     150:        mr      r0,r12
                     151:
                     152:        std     r0,-8(r7)       # lowest result limb: low cnt bits are zero-filled
1.1.1.2 ! ohara     153:        ld      r24,-64(r1)     # restore r24-r31 saved at .LBIG
        !           154:        ld      r25,-56(r1)
        !           155:        ld      r26,-48(r1)
        !           156:        ld      r27,-40(r1)
        !           157:        ld      r28,-32(r1)
        !           158:        ld      r29,-24(r1)
        !           159:        ld      r30,-16(r1)
        !           160:        ld      r31,-8(r1)
1.1       maekawa   161:        blr
                     162: EPILOGUE(mpn_lshift)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>