[BACK]Return to add_n.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / sparc64

Annotation of OpenXM_contrib/gmp/mpn/sparc64/add_n.asm, Revision 1.1

1.1     ! maekawa     1: ! SPARC v9 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
        !             2: ! sum in a third limb vector.
        !             3:
        !             4: ! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
        !             5:
        !             6: ! This file is part of the GNU MP Library.
        !             7:
        !             8: ! The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: ! it under the terms of the GNU Lesser General Public License as published by
        !            10: ! the Free Software Foundation; either version 2.1 of the License, or (at your
        !            11: ! option) any later version.
        !            12:
        !            13: ! The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: ! License for more details.
        !            17:
        !            18: ! You should have received a copy of the GNU Lesser General Public License
        !            19: ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: ! MA 02111-1307, USA.
        !            22:
        !            23:
        !            24: ! INPUT PARAMETERS
        !            25: ! res_ptr      %o0
        !            26: ! s1_ptr       %o1
        !            27: ! s2_ptr       %o2
        !            28: ! size         %o3
        !            29:
        !            30: include(`../config.m4')
        !            31:
        !            32: ASM_START()
        !            33:        .register       %g2,#scratch    ! %g2 holds the second limb sum in the unrolled code and a temporary in the tail loop
        !            34:        .register       %g3,#scratch    ! %g3 holds the third limb sum in the unrolled code and a temporary in the tail loop
        !            35: PROLOGUE(mpn_add_n)
        !            36: ! After the save below the arguments are read as %i0 = res_ptr, %i1 = s1_ptr, %i2 = s2_ptr, %i3 = size.
        !            37: ! 12 mem ops >= 12 cycles
        !            38: ! 8 shift insn >= 8 cycles
        !            39: ! 8 addccc, executing alone, +8 cycles
        !            40: ! Unrolling not mandatory...perhaps 2-way is best?
        !            41: ! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl
        !            42: ! All in all, it runs at 5 cycles/limb
        !            43: ! The value returned in %i0 is the carry out of the last limb addition (0 or 1).
        !            44:        save    %sp,-160,%sp
        !            45: ! Each limb takes two addccc: one on the full limbs for the sum, then one on their upper 32-bit halves (into %g0) whose carry feeds the next limb.
        !            46:        addcc   %g0,%g0,%g0             ! 0 + 0: start the chain with a clear carry
        !            47: ! NOTE(review): presumably addccc carries through the 32-bit icc carry, hence the second add - confirm in the SPARC V9 manual.
        !            48:        add     %i3,-4,%i3              ! size -= 4
        !            49:        brlz,pn %i3,L(there)            ! fewer than 4 limbs: go straight to the one-limb tail loop
        !            50:        nop                             ! branch delay slot
        !            51: ! Preload the first 4 limbs of s1 into %l0-%l3 and of s2 into %l4-%l7.
        !            52:        ldx     [%i1+0],%l0
        !            53:        ldx     [%i2+0],%l4
        !            54:        ldx     [%i1+8],%l1
        !            55:        ldx     [%i2+8],%l5
        !            56:        ldx     [%i1+16],%l2
        !            57:        ldx     [%i2+16],%l6
        !            58:        ldx     [%i1+24],%l3
        !            59:        ldx     [%i2+24],%l7
        !            60:        add     %i1,32,%i1              ! s1_ptr now points past the preloaded limbs
        !            61:        add     %i2,32,%i2              ! s2_ptr likewise
        !            62: ! In loop1 the !- lines appear to separate the issue tuples mentioned in the notes above.
        !            63:        add     %i3,-4,%i3              ! count -= 4 for the limbs the first loop1 pass would load
        !            64:        brlz,pn %i3,L(skip)             ! fewer than 4 limbs left to load: only drain the preloaded ones
        !            65:        nop                             ! branch delay slot
        !            66:        b       L(loop1)        ! jump instead of executing many NOPs
        !            67:        nop
        !            68:        ALIGN(32)
        !            69: !---------  Start main loop ---------
        !            70: L(loop1):                              ! each pass adds the 4 limb pairs already in %l0-%l7 and loads the next 4
        !            71:        addccc  %l0,%l4,%g1             ! first limb sum, carry-in from the previous addcc/addccc
        !            72: !-
        !            73:        srlx    %l0,32,%o0              ! upper 32 bits of the s1 limb
        !            74:        ldx     [%i1+0],%l0             ! reload %l0 for the next pass
        !            75: !-
        !            76:        srlx    %l4,32,%o4              ! upper 32 bits of the s2 limb
        !            77:        ldx     [%i2+0],%l4             ! reload %l4 for the next pass
        !            78: !-
        !            79:        addccc  %o0,%o4,%g0             ! result discarded; only the carry is kept for the next limb
        !            80: !-
        !            81:        addccc  %l1,%l5,%g2             ! second limb sum
        !            82: !-
        !            83:        srlx    %l1,32,%o1
        !            84:        ldx     [%i1+8],%l1
        !            85: !-
        !            86:        srlx    %l5,32,%o5
        !            87:        ldx     [%i2+8],%l5
        !            88: !-
        !            89:        addccc  %o1,%o5,%g0             ! carry for the third limb
        !            90: !-
        !            91:        addccc  %l2,%l6,%g3             ! third limb sum
        !            92: !-
        !            93:        srlx    %l2,32,%o2
        !            94:        ldx     [%i1+16],%l2
        !            95: !-
        !            96:        srlx    %l6,32,%g5      ! asymmetry: %g5 is used where the register pattern would call for %o6, which is the stack pointer
        !            97:        ldx     [%i2+16],%l6
        !            98: !-
        !            99:        addccc  %o2,%g5,%g0             ! carry for the fourth limb
        !           100: !-
        !           101:        addccc  %l3,%l7,%g4             ! fourth limb sum
        !           102: !-
        !           103:        srlx    %l3,32,%o3
        !           104:        ldx     [%i1+24],%l3
        !           105:        add     %i1,32,%i1              ! advance s1_ptr by 4 limbs
        !           106: !-
        !           107:        srlx    %l7,32,%o7
        !           108:        ldx     [%i2+24],%l7
        !           109:        add     %i2,32,%i2              ! advance s2_ptr by 4 limbs
        !           110: !-
        !           111:        addccc  %o3,%o7,%g0             ! carry out of this group of 4, consumed by the next addccc
        !           112: !-
        !           113:        stx     %g1,[%i0+0]             ! store the 4 sums
        !           114: !-
        !           115:        stx     %g2,[%i0+8]
        !           116: !-
        !           117:        stx     %g3,[%i0+16]
        !           118:        add     %i3,-4,%i3              ! count -= 4; add without the cc suffix leaves the carry intact
        !           119: !-
        !           120:        stx     %g4,[%i0+24]
        !           121:        add     %i0,32,%i0              ! advance res_ptr by 4 limbs
        !           122:
        !           123:        brgez,pt        %i3,L(loop1)    ! loop while at least 4 more limbs remain to be loaded
        !           124:        nop                             ! branch delay slot
        !           125: !---------  End main loop ---------
        !           126: L(skip):                               ! drain: add the 4 limb pairs already in %l0-%l7, no further loads
        !           127:        addccc  %l0,%l4,%g1
        !           128:        srlx    %l0,32,%o0
        !           129:        srlx    %l4,32,%o4
        !           130:        addccc  %o0,%o4,%g0
        !           131:        addccc  %l1,%l5,%g2
        !           132:        srlx    %l1,32,%o1
        !           133:        srlx    %l5,32,%o5
        !           134:        addccc  %o1,%o5,%g0
        !           135:        addccc  %l2,%l6,%g3
        !           136:        srlx    %l2,32,%o2
        !           137:        srlx    %l6,32,%g5      ! asymmetry: %g5 again stands in for %o6, which is the stack pointer
        !           138:        addccc  %o2,%g5,%g0
        !           139:        addccc  %l3,%l7,%g4
        !           140:        srlx    %l3,32,%o3
        !           141:        srlx    %l7,32,%o7
        !           142:        addccc  %o3,%o7,%g0
        !           143:        stx     %g1,[%i0+0]
        !           144:        stx     %g2,[%i0+8]
        !           145:        stx     %g3,[%i0+16]
        !           146:        stx     %g4,[%i0+24]
        !           147:        add     %i0,32,%i0              ! advance res_ptr past the 4 stored limbs
        !           148: ! Tail: %i3 + 4 is the number of limbs (0 to 3) not handled by the unrolled code above.
        !           149: L(there):
        !           150:        add     %i3,4,%i3               ! undo the last subtraction of 4
        !           151:        brz,pt  %i3,L(end)              ! nothing left to add
        !           152:        nop                             ! branch delay slot
        !           153: ! One limb per pass, using the same two-addccc carry scheme as the unrolled code.
        !           154: L(loop2):
        !           155:        ldx     [%i1+0],%l0
        !           156:        add     %i1,8,%i1
        !           157:        ldx     [%i2+0],%l4
        !           158:        add     %i2,8,%i2
        !           159:        srlx    %l0,32,%g2              ! upper 32 bits of the s1 limb
        !           160:        srlx    %l4,32,%g3              ! upper 32 bits of the s2 limb
        !           161:        addccc  %l0,%l4,%g1             ! limb sum with carry-in
        !           162:        addccc  %g2,%g3,%g0             ! result discarded; leaves the carry for the next limb
        !           163:        stx     %g1,[%i0+0]
        !           164:        add     %i0,8,%i0
        !           165:        add     %i3,-1,%i3              ! one limb fewer to go
        !           166:        brgz,pt %i3,L(loop2)            ! repeat while limbs remain
        !           167:        nop                             ! branch delay slot
        !           168:
        !           169: L(end):        addc    %g0,%g0,%i0     ! return value = 0 + 0 + final carry
        !           170:        ret
        !           171:        restore                         ! delay slot of ret: pops the register window opened by save
        !           172: EPILOGUE(mpn_add_n)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>