OpenXM_contrib/gmp/mpn/sparc64/add_n.asm - annotate

Return to add_n.asm CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / sparc64
Annotation of OpenXM_contrib/gmp/mpn/sparc64/add_n.asm, Revision 1.1.1.1

1.1       maekawa     1: ! SPARC v9 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
                      2: ! sum in a third limb vector.
                      3:
                      4: ! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
                      5:
                      6: ! This file is part of the GNU MP Library.
                      7:
                      8: ! The GNU MP Library is free software; you can redistribute it and/or modify
                      9: ! it under the terms of the GNU Lesser General Public License as published by
                     10: ! the Free Software Foundation; either version 2.1 of the License, or (at your
                     11: ! option) any later version.
                     12:
                     13: ! The GNU MP Library is distributed in the hope that it will be useful, but
                     14: ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15: ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     16: ! License for more details.
                     17:
                     18: ! You should have received a copy of the GNU Lesser General Public License
                     19: ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: ! MA 02111-1307, USA.
                     22:
                     23:
                     24: ! INPUT PARAMETERS
                     25: ! res_ptr      %o0
                     26: ! s1_ptr       %o1
                     27: ! s2_ptr       %o2
                     28: ! size         %o3
                     29:
                     30: include(`../config.m4')
                         ! NOTE(review): the setup and function-framing macros invoked in the rest of
                         ! this file are presumably supplied by the config file pulled in on the line
                         ! above -- confirm against the build tree.
                     31:
                     32: ASM_START()
                         ! Tell the assembler that %g2 and %g3 are used as scratch registers here.
                     33:        .register       %g2,#scratch
                     34:        .register       %g3,#scratch
                     35: PROLOGUE(mpn_add_n)
                         ! Adds the limb vectors at s1_ptr and s2_ptr (64-bit limbs, size of them each),
                         ! writes the sums to res_ptr and returns the final carry, 0 or 1.
                         !
                         ! Register roles once the save below has run (the arguments then sit in %i0-%i3):
                         !   %i0 = res_ptr, %i1 = s1_ptr, %i2 = s2_ptr, each advanced as limbs are consumed
                         !   %i3 = limb counter, kept biased by -4 so that its sign drives the branches
                         !   %l0-%l3 = loaded s1 limbs, %l4-%l7 = loaded s2 limbs
                         !   %g1-%g4 = sum limbs waiting to be stored
                         !   %o0-%o5, %o7, %g5 = temporaries holding upper halves of limbs
                         !
                         ! Carry scheme: addccc takes its carry-in from the 32-bit (icc) carry flag, while
                         ! the carry-out of a 64-bit limb add appears at bit 63.  Every limb therefore uses
                         ! two addccc instructions.  The first forms the 64-bit sum.  The second adds the
                         ! upper 32-bit halves of both operands (extracted with srlx by 32) plus the carry
                         ! out of the lower halves, and throws the result away in %g0; the icc carry it
                         ! leaves behind equals the carry out of bit 63, ready for the next limb.
                         ! Nothing between those instructions, nor the loop control (plain add, and
                         ! branches that test a register), writes the condition codes, so the carry
                         ! survives from limb to limb and across loop passes.
                         !
                         ! NOTE(review): this file appears to be run through m4 before assembly, so
                         ! comment text should stay free of quote characters and macro names.
                     36:
                     37: ! 12 mem ops >= 12 cycles
                     38: ! 8 shift insn >= 8 cycles
                     39: ! 8 addccc, executing alone, +8 cycles
                     40: ! Unrolling not mandatory...perhaps 2-way is best?
                     41: ! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl
                     42: ! All in all, it runs at 5 cycles/limb
                     43:
                         ! Open a new register window with a 160-byte stack frame; the incoming
                         ! %o0-%o3 become %i0-%i3.
                     44:        save    %sp,-160,%sp
                     45:
                         ! 0 + 0 with condition codes updated: starts the chain with the carry clear.
                     46:        addcc   %g0,%g0,%g0
                     47:
                         ! %i3 = size - 4.  Negative means fewer than four limbs in total, so go
                         ! straight to the one-limb-at-a-time tail.  The nop fills the branch delay slot.
                     48:        add     %i3,-4,%i3
                     49:        brlz,pn %i3,L(there)
                     50:        nop
                     51:
                         ! Preload the first four limbs of each source, then step both source
                         ! pointers past them (4 limbs * 8 bytes = 32).
                     52:        ldx     [%i1+0],%l0
                     53:        ldx     [%i2+0],%l4
                     54:        ldx     [%i1+8],%l1
                     55:        ldx     [%i2+8],%l5
                     56:        ldx     [%i1+16],%l2
                     57:        ldx     [%i2+16],%l6
                     58:        ldx     [%i1+24],%l3
                     59:        ldx     [%i2+24],%l7
                     60:        add     %i1,32,%i1
                     61:        add     %i2,32,%i2
                     62:
                         ! %i3 = size - 8.  Negative means no further complete group of four can be
                         ! loaded, so finish the preloaded group at skip without entering the main loop.
                     63:        add     %i3,-4,%i3
                     64:        brlz,pn %i3,L(skip)
                     65:        nop
                     66:        b       L(loop1)        ! jump instead of executing many NOPs
                     67:        nop
                     68:        ALIGN(32)
                     69: !---------  Start main loop ---------
                     70: L(loop1):
                         ! Each pass adds the four limb pairs loaded earlier while loading the next
                         ! four pairs into the same registers, then stores the four sums.
                         ! NOTE(review): the lone "!-" lines presumably mark the intended issue groups
                         ! mentioned in the scheduling notes near the top of the function.
                     71:        addccc  %l0,%l4,%g1
                     72: !-
                     73:        srlx    %l0,32,%o0
                     74:        ldx     [%i1+0],%l0
                     75: !-
                     76:        srlx    %l4,32,%o4
                     77:        ldx     [%i2+0],%l4
                     78: !-
                     79:        addccc  %o0,%o4,%g0
                     80: !-
                     81:        addccc  %l1,%l5,%g2
                     82: !-
                     83:        srlx    %l1,32,%o1
                     84:        ldx     [%i1+8],%l1
                     85: !-
                     86:        srlx    %l5,32,%o5
                     87:        ldx     [%i2+8],%l5
                     88: !-
                     89:        addccc  %o1,%o5,%g0
                     90: !-
                     91:        addccc  %l2,%l6,%g3
                     92: !-
                     93:        srlx    %l2,32,%o2
                     94:        ldx     [%i1+16],%l2
                     95: !-
                         ! %g5 stands in for the %o6 that the register pattern would suggest,
                         ! presumably to leave %o6 (the stack pointer) untouched.
                     96:        srlx    %l6,32,%g5      ! asymmetry
                     97:        ldx     [%i2+16],%l6
                     98: !-
                     99:        addccc  %o2,%g5,%g0
                    100: !-
                    101:        addccc  %l3,%l7,%g4
                    102: !-
                    103:        srlx    %l3,32,%o3
                    104:        ldx     [%i1+24],%l3
                    105:        add     %i1,32,%i1
                    106: !-
                    107:        srlx    %l7,32,%o7
                    108:        ldx     [%i2+24],%l7
                    109:        add     %i2,32,%i2
                    110: !-
                    111:        addccc  %o3,%o7,%g0
                    112: !-
                    113:        stx     %g1,[%i0+0]
                    114: !-
                    115:        stx     %g2,[%i0+8]
                    116: !-
                    117:        stx     %g3,[%i0+16]
                    118:        add     %i3,-4,%i3
                    119: !-
                    120:        stx     %g4,[%i0+24]
                    121:        add     %i0,32,%i0
                    122:
                         ! %i3 was stepped by -4 above.  Non-negative means at least four limbs are
                         ! still unloaded, so another pass (which loads four more pairs) is safe.
                    123:        brgez,pt        %i3,L(loop1)
                    124:        nop
                    125: !---------  End main loop ---------
                    126: L(skip):
                         ! Same add sequence as the main loop body but with no loads: finishes the
                         ! four limb pairs already held in %l0-%l7 and stores their sums.
                    127:        addccc  %l0,%l4,%g1
                    128:        srlx    %l0,32,%o0
                    129:        srlx    %l4,32,%o4
                    130:        addccc  %o0,%o4,%g0
                    131:        addccc  %l1,%l5,%g2
                    132:        srlx    %l1,32,%o1
                    133:        srlx    %l5,32,%o5
                    134:        addccc  %o1,%o5,%g0
                    135:        addccc  %l2,%l6,%g3
                    136:        srlx    %l2,32,%o2
                    137:        srlx    %l6,32,%g5      ! asymmetry
                    138:        addccc  %o2,%g5,%g0
                    139:        addccc  %l3,%l7,%g4
                    140:        srlx    %l3,32,%o3
                    141:        srlx    %l7,32,%o7
                    142:        addccc  %o3,%o7,%g0
                    143:        stx     %g1,[%i0+0]
                    144:        stx     %g2,[%i0+8]
                    145:        stx     %g3,[%i0+16]
                    146:        stx     %g4,[%i0+24]
                    147:        add     %i0,32,%i0
                    148:
                    149: L(there):
                         ! Undo the -4 bias: %i3 is now the number of leftover limbs (0 to 3 when
                         ! reached through skip, or the whole size when size was under 4).
                         ! Zero leftovers means only the carry remains to be returned.
                    150:        add     %i3,4,%i3
                    151:        brz,pt  %i3,L(end)
                    152:        nop
                    153:
                    154: L(loop2):
                         ! Tail: one limb per pass, using the same two-addccc carry scheme;
                         ! repeats while the leftover count in %i3 is still positive.
                    155:        ldx     [%i1+0],%l0
                    156:        add     %i1,8,%i1
                    157:        ldx     [%i2+0],%l4
                    158:        add     %i2,8,%i2
                    159:        srlx    %l0,32,%g2
                    160:        srlx    %l4,32,%g3
                    161:        addccc  %l0,%l4,%g1
                    162:        addccc  %g2,%g3,%g0
                    163:        stx     %g1,[%i0+0]
                    164:        add     %i0,8,%i0
                    165:        add     %i3,-1,%i3
                    166:        brgz,pt %i3,L(loop2)
                    167:        nop
                    168:
                         ! 0 + 0 + carry: materialise the final carry as the return value in %i0.
                         ! The restore in the delay slot of ret hands it to the caller as %o0.
                    169: L(end):        addc    %g0,%g0,%i0
                    170:        ret
                    171:        restore
                    172: EPILOGUE(mpn_add_n)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>