[BACK]Return to add_n.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / pa64

Annotation of OpenXM_contrib/gmp/mpn/pa64/add_n.asm, Revision 1.1

1.1     ! ohara       1: dnl  HP-PA 2.0 mpn_add_n -- Add two limb vectors of the same length > 0 and
        !             2: dnl  store sum in a third limb vector.
        !             3:
        !             4: dnl  Copyright 1997, 2000, 2002 Free Software Foundation, Inc.
        !             5:
        !             6: dnl  This file is part of the GNU MP Library.
        !             7:
        !             8: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: dnl  it under the terms of the GNU Lesser General Public License as published
        !            10: dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
        !            11: dnl  your option) any later version.
        !            12:
        !            13: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: dnl  License for more details.
        !            17:
        !            18: dnl  You should have received a copy of the GNU Lesser General Public License
        !            19: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: dnl  MA 02111-1307, USA.
        !            22:
        !            23:
        !            24: dnl  This runs at 2 cycles/limb on PA8000 and 1.75 cycles/limb on PA8500.  It
        !            25: dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
        !            26: dnl  with PA8500.  The problem now is stalling of the first ADD,DC after LDO,
        !            27: dnl  where the processor gets confused about where carry comes from.
        !            28:
        !            29: include(`../config.m4')
        !            30:
        !            31: dnl INPUT PARAMETERS: rp = sum destination, up and vp = source limb vectors, n = limb count
        !            32: define(`rp',`%r26')
        !            33: define(`up',`%r25')
        !            34: define(`vp',`%r24')
        !            35: define(`n',`%r23')
        !            36:
        !            37: ifdef(`HAVE_ABI_2_0w',
        !            38: `       .level  2.0W
        !            39: ',`     .level  2.0N
        !            40: ')
        !            41: PROLOGUE(mpn_add_n)
        !            42:        sub             %r0, n, %r22            C r22 = -n
        !            43:        depw,z          %r22, 30, 3, %r28       C r28 = 2 * (-n & 7)
        !            44:        depw,z          %r22, 28, 3, %r22       C r22 = 8 * (-n & 7)
        !            45:        sub             up, %r22, up            C offset up
        !            46:        sub             vp, %r22, vp            C offset vp
        !            47:        sub             rp, %r22, rp            C offset rp
        !            48:        blr             %r28, %r0               C branch into loop
        !            49:        add             %r0, %r0, %r0           C reset carry
        !            50:
        !            51: L(loop)        ldd             0(up), %r20             C 8-way unrolled: each group of 4 insns adds one limb
        !            52:        ldd             0(vp), %r31
        !            53:        add,dc          %r20, %r31, %r20        C add with carry in, carry out feeds the next add,dc
        !            54:        std             %r20, 0(rp)
        !            55: L(7)   ldd             8(up), %r21             C L(7)..L(1) mark the mid-loop entry points for the blr above
        !            56:        ldd             8(vp), %r19
        !            57:        add,dc          %r21, %r19, %r21
        !            58:        std             %r21, 8(rp)
        !            59: L(6)   ldd             16(up), %r20
        !            60:        ldd             16(vp), %r31
        !            61:        add,dc          %r20, %r31, %r20
        !            62:        std             %r20, 16(rp)
        !            63: L(5)   ldd             24(up), %r21
        !            64:        ldd             24(vp), %r19
        !            65:        add,dc          %r21, %r19, %r21
        !            66:        std             %r21, 24(rp)
        !            67: L(4)   ldd             32(up), %r20
        !            68:        ldd             32(vp), %r31
        !            69:        add,dc          %r20, %r31, %r20
        !            70:        std             %r20, 32(rp)
        !            71: L(3)   ldd             40(up), %r21
        !            72:        ldd             40(vp), %r19
        !            73:        add,dc          %r21, %r19, %r21
        !            74:        std             %r21, 40(rp)
        !            75: L(2)   ldd             48(up), %r20
        !            76:        ldd             48(vp), %r31
        !            77:        add,dc          %r20, %r31, %r20
        !            78:        std             %r20, 48(rp)
        !            79: L(1)   ldd             56(up), %r21
        !            80:        ldd             56(vp), %r19
        !            81:        add,dc          %r21, %r19, %r21
        !            82:        ldo             64(up), up              C up += 64 bytes (8 limbs)
        !            83:        std             %r21, 56(rp)
        !            84:        ldo             64(vp), vp              C vp += 64 bytes (8 limbs)
        !            85:        addib,>         -8, n, L(loop)          C n -= 8, repeat while n > 0
        !            86:        ldo             64(rp), rp              C rp += 64 bytes (8 limbs)
        !            87:
        !            88:        add,dc          %r0, %r0, %r29          C r29 = 0 + 0 + carry out of the last limb
        !            89:        bve             (%r2)                   C return via r2
        !            90: ifdef(`HAVE_ABI_2_0w',
        !            91: `      copy            %r29, %r28
        !            92: ',`    ldi             0, %r28
        !            93: ')
        !            94: EPILOGUE(mpn_add_n)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>