[BACK]Return to sub_n.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / pa64

Annotation of OpenXM_contrib/gmp/mpn/pa64/sub_n.asm, Revision 1.1.1.1

1.1       ohara       1: dnl  HP-PA 2.0 mpn_sub_n -- Subtract two limb vectors of the same length > 0
                      2: dnl  and store difference in a third limb vector.
                      3:
                      4: dnl  Copyright 1997, 2000, 2002 Free Software Foundation, Inc.
                      5:
                      6: dnl  This file is part of the GNU MP Library.
                      7:
                      8: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
                      9: dnl  it under the terms of the GNU Lesser General Public License as published
                     10: dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
                     11: dnl  your option) any later version.
                     12:
                     13: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
                     14: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     16: dnl  License for more details.
                     17:
                     18: dnl  You should have received a copy of the GNU Lesser General Public License
                     19: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: dnl  MA 02111-1307, USA.
                     22:
                     23:
                     24: dnl  This runs at 2 cycles/limb on PA8000 and 1.75 cycles/limb on PA8500.  It
                     25: dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
                     26: dnl  with PA8500.  The problem now is stalling of the first SUB,DB after LDO,
                     27: dnl  where the processor gets confused about where carry comes from.
                     28:
                     29: include(`../config.m4')
                     30:
                     31: dnl INPUT PARAMETERS: rp = destination limbs, up = minuend limbs, vp = subtrahend limbs, n = limb count (> 0)
                     32: define(`rp',`%r26')
                     33: define(`up',`%r25')
                     34: define(`vp',`%r24')
                     35: define(`n',`%r23')
                     36:
                     37: ifdef(`HAVE_ABI_2_0w',
                     38: `       .level  2.0W
                     39: ',`     .level  2.0N
                     40: ')
                     41: PROLOGUE(mpn_sub_n)
                     42:        sub             %r0, n, %r22            C r22 = -n
                     43:        depw,z          %r22, 30, 3, %r28       C r28 = 2 * (-n & 7)
                     44:        depw,z          %r22, 28, 3, %r22       C r22 = 8 * (-n & 7)
                     45:        sub             up, %r22, up            C offset up
                     46:        sub             vp, %r22, vp            C offset vp
                     47:        blr             %r28, %r0               C branch into loop, skipping (-n & 7) limb slots
                     48:        sub             rp, %r22, rp            C offset rp and set carry (blr delay slot, carry set = no borrow)
                     49:
                     50: L(loop)        ldd             0(up), %r20             C loop top, also entry for n mod 8 = 0
                     51:        ldd             0(vp), %r31
                     52:        sub,db          %r20, %r31, %r20        C limb = u - v - borrow, borrow chains to next slot
                     53:        std             %r20, 0(rp)
                     54: L(7)   ldd             8(up), %r21             C entry for n mod 8 = 7
                     55:        ldd             8(vp), %r19
                     56:        sub,db          %r21, %r19, %r21
                     57:        std             %r21, 8(rp)
                     58: L(6)   ldd             16(up), %r20            C entry for n mod 8 = 6
                     59:        ldd             16(vp), %r31
                     60:        sub,db          %r20, %r31, %r20
                     61:        std             %r20, 16(rp)
                     62: L(5)   ldd             24(up), %r21            C entry for n mod 8 = 5
                     63:        ldd             24(vp), %r19
                     64:        sub,db          %r21, %r19, %r21
                     65:        std             %r21, 24(rp)
                     66: L(4)   ldd             32(up), %r20            C entry for n mod 8 = 4
                     67:        ldd             32(vp), %r31
                     68:        sub,db          %r20, %r31, %r20
                     69:        std             %r20, 32(rp)
                     70: L(3)   ldd             40(up), %r21            C entry for n mod 8 = 3
                     71:        ldd             40(vp), %r19
                     72:        sub,db          %r21, %r19, %r21
                     73:        std             %r21, 40(rp)
                     74: L(2)   ldd             48(up), %r20            C entry for n mod 8 = 2
                     75:        ldd             48(vp), %r31
                     76:        sub,db          %r20, %r31, %r20
                     77:        std             %r20, 48(rp)
                     78: L(1)   ldd             56(up), %r21            C entry for n mod 8 = 1
                     79:        ldd             56(vp), %r19
                     80:        sub,db          %r21, %r19, %r21        C completer must follow the comma directly, as in the other slots
                     81:        ldo             64(up), up              C advance up by 8 limbs (64 bytes)
                     82:        std             %r21, 56(rp)
                     83:        ldo             64(vp), vp              C advance vp by 8 limbs
                     84:        addib,>         -8, n, L(loop)          C n -= 8, loop while n > 0
                     85:        ldo             64(rp), rp              C advance rp by 8 limbs (addib delay slot)
                     86:
                     87:        add,dc          %r0, %r0, %r29          C r29 = carry out of the last sub,db
                     88:        subi            1, %r29, %r29           C r29 = 1 - carry = borrow out
                     89:        bve             (%r2)                   C return via %r2, the ifdef below supplies the delay slot insn
                     90: ifdef(`HAVE_ABI_2_0w',
                     91: `      copy            %r29, %r28
                     92: ',`    ldi             0, %r28
                     93: ')
                     94: EPILOGUE(mpn_sub_n)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>