OpenXM_contrib/gmp/mpn/ia64/addmul_1.asm - annotate

Return to addmul_1.asm CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / ia64
Annotation of OpenXM_contrib/gmp/mpn/ia64/addmul_1.asm, Revision 1.1

1.1     ! ohara       1: dnl  IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
        !             2: dnl  result to a second limb vector.
        !             3:
        !             4: dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
        !             5:
        !             6: dnl  This file is part of the GNU MP Library.
        !             7:
        !             8: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: dnl  it under the terms of the GNU Lesser General Public License as published
        !            10: dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
        !            11: dnl  your option) any later version.
        !            12:
        !            13: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: dnl  License for more details.
        !            17:
        !            18: dnl  You should have received a copy of the GNU Lesser General Public License
        !            19: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: dnl  MA 02111-1307, USA.
        !            22:
        !            23: include(`../config.m4')
        !            24:
        !            25: C INPUT PARAMETERS
        !            26: C rp = r32
        !            27: C up = r33
        !            28: C n = r34
        !            29: C v = r35
        !            30:
        !            31: C         cycles/limb
        !            32: C Itanium:    4
        !            33: C Itanium 2:  ?
        !            34:
        !            35: C The full speed is reached only for really huge operands.  See README for
        !            36: C possible improvements.
        !            37:
        !            38:
        !            39: ASM_START()
        !            40: PROLOGUE(mpn_addmul_1)  C rp[] += up[] * v over n limbs; r8 gets the final high limb plus carry
        !            41:        .prologue
        !            42:        .save   ar.pfs, r21
        !            43:                alloc           r21 = ar.pfs, 4, 12, 0, 16  C 4 in, 12 local, 0 out, 16 rotating; old ar.pfs kept in r21
        !            44:        .save   ar.lc, r2
        !            45:                mov             r2 = ar.lc  C save loop count, restored before return
        !            46:                mov             r20 = ar.ec  C save epilogue count, restored before return
        !            47:        .save   pr, r22
        !            48:                mov             r22 = pr  C save predicates, restored before return
        !            49:        .body
        !            50: ifdef(`HAVE_ABI_32',
        !            51: `              addp4   r32 = 0, r32
        !            52:                addp4   r33 = 0, r33
        !            53:                sxt4    r34 = r34
        !            54:                ;;
        !            55: ')  C HAVE_ABI_32 only: addp4 turns rp and up into 64-bit addresses, sxt4 sign-extends n
        !            56:   { .mfi;      setf.sig        f6 = r35  C f6 = v, the multiplier used by xma
        !            57:                nop.f           0
        !            58:                adds            r19 = -1, r34           C n - 1
        !            59: } { .mfi;      cmp.ne          p6, p7 = r0, r0  C p6 = 0, p7 = 1: start with no carry
        !            60:                nop.f           0
        !            61:                mov             r18 = r32       ;;  C r18 = rp, used as the store pointer
        !            62: } { .mfi;      mov             r16 = r32  C r16 = rp, used as the load pointer
        !            63:                nop.f           0
        !            64:                mov             ar.lc = r19  C loop count = n - 1
        !            65: } { .mfi;      mov             r17 = r33  C r17 = up, used as the load pointer
        !            66:                nop.f           0
        !            67:                mov             ar.ec = 7  C 7 pipeline stages, predicates p16..p22
        !            68: } { .mii;      cmp.ne          p6, p7 = r0, r0  C same setting as the earlier cmp.ne, repeated
        !            69:                mov             pr.rot = 1<<16  C p16 = 1, all other rotating predicates = 0
        !            70:                mov             r32 = 0                 C clear "carry in"
        !            71: } { .mib;      mov             r33 = 0                 C clear for cmp
        !            72:                mov             r34 = 0                 C clear for cmp
        !            73:                nop.b           0
        !            74: } { .mib;      mov             r35 = 0                 C clear for cmp
        !            75:                mov             r36 = 0                 C clear for cmp
        !            76:                nop.b           0               ;;
        !            77: }
        !            78:                .align  32
        !            79: .Loop:  C the add and cmp below are guarded only by p6/p7, so they also execute while the pipeline fills
        !            80:        .pred.rel "mutex",p6,p7
        !            81:   { .mfi; (p16)        ldf8            f32 = [r17], 8          C  *0,3,6,9,12,15,18 -- load next up limb
        !            82:          (p19) xma.l           f40 = f35, f6, f39      C  0,3,6,*9,12,15,18 -- low 64 bits of up limb * v + rp limb
        !            83:           (p6) add             r14 = r33, r38, 1       C  0,3,6,9,12,15,*18 -- low half + previous high half + carry
        !            84: } { .mfi; (p16)        ldf8            f36 = [r16], 8          C  *0,3,6,9,12,15,18 -- load next rp limb
        !            85:          (p19) xma.hu          f44 = f35, f6, f39      C  0,3,6,*9,12,15,18 -- high 64 bits of the same sum
        !            86:           (p7) add             r14 = r33, r38  ;;      C  0,3,6,9,12,15,*18 -- low half + previous high half
        !            87: } { .mii; (p21)        getf.sig        r32 = f42               C  1,4,7,10,13,*16,19 -- low half to integer register
        !            88:           (p6) cmp.leu         p8, p9 = r14, r33       C  1,4,7,10,13,16,*19 -- with carry in: carry out if r14 <= r33
        !            89:           (p7) cmp.ltu         p8, p9 = r14, r33;;     C  1,4,7,10,13,16,*19 -- without carry in: carry out if r14 < r33
        !            90: }
        !            91:        .pred.rel "mutex",p8,p9
        !            92:   { .mib; (p21)        getf.sig        r36 = f46               C  2,5,8,11,14,*17,20 -- high half to integer register
        !            93:           (p8) cmp.eq          p6, p7 = r0, r0  C carry out: p6 = 1, p7 = 0 for the next add
        !            94:                nop.b           0
        !            95: } { .mib; (p22)        st8             [r18] = r14, 8          C  2,5,8,11,14,17,*20 -- store result limb to rp
        !            96:           (p9) cmp.ne          p6, p7 = r0, r0  C no carry out: p6 = 0, p7 = 1 for the next add
        !            97:                br.ctop.sptk    .Loop           ;;  C rotate registers and predicates, continue while ar.lc/ar.ec allow
        !            98: }
        !            99:        .pred.rel "mutex",p6,p7
        !           100:           (p6) add             r8 = 1, r38  C final carry set: r8 = r38 + 1
        !           101:           (p7) mov             r8 = r38  C final carry clear: r8 = r38
        !           102:                mov             pr = r22,0x1fffe  C restore the saved predicates under mask 0x1fffe
        !           103:                mov             ar.lc = r2  C restore loop count
        !           104:                mov             ar.ec = r20  C restore epilogue count
        !           105:                mov             ar.pfs = r21;;  C restore ar.pfs saved by alloc
        !           106:                br.ret.sptk.many b0
        !           107: EPILOGUE(mpn_addmul_1)
        !           108: ASM_END()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>