[BACK]Return to invert_limb.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / ia64

Annotation of OpenXM_contrib/gmp/mpn/ia64/invert_limb.asm, Revision 1.1.1.1

1.1       ohara       1: dnl  IA-64 mpn_invert_limb -- Invert a normalized limb.
                      2:
                      3: dnl  Copyright (C) 2000 Free Software Foundation, Inc.
                      4:
                      5: dnl  This file is part of the GNU MP Library.
                      6:
                      7: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
                      8: dnl  it under the terms of the GNU Lesser General Public License as published
                      9: dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
                     10: dnl  your option) any later version.
                     11:
                     12: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
                     13: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     15: dnl  License for more details.
                     16:
                     17: dnl  You should have received a copy of the GNU Lesser General Public License
                     18: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: dnl  MA 02111-1307, USA.
                     21:
                     22: include(`../config.m4')
                     23:
                     24: C INPUT PARAMETERS
                     25: C d = r32
                     26:
                     27: C It should be possible to avoid the xmpy.hu and the following tests by
                     28: C explicitly chopping in the last fma.  That would save about 10 cycles.
                     29:
                     30: ASM_START()
                     31:        .section        .rodata
                     32:        .align 16                               C each constant below fills one 16-byte slot
                     33: .LC0:  data4 0x00000000, 0x80000000, 0x0000403f, 0x00000000    C 2^64, read with ldfe: significand words first, then exponent 0x403f
                     34:        data4 0x00000000, 0x80000000, 0x0000407f, 0x00000000    C 2^128, at .LC0+16, same layout with exponent 0x407f
                     35:
                     36: PROLOGUE(mpn_invert_limb)                   C in: r32 = d (normalized limb); out: r8 = di, the inverted limb
                     37:        addl            r14 = @ltoff(.LC0),gp   C r14 = gp-relative linkage table slot for .LC0
                     38:        add             r8 = r32,r32;;                  C r8 = 2*d, which wraps to 0 when d = 2^63
                     39:        ld8             r14 = [r14]                     C r14 = address of .LC0
                     40:        cmp.eq          p6,p7 = 0,r8;;                  C p6 = (2*d wrapped to 0), i.e. d = 2^63
                     41:        ldfe            f10 = [r14],16                  C f10 = 2^64; r14 advances to the next constant
                     42:        setf.sig        f7 = r32                        C f7 = d as an unsigned integer significand
                     43:        mov             r8 = -1                         C result for the d = 2^63 early exit (overwritten later otherwise)
                     44:    (p6)        br.ret.spnt     b0;;                    C d = 2^63: return all ones
                     45:        ldfe            f8 = [r14]                      C f8 = 2^128
                     46:        fmpy.s1         f11 = f7,f10;;                  C f11 = d * 2^64
                     47:        fsub.s1         f6 = f8,f11;;                   C f6 = n = 2^128 - d*2^64 (the dividend)
                     48:        frcpa.s1        f8,p6 = f6,f7;;                 C f8 = y, initial approximation of 1/d; p6 set when refinement is required
                     49:    (p6) fnma.s1                f9 = f7,f8,f1           C f9 = e = 1 - d*y (f1 is the constant 1.0)
                     50:    (p6) fmpy.s1                f10 = f6,f8;;           C f10 = q0 = n*y
                     51:    (p6) fmpy.s1                f11 = f9,f9             C f11 = e^2
                     52:    (p6) fma.s1         f10 = f9,f10,f10;;              C f10 = q1 = q0 + e*q0
                     53:    (p6) fma.s1         f8 = f9,f8,f8                   C f8 = y1 = y + e*y
                     54:    (p6) fma.s1         f9 = f11,f10,f10;;              C f9 = q2 = q1 + e^2*q1
                     55:    (p6) fma.s1         f8 = f11,f8,f8                  C f8 = y2 = y1 + e^2*y1
                     56:    (p6) fnma.s1                f10 = f7,f9,f6;;        C f10 = r = n - d*q2 (remainder of the quotient estimate)
                     57:    (p6) fma.s1         f8 = f10,f8,f9;;                C f8 = q2 + r*y2, the corrected quotient
                     58:        fcvt.fxu.trunc.s1 f8 = f8;;                     C f8 = quotient truncated to an unsigned integer (candidate di)
                     59:        xmpy.hu         f10 = f8,f7;;                   C f10 = high 64 bits of di * d
                     60:        getf.sig        r8 = f8                         C r8 = candidate di
                     61:        getf.sig        r14 = f10;;                     C r14 = high limb of di * d
                     62:        add             r32 = r32,r14;;                 C r32 = d + high(di*d), modulo 2^64
                     63:        cmp.ltu         p6,p7 = r32,r14;;               C p6 = carry out of that add (sum wrapped below r14)
                     64:    (p6) add            r8 = -1,r8                      C candidate was one too large: adjust di down
                     65:        br.ret.sptk     b0                              C return di in r8
                     66: EPILOGUE(mpn_invert_limb)
                     67: ASM_END()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>