Annotation of OpenXM_contrib/gmp/mpn/ia64/invert_limb.asm, Revision 1.1
1.1 ! ohara 1: dnl IA-64 mpn_invert_limb -- Invert a normalized limb.
! 2:
! 3: dnl Copyright (C) 2000 Free Software Foundation, Inc.
! 4:
! 5: dnl This file is part of the GNU MP Library.
! 6:
! 7: dnl The GNU MP Library is free software; you can redistribute it and/or modify
! 8: dnl it under the terms of the GNU Lesser General Public License as published
! 9: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
! 10: dnl your option) any later version.
! 11:
! 12: dnl The GNU MP Library is distributed in the hope that it will be useful, but
! 13: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 15: dnl License for more details.
! 16:
! 17: dnl You should have received a copy of the GNU Lesser General Public License
! 18: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: dnl MA 02111-1307, USA.
! 21:
! 22: include(`../config.m4')
! 23:
! 24: C INPUT PARAMETERS
! 25: C d = r32
! 26:
! 27: C It should be possible to avoid the xmpy.hu and the following tests by
! 28: C explicitly chopping in the last fma. That would save about 10 cycles.
! 29:
! 30: ASM_START()
! 31: .section .rodata C constant table lives in read-only data
! 32: .align 16 C each constant below occupies 16 bytes, matching the post-increment of 16 used when loading
! 33: .LC0: data4 0x00000000, 0x80000000, 0x0000403f, 0x00000000 C 2^64 for ldfe: significand 0x8000000000000000, biased exponent 0x403f = 0x3fff + 64, assuming little-endian word order
! 34: data4 0x00000000, 0x80000000, 0x0000407f, 0x00000000 C 2^128 for ldfe: same significand, biased exponent 0x407f = 0x3fff + 128
! 35:
! 36: PROLOGUE(mpn_invert_limb) C in: r32 = d, a normalized limb. out: r8 = integer quotient of 2^128 - d*2^64 by d, or all ones when d = 2^63
! 37: addl r14 = @ltoff(.LC0),gp C r14 = address of the linkage table slot holding the address of .LC0
! 38: add r8 = r32,r32;; C r8 = 2*d mod 2^64, which is zero when d = 2^63
! 39: ld8 r14 = [r14] C r14 = address of .LC0
! 40: cmp.eq p6,p7 = 0,r8;; C p6 = 1 when 2*d wrapped to zero, i.e. d = 2^63 for normalized d
! 41: ldfe f10 = [r14],16 C f10 = 2^64, then advance r14 to the next 16-byte constant
! 42: setf.sig f7 = r32 C f7 = d as an integer significand
! 43: mov r8 = -1 C preload the result returned for d = 2^63
! 44: (p6) br.ret.spnt b0;; C d = 2^63: return r8 = -1 without doing the division
! 45: ldfe f8 = [r14] C f8 = 2^128
! 46: fmpy.s1 f11 = f7,f10;; C f11 = d * 2^64
! 47: fsub.s1 f6 = f8,f11;; C f6 = n = 2^128 - d*2^64, the dividend
! 48: frcpa.s1 f8,p6 = f6,f7;; C f8 = y0, an approximation of 1/d; p6 = 1 when the refinement steps below are required
! 49: (p6) fnma.s1 f9 = f7,f8,f1 C f9 = e = 1 - d*y0, the error of the reciprocal estimate
! 50: (p6) fmpy.s1 f10 = f6,f8;; C f10 = q0 = n*y0, first quotient estimate
! 51: (p6) fmpy.s1 f11 = f9,f9 C f11 = e^2
! 52: (p6) fma.s1 f10 = f9,f10,f10;; C f10 = q1 = q0 + e*q0
! 53: (p6) fma.s1 f8 = f9,f8,f8 C f8 = y1 = y0 + e*y0
! 54: (p6) fma.s1 f9 = f11,f10,f10;; C f9 = q2 = q1 + e^2*q1
! 55: (p6) fma.s1 f8 = f11,f8,f8 C f8 = y2 = y1 + e^2*y1
! 56: (p6) fnma.s1 f10 = f7,f9,f6;; C f10 = r = n - d*q2, the remainder of the current quotient
! 57: (p6) fma.s1 f8 = f10,f8,f9;; C f8 = q2 + r*y2, the final floating-point quotient
! 58: fcvt.fxu.trunc.s1 f8 = f8;; C f8 = di, the quotient truncated to an unsigned integer
! 59: xmpy.hu f10 = f8,f7;; C f10 = high 64 bits of the unsigned product di * d
! 60: getf.sig r8 = f8 C r8 = di, the candidate return value
! 61: getf.sig r14 = f10;; C r14 = high 64 bits of di * d
! 62: add r32 = r32,r14;; C r32 = d + high limb of di*d, modulo 2^64
! 63: cmp.ltu p6,p7 = r32,r14;; C p6 = 1 when that sum carried out of 64 bits
! 64: (p6) add r8 = -1,r8 C carry means the estimate was one too large, so adjust di down
! 65: br.ret.sptk b0 C return di in r8
! 66: EPILOGUE(mpn_invert_limb)
! 67: ASM_END()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>