Annotation of OpenXM_contrib/gmp/mpn/ia64/invert_limb.asm, Revision 1.1.1.1
1.1 ohara 1: dnl IA-64 mpn_invert_limb -- Invert a normalized limb.
2:
3: dnl Copyright (C) 2000 Free Software Foundation, Inc.
4:
5: dnl This file is part of the GNU MP Library.
6:
7: dnl The GNU MP Library is free software; you can redistribute it and/or modify
8: dnl it under the terms of the GNU Lesser General Public License as published
9: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
10: dnl your option) any later version.
11:
12: dnl The GNU MP Library is distributed in the hope that it will be useful, but
13: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15: dnl License for more details.
16:
17: dnl You should have received a copy of the GNU Lesser General Public License
18: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: dnl MA 02111-1307, USA.
21:
22: include(`../config.m4')
23:
24: C INPUT PARAMETERS
25: C d = r32
26:
27: C It should be possible to avoid the xmpy.hu and the following tests by
28: C explicitly chopping in the last fma. That would save about 10 cycles.
29:
30: ASM_START()
31: .section .rodata C constants below are only ever loaded (ldfe), never stored to
32: .align 16 C each constant is four data4 words = 16 bytes, read with ldfe
33: .LC0: data4 0x00000000, 0x80000000, 0x0000403f, 0x00000000 C 2^64: significand words 0x00000000,0x80000000 then exponent word 0x403f (presumably little-endian word order -- confirm)
34: data4 0x00000000, 0x80000000, 0x0000407f, 0x00000000 C 2^128: same significand, exponent word 0x407f; lives at .LC0+16
35:
36: PROLOGUE(mpn_invert_limb) C in: r32 = d (normalized limb); out: r8 = trunc((2^128 - d*2^64)/d), minus 1 if the final check carries; uses r14, f6-f11, p6, p7
37: addl r14 = @ltoff(.LC0),gp C r14 = address of the linkage table entry for .LC0
38: add r8 = r32,r32;; C r8 = 2*d mod 2^64, which is zero when d = 2^63
39: ld8 r14 = [r14] C r14 = address of .LC0 itself
40: cmp.eq p6,p7 = 0,r8;; C p6 = (2*d wrapped to 0), i.e. d = 2^63
41: ldfe f10 = [r14],16 C f10 = 2^64; r14 post-incremented to the 2^128 entry
42: setf.sig f7 = r32 C f7 = d placed in the significand field
43: mov r8 = -1 C preset the return value (all ones) for the d = 2^63 early exit
44: (p6) br.ret.spnt b0;; C d = 2^63: return r8 = -1 without doing the division
45: ldfe f8 = [r14] C 2^128
46: fmpy.s1 f11 = f7,f10;; C scale by 2^64: f11 = d * 2^64
47: fsub.s1 f6 = f8,f11;; C f6 = 2^128 - d*2^64, the numerator n
48: frcpa.s1 f8,p6 = f6,f7;; C f8 = y0, approximation of 1/d; p6 set when the refinement steps below are needed
49: (p6) fnma.s1 f9 = f7,f8,f1 C f9 = e = 1 - d*y0 (error of the reciprocal estimate)
50: (p6) fmpy.s1 f10 = f6,f8;; C f10 = q0 = n*y0 (first quotient estimate)
51: (p6) fmpy.s1 f11 = f9,f9 C f11 = e^2
52: (p6) fma.s1 f10 = f9,f10,f10;; C f10 = q1 = q0 + e*q0
53: (p6) fma.s1 f8 = f9,f8,f8 C f8 = y1 = y0 + e*y0
54: (p6) fma.s1 f9 = f11,f10,f10;; C f9 = q2 = q1 + e^2*q1
55: (p6) fma.s1 f8 = f11,f8,f8 C f8 = y2 = y1 + e^2*y1
56: (p6) fnma.s1 f10 = f7,f9,f6;; C f10 = r = n - d*q2 (remainder of the quotient estimate)
57: (p6) fma.s1 f8 = f10,f8,f9;; C f8 = q2 + r*y2, the final quotient estimate
58: fcvt.fxu.trunc.s1 f8 = f8;; C convert the quotient to an unsigned integer, truncating; f8 = di
59: xmpy.hu f10 = f8,f7;; C di * d, keeping only the high 64 bits of the unsigned product
60: getf.sig r8 = f8 C r8 = di, the candidate return value
61: getf.sig r14 = f10;; C r14 = high 64 bits of di*d
62: add r32 = r32,r14;; C r32 = d + high(di*d) mod 2^64
63: cmp.ltu p6,p7 = r32,r14;; C got overflow? sum below the addend means the add carried out of 64 bits
64: (p6) add r8 = -1,r8 C adjust di down by one when the check above carried
65: br.ret.sptk b0 C return di in r8
66: EPILOGUE(mpn_invert_limb)
67: ASM_END()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>