Annotation of OpenXM_contrib/gmp/mpn/sparc32/v9/add_n.asm, Revision 1.1.1.1
1.1 ohara 1: dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
2: dnl sum in a third limb vector.
3:
4: dnl Copyright 2001 Free Software Foundation, Inc.
5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published
10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
11: dnl your option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23:
24: include(`../config.m4')
25:
26: C INPUT PARAMETERS
C rp is the destination limb pointer: every stw in mpn_add_n stores through it.
27: define(rp,%o0)
C s1p and s2p are the two source limb pointers; they are only read, via lduw.
28: define(s1p,%o1)
29: define(s2p,%o2)
C n is the limb count; the code counts it down to decide when to stop.
30: define(n,%o3)
C cy is not an input: it is a scratch global register that carries bit 32 of
C one limb sum into the next limb sum.
31: define(cy,%g1)
32:
33: C This code uses 64-bit operations on `o' and `g' registers. It doesn't
34: C require that `o' registers' upper 32 bits are preserved by the operating
35: C system, but if they are not, they must be zeroed. That is indeed what
36: C happens at least on Solaris 2.5 and 2.6.
37:
38: C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
39: C about 10 cycles/limb from the Ecache.
40:
41: ASM_START()
C mpn_add_n: add the n-limb vectors at s1p and s2p, store the 32-bit limbs of
C the sum at rp, and return the carry out of the top limb in %o0.
C
C Register use in this routine:
C   %o4,%o5  limb pair (s1, s2) at even limb positions
C   %g2,%g3  limb pair (s1, s2) at odd limb positions
C   %g4      64-bit sum of one limb pair plus the incoming carry
C   cy       carry between limbs, extracted as bit 32 of %g4
C
C Limbs are loaded with lduw (zero-extended), so a 64-bit add of two limbs
C plus cy leaves the carry in bit 32, which srlx by 32 then extracts.
C
C The instruction following each branch or retl is its delay slot.  None of
C the branches here is annulled, so each delay-slot instruction runs whether
C or not the branch is taken.
42: PROLOGUE(mpn_add_n)
C Preload limb 0 of both operands.
43: lduw [s1p+0],%o4
44: lduw [s2p+0],%o5
C n -= 2; the condition codes set here drive both branches below.
45: addcc n,-2,n
C Negative result: fewer than two limbs, handle the single limb at end1.
46: bl,pn %icc,L(end1)
C Delay slot: preload limb 1 of s1.
C NOTE(review): this load also runs when the branch to end1 is taken, so for
C n == 1 it reads s1p[1], one limb past the operand -- confirm that is acceptable.
47: lduw [s1p+4],%g2
48: lduw [s2p+4],%g3
C Zero result: exactly two limbs, both already loaded, handle them at end2.
49: be,pn %icc,L(end2)
C Delay slot: clear the carry before entering the loop.
50: mov 0,cy
51:
C Main loop, two limbs per iteration.  On entry the next two limb pairs are
C already in %o4/%o5 and %g2/%g3, cy holds the incoming carry, and n counts the
C limbs not yet loaded.
C The fitod instructions write %f2, which nothing in this routine reads;
C presumably they are filler that shapes the instruction groups separated by
C the "C ---" lines -- TODO confirm.
52: .align 16
53: L(loop):
54: add %o4,%o5,%g4
C Advance rp now; the two stores of this iteration use [rp-8] and [rp-4].
55: add rp,8,rp
C Reload %o4 with the s1 limb two positions ahead.
56: lduw [s1p+8],%o4
57: fitod %f0,%f2
58: C ---
59: add cy,%g4,%g4
60: addcc n,-1,n
61: lduw [s2p+8],%o5
62: fitod %f0,%f2
63: C ---
64: srlx %g4,32,cy
65: add s2p,8,s2p
66: stw %g4,[rp-8]
C If n reached zero, one limb beyond the %g2/%g3 pair remains.  The delay slot
C below performs the same add as the first instruction of L(exito), so the
C branch enters L(exito) one instruction (4 bytes) in.
67: be,pn %icc,L(exito)+4
68: C ---
69: add %g2,%g3,%g4
70: addcc n,-1,n
71: lduw [s1p+12],%g2
72: fitod %f0,%f2
73: C ---
74: add cy,%g4,%g4
75: add s1p,8,s1p
C s2p was already advanced by 8 above, so +4 here matches s1p+12 before its
C own advance.
76: lduw [s2p+4],%g3
77: fitod %f0,%f2
78: C ---
79: srlx %g4,32,cy
C Loop while the second countdown left n non-zero.
80: bne,pt %icc,L(loop)
C Delay slot: store the odd limb of this iteration.
81: stw %g4,[rp-4]
82: C ---
C Fall-through exit: two loaded limb pairs remain, %o4/%o5 first and then
C %g2/%g3.  They are stored at [rp+0] and [rp+4].
83: L(exite):
84: add %o4,%o5,%g4
85: add cy,%g4,%g4
86: srlx %g4,32,cy
87: stw %g4,[rp+0]
88: add %g2,%g3,%g4
89: add cy,%g4,%g4
90: stw %g4,[rp+4]
91: retl
C Delay slot: the return value in %o0 is the carry out of the top limb.
92: srlx %g4,32,%o0
93:
C Exit taken from the middle of the loop (entered at L(exito)+4, the first add
C having already run in the branch delay slot).  The %g2/%g3 pair is finished
C and stored at [rp-4], then the last pair %o4/%o5 goes to [rp+0].
94: L(exito):
95: add %g2,%g3,%g4
96: add cy,%g4,%g4
97: srlx %g4,32,cy
98: stw %g4,[rp-4]
99: add %o4,%o5,%g4
100: add cy,%g4,%g4
101: stw %g4,[rp+0]
102: retl
C Delay slot: return the final carry.
103: srlx %g4,32,%o0
104:
C Single-limb case: add limb 0, store it, return its carry.  cy is not used.
105: L(end1):
106: add %o4,%o5,%g4
107: stw %g4,[rp+0]
108: retl
109: srlx %g4,32,%o0
110:
C Two-limb case: add limb 0, propagate its carry into limb 1, return the
C carry out of limb 1.
111: L(end2):
112: add %o4,%o5,%g4
113: srlx %g4,32,cy
114: stw %g4,[rp+0]
115: add %g2,%g3,%g4
116: add cy,%g4,%g4
117: stw %g4,[rp+4]
118: retl
119: srlx %g4,32,%o0
120: EPILOGUE(mpn_add_n)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>