Annotation of OpenXM_contrib/gmp/mpn/sparc64/add_n.asm, Revision 1.1.1.1
1.1 maekawa 1: ! SPARC v9 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
2: ! sum in a third limb vector.
3:
4: ! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
5:
6: ! This file is part of the GNU MP Library.
7:
8: ! The GNU MP Library is free software; you can redistribute it and/or modify
9: ! it under the terms of the GNU Lesser General Public License as published by
10: ! the Free Software Foundation; either version 2.1 of the License, or (at your
11: ! option) any later version.
12:
13: ! The GNU MP Library is distributed in the hope that it will be useful, but
14: ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: ! License for more details.
17:
18: ! You should have received a copy of the GNU Lesser General Public License
19: ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: ! MA 02111-1307, USA.
22:
23:
24: ! INPUT PARAMETERS
25: ! res_ptr %o0
26: ! s1_ptr %o1
27: ! s2_ptr %o2
28: ! size %o3
29:
30: include(`../config.m4')
31:
32: ASM_START()
33: .register %g2,#scratch ! declare %g2 as a scratch global for the assembler
34: .register %g3,#scratch ! likewise for %g3
35: PROLOGUE(mpn_add_n)
36: ! Adds the two source vectors limb by limb into the result vector and returns the final carry, 0 or 1
37: ! Main loop budget per group of 4 limbs: 12 mem ops >= 12 cycles
38: ! 8 shift insn >= 8 cycles
39: ! 8 addccc, executing alone, +8 cycles
40: ! Unrolling not mandatory...perhaps 2-way is best?
41: ! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl
42: ! All in all, it runs at 5 cycles/limb
43:
44: save %sp,-160,%sp ! new register window: the incoming %o0-%o3 arguments are now %i0-%i3
45:
46: addcc %g0,%g0,%g0 ! 0+0 clears the carry flags so the first addccc adds no carry in
47:
48: add %i3,-4,%i3 ! %i3 = size - 4
49: brlz,pn %i3,L(there) ! fewer than 4 limbs: use only the one-limb loop
50: nop ! branch delay slot
51: ! Preload the first group of 4 limbs: s1 into %l0-%l3 and s2 into %l4-%l7
52: ldx [%i1+0],%l0
53: ldx [%i2+0],%l4
54: ldx [%i1+8],%l1
55: ldx [%i2+8],%l5
56: ldx [%i1+16],%l2
57: ldx [%i2+16],%l6
58: ldx [%i1+24],%l3
59: ldx [%i2+24],%l7
60: add %i1,32,%i1 ! step both source pointers past the 4 preloaded limbs
61: add %i2,32,%i2
62: ! From here on %i3 = number of limbs not yet loaded, minus 4
63: add %i3,-4,%i3 ! negative unless at least 4 more limbs follow the preloaded group
64: brlz,pn %i3,L(skip) ! nothing more to preload: just finish the group already in registers
65: nop ! branch delay slot
66: b L(loop1) ! jump instead of executing many NOPs
67: nop ! branch delay slot
68: ALIGN(32)
69: !--------- Start main loop ---------
70: L(loop1): ! each pass adds the preloaded group while loading the next one
71: addccc %l0,%l4,%g1 ! %g1 = 64-bit sum of limb 0 plus carry in, icc carry is now the carry out of bit 31
72: !-
73: srlx %l0,32,%o0 ! high 32 bits of the s1 limb, taken before %l0 is reloaded
74: ldx [%i1+0],%l0 ! preload limb 0 of the next group
75: !-
76: srlx %l4,32,%o4 ! high 32 bits of the s2 limb
77: ldx [%i2+0],%l4
78: !-
79: addccc %o0,%o4,%g0 ! high halves + bit 31 carry, sum discarded, icc carry is now the carry out of bit 63
80: !-
81: addccc %l1,%l5,%g2 ! same three steps for limb 1, sum in %g2
82: !-
83: srlx %l1,32,%o1
84: ldx [%i1+8],%l1
85: !-
86: srlx %l5,32,%o5
87: ldx [%i2+8],%l5
88: !-
89: addccc %o1,%o5,%g0 ! carry out of limb 1
90: !-
91: addccc %l2,%l6,%g3 ! limb 2, sum in %g3
92: !-
93: srlx %l2,32,%o2
94: ldx [%i1+16],%l2
95: !-
96: srlx %l6,32,%g5 ! asymmetry -- presumably %g5 stands in because %o6 is the stack pointer
97: ldx [%i2+16],%l6
98: !-
99: addccc %o2,%g5,%g0 ! carry out of limb 2
100: !-
101: addccc %l3,%l7,%g4 ! limb 3, sum in %g4
102: !-
103: srlx %l3,32,%o3
104: ldx [%i1+24],%l3
105: add %i1,32,%i1 ! plain add: advances s1_ptr without touching the carry flag
106: !-
107: srlx %l7,32,%o7
108: ldx [%i2+24],%l7
109: add %i2,32,%i2 ! advances s2_ptr, flags untouched
110: !-
111: addccc %o3,%o7,%g0 ! carry out of limb 3, kept in the icc carry for the next group
112: !-
113: stx %g1,[%i0+0] ! write the 4 sums to the result vector
114: !-
115: stx %g2,[%i0+8]
116: !-
117: stx %g3,[%i0+16]
118: add %i3,-4,%i3 ! account for the 4 limbs preloaded during this pass
119: !-
120: stx %g4,[%i0+24]
121: add %i0,32,%i0 ! advance res_ptr
122:
123: brgez,pt %i3,L(loop1) ! tests the register and not the flags: loop while 4 more limbs remain to preload
124: nop ! branch delay slot
125: !--------- End main loop ---------
126: L(skip): ! drain: add and store the last preloaded group without loading anything more
127: addccc %l0,%l4,%g1 ! limb 0: full sum, then the carry out of bit 63 via the high halves
128: srlx %l0,32,%o0
129: srlx %l4,32,%o4
130: addccc %o0,%o4,%g0
131: addccc %l1,%l5,%g2 ! limb 1
132: srlx %l1,32,%o1
133: srlx %l5,32,%o5
134: addccc %o1,%o5,%g0
135: addccc %l2,%l6,%g3 ! limb 2
136: srlx %l2,32,%o2
137: srlx %l6,32,%g5 ! asymmetry
138: addccc %o2,%g5,%g0
139: addccc %l3,%l7,%g4 ! limb 3
140: srlx %l3,32,%o3
141: srlx %l7,32,%o7
142: addccc %o3,%o7,%g0
143: stx %g1,[%i0+0] ! store the 4 sums
144: stx %g2,[%i0+8]
145: stx %g3,[%i0+16]
146: stx %g4,[%i0+24]
147: add %i0,32,%i0 ! advance res_ptr
148:
149: L(there): ! tail: the carry flag is still live here
150: add %i3,4,%i3 ! undo the bias of 4: %i3 = limbs still to do, fewer than 4
151: brz,pt %i3,L(end) ! nothing left over
152: nop ! branch delay slot
153:
154: L(loop2): ! one limb per pass
155: ldx [%i1+0],%l0 ! next s1 limb
156: add %i1,8,%i1
157: ldx [%i2+0],%l4 ! next s2 limb
158: add %i2,8,%i2
159: srlx %l0,32,%g2 ! high halves, used only to rebuild the 64-bit carry
160: srlx %l4,32,%g3
161: addccc %l0,%l4,%g1 ! %g1 = 64-bit sum plus carry in, icc carry = carry out of bit 31
162: addccc %g2,%g3,%g0 ! icc carry = carry out of bit 63, sum discarded
163: stx %g1,[%i0+0] ! store the sum limb
164: add %i0,8,%i0
165: add %i3,-1,%i3 ! one limb fewer to do, flags untouched
166: brgz,pt %i3,L(loop2) ! repeat while limbs remain
167: nop ! branch delay slot
168:
169: L(end): addc %g0,%g0,%i0 ! return value = 0 + 0 + carry
170: ret
171: restore ! delay slot of ret: drops the register window so %i0 is seen by the caller as %o0
172: EPILOGUE(mpn_add_n)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>