Annotation of OpenXM_contrib/gmp/mpn/power/addmul_1.asm, Revision 1.1.1.1
1.1 ohara 1: dnl IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the
2: dnl result to a second limb vector.
3:
4: dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published
10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
11: dnl your option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23:
24: dnl INPUT PARAMETERS
25: dnl res_ptr r3
26: dnl s1_ptr r4
27: dnl size r5
28: dnl s2_limb r6
29:
30: dnl The POWER architecture has no unsigned 32x32->64 bit multiplication
31: dnl instruction. To obtain that operation, we have to use the 32x32->64
32: dnl signed multiplication instruction, and add the appropriate compensation to
33: dnl the high limb of the result. We add the multiplicand if the multiplier
34: dnl has its most significant bit set, and we add the multiplier if the
35: dnl multiplicand has its most significant bit set. We need to preserve the
36: dnl carry flag between each iteration, so we have to compute the compensation
37: dnl carefully (the natural srai+and doesn't work). Since all POWER processors
38: dnl can branch in zero cycles, we use conditional branches for the additions.
39:
40: include(`../config.m4')
41:
42: ASM_START()
43: PROLOGUE(mpn_addmul_1) C res[i] += s1[i] * s2_limb for size limbs; the final carry limb is returned in r3
44: cal 3,-4(3) C bias res_ptr by -4 so that 4(3) addresses the current res limb
45: l 0,0(4) C r0 = first s1 limb (loaded before any count check, so size >= 1 is assumed)
46: cmpi 0,6,0 C cr0 = sign of s2_limb, consumed by blt Lneg below
47: mtctr 5 C CTR = size, the limb count used by the bdz/bdn loop control
48: mul 9,0,6 C signed multiply: r9 = high word, low word is left in MQ
49: srai 7,0,31 C r7 = all ones if the s1 limb has its top bit set, else 0
50: and 7,7,6 C r7 = s2_limb if the s1 limb has its top bit set, else 0
51: mfmq 8 C r8 = low word of the product
52: cax 9,9,7 C compensate high limb for a negative s1 limb
53: l 7,4(3) C r7 = current res limb
54: a 8,8,7 C add res_limb; the carry out is consumed by the next ae or aze
55: blt Lneg C s2_limb has its top bit set: use the Lneg path
56: Lpos: bdz Lend C count exhausted after the first limb: store it and return
57:
58: Lploop: lu 0,4(4) C r0 = next s1 limb, s1_ptr advanced by 4
59: stu 8,4(3) C store the previous result limb, res_ptr advanced by 4
60: cmpi 0,0,0 C cr0 = sign of the s1 limb, tested by bge Lp0
61: mul 10,0,6 C r10 = high word (new cy_limb), low word in MQ
62: mfmq 0 C r0 = low word; the s1 limb is not needed again on this path
63: ae 8,0,9 C low limb + old_cy_limb + old cy
64: l 7,4(3) C r7 = current res limb
65: aze 10,10 C propagate cy to new cy_limb
66: a 8,8,7 C add res_limb
67: bge Lp0 C s1 limb non-negative: no compensation needed
68: cax 10,10,6 C adjust high limb for negative limb from s1
69: Lp0: bdz Lend0 C count exhausted with cy_limb in r10: exit via Lend0
70: lu 0,4(4) C second half of the 2x unrolled loop: roles of r9 and r10 are swapped
71: stu 8,4(3) C store the previous result limb, res_ptr advanced by 4
72: cmpi 0,0,0 C cr0 = sign of the s1 limb, tested by bge Lp1
73: mul 9,0,6 C r9 = high word (new cy_limb), low word in MQ
74: mfmq 0 C r0 = low word of the product
75: ae 8,0,10 C low limb + old_cy_limb + old cy
76: l 7,4(3) C r7 = current res limb
77: aze 9,9 C propagate cy to new cy_limb
78: a 8,8,7 C add res_limb
79: bge Lp1 C s1 limb non-negative: no compensation needed
80: cax 9,9,6 C adjust high limb for negative limb from s1
81: Lp1: bdn Lploop C more limbs remain: loop again with cy_limb in r9
82:
83: b Lend C count exhausted with cy_limb already in r9
84:
85: Lneg: cax 9,9,0 C s2_limb is negative: compensate high limb with the first s1 limb (still in r0)
86: bdz Lend C count exhausted after the first limb: store it and return
87: Lnloop: lu 0,4(4) C r0 = next s1 limb, s1_ptr advanced by 4
88: stu 8,4(3) C store the previous result limb, res_ptr advanced by 4
89: cmpi 0,0,0 C cr0 = sign of the s1 limb, tested by bge Ln0
90: mul 10,0,6 C r10 = high word (new cy_limb), low word in MQ
91: mfmq 7 C r7 = low word; r0 keeps the s1 limb for the compensation below
92: ae 8,7,9 C low limb + old_cy_limb + old cy
93: l 7,4(3) C r7 = current res limb
94: ae 10,10,0 C propagate cy to new cy_limb and add the s1 limb (compensation for negative s2_limb)
95: a 8,8,7 C add res_limb
96: bge Ln0 C s1 limb non-negative: no further compensation needed
97: cax 10,10,6 C adjust high limb for negative limb from s1
98: Ln0: bdz Lend0 C count exhausted with cy_limb in r10: exit via Lend0
99: lu 0,4(4) C second half of the 2x unrolled loop: roles of r9 and r10 are swapped
100: stu 8,4(3) C store the previous result limb, res_ptr advanced by 4
101: cmpi 0,0,0 C cr0 = sign of the s1 limb, tested by bge Ln1
102: mul 9,0,6 C r9 = high word (new cy_limb), low word in MQ
103: mfmq 7 C r7 = low word; r0 keeps the s1 limb for the compensation below
104: ae 8,7,10 C low limb + old_cy_limb + old cy
105: l 7,4(3) C r7 = current res limb
106: ae 9,9,0 C propagate cy to new cy_limb and add the s1 limb (compensation for negative s2_limb)
107: a 8,8,7 C add res_limb
108: bge Ln1 C s1 limb non-negative: no further compensation needed
109: cax 9,9,6 C adjust high limb for negative limb from s1
110: Ln1: bdn Lnloop C more limbs remain: loop again with cy_limb in r9
111: b Lend C count exhausted with cy_limb already in r9
112:
113: Lend0: cal 9,0(10) C exit from the first loop half: move cy_limb from r10 into r9
114: Lend: st 8,4(3) C store the last result limb
115: aze 3,9 C return value r3 = cy_limb + pending carry
116: br C return to caller
117: EPILOGUE(mpn_addmul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>