Annotation of OpenXM_contrib/gmp/mpn/power/submul_1.asm, Revision 1.1.1.1
1.1 ohara 1: dnl IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
2: dnl the result from a second limb vector.
3:
4: dnl Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published
10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
11: dnl your option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23:
24: dnl INPUT PARAMETERS
25: dnl res_ptr r3
26: dnl s1_ptr r4
27: dnl size r5
28: dnl s2_limb r6
29:
30: dnl The POWER architecture has no unsigned 32x32->64 bit multiplication
31: dnl instruction. To obtain that operation, we have to use the 32x32->64
32: dnl signed multiplication instruction, and add the appropriate compensation to
33: dnl the high limb of the result. We add the multiplicand if the multiplier
34: dnl has its most significant bit set, and we add the multiplier if the
35: dnl multiplicand has its most significant bit set. We need to preserve the
36: dnl carry flag between each iteration, so we have to compute the compensation
37: dnl carefully (the natural srai+and approach doesn't work). Since all POWER can
38: dnl branch in zero cycles, we use conditional branches for the additions.
39:
40: include(`../config.m4')
41:
42: ASM_START()
43: PROLOGUE(mpn_submul_1) C res[] -= s1[] * s2_limb over size limbs; r3 holds cy_limb + cy at return
44: cal 3,-4(3) C bias res_ptr by -4 so the current res limb is always at 4(r3)
45: l 0,0(4) C r0 = first s1 limb (loaded before any count test)
46: cmpi 0,6,0 C cr0 = sign of s2_limb, tested by blt below
47: mtctr 5 C CTR = size, the limb counter for bdz/bdn
48: mul 9,0,6 C signed multiply: r9 = high limb, low limb is fetched with mfmq
49: srai 7,0,31 C r7 = all ones if s1 limb has its top bit set, else 0
50: and 7,7,6 C r7 = s2_limb if s1 limb is negative, else 0
51: mfmq 11 C r11 = low limb of product
52: cax 9,9,7 C compensate high limb for negative s1 limb
53: l 7,4(3) C r7 = res limb
54: sf 8,11,7 C r8 = res_limb - low limb
55: a 11,8,11 C invert cy so that cy set means borrow (r11 is junk)
56: blt Lneg C s2_limb negative: use the loop that also adds the s1 limb
57: Lpos: bdz Lend C only one limb: finish with cy_limb in r9
58:
59: Lploop: lu 0,4(4) C s2_limb >= 0 loop, unrolled twice; r0 = next s1 limb, s1_ptr advanced
60: stu 8,4(3) C store previous result limb, res_ptr advanced
61: cmpi 0,0,0 C cr0 = sign of s1 limb, tested by bge below
62: mul 10,0,6 C r10 = new high limb (cy_limb alternates between r9 and r10)
63: mfmq 0 C r0 = low limb (s1 limb is no longer needed in this loop)
64: ae 11,0,9 C low limb + old_cy_limb + old cy
65: l 7,4(3) C r7 = res limb
66: aze 10,10 C propagate cy to new cy_limb
67: sf 8,11,7 C r8 = res_limb - (low limb + old_cy_limb + old cy)
68: a 11,8,11 C invert cy (r11 is junk)
69: bge Lp0 C s1 limb non-negative: no adjustment needed
70: cax 10,10,6 C adjust high limb for negative limb from s1
71: Lp0: bdz Lend0 C out of limbs: cy_limb is in r10
72: lu 0,4(4) C second copy of the loop body, with r9 and r10 swapped
73: stu 8,4(3) C store previous result limb, res_ptr advanced
74: cmpi 0,0,0 C cr0 = sign of s1 limb
75: mul 9,0,6 C r9 = new high limb
76: mfmq 0 C r0 = low limb
77: ae 11,0,10 C low limb + old_cy_limb + old cy
78: l 7,4(3) C r7 = res limb
79: aze 9,9 C propagate cy to new cy_limb
80: sf 8,11,7 C r8 = res_limb - (low limb + old_cy_limb + old cy)
81: a 11,8,11 C invert cy (r11 is junk)
82: bge Lp1 C s1 limb non-negative: no adjustment needed
83: cax 9,9,6 C adjust high limb for negative limb from s1
84: Lp1: bdn Lploop C loop while limbs remain
85:
86: b Lend C out of limbs: cy_limb is in r9
87:
88: Lneg: cax 9,9,0 C s2_limb negative: compensate high limb by adding the s1 limb
89: bdz Lend C only one limb: finish with cy_limb in r9
90: Lnloop: lu 0,4(4) C s2_limb < 0 loop, unrolled twice; r0 = next s1 limb, s1_ptr advanced
91: stu 8,4(3) C store previous result limb, res_ptr advanced
92: cmpi 0,0,0 C cr0 = sign of s1 limb, tested by bge below
93: mul 10,0,6 C r10 = new high limb
94: mfmq 7 C r7 = low limb (r0 must keep the s1 limb for the compensation below)
95: ae 11,7,9 C low limb + old_cy_limb + old cy
96: l 7,4(3) C r7 = res limb
97: ae 10,10,0 C propagate cy to new cy_limb and add s1 limb (compensation for negative s2_limb)
98: sf 8,11,7 C r8 = res_limb - (low limb + old_cy_limb + old cy)
99: a 11,8,11 C invert cy (r11 is junk)
100: bge Ln0 C s1 limb non-negative: no further adjustment needed
101: cax 10,10,6 C adjust high limb for negative limb from s1
102: Ln0: bdz Lend0 C out of limbs: cy_limb is in r10
103: lu 0,4(4) C second copy of the loop body, with r9 and r10 swapped
104: stu 8,4(3) C store previous result limb, res_ptr advanced
105: cmpi 0,0,0 C cr0 = sign of s1 limb
106: mul 9,0,6 C r9 = new high limb
107: mfmq 7 C r7 = low limb (r0 keeps the s1 limb)
108: ae 11,7,10 C low limb + old_cy_limb + old cy
109: l 7,4(3) C r7 = res limb
110: ae 9,9,0 C propagate cy to new cy_limb and add s1 limb (compensation for negative s2_limb)
111: sf 8,11,7 C r8 = res_limb - (low limb + old_cy_limb + old cy)
112: a 11,8,11 C invert cy (r11 is junk)
113: bge Ln1 C s1 limb non-negative: no further adjustment needed
114: cax 9,9,6 C adjust high limb for negative limb from s1
115: Ln1: bdn Lnloop C loop while limbs remain
116: b Lend C out of limbs: cy_limb is in r9
117:
118: Lend0: cal 9,0(10) C move cy_limb from r10 to r9 so Lend can use r9
119: Lend: st 8,4(3) C store the final result limb
120: aze 3,9 C r3 = cy_limb + cy
121: br C return to caller
122: EPILOGUE(mpn_submul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>