Annotation of OpenXM_contrib/gmp/mpn/powerpc32/submul_1.asm, Revision 1.1.1.2
1.1 maekawa 1: dnl PowerPC-32 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
2: dnl the result from a second limb vector.
3:
1.1.1.2 ! ohara 4: dnl Copyright 1995, 1997, 1998, 2000, 2002 Free Software Foundation, Inc.
1.1 maekawa 5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published by
10: dnl the Free Software Foundation; either version 2.1 of the License, or (at your
11: dnl option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23:
24: dnl INPUT PARAMETERS
25: dnl res_ptr r3
26: dnl s1_ptr r4
27: dnl size r5
28: dnl s2_limb r6
29:
30: dnl This is optimized for the PPC604. It has not been tested on PPC601, PPC603
31: dnl or PPC750 since I don't have access to any such machines.
32:
33: include(`../config.m4')
34:
35: ASM_START()
36: PROLOGUE(mpn_submul_1) C {res_ptr,size} -= {s1_ptr,size} * s2_limb; r3 returns the final cy_limb plus borrow
37: cmpi cr0,r5,9 C more than 9 limbs?
1.1.1.2 ! ohara 38: bgt cr0,L(big) C branch if more than 9 limbs
1.1 maekawa 39:
40: mtctr r5 C ctr = size, one count per limb on this path
41: lwz r0,0(r4) C r0 = first s1 limb
42: mullw r7,r0,r6 C r7 = low word of s1 limb * s2_limb
43: mulhwu r10,r0,r6 C r10 = high word of the product, the cy_limb
44: lwz r9,0(r3) C r9 = first res limb
45: subfc r8,r7,r9 C r8 = r9 - r7, CA set when there is no borrow
46: addc r7,r7,r8 C invert cy so that CA = borrow (r7 is junk)
47: addi r3,r3,-4 C bias res_ptr so the stores below can use offset 4
1.1.1.2 ! ohara 48: bdz L(end) C size was 1, only the final store remains
! 49: L(loop):
1.1 maekawa 50: lwzu r0,4(r4) C r0 = next s1 limb, s1_ptr advances
51: stwu r8,4(r3) C store the previous result limb, res_ptr advances
52: mullw r8,r0,r6 C r8 = low word of product
53: adde r7,r8,r10 C r7 = low word + cy_limb + borrow held in CA
54: mulhwu r10,r0,r6 C r10 = high word of product
55: lwz r9,4(r3) C r9 = next res limb
56: addze r10,r10 C new cy_limb = high word + carry out of the adde
57: subfc r8,r7,r9 C r8 = r9 - r7, CA set when there is no borrow
58: addc r7,r7,r8 C invert cy so that CA = borrow (r7 is junk)
1.1.1.2 ! ohara 59: bdnz L(loop)
! 60: L(end): stw r8,4(r3) C store the last result limb
1.1 maekawa 61: addze r3,r10 C return value = cy_limb + final borrow
62: blr
63:
1.1.1.2 ! ohara 64: L(big): stmw r30,-32(r1) C save r30 and r31 at r1-32, r1 itself is not adjusted. NOTE(review): relies on memory below r1 being safe to use, confirm against the target ABI
1.1 maekawa 65: addi r5,r5,-1 C r5 = size-1, the first limb is handled before the loops
66: srwi r0,r5,2 C r0 = (size-1)/4, the number of 4-limb groups
67: mtctr r0 C ctr = iteration count for the unrolled loop
68:
69: lwz r7,0(r4) C r7 = first s1 limb
70: mullw r8,r7,r6 C r8 = low word of product
71: mulhwu r0,r7,r6 C r0 = high word of product, the cy_limb
72: lwz r7,0(r3) C r7 = first res limb
73: subfc r7,r8,r7 C r7 = res limb - low word, CA set when there is no borrow
74: addc r8,r8,r7 C invert cy so that CA = borrow (r8 is junk)
75: stw r7,0(r3) C store the first result limb
76:
1.1.1.2 ! ohara 77: L(loopU):
1.1 maekawa 78: lwz r7,4(r4) C load four s1 limbs into r7, r12, r30, r31 ...
79: lwz r12,8(r4)
80: lwz r30,12(r4)
81: lwzu r31,16(r4) C ... and advance s1_ptr by 16 bytes
82: mullw r8,r7,r6 C low product words go to r8, r9, r10, r11
83: mullw r9,r12,r6
84: mullw r10,r30,r6
85: mullw r11,r31,r6
86: adde r8,r8,r0 C add cy_limb
87: mulhwu r0,r7,r6 C r0 = high word of the first product
88: lwz r7,4(r3) C r7 = res limb matching r8
89: adde r9,r9,r0 C second low word + previous high word + carry
90: mulhwu r0,r12,r6 C r0 = high word of the second product
91: lwz r12,8(r3) C r12 = res limb matching r9
92: adde r10,r10,r0 C third low word + previous high word + carry
93: mulhwu r0,r30,r6 C r0 = high word of the third product
94: lwz r30,12(r3) C r30 = res limb matching r10
95: adde r11,r11,r0 C fourth low word + previous high word + carry
96: mulhwu r0,r31,r6 C r0 = high word of the fourth product
97: lwz r31,16(r3) C r31 = res limb matching r11
98: addze r0,r0 C new cy_limb
99: subfc r7,r8,r7 C r7 = res limb - r8, starts the borrow chain
100: stw r7,4(r3)
101: subfe r12,r9,r12 C r12 = res limb - r9 - borrow
102: stw r12,8(r3)
103: subfe r30,r10,r30 C r30 = res limb - r10 - borrow
104: stw r30,12(r3)
105: subfe r31,r11,r31 C r31 = res limb - r11 - borrow
106: stwu r31,16(r3) C store the fourth limb and advance res_ptr by 16 bytes
107: subfe r11,r11,r11 C invert ... (r11 = CA - 1, that is 0 or -1)
108: addic r11,r11,1 C ... carry (adding 1 carries out only when r11 was -1, so CA = borrow)
1.1.1.2 ! ohara 109: bdnz L(loopU)
1.1 maekawa 110:
111: andi. r31,r5,3 C r31 = (size-1) mod 4 leftover limbs, cr0 records whether it is zero
112: mtctr r31 C ctr = leftover limb count
1.1.1.2 ! ohara 113: beq cr0,L(endx) C no leftover limbs, skip the single-limb loop
1.1 maekawa 114:
1.1.1.2 ! ohara 115: L(loopE):
1.1 maekawa 116: lwzu r7,4(r4) C r7 = next s1 limb, s1_ptr advances
117: mullw r8,r7,r6 C r8 = low word of product
118: adde r8,r8,r0 C add cy_limb
119: mulhwu r0,r7,r6 C r0 = high word of product
120: lwz r7,4(r3) C r7 = next res limb
121: addze r0,r0 C new cy_limb
122: subfc r7,r8,r7 C r7 = res limb - r8, CA set when there is no borrow
123: addc r8,r8,r7 C invert cy so that CA = borrow (r8 is junk)
124: stwu r7,4(r3) C store the result limb, res_ptr advances
1.1.1.2 ! ohara 125: bdnz L(loopE)
! 126: L(endx):
1.1 maekawa 127: addze r3,r0 C return value = cy_limb + final borrow
128: lmw r30,-32(r1) C restore r30 and r31 from the save area written at entry to this path
129: blr
130: EPILOGUE(mpn_submul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>