Annotation of OpenXM_contrib/gmp/mpn/powerpc32/mul_1.asm, Revision 1.1.1.1
1.1 maekawa 1: dnl PowerPC-32 mpn_mul_1 -- Multiply a limb vector with a limb and store
2: dnl the result in a second limb vector.
3:
4: dnl Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published by
10: dnl the Free Software Foundation; either version 2.1 of the License, or (at your
11: dnl option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23:
24: dnl INPUT PARAMETERS
25: dnl res_ptr r3
26: dnl s1_ptr r4
27: dnl size r5
28: dnl s2_limb r6
29:
30: dnl This is optimized for the PPC604 but it runs decently even on PPC601. It
31: dnl has not been tested on a PPC603 since I don't have access to any such
32: dnl machines.
33:
34: include(`../config.m4')
35:
36: ASM_START()
C mpn_mul_1: multiply the limb vector at s1_ptr (size limbs) by s2_limb, store
C the low-order result limbs at res_ptr, and leave the final high limb (plus
C carry) in r3 at return.
C In: r3 = res_ptr, r4 = s1_ptr, r5 = size, r6 = s2_limb
C Scratch: r0, r8-r12, CTR and the carry bit; r30/r31 are saved and restored.
C The main loop handles two limbs per iteration, alternating between the
C register sets r8/r11/r0/r30 and r9/r10/r12/r31. The carry bit stays live
C from one adde to the next; the loads, stores, multiplies and CTR branches
C in between do not modify it.
C NOTE(review): size = 0 is not handled -- the first bdz would decrement CTR
C from 0 to a non-zero value and fall through; presumably callers guarantee
C size >= 1, confirm.
37: PROLOGUE(mpn_mul_1)
38: mtctr r5 C CTR = size; every bdz/bdnz below counts off one limb
39: addi r3,r3,-4 C bias res_ptr by one limb; stores below address it as 4(r3) and 8(r3)
40: li r12,0 C r12 = 0: no previous high word feeds the first result limb
41: addic r0,r0,0 C clear the carry bit (adding 0 cannot carry; r0 keeps its value)
42: C Start software pipeline
43: lwz r8,0(r4) C r8 = first source limb
44: bdz .Lend3 C size == 1: single-limb path, which never touches r30/r31
45: stmw r30,-8(r1) C save r30 and r31 (registers we must preserve) in the 8 bytes below r1
C NOTE(review): r1 is not adjusted, so the save area lies below the stack
C pointer; this relies on the target ABI leaving that area intact -- confirm.
46: lwzu r9,4(r4) C r9 = second source limb; r4 advances one limb
47: mullw r11,r8,r6 C r11 = low word of first limb * s2_limb
48: mulhwu r0,r8,r6 C r0 = high word of first limb * s2_limb
49: bdz .Lend1 C size == 2: nothing left to fetch, go drain the pipeline
50: C Software pipelined main loop
51: .Loop: lwz r8,4(r4) C r8 = next source limb (r4 is not updated by this load)
52: mullw r10,r9,r6 C r10 = low word of r9 * s2_limb
53: adde r30,r11,r12 C result limb = pending low word + previous high word + carry
54: mulhwu r12,r9,r6 C r12 = high word of r9 * s2_limb (old r12 was consumed above)
55: stw r30,4(r3) C store result limb; r3 itself is advanced by the stwu below
56: bdz .Lend2 C r8 holds the last limb: drain with r8 still pending
57: lwzu r9,8(r4) C r9 = next source limb; r4 advances two limbs
58: mullw r11,r8,r6 C r11 = low word of r8 * s2_limb
59: adde r31,r10,r0 C result limb = pending low word + previous high word + carry
60: mulhwu r0,r8,r6 C r0 = high word of r8 * s2_limb (old r0 was consumed above)
61: stwu r31,8(r3) C store result limb and advance r3 two limbs
62: bdnz .Loop C more limbs to fetch: iterate; else fall through with r9 as the last limb
63: C Finish software pipeline
64: .Lend1: mullw r10,r9,r6 C last limb is in r9; r11/r0 hold the product of the limb before it
65: adde r30,r11,r12 C second-to-last result limb
66: mulhwu r12,r9,r6 C r12 = high word of the last product
67: stw r30,4(r3) C store second-to-last result limb
68: adde r31,r10,r0 C last result limb
69: stwu r31,8(r3) C store last result limb
70: addze r3,r12 C r3 = final high word + carry
71: lmw r30,-8(r1) C restore r30 and r31 from where the stmw above saved them
72: blr
73: .Lend2: mullw r11,r8,r6 C last limb is in r8; r10/r12 hold the product of the limb before it
74: adde r31,r10,r0 C second-to-last result limb
75: mulhwu r0,r8,r6 C r0 = high word of the last product
76: stwu r31,8(r3) C store it and advance r3 two limbs
77: adde r30,r11,r12 C last result limb
78: stw r30,4(r3) C store last result limb
79: addze r3,r0 C r3 = final high word + carry
80: lmw r30,-8(r1) C restore r30 and r31 from where the stmw above saved them
81: blr
82: .Lend3: mullw r11,r8,r6 C size == 1: low word of the only product
83: stw r11,4(r3) C 4(r3) is the original res_ptr because of the -4 bias above
84: mulhwu r3,r8,r6 C r3 = high word of the only product
85: blr C r30/r31 were never saved on this path, so there is nothing to restore
86: EPILOGUE(mpn_mul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>