Annotation of OpenXM_contrib/gmp/mpn/powerpc32/mul_1.asm, Revision 1.1.1.2
1.1 maekawa 1: dnl PowerPC-32 mpn_mul_1 -- Multiply a limb vector with a limb and store
2: dnl the result in a second limb vector.
3:
1.1.1.2 ! ohara 4: dnl Copyright 1995, 1997, 2000, 2002 Free Software Foundation, Inc.
1.1 maekawa 5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published by
10: dnl the Free Software Foundation; either version 2.1 of the License, or (at your
11: dnl option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23:
24: dnl INPUT PARAMETERS
25: dnl res_ptr r3
26: dnl s1_ptr r4
27: dnl size r5
28: dnl s2_limb r6
29:
30: dnl This is optimized for the PPC604 but it runs decently even on PPC601. It
31: dnl has not been tested on a PPC603 since I don't have access to any such
32: dnl machines.
33:
34: include(`../config.m4')
35:
36: ASM_START()
C mpn_mul_1: multiply the size-limb vector at s1_ptr by s2_limb, store one
C 32-bit result limb per source limb at res_ptr, and leave the final high
C word plus carry in r3 before returning.
C
C Register roles inside the routine:
C   r8, r9    source limbs being multiplied, loaded alternately
C   r11, r0   low and high product words for the limb held in r8
C   r10, r12  low and high product words for the limb held in r9
C   r5, r7    result limbs about to be stored
C   CTR       limbs not yet accounted for by a bdz or bdnz
C The carry bit is cleared once by addic and then threaded through every
C adde; the closing addze folds it into the high word written to r3.
C NOTE(review): the first limb is loaded before any count check, so size is
C presumably required to be at least 1 -- confirm against callers.
37: PROLOGUE(mpn_mul_1)
38: mtctr r5 C CTR = size; each bdz or bdnz below counts off one limb
39: addi r3,r3,-4 C adjust res_ptr, it's offset before it's used
40: li r12,0 C clear upper product reg
41: addic r0,r0,0 C clear cy
42: C Start software pipeline
43: lwz r8,0(r4) C r8 = first source limb
1.1.1.2 ! ohara 44: bdz L(end3) C only one limb: take the short exit
1.1 maekawa 45: lwzu r9,4(r4) C r9 = second source limb, s1_ptr advances by 4
46: mullw r11,r8,r6 C r11 = low word of r8 times s2_limb
47: mulhwu r0,r8,r6 C r0 = high word of r8 times s2_limb
1.1.1.2 ! ohara 48: bdz L(end1) C exactly two limbs: skip the loop
1.1 maekawa 49: C Software pipelined main loop
C Each pass stores two result limbs: one at offset 4 without moving res_ptr,
C then one at offset 8 with update. Loads of s1_ptr follow the same pattern.
1.1.1.2 ! ohara 50: L(oop): lwz r8,4(r4) C r8 = next limb, s1_ptr not yet updated
1.1 maekawa 51: mullw r10,r9,r6 C r10 = low word of r9 times s2_limb
1.1.1.2 ! ohara 52: adde r5,r11,r12 C result limb = low word + previous high word + cy
1.1 maekawa 53: mulhwu r12,r9,r6 C r12 = high word of r9 times s2_limb
1.1.1.2 ! ohara 54: stw r5,4(r3) C store result limb, res_ptr unchanged
! 55: bdz L(end2) C r8 holds the last limb: finish in end2
1.1 maekawa 56: lwzu r9,8(r4) C r9 = limb after r8, s1_ptr advances by 8
57: mullw r11,r8,r6 C r11 = low word of r8 times s2_limb
1.1.1.2 ! ohara 58: adde r7,r10,r0 C result limb = low word + previous high word + cy
1.1 maekawa 59: mulhwu r0,r8,r6 C r0 = high word of r8 times s2_limb
1.1.1.2 ! ohara 60: stwu r7,8(r3) C store result limb, res_ptr advances by 8
! 61: bdnz L(oop) C more limbs: loop; otherwise r9 is the last limb, fall into end1
1.1 maekawa 62: C Finish software pipeline
C end1: r11 and r0 hold the product words of the limb in r8, r12 the older
C high word, and r9 the last limb still to be multiplied.
1.1.1.2 ! ohara 63: L(end1):
! 64: mullw r10,r9,r6 C r10 = low word of last limb times s2_limb
! 65: adde r5,r11,r12 C pending low word + older high word + cy
1.1 maekawa 66: mulhwu r12,r9,r6 C r12 = high word of last limb times s2_limb
1.1.1.2 ! ohara 67: stw r5,4(r3) C store second-to-last result limb
! 68: adde r7,r10,r0 C last low word + previous high word + cy
! 69: stwu r7,8(r3) C store last result limb
1.1 maekawa 70: addze r3,r12 C r3 = last high word + cy
71: blr
C end2: r10 and r12 hold the product words of the limb in r9, r0 the older
C high word, and r8 the last limb still to be multiplied.
1.1.1.2 ! ohara 72: L(end2):
! 73: mullw r11,r8,r6 C r11 = low word of last limb times s2_limb
! 74: adde r7,r10,r0 C pending low word + older high word + cy
1.1 maekawa 75: mulhwu r0,r8,r6 C r0 = high word of last limb times s2_limb
1.1.1.2 ! ohara 76: stwu r7,8(r3) C store second-to-last result limb, res_ptr advances by 8
! 77: adde r5,r11,r12 C last low word + previous high word + cy
! 78: stw r5,4(r3) C store last result limb
1.1 maekawa 79: addze r3,r0 C r3 = last high word + cy
80: blr
C end3: single-limb case, reached before any product has been formed.
1.1.1.2 ! ohara 81: L(end3):
! 82: mullw r11,r8,r6 C r11 = low word of the only limb times s2_limb
1.1 maekawa 83: stw r11,4(r3) C store it as the only result limb
84: mulhwu r3,r8,r6 C r3 = high word of the product
85: blr
86: EPILOGUE(mpn_mul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>