Annotation of OpenXM_contrib/gmp/mpn/powerpc32/submul_1.asm, Revision 1.1
1.1 ! maekawa 1: dnl PowerPC-32 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
! 2: dnl the result from a second limb vector.
! 3:
! 4: dnl Copyright (C) 1995, 1997, 1998, 2000 Free Software Foundation, Inc.
! 5:
! 6: dnl This file is part of the GNU MP Library.
! 7:
! 8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
! 9: dnl it under the terms of the GNU Lesser General Public License as published by
! 10: dnl the Free Software Foundation; either version 2.1 of the License, or (at your
! 11: dnl option) any later version.
! 12:
! 13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
! 14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: dnl License for more details.
! 17:
! 18: dnl You should have received a copy of the GNU Lesser General Public License
! 19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: dnl MA 02111-1307, USA.
! 22:
! 23:
! 24: dnl INPUT PARAMETERS
! 25: dnl res_ptr r3
! 26: dnl s1_ptr r4
! 27: dnl size r5
! 28: dnl s2_limb r6
! 29:
! 30: dnl This is optimized for the PPC604. It has not been tested on PPC601, PPC603
! 31: dnl or PPC750 since I don't have access to any such machines.
! 32:
! 33: include(`../config.m4')
! 34:
! 35: ASM_START()
! 36: PROLOGUE(mpn_submul_1) C In: r3 = res_ptr, r4 = s1_ptr, r5 = size, r6 = s2_limb. Each res limb has s1 limb * s2_limb (plus running carry) subtracted from it; the carry-out limb is left in r3.
! 37: cmpi cr0,r5,9 C more than 9 limbs?
! 38: bgt cr0,.Lbig C branch if more than 9 limbs
! 39: C Short path, size <= 9: one limb per iteration. The first limb is handled before any count check, so size >= 1 is assumed.
! 40: mtctr r5 C CTR = size, the loop counter
! 41: lwz r0,0(r4) C r0 = first s1 limb
! 42: mullw r7,r0,r6 C r7 = low word of r0 * s2_limb
! 43: mulhwu r10,r0,r6 C r10 = high word of the unsigned product, the running carry limb
! 44: lwz r9,0(r3) C r9 = first res limb
! 45: subfc r8,r7,r9 C r8 = r9 - r7, CA = 1 when there was no borrow
! 46: addc r7,r7,r8 C invert cy (r7 is junk), so CA = 1 now means a borrow is pending
! 47: addi r3,r3,-4 C bias res_ptr by one limb so the 4(r3) accesses below address the current limb
! 48: bdz .Lend C decrement CTR, finish if that was the only limb
! 49: .Lloop:
! 50: lwzu r0,4(r4) C r0 = next s1 limb, r4 advances by 4
! 51: stwu r8,4(r3) C store the previous result limb, r3 advances by 4
! 52: mullw r8,r0,r6 C r8 = low word of the product
! 53: adde r7,r8,r10 C r7 = low word + previous carry limb + CA (pending borrow)
! 54: mulhwu r10,r0,r6 C r10 = high word of the product
! 55: lwz r9,4(r3) C r9 = res limb to update
! 56: addze r10,r10 C fold the carry out of the adde into the new carry limb
! 57: subfc r8,r7,r9 C r8 = r9 - r7, CA = 1 when there was no borrow
! 58: addc r7,r7,r8 C invert cy (r7 is junk)
! 59: bdnz .Lloop C loop until CTR reaches zero
! 60: .Lend: stw r8,4(r3) C store the last result limb
! 61: addze r3,r10 C r3 = carry limb + pending borrow, the value left in r3 at return
! 62: blr
! 63: C Long path, size > 9: first limb alone, then 4 limbs per iteration, then the remaining 0 to 3 limbs.
! 64: .Lbig: stmw r30,-32(r1) C save r30 and r31 at -32(r1); r1 is not adjusted. NOTE(review): relies on memory below r1 being safe to use, confirm for the target ABI
! 65: addi r5,r5,-1 C r5 = size - 1, limbs left after the first one
! 66: srwi r0,r5,2 C r0 = (size - 1) >> 2, number of unrolled iterations (at least 2 since size > 9 here)
! 67: mtctr r0 C CTR = unrolled iteration count
! 68: C First limb: same subtract then invert-carry step as in the short path.
! 69: lwz r7,0(r4) C r7 = first s1 limb
! 70: mullw r8,r7,r6 C r8 = low word of the product
! 71: mulhwu r0,r7,r6 C r0 = high word, the running carry limb (cy_limb)
! 72: lwz r7,0(r3) C r7 = first res limb
! 73: subfc r7,r8,r7 C r7 = res limb - r8, CA = 1 when there was no borrow
! 74: addc r8,r8,r7 C invert cy (r8 is junk), so CA = 1 now means a borrow is pending
! 75: stw r7,0(r3) C store the first result limb
! 76: C Unrolled loop. On entry to each iteration r0 = cy_limb and CA = pending borrow. The multiplies, loads and stores in between leave CA alone, so the carry chains survive the interleaving.
! 77: .LloopU:
! 78: lwz r7,4(r4) C r7, r12, r30, r31 = next four s1 limbs
! 79: lwz r12,8(r4)
! 80: lwz r30,12(r4)
! 81: lwzu r31,16(r4) C also advances r4 by 16
! 82: mullw r8,r7,r6 C r8, r9, r10, r11 = low words of the four products
! 83: mullw r9,r12,r6
! 84: mullw r10,r30,r6
! 85: mullw r11,r31,r6
! 86: adde r8,r8,r0 C add cy_limb and the pending borrow in CA
! 87: mulhwu r0,r7,r6 C r0 = high word of product 0
! 88: lwz r7,4(r3) C r7 = res limb 0 (the s1 value is no longer needed)
! 89: adde r9,r9,r0 C r9 += high word 0 + carry
! 90: mulhwu r0,r12,r6 C r0 = high word of product 1
! 91: lwz r12,8(r3) C r12 = res limb 1
! 92: adde r10,r10,r0 C r10 += high word 1 + carry
! 93: mulhwu r0,r30,r6 C r0 = high word of product 2
! 94: lwz r30,12(r3) C r30 = res limb 2
! 95: adde r11,r11,r0 C r11 += high word 2 + carry
! 96: mulhwu r0,r31,r6 C r0 = high word of product 3
! 97: lwz r31,16(r3) C r31 = res limb 3
! 98: addze r0,r0 C new cy_limb
! 99: subfc r7,r8,r7 C start the borrow chain: r7 = res limb 0 - r8
! 100: stw r7,4(r3)
! 101: subfe r12,r9,r12 C r12 = res limb 1 - r9 - borrow
! 102: stw r12,8(r3)
! 103: subfe r30,r10,r30 C r30 = res limb 2 - r10 - borrow
! 104: stw r30,12(r3)
! 105: subfe r31,r11,r31 C r31 = res limb 3 - r11 - borrow
! 106: stwu r31,16(r3) C store and advance r3 by 16
! 107: subfe r11,r11,r11 C invert ... r11 = CA - 1, that is 0 without borrow and -1 with borrow
! 108: addic r11,r11,1 C ... carry: CA = 1 exactly when the chain borrowed (r11 is junk)
! 109: bdnz .LloopU C loop until CTR reaches zero
! 110: C Tail: handle the (size - 1) & 3 limbs not covered by the unrolled loop.
! 111: andi. r31,r5,3 C r31 = (size - 1) & 3, also sets cr0; r31 is scratch here and is reloaded by the lmw below
! 112: mtctr r31 C CTR = remaining limb count
! 113: beq cr0,.Lendx C nothing left, skip the tail loop
! 114: C Tail loop: one limb per iteration, r0 = cy_limb and CA = pending borrow.
! 115: .LloopE:
! 116: lwzu r7,4(r4) C r7 = next s1 limb, r4 advances by 4
! 117: mullw r8,r7,r6 C r8 = low word of the product
! 118: adde r8,r8,r0 C add cy_limb and the pending borrow in CA
! 119: mulhwu r0,r7,r6 C r0 = high word of the product
! 120: lwz r7,4(r3) C r7 = res limb to update
! 121: addze r0,r0 C new cy_limb
! 122: subfc r7,r8,r7 C r7 = res limb - r8, CA = 1 when there was no borrow
! 123: addc r8,r8,r7 C invert cy (r8 is junk)
! 124: stwu r7,4(r3) C store the result limb, r3 advances by 4
! 125: bdnz .LloopE C loop until CTR reaches zero
! 126: .Lendx:
! 127: addze r3,r0 C r3 = cy_limb + pending borrow, the value left in r3 at return
! 128: lmw r30,-32(r1) C reload r30 and r31 from the save slot written at .Lbig
! 129: blr
! 130: EPILOGUE(mpn_submul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>