[BACK]Return to submul_1.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / power

Annotation of OpenXM_contrib/gmp/mpn/power/submul_1.asm, Revision 1.1

1.1     ! ohara       1: dnl  IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
        !             2: dnl  the result from a second limb vector.
        !             3:
        !             4: dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
        !             5:
        !             6: dnl  This file is part of the GNU MP Library.
        !             7:
        !             8: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: dnl  it under the terms of the GNU Lesser General Public License as published
        !            10: dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
        !            11: dnl  your option) any later version.
        !            12:
        !            13: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: dnl  License for more details.
        !            17:
        !            18: dnl  You should have received a copy of the GNU Lesser General Public License
        !            19: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: dnl  MA 02111-1307, USA.
        !            22:
        !            23:
        !            24: dnl  INPUT PARAMETERS
        !            25: dnl  res_ptr   r3
        !            26: dnl  s1_ptr    r4
        !            27: dnl  size      r5
        !            28: dnl  s2_limb   r6
        !            29:
        !            30: dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
        !            31: dnl  instruction.  To obtain that operation, we have to use the 32x32->64
        !            32: dnl  signed multiplication instruction, and add the appropriate compensation to
        !            33: dnl  the high limb of the result.  We add the multiplicand if the multiplier
        !            34: dnl  has its most significant bit set, and we add the multiplier if the
        !            35: dnl  multiplicand has its most significant bit set.  We need to preserve the
        !            36: dnl  carry flag between each iteration, so we have to compute the compensation
        !            37: dnl  carefully (the natural srai+and approach doesn't work).  Since all POWER
        !            38: dnl  processors can branch in zero cycles, we use conditional branches for the
        !            38: dnl  additions.
        !            39:
        !            40: include(`../config.m4')
        !            41:
        !            42: ASM_START()
        !            43: PROLOGUE(mpn_submul_1)
        !            44:        cal     3,-4(3)         C r3 = res_ptr - 4 so that 4(3) addresses the current res limb
        !            45:        l       0,0(4)          C r0 = first s1 limb
        !            46:        cmpi    0,6,0           C cr0 = sign of s2_limb (tested by blt Lneg below)
        !            47:        mtctr   5               C CTR = size, the number of limbs to process
        !            48:        mul     9,0,6           C signed multiply: high limb in r9, low limb in MQ
        !            49:        srai    7,0,31          C r7 = sign mask of s1 limb (usable here: no carry is live yet)
        !            50:        and     7,7,6           C r7 = s2_limb if s1 limb has its msb set, else 0
        !            51:        mfmq    11              C r11 = low limb of the product
        !            52:        cax     9,9,7           C adjust high limb for negative limb from s1
        !            53:        l       7,4(3)          C r7 = res limb
        !            54:        sf      8,11,7          C subtract from res_limb: r8 = r7 - r11
        !            55:        a       11,8,11         C invert cy so that carry set means borrow (r11 is junk)
        !            56:        blt     Lneg            C s2_limb has its msb set: take the Lneg/Lnloop path
        !            57: Lpos:  bdz     Lend            C decrement CTR; size was 1, so only the final store remains
        !            58:
        !            59: Lploop:        lu      0,4(4)          C advance s1_ptr and load next s1 limb (loop is unrolled 2x)
        !            60:        stu     8,4(3)          C store previous result limb and advance res_ptr
        !            61:        cmpi    0,0,0           C cr0 = sign of s1 limb (tested by bge Lp0 below)
        !            62:        mul     10,0,6          C high limb in r10, low limb in MQ
        !            63:        mfmq    0               C r0 = low limb (s1 limb no longer needed, sign is in cr0)
        !            64:        ae      11,0,9          C low limb + old_cy_limb + old cy
        !            65:        l       7,4(3)          C r7 = res limb
        !            66:        aze     10,10           C propagate cy to new cy_limb
        !            67:        sf      8,11,7          C subtract from res_limb: r8 = r7 - r11
        !            68:        a       11,8,11         C invert cy (r11 is junk)
        !            69:        bge     Lp0             C s1 limb non-negative: no adjustment needed
        !            70:        cax     10,10,6         C adjust high limb for negative limb from s1
        !            71: Lp0:   bdz     Lend0           C done after first half: cy_limb is in r10
        !            72:        lu      0,4(4)          C second half: same steps with r9 and r10 swapped
        !            73:        stu     8,4(3)          C store previous result limb and advance res_ptr
        !            74:        cmpi    0,0,0           C cr0 = sign of s1 limb (tested by bge Lp1 below)
        !            75:        mul     9,0,6           C high limb in r9, low limb in MQ
        !            76:        mfmq    0               C r0 = low limb
        !            77:        ae      11,0,10         C low limb + old_cy_limb + old cy
        !            78:        l       7,4(3)          C r7 = res limb
        !            79:        aze     9,9             C propagate cy to new cy_limb
        !            80:        sf      8,11,7          C subtract from res_limb: r8 = r7 - r11
        !            81:        a       11,8,11         C invert cy (r11 is junk)
        !            82:        bge     Lp1             C s1 limb non-negative: no adjustment needed
        !            83:        cax     9,9,6           C adjust high limb for negative limb from s1
        !            84: Lp1:   bdn     Lploop          C decrement CTR and loop while limbs remain
        !            85:
        !            86:        b       Lend            C done after second half: cy_limb is already in r9
        !            87:
        !            88: Lneg:  cax     9,9,0           C s2_limb is negative: adjust high limb by adding the s1 limb
        !            89:        bdz     Lend            C decrement CTR; size was 1, so only the final store remains
        !            90: Lnloop:        lu      0,4(4)          C advance s1_ptr and load next s1 limb (loop is unrolled 2x)
        !            91:        stu     8,4(3)          C store previous result limb and advance res_ptr
        !            92:        cmpi    0,0,0           C cr0 = sign of s1 limb (tested by bge Ln0 below)
        !            93:        mul     10,0,6          C high limb in r10, low limb in MQ
        !            94:        mfmq    7               C r7 = low limb (r0 keeps the s1 limb for the adjustment below)
        !            95:        ae      11,7,9          C low limb + old_cy_limb + old cy
        !            96:        l       7,4(3)          C r7 = res limb
        !            97:        ae      10,10,0         C propagate cy to new cy_limb and add s1 limb (s2_limb negative)
        !            98:        sf      8,11,7          C subtract from res_limb: r8 = r7 - r11
        !            99:        a       11,8,11         C invert cy (r11 is junk)
        !           100:        bge     Ln0             C s1 limb non-negative: no adjustment needed
        !           101:        cax     10,10,6         C adjust high limb for negative limb from s1
        !           102: Ln0:   bdz     Lend0           C done after first half: cy_limb is in r10
        !           103:        lu      0,4(4)          C second half: same steps with r9 and r10 swapped
        !           104:        stu     8,4(3)          C store previous result limb and advance res_ptr
        !           105:        cmpi    0,0,0           C cr0 = sign of s1 limb (tested by bge Ln1 below)
        !           106:        mul     9,0,6           C high limb in r9, low limb in MQ
        !           107:        mfmq    7               C r7 = low limb (r0 keeps the s1 limb for the adjustment below)
        !           108:        ae      11,7,10         C low limb + old_cy_limb + old cy
        !           109:        l       7,4(3)          C r7 = res limb
        !           110:        ae      9,9,0           C propagate cy to new cy_limb and add s1 limb (s2_limb negative)
        !           111:        sf      8,11,7          C subtract from res_limb: r8 = r7 - r11
        !           112:        a       11,8,11         C invert cy (r11 is junk)
        !           113:        bge     Ln1             C s1 limb non-negative: no adjustment needed
        !           114:        cax     9,9,6           C adjust high limb for negative limb from s1
        !           115: Ln1:   bdn     Lnloop          C decrement CTR and loop while limbs remain
        !           116:        b       Lend            C done after second half: cy_limb is already in r9
        !           117:
        !           118: Lend0: cal     9,0(10)         C r9 = r10: move cy_limb into the register Lend expects
        !           119: Lend:  st      8,4(3)          C store the last result limb
        !           120:        aze     3,9             C r3 = cy_limb + cy; presumably the return value, confirm against ABI
        !           121:        br                      C return to caller
        !           122: EPILOGUE(mpn_submul_1)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>