[BACK]Return to sub_n.s CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / m88k

Annotation of OpenXM_contrib/gmp/mpn/m88k/sub_n.s, Revision 1.1

1.1     ! maekawa     1: ; mc88100 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
        !             2: ; store difference in a third limb vector.
        !             3:
        !             4: ; Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
        !             5:
        !             6: ; This file is part of the GNU MP Library.
        !             7:
        !             8: ; The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: ; it under the terms of the GNU Library General Public License as published by
        !            10: ; the Free Software Foundation; either version 2 of the License, or (at your
        !            11: ; option) any later version.
        !            12:
        !            13: ; The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: ; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: ; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
        !            16: ; License for more details.
        !            17:
        !            18: ; You should have received a copy of the GNU Library General Public License
        !            19: ; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: ; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: ; MA 02111-1307, USA.
        !            22:
        !            23:
        !            24: ; INPUT PARAMETERS
        !            25: ; res_ptr      r2
        !            26: ; s1_ptr       r3
        !            27: ; s2_ptr       r4
        !            28: ; size         r5
        !            29:
        !            30: ; This code has been optimized to run one instruction per clock, avoiding
        !            31: ; load stalls and writeback contention.  As a result, the instruction
        !            32: ; order is not always natural.
        !            33:
        !            34: ; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
        !            35: ; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
        !            36:
        !            37:        text
        !            38:        align    16
        !            39:        global   ___mpn_sub_n
        !            40: ___mpn_sub_n:
        !            41:        ld      r6,r3,0                 ; read first limb from s1_ptr
        !            42:        extu    r10,r5,3                ; r10 = size >> 3: loop counter tested by the bcnd.n on r10 below
        !            43:        ld      r7,r4,0                 ; read first limb from s2_ptr
        !            44:
        !            45:        subu    r5,r0,r5                ; r5 = -size
        !            46:        mak     r5,r5,3<4>              ; r5 = ((-size) & 7) * 16: byte offset of the entry point past Lbase
        !            47:        bcnd.n  eq0,r5,Lzero            ; size is a multiple of 8: enter at Lzero
        !            48:        subu.co r0,r0,r0                ; initialize carry (delay slot of the bcnd.n above)
        !            49:
        !            50:        or      r12,r0,lo16(Lbase)      ; r12 = low 16 bits of the address of Lbase
        !            51:        or.u    r12,r12,hi16(Lbase)     ; merge in the high 16 bits of the address of Lbase
        !            52:        addu    r12,r12,r5              ; r12 is address for entering in loop
        !            53:
        !            54:        extu    r5,r5,2                 ; divide by 4: one 4-byte limb per 16-byte instruction group skipped
        !            55:        subu    r2,r2,r5                ; adjust res_ptr
        !            56:        subu    r3,r3,r5                ; adjust s1_ptr
        !            57:        subu    r4,r4,r5                ; adjust s2_ptr
        !            58:
        !            59:        or      r8,r6,r0                ; copy first s1 limb to r8 (limb groups alternate between r6/r7 and r8/r9)
        !            60:
        !            61:        jmp.n   r12                     ; jump into the unrolled loop at the computed entry point
        !            62:         or     r9,r7,r0                ; delay slot: copy first s2 limb to r9
        !            63:
        !            64: Loop:  addu    r3,r3,32                ; advance s1_ptr by 32 bytes (8 limbs)
        !            65:        st      r8,r2,28                ; store the last difference of the previous pass before res_ptr advances
        !            66:        addu    r4,r4,32                ; advance s2_ptr by 32 bytes (8 limbs)
        !            67:        ld      r6,r3,0                 ; preload the s1 limb at offset 0 for this pass
        !            68:        addu    r2,r2,32                ; advance res_ptr by 32 bytes (8 limbs)
        !            69:        ld      r7,r4,0                 ; preload the s2 limb at offset 0 for this pass
        !            70: Lzero: subu    r10,r10,1               ; subtract 0 + 8r limbs (adj loop cnt)
        !            71: Lbase: ld      r8,r3,4                 ; Lbase + 16*k is the entry point for k = (-size) & 7; loads run one limb ahead
        !            72:        subu.cio r6,r6,r7               ; subtract limb at offset 0, borrow propagated through the carry bit
        !            73:        ld      r9,r4,4
        !            74:        st      r6,r2,0
        !            75:        ld      r6,r3,8                 ; subtract 7 + 8r limbs
        !            76:        subu.cio r8,r8,r9
        !            77:        ld      r7,r4,8
        !            78:        st      r8,r2,4
        !            79:        ld      r8,r3,12                ; subtract 6 + 8r limbs
        !            80:        subu.cio r6,r6,r7
        !            81:        ld      r9,r4,12
        !            82:        st      r6,r2,8
        !            83:        ld      r6,r3,16                ; subtract 5 + 8r limbs
        !            84:        subu.cio r8,r8,r9
        !            85:        ld      r7,r4,16
        !            86:        st      r8,r2,12
        !            87:        ld      r8,r3,20                ; subtract 4 + 8r limbs
        !            88:        subu.cio r6,r6,r7
        !            89:        ld      r9,r4,20
        !            90:        st      r6,r2,16
        !            91:        ld      r6,r3,24                ; subtract 3 + 8r limbs
        !            92:        subu.cio r8,r8,r9
        !            93:        ld      r7,r4,24
        !            94:        st      r8,r2,20
        !            95:        ld      r8,r3,28                ; subtract 2 + 8r limbs
        !            96:        subu.cio r6,r6,r7
        !            97:        ld      r9,r4,28
        !            98:        st      r6,r2,24
        !            99:        bcnd.n  ne0,r10,Loop            ; subtract 1 + 8r limbs
        !           100:         subu.cio r8,r8,r9              ; delay slot: subtract limb at offset 28, runs whether or not the branch is taken
        !           101:
        !           102:        st      r8,r2,28                ; store most significant limb
        !           103:
        !           104:        addu.ci r2,r0,r0                ; return carry-out from most sign. limb
        !           105:        jmp.n    r1                     ; return to caller via r1
        !           106:         xor    r2,r2,1                 ; delay slot: invert the carry bit, so r2 = 1 if the subtraction borrowed, else 0

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>