[BACK]Return to add_n.s CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / m88k

Annotation of OpenXM_contrib/gmp/mpn/m88k/add_n.s, Revision 1.1

1.1     ! maekawa     1: ; mc88100 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
        !             2: ; sum in a third limb vector.
        !             3:
        !             4: ; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
        !             5:
        !             6: ; This file is part of the GNU MP Library.
        !             7:
        !             8: ; The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: ; it under the terms of the GNU Library General Public License as published by
        !            10: ; the Free Software Foundation; either version 2 of the License, or (at your
        !            11: ; option) any later version.
        !            12:
        !            13: ; The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: ; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: ; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
        !            16: ; License for more details.
        !            17:
        !            18: ; You should have received a copy of the GNU Library General Public License
        !            19: ; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: ; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: ; MA 02111-1307, USA.
        !            22:
        !            23:
        !            24: ; INPUT PARAMETERS
        !            25: ; res_ptr      r2
        !            26: ; s1_ptr       r3
        !            27: ; s2_ptr       r4
        !            28: ; size         r5
        !            29:
        !            30: ; This code has been optimized to run one instruction per clock, avoiding
        !            31: ; load stalls and writeback contention.  As a result, the instruction
        !            32: ; order is not always natural.
        !            33:
        !            34: ; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
        !            35: ; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
        !            36:
        !            37:        text
        !            38:        align    16
        !            39:        global   ___mpn_add_n
        !            40: ___mpn_add_n:                          ; res_ptr[] = s1_ptr[] + s2_ptr[] over size limbs; carry-out returned in r2
        !            41:        ld      r6,r3,0                 ; read first limb from s1_ptr
        !            42:        extu    r10,r5,3                ; r10 = size / 8 (shift right by 3); loop counter tested by bcnd.n below
        !            43:        ld      r7,r4,0                 ; read first limb from s2_ptr
        !            44:
        !            45:        subu.co r5,r0,r5                ; r5 = -size (clear carry as side effect)
        !            46:        mak     r5,r5,3<4>              ; r5 = ((-size) mod 8) * 16: byte offset of the entry slot (4 insns = 16 bytes per limb)
        !            47:        bcnd    eq0,r5,Lzero            ; size is a multiple of 8: no partial pass, enter at Lzero
        !            48:
        !            49:        or      r12,r0,lo16(Lbase)      ; r12 = low 16 bits of the address of Lbase
        !            50:        or.u    r12,r12,hi16(Lbase)     ; merge in the high 16 bits: r12 = address of Lbase
        !            51:        addu    r12,r12,r5              ; r12 is address for entering in loop
        !            52:
        !            53:        extu    r5,r5,2                 ; divide by 4: r5 = ((-size) mod 8) * 4, byte offset of the entry slot's limb
        !            54:        subu    r2,r2,r5                ; adjust res_ptr
        !            55:        subu    r3,r3,r5                ; adjust s1_ptr
        !            56:        subu    r4,r4,r5                ; adjust s2_ptr
        !            57:
        !            58:        or      r8,r6,r0                ; copy first s1 limb into r8: slots alternate between r6/r7 and r8/r9
        !            59:
        !            60:        jmp.n   r12                     ; jump into the unrolled loop; the next insn runs in the delay slot
        !            61:         or     r9,r7,r0                ; (delay slot) copy first s2 limb into r9
        !            62:
        !            63: Loop:  addu    r3,r3,32                ; advance s1_ptr by 8 limbs (plain addu, so the carry chain is not disturbed)
        !            64:        st      r8,r2,28                ; store the last sum of the previous pass (computed in the bcnd.n delay slot)
        !            65:        addu    r4,r4,32                ; advance s2_ptr by 8 limbs
        !            66:        ld      r6,r3,0                 ; first s1 limb of this pass
        !            67:        addu    r2,r2,32                ; advance res_ptr by 8 limbs
        !            68:        ld      r7,r4,0                 ; first s2 limb of this pass
        !            69: Lzero: subu    r10,r10,1               ; add 0 + 8r limbs (adj loop cnt)
        !            70: Lbase: ld      r8,r3,4                 ; slot 0 of 8; the "add k + 8r limbs" notes mark the entry slot for size mod 8 = k
        !            71:        addu.cio r6,r6,r7               ; r6 = s1 limb + s2 limb + carry-in; carry-out feeds the next slot
        !            72:        ld      r9,r4,4                 ; preload the next s2 limb while the current sum is stored
        !            73:        st      r6,r2,0                 ; store the sum just computed
        !            74:        ld      r6,r3,8                 ; add 7 + 8r limbs
        !            75:        addu.cio r8,r8,r9               ; same pattern, using the r8/r9 pair
        !            76:        ld      r7,r4,8
        !            77:        st      r8,r2,4
        !            78:        ld      r8,r3,12                ; add 6 + 8r limbs
        !            79:        addu.cio r6,r6,r7
        !            80:        ld      r9,r4,12
        !            81:        st      r6,r2,8
        !            82:        ld      r6,r3,16                ; add 5 + 8r limbs
        !            83:        addu.cio r8,r8,r9
        !            84:        ld      r7,r4,16
        !            85:        st      r8,r2,12
        !            86:        ld      r8,r3,20                ; add 4 + 8r limbs
        !            87:        addu.cio r6,r6,r7
        !            88:        ld      r9,r4,20
        !            89:        st      r6,r2,16
        !            90:        ld      r6,r3,24                ; add 3 + 8r limbs
        !            91:        addu.cio r8,r8,r9
        !            92:        ld      r7,r4,24
        !            93:        st      r8,r2,20
        !            94:        ld      r8,r3,28                ; add 2 + 8r limbs
        !            95:        addu.cio r6,r6,r7
        !            96:        ld      r9,r4,28
        !            97:        st      r6,r2,24
        !            98:        bcnd.n  ne0,r10,Loop            ; add 1 + 8r limbs
        !            99:         addu.cio r8,r8,r9              ; (delay slot) sum of the pass's last limb; stored at Loop or just below
        !           100:
        !           101:        st      r8,r2,28                ; store most significant limb
        !           102:
        !           103:        jmp.n    r1                     ; return to the address in r1; the next insn runs in the delay slot
        !           104:         addu.ci r2,r0,r0               ; return carry-out from most sign. limb

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>