[BACK]Return to addsub_n.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / powerpc64

Annotation of OpenXM_contrib/gmp/mpn/powerpc64/addsub_n.asm, Revision 1.1.1.1

1.1       maekawa     1: # PowerPC-64 mpn_addsub_n -- Simultaneous add and sub.
                      2:
                      3: # Copyright (C) 1999, 2000 Free Software Foundation, Inc.
                      4:
                      5: # This file is part of the GNU MP Library.
                      6:
                      7: # The GNU MP Library is free software; you can redistribute it and/or modify
                      8: # it under the terms of the GNU Lesser General Public License as published by
                      9: # the Free Software Foundation; either version 2.1 of the License, or (at your
                     10: # option) any later version.
                     11:
                     12: # The GNU MP Library is distributed in the hope that it will be useful, but
                     13: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     15: # License for more details.
                     16:
                     17: # You should have received a copy of the GNU Lesser General Public License
                     18: # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: # MA 02111-1307, USA.
                     21:
                     22:
                     23: # INPUT PARAMETERS
                     24: # res1_ptr     r3      (receives the adde results)
                     25: # res2_ptr     r4      (receives the subfe results)
                     26: # s1_ptr       r5
                     27: # s2_ptr       r6
                     28: # size         r7
                     29: include(`asm-syntax.m4')
                     30: # Two helpers with identical bodies: each exchanges the CA bit with bit 0 of the register argument.
                     31: define(SAVE_BORROW_RESTORE_CARRY,
                     32:        `sldi $1,$1,63
                     33:        adde $1,$1,$1')         # sldi parks bit 0 in bit 63; adde then leaves reg = old CA and CA = old bit 0
                     34: define(SAVE_CARRY_RESTORE_BORROW,
                     35:        `sldi $1,$1,63
                     36:        adde $1,$1,$1')         # same two instructions as the helper above; only the name differs
                     37:
                     38: # 19991117
                     39:
                     40: # This is just crafted for testing some ideas, and verifying that we can make
                     41: # it run fast.  It runs at 2.55 cycles/limb on the 630, which is very good.
                     42: # We should play a little with the schedule.  No time has been spent on that.
                     43:
                     44: # To finish this, the loop warm up and cool down code needs to be written,
                     45: # and the results need to be tested.  Also, the proper calling sequence should
                     46: # be used.
                     47:
                     48: #             r1p r2p s1p s2p n
                     49: # Use reg r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12
                     50: # mpn_addsub_n: each pass stores adde results for 4 limb pairs via r3 and subfe results via r4; limbs are loaded via r5 and r6, count is in r7.
                     51: ASM_START()
                     52: PROLOGUE(mpn_addsub_n)
                     53:        std     r14,-64(1)      # save r14-r19 below r1 (r1 is not adjusted)
                     54:        std     r15,-56(1)
                     55:        std     r16,-48(1)
                     56:        std     r17,-40(1)
                     57:        std     r18,-32(1)      # NOTE(review): r18/r19 are saved and restored but never used here
                     58:        std     r19,-24(1)
                     59: # NOTE(review): size % 4 leftover limbs and size < 4 (CTR = 0) are not handled in the code visible here.
                     60:        srdi    r7,r7,2         # r7 = size / 4, since one pass handles 4 limbs
                     61:        mtctr   r7              # CTR = number of passes
                     62:        addic   r0,r0,0         # clear cy; r0 keeps its entry value -- NOTE(review): r0 is never initialized, yet its bit 0 seeds the first subfe chain
                     63:        addi    r3,r3,-8        # offset first res_ptr, it's updated before it's used
                     64:        addi    r4,r4,-8        # offset second res_ptr, it's updated before it's used
                     65: # NOTE(review): r8-r11 and r14-r17 are read in the first pass before anything loads them (warm-up code is missing).
                     66: .Loop:
                     67:        adde    r12,r8,r9       # add chain, pair 1
                     68:        std     r12,8(r3)
                     69:        adde    r12,r10,r11     # add chain, pair 2
                     70:        std     r12,16(r3)
                     71: # Park the add carry in bit 0 of r0 and bring the subtract chain CA back.
                     72:        SAVE_CARRY_RESTORE_BORROW(r0)
                     73: # Subtract chain.  NOTE(review): with r8 = s1 limb and r9 = s2 limb this yields s2 - s1 -- confirm the operand order is intended.
                     74:        subfe   r12,r8,r9       # r12 = r9 - r8 with borrow (subfe subtracts the first source from the second)
                     75:        std     r12,8(r4)
                     76:        ld      r8,8(r5)        # s1 L 1 (reloaded right after its last use in this pass)
                     77:        ld      r9,8(r6)        # s2 L 1
                     78:        subfe   r12,r10,r11     # subtract chain, pair 2
                     79:        std     r12,16(r4)
                     80:        ld      r10,16(r5)      # s1 L 2
                     81:        ld      r11,16(r6)      # s2 L 2
                     82: # pair -------------------------
                     83:        subfe   r12,r14,r15     # subtract chain, pair 3
                     84:        std     r12,24(r4)
                     85:        subfe   r12,r16,r17     # subtract chain, pair 4
                     86:        stdu    r12,32(r4)      # store, r4 += 32
                     87: # Park the subtract CA in bit 0 of r0 and bring the add carry back.
                     88:        SAVE_BORROW_RESTORE_CARRY(r0)
                     89: # Add chain for pairs 3 and 4; it continues with pairs 1 and 2 at the top of the next pass.
                     90:        adde    r12,r14,r15     # add chain, pair 3
                     91:        std     r12,24(r3)
                     92:        ld      r14,24(r5)      # s1 L 3
                     93:        ld      r15,24(r6)      # s2 L 3
                     94:        adde    r12,r16,r17     # add chain, pair 4
                     95:        stdu    r12,32(r3)      # store, r3 += 32
                     96:        ldu     r16,32(r5)      # s1 L 4, r5 += 32
                     97:        ldu     r17,32(r6)      # s2 L 4, r6 += 32
                     98:        bdnz    .Loop           # decrement CTR, loop while it is nonzero
                     99: # Restore the registers saved on entry.  NOTE(review): no return value is set up before blr -- TODO confirm the intended result.
                    100:        ld      r14,-64(1)
                    101:        ld      r15,-56(1)
                    102:        ld      r16,-48(1)
                    103:        ld      r17,-40(1)
                    104:        ld      r18,-32(1)
                    105:        ld      r19,-24(1)
                    106:        blr
                    107: EPILOGUE(mpn_addsub_n)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>