Annotation of OpenXM_contrib/gmp/mpn/powerpc64/addsub_n.asm, Revision 1.1
1.1 ! maekawa 1: # PowerPC-64 mpn_addsub_n -- Simultaneous add and sub.
! 2:
! 3: # Copyright (C) 1999, 2000 Free Software Foundation, Inc.
! 4:
! 5: # This file is part of the GNU MP Library.
! 6:
! 7: # The GNU MP Library is free software; you can redistribute it and/or modify
! 8: # it under the terms of the GNU Lesser General Public License as published by
! 9: # the Free Software Foundation; either version 2.1 of the License, or (at your
! 10: # option) any later version.
! 11:
! 12: # The GNU MP Library is distributed in the hope that it will be useful, but
! 13: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 15: # License for more details.
! 16:
! 17: # You should have received a copy of the GNU Lesser General Public License
! 18: # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: # MA 02111-1307, USA.
! 21:
! 22:
! 23: # INPUT PARAMETERS
! 24: # r1_ptr r3 (sums are stored here)
! 25: # r2_ptr r4 (differences are stored here)
! 26: # s1_ptr r5, s2_ptr r6
! 27: # size r7
! 28:
! 29: include(`asm-syntax.m4')
! 30:
! 31: define(SAVE_BORROW_RESTORE_CARRY,
! 32: `sldi $1,$1,63
! 33: adde $1,$1,$1') # Swaps CA with bit 0 of the argument register: sldi moves the saved bit up to bit 63, then adde (reg + reg + CA) carries that bit out into CA while the old CA lands in bit 0.
! 34: define(SAVE_CARRY_RESTORE_BORROW,
! 35: `sldi $1,$1,63
! 36: adde $1,$1,$1') # Swaps CA with bit 0 of the argument register (shift the saved bit to bit 63, then reg + reg + CA). The exchange is symmetric, so this expansion is the same as that of the other save/restore macro.
! 37:
! 38: # 19991117
! 39:
! 40: # This is just crafted for testing some ideas, and verifying that we can make
! 41: # it run fast. It runs at 2.55 cycles/limb on the 630, which is very good.
! 42: # We should play a little with the schedule. No time has been spent on that.
! 43:
! 44: # To finish this, the loop warm-up and cool-down code needs to be written,
! 45: # and the result needs to be tested. Also, the proper calling sequence should
! 46: # be used.
! 47:
! 48: # Arguments: r1p (r3), r2p (r4), s1p (r5), s2p (r6), n (r7)
! 49: # Uses r0, r3-r12 and r14-r17 (r14-r19 are saved and restored)
! 50:
! 51: ASM_START()
! 52: PROLOGUE(mpn_addsub_n) # Unfinished experiment: per limb, a sum is stored via r3 and a difference via r4; sources are read via r5 and r6; r7 holds the limb count.
! 53: std r14,-64(1) # save r14-r19 at negative offsets from r1; r1 itself is not adjusted
! 54: std r15,-56(1)
! 55: std r16,-48(1)
! 56: std r17,-40(1)
! 57: std r18,-32(1) # NOTE(review): r18 and r19 are saved and restored but never otherwise used in this routine
! 58: std r19,-24(1)
! 59:
! 60: srdi r7,r7,2 # r7 = size / 4, the loop handles four limbs per pass. NOTE(review): leftover limbs (size mod 4) are not processed, and a size below 4 puts 0 in CTR
! 61: mtctr r7 # CTR = number of four-limb passes
! 62: addic r0,r0,0 # clear cy (CA = 0 for the first adde). NOTE(review): bit 0 of r0 is not initialized here, yet the first macro swap turns it into CA for the first subfe
! 63: addi r3,r3,-8 # bias the sum pointer by -8; stores below use offsets 8..32
! 64: addi r4,r4,-8 # bias the difference pointer by -8 likewise. NOTE(review): r5 and r6 are not biased although the loads use the same 8..32 offsets
! 65:
! 66: .Loop: # NOTE(review): r8-r11 and r14-r17 are read below before any load on the first pass (the warm-up code is not written yet)
! 67: adde r12,r8,r9 # sum limb 1 = r8 + r9 + CA
! 68: std r12,8(r3)
! 69: adde r12,r10,r11 # sum limb 2, continues the add carry chain
! 70: std r12,16(r3)
! 71:
! 72: SAVE_CARRY_RESTORE_BORROW(r0) # park the add carry in r0 and bring the subtract CA back
! 73:
! 74: subfe r12,r8,r9 # difference limb 1 = r9 - r8 with borrow through CA. NOTE(review): with r8 from s1 and r9 from s2 this is s2 - s1; confirm the intended operand order
! 75: std r12,8(r4)
! 76: ld r8,8(r5) # s1 L 1
! 77: ld r9,8(r6) # s2 L 1
! 78: subfe r12,r10,r11 # difference limb 2
! 79: std r12,16(r4)
! 80: ld r10,16(r5) # s1 L 2
! 81: ld r11,16(r6) # s2 L 2
! 82: # pair -------------------------
! 83: subfe r12,r14,r15 # difference limb 3, the borrow chain continues from limb 2
! 84: std r12,24(r4)
! 85: subfe r12,r16,r17 # difference limb 4
! 86: stdu r12,32(r4) # store and advance r4 by 32 bytes (four limbs)
! 87:
! 88: SAVE_BORROW_RESTORE_CARRY(r0) # park the subtract CA in r0 and bring the add carry back
! 89:
! 90: adde r12,r14,r15 # sum limb 3, continues the add carry chain from limb 2
! 91: std r12,24(r3)
! 92: ld r14,24(r5) # s1 L 3
! 93: ld r15,24(r6) # s2 L 3
! 94: adde r12,r16,r17 # sum limb 4; CA holds the add carry when the next pass starts
! 95: stdu r12,32(r3) # store and advance r3 by 32 bytes
! 96: ldu r16,32(r5) # s1 L 4
! 97: ldu r17,32(r6) # s2 L 4
! 98: bdnz .Loop # decrement CTR and loop while it is nonzero
! 99:
! 100: ld r14,-64(1) # restore r14-r19 from the slots written at entry
! 101: ld r15,-56(1)
! 102: ld r16,-48(1)
! 103: ld r17,-40(1)
! 104: ld r18,-32(1)
! 105: ld r19,-24(1)
! 106: blr # NOTE(review): nothing is placed in r3 as a carry or borrow result; r3 still holds the advanced sum pointer
! 107: EPILOGUE(mpn_addsub_n)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>