Annotation of OpenXM_contrib/gmp/mpn/alpha/ev5/add_n.s, Revision 1.1
1.1 ! maekawa 1: # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
! 2: # store sum in a third limb vector.
! 3:
! 4: # Copyright (C) 1995 Free Software Foundation, Inc.
! 5:
! 6: # This file is part of the GNU MP Library.
! 7:
! 8: # The GNU MP Library is free software; you can redistribute it and/or modify
! 9: # it under the terms of the GNU Library General Public License as published by
! 10: # the Free Software Foundation; either version 2 of the License, or (at your
! 11: # option) any later version.
! 12:
! 13: # The GNU MP Library is distributed in the hope that it will be useful, but
! 14: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 16: # License for more details.
! 17:
! 18: # You should have received a copy of the GNU Library General Public License
! 19: # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: # MA 02111-1307, USA.
! 22:
! 23:
! 24: # INPUT PARAMETERS (registers as used by the code below)
! 25: # res_ptr $16 -- where the sum limbs are stored
! 26: # s1_ptr $17 -- first source limb vector
! 27: # s2_ptr $18 -- second source limb vector
! 28: # size $19 -- number of limbs to add (> 0 per the file header)
! 29: # RESULT: $0 = carry out of the last limb added (0 or 1)
! 30: .set noreorder # request that the assembler keep instructions in the written order
! 31: .set noat # $28 (the assembler temporary) is used explicitly below
! 32: .text
! 33: .align 3 # 2^3 = 8 bytes with the Alpha assembler's power-of-two .align
! 34: .globl __mpn_add_n
! 35: .ent __mpn_add_n
! 36: __mpn_add_n:
! 37: .frame $30,0,$26,0 # frame register $30, frame size 0, return address in $26
! 38: # Scratch use: $0-$3 = s2 limbs, $4-$7 = s1 limbs, $20-$23 = sum limbs, $28 = s2 limb + cy, $8 = carry out of that cy add, $25 = cy
! 39: or $31,$31,$25 # clear cy ($31 reads as zero)
! 40: subq $19,4,$19 # decr loop cnt; $19 = size - 4
! 41: blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
! 42: # Start software pipeline for 1st loop: load the first group of 4 limb pairs and start adds 1 and 2
! 43: ldq $0,0($18) # s2 limb 0
! 44: ldq $1,8($18) # s2 limb 1
! 45: ldq $4,0($17) # s1 limb 0
! 46: ldq $5,8($17) # s1 limb 1
! 47: addq $17,32,$17 # update s1_ptr (limbs 2 and 3 are now at -16 and -8)
! 48: ldq $2,16($18) # s2 limb 2 (s2_ptr not yet updated)
! 49: addq $0,$4,$20 # 1st main add (no carry-in yet, cy was just cleared)
! 50: ldq $3,24($18) # s2 limb 3
! 51: subq $19,4,$19 # decr loop cnt; >= 0 means another full group of 4 follows
! 52: ldq $6,-16($17) # s1 limb 2
! 53: cmpult $20,$0,$25 # compute cy from last add: sum < addend means the add wrapped
! 54: ldq $7,-8($17) # s1 limb 3
! 55: addq $1,$25,$28 # cy add
! 56: addq $18,32,$18 # update s2_ptr
! 57: addq $5,$28,$21 # 2nd main add
! 58: cmpult $28,$25,$8 # compute cy from the cy add (kept in $8 until combined)
! 59: blt $19,.Lend1 # if less than 4 limbs remain, jump
! 60: # 1st loop handles groups of 4 limbs in a software pipeline: each pass finishes adds 3 and 4 of the current group, stores its sums, and starts adds 1 and 2 of the next group
! 61: .align 4 # 2^4 = 16-byte alignment for the loop head
! 62: .Loop: cmpult $21,$28,$25 # compute cy from last add (2nd main add)
! 63: ldq $0,0($18) # next group: s2 limb 0 (s2_ptr already points at it)
! 64: or $8,$25,$25 # combine cy from the two adds
! 65: ldq $1,8($18) # next group: s2 limb 1
! 66: addq $2,$25,$28 # cy add
! 67: ldq $4,0($17) # next group: s1 limb 0 (s1_ptr already points at it)
! 68: addq $28,$6,$22 # 3rd main add
! 69: ldq $5,8($17) # next group: s1 limb 1
! 70: cmpult $28,$25,$8 # compute cy from the cy add
! 71: cmpult $22,$28,$25 # compute cy from last add (3rd main add)
! 72: stq $20,0($16) # store sum limb 0 of the current group
! 73: or $8,$25,$25 # combine cy from the two adds
! 74: stq $21,8($16) # store sum limb 1 of the current group
! 75: addq $3,$25,$28 # cy add
! 76: addq $28,$7,$23 # 4th main add
! 77: cmpult $28,$25,$8 # compute cy from the cy add
! 78: cmpult $23,$28,$25 # compute cy from last add (4th main add)
! 79: addq $17,32,$17 # update s1_ptr
! 80: or $8,$25,$25 # combine cy from the two adds
! 81: addq $16,32,$16 # update res_ptr (sum limbs 2 and 3 are stored below at -16 and -8)
! 82: addq $0,$25,$28 # cy add
! 83: ldq $2,16($18) # next group: s2 limb 2 (s2_ptr not yet updated in this pass)
! 84: addq $4,$28,$20 # 1st main add
! 85: ldq $3,24($18) # next group: s2 limb 3
! 86: cmpult $28,$25,$8 # compute cy from the cy add
! 87: ldq $6,-16($17) # next group: s1 limb 2 (s1_ptr already updated in this pass)
! 88: cmpult $20,$28,$25 # compute cy from last add (1st main add)
! 89: ldq $7,-8($17) # next group: s1 limb 3
! 90: or $8,$25,$25 # combine cy from the two adds
! 91: subq $19,4,$19 # decr loop cnt
! 92: stq $22,-16($16) # store sum limb 2 of the group just finished
! 93: addq $1,$25,$28 # cy add
! 94: stq $23,-8($16) # store sum limb 3 of the group just finished
! 95: addq $5,$28,$21 # 2nd main add
! 96: addq $18,32,$18 # update s2_ptr
! 97: cmpult $28,$25,$8 # compute cy from the cy add
! 98: bge $19,.Loop # repeat while at least 4 more limbs follow the group just loaded
! 99: # Finish software pipeline for 1st loop: complete adds 3 and 4 of the last group and store all four sums
! 100: .Lend1: cmpult $21,$28,$25 # compute cy from last add (2nd main add)
! 101: or $8,$25,$25 # combine cy from the two adds
! 102: addq $2,$25,$28 # cy add
! 103: addq $28,$6,$22 # 3rd main add
! 104: cmpult $28,$25,$8 # compute cy from the cy add
! 105: cmpult $22,$28,$25 # compute cy from last add (3rd main add)
! 106: stq $20,0($16) # store sum limb 0
! 107: or $8,$25,$25 # combine cy from the two adds
! 108: stq $21,8($16) # store sum limb 1
! 109: addq $3,$25,$28 # cy add
! 110: addq $28,$7,$23 # 4th main add
! 111: cmpult $28,$25,$8 # compute cy from the cy add
! 112: cmpult $23,$28,$25 # compute cy from last add (4th main add)
! 113: or $8,$25,$25 # combine cy from the two adds
! 114: addq $16,32,$16 # update res_ptr
! 115: stq $22,-16($16) # store sum limb 2 (res_ptr already updated)
! 116: stq $23,-8($16) # store sum limb 3
! 117: .Lend2: addq $19,4,$19 # restore loop cnt; $19 = limbs still to add (0-3)
! 118: beq $19,.Lret # nothing left: return with the carry held in $25
! 119: # Start software pipeline for 2nd loop: preload the first remaining limb pair
! 120: ldq $0,0($18) # s2 limb
! 121: ldq $4,0($17) # s1 limb
! 122: subq $19,1,$19 # count the limb pair just loaded
! 123: beq $19,.Lend0 # only one limb left: skip the loop
! 124: # 2nd loop handles remaining 1-3 limbs, one per pass; the last limb is always finished at .Lend0
! 125: .align 4 # 2^4 = 16-byte alignment for the loop head
! 126: .Loop0: addq $0,$25,$28 # cy add
! 127: ldq $0,8($18) # load next s2 limb ($0 was consumed by the add above)
! 128: addq $4,$28,$20 # main add
! 129: ldq $4,8($17) # load next s1 limb ($4 was consumed by the add above)
! 130: addq $18,8,$18 # advance s2_ptr by one limb
! 131: cmpult $28,$25,$8 # compute cy from the cy add
! 132: addq $17,8,$17 # advance s1_ptr by one limb
! 133: stq $20,0($16) # store sum limb
! 134: cmpult $20,$28,$25 # compute cy from last add (main add)
! 135: subq $19,1,$19 # decr loop cnt
! 136: or $8,$25,$25 # combine cy from the two adds
! 137: addq $16,8,$16 # advance res_ptr by one limb
! 138: bne $19,.Loop0 # loop until the preloaded pair is the last one
! 139: .Lend0: addq $0,$25,$28 # cy add
! 140: addq $4,$28,$20 # main add
! 141: cmpult $28,$25,$8 # compute cy from the cy add
! 142: cmpult $20,$28,$25 # compute cy from last add (main add)
! 143: stq $20,0($16) # store the final sum limb
! 144: or $8,$25,$25 # combine cy from the two adds
! 145:
! 146: .Lret: or $25,$31,$0 # return cy (copy $25 into $0)
! 147: ret $31,($26),1 # return through the address in $26
! 148: .end __mpn_add_n
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>