Annotation of OpenXM_contrib/gmp/mpn/alpha/ev6/nails/add_n.asm, Revision 1.1
1.1 ! ohara 1: dnl Alpha ev6 nails mpn_add_n.
! 2:
! 3: dnl Copyright 2002 Free Software Foundation, Inc.
! 4: dnl
! 5: dnl This file is part of the GNU MP Library.
! 6: dnl
! 7: dnl The GNU MP Library is free software; you can redistribute it and/or
! 8: dnl modify it under the terms of the GNU Lesser General Public License as
! 9: dnl published by the Free Software Foundation; either version 2.1 of the
! 10: dnl License, or (at your option) any later version.
! 11: dnl
! 12: dnl The GNU MP Library is distributed in the hope that it will be useful,
! 13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
! 14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 15: dnl Lesser General Public License for more details.
! 16: dnl
! 17: dnl You should have received a copy of the GNU Lesser General Public
! 18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
! 19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
! 20: dnl Suite 330, Boston, MA 02111-1307, USA.
! 21:
! 22: include(`../config.m4')
! 23:
! 24: dnl INPUT PARAMETERS: rp is stored through, up and vp are loaded through, n is the limb count
! 25: define(`rp',`r16')
! 26: define(`up',`r17')
! 27: define(`vp',`r18')
! 28: define(`n',`r19')
! 29: dnl Sum limbs: each is formed by addq, then masked with numb_mask before being stored through rp.
! 30: define(`rl0',`r0')
! 31: define(`rl1',`r1')
! 32: define(`rl2',`r2')
! 33: define(`rl3',`r3')
! 34: dnl Limbs loaded from up.
! 35: define(`ul0',`r4')
! 36: define(`ul1',`r5')
! 37: define(`ul2',`r6')
! 38: define(`ul3',`r7')
! 39: dnl Limbs loaded from vp. r25 (vl3) is also the n mod 4 counter of the peel loop, which finishes before vl3 is first loaded.
! 40: define(`vl0',`r22')
! 41: define(`vl1',`r23')
! 42: define(`vl2',`r24')
! 43: define(`vl3',`r25')
! 44: dnl Mask built at function entry as all ones shifted right by NAIL_BITS.
! 45: define(`numb_mask',`r21')
! 46: dnl CYSH is the right-shift count that extracts the carry from an unmasked sum.
! 47: define(`NAIL_BITS',`GMP_NAIL_BITS')
! 48: define(`CYSH',`GMP_NUMB_BITS')
! 49:
! 50: dnl This declaration is munged by configure
! 51: NAILS_SUPPORT(1-63)
! 52:
! 53: dnl Runs at 2.5 cycles/limb. It would be possible to reach 2.0 cycles/limb
! 54: dnl with 8-way unrolling.
! 55: dnl mpn_add_n: adds the n limbs at up to the n limbs at vp, stores each sum masked with numb_mask at rp, and leaves the final carry bit in r0.
! 56: ASM_START()
! 57: PROLOGUE(mpn_add_n)
! 58: lda numb_mask, -1(r31) C numb_mask = all ones (r31 reads as zero)
! 59: srl numb_mask, NAIL_BITS, numb_mask C clear the top NAIL_BITS bits of the mask
! 60: bis r31, r31, r20 C r20 = running carry, starts at 0
! 61: C Peel n mod 4 limbs one at a time so the unrolled code below always handles groups of four.
! 62: and n, 3, r25 C r25 = n mod 4 = limbs to peel
! 63: lda n, -4(n) C n = n - 4
! 64: beq r25, L_4_or_more C nothing to peel
! 65:
! 66: Loop0: ldq ul0, 0(up) C load one limb from up
! 67: lda up, 8(up)
! 68: ldq vl0, 0(vp) C load one limb from vp
! 69: lda vp, 8(vp)
! 70: lda rp, 8(rp) C rp is advanced first, so the store below uses -8(rp)
! 71: lda r25, -1(r25) C one fewer limb to peel
! 72: addq ul0, vl0, rl0 C main-add
! 73: addq rl0, r20, rl0 C cy-add
! 74: and rl0, numb_mask, r28 C keep only the bits selected by numb_mask
! 75: stq r28, -8(rp)
! 76: srl rl0, CYSH, r20 C carry = bits of the unmasked sum above CYSH
! 77: bne r25, Loop0
! 78:
! 79: blt n, Lret C n - 4 < 0 means n was below 4 and the peel loop handled every limb
! 80: C NOTE(review): n = 0 on entry also reaches the code below and would process four limbs; presumably callers guarantee n >= 1 - confirm.
! 81: L_4_or_more:
! 82: ldq ul0, 0(up) C preload four limb pairs
! 83: ldq vl0, 0(vp)
! 84: ldq ul1, 8(up)
! 85: ldq vl1, 8(vp)
! 86: ldq ul2, 16(up)
! 87: ldq vl2, 16(vp)
! 88: ldq ul3, 24(up)
! 89: ldq vl3, 24(vp)
! 90: lda up, 32(up)
! 91: lda vp, 32(vp)
! 92: lda n, -4(n)
! 93: bge n, L_8_or_more C taken when at least four more limbs follow the preloaded four
! 94: L_0_to_7:
! 95: addq ul0, vl0, rl0 C main-add 0
! 96: addq rl0, r20, rl0 C cy-add 0
! 97: addq ul1, vl1, rl1 C main-add 1
! 98: srl rl0, CYSH, r20 C gen cy 0
! 99: addq rl1, r20, rl1 C cy-add 1
! 100: and rl0,numb_mask, r27
! 101: br r31, Lcj0 C only the preloaded group remains; limbs 2 and 3 are finished in the shared tail
! 102: C Feed-in: add the preloaded group while loading the next one; limbs 2 and 3 are left pending in r27 and rl3.
! 103: L_8_or_more:
! 104: addq ul0, vl0, rl0 C main-add 0
! 105: ldq ul0, 0(up) C ul0 and vl0 are reloaded for the next group once consumed
! 106: ldq vl0, 0(vp)
! 107: addq rl0, r20, rl0 C cy-add 0
! 108: addq ul1, vl1, rl1 C main-add 1
! 109: srl rl0, CYSH, r20 C gen cy 0
! 110: ldq ul1, 8(up)
! 111: ldq vl1, 8(vp)
! 112: addq rl1, r20, rl1 C cy-add 1
! 113: and rl0,numb_mask, r27
! 114: addq ul2, vl2, rl2 C main-add 2
! 115: srl rl1, CYSH, r20 C gen cy 1
! 116: ldq ul2, 16(up)
! 117: ldq vl2, 16(vp)
! 118: addq rl2, r20, rl2 C cy-add 2
! 119: and rl1,numb_mask, r28
! 120: stq r27, 0(rp) C store limb 0
! 121: addq ul3, vl3, rl3 C main-add 3
! 122: srl rl2, CYSH, r20 C gen cy 2
! 123: ldq ul3, 24(up)
! 124: ldq vl3, 24(vp)
! 125: addq rl3, r20, rl3 C cy-add 3
! 126: and rl2,numb_mask, r27
! 127: stq r28, 8(rp) C store limb 1
! 128: lda rp, 32(rp) C pending limbs 2 and 3 now belong at -16(rp) and -8(rp)
! 129: lda up, 32(up)
! 130: lda vp, 32(vp)
! 131: lda n, -4(n)
! 132: blt n, L_end C no further group to load; wind down
! 133: C Main loop: each pass stores limbs 2 and 3 of the previous group and limbs 0 and 1 of the current one.
! 134: ALIGN(32)
! 135: Loop:
! 136: addq ul0, vl0, rl0 C main-add 0
! 137: srl rl3, CYSH, r20 C gen cy 3
! 138: ldq ul0, 0(up)
! 139: ldq vl0, 0(vp)
! 140:
! 141: addq rl0, r20, rl0 C cy-add 0
! 142: and rl3,numb_mask, r28
! 143: stq r27, -16(rp) C previous group limb 2
! 144: bis r31, r31, r31 C no-op (result goes to r31); presumably scheduling padding
! 145:
! 146: addq ul1, vl1, rl1 C main-add 1
! 147: srl rl0, CYSH, r20 C gen cy 0
! 148: ldq ul1, 8(up)
! 149: ldq vl1, 8(vp)
! 150:
! 151: addq rl1, r20, rl1 C cy-add 1
! 152: and rl0,numb_mask, r27
! 153: stq r28, -8(rp) C previous group limb 3
! 154: bis r31, r31, r31
! 155:
! 156: addq ul2, vl2, rl2 C main-add 2
! 157: srl rl1, CYSH, r20 C gen cy 1
! 158: ldq ul2, 16(up)
! 159: ldq vl2, 16(vp)
! 160:
! 161: addq rl2, r20, rl2 C cy-add 2
! 162: and rl1,numb_mask, r28
! 163: stq r27, 0(rp) C current group limb 0
! 164: bis r31, r31, r31
! 165:
! 166: addq ul3, vl3, rl3 C main-add 3
! 167: srl rl2, CYSH, r20 C gen cy 2
! 168: ldq ul3, 24(up)
! 169: ldq vl3, 24(vp)
! 170:
! 171: addq rl3, r20, rl3 C cy-add 3
! 172: and rl2,numb_mask, r27
! 173: stq r28, 8(rp) C current group limb 1
! 174: bis r31, r31, r31
! 175:
! 176: bis r31, r31, r31
! 177: lda n, -4(n)
! 178: lda up, 32(up)
! 179: lda vp, 32(vp)
! 180:
! 181: bis r31, r31, r31
! 182: bis r31, r31, r31
! 183: lda rp, 32(rp)
! 184: bge n, Loop C another group of four is still to be loaded
! 185: L_end:
! 186: addq ul0, vl0, rl0 C main-add 0
! 187: srl rl3, CYSH, r20 C gen cy 3
! 188: addq rl0, r20, rl0 C cy-add 0
! 189: and rl3,numb_mask, r28
! 190: stq r27, -16(rp) C previous group limb 2
! 191: addq ul1, vl1, rl1 C main-add 1
! 192: srl rl0, CYSH, r20 C gen cy 0
! 193: addq rl1, r20, rl1 C cy-add 1
! 194: and rl0,numb_mask, r27
! 195: stq r28, -8(rp) C previous group limb 3
! 196: Lcj0: addq ul2, vl2, rl2 C main-add 2
! 197: srl rl1, CYSH, r20 C gen cy 1
! 198: addq rl2, r20, rl2 C cy-add 2
! 199: and rl1,numb_mask, r28
! 200: stq r27, 0(rp) C last group limb 0
! 201: addq ul3, vl3, rl3 C main-add 3
! 202: srl rl2, CYSH, r20 C gen cy 2
! 203: addq rl3, r20, rl3 C cy-add 3
! 204: and rl2,numb_mask, r27
! 205: stq r28, 8(rp) C last group limb 1
! 206:
! 207: srl rl3, CYSH, r20 C gen cy 3
! 208: and rl3,numb_mask, r28
! 209: stq r27, 16(rp) C last group limb 2
! 210: stq r28, 24(rp) C last group limb 3
! 211: Lret:
! 212: and r20, 1, r0 C r0 = bit 0 of the final carry
! 213: ret r31, (r26), 1 C return through the address in r26
! 214: EPILOGUE(mpn_add_n)
! 215: ASM_END()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>