Annotation of OpenXM_contrib/gmp/mpn/ia64/popcount.asm, Revision 1.1
1.1 ! ohara 1: dnl IA-64 mpn_popcount.
! 2:
! 3: dnl Copyright 2000, 2001 Free Software Foundation, Inc.
! 4:
! 5: dnl This file is part of the GNU MP Library.
! 6:
! 7: dnl The GNU MP Library is free software; you can redistribute it and/or modify
! 8: dnl it under the terms of the GNU Lesser General Public License as published
! 9: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
! 10: dnl your option) any later version.
! 11:
! 12: dnl The GNU MP Library is distributed in the hope that it will be useful, but
! 13: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 15: dnl License for more details.
! 16:
! 17: dnl You should have received a copy of the GNU Lesser General Public License
! 18: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: dnl MA 02111-1307, USA.
! 21:
! 22: dnl Runs at 1 cycle/limb on the Itanium. That is the peak performance for the
! 23: dnl popcnt instruction, so this is optimal code. It should be straightforward
! 24: dnl to write mpn_hamdist with the same awesome performance.
! 25:
! 26: include(`../config.m4')
! 27:
C INPUT PARAMETERS
C sp = r32	pointer to the source limbs, post-incremented by 8 per load
C n  = r33	number of limbs
C
C RESULT
C r8 = sum of popcnt over the n limbs read through sp
C
C REGISTER USE
C r2       copy of the incoming ar.lc, written back before every return
C r16-r19  limbs loaded for the current group of four
C r20-r23  per-limb popcnt results (r22 and r23 first hold n mod 4 and n / 4)
C ar.lc    loop counter consumed by the br.cloop instructions
C
C ar.lc is declared as saved in r2 by the .save directive below, so every
C exit path must put the saved value back before returning.

ASM_START()
PROLOGUE(mpn_popcount)
	.prologue
	.save	ar.lc, r2
		mov	r2 = ar.lc		C keep the incoming ar.lc for the exits
	.body
		and	r22 = 3, r33		C r22 = n mod 4, limbs handled by .Loop0
		shr.u	r23 = r33, 2	;;	C r23 = n / 4, groups of four limbs
		mov	ar.lc = r22
		mov	r8 = 0		;;	C running bit count
		br.cloop.dpnt	.Loop0	;;	C taken when n mod 4 is non-zero
		br		.L0

C One limb per iteration until the remaining count is a multiple of four.
.Loop0:		ld8	r16 = [r32], 8	;;
		popcnt	r20 = r16	;;
		add	r8 = r8, r20
		br.cloop.dptk	.Loop0	;;

.L0:		mov	ar.lc = r23	;;
		br.cloop.dptk	.L1	;;	C taken when at least one group remains
		mov	ar.lc = r2		C no group left, restore ar.lc before
		br.ret.sptk.many b0	;;	C returning the count in r8

C Load the first group of four limbs.
.L1:		ld8	r16 = [r32], 8	;;
		ld8	r17 = [r32], 8	;;
		ld8	r18 = [r32], 8	;;
		ld8	r19 = [r32], 8	;;
		br.cloop.dptk	.L2	;;	C taken when a second group exists
		br		.Ldone1	;;

C Count the first group while loading the second one.
.L2:
		popcnt	r20 = r16
		ld8	r16 = [r32], 8	;;
		popcnt	r21 = r17
		ld8	r17 = [r32], 8	;;
		popcnt	r22 = r18
		ld8	r18 = [r32], 8	;;
		popcnt	r23 = r19
		ld8	r19 = [r32], 8	;;
		br.cloop.dptk	.Loop	;;	C taken when a third group exists
		br		.Ldone0

C Steady state, one limb per instruction group: add the count produced for
C the previous group, count the limb already loaded, load the next limb.
.Loop:		add	r8 = r8, r20
		popcnt	r20 = r16
		ld8	r16 = [r32], 8	;;
		add	r8 = r8, r21
		popcnt	r21 = r17
		ld8	r17 = [r32], 8	;;
		add	r8 = r8, r22
		popcnt	r22 = r18
		ld8	r18 = [r32], 8	;;
		add	r8 = r8, r23
		popcnt	r23 = r19
		ld8	r19 = [r32], 8
		br.cloop.dptk	.Loop	;;

C Drain: add the four outstanding counts and count the last group loaded.
.Ldone0:
		add	r8 = r8, r20
		popcnt	r20 = r16	;;
		add	r8 = r8, r21
		popcnt	r21 = r17	;;
		add	r8 = r8, r22
		popcnt	r22 = r18	;;
		add	r8 = r8, r23
		popcnt	r23 = r19	;;
		add	r21 = r21, r20
		add	r23 = r23, r22	;;
		add	r8 = r8, r21	;;
		add	r8 = r8, r23
		mov	ar.lc = r2		C restore ar.lc on this exit as well
		br.ret.sptk.many b0

C Exactly one group was loaded and none of its limbs has been counted yet.
.Ldone1:
		popcnt	r20 = r16
		popcnt	r21 = r17
		popcnt	r22 = r18
		popcnt	r23 = r19	;;
		add	r21 = r21, r20
		add	r23 = r23, r22	;;
		add	r8 = r8, r21	;;
		add	r8 = r8, r23
		mov	ar.lc = r2
		br.ret.sptk.many b0
EPILOGUE(mpn_popcount)
ASM_END()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>