[BACK]Return to popcount.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / ia64

Annotation of OpenXM_contrib/gmp/mpn/ia64/popcount.asm, Revision 1.1.1.1

1.1       ohara       1: dnl  IA-64 mpn_popcount.
                      2:
                      3: dnl  Copyright 2000, 2001 Free Software Foundation, Inc.
                      4:
                      5: dnl  This file is part of the GNU MP Library.
                      6:
                      7: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
                      8: dnl  it under the terms of the GNU Lesser General Public License as published
                      9: dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
                     10: dnl  your option) any later version.
                     11:
                     12: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
                     13: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     15: dnl  License for more details.
                     16:
                     17: dnl  You should have received a copy of the GNU Lesser General Public License
                     18: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: dnl  MA 02111-1307, USA.
                     21:
                     22: dnl  Runs at 1 cycle/limb on the Itanium.  That is the peak performance for the
                     23: dnl  popcnt instruction, so this is optimal code.  It should be straightforward
                     24: dnl  to write mpn_hamdist with the same awesome performance.
                     25:
                     26: include(`../config.m4')
                     27:
                     28: C INPUT PARAMETERS
                     29: C sp = r32   (pointer to the source limbs, not the stack pointer)
                     30: C n = r33    (number of 64-bit limbs to count)
                     31:
                     32: ASM_START()
                     33: C mpn_popcount -- count the 1 bits in the n limbs starting at r32.
                     34: C The total is accumulated in r8.  The first n mod 4 limbs are handled one
                     35: C at a time in .Loop0; the remaining n/4 groups of four limbs go through
                     36: C .L1/.L2/.Loop, where loads run ahead of popcnt and popcnt ahead of the
                     37: C adds into r8.  ar.lc is saved in r2 and put back on every return path.
                     38: PROLOGUE(mpn_popcount)
                     39:        .prologue
                     40:        .save   ar.lc, r2
                     41:                mov     r2 = ar.lc
                     42:        .body
                     43: C r22 = n mod 4 (single-limb iterations), r23 = n / 4 (four-limb groups).
                     44:                and     r22 = 3, r33
                     45:                shr.u   r23 = r33, 2    ;;
                     46:                mov     ar.lc = r22
                     47:                mov     r8 = 0          ;;
                     48: C br.cloop branches only while ar.lc is nonzero, so a zero count skips .Loop0.
                     49:                br.cloop.dpnt   .Loop0  ;;
                     50:                br      .L0
                     51: .Loop0:                ld8     r16 = [r32], 8  ;;
                     52:                popcnt  r20 = r16       ;;
                     53:                add     r8 = r8, r20
                     54:                br.cloop.dptk   .Loop0  ;;
                     55:
                     56: .L0:           mov     ar.lc = r23     ;;
                     57:                br.cloop.dptk   .L1     ;;
                     58: C No four-limb groups: restore the saved ar.lc before returning.
                     59:                mov     ar.lc = r2
                     60:                br.ret.sptk.many b0     ;;
                     61: C Load the first group of four limbs.
                     62: .L1:           ld8     r16 = [r32], 8  ;;
                     63:                ld8     r17 = [r32], 8  ;;
                     64:                ld8     r18 = [r32], 8  ;;
                     65:                ld8     r19 = [r32], 8  ;;
                     66:                br.cloop.dptk   .L2    ;;
                     67:                br              .Ldone1 ;;
                     68: C Two or more groups: popcnt the first group while loading the second.
                     69: .L2:
                     70:                popcnt  r20 = r16
                     71:                ld8     r16 = [r32], 8  ;;
                     72:                popcnt  r21 = r17
                     73:                ld8     r17 = [r32], 8  ;;
                     74:                popcnt  r22 = r18
                     75:                ld8     r18 = [r32], 8  ;;
                     76:                popcnt  r23 = r19
                     77:                ld8     r19 = [r32], 8  ;;
                     78:                br.cloop.dptk   .Loop  ;;
                     79:                br              .Ldone0
                     80:
                     81: C Steady state: add the previous counts into r8, popcnt the limbs already
                     82: C loaded, and load the next group.
                     83: .Loop:         add     r8 = r8, r20
                     84:                popcnt  r20 = r16
                     85:                ld8     r16 = [r32], 8  ;;
                     86:                add     r8 = r8, r21
                     87:                popcnt  r21 = r17
                     88:                ld8     r17 = [r32], 8  ;;
                     89:                add     r8 = r8, r22
                     90:                popcnt  r22 = r18
                     91:                ld8     r18 = [r32], 8  ;;
                     92:                add     r8 = r8, r23
                     93:                popcnt  r23 = r19
                     94:                ld8     r19 = [r32], 8
                     95:                br.cloop.dptk   .Loop   ;;
                     96:
                     97: C Drain: add the pending counts in r20-r23, then count the last loaded group.
                     98: .Ldone0:
                     99:                add     r8 = r8, r20
                    100:                popcnt  r20 = r16       ;;
                    101:                add     r8 = r8, r21
                    102:                popcnt  r21 = r17       ;;
                    103:                add     r8 = r8, r22
                    104:                popcnt  r22 = r18       ;;
                    105:                add     r8 = r8, r23
                    106:                popcnt  r23 = r19       ;;
                    107:                add     r21 = r21, r20
                    108:                add     r23 = r23, r22  ;;
                    109:                add     r8 = r8, r21    ;;
                    110:                add     r8 = r8, r23
                    111:                mov     ar.lc = r2
                    112:                br.ret.sptk.many b0
                    113:
                    114: C Exactly one group: nothing is pending, just count r16-r19 and sum.
                    115: .Ldone1:
                    116:                popcnt  r20 = r16
                    117:                popcnt  r21 = r17
                    118:                popcnt  r22 = r18
                    119:                popcnt  r23 = r19       ;;
                    120:                add     r21 = r21, r20
                    121:                add     r23 = r23, r22  ;;
                    122:                add     r8 = r8, r21    ;;
                    123:                add     r8 = r8, r23
                    124:                mov     ar.lc = r2
                    125:                br.ret.sptk.many b0
                    126: EPILOGUE(mpn_popcount)
                    127: ASM_END()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>