[BACK]Return to cntlz.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / alpha

Annotation of OpenXM_contrib/gmp/mpn/alpha/cntlz.asm, Revision 1.1.1.1

1.1       maekawa     1: dnl  Alpha auxiliary for longlong.h's count_leading_zeros
                      2:
                      3: dnl  Copyright (C) 1997, 2000 Free Software Foundation, Inc.
                      4:
                      5: dnl  This file is part of the GNU MP Library.
                      6:
                      7: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
                      8: dnl  it under the terms of the GNU Lesser General Public License as published by
                      9: dnl  the Free Software Foundation; either version 2.1 of the License, or (at your
                     10: dnl  option) any later version.
                     11:
                     12: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
                     13: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     15: dnl  License for more details.
                     16:
                     17: dnl  You should have received a copy of the GNU Lesser General Public License
                     18: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: dnl  MA 02111-1307, USA.
                     21:
                     22: include(`../config.m4')
                     23:
                     24: dnl  DISCUSSION:
                     25:
                     26: dnl  Other methods have been tried, and using a 128-entry table actually trims
                     27: dnl  about 10% of the execution time (on a 21164) when the table is in the L1
                     28: dnl  cache.  But under non-benchmarking conditions, the table is unlikely to be in
                     29: dnl  the L1 cache.  Tricky bit-fiddling methods with multiplies and magic tables
                     30: dnl  are also possible, but they require many more instructions than the current
                     31: dnl  code.  (But for count_trailing_zeros, such tricks are beneficial.)
                     32: dnl  Finally, converting to floating-point and extracting the exponent is much
                     33: dnl  slower.
                     34:
                     35: ASM_START()
                         dnl  count_leading_zeros: branch-free binary search for the highest set bit.
                         dnl  In:   r16 = 64-bit operand
                         dnl  Out:  r0  = number of zero bits above the highest set bit of the operand
                         dnl        (a zero operand falls through every step below and yields 63)
                         dnl  The instructions below overwrite r1, r2 and r16 and touch no memory.
                         dnl  Invariant after each step of width w: r16 < 2^w, and r0 is the count
                         dnl  that would be correct if only bit 0 of the current r16 were set.
                     36: PROLOGUE(MPN(count_leading_zeros))
                     37:        bis     r31,63,r0               C r0 = 63: initial partial result count (r31 reads as zero)
                     38:
                     39:        srl     r16,32,r1               C r1 = upper 32 bits of the operand
                     40:        cmovne  r1,r1,r16               C if they are non-zero, continue the search in them
                     41:        cmovne  r1,31,r0                C ... and lower the count to 31 (= 63 - 32)
                     42:
                     43:        srl     r16,16,r1               C r1 = bits 16 and up of the remaining 32-bit value
                     44:        subq    r0,16,r2                C r2 = candidate count, used only if r1 is non-zero
                     45:        cmovne  r1,r1,r16               C narrow r16 to the upper part if it is non-zero
                     46:        cmovne  r1,r2,r0                C ... and commit the reduced count
                     47:
                     48:        srl     r16,8,r1                C same step with width 8 (r16 < 2^16 here)
                     49:        subq    r0,8,r2                 C candidate count if bits 8 and up are non-zero
                     50:        cmovne  r1,r1,r16               C narrow r16 when r1 is non-zero
                     51:        cmovne  r1,r2,r0                C commit the reduced count when r1 is non-zero
                     52:
                     53:        srl     r16,4,r1                C same step with width 4 (r16 < 2^8 here)
                     54:        subq    r0,4,r2                 C candidate count if bits 4 and up are non-zero
                     55:        cmovne  r1,r1,r16               C narrow r16 when r1 is non-zero
                     56:        cmovne  r1,r2,r0                C commit the reduced count when r1 is non-zero
                     57:
                     58:        srl     r16,2,r1                C same step with width 2 (r16 < 2^4 here)
                     59:        subq    r0,2,r2                 C candidate count if bits 2 and up are non-zero
                     60:        cmovne  r1,r1,r16               C narrow r16 when r1 is non-zero
                     61:        cmovne  r1,r2,r0                C commit the reduced count when r1 is non-zero
                     62:
                     63:        srl     r16,1,r1                C r16 < 4 here, so r1 = bit 1 of r16 (0 or 1)
                     64:        subq    r0,r1,r0                C final adjustment: subtract that bit, no cmov needed
                     65:
                     66:        ret     r31,(r26),1             C return through the address in r26; the count is in r0
                     67: EPILOGUE(MPN(count_leading_zeros))
                     68: ASM_END()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>