[BACK]Return to cntlz.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / alpha

Diff for /OpenXM_contrib/gmp/mpn/alpha/Attic/cntlz.asm between version 1.1.1.1 and 1.1.1.2

version 1.1.1.1, 2000/09/09 14:12:22 version 1.1.1.2, 2003/08/25 16:06:18
Line 1 
Line 1 
 dnl  Alpha auxiliary for longlong.h's count_leading_zeros  dnl  Alpha auxiliary for longlong.h's count_leading_zeros
   
 dnl  Copyright (C) 1997, 2000 Free Software Foundation, Inc.  dnl  Copyright 1997, 2000, 2002 Free Software Foundation, Inc.
   
 dnl  This file is part of the GNU MP Library.  dnl  This file is part of the GNU MP Library.
   
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
 dnl  it under the terms of the GNU Lesser General Public License as published by  dnl  it under the terms of the GNU Lesser General Public License as published
 dnl  the Free Software Foundation; either version 2.1 of the License, or (at your  dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
 dnl  option) any later version.  dnl  your option) any later version.
   
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Line 21 
Line 21 
 dnl  MA 02111-1307, USA.  dnl  MA 02111-1307, USA.
   
 include(`../config.m4')  include(`../config.m4')
   
 dnl  DISCUSSION:  
   
 dnl  Other methods have been tried, and using a 128-entry table actually trims  
 dnl  about 10% of the execution time (on a 21164) when the table is in the L1  
 dnl  cache.  But under non-benchmarking conditions, the table will rarely be in  
 dnl  the L1 cache.  Tricky bit-fiddling methods with multiplies and magic tables  
 dnl  are also possible, but they require many more instructions than the current  
 dnl  code.  (But for count_trailing_zeros, such tricks are beneficial.)  
 dnl  Finally, converting to floating-point and extracting the exponent is much  
 dnl  slower.  
   
 ASM_START()  ASM_START()
 PROLOGUE(MPN(count_leading_zeros))  EXTERN(__clz_tab)
         bis     r31,63,r0               C initialize partial result count  PROLOGUE(mpn_count_leading_zeros,gp)
           cmpbge  r31,  r16, r1
         srl     r16,32,r1               C shift down 32 steps -> r1          LEA(r3,__clz_tab)
         cmovne  r1,r1,r16               C select r1 if non-zero          sra     r1,   1,   r1
         cmovne  r1,31,r0                C if r1 is nonzero choose smaller count          xor     r1,   127, r1
           srl     r16,  1,   r16
         srl     r16,16,r1               C shift down 16 steps -> r1          addq    r1,   r3,  r1
         subq    r0,16,r2                C generate new partial result count          ldq_u   r0,   0(r1)
         cmovne  r1,r1,r16               C choose new r1 if non-zero          lda     r2,   64
         cmovne  r1,r2,r0                C choose new count if r1 was non-zero          extbl   r0,   r1,   r0
           s8subl  r0,   8,    r0
         srl     r16,8,r1          srl     r16,  r0,   r16
         subq    r0,8,r2          addq    r16,  r3,   r16
         cmovne  r1,r1,r16          ldq_u   r1,   0(r16)
         cmovne  r1,r2,r0          extbl   r1,   r16,  r1
           subq    r2,   r1,   r2
         srl     r16,4,r1          subq    r2,   r0,   r0
         subq    r0,4,r2          ret     r31,  (r26),1
         cmovne  r1,r1,r16  EPILOGUE(mpn_count_leading_zeros)
         cmovne  r1,r2,r0  
   
         srl     r16,2,r1  
         subq    r0,2,r2  
         cmovne  r1,r1,r16  
         cmovne  r1,r2,r0  
   
         srl     r16,1,r1                C extract bit 1  
         subq    r0,r1,r0                C subtract it from partial result  
   
         ret     r31,(r26),1  
 EPILOGUE(MPN(count_leading_zeros))  
 ASM_END()  ASM_END()

Legend:
Removed from v.1.1.1.1  
changed lines
  Added in v.1.1.1.2

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>