===================================================================
RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/alpha/Attic/cntlz.asm,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.2
diff -u -p -r1.1.1.1 -r1.1.1.2
--- OpenXM_contrib/gmp/mpn/alpha/Attic/cntlz.asm	2000/09/09 14:12:22	1.1.1.1
+++ OpenXM_contrib/gmp/mpn/alpha/Attic/cntlz.asm	2003/08/25 16:06:18	1.1.1.2
@@ -1,13 +1,13 @@
 dnl  Alpha auxiliary for longlong.h's count_leading_zeros
 
-dnl  Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+dnl  Copyright 1997, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published by
-dnl  the Free Software Foundation; either version 2.1 of the License, or (at your
-dnl  option) any later version.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl  your option) any later version.
 
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -21,48 +21,26 @@ dnl  MA 02111-1307, USA.
 
 include(`../config.m4')
 
-dnl  DISCUSSION:
 
-dnl  Other methods have been tried, and using a 128-entry table actually trims
-dnl  about 10% of the execution time (on a 21164) when the table is in the L1
-dnl  cache.  But under non-benchmarking conditions, the table will hardly be in
-dnl  the L1 cache.  Tricky bit-fiddling methods with multiplies and magic tables
-dnl  are also possible, but they require many more instructions than the current
-dnl  code.  (But for count_trailing_zeros, such tricks are beneficial.)
-dnl  Finally, converting to floating-point and extracting the exponent is much
-dnl  slower.
-
 ASM_START()
-PROLOGUE(MPN(count_leading_zeros))
-	bis	r31,63,r0		C initialize partial result count
-
-	srl	r16,32,r1		C shift down 32 steps -> r1
-	cmovne	r1,r1,r16		C select r1 if non-zero
-	cmovne	r1,31,r0		C if r1 is nonzero choose smaller count
-
-	srl	r16,16,r1		C shift down 16 steps -> r1
-	subq	r0,16,r2		C generate new partial result count
-	cmovne	r1,r1,r16		C choose new r1 if non-zero
-	cmovne	r1,r2,r0		C choose new count if r1 was non-zero
-
-	srl	r16,8,r1
-	subq	r0,8,r2
-	cmovne	r1,r1,r16
-	cmovne	r1,r2,r0
-
-	srl	r16,4,r1
-	subq	r0,4,r2
-	cmovne	r1,r1,r16
-	cmovne	r1,r2,r0
-
-	srl	r16,2,r1
-	subq	r0,2,r2
-	cmovne	r1,r1,r16
-	cmovne	r1,r2,r0
-
-	srl	r16,1,r1		C extract bit 1
-	subq	r0,r1,r0		C subtract it from partial result
-
-	ret	r31,(r26),1
-EPILOGUE(MPN(count_leading_zeros))
+EXTERN(__clz_tab)			C byte table defined elsewhere; NOTE(review): contents not visible here, confirm
+PROLOGUE(mpn_count_leading_zeros,gp)	C operand is read from r16, result is left in r0
+	cmpbge	r31,  r16, r1		C r1 bit i is set iff byte i of r16 is zero (0 >= byte, unsigned)
+	LEA(r3,__clz_tab)		C r3 = address of __clz_tab
+	sra	r1,   1,   r1		C discard the flag for byte 0, leaving flags for bytes 1..7
+	xor	r1,   127, r1		C invert the 7 flags: bit k-1 set now means byte k is nonzero
+	srl	r16,  1,   r16		C r16 = operand shifted right one bit
+	addq	r1,   r3,  r1		C r1 = table address indexed by the nonzero-byte mask
+	ldq_u	r0,   0(r1)		C load the aligned quadword containing that table byte
+	lda	r2,   64		C r2 = 64, the base the result is subtracted from
+	extbl	r0,   r1,   r0		C r0 = the table byte, picked by the low 3 bits of r1
+	s8subl	r0,   8,    r0		C r0 = 8*r0 - 8; presumably the bit offset of the top nonzero byte
+	srl	r16,  r0,   r16		C shift r16 right by r0 (only the low 6 bits of r0 are used)
+	addq	r16,  r3,   r16		C r16 = table address indexed by the shifted value
+	ldq_u	r1,   0(r16)		C load the aligned quadword containing the second table byte
+	extbl	r1,   r16,  r1		C r1 = second table byte
+	subq	r2,   r1,   r2		C r2 = 64 - r1
+	subq	r2,   r0,   r0		C r0 = 64 - r1 - r0, the value returned
+	ret	r31,  (r26),1		C return to the address in r26
+EPILOGUE(mpn_count_leading_zeros)
 ASM_END()