===================================================================
RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/alpha/Attic/cntlz.asm,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.2
diff -u -p -r1.1.1.1 -r1.1.1.2
--- OpenXM_contrib/gmp/mpn/alpha/Attic/cntlz.asm 2000/09/09 14:12:22 1.1.1.1
+++ OpenXM_contrib/gmp/mpn/alpha/Attic/cntlz.asm 2003/08/25 16:06:18 1.1.1.2
@@ -1,13 +1,13 @@
 dnl Alpha auxiliary for longlong.h's count_leading_zeros
 
-dnl Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+dnl Copyright 1997, 2000, 2002 Free Software Foundation, Inc.
 
 dnl This file is part of the GNU MP Library.
 
 dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published by
-dnl the Free Software Foundation; either version 2.1 of the License, or (at your
-dnl option) any later version.
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
 
 dnl The GNU MP Library is distributed in the hope that it will be useful, but
 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -21,48 +21,26 @@ dnl MA 02111-1307, USA.
 
 include(`../config.m4')
 
-dnl DISCUSSION:
-dnl Other methods have been tried, and using a 128-entry table actually trims
-dnl about 10% of the execution time (on a 21164) when the table is in the L1
-dnl cache. But under non-benchmarking conditions, the table will hardly be in
-dnl the L1 cache. Tricky bit-fiddling methods with multiplies and magic tables
-dnl are also possible, but they require many more instructions than the current
-dnl code. (But for count_trailing_zeros, such tricks are beneficial.)
-dnl Finally, converting to floating-point and extracting the exponent is much
-dnl slower.
-
 
 ASM_START()
-PROLOGUE(MPN(count_leading_zeros))
-	bis	r31,63,r0	C initialize partial result count
-
-	srl	r16,32,r1	C shift down 32 steps -> r1
-	cmovne	r1,r1,r16	C select r1 if non-zero
-	cmovne	r1,31,r0	C if r1 is nonzero choose smaller count
-
-	srl	r16,16,r1	C shift down 16 steps -> r1
-	subq	r0,16,r2	C generate new partial result count
-	cmovne	r1,r1,r16	C choose new r1 if non-zero
-	cmovne	r1,r2,r0	C choose new count if r1 was non-zero
-
-	srl	r16,8,r1
-	subq	r0,8,r2
-	cmovne	r1,r1,r16
-	cmovne	r1,r2,r0
-
-	srl	r16,4,r1
-	subq	r0,4,r2
-	cmovne	r1,r1,r16
-	cmovne	r1,r2,r0
-
-	srl	r16,2,r1
-	subq	r0,2,r2
-	cmovne	r1,r1,r16
-	cmovne	r1,r2,r0
-
-	srl	r16,1,r1	C extract bit 1
-	subq	r0,r1,r0	C subtract it from partial result
-
-	ret	r31,(r26),1
-EPILOGUE(MPN(count_leading_zeros))
+EXTERN(__clz_tab)
+PROLOGUE(mpn_count_leading_zeros,gp)
+	cmpbge	r31, r16, r1
+	LEA(r3,__clz_tab)
+	sra	r1, 1, r1
+	xor	r1, 127, r1
+	srl	r16, 1, r16
+	addq	r1, r3, r1
+	ldq_u	r0, 0(r1)
+	lda	r2, 64
+	extbl	r0, r1, r0
+	s8subl	r0, 8, r0
+	srl	r16, r0, r16
+	addq	r16, r3, r16
+	ldq_u	r1, 0(r16)
+	extbl	r1, r16, r1
+	subq	r2, r1, r2
+	subq	r2, r0, r0
+	ret	r31, (r26),1
+EPILOGUE(mpn_count_leading_zeros)
 ASM_END()