=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/alpha/Attic/mul_1.asm,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1.1.1 -r1.1.1.2 --- OpenXM_contrib/gmp/mpn/alpha/Attic/mul_1.asm 2000/09/09 14:12:22 1.1.1.1 +++ OpenXM_contrib/gmp/mpn/alpha/Attic/mul_1.asm 2003/08/25 16:06:18 1.1.1.2 @@ -1,14 +1,14 @@ dnl Alpha __gmpn_mul_1 -- Multiply a limb vector with a limb and store dnl the result in a second limb vector. -dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. +dnl Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public License as published by -dnl the Free Software Foundation; either version 2.1 of the License, or (at your -dnl option) any later version. +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY @@ -22,50 +22,69 @@ dnl MA 02111-1307, USA. include(`../config.m4') -dnl INPUT PARAMETERS -dnl res_ptr r16 -dnl s1_ptr r17 -dnl size r18 -dnl s2_limb r19 +C INPUT PARAMETERS +C rp r16 +C up r17 +C n r18 +C vl r19 +C cl r20 -dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7 -dnl cycles/limb on EV6. +C This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7 +C cycles/limb on EV6. ASM_START() +PROLOGUE(mpn_mul_1c) + ldq r2,0(r17) C r2 = s1_limb + lda r18,-1(r18) C size-- + mulq r2,r19,r3 C r3 = prod_low + umulh r2,r19,r4 C r4 = prod_high + beq r18,$Le1c C jump if size was == 1 + ldq r2,8(r17) C r2 = s1_limb + lda r18,-1(r18) C size-- + addq r3,r20,r3 C r3 = cy_limb + cl + stq r3,0(r16) + cmpult r3,r20,r0 C r0 = carry from (cy_limb + cl) + bne r18,$Loop C jump if size was == 2 + br r31,$Le2 +$Le1c: addq r3,r20,r3 C r3 = cy_limb + cl + cmpult r3,r20,r0 C r0 = carry from (cy_limb + cl) +$Le1: stq r3,0(r16) + addq r4,r0,r0 + ret r31,(r26),1 +EPILOGUE(mpn_mul_1c) + PROLOGUE(mpn_mul_1) ldq r2,0(r17) C r2 = s1_limb - subq r18,1,r18 C size-- + lda r18,-1(r18) C size-- mulq r2,r19,r3 C r3 = prod_low - bic r31,r31,r4 C clear cy_limb - umulh r2,r19,r0 C r0 = prod_high - beq r18,$Lend1 C jump if size was == 1 + bic r31,r31,r0 C clear cy_limb + umulh r2,r19,r4 C r4 = prod_high + beq r18,$Le1 C jump if size was == 1 ldq r2,8(r17) C r2 = s1_limb - subq r18,1,r18 C size-- + lda r18,-1(r18) C size-- stq r3,0(r16) - beq r18,$Lend2 C jump if size was == 2 + beq r18,$Le2 C jump if size was == 2 ALIGN(8) $Loop: mulq r2,r19,r3 C r3 = prod_low addq r4,r0,r0 C cy_limb = cy_limb + 'cy' - subq r18,1,r18 C size-- - umulh r2,r19,r4 C r4 = cy_limb + lda r18,-1(r18) C size-- + umulh r2,r19,r4 C r4 = prod_high ldq r2,16(r17) C r2 = s1_limb - addq r17,8,r17 C s1_ptr++ + lda r17,8(r17) C s1_ptr++ addq r3,r0,r3 C r3 = cy_limb + prod_low stq r3,8(r16) cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low) - addq r16,8,r16 C res_ptr++ + lda r16,8(r16) C res_ptr++ bne r18,$Loop -$Lend2: mulq r2,r19,r3 C r3 = prod_low +$Le2: mulq r2,r19,r3 C r3 = prod_low addq r4,r0,r0 C cy_limb = cy_limb + 'cy' - umulh r2,r19,r4 C r4 = cy_limb + umulh r2,r19,r4 C r4 = prod_high addq r3,r0,r3 C r3 = cy_limb + prod_low cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low) stq r3,8(r16) addq r4,r0,r0 C cy_limb = prod_high + cy - ret r31,(r26),1 -$Lend1: stq r3,0(r16) ret r31,(r26),1 EPILOGUE(mpn_mul_1) ASM_END()