Annotation of OpenXM_contrib/gmp/mpn/alpha/mul_1.asm, Revision 1.1.1.2
1.1 maekawa 1: dnl Alpha __gmpn_mul_1 -- Multiply a limb vector with a limb and store
2: dnl the result in a second limb vector.
3:
1.1.1.2 ! ohara 4: dnl Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
1.1 maekawa 5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 ! ohara 9: dnl it under the terms of the GNU Lesser General Public License as published
! 10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
! 11: dnl your option) any later version.
1.1 maekawa 12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23: include(`../config.m4')
24:
1.1.1.2 ! ohara 25: C INPUT PARAMETERS
! 26: C rp r16
! 27: C up r17
! 28: C n r18
! 29: C vl r19
! 30: C cl r20 (carry-in limb, read by mpn_mul_1c only)
1.1 maekawa 31:
1.1.1.2 ! ohara 32: C This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7
! 33: C cycles/limb on EV6.
1.1 maekawa 34:
35: ASM_START()
C mpn_mul_1c -- multiply the n-limb vector at up (r17) by the limb vl (r19),
C add the carry-in limb cl (r20) at the low end, and store the low result
C limbs at rp (r16).  r0 holds the carry-out limb (prod_high + carry) when
C the function returns.
C Limb 0 is loaded and stored unconditionally, so n (r18) must be at least 1.
C Only limb 0 is handled in this body; for n > 1 control transfers to $Loop
C or $Le2, which are defined inside mpn_mul_1, and the return happens there.
1.1.1.2 ! ohara 36: PROLOGUE(mpn_mul_1c)
! 37: ldq r2,0(r17) C r2 = up[0]
! 38: lda r18,-1(r18) C size--
! 39: mulq r2,r19,r3 C r3 = prod_low of up[0] * vl
! 40: umulh r2,r19,r4 C r4 = prod_high of up[0] * vl
! 41: beq r18,$Le1c C jump if size was == 1
! 42: ldq r2,8(r17) C r2 = up[1], the limb $Loop/$Le2 multiplies next
! 43: lda r18,-1(r18) C size--
! 44: addq r3,r20,r3 C r3 = prod_low + cl
! 45: stq r3,0(r16) C rp[0] = prod_low + cl
! 46: cmpult r3,r20,r0 C r0 = carry from (prod_low + cl)
! 47: bne r18,$Loop C size was > 2: enter the loop inside mpn_mul_1
! 48: br r31,$Le2 C size was == 2: only the last limb remains
! 49: $Le1c: addq r3,r20,r3 C r3 = prod_low + cl
! 50: cmpult r3,r20,r0 C r0 = carry from (prod_low + cl)
! 51: $Le1: stq r3,0(r16) C rp[0] = r3; mpn_mul_1 also branches here when size == 1
! 52: addq r4,r0,r0 C r0 = prod_high + carry, the carry-out limb
! 53: ret r31,(r26),1 C return via the address in r26
! 54: EPILOGUE(mpn_mul_1c)
! 55:
C mpn_mul_1 -- multiply the n-limb vector at up (r17) by the limb vl (r19)
C and store the n low result limbs at rp (r16).  r0 is cleared on entry (no
C carry-in) and holds the carry-out limb (prod_high + carry) on return.
C Limb 0 is loaded unconditionally, so n (r18) must be at least 1.
C The size == 1 case finishes at $Le1, which is defined inside mpn_mul_1c;
C mpn_mul_1c in turn branches to $Loop and $Le2 below.
C In $Loop and $Le2: r2 = limb being multiplied, r4 = prod_high of the
C previous limb, r0 = carry bit of the previous addition.  r16 and r17 stay
C one limb behind the limb being processed, hence the 8(r16) stores and the
C 16(r17) load of the following limb.
1.1 maekawa 56: PROLOGUE(mpn_mul_1)
57: ldq r2,0(r17) C r2 = up[0]
1.1.1.2 ! ohara 58: lda r18,-1(r18) C size--
1.1 maekawa 59: mulq r2,r19,r3 C r3 = prod_low
1.1.1.2 ! ohara 60: bic r31,r31,r0 C clear cy_limb (r0 = 0, no carry-in)
! 61: umulh r2,r19,r4 C r4 = prod_high
! 62: beq r18,$Le1 C size was == 1: finish at $Le1 inside mpn_mul_1c
1.1 maekawa 63: ldq r2,8(r17) C r2 = up[1]
1.1.1.2 ! ohara 64: lda r18,-1(r18) C size--
1.1 maekawa 65: stq r3,0(r16) C rp[0] = prod_low
1.1.1.2 ! ohara 66: beq r18,$Le2 C jump if size was == 2
1.1 maekawa 67:
68: ALIGN(8)
69: $Loop: mulq r2,r19,r3 C r3 = prod_low of the current limb
70: addq r4,r0,r0 C cy_limb = previous prod_high + previous carry
1.1.1.2 ! ohara 71: lda r18,-1(r18) C size--
! 72: umulh r2,r19,r4 C r4 = prod_high of the current limb
1.1 maekawa 73: ldq r2,16(r17) C r2 = limb following the one just multiplied
1.1.1.2 ! ohara 74: lda r17,8(r17) C s1_ptr++
1.1 maekawa 75: addq r3,r0,r3 C r3 = cy_limb + prod_low
76: stq r3,8(r16) C store the result limb (r16 is one limb behind)
77: cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
1.1.1.2 ! ohara 78: lda r16,8(r16) C res_ptr++
1.1 maekawa 79: bne r18,$Loop C repeat until only the last limb (already in r2) remains
80:
1.1.1.2 ! ohara 81: $Le2: mulq r2,r19,r3 C r3 = prod_low of the last limb
1.1 maekawa 82: addq r4,r0,r0 C cy_limb = previous prod_high + previous carry
1.1.1.2 ! ohara 83: umulh r2,r19,r4 C r4 = prod_high of the last limb
1.1 maekawa 84: addq r3,r0,r3 C r3 = cy_limb + prod_low
85: cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
86: stq r3,8(r16) C store the last result limb
87: addq r4,r0,r0 C r0 = prod_high + carry, the carry-out limb
88: ret r31,(r26),1 C return via the address in r26
89: EPILOGUE(mpn_mul_1)
90: ASM_END()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>