Annotation of OpenXM/src/kan96xx/gmp-2.0.2-ssh-2/mpn/alpha/submul_1.s, Revision 1.1.1.1
1.1 takayama 1: # Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and
2: # subtract the result from a second limb vector.
3:
4: # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
5:
6: # This file is part of the GNU MP Library.
7:
8: # The GNU MP Library is free software; you can redistribute it and/or modify
9: # it under the terms of the GNU Library General Public License as published by
10: # the Free Software Foundation; either version 2 of the License, or (at your
11: # option) any later version.
12:
13: # The GNU MP Library is distributed in the hope that it will be useful, but
14: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
16: # License for more details.
17:
18: # You should have received a copy of the GNU Library General Public License
19: # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: # MA 02111-1307, USA.
22:
23:
24: # INPUT PARAMETERS
25: # res_ptr r16
26: # s1_ptr r17
27: # size r18
28: # s2_limb r19
29:
30: # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
31:
32: .set noreorder # keep instructions in exactly the order written below
33: .set noat # forbid implicit assembler use of the $at temporary register
34: .text
35: .align 3 # 8-byte alignment for the entry point
36: .globl __mpn_submul_1
37: .ent __mpn_submul_1 2
38: __mpn_submul_1:
39: .frame $30,0,$26 # frame size 0 (no stack use); return address in $26
40: # In: $16 = res_ptr, $17 = s1_ptr, $18 = size (must be >= 1: the first limb is processed unconditionally), $19 = s2_limb. Each res limb becomes res - s1*s2_limb - carry; the final carry limb is left in $0.
41: ldq $2,0($17) # $2 = s1_limb
42: addq $17,8,$17 # s1_ptr++
43: subq $18,1,$18 # size--
44: mulq $2,$19,$3 # $3 = prod_low
45: ldq $5,0($16) # $5 = *res_ptr
46: umulh $2,$19,$0 # $0 = prod_high
47: beq $18,.Lend1 # jump if size was == 1
48: ldq $2,0($17) # $2 = s1_limb
49: addq $17,8,$17 # s1_ptr++
50: subq $18,1,$18 # size--
51: subq $5,$3,$3 # $3 = *res_ptr - prod_low (first limb, no incoming carry)
52: cmpult $5,$3,$4 # $4 = 1 if that subtraction borrowed (old value < difference)
53: stq $3,0($16) # *res_ptr = difference
54: addq $16,8,$16 # res_ptr++
55: beq $18,.Lend2 # jump if size was == 2
56: # Loop invariant on entry: $2 = s1 limb to process (already loaded), $0 + $4 = carry limb owed by the previous limb, $18 = limbs remaining after this one
57: .align 3 # align the loop head to 8 bytes
58: .Loop: mulq $2,$19,$3 # $3 = prod_low
59: ldq $5,0($16) # $5 = *res_ptr
60: addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
61: subq $18,1,$18 # size--
62: umulh $2,$19,$4 # $4 = cy_limb
63: ldq $2,0($17) # $2 = s1_limb
64: addq $17,8,$17 # s1_ptr++
65: addq $3,$0,$3 # $3 = cy_limb + prod_low
66: cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
67: subq $5,$3,$3 # $3 = *res_ptr - (cy_limb + prod_low)
68: cmpult $5,$3,$5 # $5 = 1 if the subtraction borrowed
69: stq $3,0($16) # *res_ptr = difference
70: addq $16,8,$16 # res_ptr++
71: addq $5,$0,$0 # combine carries
72: bne $18,.Loop # repeat while limbs remain after the one now in $2
73: # Last limb: same steps as one loop iteration, but no further s1 limb is loaded and no pointers are advanced
74: .Lend2: mulq $2,$19,$3 # $3 = prod_low
75: ldq $5,0($16) # $5 = *res_ptr
76: addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
77: umulh $2,$19,$4 # $4 = cy_limb
78: addq $3,$0,$3 # $3 = cy_limb + prod_low
79: cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
80: subq $5,$3,$3 # $3 = *res_ptr - (cy_limb + prod_low)
81: cmpult $5,$3,$5 # $5 = 1 if the subtraction borrowed
82: stq $3,0($16) # *res_ptr = difference
83: addq $5,$0,$0 # combine carries
84: addq $4,$0,$0 # cy_limb = prod_high + cy
85: ret $31,($26),1 # return to caller; $0 holds the carry-out limb
86: .Lend1: subq $5,$3,$3 # size == 1 path: $3 = *res_ptr - prod_low
87: cmpult $5,$3,$5 # $5 = 1 if the subtraction borrowed
88: stq $3,0($16) # *res_ptr = difference
89: addq $0,$5,$0 # $0 = prod_high + borrow
90: ret $31,($26),1 # return to caller; $0 holds the carry-out limb
91:
92: .end __mpn_submul_1
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>