Annotation of OpenXM_contrib/gmp/mpn/arm/addmul_1.asm, Revision 1.1.1.1
1.1 ohara 1: dnl ARM mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
2: dnl to a second limb vector.
3: dnl Contributed by Robert Harley.
4:
5: dnl Copyright 1998, 2000, 2001 Free Software Foundation, Inc.
6:
7: dnl This file is part of the GNU MP Library.
8:
9: dnl The GNU MP Library is free software; you can redistribute it and/or modify
10: dnl it under the terms of the GNU Lesser General Public License as published
11: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
12: dnl your option) any later version.
13:
14: dnl The GNU MP Library is distributed in the hope that it will be useful, but
15: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17: dnl License for more details.
18:
19: dnl You should have received a copy of the GNU Lesser General Public License
20: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
21: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22: dnl MA 02111-1307, USA.
23:
24: include(`../config.m4')
25:
26: C This runs at 10 cycles/limb on the StrongARM. TODO: make this look more
27: C like the submul code, since that uses fewer registers and runs slightly
28: C faster.
29:
C Symbolic names for the four registers the routine below treats as its inputs.
C NOTE(review): r0-r3 presumably carry the C-level arguments (rp, up, n, v) in
C that order -- confirm against the mpn_addmul_1 prototype.
30: define(`rp',`r0')        C limb vector that is loaded, accumulated into and stored back
31: define(`up',`r1')        C limb vector that is only loaded; supplies the multiplicands
32: define(`n',`r2')        C limb count; shifted right twice, then used as the loop counter
33: define(`v',`r3')        C the single limb every up[] limb is multiplied by
34:
35:
36: ASM_START()
C ----------------------------------------------------------------------------
C mpn_addmul_1: multiply the n limbs at up by the limb v, add the products
C into the n limbs at rp (in place), and return the final carry limb in r0.
C
C Register use inside the routine:
C   rp, up  - running pointers, advanced past each group of limbs processed
C   n       - limb count; after the two shifts below it holds the number of
C             4-limb loop iterations still to run
C   v       - multiplier limb, never modified
C   ip      - carry limb passed from one group of limbs to the next; it is
C             the value finally returned
C   r11     - constant 0, so that "adc rX, r11, #0" yields just the carry flag
C   r8-r10, lr - scratch in every path (saved on entry)
C   r4-r7   - scratch in the 4-limb loop only (saved only if the loop runs)
C
C Structure: bit 0 of n selects a single-limb step, bit 1 a two-limb step, and
C the remaining n/4 is the trip count of a loop unrolled four limbs deep.
C With n == 0 all three parts are skipped and 0 is returned.
C
C Each limb step computes the 64-bit value  carry_in + rp[i] + v*up[i].  With
C 32-bit limbs that is at most (2^32-1) + (2^32-1) + (2^32-1)^2 = 2^64-1, so
C the umlal accumulation into a register pair cannot overflow.
C ----------------------------------------------------------------------------
37: PROLOGUE(mpn_addmul_1)
38: stmfd sp!, { r8-r11, lr }        C push the scratch registers and the return address
39: mov r11, #0        C r11 = 0 for the carry-capturing adc instructions
40: mov ip, #0        C no carry limb yet
41: movs n, n, lsr #1        C n >>= 1; the bit shifted out lands in the carry flag
42: bcc L(skip1)        C that bit was 0: no single leftover limb
C --- one limb ---
43: ldr lr, [up], #4        C lr = *up, then up += 4 bytes
44: ldr r9, [rp]        C r9 = *rp (rp is advanced by the store below)
45: umlal r9, ip, v, lr        C ip:r9 += v * lr; ip was 0, so ip becomes the carry limb
46: str r9, [rp], #4        C *rp = low limb, then rp += 4 bytes
47: L(skip1):
48: movs n, n, lsr #1        C n >>= 1 again; n is now the 4-limb iteration count
49: bcc L(skip2)        C the bit shifted out was 0: no leftover pair
C --- two limbs ---
50: ldmia rp, { r9, r10 }        C r9, r10 = rp[0], rp[1]; rp is not written back here
51: adds r8, ip, r9        C r8 = carry limb + rp[0], setting the carry flag
52: adc r9, r11, #0        C r9 = carry out of that add (0 or 1)
53: ldmia up!, { ip, lr }        C ip, lr = up[0], up[1]; up += 8 bytes (old ip already consumed)
54: umlal r8, r9, v, ip        C r9:r8 += v * up[0]; r8 is the result limb, r9 the carry limb
55: adds r9, r9, r10        C carry limb + rp[1]
56: adc ip, r11, #0        C ip = carry out of that add
57: umlal r9, ip, v, lr        C ip:r9 += v * up[1]; ip is the carry limb for what follows
58: stmia rp!, { r8, r9 }        C store both result limbs; rp += 8 bytes
59: L(skip2):
60: teq n, #0        C any 4-limb groups left?
61: beq L(return)        C none: return without touching r4-r7
62: stmfd sp!, { r4-r7 }        C the loop needs four more registers; save them first
C --- four limbs per iteration; ip holds the carry limb on entry and on exit ---
63: L(addmul_loop):
64: ldmia rp, { r5, r6, r7, r8 }        C r5..r8 = rp[0..3]; rp is advanced by the stmia below
65: adds r4, ip, r5        C r4 = carry limb + rp[0]
66: adc r5, r11, #0        C r5 = carry out of that add
67: ldmia up!, { r9, r10, ip, lr }        C r9, r10, ip, lr = up[0..3]; up += 16 bytes
68: umlal r4, r5, v, r9        C r5:r4 += v * up[0]
69: adds r5, r5, r6        C carry limb + rp[1]
70: adc r6, r11, #0
71: umlal r5, r6, v, r10        C r6:r5 += v * up[1]
72: adds r6, r6, r7        C carry limb + rp[2]
73: adc r7, r11, #0
74: umlal r6, r7, v, ip        C r7:r6 += v * up[2]; this is the last use of ip as up[2]
75: adds r7, r7, r8        C carry limb + rp[3]
76: adc ip, r11, #0        C ip becomes the carry register again
77: umlal r7, ip, v, lr        C ip:r7 += v * up[3]; ip = carry limb for the next iteration
78: subs n, n, #1        C count down and set the flags tested by the bne below
79: stmia rp!, { r4, r5, r6, r7 }        C store four result limbs; rp += 16 bytes; flags are left alone
80: bne L(addmul_loop)        C loop until the subs above reached zero
81: ldmfd sp!, { r4-r7 }        C restore the registers saved just before the loop
82: L(return):
83: mov r0, ip        C return value = final carry limb
84: ldmfd sp!, { r8-r11, pc }        C restore r8-r11 and return by loading the saved lr into pc
85: EPILOGUE(mpn_addmul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>