Annotation of OpenXM_contrib/gmp/mpn/arm/submul_1.asm, Revision 1.1.1.1
1.1 ohara 1: dnl ARM mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
2: dnl result from a second limb vector.
3: dnl Based on mpn_addmul_1, which was contributed by Robert Harley.
4:
5: dnl Copyright 1998, 2000, 2001 Free Software Foundation, Inc.
6:
7: dnl This file is part of the GNU MP Library.
8:
9: dnl The GNU MP Library is free software; you can redistribute it and/or modify
10: dnl it under the terms of the GNU Lesser General Public License as published
11: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
12: dnl your option) any later version.
13:
14: dnl The GNU MP Library is distributed in the hope that it will be useful, but
15: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17: dnl License for more details.
18:
19: dnl You should have received a copy of the GNU Lesser General Public License
20: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
21: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22: dnl MA 02111-1307, USA.
23:
24: include(`../config.m4')
25:
26: C This runs at 9.75 cycles/limb in the StrongARM.
27:
28: C Could use some register cleanup. Fewer registers might suffice, or
29: C r11 could be utilized for better speed. Could avoid saving all registers for
30: C small (n <= 3) operands.
31:
32: define(`rp',`r0')                C pointer to the limbs that are loaded, reduced by the product, and stored back
33: define(`up',`r1')                C pointer to the limbs that are loaded and multiplied by v
34: define(`n',`r2')                 C limb count; shifted right twice, then used as the loop counter
35: define(`v',`r3')                 C the single limb every up[] limb is multiplied by
36:
37: ASM_START()
38: PROLOGUE(mpn_submul_1)           C rp[i] -= up[i] * v for n limbs; the final carry limb (high product word + borrow) is returned in r0
39: stmfd sp!, { r4-r10, lr }        C push r4-r10 and lr; they are popped again at L(return)
40: mov r4, #0                       C r4 = carry limb passed from one chunk to the next, initially 0
41: movs n, n, lsr #1                C n >>= 1; C flag = old bit 0 of n (set when one odd limb must be handled)
42: bcc L(skip1)                     C bit 0 clear: skip the single-limb step
43: ldr lr, [up], #4                 C lr = *up, then up += 4
44: umull r4, r12, v, lr             C r12:r4 = v * lr (64-bit unsigned product, low word in r4)
45: ldr r6, [rp]                     C r6 = *rp
46: subs r6, r6, r4                  C r6 -= low product word; C flag is cleared if this borrowed
47: sbc r4, r0, r0                   C r4 = r0 - r0 - (1 - C): 0 if no borrow, all ones (-1) if borrow; the value of r0 cancels out
48: sub r4, r12, r4                  C r4 = high product word + borrow (subtracting 0 or -1)
49: str r6, [rp], #4                 C store the result limb, then rp += 4
50: L(skip1):
51: movs n, n, lsr #1                C n >>= 1 again; C flag = bit 1 of the original count (set when a pair of limbs remains)
52: bcc L(skip2)                     C bit clear: skip the two-limb step
53: ldmia up!, { r9, r10 }           C load two source limbs, up += 8
54: mov r5, #0                       C clear the high half of the accumulator r5:r4
55: umlal r4, r5, v, r9              C r5:r4 = r4 (carry in) + v * r9
56: mov r9, #0                       C r9 is reused as the high half of the next accumulator
57: umlal r5, r9, v, r10             C r9:r5 = r5 + v * r10
58: ldmia rp, { r6, r7 }             C load two destination limbs (no writeback, rp unchanged)
59: subs r6, r6, r4                  C subtract the low limb, starting the borrow chain
60: sbcs r7, r7, r5                  C subtract the next limb together with the pending borrow
61: sbc r4, r0, r0                   C r4 = 0 if no borrow, -1 if borrow
62: sub r4, r9, r4                   C r4 = top product word + borrow = carry limb for the next chunk
63: stmia rp!, { r6, r7 }            C store both result limbs, rp += 8
64: L(skip2):
65: teq n, #0                        C n now holds the number of remaining four-limb groups
66: beq L(return)                    C none left: return the carry accumulated so far
67:
68: L(submul_loop):                  C each iteration handles four limbs
69: ldmia up!, { r9, r10, r12, lr }  C load four source limbs, up += 16
70: mov r5, #0                       C clear the high half of r5:r4
71: umlal r4, r5, v, r9              C r5:r4 = r4 (carry in) + v * r9
72: mov r9, #0                       C each source register is zeroed after use and becomes the next high word
73: umlal r5, r9, v, r10             C r9:r5 = r5 + v * r10
74: mov r10, #0
75: umlal r9, r10, v, r12            C r10:r9 = r9 + v * r12
76: mov r12, #0
77: umlal r10, r12, v, lr            C r12:r10 = r10 + v * lr; r12 ends up as the top word of the four-limb product
78: ldmia rp, { r6, r7, r8, lr }     C load four destination limbs (lr is free again); rp unchanged
79: subs r6, r6, r4                  C four-limb subtraction: r6, r7, r8, lr minus r4, r5, r9, r10
80: sbcs r7, r7, r5
81: sbcs r8, r8, r9
82: sbcs lr, lr, r10
83: sbc r4, r0, r0                   C r4 = 0 if no borrow out of the chain, -1 if borrow
84: sub r4, r12, r4                  C r4 = top product word + borrow = carry limb for the next iteration
85: subs n, n, #1                    C decrement the group count; done only after the borrow was captured, since subs rewrites the flags
86: stmia rp!, { r6, r7, r8, lr }    C store four result limbs, rp += 16; does not alter the flags set by subs
87: bne L(submul_loop)               C repeat while groups remain
88: L(return):
89: mov r0, r4                       C return value = final carry limb
90: ldmfd sp!, { r4-r10, pc }        C pop r4-r10 and load the saved lr into pc, which returns to the caller
91: EPILOGUE(mpn_submul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>