Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/mul_2.asm, Revision 1.1.1.1
1.1 ohara 1: dnl Intel Pentium mpn_mul_2 -- mpn by 2-limb multiplication.
2:
3: dnl Copyright 2001, 2002 Free Software Foundation, Inc.
4: dnl
5: dnl This file is part of the GNU MP Library.
6: dnl
7: dnl The GNU MP Library is free software; you can redistribute it and/or
8: dnl modify it under the terms of the GNU Lesser General Public License as
9: dnl published by the Free Software Foundation; either version 2.1 of the
10: dnl License, or (at your option) any later version.
11: dnl
12: dnl The GNU MP Library is distributed in the hope that it will be useful,
13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15: dnl Lesser General Public License for more details.
16: dnl
17: dnl You should have received a copy of the GNU Lesser General Public
18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
20: dnl Suite 330, Boston, MA 02111-1307, USA.
21:
22: include(`../config.m4')
23:
24:
25: C P5: 24.0 cycles/limb
26:
27:
28: C mp_limb_t mpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size,
29: C mp_srcptr mult);
30: C
31: C At 24 c/l this is only 2 cycles faster than a separate mul_1 and addmul_1,
32: C but has the advantage of making just one pass over the operands.
33: C
34: C There are not enough registers to use PARAM_MULT directly, so the multiplier
35: C limbs are transferred to local variables on the stack.
36:
37: defframe(PARAM_MULT, 16) dnl mult, 4th argument in the prototype comment above
38: defframe(PARAM_SIZE, 12) dnl size, 3rd argument
39: defframe(PARAM_SRC, 8) dnl src, 2nd argument
40: defframe(PARAM_DST, 4) dnl dst, 1st argument
41:
42: dnl re-use parameter space: src and dst are copied into esi and edi before these slots are overwritten with the two multiplier limbs
43: define(VAR_MULT_LOW, `PARAM_SRC') dnl mult[0] is kept in the src parameter slot
44: define(VAR_MULT_HIGH,`PARAM_DST') dnl mult[1] is kept in the dst parameter slot
45: C mpn_mul_2: multiply src[0..size-1] by the two limbs mult[0..1], store the low size+1 product limbs at dst[0..size] and return the top limb in eax.
46: TEXT
47: ALIGN(8)
48: PROLOGUE(mpn_mul_2)
49: deflit(`FRAME',0)
50: C NOTE(review): FRAME and FRAME_pushl() are defined outside this file; presumably they keep the PARAM_ offsets valid across the pushes -- confirm in the included m4 definitions.
51: pushl %esi FRAME_pushl() C save esi, restored before ret
52: pushl %edi FRAME_pushl() C save edi, restored before ret
53:
54: movl PARAM_SRC, %esi C esi = src
55: movl PARAM_DST, %edi C edi = dst
56:
57: movl PARAM_MULT, %eax C eax = pointer to the two multiplier limbs
58: movl PARAM_SIZE, %ecx C ecx = size
59:
60: movl 4(%eax), %edx C mult high, mult[1]
61: movl (%eax), %eax C mult low, mult[0]
62:
63: movl %eax, VAR_MULT_LOW C overwrites the src parameter slot, src is already in esi
64: movl %edx, VAR_MULT_HIGH C overwrites the dst parameter slot, dst is already in edi
65:
66: pushl %ebx FRAME_pushl() C save ebx, restored before ret
67: pushl %ebp FRAME_pushl() C save ebp, restored before ret
68:
69: mull (%esi) C src[0] * mult[0], eax still holds mult[0] here
70:
71: movl %eax, %ebp C in case src==dst: hold the low product so src[0] can be re-read before dst[0] is written
72: movl (%esi), %eax C src[0]
73:
74: movl %ebp, (%edi) C dst[0]
75: movl %edx, %ebx C initial low carry, high limb of src[0] * mult[0]
76:
77: xorl %ebp, %ebp C initial high carry
78: leal (%edi,%ecx,4), %edi C dst end, &dst[size]
79:
80: mull VAR_MULT_HIGH C src[0] * mult[1]
81:
82: subl $2, %ecx C size-2
83: js L(done) C taken when size-2 is negative, skipping the loop
84:
85: leal 8(%esi,%ecx,4), %esi C &src[size]
86: xorl $-1, %ecx C -(size-1)
87:
88: C Each pass folds the pending mult[1] product into the carries, adds src[i] * mult[0], stores dst[i],
89: C and leaves src[i] * mult[1] in edx:eax for the next pass, where i = size + ecx runs from 1 to size-1.
90: L(top):
91: C eax low prod, low limb of the pending mult[1] product
92: C ebx low carry
93: C ecx counter, negative, counts up to 0
94: C edx high prod, high limb of the pending mult[1] product
95: C esi src end, &src[size]
96: C edi dst end, &dst[size]
97: C ebp high carry (0 or -1)
98:
99: andl $1, %ebp C 1 or 0
100: addl %eax, %ebx C low carry + low prod, sets the carry flag for adcl
101:
102: adcl %edx, %ebp C high carry + high prod + carry flag, asserted below not to carry out
103: ASSERT(nc)
104: movl (%esi,%ecx,4), %eax C src[i]
105:
106: mull VAR_MULT_LOW C src[i] * mult[0]
107:
108: addl %eax, %ebx C low carry
109: movl (%esi,%ecx,4), %eax C reload src[i] for the mult[1] multiply, movl leaves the carry flag alone
110:
111: adcl %ebp, %edx C high carry
112: movl %ebx, (%edi,%ecx,4) C store dst[i], movl leaves the carry flag for sbbl
113:
114: sbbl %ebp, %ebp C new high carry, -1 or 0
115: movl %edx, %ebx C new low carry
116:
117: mull VAR_MULT_HIGH C src[i] * mult[1], consumed by the next pass or at done
118:
119: incl %ecx C advance the negative counter towards 0
120: jnz L(top) C loop until the counter reaches 0
121:
122: C On arrival here edx:eax holds the last src limb times mult[1], ebx the low carry, ebp the high carry (0 or -1).
123: L(done):
124: andl $1, %ebp C 1 or 0
125: addl %ebx, %eax C low carry + low limb of the last mult[1] product
126:
127: adcl %ebp, %edx C high limb + high carry + carry flag, asserted below not to carry out
128: ASSERT(nc)
129: movl %eax, (%edi) C store carry low at dst[size]
130:
131: movl %edx, %eax C return carry high
132:
133: popl %ebp C restore in reverse order of the pushes
134: popl %ebx
135:
136: popl %edi
137: popl %esi
138:
139: ret
140:
141: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>