Annotation of OpenXM_contrib/gmp/mpn/x86/mul_1.asm, Revision 1.1.1.1
1.1 maekawa 1: dnl x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
2: dnl with a limb and store the result in a second limb vector.
3: dnl
4: dnl cycles/limb
5: dnl P6: 5.5
6: dnl
7: dnl The following CPUs have their own optimized code, but for reference the
8: dnl code here runs as follows.
9: dnl
10: dnl cycles/limb
11: dnl P5: 12.5
12: dnl K6: 10.5
13: dnl K7: 4.5
14:
15:
16: dnl Copyright (C) 1992, 1994, 1997, 1998, 1999, 2000 Free Software
17: dnl Foundation, Inc.
18: dnl
19: dnl This file is part of the GNU MP Library.
20: dnl
21: dnl The GNU MP Library is free software; you can redistribute it and/or
22: dnl modify it under the terms of the GNU Lesser General Public License as
23: dnl published by the Free Software Foundation; either version 2.1 of the
24: dnl License, or (at your option) any later version.
25: dnl
26: dnl The GNU MP Library is distributed in the hope that it will be useful,
27: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
28: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29: dnl Lesser General Public License for more details.
30: dnl
31: dnl You should have received a copy of the GNU Lesser General Public
32: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
33: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
34: dnl Suite 330, Boston, MA 02111-1307, USA.
35:
36:
37: include(`../config.m4')
38:
39:
40: C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
41: C mp_limb_t multiplier);
42: C Multiplies the size limbs at src by multiplier, stores the low result limbs at dst, and returns the final carry limb in %eax.
43: defframe(PARAM_MULTIPLIER,16) C 4th argument; defframe comes from config.m4, offsets presumably relative to %esp at entry
44: defframe(PARAM_SIZE, 12) C 3rd argument
45: defframe(PARAM_SRC, 8) C 2nd argument
46: defframe(PARAM_DST, 4) C 1st argument
47: C Register use below: %esi = src pointer, %edi = dst pointer, %ecx = loop counter, %ebx and %ebp = carry limb.
48: TEXT
49: ALIGN(8)
50: PROLOGUE(mpn_mul_1)
51: deflit(`FRAME',0) C nothing pushed yet
52:
53: pushl %edi C save the four registers this routine overwrites
54: pushl %esi
55: pushl %ebx
56: pushl %ebp
57: deflit(`FRAME',16) C four 4-byte pushes so far; presumably lets PARAM_* compensate for the moved %esp
58:
59: movl PARAM_DST,%edi C %edi = dst
60: movl PARAM_SRC,%esi C %esi = src
61: movl PARAM_SIZE,%ecx C %ecx = size
62: C Handle size mod 4 limbs one at a time first, so the unrolled loop only sees whole groups of 4.
63: xorl %ebx,%ebx C carry limb starts at 0
64: andl $3,%ecx C %ecx = size mod 4
65: jz L(end0) C size is a multiple of 4: no single-limb iterations
66:
67: L(oop0):
68: movl (%esi),%eax C %eax = next source limb
69: mull PARAM_MULTIPLIER C %edx:%eax = limb * multiplier
70: leal 4(%esi),%esi C advance src by one limb; lea leaves the flags alone
71: addl %ebx,%eax C low half + incoming carry limb, sets CF
72: movl $0,%ebx C zero with mov, not xor, so CF from the add survives
73: adcl %ebx,%edx C %ebx is 0 here, so this is high half += CF
74: movl %eax,(%edi) C store the result limb
75: movl %edx,%ebx C propagate carry into cylimb
76:
77: leal 4(%edi),%edi C advance dst by one limb
78: decl %ecx
79: jnz L(oop0) C repeat until the size mod 4 limbs are done
80:
81: L(end0):
82: movl PARAM_SIZE,%ecx C reload size; %ecx was used up as the counter above
83: shrl $2,%ecx C %ecx = number of 4-limb groups
84: jz L(end) C fewer than 4 limbs in total: nothing left to do
85: C Main loop: 4 limbs per iteration; the carry limb alternates between %ebx and %ebp.
86:
87: ALIGN(8)
88: L(oop): movl (%esi),%eax C limb 0 of the group
89: mull PARAM_MULTIPLIER C %edx:%eax = limb 0 * multiplier
90: addl %eax,%ebx C low half + incoming carry limb, sets CF
91: movl $0,%ebp C mov keeps CF intact for the adc below
92: adcl %edx,%ebp C %ebp = high half + CF = carry limb for limb 1
93:
94: movl 4(%esi),%eax C limb 1 of the group
95: mull PARAM_MULTIPLIER
96: movl %ebx,(%edi) C store result limb 0
97: addl %eax,%ebp C new lo + cylimb
98: movl $0,%ebx C mov keeps CF intact for the adc below
99: adcl %edx,%ebx C %ebx = carry limb for limb 2
100:
101: movl 8(%esi),%eax C limb 2 of the group
102: mull PARAM_MULTIPLIER
103: movl %ebp,4(%edi) C store result limb 1
104: addl %eax,%ebx C new lo + cylimb
105: movl $0,%ebp C mov keeps CF intact for the adc below
106: adcl %edx,%ebp C %ebp = carry limb for limb 3
107:
108: movl 12(%esi),%eax C limb 3 of the group
109: mull PARAM_MULTIPLIER
110: movl %ebx,8(%edi) C store result limb 2
111: addl %eax,%ebp C new lo + cylimb
112: movl $0,%ebx C mov keeps CF intact for the adc below
113: adcl %edx,%ebx C %ebx = carry limb out of this group
114:
115: movl %ebp,12(%edi) C store result limb 3
116:
117: leal 16(%esi),%esi C advance src by 4 limbs (16 bytes)
118: leal 16(%edi),%edi C advance dst by 4 limbs (16 bytes)
119: decl %ecx
120: jnz L(oop) C repeat for each remaining 4-limb group
121:
122: L(end): movl %ebx,%eax C return value = final carry limb
123:
124: popl %ebp C restore saved registers in reverse push order
125: popl %ebx
126: popl %esi
127: popl %edi
128: ret
129:
130: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>