Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/mul_basecase.asm, Revision 1.1.1.1
1.1 maekawa 1: dnl Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
2: dnl
3: dnl P5: 14.2 cycles/crossproduct (approx)
4:
5:
6: dnl Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
7: dnl
8: dnl This file is part of the GNU MP Library.
9: dnl
10: dnl The GNU MP Library is free software; you can redistribute it and/or
11: dnl modify it under the terms of the GNU Lesser General Public License as
12: dnl published by the Free Software Foundation; either version 2.1 of the
13: dnl License, or (at your option) any later version.
14: dnl
15: dnl The GNU MP Library is distributed in the hope that it will be useful,
16: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
17: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18: dnl Lesser General Public License for more details.
19: dnl
20: dnl You should have received a copy of the GNU Lesser General Public
21: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
22: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
23: dnl Suite 330, Boston, MA 02111-1307, USA.
24:
25:
26: include(`../config.m4')
27:
28:
29: C void mpn_mul_basecase (mp_ptr wp,
30: C mp_srcptr xp, mp_size_t xsize,
31: C mp_srcptr yp, mp_size_t ysize);
32:
C Frame offsets for the five arguments of the prototype above.  In
C argument order wp, xp, xsize, yp, ysize they are 4, 8, 12, 16, 20.
C NOTE(review): defframe is defined in the included m4 files, not here;
C the offsets are presumably bytes relative to esp at function entry,
C with the return address at offset 0 -- confirm against its definition.
33: defframe(PARAM_YSIZE, 20)
34: defframe(PARAM_YP, 16)
35: defframe(PARAM_XSIZE, 12)
36: defframe(PARAM_XP, 8)
37: defframe(PARAM_WP, 4)
38:
C Local used as the outer loop counter.  The negative offset presumably
C selects the slot created by the dummy pushl at the top of the routine
C -- confirm against the defframe definition.
39: defframe(VAR_COUNTER, -4)
40:
C mpn_mul_basecase -- schoolbook multiply of the xsize limbs at xp by the
C ysize limbs at yp, writing xsize+ysize limbs at wp.
C
C Structure:
C   1. wp[0] gets the low limb of xp[0]*yp[0].  If xsize is 1 the high
C      limb goes to wp[1] and the routine returns through L(done).
C   2. L(oop1) finishes the first row, xp[] times yp[0], storing (not
C      adding) into wp[1..xsize].
C   3. L(outer) and L(oop2) run ysize-1 further rows.  Each multiplies
C      xp[] by the next y limb, adds the row into wp[] one limb higher
C      than the previous row, and stores the final carry limb above it.
C
C No size checks are made.  Both sizes must be at least 1, and the
C xsize = 1 shortcut never reads ysize, so it relies on the caller
C passing ysize = 1 in that case.
C
C Register use once setup is complete:
C   esi       xp plus xsize limbs, i.e. one past the end of x
C   edi       slot in wp that receives the current row's final carry
C             limb; lower limbs are addressed as edi + ecx*4
C   ebp       pointer to the current y limb
C   ecx       negative limb index, counting up to zero
C   ebx       carry limb handed from one product to the next
C   eax, edx  low and high halves of each mull product
C VAR_COUNTER holds the number of outer rows still to run.
C
C incl and movl leave the carry flag alone, so the carry produced by the
C last addl of one iteration is still live at the adcl that opens the
C next iteration.
41: .text
42: ALIGN(8)
43: PROLOGUE(mpn_mul_basecase)
44:
45: pushl %eax C dummy push for allocating stack slot
46: pushl %esi C esi, ebp, edi are restored on both exit paths
47: pushl %ebp
48: pushl %edi
C Four 4-byte pushes so far.  NOTE(review): FRAME is presumably what the
C defframe names use to compensate for esp having moved -- confirm.
49: deflit(`FRAME',16)
50:
51: movl PARAM_XP,%esi
52: movl PARAM_WP,%edi
53: movl PARAM_YP,%ebp
54:
55: movl (%esi),%eax C load xp[0]
56: mull (%ebp) C multiply by yp[0]
57: movl %eax,(%edi) C store to wp[0]
58: movl PARAM_XSIZE,%ecx C xsize
59: decl %ecx C If xsize = 1, ysize = 1 too
60: jz L(done) C 1x1 case: edx still holds the high limb
61:
62: movl PARAM_XSIZE,%eax
63: pushl %ebx C ebx is saved only on this path; L(done) never pops it
C NOTE(review): presumably adjusts FRAME for the extra push -- confirm.
64: FRAME_pushl()
65: movl %edx,%ebx C high limb of xp[0]*yp[0] is the first carry limb
66: leal (%esi,%eax,4),%esi C make xp point at end
67: leal (%edi,%eax,4),%edi C offset wp by xsize
68: negl %ecx C negate j size/index for inner loop
69: xorl %eax,%eax C clear carry, since L(oop1) opens with adcl
70:
71: ALIGN(8)
72: L(oop1): adcl $0,%ebx C add carry from the previous addl into the carry limb
73: movl (%esi,%ecx,4),%eax C load next limb at xp[j]
74: mull (%ebp) C edx:eax = xp[j] * yp[0]
75: addl %ebx,%eax C low half plus incoming carry limb
76: movl %eax,(%edi,%ecx,4) C store; the first row overwrites wp
77: incl %ecx C advance index; carry flag is untouched
78: movl %edx,%ebx C high half is the next carry limb
79: jnz L(oop1) C loop until the index reaches zero
80:
81: adcl $0,%ebx C fold in the carry left by the final addl
82: movl PARAM_YSIZE,%eax
83: movl %ebx,(%edi) C most significant limb of product
84: addl $4,%edi C increment wp
85: decl %eax C ysize-1 rows remain
86: jz L(skip) C ysize = 1: the first row was the whole product
87: movl %eax,VAR_COUNTER C outer loop count, ysize-1
88:
89: L(outer):
90: addl $4,%ebp C make ebp point to next y limb
91: movl PARAM_XSIZE,%ecx
92: negl %ecx C index runs from -xsize up to zero
93: xorl %ebx,%ebx C zero the carry limb and clear the carry flag
94:
95: C code at 0x61 here, close enough to aligned
96: L(oop2):
97: adcl $0,%ebx C add carry from the previous addl into the carry limb
98: movl (%esi,%ecx,4),%eax C load xp[j]
99: mull (%ebp) C edx:eax = xp[j] * current y limb
100: addl %ebx,%eax C low half plus incoming carry limb
101: movl (%edi,%ecx,4),%ebx C fetch the existing wp limb
102: adcl $0,%edx C carry from that add goes into the high half
103: addl %eax,%ebx C accumulate into the wp limb
104: movl %ebx,(%edi,%ecx,4) C write the updated wp limb back
105: incl %ecx C advance index; carry flag is untouched
106: movl %edx,%ebx C high half is the next carry limb
107: jnz L(oop2) C loop until the index reaches zero
108:
109: adcl $0,%ebx C fold in the carry left by the final addl
110:
111: movl %ebx,(%edi) C carry limb goes just above this row
112: addl $4,%edi C next row lands one limb higher
113: movl VAR_COUNTER,%eax
114: decl %eax
115: movl %eax,VAR_COUNTER
116: jnz L(outer) C movl kept the zero flag set by decl
117:
118: L(skip):
119: popl %ebx
120: popl %edi
121: popl %ebp
122: popl %esi
123: addl $4,%esp C drop the dummy slot
124: ret
125:
C Exit for xsize = 1.  The branch here is taken before ebx is pushed, so
C only edi, ebp, esi and the dummy slot are on the stack.
126: L(done):
127: movl %edx,4(%edi) C store to wp[1]
128: popl %edi
129: popl %ebp
130: popl %esi
131: popl %eax C dummy pop for deallocating stack slot
132: ret
133:
134: EPILOGUE()
135:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>