Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/mul_basecase.asm, Revision 1.1.1.2
1.1 maekawa 1: dnl Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
2:
1.1.1.2 ! ohara 3: dnl Copyright 1996, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
1.1 maekawa 4: dnl
5: dnl This file is part of the GNU MP Library.
6: dnl
7: dnl The GNU MP Library is free software; you can redistribute it and/or
8: dnl modify it under the terms of the GNU Lesser General Public License as
9: dnl published by the Free Software Foundation; either version 2.1 of the
10: dnl License, or (at your option) any later version.
11: dnl
12: dnl The GNU MP Library is distributed in the hope that it will be useful,
13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15: dnl Lesser General Public License for more details.
16: dnl
17: dnl You should have received a copy of the GNU Lesser General Public
18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
20: dnl Suite 330, Boston, MA 02111-1307, USA.
21:
22: include(`../config.m4')
23:
24:
1.1.1.2 ! ohara 25: C P5: 14.2 cycles/crossproduct (approx)
! 26:
! 27:
1.1 maekawa 28: C void mpn_mul_basecase (mp_ptr wp,
29: C mp_srcptr xp, mp_size_t xsize,
30: C mp_srcptr yp, mp_size_t ysize);
31:
32: defframe(PARAM_YSIZE, 20) C 5th argument in the prototype: ysize
33: defframe(PARAM_YP, 16) C 4th argument: yp
34: defframe(PARAM_XSIZE, 12) C 3rd argument: xsize
35: defframe(PARAM_XP, 8) C 2nd argument: xp
36: defframe(PARAM_WP, 4) C 1st argument: wp
37: C NOTE(review): defframe is defined outside this file. Offsets look like byte offsets from the stack pointer at entry - confirm.
38: defframe(VAR_COUNTER, -4) C local used as the outer loop counter. Presumably the slot made by the dummy push at function entry - confirm.
39:
1.1.1.2 ! ohara 40: TEXT
1.1 maekawa 41: ALIGN(8)
42: PROLOGUE(mpn_mul_basecase)
43: C Schoolbook product w = x * y. The first pass stores x*yp[0] into w, each later pass adds x*yp[i] into w at limb offset i.
44: pushl %eax C dummy push for allocating stack slot (value of eax is irrelevant)
45: pushl %esi C saved here, restored before ret
46: pushl %ebp C saved here, restored before ret
47: pushl %edi C saved here, restored before ret
48: deflit(`FRAME',16) C 16 = bytes pushed so far (4 pushes of 4 bytes)
49:
50: movl PARAM_XP,%esi C esi = xp
51: movl PARAM_WP,%edi C edi = wp
52: movl PARAM_YP,%ebp C ebp = yp
53:
54: movl (%esi),%eax C load xp[0]
55: mull (%ebp) C multiply by yp[0], product in edx:eax
56: movl %eax,(%edi) C store low limb to wp[0]
57: movl PARAM_XSIZE,%ecx C xsize
58: decl %ecx C ecx = xsize-1. If xsize = 1, ysize = 1 too
59: jz L(done) C 1x1 case: only the high limb in edx is left to store
60:
61: movl PARAM_XSIZE,%eax C eax = xsize, used to scale the pointers below
62: pushl %ebx C saved here, restored at L(skip)
63: FRAME_pushl() C presumably accounts for the extra push in FRAME - macro defined outside this file
64: movl %edx,%ebx C ebx = high limb of xp[0]*yp[0], carried into wp[1]
65: leal (%esi,%eax,4),%esi C make xp point at end
66: leal (%edi,%eax,4),%edi C offset wp by xsize
67: negl %ecx C negate j size/index for inner loop: ecx = -(xsize-1), so indexing starts at xp[1]
68: xorl %eax,%eax C clear carry so the first adcl adds nothing
69:
70: ALIGN(8)
71: L(oop1): adcl $0,%ebx C add carry left by the addl of the previous pass (incl and movl leave CF alone)
72: movl (%esi,%ecx,4),%eax C load next limb at xp[j]
73: mull (%ebp) C edx:eax = xp[j] * yp[0]
74: addl %ebx,%eax C low limb + carried-in high limb, CF consumed by the next adcl
75: movl %eax,(%edi,%ecx,4) C store to wp[j]
76: incl %ecx C step index towards zero, sets ZF for the jnz below, CF untouched
77: movl %edx,%ebx C high limb becomes the carry-in for the next limb
78: jnz L(oop1) C loop until index reaches zero (end of x)
79:
80: adcl $0,%ebx C fold in the carry from the last addl
81: movl PARAM_YSIZE,%eax C eax = ysize
82: movl %ebx,(%edi) C most significant limb of product
83: addl $4,%edi C increment wp
84: decl %eax C eax = ysize-1 = number of y limbs still to process
85: jz L(skip) C ysize = 1: product is complete
86: movl %eax,VAR_COUNTER C set outer loop counter to ysize-1
87:
88: L(outer):
89: addl $4,%ebp C make ebp point to next y limb
90: movl PARAM_XSIZE,%ecx C ecx = xsize
91: negl %ecx C ecx = -xsize, index runs up to zero over all of x
92: xorl %ebx,%ebx C carry limb = 0, also clears CF for the first adcl
93:
94: C code at 0x61 here, close enough to aligned
95: L(oop2):
96: adcl $0,%ebx C add carry left by the addl %eax,%ebx of the previous pass
97: movl (%esi,%ecx,4),%eax C load xp[j]
98: mull (%ebp) C edx:eax = xp[j] * current y limb
99: addl %ebx,%eax C low limb + carry limb
100: movl (%edi,%ecx,4),%ebx C ebx = existing wp limb to accumulate into
101: adcl $0,%edx C carry from the addl above goes into the high limb
102: addl %eax,%ebx C add product low limb into wp limb, CF consumed by the next adcl
103: movl %ebx,(%edi,%ecx,4) C store updated wp limb
104: incl %ecx C step index towards zero, CF untouched
105: movl %edx,%ebx C high limb becomes the carry limb for the next pass
106: jnz L(oop2) C loop until index reaches zero (end of x)
107:
108: adcl $0,%ebx C fold in the carry from the last addl
109:
110: movl %ebx,(%edi) C store new most significant limb of this row
111: addl $4,%edi C advance wp by one limb for the next row
112: movl VAR_COUNTER,%eax C reload outer loop counter
113: decl %eax C one more y limb done
114: movl %eax,VAR_COUNTER C write counter back (movl keeps ZF from the decl)
115: jnz L(outer) C repeat while y limbs remain
116:
117: L(skip):
118: popl %ebx C restore registers in reverse push order
119: popl %edi
120: popl %ebp
121: popl %esi
122: addl $4,%esp C drop the dummy stack slot
123: ret
124:
125: L(done):
126: movl %edx,4(%edi) C store to wp[1]
127: popl %edi C ebx was never pushed on this path
128: popl %ebp
129: popl %esi
130: popl %eax C dummy pop for deallocating stack slot
131: ret
132:
133: EPILOGUE()
134:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>