Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/copyd.asm, Revision 1.1.1.1
1.1 ohara 1: dnl Intel Pentium mpn_copyd -- copy limb vector, decrementing.
2:
3: dnl Copyright 1996, 2001, 2002 Free Software Foundation, Inc.
4: dnl
5: dnl This file is part of the GNU MP Library.
6: dnl
7: dnl The GNU MP Library is free software; you can redistribute it and/or
8: dnl modify it under the terms of the GNU Lesser General Public License as
9: dnl published by the Free Software Foundation; either version 2.1 of the
10: dnl License, or (at your option) any later version.
11: dnl
12: dnl The GNU MP Library is distributed in the hope that it will be useful,
13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15: dnl Lesser General Public License for more details.
16: dnl
17: dnl You should have received a copy of the GNU Lesser General Public
18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
20: dnl Suite 330, Boston, MA 02111-1307, USA.
21:
22: include(`../config.m4')
23:
24:
25: C P5: 1.25 cycles/limb
26:
27:
28: C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
29: C Copies size limbs of 4 bytes each from src to dst, highest-addressed limb first. esi and edi are saved and restored; eax, ecx and edx are overwritten. A size of 0 takes every tail branch below without touching memory.
30: C See comments in copyi.asm.
31: C NOTE(review): the defframe values 4, 8, 12 look like byte offsets of the three arguments relative to the stack pointer at entry -- confirm against the defframe definition.
32: defframe(PARAM_SIZE,12)
33: defframe(PARAM_SRC, 8)
34: defframe(PARAM_DST, 4)
35:
36: PROLOGUE(mpn_copyd)
37: deflit(`FRAME',0)
38: C src and size are loaded before the pushes, dst after them.
39: movl PARAM_SRC, %eax
40: movl PARAM_SIZE, %ecx
41:
42: pushl %esi FRAME_pushl() C presumably FRAME_pushl keeps PARAM_DST below resolving correctly after the push -- confirm
43: pushl %edi FRAME_pushl()
44:
45: leal -4(%eax,%ecx,4), %eax C &src[size-1]
46: movl PARAM_DST, %edx
47:
48: subl $7, %ecx C size-7
49: jle L(end) C size <= 7: no full 8-limb block, go straight to the tail
50:
51: movl 28-4(%edx,%ecx,4), %esi C prefetch cache, dst[size-1]; the loaded value is discarded
52: nop C NOTE(review): purpose not evident from the code, presumably instruction pairing or alignment padding -- confirm
53:
54: L(top):
55: C eax &src[remaining-1], decrementing
56: C ebx
57: C ecx limbs remaining minus 7, greater than 0 on entry to each pass
58: C edx dst base, never modified
59: C esi scratch
60: C edi scratch
61: C ebp
62: C Each pass copies 8 limbs, from the highest remaining one downwards.
63: movl 28-32(%edx,%ecx,4), %esi C prefetch dst cache line; this is the lowest limb of the block about to be stored
64: subl $8, %ecx C sets the flags tested by the jg below; the movl and leal in between leave them alone
65:
66: movl (%eax), %esi C read words pairwise
67: movl -4(%eax), %edi
68: movl %esi, 56(%edx,%ecx,4) C store words pairwise
69: movl %edi, 52(%edx,%ecx,4)
70:
71: movl -8(%eax), %esi
72: movl -12(%eax), %edi
73: movl %esi, 48(%edx,%ecx,4)
74: movl %edi, 44(%edx,%ecx,4)
75:
76: movl -16(%eax), %esi
77: movl -20(%eax), %edi
78: movl %esi, 40(%edx,%ecx,4)
79: movl %edi, 36(%edx,%ecx,4)
80:
81: movl -24(%eax), %esi
82: movl -28(%eax), %edi
83: movl %esi, 32(%edx,%ecx,4)
84: movl %edi, 28(%edx,%ecx,4)
85:
86: leal -32(%eax), %eax C src pointer down by 8 limbs; leal rather than subl so the flags survive
87: jg L(top) C loop while more than 7 limbs remain
88:
89:
90: L(end):
91: C ecx -7 to 0, representing respectively 0 to 7 limbs remaining
92: C eax &src[remaining-1], the highest limb not yet copied
93: C edx dst base; it is never advanced, stores are indexed from it by ecx
94: C Tail: peel off 4 limbs, then 2, then 1, according to what remains.
95: addl $4, %ecx C ecx = remaining-3
96: jle L(no4) C 3 or fewer remain
97:
98: movl (%eax), %esi
99: movl -4(%eax), %edi
100: movl %esi, 8(%edx,%ecx,4) C dst[remaining-1]
101: movl %edi, 4(%edx,%ecx,4)
102:
103: movl -8(%eax), %esi
104: movl -12(%eax), %edi
105: movl %esi, (%edx,%ecx,4)
106: movl %edi, -4(%edx,%ecx,4) C dst[remaining-4]
107:
108: subl $16, %eax
109: subl $4, %ecx C 4 fewer remain, so ecx is again remaining-3
110: L(no4):
111: C Here ecx = remaining-3 with 0 to 3 limbs remaining.
112: addl $2, %ecx C ecx = remaining-1
113: jle L(no2) C 0 or 1 remain
114:
115: movl (%eax), %esi
116: movl -4(%eax), %edi
117: movl %esi, (%edx,%ecx,4) C dst[remaining-1]
118: movl %edi, -4(%edx,%ecx,4) C dst[remaining-2]
119:
120: subl $8, %eax
121: subl $2, %ecx C 2 fewer remain, so ecx is again remaining-1; also sets the flags for the jnz below
122: L(no2):
123: C On both paths the flags reflect ecx = remaining-1, with 0 or 1 limbs remaining.
124: jnz L(done) C nothing left to copy
125:
126: movl (%eax), %ecx C last limb, src[0]; ecx is free to use as scratch now
127: movl %ecx, (%edx) C risk of cache bank clash here
128:
129: L(done):
130: popl %edi C restore in reverse order of the pushes
131: popl %esi
132:
133: ret
134:
135: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>