Annotation of OpenXM_contrib/gmp/mpn/x86/addsub_n.S, Revision 1.1
1.1 ! maekawa 1: /* Currently not working and not used. */
! 2:
! 3: /*
! 4: Copyright (C) 1999 Free Software Foundation, Inc.
! 5:
! 6: This file is part of the GNU MP Library.
! 7:
! 8: The GNU MP Library is free software; you can redistribute it and/or modify
! 9: it under the terms of the GNU Lesser General Public License as published by
! 10: the Free Software Foundation; either version 2.1 of the License, or (at your
! 11: option) any later version.
! 12:
! 13: The GNU MP Library is distributed in the hope that it will be useful, but
! 14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: License for more details.
! 17:
! 18: You should have received a copy of the GNU Lesser General Public License
! 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: MA 02111-1307, USA.
! 22: */
! 23:
! 24:
/* Helpers for running an add chain and a subtract chain through the single
   carry flag.  Register r parks the bit of whichever chain is idle.

   SAVE_BORROW_RESTORE_CARRY(r)
     in:   CF = borrow, r = parked carry in bit 0 (r is 0 or 1)
     out:  CF = carry,  r = borrow in bit 31
     "adcl r,r" forms 2*carry+borrow; "shll $31,r" then shifts bit 1 (the
     carry) out into CF and leaves bit 0 (the borrow) in bit 31.

   SAVE_CARRY_RESTORE_BORROW(r)
     in:   CF = carry,  r = parked borrow in bit 31 (other bits zero)
     out:  CF = borrow, r = carry in bit 0
     "adcl r,r" shifts bit 31 out into CF and brings the old CF into bit 0.

   Both clobber the remaining arithmetic flags.  */
#define SAVE_BORROW_RESTORE_CARRY(r)	\
	adcl	r,r;			\
	shll	$31,r
#define SAVE_CARRY_RESTORE_BORROW(r)	\
	adcl	r,r

	.globl	mpn_addsub_n_0
	.globl	mpn_addsub_n_1
! 30:
/* mpn_addsub_n_0 -- i386/i486/p6 combined add and subtract loop for the
   "full overlap" case r1==s2, r2==s1.

   For every processed 32-bit limb i, with carry and borrow propagated from
   limb to limb:
	r1[i] = r2[i] + r1[i]		edi[i] = esi[i] + edi[i]
	r2[i] = r2[i] - r1[i]		esi[i] = esi[i] - old edi[i]

   Stack arguments, at the offsets valid after the four pushes below:
	20(%esp)   r1 pointer (== s2), kept in %edi, receives the sums
	24(%esp)   r2 pointer (== s1), kept in %esi, receives the differences
	36(%esp)   size in limbs
   NOTE(review): the offsets suggest a C prototype of the form
   (r1, r2, s1, s2, size) with s1/s2 never read -- confirm against the caller.

   Only whole groups of four limbs are processed: size is divided by 4 and
   the remaining size%4 limbs are left untouched.  If size < 4 nothing is
   done.  The final carry and borrow are not returned in %eax.

   Registers: %eax,%ebx,%ecx scratch; %edx parks the idle chain's carry or
   borrow bit; %ebp counts 4-limb groups.  %edi,%esi,%ebx,%ebp are saved and
   restored; %eax,%ecx,%edx and the flags are clobbered.

   Subtraction and addition alternate in being two limbs ahead of the other
   (sub 0-1, add 0-3, sub 2-3), so CF has to be swapped with %edx only twice
   per four limbs.  Everything between the arithmetic instructions is movl,
   leal or decl, none of which modifies CF.  */
mpn_addsub_n_0:
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

	movl	20(%esp),%edi		/* r1 (res_ptr) */
	movl	24(%esp),%esi		/* r2 (s1_ptr) */
	movl	36(%esp),%ebp		/* size */

	shrl	$2,%ebp			/* number of 4-limb groups */
	jz	Done0			/* size < 4: a zero count would wrap in the loop */
	xorl	%edx,%edx		/* parked carry = 0; also clears CF (borrow = 0) */
	.align 4
Loop0:					/* L=load E=execute S=store */
	movl	(%esi),%ebx		/* sub 0 L */
	movl	4(%esi),%ecx		/* sub 1 L */
	sbbl	(%edi),%ebx		/* sub 0 LE */
	sbbl	4(%edi),%ecx		/* sub 1 LE */
	SAVE_BORROW_RESTORE_CARRY(%edx)	/* CF: borrow -> carry */
	movl	(%esi),%eax		/* add 0 L */
	adcl	%eax,(%edi)		/* add 0 LES */
	movl	4(%esi),%eax		/* add 1 L */
	adcl	%eax,4(%edi)		/* add 1 LES */
	movl	%ebx,(%esi)		/* sub 0 S */
	movl	%ecx,4(%esi)		/* sub 1 S */
	movl	8(%esi),%ebx		/* add 2 L */
	adcl	8(%edi),%ebx		/* add 2 LE */
	movl	12(%esi),%ecx		/* add 3 L */
	adcl	12(%edi),%ecx		/* add 3 LE */
	SAVE_CARRY_RESTORE_BORROW(%edx)	/* CF: carry -> borrow */
	movl	8(%edi),%eax		/* sub 2 L */
	sbbl	%eax,8(%esi)		/* sub 2 LES */
	movl	12(%edi),%eax		/* sub 3 L */
	sbbl	%eax,12(%esi)		/* sub 3 LES */
	movl	%ebx,8(%edi)		/* add 2 S */
	movl	%ecx,12(%edi)		/* add 3 S */
	leal	16(%esi),%esi		/* advance pointers without touching CF */
	leal	16(%edi),%edi
	decl	%ebp			/* decl leaves CF (the borrow) intact */
	jnz	Loop0

Done0:
	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
! 85:
/* mpn_addsub_n_1 -- i386/i486/p6 combined add and subtract loop for the
   "full overlap" case r1==s1, r2==s2.

   For every processed 32-bit limb i, with carry and borrow propagated from
   limb to limb:
	r1[i] = r1[i] + r2[i]		edi[i] = edi[i] + esi[i]
	r2[i] = r1[i] - r2[i]		esi[i] = old edi[i] - esi[i]

   Stack arguments, at the offsets valid after the four pushes below:
	20(%esp)   r1 pointer (== s1), kept in %edi, receives the sums
	24(%esp)   r2 pointer (== s2), kept in %esi, receives the differences
	36(%esp)   size in limbs
   NOTE(review): the offsets suggest a C prototype of the form
   (r1, r2, s1, s2, size) with s1/s2 never read -- confirm against the caller.

   Only whole groups of four limbs are processed: size is divided by 4 and
   the remaining size%4 limbs are left untouched.  If size < 4 nothing is
   done.  The final carry and borrow are not returned in %eax.

   Registers: %eax,%ebx,%ecx scratch; %edx parks the idle chain's carry or
   borrow bit; %ebp counts 4-limb groups.  %edi,%esi,%ebx,%ebp are saved and
   restored; %eax,%ecx,%edx and the flags are clobbered.

   Subtraction and addition alternate in being two limbs ahead of the other
   (sub 0-1, add 0-3, sub 2-3), so CF has to be swapped with %edx only twice
   per four limbs.  Everything between the arithmetic instructions is movl,
   leal or decl, none of which modifies CF.  */
mpn_addsub_n_1:
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

	movl	20(%esp),%edi		/* r1 (== s1) */
	movl	24(%esp),%esi		/* r2 (== s2) */
	movl	36(%esp),%ebp		/* size */

	shrl	$2,%ebp			/* number of 4-limb groups */
	jz	Done1			/* size < 4: a zero count would wrap in the loop */
	xorl	%edx,%edx		/* parked carry = 0; also clears CF (borrow = 0) */
	.align 4
Loop1:					/* L=load E=execute S=store */
	movl	(%edi),%ebx		/* sub 0 L */
	sbbl	(%esi),%ebx		/* sub 0 LE */
	movl	4(%edi),%ecx		/* sub 1 L */
	sbbl	4(%esi),%ecx		/* sub 1 LE */
	SAVE_BORROW_RESTORE_CARRY(%edx)	/* CF: borrow -> carry */
	movl	(%esi),%eax		/* add 0 L */
	adcl	%eax,(%edi)		/* add 0 LES */
	movl	4(%esi),%eax		/* add 1 L */
	adcl	%eax,4(%edi)		/* add 1 LES */
	movl	%ebx,(%esi)		/* sub 0 S */
	movl	%ecx,4(%esi)		/* sub 1 S */
	movl	8(%esi),%ebx		/* add 2 L */
	adcl	8(%edi),%ebx		/* add 2 LE */
	movl	12(%esi),%ecx		/* add 3 L */
	adcl	12(%edi),%ecx		/* add 3 LE */
	SAVE_CARRY_RESTORE_BORROW(%edx)	/* CF: carry -> borrow */
	movl	8(%edi),%eax		/* sub 2 L */
	sbbl	8(%esi),%eax		/* sub 2 LE */
	movl	%eax,8(%esi)		/* sub 2 S */
	movl	12(%edi),%eax		/* sub 3 L */
	sbbl	12(%esi),%eax		/* sub 3 LE */
	movl	%eax,12(%esi)		/* sub 3 S */
	movl	%ebx,8(%edi)		/* add 2 S */
	movl	%ecx,12(%edi)		/* add 3 S */
	leal	16(%esi),%esi		/* advance pointers without touching CF */
	leal	16(%edi),%edi
	decl	%ebp			/* decl leaves CF (the borrow) intact */
	jnz	Loop1

Done1:
	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
! 142:
/* mpn_copy -- copy size 32-bit limbs from s1_ptr to res_ptr, lowest
   address first.

   Stack arguments, at the offsets valid after the four pushes below:
	20(%esp)   res_ptr (destination), kept in %edi
	24(%esp)   s1_ptr  (source), kept in %esi
	28(%esp)   size in limbs

   The bulk is moved four limbs per pass; the remaining size%4 limbs are
   then copied one at a time.  A size of 0 copies nothing.
   NOTE(review): size is treated as unsigned; a negative value would be
   taken as a very large count.

   %edi,%esi,%ebx,%ebp are saved and restored; %eax and the flags are
   clobbered.  No value is returned.  */
	.globl	mpn_copy
mpn_copy:
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

	movl	20(%esp),%edi		/* res_ptr */
	movl	24(%esp),%esi		/* s1_ptr */
	movl	28(%esp),%ebp		/* size */

	shrl	$2,%ebp			/* number of 4-limb groups */
	jz	Tail2			/* no full group: a zero count would wrap in Loop2 */
	.align 4
Loop2:
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	8(%esi),%eax
	movl	12(%esi),%ebx
	movl	%eax,8(%edi)
	movl	%ebx,12(%edi)
	leal	16(%esi),%esi
	leal	16(%edi),%edi
	decl	%ebp
	jnz	Loop2

Tail2:
	movl	28(%esp),%ebp		/* reload size; the stack has not moved */
	andl	$3,%ebp			/* limbs left over after the 4-limb groups */
	jz	Done2
Limb2:
	movl	(%esi),%eax
	movl	%eax,(%edi)
	leal	4(%esi),%esi
	leal	4(%edi),%edi
	decl	%ebp
	jnz	Limb2

Done2:
	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>