Annotation of OpenXM_contrib/gmp/mpn/x86/k6/gcd_finda.asm, Revision 1.1
1.1 ! ohara 1: dnl AMD K6 mpn_gcd_finda.
! 2:
! 3: dnl Copyright 2000, 2002 Free Software Foundation, Inc.
! 4: dnl
! 5: dnl This file is part of the GNU MP Library.
! 6: dnl
! 7: dnl The GNU MP Library is free software; you can redistribute it and/or
! 8: dnl modify it under the terms of the GNU Lesser General Public License as
! 9: dnl published by the Free Software Foundation; either version 2.1 of the
! 10: dnl License, or (at your option) any later version.
! 11: dnl
! 12: dnl The GNU MP Library is distributed in the hope that it will be useful,
! 13: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
! 14: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 15: dnl Lesser General Public License for more details.
! 16: dnl
! 17: dnl You should have received a copy of the GNU Lesser General Public
! 18: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
! 19: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
! 20: dnl Suite 330, Boston, MA 02111-1307, USA.
! 21:
! 22: include(`../config.m4')
! 23:
! 24:
! 25: C K6: 680 cycles (approx) on average
! 26:
! 27:
! 28: dnl How many trial subtractions of n2 from n1 to attempt before launching
! 29: dnl into a full division. This is the repeat count of the forloop in the
! 30: dnl main loop, which follows one unconditional subtraction at L(entry).
! 31: deflit(TRIAL_SUBS, 8)
! 32:
! 33:
! 34: C mp_limb_t mpn_gcd_finda (const mp_limb_t cp[2]);
! 35: C
! 36: C This code is probably not optimal, but it's already a good improvement
! 37: C over the generic C.
! 38: C
! 39:
! 40: defframe(PARAM_CP, 4) C the cp argument, a pointer to the two input limbs
! 41: C Save slots for the registers stored on entry and reloaded at L(done).
! 42: defframe(SAVE_EBX, -4)
! 43: defframe(SAVE_ESI, -8)
! 44: defframe(SAVE_EDI, -12)
! 45: defframe(SAVE_EBP, -16)
! 46: C Scratch slots used around the full division in the main loop.
! 47: defframe(VAR_N2H, -20) C n2 high limb before normalization
! 48: defframe(VAR_N2L, -24) C n2 low limb before normalization
! 49: defframe(VAR_Q, -28) C not referenced in mpn_gcd_finda below
! 50: defframe(VAR_N2L_NORM, -32) C n2 low limb after the normalizing shift
! 51: C Bytes subtracted from esp on entry: the eight 4-byte slots above.
! 52: deflit(STACK_SPACE, 32)
! 53:
! 54: TEXT
! 55: ALIGN(32)
! 56: C Euclidean steps on n1,n2 = cp[1]:cp[0] and its negation until n2 h is zero.
! 57: PROLOGUE(mpn_gcd_finda)
! 58: deflit(`FRAME',0)
! 59: C In: PARAM_CP points at two limbs, both asserted nonzero. Out: eax = n2 l.
! 60: movl PARAM_CP, %eax
! 61: subl $STACK_SPACE, %esp
! 62: deflit(`FRAME',STACK_SPACE)
! 63:
! 64: movl %ebx, SAVE_EBX
! 65:
! 66: movl %esi, SAVE_ESI
! 67: movl (%eax), %ecx C cp[0], low limb
! 68:
! 69: movl %edi, SAVE_EDI
! 70: movl 4(%eax), %edx C cp[1], high limb
! 71:
! 72: movl %ebp, SAVE_EBP
! 73:
! 74: ASSERT(nz,`orl %ecx, %ecx')
! 75: ASSERT(nz,`orl %edx, %edx')
! 76: C Form the negation in ebx:eax; negl/notl suffices as the low limb is nonzero.
! 77: movl %ecx, %eax
! 78: movl %edx, %ebx
! 79:
! 80: negl %eax
! 81: notl %ebx
! 82: C Two-limb compare of the negation against the original; CF is the borrow.
! 83: cmpl %ecx, %eax
! 84: movl %ebx, %esi
! 85:
! 86: sbbl %edx, %esi
! 87: C Only CF is meaningful here: ZF from sbbl reflects just the high limb.
! 88: jb L(top) C negation below original, so n1 >= n2 already holds
! 89: C Otherwise swap roles: n2 = original, n1 = negation.
! 90: movl %ecx, %eax
! 91: movl %edx, %ebx
! 92:
! 93: negl %ecx
! 94: notl %edx
! 95:
! 96: jmp L(top)
! 97:
! 98:
! 99: ALIGN(8)
! 100: L(restore):
! 101: C eax n2 l
! 102: C ebx n2 h
! 103: C ecx n1-n2 l
! 104: C edx n1-n2 h
! 105: C esi old n1 h
! 106: C edi
! 107: C ebp
! 108: C A trial subtraction borrowed: old n2 becomes n1, pre-subtraction n1 becomes n2.
! 109: movl %ebx, %edx C n1 h = old n2 h
! 110: movl %esi, %ebx C n2 h = old n1 h
! 111:
! 112: movl %eax, %esi
! 113: addl %ecx, %eax C n2 l = old n1 l, by adding n2 l back
! 114:
! 115: movl %esi, %ecx C n1 l = old n2 l
! 116:
! 117:
! 118: L(top):
! 119: C n1 >= n2
! 120: C
! 121: C eax n2 l
! 122: C ebx n2 h
! 123: C ecx n1 l
! 124: C edx n1 h
! 125: C esi
! 126: C edi
! 127: C ebp
! 128:
! 129: orl %ebx, %ebx
! 130: jz L(done) C n2 h is zero, eax holds the result
! 131:
! 132: L(entry):
! 133: subl %eax, %ecx
! 134: sbbl %ebx, %edx C cannot borrow while n1 >= n2 holds
! 135: ASSERT(nc)
! 136: C Unrolled trial subtractions; esi keeps n1 h from before each one.
! 137: forloop(i,1,TRIAL_SUBS,`
! 138: movl %edx, %esi
! 139: subl %eax, %ecx
! 140:
! 141: sbbl %ebx, %edx
! 142: jc L(restore)
! 143: ')
! 144:
! 145: C All trial subtractions succeeded, so reduce n1 by a full division.
! 146: C n1 >= n2
! 147: C
! 148: C eax n2 l
! 149: C ebx n2 h
! 150: C ecx n1 l
! 151: C edx n1 h
! 152: C esi
! 153: C edi
! 154: C ebp
! 155:
! 156: movl %eax, VAR_N2L C keep unnormalized n2 l for after the division
! 157: movl %ecx, %esi C n1l
! 158:
! 159: bsrl %ebx, %ecx C index of the highest set bit of n2h, nonzero here
! 160:
! 161: movl %ebx, VAR_N2H C keep unnormalized n2 h for after the division
! 162: notl %ecx C n2h leading zeros (low 5 bits)
! 163:
! 164: shldl( %cl, %eax, %ebx) C n2h normalized
! 165:
! 166: shll %cl, %eax C n2l normalized
! 167: movl %edx, %edi C n1h
! 168:
! 169: movl %eax, VAR_N2L_NORM
! 170: xorl %ebp, %ebp C receives the bits shifted out of n1h
! 171:
! 172: shldl( %cl, %edi, %ebp) C n1h shifted
! 173: shldl( %cl, %esi, %edi) C n1m shifted
! 174:
! 175: shll %cl, %esi C n1l shifted
! 176: movl %ebp, %edx C dividend high limb for divl
! 177:
! 178: movl %edi, %eax C dividend low limb for divl
! 179:
! 180: divl %ebx C n1h:n1m / n2h
! 181:
! 182: movl %edx, %edi C n1h:n1m:n1l - q*n2h
! 183: movl VAR_N2L_NORM, %edx
! 184:
! 185: mull %edx C q*n2l
! 186:
! 187: subl %eax, %esi
! 188: movl VAR_N2L_NORM, %ebp
! 189:
! 190: sbbl %edx, %edi C n1h:n1m:n1l - q*(n2h:n2l)
! 191:
! 192: jnc L(div_done) C no borrow, remainder is already non-negative
! 193: addl %ebp, %esi
! 194:
! 195: adcl %ebx, %edi C addback n2h:n2l
! 196:
! 197: jc L(div_done) C carry out means the remainder is back in range
! 198: addl %ebp, %esi
! 199:
! 200: adcl %ebx, %edi C further addback n2h:n2l
! 201: ASSERT(c)
! 202:
! 203: L(div_done):
! 204: shrdl( %cl, %edi, %esi)
! 205:
! 206: shrl %cl, %edi C unshift n1m:n1l remainder
! 207: movl %esi, %eax C remainder l becomes the new n2 l
! 208:
! 209: movl VAR_N2L, %ecx C old n2 l becomes the new n1 l
! 210: movl %edi, %ebx C remainder h becomes the new n2 h
! 211:
! 212: movl VAR_N2H, %edx C old n2 h becomes the new n1 h
! 213: orl %ebx, %ebx
! 214:
! 215: jnz L(entry) C keep going while the new n2 h is nonzero
! 216:
! 217:
! 218: L(done):
! 219: movl SAVE_EBX, %ebx
! 220: movl SAVE_ESI, %esi
! 221: movl SAVE_EDI, %edi
! 222: movl SAVE_EBP, %ebp
! 223: addl $STACK_SPACE, %esp
! 224: ret C return value is eax, the n2 l seen at L(done)
! 225:
! 226: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>