[BACK]Return to gcd_finda.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86 / k6

Annotation of OpenXM_contrib/gmp/mpn/x86/k6/gcd_finda.asm, Revision 1.1

1.1     ! ohara       1: dnl  AMD K6 mpn_gcd_finda.
        !             2:
        !             3: dnl  Copyright 2000, 2002 Free Software Foundation, Inc.
        !             4: dnl
        !             5: dnl  This file is part of the GNU MP Library.
        !             6: dnl
        !             7: dnl  The GNU MP Library is free software; you can redistribute it and/or
        !             8: dnl  modify it under the terms of the GNU Lesser General Public License as
        !             9: dnl  published by the Free Software Foundation; either version 2.1 of the
        !            10: dnl  License, or (at your option) any later version.
        !            11: dnl
        !            12: dnl  The GNU MP Library is distributed in the hope that it will be useful,
        !            13: dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            14: dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            15: dnl  Lesser General Public License for more details.
        !            16: dnl
        !            17: dnl  You should have received a copy of the GNU Lesser General Public
        !            18: dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
        !            19: dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
        !            20: dnl  Suite 330, Boston, MA 02111-1307, USA.
        !            21:
        !            22: include(`../config.m4')
        !            23:
        !            24:
        !            25: C K6: 680 cycles (approx) on average
        !            26:
        !            27:
        !            28: dnl  How many trial subtractions of n2 from n1 to attempt, after the first
        !            29: dnl  unconditional one, before launching into a full division.
        !            30:
        !            31: deflit(TRIAL_SUBS, 8)
        !            32:
        !            33:
        !            34: C mp_limb_t mpn_gcd_finda (const mp_limb_t cp[2]);
        !            35: C
        !            36: C This code is probably not optimal, but it's already a good improvement
        !            37: C over the generic C.
        !            38: C n1 and n2 start as the two-limb value cp[1]:cp[0] and its negation, larger first.
        !            39: C Each step replaces n1,n2 by n2,(n1 mod n2), the low limb of n2 is returned once its high limb is zero.
        !            40: defframe(PARAM_CP, 4)
        !            41: C slots for the registers that are reloaded before returning
        !            42: defframe(SAVE_EBX,      -4)
        !            43: defframe(SAVE_ESI,      -8)
        !            44: defframe(SAVE_EDI,     -12)
        !            45: defframe(SAVE_EBP,     -16)
        !            46: C scratch slots for the division path, VAR_Q is not referenced in this routine
        !            47: defframe(VAR_N2H,      -20)
        !            48: defframe(VAR_N2L,      -24)
        !            49: defframe(VAR_Q,        -28)
        !            50: defframe(VAR_N2L_NORM, -32)
        !            51:
        !            52: deflit(STACK_SPACE, 32)
        !            53:
        !            54:        TEXT
        !            55:        ALIGN(32)
        !            56:
        !            57: PROLOGUE(mpn_gcd_finda)
        !            58: deflit(`FRAME',0)
        !            59:
        !            60:        movl    PARAM_CP, %eax          C eax = cp
        !            61:        subl    $STACK_SPACE, %esp      C room for saved registers and scratch slots
        !            62: deflit(`FRAME',STACK_SPACE)
        !            63:
        !            64:        movl    %ebx, SAVE_EBX
        !            65:
        !            66:        movl    %esi, SAVE_ESI
        !            67:        movl    (%eax), %ecx            C ecx = cp[0], low limb
        !            68:
        !            69:        movl    %edi, SAVE_EDI
        !            70:        movl    4(%eax), %edx           C edx = cp[1], high limb
        !            71:
        !            72:        movl    %ebp, SAVE_EBP
        !            73:
        !            74:        ASSERT(nz,`orl %ecx, %ecx')
        !            75:        ASSERT(nz,`orl %edx, %edx')
        !            76:
        !            77:        movl    %ecx, %eax
        !            78:        movl    %edx, %ebx
        !            79:
        !            80:        negl    %eax                    C eax = -cp[0]
        !            81:        notl    %ebx                    C ebx = ~cp[1], high limb of the negation as cp[0] is non-zero
        !            82:
        !            83:        cmpl    %ecx, %eax              C low limb of negation minus input, for the borrow
        !            84:        movl    %ebx, %esi
        !            85:
        !            86:        sbbl    %edx, %esi              C high limb, esi is scratch and only the carry matters
        !            87: C Carry alone decides the order, the zero flag here reflects only the high limb difference.
        !            88:        jb      L(top)                  C negation below input, n1 = input and n2 = negation already
        !            89:
        !            90:        movl    %ecx, %eax              C otherwise n2 = input
        !            91:        movl    %edx, %ebx
        !            92:
        !            93:        negl    %ecx                    C and n1 = negation
        !            94:        notl    %edx
        !            95:
        !            96:        jmp     L(top)
        !            97:
        !            98:
        !            99:        ALIGN(8)
        !           100: L(restore):
        !           101:        C eax   n2 l
        !           102:        C ebx   n2 h
        !           103:        C ecx   n1-n2 l
        !           104:        C edx   n1-n2 h
        !           105:        C esi   old n1 h
        !           106:        C edi
        !           107:        C ebp
        !           108:
        !           109:        movl    %ebx, %edx              C new n1 h = n2 h
        !           110:        movl    %esi, %ebx              C new n2 h = n1 h from before the failed subtract
        !           111:
        !           112:        movl    %eax, %esi              C keep n2 l
        !           113:        addl    %ecx, %eax              C new n2 l = n2 l + (n1-n2) l, undoing the subtract
        !           114:
        !           115:        movl    %esi, %ecx              C new n1 l = n2 l
        !           116:
        !           117:
        !           118: L(top):
        !           119:        C n1 >= n2
        !           120:        C
        !           121:        C eax   n2 l
        !           122:        C ebx   n2 h
        !           123:        C ecx   n1 l
        !           124:        C edx   n1 h
        !           125:        C esi
        !           126:        C edi
        !           127:        C ebp
        !           128:
        !           129:        orl     %ebx, %ebx              C n2 h zero means finished, eax holds the result
        !           130:        jz      L(done)
        !           131:
        !           132: L(entry):
        !           133:        subl    %eax, %ecx              C n1 -= n2, no borrow expected since n1 >= n2
        !           134:        sbbl    %ebx, %edx
        !           135:        ASSERT(nc)
        !           136: C Trial subtractions, a borrow means n1 fell below n2, esi holds the prior n1 h for restore.
        !           137: forloop(i,1,TRIAL_SUBS,`
        !           138:        movl    %edx, %esi
        !           139:        subl    %eax, %ecx
        !           140:
        !           141:        sbbl    %ebx, %edx
        !           142:        jc      L(restore)
        !           143: ')
        !           144:
        !           145:
        !           146:        C every trial subtraction succeeded, divide what is left of n1 by n2
        !           147:        C
        !           148:        C eax   n2 l
        !           149:        C ebx   n2 h
        !           150:        C ecx   n1 l
        !           151:        C edx   n1 h
        !           152:        C esi
        !           153:        C edi
        !           154:        C ebp
        !           155:
        !           156:        movl    %eax, VAR_N2L
        !           157:        movl    %ecx, %esi              C n1l
        !           158:
        !           159:        bsrl    %ebx, %ecx              C index of the highest set bit of n2h, which is non-zero here
        !           160:
        !           161:        movl    %ebx, VAR_N2H
        !           162:        notl    %ecx                    C n2h leading zeros (low 5 bits)
        !           163:
        !           164:        shldl(  %cl, %eax, %ebx)        C n2h normalized
        !           165:
        !           166:        shll    %cl, %eax               C n2l normalized
        !           167:        movl    %edx, %edi              C n1h
        !           168:
        !           169:        movl    %eax, VAR_N2L_NORM
        !           170:        xorl    %ebp, %ebp
        !           171:
        !           172:        shldl(  %cl, %edi, %ebp)        C n1h shifted
        !           173:        shldl(  %cl, %esi, %edi)        C n1m shifted
        !           174:
        !           175:        shll    %cl, %esi               C n1l shifted
        !           176:        movl    %ebp, %edx
        !           177:
        !           178:        movl    %edi, %eax
        !           179:
        !           180:        divl    %ebx                    C n1h:n1m / n2h
        !           181:
        !           182:        movl    %edx, %edi              C n1h:n1m:n1l - q*n2h
        !           183:        movl    VAR_N2L_NORM, %edx
        !           184:
        !           185:        mull    %edx                    C q*n2l
        !           186:
        !           187:        subl    %eax, %esi
        !           188:        movl    VAR_N2L_NORM, %ebp
        !           189:
        !           190:        sbbl    %edx, %edi              C n1h:n1m:n1l - q*(n2h:n2l)
        !           191:
        !           192:        jnc     L(div_done)             C no borrow, no addback needed
        !           193:        addl    %ebp, %esi
        !           194:
        !           195:        adcl    %ebx, %edi              C addback n2h:n2l
        !           196:
        !           197:        jc      L(div_done)
        !           198:        addl    %ebp, %esi
        !           199:
        !           200:        adcl    %ebx, %edi              C further addback n2h:n2l
        !           201:        ASSERT(c)
        !           202:
        !           203: L(div_done):
        !           204:        shrdl(  %cl, %edi, %esi)
        !           205:
        !           206:        shrl    %cl, %edi               C unshift n1m:n1l remainder
        !           207:        movl    %esi, %eax              C new n2 l = remainder l
        !           208:
        !           209:        movl    VAR_N2L, %ecx           C new n1 l = old n2 l
        !           210:        movl    %edi, %ebx              C new n2 h = remainder h
        !           211:
        !           212:        movl    VAR_N2H, %edx           C new n1 h = old n2 h
        !           213:        orl     %ebx, %ebx
        !           214:
        !           215:        jnz     L(entry)                C keep going while n2 h is non-zero
        !           216:
        !           217:
        !           218: L(done):
        !           219:        movl    SAVE_EBX, %ebx
        !           220:        movl    SAVE_ESI, %esi
        !           221:        movl    SAVE_EDI, %edi
        !           222:        movl    SAVE_EBP, %ebp
        !           223:        addl    $STACK_SPACE, %esp      C release the frame, eax still holds n2 l
        !           224:        ret
        !           225:
        !           226: EPILOGUE()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>