[BACK]Return to mod_34lsub1.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86 / k7

Annotation of OpenXM_contrib/gmp/mpn/x86/k7/mod_34lsub1.asm, Revision 1.1

1.1     ! ohara       1: dnl  AMD K7 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
        !             2:
        !             3: dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
        !             4: dnl
        !             5: dnl  This file is part of the GNU MP Library.
        !             6: dnl
        !             7: dnl  The GNU MP Library is free software; you can redistribute it and/or
        !             8: dnl  modify it under the terms of the GNU Lesser General Public License as
        !             9: dnl  published by the Free Software Foundation; either version 2.1 of the
        !            10: dnl  License, or (at your option) any later version.
        !            11: dnl
        !            12: dnl  The GNU MP Library is distributed in the hope that it will be useful,
        !            13: dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            14: dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            15: dnl  Lesser General Public License for more details.
        !            16: dnl
        !            17: dnl  You should have received a copy of the GNU Lesser General Public
        !            18: dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
        !            19: dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
        !            20: dnl  Suite 330, Boston, MA 02111-1307, USA.
        !            21:
        !            22: include(`../config.m4')
        !            23:
        !            24:
        !            25: C K7: 1.0 cycles/limb
        !            26:
        !            27:
        !            28: C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
        !            29: C
        !            30: C The loop form below and the 64 byte code alignment seem necessary for the
        !            31: C claimed speed.  This is a bit strange, since normally k7 isn't very
        !            32: C sensitive to such things.  Perhaps there has to be 6 instructions in the
        !            33: C first 16 bytes for the BTB entry or something.
        !            34:
        !            35: defframe(PARAM_SIZE, 8)
        !            36: defframe(PARAM_SRC,  4)
        !            37:
        !            38: dnl  re-use the PARAM_SIZE slot to save edi; size is already in ecx by then
        !            39: define(SAVE_EDI, `PARAM_SIZE')
        !            40:
        !            41:        TEXT
        !            42:        ALIGN(64)
        !            43: PROLOGUE(mpn_mod_34lsub1)
        !            44: deflit(`FRAME',0)
        !            45:        C The 24-bit pieces are summed with no final reduction, so eax is only congruent to the operand mod 2^24-1.
        !            46:        movl    PARAM_SIZE, %ecx        C ecx = size
        !            47:        movl    PARAM_SRC, %edx         C edx = src
        !            48:
        !            49:        subl    $2, %ecx                C ecx = size-2, flags used by ja and jb below
        !            50:        ja      L(three_or_more)        C size > 2
        !            51:
        !            52:        movl    (%edx), %eax            C src[0], movl leaves the subl flags intact
        !            53:        jb      L(one)                  C size < 2, return src[0] as is
        !            54:
        !            55:        movl    4(%edx), %ecx           C src[1]
        !            56:        movl    %eax, %edx              C copy of src[0]
        !            57:        shrl    $24, %eax               C src[0] high
        !            58:
        !            59:        andl    $0xFFFFFF, %edx         C src[0] low
        !            60:        addl    %edx, %eax
        !            61:        movl    %ecx, %edx
        !            62:
        !            63:        andl    $0xFFFF, %ecx
        !            64:        shrl    $16, %edx               C src[1] high
        !            65:        addl    %edx, %eax
        !            66:
        !            67:        shll    $8, %ecx                C src[1] low
        !            68:        addl    %ecx, %eax
        !            69:
        !            70: L(one):
        !            71:        ret
        !            72:
        !            73:
        !            74: L(three_or_more):
        !            75:        C eax
        !            76:        C ebx
        !            77:        C ecx   size-2
        !            78:        C edx   src
        !            79:        C esi
        !            80:        C edi
        !            81:        C ebp
        !            82:
        !            83:        pushl   %ebx    FRAME_pushl()
        !            84:        xorl    %eax, %eax              C acc 0mod3 = 0
        !            85:        xorl    %ebx, %ebx              C acc 1mod3 = 0
        !            86:
        !            87:        movl    %edi, SAVE_EDI          C edi is used for the carry mask later
        !            88:        pushl   %esi    FRAME_pushl()
        !            89:        xorl    %esi, %esi              C acc 2mod3 = 0, and clear carry flag
        !            90:
        !            91:
        !            92:        C code offset 0x40 at this point
        !            93: L(top):
        !            94:        C eax   acc 0mod3
        !            95:        C ebx   acc 1mod3
        !            96:        C ecx   counter, limbs
        !            97:        C edx   src
        !            98:        C esi   acc 2mod3
        !            99:        C edi
        !           100:        C ebp
        !           101:
        !           102:        leal    24(%edx), %edx          C advance src by 6 limbs, leal keeps carry
        !           103:        leal    -2(%ecx), %ecx          C leal and decl together count 3 limbs, keeping carry
        !           104:        adcl    -24(%edx), %eax
        !           105:        adcl    -20(%edx), %ebx
        !           106:        adcl    -16(%edx), %esi
        !           107:
        !           108:        decl    %ecx                    C decl sets ZF/SF/OF for jng but not carry
        !           109:        jng     L(done_loop)            C 2 or fewer limbs remain
        !           110:
        !           111:        leal    -2(%ecx), %ecx
        !           112:        adcl    -12(%edx), %eax
        !           113:        adcl    -8(%edx), %ebx
        !           114:        adcl    -4(%edx), %esi
        !           115:
        !           116:        decl    %ecx
        !           117:        jg      L(top)                  C more than 2 limbs remain
        !           118:
        !           119:
        !           120:        leal    12(%edx), %edx          C so -12(%edx) is the next limb, as on the jng exit
        !           121:
        !           122:
        !           123: L(done_loop):
        !           124:        C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
        !           125:
        !           126:        incl    %ecx                    C negative if no limbs remain, carry kept
        !           127:        movl    $0xFFFFFFFF, %edi       C -1, carry out of 2mod3 is 2^96 == 1
        !           128:        js      L(combine)
        !           129:
        !           130:        adcl    -12(%edx), %eax
        !           131:        decl    %ecx                    C negative if that was the last limb
        !           132:        movl    $0xFFFFFF00, %edi       C -2^8, carry out of 0mod3 is 2^32 == 2^8
        !           133:        js      L(combine)
        !           134:
        !           135:        adcl    -8(%edx), %ebx
        !           136:        movl    $0xFFFF0000, %edi       C -2^16, carry out of 1mod3 is 2^64 == 2^16
        !           137:
        !           138:
        !           139: L(combine):
        !           140:        C eax   acc 0mod3
        !           141:        C ebx   acc 1mod3
        !           142:        C ecx
        !           143:        C edx
        !           144:        C esi   acc 2mod3
        !           145:        C edi   mask
        !           146:        C ebp
        !           147:
        !           148:        sbbl    %ecx, %ecx              C carry, -1 if set else 0
        !           149:        movl    %eax, %edx              C 0mod3
        !           150:        shrl    $24, %eax               C 0mod3 high
        !           151:
        !           152:        andl    %edi, %ecx              C carry masked, minus the carry's value or 0
        !           153:        andl    $0x00FFFFFF, %edx       C 0mod3 low
        !           154:        movl    %ebx, %edi              C 1mod3
        !           155:
        !           156:        subl    %ecx, %eax              C apply carry, subtracting a negative adds it
        !           157:        shrl    $16, %ebx               C 1mod3 high
        !           158:        andl    $0xFFFF, %edi
        !           159:
        !           160:        addl    %edx, %eax              C apply 0mod3 low
        !           161:        movl    %esi, %edx              C 2mod3
        !           162:        shll    $8, %edi                C 1mod3 low
        !           163:
        !           164:        addl    %ebx, %eax              C apply 1mod3 high
        !           165:        shrl    $8, %esi                C 2mod3 high
        !           166:        andl    $0xFF, %edx             C 2mod3 low
        !           167:
        !           168:        addl    %edi, %eax              C apply 1mod3 low
        !           169:        shll    $16, %edx               C 2mod3 low
        !           170:
        !           171:        addl    %esi, %eax              C apply 2mod3 high
        !           172:        popl    %esi    FRAME_popl()
        !           173:
        !           174:        movl    SAVE_EDI, %edi
        !           175:        addl    %edx, %eax              C apply 2mod3 low
        !           176:        popl    %ebx    FRAME_popl()
        !           177:
        !           178:        ret
        !           179:
        !           180: EPILOGUE()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>