[BACK]Return to logops_n.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86 / pentium

Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/logops_n.asm, Revision 1.1

1.1     ! ohara       1: dnl  Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
        !             2:
        !             3: dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
        !             4: dnl
        !             5: dnl  This file is part of the GNU MP Library.
        !             6: dnl
        !             7: dnl  The GNU MP Library is free software; you can redistribute it and/or
        !             8: dnl  modify it under the terms of the GNU Lesser General Public License as
        !             9: dnl  published by the Free Software Foundation; either version 2.1 of the
        !            10: dnl  License, or (at your option) any later version.
        !            11: dnl
        !            12: dnl  The GNU MP Library is distributed in the hope that it will be useful,
        !            13: dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            14: dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            15: dnl  Lesser General Public License for more details.
        !            16: dnl
        !            17: dnl  You should have received a copy of the GNU Lesser General Public
        !            18: dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
        !            19: dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
        !            20: dnl  Suite 330, Boston, MA 02111-1307, USA.
        !            21:
        !            22: include(`../config.m4')
        !            23:
        !            24:
        !            25: C P5: 3.0 c/l  and, ior, xor
        !            26: C     3.5 c/l  andn, iorn, nand, nior, xnor
        !            27:
        !            28:
        !                dnl  M4_choose_op(name, post, op, pre)
        !                dnl
        !                dnl  When the symbol OPERATION_name is defined, select that operation:
        !                dnl  M4_function becomes mpn_name, M4op becomes the instruction op, and
        !                dnl  M4_want_post / M4_want_pre record the post and pre arguments.
        !                dnl  Note the argument order: the post flag is argument 2 and the pre flag
        !                dnl  is argument 4, with the instruction between them.
        !                dnl  When OPERATION_name is not defined the call expands to nothing.
        !            29: define(M4_choose_op,
        !            30: `ifdef(`OPERATION_$1',`
        !            31: define(`M4_function', `mpn_$1')
        !            32: define(`M4_want_pre', `$4')
        !            33: define(`M4op',        `$3')
        !            34: define(`M4_want_post',`$2')
        !            35: ')')
        !                dnl  M4pre(text) and M4post(text) expand to text only when the matching
        !                dnl  M4_want_pre / M4_want_post flag is exactly yes, otherwise to nothing.
        !            36: define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
        !            37: define(M4post,`ifelse(M4_want_post,yes,`$1')')
        !            38:
        !                dnl  Operation table.  Columns: name, post flag, instruction, pre flag.
        !                dnl  A yes in the pre column makes the code complement the limb loaded
        !                dnl  from yp before the instruction (andn_n, iorn_n); a yes in the post
        !                dnl  column makes it complement the result afterwards (nand_n, nior_n,
        !                dnl  xnor_n).  Only the row whose OPERATION_name symbol is defined takes
        !                dnl  effect.
        !            39: M4_choose_op( and_n,     , andl,    )
        !            40: M4_choose_op( andn_n,    , andl, yes)
        !            41: M4_choose_op( nand_n, yes, andl,    )
        !            42: M4_choose_op( ior_n,     ,  orl,    )
        !            43: M4_choose_op( iorn_n,    ,  orl, yes)
        !            44: M4_choose_op( nior_n, yes,  orl,    )
        !            45: M4_choose_op( xor_n,     , xorl,    )
        !            46: M4_choose_op( xnor_n, yes, xorl,    )
        !            47:
        !                dnl  If no row above matched, M4_function was never defined: report that
        !                dnl  through m4_error instead of assembling a function with no name.
        !            48: ifdef(`M4_function',,
        !            49: `m4_error(`Unrecognised or undefined OPERATION symbol
        !            50: ')')
        !            51:
        !                dnl  The eight entry points this one source file can be built as, one for
        !                dnl  each row of the operation table.  MULFUNC_PROLOGUE is defined outside
        !                dnl  this file; presumably the build machinery reads this list -- confirm
        !                dnl  there.
        !            52: MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
        !            53:
        !                dnl  NAILS_SUPPORT is defined outside this file; presumably this declares
        !                dnl  that nail sizes 0 to 31 are handled (the complement steps below work
        !                dnl  through GMP_NUMB_MASK) -- confirm against the macro definition.
        !            54: NAILS_SUPPORT(0-31)
        !            55:
        !            56:
        !            57: C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
        !            58: C
        !            59: C Nothing complicated here, just some care to avoid data cache bank clashes
        !            60: C and AGIs.
        !            61: C
        !            62: C We're one register short of being able to do a simple 4 loads, 2 ops, 2
        !            63: C stores.  Instead %ebp is juggled a bit and nops are introduced to keep the
        !            64: C pairings as intended.  An in-place operation would free up a register, for
        !            65: C a 0.5 c/l speedup, if that's worth bothering with.
        !            66: C
        !            67: C This code seems best for P55 too.  Data alignment is a big problem for MMX
        !            68: C and the pairing restrictions on movq and integer instructions make life
        !            69: C difficult.
        !            70:
        !                C Byte offsets of the four stack arguments, in the order wp, xp, yp,
        !                C size.  defframe is defined outside this file; presumably it makes each
        !                C PARAM_ name follow the FRAME adjustments done by FRAME_pushl -- confirm
        !                C in the m4 support files.
        !            71: defframe(PARAM_SIZE,16)
        !            72: defframe(PARAM_YP,  12)
        !            73: defframe(PARAM_XP,   8)
        !            74: defframe(PARAM_WP,   4)
        !            75:
        !            76:        TEXT
        !            77:        ALIGN(8)
        !            78:
        !                C Entry point.  Combines the limbs at xp and yp with M4op (plus the
        !                C optional pre/post complement chosen by the operation table) and stores
        !                C the results at wp.  Limbs are 4 bytes and are processed from the highest
        !                C address downwards, two per loop iteration.
        !            79: PROLOGUE(M4_function)
        !            80: deflit(`FRAME',0)
        !            81:
        !                       C Save the four registers that are restored at L(done).
        !            82:        pushl   %ebx    FRAME_pushl()
        !            83:        pushl   %esi    FRAME_pushl()
        !            84:
        !            85:        pushl   %edi    FRAME_pushl()
        !            86:        pushl   %ebp    FRAME_pushl()
        !            87:
        !                       C Load the arguments: ecx = size, ebx = xp, esi = yp, edi = wp.
        !            88:        movl    PARAM_SIZE, %ecx
        !            89:        movl    PARAM_XP, %ebx
        !            90:
        !            91:        movl    PARAM_YP, %esi
        !            92:        movl    PARAM_WP, %edi
        !            93:
        !                       C ecx = size/2, the number of limb pairs.  The bit shifted out goes
        !                       C to the carry flag, so carry is set exactly when size is odd; an
        !                       C even size goes straight to the pair loop.
        !                       C NOTE(review): size == 0 is not special-cased.  It would reach
        !                       C L(entry) with ecx = 0 and the decl there would wrap around.
        !                       C Presumably callers guarantee size >= 1 -- confirm.
        !            94:        shrl    %ecx
        !            95:        jnc     L(entry)
        !            96:
        !                       C Odd size: do the highest limb on its own.  With size odd and
        !                       C ecx = size/2, byte offset 8*ecx addresses limb size-1.
        !            97:        movl    (%ebx,%ecx,8), %eax     C risk of data cache bank clash here
        !            98:        movl    (%esi,%ecx,8), %edx
        !            99:
        !                       C Pre-complement of the yp limb, emitted only for operations with
        !                       C the pre flag.  The helper macro is defined outside this file.
        !           100: M4pre(`        notl_or_xorl_GMP_NUMB_MASK(%edx)')
        !           101:
        !                       C eax = xp limb combined with the (possibly complemented) yp limb.
        !           102:        M4op    %edx, %eax
        !           103:
        !                       C Post-complement of the result, emitted only for operations with
        !                       C the post flag.  The orl then sets the zero flag when no limb
        !                       C pairs remain; the movl store below does not change flags, so the
        !                       C jz after it still tests the orl result.
        !           104: M4post(`xorl   $GMP_NUMB_MASK, %eax')
        !           105:        orl     %ecx, %ecx
        !           106:
        !           107:        movl    %eax, (%edi,%ecx,8)
        !                       C Zero pairs left means size was 1: finished.
        !           108:        jz      L(done)
        !           109:
        !           110:        jmp     L(entry)
        !           111:
        !           112:
        !                C Second half of a loop iteration: finish and store the pair whose loads
        !                C were issued at L(entry).  ecx has already been decremented here.
        !           113: L(top):
        !           114:        C eax   high limb of the current pair, M4op already applied
        !           115:        C ebx   xp
        !           116:        C ecx   counter, limb pairs, decrementing
        !           117:        C edx   yp low limb of the current pair (pre-complemented if wanted)
        !           118:        C esi   yp
        !           119:        C edi   wp
        !           120:        C ebp   xp low limb of the current pair
        !           121:
        !                       C edx = low limb result.  The nop is filler for the intended
        !                       C instruction pairing described in the header comment.
        !           122:        M4op    %ebp, %edx
        !           123:        nop
        !           124:
        !           125: M4post(`xorl   $GMP_NUMB_MASK, %eax')
        !           126: M4post(`xorl   $GMP_NUMB_MASK, %edx')
        !           127:
        !                       C Because ecx was decremented after the loads, offsets 4 and 0 here
        !                       C address the same pair that was read at offsets -4 and -8.
        !           128:        movl    %eax, 4(%edi,%ecx,8)
        !           129:        movl    %edx, (%edi,%ecx,8)
        !           130:
        !                C First half of a loop iteration.  ecx = limb pairs still to do; the pair
        !                C handled is limbs 2*ecx-1 (high) and 2*ecx-2 (low).
        !           131: L(entry):
        !                       C ebp = xp high limb.
        !           132:        movl    -4(%ebx,%ecx,8), %ebp
        !           133:        nop
        !           134:
        !                       C eax = yp high limb, edx = yp low limb.
        !           135:        movl    -4(%esi,%ecx,8), %eax
        !           136:        movl    -8(%esi,%ecx,8), %edx
        !           137:
        !           138: M4pre(`        xorl    $GMP_NUMB_MASK, %eax')
        !           139: M4pre(`        xorl    $GMP_NUMB_MASK, %edx')
        !           140:
        !                       C eax = high limb result; ebp is then free and is reloaded with the
        !                       C xp low limb, which is consumed after the branch.
        !           141:        M4op    %ebp, %eax
        !           142:        movl    -8(%ebx,%ecx,8), %ebp
        !           143:
        !                       C More pairs to do: finish this one at L(top) and go round again.
        !           144:        decl    %ecx
        !           145:        jnz     L(top)
        !           146:
        !           147:
        !                       C Last pair (ecx is now 0): same finishing steps as at L(top), with
        !                       C the stores landing on the two lowest limbs of wp.
        !           148:        M4op    %ebp, %edx
        !           149:        nop
        !           150:
        !           151: M4post(`xorl   $GMP_NUMB_MASK, %eax')
        !           152: M4post(`xorl   $GMP_NUMB_MASK, %edx')
        !           153:
        !           154:        movl    %eax, 4(%edi,%ecx,8)
        !           155:        movl    %edx, (%edi,%ecx,8)
        !           156:
        !           157:
        !                C Restore the saved registers in reverse push order and return.
        !           158: L(done):
        !           159:        popl    %ebp
        !           160:        popl    %edi
        !           161:
        !           162:        popl    %esi
        !           163:        popl    %ebx
        !           164:
        !           165:        ret
        !           166:
        !           167: EPILOGUE()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>