[BACK]Return to logops_n.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86 / pentium

Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/logops_n.asm, Revision 1.1.1.1

1.1       ohara       1: dnl  Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
                      2:
                      3: dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
                      4: dnl
                      5: dnl  This file is part of the GNU MP Library.
                      6: dnl
                      7: dnl  The GNU MP Library is free software; you can redistribute it and/or
                      8: dnl  modify it under the terms of the GNU Lesser General Public License as
                      9: dnl  published by the Free Software Foundation; either version 2.1 of the
                     10: dnl  License, or (at your option) any later version.
                     11: dnl
                     12: dnl  The GNU MP Library is distributed in the hope that it will be useful,
                     13: dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
                     14: dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     15: dnl  Lesser General Public License for more details.
                     16: dnl
                     17: dnl  You should have received a copy of the GNU Lesser General Public
                     18: dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
                     19: dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
                     20: dnl  Suite 330, Boston, MA 02111-1307, USA.
                     21:
                     22: include(`../config.m4')
                     23:
                     24:
                     25: C P5: 3.0 c/l  and, ior, xor
                     26: C     3.5 c/l  andn, iorn, nand, nior, xnor
                     27:
                     28:
                     29: define(M4_choose_op,
                     30: `ifdef(`OPERATION_$1',`
                     31: define(`M4_function', `mpn_$1')
                     32: define(`M4_want_pre', `$4')
                     33: define(`M4op',        `$3')
                     34: define(`M4_want_post',`$2')
                     35: ')')    C arguments are: name, want_post, insn, want_pre.  Takes effect only when OPERATION_name is defined
                     36: define(M4pre, `ifelse(M4_want_pre, yes,`$1')')    C expands to its argument only when want_pre is yes (used on the y limbs before the op)
                     37: define(M4post,`ifelse(M4_want_post,yes,`$1')')    C expands to its argument only when want_post is yes (used on the result after the op)
                     38:
                     39: M4_choose_op( and_n,     , andl,    )    C x AND y
                     40: M4_choose_op( andn_n,    , andl, yes)    C x AND (y complemented)
                     41: M4_choose_op( nand_n, yes, andl,    )    C complement of (x AND y)
                     42: M4_choose_op( ior_n,     ,  orl,    )    C x OR y
                     43: M4_choose_op( iorn_n,    ,  orl, yes)    C x OR (y complemented)
                     44: M4_choose_op( nior_n, yes,  orl,    )    C complement of (x OR y)
                     45: M4_choose_op( xor_n,     , xorl,    )    C x XOR y
                     46: M4_choose_op( xnor_n, yes, xorl,    )    C complement of (x XOR y)
                     47:
                     48: ifdef(`M4_function',,
                     49: `m4_error(`Unrecognised or undefined OPERATION symbol
                     50: ')')    C M4_function is still undefined when none of the table lines above matched
                     51:
                     52: MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
                     53:
                     54: NAILS_SUPPORT(0-31)
                     55:
                     56:
                     57: C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
                     58: C
                     59: C Nothing complicated here, just some care to avoid data cache bank clashes
                     60: C and AGIs.
                     61: C
                     62: C We're one register short of being able to do a simple 4 loads, 2 ops, 2
                     63: C stores.  Instead %ebp is juggled a bit and nops are introduced to keep the
                     64: C pairings as intended.  An in-place operation would free up a register, for
                     65: C a 0.5 c/l speedup, if that's worth bothering with.
                     66: C
                     67: C This code seems best for P55 too.  Data alignment is a big problem for MMX
                     68: C and the pairing restrictions on movq and integer instructions make life
                     69: C difficult.
                     70:
                     71: defframe(PARAM_SIZE,16)
                     72: defframe(PARAM_YP,  12)
                     73: defframe(PARAM_XP,   8)
                     74: defframe(PARAM_WP,   4)    C NOTE(review): offsets follow the prototype order wp, xp, yp, size -- presumably relative to %esp at entry, confirm in defframe
                     75:
                     76:        TEXT
                     77:        ALIGN(8)
                     78:
                     79: PROLOGUE(M4_function)
                     80: deflit(`FRAME',0)
                     81:
                     82:        pushl   %ebx    FRAME_pushl()    C ebx, esi, edi, ebp are pushed here and popped again at L(done)
                     83:        pushl   %esi    FRAME_pushl()
                     84:
                     85:        pushl   %edi    FRAME_pushl()
                     86:        pushl   %ebp    FRAME_pushl()
                     87:
                     88:        movl    PARAM_SIZE, %ecx        C ecx = size
                     89:        movl    PARAM_XP, %ebx          C ebx = xp
                     90:
                     91:        movl    PARAM_YP, %esi          C esi = yp
                     92:        movl    PARAM_WP, %edi          C edi = wp
                     93:
                     94:        shrl    %ecx                    C ecx = size/2 (limb pairs), carry = low bit of size
                     95:        jnc     L(entry)                C even size: go straight to the pair loop
                     96:
                     97:        movl    (%ebx,%ecx,8), %eax     C risk of data cache bank clash here
                     98:        movl    (%esi,%ecx,8), %edx     C odd size: eax, edx = the top limbs of x and y (limb index 2*ecx)
                     99:
                    100: M4pre(`        notl_or_xorl_GMP_NUMB_MASK(%edx)')    C complement the y limb when the op wants it
                    101:
                    102:        M4op    %edx, %eax              C eax = x OP y for the odd top limb
                    103:
                    104: M4post(`xorl   $GMP_NUMB_MASK, %eax')    C complement the result when the op wants it
                    105:        orl     %ecx, %ecx              C sets ZF when no limb pairs remain (size was 1)
                    106:
                    107:        movl    %eax, (%edi,%ecx,8)     C store the top limb, movl leaves the flags from orl alone
                    108:        jz      L(done)
                    109:
                    110:        jmp     L(entry)
                    111:
                    112:
                    113: L(top):
                    114:        C eax   high limb of the pair: x OP y already done, post-complement pending
                    115:        C ebx   xp
                    116:        C ecx   counter, limb pairs, decrementing
                    117:        C edx   low limb of y (pre-complemented when the op wants it)
                    118:        C esi   yp
                    119:        C edi   wp
                    120:        C ebp   low limb of x
                    121:
                    122:        M4op    %ebp, %edx              C edx = x OP y for the low limb
                    123:        nop                             C filler to keep the instruction pairing as intended
                    124:
                    125: M4post(`xorl   $GMP_NUMB_MASK, %eax')
                    126: M4post(`xorl   $GMP_NUMB_MASK, %edx')
                    127:
                    128:        movl    %eax, 4(%edi,%ecx,8)    C store high limb, ecx was already decremented so +4 and +0 address this pair
                    129:        movl    %edx, (%edi,%ecx,8)     C store low limb
                    130:
                    131: L(entry):
                    132:        movl    -4(%ebx,%ecx,8), %ebp   C ebp = high limb of x for pair number ecx (pairs are processed from the top down)
                    133:        nop                             C filler to keep the instruction pairing as intended
                    134:
                    135:        movl    -4(%esi,%ecx,8), %eax   C eax = high limb of y
                    136:        movl    -8(%esi,%ecx,8), %edx   C edx = low limb of y
                    137:
                    138: M4pre(`        xorl    $GMP_NUMB_MASK, %eax')
                    139: M4pre(`        xorl    $GMP_NUMB_MASK, %edx')
                    140:
                    141:        M4op    %ebp, %eax              C eax = x OP y for the high limb
                    142:        movl    -8(%ebx,%ecx,8), %ebp   C ebp is reused: now the low limb of x
                    143:
                    144:        decl    %ecx                    C one pair fewer to go
                    145:        jnz     L(top)                  C the low limb op and both stores for this pair happen at L(top)
                    146:
                    147:
                    148:        M4op    %ebp, %edx              C last pair (ecx = 0): same finishing sequence as at L(top)
                    149:        nop
                    150:
                    151: M4post(`xorl   $GMP_NUMB_MASK, %eax')
                    152: M4post(`xorl   $GMP_NUMB_MASK, %edx')
                    153:
                    154:        movl    %eax, 4(%edi,%ecx,8)    C with ecx = 0 these two stores write limbs 1 and 0 of wp
                    155:        movl    %edx, (%edi,%ecx,8)
                    156:
                    157:
                    158: L(done):
                    159:        popl    %ebp                    C restore in reverse order of the pushes at entry
                    160:        popl    %edi
                    161:
                    162:        popl    %esi
                    163:        popl    %ebx
                    164:
                    165:        ret
                    166:
                    167: EPILOGUE()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>