=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/x86/Attic/x86-defs.m4,v retrieving revision 1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1 -r1.1.1.2 --- OpenXM_contrib/gmp/mpn/x86/Attic/x86-defs.m4 2000/09/09 14:12:42 1.1 +++ OpenXM_contrib/gmp/mpn/x86/Attic/x86-defs.m4 2003/08/25 16:06:27 1.1.1.2 @@ -3,7 +3,7 @@ divert(-1) dnl m4 macros for x86 assembler. -dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -38,121 +38,108 @@ dnl dnl define(foo, `mov $ 123, %eax') dnl dnl This is only a problem in macro definitions, not in ordinary text, -dnl nor in macro parameters like text passed to forloop() or ifdef(). +dnl and not in macro parameters like text passed to forloop() or ifdef(). deflit(BYTES_PER_MP_LIMB, 4) -dnl -------------------------------------------------------------------------- -dnl Replacement PROLOGUE/EPILOGUE with more sophisticated error checking. -dnl Nesting and overlapping not allowed. -dnl +dnl Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL. We +dnl undefine PIC since we don't need to be position independent in this +dnl case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc. +ifdef(`DLL_EXPORT',`undefine(`PIC')') -dnl Usage: PROLOGUE(functionname) -dnl -dnl Generate a function prologue. functionname gets GSYM_PREFIX added. -dnl Examples, -dnl -dnl PROLOGUE(mpn_add_n) -dnl PROLOGUE(somefun) -define(`PROLOGUE', -m4_assert_numargs(1) -m4_assert_defined(`PROLOGUE_cpu') -`ifdef(`PROLOGUE_current_function', -`m4_error(`PROLOGUE'(`PROLOGUE_current_function') needs an `EPILOGUE'() before `PROLOGUE'($1) -)')dnl -m4_file_seen()dnl -define(`PROLOGUE_current_function',`$1')dnl -PROLOGUE_cpu(GSYM_PREFIX`'$1)') - - -dnl Usage: EPILOGUE() +dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo) dnl -dnl Notice the function name is passed to EPILOGUE_cpu(), letting it use $1 -dnl instead of the long PROLOGUE_current_function symbol. - -define(`EPILOGUE', -m4_assert_numargs(0) -m4_assert_defined(`EPILOGUE_cpu') -`ifdef(`PROLOGUE_current_function',, -`m4_error(`EPILOGUE'() with no `PROLOGUE'() -)')dnl -EPILOGUE_cpu(GSYM_PREFIX`'PROLOGUE_current_function)`'dnl -undefine(`PROLOGUE_current_function')') - -m4wrap_prepend( -`ifdef(`PROLOGUE_current_function', -`m4_error(`EPILOGUE() for PROLOGUE('PROLOGUE_current_function`) never seen -')')') - - -dnl Usage: PROLOGUE_assert_inside() +dnl In the x86 code we use explicit TEXT and ALIGN() calls in the code, +dnl since different alignments are wanted in various circumstances. So for +dnl instance, dnl -dnl Use this unquoted on a line on its own at the start of a macro -dnl definition to add some code to check the macro is only used inside a -dnl PROLOGUE/EPILOGUE pair, and that hence PROLOGUE_current_function is -dnl defined. +dnl TEXT +dnl ALIGN(16) +dnl PROLOGUE(mpn_add_n) +dnl ... +dnl EPILOGUE() -define(PROLOGUE_assert_inside, -m4_assert_numargs(0) -``PROLOGUE_assert_inside_internal'(m4_doublequote($`'0))`dnl '') - -define(PROLOGUE_assert_inside_internal, +define(`PROLOGUE_cpu', m4_assert_numargs(1) -`ifdef(`PROLOGUE_current_function',, -`m4_error(`$1 used outside a PROLOGUE / EPILOGUE pair -')')') + `GLOBL $1 + TYPE($1,`function') +$1: +ifelse(WANT_PROFILING,`no',,`call_mcount +')') -dnl Usage: L(labelname) -dnl LF(functionname,labelname) +dnl Usage: call_mcount dnl -dnl Generate a local label in the current or given function. For LF(), -dnl functionname gets GSYM_PREFIX added, the same as with PROLOGUE(). +dnl For `gprof' style profiling, %ebp is setup as a frame pointer. None of +dnl the assembler routines use %ebp this way, so it's done only for the +dnl benefit of mcount. glibc sysdeps/i386/i386-mcount.S shows how mcount +dnl gets the current function from (%esp) and the parent from 4(%ebp). dnl -dnl For example, in a function mpn_add_n (and with MPN_PREFIX __gmpn), -dnl -dnl L(bar) => L__gmpn_add_n__bar -dnl LF(somefun,bar) => Lsomefun__bar -dnl -dnl The funtion name and label name get two underscores between them rather -dnl than one to guard against clashing with a separate external symbol that -dnl happened to be called functionname_labelname. (Though this would only -dnl happen if the local label prefix is is empty.) Underscores are used so -dnl the whole label will still be a valid C identifier and so can be easily -dnl used in gdb. +dnl For `prof' style profiling gcc generates mcount calls without setting +dnl up %ebp, and the same is done here. -dnl LSYM_PREFIX can be L$, so defn() is used to prevent L expanding as the -dnl L macro and making an infinite recursion. -define(LF, -m4_assert_numargs(2) -m4_assert_defined(`LSYM_PREFIX') -`defn(`LSYM_PREFIX')GSYM_PREFIX`'$1`'__$2') +define(`call_mcount', +m4_assert_numargs(-1) +m4_assert_defined(`WANT_PROFILING') +m4_assert_defined(`MCOUNT_PIC_REG') +m4_assert_defined(`MCOUNT_NONPIC_REG') +m4_assert_defined(`MCOUNT_PIC_CALL') +m4_assert_defined(`MCOUNT_NONPIC_CALL') +`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,, +` DATA + ALIGN(4) +L(mcount_data_`'mcount_data_counter): + W32 0 + TEXT +')dnl +ifelse(WANT_PROFILING,`gprof', +` pushl %ebp + movl %esp, %ebp +')dnl +ifdef(`PIC', +` pushl %ebx + mcount_movl_GOT_ebx +ifelse(MCOUNT_PIC_REG,,, +` leal L(mcount_data_`'mcount_data_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG') +MCOUNT_PIC_CALL + popl %ebx +',`dnl non-PIC +ifelse(MCOUNT_NONPIC_REG,,, +` movl `$'L(mcount_data_`'mcount_data_counter), MCOUNT_NONPIC_REG +')dnl +MCOUNT_NONPIC_CALL +')dnl +ifelse(WANT_PROFILING,`gprof', +` popl %ebp +') +define(`mcount_data_counter',eval(mcount_data_counter+1))') -define(`L', -m4_assert_numargs(1) -PROLOGUE_assert_inside() -`LF(PROLOGUE_current_function,`$1')') +define(mcount_data_counter,1) +dnl Called: mcount_movl_GOT_ebx +dnl Label H is "here", the %eip obtained from the call. C is the called +dnl subroutine. J is the jump across that subroutine. A fetch and "ret" +dnl is always done so calls and returns are balanced for the benefit of the +dnl various x86s that have return stack branch prediction. +define(mcount_movl_GOT_ebx, +m4_assert_numargs(-1) +` call L(mcount_movl_GOT_ebx_C`'mcount_movl_GOT_ebx_counter) +L(mcount_movl_GOT_ebx_H`'mcount_movl_GOT_ebx_counter): + jmp L(mcount_movl_GOT_ebx_J`'mcount_movl_GOT_ebx_counter) +L(mcount_movl_GOT_ebx_C`'mcount_movl_GOT_ebx_counter): + movl (%esp), %ebx + ret +L(mcount_movl_GOT_ebx_J`'mcount_movl_GOT_ebx_counter): + addl $_GLOBAL_OFFSET_TABLE_+[.-L(mcount_movl_GOT_ebx_H`'mcount_movl_GOT_ebx_counter)], %ebx +define(`mcount_movl_GOT_ebx_counter',incr(mcount_movl_GOT_ebx_counter))') -dnl Called: PROLOGUE_cpu(gsym) -dnl EPILOGUE_cpu(gsym) +define(mcount_movl_GOT_ebx_counter,1) -define(PROLOGUE_cpu, -m4_assert_numargs(1) - `GLOBL $1 - TYPE($1,`function') -$1:') -define(EPILOGUE_cpu, -m4_assert_numargs(1) -` SIZE($1,.-$1)') - - - dnl -------------------------------------------------------------------------- dnl Various x86 macros. dnl @@ -196,8 +183,7 @@ dnl delete any following parenthesized expression. n dnl out say as 16(%esp)(%edi). This isn't valid assembler and should dnl provoke an error, which is better than silently giving just 16(%esp). dnl -dnl See README.family for more on the suggested way to access the stack -dnl frame. +dnl See README for more on the suggested way to access the stack frame. define(defframe, m4_assert_numargs(2) @@ -208,6 +194,7 @@ defframe_empty_if_zero(FRAME+($2))(%esp)')') dnl Called: defframe_empty_if_zero(expression) define(defframe_empty_if_zero, +m4_assert_numargs(1) `ifelse(defframe_empty_if_zero_disabled,1, `eval($1)', `m4_empty_if_zero($1)')') @@ -256,7 +243,7 @@ m4_assert_defined(`FRAME') dnl Usage: defframe_pushl(name) dnl -dnl Do a combination of a FRAME_pushl() and a defframe() to name the stack +dnl Do a combination FRAME_pushl() and a defframe() to name the stack dnl location just pushed. This should come after a pushl instruction. dnl Putting it on the same line works and avoids lengthening the code. For dnl example, @@ -267,6 +254,7 @@ dnl Notice the defframe() is done with an unquoted -F dnl current value without tracking future changes. define(defframe_pushl, +m4_assert_numargs(1) `FRAME_pushl()defframe(`$1',-FRAME)') @@ -292,9 +280,9 @@ dnl emms may as well be used. define(femms_available_p, m4_assert_numargs(-1) `m4_ifdef_anyof_p( - `HAVE_TARGET_CPU_k62', - `HAVE_TARGET_CPU_k63', - `HAVE_TARGET_CPU_athlon')') + `HAVE_HOST_CPU_k62', + `HAVE_HOST_CPU_k63', + `HAVE_HOST_CPU_athlon')') define(emms_or_femms, m4_assert_numargs(-1) @@ -303,7 +291,7 @@ m4_assert_numargs(-1) dnl Usage: femms dnl -dnl The gas 2.9.1 that comes with FreeBSD 3.4 doesn't support femms, so the +dnl Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the dnl following is a replacement using .byte. dnl dnl If femms isn't available, an emms is generated instead, for convenience @@ -320,43 +308,47 @@ m4_warning(`warning, using emms in place of femms, use dnl Usage: jadcl0(op) dnl -dnl Issue a jnc/incl as a substitute for adcl $0,op. This isn't an exact -dnl replacement, since it doesn't set the flags like adcl does. +dnl Generate a jnc/incl as a substitute for adcl $0,op. Note this isn't an +dnl exact replacement, since it doesn't set the flags like adcl does. dnl dnl This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and dnl mpn_sqr_basecase because on K6 an adcl is slow, the branch dnl misprediction penalty is small, and the multiply algorithm used leads dnl to a carry bit on average only 1/4 of the time. dnl -dnl jadcl0_disabled can be set to 1 to instead issue an ordinary adcl for -dnl comparison. For example, +dnl jadcl0_disabled can be set to 1 to instead generate an ordinary adcl +dnl for comparison. For example, dnl dnl define(`jadcl0_disabled',1) dnl dnl When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is dnl the same size as an adcl. This makes it possible to use the exact same -dnl computed jump code when testing the relative speed of jnc/incl and adcl -dnl with jadcl0_disabled. +dnl computed jump code when testing the relative speed of the two. define(jadcl0, m4_assert_numargs(1) `ifelse(jadcl0_disabled,1, `adcl $`'0, $1', - `jnc 1f + `jnc L(jadcl0_`'jadcl0_counter) incl $1 -1:dnl')') +L(jadcl0_`'jadcl0_counter): +define(`jadcl0_counter',incr(jadcl0_counter))')') +define(jadcl0_counter,1) + dnl Usage: cmov_available_p dnl dnl Expand to 1 if cmov is available, 0 if not. define(cmov_available_p, +m4_assert_numargs(-1) `m4_ifdef_anyof_p( - `HAVE_TARGET_CPU_pentiumpro', - `HAVE_TARGET_CPU_pentium2', - `HAVE_TARGET_CPU_pentium3', - `HAVE_TARGET_CPU_athlon')') + `HAVE_HOST_CPU_pentiumpro', + `HAVE_HOST_CPU_pentium2', + `HAVE_HOST_CPU_pentium3', + `HAVE_HOST_CPU_pentium4', + `HAVE_HOST_CPU_athlon')') dnl Usage: x86_lookup(target, key,value, key,value, ...) @@ -370,6 +362,7 @@ dnl dnl x86_lookup_p expands to 1 if `target' is found, or 0 if not. define(x86_lookup, +m4_assert_numargs_range(1,999) `ifelse(eval($#<3),1, `m4_error(`unrecognised part of x86 instruction: $1 ')', @@ -377,6 +370,7 @@ define(x86_lookup, `x86_lookup(`$1',shift(shift(shift($@))))')')') define(x86_lookup_p, +m4_assert_numargs_range(1,999) `ifelse(eval($#<3),1, `0', `ifelse(`$1',`$2', `1', `x86_lookup_p(`$1',shift(shift(shift($@))))')')') @@ -440,7 +434,7 @@ define(x86_opcode_tttn_list, dnl Usage: cmovCC(srcreg,dstreg) dnl -dnl Generate a cmov instruction if the target supports cmov, or simulate it +dnl Generate a cmov instruction if the host supports cmov, or simulate it dnl with a conditional jump if not (the latter being meant only for dnl testing). For example, dnl @@ -489,11 +483,15 @@ dnl need to be changed to do a fetch even if the cond dnl to trigger exceptions the same way a real cmov does. define(cmov_simulate, m4_assert_numargs(3) - `j$1 1f C cmov$1 $2, $3 - jmp 2f -1: movl $2, $3 -2:') +`j$1 L(cmov_T`'cmov_counter) C cmov$1 $2, $3 + jmp L(cmov_F`'cmov_counter) +L(cmov_T`'cmov_counter): + movl $2, $3 +L(cmov_F`'cmov_counter): +define(`cmov_counter',incr(cmov_counter))') +define(cmov_counter,1) + dnl Called: cmov_bytes_tttn(name,tttn,src,dst) define(cmov_bytes_tttn, m4_assert_numargs(4) @@ -504,6 +502,78 @@ eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3 C `$1 $3, $4'') +dnl Usage: x86_opcode_regmmx(reg) +dnl +dnl Validate the given mmx register, and return its number, 0 to 7. + +define(x86_opcode_regmmx, +m4_assert_numargs(1) +`x86_lookup(`$1',x86_opcode_regmmx_list)') + +define(x86_opcode_regmmx_list, +``%mm0',0, +`%mm1',1, +`%mm2',2, +`%mm3',3, +`%mm4',4, +`%mm5',5, +`%mm6',6, +`%mm7',7') + + +dnl Usage: psadbw(src,dst) +dnl +dnl Only recent versions of gas know psadbw, in particular gas 2.9.1 on +dnl FreeBSD 3.3 and 3.4 doesn't recognise it, so instead emit .byte +dnl sequences. +dnl +dnl Only register->register forms are supported here, which suffices for +dnl the current code. + +define(psadbw, +m4_instruction_wrapper() +m4_assert_numargs(2) +`ifelse(psadbw_available_p,1, +`psadbw_bytes(`$1',`$2')', +`psadbw_simulate(`$1',`$2')')') + +define(psadbw_available_p, +m4_assert_numargs(-1) +`m4_ifdef_anyof_p(`HAVE_HOST_CPU_pentium3', + `HAVE_HOST_CPU_pentium4', + `HAVE_HOST_CPU_athlon')') + +dnl Called: psadbw_bytes(src,dst) +define(psadbw_bytes, +m4_assert_numargs(2) +`.byte 0x0f,0xf6,dnl +eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl + C `psadbw $1, $2'') + +dnl Called: psadbw_simulate(src,dst) +define(psadbw_simulate, +m4_assert_numargs(2) +`m4_warning(`warning, using simulated and only partly functional psadbw, use testing only +') C This works enough for the sum of bytes done in some of the popcounts, + C but is otherwise a long way short of correct. + pushl %eax + pushl %edx + pushf + subl $`'8, %esp + movq $2, (%esp) + movzbl (%esp), %eax +forloop(i,1,7, +` movzbl i`'(%esp), %edx + addl %edx, %eax +') + movd %eax, $2 + addl $`'8, %esp + popf + popl %edx + popl %eax +') + + dnl Usage: loop_or_decljnz label dnl dnl Generate either a "loop" instruction or a "decl %ecx / jnz", whichever @@ -515,16 +585,18 @@ dnl this loop_or_decljnz variation is enough to let t dnl all chips. define(loop_or_decljnz, +m4_assert_numargs(-1) `ifelse(loop_is_better_p,1, `loop', `decl %ecx jnz')') define(loop_is_better_p, -`m4_ifdef_anyof_p(`HAVE_TARGET_CPU_k6', - `HAVE_TARGET_CPU_k62', - `HAVE_TARGET_CPU_k63', - `HAVE_TARGET_CPU_i386')') +m4_assert_numargs(-1) +`m4_ifdef_anyof_p(`HAVE_HOST_CPU_k6', + `HAVE_HOST_CPU_k62', + `HAVE_HOST_CPU_k63', + `HAVE_HOST_CPU_i386')') dnl Usage: Zdisp(inst,op,op,op) @@ -553,41 +625,67 @@ dnl displacement argument is eval()ed. dnl dnl Because there aren't many places a 0(reg) form is wanted, Zdisp is dnl implemented with a table of instructions and encodings. A new entry is -dnl needed for any different operation or registers. +dnl needed for any different operation or registers. The table is split +dnl into separate macros to avoid overflowing BSD m4 macro expansion space. define(Zdisp, +m4_assert_numargs(4) `define(`Zdisp_found',0)dnl -Zdisp_match( movl, %eax, 0,(%edi), `137,71,0', $@)`'dnl -Zdisp_match( movl, %ebx, 0,(%edi), `137,95,0', $@)`'dnl -Zdisp_match( movl, %esi, 0,(%edi), `137,119,0', $@)`'dnl -Zdisp_match( movl, 0,(%ebx), %eax, `139,67,0', $@)`'dnl -Zdisp_match( movl, 0,(%ebx), %esi, `139,115,0', $@)`'dnl -Zdisp_match( movl, 0,(%esi), %eax, `139,70,0', $@)`'dnl +Zdisp_1($@)dnl +Zdisp_2($@)dnl +Zdisp_3($@)dnl +Zdisp_4($@)dnl +ifelse(Zdisp_found,0, +`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4 +')')') + +define(Zdisp_1,`dnl +Zdisp_match( adcl, 0,(%edx), %eax, `0x13,0x42,0x00', $@)`'dnl +Zdisp_match( adcl, 0,(%edx), %ebx, `0x13,0x5a,0x00', $@)`'dnl +Zdisp_match( adcl, 0,(%edx), %esi, `0x13,0x72,0x00', $@)`'dnl +Zdisp_match( addl, %ebx, 0,(%edi), `0x01,0x5f,0x00', $@)`'dnl +Zdisp_match( addl, %ecx, 0,(%edi), `0x01,0x4f,0x00', $@)`'dnl +Zdisp_match( addl, %esi, 0,(%edi), `0x01,0x77,0x00', $@)`'dnl +Zdisp_match( sbbl, 0,(%edx), %eax, `0x1b,0x42,0x00', $@)`'dnl +Zdisp_match( sbbl, 0,(%edx), %esi, `0x1b,0x72,0x00', $@)`'dnl +Zdisp_match( subl, %ecx, 0,(%edi), `0x29,0x4f,0x00', $@)`'dnl +Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl +Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl +') +define(Zdisp_2,`dnl +Zdisp_match( movl, %eax, 0,(%edi), `0x89,0x47,0x00', $@)`'dnl +Zdisp_match( movl, %ebx, 0,(%edi), `0x89,0x5f,0x00', $@)`'dnl +Zdisp_match( movl, %esi, 0,(%edi), `0x89,0x77,0x00', $@)`'dnl +Zdisp_match( movl, 0,(%ebx), %eax, `0x8b,0x43,0x00', $@)`'dnl +Zdisp_match( movl, 0,(%ebx), %esi, `0x8b,0x73,0x00', $@)`'dnl +Zdisp_match( movl, 0,(%edx), %eax, `0x8b,0x42,0x00', $@)`'dnl +Zdisp_match( movl, 0,(%esi), %eax, `0x8b,0x46,0x00', $@)`'dnl Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl -Zdisp_match( addl, %ebx, 0,(%edi), `1,95,0', $@)`'dnl -Zdisp_match( addl, %ecx, 0,(%edi), `1,79,0', $@)`'dnl -Zdisp_match( addl, %esi, 0,(%edi), `1,119,0', $@)`'dnl -Zdisp_match( subl, %ecx, 0,(%edi), `41,79,0', $@)`'dnl -Zdisp_match( adcl, 0,(%edx), %esi, `19,114,0', $@)`'dnl -Zdisp_match( sbbl, 0,(%edx), %esi, `27,114,0', $@)`'dnl +') +define(Zdisp_3,`dnl Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl -Zdisp_match( movq, 0,(%esi), %mm0, `15,111,70,0', $@)`'dnl -Zdisp_match( movq, %mm0, 0,(%edi), `15,127,71,0', $@)`'dnl +Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl +Zdisp_match( movq, 0,(%edx), %mm0, `0x0f,0x6f,0x42,0x00', $@)`'dnl +Zdisp_match( movq, 0,(%esi), %mm0, `0x0f,0x6f,0x46,0x00', $@)`'dnl +Zdisp_match( movq, %mm0, 0,(%edi), `0x0f,0x7f,0x47,0x00', $@)`'dnl Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl +') +define(Zdisp_4,`dnl +Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl -ifelse(Zdisp_found,0, -`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4 -')')') +Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl +') define(Zdisp_match, +m4_assert_numargs(9) `ifelse(eval(m4_stringequal_p(`$1',`$6') && m4_stringequal_p(`$2',0) && m4_stringequal_p(`$3',`$8') @@ -636,6 +734,7 @@ dnl just a plain "shldl ...", an error results. This dnl variant treatment of %cl isn't accidentally bypassed. define(define_shd_instruction, +m4_assert_numargs(1) `define($1, m4_instruction_wrapper() m4_assert_numargs(3) @@ -657,7 +756,7 @@ m4_assert_defined(`WANT_SHLDL_CL') ``$1' `$2', `$3', `$4'')') -dnl Usage: ASSERT(cond, instructions) +dnl Usage: ASSERT([cond][,instructions]) dnl dnl If WANT_ASSERT is 1, output the given instructions and expect the given dnl flags condition to then be satisfied. For example, @@ -673,20 +772,30 @@ dnl When `instructions' is not empty, a pushf/popf is dnl flags, but the instructions themselves must preserve any registers that dnl matter. FRAME is adjusted for the push and pop, so the instructions dnl given can use defframe() stack variables. +dnl +dnl The condition can be omitted to just output the given instructions when +dnl assertion checking is wanted. In this case the pushf/popf is omitted. +dnl For example, +dnl +dnl ASSERT(, `movl %eax, VAR_KEEPVAL') define(ASSERT, m4_assert_numargs_range(1,2) `ifelse(WANT_ASSERT,1, +`ifelse(`$1',, + `$2', `C ASSERT ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')') $2 - j`$1' 1f + j`$1' L(ASSERT_ok`'ASSERT_counter) ud2 C assertion failed -1: +L(ASSERT_ok`'ASSERT_counter): ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')') -')') +define(`ASSERT_counter',incr(ASSERT_counter))')')') +define(ASSERT_counter,1) + dnl Usage: movl_text_address(label,register) dnl dnl Get the address of a text segment label, using either a plain movl or a @@ -698,16 +807,33 @@ dnl This macro is only meant for use in ASSERT()s or dnl the PIC sequence it generates will want to be done with a ret balancing dnl the call on CPUs with return address branch predition. dnl -dnl The addl generated here has a backward reference to 1b, and so won't -dnl suffer from the two forwards references bug in old gas (described in -dnl mpn/x86/README.family). +dnl The addl generated here has a backward reference to the label, and so +dnl won't suffer from the two forwards references bug in old gas (described +dnl in mpn/x86/README). define(movl_text_address, +m4_assert_numargs(2) `ifdef(`PIC', - `call 1f -1: popl $2 C %eip - addl `$'$1-1b, $2', + `call L(movl_text_address_`'movl_text_address_counter) +L(movl_text_address_`'movl_text_address_counter): + popl $2 C %eip + addl `$'$1-L(movl_text_address_`'movl_text_address_counter), $2 +define(`movl_text_address_counter',incr(movl_text_address_counter))', `movl `$'$1, $2')') + +define(movl_text_address_counter,1) + + +dnl Usage: notl_or_xorl_GMP_NUMB_MASK(reg) +dnl +dnl Expand to either "notl `reg'" or "xorl $GMP_NUMB_BITS,`reg'" as +dnl appropriate for nails in use or not. + +define(notl_or_xorl_GMP_NUMB_MASK, +m4_assert_numargs(1) +`ifelse(GMP_NAIL_BITS,0, +`notl `$1'', +`xorl $GMP_NUMB_MASK, `$1'')') divert`'dnl