version 1.1.1.1, 2000/09/09 14:12:42 |
version 1.1.1.2, 2003/08/25 16:06:27 |
|
|
dnl m4 macros for x86 assembler. |
dnl m4 macros for x86 assembler. |
|
|
|
|
dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. |
dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
dnl |
dnl |
dnl This file is part of the GNU MP Library. |
dnl This file is part of the GNU MP Library. |
dnl |
dnl |
|
|
dnl define(foo, `mov $ 123, %eax') |
dnl define(foo, `mov $ 123, %eax') |
dnl |
dnl |
dnl This is only a problem in macro definitions, not in ordinary text, |
dnl This is only a problem in macro definitions, not in ordinary text, |
dnl nor in macro parameters like text passed to forloop() or ifdef(). |
dnl and not in macro parameters like text passed to forloop() or ifdef(). |
|
|
|
|
deflit(BYTES_PER_MP_LIMB, 4) |
deflit(BYTES_PER_MP_LIMB, 4) |
|
|
|
|
dnl -------------------------------------------------------------------------- |
dnl Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL. We |
dnl Replacement PROLOGUE/EPILOGUE with more sophisticated error checking. |
dnl undefine PIC since we don't need to be position independent in this |
dnl Nesting and overlapping not allowed. |
dnl case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc. |
dnl |
|
|
|
|
ifdef(`DLL_EXPORT',`undefine(`PIC')') |
|
|
dnl Usage: PROLOGUE(functionname) |
|
dnl |
|
dnl Generate a function prologue. functionname gets GSYM_PREFIX added. |
|
dnl Examples, |
|
dnl |
|
dnl PROLOGUE(mpn_add_n) |
|
dnl PROLOGUE(somefun) |
|
|
|
define(`PROLOGUE', |
dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo) |
m4_assert_numargs(1) |
|
m4_assert_defined(`PROLOGUE_cpu') |
|
`ifdef(`PROLOGUE_current_function', |
|
`m4_error(`PROLOGUE'(`PROLOGUE_current_function') needs an `EPILOGUE'() before `PROLOGUE'($1) |
|
)')dnl |
|
m4_file_seen()dnl |
|
define(`PROLOGUE_current_function',`$1')dnl |
|
PROLOGUE_cpu(GSYM_PREFIX`'$1)') |
|
|
|
|
|
dnl Usage: EPILOGUE() |
|
dnl |
dnl |
dnl Notice the function name is passed to EPILOGUE_cpu(), letting it use $1 |
dnl In the x86 code we use explicit TEXT and ALIGN() calls in the code, |
dnl instead of the long PROLOGUE_current_function symbol. |
dnl since different alignments are wanted in various circumstances. So for |
|
dnl instance, |
define(`EPILOGUE', |
|
m4_assert_numargs(0) |
|
m4_assert_defined(`EPILOGUE_cpu') |
|
`ifdef(`PROLOGUE_current_function',, |
|
`m4_error(`EPILOGUE'() with no `PROLOGUE'() |
|
)')dnl |
|
EPILOGUE_cpu(GSYM_PREFIX`'PROLOGUE_current_function)`'dnl |
|
undefine(`PROLOGUE_current_function')') |
|
|
|
m4wrap_prepend( |
|
`ifdef(`PROLOGUE_current_function', |
|
`m4_error(`EPILOGUE() for PROLOGUE('PROLOGUE_current_function`) never seen |
|
')')') |
|
|
|
|
|
dnl Usage: PROLOGUE_assert_inside() |
|
dnl |
dnl |
dnl Use this unquoted on a line on its own at the start of a macro |
dnl TEXT |
dnl definition to add some code to check the macro is only used inside a |
dnl ALIGN(16) |
dnl PROLOGUE/EPILOGUE pair, and that hence PROLOGUE_current_function is |
dnl PROLOGUE(mpn_add_n) |
dnl defined. |
dnl ... |
|
dnl EPILOGUE() |
|
|
define(PROLOGUE_assert_inside, |
define(`PROLOGUE_cpu', |
m4_assert_numargs(0) |
|
``PROLOGUE_assert_inside_internal'(m4_doublequote($`'0))`dnl '') |
|
|
|
define(PROLOGUE_assert_inside_internal, |
|
m4_assert_numargs(1) |
m4_assert_numargs(1) |
`ifdef(`PROLOGUE_current_function',, |
`GLOBL $1 |
`m4_error(`$1 used outside a PROLOGUE / EPILOGUE pair |
TYPE($1,`function') |
')')') |
$1: |
|
ifelse(WANT_PROFILING,`no',,`call_mcount |
|
')') |
|
|
|
|
dnl Usage: L(labelname) |
dnl Usage: call_mcount |
dnl LF(functionname,labelname) |
|
dnl |
dnl |
dnl Generate a local label in the current or given function. For LF(), |
dnl For `gprof' style profiling, %ebp is setup as a frame pointer. None of |
dnl functionname gets GSYM_PREFIX added, the same as with PROLOGUE(). |
dnl the assembler routines use %ebp this way, so it's done only for the |
|
dnl benefit of mcount. glibc sysdeps/i386/i386-mcount.S shows how mcount |
|
dnl gets the current function from (%esp) and the parent from 4(%ebp). |
dnl |
dnl |
dnl For example, in a function mpn_add_n (and with MPN_PREFIX __gmpn), |
dnl For `prof' style profiling gcc generates mcount calls without setting |
dnl |
dnl up %ebp, and the same is done here. |
dnl L(bar) => L__gmpn_add_n__bar |
|
dnl LF(somefun,bar) => Lsomefun__bar |
|
dnl |
|
dnl The funtion name and label name get two underscores between them rather |
|
dnl than one to guard against clashing with a separate external symbol that |
|
dnl happened to be called functionname_labelname. (Though this would only |
|
dnl happen if the local label prefix is is empty.) Underscores are used so |
|
dnl the whole label will still be a valid C identifier and so can be easily |
|
dnl used in gdb. |
|
|
|
dnl LSYM_PREFIX can be L$, so defn() is used to prevent L expanding as the |
define(`call_mcount', |
dnl L macro and making an infinite recursion. |
m4_assert_numargs(-1) |
define(LF, |
m4_assert_defined(`WANT_PROFILING') |
m4_assert_numargs(2) |
m4_assert_defined(`MCOUNT_PIC_REG') |
m4_assert_defined(`LSYM_PREFIX') |
m4_assert_defined(`MCOUNT_NONPIC_REG') |
`defn(`LSYM_PREFIX')GSYM_PREFIX`'$1`'__$2') |
m4_assert_defined(`MCOUNT_PIC_CALL') |
|
m4_assert_defined(`MCOUNT_NONPIC_CALL') |
|
`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,, |
|
` DATA |
|
ALIGN(4) |
|
L(mcount_data_`'mcount_data_counter): |
|
W32 0 |
|
TEXT |
|
')dnl |
|
ifelse(WANT_PROFILING,`gprof', |
|
` pushl %ebp |
|
movl %esp, %ebp |
|
')dnl |
|
ifdef(`PIC', |
|
` pushl %ebx |
|
mcount_movl_GOT_ebx |
|
ifelse(MCOUNT_PIC_REG,,, |
|
` leal L(mcount_data_`'mcount_data_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG') |
|
MCOUNT_PIC_CALL |
|
popl %ebx |
|
',`dnl non-PIC |
|
ifelse(MCOUNT_NONPIC_REG,,, |
|
` movl `$'L(mcount_data_`'mcount_data_counter), MCOUNT_NONPIC_REG |
|
')dnl |
|
MCOUNT_NONPIC_CALL |
|
')dnl |
|
ifelse(WANT_PROFILING,`gprof', |
|
` popl %ebp |
|
') |
|
define(`mcount_data_counter',eval(mcount_data_counter+1))') |
|
|
define(`L', |
define(mcount_data_counter,1) |
m4_assert_numargs(1) |
|
PROLOGUE_assert_inside() |
|
`LF(PROLOGUE_current_function,`$1')') |
|
|
|
|
dnl Called: mcount_movl_GOT_ebx |
|
dnl Label H is "here", the %eip obtained from the call. C is the called |
|
dnl subroutine. J is the jump across that subroutine. A fetch and "ret" |
|
dnl is always done so calls and returns are balanced for the benefit of the |
|
dnl various x86s that have return stack branch prediction. |
|
define(mcount_movl_GOT_ebx, |
|
m4_assert_numargs(-1) |
|
` call L(mcount_movl_GOT_ebx_C`'mcount_movl_GOT_ebx_counter) |
|
L(mcount_movl_GOT_ebx_H`'mcount_movl_GOT_ebx_counter): |
|
jmp L(mcount_movl_GOT_ebx_J`'mcount_movl_GOT_ebx_counter) |
|
L(mcount_movl_GOT_ebx_C`'mcount_movl_GOT_ebx_counter): |
|
movl (%esp), %ebx |
|
ret |
|
L(mcount_movl_GOT_ebx_J`'mcount_movl_GOT_ebx_counter): |
|
addl $_GLOBAL_OFFSET_TABLE_+[.-L(mcount_movl_GOT_ebx_H`'mcount_movl_GOT_ebx_counter)], %ebx |
|
define(`mcount_movl_GOT_ebx_counter',incr(mcount_movl_GOT_ebx_counter))') |
|
|
dnl Called: PROLOGUE_cpu(gsym) |
define(mcount_movl_GOT_ebx_counter,1) |
dnl EPILOGUE_cpu(gsym) |
|
|
|
define(PROLOGUE_cpu, |
|
m4_assert_numargs(1) |
|
`GLOBL $1 |
|
TYPE($1,`function') |
|
$1:') |
|
|
|
define(EPILOGUE_cpu, |
|
m4_assert_numargs(1) |
|
` SIZE($1,.-$1)') |
|
|
|
|
|
|
|
dnl -------------------------------------------------------------------------- |
dnl -------------------------------------------------------------------------- |
dnl Various x86 macros. |
dnl Various x86 macros. |
dnl |
dnl |
Line 196 dnl delete any following parenthesized expression. n |
|
Line 183 dnl delete any following parenthesized expression. n |
|
dnl out say as 16(%esp)(%edi). This isn't valid assembler and should |
dnl out say as 16(%esp)(%edi). This isn't valid assembler and should |
dnl provoke an error, which is better than silently giving just 16(%esp). |
dnl provoke an error, which is better than silently giving just 16(%esp). |
dnl |
dnl |
dnl See README.family for more on the suggested way to access the stack |
dnl See README for more on the suggested way to access the stack frame. |
dnl frame. |
|
|
|
define(defframe, |
define(defframe, |
m4_assert_numargs(2) |
m4_assert_numargs(2) |
Line 208 defframe_empty_if_zero(FRAME+($2))(%esp)')') |
|
Line 194 defframe_empty_if_zero(FRAME+($2))(%esp)')') |
|
|
|
dnl Called: defframe_empty_if_zero(expression) |
dnl Called: defframe_empty_if_zero(expression) |
define(defframe_empty_if_zero, |
define(defframe_empty_if_zero, |
|
m4_assert_numargs(1) |
`ifelse(defframe_empty_if_zero_disabled,1, |
`ifelse(defframe_empty_if_zero_disabled,1, |
`eval($1)', |
`eval($1)', |
`m4_empty_if_zero($1)')') |
`m4_empty_if_zero($1)')') |
Line 256 m4_assert_defined(`FRAME') |
|
Line 243 m4_assert_defined(`FRAME') |
|
|
|
dnl Usage: defframe_pushl(name) |
dnl Usage: defframe_pushl(name) |
dnl |
dnl |
dnl Do a combination of a FRAME_pushl() and a defframe() to name the stack |
dnl Do a combination FRAME_pushl() and a defframe() to name the stack |
dnl location just pushed. This should come after a pushl instruction. |
dnl location just pushed. This should come after a pushl instruction. |
dnl Putting it on the same line works and avoids lengthening the code. For |
dnl Putting it on the same line works and avoids lengthening the code. For |
dnl example, |
dnl example, |
Line 267 dnl Notice the defframe() is done with an unquoted -F |
|
Line 254 dnl Notice the defframe() is done with an unquoted -F |
|
dnl current value without tracking future changes. |
dnl current value without tracking future changes. |
|
|
define(defframe_pushl, |
define(defframe_pushl, |
|
m4_assert_numargs(1) |
`FRAME_pushl()defframe(`$1',-FRAME)') |
`FRAME_pushl()defframe(`$1',-FRAME)') |
|
|
|
|
Line 292 dnl emms may as well be used. |
|
Line 280 dnl emms may as well be used. |
|
define(femms_available_p, |
define(femms_available_p, |
m4_assert_numargs(-1) |
m4_assert_numargs(-1) |
`m4_ifdef_anyof_p( |
`m4_ifdef_anyof_p( |
`HAVE_TARGET_CPU_k62', |
`HAVE_HOST_CPU_k62', |
`HAVE_TARGET_CPU_k63', |
`HAVE_HOST_CPU_k63', |
`HAVE_TARGET_CPU_athlon')') |
`HAVE_HOST_CPU_athlon')') |
|
|
define(emms_or_femms, |
define(emms_or_femms, |
m4_assert_numargs(-1) |
m4_assert_numargs(-1) |
Line 303 m4_assert_numargs(-1) |
|
Line 291 m4_assert_numargs(-1) |
|
|
|
dnl Usage: femms |
dnl Usage: femms |
dnl |
dnl |
dnl The gas 2.9.1 that comes with FreeBSD 3.4 doesn't support femms, so the |
dnl Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the |
dnl following is a replacement using .byte. |
dnl following is a replacement using .byte. |
dnl |
dnl |
dnl If femms isn't available, an emms is generated instead, for convenience |
dnl If femms isn't available, an emms is generated instead, for convenience |
Line 320 m4_warning(`warning, using emms in place of femms, use |
|
Line 308 m4_warning(`warning, using emms in place of femms, use |
|
|
|
dnl Usage: jadcl0(op) |
dnl Usage: jadcl0(op) |
dnl |
dnl |
dnl Issue a jnc/incl as a substitute for adcl $0,op. This isn't an exact |
dnl Generate a jnc/incl as a substitute for adcl $0,op. Note this isn't an |
dnl replacement, since it doesn't set the flags like adcl does. |
dnl exact replacement, since it doesn't set the flags like adcl does. |
dnl |
dnl |
dnl This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and |
dnl This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and |
dnl mpn_sqr_basecase because on K6 an adcl is slow, the branch |
dnl mpn_sqr_basecase because on K6 an adcl is slow, the branch |
dnl misprediction penalty is small, and the multiply algorithm used leads |
dnl misprediction penalty is small, and the multiply algorithm used leads |
dnl to a carry bit on average only 1/4 of the time. |
dnl to a carry bit on average only 1/4 of the time. |
dnl |
dnl |
dnl jadcl0_disabled can be set to 1 to instead issue an ordinary adcl for |
dnl jadcl0_disabled can be set to 1 to instead generate an ordinary adcl |
dnl comparison. For example, |
dnl for comparison. For example, |
dnl |
dnl |
dnl define(`jadcl0_disabled',1) |
dnl define(`jadcl0_disabled',1) |
dnl |
dnl |
dnl When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is |
dnl When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is |
dnl the same size as an adcl. This makes it possible to use the exact same |
dnl the same size as an adcl. This makes it possible to use the exact same |
dnl computed jump code when testing the relative speed of jnc/incl and adcl |
dnl computed jump code when testing the relative speed of the two. |
dnl with jadcl0_disabled. |
|
|
|
define(jadcl0, |
define(jadcl0, |
m4_assert_numargs(1) |
m4_assert_numargs(1) |
`ifelse(jadcl0_disabled,1, |
`ifelse(jadcl0_disabled,1, |
`adcl $`'0, $1', |
`adcl $`'0, $1', |
`jnc 1f |
`jnc L(jadcl0_`'jadcl0_counter) |
incl $1 |
incl $1 |
1:dnl')') |
L(jadcl0_`'jadcl0_counter): |
|
define(`jadcl0_counter',incr(jadcl0_counter))')') |
|
|
|
define(jadcl0_counter,1) |
|
|
|
|
dnl Usage: cmov_available_p |
dnl Usage: cmov_available_p |
dnl |
dnl |
dnl Expand to 1 if cmov is available, 0 if not. |
dnl Expand to 1 if cmov is available, 0 if not. |
|
|
define(cmov_available_p, |
define(cmov_available_p, |
|
m4_assert_numargs(-1) |
`m4_ifdef_anyof_p( |
`m4_ifdef_anyof_p( |
`HAVE_TARGET_CPU_pentiumpro', |
`HAVE_HOST_CPU_pentiumpro', |
`HAVE_TARGET_CPU_pentium2', |
`HAVE_HOST_CPU_pentium2', |
`HAVE_TARGET_CPU_pentium3', |
`HAVE_HOST_CPU_pentium3', |
`HAVE_TARGET_CPU_athlon')') |
`HAVE_HOST_CPU_pentium4', |
|
`HAVE_HOST_CPU_athlon')') |
|
|
|
|
dnl Usage: x86_lookup(target, key,value, key,value, ...) |
dnl Usage: x86_lookup(target, key,value, key,value, ...) |
|
|
dnl x86_lookup_p expands to 1 if `target' is found, or 0 if not. |
dnl x86_lookup_p expands to 1 if `target' is found, or 0 if not. |
|
|
define(x86_lookup, |
define(x86_lookup, |
|
m4_assert_numargs_range(1,999) |
`ifelse(eval($#<3),1, |
`ifelse(eval($#<3),1, |
`m4_error(`unrecognised part of x86 instruction: $1 |
`m4_error(`unrecognised part of x86 instruction: $1 |
')', |
')', |
Line 377 define(x86_lookup, |
|
Line 370 define(x86_lookup, |
|
`x86_lookup(`$1',shift(shift(shift($@))))')')') |
`x86_lookup(`$1',shift(shift(shift($@))))')')') |
|
|
define(x86_lookup_p, |
define(x86_lookup_p, |
|
m4_assert_numargs_range(1,999) |
`ifelse(eval($#<3),1, `0', |
`ifelse(eval($#<3),1, `0', |
`ifelse(`$1',`$2', `1', |
`ifelse(`$1',`$2', `1', |
`x86_lookup_p(`$1',shift(shift(shift($@))))')')') |
`x86_lookup_p(`$1',shift(shift(shift($@))))')')') |
Line 440 define(x86_opcode_tttn_list, |
|
Line 434 define(x86_opcode_tttn_list, |
|
|
|
dnl Usage: cmovCC(srcreg,dstreg) |
dnl Usage: cmovCC(srcreg,dstreg) |
dnl |
dnl |
dnl Generate a cmov instruction if the target supports cmov, or simulate it |
dnl Generate a cmov instruction if the host supports cmov, or simulate it |
dnl with a conditional jump if not (the latter being meant only for |
dnl with a conditional jump if not (the latter being meant only for |
dnl testing). For example, |
dnl testing). For example, |
dnl |
dnl |
Line 489 dnl need to be changed to do a fetch even if the cond |
|
Line 483 dnl need to be changed to do a fetch even if the cond |
|
dnl to trigger exceptions the same way a real cmov does. |
dnl to trigger exceptions the same way a real cmov does. |
define(cmov_simulate, |
define(cmov_simulate, |
m4_assert_numargs(3) |
m4_assert_numargs(3) |
`j$1 1f C cmov$1 $2, $3 |
`j$1 L(cmov_T`'cmov_counter) C cmov$1 $2, $3 |
jmp 2f |
jmp L(cmov_F`'cmov_counter) |
1: movl $2, $3 |
L(cmov_T`'cmov_counter): |
2:') |
movl $2, $3 |
|
L(cmov_F`'cmov_counter): |
|
define(`cmov_counter',incr(cmov_counter))') |
|
|
|
define(cmov_counter,1) |
|
|
dnl Called: cmov_bytes_tttn(name,tttn,src,dst) |
dnl Called: cmov_bytes_tttn(name,tttn,src,dst) |
define(cmov_bytes_tttn, |
define(cmov_bytes_tttn, |
m4_assert_numargs(4) |
m4_assert_numargs(4) |
Line 504 eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3 |
|
Line 502 eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3 |
|
C `$1 $3, $4'') |
C `$1 $3, $4'') |
|
|
|
|
|
dnl Usage: x86_opcode_regmmx(reg) |
|
dnl |
|
dnl Validate the given mmx register, and return its number, 0 to 7. |
|
|
|
define(x86_opcode_regmmx, |
|
m4_assert_numargs(1) |
|
`x86_lookup(`$1',x86_opcode_regmmx_list)') |
|
|
|
define(x86_opcode_regmmx_list, |
|
``%mm0',0, |
|
`%mm1',1, |
|
`%mm2',2, |
|
`%mm3',3, |
|
`%mm4',4, |
|
`%mm5',5, |
|
`%mm6',6, |
|
`%mm7',7') |
|
|
|
|
|
dnl Usage: psadbw(src,dst) |
|
dnl |
|
dnl Only recent versions of gas know psadbw, in particular gas 2.9.1 on |
|
dnl FreeBSD 3.3 and 3.4 doesn't recognise it, so instead emit .byte |
|
dnl sequences. |
|
dnl |
|
dnl Only register->register forms are supported here, which suffices for |
|
dnl the current code. |
|
|
|
define(psadbw, |
|
m4_instruction_wrapper() |
|
m4_assert_numargs(2) |
|
`ifelse(psadbw_available_p,1, |
|
`psadbw_bytes(`$1',`$2')', |
|
`psadbw_simulate(`$1',`$2')')') |
|
|
|
define(psadbw_available_p, |
|
m4_assert_numargs(-1) |
|
`m4_ifdef_anyof_p(`HAVE_HOST_CPU_pentium3', |
|
`HAVE_HOST_CPU_pentium4', |
|
`HAVE_HOST_CPU_athlon')') |
|
|
|
dnl Called: psadbw_bytes(src,dst) |
|
define(psadbw_bytes, |
|
m4_assert_numargs(2) |
|
`.byte 0x0f,0xf6,dnl |
|
eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl |
|
C `psadbw $1, $2'') |
|
|
|
dnl Called: psadbw_simulate(src,dst) |
|
define(psadbw_simulate, |
|
m4_assert_numargs(2) |
|
`m4_warning(`warning, using simulated and only partly functional psadbw, use testing only |
|
') C This works enough for the sum of bytes done in some of the popcounts, |
|
C but is otherwise a long way short of correct. |
|
pushl %eax |
|
pushl %edx |
|
pushf |
|
subl $`'8, %esp |
|
movq $2, (%esp) |
|
movzbl (%esp), %eax |
|
forloop(i,1,7, |
|
` movzbl i`'(%esp), %edx |
|
addl %edx, %eax |
|
') |
|
movd %eax, $2 |
|
addl $`'8, %esp |
|
popf |
|
popl %edx |
|
popl %eax |
|
') |
|
|
|
|
dnl Usage: loop_or_decljnz label |
dnl Usage: loop_or_decljnz label |
dnl |
dnl |
dnl Generate either a "loop" instruction or a "decl %ecx / jnz", whichever |
dnl Generate either a "loop" instruction or a "decl %ecx / jnz", whichever |
Line 515 dnl this loop_or_decljnz variation is enough to let t |
|
Line 585 dnl this loop_or_decljnz variation is enough to let t |
|
dnl all chips. |
dnl all chips. |
|
|
define(loop_or_decljnz, |
define(loop_or_decljnz, |
|
m4_assert_numargs(-1) |
`ifelse(loop_is_better_p,1, |
`ifelse(loop_is_better_p,1, |
`loop', |
`loop', |
`decl %ecx |
`decl %ecx |
jnz')') |
jnz')') |
|
|
define(loop_is_better_p, |
define(loop_is_better_p, |
`m4_ifdef_anyof_p(`HAVE_TARGET_CPU_k6', |
m4_assert_numargs(-1) |
`HAVE_TARGET_CPU_k62', |
`m4_ifdef_anyof_p(`HAVE_HOST_CPU_k6', |
`HAVE_TARGET_CPU_k63', |
`HAVE_HOST_CPU_k62', |
`HAVE_TARGET_CPU_i386')') |
`HAVE_HOST_CPU_k63', |
|
`HAVE_HOST_CPU_i386')') |
|
|
|
|
dnl Usage: Zdisp(inst,op,op,op) |
dnl Usage: Zdisp(inst,op,op,op) |
Line 553 dnl displacement argument is eval()ed. |
|
Line 625 dnl displacement argument is eval()ed. |
|
dnl |
dnl |
dnl Because there aren't many places a 0(reg) form is wanted, Zdisp is |
dnl Because there aren't many places a 0(reg) form is wanted, Zdisp is |
dnl implemented with a table of instructions and encodings. A new entry is |
dnl implemented with a table of instructions and encodings. A new entry is |
dnl needed for any different operation or registers. |
dnl needed for any different operation or registers. The table is split |
|
dnl into separate macros to avoid overflowing BSD m4 macro expansion space. |
|
|
define(Zdisp, |
define(Zdisp, |
|
m4_assert_numargs(4) |
`define(`Zdisp_found',0)dnl |
`define(`Zdisp_found',0)dnl |
Zdisp_match( movl, %eax, 0,(%edi), `137,71,0', $@)`'dnl |
Zdisp_1($@)dnl |
Zdisp_match( movl, %ebx, 0,(%edi), `137,95,0', $@)`'dnl |
Zdisp_2($@)dnl |
Zdisp_match( movl, %esi, 0,(%edi), `137,119,0', $@)`'dnl |
Zdisp_3($@)dnl |
Zdisp_match( movl, 0,(%ebx), %eax, `139,67,0', $@)`'dnl |
Zdisp_4($@)dnl |
Zdisp_match( movl, 0,(%ebx), %esi, `139,115,0', $@)`'dnl |
ifelse(Zdisp_found,0, |
Zdisp_match( movl, 0,(%esi), %eax, `139,70,0', $@)`'dnl |
`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4 |
|
')')') |
|
|
|
define(Zdisp_1,`dnl |
|
Zdisp_match( adcl, 0,(%edx), %eax, `0x13,0x42,0x00', $@)`'dnl |
|
Zdisp_match( adcl, 0,(%edx), %ebx, `0x13,0x5a,0x00', $@)`'dnl |
|
Zdisp_match( adcl, 0,(%edx), %esi, `0x13,0x72,0x00', $@)`'dnl |
|
Zdisp_match( addl, %ebx, 0,(%edi), `0x01,0x5f,0x00', $@)`'dnl |
|
Zdisp_match( addl, %ecx, 0,(%edi), `0x01,0x4f,0x00', $@)`'dnl |
|
Zdisp_match( addl, %esi, 0,(%edi), `0x01,0x77,0x00', $@)`'dnl |
|
Zdisp_match( sbbl, 0,(%edx), %eax, `0x1b,0x42,0x00', $@)`'dnl |
|
Zdisp_match( sbbl, 0,(%edx), %esi, `0x1b,0x72,0x00', $@)`'dnl |
|
Zdisp_match( subl, %ecx, 0,(%edi), `0x29,0x4f,0x00', $@)`'dnl |
|
Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl |
|
Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl |
|
') |
|
define(Zdisp_2,`dnl |
|
Zdisp_match( movl, %eax, 0,(%edi), `0x89,0x47,0x00', $@)`'dnl |
|
Zdisp_match( movl, %ebx, 0,(%edi), `0x89,0x5f,0x00', $@)`'dnl |
|
Zdisp_match( movl, %esi, 0,(%edi), `0x89,0x77,0x00', $@)`'dnl |
|
Zdisp_match( movl, 0,(%ebx), %eax, `0x8b,0x43,0x00', $@)`'dnl |
|
Zdisp_match( movl, 0,(%ebx), %esi, `0x8b,0x73,0x00', $@)`'dnl |
|
Zdisp_match( movl, 0,(%edx), %eax, `0x8b,0x42,0x00', $@)`'dnl |
|
Zdisp_match( movl, 0,(%esi), %eax, `0x8b,0x46,0x00', $@)`'dnl |
Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl |
Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl |
Zdisp_match( addl, %ebx, 0,(%edi), `1,95,0', $@)`'dnl |
') |
Zdisp_match( addl, %ecx, 0,(%edi), `1,79,0', $@)`'dnl |
define(Zdisp_3,`dnl |
Zdisp_match( addl, %esi, 0,(%edi), `1,119,0', $@)`'dnl |
|
Zdisp_match( subl, %ecx, 0,(%edi), `41,79,0', $@)`'dnl |
|
Zdisp_match( adcl, 0,(%edx), %esi, `19,114,0', $@)`'dnl |
|
Zdisp_match( sbbl, 0,(%edx), %esi, `27,114,0', $@)`'dnl |
|
Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl |
Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl |
Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl |
Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl |
Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl |
Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl |
Zdisp_match( movq, 0,(%esi), %mm0, `15,111,70,0', $@)`'dnl |
Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl |
Zdisp_match( movq, %mm0, 0,(%edi), `15,127,71,0', $@)`'dnl |
Zdisp_match( movq, 0,(%edx), %mm0, `0x0f,0x6f,0x42,0x00', $@)`'dnl |
|
Zdisp_match( movq, 0,(%esi), %mm0, `0x0f,0x6f,0x46,0x00', $@)`'dnl |
|
Zdisp_match( movq, %mm0, 0,(%edi), `0x0f,0x7f,0x47,0x00', $@)`'dnl |
Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl |
Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl |
Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl |
Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl |
Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl |
Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl |
|
') |
|
define(Zdisp_4,`dnl |
|
Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl |
Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl |
Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl |
Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl |
Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl |
Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl |
Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl |
Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl |
Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl |
Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl |
Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl |
ifelse(Zdisp_found,0, |
Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl |
`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4 |
') |
')')') |
|
|
|
define(Zdisp_match, |
define(Zdisp_match, |
|
m4_assert_numargs(9) |
`ifelse(eval(m4_stringequal_p(`$1',`$6') |
`ifelse(eval(m4_stringequal_p(`$1',`$6') |
&& m4_stringequal_p(`$2',0) |
&& m4_stringequal_p(`$2',0) |
&& m4_stringequal_p(`$3',`$8') |
&& m4_stringequal_p(`$3',`$8') |
Line 636 dnl just a plain "shldl ...", an error results. This |
|
Line 734 dnl just a plain "shldl ...", an error results. This |
|
dnl variant treatment of %cl isn't accidentally bypassed. |
dnl variant treatment of %cl isn't accidentally bypassed. |
|
|
define(define_shd_instruction, |
define(define_shd_instruction, |
|
m4_assert_numargs(1) |
`define($1, |
`define($1, |
m4_instruction_wrapper() |
m4_instruction_wrapper() |
m4_assert_numargs(3) |
m4_assert_numargs(3) |
Line 657 m4_assert_defined(`WANT_SHLDL_CL') |
|
Line 756 m4_assert_defined(`WANT_SHLDL_CL') |
|
``$1' `$2', `$3', `$4'')') |
``$1' `$2', `$3', `$4'')') |
|
|
|
|
dnl Usage: ASSERT(cond, instructions) |
dnl Usage: ASSERT([cond][,instructions]) |
dnl |
dnl |
dnl If WANT_ASSERT is 1, output the given instructions and expect the given |
dnl If WANT_ASSERT is 1, output the given instructions and expect the given |
dnl flags condition to then be satisfied. For example, |
dnl flags condition to then be satisfied. For example, |
Line 673 dnl When `instructions' is not empty, a pushf/popf is |
|
Line 772 dnl When `instructions' is not empty, a pushf/popf is |
|
dnl flags, but the instructions themselves must preserve any registers that |
dnl flags, but the instructions themselves must preserve any registers that |
dnl matter. FRAME is adjusted for the push and pop, so the instructions |
dnl matter. FRAME is adjusted for the push and pop, so the instructions |
dnl given can use defframe() stack variables. |
dnl given can use defframe() stack variables. |
|
dnl |
|
dnl The condition can be omitted to just output the given instructions when |
|
dnl assertion checking is wanted. In this case the pushf/popf is omitted. |
|
dnl For example, |
|
dnl |
|
dnl ASSERT(, `movl %eax, VAR_KEEPVAL') |
|
|
define(ASSERT, |
define(ASSERT, |
m4_assert_numargs_range(1,2) |
m4_assert_numargs_range(1,2) |
`ifelse(WANT_ASSERT,1, |
`ifelse(WANT_ASSERT,1, |
|
`ifelse(`$1',, |
|
`$2', |
`C ASSERT |
`C ASSERT |
ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')') |
ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')') |
$2 |
$2 |
j`$1' 1f |
j`$1' L(ASSERT_ok`'ASSERT_counter) |
ud2 C assertion failed |
ud2 C assertion failed |
1: |
L(ASSERT_ok`'ASSERT_counter): |
ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')') |
ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')') |
')') |
define(`ASSERT_counter',incr(ASSERT_counter))')')') |
|
|
|
define(ASSERT_counter,1) |
|
|
|
|
dnl Usage: movl_text_address(label,register) |
dnl Usage: movl_text_address(label,register) |
dnl |
dnl |
dnl Get the address of a text segment label, using either a plain movl or a |
dnl Get the address of a text segment label, using either a plain movl or a |
Line 698 dnl This macro is only meant for use in ASSERT()s or |
|
Line 807 dnl This macro is only meant for use in ASSERT()s or |
|
dnl the PIC sequence it generates will want to be done with a ret balancing |
dnl the PIC sequence it generates will want to be done with a ret balancing |
dnl the call on CPUs with return address branch predition. |
dnl the call on CPUs with return address branch predition. |
dnl |
dnl |
dnl The addl generated here has a backward reference to 1b, and so won't |
dnl The addl generated here has a backward reference to the label, and so |
dnl suffer from the two forwards references bug in old gas (described in |
dnl won't suffer from the two forwards references bug in old gas (described |
dnl mpn/x86/README.family). |
dnl in mpn/x86/README). |
|
|
define(movl_text_address, |
define(movl_text_address, |
|
m4_assert_numargs(2) |
`ifdef(`PIC', |
`ifdef(`PIC', |
`call 1f |
`call L(movl_text_address_`'movl_text_address_counter) |
1: popl $2 C %eip |
L(movl_text_address_`'movl_text_address_counter): |
addl `$'$1-1b, $2', |
popl $2 C %eip |
|
addl `$'$1-L(movl_text_address_`'movl_text_address_counter), $2 |
|
define(`movl_text_address_counter',incr(movl_text_address_counter))', |
`movl `$'$1, $2')') |
`movl `$'$1, $2')') |
|
|
|
define(movl_text_address_counter,1) |
|
|
|
|
|
dnl Usage: notl_or_xorl_GMP_NUMB_MASK(reg) |
|
dnl |
|
dnl Expand to either "notl `reg'" or "xorl $GMP_NUMB_BITS,`reg'" as |
|
dnl appropriate for nails in use or not. |
|
|
|
define(notl_or_xorl_GMP_NUMB_MASK, |
|
m4_assert_numargs(1) |
|
`ifelse(GMP_NAIL_BITS,0, |
|
`notl `$1'', |
|
`xorl $GMP_NUMB_MASK, `$1'')') |
|
|
|
|
divert`'dnl |
divert`'dnl |