[BACK]Return to x86-defs.m4 CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86

Annotation of OpenXM_contrib/gmp/mpn/x86/x86-defs.m4, Revision 1.1

1.1     ! maekawa     1: divert(-1)
        !             2:
        !             3: dnl  m4 macros for x86 assembler.
        !             4:
        !             5:
        !             6: dnl  Copyright (C) 1999, 2000 Free Software Foundation, Inc.
        !             7: dnl
        !             8: dnl  This file is part of the GNU MP Library.
        !             9: dnl
        !            10: dnl  The GNU MP Library is free software; you can redistribute it and/or
        !            11: dnl  modify it under the terms of the GNU Lesser General Public License as
        !            12: dnl  published by the Free Software Foundation; either version 2.1 of the
        !            13: dnl  License, or (at your option) any later version.
        !            14: dnl
        !            15: dnl  The GNU MP Library is distributed in the hope that it will be useful,
        !            16: dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            17: dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            18: dnl  Lesser General Public License for more details.
        !            19: dnl
        !            20: dnl  You should have received a copy of the GNU Lesser General Public
        !            21: dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
        !            22: dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
        !            23: dnl  Suite 330, Boston, MA 02111-1307, USA.
        !            24:
        !            25:
        !            26: dnl  Notes:
        !            27: dnl
        !            28: dnl  m4 isn't perfect for processing BSD style x86 assembler code, the main
        !            29: dnl  problems are,
        !            30: dnl
        !            31: dnl  1. Doing define(foo,123) and then using foo in an addressing mode like
        !            32: dnl     foo(%ebx) expands as a macro rather than a constant.  This is worked
        !            33: dnl     around by using deflit() from asm-defs.m4, instead of define().
        !            34: dnl
        !            35: dnl  2. Immediates in macro definitions need a space or `' to stop the $
        !            36: dnl     looking like a macro parameter.  For example,
        !            37: dnl
        !            38: dnl            define(foo, `mov $ 123, %eax')
        !            39: dnl
        !            40: dnl     This is only a problem in macro definitions, not in ordinary text,
        !            41: dnl     nor in macro parameters like text passed to forloop() or ifdef().
        !            42:
        !            43: dnl  Bytes per limb.  NOTE(review): presumably the mpn limb size on x86, confirm.
        !            44: deflit(BYTES_PER_MP_LIMB, 4)
        !            45:
        !            46:
        !            47: dnl  --------------------------------------------------------------------------
        !            48: dnl  Replacement PROLOGUE/EPILOGUE with more sophisticated error checking.
        !            49: dnl  Nesting and overlapping not allowed.
        !            50: dnl
        !            51:
        !            52:
        !            53: dnl  Usage: PROLOGUE(functionname)
        !            54: dnl
        !            55: dnl  Generate a function prologue.  functionname gets GSYM_PREFIX added.
        !            56: dnl  Examples,
        !            57: dnl
        !            58: dnl         PROLOGUE(mpn_add_n)
        !            59: dnl         PROLOGUE(somefun)
        !            60: dnl  An m4_error results if an earlier PROLOGUE is still awaiting its EPILOGUE.
        !            61: define(`PROLOGUE',
        !            62: m4_assert_numargs(1)
        !            63: m4_assert_defined(`PROLOGUE_cpu')
        !            64: `ifdef(`PROLOGUE_current_function',
        !            65: `m4_error(`PROLOGUE'(`PROLOGUE_current_function') needs an `EPILOGUE'() before `PROLOGUE'($1)
        !            66: )')dnl
        !            67: m4_file_seen()dnl
        !            68: define(`PROLOGUE_current_function',`$1')dnl
        !            69: PROLOGUE_cpu(GSYM_PREFIX`'$1)')
        !            70:
        !            71:
        !            72: dnl  Usage: EPILOGUE()
        !            73: dnl
        !            74: dnl  Notice the function name is passed to EPILOGUE_cpu(), letting it use $1
        !            75: dnl  instead of the long PROLOGUE_current_function symbol.
        !            76: dnl  An m4_error results if no PROLOGUE is currently open.
        !            77: define(`EPILOGUE',
        !            78: m4_assert_numargs(0)
        !            79: m4_assert_defined(`EPILOGUE_cpu')
        !            80: `ifdef(`PROLOGUE_current_function',,
        !            81: `m4_error(`EPILOGUE'() with no `PROLOGUE'()
        !            82: )')dnl
        !            83: EPILOGUE_cpu(GSYM_PREFIX`'PROLOGUE_current_function)`'dnl
        !            84: undefine(`PROLOGUE_current_function')')
        !            85: dnl  Registered via m4wrap_prepend(): complain about a PROLOGUE left without an EPILOGUE.
        !            86: m4wrap_prepend(
        !            87: `ifdef(`PROLOGUE_current_function',
        !            88: `m4_error(`EPILOGUE() for PROLOGUE('PROLOGUE_current_function`) never seen
        !            89: ')')')
        !            90:
        !            91:
        !            92: dnl  Usage: PROLOGUE_assert_inside()
        !            93: dnl
        !            94: dnl  Use this unquoted on a line on its own at the start of a macro
        !            95: dnl  definition to add some code to check the macro is only used inside a
        !            96: dnl  PROLOGUE/EPILOGUE pair, and that hence PROLOGUE_current_function is
        !            97: dnl  defined.
        !            98: dnl  Expands to a PROLOGUE_assert_inside_internal() call given $0 of the macro being defined.
        !            99: define(`PROLOGUE_assert_inside',
        !           100: m4_assert_numargs(0)
        !           101: ``PROLOGUE_assert_inside_internal'(m4_doublequote($`'0))`dnl '')
        !           102: dnl  Called: PROLOGUE_assert_inside_internal(`macroname')
        !           103: define(`PROLOGUE_assert_inside_internal',
        !           104: m4_assert_numargs(1)
        !           105: `ifdef(`PROLOGUE_current_function',,
        !           106: `m4_error(`$1 used outside a PROLOGUE / EPILOGUE pair
        !           107: ')')')
        !           108:
        !           109:
        !           110: dnl  Usage: L(labelname)
        !           111: dnl         LF(functionname,labelname)
        !           112: dnl
        !           113: dnl  Generate a local label in the current or given function.  For LF(),
        !           114: dnl  functionname gets GSYM_PREFIX added, the same as with PROLOGUE().
        !           115: dnl
        !           116: dnl  For example, in a function mpn_add_n (and with MPN_PREFIX __gmpn),
        !           117: dnl
        !           118: dnl         L(bar)          => L__gmpn_add_n__bar
        !           119: dnl         LF(somefun,bar) => Lsomefun__bar
        !           120: dnl
        !           121: dnl  The function name and label name get two underscores between them rather
        !           122: dnl  than one to guard against clashing with a separate external symbol that
        !           123: dnl  happened to be called functionname_labelname.  (Though this would only
        !           124: dnl  happen if the local label prefix is empty.)  Underscores are used so
        !           125: dnl  the whole label will still be a valid C identifier and so can be easily
        !           126: dnl  used in gdb.
        !           127:
        !           128: dnl  LSYM_PREFIX can be L$, so defn() is used to prevent L expanding as the
        !           129: dnl  L macro and making an infinite recursion.
        !           130: define(`LF',
        !           131: m4_assert_numargs(2)
        !           132: m4_assert_defined(`LSYM_PREFIX')
        !           133: `defn(`LSYM_PREFIX')GSYM_PREFIX`'$1`'__$2')
        !           134: dnl  L() is LF() applied to PROLOGUE_current_function.
        !           135: define(`L',
        !           136: m4_assert_numargs(1)
        !           137: PROLOGUE_assert_inside()
        !           138: `LF(PROLOGUE_current_function,`$1')')
        !           139:
        !           140:
        !           141: dnl  Called: PROLOGUE_cpu(gsym)
        !           142: dnl          EPILOGUE_cpu(gsym)
        !           143: dnl  PROLOGUE_cpu emits GLOBL and TYPE() lines and then the gsym label.
        !           144: define(`PROLOGUE_cpu',
        !           145: m4_assert_numargs(1)
        !           146:        `GLOBL  $1
        !           147:        TYPE($1,`function')
        !           148: $1:')
        !           149: dnl  EPILOGUE_cpu emits SIZE() with the size given as .-gsym.
        !           150: define(`EPILOGUE_cpu',
        !           151: m4_assert_numargs(1)
        !           152: `      SIZE($1,.-$1)')
        !           153:
        !           154:
        !           155:
        !           156: dnl  --------------------------------------------------------------------------
        !           157: dnl  Various x86 macros.
        !           158: dnl
        !           159:
        !           160:
        !           161: dnl  Usage: ALIGN_OFFSET(bytes,offset)
        !           162: dnl
        !           163: dnl  Align to `offset' away from a multiple of `bytes'.
        !           164: dnl
        !           165: dnl  This is useful for testing, for example align to something very strict
        !           166: dnl  and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
        !           167: dnl
        !           168: dnl  Generally you wouldn't execute across the padding, but it's done with
        !           169: dnl  nop's so it'll work.
        !           170: dnl  The expansion is ALIGN(bytes) and then a forloop() from 1 to offset of nop lines.
        !           171: define(`ALIGN_OFFSET',
        !           172: m4_assert_numargs(2)
        !           173: `ALIGN($1)
        !           174: forloop(`i',1,$2,`     nop
        !           175: ')')
        !           176:
        !           177:
        !           178: dnl  Usage: defframe(name,offset)
        !           179: dnl
        !           180: dnl  Make a definition like the following with which to access a parameter
        !           181: dnl  or variable on the stack.
        !           182: dnl
        !           183: dnl         define(name,`FRAME+offset(%esp)')
        !           184: dnl
        !           185: dnl  Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
        !           186: dnl  byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
        !           187: dnl  Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
        !           188: dnl  zero offset is wanted.
        !           189: dnl
        !           190: dnl  The new macro also gets a check that when it's used FRAME is actually
        !           191: dnl  defined, and that the final %esp offset isn't negative, which would
        !           192: dnl  mean an attempt to access something below the current %esp.
        !           193: dnl
        !           194: dnl  deflit() is used rather than a plain define(), so the new macro won't
        !           195: dnl  delete any following parenthesized expression.  name(%edi) will come
        !           196: dnl  out say as 16(%esp)(%edi).  This isn't valid assembler and should
        !           197: dnl  provoke an error, which is better than silently giving just 16(%esp).
        !           198: dnl
        !           199: dnl  See README.family for more on the suggested way to access the stack
        !           200: dnl  frame.
        !           201: dnl  FRAME stays quoted in the new macro, so it's expanded at each use of name.
        !           202: define(`defframe',
        !           203: m4_assert_numargs(2)
        !           204: `deflit(`$1',
        !           205: m4_assert_defined(`FRAME')
        !           206: `defframe_check_notbelow(`$1',$2,FRAME)dnl
        !           207: defframe_empty_if_zero(FRAME+($2))(%esp)')')
        !           208: dnl  Called: defframe_empty_if_zero(expression)
        !           209: dnl  Gives eval(expression) if defframe_empty_if_zero_disabled is 1, else m4_empty_if_zero().
        !           210: define(`defframe_empty_if_zero',
        !           211: `ifelse(defframe_empty_if_zero_disabled,1,
        !           212: `eval($1)',
        !           213: `m4_empty_if_zero($1)')')
        !           214: dnl  Called: defframe_check_notbelow(`name',offset,FRAME)
        !           215: dnl  Raises an m4_error if FRAME+offset is negative.
        !           216: define(`defframe_check_notbelow',
        !           217: m4_assert_numargs(3)
        !           218: `ifelse(eval(($3)+($2)<0),1,
        !           219: `m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
        !           220: ')')')
        !           221:
        !           222:
        !           223: dnl  Usage: FRAME_pushl()
        !           224: dnl         FRAME_popl()
        !           225: dnl         FRAME_addl_esp(n)
        !           226: dnl         FRAME_subl_esp(n)
        !           227: dnl
        !           228: dnl  Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
        !           229: dnl  %esp of n bytes.
        !           230: dnl
        !           231: dnl  Using these macros is completely optional.  Sometimes it makes more
        !           232: dnl  sense to put explicit deflit(`FRAME',N) forms, especially when there's
        !           233: dnl  jumps and different sequences of FRAME values need to be used in
        !           234: dnl  different places.
        !           235: dnl  Each of these redefines FRAME with deflit() to the eval()ed new value.
        !           236: define(`FRAME_pushl',
        !           237: m4_assert_numargs(0)
        !           238: m4_assert_defined(`FRAME')
        !           239: `deflit(`FRAME',eval(FRAME+4))')
        !           240:
        !           241: define(`FRAME_popl',
        !           242: m4_assert_numargs(0)
        !           243: m4_assert_defined(`FRAME')
        !           244: `deflit(`FRAME',eval(FRAME-4))')
        !           245:
        !           246: define(`FRAME_addl_esp',
        !           247: m4_assert_numargs(1)
        !           248: m4_assert_defined(`FRAME')
        !           249: `deflit(`FRAME',eval(FRAME-($1)))')
        !           250:
        !           251: define(`FRAME_subl_esp',
        !           252: m4_assert_numargs(1)
        !           253: m4_assert_defined(`FRAME')
        !           254: `deflit(`FRAME',eval(FRAME+($1)))')
        !           255:
        !           256:
        !           257: dnl  Usage: defframe_pushl(name)
        !           258: dnl
        !           259: dnl  Do a combination of a FRAME_pushl() and a defframe() to name the stack
        !           260: dnl  location just pushed.  This should come after a pushl instruction.
        !           261: dnl  Putting it on the same line works and avoids lengthening the code.  For
        !           262: dnl  example,
        !           263: dnl
        !           264: dnl         pushl   %eax     defframe_pushl(VAR_COUNTER)
        !           265: dnl
        !           266: dnl  Notice the defframe() is done with an unquoted -FRAME thus giving its
        !           267: dnl  current value without tracking future changes.
        !           268: dnl  FRAME_pushl() runs first, so -FRAME is the offset after the push.
        !           269: define(`defframe_pushl',
        !           270: `FRAME_pushl()defframe(`$1',-FRAME)')
        !           271:
        !           272:
        !           273: dnl  --------------------------------------------------------------------------
        !           274: dnl  Assembler instruction macros.
        !           275: dnl
        !           276:
        !           277:
        !           278: dnl  Usage: emms_or_femms
        !           279: dnl         femms_available_p
        !           280: dnl
        !           281: dnl  femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
        !           282: dnl  femms instruction is available.  emms_or_femms expands to femms if
        !           283: dnl  available, or emms if not.
        !           284: dnl
        !           285: dnl  emms_or_femms is meant for use in the K6 directory where plain K6
        !           286: dnl  (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
        !           287: dnl  supported together.
        !           288: dnl
        !           289: dnl  On K7 femms is no longer faster and is just an alias for emms, so plain
        !           290: dnl  emms may as well be used.
        !           291: dnl  Tests the HAVE_TARGET_CPU_k62, _k63 and _athlon symbols with m4_ifdef_anyof_p().
        !           292: define(`femms_available_p',
        !           293: m4_assert_numargs(-1)
        !           294: `m4_ifdef_anyof_p(
        !           295:        `HAVE_TARGET_CPU_k62',
        !           296:        `HAVE_TARGET_CPU_k63',
        !           297:        `HAVE_TARGET_CPU_athlon')')
        !           298: dnl  Selects femms when femms_available_p is 1, emms otherwise.
        !           299: define(`emms_or_femms',
        !           300: m4_assert_numargs(-1)
        !           301: `ifelse(femms_available_p,1,`femms',`emms')')
        !           302:
        !           303:
        !           304: dnl  Usage: femms
        !           305: dnl
        !           306: dnl  The gas 2.9.1 that comes with FreeBSD 3.4 doesn't support femms, so the
        !           307: dnl  following is a replacement using .byte.
        !           308: dnl
        !           309: dnl  If femms isn't available, an emms is generated instead, for convenience
        !           310: dnl  when testing on a machine without femms.
        !           311: dnl  The .byte values 15,14 are 0x0F,0x0E; the emms fallback also issues an m4_warning.
        !           312: define(`femms',
        !           313: m4_assert_numargs(-1)
        !           314: `ifelse(femms_available_p,1,
        !           315: `.byte 15,14   C AMD 3DNow femms',
        !           316: `emms`'dnl
        !           317: m4_warning(`warning, using emms in place of femms, use for testing only
        !           318: ')')')
        !           319:
        !           320:
        !           321: dnl  Usage: jadcl0(op)
        !           322: dnl
        !           323: dnl  Issue a jnc/incl as a substitute for adcl $0,op.  This isn't an exact
        !           324: dnl  replacement, since it doesn't set the flags like adcl does.
        !           325: dnl
        !           326: dnl  This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
        !           327: dnl  mpn_sqr_basecase because on K6 an adcl is slow, the branch
        !           328: dnl  misprediction penalty is small, and the multiply algorithm used leads
        !           329: dnl  to a carry bit on average only 1/4 of the time.
        !           330: dnl
        !           331: dnl  jadcl0_disabled can be set to 1 to instead issue an ordinary adcl for
        !           332: dnl  comparison.  For example,
        !           333: dnl
        !           334: dnl            define(`jadcl0_disabled',1)
        !           335: dnl
        !           336: dnl  When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
        !           337: dnl  the same size as an adcl.  This makes it possible to use the exact same
        !           338: dnl  computed jump code when testing the relative speed of jnc/incl and adcl
        !           339: dnl  with jadcl0_disabled.
        !           340: dnl  The jnc skips the incl when there's no carry, using numeric local label 1.
        !           341: define(`jadcl0',
        !           342: m4_assert_numargs(1)
        !           343: `ifelse(jadcl0_disabled,1,
        !           344:        `adcl   $`'0, $1',
        !           345:        `jnc    1f
        !           346:        incl    $1
        !           347: 1:dnl')')
        !           348:
        !           349:
        !           350: dnl  Usage: cmov_available_p
        !           351: dnl
        !           352: dnl  Expand to 1 if cmov is available, 0 if not.
        !           353: dnl  Tests the pentiumpro, pentium2, pentium3 and athlon HAVE_TARGET_CPU symbols.
        !           354: define(`cmov_available_p',
        !           355: `m4_ifdef_anyof_p(
        !           356:        `HAVE_TARGET_CPU_pentiumpro',
        !           357:        `HAVE_TARGET_CPU_pentium2',
        !           358:        `HAVE_TARGET_CPU_pentium3',
        !           359:        `HAVE_TARGET_CPU_athlon')')
        !           360:
        !           361:
        !           362: dnl  Usage: x86_lookup(target, key,value, key,value, ...)
        !           363: dnl         x86_lookup_p(target, key,value, key,value, ...)
        !           364: dnl
        !           365: dnl  Look for `target' among the `key' parameters.
        !           366: dnl
        !           367: dnl  x86_lookup expands to the corresponding `value', or generates an error
        !           368: dnl  if `target' isn't found.
        !           369: dnl
        !           370: dnl  x86_lookup_p expands to 1 if `target' is found, or 0 if not.
        !           371: dnl  Recursive: each step drops target and one key,value pair with shift(); fewer than 3 args is an error.
        !           372: define(`x86_lookup',
        !           373: `ifelse(eval($#<3),1,
        !           374: `m4_error(`unrecognised part of x86 instruction: $1
        !           375: ')',
        !           376: `ifelse(`$1',`$2', `$3',
        !           377: `x86_lookup(`$1',shift(shift(shift($@))))')')')
        !           378: dnl  Same recursion as x86_lookup, but gives 1 on a match and 0 once fewer than 3 args remain.
        !           379: define(`x86_lookup_p',
        !           380: `ifelse(eval($#<3),1, `0',
        !           381: `ifelse(`$1',`$2',    `1',
        !           382: `x86_lookup_p(`$1',shift(shift(shift($@))))')')')
        !           383:
        !           384:
        !           385: dnl  Usage: x86_opcode_reg32(reg)
        !           386: dnl         x86_opcode_reg32_p(reg)
        !           387: dnl
        !           388: dnl  x86_opcode_reg32 expands to the standard 3 bit encoding for the given
        !           389: dnl  32-bit register, eg. `%ebp' turns into 5.
        !           390: dnl
        !           391: dnl  x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
        !           392: dnl  if not.
        !           393: dnl  Looks reg up in x86_opcode_reg32_list with x86_lookup().
        !           394: define(`x86_opcode_reg32',
        !           395: m4_assert_numargs(1)
        !           396: `x86_lookup(`$1',x86_opcode_reg32_list)')
        !           397: dnl  Looks reg up in x86_opcode_reg32_list with x86_lookup_p().
        !           398: define(`x86_opcode_reg32_p',
        !           399: m4_assert_onearg()
        !           400: `x86_lookup_p(`$1',x86_opcode_reg32_list)')
        !           401: dnl  Register,encoding pairs in the key,value form taken by x86_lookup().
        !           402: define(`x86_opcode_reg32_list',
        !           403: ``%eax',0,
        !           404: `%ecx',1,
        !           405: `%edx',2,
        !           406: `%ebx',3,
        !           407: `%esp',4,
        !           408: `%ebp',5,
        !           409: `%esi',6,
        !           410: `%edi',7')
        !           411:
        !           412:
        !           413: dnl  Usage: x86_opcode_tttn(cond)
        !           414: dnl
        !           415: dnl  Expand to the 4-bit "tttn" field value for the given x86 branch
        !           416: dnl  condition (like `c', `ae', etc).
        !           417: dnl  The value is looked up in x86_opcode_tttn_list with x86_lookup().
        !           418: define(`x86_opcode_tttn',
        !           419: m4_assert_numargs(1)
        !           420: `x86_lookup(`$1',x86_opcode_tttn_list)')
        !           421: dnl  cond,tttn pairs in the key,value form taken by x86_lookup(); synonyms share a value.
        !           422: define(`x86_opcode_tttn_list',
        !           423: ``o',  0,
        !           424: `no',  1,
        !           425: `b',   2, `c',  2, `nae',2,
        !           426: `nb',  3, `nc', 3, `ae', 3,
        !           427: `e',   4, `z',  4,
        !           428: `ne',  5, `nz', 5,
        !           429: `be',  6, `na', 6,
        !           430: `nbe', 7, `a',  7,
        !           431: `s',   8,
        !           432: `ns',  9,
        !           433: `p',  10, `pe', 10, `npo',10,
        !           434: `np', 11, `npe',11, `po', 11,
        !           435: `l',  12, `nge',12,
        !           436: `nl', 13, `ge', 13,
        !           437: `le', 14, `ng', 14,
        !           438: `nle',15, `g',  15')
        !           439:
        !           440:
        !           441: dnl  Usage: cmovCC(srcreg,dstreg)
        !           442: dnl
        !           443: dnl  Generate a cmov instruction if the target supports cmov, or simulate it
        !           444: dnl  with a conditional jump if not (the latter being meant only for
        !           445: dnl  testing).  For example,
        !           446: dnl
        !           447: dnl         cmovz(  %eax, %ebx)
        !           448: dnl
        !           449: dnl  cmov instructions are generated using .byte sequences, since only
        !           450: dnl  recent versions of gas know cmov.
        !           451: dnl
        !           452: dnl  The source operand can only be a plain register.  (m4 code implementing
        !           453: dnl  full memory addressing modes exists, believe it or not, but isn't
        !           454: dnl  currently needed and isn't included.)
        !           455: dnl
        !           456: dnl  All the standard conditions are defined.  Attempting to use one without
        !           457: dnl  the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
        !           458: dnl  an error.  This ensures the necessary .byte sequences aren't
        !           459: dnl  accidentally missed.
        !           460: dnl  Called: define_cmov_many(cond,tttn,cond,tttn,...)
        !           461: dnl  Calls define_cmov() on each cond,tttn pair in turn, stopping at an empty cond.
        !           462: define(`define_cmov_many',
        !           463: `ifelse(m4_length(`$1'),0,,
        !           464: `define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
        !           465: dnl  Called: define_cmov(cond,tttn)
        !           466: dnl  Defines cmov<cond> to pass its own name, cond, tttn and two operands to cmov_internal().
        !           467: define(`define_cmov',
        !           468: m4_assert_numargs(2)
        !           469: `define(`cmov$1',
        !           470: m4_instruction_wrapper()
        !           471: m4_assert_numargs(2)
        !           472: `cmov_internal'(m4_doublequote($`'0),``$1',`$2'',dnl
        !           473: m4_doublequote($`'1),m4_doublequote($`'2)))')
        !           474: dnl  Define a cmovCC macro for every condition in x86_opcode_tttn_list.
        !           475: define_cmov_many(x86_opcode_tttn_list)
        !           476:
        !           477: dnl  Called: cmov_internal(name,cond,tttn,src,dst)
        !           478: dnl  Uses cmov_bytes_tttn() if cmov_available_p is 1, otherwise warns and uses cmov_simulate().
        !           479: define(`cmov_internal',
        !           480: m4_assert_numargs(5)
        !           481: `ifelse(cmov_available_p,1,
        !           482: `cmov_bytes_tttn(`$1',`$3',`$4',`$5')',
        !           483: `m4_warning(`warning, simulating cmov with jump, use for testing only
        !           484: ')cmov_simulate(`$2',`$4',`$5')')')
        !           485: dnl  Called: cmov_simulate(cond,src,dst)
        !           486: dnl  Jumps to a movl when cond holds, otherwise jumps past it (numeric labels 1 and 2).
        !           487: dnl  If this is going to be used with memory operands for the source it will
        !           488: dnl  need to be changed to do a fetch even if the condition is false, so as
        !           489: dnl  to trigger exceptions the same way a real cmov does.
        !           490: define(`cmov_simulate',
        !           491: m4_assert_numargs(3)
        !           492:        `j$1    1f      C cmov$1 $2, $3
        !           493:        jmp     2f
        !           494: 1:     movl    $2, $3
        !           495: 2:')
        !           496: dnl  Called: cmov_bytes_tttn(name,tttn,src,dst)
        !           497: dnl  The bytes are 15, 64+tttn, then 192 + 8*(dst register code) + (src register code).
        !           498: define(`cmov_bytes_tttn',
        !           499: m4_assert_numargs(4)
        !           500: `.byte dnl
        !           501: 15, dnl
        !           502: eval(64+$2), dnl
        !           503: eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
        !           504:        C `$1 $3, $4'')
        !           505:
        !           506:
        !           507: dnl  Usage: loop_or_decljnz label
        !           508: dnl
        !           509: dnl  Generate either a "loop" instruction or a "decl %ecx / jnz", whichever
        !           510: dnl  is better.  "loop" is better on K6 and probably on 386, on other chips
        !           511: dnl  separate decl/jnz is better.
        !           512: dnl
        !           513: dnl  This macro is just for mpn/x86/divrem_1.asm and mpn/x86/mod_1.asm where
        !           514: dnl  this loop_or_decljnz variation is enough to let the code be shared by
        !           515: dnl  all chips.
        !           516: dnl  Only the mnemonic part is generated; the label follows the macro in the caller's text.
        !           517: define(`loop_or_decljnz',
        !           518: `ifelse(loop_is_better_p,1,
        !           519:        `loop',
        !           520:        `decl   %ecx
        !           521:        jnz')')
        !           522: dnl  Tests the k6, k62, k63 and i386 HAVE_TARGET_CPU symbols with m4_ifdef_anyof_p().
        !           523: define(`loop_is_better_p',
        !           524: `m4_ifdef_anyof_p(`HAVE_TARGET_CPU_k6',
        !           525:                   `HAVE_TARGET_CPU_k62',
        !           526:                   `HAVE_TARGET_CPU_k63',
        !           527:                   `HAVE_TARGET_CPU_i386')')
        !           528:
        !           529:
        !           530: dnl  Usage: Zdisp(inst,op,op,op)
        !           531: dnl
        !           532: dnl  Generate explicit .byte sequences if necessary to force a byte-sized
        !           533: dnl  zero displacement on an instruction.  For example,
        !           534: dnl
        !           535: dnl         Zdisp(  movl,   0,(%esi), %eax)
        !           536: dnl
        !           537: dnl  expands to
        !           538: dnl
        !           539: dnl                 .byte   139,70,0  C movl 0(%esi), %eax
        !           540: dnl
        !           541: dnl  If the displacement given isn't 0, then normal assembler code is
        !           542: dnl  generated.  For example,
        !           543: dnl
        !           544: dnl         Zdisp(  movl,   4,(%esi), %eax)
        !           545: dnl
        !           546: dnl  expands to
        !           547: dnl
        !           548: dnl                 movl    4(%esi), %eax
        !           549: dnl
        !           550: dnl  This means a single Zdisp() form can be used with an expression for the
        !           551: dnl  displacement, and .byte will be used only if necessary.  The
        !           552: dnl  displacement argument is eval()ed.
        !           553: dnl
        !           554: dnl  Because there aren't many places a 0(reg) form is wanted, Zdisp is
        !           555: dnl  implemented with a table of instructions and encodings.  A new entry is
        !           556: dnl  needed for any different operation or registers.
        !           557: dnl  Each Zdisp_match() line is one table entry; Zdisp_found records whether any entry matched.
        !           558: define(`Zdisp',
        !           559: `define(`Zdisp_found',0)dnl
        !           560: Zdisp_match( movl, %eax, 0,(%edi), `137,71,0',    $@)`'dnl
        !           561: Zdisp_match( movl, %ebx, 0,(%edi), `137,95,0',    $@)`'dnl
        !           562: Zdisp_match( movl, %esi, 0,(%edi), `137,119,0',   $@)`'dnl
        !           563: Zdisp_match( movl, 0,(%ebx), %eax, `139,67,0',    $@)`'dnl
        !           564: Zdisp_match( movl, 0,(%ebx), %esi, `139,115,0',   $@)`'dnl
        !           565: Zdisp_match( movl, 0,(%esi), %eax, `139,70,0',    $@)`'dnl
        !           566: Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00',      $@)`'dnl
        !           567: Zdisp_match( addl, %ebx, 0,(%edi), `1,95,0',      $@)`'dnl
        !           568: Zdisp_match( addl, %ecx, 0,(%edi), `1,79,0',      $@)`'dnl
        !           569: Zdisp_match( addl, %esi, 0,(%edi), `1,119,0',     $@)`'dnl
        !           570: Zdisp_match( subl, %ecx, 0,(%edi), `41,79,0',     $@)`'dnl
        !           571: Zdisp_match( adcl, 0,(%edx), %esi, `19,114,0',    $@)`'dnl
        !           572: Zdisp_match( sbbl, 0,(%edx), %esi, `27,114,0',    $@)`'dnl
        !           573: Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
        !           574: Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
        !           575: Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
        !           576: Zdisp_match( movq, 0,(%esi),        %mm0, `15,111,70,0',     $@)`'dnl
        !           577: Zdisp_match( movq, %mm0,        0,(%edi), `15,127,71,0',     $@)`'dnl
        !           578: Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
        !           579: Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
        !           580: Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
        !           581: Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
        !           582: Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
        !           583: Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
        !           584: Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
        !           585: Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
        !           586: ifelse(Zdisp_found,0,
        !           587: `m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
        !           588: ')')')
        !           589:
        !                dnl  Called: Zdisp_match(op,a,b,c, bytes, inst,x,y,z)
        !                dnl
        !                dnl  $1 to $4 are one table row from Zdisp (instruction and operands, with
        !                dnl  a literal 0 in the displacement position), $5 is the .byte encoding
        !                dnl  for that row, and $6 to $9 are the arguments that were given to Zdisp.
        !                dnl
        !                dnl  Two operand layouts are tried in turn.  In the first the row has its 0
        !                dnl  in $2 and the caller's displacement is $7 (memory operand first).  In
        !                dnl  the second the row has its 0 in $3 and the caller's displacement is $8
        !                dnl  (memory operand second).  The remaining fields must be string-equal.
        !                dnl
        !                dnl  On a match Zdisp_found is set to 1.  If the displacement evals to 0 the
        !                dnl  .byte form is emitted, followed by C and the zero-displacement
        !                dnl  instruction text; otherwise the instruction is emitted as written by
        !                dnl  the caller.  When neither layout matches nothing is emitted.
        !           590: define(Zdisp_match,
        !           591: `ifelse(eval(m4_stringequal_p(`$1',`$6')
        !           592:        && m4_stringequal_p(`$2',0)
        !           593:        && m4_stringequal_p(`$3',`$8')
        !           594:        && m4_stringequal_p(`$4',`$9')),1,
        !           595: `define(`Zdisp_found',1)dnl
        !           596: ifelse(eval(`$7'),0,
        !           597: `      .byte   $5  C `$1 0$3, $4'',
        !           598: `      $6      $7$8, $9')',
        !           599:
        !           600: `ifelse(eval(m4_stringequal_p(`$1',`$6')
        !           601:        && m4_stringequal_p(`$2',`$7')
        !           602:        && m4_stringequal_p(`$3',0)
        !           603:        && m4_stringequal_p(`$4',`$9')),1,
        !           604: `define(`Zdisp_found',1)dnl
        !           605: ifelse(eval(`$8'),0,
        !           606: `      .byte   $5  C `$1 $2, 0$4'',
        !           607: `      $6      $7, $8$9')')')')
        !           608:
        !           609:
        !           610: dnl  Usage: shldl(count,src,dst)
        !           611: dnl         shrdl(count,src,dst)
        !           612: dnl         shldw(count,src,dst)
        !           613: dnl         shrdw(count,src,dst)
        !           614: dnl
        !           615: dnl  Generate a double-shift instruction, possibly omitting a %cl count
        !           616: dnl  parameter if that's what the assembler requires, as indicated by
        !           617: dnl  WANT_SHLDL_CL in config.m4.  For example,
        !           618: dnl
        !           619: dnl         shldl(  %cl, %eax, %ebx)
        !           620: dnl
        !           621: dnl  turns into either
        !           622: dnl
        !           623: dnl         shldl   %cl, %eax, %ebx
        !           624: dnl  or
        !           625: dnl         shldl   %eax, %ebx
        !           626: dnl
        !           627: dnl  Immediate counts are always passed through unchanged.  For example,
        !           628: dnl
        !           629: dnl         shrdl(  $2, %esi, %edi)
        !           630: dnl  becomes
        !           631: dnl         shrdl   $2, %esi, %edi
        !           632: dnl
        !           633: dnl
        !           634: dnl  If you forget to use the macro form "shldl( ...)" and instead write
        !           635: dnl  just a plain "shldl ...", an error results.  This ensures the necessary
        !           636: dnl  variant treatment of %cl isn't accidentally bypassed.
        !           637:
        !                dnl  Usage: define_shd_instruction(name)
        !                dnl
        !                dnl  Define a macro called name.  The generated macro has
        !                dnl  m4_instruction_wrapper() and m4_assert_numargs(3) applied to it, and
        !                dnl  forwards its own name ($0) plus its three arguments to shd_instruction.
        !           638: define(define_shd_instruction,
        !           639: `define($1,
        !           640: m4_instruction_wrapper()
        !           641: m4_assert_numargs(3)
        !           642: `shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
        !           643: m4_doublequote($`'2),m4_doublequote($`'3)))')
        !           644:
        !           645: dnl  Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
        !                dnl  so each of the four wrappers below passes its own name through as the
        !                dnl  op argument of shd_instruction.
        !           646: define_shd_instruction(shldl)
        !           647: define_shd_instruction(shrdl)
        !           648: define_shd_instruction(shldw)
        !           649: define_shd_instruction(shrdw)
        !           650:
        !           651: dnl  Called: shd_instruction(op,count,src,dst)
        !                dnl
        !                dnl  Exactly 4 arguments are required and WANT_SHLDL_CL must be defined.
        !                dnl  When count is the string %cl and WANT_SHLDL_CL is 0, the count operand
        !                dnl  is left out, giving "op src, dst".  In every other case (including all
        !                dnl  immediate counts) the three operands are emitted unchanged.
        !           652: define(shd_instruction,
        !           653: m4_assert_numargs(4)
        !           654: m4_assert_defined(`WANT_SHLDL_CL')
        !           655: `ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
        !           656: ``$1'  `$3', `$4'',
        !           657: ``$1'  `$2', `$3', `$4'')')
        !           658:
        !           659:
        !           660: dnl  Usage: ASSERT(cond, instructions)
        !           661: dnl
        !           662: dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
        !           663: dnl  flags condition to then be satisfied.  For example,
        !           664: dnl
        !           665: dnl         ASSERT(ne, `cmpl %eax, %ebx')
        !           666: dnl
        !           667: dnl  The instructions can be omitted to just assert a flags condition with
        !           668: dnl  no extra calculation.  For example,
        !           669: dnl
        !           670: dnl         ASSERT(nc)
        !           671: dnl
        !           672: dnl  When `instructions' is not empty, a pushf/popf is added to preserve the
        !           673: dnl  flags, but the instructions themselves must preserve any registers that
        !           674: dnl  matter.  FRAME is adjusted for the push and pop, so the instructions
        !           675: dnl  given can use defframe() stack variables.
        !           676:
        !                dnl  Implementation notes: nothing is emitted unless WANT_ASSERT is 1.  The
        !                dnl  check is a conditional jump on cond to local label 1, placed over a
        !                dnl  ud2, so a failed assertion executes ud2.  The FRAME adjustment for the
        !                dnl  pushf/popf is made only if FRAME is defined.
        !           677: define(ASSERT,
        !           678: m4_assert_numargs_range(1,2)
        !           679: `ifelse(WANT_ASSERT,1,
        !           680:        `C ASSERT
        !           681: ifelse(`$2',,,`        pushf   ifdef(`FRAME',`FRAME_pushl()')')
        !           682:        $2
        !           683:        j`$1'   1f
        !           684:        ud2     C assertion failed
        !           685: 1:
        !           686: ifelse(`$2',,,`        popf    ifdef(`FRAME',`FRAME_popl()')')
        !           687: ')')
        !           688:
        !           689:
        !           690: dnl  Usage: movl_text_address(label,register)
        !           691: dnl
        !           692: dnl  Get the address of a text segment label, using either a plain movl or a
        !           693: dnl  position-independent calculation, as necessary.  For example,
        !           694: dnl
        !           695: dnl         movl_text_address(L(foo),%eax)
        !           696: dnl
        !           697: dnl  This macro is only meant for use in ASSERT()s or when testing, since
        !           698: dnl  the PIC sequence it generates will want to be done with a ret balancing
        !           699: dnl  the call on CPUs with return address branch prediction.
        !           700: dnl
        !           701: dnl  The addl generated here has a backward reference to 1b, and so won't
        !           702: dnl  suffer from the two forwards references bug in old gas (described in
        !           703: dnl  mpn/x86/README.family).
        !           704:
        !                dnl  Expansion: with PIC defined, a call to local label 1 followed by a
        !                dnl  popl loads the address of that label into the register, and an addl of
        !                dnl  (label - 1b) then adjusts it to the requested label.  Without PIC a
        !                dnl  single movl of the label's address as an immediate is emitted.
        !           705: define(movl_text_address,
        !           706: `ifdef(`PIC',
        !           707:        `call   1f
        !           708: 1:     popl    $2      C %eip
        !           709:        addl    `$'$1-1b, $2',
        !           710:        `movl   `$'$1, $2')')
        !           711:
        !           712:
        !           713: divert`'dnl

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>