Annotation of OpenXM_contrib/gmp/mpn/x86/x86-defs.m4, Revision 1.1.1.1
1.1 maekawa 1: divert(-1)
2:
3: dnl m4 macros for x86 assembler.
4:
5:
6: dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
7: dnl
8: dnl This file is part of the GNU MP Library.
9: dnl
10: dnl The GNU MP Library is free software; you can redistribute it and/or
11: dnl modify it under the terms of the GNU Lesser General Public License as
12: dnl published by the Free Software Foundation; either version 2.1 of the
13: dnl License, or (at your option) any later version.
14: dnl
15: dnl The GNU MP Library is distributed in the hope that it will be useful,
16: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
17: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18: dnl Lesser General Public License for more details.
19: dnl
20: dnl You should have received a copy of the GNU Lesser General Public
21: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
22: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
23: dnl Suite 330, Boston, MA 02111-1307, USA.
24:
25:
26: dnl Notes:
27: dnl
28: dnl m4 isn't perfect for processing BSD style x86 assembler code, the main
29: dnl problems are,
30: dnl
31: dnl 1. Doing define(foo,123) and then using foo in an addressing mode like
32: dnl foo(%ebx) expands as a macro rather than a constant. This is worked
33: dnl around by using deflit() from asm-defs.m4, instead of define().
34: dnl
35: dnl 2. Immediates in macro definitions need a space or `' to stop the $
36: dnl looking like a macro parameter. For example,
37: dnl
38: dnl define(foo, `mov $ 123, %eax')
39: dnl
40: dnl This is only a problem in macro definitions, not in ordinary text,
41: dnl nor in macro parameters like text passed to forloop() or ifdef().
42:
43:
dnl BYTES_PER_MP_LIMB is fixed at 4 for this file (presumably the size in
dnl bytes of one limb on 32-bit x86 -- confirm against the C headers).
dnl deflit() rather than define() is used for the reason given in note 1
dnl of the Notes section of this file.
44: deflit(BYTES_PER_MP_LIMB, 4)
45:
46:
47: dnl --------------------------------------------------------------------------
48: dnl Replacement PROLOGUE/EPILOGUE with more sophisticated error checking.
49: dnl Nesting and overlapping not allowed.
50: dnl
51:
52:
53: dnl Usage: PROLOGUE(functionname)
54: dnl
55: dnl Generate a function prologue. functionname gets GSYM_PREFIX added.
56: dnl Examples,
57: dnl
58: dnl PROLOGUE(mpn_add_n)
59: dnl PROLOGUE(somefun)
60:
dnl PROLOGUE(functionname)
dnl
dnl If PROLOGUE_current_function is still defined (a previous PROLOGUE has
dnl not been closed by an EPILOGUE) an m4_error is raised.  Otherwise
dnl functionname is recorded in PROLOGUE_current_function and
dnl PROLOGUE_cpu() is expanded on GSYM_PREFIX followed by functionname.
dnl m4_file_seen() is defined elsewhere; its effect is not visible here.
61: define(`PROLOGUE',
62: m4_assert_numargs(1)
63: m4_assert_defined(`PROLOGUE_cpu')
64: `ifdef(`PROLOGUE_current_function',
65: `m4_error(`PROLOGUE'(`PROLOGUE_current_function') needs an `EPILOGUE'() before `PROLOGUE'($1)
66: )')dnl
67: m4_file_seen()dnl
68: define(`PROLOGUE_current_function',`$1')dnl
69: PROLOGUE_cpu(GSYM_PREFIX`'$1)')
70:
71:
72: dnl Usage: EPILOGUE()
73: dnl
74: dnl Notice the function name is passed to EPILOGUE_cpu(), letting it use $1
75: dnl instead of the long PROLOGUE_current_function symbol.
76:
dnl EPILOGUE()
dnl
dnl Raises an m4_error if PROLOGUE_current_function is not defined, ie.
dnl there is no open PROLOGUE.  Otherwise expands EPILOGUE_cpu() on
dnl GSYM_PREFIX followed by the recorded function name, and then undefines
dnl PROLOGUE_current_function so that a new PROLOGUE is accepted.
77: define(`EPILOGUE',
78: m4_assert_numargs(0)
79: m4_assert_defined(`EPILOGUE_cpu')
80: `ifdef(`PROLOGUE_current_function',,
81: `m4_error(`EPILOGUE'() with no `PROLOGUE'()
82: )')dnl
83: EPILOGUE_cpu(GSYM_PREFIX`'PROLOGUE_current_function)`'dnl
84: undefine(`PROLOGUE_current_function')')
85:
dnl Register a check for a PROLOGUE left open: when the wrapped text runs
dnl and PROLOGUE_current_function is still defined, an m4_error naming
dnl that function is raised.  m4wrap_prepend is defined elsewhere;
dnl presumably it queues the text for end of input like m4wrap -- confirm.
86: m4wrap_prepend(
87: `ifdef(`PROLOGUE_current_function',
88: `m4_error(`EPILOGUE() for PROLOGUE('PROLOGUE_current_function`) never seen
89: ')')')
90:
91:
92: dnl Usage: PROLOGUE_assert_inside()
93: dnl
94: dnl Use this unquoted on a line on its own at the start of a macro
95: dnl definition to add some code to check the macro is only used inside a
96: dnl PROLOGUE/EPILOGUE pair, and that hence PROLOGUE_current_function is
97: dnl defined.
98:
dnl The expansion is the text of a call to PROLOGUE_assert_inside_internal
dnl passing $0 (through m4_doublequote), followed by a dnl.  Used unquoted
dnl in another macro's define(), that text becomes the start of the other
dnl macro's body, so the $0 seen at run time is the other macro's name.
dnl The $`'0 spelling stops this define() substituting its own $0.
99: define(PROLOGUE_assert_inside,
100: m4_assert_numargs(0)
101: ``PROLOGUE_assert_inside_internal'(m4_doublequote($`'0))`dnl '')
102:
dnl Called: PROLOGUE_assert_inside_internal(macroname)
dnl
dnl Raises an m4_error naming macroname if PROLOGUE_current_function is
dnl not defined; expands to nothing otherwise.
103: define(PROLOGUE_assert_inside_internal,
104: m4_assert_numargs(1)
105: `ifdef(`PROLOGUE_current_function',,
106: `m4_error(`$1 used outside a PROLOGUE / EPILOGUE pair
107: ')')')
108:
109:
110: dnl Usage: L(labelname)
111: dnl LF(functionname,labelname)
112: dnl
113: dnl Generate a local label in the current or given function. For LF(),
114: dnl functionname gets GSYM_PREFIX added, the same as with PROLOGUE().
115: dnl
116: dnl For example, in a function mpn_add_n (and with MPN_PREFIX __gmpn),
117: dnl
118: dnl L(bar) => L__gmpn_add_n__bar
119: dnl LF(somefun,bar) => Lsomefun__bar
120: dnl
121: dnl The function name and label name get two underscores between them rather
122: dnl than one to guard against clashing with a separate external symbol that
123: dnl happened to be called functionname_labelname. (Though this would only
124: dnl happen if the local label prefix is empty.) Underscores are used so
125: dnl the whole label will still be a valid C identifier and so can be easily
126: dnl used in gdb.
127:
128: dnl LSYM_PREFIX can be L$, so defn() is used to prevent L expanding as the
129: dnl L macro and making an infinite recursion.
dnl LF(functionname,labelname) expands to, in order: the definition of
dnl LSYM_PREFIX (fetched with defn so it is not rescanned), GSYM_PREFIX,
dnl functionname, two underscores, and labelname.
130: define(LF,
131: m4_assert_numargs(2)
132: m4_assert_defined(`LSYM_PREFIX')
133: `defn(`LSYM_PREFIX')GSYM_PREFIX`'$1`'__$2')
134:
dnl L(labelname) is LF() applied to PROLOGUE_current_function, ie. a local
dnl label in the function currently open.  PROLOGUE_assert_inside() adds
dnl the check that it is only used between PROLOGUE and EPILOGUE.
135: define(`L',
136: m4_assert_numargs(1)
137: PROLOGUE_assert_inside()
138: `LF(PROLOGUE_current_function,`$1')')
139:
140:
141: dnl Called: PROLOGUE_cpu(gsym)
142: dnl EPILOGUE_cpu(gsym)
143:
dnl PROLOGUE_cpu(gsym) emits three lines: GLOBL gsym, TYPE(gsym,function),
dnl and the label "gsym:".  GLOBL and TYPE are defined elsewhere
dnl (presumably in the generated configuration -- confirm).
144: define(PROLOGUE_cpu,
145: m4_assert_numargs(1)
146: `GLOBL $1
147: TYPE($1,`function')
148: $1:')
149:
dnl EPILOGUE_cpu(gsym) emits SIZE(gsym,.-gsym), ie. the distance from the
dnl gsym label to the current location.  SIZE is defined elsewhere.
150: define(EPILOGUE_cpu,
151: m4_assert_numargs(1)
152: ` SIZE($1,.-$1)')
153:
154:
155:
156: dnl --------------------------------------------------------------------------
157: dnl Various x86 macros.
158: dnl
159:
160:
161: dnl Usage: ALIGN_OFFSET(bytes,offset)
162: dnl
163: dnl Align to `offset' away from a multiple of `bytes'.
164: dnl
165: dnl This is useful for testing, for example align to something very strict
166: dnl and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
167: dnl
168: dnl Generally you wouldn't execute across the padding, but it's done with
169: dnl nop's so it'll work.
170:
dnl ALIGN_OFFSET(bytes,offset) emits ALIGN(bytes) and then one nop line
dnl for each value of the forloop counter from 1 to offset.  Each nop is
dnl assumed to assemble to one byte, making the padding `offset' bytes.
171: define(ALIGN_OFFSET,
172: m4_assert_numargs(2)
173: `ALIGN($1)
174: forloop(`i',1,$2,` nop
175: ')')
176:
177:
178: dnl Usage: defframe(name,offset)
179: dnl
180: dnl Make a definition like the following with which to access a parameter
181: dnl or variable on the stack.
182: dnl
183: dnl define(name,`FRAME+offset(%esp)')
184: dnl
185: dnl Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
186: dnl byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
187: dnl Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
188: dnl zero offset is wanted.
189: dnl
190: dnl The new macro also gets a check that when it's used FRAME is actually
191: dnl defined, and that the final %esp offset isn't negative, which would
192: dnl mean an attempt to access something below the current %esp.
193: dnl
194: dnl deflit() is used rather than a plain define(), so the new macro won't
195: dnl delete any following parenthesized expression. name(%edi) will come
196: dnl out say as 16(%esp)(%edi). This isn't valid assembler and should
197: dnl provoke an error, which is better than silently giving just 16(%esp).
198: dnl
199: dnl See README.family for more on the suggested way to access the stack
200: dnl frame.
201:
dnl defframe(name,offset) creates `name' with deflit().  The body given to
dnl deflit() is quoted, so FRAME is looked at each time `name' is used, not
dnl when defframe() is called.  On each use: FRAME must be defined,
dnl defframe_check_notbelow() rejects a negative FRAME+offset, and the
dnl result is defframe_empty_if_zero(FRAME+(offset)) followed by (%esp).
202: define(defframe,
203: m4_assert_numargs(2)
204: `deflit(`$1',
205: m4_assert_defined(`FRAME')
206: `defframe_check_notbelow(`$1',$2,FRAME)dnl
207: defframe_empty_if_zero(FRAME+($2))(%esp)')')
208:
209: dnl Called: defframe_empty_if_zero(expression)
dnl When defframe_empty_if_zero_disabled is 1 the expression is simply
dnl eval()ed, so a zero stays as 0.  Otherwise it is handed to
dnl m4_empty_if_zero() (defined elsewhere), which per the defframe notes
dnl gives an empty string for zero.
210: define(defframe_empty_if_zero,
211: `ifelse(defframe_empty_if_zero_disabled,1,
212: `eval($1)',
213: `m4_empty_if_zero($1)')')
214:
215: dnl Called: defframe_check_notbelow(`name',offset,FRAME)
dnl Raises an m4_error naming the variable when FRAME+offset is negative,
dnl ie. the access would be below the current %esp.  Expands to nothing
dnl when the sum is zero or positive.
216: define(defframe_check_notbelow,
217: m4_assert_numargs(3)
218: `ifelse(eval(($3)+($2)<0),1,
219: `m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
220: ')')')
221:
222:
223: dnl Usage: FRAME_pushl()
224: dnl FRAME_popl()
225: dnl FRAME_addl_esp(n)
226: dnl FRAME_subl_esp(n)
227: dnl
228: dnl Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
229: dnl %esp of n bytes.
230: dnl
231: dnl Using these macros is completely optional. Sometimes it makes more
232: dnl sense to put explicit deflit(`FRAME',N) forms, especially when there's
233: dnl jumps and different sequences of FRAME values need to be used in
234: dnl different places.
235:
dnl FRAME_pushl() redefines FRAME as its current value plus 4, matching
dnl the 4 bytes a pushl puts on the stack.  FRAME must already be defined.
236: define(FRAME_pushl,
237: m4_assert_numargs(0)
238: m4_assert_defined(`FRAME')
239: `deflit(`FRAME',eval(FRAME+4))')
240:
dnl FRAME_popl() redefines FRAME as its current value minus 4, matching
dnl the 4 bytes a popl removes from the stack.  FRAME must already be
dnl defined.
241: define(FRAME_popl,
242: m4_assert_numargs(0)
243: m4_assert_defined(`FRAME')
244: `deflit(`FRAME',eval(FRAME-4))')
245:
dnl FRAME_addl_esp(n) redefines FRAME as its current value minus n, for
dnl use after an addl of n bytes to %esp.  n is parenthesized so an
dnl expression can be given.
246: define(FRAME_addl_esp,
247: m4_assert_numargs(1)
248: m4_assert_defined(`FRAME')
249: `deflit(`FRAME',eval(FRAME-($1)))')
250:
dnl FRAME_subl_esp(n) redefines FRAME as its current value plus n, for use
dnl after a subl of n bytes from %esp.  n is parenthesized so an
dnl expression can be given.
251: define(FRAME_subl_esp,
252: m4_assert_numargs(1)
253: m4_assert_defined(`FRAME')
254: `deflit(`FRAME',eval(FRAME+($1)))')
255:
256:
257: dnl Usage: defframe_pushl(name)
258: dnl
259: dnl Do a combination of a FRAME_pushl() and a defframe() to name the stack
260: dnl location just pushed. This should come after a pushl instruction.
261: dnl Putting it on the same line works and avoids lengthening the code. For
262: dnl example,
263: dnl
264: dnl pushl %eax defframe_pushl(VAR_COUNTER)
265: dnl
266: dnl Notice the defframe() is done with an unquoted -FRAME thus giving its
267: dnl current value without tracking future changes.
268:
dnl FRAME_pushl() runs first, so the -FRAME passed to defframe() is the
dnl already increased value; name then resolves to FRAME-thatvalue, which
dnl is 0(%esp) straight after the push.
dnl NOTE(review): unlike the FRAME_ macros there is no m4_assert_numargs
dnl check on this macro.
269: define(defframe_pushl,
270: `FRAME_pushl()defframe(`$1',-FRAME)')
271:
272:
273: dnl --------------------------------------------------------------------------
274: dnl Assembler instruction macros.
275: dnl
276:
277:
278: dnl Usage: emms_or_femms
279: dnl femms_available_p
280: dnl
281: dnl femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
282: dnl femms instruction is available. emms_or_femms expands to femms if
283: dnl available, or emms if not.
284: dnl
285: dnl emms_or_femms is meant for use in the K6 directory where plain K6
286: dnl (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
287: dnl supported together.
288: dnl
289: dnl On K7 femms is no longer faster and is just an alias for emms, so plain
290: dnl emms may as well be used.
291:
dnl Expands to the result of m4_ifdef_anyof_p() over the k62, k63 and
dnl athlon HAVE_TARGET_CPU symbols.  m4_assert_numargs(-1) is defined
dnl elsewhere; presumably it insists on use without parentheses -- confirm.
292: define(femms_available_p,
293: m4_assert_numargs(-1)
294: `m4_ifdef_anyof_p(
295: `HAVE_TARGET_CPU_k62',
296: `HAVE_TARGET_CPU_k63',
297: `HAVE_TARGET_CPU_athlon')')
298:
dnl Expands to the word femms when femms_available_p is 1, and to emms
dnl otherwise.
299: define(emms_or_femms,
300: m4_assert_numargs(-1)
301: `ifelse(femms_available_p,1,`femms',`emms')')
302:
303:
304: dnl Usage: femms
305: dnl
306: dnl The gas 2.9.1 that comes with FreeBSD 3.4 doesn't support femms, so the
307: dnl following is a replacement using .byte.
308: dnl
309: dnl If femms isn't available, an emms is generated instead, for convenience
310: dnl when testing on a machine without femms.
311:
dnl When femms_available_p is 1 the instruction is emitted as the two
dnl bytes 15,14 with a trailing C comment.  Otherwise a plain emms is
dnl emitted and m4_warning() reports the substitution.
312: define(femms,
313: m4_assert_numargs(-1)
314: `ifelse(femms_available_p,1,
315: `.byte 15,14 C AMD 3DNow femms',
316: `emms`'dnl
317: m4_warning(`warning, using emms in place of femms, use for testing only
318: ')')')
319:
320:
321: dnl Usage: jadcl0(op)
322: dnl
323: dnl Issue a jnc/incl as a substitute for adcl $0,op. This isn't an exact
324: dnl replacement, since it doesn't set the flags like adcl does.
325: dnl
326: dnl This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
327: dnl mpn_sqr_basecase because on K6 an adcl is slow, the branch
328: dnl misprediction penalty is small, and the multiply algorithm used leads
329: dnl to a carry bit on average only 1/4 of the time.
330: dnl
331: dnl jadcl0_disabled can be set to 1 to instead issue an ordinary adcl for
332: dnl comparison. For example,
333: dnl
334: dnl define(`jadcl0_disabled',1)
335: dnl
336: dnl When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
337: dnl the same size as an adcl. This makes it possible to use the exact same
338: dnl computed jump code when testing the relative speed of jnc/incl and adcl
339: dnl with jadcl0_disabled.
340:
dnl With jadcl0_disabled set to 1 this emits "adcl $0, op"; the $`'0
dnl spelling keeps the immediate from being taken as macro parameter $0.
dnl Otherwise it emits a jnc over an incl of op, using numeric local label
dnl 1, so an enclosing use of label 1 around this macro would be shadowed.
341: define(jadcl0,
342: m4_assert_numargs(1)
343: `ifelse(jadcl0_disabled,1,
344: `adcl $`'0, $1',
345: `jnc 1f
346: incl $1
347: 1:dnl')')
348:
349:
350: dnl Usage: cmov_available_p
351: dnl
352: dnl Expand to 1 if cmov is available, 0 if not.
353:
dnl Expands to the result of m4_ifdef_anyof_p() over the pentiumpro,
dnl pentium2, pentium3 and athlon HAVE_TARGET_CPU symbols.
dnl NOTE(review): femms_available_p carries m4_assert_numargs(-1); this
dnl macro has no argument-count check.
354: define(cmov_available_p,
355: `m4_ifdef_anyof_p(
356: `HAVE_TARGET_CPU_pentiumpro',
357: `HAVE_TARGET_CPU_pentium2',
358: `HAVE_TARGET_CPU_pentium3',
359: `HAVE_TARGET_CPU_athlon')')
360:
361:
362: dnl Usage: x86_lookup(target, key,value, key,value, ...)
363: dnl x86_lookup_p(target, key,value, key,value, ...)
364: dnl
365: dnl Look for `target' among the `key' parameters.
366: dnl
367: dnl x86_lookup expands to the corresponding `value', or generates an error
368: dnl if `target' isn't found.
369: dnl
370: dnl x86_lookup_p expands to 1 if `target' is found, or 0 if not.
371:
dnl Recursive search: with fewer than three arguments left there is no
dnl key,value pair to test, so m4_error is raised.  Otherwise if target
dnl equals the first key the first value is the result, else the first
dnl pair is dropped with three shifts and the search repeats on the rest.
372: define(x86_lookup,
373: `ifelse(eval($#<3),1,
374: `m4_error(`unrecognised part of x86 instruction: $1
375: ')',
376: `ifelse(`$1',`$2', `$3',
377: `x86_lookup(`$1',shift(shift(shift($@))))')')')
378:
dnl Same recursion as x86_lookup, but the result is 1 when target equals
dnl some key and 0 when the pairs run out, with no error raised.
379: define(x86_lookup_p,
380: `ifelse(eval($#<3),1, `0',
381: `ifelse(`$1',`$2', `1',
382: `x86_lookup_p(`$1',shift(shift(shift($@))))')')')
383:
384:
385: dnl Usage: x86_opcode_reg32(reg)
386: dnl x86_opcode_reg32_p(reg)
387: dnl
388: dnl x86_opcode_reg32 expands to the standard 3 bit encoding for the given
389: dnl 32-bit register, eg. `%ebp' turns into 5.
390: dnl
391: dnl x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
392: dnl if not.
393:
dnl Looks reg up in x86_opcode_reg32_list with x86_lookup, so an unknown
dnl register name is an m4_error.
394: define(x86_opcode_reg32,
395: m4_assert_numargs(1)
396: `x86_lookup(`$1',x86_opcode_reg32_list)')
397:
dnl Looks reg up in x86_opcode_reg32_list with x86_lookup_p, giving 1 or 0
dnl and never an error.  m4_assert_onearg() is defined elsewhere;
dnl presumably it differs from m4_assert_numargs(1) in what it accepts as
dnl the single argument -- confirm.
398: define(x86_opcode_reg32_p,
399: m4_assert_onearg()
400: `x86_lookup_p(`$1',x86_opcode_reg32_list)')
401:
dnl key,value list for x86_lookup: each 32-bit register name paired with
dnl its encoding number, 0 for %eax through 7 for %edi.
402: define(x86_opcode_reg32_list,
403: ``%eax',0,
404: `%ecx',1,
405: `%edx',2,
406: `%ebx',3,
407: `%esp',4,
408: `%ebp',5,
409: `%esi',6,
410: `%edi',7')
411:
412:
413: dnl Usage: x86_opcode_tttn(cond)
414: dnl
415: dnl Expand to the 4-bit "tttn" field value for the given x86 branch
416: dnl condition (like `c', `ae', etc).
417:
dnl x86_opcode_tttn(cond) looks cond up in x86_opcode_tttn_list using
dnl x86_lookup, so an unknown condition is an m4_error.  The list name
dnl must be spelt with three t's to match the table defined in this file;
dnl a misspelt name is left as plain text and no condition is ever found.
418: define(x86_opcode_tttn,
419: m4_assert_numargs(1)
420: `x86_lookup(`$1',x86_opcode_tttn_list)')
421:
dnl key,value list pairing each condition mnemonic with its tttn value, 0
dnl to 15.  Each row holds the alternative spellings of one condition, all
dnl with the same value.  The list is also passed to define_cmov_many in
dnl this file to create the cmovCC macros.
422: define(x86_opcode_tttn_list,
423: ``o', 0,
424: `no', 1,
425: `b', 2, `c', 2, `nae',2,
426: `nb', 3, `nc', 3, `ae', 3,
427: `e', 4, `z', 4,
428: `ne', 5, `nz', 5,
429: `be', 6, `na', 6,
430: `nbe', 7, `a', 7,
431: `s', 8,
432: `ns', 9,
433: `p', 10, `pe', 10, `npo',10,
434: `np', 11, `npe',11, `po', 11,
435: `l', 12, `nge',12,
436: `nl', 13, `ge', 13,
437: `le', 14, `ng', 14,
438: `nle',15, `g', 15')
439:
440:
441: dnl Usage: cmovCC(srcreg,dstreg)
442: dnl
443: dnl Generate a cmov instruction if the target supports cmov, or simulate it
444: dnl with a conditional jump if not (the latter being meant only for
445: dnl testing). For example,
446: dnl
447: dnl cmovz( %eax, %ebx)
448: dnl
449: dnl cmov instructions are generated using .byte sequences, since only
450: dnl recent versions of gas know cmov.
451: dnl
452: dnl The source operand can only be a plain register. (m4 code implementing
453: dnl full memory addressing modes exists, believe it or not, but isn't
454: dnl currently needed and isn't included.)
455: dnl
456: dnl All the standard conditions are defined. Attempting to use one without
457: dnl the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
458: dnl an error. This ensures the necessary .byte sequences aren't
459: dnl accidentally missed.
460:
461: dnl Called: define_cmov_many(cond,tttn,cond,tttn,...)
dnl Walks the cond,tttn pairs: stops when the first argument is empty,
dnl otherwise calls define_cmov on the first pair and recurses on the
dnl remainder after two shifts.
462: define(define_cmov_many,
463: `ifelse(m4_length(`$1'),0,,
464: `define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
465:
466: dnl Called: define_cmov(cond,tttn)
dnl Defines the macro cmov<cond>.  The new macro takes two arguments and
dnl calls cmov_internal with its own name ($0), cond, tttn, and its two
dnl arguments.  The $`'0, $`'1, $`'2 spellings defer those parameters to
dnl the new macro instead of substituting them from define_cmov itself.
dnl m4_instruction_wrapper() is defined elsewhere; presumably it provides
dnl the error on use without parentheses described for cmovCC -- confirm.
467: define(define_cmov,
468: m4_assert_numargs(2)
469: `define(`cmov$1',
470: m4_instruction_wrapper()
471: m4_assert_numargs(2)
472: `cmov_internal'(m4_doublequote($`'0),``$1',`$2'',dnl
473: m4_doublequote($`'1),m4_doublequote($`'2)))')
474:
dnl Create one cmovCC macro for every condition in x86_opcode_tttn_list.
475: define_cmov_many(x86_opcode_tttn_list)
476:
477:
478: dnl Called: cmov_internal(name,cond,tttn,src,dst)
dnl When cmov_available_p is 1 the instruction is emitted as bytes by
dnl cmov_bytes_tttn(name,tttn,src,dst).  Otherwise m4_warning() is called
dnl and cmov_simulate(cond,src,dst) emits a jump based substitute.
479: define(cmov_internal,
480: m4_assert_numargs(5)
481: `ifelse(cmov_available_p,1,
482: `cmov_bytes_tttn(`$1',`$3',`$4',`$5')',
483: `m4_warning(`warning, simulating cmov with jump, use for testing only
484: ')cmov_simulate(`$2',`$4',`$5')')')
485:
486: dnl Called: cmov_simulate(cond,src,dst)
487: dnl If this is going to be used with memory operands for the source it will
488: dnl need to be changed to do a fetch even if the condition is false, so as
489: dnl to trigger exceptions the same way a real cmov does.
dnl Emitted sequence: j<cond> to label 1 where the movl src,dst is done;
dnl if the condition is false, fall through to a jmp over the movl to
dnl label 2.  Numeric local labels 1 and 2 are used.
490: define(cmov_simulate,
491: m4_assert_numargs(3)
492: `j$1 1f C cmov$1 $2, $3
493: jmp 2f
494: 1: movl $2, $3
495: 2:')
496:
497: dnl Called: cmov_bytes_tttn(name,tttn,src,dst)
dnl Emits a three byte .byte sequence: 15, then 64+tttn, then
dnl 192 + 8*(encoding of dst) + (encoding of src), the register encodings
dnl coming from x86_opcode_reg32.  The original "name src, dst" text is
dnl appended after a C comment marker.
498: define(cmov_bytes_tttn,
499: m4_assert_numargs(4)
500: `.byte dnl
501: 15, dnl
502: eval(64+$2), dnl
503: eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
504: C `$1 $3, $4'')
505:
506:
507: dnl Usage: loop_or_decljnz label
508: dnl
509: dnl Generate either a "loop" instruction or a "decl %ecx / jnz", whichever
510: dnl is better. "loop" is better on K6 and probably on 386, on other chips
511: dnl separate decl/jnz is better.
512: dnl
513: dnl This macro is just for mpn/x86/divrem_1.asm and mpn/x86/mod_1.asm where
514: dnl this loop_or_decljnz variation is enough to let the code be shared by
515: dnl all chips.
516:
dnl Expands to the word loop when loop_is_better_p is 1, otherwise to a
dnl decl %ecx line followed by the word jnz.  The label written after the
dnl macro in the source becomes the operand of the loop or jnz.
517: define(loop_or_decljnz,
518: `ifelse(loop_is_better_p,1,
519: `loop',
520: `decl %ecx
521: jnz')')
522:
dnl Expands to the result of m4_ifdef_anyof_p() over the k6, k62, k63 and
dnl i386 HAVE_TARGET_CPU symbols.
523: define(loop_is_better_p,
524: `m4_ifdef_anyof_p(`HAVE_TARGET_CPU_k6',
525: `HAVE_TARGET_CPU_k62',
526: `HAVE_TARGET_CPU_k63',
527: `HAVE_TARGET_CPU_i386')')
528:
529:
530: dnl Usage: Zdisp(inst,op,op,op)
531: dnl
532: dnl Generate explicit .byte sequences if necessary to force a byte-sized
533: dnl zero displacement on an instruction. For example,
534: dnl
535: dnl Zdisp( movl, 0,(%esi), %eax)
536: dnl
537: dnl expands to
538: dnl
539: dnl .byte 139,70,0 C movl 0(%esi), %eax
540: dnl
541: dnl If the displacement given isn't 0, then normal assembler code is
542: dnl generated. For example,
543: dnl
544: dnl Zdisp( movl, 4,(%esi), %eax)
545: dnl
546: dnl expands to
547: dnl
548: dnl movl 4(%esi), %eax
549: dnl
550: dnl This means a single Zdisp() form can be used with an expression for the
551: dnl displacement, and .byte will be used only if necessary. The
552: dnl displacement argument is eval()ed.
553: dnl
554: dnl Because there aren't many places a 0(reg) form is wanted, Zdisp is
555: dnl implemented with a table of instructions and encodings. A new entry is
556: dnl needed for any different operation or registers.
557:
dnl Zdisp_found is reset to 0, then the caller's arguments ($@) are tried
dnl against every table row with Zdisp_match; a matching row sets
dnl Zdisp_found to 1 and emits the output.  If no row matched, m4_error is
dnl raised.  Each row is: instruction, its three operand pieces with a 0
dnl in the displacement position, and the bytes to emit when the caller's
dnl displacement evaluates to zero.
558: define(Zdisp,
559: `define(`Zdisp_found',0)dnl
560: Zdisp_match( movl, %eax, 0,(%edi), `137,71,0', $@)`'dnl
561: Zdisp_match( movl, %ebx, 0,(%edi), `137,95,0', $@)`'dnl
562: Zdisp_match( movl, %esi, 0,(%edi), `137,119,0', $@)`'dnl
563: Zdisp_match( movl, 0,(%ebx), %eax, `139,67,0', $@)`'dnl
564: Zdisp_match( movl, 0,(%ebx), %esi, `139,115,0', $@)`'dnl
565: Zdisp_match( movl, 0,(%esi), %eax, `139,70,0', $@)`'dnl
566: Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
567: Zdisp_match( addl, %ebx, 0,(%edi), `1,95,0', $@)`'dnl
568: Zdisp_match( addl, %ecx, 0,(%edi), `1,79,0', $@)`'dnl
569: Zdisp_match( addl, %esi, 0,(%edi), `1,119,0', $@)`'dnl
570: Zdisp_match( subl, %ecx, 0,(%edi), `41,79,0', $@)`'dnl
571: Zdisp_match( adcl, 0,(%edx), %esi, `19,114,0', $@)`'dnl
572: Zdisp_match( sbbl, 0,(%edx), %esi, `27,114,0', $@)`'dnl
573: Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
574: Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
575: Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
576: Zdisp_match( movq, 0,(%esi), %mm0, `15,111,70,0', $@)`'dnl
577: Zdisp_match( movq, %mm0, 0,(%edi), `15,127,71,0', $@)`'dnl
578: Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
579: Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
580: Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
581: Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
582: Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
583: Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
584: Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
585: Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
586: ifelse(Zdisp_found,0,
587: `m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
588: ')')')
589:
dnl Called: Zdisp_match(inst,op,op,op, bytes, inst,op,op,op)
dnl
dnl $1-$4 are a table row, $5 its byte encoding, $6-$9 the arguments given
dnl to Zdisp.  Two row shapes are handled:
dnl
dnl   row $2 is 0 (displacement first):  matches when the instruction, $3
dnl   and $4 equal the caller's $6, $8 and $9; the caller's displacement
dnl   is then $7.
dnl
dnl   row $3 is 0 (displacement second): matches when the instruction, $2
dnl   and $4 equal the caller's $6, $7 and $9; the caller's displacement
dnl   is then $8.
dnl
dnl On a match Zdisp_found is set to 1, and the output is ".byte bytes"
dnl with a C comment if the caller's displacement evals to 0, or the
dnl ordinary instruction text built from the caller's arguments if not.
590: define(Zdisp_match,
591: `ifelse(eval(m4_stringequal_p(`$1',`$6')
592: && m4_stringequal_p(`$2',0)
593: && m4_stringequal_p(`$3',`$8')
594: && m4_stringequal_p(`$4',`$9')),1,
595: `define(`Zdisp_found',1)dnl
596: ifelse(eval(`$7'),0,
597: ` .byte $5 C `$1 0$3, $4'',
598: ` $6 $7$8, $9')',
599:
600: `ifelse(eval(m4_stringequal_p(`$1',`$6')
601: && m4_stringequal_p(`$2',`$7')
602: && m4_stringequal_p(`$3',0)
603: && m4_stringequal_p(`$4',`$9')),1,
604: `define(`Zdisp_found',1)dnl
605: ifelse(eval(`$8'),0,
606: ` .byte $5 C `$1 $2, 0$4'',
607: ` $6 $7, $8$9')')')')
608:
609:
610: dnl Usage: shldl(count,src,dst)
611: dnl shrdl(count,src,dst)
612: dnl shldw(count,src,dst)
613: dnl shrdw(count,src,dst)
614: dnl
615: dnl Generate a double-shift instruction, possibly omitting a %cl count
616: dnl parameter if that's what the assembler requires, as indicated by
617: dnl WANT_SHLDL_CL in config.m4. For example,
618: dnl
619: dnl shldl( %cl, %eax, %ebx)
620: dnl
621: dnl turns into either
622: dnl
623: dnl shldl %cl, %eax, %ebx
624: dnl or
625: dnl shldl %eax, %ebx
626: dnl
627: dnl Immediate counts are always passed through unchanged. For example,
628: dnl
629: dnl shrdl( $2, %esi, %edi)
630: dnl becomes
631: dnl shrdl $2, %esi, %edi
632: dnl
633: dnl
634: dnl If you forget to use the macro form "shldl( ...)" and instead write
635: dnl just a plain "shldl ...", an error results. This ensures the necessary
636: dnl variant treatment of %cl isn't accidentally bypassed.
637:
dnl Called: define_shd_instruction(name)
dnl
dnl Defines `name' as a three argument macro that calls shd_instruction
dnl with its own name ($0) and its three arguments.  The $`'0 .. $`'3
dnl spellings defer those parameters to the new macro instead of
dnl substituting them from define_shd_instruction itself.
638: define(define_shd_instruction,
639: `define($1,
640: m4_instruction_wrapper()
641: m4_assert_numargs(3)
642: `shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
643: m4_doublequote($`'2),m4_doublequote($`'3)))')
644:
645: dnl Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
dnl Create the four double-shift wrappers: left and right, in l and w
dnl operand sizes.
646: define_shd_instruction(shldl)
647: define_shd_instruction(shrdl)
648: define_shd_instruction(shldw)
649: define_shd_instruction(shrdw)
650:
651: dnl Called: shd_instruction(op,count,src,dst)
dnl The count operand is dropped only when it is exactly %cl and
dnl WANT_SHLDL_CL is zero, giving "op src, dst".  In every other case,
dnl immediates included, all three operands are emitted.  WANT_SHLDL_CL
dnl must be defined.
652: define(shd_instruction,
653: m4_assert_numargs(4)
654: m4_assert_defined(`WANT_SHLDL_CL')
655: `ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
656: ``$1' `$3', `$4'',
657: ``$1' `$2', `$3', `$4'')')
658:
659:
660: dnl Usage: ASSERT(cond, instructions)
661: dnl
662: dnl If WANT_ASSERT is 1, output the given instructions and expect the given
663: dnl flags condition to then be satisfied. For example,
664: dnl
665: dnl ASSERT(ne, `cmpl %eax, %ebx')
666: dnl
667: dnl The instructions can be omitted to just assert a flags condition with
668: dnl no extra calculation. For example,
669: dnl
670: dnl ASSERT(nc)
671: dnl
672: dnl When `instructions' is not empty, a pushf/popf is added to preserve the
673: dnl flags, but the instructions themselves must preserve any registers that
674: dnl matter. FRAME is adjusted for the push and pop, so the instructions
675: dnl given can use defframe() stack variables.
676:
dnl Expands to nothing unless WANT_ASSERT is 1.  Otherwise the output is:
dnl a "C ASSERT" comment line; a pushf (with FRAME_pushl() if FRAME is
dnl defined) when instructions were given; the instructions; j<cond> to
dnl local label 1; a ud2 reached only when the condition is false; label
dnl 1; and the matching popf (with FRAME_popl() if FRAME is defined) when
dnl instructions were given.  Numeric local label 1 is used.
677: define(ASSERT,
678: m4_assert_numargs_range(1,2)
679: `ifelse(WANT_ASSERT,1,
680: `C ASSERT
681: ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')')
682: $2
683: j`$1' 1f
684: ud2 C assertion failed
685: 1:
686: ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')')
687: ')')
688:
689:
690: dnl Usage: movl_text_address(label,register)
691: dnl
692: dnl Get the address of a text segment label, using either a plain movl or a
693: dnl position-independent calculation, as necessary. For example,
694: dnl
695: dnl movl_text_address(L(foo),%eax)
696: dnl
697: dnl This macro is only meant for use in ASSERT()s or when testing, since
698: dnl the PIC sequence it generates will want to be done with a ret balancing
699: dnl the call on CPUs with return address branch prediction.
700: dnl
701: dnl The addl generated here has a backward reference to 1b, and so won't
702: dnl suffer from the two forwards references bug in old gas (described in
703: dnl mpn/x86/README.family).
704:
dnl With PIC defined: call to local label 1, popl the return address (the
dnl address of label 1) into the register, then addl label-1b to reach the
dnl wanted label.  Without PIC: a single movl of the label address as an
dnl immediate.  The `$' is quoted so the $ of the immediate is not joined
dnl to the following parameter number.  Numeric local label 1 is used.
705: define(movl_text_address,
706: `ifdef(`PIC',
707: `call 1f
708: 1: popl $2 C %eip
709: addl `$'$1-1b, $2',
710: `movl `$'$1, $2')')
711:
712:
713: divert`'dnl
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>