Annotation of OpenXM_contrib/gmp/mpn/x86/k6/aors_n.asm, Revision 1.1
1.1 ! maekawa 1: dnl AMD K6 mpn_add/sub_n -- mpn addition or subtraction.
! 2: dnl
! 3: dnl K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb.
! 4:
! 5:
! 6: dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
! 7: dnl
! 8: dnl This file is part of the GNU MP Library.
! 9: dnl
! 10: dnl The GNU MP Library is free software; you can redistribute it and/or
! 11: dnl modify it under the terms of the GNU Lesser General Public License as
! 12: dnl published by the Free Software Foundation; either version 2.1 of the
! 13: dnl License, or (at your option) any later version.
! 14: dnl
! 15: dnl The GNU MP Library is distributed in the hope that it will be useful,
! 16: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
! 17: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 18: dnl Lesser General Public License for more details.
! 19: dnl
! 20: dnl You should have received a copy of the GNU Lesser General Public
! 21: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
! 22: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
! 23: dnl Suite 330, Boston, MA 02111-1307, USA.
! 24:
! 25:
! 26: include(`../config.m4')
! 27:
! 28: dnl Bind M4_inst, both entry point names and M4_description to the add or subtract variant chosen by OPERATION_add_n / OPERATION_sub_n; with neither defined the build stops with an m4 error.
! 29: ifdef(`OPERATION_add_n', `
! 30: define(M4_inst, adcl)
! 31: define(M4_function_n, mpn_add_n)
! 32: define(M4_function_nc, mpn_add_nc)
! 33: define(M4_description, add)
! 34: ',`ifdef(`OPERATION_sub_n', `
! 35: define(M4_inst, sbbl)
! 36: define(M4_function_n, mpn_sub_n)
! 37: define(M4_function_nc, mpn_sub_nc)
! 38: define(M4_description, subtract)
! 39: ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
! 40: ')')')
! 41:
! 42: MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
! 43:
! 44:
! 45: C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
! 46: C mp_size_t size);
! 47: C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
! 48: C mp_size_t size, mp_limb_t carry);
! 49: C
! 50: C Calculate src1,size M4_description src2,size, and store the result in
! 51: C dst,size. The return value is the carry bit from the top of the result
! 52: C (1 or 0).
! 53: C
! 54: C The _nc version accepts 1 or 0 for an initial carry into the low limb of
! 55: C the calculation. Note values other than 1 or 0 here will lead to garbage
! 56: C results.
! 57: C
! 58: C Instruction decoding limits a normal dst=src1+src2 operation to 3 c/l, and
! 59: C an in-place dst+=src to 2.5 c/l. The unrolled loops have 1 cycle/loop of
! 60: C loop control, which with 4 limbs/loop means an extra 0.25 c/l.
! 61: dnl Stack locations of the arguments: fixed offsets from %esp, biased by FRAME.
! 62: define(PARAM_CARRY, `FRAME+20(%esp)')
! 63: define(PARAM_SIZE, `FRAME+16(%esp)')
! 64: define(PARAM_SRC2, `FRAME+12(%esp)')
! 65: define(PARAM_SRC1, `FRAME+8(%esp)')
! 66: define(PARAM_DST, `FRAME+4(%esp)')
! 67: deflit(`FRAME',0) dnl FRAME is 0 at entry; presumably FRAME_pushl adjusts it after each pushl, confirm in the macro definitions
! 68:
! 69: dnl minimum 5 because the unrolled code can't handle less
! 70: deflit(UNROLL_THRESHOLD, 5) dnl sizes below this take the simple one-limb-per-pass loop
! 71:
! 72: .text
! 73: ALIGN(32)
! 74:
! 75: PROLOGUE(M4_function_nc)
! 76: movl PARAM_CARRY, %eax C eax = initial carry supplied by the caller
! 77: jmp LF(M4_function_n,start) C join the shared body just past the xorl that zeroes eax
! 78: EPILOGUE()
! 79:
! 80:
! 81: PROLOGUE(M4_function_n)
! 82: xorl %eax, %eax C plain entry point: no carry-in
! 83: L(start): C M4_function_nc jumps here with its carry argument in eax
! 84: movl PARAM_SIZE, %ecx
! 85: pushl %ebx
! 86: FRAME_pushl()
! 87:
! 88: movl PARAM_SRC1, %ebx
! 89: pushl %edi
! 90: FRAME_pushl()
! 91:
! 92: movl PARAM_SRC2, %edx
! 93: cmpl $UNROLL_THRESHOLD, %ecx
! 94:
! 95: movl PARAM_DST, %edi C movl leaves the cmpl flags intact for the jae
! 96: jae L(unroll) C size >= UNROLL_THRESHOLD takes the unrolled code
! 97:
! 98: C NOTE(review): a size of 0 would make the loop instruction below wrap around; presumably callers guarantee size >= 1, confirm.
! 99: shrl %eax C initial carry flag (bit 0 of eax shifted into CF)
! 100:
! 101: C offset 0x21 here, close enough to aligned
! 102: L(simple):
! 103: C eax scratch
! 104: C ebx src1
! 105: C ecx counter
! 106: C edx src2
! 107: C esi
! 108: C edi dst
! 109: C ebp
! 110: C
! 111: C The store to (%edi) could be done with a stosl; it'd be smaller
! 112: C code, but there's no speed gain and a cld would have to be added
! 113: C (per mpn/x86/README.family).
! 114:
! 115: movl (%ebx), %eax
! 116: leal 4(%ebx), %ebx C pointers are advanced with leal, which does not touch CF
! 117:
! 118: M4_inst (%edx), %eax C add or subtract with the carry from the previous limb
! 119:
! 120: movl %eax, (%edi)
! 121: leal 4(%edi), %edi
! 122:
! 123: leal 4(%edx), %edx
! 124: loop L(simple) C loop decrements ecx without altering flags, so CF chains across limbs
! 125:
! 126:
! 127: movl $0, %eax C movl rather than xorl, which would clear the carry flag
! 128: popl %edi
! 129:
! 130: setc %al C return value: carry out of the top limb
! 131:
! 132: popl %ebx
! 133: ret
! 134:
! 135:
! 136: C -----------------------------------------------------------------------------
! 137: L(unroll):
! 138: C eax carry
! 139: C ebx src1
! 140: C ecx size
! 141: C edx src2
! 142: C esi
! 143: C edi dst
! 144: C ebp
! 145:
! 146: cmpl %edi, %ebx C dst == src1 ?
! 147: pushl %esi C pushl leaves the compare flags intact for the je
! 148:
! 149: je L(inplace)
! 150:
! 151: ifdef(`OPERATION_add_n',`
! 152: cmpl %edi, %edx C dst == src2, handled in place for add only
! 153:
! 154: je L(inplace_reverse)
! 155: ')
! 156:
! 157: movl %ecx, %esi
! 158:
! 159: andl $-4, %ecx C ecx = limbs for the unrolled loop, a multiple of 4
! 160: andl $3, %esi C esi = 0 to 3 leftover limbs
! 161:
! 162: leal (%ebx,%ecx,4), %ebx C point each operand just past the unrolled part
! 163: leal (%edx,%ecx,4), %edx
! 164: leal (%edi,%ecx,4), %edi
! 165:
! 166: negl %ecx C negative index, counts up to 0
! 167: shrl %eax C initial carry flag
! 168:
! 169: ALIGN(32)
! 170: L(normal_top):
! 171: C eax scratch
! 172: C ebx src1
! 173: C ecx counter, limbs, negative
! 174: C edx src2
! 175: C esi
! 176: C edi dst
! 177: C ebp
! 178:
! 179: movl (%ebx,%ecx,4), %eax
! 180: leal 5(%ecx), %ecx C +5 here and -1 from loop nets +4; hence the -20 displacements below
! 181: M4_inst -20(%edx,%ecx,4), %eax
! 182: movl %eax, -20(%edi,%ecx,4)
! 183:
! 184: movl 4-20(%ebx,%ecx,4), %eax
! 185: M4_inst 4-20(%edx,%ecx,4), %eax
! 186: movl %eax, 4-20(%edi,%ecx,4)
! 187:
! 188: movl 8-20(%ebx,%ecx,4), %eax
! 189: M4_inst 8-20(%edx,%ecx,4), %eax
! 190: movl %eax, 8-20(%edi,%ecx,4)
! 191:
! 192: movl 12-20(%ebx,%ecx,4), %eax
! 193: M4_inst 12-20(%edx,%ecx,4), %eax
! 194: movl %eax, 12-20(%edi,%ecx,4)
! 195:
! 196: loop L(normal_top) C exits with ecx == 0 and CF holding the running carry
! 197:
! 198:
! 199: decl %esi C decl does not modify CF, so the carry survives the leftover dispatch
! 200: jz L(normal_finish_one) C exactly one leftover limb, ecx is 0 here
! 201: js L(normal_done) C no leftover limbs
! 202:
! 203: C two or three more limbs
! 204:
! 205: movl (%ebx), %eax
! 206: M4_inst (%edx), %eax
! 207: movl %eax, (%edi)
! 208:
! 209: movl 4(%ebx), %eax
! 210: M4_inst 4(%edx), %eax
! 211: decl %esi
! 212: movl %eax, 4(%edi) C movl keeps the decl flags for the jz
! 213:
! 214: jz L(normal_done) C exactly two leftover limbs
! 215: movl $2, %ecx C index of the third leftover limb
! 216:
! 217: L(normal_finish_one):
! 218: movl (%ebx,%ecx,4), %eax
! 219: M4_inst (%edx,%ecx,4), %eax
! 220: movl %eax, (%edi,%ecx,4)
! 221:
! 222: L(normal_done):
! 223: popl %esi
! 224: popl %edi
! 225:
! 226: movl $0, %eax C movl rather than xorl so CF is preserved for setc
! 227: popl %ebx
! 228:
! 229: setc %al C return value: carry out of the top limb
! 230:
! 231: ret
! 232:
! 233:
! 234: C -----------------------------------------------------------------------------
! 235:
! 236: ifdef(`OPERATION_add_n',`
! 237: L(inplace_reverse):
! 238: C dst==src2, so src1 becomes the source operand
! 239:
! 240: movl %ebx, %edx C edx = src1
! 241: ')
! 242:
! 243: L(inplace):
! 244: C eax initial carry
! 245: C ebx src1, dead here and reloaded with the low src limb below
! 246: C ecx size
! 247: C edx src
! 248: C esi
! 249: C edi dst
! 250: C ebp
! 251:
! 252: leal -1(%ecx), %esi
! 253: decl %ecx C size-1: one limb is always left for the code after the loop
! 254:
! 255: andl $-4, %ecx C ecx = limbs for the unrolled loop, a multiple of 4
! 256: andl $3, %esi C esi = size-1 mod 4
! 257:
! 258: movl (%edx), %ebx C src low limb
! 259: leal (%edx,%ecx,4), %edx C point src and dst just past the unrolled part
! 260:
! 261: leal (%edi,%ecx,4), %edi
! 262: negl %ecx C negative index, counts up to 0
! 263:
! 264: shrl %eax C initial carry flag
! 265:
! 266:
! 267: ALIGN(32)
! 268: L(inplace_top):
! 269: C eax scratch
! 270: C ebx next src limb
! 271: C ecx counter, limbs, negative
! 272: C edx src
! 273: C esi
! 274: C edi dst
! 275: C ebp
! 276:
! 277: M4_inst %ebx, (%edi,%ecx,4)
! 278:
! 279: movl 4(%edx,%ecx,4), %eax
! 280: leal 5(%ecx), %ecx C +5 here and -1 from loop nets +4; hence the -20 displacements below
! 281:
! 282: M4_inst %eax, 4-20(%edi,%ecx,4)
! 283:
! 284: movl 8-20(%edx,%ecx,4), %eax
! 285: movl 12-20(%edx,%ecx,4), %ebx
! 286:
! 287: M4_inst %eax, 8-20(%edi,%ecx,4)
! 288: M4_inst %ebx, 12-20(%edi,%ecx,4)
! 289:
! 290: movl 16-20(%edx,%ecx,4), %ebx C preload the first limb of the next group
! 291: loop L(inplace_top)
! 292:
! 293:
! 294: C now %esi is 0 to 3 representing respectively 1 to 4 limbs more
! 295:
! 296: M4_inst %ebx, (%edi) C limb preloaded by the last loop pass
! 297:
! 298: decl %esi C decl does not modify CF, so the carry survives the dispatch
! 299: jz L(inplace_finish_one) C one more limb, ecx is 0 here
! 300: js L(inplace_done) C nothing more to do
! 301:
! 302: C two or three more limbs
! 303:
! 304: movl 4(%edx), %eax
! 305: movl 8(%edx), %ebx
! 306: M4_inst %eax, 4(%edi)
! 307: M4_inst %ebx, 8(%edi)
! 308:
! 309: decl %esi
! 310: movl $2, %ecx C index for a possible last limb; movl keeps the decl flags
! 311:
! 312: jz L(normal_done) C reuses the normal path epilogue, which is identical to inplace_done
! 313:
! 314: L(inplace_finish_one):
! 315: movl 4(%edx,%ecx,4), %eax
! 316: M4_inst %eax, 4(%edi,%ecx,4)
! 317:
! 318: L(inplace_done):
! 319: popl %esi
! 320: popl %edi
! 321:
! 322: movl $0, %eax C movl rather than xorl so CF is preserved for setc
! 323: popl %ebx
! 324:
! 325: setc %al C return value: carry out of the top limb
! 326:
! 327: ret
! 328:
! 329: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>