Annotation of OpenXM_contrib/gmp/mpn/x86/mul_basecase.asm, Revision 1.1
1.1 ! maekawa 1: dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result
! 2: dnl in a third limb vector.
! 3:
! 4:
! 5: dnl Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation,
! 6: dnl Inc.
! 7: dnl
! 8: dnl This file is part of the GNU MP Library.
! 9: dnl
! 10: dnl The GNU MP Library is free software; you can redistribute it and/or
! 11: dnl modify it under the terms of the GNU Lesser General Public License as
! 12: dnl published by the Free Software Foundation; either version 2.1 of the
! 13: dnl License, or (at your option) any later version.
! 14: dnl
! 15: dnl The GNU MP Library is distributed in the hope that it will be useful,
! 16: dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
! 17: dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 18: dnl Lesser General Public License for more details.
! 19: dnl
! 20: dnl You should have received a copy of the GNU Lesser General Public
! 21: dnl License along with the GNU MP Library; see the file COPYING.LIB. If
! 22: dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
! 23: dnl Suite 330, Boston, MA 02111-1307, USA.
! 24:
! 25:
! 26: include(`../config.m4')
! 27:
! 28:
! 29: C void mpn_mul_basecase (mp_ptr wp,
! 30: C mp_srcptr xp, mp_size_t xsize,
! 31: C mp_srcptr yp, mp_size_t ysize);
! 32: C
! 33: C This was written in haste since the Pentium optimized code that was used
! 34: C for all x86 machines was slow for the Pentium II. This code would benefit
! 35: C from some cleanup.
! 36: C
! 37: C To shave off some percentage of the run-time, one should make 4 variants
! 38: C of the L(outer) loop, for the four different outcomes of xsize mod 4. That
! 39: C would avoid L(oop0) altogether. Code expansion would be > 4-fold for that
! 40: C part of the function, but since it is not very large, that would be
! 41: C acceptable.
! 42: C
! 43: C The mul loop (at L(oopM)) might need some tweaking. Its current speed is
! 44: C unknown.
! 45:
! 46: defframe(PARAM_YSIZE,20) C 5th argument in the prototype: ysize
! 47: defframe(PARAM_YP, 16) C 4th argument: yp (rewritten by the outer loop as it steps through y)
! 48: defframe(PARAM_XSIZE,12) C 3rd argument: xsize
! 49: defframe(PARAM_XP, 8) C 2nd argument: xp
! 50: defframe(PARAM_WP, 4) C 1st argument: wp
! 51: C Local slots at negative offsets; the prologue reserves VAR_STACK_SPACE bytes for them.
! 52: defframe(VAR_MULTIPLIER, -4) C copy of the current y limb, used as the mull operand
! 53: defframe(VAR_COUNTER, -8) C number of outer-loop rows still to do
! 54: deflit(VAR_STACK_SPACE, 8) C bytes taken from esp for the two local slots
! 55:
! 56: .text
! 57: ALIGN(8)
! 58:
! 59: PROLOGUE(mpn_mul_basecase)
! 60: deflit(`FRAME',0)
! 61: C Writes wp[0 .. xsize+ysize-1] = xp[] * yp[]; the yp[0] row is stored, later rows are added in.
! 62: subl $VAR_STACK_SPACE,%esp C room for VAR_MULTIPLIER and VAR_COUNTER
! 63: pushl %esi
! 64: pushl %ebp
! 65: pushl %edi
! 66: deflit(`FRAME',eval(VAR_STACK_SPACE+12))
! 67: C Register roles: esi = xp cursor, edi = wp cursor, ebp = yp (reused as a carry limb in L(oopX)).
! 68: movl PARAM_XP,%esi
! 69: movl PARAM_WP,%edi
! 70: movl PARAM_YP,%ebp
! 71:
! 72: movl (%esi),%eax C load xp[0]
! 73: mull (%ebp) C multiply by yp[0]
! 74: movl %eax,(%edi) C store to wp[0]
! 75: movl PARAM_XSIZE,%ecx C xsize
! 76: decl %ecx C If xsize = 1, ysize = 1 too
! 77: jz L(done)
! 78: C More than one x limb: ebx is saved here and holds the carry limb from now on.
! 79: pushl %ebx
! 80: FRAME_pushl()
! 81: movl %edx,%ebx C carry limb = high half of xp[0] * yp[0]
! 82:
! 83: leal 4(%esi),%esi C step xp past limb 0
! 84: leal 4(%edi),%edi C step wp past limb 0
! 85: C First row: wp[j] = low(xp[j] * yp[0]) + carry for j = 1 .. xsize-1 (store, no accumulate).
! 86: L(oopM):
! 87: movl (%esi),%eax C load next limb at xp[j]
! 88: leal 4(%esi),%esi
! 89: mull (%ebp) C edx:eax = xp[j] * yp[0]
! 90: addl %ebx,%eax C add carry limb from the previous step
! 91: movl %edx,%ebx
! 92: adcl $0,%ebx C new carry limb = high half + carry out
! 93: movl %eax,(%edi) C store low half to wp[j]
! 94: leal 4(%edi),%edi
! 95: decl %ecx
! 96: jnz L(oopM)
! 97:
! 98: movl %ebx,(%edi) C most significant limb of product
! 99: addl $4,%edi C increment wp
! 100: movl PARAM_XSIZE,%eax
! 101: shll $2,%eax C eax = xsize limbs in bytes
! 102: subl %eax,%edi C rewind wp; net effect is one limb past where this row began
! 103: subl %eax,%esi C rewind xp to the first limb
! 104:
! 105: movl PARAM_YSIZE,%eax C ysize
! 106: decl %eax
! 107: jz L(skip) C single y limb: the first row is the whole product
! 108: movl %eax,VAR_COUNTER C rows still to do = ysize - 1
! 109: C Outer loop: one pass per remaining y limb, adding xp[] * (that limb) into wp.
! 110: L(outer):
! 111: movl PARAM_YP,%ebp C yp
! 112: addl $4,%ebp C make ebp point to next v limb
! 113: movl %ebp,PARAM_YP
! 114: movl (%ebp),%eax C copy y limb ...
! 115: movl %eax,VAR_MULTIPLIER C ... to stack slot
! 116: movl PARAM_XSIZE,%ecx
! 117:
! 118: xorl %ebx,%ebx C carry limb starts at zero for each row
! 119: andl $3,%ecx C ecx = xsize mod 4
! 120: jz L(end0)
! 121: C Handle the xsize mod 4 leading limbs one at a time.
! 122: L(oop0):
! 123: movl (%esi),%eax
! 124: mull VAR_MULTIPLIER C edx:eax = x limb * current y limb
! 125: leal 4(%esi),%esi
! 126: addl %ebx,%eax C low half + carry limb
! 127: movl $0,%ebx C zero ebx with movl so CF from the addl survives
! 128: adcl %ebx,%edx C fold that carry into the high half
! 129: addl %eax,(%edi) C accumulate into wp
! 130: adcl %edx,%ebx C propagate carry into cylimb
! 131:
! 132: leal 4(%edi),%edi
! 133: decl %ecx
! 134: jnz L(oop0)
! 135:
! 136: L(end0):
! 137: movl PARAM_XSIZE,%ecx
! 138: shrl $2,%ecx C ecx = number of whole 4-limb groups
! 139: jz L(endX)
! 140: C Unrolled by four; ebx and ebp alternate as the carry limb, zeroed with movl to keep CF.
! 141: ALIGN(8)
! 142: L(oopX):
! 143: movl (%esi),%eax
! 144: mull VAR_MULTIPLIER
! 145: addl %eax,%ebx C ebx = low half 0 + incoming carry limb
! 146: movl $0,%ebp
! 147: adcl %edx,%ebp C ebp = high half 0 + carry
! 148:
! 149: movl 4(%esi),%eax
! 150: mull VAR_MULTIPLIER
! 151: addl %ebx,(%edi) C accumulate limb 0 into wp
! 152: adcl %eax,%ebp C new lo + cylimb
! 153: movl $0,%ebx
! 154: adcl %edx,%ebx
! 155:
! 156: movl 8(%esi),%eax
! 157: mull VAR_MULTIPLIER
! 158: addl %ebp,4(%edi) C accumulate limb 1 into wp
! 159: adcl %eax,%ebx C new lo + cylimb
! 160: movl $0,%ebp
! 161: adcl %edx,%ebp
! 162:
! 163: movl 12(%esi),%eax
! 164: mull VAR_MULTIPLIER
! 165: addl %ebx,8(%edi) C accumulate limb 2 into wp
! 166: adcl %eax,%ebp C new lo + cylimb
! 167: movl $0,%ebx
! 168: adcl %edx,%ebx
! 169:
! 170: addl %ebp,12(%edi) C accumulate limb 3 into wp
! 171: adcl $0,%ebx C propagate carry into cylimb
! 172:
! 173: leal 16(%esi),%esi
! 174: leal 16(%edi),%edi
! 175: decl %ecx
! 176: jnz L(oopX)
! 177:
! 178: L(endX):
! 179: movl %ebx,(%edi) C final carry limb becomes the top limb of this row
! 180: addl $4,%edi
! 181:
! 182: C we incremented wp and xp in the loop above; compensate
! 183: movl PARAM_XSIZE,%eax
! 184: shll $2,%eax
! 185: subl %eax,%edi C net effect: wp is one limb further than at the start of this row
! 186: subl %eax,%esi C xp back at the first limb
! 187:
! 188: movl VAR_COUNTER,%eax
! 189: decl %eax
! 190: movl %eax,VAR_COUNTER
! 191: jnz L(outer)
! 192: C Common exit: undo the four pushes, then release the local slots.
! 193: L(skip):
! 194: popl %ebx
! 195: popl %edi
! 196: popl %ebp
! 197: popl %esi
! 198: addl $VAR_STACK_SPACE,%esp C mirror the subl in the prologue
! 199: ret
! 200: C Exit for xsize = 1: taken before ebx was pushed, so only three registers are popped.
! 201: L(done):
! 202: movl %edx,4(%edi) C store to wp[1]
! 203: popl %edi
! 204: popl %ebp
! 205: popl %esi
! 206: addl $VAR_STACK_SPACE,%esp C mirror the subl in the prologue
! 207: ret
! 208:
! 209: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>