[BACK]Return to mul_basecase.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86

Annotation of OpenXM_contrib/gmp/mpn/x86/mul_basecase.asm, Revision 1.1

1.1     ! maekawa     1: dnl  x86 mpn_mul_basecase -- Multiply two limb vectors and store the result
        !             2: dnl  in a third limb vector.
        !             3:
        !             4:
        !             5: dnl  Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation,
        !             6: dnl  Inc.
        !             7: dnl
        !             8: dnl  This file is part of the GNU MP Library.
        !             9: dnl
        !            10: dnl  The GNU MP Library is free software; you can redistribute it and/or
        !            11: dnl  modify it under the terms of the GNU Lesser General Public License as
        !            12: dnl  published by the Free Software Foundation; either version 2.1 of the
        !            13: dnl  License, or (at your option) any later version.
        !            14: dnl
        !            15: dnl  The GNU MP Library is distributed in the hope that it will be useful,
        !            16: dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            17: dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            18: dnl  Lesser General Public License for more details.
        !            19: dnl
        !            20: dnl  You should have received a copy of the GNU Lesser General Public
        !            21: dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
        !            22: dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
        !            23: dnl  Suite 330, Boston, MA 02111-1307, USA.
        !            24:
        !            25:
        !            26: include(`../config.m4')
        !            27:
        !            28:
        !            29: C void mpn_mul_basecase (mp_ptr wp,
        !            30: C                        mp_srcptr xp, mp_size_t xsize,
        !            31: C                        mp_srcptr yp, mp_size_t ysize);
        !            32: C
        !            33: C This was written in haste since the Pentium optimized code that was used
        !            34: C for all x86 machines was slow for the Pentium II.  This code would benefit
        !            35: C from some cleanup.
        !            36: C
        !            37: C To shave off some percentage of the run-time, one should make 4 variants
        !            38: C of the L(outer) loop, for the four different outcomes of xsize mod 4.
        !            39: C That would avoid L(oop0) altogether.  Code expansion would be > 4-fold for
        !            40: C that part of the function, but since it is not very large, that would be
        !            41: C acceptable.
        !            42: C
        !            43: C The mul loop (at L(oopM)) might need some tweaking.  Its current speed is
        !            44: C unknown.
        !            45:
        !            46: defframe(PARAM_YSIZE,20)     C ysize, last argument of the prototype
        !            47: defframe(PARAM_YP,   16)     C yp
        !            48: defframe(PARAM_XSIZE,12)     C xsize
        !            49: defframe(PARAM_XP,   8)      C xp
        !            50: defframe(PARAM_WP,   4)      C wp, first argument of the prototype
        !            51: C Local slots; presumably addressed below the return address (defframe is defined in config.m4, confirm there).
        !            52: defframe(VAR_MULTIPLIER, -4) C holds the current y limb inside L(outer)
        !            53: defframe(VAR_COUNTER,    -8) C holds the count of rows still to process
        !            54: deflit(VAR_STACK_SPACE,  8)  C bytes subtracted from esp at function entry
        !            55:
        !            56:        .text
        !            57:        ALIGN(8)
        !            58: C Writes the xsize+ysize limb product of {xp,xsize} and {yp,ysize} to wp.
        !            59: PROLOGUE(mpn_mul_basecase)
        !            60: deflit(`FRAME',0)
        !            61:        C Reserve the local slots, then save esi, ebp and edi.
        !            62:        subl    $VAR_STACK_SPACE,%esp   C room for VAR_MULTIPLIER, VAR_COUNTER
        !            63:        pushl   %esi
        !            64:        pushl   %ebp
        !            65:        pushl   %edi
        !            66: deflit(`FRAME',eval(VAR_STACK_SPACE+12))
        !            67:
        !            68:        movl    PARAM_XP,%esi           C esi = xp
        !            69:        movl    PARAM_WP,%edi           C edi = wp
        !            70:        movl    PARAM_YP,%ebp           C ebp = yp
        !            71:
        !            72:        movl    (%esi),%eax             C load xp[0]
        !            73:        mull    (%ebp)                  C multiply by yp[0]
        !            74:        movl    %eax,(%edi)             C store to wp[0]
        !            75:        movl    PARAM_XSIZE,%ecx        C xsize
        !            76:        decl    %ecx                    C If xsize = 1, ysize = 1 too
        !            77:        jz      L(done)
        !            78:
        !            79:        pushl   %ebx                    C ebx is only used on the multi-limb path
        !            80: FRAME_pushl()
        !            81:        movl    %edx,%ebx               C ebx = carry limb, high half of xp[0]*yp[0]
        !            82:
        !            83:        leal    4(%esi),%esi            C esi points at xp[1]
        !            84:        leal    4(%edi),%edi            C edi points at wp[1]
        !            85:        C First row: wp[j] = low(xp[j]*yp[0]) + carry limb, j = 1 .. xsize-1.
        !            86: L(oopM):
        !            87:        movl    (%esi),%eax             C load next limb at xp[j]
        !            88:        leal    4(%esi),%esi
        !            89:        mull    (%ebp)                  C edx:eax = xp[j] * yp[0]
        !            90:        addl    %ebx,%eax               C add carry limb from the previous step
        !            91:        movl    %edx,%ebx
        !            92:        adcl    $0,%ebx                 C new carry limb = high half + carry flag
        !            93:        movl    %eax,(%edi)             C store wp[j]
        !            94:        leal    4(%edi),%edi
        !            95:        decl    %ecx
        !            96:        jnz     L(oopM)
        !            97:
        !            98:        movl    %ebx,(%edi)             C most significant limb of product
        !            99:        addl    $4,%edi                 C increment wp
        !           100:        movl    PARAM_XSIZE,%eax
        !           101:        shll    $2,%eax                 C eax = xsize*4 bytes
        !           102:        subl    %eax,%edi               C edi = wp+4, start of the next row
        !           103:        subl    %eax,%esi               C esi = xp again
        !           104:
        !           105:        movl    PARAM_YSIZE,%eax        C ysize
        !           106:        decl    %eax
        !           107:        jz      L(skip)                 C ysize = 1: nothing more to add
        !           108:        movl    %eax,VAR_COUNTER        C rows still to do = ysize-1
        !           109:        C Each pass adds xp[] times the next y limb into wp[], one limb further up.
        !           110: L(outer):
        !           111:        movl    PARAM_YP,%ebp           C yp
        !           112:        addl    $4,%ebp                 C make ebp point to next y limb
        !           113:        movl    %ebp,PARAM_YP           C keep the advanced yp in the parameter slot
        !           114:        movl    (%ebp),%eax             C copy y limb ...
        !           115:        movl    %eax,VAR_MULTIPLIER     C ... to stack slot
        !           116:        movl    PARAM_XSIZE,%ecx
        !           117:
        !           118:        xorl    %ebx,%ebx               C carry limb = 0
        !           119:        andl    $3,%ecx                 C ecx = xsize mod 4
        !           120:        jz      L(end0)
        !           121:        C Handle the xsize mod 4 leading limbs one at a time.
        !           122: L(oop0):
        !           123:        movl    (%esi),%eax
        !           124:        mull    VAR_MULTIPLIER
        !           125:        leal    4(%esi),%esi
        !           126:        addl    %ebx,%eax               C low half + carry limb
        !           127:        movl    $0,%ebx                 C movl leaves the carry flag intact
        !           128:        adcl    %ebx,%edx               C edx = high half + carry flag
        !           129:        addl    %eax,(%edi)             C accumulate into wp
        !           130:        adcl    %edx,%ebx               C propagate carry into cylimb
        !           131:
        !           132:        leal    4(%edi),%edi
        !           133:        decl    %ecx
        !           134:        jnz     L(oop0)
        !           135:
        !           136: L(end0):
        !           137:        movl    PARAM_XSIZE,%ecx
        !           138:        shrl    $2,%ecx                 C ecx = xsize/4 unrolled passes
        !           139:        jz      L(endX)
        !           140:        C Four limbs per pass; ebx and ebp alternate as the carry limb.
        !           141:        ALIGN(8)
        !           142: L(oopX):
        !           143:        movl    (%esi),%eax
        !           144:        mull    VAR_MULTIPLIER
        !           145:        addl    %eax,%ebx       C low half + incoming carry limb
        !           146:        movl    $0,%ebp         C ebp is free here; L(outer) reloads yp from PARAM_YP
        !           147:        adcl    %edx,%ebp       C ebp = high half + carry flag
        !           148:
        !           149:        movl    4(%esi),%eax
        !           150:        mull    VAR_MULTIPLIER
        !           151:        addl    %ebx,(%edi)     C accumulate first limb into wp
        !           152:        adcl    %eax,%ebp       C new lo + cylimb
        !           153:        movl    $0,%ebx
        !           154:        adcl    %edx,%ebx
        !           155:
        !           156:        movl    8(%esi),%eax
        !           157:        mull    VAR_MULTIPLIER
        !           158:        addl    %ebp,4(%edi)    C accumulate second limb into wp
        !           159:        adcl    %eax,%ebx       C new lo + cylimb
        !           160:        movl    $0,%ebp
        !           161:        adcl    %edx,%ebp
        !           162:
        !           163:        movl    12(%esi),%eax
        !           164:        mull    VAR_MULTIPLIER
        !           165:        addl    %ebx,8(%edi)    C accumulate third limb into wp
        !           166:        adcl    %eax,%ebp       C new lo + cylimb
        !           167:        movl    $0,%ebx
        !           168:        adcl    %edx,%ebx
        !           169:
        !           170:        addl    %ebp,12(%edi)   C accumulate fourth limb into wp
        !           171:        adcl    $0,%ebx         C propagate carry into cylimb
        !           172:
        !           173:        leal    16(%esi),%esi
        !           174:        leal    16(%edi),%edi
        !           175:        decl    %ecx
        !           176:        jnz     L(oopX)
        !           177:
        !           178: L(endX):
        !           179:        movl    %ebx,(%edi)     C store the final carry limb of this row
        !           180:        addl    $4,%edi
        !           181:
        !           182:        C we incremented wp and xp in the loop above; compensate
        !           183:        movl    PARAM_XSIZE,%eax
        !           184:        shll    $2,%eax
        !           185:        subl    %eax,%edi       C edi = one limb past the start of this row
        !           186:        subl    %eax,%esi       C esi = xp again
        !           187:
        !           188:        movl    VAR_COUNTER,%eax
        !           189:        decl    %eax
        !           190:        movl    %eax,VAR_COUNTER
        !           191:        jnz     L(outer)
        !           192:
        !           193: L(skip):
        !           194:        popl    %ebx
        !           195:        popl    %edi
        !           196:        popl    %ebp
        !           197:        popl    %esi
        !           198:        addl    $VAR_STACK_SPACE,%esp   C release the local slots
        !           199:        ret
        !           200:
        !           201: L(done):
        !           202:        movl    %edx,4(%edi)       C store to wp[1]
        !           203:        popl    %edi               C ebx was never pushed on this path
        !           204:        popl    %ebp
        !           205:        popl    %esi
        !           206:        addl    $VAR_STACK_SPACE,%esp   C release the local slots
        !           207:        ret
        !           208:
        !           209: EPILOGUE()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>