[BACK]Return to mul_basecase.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86 / pentium

Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/mul_basecase.asm, Revision 1.1.1.2

1.1       maekawa     1: dnl  Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
                      2:
1.1.1.2 ! ohara       3: dnl  Copyright 1996, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
1.1       maekawa     4: dnl
                      5: dnl  This file is part of the GNU MP Library.
                      6: dnl
                      7: dnl  The GNU MP Library is free software; you can redistribute it and/or
                      8: dnl  modify it under the terms of the GNU Lesser General Public License as
                      9: dnl  published by the Free Software Foundation; either version 2.1 of the
                     10: dnl  License, or (at your option) any later version.
                     11: dnl
                     12: dnl  The GNU MP Library is distributed in the hope that it will be useful,
                     13: dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
                     14: dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     15: dnl  Lesser General Public License for more details.
                     16: dnl
                     17: dnl  You should have received a copy of the GNU Lesser General Public
                     18: dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
                     19: dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
                     20: dnl  Suite 330, Boston, MA 02111-1307, USA.
                     21:
                     22: include(`../config.m4')
                     23:
                     24:
1.1.1.2 ! ohara      25: C P5: 14.2 cycles/crossproduct (approx)
        !            26:
        !            27:
1.1       maekawa    28: C void mpn_mul_basecase (mp_ptr wp,
                     29: C                        mp_srcptr xp, mp_size_t xsize,
                     30: C                        mp_srcptr yp, mp_size_t ysize);
                     31:
                     32: defframe(PARAM_YSIZE, 20)
                     33: defframe(PARAM_YP,    16)
                     34: defframe(PARAM_XSIZE, 12)
                     35: defframe(PARAM_XP,    8)
                     36: defframe(PARAM_WP,    4)
                     37: C Outer-loop row counter; presumably the slot reserved by the dummy push (confirm via defframe)
                     38: defframe(VAR_COUNTER, -4)
                     39:
1.1.1.2 ! ohara      40:        TEXT
1.1       maekawa    41:        ALIGN(8)
                     42: PROLOGUE(mpn_mul_basecase)
                     43:        C First pass stores xp[]*yp[0] into wp; each later pass adds xp[]*yp[i] one limb higher
                     44:        pushl   %eax                    C dummy push for allocating stack slot
                     45:        pushl   %esi                    C esi, ebp, edi are restored on both exit paths
                     46:        pushl   %ebp
                     47:        pushl   %edi
                     48: deflit(`FRAME',16)
                     49:
                     50:        movl    PARAM_XP,%esi           C esi = xp
                     51:        movl    PARAM_WP,%edi           C edi = wp
                     52:        movl    PARAM_YP,%ebp           C ebp = yp
                     53:
                     54:        movl    (%esi),%eax             C load xp[0]
                     55:        mull    (%ebp)                  C multiply by yp[0]
                     56:        movl    %eax,(%edi)             C store to wp[0]
                     57:        movl    PARAM_XSIZE,%ecx        C xsize
                     58:        decl    %ecx                    C If xsize = 1, ysize = 1 too
                     59:        jz      L(done)                 C single limb: only wp[1] is left to store
                     60:
                     61:        movl    PARAM_XSIZE,%eax        C eax = xsize, used to form the end pointers below
                     62:        pushl   %ebx                    C ebx is the carry limb in both loops; popped at L(skip)
                     63: FRAME_pushl()
                     64:        movl    %edx,%ebx               C ebx = high limb of xp[0]*yp[0]
                     65:        leal    (%esi,%eax,4),%esi      C make xp point at end
                     66:        leal    (%edi,%eax,4),%edi      C offset wp by xsize
                     67:        negl    %ecx                    C negate j size/index for inner loop
                     68:        xorl    %eax,%eax               C clear carry
                     69:
                     70:        ALIGN(8)
                     71: L(oop1):       adcl    $0,%ebx         C fold CF from the previous addl into the carry limb
                     72:        movl    (%esi,%ecx,4),%eax      C load next limb at xp[j]
                     73:        mull    (%ebp)                  C edx:eax = xp[j] * yp[0]
                     74:        addl    %ebx,%eax               C add carry limb; CF is consumed by the next adcl
                     75:        movl    %eax,(%edi,%ecx,4)      C store to wp[j]
                     76:        incl    %ecx                    C incl sets ZF for the jnz but leaves CF alone
                     77:        movl    %edx,%ebx               C high limb becomes the next carry limb
                     78:        jnz     L(oop1)
                     79:
                     80:        adcl    $0,%ebx                 C fold in CF from the last addl
                     81:        movl    PARAM_YSIZE,%eax
                     82:        movl    %ebx,(%edi)             C most significant limb of product
                     83:        addl    $4,%edi                 C increment wp
                     84:        decl    %eax                    C eax = ysize-1 = number of y limbs still to process
                     85:        jz      L(skip)                 C ysize = 1: nothing more to add
                     86:        movl    %eax,VAR_COUNTER        C set counter i to ysize-1 (eax was decremented above)
                     87:
                     88: L(outer):
                     89:        addl    $4,%ebp                 C make ebp point to next y limb
                     90:        movl    PARAM_XSIZE,%ecx
                     91:        negl    %ecx                    C j runs from -xsize up to 0
                     92:        xorl    %ebx,%ebx               C zero the carry limb; xorl also clears CF
                     93:
                     94:        C code at 0x61 here, close enough to aligned
                     95: L(oop2):
                     96:        adcl    $0,%ebx                 C fold CF from the previous wp add into the carry limb
                     97:        movl    (%esi,%ecx,4),%eax      C load xp[j]
                     98:        mull    (%ebp)                  C edx:eax = xp[j] * current y limb
                     99:        addl    %ebx,%eax               C add carry limb to the low product limb
                    100:        movl    (%edi,%ecx,4),%ebx      C fetch the limb already in wp
                    101:        adcl    $0,%edx                 C propagate CF from the addl into the high limb
                    102:        addl    %eax,%ebx               C accumulate; CF is consumed by the next adcl
                    103:        movl    %ebx,(%edi,%ecx,4)      C write the updated wp limb back
                    104:        incl    %ecx                    C incl sets ZF for the jnz but leaves CF alone
                    105:        movl    %edx,%ebx               C high limb becomes the next carry limb
                    106:        jnz     L(oop2)
                    107:
                    108:        adcl    $0,%ebx                 C fold in CF from the last wp add
                    109:
                    110:        movl    %ebx,(%edi)             C store the top limb of this pass
                    111:        addl    $4,%edi                 C advance wp by one limb for the next pass
                    112:        movl    VAR_COUNTER,%eax
                    113:        decl    %eax
                    114:        movl    %eax,VAR_COUNTER        C movl does not touch flags, so jnz tests the decl
                    115:        jnz     L(outer)
                    116:
                    117: L(skip):
                    118:        popl    %ebx
                    119:        popl    %edi
                    120:        popl    %ebp
                    121:        popl    %esi
                    122:        addl    $4,%esp                 C discard the slot made by the dummy push
                    123:        ret
                    124:
                    125: L(done):
                    126:        movl    %edx,4(%edi)    C store to wp[1]
                    127:        popl    %edi
                    128:        popl    %ebp
                    129:        popl    %esi
                    130:        popl    %eax            C dummy pop for deallocating stack slot
                    131:        ret
                    132:
                    133: EPILOGUE()
                    134:

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>