[BACK]Return to mul_basecase.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86 / pentium

Annotation of OpenXM_contrib/gmp/mpn/x86/pentium/mul_basecase.asm, Revision 1.1.1.1

1.1       maekawa     1: dnl  Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
                      2: dnl
                      3: dnl  P5: 14.2 cycles/crossproduct (approx)
                      4:
                      5:
                      6: dnl  Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
                      7: dnl
                      8: dnl  This file is part of the GNU MP Library.
                      9: dnl
                     10: dnl  The GNU MP Library is free software; you can redistribute it and/or
                     11: dnl  modify it under the terms of the GNU Lesser General Public License as
                     12: dnl  published by the Free Software Foundation; either version 2.1 of the
                     13: dnl  License, or (at your option) any later version.
                     14: dnl
                     15: dnl  The GNU MP Library is distributed in the hope that it will be useful,
                     16: dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
                     17: dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     18: dnl  Lesser General Public License for more details.
                     19: dnl
                     20: dnl  You should have received a copy of the GNU Lesser General Public
                     21: dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
                     22: dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
                     23: dnl  Suite 330, Boston, MA 02111-1307, USA.
                     24:
                     25:
                     26: include(`../config.m4')
                     27:
                     28:
                     29: C void mpn_mul_basecase (mp_ptr wp,
                     30: C                        mp_srcptr xp, mp_size_t xsize,
                     31: C                        mp_srcptr yp, mp_size_t ysize);
                     32:
                     33: defframe(PARAM_YSIZE, 20)   C fifth argument: ysize
                     34: defframe(PARAM_YP,    16)   C fourth argument: yp
                     35: defframe(PARAM_XSIZE, 12)   C third argument: xsize
                     36: defframe(PARAM_XP,    8)    C second argument: xp
                     37: defframe(PARAM_WP,    4)    C first argument: wp
                     38: C NOTE(review): defframe comes from config.m4; offsets are presumably relative to the return address and adjusted by FRAME -- confirm there.
                     39: defframe(VAR_COUNTER, -4)   C outer loop counter; the slot is the one made by the dummy pushl of eax
                     40:
                     41:        .text
                     42:        ALIGN(8)
                     43: PROLOGUE(mpn_mul_basecase)
                     44: C Store the product of x (xsize limbs) and y (ysize limbs) at wp, making one pass over x per limb of y.
                     45:        pushl   %eax                    C dummy push for allocating stack slot
                     46:        pushl   %esi                    C esi, ebp and edi are restored on both exit paths
                     47:        pushl   %ebp
                     48:        pushl   %edi
                     49: deflit(`FRAME',16)      C four 4-byte pushes so far
                     50:
                     51:        movl    PARAM_XP,%esi           C esi = xp
                     52:        movl    PARAM_WP,%edi           C edi = wp
                     53:        movl    PARAM_YP,%ebp           C ebp = yp
                     54:
                     55:        movl    (%esi),%eax             C load xp[0]
                     56:        mull    (%ebp)                  C multiply by yp[0]
                     57:        movl    %eax,(%edi)             C store to wp[0]
                     58:        movl    PARAM_XSIZE,%ecx        C xsize
                     59:        decl    %ecx                    C If xsize = 1, ysize = 1 too
                     60:        jz      L(done)                 C single limb case: only wp[1] is left to store
                     61: C General case: finish the first row, wp[0..xsize] = x * yp[0].
                     62:        movl    PARAM_XSIZE,%eax
                     63:        pushl   %ebx                    C ebx holds the carry limb below, so save it
                     64: FRAME_pushl()           C presumably bumps FRAME by 4 for the push above -- defined outside this file
                     65:        movl    %edx,%ebx               C ebx = high limb of xp[0]*yp[0]
                     66:        leal    (%esi,%eax,4),%esi      C make xp point at end
                     67:        leal    (%edi,%eax,4),%edi      C offset wp by xsize
                     68:        negl    %ecx                    C negate j size/index for inner loop
                     69:        xorl    %eax,%eax               C clear carry
                     70:
                     71:        ALIGN(8)
                     72: L(oop1):       adcl    $0,%ebx         C fold in the carry left by the addl of the previous iteration
                     73:        movl    (%esi,%ecx,4),%eax      C load next limb at xp[j]
                     74:        mull    (%ebp)                  C edx:eax = xp[j] * yp[0]
                     75:        addl    %ebx,%eax               C add carry limb; CF is consumed by the next adcl
                     76:        movl    %eax,(%edi,%ecx,4)      C store low limb to wp[j]
                     77:        incl    %ecx                    C step j towards zero; incl leaves CF untouched
                     78:        movl    %edx,%ebx               C high limb becomes the next carry limb
                     79:        jnz     L(oop1)                 C zero flag here is from the incl
                     80:
                     81:        adcl    $0,%ebx                 C carry from the last addl
                     82:        movl    PARAM_YSIZE,%eax
                     83:        movl    %ebx,(%edi)             C most significant limb of product
                     84:        addl    $4,%edi                 C increment wp
                     85:        decl    %eax                    C eax = ysize-1, the y limbs still to process
                     86:        jz      L(skip)                 C ysize = 1: nothing more to add
                     87:        movl    %eax,VAR_COUNTER        C outer loop counter i = ysize-1
                     88: C Remaining rows: add x times the next y limb into wp, one limb further up each time.
                     89: L(outer):
                     90:        addl    $4,%ebp                 C make ebp point to next y limb
                     91:        movl    PARAM_XSIZE,%ecx
                     92:        negl    %ecx                    C ecx = -xsize, index runs up to zero
                     93:        xorl    %ebx,%ebx               C carry limb = 0, and carry flag cleared
                     94:
                     95:        C code at 0x61 here, close enough to aligned
                     96: L(oop2):
                     97:        adcl    $0,%ebx                 C fold in the carry left by the addl into the wp limb
                     98:        movl    (%esi,%ecx,4),%eax      C load next limb of x
                     99:        mull    (%ebp)                  C edx:eax = x limb * current y limb
                    100:        addl    %ebx,%eax               C add carry limb to the low limb
                    101:        movl    (%edi,%ecx,4),%ebx      C load the wp limb this product is added to
                    102:        adcl    $0,%edx                 C carry from that addl goes into the high limb
                    103:        addl    %eax,%ebx               C accumulate; CF is consumed by the next adcl
                    104:        movl    %ebx,(%edi,%ecx,4)      C write the wp limb back
                    105:        incl    %ecx                    C step index towards zero; incl leaves CF untouched
                    106:        movl    %edx,%ebx               C high limb becomes the next carry limb
                    107:        jnz     L(oop2)                 C zero flag here is from the incl
                    108:
                    109:        adcl    $0,%ebx                 C carry from the last accumulate
                    110:
                    111:        movl    %ebx,(%edi)             C store the new top limb of this row
                    112:        addl    $4,%edi                 C advance wp for the next row
                    113:        movl    VAR_COUNTER,%eax
                    114:        decl    %eax                    C one y limb fewer to process
                    115:        movl    %eax,VAR_COUNTER
                    116:        jnz     L(outer)                C zero flag is from the decl; movl does not change it
                    117: C Exit path used once ebx has been pushed.
                    118: L(skip):
                    119:        popl    %ebx
                    120:        popl    %edi
                    121:        popl    %ebp
                    122:        popl    %esi
                    123:        addl    $4,%esp                 C drop the dummy stack slot
                    124:        ret
                    125: C Exit path for xsize = 1, taken before ebx was pushed.
                    126: L(done):
                    127:        movl    %edx,4(%edi)    C store to wp[1]
                    128:        popl    %edi
                    129:        popl    %ebp
                    130:        popl    %esi
                    131:        popl    %eax            C dummy pop for deallocating stack slot
                    132:        ret
                    133:
                    134: EPILOGUE()
                    135:

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>