[BACK]Return to mod_34lsub1.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / x86 / pentium

File: [local] / OpenXM_contrib / gmp / mpn / x86 / pentium / Attic / mod_34lsub1.asm (download)

Revision 1.1.1.1 (vendor branch), Mon Aug 25 16:06:29 2003 UTC (20 years, 10 months ago) by ohara
Branch: GMP
CVS Tags: VERSION_4_1_2, RELEASE_1_2_3, RELEASE_1_2_2_KNOPPIX_b, RELEASE_1_2_2_KNOPPIX
Changes since 1.1: +0 -0 lines

Import gmp 4.1.2

dnl  Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.

dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl 
dnl  This file is part of the GNU MP Library.
dnl 
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 2.1 of the
dnl  License, or (at your option) any later version.
dnl 
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl 
dnl  You should have received a copy of the GNU Lesser General Public
dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
dnl  Suite 330, Boston, MA 02111-1307, USA.

include(`../config.m4')


C P5: 1.66 cycles/limb


C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C

defframe(PARAM_SIZE, 8)
defframe(PARAM_SRC,  4)

	TEXT
	ALIGN(16)
PROLOGUE(mpn_mod_34lsub1)
deflit(`FRAME',0)

	movl	PARAM_SIZE, %ecx
	movl	PARAM_SRC, %edx

	subl	$2, %ecx
	ja	L(three_or_more)

	movl	(%edx), %eax
	jne	L(one)


	movl	4(%edx), %ecx
	movl	%eax, %edx

	shrl	$24, %edx
	andl	$0xFFFFFF, %eax

	addl	%edx, %eax
	movl	%ecx, %edx

	shrl	$16, %ecx
	andl	$0xFFFF, %edx

	shll	$8, %edx
	addl	%ecx, %eax

	addl	%edx, %eax

L(one):
	ret


L(three_or_more):
	C eax
	C ebx
	C ecx	size-2
	C edx	src
	C esi
	C edi
	C ebp

	pushl	%ebx	FRAME_pushl()
	pushl	%esi	FRAME_pushl()

	pushl	%edi	FRAME_pushl()
	pushl	%ebp	FRAME_pushl()

	xorl	%esi, %esi		C 0mod3
	xorl	%edi, %edi		C 1mod3

	xorl	%ebp, %ebp		C 2mod3, and clear carry

L(top):
	C eax	scratch
	C ebx	scratch
	C ecx	counter, limbs
	C edx	src
	C esi	0mod3
	C edi	1mod3
	C ebp	2mod3

	movl	(%edx), %eax
	movl	4(%edx), %ebx

	adcl	%eax, %esi
	movl	8(%edx), %eax

	adcl	%ebx, %edi
	leal	12(%edx), %edx

	adcl	%eax, %ebp
	leal	-2(%ecx), %ecx

	decl	%ecx
	jg	L(top)


	C ecx is -2, -1 or 0, representing 0, 1 or 2 more limbs, respectively

	movl	$0xFFFFFFFF, %ebx	C mask
	incl	%ecx

	js	L(combine)		C 0 more

	movl	(%edx), %eax
	movl	$0xFFFFFF00, %ebx

	adcl	%eax, %esi
	decl	%ecx

	js	L(combine)		C 1 more

	movl	4(%edx), %eax
	movl	$0xFFFF0000, %ebx

	adcl	%eax, %edi



L(combine):
	C eax
	C ebx	mask
	C ecx
	C edx
	C esi	0mod3
	C edi	1mod3
	C ebp	2mod3

	sbbl	%ecx, %ecx		C carry
	movl	%esi, %eax		C 0mod3

	andl	%ebx, %ecx		C masked for position
	andl	$0xFFFFFF, %eax		C 0mod3 low

	shrl	$24, %esi		C 0mod3 high
	subl	%ecx, %eax		C apply carry

	addl	%esi, %eax		C apply 0mod3
	movl	%edi, %ebx		C 1mod3

	shrl	$16, %edi		C 1mod3 high
	andl	$0x0000FFFF, %ebx

	shll	$8, %ebx		C 1mod3 low
	addl	%edi, %eax		C apply 1mod3 high

	addl	%ebx, %eax		C apply 1mod3 low
	movl	%ebp, %ebx		C 2mod3

	shrl	$8, %ebp		C 2mod3 high
	andl	$0xFF, %ebx

	shll	$16, %ebx		C 2mod3 low
	addl	%ebp, %eax		C apply 2mod3 high

	addl	%ebx, %eax		C apply 2mod3 low

	popl	%ebp
	popl	%edi

	popl	%esi
	popl	%ebx

	ret

EPILOGUE()