=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/tune/Attic/Makefile.am,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1.1.1 -r1.1.1.2 --- OpenXM_contrib/gmp/tune/Attic/Makefile.am 2000/09/09 14:13:19 1.1.1.1 +++ OpenXM_contrib/gmp/tune/Attic/Makefile.am 2003/08/25 16:06:37 1.1.1.2 @@ -1,6 +1,6 @@ ## Process this file with automake to generate Makefile.in -# Copyright (C) 2000 Free Software Foundation, Inc. +# Copyright 2000, 2001, 2002 Free Software Foundation, Inc. # # This file is part of the GNU MP Library. # @@ -22,162 +22,125 @@ AUTOMAKE_OPTIONS = gnu no-dependencies $(top_builddir)/ansi2knr -INCLUDES = -I$(top_srcdir) +INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests -EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm +EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm ia64.asm many.pl noinst_HEADERS = speed.h -# No dependency on libgmp, so can use libgmptune instead for tune program + EXTRA_LTLIBRARIES = libspeed.la -libspeed_la_SOURCES = common.c time.c noop.c -libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJS) -libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) -# Both libspeed and the programs are built static-only since static is -# fastest and will be preferred by users who care about maximizing speed. -# -# Actually libtool doesn't seem to pass -static through to gcc, it only -# chooses the ".a" versions of libtool libraries. Rerun the gcc link -# invocation with -static if you want a static link of libc and libm. -# -LDFLAGS = -static -LDADD = libspeed.la $(top_builddir)/libgmp.la -lm +libspeed_la_SOURCES = \ + common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \ + freq.c gcd_bin.c gcd_finda_gen.c \ + gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \ + jacbase1.c jacbase2.c jacbase3.c \ + mod_1_div.c mod_1_inv.c modlinv.c mul_n_mpn.c mul_n_open.c \ + noop.c powm_mod.c powm_redc.c pre_divrem_1.c \ + set_strb.c set_strs.c time.c \ + sb_div.c sb_inv.c -EXTRA_PROGRAMS = speed speed-ext tuneup +libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \ + $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la +libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM) +libspeed_la_LDFLAGS = -static -tuneup_SOURCES = tuneup.c -tuneup_DEPENDENCIES = libspeed.la libgmptune.a -tuneup_LDADD = $(tuneup_DEPENDENCIES) -lm +$(top_builddir)/tests/libtests.la: + cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la -# Don't know how to get tuneup$U.lo to depend on sqr_basecase.h, so just do -# an explicit make. A plain "make tuneup" will fail if used directly. -tune: - $(MAKE) $(AM_MAKEFLAGS) sqr_basecase.h tuneup$(EXEEXT) - ./tuneup -allprogs: sqr_basecase.h $(EXTRA_PROGRAMS) +# The library code is faster static than shared on some systems, so do +# tuning and measuring with static, since users who care about maximizing +# speed will be using that. speed-dynamic exists to show the difference. +# +# On Solaris 8, gcc 2.95.2 -static is somehow broken (it creates executables +# that immediately seg fault), so -all-static is not used. The only thing +# -all-static does is make libc static linked as well as libgmp, and that +# makes a difference only when measuring malloc and friends in the speed +# program. This can always be forced with "make speed_LDFLAGS=-all-static +# ..." if desired, see tune/README. +EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup +DEPENDENCIES = libspeed.la +LDADD = $(DEPENDENCIES) + speed_SOURCES = speed.c +speed_LDFLAGS = -static -SQR_CLEANFILES = sqr_basecase.c sqr_basecase.h sqr_basecase.asm +speed_dynamic_SOURCES = speed.c -CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \ - $(SQR_CLEANFILES) libgmptune.a libgmptmp.a \ - stg.gnuplot stg.data \ - mtg.gnuplot mtg.data \ - fibg.gnuplot fibg.data \ - graph.gnuplot graph.data +speed_ext_SOURCES = speed-ext.c +speed_ext_LDFLAGS = -static +tuneup_SOURCES = tuneup.c +nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS) +tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la +tuneup_LDADD = $(tuneup_DEPENDENCIES) +tuneup_LDFLAGS = -static -# This bit like mpn/Makefile.am -# COMPILE minus CC. FIXME: Really pass *_CFLAGS to CPP? -COMPILE_FLAGS = \ - $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +tune: + $(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT) + ./tuneup -SUFFIXES = .asm -.asm.o: - $(M4) $(M4FLAGS) -DOPERATION_$* $< >tmp-$*.s - $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ - rm -f tmp-$*.s -.asm.obj: - $(M4) $(M4FLAGS) -DOPERATION_$* `cygpath -w $<` >tmp-$*.s - $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ - rm -f tmp-$*.s -.asm.lo: - echo "$(COMPILE_FLAGS)" - $(M4) $(M4FLAGS) -DPIC -DOPERATION_$* $< >tmp-$*.s - $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ - $(M4) $(M4FLAGS) -DOPERATION_$* $< >tmp-$*.s - $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o - rm -f tmp-$*.s +allprogs: $(EXTRA_PROGRAMS) +# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl +CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \ + $(TUNE_MPN_SRCS) sqr_asm.asm \ + stg.gnuplot stg.data \ + mtg.gnuplot mtg.data \ + fibg.gnuplot fibg.data \ + graph.gnuplot graph.data \ + $(MANY_CLEAN) +DISTCLEANFILES = sqr_basecase.c $(MANY_DISTCLEAN) -# Recompiled versions of $(LIBGMPTUNE_OBJS) are always used. sqr_basecase.o -# is replaced if it's mpn/generic/sqr_basecase.c (due to the fixed size -# temporary array), or sqr_basecase.asm with a KARATSUBA_SQR_THRESHOLD_MAX -# (because that says it normally works only up to KARATSUBA_SQR_THRESHOLD). -SQR_GENERIC_MAX = 200 +# Generating these little files at build time seems better than including +# them in the distribution, since the list can be changed more easily. +# +# mpn/generic/tdiv_qr.c uses mpn_divrem_1 and mpn_divrem_2, but only for 1 +# and 2 limb divisors, which are never used during tuning, so it doesn't +# matter whether it picks up a tuned or untuned version of those. +# +# divrem_1 and mod_1 are recompiled renamed to "_tune" to avoid a linking +# problem. If a native divrem_1 provides an mpn_divrem_1c entrypoint then +# common.c will want that, but the generic divrem_1 doesn't provide it, +# likewise for mod_1. The simplest way around this is to have the tune +# build versions renamed suitably. +# +# FIXME: Would like say mul_n.c to depend on $(top_builddir)/mul_n.c so the +# recompiled object will be rebuilt if that file changes. -LIBGMPTUNE_OBJS = \ - mul_n.o mul_fft.o mul.o \ - bz_divrem_n.o tdiv_qr.o fib_ui.o powm.o gcd.o gcdext.o +TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c +TUNE_MPN_SRCS_BASIC = dc_divrem_n.c divrem_2.c gcd.c get_str.c \ + mul_n.c mul_fft.c mul.c sb_divrem_mn.c tdiv_qr.c -sqr_basecase.h: libgmptune.a +$(TUNE_MPN_SRCS_BASIC): + for i in $(TUNE_MPN_SRCS_BASIC); do \ + echo "#define TUNE_PROGRAM_BUILD 1" >$$i; \ + echo "#include \"mpn/generic/$$i\"" >>$$i; \ + done +divrem_1.c: + echo "#define TUNE_PROGRAM_BUILD 1" >divrem_1.c + echo "#define __gmpn_divrem_1 mpn_divrem_1_tune" >>divrem_1.c + echo "#include \"mpn/generic/divrem_1.c\"" >>divrem_1.c -# "cp -f" is not portable (eg. it fails on SunOS) -# "ar r" is going to be a bit fragile because there's more than one mul.o in -# libgmp.a. +mod_1.c: + echo "#define TUNE_PROGRAM_BUILD 1" >mod_1.c + echo "#define __gmpn_mod_1 mpn_mod_1_tune" >>mod_1.c + echo "#include \"mpn/generic/mod_1.c\"" >>mod_1.c -libgmptune.a: $(top_builddir)/libgmp.la $(LIBGMPTUNE_OBJS) - rm -f libgmptmp.a - eval `$(LIBTOOL) --config | grep '^objdir='` ; \ - cp $(top_builddir)/$$objdir/libgmp.a libgmptmp.a - $(AR) r libgmptmp.a $(LIBGMPTUNE_OBJS) - rm -f $(SQR_CLEANFILES) - echo "" >sqr_basecase.h - if test -f $(top_builddir)/mpn/sqr_basecase.c; then \ - $(MAKE) $(AM_MAKEFLAGS) sqr-basecase-generic; \ - else \ - if grep KARATSUBA_SQR_THRESHOLD_MAX \ - $(top_builddir)/mpn/sqr_basecase.asm >/dev/null; then \ - $(MAKE) $(AM_MAKEFLAGS) sqr-basecase-asm; \ - fi \ - fi - $(RANLIB) libgmptmp.a - rm -f libgmptune.a - mv libgmptmp.a libgmptune.a +sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm + echo 'define(SQR_KARATSUBA_THRESHOLD_OVERRIDE,SQR_KARATSUBA_THRESHOLD_MAX)' >sqr_asm.asm + echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm -sqr-basecase-generic: - cp $(top_builddir)/mpn/sqr_basecase.c . - $(MAKE) $(AM_MAKEFLAGS) \ - CFLAGS='$(CFLAGS) -DKARATSUBA_SQR_THRESHOLD=$(SQR_GENERIC_MAX)' \ - sqr_basecase.o - $(AR) r libgmptmp.a sqr_basecase.o - echo '#define KARATSUBA_SQR_MAX $(SQR_GENERIC_MAX)' >sqr_basecase.h -sqr-basecase-asm: - cp $(top_builddir)/mpn/sqr_basecase.asm . - $(MAKE) $(AM_MAKEFLAGS) \ - M4FLAGS='$(M4FLAGS) -DKARATSUBA_SQR_THRESHOLD_OVERRIDE=KARATSUBA_SQR_THRESHOLD_MAX' \ - sqr_basecase.o - ar r libgmptmp.a sqr_basecase.o - echo "#define KARATSUBA_SQR_MAX `sed -n 's/def...(KARATSUBA_SQR_THRESHOLD_MAX, *\([0-9]*\))/\1/p' sqr_basecase.asm`" >sqr_basecase.h +include ../mpn/Makeasm.am -mul_n.o: $(top_builddir)/mpn/mul_n.c - $(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/mul_n.c - -mul_fft.o: $(top_builddir)/mpn/mul_fft.c - $(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/mul_fft.c - -mul.o: $(top_builddir)/mpn/mul.c - $(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/mul.c - -bz_divrem_n.o: $(top_builddir)/mpn/bz_divrem_n.c - $(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/bz_divrem_n.c - -tdiv_qr.o: $(top_builddir)/mpn/tdiv_qr.c - $(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/tdiv_qr.c - -fib_ui.o: $(top_srcdir)/mpz/fib_ui.c - $(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_srcdir)/mpz/fib_ui.c - -powm.o: $(top_srcdir)/mpz/powm.c - $(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_srcdir)/mpz/powm.c - -gcd.o: $(top_builddir)/mpn/gcd.c - $(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/gcd.c - -gcdext.o: $(top_builddir)/mpn/gcdext.c - $(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/gcdext.c - - - - # "mk" is multiplication in the karatsuba range # "st" is squaring in the toom-cook range, etc # "g" forms produce graphs @@ -187,7 +150,7 @@ mk: MTS = -s 50-150 -c mt: - ./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n | less + ./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n mtg: ./speed $(MTS) -P mtg mpn_kara_mul_n mpn_toom3_mul_n @@ -196,21 +159,33 @@ sk: STS = -s 50-150 -c st: - ./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n | less + ./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n stg: ./speed $(STS) -P stg mpn_kara_sqr_n mpn_toom3_sqr_n -bz: - ./speed -s 5-40 -c mpn_bz_divrem_sb mpn_bz_divrem_n mpn_bz_tdiv_qr +dc: + ./speed -s 5-40 -c mpn_dc_divrem_sb mpn_dc_divrem_n mpn_dc_tdiv_qr fib: - ./speed -s 80-120 -c mpz_fib_ui + ./speed -s 40-60 -c mpz_fib_ui fibg: ./speed -s 10-300 -P fibg mpz_fib_ui gcd: ./speed -s 1-20 -c mpn_gcd + +udiv: + ./speed -s 1 -c udiv_qrnnd udiv_qrnnd_preinv udiv_qrnnd_preinv2norm invert_limb udiv_qrnnd_c + +divn: + ./speed -s 1-30 -c mpn_divrem_1_div.-1 mpn_divrem_1_inv.-1 +divun: + ./speed -s 1-30 -c mpn_divrem_1_div.12345 mpn_divrem_1_inv.12345 +modn: + ./speed -s 1-30 -c mpn_mod_1_div.-1 mpn_mod_1_inv.-1 +modun: + ./speed -s 1-30 -c mpn_mod_1_div.12345 mpn_mod_1_inv.12345 graph: