version 1.1.1.1, 2000/09/09 14:13:19 |
version 1.1.1.2, 2003/08/25 16:06:37 |
|
|
## Process this file with automake to generate Makefile.in |
## Process this file with automake to generate Makefile.in |
|
|
# Copyright (C) 2000 Free Software Foundation, Inc. |
# Copyright 2000, 2001, 2002 Free Software Foundation, Inc. |
# |
# |
# This file is part of the GNU MP Library. |
# This file is part of the GNU MP Library. |
# |
# |
|
|
|
|
AUTOMAKE_OPTIONS = gnu no-dependencies $(top_builddir)/ansi2knr |
AUTOMAKE_OPTIONS = gnu no-dependencies $(top_builddir)/ansi2knr |
|
|
INCLUDES = -I$(top_srcdir) |
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests |
|
|
EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm |
EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm ia64.asm many.pl |
noinst_HEADERS = speed.h |
noinst_HEADERS = speed.h |
|
|
# No dependency on libgmp, so can use libgmptune instead for tune program |
|
EXTRA_LTLIBRARIES = libspeed.la |
EXTRA_LTLIBRARIES = libspeed.la |
libspeed_la_SOURCES = common.c time.c noop.c |
|
libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJS) |
|
libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) |
|
|
|
# Both libspeed and the programs are built static-only since static is |
libspeed_la_SOURCES = \ |
# fastest and will be preferred by users who care about maximizing speed. |
common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \ |
# |
freq.c gcd_bin.c gcd_finda_gen.c \ |
# Actually libtool doesn't seem to pass -static through to gcc, it only |
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \ |
# chooses the ".a" versions of libtool libraries. Rerun the gcc link |
jacbase1.c jacbase2.c jacbase3.c \ |
# invocation with -static if you want a static link of libc and libm. |
mod_1_div.c mod_1_inv.c modlinv.c mul_n_mpn.c mul_n_open.c \ |
# |
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \ |
LDFLAGS = -static |
set_strb.c set_strs.c time.c \ |
LDADD = libspeed.la $(top_builddir)/libgmp.la -lm |
sb_div.c sb_inv.c |
|
|
EXTRA_PROGRAMS = speed speed-ext tuneup |
libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \ |
|
$(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la |
|
libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM) |
|
libspeed_la_LDFLAGS = -static |
|
|
tuneup_SOURCES = tuneup.c |
# Build libtests.la by recursing into the tests directory.  "&&" is used |
# rather than ";" so that a failed cd stops the recipe instead of running |
# $(MAKE) in the wrong directory. |
$(top_builddir)/tests/libtests.la: |
tuneup_DEPENDENCIES = libspeed.la libgmptune.a |
cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la |
tuneup_LDADD = $(tuneup_DEPENDENCIES) -lm |
|
|
|
# Don't know how to get tuneup$U.lo to depend on sqr_basecase.h, so just do |
|
# an explicit make. A plain "make tuneup" will fail if used directly. |
|
# "tune" first makes sqr_basecase.h and the tuneup executable through a |
# sub-make, then runs ./tuneup from the current directory. |
tune: |
|
$(MAKE) $(AM_MAKEFLAGS) sqr_basecase.h tuneup$(EXEEXT) |
|
./tuneup |
|
|
|
# "allprogs" builds every program listed in EXTRA_PROGRAMS, with |
# sqr_basecase.h as an additional prerequisite. |
allprogs: sqr_basecase.h $(EXTRA_PROGRAMS) |
# The library code is faster static than shared on some systems, so do |
|
# tuning and measuring with static, since users who care about maximizing |
|
# speed will be using that. speed-dynamic exists to show the difference. |
|
# |
|
# On Solaris 8, gcc 2.95.2 -static is somehow broken (it creates executables |
|
# that immediately seg fault), so -all-static is not used. The only thing |
|
# -all-static does is make libc static linked as well as libgmp, and that |
|
# makes a difference only when measuring malloc and friends in the speed |
|
# program. This can always be forced with "make speed_LDFLAGS=-all-static |
|
# ..." if desired, see tune/README. |
|
|
|
EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup |
|
|
|
DEPENDENCIES = libspeed.la |
|
LDADD = $(DEPENDENCIES) |
|
|
speed_SOURCES = speed.c |
speed_SOURCES = speed.c |
|
speed_LDFLAGS = -static |
|
|
SQR_CLEANFILES = sqr_basecase.c sqr_basecase.h sqr_basecase.asm |
speed_dynamic_SOURCES = speed.c |
|
|
CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \ |
speed_ext_SOURCES = speed-ext.c |
$(SQR_CLEANFILES) libgmptune.a libgmptmp.a \ |
speed_ext_LDFLAGS = -static |
stg.gnuplot stg.data \ |
|
mtg.gnuplot mtg.data \ |
|
fibg.gnuplot fibg.data \ |
|
graph.gnuplot graph.data |
|
|
|
|
tuneup_SOURCES = tuneup.c |
|
nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS) |
|
tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la |
|
tuneup_LDADD = $(tuneup_DEPENDENCIES) |
|
tuneup_LDFLAGS = -static |
|
|
# This bit is like mpn/Makefile.am |
|
|
|
# COMPILE minus CC. FIXME: Really pass *_CFLAGS to CPP? |
tune: |
COMPILE_FLAGS = \ |
$(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT) |
$(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) |
./tuneup |
|
|
SUFFIXES = .asm |
allprogs: $(EXTRA_PROGRAMS) |
# .asm -> .o: run the source through m4 with -DOPERATION_<stem> into the |
# scratch file tmp-<stem>.s, assemble that with $(CCAS) into the target, |
# then delete the scratch file. |
.asm.o: |
|
$(M4) $(M4FLAGS) -DOPERATION_$* $< >tmp-$*.s |
|
$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ |
|
rm -f tmp-$*.s |
|
# .asm -> .obj: same steps as .asm.o, except the source path handed to m4 |
# is first converted with "cygpath -w". |
.asm.obj: |
|
$(M4) $(M4FLAGS) -DOPERATION_$* `cygpath -w $<` >tmp-$*.s |
|
$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ |
|
rm -f tmp-$*.s |
|
# .asm -> .lo: prints $(COMPILE_FLAGS), then does two passes over the same |
# source.  The first pass adds -DPIC and compiles through libtool into the |
# target; the second pass omits -DPIC and assembles directly into <stem>.o. |
# Both passes reuse tmp-<stem>.s, which is removed at the end. |
.asm.lo: |
|
echo "$(COMPILE_FLAGS)" |
|
$(M4) $(M4FLAGS) -DPIC -DOPERATION_$* $< >tmp-$*.s |
|
$(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ |
|
$(M4) $(M4FLAGS) -DOPERATION_$* $< >tmp-$*.s |
|
$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o |
|
rm -f tmp-$*.s |
|
|
|
|
# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl |
|
CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \ |
|
$(TUNE_MPN_SRCS) sqr_asm.asm \ |
|
stg.gnuplot stg.data \ |
|
mtg.gnuplot mtg.data \ |
|
fibg.gnuplot fibg.data \ |
|
graph.gnuplot graph.data \ |
|
$(MANY_CLEAN) |
|
DISTCLEANFILES = sqr_basecase.c $(MANY_DISTCLEAN) |
|
|
# Recompiled versions of $(LIBGMPTUNE_OBJS) are always used. sqr_basecase.o |
|
# is replaced if it's mpn/generic/sqr_basecase.c (due to the fixed size |
|
# temporary array), or sqr_basecase.asm with a KARATSUBA_SQR_THRESHOLD_MAX |
|
# (because that says it normally works only up to KARATSUBA_SQR_THRESHOLD). |
|
|
|
SQR_GENERIC_MAX = 200 |
# Generating these little files at build time seems better than including |
|
# them in the distribution, since the list can be changed more easily. |
|
# |
|
# mpn/generic/tdiv_qr.c uses mpn_divrem_1 and mpn_divrem_2, but only for 1 |
|
# and 2 limb divisors, which are never used during tuning, so it doesn't |
|
# matter whether it picks up a tuned or untuned version of those. |
|
# |
|
# divrem_1 and mod_1 are recompiled renamed to "_tune" to avoid a linking |
|
# problem. If a native divrem_1 provides an mpn_divrem_1c entrypoint then |
|
# common.c will want that, but the generic divrem_1 doesn't provide it, |
|
# likewise for mod_1. The simplest way around this is to have the tune |
|
# build versions renamed suitably. |
|
# |
|
# FIXME: Would like, say, mul_n.c to depend on $(top_builddir)/mul_n.c so the |
|
# recompiled object will be rebuilt if that file changes. |
|
|
LIBGMPTUNE_OBJS = \ |
TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c |
mul_n.o mul_fft.o mul.o \ |
TUNE_MPN_SRCS_BASIC = dc_divrem_n.c divrem_2.c gcd.c get_str.c \ |
bz_divrem_n.o tdiv_qr.o fib_ui.o powm.o gcd.o gcdext.o |
mul_n.c mul_fft.c mul.c sb_divrem_mn.c tdiv_qr.c |
|
|
sqr_basecase.h: libgmptune.a |
# Generate the small wrapper sources named in TUNE_MPN_SRCS_BASIC.  Each |
# wrapper is two lines: a #define of TUNE_PROGRAM_BUILD to 1, followed by a |
# #include of the same-named file under mpn/generic/.  The loop walks the |
# whole list, so building any one of these targets rewrites all of them. |
$(TUNE_MPN_SRCS_BASIC): |
|
for i in $(TUNE_MPN_SRCS_BASIC); do \ |
|
echo "#define TUNE_PROGRAM_BUILD 1" >$$i; \ |
|
echo "#include \"mpn/generic/$$i\"" >>$$i; \ |
|
done |
|
|
|
# Wrapper for divrem_1: like the wrappers above, plus a #define that maps |
# __gmpn_divrem_1 to mpn_divrem_1_tune before the generic source is |
# included. |
divrem_1.c: |
|
echo "#define TUNE_PROGRAM_BUILD 1" >divrem_1.c |
|
echo "#define __gmpn_divrem_1 mpn_divrem_1_tune" >>divrem_1.c |
|
echo "#include \"mpn/generic/divrem_1.c\"" >>divrem_1.c |
|
|
# "cp -f" is not portable (eg. it fails on SunOS) |
mod_1.c: |
# "ar r" is going to be a bit fragile because there's more than one mul.o in |
echo "#define TUNE_PROGRAM_BUILD 1" >mod_1.c |
# libgmp.a. |
echo "#define __gmpn_mod_1 mpn_mod_1_tune" >>mod_1.c |
|
echo "#include \"mpn/generic/mod_1.c\"" >>mod_1.c |
|
|
libgmptune.a: $(top_builddir)/libgmp.la $(LIBGMPTUNE_OBJS) |
sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm |
rm -f libgmptmp.a |
echo 'define(SQR_KARATSUBA_THRESHOLD_OVERRIDE,SQR_KARATSUBA_THRESHOLD_MAX)' >sqr_asm.asm |
eval `$(LIBTOOL) --config | grep '^objdir='` ; \ |
echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm |
cp $(top_builddir)/$$objdir/libgmp.a libgmptmp.a |
|
$(AR) r libgmptmp.a $(LIBGMPTUNE_OBJS) |
|
rm -f $(SQR_CLEANFILES) |
|
echo "" >sqr_basecase.h |
|
if test -f $(top_builddir)/mpn/sqr_basecase.c; then \ |
|
$(MAKE) $(AM_MAKEFLAGS) sqr-basecase-generic; \ |
|
else \ |
|
if grep KARATSUBA_SQR_THRESHOLD_MAX \ |
|
$(top_builddir)/mpn/sqr_basecase.asm >/dev/null; then \ |
|
$(MAKE) $(AM_MAKEFLAGS) sqr-basecase-asm; \ |
|
fi \ |
|
fi |
|
$(RANLIB) libgmptmp.a |
|
rm -f libgmptune.a |
|
mv libgmptmp.a libgmptune.a |
|
|
|
# Rebuild sqr_basecase from the C source: copy sqr_basecase.c from |
# $(top_builddir)/mpn into the current directory, compile it through a |
# sub-make with -DKARATSUBA_SQR_THRESHOLD=$(SQR_GENERIC_MAX) appended to |
# CFLAGS, add the object to libgmptmp.a with "$(AR) r", and overwrite |
# sqr_basecase.h with a #define of KARATSUBA_SQR_MAX to the same value. |
sqr-basecase-generic: |
|
cp $(top_builddir)/mpn/sqr_basecase.c . |
|
$(MAKE) $(AM_MAKEFLAGS) \ |
|
CFLAGS='$(CFLAGS) -DKARATSUBA_SQR_THRESHOLD=$(SQR_GENERIC_MAX)' \ |
|
sqr_basecase.o |
|
$(AR) r libgmptmp.a sqr_basecase.o |
|
echo '#define KARATSUBA_SQR_MAX $(SQR_GENERIC_MAX)' >sqr_basecase.h |
|
|
|
# Rebuild sqr_basecase from the assembler source: copy sqr_basecase.asm |
# from $(top_builddir)/mpn into the current directory, assemble it through |
# a sub-make with -DKARATSUBA_SQR_THRESHOLD_OVERRIDE=KARATSUBA_SQR_THRESHOLD_MAX |
# appended to M4FLAGS, and add the object to libgmptmp.a.  The archiver is |
# invoked as $(AR), matching the other "ar r" steps in this file, so a |
# configured or cross archiver is honoured.  Finally sqr_basecase.h is |
# written with KARATSUBA_SQR_MAX set to the number that sed extracts from |
# the "def...(KARATSUBA_SQR_THRESHOLD_MAX, N)" line of the copied .asm file. |
sqr-basecase-asm: |
include ../mpn/Makeasm.am |
cp $(top_builddir)/mpn/sqr_basecase.asm . |
|
$(MAKE) $(AM_MAKEFLAGS) \ |
|
M4FLAGS='$(M4FLAGS) -DKARATSUBA_SQR_THRESHOLD_OVERRIDE=KARATSUBA_SQR_THRESHOLD_MAX' \ |
|
sqr_basecase.o |
|
$(AR) r libgmptmp.a sqr_basecase.o |
|
echo "#define KARATSUBA_SQR_MAX `sed -n 's/def...(KARATSUBA_SQR_THRESHOLD_MAX, *\([0-9]*\))/\1/p' sqr_basecase.asm`" >sqr_basecase.h |
|
|
|
|
|
# Explicit rules for the recompiled tune objects.  Each one compiles a |
# single source file with $(COMPILE) and -DTUNE_PROGRAM_BUILD=1, leaving |
# the object in the current directory.  The mpn sources are taken from |
# $(top_builddir)/mpn. |
mul_n.o: $(top_builddir)/mpn/mul_n.c |
|
$(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/mul_n.c |
|
|
|
mul_fft.o: $(top_builddir)/mpn/mul_fft.c |
|
$(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/mul_fft.c |
|
|
|
mul.o: $(top_builddir)/mpn/mul.c |
|
$(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/mul.c |
|
|
|
bz_divrem_n.o: $(top_builddir)/mpn/bz_divrem_n.c |
|
$(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/bz_divrem_n.c |
|
|
|
tdiv_qr.o: $(top_builddir)/mpn/tdiv_qr.c |
|
$(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/tdiv_qr.c |
|
|
|
# The two mpz sources below come from $(top_srcdir)/mpz rather than the |
# build tree. |
fib_ui.o: $(top_srcdir)/mpz/fib_ui.c |
|
$(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_srcdir)/mpz/fib_ui.c |
|
|
|
powm.o: $(top_srcdir)/mpz/powm.c |
|
$(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_srcdir)/mpz/powm.c |
|
|
|
gcd.o: $(top_builddir)/mpn/gcd.c |
|
$(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/gcd.c |
|
|
|
gcdext.o: $(top_builddir)/mpn/gcdext.c |
|
$(COMPILE) -DTUNE_PROGRAM_BUILD=1 -c $(top_builddir)/mpn/gcdext.c |
|
|
|
|
|
|
|
|
|
# "mk" is multiplication in the karatsuba range |
# "mk" is multiplication in the karatsuba range |
# "st" is squaring in the toom-cook range, etc |
# "st" is squaring in the toom-cook range, etc |
# "g" forms produce graphs |
# "g" forms produce graphs |
|
|
|
|
MTS = -s 50-150 -c |
MTS = -s 50-150 -c |
mt: |
mt: |
./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n | less |
./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n |
mtg: |
mtg: |
./speed $(MTS) -P mtg mpn_kara_mul_n mpn_toom3_mul_n |
./speed $(MTS) -P mtg mpn_kara_mul_n mpn_toom3_mul_n |
|
|
|
|
|
|
STS = -s 50-150 -c |
STS = -s 50-150 -c |
st: |
st: |
./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n | less |
./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n |
stg: |
stg: |
./speed $(STS) -P stg mpn_kara_sqr_n mpn_toom3_sqr_n |
./speed $(STS) -P stg mpn_kara_sqr_n mpn_toom3_sqr_n |
|
|
bz: |
dc: |
./speed -s 5-40 -c mpn_bz_divrem_sb mpn_bz_divrem_n mpn_bz_tdiv_qr |
./speed -s 5-40 -c mpn_dc_divrem_sb mpn_dc_divrem_n mpn_dc_tdiv_qr |
|
|
fib: |
fib: |
./speed -s 80-120 -c mpz_fib_ui |
./speed -s 40-60 -c mpz_fib_ui |
fibg: |
fibg: |
./speed -s 10-300 -P fibg mpz_fib_ui |
./speed -s 10-300 -P fibg mpz_fib_ui |
|
|
|
|
gcd: |
gcd: |
./speed -s 1-20 -c mpn_gcd |
./speed -s 1-20 -c mpn_gcd |
|
|
|
# Convenience targets that run ./speed on division-related routines.  They |
# expect a ./speed executable to exist already; none of them builds it. |
# |
# "udiv" runs the single-limb division primitives at size 1. |
udiv: |
|
./speed -s 1 -c udiv_qrnnd udiv_qrnnd_preinv udiv_qrnnd_preinv2norm invert_limb udiv_qrnnd_c |
|
|
|
# "divn" and "divun" compare the _div and _inv variants of mpn_divrem_1 |
# over sizes 1-30.  The ".-1" and ".12345" suffixes are passed to ./speed |
# as part of the routine name. |
# NOTE(review): presumably the suffix selects the divisor (normalized for |
# "n", unnormalized for "un") -- confirm against tune/README. |
divn: |
|
./speed -s 1-30 -c mpn_divrem_1_div.-1 mpn_divrem_1_inv.-1 |
|
divun: |
|
./speed -s 1-30 -c mpn_divrem_1_div.12345 mpn_divrem_1_inv.12345 |
|
# "modn" and "modun" are the same comparisons for mpn_mod_1. |
modn: |
|
./speed -s 1-30 -c mpn_mod_1_div.-1 mpn_mod_1_inv.-1 |
|
modun: |
|
./speed -s 1-30 -c mpn_mod_1_div.12345 mpn_mod_1_inv.12345 |
|
|
|
|
graph: |
graph: |