commit 4bba6e7176632b4d760ba9362a1515552471d741
Author: Mattias Andrée <maandree_AT_kth.se>
AuthorDate: Fri Apr 29 21:54:39 2016 +0200
Commit: Mattias Andrée <maandree_AT_kth.se>
CommitDate: Fri Apr 29 21:55:15 2016 +0200
Some optimisations, fix refsheet, and disable const/pure attributes in gmp in benchmark
Signed-off-by: Mattias Andrée <maandree_AT_kth.se>
diff --git a/Makefile b/Makefile
index 406209d..5e4ace9 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@ include config.mk
HDR_PUBLIC =\
zahl.h\
- zahl-internals.h\
- zahl-inlines.h
+ zahl-inlines.h\
+ zahl-internals.h
HDR_PRIVATE =\
src/internals.h
@@ -33,12 +33,9 @@ FUN =\
zptest\
zrand\
zrsh\
- zsave\
zset\
zsets\
- zsetu\
zsetup\
- zsplit\
zsqr\
zstr\
zstr_length\
@@ -48,24 +45,27 @@ FUN =\
zxor
INLINE_FUN =\
- zinit\
- zswap\
- zeven\
- zodd\
- zeven_nonzero\
- zodd_nonzero\
- zzero\
- zsignum\
zabs\
- zneg\
- zlsb\
zbits\
- zseti\
+ zbtest\
zcmp\
zcmpi\
zcmpmag\
zcmpu\
- zbtest
+ zeven\
+ zeven_nonzero\
+ zinit\
+ zlsb\
+ zneg\
+ zodd\
+ zodd_nonzero\
+ zsave\
+ zseti\
+ zsetu\
+ zsignum\
+ zsplit\
+ zswap\
+ zzero
DOC =\
refsheet.pdf
diff --git a/STATUS b/STATUS
index aa8d510..5e705ee 100644
--- a/STATUS
+++ b/STATUS
@@ -16,7 +16,7 @@ zadd_unsigned ........... fastest (faster than all others' zadd)
zsub_unsigned ........... fastest (faster than all others' zsub)
zadd .................... 87 % of tomsfastmath, 83 % libtommath, 80 % of hebimath
zsub .................... 97 % of tomsfastmath, 95 % hebimath, 93 % of libtommath
-zand .................... 93 % of gmp, 49 % of tomsfastmath
+zand .................... 49 % of tomsfastmath
zor ..................... 36 % of tomsfastmath
zxor .................... 51 % of tomsfastmath
znot .................... fastest
@@ -26,16 +26,16 @@ zeven_nonzero ........... fastest (shared with gmp, libtommath, and tomsfastmath
zodd_nonzero ............ fastest (shared with gmp, libtommath, and tomsfastmath)
zzero ................... fastest (shared with gmp and libtommath)
zsignum ................. fastest (shared with gmp)
-zbits ................... gmp is faster, because of bug in libzahl
-zlsb .................... fastest (shared with gmp)
+zbits ................... fastest
+zlsb .................... fastest
zswap ................... fastest
zlsh .................... fastest
zrsh .................... fastest
ztrunc(a, b, c) ......... fastest
ztrunc(a, a, b) ......... fastest
-zsplit .................. 95 % of gmp
-zcmpmag ................. gmp is faster
-zcmp .................... 94 % of tomsfastmath, 81 % of hebimath, gmp is even faster (zcmpmag)
+zsplit .................. fastest
+zcmpmag ................. fastest
+zcmp .................... 94 % of tomsfastmath, 81 % of hebimath (zcmpmag)
zcmpi ................... fastest
zcmpu ................... fastest
zbset(a, b, 1) .......... fastest
@@ -44,8 +44,8 @@ zbset(a, b, 0) .......... fastest
zbset(a, a, 0) .......... fastest
zbset(a, b, -1) ......... fastest
zbset(a, a, -1) ......... fastest
-zbtest .................. fastest (shared with gmp)
-zgcd .................... 26 % of gmp (zcmpmag)
+zbtest .................. fastest
+zgcd .................... 17 % of gmp (zcmpmag)
zmul .................... slowest
zsqr .................... slowest (zmul)
zmodmul(big mod) ........ slowest ((zmul, zmod))
diff --git a/bench/libgmp.h b/bench/libgmp.h
index de59602..7b5e1c8 100644
--- a/bench/libgmp.h
+++ b/bench/libgmp.h
@@ -1,3 +1,5 @@
+#define __GMP_NO_ATTRIBUTE_CONST_PURE
+
#include <gmp.h>
#include <setjmp.h>
diff --git a/doc/refsheet.tex b/doc/refsheet.tex
index e39c509..d0c31f7 100644
--- a/doc/refsheet.tex
+++ b/doc/refsheet.tex
@@ -106,10 +106,10 @@ Get string length of $a$ & {\tt zstr\_length(a, b)} & returns {\tt size\_
\textbf{Marshallisation} & {} & {} \\
Marshal $a$ into $b$ & {\tt zsave(a, b)} & returns {\tt size\_t} number of saved bytes, \\
-{} & {} & $~~~~~$ {\tt b} is a {\tt char *\_t} \\
+{} & {} & $~~~~~$ {\tt b} is a {\tt void *\_t} \\
Get marshal-size of $a$ & {\tt zsave(a, NULL)} & returns {\tt size\_t} \\
Unmarshal $a$ from $b$ & {\tt zload(a, b)} & returns {\tt size\_t} number of read bytes, \\
-{} & {} & $~~~~~$ {\tt b} is a {\tt const char *\_t} \\
+{} & {} & $~~~~~$ {\tt b} is a {\tt const void *\_t} \\
\\
\textbf{Number theory} & {} & {} \\
diff --git a/src/internals.h b/src/internals.h
index 6faa254..a049e1a 100644
--- a/src/internals.h
+++ b/src/internals.h
@@ -105,11 +105,12 @@ extern void *libzahl_temp_allocation;
#define SET_SIGNUM(a, signum) ZAHL_SET_SIGNUM(a, signum)
#define SET(a, b) ZAHL_SET(a, b)
-#define ENSURE_SIZE(a, n) do { if ((a)->alloced < (n)) libzahl_realloc(a, (n)); } while (0)
+#define ENSURE_SIZE(a, n) ZAHL_ENSURE_SIZE(a, n)
#define TRIM(a) ZAHL_TRIM(a)
#define TRIM_NONZERO(a) ZAHL_TRIM_NONZERO(a)
#define TRIM_AND_ZERO(a) ZAHL_TRIM_AND_ZERO(a)
#define TRIM_AND_SIGN(a, s) ZAHL_TRIM_AND_SIGN(a, s)
+#define SWAP(a, b, t, m) ((t)->m = (a)->m, (a)->m = (b)->m, (b)->m = (t)->m)
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define znegative(a) (zsignum(a) < 0)
@@ -120,8 +121,9 @@ extern void *libzahl_temp_allocation;
#define zpositive2(a, b) (zsignum(a) + zsignum(b) == 2)
#define zzero2(a, b) (!(zsignum(a) | zsignum(b)))
#define zmemmove(d, s, n) memmove((d), (s), (n) * sizeof(zahl_char_t))
+#define zmemcpy(d, s, n) libzahl_memcpy(d, s, n)
+#define zmemset(a, v, n) libzahl_memset(a, v, n)
-void libzahl_realloc(z_t a, size_t need);
void zmul_impl(z_t a, z_t b, z_t c);
void zsqr_impl(z_t a, z_t b);
@@ -151,20 +153,6 @@ libzahl_memfailure(void)
libzahl_failure(errno);
}
-static inline void
-zmemcpy(zahl_char_t *restrict d, const zahl_char_t *restrict s, register size_t n)
-{
- while (n--)
- d[n] = s[n];
-}
-
-static inline void
-zmemset(zahl_char_t *a, register zahl_char_t v, register size_t n)
-{
- while (n--)
- a[n] = v;
-}
-
/*
* libzahl_msb_nz_zu
* ^^^ ^^ ^^
@@ -218,6 +206,17 @@ libzahl_add_overflow(zahl_char_t *rp, zahl_char_t a, zahl_char_t b)
#endif
static inline void
+zsplit_pz(z_t high, z_t low, z_t a, size_t delim)
+{
+ if (unlikely(zzero(a))) {
+ SET_SIGNUM(high, 0);
+ SET_SIGNUM(low, 0);
+ } else {
+ zsplit(high, low, a, delim);
+ }
+}
+
+static inline void
zrsh_taint(z_t a, size_t bits)
{
size_t i, chars, cbits;
diff --git a/src/zbset.c b/src/zbset.c
index 3d93fd4..223b2d5 100644
--- a/src/zbset.c
+++ b/src/zbset.c
@@ -23,14 +23,14 @@
void
-zbset_impl_set(z_t a, size_t bit)
+zbset_ll_set(z_t a, size_t bit)
{
PROLOGUE(1);
a->chars[chars] |= mask;
}
void
-zbset_impl_clear(z_t a, size_t bit)
+zbset_ll_clear(z_t a, size_t bit)
{
PROLOGUE(0);
a->chars[chars] &= ~mask;
@@ -38,7 +38,7 @@ zbset_impl_clear(z_t a, size_t bit)
}
void
-zbset_impl_flip(z_t a, size_t bit)
+zbset_ll_flip(z_t a, size_t bit)
{
PROLOGUE(1);
a->chars[chars] ^= mask;
diff --git a/src/zmul.c b/src/zmul.c
index 6ea6d21..3129061 100644
--- a/src/zmul.c
+++ b/src/zmul.c
@@ -53,8 +53,8 @@ zmul_impl(z_t a, z_t b, z_t c)
zinit_temp(c_high);
zinit_temp(c_low);
- zsplit(b_high, b_low, b, m2);
- zsplit(c_high, c_low, c, m2);
+ zsplit_pz(b_high, b_low, b, m2);
+ zsplit_pz(c_high, c_low, c, m2);
zmul_impl(z0, b_low, c_low);
diff --git a/src/zsave.c b/src/zsave.c
deleted file mode 100644
index 8b08f2a..0000000
--- a/src/zsave.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-size_t
-zsave(z_t a, void *buffer)
-{
- if (buffer) {
- char *buf = buffer;
- *((int *)buf) = a->sign, buf += sizeof(int);
- *((size_t *)buf) = a->used, buf += sizeof(size_t);
- if (likely(!zzero(a)))
- zmemcpy((zahl_char_t *)buf, a->chars, a->used);
- }
- return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * sizeof(zahl_char_t));
-}
diff --git a/src/zsetu.c b/src/zsetu.c
deleted file mode 100644
index 42e8cec..0000000
--- a/src/zsetu.c
+++ /dev/null
_AT_@ -1,16 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-void
-zsetu(z_t a, uint64_t b)
-{
- if (!b) {
- SET_SIGNUM(a, 0);
- return;
- }
- ENSURE_SIZE(a, 1);
- SET_SIGNUM(a, 1);
- a->chars[0] = (zahl_char_t)b;
- a->used = 1;
-}
diff --git a/src/zsplit.c b/src/zsplit.c
deleted file mode 100644
index afb8a33..0000000
--- a/src/zsplit.c
+++ /dev/null
_AT_@ -1,22 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-void
-zsplit(z_t high, z_t low, z_t a, size_t delim)
-{
- if (unlikely(zzero(a))) {
- /* This is for performance. */
- SET_SIGNUM(high, 0);
- SET_SIGNUM(low, 0);
- return;
- }
-
- if (unlikely(high == a)) {
- ztrunc(low, a, delim);
- zrsh(high, a, delim);
- } else {
- zrsh(high, a, delim);
- ztrunc(low, a, delim);
- }
-}
diff --git a/zahl-inlines.h b/zahl-inlines.h
index 7c06270..abea865 100644
--- a/zahl-inlines.h
+++ b/zahl-inlines.h
@@ -14,19 +14,12 @@ ZAHL_INLINE void zneg(z_t a, z_t b) { ZAHL_SET(a, b); a->sign = -a->sign; }
ZAHL_INLINE void
zswap(z_t a, z_t b)
{
+ /* Almost three times faster than the naïve method. */
z_t t;
- t->sign = a->sign;
- a->sign = b->sign;
- b->sign = t->sign;
- t->used = b->used;
- b->used = a->used;
- a->used = t->used;
- t->alloced = a->alloced;
- a->alloced = b->alloced;
- b->alloced = t->alloced;
- t->chars = b->chars;
- b->chars = a->chars;
- a->chars = t->chars;
+ ZAHL_SWAP(a, b, t, sign);
+ ZAHL_SWAP(b, a, t, used);
+ ZAHL_SWAP(a, b, t, alloced);
+ ZAHL_SWAP(b, a, t, chars);
}
@@ -42,6 +35,20 @@ zseti(z_t a, int64_t b)
}
+ZAHL_INLINE void
+zsetu(z_t a, uint64_t b)
+{
+ if (!b) {
+ ZAHL_SET_SIGNUM(a, 0);
+ return;
+ }
+ ZAHL_ENSURE_SIZE(a, 1);
+ ZAHL_SET_SIGNUM(a, 1);
+ a->chars[0] = (zahl_char_t)b;
+ a->used = 1;
+}
+
+
ZAHL_INLINE size_t
zlsb(z_t a)
{
@@ -140,10 +147,6 @@ zcmpi(z_t a, int64_t b)
}
-void zbset_impl_set(z_t a, size_t bit);
-void zbset_impl_clear(z_t a, size_t bit);
-void zbset_impl_flip(z_t a, size_t bit);
-
ZAHL_INLINE void
zbset(z_t a, z_t b, size_t bit, int action)
{
@@ -174,11 +177,11 @@ fallback:
#endif
if (action > 0)
- zbset_impl_set(a, bit);
+ zbset_ll_set(a, bit);
else if (action < 0)
- zbset_impl_flip(a, bit);
+ zbset_ll_flip(a, bit);
else
- zbset_impl_clear(a, bit);
+ zbset_ll_clear(a, bit);
}
@@ -196,3 +199,30 @@ zbtest(z_t a, size_t bit)
bit &= ZAHL_BITS_IN_LAST_CHAR(bit);
return (a->chars[chars] >> bit) & 1;
}
+
+
+ZAHL_INLINE void
+zsplit(z_t high, z_t low, z_t a, size_t delim)
+{
+ if (ZAHL_UNLIKELY(high == a)) {
+ ztrunc(low, a, delim);
+ zrsh(high, a, delim);
+ } else {
+ zrsh(high, a, delim);
+ ztrunc(low, a, delim);
+ }
+}
+
+
+ZAHL_INLINE size_t
+zsave(z_t a, void *buffer)
+{
+ if (ZAHL_LIKELY(buffer)) {
+ char *buf = buffer;
+ *((int *)buf) = a->sign, buf += sizeof(int);
+ *((size_t *)buf) = a->used, buf += sizeof(size_t);
+ if (ZAHL_LIKELY(!zzero(a)))
+ libzahl_memcpy((zahl_char_t *)buf, a->chars, a->used);
+ }
+ return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * sizeof(zahl_char_t));
+}
diff --git a/zahl-internals.h b/zahl-internals.h
index 9d6df24..4adbdfe 100644
--- a/zahl-internals.h
+++ b/zahl-internals.h
@@ -32,10 +32,12 @@
#define ZAHL_SET_SIGNUM(a, signum) ((a)->sign = (signum))
#define ZAHL_SET(a, b) do { if ((a) != (b)) zset(a, b); } while (0)
+#define ZAHL_ENSURE_SIZE(a, n) do { if ((a)->alloced < (n)) libzahl_realloc(a, (n)); } while (0)
#define ZAHL_TRIM(a) for (; (a)->used && !(a)->chars[(a)->used - 1]; (a)->used--)
#define ZAHL_TRIM_NONZERO(a) for (; !(a)->chars[(a)->used - 1]; (a)->used--)
#define ZAHL_TRIM_AND_ZERO(a) do { ZAHL_TRIM(a); if (!(a)->used) ZAHL_SET_SIGNUM(a, 0); } while (0)
#define ZAHL_TRIM_AND_SIGN(a, s) do { ZAHL_TRIM(a); ZAHL_SET_SIGNUM(a, (a)->used ? (s) : 0); } while (0)
+#define ZAHL_SWAP(a, b, t, m) ((t)->m = (a)->m, (a)->m = (b)->m, (b)->m = (t)->m)
#if defined(__GNUC__) || defined(__clang__)
@@ -72,3 +74,20 @@ struct zahl {
size_t alloced;
zahl_char_t *chars;
};
+
+
+void libzahl_realloc(struct zahl *, size_t);
+
+static inline void
+libzahl_memcpy(zahl_char_t *restrict d, const zahl_char_t *restrict s, register size_t n)
+{
+ while (n--)
+ d[n] = s[n];
+}
+
+static inline void
+libzahl_memset(zahl_char_t *a, register zahl_char_t v, register size_t n)
+{
+ while (n--)
+ a[n] = v;
+}
diff --git a/zahl.h b/zahl.h
index f3d3d69..1e27882 100644
--- a/zahl.h
+++ b/zahl.h
@@ -66,14 +66,14 @@ void zunsetup(void); /* Free resources used by libzahl */
ZAHL_INLINE void zinit(z_t); /* Prepare a for use. */
ZAHL_INLINE void zswap(z_t, z_t); /* (a, b) := (b, a) */
void zfree(z_t); /* Free resources in a. */
-size_t zsave(z_t, void *); /* Store a into b (if !!b), and return number of written bytes. */
+ZAHL_INLINE size_t zsave(z_t, void *); /* Store a into b (if !!b), and return number of written bytes. */
size_t zload(z_t, const void *); /* Restore a from b, and return number of read bytes. */
/* Assignment functions. */
void zset(z_t, z_t); /* a := b */
-void zsetu(z_t, uint64_t); /* a := b */
+ZAHL_INLINE void zsetu(z_t, uint64_t); /* a := b */
ZAHL_INLINE void zseti(z_t, int64_t); /* a := b */
/* Comparison functions. */
@@ -119,7 +119,8 @@ void znot(z_t, z_t); /* a := ~b */
void zlsh(z_t, z_t, size_t); /* a := b << c */
void zrsh(z_t, z_t, size_t); /* a := b >> c */
void ztrunc(z_t, z_t, size_t); /* a := b & ((1 << c) - 1) */
-void zsplit(z_t, z_t, z_t, size_t); /* a := c >> d, b := c - (a << d) */
+ZAHL_INLINE void zsplit(z_t, z_t, z_t, size_t);
+ /* a := c >> d, b := c - (a << d) */
ZAHL_INLINE int zbtest(z_t, size_t); /* (a >> b) & 1 */
ZAHL_INLINE size_t zlsb(z_t); /* Index of first set bit, SIZE_MAX if none are set. */
ZAHL_INLINE size_t zbits(z_t); /* ⌊log₂ |a|⌋ + 1, 1 if a = 0 */
@@ -165,6 +166,14 @@ void zperror(const char *); /* Identical to perror(3p) except it sup
+/* Low-level functions. [Do not count on these to be retained between different versions of libzahl.] */
+
+void zbset_ll_set(z_t, size_t); /* zbset(a, a, b, 1) */
+void zbset_ll_clear(z_t, size_t); /* zbset(a, a, b, 0) */
+void zbset_ll_flip(z_t, size_t); /* zbset(a, a, b, -1) */
+
+
+
#include "zahl-inlines.h"
#endif
Received on Fri Apr 29 2016 - 21:55:32 CEST
This archive was generated by hypermail 2.3.0
: Fri Apr 29 2016 - 22:00:33 CEST