Some optimisations, fix refsheet, and disable const/pure attributes in gmp in benchmark || Mattias Andrée

From: <git_AT_suckless.org>
Date: Fri, 29 Apr 2016 21:55:32 +0200 (CEST)

commit 4bba6e7176632b4d760ba9362a1515552471d741
Author: Mattias Andrée <maandree_AT_kth.se>
AuthorDate: Fri Apr 29 21:54:39 2016 +0200
Commit: Mattias Andrée <maandree_AT_kth.se>
CommitDate: Fri Apr 29 21:55:15 2016 +0200

    Some optimisations, fix refsheet, and disable const/pure attributes in gmp in benchmark
    
    Signed-off-by: Mattias Andrée <maandree_AT_kth.se>

diff --git a/Makefile b/Makefile
index 406209d..5e4ace9 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@ include config.mk
 
 HDR_PUBLIC =\
         zahl.h\
- zahl-internals.h\
- zahl-inlines.h
+ zahl-inlines.h\
+ zahl-internals.h
 
 HDR_PRIVATE =\
         src/internals.h
@@ -33,12 +33,9 @@ FUN =\
         zptest\
         zrand\
         zrsh\
- zsave\
         zset\
         zsets\
- zsetu\
         zsetup\
- zsplit\
         zsqr\
         zstr\
         zstr_length\
@@ -48,24 +45,27 @@ FUN =\
         zxor
 
 INLINE_FUN =\
- zinit\
- zswap\
- zeven\
- zodd\
- zeven_nonzero\
- zodd_nonzero\
- zzero\
- zsignum\
         zabs\
- zneg\
- zlsb\
         zbits\
- zseti\
+ zbtest\
         zcmp\
         zcmpi\
         zcmpmag\
         zcmpu\
- zbtest
+ zeven\
+ zeven_nonzero\
+ zinit\
+ zlsb\
+ zneg\
+ zodd\
+ zodd_nonzero\
+ zsave\
+ zseti\
+ zsetu\
+ zsignum\
+ zsplit\
+ zswap\
+ zzero
 
 DOC =\
         refsheet.pdf
diff --git a/STATUS b/STATUS
index aa8d510..5e705ee 100644
--- a/STATUS
+++ b/STATUS
@@ -16,7 +16,7 @@ zadd_unsigned ........... fastest (faster than all others' zadd)
 zsub_unsigned ........... fastest (faster than all others' zsub)
 zadd .................... 87 % of tomsfastmath, 83 % libtommath, 80 % of hebimath
 zsub .................... 97 % of tomsfastmath, 95 % hebimath, 93 % of libtommath
-zand .................... 93 % of gmp, 49 % of tomsfastmath
+zand .................... 49 % of tomsfastmath
 zor ..................... 36 % of tomsfastmath
 zxor .................... 51 % of tomsfastmath
 znot .................... fastest
@@ -26,16 +26,16 @@ zeven_nonzero ........... fastest (shared with gmp, libtommath, and tomsfastmath
 zodd_nonzero ............ fastest (shared with gmp, libtommath, and tomsfastmath)
 zzero ................... fastest (shared with gmp and libtommath)
 zsignum ................. fastest (shared with gmp)
-zbits ................... gmp is faster, because of bug in libzahl
-zlsb .................... fastest (shared with gmp)
+zbits ................... fastest
+zlsb .................... fastest
 zswap ................... fastest
 zlsh .................... fastest
 zrsh .................... fastest
 ztrunc(a, b, c) ......... fastest
 ztrunc(a, a, b) ......... fastest
-zsplit .................. 95 % of gmp
-zcmpmag ................. gmp is faster
-zcmp .................... 94 % of tomsfastmath, 81 % of hebimath, gmp is even faster (zcmpmag)
+zsplit .................. fastest
+zcmpmag ................. fastest
+zcmp .................... 94 % of tomsfastmath, 81 % of hebimath (zcmpmag)
 zcmpi ................... fastest
 zcmpu ................... fastest
 zbset(a, b, 1) .......... fastest
@@ -44,8 +44,8 @@ zbset(a, b, 0) .......... fastest
 zbset(a, a, 0) .......... fastest
 zbset(a, b, -1) ......... fastest
 zbset(a, a, -1) ......... fastest
-zbtest .................. fastest (shared with gmp)
-zgcd .................... 26 % of gmp (zcmpmag)
+zbtest .................. fastest
+zgcd .................... 17 % of gmp (zcmpmag)
 zmul .................... slowest
 zsqr .................... slowest (zmul)
 zmodmul(big mod) ........ slowest ((zmul, zmod))
diff --git a/bench/libgmp.h b/bench/libgmp.h
index de59602..7b5e1c8 100644
--- a/bench/libgmp.h
+++ b/bench/libgmp.h
@@ -1,3 +1,5 @@
+#define __GMP_NO_ATTRIBUTE_CONST_PURE
+
 #include <gmp.h>
 
 #include <setjmp.h>
diff --git a/doc/refsheet.tex b/doc/refsheet.tex
index e39c509..d0c31f7 100644
--- a/doc/refsheet.tex
+++ b/doc/refsheet.tex
@@ -106,10 +106,10 @@ Get string length of $a$ & {\tt zstr\_length(a, b)} & returns {\tt size\_
 
 \textbf{Marshallisation} & {} & {} \\
 Marshal $a$ into $b$ & {\tt zsave(a, b)} & returns {\tt size\_t} number of saved bytes, \\
-{} & {} & $~~~~~$ {\tt b} is a {\tt char *\_t} \\
+{} & {} & $~~~~~$ {\tt b} is a {\tt void *\_t} \\
 Get marshal-size of $a$ & {\tt zsave(a, NULL)} & returns {\tt size\_t} \\
 Unmarshal $a$ from $b$ & {\tt zload(a, b)} & returns {\tt size\_t} number of read bytes, \\
-{} & {} & $~~~~~$ {\tt b} is a {\tt const char *\_t} \\
+{} & {} & $~~~~~$ {\tt b} is a {\tt const void *\_t} \\
 \\
 
 \textbf{Number theory} & {} & {} \\
diff --git a/src/internals.h b/src/internals.h
index 6faa254..a049e1a 100644
--- a/src/internals.h
+++ b/src/internals.h
@@ -105,11 +105,12 @@ extern void *libzahl_temp_allocation;
 
 #define SET_SIGNUM(a, signum) ZAHL_SET_SIGNUM(a, signum)
 #define SET(a, b) ZAHL_SET(a, b)
-#define ENSURE_SIZE(a, n) do { if ((a)->alloced < (n)) libzahl_realloc(a, (n)); } while (0)
+#define ENSURE_SIZE(a, n) ZAHL_ENSURE_SIZE(a, n)
 #define TRIM(a) ZAHL_TRIM(a)
 #define TRIM_NONZERO(a) ZAHL_TRIM_NONZERO(a)
 #define TRIM_AND_ZERO(a) ZAHL_TRIM_AND_ZERO(a)
 #define TRIM_AND_SIGN(a, s) ZAHL_TRIM_AND_SIGN(a, s)
+#define SWAP(a, b, t, m) ((t)->m = (a)->m, (a)->m = (b)->m, (b)->m = (t)->m)
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 #define znegative(a) (zsignum(a) < 0)
@@ -120,8 +121,9 @@ extern void *libzahl_temp_allocation;
 #define zpositive2(a, b) (zsignum(a) + zsignum(b) == 2)
 #define zzero2(a, b) (!(zsignum(a) | zsignum(b)))
 #define zmemmove(d, s, n) memmove((d), (s), (n) * sizeof(zahl_char_t))
+#define zmemcpy(d, s, n) libzahl_memcpy(d, s, n)
+#define zmemset(a, v, n) libzahl_memset(a, v, n)
 
-void libzahl_realloc(z_t a, size_t need);
 void zmul_impl(z_t a, z_t b, z_t c);
 void zsqr_impl(z_t a, z_t b);
 
@@ -151,20 +153,6 @@ libzahl_memfailure(void)
         libzahl_failure(errno);
 }
 
-static inline void
-zmemcpy(zahl_char_t *restrict d, const zahl_char_t *restrict s, register size_t n)
-{
- while (n--)
- d[n] = s[n];
-}
-
-static inline void
-zmemset(zahl_char_t *a, register zahl_char_t v, register size_t n)
-{
- while (n--)
- a[n] = v;
-}
-
 /*
  * libzahl_msb_nz_zu
  * ^^^ ^^ ^^
@@ -218,6 +206,17 @@ libzahl_add_overflow(zahl_char_t *rp, zahl_char_t a, zahl_char_t b)
 #endif
 
 static inline void
+zsplit_pz(z_t high, z_t low, z_t a, size_t delim)
+{
+ if (unlikely(zzero(a))) {
+ SET_SIGNUM(high, 0);
+ SET_SIGNUM(low, 0);
+ } else {
+ zsplit(high, low, a, delim);
+ }
+}
+
+static inline void
 zrsh_taint(z_t a, size_t bits)
 {
         size_t i, chars, cbits;
diff --git a/src/zbset.c b/src/zbset.c
index 3d93fd4..223b2d5 100644
--- a/src/zbset.c
+++ b/src/zbset.c
@@ -23,14 +23,14 @@
 
 
 void
-zbset_impl_set(z_t a, size_t bit)
+zbset_ll_set(z_t a, size_t bit)
 {
         PROLOGUE(1);
         a->chars[chars] |= mask;
 }
 
 void
-zbset_impl_clear(z_t a, size_t bit)
+zbset_ll_clear(z_t a, size_t bit)
 {
         PROLOGUE(0);
         a->chars[chars] &= ~mask;
@@ -38,7 +38,7 @@ zbset_impl_clear(z_t a, size_t bit)
 }
 
 void
-zbset_impl_flip(z_t a, size_t bit)
+zbset_ll_flip(z_t a, size_t bit)
 {
         PROLOGUE(1);
         a->chars[chars] ^= mask;
diff --git a/src/zmul.c b/src/zmul.c
index 6ea6d21..3129061 100644
--- a/src/zmul.c
+++ b/src/zmul.c
@@ -53,8 +53,8 @@ zmul_impl(z_t a, z_t b, z_t c)
         zinit_temp(c_high);
         zinit_temp(c_low);
 
- zsplit(b_high, b_low, b, m2);
- zsplit(c_high, c_low, c, m2);
+ zsplit_pz(b_high, b_low, b, m2);
+ zsplit_pz(c_high, c_low, c, m2);
 
 
         zmul_impl(z0, b_low, c_low);
diff --git a/src/zsave.c b/src/zsave.c
deleted file mode 100644
index 8b08f2a..0000000
--- a/src/zsave.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-size_t
-zsave(z_t a, void *buffer)
-{
- if (buffer) {
- char *buf = buffer;
- *((int *)buf) = a->sign, buf += sizeof(int);
- *((size_t *)buf) = a->used, buf += sizeof(size_t);
- if (likely(!zzero(a)))
- zmemcpy((zahl_char_t *)buf, a->chars, a->used);
- }
- return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * sizeof(zahl_char_t));
-}
diff --git a/src/zsetu.c b/src/zsetu.c
deleted file mode 100644
index 42e8cec..0000000
--- a/src/zsetu.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-void
-zsetu(z_t a, uint64_t b)
-{
- if (!b) {
- SET_SIGNUM(a, 0);
- return;
- }
- ENSURE_SIZE(a, 1);
- SET_SIGNUM(a, 1);
- a->chars[0] = (zahl_char_t)b;
- a->used = 1;
-}
diff --git a/src/zsplit.c b/src/zsplit.c
deleted file mode 100644
index afb8a33..0000000
--- a/src/zsplit.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-void
-zsplit(z_t high, z_t low, z_t a, size_t delim)
-{
- if (unlikely(zzero(a))) {
- /* This is for performance. */
- SET_SIGNUM(high, 0);
- SET_SIGNUM(low, 0);
- return;
- }
-
- if (unlikely(high == a)) {
- ztrunc(low, a, delim);
- zrsh(high, a, delim);
- } else {
- zrsh(high, a, delim);
- ztrunc(low, a, delim);
- }
-}
diff --git a/zahl-inlines.h b/zahl-inlines.h
index 7c06270..abea865 100644
--- a/zahl-inlines.h
+++ b/zahl-inlines.h
@@ -14,19 +14,12 @@ ZAHL_INLINE void zneg(z_t a, z_t b) { ZAHL_SET(a, b); a->sign = -a->sign; }
 ZAHL_INLINE void
 zswap(z_t a, z_t b)
 {
+ /* Almost three times faster than the naïve method. */
         z_t t;
- t->sign = a->sign;
- a->sign = b->sign;
- b->sign = t->sign;
- t->used = b->used;
- b->used = a->used;
- a->used = t->used;
- t->alloced = a->alloced;
- a->alloced = b->alloced;
- b->alloced = t->alloced;
- t->chars = b->chars;
- b->chars = a->chars;
- a->chars = t->chars;
+ ZAHL_SWAP(a, b, t, sign);
+ ZAHL_SWAP(b, a, t, used);
+ ZAHL_SWAP(a, b, t, alloced);
+ ZAHL_SWAP(b, a, t, chars);
 }
 
 
@@ -42,6 +35,20 @@ zseti(z_t a, int64_t b)
 }
 
 
+ZAHL_INLINE void
+zsetu(z_t a, uint64_t b)
+{
+ if (!b) {
+ ZAHL_SET_SIGNUM(a, 0);
+ return;
+ }
+ ZAHL_ENSURE_SIZE(a, 1);
+ ZAHL_SET_SIGNUM(a, 1);
+ a->chars[0] = (zahl_char_t)b;
+ a->used = 1;
+}
+
+
 ZAHL_INLINE size_t
 zlsb(z_t a)
 {
@@ -140,10 +147,6 @@ zcmpi(z_t a, int64_t b)
 }
 
 
-void zbset_impl_set(z_t a, size_t bit);
-void zbset_impl_clear(z_t a, size_t bit);
-void zbset_impl_flip(z_t a, size_t bit);
-
 ZAHL_INLINE void
 zbset(z_t a, z_t b, size_t bit, int action)
 {
@@ -174,11 +177,11 @@ fallback:
 #endif
 
         if (action > 0)
- zbset_impl_set(a, bit);
+ zbset_ll_set(a, bit);
         else if (action < 0)
- zbset_impl_flip(a, bit);
+ zbset_ll_flip(a, bit);
         else
- zbset_impl_clear(a, bit);
+ zbset_ll_clear(a, bit);
 }
 
 
@@ -196,3 +199,30 @@ zbtest(z_t a, size_t bit)
         bit &= ZAHL_BITS_IN_LAST_CHAR(bit);
         return (a->chars[chars] >> bit) & 1;
 }
+
+
+ZAHL_INLINE void
+zsplit(z_t high, z_t low, z_t a, size_t delim)
+{
+ if (ZAHL_UNLIKELY(high == a)) {
+ ztrunc(low, a, delim);
+ zrsh(high, a, delim);
+ } else {
+ zrsh(high, a, delim);
+ ztrunc(low, a, delim);
+ }
+}
+
+
+ZAHL_INLINE size_t
+zsave(z_t a, void *buffer)
+{
+ if (ZAHL_LIKELY(buffer)) {
+ char *buf = buffer;
+ *((int *)buf) = a->sign, buf += sizeof(int);
+ *((size_t *)buf) = a->used, buf += sizeof(size_t);
+ if (ZAHL_LIKELY(!zzero(a)))
+ libzahl_memcpy((zahl_char_t *)buf, a->chars, a->used);
+ }
+ return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * sizeof(zahl_char_t));
+}
diff --git a/zahl-internals.h b/zahl-internals.h
index 9d6df24..4adbdfe 100644
--- a/zahl-internals.h
+++ b/zahl-internals.h
@@ -32,10 +32,12 @@
 
 #define ZAHL_SET_SIGNUM(a, signum) ((a)->sign = (signum))
 #define ZAHL_SET(a, b) do { if ((a) != (b)) zset(a, b); } while (0)
+#define ZAHL_ENSURE_SIZE(a, n) do { if ((a)->alloced < (n)) libzahl_realloc(a, (n)); } while (0)
 #define ZAHL_TRIM(a) for (; (a)->used && !(a)->chars[(a)->used - 1]; (a)->used--)
 #define ZAHL_TRIM_NONZERO(a) for (; !(a)->chars[(a)->used - 1]; (a)->used--)
 #define ZAHL_TRIM_AND_ZERO(a) do { ZAHL_TRIM(a); if (!(a)->used) ZAHL_SET_SIGNUM(a, 0); } while (0)
 #define ZAHL_TRIM_AND_SIGN(a, s) do { ZAHL_TRIM(a); ZAHL_SET_SIGNUM(a, (a)->used ? (s) : 0); } while (0)
+#define ZAHL_SWAP(a, b, t, m) ((t)->m = (a)->m, (a)->m = (b)->m, (b)->m = (t)->m)
 
 
 #if defined(__GNUC__) || defined(__clang__)
@@ -72,3 +74,20 @@ struct zahl {
         size_t alloced;
         zahl_char_t *chars;
 };
+
+
+void libzahl_realloc(struct zahl *, size_t);
+
+static inline void
+libzahl_memcpy(zahl_char_t *restrict d, const zahl_char_t *restrict s, register size_t n)
+{
+ while (n--)
+ d[n] = s[n];
+}
+
+static inline void
+libzahl_memset(zahl_char_t *a, register zahl_char_t v, register size_t n)
+{
+ while (n--)
+ a[n] = v;
+}
diff --git a/zahl.h b/zahl.h
index f3d3d69..1e27882 100644
--- a/zahl.h
+++ b/zahl.h
@@ -66,14 +66,14 @@ void zunsetup(void); /* Free resources used by libzahl */
 ZAHL_INLINE void zinit(z_t); /* Prepare a for use. */
 ZAHL_INLINE void zswap(z_t, z_t); /* (a, b) := (b, a) */
 void zfree(z_t); /* Free resources in a. */
-size_t zsave(z_t, void *); /* Store a into b (if !!b), and return number of written bytes. */
+ZAHL_INLINE size_t zsave(z_t, void *); /* Store a into b (if !!b), and return number of written bytes. */
 size_t zload(z_t, const void *); /* Restore a from b, and return number of read bytes. */
 
 
 /* Assignment functions. */
 
 void zset(z_t, z_t); /* a := b */
-void zsetu(z_t, uint64_t); /* a := b */
+ZAHL_INLINE void zsetu(z_t, uint64_t); /* a := b */
 ZAHL_INLINE void zseti(z_t, int64_t); /* a := b */
 
 /* Comparison functions. */
@@ -119,7 +119,8 @@ void znot(z_t, z_t); /* a := ~b */
 void zlsh(z_t, z_t, size_t); /* a := b << c */
 void zrsh(z_t, z_t, size_t); /* a := b >> c */
 void ztrunc(z_t, z_t, size_t); /* a := b & ((1 << c) - 1) */
-void zsplit(z_t, z_t, z_t, size_t); /* a := c >> d, b := c - (a << d) */
+ZAHL_INLINE void zsplit(z_t, z_t, z_t, size_t);
+ /* a := c >> d, b := c - (a << d) */
 ZAHL_INLINE int zbtest(z_t, size_t); /* (a >> b) & 1 */
 ZAHL_INLINE size_t zlsb(z_t); /* Index of first set bit, SIZE_MAX if none are set. */
 ZAHL_INLINE size_t zbits(z_t); /* ⌊log₂ |a|⌋ + 1, 1 if a = 0 */
@@ -165,6 +166,14 @@ void zperror(const char *); /* Identical to perror(3p) except it sup
 
 
 
+/* Low-level functions. [Do not count on these to be retained between different versions of libzahl.] */
+
+void zbset_ll_set(z_t, size_t); /* zbset(a, a, b, 1) */
+void zbset_ll_clear(z_t, size_t); /* zbset(a, a, b, 0) */
+void zbset_ll_flip(z_t, size_t); /* zbset(a, a, b, -1) */
+
+
+
 #include "zahl-inlines.h"
 
 #endif
Received on Fri Apr 29 2016 - 21:55:32 CEST

This archive was generated by hypermail 2.3.0 : Fri Apr 29 2016 - 22:00:33 CEST