(wrong string) ée

From: <git_AT_suckless.org>
Date: Sat, 30 Apr 2016 06:04:12 +0200 (CEST)

commit 83d95da004c8cc2387a4070b781a71a0c6433faa
Author: Mattias Andrée <maandree_AT_kth.se>
AuthorDate: Sat Apr 30 05:47:05 2016 +0200
Commit: Mattias Andrée <maandree_AT_kth.se>
CommitDate: Sat Apr 30 05:47:05 2016 +0200

    Some optimisations
    
    Signed-off-by: Mattias Andrée <maandree_AT_kth.se>

diff --git a/Makefile b/Makefile
index 5e4ace9..1b0d9de 100644
--- a/Makefile
+++ b/Makefile
_AT_@ -33,7 +33,6 @@ FUN =\
         zptest\
         zrand\
         zrsh\
- zset\
         zsets\
         zsetup\
         zsqr\
_AT_@ -60,6 +59,7 @@ INLINE_FUN =\
         zodd\
         zodd_nonzero\
         zsave\
+ zset\
         zseti\
         zsetu\
         zsignum\
diff --git a/STATUS b/STATUS
index 9a94033..29308a6 100644
--- a/STATUS
+++ b/STATUS
_AT_@ -14,11 +14,11 @@ zabs(a, b) .............. fastest
 zabs(a, a) .............. tomsfastmath is faster
 zadd_unsigned ........... fastest (faster than all others' zadd)
 zsub_unsigned ........... fastest (faster than all others' zsub)
-zadd .................... 94 % of tomsfastmath, 90 % libtommath, 86 % of hebimath
-zsub .................... 97 % of tomsfastmath, 95 % hebimath, 93 % of libtommath
-zand .................... 55 % of tomsfastmath
-zor ..................... 46 % of tomsfastmath
-zxor .................... 57 % of tomsfastmath
+zadd .................... fastest
+zsub .................... 98 % of libtommath
+zand .................... 77 % of tomsfastmath
+zor ..................... 65 % of tomsfastmath
+zxor .................... 87 % of tomsfastmath
 znot .................... fastest
 zeven ................... fastest (shared with gmp, libtommath, and tomsfastmath)
 zodd .................... fastest (shared with gmp, libtommath, and tomsfastmath)
diff --git a/src/internals.h b/src/internals.h
index bcaea7a..407f4b7 100644
--- a/src/internals.h
+++ b/src/internals.h
_AT_@ -94,6 +94,8 @@ extern void *libzahl_temp_allocation;
 #define SWAP(a, b, t, m) ((t)->m = (a)->m, (a)->m = (b)->m, (b)->m = (t)->m)
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN_MAX_1(min, max, a, b) ((min) = MIN(a, b), (max) = MAX(a, b))
+#define MIN_MAX_2(min, max, a, b) ((min) = (a) + (b) - ((max) = MAX(a, b)))
 #define znegative(a) (zsignum(a) < 0)
 #define znegative1(a, b) ((zsignum(a) | zsignum(b)) < 0)
 #define znegative2(a, b) ((zsignum(a) & zsignum(b)) < 0)
_AT_@ -114,6 +116,13 @@ zzero1(z_t a, z_t b)
         return zzero(a) || zzero(b);
 }
 
+O2 static inline void
+zmemcpy_range(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t i, size_t n)
+{
+ for (; i < n; i++)
+ d[i] = s[i];
+}
+
 static void
 libzahl_failure(int error)
 {
diff --git a/src/zand.c b/src/zand.c
index a8f53a6..6d3a184 100644
--- a/src/zand.c
+++ b/src/zand.c
_AT_@ -2,14 +2,25 @@
 #include "internals.h"
 
 
-static inline O2 void
-zand_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n)
+O2 static inline void
+zand_impl_3(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b, size_t n)
 {
         size_t i;
         for (i = 0; i < n; i++)
                 a[i] &= b[i];
 }
 
+static inline void
+zand_impl_4(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b,
+ register const zahl_char_t *restrict c, size_t n)
+{
+ size_t i;
+ for (i = 0; i < n; i++)
+ a[i] = b[i] & c[i];
+}
+
 void
 zand(z_t a, z_t b, z_t c)
 {
_AT_@ -25,13 +36,12 @@ zand(z_t a, z_t b, z_t c)
         a->used = MIN(b->used, c->used);
 
         if (a == b) {
- zand_impl(a->chars, c->chars, a->used);
+ zand_impl_3(a->chars, c->chars, a->used);
         } else if (unlikely(a == c)) {
- zand_impl(a->chars, b->chars, a->used);
+ zand_impl_3(a->chars, b->chars, a->used);
         } else {
                 ENSURE_SIZE(a, a->used);
- zmemcpy(a->chars, c->chars, a->used);
- zand_impl(a->chars, b->chars, a->used);
+ zand_impl_4(a->chars, b->chars, c->chars, a->used);
         }
 
         TRIM_AND_SIGN(a, zpositive1(b, c) * 2 - 1);
diff --git a/src/zor.c b/src/zor.c
index 25b69ea..2152c7f 100644
--- a/src/zor.c
+++ b/src/zor.c
_AT_@ -2,14 +2,27 @@
 #include "internals.h"
 
 
-static inline O2 void
-zor_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n)
+O2 static inline void
+zor_impl_3(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b, size_t n)
 {
         size_t i;
         for (i = 0; i < n; i++)
                 a[i] |= b[i];
 }
 
+static inline void
+zor_impl_5(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b, size_t n,
+ register const zahl_char_t *restrict c, size_t m)
+{
+ size_t i;
+ for (i = 0; i < n; i++)
+ a[i] = b[i] | c[i];
+ for (; i < m; i++)
+ a[i] = c[i];
+}
+
 void
 zor(z_t a, z_t b, z_t c)
 {
_AT_@ -23,25 +36,21 @@ zor(z_t a, z_t b, z_t c)
                 return;
         }
 
- m = MAX(b->used, c->used);
- n = b->used + c->used - m;
-
+ MIN_MAX_1(n, m, b->used, c->used);
         ENSURE_SIZE(a, m);
 
         if (a == b) {
+ zor_impl_3(a->chars, c->chars, n);
                 if (a->used < c->used)
- zmemcpy(a->chars + n, c->chars + n, m - n);
- zor_impl(a->chars, c->chars, n);
+ zmemcpy_range(a->chars, c->chars, n, m);
         } else if (unlikely(a == c)) {
+ zor_impl_3(a->chars, b->chars, n);
                 if (a->used < b->used)
- zmemcpy(a->chars + n, b->chars + n, m - n);
- zor_impl(a->chars, b->chars, n);
+ zmemcpy_range(a->chars, b->chars, n, m);
         } else if (m == b->used) {
- zmemcpy(a->chars, b->chars, m);
- zor_impl(a->chars, c->chars, n);
+ zor_impl_5(a->chars, c->chars, n, b->chars, m);
         } else {
- zmemcpy(a->chars, c->chars, m);
- zor_impl(a->chars, b->chars, n);
+ zor_impl_5(a->chars, b->chars, n, c->chars, m);
         }
 
         a->used = m;
diff --git a/src/zset.c b/src/zset.c
deleted file mode 100644
index b0188fc..0000000
--- a/src/zset.c
+++ /dev/null
_AT_@ -1,16 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-void
-zset(z_t a, z_t b)
-{
- if (unlikely(b->sign == 0)) {
- a->sign = 0;
- } else {
- a->sign = b->sign;
- a->used = b->used;
- ENSURE_SIZE(a, b->used);
- zmemcpy(a->chars, b->chars, b->used);
- }
-}
diff --git a/src/zsub.c b/src/zsub.c
index 259526f..51d4a09 100644
--- a/src/zsub.c
+++ b/src/zsub.c
_AT_@ -79,13 +79,12 @@ zsub_unsigned(z_t a, z_t b, z_t c)
 void
 zsub_nonnegative_assign(z_t a, z_t b)
 {
- if (unlikely(zzero(b))) {
+ if (unlikely(zzero(b)))
                 zabs(a, a);
- } else if (unlikely(!zcmpmag(a, b))) {
+ else if (unlikely(!zcmpmag(a, b)))
                 SET_SIGNUM(a, 0);
- } else {
+ else
                 zsub_impl(a, b, b->used);
- }
 }
 
 void
diff --git a/src/zxor.c b/src/zxor.c
index f973774..601a947 100644
--- a/src/zxor.c
+++ b/src/zxor.c
_AT_@ -2,18 +2,33 @@
 #include "internals.h"
 
 
-static inline void
-zxor_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n)
+O2 static inline void
+zxor_impl_3(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b, size_t n)
 {
         size_t i;
         for (i = 0; i < n; i++)
                 a[i] ^= b[i];
 }
 
+static inline void
+zxor_impl_5(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b, size_t n,
+ register const zahl_char_t *restrict c, size_t m)
+{
+ size_t i;
+ for (i = 0; i < n; i++)
+ a[i] = b[i] ^ c[i];
+ for (; i < m; i++)
+ a[i] = c[i];
+}
+
 void
 zxor(z_t a, z_t b, z_t c)
 {
- size_t n, m;
+ size_t n, m, bn, cn;
+ const zahl_char_t *restrict bc;
+ const zahl_char_t *restrict cc;
 
         if (unlikely(zzero(b))) {
                 SET(a, c);
_AT_@ -23,25 +38,26 @@ zxor(z_t a, z_t b, z_t c)
                 return;
         }
 
- m = MAX(b->used, c->used);
- n = b->used + c->used - m;
+ bn = b->used;
+ bc = b->chars;
+ cn = c->used;
+ cc = c->chars;
 
+ MIN_MAX_1(n, m, bn, cn);
         ENSURE_SIZE(a, m);
 
         if (a == b) {
- if (a->used < c->used)
- zmemcpy(a->chars + n, c->chars + n, m - n);
- zxor_impl(a->chars, c->chars, n);
+ zxor_impl_3(a->chars, cc, n);
+ if (a->used < cn)
+ zmemcpy_range(a->chars, cc, n, m);
         } else if (unlikely(a == c)) {
- if (a->used < b->used)
- zmemcpy(a->chars + n, b->chars + n, m - n);
- zxor_impl(a->chars, b->chars, n);
- } else if (m == b->used) {
- zmemcpy(a->chars, b->chars, m);
- zxor_impl(a->chars, c->chars, n);
+ zxor_impl_3(a->chars, bc, n);
+ if (a->used < bn)
+ zmemcpy_range(a->chars, bc, n, m);
+ } else if (m == bn) {
+ zxor_impl_5(a->chars, cc, n, bc, m);
         } else {
- zmemcpy(a->chars, c->chars, m);
- zxor_impl(a->chars, b->chars, n);
+ zxor_impl_5(a->chars, bc, n, cc, m);
         }
 
         a->used = m;
diff --git a/zahl-inlines.h b/zahl-inlines.h
index 09e09a6..b69a8d2 100644
--- a/zahl-inlines.h
+++ b/zahl-inlines.h
_AT_@ -19,6 +19,20 @@ ZAHL_INLINE void zabs(z_t a, z_t b) { ZAHL_SET(a, b); a->sign = !!a->sign; }
 
 
 ZAHL_INLINE void
+zset(z_t a, z_t b)
+{
+ if (ZAHL_UNLIKELY(b->sign == 0)) {
+ a->sign = 0;
+ } else {
+ a->sign = b->sign;
+ a->used = b->used;
+ ZAHL_ENSURE_SIZE(a, b->used);
+ libzahl_memcpy(a->chars, b->chars, b->used);
+ }
+}
+
+
+ZAHL_INLINE void
 zswap(z_t a, z_t b)
 {
         /* Almost three times faster than the naïve method. */
diff --git a/zahl.h b/zahl.h
index 1e27882..45404af 100644
--- a/zahl.h
+++ b/zahl.h
_AT_@ -72,10 +72,11 @@ size_t zload(z_t, const void *); /* Restore a from b, and return number o
 
 /* Assignment functions. */
 
-void zset(z_t, z_t); /* a := b */
+ZAHL_INLINE void zset(z_t, z_t); /* a := b */
 ZAHL_INLINE void zsetu(z_t, uint64_t); /* a := b */
 ZAHL_INLINE void zseti(z_t, int64_t); /* a := b */
 
+
 /* Comparison functions. */
 
 ZAHL_INLINE int zcmp(z_t, z_t); /* signum (a - b) */
Received on Sat Apr 30 2016 - 06:04:12 CEST

This archive was generated by hypermail 2.3.0 : Sat Apr 30 2016 - 06:12:19 CEST