commit 83d95da004c8cc2387a4070b781a71a0c6433faa
Author: Mattias Andrée <maandree_AT_kth.se>
AuthorDate: Sat Apr 30 05:47:05 2016 +0200
Commit: Mattias Andrée <maandree_AT_kth.se>
CommitDate: Sat Apr 30 05:47:05 2016 +0200
Some optimisations
Signed-off-by: Mattias Andrée <maandree_AT_kth.se>
diff --git a/Makefile b/Makefile
index 5e4ace9..1b0d9de 100644
--- a/Makefile
+++ b/Makefile
_AT_@ -33,7 +33,6 @@ FUN =\
zptest\
zrand\
zrsh\
- zset\
zsets\
zsetup\
zsqr\
_AT_@ -60,6 +59,7 @@ INLINE_FUN =\
zodd\
zodd_nonzero\
zsave\
+ zset\
zseti\
zsetu\
zsignum\
diff --git a/STATUS b/STATUS
index 9a94033..29308a6 100644
--- a/STATUS
+++ b/STATUS
_AT_@ -14,11 +14,11 @@ zabs(a, b) .............. fastest
zabs(a, a) .............. tomsfastmath is faster
zadd_unsigned ........... fastest (faster than all others' zadd)
zsub_unsigned ........... fastest (faster than all others' zsub)
-zadd .................... 94 % of tomsfastmath, 90 % libtommath, 86 % of hebimath
-zsub .................... 97 % of tomsfastmath, 95 % hebimath, 93 % of libtommath
-zand .................... 55 % of tomsfastmath
-zor ..................... 46 % of tomsfastmath
-zxor .................... 57 % of tomsfastmath
+zadd .................... fastest
+zsub .................... 98 % of libtommath
+zand .................... 77 % of tomsfastmath
+zor ..................... 65 % of tomsfastmath
+zxor .................... 87 % of tomsfastmath
znot .................... fastest
zeven ................... fastest (shared with gmp, libtommath, and tomsfastmath)
zodd .................... fastest (shared with gmp, libtommath, and tomsfastmath)
diff --git a/src/internals.h b/src/internals.h
index bcaea7a..407f4b7 100644
--- a/src/internals.h
+++ b/src/internals.h
_AT_@ -94,6 +94,8 @@ extern void *libzahl_temp_allocation;
#define SWAP(a, b, t, m) ((t)->m = (a)->m, (a)->m = (b)->m, (b)->m = (t)->m)
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN_MAX_1(min, max, a, b) ((min) = MIN(a, b), (max) = MAX(a, b))
+#define MIN_MAX_2(min, max, a, b) ((min) = (a) + (b) - ((max) = MAX(a, b)))
#define znegative(a) (zsignum(a) < 0)
#define znegative1(a, b) ((zsignum(a) | zsignum(b)) < 0)
#define znegative2(a, b) ((zsignum(a) & zsignum(b)) < 0)
_AT_@ -114,6 +116,13 @@ zzero1(z_t a, z_t b)
return zzero(a) || zzero(b);
}
+O2 static inline void
+zmemcpy_range(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t i, size_t n)
+{
+ for (; i < n; i++)
+ d[i] = s[i];
+}
+
static void
libzahl_failure(int error)
{
diff --git a/src/zand.c b/src/zand.c
index a8f53a6..6d3a184 100644
--- a/src/zand.c
+++ b/src/zand.c
_AT_@ -2,14 +2,25 @@
#include "internals.h"
-static inline O2 void
-zand_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n)
+O2 static inline void
+zand_impl_3(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b, size_t n)
{
size_t i;
for (i = 0; i < n; i++)
a[i] &= b[i];
}
+static inline void
+zand_impl_4(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b,
+ register const zahl_char_t *restrict c, size_t n)
+{
+ size_t i;
+ for (i = 0; i < n; i++)
+ a[i] = b[i] & c[i];
+}
+
void
zand(z_t a, z_t b, z_t c)
{
_AT_@ -25,13 +36,12 @@ zand(z_t a, z_t b, z_t c)
a->used = MIN(b->used, c->used);
if (a == b) {
- zand_impl(a->chars, c->chars, a->used);
+ zand_impl_3(a->chars, c->chars, a->used);
} else if (unlikely(a == c)) {
- zand_impl(a->chars, b->chars, a->used);
+ zand_impl_3(a->chars, b->chars, a->used);
} else {
ENSURE_SIZE(a, a->used);
- zmemcpy(a->chars, c->chars, a->used);
- zand_impl(a->chars, b->chars, a->used);
+ zand_impl_4(a->chars, b->chars, c->chars, a->used);
}
TRIM_AND_SIGN(a, zpositive1(b, c) * 2 - 1);
diff --git a/src/zor.c b/src/zor.c
index 25b69ea..2152c7f 100644
--- a/src/zor.c
+++ b/src/zor.c
_AT_@ -2,14 +2,27 @@
#include "internals.h"
-static inline O2 void
-zor_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n)
+O2 static inline void
+zor_impl_3(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b, size_t n)
{
size_t i;
for (i = 0; i < n; i++)
a[i] |= b[i];
}
+static inline void
+zor_impl_5(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b, size_t n,
+ register const zahl_char_t *restrict c, size_t m)
+{
+ size_t i;
+ for (i = 0; i < n; i++)
+ a[i] = b[i] | c[i];
+ for (; i < m; i++)
+ a[i] = c[i];
+}
+
void
zor(z_t a, z_t b, z_t c)
{
_AT_@ -23,25 +36,21 @@ zor(z_t a, z_t b, z_t c)
return;
}
- m = MAX(b->used, c->used);
- n = b->used + c->used - m;
-
+ MIN_MAX_1(n, m, b->used, c->used);
ENSURE_SIZE(a, m);
if (a == b) {
+ zor_impl_3(a->chars, c->chars, n);
if (a->used < c->used)
- zmemcpy(a->chars + n, c->chars + n, m - n);
- zor_impl(a->chars, c->chars, n);
+ zmemcpy_range(a->chars, c->chars, n, m);
} else if (unlikely(a == c)) {
+ zor_impl_3(a->chars, b->chars, n);
if (a->used < b->used)
- zmemcpy(a->chars + n, b->chars + n, m - n);
- zor_impl(a->chars, b->chars, n);
+ zmemcpy_range(a->chars, b->chars, n, m);
} else if (m == b->used) {
- zmemcpy(a->chars, b->chars, m);
- zor_impl(a->chars, c->chars, n);
+ zor_impl_5(a->chars, c->chars, n, b->chars, m);
} else {
- zmemcpy(a->chars, c->chars, m);
- zor_impl(a->chars, b->chars, n);
+ zor_impl_5(a->chars, b->chars, n, c->chars, m);
}
a->used = m;
diff --git a/src/zset.c b/src/zset.c
deleted file mode 100644
index b0188fc..0000000
--- a/src/zset.c
+++ /dev/null
_AT_@ -1,16 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-void
-zset(z_t a, z_t b)
-{
- if (unlikely(b->sign == 0)) {
- a->sign = 0;
- } else {
- a->sign = b->sign;
- a->used = b->used;
- ENSURE_SIZE(a, b->used);
- zmemcpy(a->chars, b->chars, b->used);
- }
-}
diff --git a/src/zsub.c b/src/zsub.c
index 259526f..51d4a09 100644
--- a/src/zsub.c
+++ b/src/zsub.c
_AT_@ -79,13 +79,12 @@ zsub_unsigned(z_t a, z_t b, z_t c)
void
zsub_nonnegative_assign(z_t a, z_t b)
{
- if (unlikely(zzero(b))) {
+ if (unlikely(zzero(b)))
zabs(a, a);
- } else if (unlikely(!zcmpmag(a, b))) {
+ else if (unlikely(!zcmpmag(a, b)))
SET_SIGNUM(a, 0);
- } else {
+ else
zsub_impl(a, b, b->used);
- }
}
void
diff --git a/src/zxor.c b/src/zxor.c
index f973774..601a947 100644
--- a/src/zxor.c
+++ b/src/zxor.c
_AT_@ -2,18 +2,33 @@
#include "internals.h"
-static inline void
-zxor_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n)
+O2 static inline void
+zxor_impl_3(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b, size_t n)
{
size_t i;
for (i = 0; i < n; i++)
a[i] ^= b[i];
}
+static inline void
+zxor_impl_5(register zahl_char_t *restrict a,
+ register const zahl_char_t *restrict b, size_t n,
+ register const zahl_char_t *restrict c, size_t m)
+{
+ size_t i;
+ for (i = 0; i < n; i++)
+ a[i] = b[i] ^ c[i];
+ for (; i < m; i++)
+ a[i] = c[i];
+}
+
void
zxor(z_t a, z_t b, z_t c)
{
- size_t n, m;
+ size_t n, m, bn, cn;
+ const zahl_char_t *restrict bc;
+ const zahl_char_t *restrict cc;
if (unlikely(zzero(b))) {
SET(a, c);
_AT_@ -23,25 +38,26 @@ zxor(z_t a, z_t b, z_t c)
return;
}
- m = MAX(b->used, c->used);
- n = b->used + c->used - m;
+ bn = b->used;
+ bc = b->chars;
+ cn = c->used;
+ cc = c->chars;
+ MIN_MAX_1(n, m, bn, cn);
ENSURE_SIZE(a, m);
if (a == b) {
- if (a->used < c->used)
- zmemcpy(a->chars + n, c->chars + n, m - n);
- zxor_impl(a->chars, c->chars, n);
+ zxor_impl_3(a->chars, cc, n);
+ if (a->used < cn)
+ zmemcpy_range(a->chars, cc, n, m);
} else if (unlikely(a == c)) {
- if (a->used < b->used)
- zmemcpy(a->chars + n, b->chars + n, m - n);
- zxor_impl(a->chars, b->chars, n);
- } else if (m == b->used) {
- zmemcpy(a->chars, b->chars, m);
- zxor_impl(a->chars, c->chars, n);
+ zxor_impl_3(a->chars, bc, n);
+ if (a->used < bn)
+ zmemcpy_range(a->chars, bc, n, m);
+ } else if (m == bn) {
+ zxor_impl_5(a->chars, cc, n, bc, m);
} else {
- zmemcpy(a->chars, c->chars, m);
- zxor_impl(a->chars, b->chars, n);
+ zxor_impl_5(a->chars, bc, n, cc, m);
}
a->used = m;
diff --git a/zahl-inlines.h b/zahl-inlines.h
index 09e09a6..b69a8d2 100644
--- a/zahl-inlines.h
+++ b/zahl-inlines.h
_AT_@ -19,6 +19,20 @@ ZAHL_INLINE void zabs(z_t a, z_t b) { ZAHL_SET(a, b); a->sign = !!a->sign; }
ZAHL_INLINE void
+zset(z_t a, z_t b)
+{
+ if (ZAHL_UNLIKELY(b->sign == 0)) {
+ a->sign = 0;
+ } else {
+ a->sign = b->sign;
+ a->used = b->used;
+ ZAHL_ENSURE_SIZE(a, b->used);
+ libzahl_memcpy(a->chars, b->chars, b->used);
+ }
+}
+
+
+ZAHL_INLINE void
zswap(z_t a, z_t b)
{
/* Almost three times faster than the naïve method. */
diff --git a/zahl.h b/zahl.h
index 1e27882..45404af 100644
--- a/zahl.h
+++ b/zahl.h
_AT_@ -72,10 +72,11 @@ size_t zload(z_t, const void *); /* Restore a from b, and return number o
/* Assignment functions. */
-void zset(z_t, z_t); /* a := b */
+ZAHL_INLINE void zset(z_t, z_t); /* a := b */
ZAHL_INLINE void zsetu(z_t, uint64_t); /* a := b */
ZAHL_INLINE void zseti(z_t, int64_t); /* a := b */
+
/* Comparison functions. */
ZAHL_INLINE int zcmp(z_t, z_t); /* signum (a - b) */
Received on Sat Apr 30 2016 - 06:04:12 CEST
This archive was generated by hypermail 2.3.0
: Sat Apr 30 2016 - 06:12:19 CEST