commit 599a71a058b8913a4d166485fff6b964247763e9
Author:     Mattias Andrée <maandree_AT_kth.se>
AuthorDate: Sat Apr 30 02:20:20 2016 +0200
Commit:     Mattias Andrée <maandree_AT_kth.se>
CommitDate: Sat Apr 30 02:20:20 2016 +0200
    Some optimisations
    
    Signed-off-by: Mattias Andrée <maandree_AT_kth.se>
diff --git a/STATUS b/STATUS
index 5e705ee..9a94033 100644
--- a/STATUS
+++ b/STATUS
_AT_@ -14,11 +14,11 @@ zabs(a, b) .............. fastest
 zabs(a, a) .............. tomsfastmath is faster
 zadd_unsigned ........... fastest (faster than all others' zadd)
 zsub_unsigned ........... fastest (faster than all others' zsub)
-zadd .................... 87 % of tomsfastmath, 83 % libtommath, 80 % of hebimath
+zadd .................... 94 % of tomsfastmath, 90 % libtommath, 86 % of hebimath
 zsub .................... 97 % of tomsfastmath, 95 % hebimath, 93 % of libtommath
-zand .................... 49 % of tomsfastmath
-zor ..................... 36 % of tomsfastmath
-zxor .................... 51 % of tomsfastmath
+zand .................... 55 % of tomsfastmath
+zor ..................... 46 % of tomsfastmath
+zxor .................... 57 % of tomsfastmath
 znot .................... fastest
 zeven ................... fastest (shared with gmp, libtommath, and tomsfastmath)
 zodd .................... fastest (shared with gmp, libtommath, and tomsfastmath)
_AT_@ -35,7 +35,7 @@ ztrunc(a, b, c) ......... fastest
 ztrunc(a, a, b) ......... fastest
 zsplit .................. fastest
 zcmpmag ................. fastest
-zcmp .................... 94 % of tomsfastmath, 81 % of hebimath (zcmpmag)
+zcmp .................... fastest
 zcmpi ................... fastest
 zcmpu ................... fastest
 zbset(a, b, 1) .......... fastest
_AT_@ -45,7 +45,7 @@ zbset(a, a, 0) .......... fastest
 zbset(a, b, -1) ......... fastest
 zbset(a, a, -1) ......... fastest
 zbtest .................. fastest
-zgcd .................... 17 % of gmp (zcmpmag)
+zgcd .................... 21 % of gmp (zcmpmag)
 zmul .................... slowest
 zsqr .................... slowest (zmul)
 zmodmul(big mod) ........ slowest ((zmul, zmod))
_AT_@ -56,16 +56,16 @@ zpow .................... slowest (zmul, zsqr)
 zpowu ................... slowest (zmul, zsqr)
 zmodpow ................. slowest (zmul, zsqr. zmod)
 zmodpowu ................ slowest (zmul, zsqr, zmod)
-zsets ................... 9 % of gmp
+zsets ................... 13 % of gmp
 zstr_length(a, 10) ...... gmp is faster (zdiv, zsqr)
-zstr(a, b, n) ........... 8 % of gmp, 58 % of hebimath
+zstr(a, b, n) ........... 8 % of gmp, 59 % of hebimath
 zrand(default uniform) .. 51 % of gmp
 zptest .................. slowest (zrand, zmodpow, zsqr, zmod)
 zsave ................... fastest
 zload ................... fastest
 zdiv(big denum) ......... tomsfastmath and naïve hebimath implementation are faster (zdivmod)
-zmod(big denum) ......... naïve hebimath implementation is faster (zdivmod)
-zdivmod(big denum) ...... tomsfastmath and naïve hebimath implementation are faster
+zmod(big denum) ......... fastest (zdivmod)
+zdivmod(big denum) ...... fastest
 zdiv(tiny denum) ........ slowest
 zmod(tiny denum) ........ slowest
 zdivmod(tiny denum) ..... slowest
diff --git a/src/internals.h b/src/internals.h
index 04118e6..bcaea7a 100644
--- a/src/internals.h
+++ b/src/internals.h
_AT_@ -27,7 +27,6 @@
 #define Oz     ZAHL_Oz
 
 #define LIST_TEMPS\
-	X(libzahl_tmp_cmp, 1)\
         X(libzahl_tmp_str_num, 0)\
         X(libzahl_tmp_str_mag, 0)\
         X(libzahl_tmp_str_div, 0)\
diff --git a/zahl-inlines.h b/zahl-inlines.h
index 1f04a05..09e09a6 100644
--- a/zahl-inlines.h
+++ b/zahl-inlines.h
_AT_@ -1,17 +1,24 @@
 /* See LICENSE file for copyright and license details. */
 
 ZAHL_INLINE void zinit(z_t a)         { a->alloced = 0; a->chars = 0; }
-ZAHL_INLINE int zeven(z_t a)          { return !a->sign || !(a->chars[0] & 1); }
+ZAHL_INLINE int zeven(z_t a)          { return !a->sign || (~a->chars[0] & 1); }
 ZAHL_INLINE int zodd(z_t a)           { return a->sign && (a->chars[0] & 1); }
-ZAHL_INLINE int zeven_nonzero(z_t a)  { return !(a->chars[0] & 1); }
+ZAHL_INLINE int zeven_nonzero(z_t a)  { return (~a->chars[0] & 1); }
 ZAHL_INLINE int zodd_nonzero(z_t a)   { return (a->chars[0] & 1); }
 ZAHL_INLINE int zzero(z_t a)          { return !a->sign; }
 ZAHL_INLINE int zsignum(z_t a)        { return a->sign; }
-ZAHL_INLINE void zabs(z_t a, z_t b)   { ZAHL_SET(a, b); a->sign = !!a->sign; }
 ZAHL_INLINE void zneg(z_t a, z_t b)   { ZAHL_SET(a, b); a->sign = -a->sign; }
 
+#if 1 && (-1 & 1) /* In the future, tuning will select the fastest implementation. */
+ZAHL_INLINE void zabs(z_t a, z_t b)   { ZAHL_SET(a, b); a->sign &= 1; }
+#elif 1
+ZAHL_INLINE void zabs(z_t a, z_t b)   { ZAHL_SET(a, b); if (ZAHL_LIKELY(a->sign < 0)) a->sign = 1; }
+#else
+ZAHL_INLINE void zabs(z_t a, z_t b)   { ZAHL_SET(a, b); a->sign = !!a->sign; }
+#endif
+
 
-ZAHL_INLINE ZAHL_O3 void
+ZAHL_INLINE void
 zswap(z_t a, z_t b)
 {
         /* Almost three times faster than the naïve method. */
_AT_@ -23,22 +30,24 @@ zswap(z_t a, z_t b)
 }
 
 
-ZAHL_INLINE ZAHL_O3 void
+ZAHL_INLINE void
 zseti(z_t a, int64_t b)
 {
         if (ZAHL_UNLIKELY(b >= 0)) {
                 zsetu(a, (uint64_t)b);
-	} else {
-		zsetu(a, (uint64_t)-b);
-		ZAHL_SET_SIGNUM(a, -1);
+		return;
         }
+	ZAHL_ENSURE_SIZE(a, 1);
+	ZAHL_SET_SIGNUM(a, -1);
+	a->chars[0] = (zahl_char_t)-b;
+	a->used = 1;
 }
 
 
-ZAHL_INLINE ZAHL_O3 void
+ZAHL_INLINE void
 zsetu(z_t a, uint64_t b)
 {
-	if (!b) {
+	if (ZAHL_UNLIKELY(!b)) {
                 ZAHL_SET_SIGNUM(a, 0);
                 return;
         }
_AT_@ -49,7 +58,7 @@ zsetu(z_t a, uint64_t b)
 }
 
 
-ZAHL_INLINE ZAHL_O3 size_t
+ZAHL_INLINE size_t
 zlsb(z_t a)
 {
         size_t i = 0;
_AT_@ -62,7 +71,7 @@ zlsb(z_t a)
 }
 
 
-ZAHL_INLINE ZAHL_O3 size_t
+ZAHL_INLINE size_t
 zbits(z_t a)
 {
         size_t rc;
_AT_@ -75,7 +84,7 @@ zbits(z_t a)
 }
 
 
-ZAHL_INLINE ZAHL_O3 int
+ZAHL_INLINE int
 zcmpmag(z_t a, z_t b)
 {
         size_t i, j;
_AT_@ -103,7 +112,7 @@ zcmpmag(z_t a, z_t b)
 }
 
 
-ZAHL_INLINE ZAHL_O3 int
+ZAHL_INLINE int
 zcmp(z_t a, z_t b)
 {
         if (zsignum(a) != zsignum(b))
_AT_@ -112,23 +121,23 @@ zcmp(z_t a, z_t b)
 }
 
 
-ZAHL_INLINE ZAHL_O3 int
+ZAHL_INLINE int
 zcmpu(z_t a, uint64_t b)
 {
-	extern z_t libzahl_tmp_cmp;
         if (ZAHL_UNLIKELY(!b))
                 return zsignum(a);
         if (ZAHL_UNLIKELY(zsignum(a) <= 0))
                 return -1;
-	libzahl_tmp_cmp->chars[0] = b;
-	return zcmpmag(a, libzahl_tmp_cmp);
+	while (!a->chars[a->used - 1])  a->used--; /* TODO should not be necessary */
+	if (a->used > 1)
+		return +1;
+	return a->chars[0] < b ? -1 : a->chars[0] > b;
 }
 
 
-ZAHL_INLINE ZAHL_O3 int
+ZAHL_INLINE int
 zcmpi(z_t a, int64_t b)
 {
-	extern z_t libzahl_tmp_cmp;
         if (ZAHL_UNLIKELY(!b))
                 return zsignum(a);
         if (ZAHL_UNLIKELY(zzero(a)))
_AT_@ -136,18 +145,22 @@ zcmpi(z_t a, int64_t b)
         if (ZAHL_LIKELY(b < 0)) {
                 if (zsignum(a) > 0)
                         return +1;
-		libzahl_tmp_cmp->chars[0] = (zahl_char_t)-b;
-		return -zcmpmag(a, libzahl_tmp_cmp);
+		while (!a->chars[a->used - 1])  a->used--; /* TODO should not be necessary */
+		if (a->used > 1)
+			return -1;
+		return a->chars[0] > (zahl_char_t)-b ? -1 : a->chars[0] < (zahl_char_t)-b;
         } else {
                 if (zsignum(a) < 0)
                         return -1;
-		libzahl_tmp_cmp->chars[0] = (zahl_char_t)b;
-		return +zcmpmag(a, libzahl_tmp_cmp);
+		while (!a->chars[a->used - 1])  a->used--; /* TODO should not be necessary */
+		if (a->used > 1)
+			return +1;
+		return a->chars[0] < (zahl_char_t)b ? -1 : a->chars[0] > (zahl_char_t)b;
         }
 }
 
 
-ZAHL_INLINE ZAHL_O3 void
+ZAHL_INLINE void
 zbset(z_t a, z_t b, size_t bit, int action)
 {
         if (ZAHL_UNLIKELY(a != b))
_AT_@ -185,7 +198,7 @@ fallback:
 }
 
 
-ZAHL_INLINE ZAHL_O3 int
+ZAHL_O3 ZAHL_INLINE int
 zbtest(z_t a, size_t bit)
 {
         size_t chars;
_AT_@ -201,7 +214,7 @@ zbtest(z_t a, size_t bit)
 }
 
 
-ZAHL_INLINE ZAHL_O3 void
+ZAHL_O3 ZAHL_INLINE void
 zsplit(z_t high, z_t low, z_t a, size_t delim)
 {
         if (ZAHL_UNLIKELY(high == a)) {
_AT_@ -214,7 +227,7 @@ zsplit(z_t high, z_t low, z_t a, size_t delim)
 }
 
 
-ZAHL_INLINE ZAHL_O3 size_t
+ZAHL_O2 ZAHL_INLINE size_t
 zsave(z_t a, void *buffer)
 {
         if (ZAHL_LIKELY(buffer)) {
diff --git a/zahl-internals.h b/zahl-internals.h
index 83fd2e6..4901b6d 100644
--- a/zahl-internals.h
+++ b/zahl-internals.h
_AT_@ -105,16 +105,18 @@ struct zahl {
 
 void libzahl_realloc(struct zahl *, size_t);
 
-ZAHL_O3 static inline void
-libzahl_memcpy(zahl_char_t *restrict d, const zahl_char_t *restrict s, register size_t n)
+ZAHL_O2 static inline void
+libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n)
 {
-	while (n--)
-		d[n] = s[n];
+	size_t i;
+	for (i = 0; i < n; i++)
+		d[i] = s[i];
 }
 
-ZAHL_O3 static inline void
-libzahl_memset(zahl_char_t *a, register zahl_char_t v, register size_t n)
+ZAHL_O2 static inline void
+libzahl_memset(register zahl_char_t *a, register zahl_char_t v, size_t n)
 {
-	while (n--)
-		a[n] = v;
+	size_t i;
+	for (i = 0; i < n; i++)
+		a[i] = v;
 }
Received on Sat Apr 30 2016 - 06:04:11 CEST
This archive was generated by hypermail 2.3.0
: Sat Apr 30 2016 - 06:12:16 CEST