commit 40b860777616071997ec035783eeea402ffb1ae2
Author: Mattias Andrée <maandree_AT_kth.se>
AuthorDate: Tue May 3 14:03:33 2016 +0200
Commit: Mattias Andrée <maandree_AT_kth.se>
CommitDate: Tue May 3 14:03:33 2016 +0200
Optimise libzahl_memcpy and libzahl_memset
Signed-off-by: Mattias Andrée <maandree_AT_kth.se>
diff --git a/STATUS b/STATUS
index 36d9717..8cae48a 100644
--- a/STATUS
+++ b/STATUS
@@ -6,7 +6,7 @@ left column. Double-parenthesis means there may be a better way
to do it. Inside square-brackets, there are some comments on
multi-bit comparisons.
-zset .................... fastest [until ~750, then gmp, also tomsfastmath after ~2750]
+zset .................... fastest [always with gcc, unless ~250 with clang]
zseti ................... tomsfastmath is faster [always]
zsetu ................... tomsfastmath is faster [always]
zneg(a, b) .............. fastest [until ~300, then gmp]
diff --git a/TODO b/TODO
index 56d8dbe..0327bca 100644
--- a/TODO
+++ b/TODO
@@ -5,9 +5,10 @@ Add zsets_radix
Add zstr_radix
Test big endian
-Test always having used > 0 for zero
+Test always having .used > 0 for zero
Test negative/non-negative instead of sign
Test long .sign
+Test always having .chars % 4 == 0
Test optimisation of zmul:
bc = [(Hb * Hc) << (m2 << 1)]
diff --git a/zahl-internals.h b/zahl-internals.h
index e9232dd..fc6768a 100644
--- a/zahl-internals.h
+++ b/zahl-internals.h
@@ -109,18 +109,62 @@ struct zahl {
void libzahl_realloc(struct zahl *, size_t);
-ZAHL_O2 ZAHL_INLINE void
+ZAHL_INLINE void
libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n)
{
size_t i;
- for (i = 0; i < n; i++)
- d[i] = s[i];
+ if (n <= 4) {
+ if (n >= 1)
+ d[0] = s[0];
+ if (n >= 2)
+ d[1] = s[1];
+ if (n >= 3)
+ d[2] = s[2];
+ if (n >= 4)
+ d[3] = s[3];
+ } else {
+ for (i = 0; (i += 4) <= n;) {
+ d[i - 1] = s[i - 1];
+ d[i - 2] = s[i - 2];
+ d[i - 3] = s[i - 3];
+ d[i - 4] = s[i - 4];
+ }
+ if (i > n) {
+ i -= 4;
+ if (i < n)
+ d[i] = s[i], i++;
+ if (i < n)
+ d[i] = s[i], i++;
+ if (i < n)
+ d[i] = s[i], i++;
+ if (i < n)
+ d[i] = s[i], i++;
+ }
+ }
}
-ZAHL_O2 ZAHL_INLINE void
+ZAHL_INLINE void
libzahl_memset(register zahl_char_t *a, register zahl_char_t v, size_t n)
{
size_t i;
- for (i = 0; i < n; i++)
- a[i] = v;
+ if (n <= 4) {
+ if (n >= 1)
+ a[0] = v;
+ if (n >= 2)
+ a[1] = v;
+ if (n >= 3)
+ a[2] = v;
+ if (n >= 4)
+ a[3] = v;
+ } else {
+ for (i = 0; (i += 4) <= n;) {
+ a[i - 1] = v;
+ a[i - 2] = v;
+ a[i - 3] = v;
+ a[i - 4] = v;
+ }
+ if (i > n)
+ for (i -= 4; i < n; i++)
+ a[i] = v;
+ }
}
Received on Tue May 03 2016 - 14:49:27 CEST
This archive was generated by hypermail 2.3.0: Tue May 03 2016 - 15:00:34 CEST