Optimise libzahl_memcpy and libzahl_memset || Mattias Andrée

From: <git_AT_suckless.org>
Date: Tue, 3 May 2016 14:49:27 +0200 (CEST)

commit 40b860777616071997ec035783eeea402ffb1ae2
Author: Mattias Andrée <maandree_AT_kth.se>
AuthorDate: Tue May 3 14:03:33 2016 +0200
Commit: Mattias Andrée <maandree_AT_kth.se>
CommitDate: Tue May 3 14:03:33 2016 +0200

    Optimise libzahl_memcpy and libzahl_memset
    
    Signed-off-by: Mattias Andrée <maandree_AT_kth.se>

diff --git a/STATUS b/STATUS
index 36d9717..8cae48a 100644
--- a/STATUS
+++ b/STATUS
@@ -6,7 +6,7 @@ left column. Double-parenthesis means there may be a better way
 to do it. Inside square-brackets, there are some comments on
 multi-bit comparisons.
 
-zset .................... fastest [until ~750, then gmp, also tomsfastmath after ~2750]
+zset .................... fastest [always with gcc, unless ~250 with clang]
 zseti ................... tomsfastmath is faster [always]
 zsetu ................... tomsfastmath is faster [always]
 zneg(a, b) .............. fastest [until ~300, then gmp]
diff --git a/TODO b/TODO
index 56d8dbe..0327bca 100644
--- a/TODO
+++ b/TODO
@@ -5,9 +5,10 @@ Add zsets_radix
 Add zstr_radix
 
 Test big endian
-Test always having used > 0 for zero
+Test always having .used > 0 for zero
   Test negative/non-negative instead of sign
 Test long .sign
+Test always having .chars % 4 == 0
 
 Test optimisation of zmul:
   bc = [(Hb * Hc) << (m2 << 1)]
diff --git a/zahl-internals.h b/zahl-internals.h
index e9232dd..fc6768a 100644
--- a/zahl-internals.h
+++ b/zahl-internals.h
@@ -109,18 +109,62 @@ struct zahl {
 
 void libzahl_realloc(struct zahl *, size_t);
 
-ZAHL_O2 ZAHL_INLINE void
+ZAHL_INLINE void
 libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n)
 {
         size_t i;
- for (i = 0; i < n; i++)
- d[i] = s[i];
+ if (n <= 4) {
+ if (n >= 1)
+ d[0] = s[0];
+ if (n >= 2)
+ d[1] = s[1];
+ if (n >= 3)
+ d[2] = s[2];
+ if (n >= 4)
+ d[3] = s[3];
+ } else {
+ for (i = 0; (i += 4) <= n;) {
+ d[i - 1] = s[i - 1];
+ d[i - 2] = s[i - 2];
+ d[i - 3] = s[i - 3];
+ d[i - 4] = s[i - 4];
+ }
+ if (i > n) {
+ i -= 4;
+ if (i < n)
+ d[i] = s[i], i++;
+ if (i < n)
+ d[i] = s[i], i++;
+ if (i < n)
+ d[i] = s[i], i++;
+ if (i < n)
+ d[i] = s[i], i++;
+ }
+ }
 }
 
-ZAHL_O2 ZAHL_INLINE void
+ZAHL_INLINE void
 libzahl_memset(register zahl_char_t *a, register zahl_char_t v, size_t n)
 {
         size_t i;
- for (i = 0; i < n; i++)
- a[i] = v;
+ if (n <= 4) {
+ if (n >= 1)
+ a[0] = v;
+ if (n >= 2)
+ a[1] = v;
+ if (n >= 3)
+ a[2] = v;
+ if (n >= 4)
+ a[3] = v;
+ } else {
+ for (i = 0; (i += 4) <= n;) {
+ a[i - 1] = v;
+ a[i - 2] = v;
+ a[i - 3] = v;
+ a[i - 4] = v;
+ }
+ if (i > n)
+ for (i -= 4; i < n; i++)
+ a[i] = v;
+ }
 }
Received on Tue May 03 2016 - 14:49:27 CEST

This archive was generated by hypermail 2.3.0 : Tue May 03 2016 - 15:00:34 CEST