(wrong string) ée

From: <git_AT_suckless.org>
Date: Thu, 5 May 2016 02:42:01 +0200 (CEST)

commit 93bf9e5b4bf63708c732f5bf07619d2e59c81ec4
Author: Mattias Andrée <maandree_AT_kth.se>
AuthorDate: Thu May 5 02:41:50 2016 +0200
Commit: Mattias Andrée <maandree_AT_kth.se>
CommitDate: Thu May 5 02:41:50 2016 +0200

    Optimise zadd on x86-64
    
    Signed-off-by: Mattias Andrée <maandree_AT_kth.se>

diff --git a/src/zadd.c b/src/zadd.c
index a78a918..8efdf19 100644
--- a/src/zadd.c
+++ b/src/zadd.c
@@ -2,20 +2,79 @@
 #include "internals.h"
 
 
+#if defined(__x86_64__)
+# define ASM3(code) \
+ __asm__ __volatile__ (code : "+d"(carry) : "a"(ac + i), "b"(bc + i), "c"(cc + i))
+
+# define ASM2(code) \
+ __asm__ __volatile__ (code : "+d"(carry) : "a"(ac + i), "b"(bc + i))
+
+# define ADD2(off) \
+ "\n movq "#off"(%%rbx), %%rdx" \
+ "\n adcq %%rdx, "#off"(%%rax)"
+
+# define ADD3(off) \
+ "\n movq "#off"(%%rbx), %%rdx" \
+ "\n adcq "#off"(%%rcx), %%rdx" \
+ "\n movq %%rdx, "#off"(%%rax)"
+
+# define WRAP_CARRY(interior) \
+ "\n clc" \
+ "\n cmpq $0, %%rdx" \
+ "\n je 1f" \
+ "\n stc" \
+ "\n 1:" \
+ interior \
+ "\n movq $1, %%rdx" \
+ "\n jc 1f" \
+ "\n movq $0, %%rdx" \
+ "\n 1:"
+#endif
+
+
 static inline void
 zadd_impl_4(z_t a, z_t b, z_t c, size_t n)
 {
- zahl_char_t carry = 0, tcarry;
+ zahl_char_t carry = 0, *ac = a->chars, *bc = b->chars, *cc = c->chars;
         size_t i;
 
+#if defined(__x86_64__)
+ for (i = 0; (i += 4) <= n;)
+ ASM3(WRAP_CARRY(ADD3(-32) ADD3(-24) ADD3(-16) ADD3(-8)));
+ if (i > n) {
+ i -= 4;
+ switch (n & 3) {
+ case 3:
+ ASM3(WRAP_CARRY(ADD3(0) ADD3(8) ADD3(16)));
+ break;
+ case 2:
+ ASM3(WRAP_CARRY(ADD3(0) ADD3(8)));
+ break;
+ case 1:
+ ASM3(WRAP_CARRY(ADD3(0)));
+ break;
+ default:
+ break;
+ }
+ }
+ i = n;
+
+ while (carry) {
+ carry = libzahl_add_overflow(ac + i, ac[i], 1);
+ i++;
+ }
+#else
+ zahl_char_t tcarry;
+
         for (i = 0; i < n; i++) {
- tcarry = libzahl_add_overflow(a->chars + i, b->chars[i], c->chars[i]);
- carry = tcarry | (zahl_char_t)libzahl_add_overflow(a->chars + i, a->chars[i], carry);
+ tcarry = libzahl_add_overflow(ac + i, bc[i], cc[i]);
+ carry = tcarry | (zahl_char_t)libzahl_add_overflow(ac + i, ac[i], carry);
         }
         while (carry) {
- carry = libzahl_add_overflow(a->chars + i, a->chars[i], 1);
+ carry = libzahl_add_overflow(ac + i, ac[i], 1);
                 i++;
         }
+#endif
 
         if (a->used < i)
                 a->used = i;
@@ -24,7 +83,40 @@ zadd_impl_4(z_t a, z_t b, z_t c, size_t n)
 static inline void
 zadd_impl_3(z_t a, z_t b, size_t n)
 {
+#if defined(__x86_64__)
+ zahl_char_t carry = 0, *ac = a->chars, *bc = b->chars;
+ size_t i;
+
+ for (i = 0; (i += 4) <= n;)
+ ASM2(WRAP_CARRY(ADD2(-32) ADD2(-24) ADD2(-16) ADD2(-8)));
+ if (i > n) {
+ i -= 4;
+ switch (n & 3) {
+ case 3:
+ ASM2(WRAP_CARRY(ADD2(0) ADD2(8) ADD2(16)));
+ break;
+ case 2:
+ ASM2(WRAP_CARRY(ADD2(0) ADD2(8)));
+ break;
+ case 1:
+ ASM2(WRAP_CARRY(ADD2(0)));
+ break;
+ default:
+ break;
+ }
+ }
+ i = n;
+
+ while (carry) {
+ carry = libzahl_add_overflow(ac + i, ac[i], 1);
+ i++;
+ }
+
+ if (a->used < i)
+ a->used = i;
+#else
         zadd_impl_4(a, a, b, n);
+#endif
 }
 
 static inline void
Received on Thu May 05 2016 - 02:42:01 CEST

This archive was generated by hypermail 2.3.0 : Thu May 05 2016 - 02:48:14 CEST