[hackers] [libgrapheme] Create separate comparative benchmark/ folder || Laslo Hunhold

From: <git_AT_suckless.org>
Date: Sun, 2 Jan 2022 12:34:07 +0100 (CET)

commit b91df388fd392e4feb024462da6feff4ed304814
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Sun Jan 2 12:24:43 2022 +0100
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Sun Jan 2 12:33:48 2022 +0100

    Create separate comparative benchmark/ folder
    
    Benchmarks shouldn't really be part of the functional test and are
    only meaningful in direct comparison with other implementations, which
    obviously should not be a install-time-dependency. Instead, benchmarks
    are meant as a development-tool, and thus deserve their own separate
    folder.
    
    So, which library should we compare ourselves to? There are many
    options, like libunistring, libicu and libutf8proc, but at the end only
    the latter remains, for the following reasons: libunistring's
    implementation is incorrect, as it is not stateful and thus probably
    fell victim to the semantic change of the standard a few years ago,
    which makes any benchmark against it meaningless. libicu does not offer
    a way to simply check two code points with a manual state. libutf8proc
    is a great comparison, because it's the backbone of the modern Julia
    language and received a lot of performance optimization.
    
    Measuring ourselves against it makes a lot of sense.
    
    Regarding the implementation: Given we don't use the return value of the
    break check, I added function-level-optimization attributes to the
    function. However, they are behind the feature-check "__has_attribute"
    ensuring maximum portability even for compilers which don't offer that.
    
    Signed-off-by: Laslo Hunhold <dev_AT_frign.de>

diff --git a/benchmark/character.c b/benchmark/character.c
new file mode 100644
index 0000000..84ea33f
--- /dev/null
+++ b/benchmark/character.c
_AT_@ -0,0 +1,68 @@
+/* See LICENSE file for copyright and license details. */
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../grapheme.h"
+#include "../gen/character-test.h"
+#include "util.h"
+
+#include <unigbrk.h>
+#include <utf8proc.h>
+
+#define NUM_ITERATIONS 10000
+
+#if defined __has_attribute
+ #if __has_attribute(optnone)
+ void libgrapheme(const uint32_t *, size_t) __attribute__((optnone));
+ void libutf8proc(const uint32_t *, size_t) __attribute__((optnone));
+ #endif
+#endif
+
+void
+libgrapheme(const uint32_t *buf, size_t bufsiz)
+{
+ GRAPHEME_STATE state = { 0 };
+ size_t i;
+
+ for (i = 0; i + 1 < bufsiz; i++) {
+ (void)grapheme_is_character_break(buf[i], buf[i+1], &state);
+ }
+}
+
+void
+libutf8proc(const uint32_t *buf, size_t bufsiz)
+{
+ utf8proc_int32_t state = 0;
+ size_t i;
+
+ for (i = 0; i + 1 < bufsiz; i++) {
+ (void)utf8proc_grapheme_break_stateful(buf[i], buf[i+1], &state);
+ }
+}
+
+int
+main(int argc, char *argv[])
+{
+ size_t bufsiz;
+ uint32_t *buf;
+ double baseline = NAN;
+
+ (void)argc;
+
+ if ((buf = generate_test_buffer(character_test, LEN(character_test),
+ &bufsiz)) == NULL) {
+ return 1;
+ }
+
+ printf("%s\n", argv[0]);
+ run_benchmark(libgrapheme, "libgrapheme ", &baseline, buf, bufsiz,
+ NUM_ITERATIONS);
+ run_benchmark(libutf8proc, "libutf8proc ", &baseline, buf, bufsiz,
+ NUM_ITERATIONS);
+
+ free(buf);
+
+ return 0;
+}
diff --git a/benchmark/util.c b/benchmark/util.c
new file mode 100644
index 0000000..a30196d
--- /dev/null
+++ b/benchmark/util.c
_AT_@ -0,0 +1,72 @@
+/* See LICENSE file for copyright and license details. */
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "util.h"
+
+uint32_t *
+generate_test_buffer(const struct test *t, size_t tlen, size_t *bufsiz)
+{
+ size_t i, j, off;
+ uint32_t *buf;
+
+ /* allocate and generate buffer */
+ for (i = 0, *bufsiz = 0; i < tlen; i++) {
+ *bufsiz += t[i].cplen;
+ }
+ if (!(buf = calloc(*bufsiz, sizeof(*buf)))) {
+ fprintf(stderr, "generate_test_buffer: calloc: Out of memory.\n");
+ return NULL;
+ }
+ for (i = 0, off = 0; i < tlen; i++) {
+ for (j = 0; j < t[i].cplen; j++) {
+ buf[off + j] = t[i].cp[j];
+ }
+ off += t[i].cplen;
+ }
+
+ return buf;
+}
+
+static double
+time_diff(struct timespec *a, struct timespec *b)
+{
+ return (double)(b->tv_sec - a->tv_sec) +
+ (double)(b->tv_nsec - a->tv_nsec) * 1E-9;
+}
+
+void
+run_benchmark(void (*func)(const uint32_t *, size_t), const char *name,
+ double *baseline, const uint32_t *buf, size_t bufsiz,
+ uint32_t num_iterations)
+{
+ struct timespec start, end;
+ size_t i;
+ double diff;
+
+ printf("\t%s ", name);
+ fflush(stdout);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (i = 0; i < num_iterations; i++) {
+ func(buf, bufsiz);
+
+ if (i % (num_iterations / 10) == 0) {
+ printf(".");
+ fflush(stdout);
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ diff = time_diff(&start, &end);
+
+ if (isnan(*baseline)) {
+ *baseline = diff;
+ printf(" %.3fs (baseline)\n", diff);
+ } else {
+ printf(" %.3fs (%.2f%% %s)\n", diff,
+ fabs(1.0 - diff / *baseline) * 100,
+ (diff < *baseline) ? "faster" : "slower");
+ }
+}
diff --git a/benchmark/util.h b/benchmark/util.h
new file mode 100644
index 0000000..13320cc
--- /dev/null
+++ b/benchmark/util.h
_AT_@ -0,0 +1,13 @@
+/* See LICENSE file for copyright and license details. */
+#ifndef UTIL_H
+#define UTIL_H
+
+#include "../gen/types.h"
+
+#define LEN(x) (sizeof(x) / sizeof(*(x)))
+
+uint32_t *generate_test_buffer(const struct test *, size_t, size_t *);
+void run_benchmark(void (*func)(const uint32_t *, size_t), const char *,
+ double *, const uint32_t *, size_t, uint32_t);
+
+#endif /* UTIL_H */
Received on Sun Jan 02 2022 - 12:34:07 CET

This archive was generated by hypermail 2.3.0 : Sun Jan 02 2022 - 12:36:31 CET