[hackers] [libgrapheme] Refactor benchmark code || Laslo Hunhold from git_AT_suckless.org on 2022-01-09 (hackers mail list archive)

From: <git_AT_suckless.org>
Date: Sun, 9 Jan 2022 17:31:55 +0100 (CET)

commit da46b2648d2846dc23e310b7ac0cc3ddebb7ccd3
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Sun Jan 9 17:30:53 2022 +0100
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Sun Jan 9 17:30:53 2022 +0100

    Refactor benchmark code

    Rename some variables for more consistent naming, add a function
    to explicitly generate a UTF-8-test-buffer and move some things into
    benchmark/util.

    Signed-off-by: Laslo Hunhold <dev_AT_frign.de>

diff --git a/benchmark/character.c b/benchmark/character.c
index 626733d..2ef0d1c 100644
--- a/benchmark/character.c
+++ b/benchmark/character.c
@@ -14,27 +14,20 @@

#define NUM_ITERATIONS 1000000

-#ifdef __has_attribute
- #if __has_attribute(optnone)
- void libgrapheme(const void *) __attribute__((optnone));
- void libutf8proc(const void *) __attribute__((optnone));
- #endif
-#endif
-
-struct payload {
+struct break_benchmark_payload {
         uint_least32_t *buf;
- utf8proc_int32_t *buf_int32;
- size_t bufsiz;
+ utf8proc_int32_t *buf_utf8proc;
+ size_t buflen;
};

void
libgrapheme(const void *payload)
{
         GRAPHEME_STATE state = { 0 };
- const struct payload *p = payload;
+ const struct break_benchmark_payload *p = payload;
         size_t i;

- for (i = 0; i + 1 < p->bufsiz; i++) {
+ for (i = 0; i + 1 < p->buflen; i++) {
                 (void)grapheme_is_character_break(p->buf[i], p->buf[i+1],
                                                   &state);
         }
@@ -44,12 +37,12 @@ void
libutf8proc(const void *payload)
{
         utf8proc_int32_t state = 0;
- const struct payload *p = payload;
+ const struct break_benchmark_payload *p = payload;
         size_t i;

- for (i = 0; i + 1 < p->bufsiz; i++) {
- (void)utf8proc_grapheme_break_stateful(p->buf_int32[i],
- p->buf_int32[i+1],
+ for (i = 0; i + 1 < p->buflen; i++) {
+ (void)utf8proc_grapheme_break_stateful(p->buf_utf8proc[i],
+ p->buf_utf8proc[i+1],
                                                        &state);
         }
}
@@ -57,33 +50,33 @@ libutf8proc(const void *payload)
int
main(int argc, char *argv[])
{
- struct payload p;
+ struct break_benchmark_payload p;
         double baseline = (double)NAN;
         size_t i;

         (void)argc;

- if ((p.buf = generate_test_buffer(character_test, LEN(character_test),
- &(p.bufsiz))) == NULL) {
+ if ((p.buf = generate_cp_test_buffer(character_test, LEN(character_test),
+ &(p.buflen))) == NULL) {
                 return 1;
         }
- if ((p.buf_int32 = malloc(p.bufsiz * sizeof(*(p.buf_int32)))) == NULL) {
+ if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) == NULL) {
                 fprintf(stderr, "malloc: %s\n", strerror(errno));
                 exit(1);
         }
- for (i = 0; i < p.bufsiz; i++) {
+ for (i = 0; i < p.buflen; i++) {
                 /*
                  * there is no overflow, as we know that the maximum
                  * codepoint is 0x10FFFF, which is way below 2^31
                  */
- p.buf_int32[i] = (utf8proc_int32_t)p.buf[i];
+ p.buf_utf8proc[i] = (utf8proc_int32_t)p.buf[i];
         }

         printf("%s\n", argv[0]);
         run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "comparison",
- &baseline, NUM_ITERATIONS, p.bufsiz - 1);
+ &baseline, NUM_ITERATIONS, p.buflen - 1);
         run_benchmark(libutf8proc, &p, "libutf8proc ", NULL, "comparison",
- &baseline, NUM_ITERATIONS, p.bufsiz - 1);
+ &baseline, NUM_ITERATIONS, p.buflen - 1);

         free(p.buf);

diff --git a/benchmark/utf8-decode.c b/benchmark/utf8-decode.c
index 5dc0321..68d28fe 100644
--- a/benchmark/utf8-decode.c
+++ b/benchmark/utf8-decode.c
@@ -14,30 +14,23 @@

#define NUM_ITERATIONS 100000

-#ifdef __has_attribute
- #if __has_attribute(optnone)
- void libgrapheme(const void *) __attribute__((optnone));
- void libutf8proc(const void *) __attribute__((optnone));
- #endif
-#endif
-
-struct payload {
- char *buf_char;
- utf8proc_uint8_t *buf_uint8;
- size_t bufsiz;
+struct utf8_benchmark_payload {
+ char *buf;
+ utf8proc_uint8_t *buf_utf8proc;
+ size_t buflen;
};

void
libgrapheme(const void *payload)
{
- const struct payload *p = payload;
+ const struct utf8_benchmark_payload *p = payload;
         uint_least32_t cp;
         size_t ret, off;

- for (off = 0; off < p->bufsiz; off += ret) {
- if ((ret = grapheme_decode_utf8(p->buf_char + off,
- p->bufsiz - off, &cp)) >
- (p->bufsiz - off)) {
+ for (off = 0; off < p->buflen; off += ret) {
+ if ((ret = grapheme_decode_utf8(p->buf + off,
+ p->buflen - off, &cp)) >
+ (p->buflen - off)) {
                         break;
                 }
                 (void)cp;
@@ -47,14 +40,14 @@ libgrapheme(const void *payload)
void
libutf8proc(const void *payload)
{
- const struct payload *p = payload;
+ const struct utf8_benchmark_payload *p = payload;
         utf8proc_int32_t cp;
         utf8proc_ssize_t ret;
         size_t off;

- for (off = 0; off < p->bufsiz; off += (size_t)ret) {
- if ((ret = utf8proc_iterate(p->buf_uint8 + off,
- (utf8proc_ssize_t)(p->bufsiz - off),
+ for (off = 0; off < p->buflen; off += (size_t)ret) {
+ if ((ret = utf8proc_iterate(p->buf_utf8proc + off,
+ (utf8proc_ssize_t)(p->buflen - off),
                                             &cp)) < 0) {
                         break;
                 }
@@ -65,57 +58,38 @@ libutf8proc(const void *payload)
int
main(int argc, char *argv[])
{
- struct payload p;
- size_t cpbufsiz, i, off, ret;
- uint_least32_t *cpbuf;
+ struct utf8_benchmark_payload p;
+ size_t i;
         double baseline = (double)NAN;

         (void)argc;

- if ((cpbuf = generate_test_buffer(character_test, LEN(character_test),
- &cpbufsiz)) == NULL) {
- return 1;
- }
+ p.buf = generate_utf8_test_buffer(character_test,
+ LEN(character_test),
+ &(p.buflen));

- /* convert cp-buffer to utf8-data (both as char and custom uint8-type) */
- for (i = 0, p.bufsiz = 0; i < cpbufsiz; i++) {
- p.bufsiz += grapheme_encode_utf8(cpbuf[i], NULL, 0);
- }
- if ((p.buf_char = malloc(p.bufsiz)) == NULL) {
- fprintf(stderr, "malloc: %s\n", strerror(errno));
- exit(1);
- }
- for (i = 0, off = 0; i < cpbufsiz; i++, off += ret) {
- if ((ret = grapheme_encode_utf8(cpbuf[i], p.buf_char + off,
- p.bufsiz - off)) >
- (p.bufsiz - off)) {
- /* shouldn't happen */
- fprintf(stderr, "Error while converting buffer.\n");
- exit(1);
- }
- }
- if ((p.buf_uint8 = malloc(p.bufsiz)) == NULL) {
+ /* convert cp-buffer to stupid custom libutf8proc-uint8-type */
+ if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) {
                 fprintf(stderr, "malloc: %s\n", strerror(errno));
                 exit(1);
         }
- for (i = 0; i < p.bufsiz; i++) {
+ for (i = 0; i < p.buflen; i++) {
                 /*
                  * even if char is larger than 8 bit, it will only have
                  * any of the first 8 bits set (by construction).
                  */
- p.buf_uint8[i] = (utf8proc_uint8_t)p.buf_char[i];
+ p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i];
         }

         printf("%s\n", argv[0]);
         run_benchmark(libgrapheme, &p, "libgrapheme ", NULL,
- "byte", &baseline, NUM_ITERATIONS, p.bufsiz);
+ "byte", &baseline, NUM_ITERATIONS, p.buflen);
         run_benchmark(libutf8proc, &p, "libutf8proc ",
                       "but unsafe (does not detect overlong encodings)",
- "byte", &baseline, NUM_ITERATIONS, p.bufsiz);
+ "byte", &baseline, NUM_ITERATIONS, p.buflen);

- free(cpbuf);
- free(p.buf_char);
- free(p.buf_uint8);
+ free(p.buf);
+ free(p.buf_utf8proc);

         return 0;
}
diff --git a/benchmark/util.c b/benchmark/util.c
index b5d7e23..5f85874 100644
--- a/benchmark/util.c
+++ b/benchmark/util.c
@@ -5,22 +5,23 @@
#include <time.h>

#include "../gen/types.h"
+#include "../grapheme.h"
#include "util.h"

uint_least32_t *
-generate_test_buffer(const struct break_test *test, size_t testlen,
- size_t *bufsiz)
+generate_cp_test_buffer(const struct break_test *test, size_t testlen,
+ size_t *buflen)
{
         size_t i, j, off;
         uint_least32_t *buf;

         /* allocate and generate buffer */
- for (i = 0, *bufsiz = 0; i < testlen; i++) {
- *bufsiz += test[i].cplen;
+ for (i = 0, *buflen = 0; i < testlen; i++) {
+ *buflen += test[i].cplen;
         }
- if (!(buf = calloc(*bufsiz, sizeof(*buf)))) {
+ if (!(buf = calloc(*buflen, sizeof(*buf)))) {
                 fprintf(stderr, "generate_test_buffer: calloc: Out of memory.\n");
- return NULL;
+ exit(1);
         }
         for (i = 0, off = 0; i < testlen; i++) {
                 for (j = 0; j < test[i].cplen; j++) {
@@ -32,6 +33,42 @@ generate_test_buffer(const struct break_test *test, size_t testlen,
         return buf;
}

+char *
+generate_utf8_test_buffer(const struct break_test *test, size_t testlen,
+ size_t *buflen)
+{
+ size_t i, j, off, ret;
+ char *buf;
+
+ /* allocate and generate buffer */
+ for (i = 0, *buflen = 0; i < testlen; i++) {
+ for (j = 0; j < test[i].cplen; j++) {
+ *buflen += grapheme_encode_utf8(test[i].cp[j], NULL, 0);
+ }
+ }
+ (*buflen)++; /* terminating NUL-byte */
+ if (!(buf = malloc(*buflen))) {
+ fprintf(stderr, "generate_test_buffer: malloc: Out of memory.\n");
+ exit(1);
+ }
+ for (i = 0, off = 0; i < testlen; i++) {
+ for (j = 0; j < test[i].cplen; j++, off += ret) {
+ if ((ret = grapheme_encode_utf8(test[i].cp[j],
+ buf + off,
+ *buflen - off)) >
+ (*buflen - off)) {
+ /* shouldn't happen */
+ fprintf(stderr, "generate_utf8_test_buffer: "
+ "Buffer too small.\n");
+ exit(1);
+ }
+ }
+ }
+ buf[*buflen - 1] = '\0';
+
+ return buf;
+}
+
static double
time_diff(struct timespec *a, struct timespec *b)
{
diff --git a/benchmark/util.h b/benchmark/util.h
index 7451290..653d9da 100644
--- a/benchmark/util.h
+++ b/benchmark/util.h
@@ -6,8 +6,17 @@

#define LEN(x) (sizeof(x) / sizeof(*(x)))

-uint_least32_t *generate_test_buffer(const struct break_test *, size_t,
- size_t *);
+#ifdef __has_attribute
+ #if __has_attribute(optnone)
+ void libgrapheme(const void *) __attribute__((optnone));
+ void libutf8proc(const void *) __attribute__((optnone));
+ #endif
+#endif
+
+uint_least32_t *generate_cp_test_buffer(const struct break_test *, size_t,
+ size_t *);
+char *generate_utf8_test_buffer(const struct break_test *, size_t, size_t *);
+
void run_benchmark(void (*func)(const void *), const void *, const char *,
                    const char *, const char *, double *, size_t, size_t);

Received on Sun Jan 09 2022 - 17:31:55 CET

This archive was generated by hypermail 2.3.0 : Sun Jan 09 2022 - 17:36:32 CET