[hackers] [libgrapheme] Add unit tests for all segmentation functions || Laslo Hunhold from git_AT_suckless.org on 2022-10-03 (hackers mail list archive)

From: <git_AT_suckless.org>
Date: Mon, 3 Oct 2022 21:23:12 +0200 (CEST)

commit a815be4b5de7f7df2da664049fdb04874d37016a
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Mon Oct 3 21:18:52 2022 +0200
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Mon Oct 3 21:18:52 2022 +0200

    Add unit tests for all segmentation functions

    Now all functions in the library are covered by exhaustive unit tests
    which supplement the already present conformance tests to make sure
    that the thin layer between API and implementation is also working as
    expected.

    At this point I would assess that libgrapheme is a stable foundation
    for real-world use, and preparations for the release of version 2
    can now get underway.

    Signed-off-by: Laslo Hunhold <dev_AT_frign.de>

diff --git a/test/character.c b/test/character.c
index f87022e..a2abb9c 100644
--- a/test/character.c
+++ b/test/character.c
@@ -6,12 +6,121 @@
#include "../grapheme.h"
#include "util.h"

+static const struct unit_test_next_break next_character_break[] = {
+ {
+ .description = "NULL input",
+ .input = {
+ .src = NULL,
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input, null-terminated",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
+ .srclen = SIZE_MAX,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "one character",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x2A },
+ .srclen = 3,
+ },
+ .output = { 2 },
+ },
+ {
+ .description = "one character, null-terminated",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x0 },
+ .srclen = SIZE_MAX,
+ },
+ .output = { 2 },
+ },
+};
+
+static const struct unit_test_next_break_utf8 next_character_break_utf8[] = {
+ {
+ .description = "NULL input",
+ .input = {
+ .src = NULL,
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input",
+ .input = { "", 0 },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input, NUL-terminated",
+ .input = { "", SIZE_MAX },
+ .output = { 0 },
+ },
+ {
+ .description = "one character",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA*", 9 },
+ .output = { 8 },
+ },
+ {
+ .description = "one character, fragment",
+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 },
+ .output = { 4 },
+ },
+ {
+ .description = "one character, NUL-terminated",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA", SIZE_MAX },
+ .output = { 8 },
+ },
+ {
+ .description = "one character, fragment, NUL-terminated",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
+ .output = { 4 },
+ },
+};
+
+static int
+unit_test_callback_next_character_break(const void *t, size_t off,
+ const char *name,
+ const char *argv0)
+{
+ return unit_test_callback_next_break(t, off,
+ grapheme_next_character_break,
+ name, argv0);
+}
+
+static int
+unit_test_callback_next_character_break_utf8(const void *t, size_t off,
+ const char *name,
+ const char *argv0)
+{
+ return unit_test_callback_next_break_utf8(t, off,
+ grapheme_next_character_break_utf8,
+ name, argv0);
+}
+
int
main(int argc, char *argv[])
{
         (void)argc;

         return run_break_tests(grapheme_next_character_break,
- character_break_test,
- LEN(character_break_test), argv[0]);
+ character_break_test, LEN(character_break_test), argv[0]) +
+ run_unit_tests(unit_test_callback_next_character_break,
+ next_character_break, LEN(next_character_break),
+ "grapheme_next_character_break", argv[0]) +
+ run_unit_tests(unit_test_callback_next_character_break_utf8,
+ next_character_break_utf8, LEN(next_character_break_utf8),
+ "grapheme_next_character_break_utf8", argv[0]);
}
diff --git a/test/line.c b/test/line.c
index da6600e..b69ef39 100644
--- a/test/line.c
+++ b/test/line.c
@@ -6,6 +6,110 @@
#include "../grapheme.h"
#include "util.h"

+static const struct unit_test_next_break next_line_break[] = {
+ {
+ .description = "NULL input",
+ .input = {
+ .src = NULL,
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input, null-terminated",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
+ .srclen = SIZE_MAX,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "one opportunity",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x20, 0x2A },
+ .srclen = 4,
+ },
+ .output = { 3 },
+ },
+ {
+ .description = "one opportunity, null-terminated",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x20, 0x2A, 0x0 },
+ .srclen = SIZE_MAX,
+ },
+ .output = { 3 },
+ },
+};
+
+static const struct unit_test_next_break_utf8 next_line_break_utf8[] = {
+ {
+ .description = "NULL input",
+ .input = {
+ .src = NULL,
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input",
+ .input = { "", 0 },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input, NUL-terminated",
+ .input = { "", SIZE_MAX },
+ .output = { 0 },
+ },
+ {
+ .description = "one opportunity",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA *", 10 },
+ .output = { 9 },
+ },
+ {
+ .description = "one opportunity, fragment",
+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 },
+ .output = { 4 },
+ },
+ {
+ .description = "one opportunity, NUL-terminated",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA A", SIZE_MAX },
+ .output = { 9 },
+ },
+ {
+ .description = "one opportunity, fragment, NUL-terminated",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
+ .output = { 4 },
+ },
+};
+
+static int
+unit_test_callback_next_line_break(const void *t, size_t off,
+ const char *name,
+ const char *argv0)
+{
+ return unit_test_callback_next_break(t, off,
+ grapheme_next_line_break,
+ name, argv0);
+}
+
+static int
+unit_test_callback_next_line_break_utf8(const void *t, size_t off,
+ const char *name,
+ const char *argv0)
+{
+ return unit_test_callback_next_break_utf8(t, off,
+ grapheme_next_line_break_utf8,
+ name, argv0);
+}
+
int
main(int argc, char *argv[])
{
@@ -13,5 +117,11 @@ main(int argc, char *argv[])

         return run_break_tests(grapheme_next_line_break,
                                line_break_test, LEN(line_break_test),
- argv[0]);
+ argv[0]) +
+ run_unit_tests(unit_test_callback_next_line_break,
+ next_line_break, LEN(next_line_break),
+ "grapheme_next_line_break", argv[0]) +
+ run_unit_tests(unit_test_callback_next_line_break_utf8,
+ next_line_break_utf8, LEN(next_line_break_utf8),
+ "grapheme_next_line_break_utf8", argv[0]);
}
diff --git a/test/sentence.c b/test/sentence.c
index 1751616..c18df73 100644
--- a/test/sentence.c
+++ b/test/sentence.c
@@ -6,6 +6,110 @@
#include "../grapheme.h"
#include "util.h"

+static const struct unit_test_next_break next_sentence_break[] = {
+ {
+ .description = "NULL input",
+ .input = {
+ .src = NULL,
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input, null-terminated",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
+ .srclen = SIZE_MAX,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "one sentence",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x2E, 0x20, 0x2A },
+ .srclen = 5,
+ },
+ .output = { 4 },
+ },
+ {
+ .description = "one sentence, null-terminated",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x2E, 0x20, 0x2A, 0x0 },
+ .srclen = SIZE_MAX,
+ },
+ .output = { 4 },
+ },
+};
+
+static const struct unit_test_next_break_utf8 next_sentence_break_utf8[] = {
+ {
+ .description = "NULL input",
+ .input = {
+ .src = NULL,
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input",
+ .input = { "", 0 },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input, NUL-terminated",
+ .input = { "", SIZE_MAX },
+ .output = { 0 },
+ },
+ {
+ .description = "one sentence",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Germany. It", 36 },
+ .output = { 34 },
+ },
+ {
+ .description = "one sentence, fragment",
+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 },
+ .output = { 4 },
+ },
+ {
+ .description = "one sentence, NUL-terminated",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Germany. It", SIZE_MAX },
+ .output = { 34 },
+ },
+ {
+ .description = "one sentence, fragment, NUL-terminated",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
+ .output = { 6 },
+ },
+};
+
+static int
+unit_test_callback_next_sentence_break(const void *t, size_t off,
+ const char *name,
+ const char *argv0)
+{
+ return unit_test_callback_next_break(t, off,
+ grapheme_next_sentence_break,
+ name, argv0);
+}
+
+static int
+unit_test_callback_next_sentence_break_utf8(const void *t, size_t off,
+ const char *name,
+ const char *argv0)
+{
+ return unit_test_callback_next_break_utf8(t, off,
+ grapheme_next_sentence_break_utf8,
+ name, argv0);
+}
+
int
main(int argc, char *argv[])
{
@@ -13,5 +117,11 @@ main(int argc, char *argv[])

         return run_break_tests(grapheme_next_sentence_break,
                                sentence_break_test,
- LEN(sentence_break_test), argv[0]);
+ LEN(sentence_break_test), argv[0]) +
+ run_unit_tests(unit_test_callback_next_sentence_break,
+ next_sentence_break, LEN(next_sentence_break),
+ "grapheme_next_sentence_break", argv[0]) +
+ run_unit_tests(unit_test_callback_next_sentence_break_utf8,
+ next_sentence_break_utf8, LEN(next_sentence_break_utf8),
+ "grapheme_next_sentence_break_utf8", argv[0]);
}
diff --git a/test/utf8-decode.c b/test/utf8-decode.c
index ad1495f..78f1854 100644
--- a/test/utf8-decode.c
+++ b/test/utf8-decode.c
@@ -310,7 +310,7 @@ main(int argc, char *argv[])
                         failed++;
                 }
         }
- printf("%s: %zu/%zu tests passed.\n", argv[0],
+ printf("%s: %zu/%zu unit tests passed.\n", argv[0],
                LEN(dec_test) - failed, LEN(dec_test));

         return (failed > 0) ? 1 : 0;
diff --git a/test/utf8-encode.c b/test/utf8-encode.c
index ded2af0..3b482ef 100644
--- a/test/utf8-encode.c
+++ b/test/utf8-encode.c
@@ -86,7 +86,7 @@ main(int argc, char *argv[])
                         failed++;
                 }
         }
- printf("%s: %zu/%zu tests passed.\n", argv[0],
+ printf("%s: %zu/%zu unit tests passed.\n", argv[0],
                LEN(enc_test) - failed, LEN(enc_test));

         return (failed > 0) ? 1 : 0;
diff --git a/test/util.c b/test/util.c
index d6c0de1..7cd6eee 100644
--- a/test/util.c
+++ b/test/util.c
@@ -38,8 +38,8 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
}

int
-run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,
- const char *), void *test, size_t testlen, const char *name,
+run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *,
+ const char *), const void *test, size_t testlen, const char *name,
                const char *argv0)
{
         size_t i, failed;
@@ -53,3 +53,46 @@ run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,

         return (failed > 0) ? 1 : 0;
}
+
+int
+unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off,
+ size_t (*next_break)(const uint_least32_t *, size_t),
+ const char *name, const char *argv0)
+{
+ const struct unit_test_next_break *test = t + off;
+
+ size_t ret = next_break(test->input.src, test->input.srclen);
+
+ if (ret != test->output.ret) {
+ goto err;
+ }
+
+ return 0;
+err:
+ fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
+ "(returned %zu instead of %zu).\n", argv0,
+ name, off, test->description, ret, test->output.ret);
+ return 1;
+}
+
+int
+unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t,
+ size_t off,
+ size_t (*next_break_utf8)(const char *, size_t),
+ const char *name, const char *argv0)
+{
+ const struct unit_test_next_break_utf8 *test = t + off;
+
+ size_t ret = next_break_utf8(test->input.src, test->input.srclen);
+
+ if (ret != test->output.ret) {
+ goto err;
+ }
+
+ return 0;
+err:
+ fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
+ "(returned %zu instead of %zu).\n", argv0,
+ name, off, test->description, ret, test->output.ret);
+ return 1;
+}
diff --git a/test/util.h b/test/util.h
index e6577a1..20cdf08 100644
--- a/test/util.h
+++ b/test/util.h
@@ -10,10 +10,40 @@
#undef LEN
#define LEN(x) (sizeof(x) / sizeof(*(x)))

+struct unit_test_next_break {
+ const char *description;
+ struct {
+ const uint_least32_t *src;
+ size_t srclen;
+ } input;
+ struct {
+ size_t ret;
+ } output;
+};
+
+struct unit_test_next_break_utf8 {
+ const char *description;
+ struct {
+ const char *src;
+ size_t srclen;
+ } input;
+ struct {
+ size_t ret;
+ } output;
+};
+
int run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
                     const struct break_test *test, size_t testlen,
                     const char *);
-int run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,
- const char *), void *, size_t, const char *, const char *);
+int run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *,
+ const char *), const void *, size_t, const char *, const char *);
+
+int unit_test_callback_next_break(const struct unit_test_next_break *, size_t,
+ size_t (*next_break)(const uint_least32_t *, size_t),
+ const char *, const char *);
+int unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *,
+ size_t,
+ size_t (*next_break_utf8)(const char *, size_t),
+ const char *, const char *);

#endif /* UTIL_H */
diff --git a/test/word.c b/test/word.c
index fe6d82c..42abebc 100644
--- a/test/word.c
+++ b/test/word.c
@@ -6,11 +6,121 @@
#include "../grapheme.h"
#include "util.h"

+static const struct unit_test_next_break next_word_break[] = {
+ {
+ .description = "NULL input",
+ .input = {
+ .src = NULL,
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input, null-terminated",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
+ .srclen = SIZE_MAX,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "one word",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x20, 0x2A },
+ .srclen = 4,
+ },
+ .output = { 2 },
+ },
+ {
+ .description = "one word, null-terminated",
+ .input = {
+ .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x20, 0x2A, 0x0 },
+ .srclen = SIZE_MAX,
+ },
+ .output = { 2 },
+ },
+};
+
+static const struct unit_test_next_break_utf8 next_word_break_utf8[] = {
+ {
+ .description = "NULL input",
+ .input = {
+ .src = NULL,
+ .srclen = 0,
+ },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input",
+ .input = { "", 0 },
+ .output = { 0 },
+ },
+ {
+ .description = "empty input, NUL-terminated",
+ .input = { "", SIZE_MAX },
+ .output = { 0 },
+ },
+ {
+ .description = "one word",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is", 11 },
+ .output = { 8 },
+ },
+ {
+ .description = "one word, fragment",
+ .input = { "\xF0\x9F\x87\xA9\xF0", 5 },
+ .output = { 4 },
+ },
+ {
+ .description = "one word, NUL-terminated",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is", SIZE_MAX },
+ .output = { 8 },
+ },
+ {
+ .description = "one word, fragment, NUL-terminated",
+ .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
+ .output = { 4 },
+ },
+};
+
+static int
+unit_test_callback_next_word_break(const void *t, size_t off,
+ const char *name,
+ const char *argv0)
+{
+ return unit_test_callback_next_break(t, off,
+ grapheme_next_word_break,
+ name, argv0);
+}
+
+static int
+unit_test_callback_next_word_break_utf8(const void *t, size_t off,
+ const char *name,
+ const char *argv0)
+{
+ return unit_test_callback_next_break_utf8(t, off,
+ grapheme_next_word_break_utf8,
+ name, argv0);
+}
+
int
main(int argc, char *argv[])
{
         (void)argc;

         return run_break_tests(grapheme_next_word_break, word_break_test,
- LEN(word_break_test), argv[0]);
+ LEN(word_break_test), argv[0]) +
+ run_unit_tests(unit_test_callback_next_word_break,
+ next_word_break, LEN(next_word_break),
+ "grapheme_next_word_break", argv[0]) +
+ run_unit_tests(unit_test_callback_next_word_break_utf8,
+ next_word_break_utf8, LEN(next_word_break_utf8),
+ "grapheme_next_word_break_utf8", argv[0]);
}
Received on Mon Oct 03 2022 - 21:23:12 CEST

This archive was generated by hypermail 2.3.0 : Mon Oct 03 2022 - 21:24:37 CEST