[hackers] [libgrapheme] Split test/test.c into three separate tests || Laslo Hunhold from git_AT_suckless.org on 2020-10-18 (hackers mail list archive)

From: <git_AT_suckless.org>
Date: Sun, 18 Oct 2020 22:20:21 +0200 (CEST)

commit 009498ac0fc3744a7bc5cc1afb5f601e445442be
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Sun Oct 18 22:20:31 2020 +0200
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Sun Oct 18 22:20:31 2020 +0200

    Split test/test.c into three separate tests

    The test-infrastructure needed a bit of preparation, but now it makes
    sense to split the single test.c into its three parts, making it easier
    to handle and reason about.

    Signed-off-by: Laslo Hunhold <dev_AT_frign.de>

diff --git a/Makefile b/Makefile
index 7b4663d..b02a347 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@
include config.mk

LIB = src/boundary src/codepoint src/grapheme
-TEST = test/test
+TEST = test/grapheme_break test/utf8-decode test/utf8-encode
DATA = data/gbp data/emo data/gbt

MAN3 = man/grapheme_bytelen.3
@@ -24,12 +24,16 @@ data/util.o: data/util.c config.mk data/util.h
src/boundary.o: src/boundary.c config.mk data/emo.h data/gbp.h grapheme.h
src/codepoint.o: src/codepoint.c config.mk grapheme.h
src/grapheme.o: src/grapheme.c config.mk grapheme.h
-test/test.o: test/test.c config.mk data/gbt.h grapheme.h
+test/grapheme_break.o: test/grapheme_break.c config.mk data/gbt.h grapheme.h
+test/utf8-encode.o: test/utf8-encode.c config.mk grapheme.h
+test/utf8-decode.o: test/utf8-decode.c config.mk grapheme.h

data/gbp: data/gbp.o data/util.o
data/emo: data/emo.o data/util.o
data/gbt: data/gbt.o data/util.o
-test/test: test/test.o $(LIB:=.o)
+test/grapheme_break: test/grapheme_break.o $(LIB:=.o)
+test/utf8-encode: test/utf8-encode.o $(LIB:=.o)
+test/utf8-decode: test/utf8-decode.o $(LIB:=.o)

data/gbp.txt:
         wget -O $@ https://www.unicode.org/Public/13.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
diff --git a/test/grapheme_break.c b/test/grapheme_break.c
new file mode 100644
index 0000000..3bd48a5
--- /dev/null
+++ b/test/grapheme_break.c
@@ -0,0 +1,41 @@
+/* See LICENSE file for copyright and license details. */
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "../grapheme.h"
+#include "../data/gbt.h"
+
+#define LEN(x) (sizeof(x) / sizeof(*x))
+
+int
+main(void)
+{
+ int state;
+ size_t i, j, k, len, failed;
+
+ /* grapheme break test */
+ for (i = 0, failed = 0; i < LEN(t); i++) {
+ for (j = 0, k = 0, state = 0, len = 1; j < t[i].cplen; j++) {
+ if ((j + 1) == t[i].cplen ||
+ grapheme_boundary(t[i].cp[j], t[i].cp[j + 1],
+ &state)) {
+ /* check if our resulting length matches */
+ if (k == t[i].lenlen || len != t[i].len[k++]) {
+ fprintf(stderr, "Failed \"%s\"\n",
+ t[i].descr);
+ failed++;
+ break;
+ }
+ len = 1;
+ } else {
+ len++;
+ }
+ }
+ }
+ printf("Grapheme break test: Passed %zu out of %zu tests.\n",
+ LEN(t) - failed, LEN(t));
+
+ return (failed > 0) ? 1 : 0;
+}
diff --git a/test/test.c b/test/utf8-decode.c
similarity index 69%
rename from test/test.c
rename to test/utf8-decode.c
index 82613a1..8349f39 100644
--- a/test/test.c
+++ b/test/utf8-decode.c
@@ -5,53 +5,9 @@
#include <string.h>

#include "../grapheme.h"
-#include "../data/gbt.h"

#define LEN(x) (sizeof(x) / sizeof(*x))

-static const struct {
- uint32_t cp; /* input code point */
- uint8_t *exp_arr; /* expected UTF-8 byte sequence */
- size_t exp_len; /* expected length of UTF-8 sequence */
-} enc_test[] = {
- {
- /* invalid code point (UTF-16 surrogate half) */
- .cp = UINT32_C(0xD800),
- .exp_arr = (uint8_t[]){ 0xEF, 0xBF, 0xBD },
- .exp_len = 3,
- },
- {
- /* invalid code point (UTF-16-unrepresentable) */
- .cp = UINT32_C(0x110000),
- .exp_arr = (uint8_t[]){ 0xEF, 0xBF, 0xBD },
- .exp_len = 3,
- },
- {
- /* code point encoded to a 1-byte sequence */
- .cp = 0x01,
- .exp_arr = (uint8_t[]){ 0x01 },
- .exp_len = 1,
- },
- {
- /* code point encoded to a 2-byte sequence */
- .cp = 0xFF,
- .exp_arr = (uint8_t[]){ 0xC3, 0xBF },
- .exp_len = 2,
- },
- {
- /* code point encoded to a 3-byte sequence */
- .cp = 0xFFF,
- .exp_arr = (uint8_t[]){ 0xE0, 0xBF, 0xBF },
- .exp_len = 3,
- },
- {
- /* code point encoded to a 4-byte sequence */
- .cp = UINT32_C(0xFFFFF),
- .exp_arr = (uint8_t[]){ 0xF3, 0xBF, 0xBF, 0xBF },
- .exp_len = 4,
- },
-};
-
static const struct {
         uint8_t *arr; /* UTF-8 byte sequence */
         size_t len; /* length of UTF-8 byte sequence */
@@ -293,40 +249,7 @@ static const struct {
int
main(void)
{
- int state;
- size_t i, j, k, len, failed;
-
- /* UTF-8 encoder test */
- for (i = 0, failed = 0; i < LEN(enc_test); i++) {
- uint8_t arr[4];
- size_t len;
-
- len = grapheme_cp_encode(enc_test[i].cp, arr, LEN(arr));
-
- if (len != enc_test[i].exp_len ||
- memcmp(arr, enc_test[i].exp_arr, len)) {
- fprintf(stderr, "Failed UTF-8-encoder test %zu: "
- "Expected (", i);
- for (j = 0; j < enc_test[i].exp_len; j++) {
- fprintf(stderr, "0x%x",
- enc_test[i].exp_arr[j]);
- if (j + 1 < enc_test[i].exp_len) {
- fprintf(stderr, " ");
- }
- }
- fprintf(stderr, "), but got (");
- for (j = 0; j < len; j++) {
- fprintf(stderr, "0x%x", arr[j]);
- if (j + 1 < len) {
- fprintf(stderr, " ");
- }
- }
- fprintf(stderr, ")\n");
- failed++;
- }
- }
- printf("UTF-8 encoder test: Passed %zu out of %zu tests.\n",
- LEN(enc_test) - failed, LEN(enc_test));
+ size_t i, failed;

         /* UTF-8 decoder test */
         for (i = 0, failed = 0; i < LEN(dec_test); i++) {
@@ -348,27 +271,5 @@ main(void)
         printf("UTF-8 decoder test: Passed %zu out of %zu tests.\n",
                LEN(dec_test) - failed, LEN(dec_test));

- /* grapheme break test */
- for (i = 0, failed = 0; i < LEN(t); i++) {
- for (j = 0, k = 0, state = 0, len = 1; j < t[i].cplen; j++) {
- if ((j + 1) == t[i].cplen ||
- grapheme_boundary(t[i].cp[j], t[i].cp[j + 1],
- &state)) {
- /* check if our resulting length matches */
- if (k == t[i].lenlen || len != t[i].len[k++]) {
- fprintf(stderr, "Failed \"%s\"\n",
- t[i].descr);
- failed++;
- break;
- }
- len = 1;
- } else {
- len++;
- }
- }
- }
- printf("Grapheme break test: Passed %zu out of %zu tests.\n",
- LEN(t) - failed, LEN(t));
-
         return (failed > 0) ? 1 : 0;
}
diff --git a/test/utf8-encode.c b/test/utf8-encode.c
new file mode 100644
index 0000000..7851d25
--- /dev/null
+++ b/test/utf8-encode.c
@@ -0,0 +1,92 @@
+/* See LICENSE file for copyright and license details. */
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "../grapheme.h"
+
+#define LEN(x) (sizeof(x) / sizeof(*x))
+
+static const struct {
+ uint32_t cp; /* input code point */
+ uint8_t *exp_arr; /* expected UTF-8 byte sequence */
+ size_t exp_len; /* expected length of UTF-8 sequence */
+} enc_test[] = {
+ {
+ /* invalid code point (UTF-16 surrogate half) */
+ .cp = UINT32_C(0xD800),
+ .exp_arr = (uint8_t[]){ 0xEF, 0xBF, 0xBD },
+ .exp_len = 3,
+ },
+ {
+ /* invalid code point (UTF-16-unrepresentable) */
+ .cp = UINT32_C(0x110000),
+ .exp_arr = (uint8_t[]){ 0xEF, 0xBF, 0xBD },
+ .exp_len = 3,
+ },
+ {
+ /* code point encoded to a 1-byte sequence */
+ .cp = 0x01,
+ .exp_arr = (uint8_t[]){ 0x01 },
+ .exp_len = 1,
+ },
+ {
+ /* code point encoded to a 2-byte sequence */
+ .cp = 0xFF,
+ .exp_arr = (uint8_t[]){ 0xC3, 0xBF },
+ .exp_len = 2,
+ },
+ {
+ /* code point encoded to a 3-byte sequence */
+ .cp = 0xFFF,
+ .exp_arr = (uint8_t[]){ 0xE0, 0xBF, 0xBF },
+ .exp_len = 3,
+ },
+ {
+ /* code point encoded to a 4-byte sequence */
+ .cp = UINT32_C(0xFFFFF),
+ .exp_arr = (uint8_t[]){ 0xF3, 0xBF, 0xBF, 0xBF },
+ .exp_len = 4,
+ },
+};
+
+int
+main(void)
+{
+ size_t i, j, failed;
+
+ /* UTF-8 encoder test */
+ for (i = 0, failed = 0; i < LEN(enc_test); i++) {
+ uint8_t arr[4];
+ size_t len;
+
+ len = grapheme_cp_encode(enc_test[i].cp, arr, LEN(arr));
+
+ if (len != enc_test[i].exp_len ||
+ memcmp(arr, enc_test[i].exp_arr, len)) {
+ fprintf(stderr, "Failed UTF-8-encoder test %zu: "
+ "Expected (", i);
+ for (j = 0; j < enc_test[i].exp_len; j++) {
+ fprintf(stderr, "0x%x",
+ enc_test[i].exp_arr[j]);
+ if (j + 1 < enc_test[i].exp_len) {
+ fprintf(stderr, " ");
+ }
+ }
+ fprintf(stderr, "), but got (");
+ for (j = 0; j < len; j++) {
+ fprintf(stderr, "0x%x", arr[j]);
+ if (j + 1 < len) {
+ fprintf(stderr, " ");
+ }
+ }
+ fprintf(stderr, ")\n");
+ failed++;
+ }
+ }
+ printf("UTF-8 encoder test: Passed %zu out of %zu tests.\n",
+ LEN(enc_test) - failed, LEN(enc_test));
+
+ return (failed > 0) ? 1 : 0;
+}
Received on Sun Oct 18 2020 - 22:20:21 CEST

This archive was generated by hypermail 2.3.0 : Sun Oct 18 2020 - 22:24:36 CEST