[hackers] [libgrapheme] Reintroduce the "grapheme_" prefix || Laslo Hunhold

From: <git_AT_suckless.org>
Date: Sat, 18 Dec 2021 12:52:01 +0100 (CET)

commit 82b85a60b3a334c928aa22de2555a55367bf739d
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Sat Dec 18 12:48:32 2021 +0100
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Sat Dec 18 12:48:32 2021 +0100

    Reintroduce the "grapheme_" prefix
    
    With the character ambiguity out of the way we can now go back to
    prefixing everything with "grapheme_" instead of "lg_". It's always
    better to have a prefix matching the library name, as it's otherwise
    not immediately obvious where a given symbol or function comes from.
    
    Signed-off-by: Laslo Hunhold <dev_AT_frign.de>

diff --git a/grapheme.h b/grapheme.h
index ff25358..a8805bd 100644
--- a/grapheme.h
+++ b/grapheme.h
@@ -6,24 +6,25 @@
 #include <stddef.h>
 #include <stdint.h>
 
-struct lg_internal_heisenstate {
+struct grapheme_internal_heisenstate {
         uint_least64_t determined;
         uint_least64_t state;
 };
 
-typedef struct lg_internal_segmentation_state {
- struct lg_internal_heisenstate a;
- struct lg_internal_heisenstate b;
+typedef struct grapheme_internal_segmentation_state {
+ struct grapheme_internal_heisenstate a;
+ struct grapheme_internal_heisenstate b;
         uint_least16_t flags;
-} LG_SEGMENTATION_STATE;
+} GRAPHEME_SEGMENTATION_STATE;
 
-#define LG_INVALID_CODE_POINT UINT32_C(0xFFFD)
+#define GRAPHEME_INVALID_CODE_POINT UINT32_C(0xFFFD)
 
-size_t lg_character_nextbreak(const char *);
+size_t grapheme_character_nextbreak(const char *);
 
-bool lg_character_isbreak(uint_least32_t, uint_least32_t, LG_SEGMENTATION_STATE *);
+bool grapheme_character_isbreak(uint_least32_t, uint_least32_t,
+ GRAPHEME_SEGMENTATION_STATE *);
 
-size_t lg_utf8_decode(const char *, size_t, uint_least32_t *);
-size_t lg_utf8_encode(uint_least32_t, char *, size_t);
+size_t grapheme_utf8_decode(const char *, size_t, uint_least32_t *);
+size_t grapheme_utf8_encode(uint_least32_t, char *, size_t);
 
 #endif /* GRAPHEME_H */
diff --git a/man/lg_grapheme_isbreak.3 b/man/grapheme_character_isbreak.3
similarity index 73%
rename from man/lg_grapheme_isbreak.3
rename to man/grapheme_character_isbreak.3
index 2d975dd..a7568f6 100644
--- a/man/lg_grapheme_isbreak.3
+++ b/man/grapheme_character_isbreak.3
@@ -1,16 +1,16 @@
 .Dd 2021-12-18
-.Dt LG_GRAPHEME_ISBREAK 3
+.Dt GRAPHEME_CHARACTER_ISBREAK 3
 .Os suckless.org
 .Sh NAME
-.Nm lg_grapheme_isbreak
+.Nm grapheme_character_isbreak
 .Nd test for a grapheme cluster break between two code points
 .Sh SYNOPSIS
 .In grapheme.h
 .Ft size_t
-.Fn lg_grapheme_isbreak "uint_least32_t cp1" "uint_least32_t cp2" "LG_SEGMENTATION_STATE *state"
+.Fn grapheme_character_isbreak "uint_least32_t cp1" "uint_least32_t cp2" "GRAPHEME_SEGMENTATION_STATE *state"
 .Sh DESCRIPTION
 The
-.Fn lg_grapheme_isbreak
+.Fn grapheme_character_isbreak
 function determines if there is a grapheme cluster break (see
 .Xr libgrapheme 7 )
 between the two code points
@@ -26,11 +26,11 @@ If
 .Va state
 is
 .Dv NULL
-.Fn lg_grapheme_isbreak
+.Fn grapheme_character_isbreak
 behaves as if it was called with a fully reset state.
 .Sh RETURN VALUES
 The
-.Fn lg_grapheme_isbreak
+.Fn grapheme_character_isbreak
 function returns
 .Va true
 if there is a grapheme cluster break between the code points
@@ -51,18 +51,18 @@ if there is not.
 int
 main(void)
 {
- LG_SEGMENTATION_STATE state = { 0 };
+ GRAPHEME_SEGMENTATION_STATE state = { 0 };
         uint_least32_t s1[] = ..., s2[] = ...; /* two input arrays */
         size_t i;
 
         for (i = 0; i + 1 < sizeof(s1) / sizeof(*s1); i++) {
- if (lg_grapheme_isbreak(s[i], s[i + 1], &state)) {
+ if (grapheme_character_isbreak(s[i], s[i + 1], &state)) {
                         printf("break in s1 at offset %zu\n", i);
                 }
         }
         memset(&state, 0, sizeof(state)); /* reset state */
         for (i = 0; i + 1 < sizeof(s2) / sizeof(*s2); i++) {
- if (lg_grapheme_isbreak(s[i], s[i + 1], &state)) {
+ if (grapheme_character_isbreak(s[i], s[i + 1], &state)) {
                         printf("break in s2 at offset %zu\n", i);
                 }
         }
@@ -71,10 +71,10 @@ main(void)
 }
 .Ed
 .Sh SEE ALSO
-.Xr lg_grapheme_nextbreak 3 ,
+.Xr grapheme_character_nextbreak 3 ,
 .Xr libgrapheme 7
 .Sh STANDARDS
-.Fn lg_grapheme_isbreak
+.Fn grapheme_character_isbreak
 is compliant with the Unicode 14.0.0 specification.
 .Sh AUTHORS
 .An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/lg_grapheme_nextbreak.3 b/man/grapheme_character_nextbreak.3
similarity index 81%
rename from man/lg_grapheme_nextbreak.3
rename to man/grapheme_character_nextbreak.3
index cad7faf..2cc0365 100644
--- a/man/lg_grapheme_nextbreak.3
+++ b/man/grapheme_character_nextbreak.3
@@ -1,16 +1,16 @@
 .Dd 2021-12-18
-.Dt LG_GRAPHEME_NEXTBREAK 3
+.Dt GRAPHEME_CHARACTER_NEXTBREAK 3
 .Os suckless.org
 .Sh NAME
-.Nm lg_grapheme_nextbreak
+.Nm grapheme_character_nextbreak
 .Nd determine byte-offset to next grapheme cluster break
 .Sh SYNOPSIS
 .In grapheme.h
 .Ft size_t
-.Fn lg_grapheme_nextbreak "const char *str"
+.Fn grapheme_character_nextbreak "const char *str"
 .Sh DESCRIPTION
 The
-.Fn lg_grapheme_nextbreak
+.Fn grapheme_character_nextbreak
 function computes the offset (in bytes) to the next grapheme
 cluster break (see
 .Xr libgrapheme 7 )
@@ -21,11 +21,11 @@ If a grapheme cluster begins at
 this offset is equal to the length of said grapheme cluster.
 .Pp
 For non-UTF-8 input data
-.Xr lg_grapheme_isbreak 3
+.Xr grapheme_character_isbreak 3
 can be used instead.
 .Sh RETURN VALUES
 The
-.Fn lg_grapheme_nextbreak
+.Fn grapheme_character_nextbreak
 function returns the offset (in bytes) to the next grapheme cluster
 break in
 .Va str
@@ -54,7 +54,7 @@ main(void)
 
         /* print each grapheme cluster with byte-length */
         for (; *s != '\\0';) {
- len = lg_grapheme_nextbreak(s);
+ len = grapheme_character_nextbreak(s);
                 printf("%2zu bytes | %.*s\\n", len, (int)len, s, len);
                 s += len;
         }
@@ -63,10 +63,10 @@ main(void)
 }
 .Ed
 .Sh SEE ALSO
-.Xr lg_grapheme_isbreak 3 ,
+.Xr grapheme_character_isbreak 3 ,
 .Xr libgrapheme 7
 .Sh STANDARDS
-.Fn lg_grapheme_nextbreak
+.Fn grapheme_character_nextbreak
 is compliant with the Unicode 14.0.0 specification.
 .Sh AUTHORS
 .An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/lg_utf8_decode.3 b/man/grapheme_utf8_decode.3
similarity index 77%
rename from man/lg_utf8_decode.3
rename to man/grapheme_utf8_decode.3
index 5dfb50a..6d90c32 100644
--- a/man/lg_utf8_decode.3
+++ b/man/grapheme_utf8_decode.3
@@ -1,16 +1,16 @@
 .Dd 2021-12-17
-.Dt LG_UTF8_DECODE 3
+.Dt GRAPHEME_UTF8_DECODE 3
 .Os suckless.org
 .Sh NAME
-.Nm lg_utf8_decode
+.Nm grapheme_utf8_decode
 .Nd decode first code point in UTF-8-encoded string
 .Sh SYNOPSIS
 .In grapheme.h
 .Ft size_t
-.Fn lg_utf8_decode "const char *str" "size_t len" "uint_least32_t *cp"
+.Fn grapheme_utf8_decode "const char *str" "size_t len" "uint_least32_t *cp"
 .Sh DESCRIPTION
 The
-.Fn lg_utf8_decode
+.Fn grapheme_utf8_decode
 function decodes the next code point in the UTF-8-encoded string
 .Va str
 of length
@@ -18,7 +18,7 @@ of length
 If the UTF-8-sequence is invalid (overlong encoding, unexpected byte,
 string ends unexpectedly, empty string, etc.) the decoding is stopped
 at the last processed byte and the decoded code point set to
-.Dv LG_INVALID_CODE_POINT.
+.Dv GRAPHEME_INVALID_CODE_POINT.
 .Pp
 If
 .Va cp
@@ -39,7 +39,7 @@ is 0 (see
 for an example).
 .Sh RETURN VALUES
 The
-.Fn lg_utf8_decode
+.Fn grapheme_utf8_decode
 function returns the number of processed bytes and 0 if
 .Va str
 is
@@ -65,8 +65,8 @@ print_cps(const char *str, size_t len)
         uint_least32_t cp;
 
         for (off = 0; off < len; off += ret) {
- if ((ret = lg_utf8_decode(str + off,
- len - off, &cp)) > (len - off)) {
+ if ((ret = grapheme_utf8_decode(str + off,
+ len - off, &cp)) > (len - off)) {
                         /*
                          * string ended unexpectedly in the middle of a
                          * multibyte sequence and we have the choice
@@ -86,16 +86,16 @@ print_cps_nul_terminated(const char *str)
         size_t ret, off;
         uint_least32_t cp;
 
- for (off = 0; (ret = lg_utf8_decode(str + off,
- (size_t)-1, &cp)) > 0 &&
+ for (off = 0; (ret = grapheme_utf8_decode(str + off,
+ (size_t)-1, &cp)) > 0 &&
              cp != 0; off += ret) {
                 printf("%"PRIxLEAST32"\\n", cp);
         }
 }
 .Ed
 .Sh SEE ALSO
-.Xr lg_grapheme_encode 3 ,
-.Xr lg_grapheme_isbreak 3 ,
+.Xr grapheme_utf8_encode 3 ,
+.Xr grapheme_character_isbreak 3 ,
 .Xr libgrapheme 7
 .Sh AUTHORS
 .An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/lg_utf8_encode.3 b/man/grapheme_utf8_encode.3
similarity index 75%
rename from man/lg_utf8_encode.3
rename to man/grapheme_utf8_encode.3
index 542cdae..a2f05c8 100644
--- a/man/lg_utf8_encode.3
+++ b/man/grapheme_utf8_encode.3
@@ -1,16 +1,16 @@
 .Dd 2021-12-17
-.Dt LG_UTF8_ENCODE 3
+.Dt GRAPHEME_UTF8_ENCODE 3
 .Os suckless.org
 .Sh NAME
-.Nm lg_utf8_encode
+.Nm grapheme_utf8_encode
 .Nd encode code point into UTF-8 string
 .Sh SYNOPSIS
 .In grapheme.h
 .Ft size_t
-.Fn lg_utf8_encode "uint_least32_t cp" "char *" "size_t"
+.Fn grapheme_utf8_encode "uint_least32_t cp" "char *" "size_t"
 .Sh DESCRIPTION
 The
-.Fn lg_utf8_encode
+.Fn grapheme_utf8_encode
 function encodes the code point
 .Va cp
 into a UTF-8-string.
@@ -24,7 +24,7 @@ is large enough it writes the UTF-8-string to the memory pointed to by
 .Va str .
 .Sh RETURN VALUES
 The
-.Fn lg_utf8_encode
+.Fn grapheme_utf8_encode
 function returns the length (in bytes) of the UTF-8-string resulting
 from encoding
 .Va cp .
@@ -45,13 +45,13 @@ cps_to_utf8(const uint_least32_t *cp, size_t cplen, char *str, size_t len)
         size_t i, off, ret;
 
         for (i = 0, off = 0; i < cplen; i++, off += ret) {
- if ((ret = lg_utf8_encode(cp[i], str + off,
- len - off)) > (len - off)) {
+ if ((ret = grapheme_utf8_encode(cp[i], str + off,
+ len - off)) > (len - off)) {
                         /* buffer too small */
                         break;
                 }
         }
-
+
         return off;
 }
 
@@ -61,7 +61,7 @@ cps_bytelen(const uint_least32_t *cp, size_t cplen)
         size_t i, len;
 
         for (i = 0, len = 0; i < cplen; i++) {
- len += lg_utf8_encode(cp[i], NULL, 0);
+ len += grapheme_utf8_encode(cp[i], NULL, 0);
         }
 
         return len;
@@ -80,8 +80,8 @@ cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
         }
 
         for (i = 0, off = 0; i < cplen; i++, off += ret) {
- if ((ret = lg_utf8_encode(cp[i], str + off,
- len - off)) > (len - off)) {
+ if ((ret = grapheme_utf8_encode(cp[i], str + off,
+ len - off)) > (len - off)) {
                         /* buffer too small */
                         break;
                 }
@@ -92,7 +92,7 @@ cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
 }
 .Ed
 .Sh SEE ALSO
-.Xr lg_grapheme_decode 3 ,
+.Xr grapheme_utf8_decode 3 ,
 .Xr libgrapheme 7
 .Sh AUTHORS
 .An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/libgrapheme.7 b/man/libgrapheme.7
index af43568..4071602 100644
--- a/man/libgrapheme.7
+++ b/man/libgrapheme.7
@@ -15,10 +15,10 @@ see
 .Sx MOTIVATION )
 according to the Unicode specification.
 .Sh SEE ALSO
-.Xr lg_grapheme_isbreak 3 ,
-.Xr lg_grapheme_nextbreak 3 ,
-.Xr lg_utf8_decode 3 ,
-.Xr lg_utf8_encode 3
+.Xr grapheme_character_isbreak 3 ,
+.Xr grapheme_character_nextbreak 3 ,
+.Xr grapheme_utf8_decode 3 ,
+.Xr grapheme_utf8_encode 3
 .Sh STANDARDS
 .Nm
 is compliant with the Unicode 14.0.0 specification.
diff --git a/src/character.c b/src/character.c
index 798fec3..7d89871 100644
--- a/src/character.c
+++ b/src/character.c
@@ -14,9 +14,10 @@ enum {
 };
 
 bool
-lg_character_isbreak(uint_least32_t a, uint_least32_t b, LG_SEGMENTATION_STATE *state)
+grapheme_character_isbreak(uint_least32_t a, uint_least32_t b,
+ GRAPHEME_SEGMENTATION_STATE *state)
 {
- struct lg_internal_heisenstate *p[2] = { 0 };
+ struct grapheme_internal_heisenstate *p[2] = { 0 };
         uint_least16_t flags = 0;
         bool isbreak = true;
 
@@ -179,18 +180,18 @@ hasbreak:
 }
 
 size_t
-lg_character_nextbreak(const char *str)
+grapheme_character_nextbreak(const char *str)
 {
         uint_least32_t cp0, cp1;
         size_t ret, len = 0;
- LG_SEGMENTATION_STATE state = { 0 };
+ GRAPHEME_SEGMENTATION_STATE state = { 0 };
 
         if (str == NULL) {
                 return 0;
         }
 
         /*
- * lg_utf8_decode, when it encounters an unexpected byte,
+ * grapheme_utf8_decode, when it encounters an unexpected byte,
          * does not count it to the error and instead assumes that the
          * unexpected byte is the beginning of a new sequence.
          * This way, when the string ends with a null byte, we never
@@ -202,17 +203,17 @@ lg_character_nextbreak(const char *str)
          */
 
         /* get first code point */
- len += lg_utf8_decode(str, (size_t)-1, &cp0);
- if (cp0 == LG_INVALID_CODE_POINT) {
+ len += grapheme_utf8_decode(str, (size_t)-1, &cp0);
+ if (cp0 == GRAPHEME_INVALID_CODE_POINT) {
                 return len;
         }
 
         while (cp0 != 0) {
                 /* get next code point */
- ret = lg_utf8_decode(str + len, (size_t)-1, &cp1);
+ ret = grapheme_utf8_decode(str + len, (size_t)-1, &cp1);
 
- if (cp1 == LG_INVALID_CODE_POINT ||
- lg_character_isbreak(cp0, cp1, &state)) {
+ if (cp1 == GRAPHEME_INVALID_CODE_POINT ||
+ grapheme_character_isbreak(cp0, cp1, &state)) {
                         /* we read an invalid cp or have a breakpoint */
                         break;
                 } else {
diff --git a/src/utf8.c b/src/utf8.c
index efd6068..a74b8c1 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -48,13 +48,13 @@ static const struct {
 };
 
 size_t
-lg_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
+grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
 {
         size_t off, i;
 
         if (s == NULL || n == 0) {
                 /* a sequence must be at least 1 byte long */
- *cp = LG_INVALID_CODE_POINT;
+ *cp = GRAPHEME_INVALID_CODE_POINT;
                 return 0;
         }
 
@@ -79,14 +79,14 @@ lg_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
                  * this also includes the cases where bits higher than
                  * the 8th are set on systems with CHAR_BIT > 8
                  */
- *cp = LG_INVALID_CODE_POINT;
+ *cp = GRAPHEME_INVALID_CODE_POINT;
                 return 1;
         }
         if (1 + off > n) {
                 /*
                  * input is not long enough, set cp as invalid
                  */
- *cp = LG_INVALID_CODE_POINT;
+ *cp = GRAPHEME_INVALID_CODE_POINT;
 
                 /*
                  * count the following continuation bytes, but nothing
@@ -125,7 +125,7 @@ lg_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
                          * higher than the 8th are set on systems
                          * with CHAR_BIT > 8
                          */
- *cp = LG_INVALID_CODE_POINT;
+ *cp = GRAPHEME_INVALID_CODE_POINT;
                         return 1 + (i - 1);
                 }
                 /*
@@ -144,14 +144,14 @@ lg_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
                  * not representable in UTF-16 (>0x10FFFF) (RFC-3629
                  * specifies the latter two conditions)
                  */
- *cp = LG_INVALID_CODE_POINT;
+ *cp = GRAPHEME_INVALID_CODE_POINT;
         }
 
         return 1 + off;
 }
 
 size_t
-lg_utf8_encode(uint_least32_t cp, char *s, size_t n)
+grapheme_utf8_encode(uint_least32_t cp, char *s, size_t n)
 {
         size_t off, i;
 
@@ -162,7 +162,7 @@ lg_utf8_encode(uint_least32_t cp, char *s, size_t n)
                  * (0xD800..0xDFFF) or not representable in UTF-16
                  * (>0x10FFFF), which RFC-3629 deems invalid for UTF-8.
                  */
- cp = LG_INVALID_CODE_POINT;
+ cp = GRAPHEME_INVALID_CODE_POINT;
         }
 
         /* determine necessary sequence type */
diff --git a/src/util.c b/src/util.c
index 1455621..9b18fcb 100644
--- a/src/util.c
+++ b/src/util.c
@@ -8,7 +8,7 @@
 /* 64-slot (0,...,63) optionally undetermined binary state */
 
 int
-heisenstate_get(struct lg_internal_heisenstate *h, int slot)
+heisenstate_get(struct grapheme_internal_heisenstate *h, int slot)
 {
         if (h == NULL || slot >= 64 || slot < 0 ||
             !(h->determined & (1 << slot))) {
@@ -21,7 +21,7 @@ heisenstate_get(struct lg_internal_heisenstate *h, int slot)
 }
 
 int
-heisenstate_set(struct lg_internal_heisenstate *h, int slot, int state)
+heisenstate_set(struct grapheme_internal_heisenstate *h, int slot, int state)
 {
         if (h == NULL || slot >= 64 || slot < 0) {
                 /* no state given or slot out of range */
@@ -54,7 +54,7 @@ cp_cmp(const void *a, const void *b)
 }
 
 int
-has_property(uint_least32_t cp, struct lg_internal_heisenstate *cpstate,
+has_property(uint_least32_t cp, struct grapheme_internal_heisenstate *cpstate,
              const struct range_list *proptable, int property)
 {
         int res;
diff --git a/src/util.h b/src/util.h
index 065097b..f646071 100644
--- a/src/util.h
+++ b/src/util.h
@@ -19,10 +19,10 @@ struct range_list {
         size_t len;
 };
 
-int heisenstate_get(struct lg_internal_heisenstate *, int);
-int heisenstate_set(struct lg_internal_heisenstate *, int, int);
+int heisenstate_get(struct grapheme_internal_heisenstate *, int);
+int heisenstate_set(struct grapheme_internal_heisenstate *, int, int);
 
-int has_property(uint_least32_t, struct lg_internal_heisenstate *,
+int has_property(uint_least32_t, struct grapheme_internal_heisenstate *,
                  const struct range_list *, int);
 
 #endif /* UTIL_H */
diff --git a/test/character-performance.c b/test/character-performance.c
index 5d25e82..ecf5a7f 100644
--- a/test/character-performance.c
+++ b/test/character-performance.c
@@ -17,7 +17,7 @@ main(int argc, char *argv[])
         struct timespec start, end;
         size_t i, j, bufsiz, off;
         uint32_t *buf;
- LG_SEGMENTATION_STATE state;
+ GRAPHEME_SEGMENTATION_STATE state;
         double cp_per_sec;
 
         (void)argc;
@@ -45,7 +45,7 @@ main(int argc, char *argv[])
         for (i = 0; i < NUM_ITERATIONS; i++) {
                 memset(&state, 0, sizeof(state));
                 for (j = 0; j < bufsiz - 1; j++) {
- (void)lg_character_isbreak(buf[j], buf[j+1], &state);
+ (void)grapheme_character_isbreak(buf[j], buf[j+1], &state);
                 }
                 if (i % (NUM_ITERATIONS / 10) == 0) {
                         printf(".");
diff --git a/test/character.c b/test/character.c
index f7f3ce8..d7a4d27 100644
--- a/test/character.c
+++ b/test/character.c
@@ -11,7 +11,7 @@
 int
 main(int argc, char *argv[])
 {
- LG_SEGMENTATION_STATE state;
+ GRAPHEME_SEGMENTATION_STATE state;
         size_t i, j, k, len, failed;
 
         (void)argc;
@@ -21,9 +21,9 @@ main(int argc, char *argv[])
                 memset(&state, 0, sizeof(state));
                 for (j = 0, k = 0, len = 1; j < character_test[i].cplen; j++) {
                         if ((j + 1) == character_test[i].cplen ||
- lg_character_isbreak(character_test[i].cp[j],
- character_test[i].cp[j + 1],
- &state)) {
+ grapheme_character_isbreak(character_test[i].cp[j],
+ character_test[i].cp[j + 1],
+ &state)) {
                                 /* check if our resulting length matches */
                                 if (k == character_test[i].lenlen ||
                                     len != character_test[i].len[k++]) {
diff --git a/test/utf8-decode.c b/test/utf8-decode.c
index d98314c..537694b 100644
--- a/test/utf8-decode.c
+++ b/test/utf8-decode.c
@@ -21,7 +21,7 @@ static const struct {
                 .arr = NULL,
                 .len = 0,
                 .exp_len = 0,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid lead byte
@@ -31,7 +31,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xFD },
                 .len = 1,
                 .exp_len = 1,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* valid 1-byte sequence
@@ -61,7 +61,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xC3 },
                 .len = 1,
                 .exp_len = 2,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 2-byte sequence (second byte malformed)
@@ -71,7 +71,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xC3, 0xFF },
                 .len = 2,
                 .exp_len = 1,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 2-byte sequence (overlong encoded)
@@ -81,7 +81,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xC1, 0xBF },
                 .len = 2,
                 .exp_len = 2,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* valid 3-byte sequence
@@ -101,7 +101,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xE0 },
                 .len = 1,
                 .exp_len = 3,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 3-byte sequence (second byte malformed)
@@ -111,7 +111,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF },
                 .len = 3,
                 .exp_len = 1,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 3-byte sequence (short string, second byte malformed)
@@ -121,7 +121,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xE0, 0x7F },
                 .len = 2,
                 .exp_len = 1,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 3-byte sequence (third byte missing)
@@ -131,7 +131,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xE0, 0xBF },
                 .len = 2,
                 .exp_len = 3,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 3-byte sequence (third byte malformed)
@@ -141,7 +141,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F },
                 .len = 3,
                 .exp_len = 2,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 3-byte sequence (overlong encoded)
@@ -151,7 +151,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF },
                 .len = 3,
                 .exp_len = 3,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 3-byte sequence (UTF-16 surrogate half)
@@ -161,7 +161,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 },
                 .len = 3,
                 .exp_len = 3,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* valid 4-byte sequence
@@ -181,7 +181,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF3 },
                 .len = 1,
                 .exp_len = 4,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 4-byte sequence (second byte malformed)
@@ -191,7 +191,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF },
                 .len = 4,
                 .exp_len = 1,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 4-byte sequence (short string 1, second byte malformed)
@@ -201,7 +201,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF3, 0x7F },
                 .len = 2,
                 .exp_len = 1,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 4-byte sequence (short string 2, second byte malformed)
@@ -211,7 +211,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF },
                 .len = 3,
                 .exp_len = 1,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
 
         {
@@ -222,7 +222,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF3, 0xBF },
                 .len = 2,
                 .exp_len = 4,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 4-byte sequence (third byte malformed)
@@ -232,7 +232,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF },
                 .len = 4,
                 .exp_len = 2,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 4-byte sequence (short string, third byte malformed)
@@ -242,7 +242,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F },
                 .len = 3,
                 .exp_len = 2,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 4-byte sequence (fourth byte missing)
@@ -252,7 +252,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF },
                 .len = 3,
                 .exp_len = 4,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 4-byte sequence (fourth byte malformed)
@@ -262,7 +262,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F },
                 .len = 4,
                 .exp_len = 3,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 4-byte sequence (overlong encoded)
@@ -272,7 +272,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF },
                 .len = 4,
                 .exp_len = 4,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
         {
                 /* invalid 4-byte sequence (UTF-16-unrepresentable)
@@ -282,7 +282,7 @@ static const struct {
                 .arr = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 },
                 .len = 4,
                 .exp_len = 4,
- .exp_cp = LG_INVALID_CODE_POINT,
+ .exp_cp = GRAPHEME_INVALID_CODE_POINT,
         },
 };
 
@@ -298,8 +298,8 @@ main(int argc, char *argv[])
                 size_t len;
                 uint_least32_t cp;
 
- len = lg_utf8_decode(dec_test[i].arr,
- dec_test[i].len, &cp);
+ len = grapheme_utf8_decode(dec_test[i].arr,
+ dec_test[i].len, &cp);
 
                 if (len != dec_test[i].exp_len ||
                     cp != dec_test[i].exp_cp) {
diff --git a/test/utf8-encode.c b/test/utf8-encode.c
index 9ebaccf..dc9090b 100644
--- a/test/utf8-encode.c
+++ b/test/utf8-encode.c
@@ -62,7 +62,7 @@ main(int argc, char *argv[])
                 char arr[4];
                 size_t len;
 
- len = lg_utf8_encode(enc_test[i].cp, arr, LEN(arr));
+ len = grapheme_utf8_encode(enc_test[i].cp, arr, LEN(arr));
 
                 if (len != enc_test[i].exp_len ||
                     memcmp(arr, enc_test[i].exp_arr, len)) {
Received on Sat Dec 18 2021 - 12:52:01 CET

This archive was generated by hypermail 2.3.0 : Sat Dec 18 2021 - 13:00:33 CET