commit 91d7e5af278c6e4231a8330a2ec6e54440b01465
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Mon Oct 12 12:32:27 2020 +0200
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Mon Oct 12 12:36:10 2020 +0200
Rename grapheme_len() to grapheme_bytelen() and refactor manual
I wasn't happy with the previous name, because it can be a bit confusing,
given there are functions like strlen() which count the number of bytes
in a string and one might assume that grapheme_len() counts the number
of grapheme clusters in a string.
Calling it grapheme_bytelen() clears this confusion up, as one can then
tell that it's about the number of bytes in a grapheme cluster and not
the number of grapheme clusters.
The manual was refactored inspired by the high standards set by the
OpenBSD manuals.
Signed-off-by: Laslo Hunhold <dev_AT_frign.de>
diff --git a/Makefile b/Makefile
index 33c163f..c92c109 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ GBT_URL = https://www.unicode.org/Public/13.0.0/ucd/auxiliary/GraphemeBreakTest.
GBP = data/gbp.txt
EMO = data/emo.txt
GBT = data/gbt.txt
-MAN3 = man/grapheme_len.3
+MAN3 = man/grapheme_bytelen.3
MAN7 = man/libgrapheme.7
all: libgrapheme.a libgrapheme.so $(BIN)
diff --git a/grapheme.h b/grapheme.h
index 9c5375f..e0c5d02 100644
--- a/grapheme.h
+++ b/grapheme.h
@@ -8,10 +8,9 @@
#define GRAPHEME_CP_INVALID UINT32_C(0xFFFD)
int grapheme_boundary(uint32_t, uint32_t, int *);
+size_t grapheme_bytelen(const char *);
size_t grapheme_cp_decode(uint32_t *, const uint8_t *, size_t);
size_t grapheme_cp_encode(uint32_t, uint8_t *, size_t);
-size_t grapheme_len(const char *);
-
#endif /* GRAPHEME_H */
diff --git a/man/grapheme_len.3 b/man/grapheme_bytelen.3
similarity index 50%
rename from man/grapheme_len.3
rename to man/grapheme_bytelen.3
index 73b500e..7780138 100644
--- a/man/grapheme_len.3
+++ b/man/grapheme_bytelen.3
@@ -1,17 +1,31 @@
.Dd 2020-03-26
-.Dt GRAPHEME_LEN 3
+.Dt GRAPHEME_BYTELEN 3
.Os suckless.org
.Sh NAME
-.Nm grapheme_len
-.Nd determine grapheme cluster length
+.Nm grapheme_bytelen
+.Nd compute grapheme cluster byte-length
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
-.Fn grapheme_len "const char *"
+.Fn grapheme_bytelen "const char *str"
.Sh DESCRIPTION
-.Nm
-returns the length (in bytes) of the grapheme cluster beginning at
-the provided char-address.
+The
+.Fn grapheme_bytelen
+function computes the length (in bytes) of the grapheme cluster
+(see
+.Xr libgrapheme 7 )
+beginning at the NUL-terminated string
+.Va str .
+.Sh RETURN VALUES
+The
+.Fn grapheme_bytelen
+function returns the length (in bytes) of the grapheme cluster beginning
+at
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
.Sh EXAMPLES
.Bd -literal
/* cc (-static) -o example example.c -lgrapheme */
@@ -26,7 +40,7 @@ main(void)
/* print each grapheme cluster with accompanying byte-length */
while (*s != '\\0') {
- len = grapheme_len(s);
+ len = grapheme_bytelen(s);
printf("%2zu bytes | %.*s\\n", len, (int)len, s, len);
s += len;
}
@@ -36,5 +50,8 @@ main(void)
.Ed
.Sh SEE ALSO
.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_bytelen
+is compliant with the Unicode 13.0.0 specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/libgrapheme.7 b/man/libgrapheme.7
index 40e7b1d..c74e8e6 100644
--- a/man/libgrapheme.7
+++ b/man/libgrapheme.7
@@ -16,9 +16,9 @@ see
.Sx MOTIVATION )
using the Unicode grapheme cluster breaking algorithm (UAX #29).
.Pp
-You can either count the byte-length of the grapheme cluster at the
-beginning of an UTF-8-encoded string (see
-.Xr grapheme_len 3 )
+You can either count the length (in bytes) of the grapheme cluster at
+the beginning of a UTF-8-encoded string (see
+.Xr grapheme_bytelen 3 )
or determine if a grapheme cluster breaks between two Unicode code
points (see
.Xr grapheme_boundary 3 ) ,
@@ -28,9 +28,9 @@ and
.Xr grapheme_cp_encode 3 ) .
.Sh SEE ALSO
.Xr grapheme_boundary 3 ,
+.Xr grapheme_bytelen 3 ,
.Xr grapheme_cp_decode 3 ,
.Xr grapheme_cp_encode 3 ,
-.Xr grapheme_len 3
.Sh STANDARDS
.Nm
is compliant with the Unicode 13.0.0 specification.
diff --git a/src/grapheme.c b/src/grapheme.c
index 2115836..8577038 100644
--- a/src/grapheme.c
+++ b/src/grapheme.c
@@ -5,7 +5,7 @@
#include "../grapheme.h"
size_t
-grapheme_len(const char *str)
+grapheme_bytelen(const char *str)
{
uint32_t cp0, cp1;
size_t ret, len = 0;
Received on Mon Oct 12 2020 - 12:35:59 CEST