[hackers] [libgrapheme] Add manuals for all implemented segmentation functions || Laslo Hunhold
commit 25808f024f907a8cac1227989fca883c7b0bc271
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Fri Aug 26 20:53:08 2022 +0200
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Fri Aug 26 20:57:13 2022 +0200
Add manuals for all implemented segmentation functions
Signed-off-by: Laslo Hunhold <dev_AT_frign.de>
diff --git a/Makefile b/Makefile
index 209ce36..30c72d2 100644
--- a/Makefile
+++ b/Makefile
@@ -60,6 +60,14 @@ MAN3 =\
man/grapheme_decode_utf8.3\
man/grapheme_encode_utf8.3\
man/grapheme_is_character_break.3\
+ man/grapheme_next_character_break.3\
+ man/grapheme_next_line_break.3\
+ man/grapheme_next_sentence_break.3\
+ man/grapheme_next_word_break.3\
+ man/grapheme_next_character_break_utf8.3\
+ man/grapheme_next_line_break_utf8.3\
+ man/grapheme_next_sentence_break_utf8.3\
+ man/grapheme_next_word_break_utf8.3\
MAN7 = man/libgrapheme.7
diff --git a/man/grapheme_is_character_break.3 b/man/grapheme_is_character_break.3
index f50eee3..c847737 100644
--- a/man/grapheme_is_character_break.3
+++ b/man/grapheme_is_character_break.3
@@ -1,4 +1,4 @@
-.Dd 2021-12-22
+.Dd 2022-08-26
.Dt GRAPHEME_IS_CHARACTER_BREAK 3
.Os suckless.org
.Sh NAME
@@ -72,6 +72,7 @@ main(void)
.Ed
.Sh SEE ALSO
.Xr grapheme_next_character_break 3 ,
+.Xr grapheme_next_character_break_utf8 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_is_character_break
diff --git a/man/grapheme_next_character_break.3 b/man/grapheme_next_character_break.3
new file mode 100644
index 0000000..cb17639
--- /dev/null
+++ b/man/grapheme_next_character_break.3
@@ -0,0 +1,55 @@
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_CHARACTER_BREAK 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_character_break
+.Nd determine codepoint-offset to next grapheme cluster break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_character_break "const uint_least32_t *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_character_break
+function computes the offset (in codepoints) to the next grapheme
+cluster break (see
+.Xr libgrapheme 7 )
+in the codepoint array
+.Va str
+of length
+.Va len .
+If a grapheme cluster begins at
+.Va str
+this offset is equal to the length of said grapheme cluster.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For UTF-8-encoded input data
+.Xr grapheme_next_character_break_utf8 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_character_break
+function returns the offset (in codepoints) to the next grapheme cluster
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh SEE ALSO
+.Xr grapheme_is_character_break 3 ,
+.Xr grapheme_next_character_break_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_character_break
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/grapheme_next_character_break_utf8.3 b/man/grapheme_next_character_break_utf8.3
index 9fe6356..fd8d9f7 100644
--- a/man/grapheme_next_character_break_utf8.3
+++ b/man/grapheme_next_character_break_utf8.3
@@ -1,4 +1,4 @@
-.Dd 2021-12-22
+.Dd 2022-08-26
.Dt GRAPHEME_NEXT_CHARACTER_BREAK_UTF8 3
.Os suckless.org
.Sh NAME
@@ -33,6 +33,8 @@ NUL-byte is encountered.
.Pp
For non-UTF-8 input data
.Xr grapheme_is_character_break 3
+and
+.Xr grapheme_next_character_break 3
can be used instead.
.Sh RETURN VALUES
The
@@ -84,6 +86,7 @@ main(void)
.Ed
.Sh SEE ALSO
.Xr grapheme_is_character_break 3 ,
+.Xr grapheme_next_character_break 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_next_character_break_utf8
diff --git a/man/grapheme_next_line_break.3 b/man/grapheme_next_line_break.3
new file mode 100644
index 0000000..05d361d
--- /dev/null
+++ b/man/grapheme_next_line_break.3
@@ -0,0 +1,51 @@
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_LINE_BREAK 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_line_break
+.Nd determine codepoint-offset to next possible line break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_line_break "const uint_least32_t *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_line_break
+function computes the offset (in codepoints) to the next possible line
+break (see
+.Xr libgrapheme 7 )
+in the codepoint array
+.Va str
+of length
+.Va len .
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For UTF-8-encoded input data
+.Xr grapheme_next_line_break_utf8 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_line_break
+function returns the offset (in codepoints) to the next possible line
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh SEE ALSO
+.Xr grapheme_next_line_break_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_line_break
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/grapheme_next_line_break_utf8.3 b/man/grapheme_next_line_break_utf8.3
new file mode 100644
index 0000000..692d13d
--- /dev/null
+++ b/man/grapheme_next_line_break_utf8.3
@@ -0,0 +1,89 @@
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_LINE_BREAK_UTF8 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_line_break_utf8
+.Nd determine byte-offset to next possible line break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_line_break_utf8 "const char *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_line_break_utf8
+function computes the offset (in bytes) to the next possible line
+break (see
+.Xr libgrapheme 7 )
+in the UTF-8-encoded string
+.Va str
+of length
+.Va len .
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For non-UTF-8 input data
+.Xr grapheme_next_line_break 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_line_break_utf8
+function returns the offset (in bytes) to the next possible line
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdint.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+	/* UTF-8 encoded input (hex escapes keep the source pure ASCII) */
+	char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
+	          "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
+	          "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
+	          "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
+	size_t ret, len, off;
+
+	printf("Input: \\"%s\\"\\n", s);
+
+	/* print each span up to the next possible line break with byte-length */
+	printf("Line break opportunities in NUL-delimited input:\\n");
+	for (off = 0; s[off] != '\\0'; off += ret) {
+		ret = grapheme_next_line_break_utf8(s + off, SIZE_MAX);
+		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+	}
+	printf("\\n");
+
+	/* do the same, but this time string is length-delimited */
+	len = 17;
+	printf("Line break opportunities in input delimited to %zu bytes:\\n", len);
+	for (off = 0; off < len; off += ret) {
+		ret = grapheme_next_line_break_utf8(s + off, len - off);
+		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+	}
+
+	return 0;
+}
+.Ed
+.Sh SEE ALSO
+.Xr grapheme_next_line_break 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_line_break_utf8
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/grapheme_next_sentence_break.3 b/man/grapheme_next_sentence_break.3
new file mode 100644
index 0000000..723cf80
--- /dev/null
+++ b/man/grapheme_next_sentence_break.3
@@ -0,0 +1,54 @@
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_SENTENCE_BREAK 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_sentence_break
+.Nd determine codepoint-offset to next sentence break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_sentence_break "const uint_least32_t *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_sentence_break
+function computes the offset (in codepoints) to the next sentence
+break (see
+.Xr libgrapheme 7 )
+in the codepoint array
+.Va str
+of length
+.Va len .
+If a sentence begins at
+.Va str
+this offset is equal to the length of said sentence.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For UTF-8-encoded input data
+.Xr grapheme_next_sentence_break_utf8 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_sentence_break
+function returns the offset (in codepoints) to the next sentence
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh SEE ALSO
+.Xr grapheme_next_sentence_break_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_sentence_break
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/grapheme_next_sentence_break_utf8.3 b/man/grapheme_next_sentence_break_utf8.3
new file mode 100644
index 0000000..858b9c0
--- /dev/null
+++ b/man/grapheme_next_sentence_break_utf8.3
@@ -0,0 +1,92 @@
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_SENTENCE_BREAK_UTF8 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_sentence_break_utf8
+.Nd determine byte-offset to next sentence break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_sentence_break_utf8 "const char *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_sentence_break_utf8
+function computes the offset (in bytes) to the next sentence
+break (see
+.Xr libgrapheme 7 )
+in the UTF-8-encoded string
+.Va str
+of length
+.Va len .
+If a sentence begins at
+.Va str
+this offset is equal to the length of said sentence.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For non-UTF-8 input data
+.Xr grapheme_next_sentence_break 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_sentence_break_utf8
+function returns the offset (in bytes) to the next sentence
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdint.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+	/* UTF-8 encoded input (hex escapes keep the source pure ASCII) */
+	char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
+	          "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
+	          "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
+	          "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
+	size_t ret, len, off;
+
+	printf("Input: \\"%s\\"\\n", s);
+
+	/* print each sentence with byte-length */
+	printf("Sentences in NUL-delimited input:\\n");
+	for (off = 0; s[off] != '\\0'; off += ret) {
+		ret = grapheme_next_sentence_break_utf8(s + off, SIZE_MAX);
+		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+	}
+	printf("\\n");
+
+	/* do the same, but this time string is length-delimited */
+	len = 17;
+	printf("Sentences in input delimited to %zu bytes:\\n", len);
+	for (off = 0; off < len; off += ret) {
+		ret = grapheme_next_sentence_break_utf8(s + off, len - off);
+		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+	}
+
+	return 0;
+}
+.Ed
+.Sh SEE ALSO
+.Xr grapheme_next_sentence_break 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_sentence_break_utf8
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/grapheme_next_word_break.3 b/man/grapheme_next_word_break.3
new file mode 100644
index 0000000..c82c4fb
--- /dev/null
+++ b/man/grapheme_next_word_break.3
@@ -0,0 +1,54 @@
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_WORD_BREAK 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_word_break
+.Nd determine codepoint-offset to next word break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_word_break "const uint_least32_t *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_word_break
+function computes the offset (in codepoints) to the next word
+break (see
+.Xr libgrapheme 7 )
+in the codepoint array
+.Va str
+of length
+.Va len .
+If a word begins at
+.Va str
+this offset is equal to the length of said word.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For UTF-8-encoded input data
+.Xr grapheme_next_word_break_utf8 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_word_break
+function returns the offset (in codepoints) to the next word
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh SEE ALSO
+.Xr grapheme_next_word_break_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_word_break
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/grapheme_next_word_break_utf8.3 b/man/grapheme_next_word_break_utf8.3
new file mode 100644
index 0000000..7bc3287
--- /dev/null
+++ b/man/grapheme_next_word_break_utf8.3
@@ -0,0 +1,92 @@
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_WORD_BREAK_UTF8 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_word_break_utf8
+.Nd determine byte-offset to next word break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_word_break_utf8 "const char *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_word_break_utf8
+function computes the offset (in bytes) to the next word
+break (see
+.Xr libgrapheme 7 )
+in the UTF-8-encoded string
+.Va str
+of length
+.Va len .
+If a word begins at
+.Va str
+this offset is equal to the length of said word.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For non-UTF-8 input data
+.Xr grapheme_next_word_break 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_word_break_utf8
+function returns the offset (in bytes) to the next word
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdint.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+	/* UTF-8 encoded input (hex escapes keep the source pure ASCII) */
+	char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
+	          "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
+	          "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
+	          "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
+	size_t ret, len, off;
+
+	printf("Input: \\"%s\\"\\n", s);
+
+	/* print each word with byte-length */
+	printf("Words in NUL-delimited input:\\n");
+	for (off = 0; s[off] != '\\0'; off += ret) {
+		ret = grapheme_next_word_break_utf8(s + off, SIZE_MAX);
+		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+	}
+	printf("\\n");
+
+	/* do the same, but this time string is length-delimited */
+	len = 17;
+	printf("Words in input delimited to %zu bytes:\\n", len);
+	for (off = 0; off < len; off += ret) {
+		ret = grapheme_next_word_break_utf8(s + off, len - off);
+		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+	}
+
+	return 0;
+}
+.Ed
+.Sh SEE ALSO
+.Xr grapheme_next_word_break 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_word_break_utf8
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/libgrapheme.7 b/man/libgrapheme.7
index 5d96e49..acbe4f7 100644
--- a/man/libgrapheme.7
+++ b/man/libgrapheme.7
@@ -1,4 +1,4 @@
-.Dd 2021-12-22
+.Dd 2022-08-26
.Dt LIBGRAPHEME 7
.Os suckless.org
.Sh NAME
@@ -38,7 +38,14 @@ example illustrating the possible usage.
.Xr grapheme_decode_utf8 3 ,
.Xr grapheme_encode_utf8 3 ,
.Xr grapheme_is_character_break 3 ,
-.Xr grapheme_next_character_break 3
+.Xr grapheme_next_character_break 3 ,
+.Xr grapheme_next_line_break 3 ,
+.Xr grapheme_next_sentence_break 3 ,
+.Xr grapheme_next_word_break 3 ,
+.Xr grapheme_next_character_break_utf8 3 ,
+.Xr grapheme_next_line_break_utf8 3 ,
+.Xr grapheme_next_sentence_break_utf8 3 ,
+.Xr grapheme_next_word_break_utf8 3
.Sh STANDARDS
.Nm
is compliant with the Unicode 14.0.0 specification.
Received on Fri Aug 26 2022 - 20:58:34 CEST
This archive was generated by hypermail 2.3.0
: Fri Aug 26 2022 - 21:00:36 CEST