[hackers] [libgrapheme] Merge next_break- and next_break_utf8-templates || Laslo Hunhold
commit 6c5d42d5dfe3ed5b24a26c70bd6a86d98bb2b65e
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Sun Aug 28 16:29:54 2022 +0200
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Sun Aug 28 16:29:54 2022 +0200
Merge next_break- and next_break_utf8-templates
Signed-off-by: Laslo Hunhold <dev_AT_frign.de>
diff --git a/Makefile b/Makefile
index 501b372..a396309 100644
--- a/Makefile
+++ b/Makefile
@@ -62,7 +62,6 @@ MAN_DATE = 2022-08-28
MAN_TEMPLATE =\
man/template/next_break.sh\
- man/template/next_break_utf8.sh\
man/template/to_case.sh\
MAN3 =\
@@ -199,10 +198,10 @@ man/grapheme_next_character_break.3: man/grapheme_next_character_break.sh man/te
man/grapheme_next_line_break.3: man/grapheme_next_line_break.sh man/template/next_break.sh config.mk
man/grapheme_next_sentence_break.3: man/grapheme_next_sentence_break.sh man/template/next_break.sh config.mk
man/grapheme_next_word_break.3: man/grapheme_next_word_break.sh man/template/next_break.sh config.mk
-man/grapheme_next_character_break_utf8.3: man/grapheme_next_character_break_utf8.sh man/template/next_break_utf8.sh config.mk
-man/grapheme_next_line_break_utf8.3: man/grapheme_next_line_break_utf8.sh man/template/next_break_utf8.sh config.mk
-man/grapheme_next_sentence_break_utf8.3: man/grapheme_next_sentence_break_utf8.sh man/template/next_break_utf8.sh config.mk
-man/grapheme_next_word_break_utf8.3: man/grapheme_next_word_break_utf8.sh man/template/next_break_utf8.sh config.mk
+man/grapheme_next_character_break_utf8.3: man/grapheme_next_character_break_utf8.sh man/template/next_break.sh config.mk
+man/grapheme_next_line_break_utf8.3: man/grapheme_next_line_break_utf8.sh man/template/next_break.sh config.mk
+man/grapheme_next_sentence_break_utf8.3: man/grapheme_next_sentence_break_utf8.sh man/template/next_break.sh config.mk
+man/grapheme_next_word_break_utf8.3: man/grapheme_next_word_break_utf8.sh man/template/next_break.sh config.mk
man/grapheme_to_uppercase.3: man/grapheme_to_uppercase.sh man/template/to_case.sh config.mk
man/grapheme_to_lowercase.3: man/grapheme_to_lowercase.sh man/template/to_case.sh config.mk
man/grapheme_to_titlecase.3: man/grapheme_to_titlecase.sh man/template/to_case.sh config.mk
diff --git a/man/grapheme_next_character_break.sh b/man/grapheme_next_character_break.sh
index 0d91dcc..c9d2f46 100644
--- a/man/grapheme_next_character_break.sh
+++ b/man/grapheme_next_character_break.sh
@@ -1,3 +1,4 @@
+ENCODING="codepoint" \
TYPE="character" \
REALTYPE="grapheme cluster" \
$SH man/template/next_break.sh
diff --git a/man/grapheme_next_character_break_utf8.sh b/man/grapheme_next_character_break_utf8.sh
index 5a2ecb2..e1590fe 100644
--- a/man/grapheme_next_character_break_utf8.sh
+++ b/man/grapheme_next_character_break_utf8.sh
@@ -1,3 +1,4 @@
+ENCODING="utf8" \
TYPE="character" \
REALTYPE="grapheme cluster" \
- $SH man/template/next_break_utf8.sh
+ $SH man/template/next_break.sh
diff --git a/man/grapheme_next_line_break.sh b/man/grapheme_next_line_break.sh
index fcd84e1..b578577 100644
--- a/man/grapheme_next_line_break.sh
+++ b/man/grapheme_next_line_break.sh
@@ -1,3 +1,4 @@
+ENCODING="codepoint" \
TYPE="line" \
REALTYPE="possible line" \
$SH man/template/next_break.sh
diff --git a/man/grapheme_next_line_break_utf8.sh b/man/grapheme_next_line_break_utf8.sh
index c666f96..bcf8505 100644
--- a/man/grapheme_next_line_break_utf8.sh
+++ b/man/grapheme_next_line_break_utf8.sh
@@ -1,3 +1,4 @@
+ENCODING="utf8" \
TYPE="line" \
REALTYPE="possible line" \
- $SH man/template/next_break_utf8.sh
+ $SH man/template/next_break.sh
diff --git a/man/grapheme_next_sentence_break.sh b/man/grapheme_next_sentence_break.sh
index 2325a94..9869c7f 100644
--- a/man/grapheme_next_sentence_break.sh
+++ b/man/grapheme_next_sentence_break.sh
@@ -1,3 +1,4 @@
+ENCODING="codepoint" \
TYPE="sentence" \
REALTYPE="sentence" \
$SH man/template/next_break.sh
diff --git a/man/grapheme_next_sentence_break_utf8.sh b/man/grapheme_next_sentence_break_utf8.sh
index 1d8b3e7..4a9bf9e 100644
--- a/man/grapheme_next_sentence_break_utf8.sh
+++ b/man/grapheme_next_sentence_break_utf8.sh
@@ -1,3 +1,4 @@
+ENCODING="utf8" \
TYPE="sentence" \
REALTYPE="sentence" \
- $SH man/template/next_break_utf8.sh
+ $SH man/template/next_break.sh
diff --git a/man/grapheme_next_word_break.sh b/man/grapheme_next_word_break.sh
index d1ea725..2b0abd7 100644
--- a/man/grapheme_next_word_break.sh
+++ b/man/grapheme_next_word_break.sh
@@ -1,3 +1,4 @@
+ENCODING="codepoint" \
TYPE="word" \
REALTYPE="word" \
$SH man/template/next_break.sh
diff --git a/man/grapheme_next_word_break_utf8.sh b/man/grapheme_next_word_break_utf8.sh
index 15643bf..83b1034 100644
--- a/man/grapheme_next_word_break_utf8.sh
+++ b/man/grapheme_next_word_break_utf8.sh
@@ -1,3 +1,4 @@
+ENCODING="utf8" \
TYPE="word" \
REALTYPE="word" \
- $SH man/template/next_break_utf8.sh
+ $SH man/template/next_break.sh
diff --git a/man/template/next_break.sh b/man/template/next_break.sh
index 588e49b..e268ad1 100644
--- a/man/template/next_break.sh
+++ b/man/template/next_break.sh
@@ -1,21 +1,29 @@
+if [ "$ENCODING" = "utf8" ]; then
+ UNIT="byte"
+ SUFFIX="_utf8"
+else
+ UNIT="codepoint"
+ SUFFIX=""
+fi
+
cat << EOF
.Dd $MAN_DATE
-.Dt GRAPHEME_NEXT_$(printf $TYPE | tr [:lower:] [:upper:])_BREAK 3
+.Dt GRAPHEME_NEXT_$(printf "%s_break%s" "$TYPE" "$SUFFIX" | tr [:lower:] [:upper:]) 3
.Os suckless.org
.Sh NAME
-.Nm grapheme_next_$(printf $TYPE)_break
-.Nd determine codepoint-offset to next $REALTYPE break
+.Nm grapheme_next_$(printf $TYPE)_break$SUFFIX
+.Nd determine $UNIT-offset to next $REALTYPE break
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
-.Fn grapheme_next_$(printf $TYPE)_break "const uint_least32_t *str" "size_t len"
+.Fn grapheme_next_$(printf $TYPE)_break$SUFFIX "const $(if [ "$ENCODING" = "utf8" ]; then printf "char"; else printf "uint_least32_t"; fi) *str" "size_t len"
.Sh DESCRIPTION
The
-.Fn grapheme_next_$(printf $TYPE)_break
-function computes the offset (in codepoints) to the next $REALTYPE
+.Fn grapheme_next_$(printf $TYPE)_break$SUFFIX
+function computes the offset (in $(printf $UNIT)s) to the next $REALTYPE
break (see
.Xr libgrapheme 7 )
-in the codepoint array
+in the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"; else printf "codepoint array"; fi)
.Va str
of length
.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a $REALTYPE begins at
@@ -31,25 +39,71 @@ is set to
is interpreted to be NUL-terminated and processing stops when a
NUL-byte is encountered.
.Pp
-For UTF-8-encoded input data
-.Xr grapheme_next_$(printf $TYPE)_break_utf8 3
+For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input
+data$(if [ "$TYPE" = "character" ] && [ "$ENCODING" = "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 and"; fi)
+.Xr grapheme_next_$(printf $TYPE)_break$(if [ "$ENCODING" != "utf8" ]; then printf "_utf8"; fi) 3
can be used instead.
.Sh RETURN VALUES
The
-.Fn grapheme_next_$(printf $TYPE)_break
-function returns the offset (in codepoints) to the next $REALTYPE
+.Fn grapheme_next_$(printf $TYPE)_break$SUFFIX
+function returns the offset (in $(printf $UNIT)s) to the next $REALTYPE
break in
.Va str
or 0 if
.Va str
is
.Dv NULL .
-.Sh SEE ALSO
-.Xr grapheme_is_character_break 3 ,
-.Xr grapheme_next_$(printf $TYPE)_break_utf8 3 ,
+EOF
+
+if [ "$ENCODING" = "utf8" ]; then
+cat << EOF
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdint.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ /* UTF-8 encoded input */
+ char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
+ "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
+ "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
+ "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
+ size_t ret, len, off;
+
+ printf("Input: \\\\"%s\\\\"\\\\n", s);
+
+ /* print each $REALTYPE with byte-length */
+ printf("$(printf "$REALTYPE")s in NUL-delimited input:\\\\n");
+ for (off = 0; s[off] != '\\\\0'; off += ret) {
+ ret = grapheme_next_$(printf $TYPE)_break_utf8(s + off, SIZE_MAX);
+ printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off, ret);
+ }
+ printf("\\\\n");
+
+ /* do the same, but this time string is length-delimited */
+ len = 17;
+ printf("$(printf "$REALTYPE")s in input delimited to %zu bytes:\\\\n", len);
+ for (off = 0; off < len; off += ret) {
+ ret = grapheme_next_$(printf $TYPE)_break_utf8(s + off, len - off);
+ printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off, ret);
+ }
+
+ return 0;
+}
+.Ed
+EOF
+fi
+
+cat << EOF
+.Sh SEE ALSO$(if [ "$TYPE" = "character" ] && [ "$ENCODING" != "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
+.Xr grapheme_next_$(printf $TYPE)_break$(if [ "$ENCODING" != "utf8" ]; then printf "_utf8"; fi) 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
-.Fn grapheme_next_$(printf $TYPE)_break
+.Fn grapheme_next_$(printf $TYPE)_break$SUFFIX
is compliant with the Unicode $UNICODE_VERSION specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev_AT_frign.de
diff --git a/man/template/next_break_utf8.sh b/man/template/next_break_utf8.sh
deleted file mode 100644
index 046dd45..0000000
--- a/man/template/next_break_utf8.sh
+++ /dev/null
@@ -1,96 +0,0 @@
-cat << EOF
-.Dd $MAN_DATE
-.Dt GRAPHEME_NEXT_$(printf $TYPE | tr [:lower:] [:upper:])_BREAK_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_$(printf $TYPE)_break_utf8
-.Nd determine byte-offset to next $REALTYPE break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_$(printf $TYPE)_break_utf8 "const char *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_$(printf $TYPE)_break_utf8
-function computes the offset (in bytes) to the next $REALTYPE
-break (see
-.Xr libgrapheme 7 )
-in the UTF-8-encoded string
-.Va str
-of length
-.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a $REALTYPE begins at
-.Va str
-this offset is equal to the length of said $REALTYPE."; fi)
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For non-UTF-8 input data$(if [ "$TYPE" = "character" ];
-then printf "\n.Xr grapheme_is_character_break 3
-and"; fi)
-.Xr grapheme_next_$(printf $TYPE)_break 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_$(printf $TYPE)_break_utf8
-function returns the offset (in bytes) to the next $REALTYPE
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdint.h>
-#include <stdio.h>
-
-int
-main(void)
-{
- /* UTF-8 encoded input */
- char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
- "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
- "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
- "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
- size_t ret, len, off;
-
- printf("Input: \\\\"%s\\\\"\\\\n", s);
-
- /* print each $REALTYPE with byte-length */
- printf("$(printf "$REALTYPE")s in NUL-delimited input:\\\\n");
- for (off = 0; s[off] != '\\\\0'; off += ret) {
- ret = grapheme_next_$(printf $TYPE)_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off, ret);
- }
- printf("\\\\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("$(printf "$REALTYPE")s in input delimited to %zu bytes:\\\\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_$(printf $TYPE)_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO$(if [ "$TYPE" = "character" ];
-then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
-.Xr grapheme_next_$(printf $TYPE)_break 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_$(printf $TYPE)_break_utf8
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev_AT_frign.de
-EOF
Received on Sun Aug 28 2022 - 16:30:49 CEST
This archive was generated by hypermail 2.3.0
: Sun Aug 28 2022 - 16:36:34 CEST