[hackers] [libgrapheme] Match function parameters in code and documentation || Laslo Hunhold
commit 950adad158c79da041c85cbb3773208988ea7477
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Sun Dec 19 01:22:58 2021 +0100
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Sun Dec 19 01:22:58 2021 +0100
Match function parameters in code and documentation
This always helps with readability if you want to check upon the
implementation.
Signed-off-by: Laslo Hunhold <dev_AT_frign.de>
diff --git a/grapheme.h b/grapheme.h
index c2def7c..1f08f55 100644
--- a/grapheme.h
+++ b/grapheme.h
@@ -12,8 +12,8 @@ struct grapheme_internal_heisenstate {
};
typedef struct grapheme_internal_segmentation_state {
- struct grapheme_internal_heisenstate a;
- struct grapheme_internal_heisenstate b;
+ struct grapheme_internal_heisenstate cp0;
+ struct grapheme_internal_heisenstate cp1;
uint_least16_t flags;
} GRAPHEME_STATE;
diff --git a/man/grapheme_encode_utf8.3 b/man/grapheme_encode_utf8.3
index 42dbbe5..5e51ac2 100644
--- a/man/grapheme_encode_utf8.3
+++ b/man/grapheme_encode_utf8.3
@@ -7,7 +7,7 @@
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
-.Fn grapheme_encode_utf8 "uint_least32_t cp" "char *" "size_t"
+.Fn grapheme_encode_utf8 "uint_least32_t cp" "char *str" "size_t len"
.Sh DESCRIPTION
The
.Fn grapheme_encode_utf8
diff --git a/src/character.c b/src/character.c
index 2215543..ae8f3df 100644
--- a/src/character.c
+++ b/src/character.c
@@ -14,7 +14,7 @@ enum {
};
bool
-grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *state)
+grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, GRAPHEME_STATE *state)
{
struct grapheme_internal_heisenstate *p[2] = { 0 };
uint_least16_t flags = 0;
@@ -22,14 +22,14 @@ grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *
/* set state depending on state pointer */
if (state != NULL) {
- p[0] = &(state->a);
- p[1] = &(state->b);
+ p[0] = &(state->cp0);
+ p[1] = &(state->cp1);
flags = state->flags;
}
/* skip printable ASCII */
- if ((a >= 0x20 && a <= 0x7E) &&
- (b >= 0x20 && b <= 0x7E)) {
+ if ((cp0 >= 0x20 && cp0 <= 0x7E) &&
+ (cp1 >= 0x20 && cp1 <= 0x7E)) {
goto hasbreak;
}
@@ -41,8 +41,8 @@ grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *
/*
* update flags, if state-pointer given
*/
- if (has_property(b, p[1], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR)) {
- if (has_property(a, p[0], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR)) {
+ if (has_property(cp1, p[1], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR)) {
+ if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR)) {
/* one more RI is on the left side of the seam, flip state */
flags ^= CHARACTER_FLAG_RI_ODD;
} else {
@@ -52,22 +52,22 @@ grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *
}
}
if (!(flags & CHARACTER_FLAG_EMOJI) &&
- ((has_property(a, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
- (has_property(a, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_EXTEND)))) {
+ ((has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
+ (has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTEND)))) {
flags |= CHARACTER_FLAG_EMOJI;
} else if ((flags & CHARACTER_FLAG_EMOJI) &&
- ((has_property(a, p[0], character_prop, CHARACTER_PROP_ZWJ) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC)) ||
- (has_property(a, p[0], character_prop, CHARACTER_PROP_EXTEND) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_EXTEND)) ||
- (has_property(a, p[0], character_prop, CHARACTER_PROP_EXTEND) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
- (has_property(a, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
- (has_property(a, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_EXTEND)))) {
+ ((has_property(cp0, p[0], character_prop, CHARACTER_PROP_ZWJ) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC)) ||
+ (has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTEND) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTEND)) ||
+ (has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTEND) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
+ (has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
+ (has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTEND)))) {
/* CHARACTER_FLAG_EMOJI remains */
} else {
flags &= ~CHARACTER_FLAG_EMOJI;
@@ -85,76 +85,76 @@ grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *
/* skip GB1 and GB2, as they are never satisfied here */
/* GB3 */
- if (has_property(a, p[0], character_prop, CHARACTER_PROP_CR) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_LF)) {
+ if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_CR) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_LF)) {
goto nobreak;
}
/* GB4 */
- if (has_property(a, p[0], character_prop, CHARACTER_PROP_CONTROL) ||
- has_property(a, p[0], character_prop, CHARACTER_PROP_CR) ||
- has_property(a, p[0], character_prop, CHARACTER_PROP_LF)) {
+ if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_CONTROL) ||
+ has_property(cp0, p[0], character_prop, CHARACTER_PROP_CR) ||
+ has_property(cp0, p[0], character_prop, CHARACTER_PROP_LF)) {
goto hasbreak;
}
/* GB5 */
- if (has_property(b, p[1], character_prop, CHARACTER_PROP_CONTROL) ||
- has_property(b, p[1], character_prop, CHARACTER_PROP_CR) ||
- has_property(b, p[1], character_prop, CHARACTER_PROP_LF)) {
+ if (has_property(cp1, p[1], character_prop, CHARACTER_PROP_CONTROL) ||
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_CR) ||
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_LF)) {
goto hasbreak;
}
/* GB6 */
- if (has_property(a, p[0], character_prop, CHARACTER_PROP_HANGUL_L) &&
- (has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_L) ||
- has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_V) ||
- has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_LV) ||
+ if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_HANGUL_L) &&
+ (has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_L) ||
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_V) ||
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_LV) ||
- has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_LVT))) {
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_LVT))) {
goto nobreak;
}
/* GB7 */
- if ((has_property(a, p[0], character_prop, CHARACTER_PROP_HANGUL_LV) ||
- has_property(a, p[0], character_prop, CHARACTER_PROP_HANGUL_V)) &&
- (has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_V) ||
- has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_T))) {
+ if ((has_property(cp0, p[0], character_prop, CHARACTER_PROP_HANGUL_LV) ||
+ has_property(cp0, p[0], character_prop, CHARACTER_PROP_HANGUL_V)) &&
+ (has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_V) ||
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_T))) {
goto nobreak;
}
/* GB8 */
- if ((has_property(a, p[0], character_prop, CHARACTER_PROP_HANGUL_LVT) ||
- has_property(a, p[0], character_prop, CHARACTER_PROP_HANGUL_T)) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_T)) {
+ if ((has_property(cp0, p[0], character_prop, CHARACTER_PROP_HANGUL_LVT) ||
+ has_property(cp0, p[0], character_prop, CHARACTER_PROP_HANGUL_T)) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_T)) {
goto nobreak;
}
/* GB9 */
- if (has_property(b, p[1], character_prop, CHARACTER_PROP_EXTEND) ||
- has_property(b, p[1], character_prop, CHARACTER_PROP_ZWJ)) {
+ if (has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTEND) ||
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_ZWJ)) {
goto nobreak;
}
/* GB9a */
- if (has_property(b, p[1], character_prop, CHARACTER_PROP_SPACINGMARK)) {
+ if (has_property(cp1, p[1], character_prop, CHARACTER_PROP_SPACINGMARK)) {
goto nobreak;
}
/* GB9b */
- if (has_property(a, p[0], character_prop, CHARACTER_PROP_PREPEND)) {
+ if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_PREPEND)) {
goto nobreak;
}
/* GB11 */
if ((flags & CHARACTER_FLAG_EMOJI) &&
- has_property(a, p[0], character_prop, CHARACTER_PROP_ZWJ) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC)) {
+ has_property(cp0, p[0], character_prop, CHARACTER_PROP_ZWJ) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC)) {
goto nobreak;
}
/* GB12/GB13 */
- if (has_property(a, p[0], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR) &&
- has_property(b, p[1], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR) &&
+ if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR) &&
+ has_property(cp1, p[1], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR) &&
(flags & CHARACTER_FLAG_RI_ODD)) {
goto nobreak;
}
@@ -166,8 +166,8 @@ nobreak:
hasbreak:
if (state != NULL) {
/* move cp1-state to cp0-state, discard cp1-state */
- memcpy(&(state->a), &(state->b), sizeof(state->a));
- memset(&(state->b), 0, sizeof(state->b));
+ memcpy(&(state->cp0), &(state->cp1), sizeof(state->cp0));
+ memset(&(state->cp1), 0, sizeof(state->cp1));
/* reset flags */
if (isbreak) {
diff --git a/src/utf8.c b/src/utf8.c
index fe7775c..e01fa37 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -48,11 +48,11 @@ static const struct {
};
size_t
-grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
+grapheme_decode_utf8(const char *str, size_t len, uint_least32_t *cp)
{
size_t off, i;
- if (s == NULL || n == 0) {
+ if (str == NULL || len == 0) {
/* a sequence must be at least 1 byte long */
*cp = GRAPHEME_INVALID_CODEPOINT;
return 0;
@@ -60,14 +60,14 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
/* identify sequence type with the first byte */
for (off = 0; off < LEN(lut); off++) {
- if (BETWEEN(((const unsigned char *)s)[0], lut[off].lower,
+ if (BETWEEN(((const unsigned char *)str)[0], lut[off].lower,
lut[off].upper)) {
/*
* first byte is within the bounds; fill
* cp with the first bits contained in
* the first byte (by subtracting the high bits)
*/
- *cp = ((const unsigned char *)s)[0] - lut[off].lower;
+ *cp = ((const unsigned char *)str)[0] - lut[off].lower;
break;
}
}
@@ -82,7 +82,7 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
*cp = GRAPHEME_INVALID_CODEPOINT;
return 1;
}
- if (1 + off > n) {
+ if (1 + off > len) {
/*
* input is not long enough, set cp as invalid
*/
@@ -93,8 +93,8 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
* else in case we have a "rogue" case where e.g. such a
* sequence starter occurs right before a NUL-byte.
*/
- for (i = 0; 1 + i < n; i++) {
- if(!BETWEEN(((const unsigned char *)s)[1 + i],
+ for (i = 0; 1 + i < len; i++) {
+ if(!BETWEEN(((const unsigned char *)str)[1 + i],
0x80, 0xBF)) {
break;
}
@@ -106,7 +106,7 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
* Otherwise return the number of bytes we actually
* expected, which is larger than len.
*/
- return ((1 + i) < n) ? (1 + i) : (1 + off);
+ return ((1 + i) < len) ? (1 + i) : (1 + off);
}
/*
@@ -114,7 +114,7 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
* (i.e. between 0x80 (10000000) and 0xBF (10111111))
*/
for (i = 1; i <= off; i++) {
- if(!BETWEEN(((const unsigned char *)s)[i], 0x80, 0xBF)) {
+ if(!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) {
/*
* byte does not match format; return
* number of bytes processed excluding the
@@ -132,7 +132,7 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
* shift codepoint by 6 bits and add the 6 stored bits
* in str[i] to it using the bitmask 0x3F (00111111)
*/
- *cp = (*cp << 6) | (((const unsigned char *)s)[i] & 0x3F);
+ *cp = (*cp << 6) | (((const unsigned char *)str)[i] & 0x3F);
}
if (*cp < lut[off].mincp ||
@@ -151,7 +151,7 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
}
size_t
-grapheme_encode_utf8(uint_least32_t cp, char *s, size_t n)
+grapheme_encode_utf8(uint_least32_t cp, char *str, size_t len)
{
size_t off, i;
@@ -171,7 +171,7 @@ grapheme_encode_utf8(uint_least32_t cp, char *s, size_t n)
break;
}
}
- if (1 + off > n || s == NULL || n == 0) {
+ if (1 + off > len || str == NULL || len == 0) {
/*
* specified buffer is too small to store sequence or
* the caller just wanted to know how many bytes the
@@ -191,7 +191,7 @@ grapheme_encode_utf8(uint_least32_t cp, char *s, size_t n)
* We do not overwrite the mask because we guaranteed earlier
* that there are no bits higher than the mask allows.
*/
- ((unsigned char *)s)[0] = lut[off].lower | (uint8_t)(cp >> (6 * off));
+ ((unsigned char *)str)[0] = lut[off].lower | (uint8_t)(cp >> (6 * off));
for (i = 1; i <= off; i++) {
/*
@@ -200,8 +200,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *s, size_t n)
* extract from the properly-shifted value using the
* mask 00111111 (0x3F)
*/
- ((unsigned char *)s)[i] = 0x80 |
- ((cp >> (6 * (off - i))) & 0x3F);
+ ((unsigned char *)str)[i] = 0x80 |
+ ((cp >> (6 * (off - i))) & 0x3F);
}
return 1 + off;
Received on Sun Dec 19 2021 - 01:24:02 CET
This archive was generated by hypermail 2.3.0
: Sun Dec 19 2021 - 01:24:36 CET