[hackers] [libgrapheme] Improve parsing of hexadecimal strings || Laslo Hunhold

From: <git_AT_suckless.org>
Date: Sun, 12 Dec 2021 15:53:37 +0100 (CET)

commit 92a5a67ed17573d9ad7d0a85ae602cb17d154f82
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Sun Dec 12 15:51:04 2021 +0100
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Sun Dec 12 15:51:04 2021 +0100

    Improve parsing of hexadecimal strings
    
    Trying to get strtoul()-usage right is so frustrating that it's easier
    and simpler to just roll your own. While at it, modify it in such a
    way that we don't modify the constant str in range_parse().
    
    Signed-off-by: Laslo Hunhold <dev_AT_frign.de>

diff --git a/gen/util.c b/gen/util.c
index 2275538..9bd9985 100644
--- a/gen/util.c
+++ b/gen/util.c
@@ -20,31 +20,42 @@ struct segment_test_payload
 };
 
 static int
-valid_hexstring(const char *str)
+hextocp(const char *str, size_t len, uint_least32_t *cp)
 {
- const char *p = str;
+ size_t i;
+ int off;
+ char relative;
 
- while ((*p >= '0' && *p <= '9') ||
- (*p >= 'a' && *p <= 'f') ||
- (*p >= 'A' && *p <= 'F')) {
- p++;
+ /* the maximum valid codepoint is 0x10FFFF */
+ if (len > 6) {
+ fprintf(stderr, "hextocp: '%.*s' is too long.\n", (int)len, str);
+ return 1;
         }
 
- if (*p != '\0') {
- fprintf(stderr, "valid_hexstring: Invalid code point range '%s'\n", str);
- return 0;
- }
+ for (i = 0, *cp = 0; i < len; i++) {
+ if (str[i] >= '0' && str[i] <= '9') {
+ relative = '0';
+ off = 0;
+ } else if (str[i] >= 'a' && str[i] <= 'f') {
+ relative = 'a';
+ off = 10;
+ } else if (str[i] >= 'A' && str[i] <= 'F') {
+ relative = 'A';
+ off = 10;
+ } else {
+ fprintf(stderr, "hextocp: '%.*s' is not hexadecimal.\n",
+ (int)len, str);
+ return 1;
+ }
 
- return 1;
-}
+ *cp += ((uint_least32_t)1 << (4 * (len - i - 1))) *
+ (uint_least32_t)(str[i] - relative + off);
+ }
 
-static int
-cp_parse(const char *str, uint_least32_t *cp)
-{
- if (!valid_hexstring(str)) {
+ if (*cp > 0x10ffff) {
+ fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len, str);
                 return 1;
         }
- *cp = strtol(str, NULL, 16);
 
         return 0;
 }
@@ -56,19 +67,16 @@ range_parse(const char *str, struct range *range)
 
         if ((p = strstr(str, "..")) == NULL) {
                 /* input has the form "XXXXXX" */
- if (!valid_hexstring(str)) {
+ if (hextocp(str, strlen(str), &range->lower)) {
                         return 1;
                 }
- range->lower = range->upper = strtol(str, NULL, 16);
+ range->upper = range->lower;
         } else {
                 /* input has the form "XXXXXX..XXXXXX" */
- *p = '\0';
- p += 2;
- if (!valid_hexstring(str) || !valid_hexstring(p)) {
+ if (hextocp(str, (size_t)(p - str), &range->lower) ||
+ hextocp(p + 2, strlen(p + 2), &range->upper)) {
                         return 1;
                 }
- range->lower = strtol(str, NULL, 16);
- range->upper = strtol(p, NULL, 16);
         }
 
         return 0;
@@ -308,7 +316,7 @@ segment_test_callback(char *fname, char **field, size_t nfields, char *comment,
                                 fprintf(stderr, "realloc: %s\n", strerror(errno));
                                 return 1;
                         }
- if (cp_parse(token, &t->cp[t->cplen - 1])) {
+ if (hextocp(token, strlen(token), &t->cp[t->cplen - 1])) {
                                 return 1;
                         }
                         if (t->lenlen > 0) {
Received on Sun Dec 12 2021 - 15:53:37 CET

This archive was generated by hypermail 2.3.0 : Sun Dec 12 2021 - 16:00:34 CET