[hackers] [libgrapheme] Refactor state into unsigned integer || Laslo Hunhold

From: <git_AT_suckless.org>
Date: Mon, 21 Nov 2022 11:06:50 +0100 (CET)

commit 4431bcb54fa941eb26a4fa926df96d82ef0111bd
Author: Laslo Hunhold <laslo_AT_hunhold.de>
AuthorDate: Mon Nov 21 11:05:26 2022 +0100
Commit: Laslo Hunhold <laslo_AT_hunhold.de>
CommitDate: Mon Nov 21 11:06:37 2022 +0100

    Refactor state into unsigned integer
    
    Now that we separated the level-determination itself, there
    is no need to have a signed integer for this purpose. This
    simplifies the masking.

diff --git a/grapheme.h b/grapheme.h
index f8d39bd..d0ec863 100644
--- a/grapheme.h
+++ b/grapheme.h
@@ -16,14 +16,14 @@ enum grapheme_bidirectional_override {
 };
 
 void grapheme_bidirectional_get_line_embedding_levels(
- const int_least32_t *, size_t, int_least8_t *);
+ const uint_least32_t *, size_t, int_least8_t *);
 
 size_t grapheme_bidirectional_preprocess(
         const uint_least32_t *, size_t, enum grapheme_bidirectional_override,
- int_least32_t *, size_t);
+ uint_least32_t *, size_t);
 size_t grapheme_bidirectional_preprocess_utf8(
         const char *, size_t, enum grapheme_bidirectional_override,
- int_least32_t *, size_t);
+ uint_least32_t *, size_t);
 
 size_t grapheme_bidirectional_reorder_line(
         const uint_least32_t *, const int_least8_t *, size_t,
diff --git a/src/bidirectional.c b/src/bidirectional.c
index a1736b2..ac97b3c 100644
--- a/src/bidirectional.c
+++ b/src/bidirectional.c
@@ -8,127 +8,71 @@
 
 #define MAX_DEPTH 125
 
-#if 0
 enum state_type {
         STATE_PROP, /* in 0..23, bidi_property */
+ STATE_PRESERVED_PROP, /* in 0..23, preserved bidi_property for L1-rules */
         STATE_BRACKET_OFF, /* in 0..255, offset in bidi_bracket */
         STATE_LEVEL, /* in 0..MAX_DEPTH+1=126, embedding level */
         STATE_PARAGRAPH_LEVEL, /* in 0..1, paragraph embedding level */
         STATE_VISITED, /* in 0..1, visited within isolating run */
 };
 
-/* without rawprop, as it should be */
 static struct {
- int_least32_t filter_mask;
- int_least32_t clear_mask;
+ uint_least32_t filter_mask;
         size_t mask_shift;
         int_least16_t value_offset;
 } state_lut[] = {
         [STATE_PROP] = {
- .filter_mask = 0x00001F, /* 00000000 00000000 00011111 */
- .clear_mask = 0x3FFFE0, /* 00111111 11111111 11100000 */
+ .filter_mask = 0x000001F, /* 00000000 00000000 00000000 00011111 */
                 .mask_shift = 0,
                 .value_offset = 0,
         },
- [STATE_BRACKET_OFF] = {
- .filter_mask = 0x001FE0, /* 00000000 00011111 11100000 */
- .clear_mask = 0x3FE01F, /* 00111111 11100000 00011111 */
+ [STATE_PRESERVED_PROP] = {
+ .filter_mask = 0x00003E0, /* 00000000 00000000 00000011 11100000 */
                 .mask_shift = 5,
                 .value_offset = 0,
         },
- [STATE_LEVEL] = {
- .filter_mask = 0x0FE000, /* 00001111 11100000 00000000 */
- .clear_mask = 0x301FFF, /* 00110000 00011111 11111111 */
- .mask_shift = 13,
- .value_offset = -1,
- },
- [STATE_PARAGRAPH_LEVEL] = {
- .filter_mask = 0x100000, /* 00010000 00000000 00000000 */
- .clear_mask = 0x2FFFFF, /* 00101111 11111111 11111111 */
- .mask_shift = 20,
- .value_offset = 0,
- },
- [STATE_VISITED] = {
- .filter_mask = 0x200000, /* 00100000 00000000 00000000 */
- .clear_mask = 0x1FFFFF, /* 00011111 11111111 11111111 */
- .mask_shift = 21,
- .value_offset = 0,
- },
-};
-#endif
-
-enum state_type {
- STATE_PROP, /* in 0..23, bidi_property */
- STATE_BRACKET_OFF, /* in 0..255, offset in bidi_bracket */
- STATE_LEVEL, /* in 0..MAX_DEPTH+1=126, embedding level */
- STATE_PARAGRAPH_LEVEL, /* in 0..1, paragraph embedding level */
- STATE_VISITED, /* in 0..1, visited within isolating run */
- STATE_RAWPROP,
-};
-
-static struct {
- int_least32_t filter_mask;
- int_least32_t clear_mask;
- size_t mask_shift;
- int_least16_t value_offset;
-} state_lut[] = {
- [STATE_PROP] = {
- .filter_mask = 0x000001F, /* 00000000 00000000 00000000 00011111 */
- .clear_mask = 0x7FFFFE0, /* 00000111 11111111 11111111 11100000 */
- .mask_shift = 0,
- .value_offset = 0,
- },
         [STATE_BRACKET_OFF] = {
- .filter_mask = 0x0001FE0, /* 00000000 00000000 00011111 11100000 */
- .clear_mask = 0x7FFE01F, /* 00000111 11111111 11100000 00011111 */
- .mask_shift = 5,
+ .filter_mask = 0x003FC00, /* 00000000 00000011 11111100 00000000 */
+ .mask_shift = 10,
                 .value_offset = 0,
         },
         [STATE_LEVEL] = {
- .filter_mask = 0x00FE000, /* 00000000 00001111 11100000 00000000 */
- .clear_mask = 0x7F01FFF, /* 00000111 11110000 00011111 11111111 */
- .mask_shift = 13,
+ .filter_mask = 0x1FC0000, /* 00000001 11111100 00000000 00000000 */
+ .mask_shift = 18,
                 .value_offset = -1,
         },
         [STATE_PARAGRAPH_LEVEL] = {
- .filter_mask = 0x0100000, /* 00000000 00010000 00000000 00000000 */
- .clear_mask = 0x7EFFFFF, /* 00000111 11101111 11111111 11111111 */
- .mask_shift = 20,
+ .filter_mask = 0x2000000, /* 00000010 00000000 00000000 00000000 */
+ .mask_shift = 25,
                 .value_offset = 0,
         },
         [STATE_VISITED] = {
- .filter_mask = 0x0200000, /* 00000000 00100000 00000000 00000000 */
- .clear_mask = 0x7DFFFFF, /* 00000111 11011111 11111111 11111111 */
- .mask_shift = 21,
- .value_offset = 0,
- },
- [STATE_RAWPROP] = {
- .filter_mask = 0x7C00000, /* 00000111 11000000 00000000 00000000 */
- .clear_mask = 0x03FFFFF, /* 00000000 00111111 11111111 11111111 */
- .mask_shift = 22,
+ .filter_mask = 0x4000000, /* 00000100 00000000 00000000 00000000 */
+ .mask_shift = 26,
                 .value_offset = 0,
         },
 };
 
 static inline int_least16_t
-get_state(enum state_type t, int_least32_t input)
+get_state(enum state_type t, uint_least32_t input)
 {
- return (int_least16_t)(((input & state_lut[t].filter_mask) >>
- state_lut[t].mask_shift) +
- state_lut[t].value_offset);
+ return (int_least16_t)((input & state_lut[t].filter_mask) >>
+ state_lut[t].mask_shift) +
+ state_lut[t].value_offset;
 }
 
 static inline void
-set_state(enum state_type t, int_least16_t value, int_least32_t *output)
+set_state(enum state_type t, int_least16_t value, uint_least32_t *output)
 {
- *output &= state_lut[t].clear_mask;
- *output |= ((value - state_lut[t].value_offset)
+ *output &= ~state_lut[t].filter_mask;
+ *output |= ((uint_least32_t)(value - state_lut[t].value_offset)
                     << state_lut[t].mask_shift) &
                    state_lut[t].filter_mask;
 }
 
 struct isolate_runner {
- int_least32_t *buf;
+ uint_least32_t *buf;
         size_t buflen;
 
         struct {
@@ -179,7 +123,7 @@ ir_set_current_prop(struct isolate_runner *ir, enum bidi_property prop)
 }
 
 static void
-ir_init(int_least32_t *buf, size_t buflen, size_t off,
+ir_init(uint_least32_t *buf, size_t buflen, size_t off,
         uint_least8_t paragraph_level, bool within, struct isolate_runner *ir)
 {
         size_t i;
@@ -385,7 +329,7 @@ ir_advance(struct isolate_runner *ir)
 }
 
 static size_t
-preprocess_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off,
+preprocess_isolating_run_sequence(uint_least32_t *buf, size_t buflen, size_t off,
                                   uint_least8_t paragraph_level)
 {
         enum bidi_property sequence_prop, prop;
@@ -597,7 +541,7 @@ preprocess_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off,
 
 static uint_least8_t
 get_paragraph_level(enum grapheme_bidirectional_override override,
- bool terminate_on_pdi, const int_least32_t *buf,
+ bool terminate_on_pdi, const uint_least32_t *buf,
                     size_t buflen)
 {
         enum bidi_property prop;
@@ -653,7 +597,7 @@ get_paragraph_level(enum grapheme_bidirectional_override override,
 
 static void
 preprocess_paragraph(enum grapheme_bidirectional_override override,
- int_least32_t *buf, size_t buflen)
+ uint_least32_t *buf, size_t buflen)
 {
         enum bidi_property prop;
         int_least8_t level;
@@ -961,7 +905,7 @@ again:
         runsince = SIZE_MAX;
         for (bufoff = 0; bufoff < buflen; bufoff++) {
                 level = (int_least8_t)get_state(STATE_LEVEL, buf[bufoff]);
- prop = (uint_least8_t)get_state(STATE_RAWPROP, buf[bufoff]);
+ prop = (uint_least8_t)get_state(STATE_PRESERVED_PROP, buf[bufoff]);
 
                 if (level == -1) {
                         /* ignored character */
@@ -1038,7 +982,7 @@ get_bidi_bracket_off(uint_least32_t cp)
 static size_t
 preprocess(HERODOTUS_READER *r,
            enum grapheme_bidirectional_override override,
- int_least32_t *buf, size_t buflen)
+ uint_least32_t *buf, size_t buflen)
 {
         size_t bufoff, bufsize, lastparoff;
         uint_least32_t cp;
@@ -1075,7 +1019,7 @@ preprocess(HERODOTUS_READER *r,
                         set_state(STATE_LEVEL, 0, &(buf[bufoff]));
                         set_state(STATE_PARAGRAPH_LEVEL, 0, &(buf[bufoff]));
                         set_state(STATE_VISITED, 0, &(buf[bufoff]));
- set_state(STATE_RAWPROP,
+ set_state(STATE_PRESERVED_PROP,
                                   (uint_least8_t)get_bidi_property(cp),
                                   &(buf[bufoff]));
                 }
@@ -1110,7 +1054,7 @@ preprocess(HERODOTUS_READER *r,
 size_t
 grapheme_bidirectional_preprocess(
         const uint_least32_t *src, size_t srclen,
- enum grapheme_bidirectional_override override, int_least32_t *dest,
+ enum grapheme_bidirectional_override override, uint_least32_t *dest,
         size_t destlen)
 {
         HERODOTUS_READER r;
@@ -1123,7 +1067,7 @@ grapheme_bidirectional_preprocess(
 size_t
 grapheme_bidirectional_preprocess_utf8(
         const char *src, size_t srclen,
- enum grapheme_bidirectional_override override, int_least32_t *dest,
+ enum grapheme_bidirectional_override override, uint_least32_t *dest,
         size_t destlen)
 {
         HERODOTUS_READER r;
@@ -1135,7 +1079,7 @@ grapheme_bidirectional_preprocess_utf8(
 
 void
 grapheme_bidirectional_get_line_embedding_levels(
- const int_least32_t *linedata, size_t linelen, int_least8_t *linelevel)
+ const uint_least32_t *linedata, size_t linelen, int_least8_t *linelevel)
 {
         enum bidi_property prop;
         size_t i, runsince;
@@ -1143,7 +1087,7 @@ grapheme_bidirectional_get_line_embedding_levels(
         /* rule L1.4 */
         runsince = SIZE_MAX;
         for (i = 0; i < linelen; i++) {
- prop = (uint_least8_t)get_state(STATE_RAWPROP, linedata[i]);
+ prop = (uint_least8_t)get_state(STATE_PRESERVED_PROP, linedata[i]);
 
                 /* write level into level array */
                 if ((linelevel[i] = (int_least8_t)get_state(
@@ -1171,7 +1115,7 @@ grapheme_bidirectional_get_line_embedding_levels(
                  */
                 for (i = runsince; i < linelen; i++) {
                         if (linelevel[i] != -1) {
- linelevel[i] = get_state(
+ linelevel[i] = (int_least8_t)get_state(
                                         STATE_PARAGRAPH_LEVEL, linedata[i]);
                         }
                 }
diff --git a/test/bidirectional.c b/test/bidirectional.c
index c32cacd..8646afe 100644
--- a/test/bidirectional.c
+++ b/test/bidirectional.c
@@ -12,7 +12,7 @@
 int
 main(int argc, char *argv[])
 {
- int_least32_t data[512]; /* TODO iterate and get max, allocate */
+ uint_least32_t data[512]; /* TODO iterate and get max, allocate */
         int_least8_t lev[512];
         size_t i, num_tests, failed, datalen, ret, j, m;
 
Received on Mon Nov 21 2022 - 11:06:50 CET

This archive was generated by hypermail 2.3.0 : Mon Nov 21 2022 - 11:12:32 CET