[hackers] [libutf] runes are signed (31 bits are enough) || Connor Lane Smith

From: <hg_AT_suckless.org>
Date: Sat, 26 May 2012 21:51:58 +0200 (CEST)

changeset: 20:9df3c4ba5e72
tag: tip
user: Connor Lane Smith <cls_AT_lubutu.com>
date: Sat May 26 20:51:12 2012 +0100
files: rune.c utf.h
description:
runes are signed (31 bits are enough)


diff -r e2cd9cf5b465 -r 9df3c4ba5e72 rune.c
--- a/rune.c Sat May 26 12:45:42 2012 +0100
+++ b/rune.c Sat May 26 20:51:12 2012 +0100
@@ -12,7 +12,7 @@
                  : (((x) & 0xFE) == 0xFC) ? 6 /* 1111110x */ \
                                           : 0 )
 
-#define BADRUNE(x) ((x) > Runemax \
+#define BADRUNE(x) ((x) < 0 || (x) > Runemax \
                 || ((x) & 0xFFFE) == 0xFFFE \
                 || ((x) >= 0xD800 && (x) <= 0xDFFF) \
                 || ((x) >= 0xFDD0 && (x) <= 0xFDEF))
@@ -74,19 +74,21 @@
         }
         /* add values from continuation bytes */
         for(i = 1; i < MIN(n, len); i++)
- if((s[i] & 0xC0) != 0x80) {
- /* expected continuation */
+ if((s[i] & 0xC0) == 0x80) {
+ /* add bits from continuation byte to rune value
+ * cannot overflow: 6 byte sequences contain 31 bits */
+ r = (r << 6) | (s[i] & 0x3F); /* 10xxxxxx */
+ }
+ else { /* expected continuation */
                         *p = Runeerror;
                         return i;
                 }
- else
- r = (r << 6) | (s[i] & 0x3F); /* 10xxxxxx */
 
         if(i < n) /* must have reached len limit */
                 return 0;
 
- /* reject invalid runes and overlong sequences */
- if(n > 4 || runelen(r) < (int)n || BADRUNE(r))
+ /* reject invalid or overlong sequences */
+ if(BADRUNE(r) || runelen(r) < (int)n)
                 r = Runeerror;
 
         *p = r;
@@ -96,12 +98,12 @@
 int
 runelen(Rune r)
 {
- if(r <= 0x7F)
+ if(BADRUNE(r))
+ return 0; /* error */
+ else if(r <= 0x7F)
                 return 1;
         else if(r <= 0x07FF)
                 return 2;
- else if(BADRUNE(r))
- return 0; /* error */
         else if(r <= 0xFFFF)
                 return 3;
         else
diff -r e2cd9cf5b465 -r 9df3c4ba5e72 utf.h
--- a/utf.h Sat May 26 12:45:42 2012 +0100
+++ b/utf.h Sat May 26 20:51:12 2012 +0100
@@ -4,7 +4,7 @@
 
 #include <stddef.h>
 
-typedef unsigned int Rune;
+typedef int Rune;
 
 enum {
         UTFmax = 6, /* maximum bytes per rune */
Received on Sat May 26 2012 - 21:51:58 CEST

This archive was generated by hypermail 2.3.0 : Sat May 26 2012 - 22:00:08 CEST