[hackers] [libutf] runes are signed (31 bits are enough) || Connor Lane Smith
changeset: 20:9df3c4ba5e72
tag: tip
user: Connor Lane Smith <cls_AT_lubutu.com>
date: Sat May 26 20:51:12 2012 +0100
files: rune.c utf.h
description:
runes are signed (31 bits are enough)
diff -r e2cd9cf5b465 -r 9df3c4ba5e72 rune.c
--- a/rune.c Sat May 26 12:45:42 2012 +0100
+++ b/rune.c Sat May 26 20:51:12 2012 +0100
@@ -12,7 +12,7 @@
: (((x) & 0xFE) == 0xFC) ? 6 /* 1111110x */ \
: 0 )
-#define BADRUNE(x) ((x) > Runemax \
+#define BADRUNE(x) ((x) < 0 || (x) > Runemax \
|| ((x) & 0xFFFE) == 0xFFFE \
|| ((x) >= 0xD800 && (x) <= 0xDFFF) \
|| ((x) >= 0xFDD0 && (x) <= 0xFDEF))
@@ -74,19 +74,21 @@
}
/* add values from continuation bytes */
for(i = 1; i < MIN(n, len); i++)
- if((s[i] & 0xC0) != 0x80) {
- /* expected continuation */
+ if((s[i] & 0xC0) == 0x80) {
+ /* add bits from continuation byte to rune value
+ * cannot overflow: 6 byte sequences contain 31 bits */
+ r = (r << 6) | (s[i] & 0x3F); /* 10xxxxxx */
+ }
+ else { /* expected continuation */
*p = Runeerror;
return i;
}
- else
- r = (r << 6) | (s[i] & 0x3F); /* 10xxxxxx */
if(i < n) /* must have reached len limit */
return 0;
- /* reject invalid runes and overlong sequences */
- if(n > 4 || runelen(r) < (int)n || BADRUNE(r))
+ /* reject invalid or overlong sequences */
+ if(BADRUNE(r) || runelen(r) < (int)n)
r = Runeerror;
*p = r;
@@ -96,12 +98,12 @@
int
runelen(Rune r)
{
- if(r <= 0x7F)
+ if(BADRUNE(r))
+ return 0; /* error */
+ else if(r <= 0x7F)
return 1;
else if(r <= 0x07FF)
return 2;
- else if(BADRUNE(r))
- return 0; /* error */
else if(r <= 0xFFFF)
return 3;
else
diff -r e2cd9cf5b465 -r 9df3c4ba5e72 utf.h
--- a/utf.h Sat May 26 12:45:42 2012 +0100
+++ b/utf.h Sat May 26 20:51:12 2012 +0100
@@ -4,7 +4,7 @@
#include <stddef.h>
-typedef unsigned int Rune;
+typedef int Rune;
enum {
UTFmax = 6, /* maximum bytes per rune */
Received on Sat May 26 2012 - 21:51:58 CEST
This archive was generated by hypermail 2.3.0
: Sat May 26 2012 - 22:00:08 CEST