[PATCH] do not roll our own utf functions

From: Strake <strake888_AT_gmail.com>
Date: Sun, 5 May 2013 09:35:58 -0500

---
 README    |   2 +-
 config.mk |   2 +-
 st.c      | 129 +++++---------------------------------------------------------
 3 files changed, 11 insertions(+), 122 deletions(-)
diff --git a/README b/README
index 25606a2..2bbb859 100644
--- a/README
+++ b/README
@@ -5,7 +5,7 @@ st is a simple virtual terminal emulator for X which sucks less.
 Requirements
 ------------
-In order to build st you need the Xlib header files.
+In order to build st you need libutf and the Xlib header files.
 Installation
diff --git a/config.mk b/config.mk
index 9431de2..36a0424 100644
--- a/config.mk
+++ b/config.mk
@@ -14,7 +14,7 @@ X11LIB = /usr/X11R6/lib
 INCS = -I. -I/usr/include -I${X11INC} \
        `pkg-config --cflags fontconfig` \
        `pkg-config --cflags freetype2`
-LIBS = -L/usr/lib -lc -L${X11LIB} -lX11 -lutil -lXext -lXft \
+LIBS = -L/usr/lib -lc -lutf -L${X11LIB} -lX11 -lutil -lXext -lXft \
        `pkg-config --libs fontconfig`  \
        `pkg-config --libs freetype2`
diff --git a/st.c b/st.c
index 8cf3483..da0b6b7 100644
--- a/st.c
+++ b/st.c
@@ -19,6 +19,7 @@
 #include <sys/wait.h>
 #include <time.h>
 #include <unistd.h>
+#include <utf.h>
 #include <X11/Xatom.h>
 #include <X11/Xlib.h>
 #include <X11/Xutil.h>
@@ -394,10 +395,7 @@ static void selcopy(void);
 static void selscroll(int, int);
 static void selsnap(int, int *, int *, int);
-static int utf8decode(char *, long *);
-static int utf8encode(long *, char *);
 static int utf8size(char *);
-static int isfullutf8(char *, int);
 static ssize_t xwrite(int, char *, size_t);
 static void *xmalloc(size_t);
@@ -506,115 +504,6 @@ xcalloc(size_t nmemb, size_t size) {
 }
 int
-utf8decode(char *s, long *u) {
-	uchar c;
-	int i, n, rtn;
-
-	rtn = 1;
-	c = *s;
-	if(~c & B7) { /* 0xxxxxxx */
-		*u = c;
-		return rtn;
-	} else if((c & (B7|B6|B5)) == (B7|B6)) { /* 110xxxxx */
-		*u = c&(B4|B3|B2|B1|B0);
-		n = 1;
-	} else if((c & (B7|B6|B5|B4)) == (B7|B6|B5)) { /* 1110xxxx */
-		*u = c&(B3|B2|B1|B0);
-		n = 2;
-	} else if((c & (B7|B6|B5|B4|B3)) == (B7|B6|B5|B4)) { /* 11110xxx */
-		*u = c & (B2|B1|B0);
-		n = 3;
-	} else {
-		goto invalid;
-	}
-
-	for(i = n, ++s; i > 0; --i, ++rtn, ++s) {
-		c = *s;
-		if((c & (B7|B6)) != B7) /* 10xxxxxx */
-			goto invalid;
-		*u <<= 6;
-		*u |= c & (B5|B4|B3|B2|B1|B0);
-	}
-
-	if((n == 1 && *u < 0x80) ||
-	   (n == 2 && *u < 0x800) ||
-	   (n == 3 && *u < 0x10000) ||
-	   (*u >= 0xD800 && *u <= 0xDFFF)) {
-		goto invalid;
-	}
-
-	return rtn;
-invalid:
-	*u = 0xFFFD;
-
-	return rtn;
-}
-
-int
-utf8encode(long *u, char *s) {
-	uchar *sp;
-	ulong uc;
-	int i, n;
-
-	sp = (uchar *)s;
-	uc = *u;
-	if(uc < 0x80) {
-		*sp = uc; /* 0xxxxxxx */
-		return 1;
-	} else if(*u < 0x800) {
-		*sp = (uc >> 6) | (B7|B6); /* 110xxxxx */
-		n = 1;
-	} else if(uc < 0x10000) {
-		*sp = (uc >> 12) | (B7|B6|B5); /* 1110xxxx */
-		n = 2;
-	} else if(uc <= 0x10FFFF) {
-		*sp = (uc >> 18) | (B7|B6|B5|B4); /* 11110xxx */
-		n = 3;
-	} else {
-		goto invalid;
-	}
-
-	for(i=n,++sp; i>0; --i,++sp)
-		*sp = ((uc >> 6*(i-1)) & (B5|B4|B3|B2|B1|B0)) | B7; /* 10xxxxxx */
-
-	return n+1;
-invalid:
-	/* U+FFFD */
-	*s++ = '\xEF';
-	*s++ = '\xBF';
-	*s = '\xBD';
-
-	return 3;
-}
-
-/* use this if your buffer is less than UTF_SIZ, it returns 1 if you can decode
-   UTF-8 otherwise return 0 */
-int
-isfullutf8(char *s, int b) {
-	uchar *c1, *c2, *c3;
-
-	c1 = (uchar *)s;
-	c2 = (uchar *)++s;
-	c3 = (uchar *)++s;
-	if(b < 1) {
-		return 0;
-	} else if((*c1&(B7|B6|B5)) == (B7|B6) && b == 1) {
-		return 0;
-	} else if((*c1&(B7|B6|B5|B4)) == (B7|B6|B5) &&
-	    ((b == 1) ||
-	    ((b == 2) && (*c2&(B7|B6)) == B7))) {
-		return 0;
-	} else if((*c1&(B7|B6|B5|B4|B3)) == (B7|B6|B5|B4) &&
-	    ((b == 1) ||
-	    ((b == 2) && (*c2&(B7|B6)) == B7) ||
-	    ((b == 3) && (*c2&(B7|B6)) == B7 && (*c3&(B7|B6)) == B7))) {
-		return 0;
-	} else {
-		return 1;
-	}
-}
-
-int
 utf8size(char *s) {
 	uchar c = *s;
@@ -1230,7 +1119,7 @@ ttyread(void) {
 	char *ptr;
 	char s[UTF_SIZ];
 	int charsize; /* size of utf8 char in bytes */
-	long utf8c;
+	Rune utf8c;
 	int ret;
 	/* append read bytes to unprocessed bytes */
@@ -1240,9 +1129,9 @@ ttyread(void) {
 	/* process every complete utf8 char */
 	buflen += ret;
 	ptr = buf;
-	while(buflen >= UTF_SIZ || isfullutf8(ptr,buflen)) {
-		charsize = utf8decode(ptr, &utf8c);
-		utf8encode(&utf8c, s);
+	while(buflen >= UTF_SIZ || fullrune(ptr, buflen)) {
+		charsize = chartorune(&utf8c, ptr);
+		runetochar(s, &utf8c);
 		tputc(s, charsize);
 		ptr += charsize;
 		buflen -= charsize;
@@ -2866,7 +2755,7 @@ xdraws(char *s, Glyph base, int x, int y, int charlen, int bytelen) {
 	int frp, frcflags;
 	int u8fl, u8fblen, u8cblen, doesexist;
 	char *u8c, *u8fs;
-	long u8char;
+	Rune u8char;
 	Font *font = &dc.font;
 	FcResult fcres;
 	FcPattern *fcpattern, *fontpattern;
@@ -2982,7 +2871,7 @@ xdraws(char *s, Glyph base, int x, int y, int charlen, int bytelen) {
 		u8fl = 0;
 		for(;;) {
 			u8c = s;
-			u8cblen = utf8decode(s, &u8char);
+			u8cblen = chartorune(&u8char, s);
 			s += u8cblen;
 			bytelen -= u8cblen;
@@ -3354,7 +3243,7 @@ kpress(XEvent *ev) {
 	KeySym ksym;
 	char xstr[31], buf[32], *customkey, *cp = buf;
 	int len, ret;
-	long c;
+	Rune c;
 	Status status;
 	Shortcut *bp;
@@ -3384,7 +3273,7 @@ kpress(XEvent *ev) {
 			if(IS_SET(MODE_8BIT)) {
 				if(*xstr < 0177) {
 					c = *xstr | B7;
-					ret = utf8encode(&c, cp);
+					ret = runetochar(cp, &c);
 					cp += ret;
 					len = 0;
 				}
-- 
1.7.11.1
Received on Mon Sep 17 2001 - 00:00:00 CEST

This archive was generated by hypermail 2.3.0 : Sun May 05 2013 - 16:36:05 CEST