Re: [dev] [st][PATCH] Add support for multiple charset definitions

From: Roberto E. Vargas Caballero <k0ga_AT_shike2.com>
Date: Tue, 1 Oct 2013 22:33:40 +0200

> I applied the patch to tip ( eeae9b0 ) but compilation fails due to
> a redefinition of a the tsetchar function:

Sorry, I don't know why my gcc version doesn't give me the error (some
type of overloading????). This version uses a different name for this
new function.

-- >8 --

Subject: [PATCH] Add support for multiple charset definitions

vt100 has support for two defined charset, G0 and G1. Each charset
can be defined, but in each moment is selected only one of both
charset. This is usually used selecting a national charset in G0
and graphic charset in G1, so you can switch between graphic
charset and text charset without losing the national charset
already defined.

st hasn't support for national charsets, because it is an utf8
based terminal emulator, but it has support for graphic
charset because it is heavily used, but it only supports G0,
without understanding G1 selection sequences, which causes some
programs in some moments can print some garbage in the screen.

This patch adds a fake support for multiple charset definitions,
where we only support graphic charset and us-ascii charset, but
we allow more of one charset definition.

This patch allow define G0 until G3 charsets, but only accepts
select G0 or G1, and it accepts some national charset definitions
but all of them are mapped to us-ascii.
---
 st.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 53 insertions(+), 25 deletions(-)
diff --git a/st.c b/st.c
index 12fcc90..a22ba97 100644
--- a/st.c
+++ b/st.c
_AT_@ -137,6 +137,16 @@ enum term_mode {
 	                  |MODE_MOUSEMANY,
 };
 
+enum charset {
+	CS_GRAPHIC0,
+	CS_GRAPHIC1,
+	CS_UK,
+	CS_USA,
+	CS_MULTI,
+	CS_GER,
+	CS_FIN
+};
+
 enum escape_state {
 	ESC_START      = 1,
 	ESC_CSI        = 2,
_AT_@ -216,6 +226,9 @@ typedef struct {
 	int bot;      /* bottom scroll limit */
 	int mode;     /* terminal mode flags */
 	int esc;      /* escape state flags */
+	char trantbl[4]; /* charset table translation */
+	int charset;  /* current charset */
+	int icharset; /* selected charset for sequence */
 	bool numlock; /* lock numbers in keyboard */
 	bool *tabs;
 } Term;
_AT_@ -367,6 +380,8 @@ static void tsetmode(bool, bool, int *, int);
 static void tfulldirt(void);
 static void techo(char *, int);
 static long tdefcolor(int *, int *, int);
+static void tselcs(void);
+static void tdeftran(char);
 static inline bool match(uint, uint);
 static void ttynew(void);
 static void ttyread(void);
_AT_@ -1369,6 +1384,8 @@ treset(void) {
 	term.top = 0;
 	term.bot = term.row - 1;
 	term.mode = MODE_WRAP;
+	memset(term.trantbl, sizeof(term.trantbl), CS_USA);
+	term.charset = 0;
 
 	tclearregion(0, 0, term.col-1, term.row-1);
 	tmoveto(0, 0);
_AT_@ -2257,6 +2274,33 @@ techo(char *buf, int len) {
 }
 
 void
+tdeftran(char ascii) {
+	char c, (*bp)[2];
+	static char tbl[][2] = {
+		{'0', CS_GRAPHIC0}, {'1', CS_GRAPHIC1}, {'A', CS_UK},
+		{'B', CS_USA},      {'<', CS_MULTI},    {'K', CS_GER},
+		{'5', CS_FIN},      {'C', CS_FIN},
+		{0, 0}
+	};
+
+	for (bp = &tbl[0]; (c = (*bp)[0]) && c != ascii; ++bp)
+		/* nothing */;
+
+	if (c == 0)
+		fprintf(stderr, "esc unhandled charset: ESC ( %c\n", ascii);
+	else
+		term.trantbl[term.icharset] = (*bp)[1];
+}
+
+void
+tselcs(void) {
+	if (term.trantbl[term.charset] == CS_GRAPHIC0)
+		term.c.attr.mode |= ATTR_GFX;
+	else
+		term.c.attr.mode &= ~ATTR_GFX;
+}
+
+void
 tputc(char *c, int len) {
 	uchar ascii = *c;
 	bool control = ascii < '\x20' || ascii == 0177;
_AT_@ -2348,13 +2392,12 @@ tputc(char *c, int len) {
 			term.esc = ESC_START;
 			return;
 		case '\016': /* SO */
+			term.charset = 0;
+			tselcs();
+			return;
 		case '\017': /* SI */
-			/*
-			 * Different charsets are hard to handle. Applications
-			 * should use the right alt charset escapes for the
-			 * only reason they still exist: line drawing. The
-			 * rest is incompatible history st should not support.
-			 */
+			term.charset = 1;
+			tselcs();
 			return;
 		case '\032': /* SUB */
 		case '\030': /* CAN */
_AT_@ -2382,22 +2425,8 @@ tputc(char *c, int len) {
 			if(ascii == '\\')
 				strhandle();
 		} else if(term.esc & ESC_ALTCHARSET) {
-			switch(ascii) {
-			case '0': /* Line drawing set */
-				term.c.attr.mode |= ATTR_GFX;
-				break;
-			case 'B': /* USASCII */
-				term.c.attr.mode &= ~ATTR_GFX;
-				break;
-			case 'A': /* UK (IGNORED) */
-			case '<': /* multinational charset (IGNORED) */
-			case '5': /* Finnish (IGNORED) */
-			case 'C': /* Finnish (IGNORED) */
-			case 'K': /* German (IGNORED) */
-				break;
-			default:
-				fprintf(stderr, "esc unhandled charset: ESC ( %c\n", ascii);
-			}
+			tdeftran(ascii);
+			tselcs();
 			term.esc = 0;
 		} else if(term.esc & ESC_TEST) {
 			if(ascii == '8') { /* DEC screen alignment test. */
_AT_@ -2428,12 +2457,11 @@ tputc(char *c, int len) {
 				term.esc |= ESC_STR;
 				break;
 			case '(': /* set primary charset G0 */
-				term.esc |= ESC_ALTCHARSET;
-				break;
 			case ')': /* set secondary charset G1 (IGNORED) */
 			case '*': /* set tertiary charset G2 (IGNORED) */
 			case '+': /* set quaternary charset G3 (IGNORED) */
-				term.esc = 0;
+				term.icharset = ascii - '(';
+				term.esc |= ESC_ALTCHARSET;
 				break;
 			case 'D': /* IND -- Linefeed */
 				if(term.c.y == term.bot) {
-- 
1.8.4
Received on Tue Oct 01 2013 - 22:33:40 CEST

This archive was generated by hypermail 2.3.0 : Tue Oct 01 2013 - 22:36:06 CEST