[hackers] [sbase][PATCH] tr: Allow combining classes and sets

From: Santtu Lakkala <inz_AT_inz.fi>
Date: Thu, 15 Jan 2026 11:11:20 +0200

Allow combining character classes and mixing with ranges and sets.
---
 TODO             |  5 ----
 tests/0026-tr.sh | 15 ++++++++++++
 tests/0027-tr.sh | 26 ++++++++++++++++++++
 tr.c             | 64 +++++++++++++++++++++++++++++++-----------------
 4 files changed, 82 insertions(+), 28 deletions(-)
 create mode 100755 tests/0026-tr.sh
 create mode 100755 tests/0027-tr.sh
diff --git a/TODO b/TODO
index e428d84..4dcedcd 100644
--- a/TODO
+++ b/TODO
@@ -40,11 +40,6 @@ printf
 * Escape sequences that expand to '%' are treated as beginning of
   conversion specification.
 
-tr
---
-* When a character class is present, all other characters in the
-  string are ignored.
-
 xargs
 -----
 * Add -L.
diff --git a/tests/0026-tr.sh b/tests/0026-tr.sh
new file mode 100755
index 0000000..243772d
--- /dev/null
+++ b/tests/0026-tr.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+tmp=tmp.$$
+
+trap 'rm -f $tmp' EXIT
+trap 'exit $?' HUP INT TERM
+
+cat <<EOF > $tmp
+FOOBAR123LOREM@IPSUM
+EOF
+
+($EXEC ../tr -cd '[:upper:][:digit:]@' <<EOF; echo) | diff -u $tmp -
+Forem OOsBm __aor Ait amet, consectetuR ad1piscing elit. i2teger e\$#icit3r
+neLue eOet sRm acEumsan eM@Ismod. aePean ac niSi eU erat gravida vulputateM
+EOF
diff --git a/tests/0027-tr.sh b/tests/0027-tr.sh
new file mode 100755
index 0000000..634c5c4
--- /dev/null
+++ b/tests/0027-tr.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+tmp=tmp.$$
+
+trap 'rm -f $tmp' EXIT
+trap 'exit $?' HUP INT TERM
+
+cat <<EOF > $tmp
+STATIC INT RADIX = 16;
+STATIC INT PFLAG;
+STATIC INT AFLAG;
+STATIC INT VFLAG;
+STATIC INT GFLAG;
+STATIC INT UFLAG;
+STATIC INT ARFLAG;
+EOF
+
+$EXEC ../tr '[:lower:]' '[:upper:]' <<EOF | diff -u $tmp -
+static int radix = 16;
+static int Pflag;
+static int Aflag;
+static int vflag;
+static int gflag;
+static int uflag;
+static int arflag;
+EOF
diff --git a/tr.c b/tr.c
index b612896..8cac453 100644
--- a/tr.c
+++ b/tr.c
@@ -32,12 +32,27 @@ static struct {
 	{ "xdigit", isxdigitrune },
 };
 
-static struct range *set1        = NULL;
-static size_t set1ranges         = 0;
-static int    (*set1check)(Rune) = NULL;
-static struct range *set2        = NULL;
-static size_t set2ranges         = 0;
-static int    (*set2check)(Rune) = NULL;
+#define ISLOWERBIT 		   1U << 6
+#define ISUPPERBIT 		   1U << 10
+
+static struct   range *set1 = NULL;
+static size_t   set1ranges  = 0;
+static unsigned set1checks  = 0;
+static struct   range *set2 = NULL;
+static size_t   set2ranges  = 0;
+static unsigned set2checks  = 0;
+
+static int
+check(Rune rune, unsigned checks)
+{
+	size_t i;
+
+	for (i = 0; checks && i < LEN(classes); i++, checks >>= 1)
+		if (checks & 1 && classes[i].check(rune))
+			return 1;
+
+	return 0;
+}
 
 static size_t
 rangelen(struct range r)
@@ -68,7 +83,7 @@ rstrmatch(Rune *r, char *s, size_t n)
 }
 
 static size_t
-makeset(char *str, struct range **set, int (**check)(Rune))
+makeset(char *str, struct range **set, unsigned *checks)
 {
 	Rune  *rstr;
 	size_t len, i, j, m, n;
@@ -111,10 +126,13 @@ nextbrack:
 			if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
 				for (n = 0; n < LEN(classes); n++) {
 					if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
-						*check = classes[n].check;
-						return 0;
+						*checks |= 1 << n;
+						i = j;
+						break;
 					}
 				}
+				if (n < LEN(classes))
+					continue;
 				eprintf("Invalid character class.\n");
 			}
 
@@ -195,14 +213,14 @@ main(int argc, char *argv[])
 	    (dflag && argc != 1))
 		usage();
 
-	set1ranges = makeset(argv[0], &set1, &set1check);
+	set1ranges = makeset(argv[0], &set1, &set1checks);
 	if (argc == 2) {
-		set2ranges = makeset(argv[1], &set2, &set2check);
+		set2ranges = makeset(argv[1], &set2, &set2checks);
 		/* sanity checks as we are translating */
-		if (!set2ranges && !set2check)
+		if (!set2ranges && !set2checks)
 			eprintf("cannot map to an empty set.\n");
-		if (set2check && set2check != islowerrune &&
-		    set2check != isupperrune) {
+		if (set2checks && set2checks != ISLOWERBIT &&
+		    set2checks != ISUPPERBIT) {
 			eprintf("can only map to 'lower' and 'upper' class.\n");
 		}
 	}
@@ -225,8 +243,8 @@ read:
 				goto write;
 
 			/* map r to set2 */
-			if (set2check) {
-				if (set2check == islowerrune)
+			if (set2checks) {
+				if (set2checks == ISLOWERBIT)
 					r = tolowerrune(r);
 				else
 					r = toupperrune(r);
@@ -249,13 +267,13 @@ read:
 			goto write;
 		}
 	}
-	if (set1check && set1check(r)) {
+	if (check(r, set1checks)) {
 		if (cflag)
 			goto write;
 		if (dflag)
 			goto read;
-		if (set2check) {
-			if (set2check == islowerrune)
+		if (set2checks) {
+			if (set2checks == ISLOWERBIT)
 				r = tolowerrune(r);
 			else
 				r = toupperrune(r);
@@ -265,8 +283,8 @@ read:
 		goto write;
 	}
 	if (!dflag && cflag) {
-		if (set2check) {
-			if (set2check == islowerrune)
+		if (set2checks) {
+			if (set2checks == ISLOWERBIT)
 				r = tolowerrune(r);
 			else
 				r = toupperrune(r);
@@ -279,7 +297,7 @@ read:
 		goto read;
 write:
 	if (argc == 1 && sflag && r == lastrune) {
-		if (set1check && set1check(r))
+		if (check(r, set1checks))
 			goto read;
 		for (i = 0; i < set1ranges; i++) {
 			if (set1[i].start <= r && r <= set1[i].end)
@@ -287,7 +305,7 @@ write:
 		}
 	}
 	if (argc == 2 && sflag && r == lastrune) {
-		if (set2check && set2check(r))
+		if (set2checks && check(r, set2checks))
 			goto read;
 		for (i = 0; i < set2ranges; i++) {
 			if (set2[i].start <= r && r <= set2[i].end)
-- 
2.42.0
Received on Thu Jan 15 2026 - 10:11:20 CET

This archive was generated by hypermail 2.3.0 : Thu Jan 15 2026 - 10:12:47 CET