--- Makefile | 35 ++- libutf/Makefile | 6 + libutf/fgetrune.c | 36 +++ libutf/fputrune.c | 27 ++ libutf/isalnumrune.c | 9 + libutf/isalpharune.c | 718 ++++++++++++++++++++++++++++++++++++++++++++++++++ libutf/isblankrune.c | 9 + libutf/iscntrlrune.c | 18 ++ libutf/isdigitrune.c | 70 +++++ libutf/isgraphrune.c | 9 + libutf/isprintrune.c | 10 + libutf/ispunctrune.c | 9 + libutf/isspacerune.c | 31 +++ libutf/istitlerune.c | 31 +++ libutf/isxdigitrune.c | 9 + libutf/lowerrune.c | 334 +++++++++++++++++++++++ libutf/mkrunetype.awk | 240 +++++++++++++++++ libutf/rune.c | 148 +++++++++++ libutf/runetype.c | 41 +++ libutf/runetype.h | 26 ++ libutf/upperrune.c | 251 ++++++++++++++++++ libutf/utf.c | 129 +++++++++ libutf/utftorunestr.c | 13 + libutil/fshut.c | 43 +++ ul.1 | 39 +++ ul.c | 384 +++++++++++++++++++++++++++ utf.h | 67 +++++ util.h | 7 + 28 files changed, 2747 insertions(+), 2 deletions(-) create mode 100644 libutf/Makefile create mode 100644 libutf/fgetrune.c create mode 100644 libutf/fputrune.c create mode 100644 libutf/isalnumrune.c create mode 100644 libutf/isalpharune.c create mode 100644 libutf/isblankrune.c create mode 100644 libutf/iscntrlrune.c create mode 100644 libutf/isdigitrune.c create mode 100644 libutf/isgraphrune.c create mode 100644 libutf/isprintrune.c create mode 100644 libutf/ispunctrune.c create mode 100644 libutf/isspacerune.c create mode 100644 libutf/istitlerune.c create mode 100644 libutf/isxdigitrune.c create mode 100644 libutf/lowerrune.c create mode 100644 libutf/mkrunetype.awk create mode 100644 libutf/rune.c create mode 100644 libutf/runetype.c create mode 100644 libutf/runetype.h create mode 100644 libutf/upperrune.c create mode 100644 libutf/utf.c create mode 100644 libutf/utftorunestr.c create mode 100644 libutil/fshut.c create mode 100644 ul.1 create mode 100644 ul.c create mode 100644 utf.h diff --git a/Makefile b/Makefile index 453607c..3b6d219 100644 --- a/Makefile +++ b/Makefile _AT_@ -12,8 +12,31 @@ HDR = \ reboot.h \ rtc.h \ 
text.h \ + utf.h\ util.h +LIBUTF = libutf.a +LIBUTFSRC =\ + libutf/fgetrune.c\ + libutf/fputrune.c\ + libutf/isalnumrune.c\ + libutf/isalpharune.c\ + libutf/isblankrune.c\ + libutf/iscntrlrune.c\ + libutf/isdigitrune.c\ + libutf/isgraphrune.c\ + libutf/isprintrune.c\ + libutf/ispunctrune.c\ + libutf/isspacerune.c\ + libutf/istitlerune.c\ + libutf/isxdigitrune.c\ + libutf/lowerrune.c\ + libutf/rune.c\ + libutf/runetype.c\ + libutf/upperrune.c\ + libutf/utf.c\ + libutf/utftorunestr.c + LIBUTIL = libutil.a LIBUTILSRC = \ libutil/agetcwd.c \ _AT_@ -25,6 +48,7 @@ LIBUTILSRC = \ libutil/estrtol.c \ libutil/estrtoul.c \ libutil/explicit_bzero.c \ + libutil/fshut.c \ libutil/passwd.c \ libutil/proc.c \ libutil/putword.c \ _AT_@ -34,7 +58,7 @@ LIBUTILSRC = \ libutil/strtonum.c \ libutil/tty.c -LIB = $(LIBUTIL) +LIB = $(LIBUTF) $(LIBUTIL) BIN = \ chvt \ _AT_@ -82,6 +106,7 @@ BIN = \ switch_root \ sysctl \ truncate \ + ul \ umount \ unshare \ uptime \ _AT_@ -112,6 +137,7 @@ MAN1 = \ stat.1 \ su.1 \ truncate.1 \ + ul.1 \ unshare.1 \ uptime.1 \ vtallow.1 \ _AT_@ -143,8 +169,9 @@ MAN8 = \ sysctl.8 \ umount.8 +LIBUTFOBJ = $(LIBUTFSRC:.c=.o) LIBUTILOBJ = $(LIBUTILSRC:.c=.o) -OBJ = $(BIN:=.o) $(LIBUTILOBJ) +OBJ = $(BIN:=.o) $(LIBUTFOBJ) $(LIBUTILOBJ) SRC = $(BIN:=.c) all: $(BIN) _AT_@ -162,6 +189,10 @@ config.h: .c.o: $(CC) $(CFLAGS) $(CPPFLAGS) -o $_AT_ -c $< +$(LIBUTF): $(LIBUTFOBJ) + $(AR) rc $_AT_ $? + $(RANLIB) $_AT_ + $(LIBUTIL): $(LIBUTILOBJ) $(AR) rc $_AT_ $? 
$(RANLIB) $_AT_
diff --git a/libutf/Makefile b/libutf/Makefile
new file mode 100644
index 0000000..aac2d2e
--- /dev/null
+++ b/libutf/Makefile
_AT_@ -0,0 +1,6 @@
+AWK = awk
+UNICODE = http://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+
+default:
+	_AT_echo Downloading and parsing $(UNICODE)
+	_AT_curl -\# $(UNICODE) | $(AWK) -f mkrunetype.awk
diff --git a/libutf/fgetrune.c b/libutf/fgetrune.c
new file mode 100644
index 0000000..8cd78c6
--- /dev/null
+++ b/libutf/fgetrune.c
_AT_@ -0,0 +1,36 @@
+/* See LICENSE file for copyright and license details. */
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../utf.h"
+
+/*
+ * Read one encoded rune from fp into *r.
+ *
+ * Bytes are fetched one at a time with fgetc() and collected in buf.
+ * After every byte charntorune() is tried on the bytes gathered so far;
+ * reading stops as soon as it returns a positive value, when fgetc()
+ * reports EOF, or once UTFmax bytes have been collected.
+ *
+ * Returns -1 if the stream's error indicator is set, otherwise the number
+ * of bytes consumed (0 when EOF is hit before any byte was read).
+ *
+ * NOTE(review): charntorune() is defined elsewhere; a positive return
+ * presumably means "a rune was decoded from these bytes" -- confirm.
+ */
+int
+fgetrune(Rune *r, FILE *fp)
+{
+	char buf[UTFmax];
+	int i = 0, c;
+
+	while (i < UTFmax && (c = fgetc(fp)) != EOF) {
+		buf[i++] = c;
+		if (charntorune(r, buf, i) > 0)
+			break;
+	}
+	/* EOF and read errors both end the loop; tell them apart here */
+	if (ferror(fp))
+		return -1;
+
+	return i;
+}
+
+/*
+ * fgetrune() wrapper that does not return on read errors: when fgetrune()
+ * yields a negative value it prints "fgetrune <file>: <strerror(errno)>"
+ * to stderr and calls exit(1).  Any other result is passed through
+ * unchanged.  file is only used in the message.
+ */
+int
+efgetrune(Rune *r, FILE *fp, const char *file)
+{
+	int ret;
+
+	if ((ret = fgetrune(r, fp)) < 0) {
+		fprintf(stderr, "fgetrune %s: %s\n", file, strerror(errno));
+		exit(1);
+	}
+	return ret;
+}
diff --git a/libutf/fputrune.c b/libutf/fputrune.c
new file mode 100644
index 0000000..6a393b5
--- /dev/null
+++ b/libutf/fputrune.c
_AT_@ -0,0 +1,27 @@
+/* See LICENSE file for copyright and license details.
*/
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../utf.h"
+
+/*
+ * Encode *r with runetochar() into a local buffer and write the result to
+ * fp as a single fwrite() item whose size is the value runetochar()
+ * returned.
+ *
+ * Returns fwrite()'s item count: 1 when the item was written, 0 when it
+ * was not.  The result is never negative.
+ */
+int
+fputrune(const Rune *r, FILE *fp)
+{
+	char buf[UTFmax];
+
+	return fwrite(buf, runetochar(buf, r), 1, fp);
+}
+
+/*
+ * fputrune() wrapper that does not return on write errors: when nothing
+ * was written and the stream's error indicator is set, it prints
+ * "fputrune <file>: <strerror(errno)>" to stderr and calls exit(1).
+ * Otherwise the result of fputrune() is passed through unchanged.
+ * file is only used in the message.
+ */
+int
+efputrune(const Rune *r, FILE *fp, const char *file)
+{
+	int ret;
+
+	/*
+	 * fputrune() reports failure as 0 (fwrite's item count), never as a
+	 * negative value, so testing for "< 0" would let every write error
+	 * go unnoticed.
+	 */
+	if (!(ret = fputrune(r, fp)) && ferror(fp)) {
+		fprintf(stderr, "fputrune %s: %s\n", file, strerror(errno));
+		exit(1);
+	}
+	return ret;
+}
diff --git a/libutf/isalnumrune.c b/libutf/isalnumrune.c
new file mode 100644
index 0000000..e4720d0
--- /dev/null
+++ b/libutf/isalnumrune.c
_AT_@ -0,0 +1,9 @@
+/* Automatically generated by mkrunetype.awk */
+#include "../utf.h"
+#include "runetype.h"
+
+/* Non-zero when r is accepted by isalpharune() or by isdigitrune(). */
+int
+isalnumrune(Rune r)
+{
+	return isalpharune(r) || isdigitrune(r);
+}
diff --git a/libutf/isalpharune.c b/libutf/isalpharune.c
new file mode 100644
index 0000000..0526b97
--- /dev/null
+++ b/libutf/isalpharune.c
_AT_@ -0,0 +1,718 @@
+/* Automatically generated by mkrunetype.awk */
+#include <stdlib.h>
+
+#include "../utf.h"
+#include "runetype.h"
+
+/*
+ * { first, last } ranges in which isalpharune() accepts only the code
+ * points at an even offset from first.
+ */
+static Rune alpha3[][2] = {
+	{ 0x00D6, 0x00D8 },
+	{ 0x00F6, 0x00F8 },
+	{ 0x02EC, 0x02EE },
+	{ 0x0374, 0x0376 },
+	{ 0x037D, 0x037F },
+	{ 0x0386, 0x0388 },
+	{ 0x038A, 0x038E },
+	{ 0x03A1, 0x03A3 },
+	{ 0x03F5, 0x03F7 },
+	{ 0x052F, 0x0531 },
+	{ 0x066F, 0x0671 },
+	{ 0x06D3, 0x06D5 },
+	{ 0x0710, 0x0712 },
+	{ 0x08B4, 0x08B6 },
+	{ 0x09A8, 0x09AA },
+	{ 0x09B0, 0x09B2 },
+	{ 0x09DD, 0x09DF },
+	{ 0x0A28, 0x0A2A },
+	{ 0x0A30, 0x0A32 },
+	{ 0x0A33, 0x0A35 },
+	{ 0x0A36, 0x0A38 },
+	{ 0x0A5C, 0x0A5E },
+	{ 0x0A8D, 0x0A8F },
+	{ 0x0A91, 0x0A93 },
+	{ 0x0AA8, 0x0AAA },
+	{ 0x0AB0, 0x0AB2 },
+	{ 0x0AB3, 0x0AB5 },
+	{ 0x0B28, 0x0B2A },
+	{ 0x0B30, 0x0B32 },
+	{ 0x0B33, 0x0B35 },
+	{ 0x0B5D, 0x0B5F },
+	{ 0x0B83, 0x0B85 },
+	{ 0x0B90, 0x0B92 },
+	{ 0x0B9A, 0x0B9E },
+	{ 0x0C0C, 0x0C0E },
+	{ 0x0C10, 0x0C12 },
+	{ 0x0C28, 0x0C2A },
+	{ 0x0C8C, 0x0C8E },
+	{ 0x0C90, 0x0C92 },
+	{ 0x0CA8, 0x0CAA },
+	{ 0x0CB3, 0x0CB5 },
+	{ 0x0CDE, 0x0CE0
}, + { 0x0D0C, 0x0D0E }, + { 0x0D10, 0x0D12 }, + { 0x0DB1, 0x0DB3 }, + { 0x0DBB, 0x0DBD }, + { 0x0E30, 0x0E32 }, + { 0x0E82, 0x0E84 }, + { 0x0E88, 0x0E8A }, + { 0x0E97, 0x0E99 }, + { 0x0E9F, 0x0EA1 }, + { 0x0EA3, 0x0EA7 }, + { 0x0EAB, 0x0EAD }, + { 0x0EB0, 0x0EB2 }, + { 0x0EC4, 0x0EC6 }, + { 0x0F47, 0x0F49 }, + { 0x10C5, 0x10C7 }, + { 0x10FA, 0x10FC }, + { 0x1248, 0x124A }, + { 0x1256, 0x125A }, + { 0x1288, 0x128A }, + { 0x12B0, 0x12B2 }, + { 0x12BE, 0x12C2 }, + { 0x12D6, 0x12D8 }, + { 0x1310, 0x1312 }, + { 0x167F, 0x1681 }, + { 0x170C, 0x170E }, + { 0x176C, 0x176E }, + { 0x18A8, 0x18AA }, + { 0x1CEC, 0x1CEE }, + { 0x1F57, 0x1F5F }, + { 0x1FB4, 0x1FB6 }, + { 0x1FBC, 0x1FBE }, + { 0x1FC4, 0x1FC6 }, + { 0x1FF4, 0x1FF6 }, + { 0x2113, 0x2115 }, + { 0x2124, 0x212A }, + { 0x212D, 0x212F }, + { 0x2C2E, 0x2C30 }, + { 0x2C5E, 0x2C60 }, + { 0x2D25, 0x2D27 }, + { 0x2DA6, 0x2DA8 }, + { 0x2DAE, 0x2DB0 }, + { 0x2DB6, 0x2DB8 }, + { 0x2DBE, 0x2DC0 }, + { 0x2DC6, 0x2DC8 }, + { 0x2DCE, 0x2DD0 }, + { 0x2DD6, 0x2DD8 }, + { 0x309F, 0x30A1 }, + { 0x30FA, 0x30FC }, + { 0xA7AE, 0xA7B0 }, + { 0xA801, 0xA803 }, + { 0xA805, 0xA807 }, + { 0xA80A, 0xA80C }, + { 0xA8FB, 0xA8FD }, + { 0xA9E4, 0xA9E6 }, + { 0xA9FE, 0xAA00 }, + { 0xAA42, 0xAA44 }, + { 0xAAAF, 0xAAB1 }, + { 0xAAC0, 0xAAC2 }, + { 0xAB26, 0xAB28 }, + { 0xAB2E, 0xAB30 }, + { 0xAB5A, 0xAB5C }, + { 0xFB1D, 0xFB1F }, + { 0xFB28, 0xFB2A }, + { 0xFB36, 0xFB38 }, + { 0xFB3C, 0xFB40 }, + { 0xFB41, 0xFB43 }, + { 0xFB44, 0xFB46 }, + { 0xFE74, 0xFE76 }, + { 0x1000B, 0x1000D }, + { 0x10026, 0x10028 }, + { 0x1003A, 0x1003C }, + { 0x1003D, 0x1003F }, + { 0x10340, 0x10342 }, + { 0x10808, 0x1080A }, + { 0x10835, 0x10837 }, + { 0x108F2, 0x108F4 }, + { 0x10A13, 0x10A15 }, + { 0x10A17, 0x10A19 }, + { 0x10AC7, 0x10AC9 }, + { 0x111DA, 0x111DC }, + { 0x11211, 0x11213 }, + { 0x11286, 0x1128A }, + { 0x1128D, 0x1128F }, + { 0x1129D, 0x1129F }, + { 0x11328, 0x1132A }, + { 0x11330, 0x11332 }, + { 0x11333, 0x11335 }, + { 0x114C5, 0x114C7 }, + { 0x11C08, 0x11C0A 
}, + { 0x1D454, 0x1D456 }, + { 0x1D49C, 0x1D49E }, + { 0x1D4AC, 0x1D4AE }, + { 0x1D4B9, 0x1D4BD }, + { 0x1D4C3, 0x1D4C5 }, + { 0x1D505, 0x1D507 }, + { 0x1D514, 0x1D516 }, + { 0x1D51C, 0x1D51E }, + { 0x1D539, 0x1D53B }, + { 0x1D53E, 0x1D540 }, + { 0x1D544, 0x1D546 }, + { 0x1D550, 0x1D552 }, + { 0x1D6C0, 0x1D6C2 }, + { 0x1D6DA, 0x1D6DC }, + { 0x1D6FA, 0x1D6FC }, + { 0x1D714, 0x1D716 }, + { 0x1D734, 0x1D736 }, + { 0x1D74E, 0x1D750 }, + { 0x1D76E, 0x1D770 }, + { 0x1D788, 0x1D78A }, + { 0x1D7A8, 0x1D7AA }, + { 0x1D7C2, 0x1D7C4 }, + { 0x1EE03, 0x1EE05 }, + { 0x1EE1F, 0x1EE21 }, + { 0x1EE22, 0x1EE24 }, + { 0x1EE27, 0x1EE29 }, + { 0x1EE32, 0x1EE34 }, + { 0x1EE37, 0x1EE3B }, + { 0x1EE47, 0x1EE4D }, + { 0x1EE4F, 0x1EE51 }, + { 0x1EE52, 0x1EE54 }, + { 0x1EE57, 0x1EE61 }, + { 0x1EE62, 0x1EE64 }, + { 0x1EE6A, 0x1EE6C }, + { 0x1EE72, 0x1EE74 }, + { 0x1EE77, 0x1EE79 }, + { 0x1EE7C, 0x1EE80 }, + { 0x1EE89, 0x1EE8B }, + { 0x1EEA3, 0x1EEA5 }, + { 0x1EEA9, 0x1EEAB }, +}; + +static Rune alpha2[][2] = { + { 0x0041, 0x005A }, + { 0x0061, 0x007A }, + { 0x00C0, 0x00D6 }, + { 0x00D8, 0x00F6 }, + { 0x00F8, 0x02C1 }, + { 0x02C6, 0x02D1 }, + { 0x02E0, 0x02E4 }, + { 0x0370, 0x0374 }, + { 0x0376, 0x0377 }, + { 0x037A, 0x037D }, + { 0x0388, 0x038A }, + { 0x038E, 0x03A1 }, + { 0x03A3, 0x03F5 }, + { 0x03F7, 0x0481 }, + { 0x048A, 0x052F }, + { 0x0531, 0x0556 }, + { 0x0561, 0x0587 }, + { 0x05D0, 0x05EA }, + { 0x05F0, 0x05F2 }, + { 0x0620, 0x064A }, + { 0x066E, 0x066F }, + { 0x0671, 0x06D3 }, + { 0x06E5, 0x06E6 }, + { 0x06EE, 0x06EF }, + { 0x06FA, 0x06FC }, + { 0x0712, 0x072F }, + { 0x074D, 0x07A5 }, + { 0x07CA, 0x07EA }, + { 0x07F4, 0x07F5 }, + { 0x0800, 0x0815 }, + { 0x0840, 0x0858 }, + { 0x08A0, 0x08B4 }, + { 0x08B6, 0x08BD }, + { 0x0904, 0x0939 }, + { 0x0958, 0x0961 }, + { 0x0971, 0x0980 }, + { 0x0985, 0x098C }, + { 0x098F, 0x0990 }, + { 0x0993, 0x09A8 }, + { 0x09AA, 0x09B0 }, + { 0x09B6, 0x09B9 }, + { 0x09DC, 0x09DD }, + { 0x09DF, 0x09E1 }, + { 0x09F0, 0x09F1 }, + { 0x0A05, 0x0A0A }, + { 0x0A0F, 
0x0A10 }, + { 0x0A13, 0x0A28 }, + { 0x0A2A, 0x0A30 }, + { 0x0A32, 0x0A33 }, + { 0x0A35, 0x0A36 }, + { 0x0A38, 0x0A39 }, + { 0x0A59, 0x0A5C }, + { 0x0A72, 0x0A74 }, + { 0x0A85, 0x0A8D }, + { 0x0A8F, 0x0A91 }, + { 0x0A93, 0x0AA8 }, + { 0x0AAA, 0x0AB0 }, + { 0x0AB2, 0x0AB3 }, + { 0x0AB5, 0x0AB9 }, + { 0x0AE0, 0x0AE1 }, + { 0x0B05, 0x0B0C }, + { 0x0B0F, 0x0B10 }, + { 0x0B13, 0x0B28 }, + { 0x0B2A, 0x0B30 }, + { 0x0B32, 0x0B33 }, + { 0x0B35, 0x0B39 }, + { 0x0B5C, 0x0B5D }, + { 0x0B5F, 0x0B61 }, + { 0x0B85, 0x0B8A }, + { 0x0B8E, 0x0B90 }, + { 0x0B92, 0x0B95 }, + { 0x0B99, 0x0B9A }, + { 0x0B9E, 0x0B9F }, + { 0x0BA3, 0x0BA4 }, + { 0x0BA8, 0x0BAA }, + { 0x0BAE, 0x0BB9 }, + { 0x0C05, 0x0C0C }, + { 0x0C0E, 0x0C10 }, + { 0x0C12, 0x0C28 }, + { 0x0C2A, 0x0C39 }, + { 0x0C58, 0x0C5A }, + { 0x0C60, 0x0C61 }, + { 0x0C85, 0x0C8C }, + { 0x0C8E, 0x0C90 }, + { 0x0C92, 0x0CA8 }, + { 0x0CAA, 0x0CB3 }, + { 0x0CB5, 0x0CB9 }, + { 0x0CE0, 0x0CE1 }, + { 0x0CF1, 0x0CF2 }, + { 0x0D05, 0x0D0C }, + { 0x0D0E, 0x0D10 }, + { 0x0D12, 0x0D3A }, + { 0x0D54, 0x0D56 }, + { 0x0D5F, 0x0D61 }, + { 0x0D7A, 0x0D7F }, + { 0x0D85, 0x0D96 }, + { 0x0D9A, 0x0DB1 }, + { 0x0DB3, 0x0DBB }, + { 0x0DC0, 0x0DC6 }, + { 0x0E01, 0x0E30 }, + { 0x0E32, 0x0E33 }, + { 0x0E40, 0x0E46 }, + { 0x0E81, 0x0E82 }, + { 0x0E87, 0x0E88 }, + { 0x0E94, 0x0E97 }, + { 0x0E99, 0x0E9F }, + { 0x0EA1, 0x0EA3 }, + { 0x0EAA, 0x0EAB }, + { 0x0EAD, 0x0EB0 }, + { 0x0EB2, 0x0EB3 }, + { 0x0EC0, 0x0EC4 }, + { 0x0EDC, 0x0EDF }, + { 0x0F40, 0x0F47 }, + { 0x0F49, 0x0F6C }, + { 0x0F88, 0x0F8C }, + { 0x1000, 0x102A }, + { 0x1050, 0x1055 }, + { 0x105A, 0x105D }, + { 0x1065, 0x1066 }, + { 0x106E, 0x1070 }, + { 0x1075, 0x1081 }, + { 0x10A0, 0x10C5 }, + { 0x10D0, 0x10FA }, + { 0x10FC, 0x1248 }, + { 0x124A, 0x124D }, + { 0x1250, 0x1256 }, + { 0x125A, 0x125D }, + { 0x1260, 0x1288 }, + { 0x128A, 0x128D }, + { 0x1290, 0x12B0 }, + { 0x12B2, 0x12B5 }, + { 0x12B8, 0x12BE }, + { 0x12C2, 0x12C5 }, + { 0x12C8, 0x12D6 }, + { 0x12D8, 0x1310 }, + { 0x1312, 0x1315 }, + { 
0x1318, 0x135A }, + { 0x1380, 0x138F }, + { 0x13A0, 0x13F5 }, + { 0x13F8, 0x13FD }, + { 0x1401, 0x166C }, + { 0x166F, 0x167F }, + { 0x1681, 0x169A }, + { 0x16A0, 0x16EA }, + { 0x16F1, 0x16F8 }, + { 0x1700, 0x170C }, + { 0x170E, 0x1711 }, + { 0x1720, 0x1731 }, + { 0x1740, 0x1751 }, + { 0x1760, 0x176C }, + { 0x176E, 0x1770 }, + { 0x1780, 0x17B3 }, + { 0x1820, 0x1877 }, + { 0x1880, 0x1884 }, + { 0x1887, 0x18A8 }, + { 0x18B0, 0x18F5 }, + { 0x1900, 0x191E }, + { 0x1950, 0x196D }, + { 0x1970, 0x1974 }, + { 0x1980, 0x19AB }, + { 0x19B0, 0x19C9 }, + { 0x1A00, 0x1A16 }, + { 0x1A20, 0x1A54 }, + { 0x1B05, 0x1B33 }, + { 0x1B45, 0x1B4B }, + { 0x1B83, 0x1BA0 }, + { 0x1BAE, 0x1BAF }, + { 0x1BBA, 0x1BE5 }, + { 0x1C00, 0x1C23 }, + { 0x1C4D, 0x1C4F }, + { 0x1C5A, 0x1C7D }, + { 0x1C80, 0x1C88 }, + { 0x1CE9, 0x1CEC }, + { 0x1CEE, 0x1CF1 }, + { 0x1CF5, 0x1CF6 }, + { 0x1D00, 0x1DBF }, + { 0x1E00, 0x1F15 }, + { 0x1F18, 0x1F1D }, + { 0x1F20, 0x1F45 }, + { 0x1F48, 0x1F4D }, + { 0x1F50, 0x1F57 }, + { 0x1F5F, 0x1F7D }, + { 0x1F80, 0x1FB4 }, + { 0x1FB6, 0x1FBC }, + { 0x1FC2, 0x1FC4 }, + { 0x1FC6, 0x1FCC }, + { 0x1FD0, 0x1FD3 }, + { 0x1FD6, 0x1FDB }, + { 0x1FE0, 0x1FEC }, + { 0x1FF2, 0x1FF4 }, + { 0x1FF6, 0x1FFC }, + { 0x2090, 0x209C }, + { 0x210A, 0x2113 }, + { 0x2119, 0x211D }, + { 0x212A, 0x212D }, + { 0x212F, 0x2139 }, + { 0x213C, 0x213F }, + { 0x2145, 0x2149 }, + { 0x2183, 0x2184 }, + { 0x2C00, 0x2C2E }, + { 0x2C30, 0x2C5E }, + { 0x2C60, 0x2CE4 }, + { 0x2CEB, 0x2CEE }, + { 0x2CF2, 0x2CF3 }, + { 0x2D00, 0x2D25 }, + { 0x2D30, 0x2D67 }, + { 0x2D80, 0x2D96 }, + { 0x2DA0, 0x2DA6 }, + { 0x2DA8, 0x2DAE }, + { 0x2DB0, 0x2DB6 }, + { 0x2DB8, 0x2DBE }, + { 0x2DC0, 0x2DC6 }, + { 0x2DC8, 0x2DCE }, + { 0x2DD0, 0x2DD6 }, + { 0x2DD8, 0x2DDE }, + { 0x3005, 0x3006 }, + { 0x3031, 0x3035 }, + { 0x303B, 0x303C }, + { 0x3041, 0x3096 }, + { 0x309D, 0x309F }, + { 0x30A1, 0x30FA }, + { 0x30FC, 0x30FF }, + { 0x3105, 0x312D }, + { 0x3131, 0x318E }, + { 0x31A0, 0x31BA }, + { 0x31F0, 0x31FF }, + { 0xA000, 0xA48C }, + 
{ 0xA4D0, 0xA4FD }, + { 0xA500, 0xA60C }, + { 0xA610, 0xA61F }, + { 0xA62A, 0xA62B }, + { 0xA640, 0xA66E }, + { 0xA67F, 0xA69D }, + { 0xA6A0, 0xA6E5 }, + { 0xA717, 0xA71F }, + { 0xA722, 0xA788 }, + { 0xA78B, 0xA7AE }, + { 0xA7B0, 0xA7B7 }, + { 0xA7F7, 0xA801 }, + { 0xA803, 0xA805 }, + { 0xA807, 0xA80A }, + { 0xA80C, 0xA822 }, + { 0xA840, 0xA873 }, + { 0xA882, 0xA8B3 }, + { 0xA8F2, 0xA8F7 }, + { 0xA90A, 0xA925 }, + { 0xA930, 0xA946 }, + { 0xA960, 0xA97C }, + { 0xA984, 0xA9B2 }, + { 0xA9E0, 0xA9E4 }, + { 0xA9E6, 0xA9EF }, + { 0xA9FA, 0xA9FE }, + { 0xAA00, 0xAA28 }, + { 0xAA40, 0xAA42 }, + { 0xAA44, 0xAA4B }, + { 0xAA60, 0xAA76 }, + { 0xAA7E, 0xAAAF }, + { 0xAAB5, 0xAAB6 }, + { 0xAAB9, 0xAABD }, + { 0xAADB, 0xAADD }, + { 0xAAE0, 0xAAEA }, + { 0xAAF2, 0xAAF4 }, + { 0xAB01, 0xAB06 }, + { 0xAB09, 0xAB0E }, + { 0xAB11, 0xAB16 }, + { 0xAB20, 0xAB26 }, + { 0xAB28, 0xAB2E }, + { 0xAB30, 0xAB5A }, + { 0xAB5C, 0xAB65 }, + { 0xAB70, 0xABE2 }, + { 0xD7B0, 0xD7C6 }, + { 0xD7CB, 0xD7FB }, + { 0xF900, 0xFA6D }, + { 0xFA70, 0xFAD9 }, + { 0xFB00, 0xFB06 }, + { 0xFB13, 0xFB17 }, + { 0xFB1F, 0xFB28 }, + { 0xFB2A, 0xFB36 }, + { 0xFB38, 0xFB3C }, + { 0xFB40, 0xFB41 }, + { 0xFB43, 0xFB44 }, + { 0xFB46, 0xFBB1 }, + { 0xFBD3, 0xFD3D }, + { 0xFD50, 0xFD8F }, + { 0xFD92, 0xFDC7 }, + { 0xFDF0, 0xFDFB }, + { 0xFE70, 0xFE74 }, + { 0xFE76, 0xFEFC }, + { 0xFF21, 0xFF3A }, + { 0xFF41, 0xFF5A }, + { 0xFF66, 0xFFBE }, + { 0xFFC2, 0xFFC7 }, + { 0xFFCA, 0xFFCF }, + { 0xFFD2, 0xFFD7 }, + { 0xFFDA, 0xFFDC }, + { 0x10000, 0x1000B }, + { 0x1000D, 0x10026 }, + { 0x10028, 0x1003A }, + { 0x1003C, 0x1003D }, + { 0x1003F, 0x1004D }, + { 0x10050, 0x1005D }, + { 0x10080, 0x100FA }, + { 0x10280, 0x1029C }, + { 0x102A0, 0x102D0 }, + { 0x10300, 0x1031F }, + { 0x10330, 0x10340 }, + { 0x10342, 0x10349 }, + { 0x10350, 0x10375 }, + { 0x10380, 0x1039D }, + { 0x103A0, 0x103C3 }, + { 0x103C8, 0x103CF }, + { 0x10400, 0x1049D }, + { 0x104B0, 0x104D3 }, + { 0x104D8, 0x104FB }, + { 0x10500, 0x10527 }, + { 0x10530, 0x10563 }, + 
{ 0x10600, 0x10736 }, + { 0x10740, 0x10755 }, + { 0x10760, 0x10767 }, + { 0x10800, 0x10805 }, + { 0x1080A, 0x10835 }, + { 0x10837, 0x10838 }, + { 0x1083F, 0x10855 }, + { 0x10860, 0x10876 }, + { 0x10880, 0x1089E }, + { 0x108E0, 0x108F2 }, + { 0x108F4, 0x108F5 }, + { 0x10900, 0x10915 }, + { 0x10920, 0x10939 }, + { 0x10980, 0x109B7 }, + { 0x109BE, 0x109BF }, + { 0x10A10, 0x10A13 }, + { 0x10A15, 0x10A17 }, + { 0x10A19, 0x10A33 }, + { 0x10A60, 0x10A7C }, + { 0x10A80, 0x10A9C }, + { 0x10AC0, 0x10AC7 }, + { 0x10AC9, 0x10AE4 }, + { 0x10B00, 0x10B35 }, + { 0x10B40, 0x10B55 }, + { 0x10B60, 0x10B72 }, + { 0x10B80, 0x10B91 }, + { 0x10C00, 0x10C48 }, + { 0x10C80, 0x10CB2 }, + { 0x10CC0, 0x10CF2 }, + { 0x11003, 0x11037 }, + { 0x11083, 0x110AF }, + { 0x110D0, 0x110E8 }, + { 0x11103, 0x11126 }, + { 0x11150, 0x11172 }, + { 0x11183, 0x111B2 }, + { 0x111C1, 0x111C4 }, + { 0x11200, 0x11211 }, + { 0x11213, 0x1122B }, + { 0x11280, 0x11286 }, + { 0x1128A, 0x1128D }, + { 0x1128F, 0x1129D }, + { 0x1129F, 0x112A8 }, + { 0x112B0, 0x112DE }, + { 0x11305, 0x1130C }, + { 0x1130F, 0x11310 }, + { 0x11313, 0x11328 }, + { 0x1132A, 0x11330 }, + { 0x11332, 0x11333 }, + { 0x11335, 0x11339 }, + { 0x1135D, 0x11361 }, + { 0x11400, 0x11434 }, + { 0x11447, 0x1144A }, + { 0x11480, 0x114AF }, + { 0x114C4, 0x114C5 }, + { 0x11580, 0x115AE }, + { 0x115D8, 0x115DB }, + { 0x11600, 0x1162F }, + { 0x11680, 0x116AA }, + { 0x11700, 0x11719 }, + { 0x118A0, 0x118DF }, + { 0x11AC0, 0x11AF8 }, + { 0x11C00, 0x11C08 }, + { 0x11C0A, 0x11C2E }, + { 0x11C72, 0x11C8F }, + { 0x12000, 0x12399 }, + { 0x12480, 0x12543 }, + { 0x13000, 0x1342E }, + { 0x14400, 0x14646 }, + { 0x16800, 0x16A38 }, + { 0x16A40, 0x16A5E }, + { 0x16AD0, 0x16AED }, + { 0x16B00, 0x16B2F }, + { 0x16B40, 0x16B43 }, + { 0x16B63, 0x16B77 }, + { 0x16B7D, 0x16B8F }, + { 0x16F00, 0x16F44 }, + { 0x16F93, 0x16F9F }, + { 0x18800, 0x18AF2 }, + { 0x1B000, 0x1B001 }, + { 0x1BC00, 0x1BC6A }, + { 0x1BC70, 0x1BC7C }, + { 0x1BC80, 0x1BC88 }, + { 0x1BC90, 0x1BC99 }, + { 
0x1D400, 0x1D454 }, + { 0x1D456, 0x1D49C }, + { 0x1D49E, 0x1D49F }, + { 0x1D4A5, 0x1D4A6 }, + { 0x1D4A9, 0x1D4AC }, + { 0x1D4AE, 0x1D4B9 }, + { 0x1D4BD, 0x1D4C3 }, + { 0x1D4C5, 0x1D505 }, + { 0x1D507, 0x1D50A }, + { 0x1D50D, 0x1D514 }, + { 0x1D516, 0x1D51C }, + { 0x1D51E, 0x1D539 }, + { 0x1D53B, 0x1D53E }, + { 0x1D540, 0x1D544 }, + { 0x1D54A, 0x1D550 }, + { 0x1D552, 0x1D6A5 }, + { 0x1D6A8, 0x1D6C0 }, + { 0x1D6C2, 0x1D6DA }, + { 0x1D6DC, 0x1D6FA }, + { 0x1D6FC, 0x1D714 }, + { 0x1D716, 0x1D734 }, + { 0x1D736, 0x1D74E }, + { 0x1D750, 0x1D76E }, + { 0x1D770, 0x1D788 }, + { 0x1D78A, 0x1D7A8 }, + { 0x1D7AA, 0x1D7C2 }, + { 0x1D7C4, 0x1D7CB }, + { 0x1E800, 0x1E8C4 }, + { 0x1E900, 0x1E943 }, + { 0x1EE00, 0x1EE03 }, + { 0x1EE05, 0x1EE1F }, + { 0x1EE21, 0x1EE22 }, + { 0x1EE29, 0x1EE32 }, + { 0x1EE34, 0x1EE37 }, + { 0x1EE4D, 0x1EE4F }, + { 0x1EE51, 0x1EE52 }, + { 0x1EE61, 0x1EE62 }, + { 0x1EE67, 0x1EE6A }, + { 0x1EE6C, 0x1EE72 }, + { 0x1EE74, 0x1EE77 }, + { 0x1EE79, 0x1EE7C }, + { 0x1EE80, 0x1EE89 }, + { 0x1EE8B, 0x1EE9B }, + { 0x1EEA1, 0x1EEA3 }, + { 0x1EEA5, 0x1EEA9 }, + { 0x1EEAB, 0x1EEBB }, + { 0x2F800, 0x2FA1D }, +}; + +static Rune alpha1[] = { + 0x00AA, + 0x00B5, + 0x00BA, + 0x0559, + 0x06FF, + 0x07B1, + 0x07FA, + 0x081A, + 0x0824, + 0x0828, + 0x093D, + 0x0950, + 0x09BD, + 0x09CE, + 0x0ABD, + 0x0AD0, + 0x0AF9, + 0x0B3D, + 0x0B71, + 0x0BD0, + 0x0C3D, + 0x0C80, + 0x0CBD, + 0x0D3D, + 0x0D4E, + 0x0E8D, + 0x0EBD, + 0x0F00, + 0x103F, + 0x1061, + 0x108E, + 0x10CD, + 0x17D7, + 0x17DC, + 0x1AA7, + 0x2071, + 0x207F, + 0x2102, + 0x2107, + 0x214E, + 0x2D2D, + 0x2D6F, + 0x2E2F, + 0x3400, + 0x4DB5, + 0x4E00, + 0x9FD5, + 0xA9CF, + 0xAA7A, + 0xAC00, + 0xD7A3, + 0x1083C, + 0x10A00, + 0x11176, + 0x1133D, + 0x11350, + 0x11644, + 0x118FF, + 0x11C40, + 0x16F50, + 0x16FE0, + 0x17000, + 0x187EC, + 0x1D4A2, + 0x1EE42, + 0x20000, + 0x2A6D6, + 0x2A700, + 0x2B734, + 0x2B740, + 0x2B81D, + 0x2B820, + 0x2CEA1, +}; + +int +isalpharune(Rune r) +{ + Rune *match; + + if((match = bsearch(&r, alpha3, 
nelem(alpha3), sizeof *alpha3, &rune2cmp)))
+		/* alpha3 hit: only even offsets from the matched range's start count */
+		return !((r - match[0]) % 2);
+	/* any hit in the alpha2 ranges counts as alphabetic */
+	if(bsearch(&r, alpha2, nelem(alpha2), sizeof *alpha2, &rune2cmp))
+		return 1;
+	/*
+	 * alpha1 holds isolated code points.
+	 * NOTE(review): alpha1 lists 0x3400 and 0x4DB5, 0x4E00 and 0x9FD5,
+	 * 0xAC00 and 0xD7A3, 0x17000 and 0x187EC, 0x20000 and 0x2A6D6, ... as
+	 * separate single entries.  If these are the "First"/"Last" range
+	 * markers of UnicodeData.txt, the code points between them are never
+	 * reported as alphabetic -- confirm against mkrunetype.awk.
+	 */
+	if(bsearch(&r, alpha1, nelem(alpha1), sizeof *alpha1, &rune1cmp))
+		return 1;
+	return 0;
+}
diff --git a/libutf/isblankrune.c b/libutf/isblankrune.c
new file mode 100644
index 0000000..7cf9159
--- /dev/null
+++ b/libutf/isblankrune.c
_AT_@ -0,0 +1,9 @@
+/* Automatically generated by mkrunetype.awk */
+#include "../utf.h"
+#include "runetype.h"
+
+/* Non-zero only for the space character and the horizontal tab. */
+int
+isblankrune(Rune r)
+{
+	return r == ' ' || r == '\t';
+}
diff --git a/libutf/iscntrlrune.c b/libutf/iscntrlrune.c
new file mode 100644
index 0000000..286dce8
--- /dev/null
+++ b/libutf/iscntrlrune.c
_AT_@ -0,0 +1,18 @@
+/* Automatically generated by mkrunetype.awk */
+#include <stdlib.h>
+
+#include "../utf.h"
+#include "runetype.h"
+
+/* { first, last } ranges searched by iscntrlrune() */
+static Rune cntrl2[][2] = {
+	{ 0x0000, 0x001F },
+	{ 0x007F, 0x009F },
+};
+
+/*
+ * Returns 1 when bsearch() finds r in cntrl2, 0 otherwise.
+ * NOTE(review): rune2cmp is defined elsewhere; presumably it matches when
+ * r lies within an entry's [first, last] -- confirm.
+ */
+int
+iscntrlrune(Rune r)
+{
+	if(bsearch(&r, cntrl2, nelem(cntrl2), sizeof *cntrl2, &rune2cmp))
+		return 1;
+	return 0;
+}
diff --git a/libutf/isdigitrune.c b/libutf/isdigitrune.c
new file mode 100644
index 0000000..d48bcca
--- /dev/null
+++ b/libutf/isdigitrune.c
_AT_@ -0,0 +1,70 @@
+/* Automatically generated by mkrunetype.awk */
+#include <stdlib.h>
+
+#include "../utf.h"
+#include "runetype.h"
+
+/* { first, last } ranges searched by isdigitrune() */
+static Rune digit2[][2] = {
+	{ 0x0030, 0x0039 },
+	{ 0x0660, 0x0669 },
+	{ 0x06F0, 0x06F9 },
+	{ 0x07C0, 0x07C9 },
+	{ 0x0966, 0x096F },
+	{ 0x09E6, 0x09EF },
+	{ 0x0A66, 0x0A6F },
+	{ 0x0AE6, 0x0AEF },
+	{ 0x0B66, 0x0B6F },
+	{ 0x0BE6, 0x0BEF },
+	{ 0x0C66, 0x0C6F },
+	{ 0x0CE6, 0x0CEF },
+	{ 0x0D66, 0x0D6F },
+	{ 0x0DE6, 0x0DEF },
+	{ 0x0E50, 0x0E59 },
+	{ 0x0ED0, 0x0ED9 },
+	{ 0x0F20, 0x0F29 },
+	{ 0x1040, 0x1049 },
+	{ 0x1090, 0x1099 },
+	{ 0x17E0, 0x17E9 },
+	{ 0x1810, 0x1819 },
+	{ 0x1946, 0x194F },
+	{ 0x19D0, 0x19D9 },
+	{ 0x1A80, 0x1A89 },
+	{ 0x1A90, 0x1A99 },
+	{ 0x1B50, 0x1B59 },
+	{ 0x1BB0, 0x1BB9 },
+	{ 0x1C40, 0x1C49 },
+	{ 0x1C50, 0x1C59
},
+	{ 0xA620, 0xA629 },
+	{ 0xA8D0, 0xA8D9 },
+	{ 0xA900, 0xA909 },
+	{ 0xA9D0, 0xA9D9 },
+	{ 0xA9F0, 0xA9F9 },
+	{ 0xAA50, 0xAA59 },
+	{ 0xABF0, 0xABF9 },
+	{ 0xFF10, 0xFF19 },
+	{ 0x104A0, 0x104A9 },
+	{ 0x11066, 0x1106F },
+	{ 0x110F0, 0x110F9 },
+	{ 0x11136, 0x1113F },
+	{ 0x111D0, 0x111D9 },
+	{ 0x112F0, 0x112F9 },
+	{ 0x11450, 0x11459 },
+	{ 0x114D0, 0x114D9 },
+	{ 0x11650, 0x11659 },
+	{ 0x116C0, 0x116C9 },
+	{ 0x11730, 0x11739 },
+	{ 0x118E0, 0x118E9 },
+	{ 0x11C50, 0x11C59 },
+	{ 0x16A60, 0x16A69 },
+	{ 0x16B50, 0x16B59 },
+	{ 0x1D7CE, 0x1D7FF },
+	{ 0x1E950, 0x1E959 },
+};
+
+/*
+ * Returns 1 when bsearch() finds r in digit2, 0 otherwise.
+ * NOTE(review): rune2cmp is defined elsewhere; presumably it matches when
+ * r lies within an entry's [first, last] -- confirm.
+ */
+int
+isdigitrune(Rune r)
+{
+	if(bsearch(&r, digit2, nelem(digit2), sizeof *digit2, &rune2cmp))
+		return 1;
+	return 0;
+}
diff --git a/libutf/isgraphrune.c b/libutf/isgraphrune.c
new file mode 100644
index 0000000..08770f6
--- /dev/null
+++ b/libutf/isgraphrune.c
_AT_@ -0,0 +1,9 @@
+/* Automatically generated by mkrunetype.awk */
+#include "../utf.h"
+#include "runetype.h"
+
+/* Non-zero when r is printable (isprintrune) and not white space (isspacerune). */
+int
+isgraphrune(Rune r)
+{
+	return !isspacerune(r) && isprintrune(r);
+}
diff --git a/libutf/isprintrune.c b/libutf/isprintrune.c
new file mode 100644
index 0000000..f6e2fa4
--- /dev/null
+++ b/libutf/isprintrune.c
_AT_@ -0,0 +1,10 @@
+/* Automatically generated by mkrunetype.awk */
+#include "../utf.h"
+#include "runetype.h"
+
+/*
+ * Non-zero when r is not a control rune (iscntrlrune), is neither U+2028
+ * (line separator) nor U+2029 (paragraph separator), and lies outside
+ * U+FFF9..U+FFFB (interlinear annotation characters).
+ */
+int
+isprintrune(Rune r)
+{
+	return !iscntrlrune(r) && (r != 0x2028) && (r != 0x2029) &&
+	       ((r < 0xFFF9) || (r > 0xFFFB));
+}
diff --git a/libutf/ispunctrune.c b/libutf/ispunctrune.c
new file mode 100644
index 0000000..d73cb25
--- /dev/null
+++ b/libutf/ispunctrune.c
_AT_@ -0,0 +1,9 @@
+/* Automatically generated by mkrunetype.awk */
+#include "../utf.h"
+#include "runetype.h"
+
+/* Non-zero when r is graphic (isgraphrune) but not alphanumeric (isalnumrune). */
+int
+ispunctrune(Rune r)
+{
+	return isgraphrune(r) && !isalnumrune(r);
+}
diff --git a/libutf/isspacerune.c b/libutf/isspacerune.c
new file mode 100644
index 0000000..bb8fe28
--- /dev/null
+++ b/libutf/isspacerune.c
_AT_@ -0,0 +1,31 @@
+/* Automatically generated by mkrunetype.awk */
+#include <stdlib.h>
+
+#include "../utf.h"
+#include "runetype.h"
+
+/* { first, last } ranges searched by isspacerune() */
+static Rune space2[][2] = {
+	{ 0x0009, 0x000D },
+	{ 0x001C, 0x0020 },
+	{ 0x2000, 0x200A },
+	{ 0x2028, 0x2029 },
+};
+
+/* isolated code points searched by isspacerune() */
+static Rune space1[] = {
+	0x0085,
+	0x00A0,
+	0x1680,
+	0x202F,
+	0x205F,
+	0x3000,
+};
+
+/* Returns 1 when r is found in space2 or in space1, 0 otherwise. */
+int
+isspacerune(Rune r)
+{
+	if(bsearch(&r, space2, nelem(space2), sizeof *space2, &rune2cmp))
+		return 1;
+	if(bsearch(&r, space1, nelem(space1), sizeof *space1, &rune1cmp))
+		return 1;
+	return 0;
+}
diff --git a/libutf/istitlerune.c b/libutf/istitlerune.c
new file mode 100644
index 0000000..211a4aa
--- /dev/null
+++ b/libutf/istitlerune.c
_AT_@ -0,0 +1,31 @@
+/* Automatically generated by mkrunetype.awk */
+#include <stdlib.h>
+
+#include "../utf.h"
+#include "runetype.h"
+
+/* { first, last } ranges searched by istitlerune() */
+static Rune title2[][2] = {
+	{ 0x1F88, 0x1F8F },
+	{ 0x1F98, 0x1F9F },
+	{ 0x1FA8, 0x1FAF },
+};
+
+/* isolated code points searched by istitlerune() */
+static Rune title1[] = {
+	0x01C5,
+	0x01C8,
+	0x01CB,
+	0x01F2,
+	0x1FBC,
+	0x1FCC,
+	0x1FFC,
+};
+
+/* Returns 1 when r is found in title2 or in title1, 0 otherwise. */
+int
+istitlerune(Rune r)
+{
+	if(bsearch(&r, title2, nelem(title2), sizeof *title2, &rune2cmp))
+		return 1;
+	if(bsearch(&r, title1, nelem(title1), sizeof *title1, &rune1cmp))
+		return 1;
+	return 0;
+}
diff --git a/libutf/isxdigitrune.c b/libutf/isxdigitrune.c
new file mode 100644
index 0000000..0797240
--- /dev/null
+++ b/libutf/isxdigitrune.c
_AT_@ -0,0 +1,9 @@
+/* Automatically generated by mkrunetype.awk */
+#include "../utf.h"
+#include "runetype.h"
+
+/*
+ * Non-zero when r is an ASCII hexadecimal digit: '0'..'9', 'a'..'f' or
+ * 'A'..'F'.  The upper-case letters were previously rejected, unlike
+ * isxdigit(3).
+ *
+ * NOTE(review): this file is marked as generated by mkrunetype.awk; the
+ * generator presumably needs the same change or it will be lost on the
+ * next regeneration -- confirm.
+ */
+int
+isxdigitrune(Rune r)
+{
+	return (r >= '0' && (r - '0') < 10) || (r >= 'a' && (r - 'a') < 6) ||
+	       (r >= 'A' && (r - 'A') < 6);
+}
diff --git a/libutf/lowerrune.c b/libutf/lowerrune.c
new file mode 100644
index 0000000..a6d0485
--- /dev/null
+++ b/libutf/lowerrune.c
_AT_@ -0,0 +1,334 @@
+/* Automatically generated by mkrunetype.awk */
+#include <stdlib.h>
+
+#include "../utf.h"
+#include "runetype.h"
+
+/*
+ * { first, last } ranges in which islowerrune() accepts only the code
+ * points at an even offset from first; toupperrune() maps those to r - 1.
+ */
+static Rune lower4[][2] = {
+	{ 0x0101, 0x012F },
+	{ 0x0133, 0x0137 },
+	{ 0x013A, 0x0148 },
+	{ 0x014B, 0x0177 },
+	{ 0x017A, 0x017E },
+	{ 0x0183, 0x0185 },
+	{ 0x01A1, 0x01A5 },
+	{ 0x01B4, 0x01B6 },
+	{
0x01CE, 0x01DC }, + { 0x01DF, 0x01EF }, + { 0x01F9, 0x021F }, + { 0x0223, 0x0233 }, + { 0x0247, 0x024F }, + { 0x0371, 0x0373 }, + { 0x03D9, 0x03EF }, + { 0x0461, 0x0481 }, + { 0x048B, 0x04BF }, + { 0x04C2, 0x04CE }, + { 0x04D1, 0x052F }, + { 0x1E01, 0x1E95 }, + { 0x1EA1, 0x1EFF }, + { 0x2C68, 0x2C6C }, + { 0x2C81, 0x2CE3 }, + { 0x2CEC, 0x2CEE }, + { 0xA641, 0xA66D }, + { 0xA681, 0xA69B }, + { 0xA723, 0xA72F }, + { 0xA733, 0xA76F }, + { 0xA77A, 0xA77C }, + { 0xA77F, 0xA787 }, + { 0xA791, 0xA793 }, + { 0xA797, 0xA7A9 }, + { 0xA7B5, 0xA7B7 }, +}; + +static Rune lower2[][3] = { + { 0x0061, 0x007A, 0x0041 }, + { 0x00E0, 0x00F6, 0x00C0 }, + { 0x00F8, 0x00FE, 0x00D8 }, + { 0x01AA, 0x01AB, 0x01AA }, + { 0x0234, 0x0239, 0x0234 }, + { 0x023F, 0x0240, 0x2C7E }, + { 0x0256, 0x0257, 0x0189 }, + { 0x025D, 0x025F, 0x025D }, + { 0x026D, 0x026E, 0x026D }, + { 0x0273, 0x0274, 0x0273 }, + { 0x0276, 0x027C, 0x0276 }, + { 0x027E, 0x027F, 0x027E }, + { 0x0281, 0x0282, 0x0281 }, + { 0x0284, 0x0286, 0x0284 }, + { 0x028A, 0x028B, 0x01B1 }, + { 0x028D, 0x0291, 0x028D }, + { 0x0295, 0x029C, 0x0295 }, + { 0x029F, 0x02AF, 0x029F }, + { 0x037B, 0x037D, 0x03FD }, + { 0x03AD, 0x03AF, 0x0388 }, + { 0x03B1, 0x03C1, 0x0391 }, + { 0x03C3, 0x03CB, 0x03A3 }, + { 0x03CD, 0x03CE, 0x038E }, + { 0x0430, 0x044F, 0x0410 }, + { 0x0450, 0x045F, 0x0400 }, + { 0x0561, 0x0586, 0x0531 }, + { 0x13F8, 0x13FD, 0x13F0 }, + { 0x1C83, 0x1C84, 0x0421 }, + { 0x1D00, 0x1D2B, 0x1D00 }, + { 0x1D6B, 0x1D77, 0x1D6B }, + { 0x1D7A, 0x1D7C, 0x1D7A }, + { 0x1D7E, 0x1D9A, 0x1D7E }, + { 0x1E96, 0x1E9A, 0x1E96 }, + { 0x1E9C, 0x1E9D, 0x1E9C }, + { 0x1F00, 0x1F07, 0x1F08 }, + { 0x1F10, 0x1F15, 0x1F18 }, + { 0x1F20, 0x1F27, 0x1F28 }, + { 0x1F30, 0x1F37, 0x1F38 }, + { 0x1F40, 0x1F45, 0x1F48 }, + { 0x1F60, 0x1F67, 0x1F68 }, + { 0x1F70, 0x1F71, 0x1FBA }, + { 0x1F72, 0x1F75, 0x1FC8 }, + { 0x1F76, 0x1F77, 0x1FDA }, + { 0x1F78, 0x1F79, 0x1FF8 }, + { 0x1F7A, 0x1F7B, 0x1FEA }, + { 0x1F7C, 0x1F7D, 0x1FFA }, + { 0x1F80, 0x1F87, 0x1F88 }, + { 
0x1F90, 0x1F97, 0x1F98 }, + { 0x1FA0, 0x1FA7, 0x1FA8 }, + { 0x1FB0, 0x1FB1, 0x1FB8 }, + { 0x1FB6, 0x1FB7, 0x1FB6 }, + { 0x1FC6, 0x1FC7, 0x1FC6 }, + { 0x1FD0, 0x1FD1, 0x1FD8 }, + { 0x1FD2, 0x1FD3, 0x1FD2 }, + { 0x1FD6, 0x1FD7, 0x1FD6 }, + { 0x1FE0, 0x1FE1, 0x1FE8 }, + { 0x1FE2, 0x1FE4, 0x1FE2 }, + { 0x1FE6, 0x1FE7, 0x1FE6 }, + { 0x1FF6, 0x1FF7, 0x1FF6 }, + { 0x210E, 0x210F, 0x210E }, + { 0x213C, 0x213D, 0x213C }, + { 0x2146, 0x2149, 0x2146 }, + { 0x2C30, 0x2C5E, 0x2C00 }, + { 0x2C77, 0x2C7B, 0x2C77 }, + { 0x2D00, 0x2D25, 0x10A0 }, + { 0xA730, 0xA731, 0xA730 }, + { 0xA771, 0xA778, 0xA771 }, + { 0xA794, 0xA795, 0xA794 }, + { 0xAB30, 0xAB52, 0xAB30 }, + { 0xAB54, 0xAB5A, 0xAB54 }, + { 0xAB60, 0xAB65, 0xAB60 }, + { 0xAB70, 0xABBF, 0x13A0 }, + { 0xFB00, 0xFB06, 0xFB00 }, + { 0xFB13, 0xFB17, 0xFB13 }, + { 0xFF41, 0xFF5A, 0xFF21 }, + { 0x10428, 0x1044F, 0x10400 }, + { 0x104D8, 0x104FB, 0x104B0 }, + { 0x10CC0, 0x10CF2, 0x10C80 }, + { 0x118C0, 0x118DF, 0x118A0 }, + { 0x1D41A, 0x1D433, 0x1D41A }, + { 0x1D44E, 0x1D454, 0x1D44E }, + { 0x1D456, 0x1D467, 0x1D456 }, + { 0x1D482, 0x1D49B, 0x1D482 }, + { 0x1D4B6, 0x1D4B9, 0x1D4B6 }, + { 0x1D4BD, 0x1D4C3, 0x1D4BD }, + { 0x1D4C5, 0x1D4CF, 0x1D4C5 }, + { 0x1D4EA, 0x1D503, 0x1D4EA }, + { 0x1D51E, 0x1D537, 0x1D51E }, + { 0x1D552, 0x1D56B, 0x1D552 }, + { 0x1D586, 0x1D59F, 0x1D586 }, + { 0x1D5BA, 0x1D5D3, 0x1D5BA }, + { 0x1D5EE, 0x1D607, 0x1D5EE }, + { 0x1D622, 0x1D63B, 0x1D622 }, + { 0x1D656, 0x1D66F, 0x1D656 }, + { 0x1D68A, 0x1D6A5, 0x1D68A }, + { 0x1D6C2, 0x1D6DA, 0x1D6C2 }, + { 0x1D6DC, 0x1D6E1, 0x1D6DC }, + { 0x1D6FC, 0x1D714, 0x1D6FC }, + { 0x1D716, 0x1D71B, 0x1D716 }, + { 0x1D736, 0x1D74E, 0x1D736 }, + { 0x1D750, 0x1D755, 0x1D750 }, + { 0x1D770, 0x1D788, 0x1D770 }, + { 0x1D78A, 0x1D78F, 0x1D78A }, + { 0x1D7AA, 0x1D7C2, 0x1D7AA }, + { 0x1D7C4, 0x1D7C9, 0x1D7C4 }, + { 0x1E922, 0x1E943, 0x1E900 }, +}; + +static Rune lower1[][2] = { + { 0x00B5, 0x039C }, + { 0x00DF, 0x00DF }, + { 0x00FF, 0x0178 }, + { 0x0131, 0x0049 }, + { 0x0138, 
0x0138 }, + { 0x0149, 0x0149 }, + { 0x017F, 0x0053 }, + { 0x0180, 0x0243 }, + { 0x0188, 0x0187 }, + { 0x018C, 0x018B }, + { 0x018D, 0x018D }, + { 0x0192, 0x0191 }, + { 0x0195, 0x01F6 }, + { 0x0199, 0x0198 }, + { 0x019A, 0x023D }, + { 0x019B, 0x019B }, + { 0x019E, 0x0220 }, + { 0x01A8, 0x01A7 }, + { 0x01AD, 0x01AC }, + { 0x01B0, 0x01AF }, + { 0x01B9, 0x01B8 }, + { 0x01BA, 0x01BA }, + { 0x01BD, 0x01BC }, + { 0x01BE, 0x01BE }, + { 0x01BF, 0x01F7 }, + { 0x01C6, 0x01C4 }, + { 0x01C9, 0x01C7 }, + { 0x01CC, 0x01CA }, + { 0x01DD, 0x018E }, + { 0x01F0, 0x01F0 }, + { 0x01F3, 0x01F1 }, + { 0x01F5, 0x01F4 }, + { 0x0221, 0x0221 }, + { 0x023C, 0x023B }, + { 0x0242, 0x0241 }, + { 0x0250, 0x2C6F }, + { 0x0251, 0x2C6D }, + { 0x0252, 0x2C70 }, + { 0x0253, 0x0181 }, + { 0x0254, 0x0186 }, + { 0x0255, 0x0255 }, + { 0x0258, 0x0258 }, + { 0x0259, 0x018F }, + { 0x025A, 0x025A }, + { 0x025B, 0x0190 }, + { 0x025C, 0xA7AB }, + { 0x0260, 0x0193 }, + { 0x0261, 0xA7AC }, + { 0x0262, 0x0262 }, + { 0x0263, 0x0194 }, + { 0x0264, 0x0264 }, + { 0x0265, 0xA78D }, + { 0x0266, 0xA7AA }, + { 0x0267, 0x0267 }, + { 0x0268, 0x0197 }, + { 0x0269, 0x0196 }, + { 0x026A, 0xA7AE }, + { 0x026B, 0x2C62 }, + { 0x026C, 0xA7AD }, + { 0x026F, 0x019C }, + { 0x0270, 0x0270 }, + { 0x0271, 0x2C6E }, + { 0x0272, 0x019D }, + { 0x0275, 0x019F }, + { 0x027D, 0x2C64 }, + { 0x0280, 0x01A6 }, + { 0x0283, 0x01A9 }, + { 0x0287, 0xA7B1 }, + { 0x0288, 0x01AE }, + { 0x0289, 0x0244 }, + { 0x028C, 0x0245 }, + { 0x0292, 0x01B7 }, + { 0x0293, 0x0293 }, + { 0x029D, 0xA7B2 }, + { 0x029E, 0xA7B0 }, + { 0x0377, 0x0376 }, + { 0x0390, 0x0390 }, + { 0x03AC, 0x0386 }, + { 0x03B0, 0x03B0 }, + { 0x03C2, 0x03A3 }, + { 0x03CC, 0x038C }, + { 0x03D0, 0x0392 }, + { 0x03D1, 0x0398 }, + { 0x03D5, 0x03A6 }, + { 0x03D6, 0x03A0 }, + { 0x03D7, 0x03CF }, + { 0x03F0, 0x039A }, + { 0x03F1, 0x03A1 }, + { 0x03F2, 0x03F9 }, + { 0x03F3, 0x037F }, + { 0x03F5, 0x0395 }, + { 0x03F8, 0x03F7 }, + { 0x03FB, 0x03FA }, + { 0x03FC, 0x03FC }, + { 0x04CF, 0x04C0 }, + { 
0x0587, 0x0587 }, + { 0x1C80, 0x0412 }, + { 0x1C81, 0x0414 }, + { 0x1C82, 0x041E }, + { 0x1C85, 0x0422 }, + { 0x1C86, 0x042A }, + { 0x1C87, 0x0462 }, + { 0x1C88, 0xA64A }, + { 0x1D79, 0xA77D }, + { 0x1D7D, 0x2C63 }, + { 0x1E9B, 0x1E60 }, + { 0x1E9F, 0x1E9F }, + { 0x1F50, 0x1F50 }, + { 0x1F51, 0x1F59 }, + { 0x1F52, 0x1F52 }, + { 0x1F53, 0x1F5B }, + { 0x1F54, 0x1F54 }, + { 0x1F55, 0x1F5D }, + { 0x1F56, 0x1F56 }, + { 0x1F57, 0x1F5F }, + { 0x1FB2, 0x1FB2 }, + { 0x1FB3, 0x1FBC }, + { 0x1FB4, 0x1FB4 }, + { 0x1FBE, 0x0399 }, + { 0x1FC2, 0x1FC2 }, + { 0x1FC3, 0x1FCC }, + { 0x1FC4, 0x1FC4 }, + { 0x1FE5, 0x1FEC }, + { 0x1FF2, 0x1FF2 }, + { 0x1FF3, 0x1FFC }, + { 0x1FF4, 0x1FF4 }, + { 0x210A, 0x210A }, + { 0x2113, 0x2113 }, + { 0x212F, 0x212F }, + { 0x2134, 0x2134 }, + { 0x2139, 0x2139 }, + { 0x214E, 0x2132 }, + { 0x2184, 0x2183 }, + { 0x2C61, 0x2C60 }, + { 0x2C65, 0x023A }, + { 0x2C66, 0x023E }, + { 0x2C71, 0x2C71 }, + { 0x2C73, 0x2C72 }, + { 0x2C74, 0x2C74 }, + { 0x2C76, 0x2C75 }, + { 0x2CE4, 0x2CE4 }, + { 0x2CF3, 0x2CF2 }, + { 0x2D27, 0x10C7 }, + { 0x2D2D, 0x10CD }, + { 0xA78C, 0xA78B }, + { 0xA78E, 0xA78E }, + { 0xA7FA, 0xA7FA }, + { 0xAB53, 0xA7B3 }, + { 0x1D4BB, 0x1D4BB }, + { 0x1D7CB, 0x1D7CB }, +}; + +int +islowerrune(Rune r) +{ + Rune *match; + + if((match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &rune2cmp))) + return !((r - match[0]) % 2); + if(bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &rune2cmp)) + return 1; + if(bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &rune1cmp)) + return 1; + return 0; +} + +int +toupperrune(Rune r) +{ + Rune *match; + + match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &rune2cmp); + if (match) + return ((r - match[0]) % 2) ? 
r : r - 1;
+	match = bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &rune2cmp);
+	if (match)
+		return match[2] + (r - match[0]);
+	match = bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &rune1cmp);
+	if (match)
+		return match[1];
+	return r;
+}
diff --git a/libutf/mkrunetype.awk b/libutf/mkrunetype.awk
new file mode 100644
index 0000000..3736e78
--- /dev/null
+++ b/libutf/mkrunetype.awk
_AT_@ -0,0 +1,240 @@
+# See LICENSE file for copyright and license details.
+
+# Generates the rune classification and case-conversion C sources from
+# ';'-separated records.  Field 1 is the code point in upper-case hex,
+# field 3 a category code (L*, Z*, Cc, Lu, Ll, Lt, Nd), field 5 a class
+# code (WS, S, B), and fields 13/14 the upper/lower case counterpart.
+# (Presumably the input is UnicodeData.txt -- confirm against the Makefile.)
+
+BEGIN {
+	FS = ";"
+	# set up hexadecimal lookup table
+	for(i = 0; i < 16; i++)
+		hex[sprintf("%X",i)] = i;
+	HEADER = "/* Automatically generated by mkrunetype.awk */\n#include <stdlib.h>\n\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
+	HEADER_OTHER = "/* Automatically generated by mkrunetype.awk */\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
+}
+
+# collect the code points of every class; for cased letters also remember
+# the counterpart, falling back to the code point itself when none is given
+$3 ~ /^L/ { alphav[alphac++] = $1; }
+($3 ~ /^Z/) || ($5 == "WS") || ($5 == "S") || ($5 == "B") { spacev[spacec++] = $1; }
+$3 == "Cc" { cntrlv[cntrlc++] = $1; }
+$3 == "Lu" { upperv[upperc++] = $1; tolowerv[uppercc++] = ($14 == "") ? $1 : $14; }
+$3 == "Ll" { lowerv[lowerc++] = $1; toupperv[lowercc++] = ($13 == "") ? $1 : $13; }
+$3 == "Lt" { titlev[titlec++] = $1; }
+$3 == "Nd" { digitv[digitc++] = $1; }
+
+END {
+	system("rm -f isalpharune.c isspacerune.c iscntrlrune.c upperrune.c lowerrune.c istitlerune.c isdigitrune.c");
+
+	# q is never assigned: it serves as the empty case table
+	mkis("alpha", alphav, alphac, "isalpharune.c", q, "");
+	mkis("space", spacev, spacec, "isspacerune.c", q, "");
+	mkis("cntrl", cntrlv, cntrlc, "iscntrlrune.c", q, "");
+	mkis("upper", upperv, upperc, "upperrune.c", tolowerv, "lower");
+	mkis("lower", lowerv, lowerc, "lowerrune.c", toupperv, "upper");
+	mkis("title", titlev, titlec, "istitlerune.c", q, "");
+	mkis("digit", digitv, digitc, "isdigitrune.c", q, "");
+
+	system("rm -f isalnumrune.c isblankrune.c isprintrune.c isgraphrune.c ispunctrune.c isxdigitrune.c");
+
+	otheris();
+}
+
+# parse hexadecimal rune index to int
+# (x, i and c are globals, which is why mkis() iterates with j)
+function code(s) {
+	x = 0;
+	for(i = 1; i <= length(s); i++) {
+		c = substr(s, i, 1);
+		x = (x*16) + hex[c];
+	}
+	return x;
+}
+
+# generate 'is<name>rune' unicode lookup function
+#
+# runev[0..runec-1] holds the code points of the class in input order.
+# When casev is non-empty, casev[j] is the case counterpart of runev[j] and
+# a 'to<casename>rune' conversion function is generated as well.
+#
+# The code points are split into four tables, tracked by 'mode':
+#   1  singletons
+#   2  ranges  (consecutive code points, consecutive counterparts)
+#   3  lace 1  (every second code point, counterpart is code point + 1)
+#   4  lace 2  (every second code point, counterpart is code point - 1)
+# <kind>v0[] holds the first and <kind>v1[] the last code point of each
+# group; the <kind>c counter is advanced only once a group is closed.
+function mkis(name, runev, runec, file, casev, casename) {
+	rune1c = 0;
+	rune2c = 0;
+	rune3c = 0;
+	rune4c = 0;
+	mode = 1;
+
+	#sort rune groups into singletons, ranges and laces
+	for(j = 0; j < runec; j++) {
+		# range
+		if(code(runev[j+1]) == code(runev[j])+1 && ((length(casev) == 0) ||
+		   code(casev[j+1]) == code(casev[j])+1) && j+1 < runec) {
+			if (mode == 2) {
+				continue;
+			} else if (mode == 3) {
+				rune3v1[rune3c] = runev[j];
+				rune3c++;
+			} else if (mode == 4) {
+				rune4v1[rune4c] = runev[j];
+				rune4c++;
+			}
+			mode = 2;
+			rune2v0[rune2c] = runev[j];
+			if(length(casev) > 0) {
+				case2v[rune2c] = casev[j];
+			}
+			continue;
+		}
+		# lace 1
+		if(code(runev[j+1]) == code(runev[j])+2 && ((length(casev) == 0) ||
+		   (code(casev[j+1]) == code(runev[j+1])+1 && code(casev[j]) == code(runev[j])+1)) &&
+		   j+1 < runec) {
+			if (mode == 3) {
+				continue;
+			} else if (mode == 2) {
+				rune2v1[rune2c] = runev[j];
+				rune2c++;
+			} else if (mode == 4) {
+				# close the open lace 2 group in its own slot
+				# (was indexed with rune2c)
+				rune4v1[rune4c] = runev[j];
+				rune4c++;
+			}
+			mode = 3;
+			rune3v0[rune3c] = runev[j];
+			continue;
+		}
+		# lace 2
+		if(code(runev[j+1]) == code(runev[j])+2 && ((length(casev) == 0) ||
+		   (code(casev[j+1]) == code(runev[j+1])-1 && code(casev[j]) == code(runev[j])-1)) &&
+		   j+1 < runec) {
+			if (mode == 4) {
+				continue;
+			} else if (mode == 2) {
+				rune2v1[rune2c] = runev[j];
+				rune2c++;
+			} else if (mode == 3) {
+				# close the open lace 1 group in its own slot
+				# (was indexed with rune2c)
+				rune3v1[rune3c] = runev[j];
+				rune3c++;
+			}
+			mode = 4;
+			rune4v0[rune4c] = runev[j];
+			continue;
+		}
+		# terminating case
+		if (mode == 1) {
+			rune1v[rune1c] = runev[j];
+			if (length(casev) > 0) {
+				case1v[rune1c] = casev[j];
+			}
+			rune1c++;
+		} else if (mode == 2) {
+			rune2v1[rune2c] = runev[j];
+			rune2c++;
+		} else if (mode == 3) {
+			rune3v1[rune3c] = runev[j];
+			rune3c++;
+		} else { #lace 2
+			rune4v1[rune4c] = runev[j];
+			rune4c++;
+		}
+		mode = 1;
+	}
+	print HEADER > file;
+
+	#generate list of laces 1
+	if(rune3c > 0) {
+		print "static Rune "name"3[][2] = {" > file;
+		for(j = 0; j < rune3c; j++) {
+			print "\t{ 0x"rune3v0[j]", 0x"rune3v1[j]" }," > file;
+		}
+		print "};\n" > file;
+	}
+
+	#generate list of laces 2
+	if(rune4c > 0) {
+		print "static Rune "name"4[][2] = {" > file;
+		for(j = 0; j < rune4c; j++) {
+			print "\t{ 0x"rune4v0[j]", 0x"rune4v1[j]" }," > file;
+		}
+		print "};\n" > file;
+	}
+
+	# generate list of ranges
+	if(rune2c > 0) {
+		if(length(casev) > 0) {
+			print "static Rune "name"2[][3] = {" > file;
+			for(j = 0; j < rune2c; j++) {
+				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]", 0x"case2v[j]" }," > file;
+			}
+		} else {
+			print "static Rune "name"2[][2] = {" > file
+			for(j = 0; j < rune2c; j++) {
+				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]" }," > file;
+			}
+		}
+		print "};\n" > file;
+	}
+
+	# generate list of singletons
+	if(rune1c > 0) {
+		if(length(casev) > 0) {
+			print "static Rune "name"1[][2] = {" > file;
+			for(j = 0; j < rune1c; j++) {
+				print "\t{ 0x"rune1v[j]", 0x"case1v[j]" }," > file;
+			}
+		} else {
+			print "static Rune "name"1[] = {" > file;
+			for(j = 0; j < rune1c; j++) {
+				print "\t0x"rune1v[j]"," > file;
+			}
+		}
+		print "};\n" > file;
+	}
+	# generate lookup function
+	print "int\nis"name"rune(Rune r)\n{" > file;
+	if(rune4c > 0 || rune3c > 0)
+		print "\tRune *match;\n" > file;
+	if(rune4c > 0) {
+		print "\tif((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp)))" > file;
+		print "\t\treturn !((r - match[0]) % 2);" > file;
+	}
+	if(rune3c > 0) {
+		print "\tif((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp)))" > file;
+		print "\t\treturn !((r - match[0]) % 2);" > file;
+	}
+	if(rune2c > 0) {
+		print "\tif(bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp))\n\t\treturn 1;" > file;
+	}
+	if(rune1c > 0) {
+		print "\tif(bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp))\n\t\treturn 1;" > file;
+	}
+	print "\treturn 0;\n}" > file;
+
+	# generate case conversion function
+	if(length(casev) > 0) {
+		print "\nint\nto"casename"rune(Rune r)\n{\n\tRune *match;\n" > file;
+		if(rune4c > 0) {
+			print "\tmatch = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp);" > file;
+			print "\tif (match)" > file;
+			print "\t\treturn ((r - match[0]) % 2) ? r : r - 1;" > file;
+		}
+		if(rune3c > 0) {
+			print "\tmatch = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp);" > file;
+			print "\tif (match)" > file;
+			print "\t\treturn ((r - match[0]) % 2) ? r : r + 1;" > file;
+		}
+		if(rune2c > 0) {
+			print "\tmatch = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp);" > file;
+			print "\tif (match)" > file;
+			print "\t\treturn match[2] + (r - match[0]);" > file;
+		}
+		if(rune1c > 0) {
+			print "\tmatch = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp);" > file;
+			print "\tif (match)" > file;
+			print "\t\treturn match[1];" > file;
+		}
+		print "\treturn r;\n}" > file;
+	}
+}
+
+# generate the classification functions that are defined in terms of the
+# table-driven ones or of fixed ASCII ranges
+function otheris() {
+	print HEADER_OTHER > "isalnumrune.c";
+	print "int\nisalnumrune(Rune r)\n{\n\treturn isalpharune(r) || isdigitrune(r);\n}" > "isalnumrune.c";
+	print HEADER_OTHER > "isblankrune.c";
+	print "int\nisblankrune(Rune r)\n{\n\treturn r == ' ' || r == '\\t';\n}" > "isblankrune.c";
+	print HEADER_OTHER > "isprintrune.c";
+	print "int\nisprintrune(Rune r)\n{\n\treturn !iscntrlrune(r) && (r != 0x2028) && (r != 0x2029) &&" > "isprintrune.c";
+	print "\t       ((r < 0xFFF9) || (r > 0xFFFB));\n}" > "isprintrune.c";
+	print HEADER_OTHER > "isgraphrune.c";
+	print "int\nisgraphrune(Rune r)\n{\n\treturn !isspacerune(r) && isprintrune(r);\n}" > "isgraphrune.c";
+	print HEADER_OTHER > "ispunctrune.c";
+	print "int\nispunctrune(Rune r)\n{\n\treturn isgraphrune(r) && !isalnumrune(r);\n}" > "ispunctrune.c";
+	print HEADER_OTHER > "isxdigitrune.c";
+	# accept upper-case hexadecimal digits as well as lower-case ones
+	print "int\nisxdigitrune(Rune r)\n{\n\treturn (r >= '0' && (r - '0') < 10) || (r >= 'a' && (r - 'a') < 6) || (r >= 'A' && (r - 'A') < 6);\n}" > "isxdigitrune.c";
+}
diff --git a/libutf/rune.c b/libutf/rune.c
new file mode 100644
index 0000000..1273f45
--- /dev/null
+++ b/libutf/rune.c
_AT_@ -0,0 +1,148 @@
+/* MIT/X Consortium Copyright (c) 2012 Connor Lane Smith <cls_AT_lubutu.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or
sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "../utf.h"
+
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+
+/* Length of the sequence announced by lead byte x (1 to 6), or 0 when x is
+ * a continuation byte or matches none of the lead byte patterns. */
+#define UTFSEQ(x) ((((x) & 0x80) == 0x00) ? 1 /* 0xxxxxxx */ \
+	: (((x) & 0xC0) == 0x80) ? 0 /* 10xxxxxx */ \
+	: (((x) & 0xE0) == 0xC0) ? 2 /* 110xxxxx */ \
+	: (((x) & 0xF0) == 0xE0) ? 3 /* 1110xxxx */ \
+	: (((x) & 0xF8) == 0xF0) ? 4 /* 11110xxx */ \
+	: (((x) & 0xFC) == 0xF8) ? 5 /* 111110xx */ \
+	: (((x) & 0xFE) == 0xFC) ? 6 /* 1111110x */ \
+	: 0 )
+
+/* True for values that are not accepted as runes: negative or above
+ * Runemax, low 16 bits equal to 0xFFFE or 0xFFFF, 0xD800-0xDFFF, and
+ * 0xFDD0-0xFDEF. */
+#define BADRUNE(x) ((x) < 0 || (x) > Runemax \
+	|| ((x) & 0xFFFE) == 0xFFFE \
+	|| ((x) >= 0xD800 && (x) <= 0xDFFF) \
+	|| ((x) >= 0xFDD0 && (x) <= 0xFDEF))
+
+/* Encode the rune *p as UTF-8 into s.  Returns the number of bytes stored
+ * (1 to 4), or 0 without touching s when runelen() rejects the rune.  No
+ * terminating NUL is written. */
+int
+runetochar(char *s, const Rune *p)
+{
+	Rune r = *p;
+
+	switch(runelen(r)) {
+	case 1: /* 0aaaaaaa */
+		s[0] = r;
+		return 1;
+	case 2: /* 00000aaa aabbbbbb */
+		s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */
+		s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */
+		return 2;
+	case 3: /* aaaabbbb bbcccccc */
+		s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
+		s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */
+		s[2] = 0x80 | (r & 0x00003F); /* 10cccccc */
+		return 3;
+	case 4: /* 000aaabb bbbbcccc ccdddddd */
+		s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
+		s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
+		s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */
+		s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */
+		return 4;
+	default:
+		return 0; /* error */
+	}
+}
+
+/* Decode one rune from s, looking at no more than UTFmax bytes; see
+ * charntorune() for the return value. */
+int
+chartorune(Rune *p, const char *s)
+{
+	return charntorune(p, s, UTFmax);
+}
+
+/* Decode one rune from the first len bytes of s into *p.
+ *
+ * Returns 0, leaving *p untouched, when len is 0 or when len ends before
+ * the sequence announced by s[0] is complete.  An invalid lead byte stores
+ * Runeerror and returns 1; a byte that should have been a continuation
+ * byte stores Runeerror and returns the index of that byte.  Otherwise the
+ * sequence length is returned and *p is the decoded value, or Runeerror
+ * when the value is BADRUNE or was encoded in more bytes than runelen()
+ * requires. */
+int
+charntorune(Rune *p, const char *s, size_t len)
+{
+	unsigned int i, n;
+	Rune r;
+
+	if(len == 0) /* can't even look at s[0] */
+		return 0;
+
+	switch((n = UTFSEQ(s[0]))) {
+	case 1: r = s[0]; break; /* 0xxxxxxx */
+	case 2: r = s[0] & 0x1F; break; /* 110xxxxx */
+	case 3: r = s[0] & 0x0F; break; /* 1110xxxx */
+	case 4: r = s[0] & 0x07; break; /* 11110xxx */
+	case 5: r = s[0] & 0x03; break; /* 111110xx */
+	case 6: r = s[0] & 0x01; break; /* 1111110x */
+	default: /* invalid sequence */
+		*p = Runeerror;
+		return 1;
+	}
+	/* add values from continuation bytes */
+	for(i = 1; i < MIN(n, len); i++)
+		if((s[i] & 0xC0) == 0x80) {
+			/* add bits from continuation byte to rune value
+			 * cannot overflow: 6 byte sequences contain 31 bits */
+			r = (r << 6) | (s[i] & 0x3F); /* 10xxxxxx */
+		}
+		else { /* expected continuation */
+			*p = Runeerror;
+			return i;
+		}
+
+	if(i < n) /* must have reached len limit */
+		return 0;
+
+	/* reject invalid or overlong sequences */
+	if(BADRUNE(r) || runelen(r) < (int)n)
+		r = Runeerror;
+
+	*p = r;
+	return n;
+}
+
+/* Number of bytes runetochar() needs for r, or 0 when BADRUNE(r). */
+int
+runelen(Rune r)
+{
+	if(BADRUNE(r))
+		return 0; /* error */
+	else if(r <= 0x7F)
+		return 1;
+	else if(r <= 0x07FF)
+		return 2;
+	else if(r <= 0xFFFF)
+		return 3;
+	else
+		return 4;
+}
+
+/* Sum of runelen() over the len runes starting at p; runes rejected by
+ * runelen() add nothing. */
+size_t
+runenlen(const Rune *p, size_t len)
+{
+	size_t i, n = 0;
+
+	for(i = 0; i < len; i++)
+		n += runelen(p[i]);
+	return n;
+}
+
+/* Nonzero when charntorune() consumes at least one of the first len bytes
+ * of s, which includes the cases where it reports Runeerror. */
+int
+fullrune(const char *s, size_t len)
+{
+	Rune r;
+
+	return charntorune(&r, s, len) > 0;
+}
diff --git a/libutf/runetype.c b/libutf/runetype.c
new file mode 100644
index 0000000..9e8ede8
--- /dev/null
+++ b/libutf/runetype.c
_AT_@ -0,0 +1,41 @@
+/* MIT/X Consortium Copyright (c) 2012 Connor Lane Smith <cls_AT_lubutu.com>
+ * (c) 2015 Laslo Hunhold <dev_AT_frign.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "../utf.h"
+
+/* bsearch() comparator for a table of single runes: v1 is the key, v2 the
+ * table element.  Returns a negative, zero or positive value.  The result
+ * is built from comparisons rather than a subtraction so that keys far
+ * outside the valid rune range cannot overflow or truncate. */
+int
+rune1cmp(const void *v1, const void *v2)
+{
+	Rune r1 = *(const Rune *)v1, r2 = *(const Rune *)v2;
+
+	return (r1 > r2) - (r1 < r2);
+}
+
+/* bsearch() comparator for a table of rune intervals: v1 is the key, v2 an
+ * element whose first two entries are the inclusive bounds.  Returns 0 when
+ * the key lies inside the interval, otherwise its order relative to the
+ * lower bound. */
+int
+rune2cmp(const void *v1, const void *v2)
+{
+	Rune r = *(const Rune *)v1;
+	const Rune *p = v2;
+
+	if(r >= p[0] && r <= p[1])
+		return 0;
+	return (r > p[0]) - (r < p[0]);
+}
diff --git a/libutf/runetype.h b/libutf/runetype.h
new file mode 100644
index 0000000..8d09c34
--- /dev/null
+++ b/libutf/runetype.h
_AT_@ -0,0 +1,26 @@
+/* MIT/X Consortium Copyright (c) 2012 Connor Lane Smith <cls_AT_lubutu.com>
+ * (c) 2015 Laslo Hunhold <dev_AT_frign.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ + +#define nelem(x) (sizeof (x) / sizeof *(x)) + +int rune1cmp(const void *, const void *); +int rune2cmp(const void *, const void *); diff --git a/libutf/upperrune.c b/libutf/upperrune.c new file mode 100644 index 0000000..5628a2e --- /dev/null +++ b/libutf/upperrune.c _AT_@ -0,0 +1,251 @@ +/* Automatically generated by mkrunetype.awk */ +#include <stdlib.h> + +#include "../utf.h" +#include "runetype.h" + +static Rune upper3[][2] = { + { 0x0100, 0x012E }, + { 0x0132, 0x0136 }, + { 0x0139, 0x0147 }, + { 0x014A, 0x0176 }, + { 0x0179, 0x017D }, + { 0x0182, 0x0184 }, + { 0x01A0, 0x01A4 }, + { 0x01B3, 0x01B5 }, + { 0x01CD, 0x01DB }, + { 0x01DE, 0x01EE }, + { 0x01F8, 0x021E }, + { 0x0222, 0x0232 }, + { 0x0246, 0x024E }, + { 0x0370, 0x0372 }, + { 0x03D8, 0x03EE }, + { 0x0460, 0x0480 }, + { 0x048A, 0x04BE }, + { 0x04C1, 0x04CD }, + { 0x04D0, 0x052E }, + { 0x1E00, 0x1E94 }, + { 0x1EA0, 0x1EFE }, + { 0x2C67, 0x2C6B }, + { 0x2C80, 0x2CE2 }, + { 0x2CEB, 0x2CED }, + { 0xA640, 0xA66C }, + { 0xA680, 0xA69A }, + { 0xA722, 0xA72E }, + { 0xA732, 0xA76E }, + { 0xA779, 0xA77B }, + { 0xA77E, 0xA786 }, + { 0xA790, 0xA792 }, + { 0xA796, 0xA7A8 }, + { 0xA7B4, 0xA7B6 }, +}; + +static Rune upper2[][3] = { + { 0x0041, 0x005A, 0x0061 }, + { 0x00C0, 0x00D6, 0x00E0 }, + { 0x00D8, 0x00DE, 0x00F8 }, + { 0x0189, 0x018A, 0x0256 }, + { 0x01B1, 0x01B2, 0x028A }, + { 0x0388, 0x038A, 0x03AD }, + { 0x038E, 0x038F, 0x03CD }, + { 0x0391, 0x03A1, 0x03B1 }, + { 0x03A3, 0x03AB, 0x03C3 }, + { 0x03D2, 0x03D4, 0x03D2 }, + { 0x03FD, 0x03FF, 0x037B }, + { 0x0400, 0x040F, 0x0450 }, + { 0x0410, 0x042F, 0x0430 }, + { 0x0531, 0x0556, 0x0561 }, + { 0x10A0, 0x10C5, 0x2D00 }, + { 0x13A0, 0x13EF, 0xAB70 }, + { 0x13F0, 0x13F5, 0x13F8 }, + { 0x1F08, 0x1F0F, 0x1F00 }, + { 0x1F18, 0x1F1D, 0x1F10 }, + { 0x1F28, 0x1F2F, 0x1F20 }, + { 0x1F38, 0x1F3F, 0x1F30 }, + { 0x1F48, 0x1F4D, 0x1F40 }, + { 0x1F68, 0x1F6F, 0x1F60 }, + { 0x1FB8, 0x1FB9, 0x1FB0 }, + { 0x1FBA, 0x1FBB, 0x1F70 }, + { 0x1FC8, 0x1FCB, 0x1F72 }, + { 0x1FD8, 
0x1FD9, 0x1FD0 }, + { 0x1FDA, 0x1FDB, 0x1F76 }, + { 0x1FE8, 0x1FE9, 0x1FE0 }, + { 0x1FEA, 0x1FEB, 0x1F7A }, + { 0x1FF8, 0x1FF9, 0x1F78 }, + { 0x1FFA, 0x1FFB, 0x1F7C }, + { 0x210B, 0x210D, 0x210B }, + { 0x2110, 0x2112, 0x2110 }, + { 0x2119, 0x211D, 0x2119 }, + { 0x212C, 0x212D, 0x212C }, + { 0x2130, 0x2131, 0x2130 }, + { 0x213E, 0x213F, 0x213E }, + { 0x2C00, 0x2C2E, 0x2C30 }, + { 0x2C7E, 0x2C7F, 0x023F }, + { 0xFF21, 0xFF3A, 0xFF41 }, + { 0x10400, 0x10427, 0x10428 }, + { 0x104B0, 0x104D3, 0x104D8 }, + { 0x10C80, 0x10CB2, 0x10CC0 }, + { 0x118A0, 0x118BF, 0x118C0 }, + { 0x1D400, 0x1D419, 0x1D400 }, + { 0x1D434, 0x1D44D, 0x1D434 }, + { 0x1D468, 0x1D481, 0x1D468 }, + { 0x1D49E, 0x1D49F, 0x1D49E }, + { 0x1D4A5, 0x1D4A6, 0x1D4A5 }, + { 0x1D4A9, 0x1D4AC, 0x1D4A9 }, + { 0x1D4AE, 0x1D4B5, 0x1D4AE }, + { 0x1D4D0, 0x1D4E9, 0x1D4D0 }, + { 0x1D504, 0x1D505, 0x1D504 }, + { 0x1D507, 0x1D50A, 0x1D507 }, + { 0x1D50D, 0x1D514, 0x1D50D }, + { 0x1D516, 0x1D51C, 0x1D516 }, + { 0x1D538, 0x1D539, 0x1D538 }, + { 0x1D53B, 0x1D53E, 0x1D53B }, + { 0x1D540, 0x1D544, 0x1D540 }, + { 0x1D54A, 0x1D550, 0x1D54A }, + { 0x1D56C, 0x1D585, 0x1D56C }, + { 0x1D5A0, 0x1D5B9, 0x1D5A0 }, + { 0x1D5D4, 0x1D5ED, 0x1D5D4 }, + { 0x1D608, 0x1D621, 0x1D608 }, + { 0x1D63C, 0x1D655, 0x1D63C }, + { 0x1D670, 0x1D689, 0x1D670 }, + { 0x1D6A8, 0x1D6C0, 0x1D6A8 }, + { 0x1D6E2, 0x1D6FA, 0x1D6E2 }, + { 0x1D71C, 0x1D734, 0x1D71C }, + { 0x1D756, 0x1D76E, 0x1D756 }, + { 0x1D790, 0x1D7A8, 0x1D790 }, + { 0x1E900, 0x1E921, 0x1E922 }, +}; + +static Rune upper1[][2] = { + { 0x0130, 0x0069 }, + { 0x0178, 0x00FF }, + { 0x0181, 0x0253 }, + { 0x0186, 0x0254 }, + { 0x0187, 0x0188 }, + { 0x018B, 0x018C }, + { 0x018E, 0x01DD }, + { 0x018F, 0x0259 }, + { 0x0190, 0x025B }, + { 0x0191, 0x0192 }, + { 0x0193, 0x0260 }, + { 0x0194, 0x0263 }, + { 0x0196, 0x0269 }, + { 0x0197, 0x0268 }, + { 0x0198, 0x0199 }, + { 0x019C, 0x026F }, + { 0x019D, 0x0272 }, + { 0x019F, 0x0275 }, + { 0x01A6, 0x0280 }, + { 0x01A7, 0x01A8 }, + { 0x01A9, 0x0283 }, + { 
0x01AC, 0x01AD }, + { 0x01AE, 0x0288 }, + { 0x01AF, 0x01B0 }, + { 0x01B7, 0x0292 }, + { 0x01B8, 0x01B9 }, + { 0x01BC, 0x01BD }, + { 0x01C4, 0x01C6 }, + { 0x01C7, 0x01C9 }, + { 0x01CA, 0x01CC }, + { 0x01F1, 0x01F3 }, + { 0x01F4, 0x01F5 }, + { 0x01F6, 0x0195 }, + { 0x01F7, 0x01BF }, + { 0x0220, 0x019E }, + { 0x023A, 0x2C65 }, + { 0x023B, 0x023C }, + { 0x023D, 0x019A }, + { 0x023E, 0x2C66 }, + { 0x0241, 0x0242 }, + { 0x0243, 0x0180 }, + { 0x0244, 0x0289 }, + { 0x0245, 0x028C }, + { 0x0376, 0x0377 }, + { 0x037F, 0x03F3 }, + { 0x0386, 0x03AC }, + { 0x038C, 0x03CC }, + { 0x03CF, 0x03D7 }, + { 0x03F4, 0x03B8 }, + { 0x03F7, 0x03F8 }, + { 0x03F9, 0x03F2 }, + { 0x03FA, 0x03FB }, + { 0x04C0, 0x04CF }, + { 0x10C7, 0x2D27 }, + { 0x10CD, 0x2D2D }, + { 0x1E9E, 0x00DF }, + { 0x1F59, 0x1F51 }, + { 0x1F5B, 0x1F53 }, + { 0x1F5D, 0x1F55 }, + { 0x1F5F, 0x1F57 }, + { 0x1FEC, 0x1FE5 }, + { 0x2102, 0x2102 }, + { 0x2107, 0x2107 }, + { 0x2115, 0x2115 }, + { 0x2124, 0x2124 }, + { 0x2126, 0x03C9 }, + { 0x2128, 0x2128 }, + { 0x212A, 0x006B }, + { 0x212B, 0x00E5 }, + { 0x2132, 0x214E }, + { 0x2133, 0x2133 }, + { 0x2145, 0x2145 }, + { 0x2183, 0x2184 }, + { 0x2C60, 0x2C61 }, + { 0x2C62, 0x026B }, + { 0x2C63, 0x1D7D }, + { 0x2C64, 0x027D }, + { 0x2C6D, 0x0251 }, + { 0x2C6E, 0x0271 }, + { 0x2C6F, 0x0250 }, + { 0x2C70, 0x0252 }, + { 0x2C72, 0x2C73 }, + { 0x2C75, 0x2C76 }, + { 0x2CF2, 0x2CF3 }, + { 0xA77D, 0x1D79 }, + { 0xA78B, 0xA78C }, + { 0xA78D, 0x0265 }, + { 0xA7AA, 0x0266 }, + { 0xA7AB, 0x025C }, + { 0xA7AC, 0x0261 }, + { 0xA7AD, 0x026C }, + { 0xA7AE, 0x026A }, + { 0xA7B0, 0x029E }, + { 0xA7B1, 0x0287 }, + { 0xA7B2, 0x029D }, + { 0xA7B3, 0xAB53 }, + { 0x1D49C, 0x1D49C }, + { 0x1D4A2, 0x1D4A2 }, + { 0x1D546, 0x1D546 }, + { 0x1D7CA, 0x1D7CA }, +}; + +int +isupperrune(Rune r) +{ + Rune *match; + + if((match = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &rune2cmp))) + return !((r - match[0]) % 2); + if(bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &rune2cmp)) + return 1; + 
if(bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &rune1cmp)) + return 1; + return 0; +} + +int +tolowerrune(Rune r) +{ + Rune *match; + + match = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &rune2cmp); + if (match) + return ((r - match[0]) % 2) ? r : r + 1; + match = bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &rune2cmp); + if (match) + return match[2] + (r - match[0]); + match = bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &rune1cmp); + if (match) + return match[1]; + return r; +} diff --git a/libutf/utf.c b/libutf/utf.c new file mode 100644 index 0000000..897c5ef --- /dev/null +++ b/libutf/utf.c _AT_@ -0,0 +1,129 @@ +/* MIT/X Consortium Copyright (c) 2012 Connor Lane Smith <cls_AT_lubutu.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+#include <string.h>
+#include "../utf.h"
+
+/* Copy whole runes from the string `from` into `to`, decoding each rune
+ * with a byte limit of end - &to[i] so that only runes that fit are taken.
+ * Copying stops after a NUL has been decoded (the NUL is copied too) or
+ * when charntorune() returns 0; in the latter case, if anything was
+ * copied, a NUL is stored at to[i].  Returns &to[i], where i is the number
+ * of bytes copied.
+ * NOTE(review): when the limit reaches 0 the terminator is stored at
+ * to[i] == end, so end presumably has to be a writable byte -- confirm
+ * with the callers. */
+char *
+utfecpy(char *to, char *end, const char *from)
+{
+	Rune r = Runeerror;
+	size_t i, n;
+
+	/* seek through to find final full rune */
+	for(i = 0; r != '\0' && (n = charntorune(&r, &from[i], end - &to[i])); i += n)
+		;
+	memcpy(to, from, i); /* copy over bytes up to this rune */
+
+	if(i > 0 && r != '\0')
+		to[i] = '\0'; /* terminate if unterminated */
+	return &to[i];
+}
+
+/* Number of chartorune() steps needed to reach the terminating NUL of s,
+ * i.e. the length of s in runes. */
+size_t
+utflen(const char *s)
+{
+	const char *p = s;
+	size_t i;
+	Rune r;
+
+	for(i = 0; *p != '\0'; i++)
+		p += chartorune(&r, p);
+	return i;
+}
+
+/* Like utflen(), but looks at no more than len bytes of s: counting stops
+ * at a NUL rune or when charntorune() returns 0 for the remaining bytes. */
+size_t
+utfnlen(const char *s, size_t len)
+{
+	const char *p = s;
+	size_t i;
+	Rune r;
+	int n;
+
+	for(i = 0; (n = charntorune(&r, p, len-(p-s))) && r != '\0'; i++)
+		p += n;
+	return i;
+}
+
+/* Find the first occurrence of rune r in s.  Runes below Runeself are
+ * looked up with strchr(); Runeerror is found by decoding s rune by rune;
+ * any other rune is encoded with runetochar() and searched with strstr().
+ * Returns a pointer into s, or NULL when r is absent or cannot be
+ * encoded. */
+char *
+utfrune(const char *s, Rune r)
+{
+	if(r < Runeself) {
+		return strchr(s, r);
+	}
+	else if(r == Runeerror) {
+		Rune r0;
+		int n;
+
+		for(; *s != '\0'; s += n) {
+			n = chartorune(&r0, s);
+			if(r == r0)
+				return (char *)s;
+		}
+	}
+	else {
+		char buf[UTFmax+1];
+		int n;
+
+		if(!(n = runetochar(buf, &r)))
+			return NULL;
+		buf[n] = '\0';
+		return strstr(s, buf);
+	}
+	return NULL;
+}
+
+/* Find the last occurrence of rune r in s: strrchr() for runes below
+ * Runeself, otherwise a rune-by-rune scan that remembers the latest match.
+ * Returns a pointer into s, or NULL when r does not occur. */
+char *
+utfrrune(const char *s, Rune r)
+{
+	const char *p = NULL;
+	Rune r0;
+	int n;
+
+	if(r < Runeself)
+		return strrchr(s, r);
+
+	for(; *s != '\0'; s += n) {
+		n = chartorune(&r0, s);
+		if(r == r0)
+			p = s;
+	}
+	return (char *)p;
+}
+
+/* Find the first occurrence of the string t in s.  Candidate positions
+ * are the occurrences of the first rune of t, located with utfrune(); at
+ * each one the two strings are compared rune by rune.  Returns a pointer
+ * into s, or NULL when t does not occur. */
+char *
+utfutf(const char *s, const char *t)
+{
+	const char *p, *q;
+	Rune r0, r1, r2;
+	int n, m;
+
+	for(chartorune(&r0, t); (s = utfrune(s, r0)); s++) {
+		for(p = s, q = t; *q && *p; p += n, q += m) {
+			n = chartorune(&r1, p);
+			m = chartorune(&r2, q);
+			if(r1 != r2)
+				break;
+		}
+		if(!*q)
+			return (char *)s;
+	}
+	return NULL;
+}
diff --git a/libutf/utftorunestr.c b/libutf/utftorunestr.c
new file mode 100644
index 0000000..005fe8a
--- /dev/null
+++ b/libutf/utftorunestr.c
_AT_@ -0,0 +1,13 @@
+/* See LICENSE file for copyright and license details.
*/
+#include "../utf.h"
+
+/* Decode the UTF-8 string str into the rune array r, including the
+ * terminating NUL rune.  Returns the number of runes stored before the
+ * terminator.  The caller has to provide room for every rune of str plus
+ * the terminator; no bound is checked here. */
+int
+utftorunestr(const char *str, Rune *r)
+{
+	int i, n;
+
+	for(i = 0; (n = chartorune(&r[i], str)) && r[i]; i++)
+		str += n;
+
+	return i;
+}
diff --git a/libutil/fshut.c b/libutil/fshut.c
new file mode 100644
index 0000000..e596f07
--- /dev/null
+++ b/libutil/fshut.c
_AT_@ -0,0 +1,43 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../util.h"
+
+/* Flush and close fp.  The first failure found (stream error indicator
+ * set, or fclose() failing) is reported through weprintf() with fname as
+ * the label.  Returns 0 on success and 1 on failure; fp is closed either
+ * way. */
+int
+fshut(FILE *fp, const char *fname)
+{
+	int ret = 0;
+
+	/* fflush() is undefined for input streams by ISO C,
+	 * but not POSIX 2008 if you ignore ISO C overrides.
+	 * Leave it unchecked and rely on the following
+	 * functions to detect errors.
+	 */
+	fflush(fp);
+
+	if (ferror(fp) && !ret) {
+		weprintf("ferror %s:", fname);
+		ret = 1;
+	}
+
+	if (fclose(fp) && !ret) {
+		weprintf("fclose %s:", fname);
+		ret = 1;
+	}
+
+	return ret;
+}
+
+/* fshut() that exits with the given status when the stream cannot be
+ * shut cleanly. */
+void
+enfshut(int status, FILE *fp, const char *fname)
+{
+	if (fshut(fp, fname))
+		exit(status);
+}
+
+/* enfshut() with exit status 1. */
+void
+efshut(FILE *fp, const char *fname)
+{
+	enfshut(1, fp, fname);
+}
diff --git a/ul.1 b/ul.1
new file mode 100644
index 0000000..6f1af4b
--- /dev/null
+++ b/ul.1
_AT_@ -0,0 +1,39 @@
+.Dd February 3, 2017
+.Dt UL 1
+.Os ubase
+.Sh NAME
+.Nm ul
+.Nd convert text formatting
+.Sh SYNOPSIS
+.Nm
+.Op Fl CfR
+.Op Fl l Ar lines
+.Op Fl c Ar cols
+.Sh DESCRIPTION
+.Nm
+converts formatted text to a formatting that can
+be displayed by the terminal. For example, the
+byte sequence \fBa, <backspace>, a\fP is replaced
+by a bold \fBa\fP.
+.Nm
+can also remove formatting to output plain text.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl l Ar lines
+Pretend that the screen consists of
+.Ar lines
+lines.
+.It Fl c Ar cols
+Pretend that the screen consists of
+.Ar cols
+columns. Default is unlimited.
+.It Fl C
+Move combining diacritical marks in front
+of the character with which they combine.
+Not implemented yet.
+.It Fl f
+Remove all formatting and output plain text.
+.It Fl R
+If a text segment is both underlined and bold,
+apply reverse video instead.
+.El
diff --git a/ul.c b/ul.c
new file mode 100644
index 0000000..94c6aea
--- /dev/null
+++ b/ul.c
_AT_@ -0,0 +1,384 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdint.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "utf.h"
+#include "util.h"
+
+/* TODO Add support for ANSI escape sequences. */
+/* TODO Add options for remapping ANSI escape sequences. */
+
+/* SGR parameters; the enumerator value is the number printed in the
+ * "ESC [ <n> m" sequence and also the index into active_sgr[]. */
+enum SGR {
+	SGR_BOLD = 1,
+	SGR_FAINT = 2,
+	SGR_ITALIC = 3,
+	SGR_UNDERLINE = 4,
+	SGR_BLINK = 5,
+	SGR_RAPID_BLINK = 6,
+	SGR_REVERSE_VIDEO = 7,
+	SGR_CONCEAL = 8,
+#if 0
+	SGR_CROSS_OUT = 9,
+	SGR_FRAKTUR = 20,
+	SGR_FRAMED = 51,
+	SGR_ENCIRCLED = 52,
+	SGR_OVERLINED = 53,
+	SGR_IDEOGRAM_UNDERLINE = 60,
+	SGR_IDEOGRAM_DOUBLE_UNDERLINE = 61,
+	SGR_IDEOGRAM_OVERLINE = 62,
+	SGR_IDEOGRAM_DOUBLE_OVERLINE = 63,
+	SGR_IDEOGRAM_STRESS_MARKING = 64,
+#endif
+	N_SGR
+};
+
+/* One buffered output line. */
+struct line {
+	Rune *runes;      /* maxstack slots per column; column c starts at c * maxstack */
+	size_t *stacking; /* number of runes stacked in each column */
+	size_t maxstack;  /* slots reserved per column in runes */
+	size_t len;       /* number of columns in use */
+	size_t capacity;  /* number of columns allocated */
+	Rune *end;        /* zero-width runes written at the end of the line */
+	size_t endlen;    /* runes stored in end */
+	size_t endcap;    /* runes allocated for end */
+};
+
+/* Buffered screen and the current write position. */
+static struct {
+	struct line *lines;
+	size_t nlines;
+	size_t maxlines;
+	size_t maxcols;
+	size_t y;
+	size_t x;
+} scr;
+
+static char active_sgr[N_SGR];
+static char underlined_bold_is_reverse_video = 0;
+static char remove_formatting = 0;
+static char move_combining_to_front = 0;
+
+static void
+usage(void)
+{
+	/* program name first, then the options */
+	eprintf("usage: %s [-l lines] [-c cols] [-CfR]\n", argv0);
+}
+
+/* Display width of rune: 0 for control characters and DEL, otherwise 1. */
+static ssize_t
+cwidth(Rune rune)
+{
+	if (rune < ' ' || rune == 127)
+		return 0;
+	/* TODO Check non-ASCII characters */
+	return 1;
+}
+
+/* End the output line, resetting the terminal attributes first if any
+ * SGR attribute is still active. */
+static void
+printlf(void)
+{
+	int i;
+	for (i = 0; i < N_SGR; i++) {
+		if (active_sgr[i]) {
+			printf("\033[0m");
+			memset(active_sgr, 0, sizeof(active_sgr));
+			break;
+		}
+	}
+	printf("\n");
+}
+
+/* Print one column made of the n runes overstruck at the same position.
+ *
+ * Underscores and full blocks (U+2588) are counted, DEL discards what was
+ * collected so far, and the remaining runes are compacted to the front of
+ * runes[] (the array is modified in place).  A repeated visible rune
+ * requests bold, an underscore combined with other runes requests
+ * underline and a full block combined with other runes requests conceal.
+ * Unless formatting is being removed, the SGR sequences needed to switch
+ * from active_sgr to the requested set are emitted.  Then every zero-width
+ * rune and the first visible rune are written.  If needwidth is set and
+ * nothing visible was written, a space is written instead. */
+static void
+printcol(Rune *runes, size_t n, int needwidth)
+{
+	char to_be_active_sgr[N_SGR];
+	Rune *p;
+	size_t underscores = 0;
+	size_t full_blocks = 0;
+	size_t stacked = 0;
+	size_t i, j;
+	int zerowidth = 1;
+	memset(to_be_active_sgr, 0, sizeof(to_be_active_sgr));
+
+	for (p = runes; n--; p++) {
+		if (*p == 0x2588)
+			full_blocks += 1;
+		else if (*p == '_')
+			underscores += 1;
+		else if (*p == 127)
+			full_blocks = underscores = stacked = 0;
+		else
+			runes[stacked++] = *p;
+	}
+
+	for (i = 0; i < stacked; i++) {
+		if (cwidth(runes[i]) <= 0)
+			continue;
+		to_be_active_sgr[SGR_BOLD] = 0;
+		for (j = i + 1; j < stacked; j++) {
+			if (runes[i] == runes[j]) {
+				to_be_active_sgr[SGR_BOLD] = 1;
+				goto bold_done;
+			}
+		}
+	}
+bold_done:
+	if (underscores && stacked) {
+		to_be_active_sgr[SGR_UNDERLINE] = 1;
+	} else if (underscores) {
+		/* only underscores: print one, styled by how many there were */
+		to_be_active_sgr[SGR_UNDERLINE] = underscores > 1;
+		to_be_active_sgr[SGR_BOLD] = underscores > 2;
+		runes[stacked++] = '_';
+	}
+	if (full_blocks && stacked) {
+		to_be_active_sgr[SGR_CONCEAL] = 1;
+	} else if (full_blocks) {
+		/* only full blocks: print one, styled by how many there were */
+		to_be_active_sgr[SGR_CONCEAL] = full_blocks > 1;
+		to_be_active_sgr[SGR_BOLD] = full_blocks > 2;
+		runes[stacked++] = 0x2588;
+	}
+
+	if (remove_formatting)
+		goto formatting_done;
+	if (underlined_bold_is_reverse_video) {
+		if (to_be_active_sgr[SGR_BOLD] && to_be_active_sgr[SGR_UNDERLINE]) {
+			to_be_active_sgr[SGR_BOLD] = 0;
+			to_be_active_sgr[SGR_UNDERLINE] = 0;
+			to_be_active_sgr[SGR_REVERSE_VIDEO] = 1;
+		}
+	}
+	/* an attribute has to go: reset everything, then re-enable below */
+	for (i = 0; i < N_SGR; i++) {
+		if (active_sgr[i] && !to_be_active_sgr[i]) {
+			printf("\033[0m");
+			memset(active_sgr, 0, sizeof(active_sgr));
+			break;
+		}
+	}
+	for (i = 0; i < N_SGR; i++) {
+		if (to_be_active_sgr[i] && !active_sgr[i]) {
+			printf("\033[%im", (int)i);
+			active_sgr[i] = 1;
+		}
+	}
+formatting_done:
+
+	/* TODO implement move_combining_to_front */
+
+	for (i = 0; i < stacked; i++) {
+		if (cwidth(runes[i]) > 0) {
+			if (zerowidth)
+				zerowidth = 0;
+			else
+				continue;
+		}
+		fputrune(runes + i, stdout);
+	}
+	if (zerowidth && needwidth)
+		printf(" ");
+
+}
+
+/* Write the oldest buffered line to stdout, clear it, and rotate it to
+ * the last slot of scr.lines so that its buffers are reused. */
+static void
+printline(void)
+{
+	size_t i;
+	struct line ln = scr.lines[0];
+	if (ln.maxstack)
+		for (i = 0; i < ln.len; i++)
+			printcol(ln.runes + i * ln.maxstack, ln.stacking[i], 1);
+	/* The trailing zero-width runes form a single stack of endlen runes.
+	 * Printing it once per stored rune from a moving start, as was done
+	 * before, repeated its suffixes and read past the stored runes. */
+	if (ln.endlen)
+		printcol(ln.end, ln.endlen, 0);
+	printlf();
+	/* stacking is still NULL for a line that was never extended */
+	if (ln.stacking)
+		memset(ln.stacking, 0, ln.capacity * sizeof(size_t));
+	ln.maxstack = 0;
+	ln.len = 0;
+	ln.endlen = 0;
+	memmove(scr.lines, scr.lines + 1, (scr.maxlines - 1) * sizeof(struct line));
+	scr.lines[scr.maxlines - 1] = ln;
+}
+
+/* Add rune to the stack of column col.  When that column is already
+ * maxstack deep, every column gets one more slot: the array is
+ * reallocated and the existing stacks are moved, last column first, to
+ * their new and wider positions. */
+static void
+stackrune(struct line *line, size_t col, Rune rune)
+{
+	Rune *r, *w;
+	size_t i, j;
+	if (line->stacking[col] == line->maxstack) {
+		line->maxstack += 1;
+		/* NOTE(review): the reason for giving up one column of
+		 * capacity on every growth is not evident from this file */
+		if (line->capacity > line->len)
+			line->capacity -= 1;
+		line->runes = erealloc(line->runes, line->capacity * line->maxstack * sizeof(Rune));
+		w = line->runes + line->len * line->maxstack;
+		r = w - line->len;
+		for (i = line->len; i--;) {
+			*--w = 0;
+			for (j = line->maxstack; j-- > 1;)
+				*--w = *--r;
+		}
+		memset(line->runes + line->len * line->maxstack, 0,
+		       (line->capacity - line->len) * line->maxstack * sizeof(Rune));
+	}
+	line->runes[col * line->maxstack + line->stacking[col]++] = rune;
+}
+
+/* Grow line by 32 columns and zero the columns beyond line->len.  The len
+ * argument is currently unused. */
+static void
+extendline(struct line *line, size_t len)
+{
+	if (!line->maxstack)
+		line->maxstack = 1;
+
+	line->capacity += 32;
+	line->runes = erealloc(line->runes, line->capacity * line->maxstack * sizeof(Rune));
+	line->stacking = erealloc(line->stacking, line->capacity * sizeof(size_t));
+
+	memset(line->runes + line->len * line->maxstack,
+	       0, (line->capacity - line->len) * line->maxstack * sizeof(Rune));
+	memset(line->stacking + line->len, 0, (line->capacity - line->len) * sizeof(size_t));
+}
+
+/* Move to the start of the next line.  When a new line would exceed
+ * scr.maxlines, the oldest line is printed and the position moves up by
+ * one to compensate. */
+static void
+newline(void)
+{
+	scr.x = 0, scr.y++;
+	if (scr.y == scr.nlines && scr.nlines++ == scr.maxlines)
+		printline(), scr.y--, scr.nlines--;
+}
+
+/* Append rune to the zero-width runes stored at the end of line, growing
+ * that buffer eight runes at a time. */
+static void
+addtoend(struct line *line, Rune rune)
+{
+	if (line->endlen == line->endcap)
+		line->end = erealloc(line->end, (line->endcap += 8) * sizeof(Rune));
+	line->end[line->endlen++] = rune;
+}
+
+/* Apply one input rune to the buffered screen.  Backspace, carriage
+ * return, line feed and vertical tab only move the write position; a tab
+ * is expanded to spaces up to the next multiple of eight columns; any
+ * other rune is stacked at the current column. */
+static void
+maprune(Rune rune)
+{
+	size_t repeat = 1;
+	size_t i, x;
+	ssize_t width;
+	struct line *ln;
+
+	switch (rune) {
+	case '\b':
+		scr.x = scr.x ? scr.x - 1 : 0;
+		break;
+	case '\r':
+		scr.x = 0;
+		break;
+	case '\n':
+		newline();
+		break;
+	case '\v':
+		/* next line, same column */
+		repeat = scr.x;
+		newline();
+		scr.x = repeat;
+		break;
+	case '\t':
+		repeat = 8 - scr.x % 8;
+		if (scr.x + repeat >= scr.maxcols)
+			repeat = scr.maxcols - 1 - scr.x;
+		rune = ' ';
+		goto print;
+	default:
+		repeat = 1;
+		goto print;
+	}
+
+	return;
+
+print:
+	width = cwidth(rune);
+	while (repeat--) {
+		x = scr.x + (width > 0 ? width : 0);
+		if (x >= scr.maxcols) {
+			newline();
+			/* the position moved to column 0 of another line */
+			x = scr.x + (width > 0 ? width : 0);
+		}
+		/* newline() may change scr.y and rotate scr.lines, so the
+		 * current line is looked up after it, not before */
+		ln = scr.lines + scr.y;
+		if (x > ln->len) {
+			if (x >= ln->capacity)
+				extendline(ln, x);
+			/* the line grows past its old end: the zero-width runes
+			 * kept there now belong to a real column */
+			for (i = 0; i < ln->endlen; i++)
+				stackrune(ln, scr.x, ln->end[i]);
+			ln->endlen = 0;
+			ln->len = x;
+		}
+		if (width < 0) {
+			if (scr.x > (size_t)-width)
+				scr.x -= (size_t)-width;
+			else
+				scr.x = 0;
+		}
+		if (scr.x == ln->len && width <= 0)
+			addtoend(ln, rune);
+		else
+			stackrune(ln, scr.x, rune);
+		if (width > 0)
+			scr.x += width;
+	}
+}
+
+int
+main(int argc, char *argv[])
+{
+	Rune rune;
+	int savedlf = 0;
+
+	scr.maxlines = 1000; /* TODO Set to SIZE_MAX after dynamic reallocation has been added */
+	scr.maxcols = SIZE_MAX;
+
+	/* TODO (?)
add -v: display control characters */ + ARGBEGIN { + case 'l': + scr.maxlines = estrtonum(EARGF(usage()), 1, SSIZE_MAX); + break; + case 'c': + scr.maxcols = estrtonum(EARGF(usage()), 1, SSIZE_MAX); + break; + case 'C': + move_combining_to_front = 1; + break; + case 'f': + remove_formatting = 1; + break; + case 'R': + underlined_bold_is_reverse_video = 1; + break; + default: + usage(); + } ARGEND + + if (argc) + usage(); + + scr.lines = ecalloc(scr.maxlines, sizeof(struct line)); + if (!efgetrune(&rune, stdin, "<stdin>")) + goto done; + scr.nlines = 1; + if (rune == '\n') + savedlf = 1; + else + maprune(rune); + while (efgetrune(&rune, stdin, "<stdin>")) { + if (savedlf) { + savedlf = 0; + maprune('\n'); + } + if (rune == '\n') + savedlf = 1; + else + maprune(rune); + } + while (scr.nlines--) + printline(); + +done: + while (scr.maxlines--) { + free(scr.lines[scr.maxlines].runes); + free(scr.lines[scr.maxlines].stacking); + free(scr.lines[scr.maxlines].end); + } + free(scr.lines); + efshut(stdin, "<stdin>"); + efshut(stdout, "<stdout>"); + return 0; +} diff --git a/utf.h b/utf.h new file mode 100644 index 0000000..a74be94 --- /dev/null +++ b/utf.h _AT_@ -0,0 +1,67 @@ +/* MIT/X Consortium Copyright (c) 2012 Connor Lane Smith <cls_AT_lubutu.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <stdio.h> + +typedef int Rune; + +enum { + UTFmax = 6, /* maximum bytes per rune */ + Runeself = 0x80, /* rune and utf are equal (<) */ + Runeerror = 0xFFFD, /* decoding error in utf */ + Runemax = 0x10FFFF /* maximum rune value */ +}; + +int runetochar(char *, const Rune *); +int chartorune(Rune *, const char *); +int charntorune(Rune *, const char *, size_t); +int runelen(const Rune); +size_t runenlen(const Rune *, size_t); +int fullrune(const char *, size_t); +char *utfecpy(char *, char *, const char *); +size_t utflen(const char *); +size_t utfnlen(const char *, size_t); +char *utfrune(const char *, Rune); +char *utfrrune(const char *, Rune); +char *utfutf(const char *, const char *); + +int isalnumrune(Rune); +int isalpharune(Rune); +int isblankrune(Rune); +int iscntrlrune(Rune); +int isdigitrune(Rune); +int isgraphrune(Rune); +int islowerrune(Rune); +int isprintrune(Rune); +int ispunctrune(Rune); +int isspacerune(Rune); +int istitlerune(Rune); +int isupperrune(Rune); +int isxdigitrune(Rune); + +Rune tolowerrune(Rune); +Rune toupperrune(Rune); + +int utftorunestr(const char*, Rune *); + +int fgetrune(Rune *, FILE *); +int efgetrune(Rune *, FILE *, const char *); +int fputrune(const Rune *, FILE *); +int efputrune(const Rune *, FILE *, const char *); diff --git a/util.h b/util.h index 6ab609e..e4be15d 100644 --- a/util.h +++ b/util.h _AT_@ -1,4 +1,6 @@ /* See LICENSE file for copyright and license details. 
*/ +#include <stdio.h> + #include "arg.h" #define UTF8_POINT(c) (((c) & 0xc0) != 0x80) _AT_@ -27,6 +29,11 @@ void *emalloc(size_t size); void *erealloc(void *, size_t); char *estrdup(const char *); +/* fshut.c */ +void enfshut(int, FILE *, const char *); +void efshut(FILE *, const char *); +int fshut(FILE *, const char *); + /* eprintf.c */ void enprintf(int, const char *, ...); void eprintf(const char *, ...); -- 2.11.0Received on Fri Feb 03 2017 - 21:56:01 CET
This archive was generated by hypermail 2.3.0 : Fri Feb 03 2017 - 22:00:18 CET