[hackers] [sbase] Add even stricter UTF-8-support to wc(1) || FRIGN

From: <git_AT_suckless.org>
Date: Sun, 1 Feb 2015 11:20:54 +0100 (CET)

commit 017ec7655d5fe4df729633dbe1199c98de41e3cf
Author: FRIGN <dev_AT_frign.de>
Date: Sun Feb 1 04:06:06 2015 +0100

    Add even stricter UTF-8-support to wc(1)
    
    This uses readrune() and iswspace().
    musl, for instance, doesn't differentiate between iswspace() and
    isspace(), but when it does, the code will be ready.
    It goes without saying that GNU coreutils don't use iswspace()[0].
    
    [0]: http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c

diff --git a/wc.c b/wc.c
index f283e1b..6af23d4 100644
--- a/wc.c
+++ b/wc.c
@@ -3,7 +3,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <wctype.h>
 
+#include "utf.h"
 #include "util.h"
 
 static int lflag = 0;
@@ -30,16 +32,16 @@ output(const char *str, size_t nc, size_t nl, size_t nw)
 void
 wc(FILE *fp, const char *str)
 {
- int word = 0;
- int c;
+ int word = 0, read;
+ Rune c;
         size_t nc = 0, nl = 0, nw = 0;
 
- while ((c = getc(fp)) != EOF) {
- if (cmode != 'm' || UTF8_POINT(c))
- nc++;
+ while ((read = readrune(str, fp, &c))) {
+ nc += (cmode == 'c') ? read :
+ (c != Runeerror) ? 1 : 0;
                 if (c == '\n')
                         nl++;
- if (!isspace(c))
+ if (!iswspace(c))
                         word = 1;
                 else if (word) {
                         word = 0;
Received on Sun Feb 01 2015 - 11:20:54 CET

This archive was generated by hypermail 2.3.0 : Sun Feb 01 2015 - 11:24:13 CET