[hackers] [sbase] Add even stricter UTF-8-support to wc(1) || FRIGN

From: <git_AT_suckless.org>
Date: Tue, 24 Mar 2015 23:53:33 +0100 (CET)

commit 986a9de51a77e7f6803e1b2259ec0675762077db
Author: FRIGN <dev_AT_frign.de>
Date: Sun Feb 1 04:06:06 2015 +0100

    Add even stricter UTF-8-support to wc(1)
    
    This is done using readrune() and iswspace().
    musl, for instance, doesn't differentiate between iswspace() and
    isspace(), but when it does, the code will be ready.
    It goes without saying that GNU coreutils don't use iswspace()[0].
    
    [0]: http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c

diff --git a/wc.c b/wc.c
index f283e1b..6af23d4 100644
--- a/wc.c
+++ b/wc.c
@@ -3,7 +3,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <wctype.h>
 
+#include "utf.h"
 #include "util.h"
 
 static int lflag = 0;
@@ -30,16 +32,16 @@ output(const char *str, size_t nc, size_t nl, size_t nw)
 void
 wc(FILE *fp, const char *str)
 {
- int word = 0;
- int c;
+ int word = 0, read;
+ Rune c;
         size_t nc = 0, nl = 0, nw = 0;
 
- while ((c = getc(fp)) != EOF) {
- if (cmode != 'm' || UTF8_POINT(c))
- nc++;
+ while ((read = readrune(str, fp, &c))) {
+ nc += (cmode == 'c') ? read :
+ (c != Runeerror) ? 1 : 0;
                 if (c == '\n')
                         nl++;
- if (!isspace(c))
+ if (!iswspace(c))
                         word = 1;
                 else if (word) {
                         word = 0;
Received on Tue Mar 24 2015 - 23:53:33 CET

This archive was generated by hypermail 2.3.0 : Wed Mar 25 2015 - 00:07:06 CET