[hackers] [libutf] add manpages rune.3, isalpharune.3 || Connor Lane Smith
changeset: 16:22b64c605385
tag: tip
user: Connor Lane Smith <cls_AT_lubutu.com>
date: Mon May 21 19:00:45 2012 +0100
files: Makefile config.mk isalpharune.3 rune.3 rune.c utf.h
description:
add manpages rune.3, isalpharune.3
diff -r bcf6cc6b9f2d -r 22b64c605385 Makefile
--- a/Makefile Mon May 21 17:57:46 2012 +0100
+++ b/Makefile Mon May 21 19:00:45 2012 +0100
@@ -8,8 +8,6 @@
LIB = libutf.a
INC = utf.h
-UCD = UnicodeData-6.1.0.txt
-
all: $(LIB) utftest
$(LIB): $(OBJ)
@@ -21,18 +19,24 @@
.c.o:
$(CC) $(CFLAGS) -c $<
-runetypebody.h: mkrunetype.awk $(UCD)
- $(AWK) -f mkrunetype.awk $(UCD) > $@
+runetypebody.h: mkrunetype.awk UnicodeData-$(UNICODE).txt
+ $(AWK) -f mkrunetype.awk UnicodeData-$(UNICODE).txt > $@
-install: $(LIB)
- mkdir -p $(DESTDIR)$(PREFIX)/lib
- cp $(LIB) $(DESTDIR)$(PREFIX)/lib/$(LIB)
- mkdir -p $(DESTDIR)$(PREFIX)/include
- cp $(INC) $(DESTDIR)$(PREFIX)/include/$(INC)
+install: $(LIB) $(INC) $(MAN)
+ @echo @ install libutf
+ @mkdir -p $(DESTDIR)$(PREFIX)/lib
+ @cp $(LIB) $(DESTDIR)$(PREFIX)/lib/$(LIB)
+ @mkdir -p $(DESTDIR)$(PREFIX)/include
+ @cp $(INC) $(DESTDIR)$(PREFIX)/include/$(INC)
+ @mkdir -p $(DESTDIR)$(PREFIX)/share/man/man3
+ @cp rune.3 $(DESTDIR)$(PREFIX)/share/man/man3/rune.3
+ @sed 's/$$UNICODE/$(UNICODE)/g' isalpharune.3 > $(DESTDIR)$(PREFIX)/share/man/man3/isalpharune.3
uninstall:
rm -f $(DESTDIR)$(PREFIX)/lib/$(LIB)
rm -f $(DESTDIR)$(PREFIX)/include/$(INC)
+ rm -f $(DESTDIR)$(PREFIX)/share/man/man3/rune.3
+ rm -f $(DESTDIR)$(PREFIX)/share/man/man3/isalpharune.3
clean:
rm -f $(LIB) utftest utftest.o $(OBJ)
diff -r bcf6cc6b9f2d -r 22b64c605385 config.mk
--- a/config.mk Mon May 21 17:57:46 2012 +0100
+++ b/config.mk Mon May 21 19:00:45 2012 +0100
@@ -2,6 +2,8 @@
PREFIX = /usr/local
+UNICODE = 6.1.0
+
CFLAGS = -ansi -pedantic -Os -Wall -Wextra
LDFLAGS = -s
diff -r bcf6cc6b9f2d -r 22b64c605385 isalpharune.3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/isalpharune.3 Mon May 21 19:00:45 2012 +0100
@@ -0,0 +1,29 @@
+.Dd $Mdocdate$
+.Dt ISALPHARUNE 3
+.Os
+.Sh NAME
+.Nm isalpharune, islowerrune, isspacerune, istitlerune, isupperrune, isdigitrune
+.Nd Unicode rune classification
+.Sh SYNOPSIS
+.Ft int
+.Fn isalpharune "Rune r"
+.Ft int
+.Fn islowerrune "Rune r"
+.Ft int
+.Fn isspacerune "Rune r"
+.Ft int
+.Fn istitlerune "Rune r"
+.Ft int
+.Fn isupperrune "Rune r"
+.Ft int
+.Fn isdigitrune "Rune r"
+.Sh DESCRIPTION
+These functions classify Unicode runes according to their properties defined in the Unicode standard, analogously to
+.Xr ctype 3
+for ASCII.
+.Sh CONFORMING TO
+These functions are compatible with those defined in the Plan 9 C library, but are generated automatically from the Unicode $UNICODE Character Database, so classifications may differ.
+.Sh SEE ALSO
+.Xr ctype 3 ,
+.Xr rune 3 ,
+The Unicode $UNICODE Standard
diff -r bcf6cc6b9f2d -r 22b64c605385 rune.3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rune.3 Mon May 21 19:00:45 2012 +0100
@@ -0,0 +1,157 @@
+.Dd $Mdocdate$
+.Dt RUNE 3
+.Os
+.Sh NAME
+.Nm runetochar, chartorune, charntorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf
+.Nd UTF-8 rune conversion
+.Sh SYNOPSIS
+.In utf.h
+.Ft int
+.Fn runetochar "char *s" "Rune *p"
+.Ft int
+.Fn chartorune "Rune *p" "char *s"
+.Ft int
+.Fn charntorune "Rune *p" "char *s" "size_t len"
+.Ft int
+.Fn runelen "Rune r"
+.Ft int
+.Fn runenlen "Rune *p" "size_t len"
+.Ft int
+.Fn fullrune "char *s" "size_t len"
+.Ft char *
+.Fn utfecpy "char *to" "char *end" "char *from"
+.Ft size_t
+.Fn utflen "char *s"
+.Ft size_t
+.Fn utfnlen "char *s" "size_t len"
+.Ft char *
+.Fn utfrune "char *s" "Rune r"
+.Ft char *
+.Fn utfrrune "char *s" "Rune r"
+.Ft char *
+.Fn utfutf "char *s" "char *t"
+.Sh DESCRIPTION
+The following functions convert to and from a UTF-8 byte stream and Unicode runes.
+.Pp
+.Fn runetochar
+converts one rune at
+.Fa p
+to at most
+.Dv UTFmax
+bytes starting at
+.Fa s ,
+and returns the number of bytes copied.
+.Dv UTFmax
+is the maximum number of bytes required to represent a rune.
+If the rune is illegal,
+.Fn runetochar
+will return 0.
+.Pp
+.Fn chartorune
+converts at most
+.Dv UTFmax
+bytes starting at
+.Fa s
+to one rune at
+.Fa p ,
+and returns the number of bytes copied.
+If the input is invalid UTF-8,
+.Fn chartorune
+will convert the sequence to
+.Dv Runeerror
+(0xFFFD) and return the number of bytes in the invalid sequence.
+.Pp
+.Fn charntorune
+converts at most
+.Fa len
+bytes starting at
+.Fa s
+to one rune at
+.Fa p ,
+and returns the number of bytes copied.
+If the next sequence is longer than
+.Fa len
+bytes,
+.Fn charntorune
+will return 0.
+.Pp
+.Fn runelen
+returns the number of bytes required to convert the rune
+.Fa r
+into UTF-8.
+If the rune is illegal,
+.Fn runelen
+will return 0.
+.Pp
+.Fn runenlen
+returns the number of bytes required to convert the
+.Fa len
+runes pointed to by
+.Fa p
+into UTF-8.
+.Pp
+.Fn fullrune
+returns 1 if the first
+.Fa len
+bytes of the UTF-8 string
+.Fa s
+form a complete rune, and 0 otherwise.
+.Pp
+The following functions are analogous to the corresponding string routines, with `utf' substituted for `str', and `rune' for `chr'.
+.Pp
+.Fn utfecpy
+copies UTF-8 sequences until a nul byte has been copied, but writes no sequences beyond
+.Fa end .
+If any sequences are copied,
+.Fa to
+is terminated with a nul byte and a pointer to that byte is returned.
+Otherwise the original
+.Fa to
+is returned.
+.Pp
+.Fn utflen
+returns the number of runes represented by the UTF-8 string
+.Fa s .
+.Pp
+.Fn utfnlen
+returns the number of runes represented by the first
+.Fa len
+bytes of the UTF-8 string
+.Fa s .
+If the final sequence is incomplete it will not be counted.
+.Pp
+.Fn utfrune
+.Pq Fn utfrrune
+returns a pointer to the first
+.Pq last
+occurrence of the rune
+.Fa r
+in the UTF-8 string
+.Fa s ,
+or
+.Dv NULL
+if there is none.
+The terminating nul byte is considered a part of the string
+.Fa s .
+.Pp
+.Fn utfutf
+returns a pointer to the first occurrence of the UTF-8 string
+.Fa t
+as a UTF-8 substring of
+.Fa s ,
+or
+.Dv NULL
+if there is none.
+If
+.Fa t
+is the null string,
+.Fn utfutf
+returns
+.Fa s .
+.Sh CONFORMING TO
+These functions are compatible with those defined in the Plan 9 C library, with the exception of
+.Fn charntorune ,
+which is an extension.
+However, these functions are much stricter about UTF-8 validity than their Plan 9 counterparts.
+.Sh SEE ALSO
+.Xr isalpharune 3
diff -r bcf6cc6b9f2d -r 22b64c605385 rune.c
--- a/rune.c Mon May 21 17:57:46 2012 +0100
+++ b/rune.c Mon May 21 19:00:45 2012 +0100
@@ -18,13 +18,6 @@
|| ((x) >= 0xD800 && (x) <= 0xDFFF) \
|| ((x) >= 0xFDD0 && (x) <= 0xFDEF))
-/*
- * runetochar copies one rune at p to at most UTFmax bytes starting at s and
- * returns the number of bytes copied. UTFmax is the maximum number of bytes
- * required to represent a legal rune.
- *
- * If the rune is illegal, runetochar will return 0.
- */
int
runetochar(char *s, const Rune *p)
{
@@ -54,27 +47,12 @@
}
}
-/*
- * chartorune copies at most UTFmax bytes starting at s to one rune at p and
- * returns the number of bytes copied. If the input is not valid UTF-8,
- * chartorune will convert the sequence to Runeerror (0xFFFD), returning the
- * number of bytes in the invalid sequence.
- */
int
chartorune(Rune *p, const char *s)
{
return charntorune(p, s, UTFmax);
}
-/*
- * charntorune copies at most len bytes starting at s to one rune at p and
- * returns the number of bytes copied. If the input is not valid UTF-8,
- * charntorune will convert the sequence to Runeerror (0xFFFD), returning the
- * number of bytes in the invalid sequence.
- *
- * If a potentially valid sequence is cut off by the len limit, charntorune will
- * return 0.
- */
int
charntorune(Rune *p, const char *s, size_t len)
{
@@ -116,29 +94,21 @@
return n;
}
-/*
- * runelen returns the number of bytes required to convert r into UTF-8. If the
- * rune is illegal, runelen will return 0.
- */
int
runelen(Rune r)
{
- if(BADRUNE(r))
- return 0; /* error */
- else if(r <= 0x7F)
+ if(r <= 0x7F)
return 1;
else if(r <= 0x07FF)
return 2;
+ else if(BADRUNE(r))
+ return 0; /* error */
else if(r <= 0xFFFF)
return 3;
else
return 4;
}
-/*
- * runelen returns the number of bytes required to convert the rune-string of
- * length len pointed to by p into UTF-8.
- */
size_t
runenlen(const Rune *p, size_t len)
{
@@ -149,10 +119,6 @@
return n;
}
-/*
- * fullrune returns 1 if the string s of length len is long enough to be
- * decoded by chartorune, and 0 otherwise.
- */
int
fullrune(const char *s, size_t len)
{
@@ -161,12 +127,6 @@
return charntorune(&r, s, len) > 0;
}
-/*
- * utfecpy copies UTF-8 sequences until a null sequence has been copied, but
- * writes no sequences beyond end. If any sequences are copied, the to string is
- * terminated by a null sequence, and a pointer to that sequence is returned.
- * Otherwise, the original to string is returned.
- */
char *
utfecpy(char *to, char *end, const char *from)
{
@@ -183,10 +143,6 @@
return &to[i];
}
-/*
- * utflen returns the number of runes that are represented by the UTF-8 string
- * s.
- */
size_t
utflen(const char *s)
{
@@ -199,12 +155,6 @@
return i;
}
-/*
- * utfnlen returns the number of runes that are represented by the UTF-8 string
- * s of length len. If the last few bytes contain an incompletely coded rune,
- * utfnlen will not count them; in this way it differs from utflen, which
- * includes every byte of the string.
- */
size_t
utfnlen(const char *s, size_t len)
{
@@ -218,11 +168,6 @@
return i;
}
-/*
- * utfrune returns a pointer to the first ocurrence of r in the UTF-8 string s,
- * or NULL if r does not occur in s. The null byte terminating a string is
- * considered to be part of the string s.
- */
char *
utfrune(const char *s, Rune r)
{
@@ -251,11 +196,6 @@
return NULL;
}
-/*
- * utfrrune returns a pointer to the last ocurrence of r in the UTF-8 string s,
- * or NULL if r does not occur in s. The null byte terminating a string is
- * considered to be part of the string s.
- */
char *
utfrrune(const char *s, Rune r)
{
@@ -274,11 +214,6 @@
return (char *)p;
}
-/*
- * utfutf returns a pointer to the first occurrence of the UTF-8 string t as a
- * UTF-8 substring of s, or NULL if there is none. If t is the null string,
- * utfutf returns s.
- */
char *
utfutf(const char *s, const char *t)
{
diff -r bcf6cc6b9f2d -r 22b64c605385 utf.h
--- a/utf.h Mon May 21 17:57:46 2012 +0100
+++ b/utf.h Mon May 21 19:00:45 2012 +0100
@@ -27,10 +27,10 @@
char *utfutf(const char *, const char *);
int isalpharune(Rune);
+int islowerrune(Rune);
int isspacerune(Rune);
+int istitlerune(Rune);
int isupperrune(Rune);
-int islowerrune(Rune);
-int istitlerune(Rune);
int isdigitrune(Rune);
#endif
Received on Mon May 21 2012 - 20:02:21 CEST
This archive was generated by hypermail 2.3.0
: Mon May 21 2012 - 20:12:08 CEST