Re: [hackers] [sbase] [PATCH 04/10] Don't use buffered IO (fread) when not appropriate

From: Silvan Jegen <s.jegen_AT_gmail.com>
Date: Mon, 5 Dec 2016 21:15:37 +0100

Hi

Some comments below.

On Sun, Dec 04, 2016 at 09:55:06PM -0800, Michael Forney wrote:
> fread reads the entire requested size (BUFSIZ), which causes tools to
> block if only small amounts of data are available at a time. At best,
> this causes unnecessary copies and inefficiency, at worst, tools like
> tee and cat are almost unusable in some cases since they only display
> large chunks of data at a time.
> ---
> cksum.c | 31 +++++++++++++++++--------------
> crypt.h | 2 +-
> libutil/crypt.c | 37 +++++++++++++++++++------------------
> od.c | 42 ++++++++++++++++++++++++++----------------
> tee.c | 39 +++++++++++++++++++--------------------
> 5 files changed, 82 insertions(+), 69 deletions(-)
>
> diff --git a/cksum.c b/cksum.c
> index 570ca81..b53ec17 100644
> --- a/cksum.c
> +++ b/cksum.c
> _AT_@ -1,7 +1,9 @@
> /* See LICENSE file for copyright and license details. */
> +#include <fcntl.h>
> #include <inttypes.h>
> #include <stdio.h>

This include is not needed anymore.


> #include <string.h>
> +#include <unistd.h>
>
> #include "util.h"
>
> _AT_@ -61,19 +63,20 @@ static const unsigned long crctab[] = { 0x00000000,
> };
>
> static void
> -cksum(FILE *fp, const char *s)
> +cksum(int fd, const char *s)
> {
> - size_t len = 0, i, n;
> + ssize_t n;
> + size_t len = 0, i;
> uint32_t ck = 0;
> unsigned char buf[BUFSIZ];
>
> - while ((n = fread(buf, 1, sizeof(buf), fp))) {
> + while ((n = read(fd, buf, sizeof(buf))) > 0) {
> for (i = 0; i < n; i++)
> ck = (ck << 8) ^ crctab[(ck >> 24) ^ buf[i]];
> len += n;
> }
> - if (ferror(fp)) {
> - weprintf("fread %s:", s ? s : "<stdin>");
> + if (n < 0) {
> + weprintf("read %s:", s ? s : "<stdin>");
> ret = 1;
> return;
> }
> _AT_@ -92,29 +95,29 @@ cksum(FILE *fp, const char *s)
> int
> main(int argc, char *argv[])
> {
> - FILE *fp;
> + int fd;
>
> argv0 = argv[0], argc--, argv++;
>
> if (!argc) {
> - cksum(stdin, NULL);
> + cksum(0, NULL);
> } else {
> for (; *argv; argc--, argv++) {
> if (!strcmp(*argv, "-")) {
> *argv = "<stdin>";
> - fp = stdin;
> - } else if (!(fp = fopen(*argv, "r"))) {
> - weprintf("fopen %s:", *argv);
> + fd = 0;
> + } else if ((fd = open(*argv, O_RDONLY)) < 0) {
> + weprintf("open %s:", *argv);
> ret = 1;
> continue;
> }
> - cksum(fp, *argv);
> - if (fp != stdin && fshut(fp, *argv))
> - ret = 1;
> + cksum(fd, *argv);
> + if (fd != 0)
> + close(fd);
> }
> }
>
> - ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
> + ret |= fshut(stdout, "<stdout>");
>
> return ret;
> }
> diff --git a/crypt.h b/crypt.h
> index e0cc08d..2fd2932 100644
> --- a/crypt.h
> +++ b/crypt.h
> _AT_@ -8,5 +8,5 @@ struct crypt_ops {
>
> int cryptcheck(int, char **, struct crypt_ops *, uint8_t *, size_t);
> int cryptmain(int, char **, struct crypt_ops *, uint8_t *, size_t);
> -int cryptsum(struct crypt_ops *, FILE *, const char *, uint8_t *);
> +int cryptsum(struct crypt_ops *, int, const char *, uint8_t *);
> void mdprint(const uint8_t *, const char *, size_t);
> diff --git a/libutil/crypt.c b/libutil/crypt.c
> index 6991c39..e285614 100644
> --- a/libutil/crypt.c
> +++ b/libutil/crypt.c
> _AT_@ -1,8 +1,10 @@
> /* See LICENSE file for copyright and license details. */
> +#include <fcntl.h>
> #include <stdint.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> +#include <unistd.h>
>
> #include "../crypt.h"
> #include "../text.h"
> _AT_@ -41,7 +43,7 @@ static void
> mdchecklist(FILE *listfp, struct crypt_ops *ops, uint8_t *md, size_t sz,
> int *formatsucks, int *noread, int *nonmatch)
> {
> - FILE *fp;
> + int fd;
> size_t bufsiz = 0;
> int r;
> char *line = NULL, *file, *p;
> _AT_@ -59,12 +61,12 @@ mdchecklist(FILE *listfp, struct crypt_ops *ops, uint8_t *md, size_t sz,
> file += 2;
> for (p = file; *p && *p != '\n' && *p != '\r'; p++); /* strip newline */
> *p = '\0';
> - if (!(fp = fopen(file, "r"))) {
> - weprintf("fopen %s:", file);
> + if ((fd = open(file, O_RDONLY)) < 0) {
> + weprintf("open %s:", file);
> (*noread)++;
> continue;
> }
> - if (cryptsum(ops, fp, file, md)) {
> + if (cryptsum(ops, fd, file, md)) {
> (*noread)++;
> continue;
> }
> _AT_@ -77,7 +79,7 @@ mdchecklist(FILE *listfp, struct crypt_ops *ops, uint8_t *md, size_t sz,
> } else {
> (*formatsucks)++;
> }
> - fclose(fp);
> + close(fd);
> }
> free(line);
> }
> _AT_@ -124,11 +126,11 @@ cryptcheck(int argc, char *argv[], struct crypt_ops *ops, uint8_t *md, size_t sz
> int
> cryptmain(int argc, char *argv[], struct crypt_ops *ops, uint8_t *md, size_t sz)
> {
> - FILE *fp;
> + int fd;
> int ret = 0;
>
> if (argc == 0) {
> - if (cryptsum(ops, stdin, "<stdin>", md))
> + if (cryptsum(ops, 0, "<stdin>", md))
> ret = 1;
> else
> mdprint(md, "<stdin>", sz);
> _AT_@ -136,18 +138,18 @@ cryptmain(int argc, char *argv[], struct crypt_ops *ops, uint8_t *md, size_t sz)
> for (; *argv; argc--, argv++) {
> if ((*argv)[0] == '-' && !(*argv)[1]) {
> *argv = "<stdin>";
> - fp = stdin;
> - } else if (!(fp = fopen(*argv, "r"))) {
> - weprintf("fopen %s:", *argv);
> + fd = 0;
> + } else if ((fd = open(*argv, O_RDONLY)) < 0) {
> + weprintf("open %s:", *argv);
> ret = 1;
> continue;
> }
> - if (cryptsum(ops, fp, *argv, md))
> + if (cryptsum(ops, fd, *argv, md))
> ret = 1;
> else
> mdprint(md, *argv, sz);
> - if (fp != stdin && fshut(fp, *argv))
> - ret = 1;
> + if (fd != 0)
> + close(fd);
> }
> }
>
> _AT_@ -155,16 +157,15 @@ cryptmain(int argc, char *argv[], struct crypt_ops *ops, uint8_t *md, size_t sz)
> }
>
> int
> -cryptsum(struct crypt_ops *ops, FILE *fp, const char *f,
> - uint8_t *md)
> +cryptsum(struct crypt_ops *ops, int fd, const char *f, uint8_t *md)
> {
> uint8_t buf[BUFSIZ];
> - size_t n;
> + ssize_t n;
>
> ops->init(ops->s);
> - while ((n = fread(buf, 1, sizeof(buf), fp)) > 0)
> + while ((n = read(fd, buf, sizeof(buf))) > 0)
> ops->update(ops->s, buf, n);
> - if (ferror(fp)) {
> + if (n < 0) {
> weprintf("%s: read error:", f);
> return 1;
> }
> diff --git a/od.c b/od.c
> index b5884e7..9ae1e29 100644
> --- a/od.c
> +++ b/od.c
> _AT_@ -1,8 +1,10 @@
> /* See LICENSE file for copyright and license details. */
> +#include <fcntl.h>
> #include <stdint.h>
> #include <stdio.h>

This include is not needed anymore.


> #include <stdlib.h>
> #include <string.h>
> +#include <unistd.h>
>
> #include "queue.h"
> #include "util.h"
> _AT_@ -124,26 +126,28 @@ once:
> }
> }
>
> -static void
> -od(FILE *fp, char *fname, int last)
> +static int
> +od(int fd, char *fname, int last)
> {
> static unsigned char *line;
> static size_t lineoff;
> size_t i;
> unsigned char buf[BUFSIZ];
> static off_t addr;
> - size_t n;
> + ssize_t n;
>
> while (skip - addr > 0) {
> - n = fread(buf, 1, MIN(skip - addr, sizeof(buf)), fp);
> + n = read(fd, buf, MIN(skip - addr, sizeof(buf)));
> + if (n < 0)
> + weprintf("read %s:", fname);
> + if (n <= 0)
> + return n;
> addr += n;
> - if (feof(fp) || ferror(fp))
> - return;
> }
> if (!line)
> line = emalloc(linelen);
>
> - while ((n = fread(buf, 1, MIN((size_t)max - (addr - skip), sizeof(buf)), fp))) {
> + while ((n = read(fd, buf, MIN((size_t)max - (addr - skip), sizeof(buf)))) > 0) {
> for (i = 0; i < n; i++, addr++) {
> line[lineoff++] = buf[i];
> if (lineoff == linelen) {
> _AT_@ -152,10 +156,15 @@ od(FILE *fp, char *fname, int last)
> }
> }
> }
> + if (n < 0) {
> + weprintf("read %s:", fname);
> + return n;
> + }
> if (lineoff && last)
> printline(line, lineoff, addr - lineoff);
> if (last)
> printline((unsigned char *)"", 0, addr);
> + return 0;
> }
>
> static int
> _AT_@ -193,7 +202,7 @@ usage(void)
> int
> main(int argc, char *argv[])
> {
> - FILE *fp;
> + int fd;
> struct type *t;
> int ret = 0, len;
> char *s;
> _AT_@ -290,25 +299,26 @@ main(int argc, char *argv[])
> linelen *= 2;
>
> if (!argc) {
> - od(stdin, "<stdin>", 1);
> + if (od(0, "<stdin>", 1) < 0)
> + ret = 1;
> } else {
> for (; *argv; argc--, argv++) {
> if (!strcmp(*argv, "-")) {
> *argv = "<stdin>";
> - fp = stdin;
> - } else if (!(fp = fopen(*argv, "r"))) {
> - weprintf("fopen %s:", *argv);
> + fd = 0;
> + } else if ((fd = open(*argv, O_RDONLY)) < 0) {
> + weprintf("open %s:", *argv);
> ret = 1;
> continue;
> }
> - od(fp, *argv, (!*(argv + 1)));
> - if (fp != stdin && fshut(fp, *argv))
> + if (od(fd, *argv, (!*(argv + 1))) < 0)
> ret = 1;
> + if (fd != 0)
> + close(fd);
> }
> }
>
> - ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
> - fshut(stderr, "<stderr>");
> + ret |= fshut(stdout, "<stdout>") | fshut(stderr, "<stderr>");
>
> return ret;
> }
> diff --git a/tee.c b/tee.c
> index 35e3db5..eac106c 100644
> --- a/tee.c
> +++ b/tee.c
> _AT_@ -1,6 +1,7 @@
> /* See LICENSE file for copyright and license details. */
> +#include <fcntl.h>
> #include <signal.h>
> -#include <stdio.h>
> +#include <unistd.h>
>
> #include "util.h"
>
> _AT_@ -13,14 +14,15 @@ usage(void)
> int
> main(int argc, char *argv[])
> {
> - FILE **fps = NULL;
> - size_t i, n, nfps;
> - int ret = 0, aflag = 0, iflag = 0;
> + int *fds = NULL;
> + size_t i, nfds;
> + ssize_t n;
> + int ret = 0, aflag = O_TRUNC, iflag = 0;
> char buf[BUFSIZ];
>
> ARGBEGIN {
> case 'a':
> - aflag = 1;
> + aflag = O_APPEND;
> break;
> case 'i':
> iflag = 1;
> _AT_@ -31,31 +33,28 @@ main(int argc, char *argv[])
>
> if (iflag && signal(SIGINT, SIG_IGN) == SIG_ERR)
> eprintf("signal:");
> - nfps = argc + 1;
> - fps = ecalloc(nfps, sizeof(*fps));
> + nfds = argc + 1;
> + fds = ecalloc(nfds, sizeof(*fds));
>
> for (i = 0; i < argc; i++) {
> - if (!(fps[i] = fopen(argv[i], aflag ? "a" : "w"))) {
> - weprintf("fopen %s:", argv[i]);
> + if ((fds[i] = open(argv[i], O_WRONLY|O_CREAT|aflag, 0666)) < 0) {

umask will be honored when creating a file but I am wondering if just
setting mode_t to 0660 would be the safer option here.

Other than these this one LGTM!


Cheers,

Silvan


> + weprintf("open %s:", argv[i]);
> ret = 1;
> }
> }
> - fps[i] = stdout;
> + fds[i] = 1;
>
> - while ((n = fread(buf, 1, sizeof(buf), stdin))) {
> - for (i = 0; i < nfps; i++) {
> - if (fps[i] && fwrite(buf, 1, n, fps[i]) != n) {
> - fshut(fps[i], (i != argc) ? argv[i] : "<stdout>");
> - fps[i] = NULL;
> + while ((n = read(0, buf, sizeof(buf))) > 0) {
> + for (i = 0; i < nfds; i++) {
> + if (fds[i] >= 0 && writeall(fds[i], buf, n) < 0) {
> + weprintf("write %s:", (i != argc) ? argv[i] : "<stdout>");
> + fds[i] = -1;
> ret = 1;
> }
> }
> }
> -
> - ret |= fshut(stdin, "<stdin>");
> - for (i = 0; i < nfps; i++)
> - if (fps[i])
> - ret |= fshut(fps[i], (i != argc) ? argv[i] : "<stdout>");
> + if (n < 0)
> + eprintf("read <stdin>:");
>
> return ret;
> }
> --
> 2.11.0
>
>
Received on Mon Dec 05 2016 - 21:15:37 CET

This archive was generated by hypermail 2.3.0 : Mon Dec 05 2016 - 21:24:16 CET