--- .gitignore | 1 + Makefile | 2 ++ README | 1 + libutil/random.c | 46 +++++++++++++++++++++++++++++ shuf.1 | 43 +++++++++++++++++++++++++++ shuf.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ util.h | 3 ++ 7 files changed, 173 insertions(+) create mode 100644 libutil/random.c create mode 100644 shuf.1 create mode 100644 shuf.c diff --git a/.gitignore b/.gitignore index 001b055..e789e24 100644 --- a/.gitignore +++ b/.gitignore _AT_@ -71,6 +71,7 @@ /sha512sum /sha512-224sum /sha512-256sum +/shuf /sleep /sort /split diff --git a/Makefile b/Makefile index 7bd9626..8e8732a 100644 --- a/Makefile +++ b/Makefile _AT_@ -86,6 +86,7 @@ LIBUTILOBJ =\ libutil/strsep.o\ libutil/strnsubst.o\ libutil/strtonum.o\ + libutil/random.o\ libutil/unescape.o\ libutil/writeall.o _AT_@ -160,6 +161,7 @@ BIN =\ sha512sum\ sha512-224sum\ sha512-256sum\ + shuf\ sleep\ sort\ split\ diff --git a/README b/README index 5555f8b..9b287d9 100644 --- a/README +++ b/README _AT_@ -112,6 +112,7 @@ The following tools are implemented: 0=*|x sha512sum . 0=* x sha512-224sum . 0=* x sha512-256sum . +0=* x shuf (-o, -z, -e, -i) 0=*|o sleep . 0#*|o sort . 0=*|o split . diff --git a/libutil/random.c b/libutil/random.c new file mode 100644 index 0000000..48eeb79 --- /dev/null +++ b/libutil/random.c _AT_@ -0,0 +1,46 @@ +#include <stdint.h> +#include <stdlib.h> +#include <time.h> + +/* + * Uniformity is achieved by generating new random numbers until the one + * returned is outside the range [0, 2**32 % upper_bound). This + * guarantees the selected random number will be inside + * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound) + * after reduction modulo upper_bound. + * + * Copied off OpenBSD (original is arc4random_uniform) + */ +uint32_t +random_uniform(uint32_t upper_bound) +{ + uint32_t r, min; + + if (upper_bound < 2) + return 0; + + /* 2**32 % x == (2**32 - x) % x */ + min = -upper_bound % upper_bound; + + /* + * This could theoretically loop forever but each retry has + * p > 0.5 (worst case, usually far better) of selecting a + * number inside the range we need, so it should rarely need + * to re-roll. + */ + for (;;) { + r = random(); /* arc4random() is better, but we don't always have it */ + if (r >= min) + break; + } + + return r % upper_bound; +} + +void +random_seed(void) +{ + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + srandom(ts.tv_nsec); /* not a good source of randomness, but eh */ +} diff --git a/shuf.1 b/shuf.1 new file mode 100644 index 0000000..1f744a7 --- /dev/null +++ b/shuf.1 _AT_@ -0,0 +1,43 @@ +.Dd 2024-02-25 +.Dt SHUF 1 +.Os +.Sh NAME +.Nm shuf +.Nd randomly permute input lines +.Sh SYNOPSIS +.Nm +.Op Fl r +.Op Ar file ... +.Sh DESCRIPTION +.Nm +is a utility that outputs a random permutation of its input lines. +By default, +.Nm +reads from stdin and outputs to stdout. +.Sh OPTIONS +.Bl -tag -width Ds +.It Fl r +Do not permute. +Instead, choose lines at random, with replacement. +By default, it repeats indefinitely. +.El +.Sh NOTES +This +.Nm +misses a few options. +.Pp +.Fl n +can be reproduced using +.Xr head 1 . +.Pp +.Fl i +can be reproduced using +.Xr seq 1 . +.Pp +.Fl e +can be reproduced using +.Xr printf 1 . +.Sh SEE ALSO +.Xr sort 1 , +.Xr seq 1 , +.Xr head 1 diff --git a/shuf.c b/shuf.c new file mode 100644 index 0000000..36b3898 --- /dev/null +++ b/shuf.c _AT_@ -0,0 +1,77 @@ +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "text.h" +#include "util.h" + +static void +usage(void) +{ + eprintf("usage: %s [-r] [file ...]\n", argv0); +} + +int +main(int argc, char *argv[]) +{ + FILE *fp; + int i, j, rflag = 0, ret = 0; + struct linebuf buf = EMPTY_LINEBUF; + struct line line; + + ARGBEGIN { + case 'r': + rflag = 1; + break; + default: + usage(); + } ARGEND + + if (!argc) { + getlines(stdin, &buf); + } else { + for (; *argv; argc--, argv++) { + if (!strcmp(*argv, "-")) { + *argv = "<stdin>"; + fp = stdin; + } else if (!(fp = fopen(*argv, "r"))) { + weprintf("fopen %s:", *argv); + ret = 1; + continue; + } + getlines(fp, &buf); + if (fp != stdin && fshut(fp, *argv)) + ret = 1; + } + } + + random_seed(); + + if (!rflag) { + for (i = buf.nlines - 1; i > 0; i--) { + j = random_uniform(i+1); + line = buf.lines[j]; + buf.lines[j] = buf.lines[i]; + buf.lines[i] = line; + } + + for (i = 0; i < buf.nlines; i++) { + line = buf.lines[i]; + fwrite(line.data, 1, line.len, stdout); + } + } else { + for (;;) { + j = random_uniform(buf.nlines); + line = buf.lines[j]; + fwrite(line.data, 1, line.len, stdout); + } + } + for (i = 0; i < buf.nlines; i++) + free(buf.lines[i].data); + + ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>"); + + return ret; +} diff --git a/util.h b/util.h index 346f6ca..4c9b5b2 100644 --- a/util.h +++ b/util.h _AT_@ -3,6 +3,7 @@ #include <regex.h> #include <stddef.h> +#include <stdint.h> #include <stdio.h> #include "arg.h" _AT_@ -91,3 +92,5 @@ int mkdirp(const char *, mode_t, mode_t); #undef memmem #define memmem xmemmem void *memmem(const void *, size_t, const void *, size_t); +void random_seed(void); +uint32_t random_uniform(uint32_t); -- 2.44.0Received on Sun Mar 03 2024 - 00:36:35 CET
This archive was generated by hypermail 2.3.0 : Sun Mar 03 2024 - 00:48:37 CET