---
.gitignore | 1 +
Makefile | 2 ++
README | 1 +
libutil/random.c | 46 +++++++++++++++++++++++++++++
shuf.1 | 43 +++++++++++++++++++++++++++
shuf.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++
util.h | 3 ++
7 files changed, 173 insertions(+)
create mode 100644 libutil/random.c
create mode 100644 shuf.1
create mode 100644 shuf.c
diff --git a/.gitignore b/.gitignore
index 001b055..e789e24 100644
--- a/.gitignore
+++ b/.gitignore
_AT_@ -71,6 +71,7 @@
/sha512sum
/sha512-224sum
/sha512-256sum
+/shuf
/sleep
/sort
/split
diff --git a/Makefile b/Makefile
index 7bd9626..8e8732a 100644
--- a/Makefile
+++ b/Makefile
_AT_@ -86,6 +86,7 @@ LIBUTILOBJ =\
libutil/strsep.o\
libutil/strnsubst.o\
libutil/strtonum.o\
+ libutil/random.o\
libutil/unescape.o\
libutil/writeall.o
_AT_@ -160,6 +161,7 @@ BIN =\
sha512sum\
sha512-224sum\
sha512-256sum\
+ shuf\
sleep\
sort\
split\
diff --git a/README b/README
index 5555f8b..9b287d9 100644
--- a/README
+++ b/README
_AT_@ -112,6 +112,7 @@ The following tools are implemented:
0=*|x sha512sum .
0=* x sha512-224sum .
0=* x sha512-256sum .
+0=* x shuf (-o, -z, -e, -i)
0=*|o sleep .
0#*|o sort .
0=*|o split .
diff --git a/libutil/random.c b/libutil/random.c
new file mode 100644
index 0000000..48eeb79
--- /dev/null
+++ b/libutil/random.c
_AT_@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+
+/*
+ * Uniformity is achieved by generating new random numbers until the one
+ * returned is outside the range [0, 2**32 % upper_bound). This
+ * guarantees the selected random number will be inside
+ * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
+ * after reduction modulo upper_bound.
+ *
+ * Copied off OpenBSD (original is arc4random_uniform)
+ */
+uint32_t
+random_uniform(uint32_t upper_bound)
+{
+ uint32_t r, min;
+
+ if (upper_bound < 2)
+ return 0;
+
+ /* 2**32 % x == (2**32 - x) % x */
+ min = -upper_bound % upper_bound;
+
+ /*
+ * This could theoretically loop forever but each retry has
+ * p > 0.5 (worst case, usually far better) of selecting a
+ * number inside the range we need, so it should rarely need
+ * to re-roll.
+ */
+ for (;;) {
+ r = random(); /* arc4random() is better, but we don't always have it */
+ if (r >= min)
+ break;
+ }
+
+ return r % upper_bound;
+}
+
+void
+random_seed(void)
+{
+ struct timespec ts;
+ clock_gettime(CLOCK_REALTIME, &ts);
+ srandom(ts.tv_nsec); /* not a good source of randomness, but eh */
+}
diff --git a/shuf.1 b/shuf.1
new file mode 100644
index 0000000..1f744a7
--- /dev/null
+++ b/shuf.1
_AT_@ -0,0 +1,43 @@
+.Dd 2024-02-25
+.Dt SHUF 1
+.Os
+.Sh NAME
+.Nm shuf
+.Nd randomly permute input lines
+.Sh SYNOPSIS
+.Nm
+.Op Fl r
+.Op Ar file ...
+.Sh DESCRIPTION
+.Nm
+is a utility that outputs a random permutation of its input lines.
+By default,
+.Nm
+reads from stdin and outputs to stdout.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl r
+Do not permute.
+Instead, choose lines at random, with replacement.
+By default, it repeats indefinitely.
+.El
+.Sh NOTES
+This
+.Nm
+misses a few options.
+.Pp
+.Fl n
+can be reproduced using
+.Xr head 1 .
+.Pp
+.Fl i
+can be reproduced using
+.Xr seq 1 .
+.Pp
+.Fl e
+can be reproduced using
+.Xr printf 1 .
+.Sh SEE ALSO
+.Xr sort 1 ,
+.Xr seq 1 ,
+.Xr head 1
diff --git a/shuf.c b/shuf.c
new file mode 100644
index 0000000..36b3898
--- /dev/null
+++ b/shuf.c
_AT_@ -0,0 +1,77 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "text.h"
+#include "util.h"
+
+static void
+usage(void)
+{
+ eprintf("usage: %s [-r] [file ...]\n", argv0);
+}
+
+int
+main(int argc, char *argv[])
+{
+ FILE *fp;
+ int i, j, rflag = 0, ret = 0;
+ struct linebuf buf = EMPTY_LINEBUF;
+ struct line line;
+
+ ARGBEGIN {
+ case 'r':
+ rflag = 1;
+ break;
+ default:
+ usage();
+ } ARGEND
+
+ if (!argc) {
+ getlines(stdin, &buf);
+ } else {
+ for (; *argv; argc--, argv++) {
+ if (!strcmp(*argv, "-")) {
+ *argv = "<stdin>";
+ fp = stdin;
+ } else if (!(fp = fopen(*argv, "r"))) {
+ weprintf("fopen %s:", *argv);
+ ret = 1;
+ continue;
+ }
+ getlines(fp, &buf);
+ if (fp != stdin && fshut(fp, *argv))
+ ret = 1;
+ }
+ }
+
+ random_seed();
+
+ if (!rflag) {
+ for (i = buf.nlines - 1; i > 0; i--) {
+ j = random_uniform(i+1);
+ line = buf.lines[j];
+ buf.lines[j] = buf.lines[i];
+ buf.lines[i] = line;
+ }
+
+ for (i = 0; i < buf.nlines; i++) {
+ line = buf.lines[i];
+ fwrite(line.data, 1, line.len, stdout);
+ }
+ } else {
+ for (;;) {
+ j = random_uniform(buf.nlines);
+ line = buf.lines[j];
+ fwrite(line.data, 1, line.len, stdout);
+ }
+ }
+ for (i = 0; i < buf.nlines; i++)
+ free(buf.lines[i].data);
+
+ ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
+
+ return ret;
+}
diff --git a/util.h b/util.h
index 346f6ca..4c9b5b2 100644
--- a/util.h
+++ b/util.h
_AT_@ -3,6 +3,7 @@
#include <regex.h>
#include <stddef.h>
+#include <stdint.h>
#include <stdio.h>
#include "arg.h"
_AT_@ -91,3 +92,5 @@ int mkdirp(const char *, mode_t, mode_t);
#undef memmem
#define memmem xmemmem
void *memmem(const void *, size_t, const void *, size_t);
+void random_seed(void);
+uint32_t random_uniform(uint32_t);
--
2.44.0
Received on Sun Mar 03 2024 - 00:36:35 CET
This archive was generated by hypermail 2.3.0 : Sun Mar 03 2024 - 00:48:37 CET