From 3cc3ad75c6c390cde307a6ff45ccc4a58cd71cdf Mon Sep 17 00:00:00 2001 From: stateless Date: Tue, 18 Jun 2013 11:19:47 +0100 Subject: [PATCH] Add comm --- Makefile | 1 + TODO | 2 - comm.1 | 33 ++++++++++++++++ comm.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 2 deletions(-) create mode 100644 comm.1 create mode 100644 comm.c diff --git a/Makefile b/Makefile index 4c45ff6..65473a3 100644 --- a/Makefile +++ b/Makefile @@ -29,6 +29,7 @@ SRC = \ chvt.c \ cksum.c \ cmp.c \ + comm.c \ cp.c \ date.c \ dirname.c \ diff --git a/TODO b/TODO index a5b095c..c1f03ec 100644 --- a/TODO +++ b/TODO @@ -1,7 +1,5 @@ cal [-1] [-3] [-y] [year] -comm [-123] file1 file2 - cut [-bcfs] [-d delim] list [file...] df [-shia] [path] diff --git a/comm.1 b/comm.1 new file mode 100644 index 0000000..8081775 --- /dev/null +++ b/comm.1 @@ -0,0 +1,33 @@ +.TH COMM 1 sbase\-VERSION +.SH NAME +comm \- select or reject lines common to two files +.SH SYNOPSIS +.B comm +.RB [ \-123 ] +.IR file1 +.IR file2 +.SH DESCRIPTION +The comm utility reads +.IR file1 +and +.IR file2 , +which should be sorted lexically, +and produces three text columns as output: lines only in +.IR file1 ; +lines only in +.IR file2 ; +and lines in both files. +.SH OPTIONS +.TP +.BI \-1 +Suppress printing of column 1. +.TP +.BI \-2 +Suppress printing of column 2. +.TP +.BI \-3 +Suppress printing of column 3. +.SH SEE ALSO +.IR cmp (1), +.IR sort (1), +.IR uniq (1) diff --git a/comm.c b/comm.c new file mode 100644 index 0000000..e693461 --- /dev/null +++ b/comm.c @@ -0,0 +1,132 @@ +/* See LICENSE file for copyright and license details. 
*/
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "util.h"
+
+/* Bits of sflags: SuppressN hides output column N (options -1, -2, -3). */
+enum { Suppress1 = 1, Suppress2 = 2, Suppress3 = 4 };
+
+static void comm(FILE *fp1, const char *s1, FILE *fp2,
+		 const char *s2, int sflags);
+
+/* Report the command synopsis through eprintf(). */
+static void
+usage(void)
+{
+	eprintf("usage: %s [-123] file1 file2\n", argv0);
+}
+
+/*
+ * Collect the -1/-2/-3 flags into sflags, open the two file operands
+ * for reading, hand both streams to comm() and close them afterwards.
+ */
+int
+main(int argc, char *argv[])
+{
+	int sflags = 0;
+	FILE *fp1, *fp2;
+
+	ARGBEGIN {
+	case '1':
+		sflags |= Suppress1;
+		break;
+	case '2':
+		sflags |= Suppress2;
+		break;
+	case '3':
+		sflags |= Suppress3;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if (argc < 2)
+		usage();
+
+	/* NOTE(review): assumes eprintf() from util.h does not return. */
+	if (!(fp1 = fopen(argv[0], "r")))
+		eprintf("fopen %s:", argv[0]);
+	if (!(fp2 = fopen(argv[1], "r")))
+		eprintf("fopen %s:", argv[1]);
+
+	comm(fp1, argv[0], fp2, argv[1], sflags);
+
+	fclose(fp1);
+	fclose(fp2);
+
+	return 0;
+}
+
+/* Print s in column 1, which never carries lead tabs. */
+static void
+print_col1(const char *s, int sflags)
+{
+	if (sflags & Suppress1)
+		return;
+	printf("%s", s);
+}
+
+/* Print s in column 2: one lead tab unless column 1 is suppressed. */
+static void
+print_col2(const char *s, int sflags)
+{
+	if (sflags & Suppress2)
+		return;
+	printf("%s%s", (sflags & Suppress1) ? "" : "\t", s);
+}
+
+/*
+ * Print s in column 3. One lead tab is dropped for each suppressed
+ * preceding column, so -1 or -2 alone leaves a single tab and -12
+ * leaves none.
+ */
+static void
+print_col3(const char *s, int sflags)
+{
+	const char *tabs = "\t\t";
+
+	if (sflags & Suppress3)
+		return;
+	/* Stepping past a tab shortens the prefix by one column. */
+	if (sflags & Suppress1)
+		tabs++;
+	if (sflags & Suppress2)
+		tabs++;
+	printf("%s%s", tabs, s);
+}
+
+/*
+ * Merge-compare the lines of fp1 and fp2 with strcmp() and print each
+ * one in column 1 (only in fp1), column 2 (only in fp2) or column 3
+ * (in both), as far as sflags allows. s1 and s2 are not used.
+ *
+ * Lines are read with fgets() into BUFSIZ-byte buffers and compared
+ * with their newline included, so a longer line is handled in pieces
+ * and a final line without a newline differs from the same text
+ * followed by one.
+ */
+static void
+comm(FILE *fp1, const char *s1, FILE *fp2, const char *s2, int sflags)
+{
+	char buf1[BUFSIZ], buf2[BUFSIZ];
+	bool eof1 = false, eof2 = false;
+	bool r1 = true, r2 = true;	/* which buffers to refill */
+	int ret;
+
+	for (;;) {
+		if (r1 && !fgets(buf1, sizeof buf1, fp1))
+			eof1 = true;
+		if (r2 && !fgets(buf2, sizeof buf2, fp2))
+			eof2 = true;
+
+		/*
+		 * fp1 is exhausted: dump the rest of fp2. If fp2 reached
+		 * EOF in this same pass, buf2 holds no unprinted line
+		 * (it is stale, or never written), so print nothing.
+		 */
+		if (eof1) {
+			while (!eof2) {
+				print_col2(buf2, sflags);
+				if (!fgets(buf2, sizeof buf2, fp2))
+					eof2 = true;
+			}
+			return;
+		}
+		/* fp2 is exhausted: buf1 is unprinted, dump the rest of fp1. */
+		if (eof2) {
+			do {
+				print_col1(buf1, sflags);
+			} while (fgets(buf1, sizeof buf1, fp1));
+			return;
+		}
+
+		ret = strcmp(buf1, buf2);
+		/* Refill only the buffer(s) whose line is printed now. */
+		r1 = (ret <= 0);
+		r2 = (ret >= 0);
+		if (ret == 0)
+			print_col3(buf1, sflags);
+		else if (ret < 0)
+			print_col1(buf1, sflags);
+		else
+			print_col2(buf2, sflags);
+	}
+}
-- 
1.7.10.4