[hackers] [sbase] new expr using shunting-yard instead of recursive descent (this time with tabs) || Evan Gates

From: <git_AT_suckless.org>
Date: Sun, 16 Nov 2014 13:52:54 +0100

commit 6240d26beb52747638ed274ff6a60a8b4b8eb5b7
Author: Evan Gates <evan.gates_AT_gmail.com>
Date: Thu Nov 13 15:07:15 2014 -0800

    new expr using shunting-yard instead of recursive descent (this time with tabs)

diff --git a/expr.c b/expr.c
index 9457d39..28ca387 100644
--- a/expr.c
+++ b/expr.c
_AT_@ -1,517 +1,241 @@
-/* $OpenBSD: src/bin/expr/expr.c,v 1.19 2013/11/21 15:54:45 deraadt Exp $ */
-/* $NetBSD: expr.c,v 1.3.6.1 1996/06/04 20:41:47 cgd Exp $ */
-
-/*
- * Written by J.T. Conklin <jtc_AT_netbsd.org>.
- * Public domain.
- */
-
+#include <inttypes.h>
+#include <limits.h>
+#include <regex.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
-#include <limits.h>
-#include <locale.h>
-#include <ctype.h>
-#include <regex.h>
-#include <err.h>
-
-static struct val *make_int(int);
-static struct val *make_str(char *);
-static void free_value(struct val *);
-static int is_integer(struct val *, int *);
-static int to_integer(struct val *);
-static void to_string(struct val *);
-static int is_zero_or_null(struct val *);
-static void nexttoken(int);
-static void error(void);
-static struct val *eval6(void);
-static struct val *eval5(void);
-static struct val *eval4(void);
-static struct val *eval3(void);
-static struct val *eval2(void);
-static struct val *eval1(void);
-static struct val *eval0(void);
-
-enum token {
- OR, AND, EQ, LT, GT, ADD, SUB, MUL, DIV, MOD, MATCH, RP, LP,
- NE, LE, GE, OPERAND, EOI
-};
-
-struct val {
- enum {
- integer,
- string
- } type;
+#include "util.h"
 
- union {
- char *s;
- int i;
- } u;
+enum {
+ VAL = CHAR_MAX + 1, GE, LE, NE
 };
 
-static enum token token;
-static struct val *tokval;
-static char **av;
-
-static struct val *
-make_int(int i)
-{
- struct val *vp;
+typedef struct {
+ char *s;
+ intmax_t n;
+} Val;
 
- vp = (struct val *) malloc(sizeof(*vp));
- if (vp == NULL) {
- err(3, NULL);
- }
- vp->type = integer;
- vp->u.i = i;
- return vp;
-}
 
-static struct val *
-make_str(char *s)
-{
- struct val *vp;
+static void doop(int*, int**, Val*, Val**);
+static Val match(Val, Val);
+static void num(Val);
+static int valcmp(Val, Val);
+static char *valstr(Val, char*);
+static int yylex(void);
+static int yyparse(int);
 
- vp = (struct val *) malloc(sizeof(*vp));
- if (vp == NULL || ((vp->u.s = strdup(s)) == NULL)) {
- err(3, NULL);
- }
- vp->type = string;
- return vp;
-}
+static char **args;
+static size_t intlen;
+static Val yylval;
 
+// otop points to one past last op
+// vtop points to one past last val
+// guaranteed otop != ops
+// pop two vals, pop op, apply op, push val
 static void
-free_value(struct val *vp)
+doop(int *ops, int **otop, Val *vals, Val **vtop)
 {
- if (vp->type == string)
- free(vp->u.s);
- free(vp);
-}
+ Val ret, a, b;
+ int op;
 
-/* determine if vp is an integer; if so, return it's value in *r */
-static int
-is_integer(struct val *vp, int *r)
-{
- char *s;
- int neg;
- int i;
+ if((*otop)[-1] == '(')
+ enprintf(2, "syntax error: extra (\n");
+ if(*vtop - vals < 2)
+ enprintf(2, "syntax error: missing expression or extra operator\n");
 
- if (vp->type == integer) {
- *r = vp->u.i;
- return 1;
- }
+ a = (*vtop)[-2];
+ b = (*vtop)[-1];
+ op = (*otop)[-1];
 
- /*
- * POSIX.2 defines an "integer" as an optional unary minus
- * followed by digits.
- */
- s = vp->u.s;
- i = 0;
+ switch (op) {
+ case '|':
+ if ( a.s && *a.s) ret = (Val){ a.s , 0 };
+ else if(!a.s && a.n) ret = (Val){ NULL, a.n };
+ else if( b.s && *b.s) ret = (Val){ b.s , 0 };
+ else ret = (Val){ NULL, b.n };
+ break;
 
- neg = (*s == '-');
- if (neg)
- s++;
+ case '&':
+ if(((a.s && *a.s) || a.n) &&
+ ((b.s && *b.s) || b.n)) ret = a;
+ else ret = (Val){ NULL, 0 };
+ break;
 
- while (*s) {
- if (!isdigit((unsigned char)*s))
- return 0;
+ case '=': ret = (Val){ NULL, valcmp(a, b) == 0 }; break;
+ case '>': ret = (Val){ NULL, valcmp(a, b) > 0 }; break;
+ case GE : ret = (Val){ NULL, valcmp(a, b) >= 0 }; break;
+ case '<': ret = (Val){ NULL, valcmp(a, b) < 0 }; break;
+ case LE : ret = (Val){ NULL, valcmp(a, b) <= 0 }; break;
+ case NE : ret = (Val){ NULL, valcmp(a, b) != 0 }; break;
 
- i *= 10;
- i += *s - '0';
+ case '+': num(a); num(b); ret = (Val){ NULL, a.n + b.n }; break;
+ case '-': num(a); num(b); ret = (Val){ NULL, a.n - b.n }; break;
+ case '*': num(a); num(b); ret = (Val){ NULL, a.n * b.n }; break;
+ case '/': num(a); num(b); ret = (Val){ NULL, a.n / b.n }; break;
+ case '%': num(a); num(b); ret = (Val){ NULL, a.n % b.n }; break;
 
- s++;
+ case ':': ret = match(a, b); break;
         }
 
- if (neg)
- i *= -1;
-
- *r = i;
- return 1;
+ (*vtop)[-2] = ret;
+ (*otop)--;
+ (*vtop)--;
 }
 
-/* coerce to vp to an integer */
-static int
-to_integer(struct val *vp)
+static Val
+match(Val vstr, Val vregx)
 {
- int r;
+ char b1[intlen], *str = valstr(vstr , b1);
+ char b2[intlen], *regx = valstr(vregx, b2);
 
- if (vp->type == integer)
- return 1;
+ regex_t re;
+ regmatch_t matches[2];
+ char anchreg[strlen(regx) + 2];
 
- if (is_integer(vp, &r)) {
- free(vp->u.s);
- vp->u.i = r;
- vp->type = integer;
- return 1;
- }
+ sprintf(anchreg, "^%s", regx);
 
- return 0;
-}
+ if(regcomp(&re, anchreg, 0))
+ enprintf(3, "regcomp failed\n");
 
-/* coerce to vp to an string */
-static void
-to_string(struct val *vp)
-{
- char *tmp;
-
- if (vp->type == string)
- return;
-
- if (asprintf(&tmp, "%d", vp->u.i) == -1)
- err(3, NULL);
+ if(regexec(&re, str, 2, matches, 0))
+ return (Val){ (re.re_nsub ? "" : NULL), 0 };
 
- vp->type = string;
- vp->u.s = tmp;
-}
+ if(re.re_nsub) {
+ intmax_t d;
+ char *ret, *p;
+ regoff_t len = matches[1].rm_eo - matches[1].rm_so + 1;
 
-static int
-is_zero_or_null(struct val *vp)
-{
- if (vp->type == integer) {
- return (vp->u.i == 0);
- } else {
- return (*vp->u.s == 0 || (to_integer(vp) && vp->u.i == 0));
- }
- /* NOTREACHED */
-}
+ if(!(ret = malloc(len))) // FIXME: free
+ enprintf(3, "malloc failed\n");
 
-static void
-nexttoken(int pat)
-{
- char *p;
+ d = strtoimax(ret, &p, 10);
+ strlcpy(ret, str + matches[1].rm_so, len);
 
- if ((p = *av) == NULL) {
- token = EOI;
- return;
- }
- av++;
-
- if (pat == 0 && p[0] != '\0') {
- if (p[1] == '\0') {
- const char *x = "|&=<>+-*/%:()";
- char *i; /* index */
-
- if ((i = strchr(x, *p)) != NULL) {
- token = i - x;
- return;
- }
- } else if (p[1] == '=' && p[2] == '\0') {
- switch (*p) {
- case '<':
- token = LE;
- return;
- case '>':
- token = GE;
- return;
- case '!':
- token = NE;
- return;
- }
- }
+ if(*ret && !*p)
+ return (Val){ NULL, d };
+ return (Val){ ret, 0 };
         }
- tokval = make_str(p);
- token = OPERAND;
- return;
+ return (Val){ NULL, matches[0].rm_eo - matches[0].rm_so };
 }
 
 static void
-error(void)
+num(Val v)
 {
- errx(2, "syntax error");
- /* NOTREACHED */
+ if(v.s)
+ enprintf(2, "syntax error: expected integer got `%s'\n", v.s);
 }
 
-static struct val *
-eval6(void)
+static int
+valcmp(Val a, Val b)
 {
- struct val *v;
-
- if (token == OPERAND) {
- nexttoken(0);
- return tokval;
-
- } else if (token == RP) {
- nexttoken(0);
- v = eval0();
+ char b1[intlen], *p = valstr(a, b1);
+ char b2[intlen], *q = valstr(b, b2);
 
- if (token != LP) {
- error();
- /* NOTREACHED */
- }
- nexttoken(0);
- return v;
- } else {
- error();
- }
- /* NOTREACHED */
- return NULL;
+ if(!a.s && !b.s)
+ return (a.n > b.n) - (a.n < b.n);
+ return strcmp(p, q);
 }
 
-/* Parse and evaluate match (regex) expressions */
-static struct val *
-eval5(void)
+static char *
+valstr(Val val, char *buf)
 {
- regex_t rp;
- regmatch_t rm[2];
- char errbuf[256];
- int eval;
- struct val *l, *r;
- struct val *v;
-
- l = eval6();
- while (token == MATCH) {
- nexttoken(1);
- r = eval6();
-
- /* coerce to both arguments to strings */
- to_string(l);
- to_string(r);
-
- /* compile regular expression */
- if ((eval = regcomp(&rp, r->u.s, 0)) != 0) {
- regerror(eval, &rp, errbuf, sizeof(errbuf));
- errx(2, "%s", errbuf);
- }
-
- /* compare string against pattern -- remember that patterns
- are anchored to the beginning of the line */
- if (regexec(&rp, l->u.s, 2, rm, 0) == 0 && rm[0].rm_so == 0) {
- if (rm[1].rm_so >= 0) {
- *(l->u.s + rm[1].rm_eo) = '\0';
- v = make_str(l->u.s + rm[1].rm_so);
-
- } else {
- v = make_int((int)(rm[0].rm_eo - rm[0].rm_so));
- }
- } else {
- if (rp.re_nsub == 0) {
- v = make_int(0);
- } else {
- v = make_str("");
- }
- }
-
- /* free arguments and pattern buffer */
- free_value(l);
- free_value(r);
- regfree(&rp);
-
- l = v;
+ char *p = val.s;
+ if(!p) {
+ sprintf(buf, "%"PRIdMAX, val.n);
+ p = buf;
         }
-
- return l;
+ return p;
 }
 
-/* Parse and evaluate multiplication and division expressions */
-static struct val *
-eval4(void)
+static int
+yylex(void)
 {
- struct val *l, *r;
- enum token op;
-
- l = eval5();
- while ((op = token) == MUL || op == DIV || op == MOD) {
- nexttoken(0);
- r = eval5();
-
- if (!to_integer(l) || !to_integer(r)) {
- errx(2, "non-numeric argument");
- }
+ intmax_t d;
+ char *q, *p, *ops = "|&=><+-*/%():";
 
- if (op == MUL) {
- l->u.i *= r->u.i;
- } else {
- if (r->u.i == 0) {
- errx(2, "division by zero");
- }
- if (op == DIV) {
- if (l->u.i != INT_MIN || r->u.i != -1)
- l->u.i /= r->u.i;
- } else {
- if (l->u.i != INT_MIN || r->u.i != -1)
- l->u.i %= r->u.i;
- else
- l->u.i = 0;
- }
- }
+ if(!(p = *args++))
+ return 0;
 
- free_value(r);
+ d = strtoimax(p, &q, 10);
+ if(*p && !*q) {
+ yylval = (Val){ NULL, d };
+ return VAL;
         }
 
- return l;
-}
-
-/* Parse and evaluate addition and subtraction expressions */
-static struct val *
-eval3(void)
-{
- struct val *l, *r;
- enum token op;
-
- l = eval4();
- while ((op = token) == ADD || op == SUB) {
- nexttoken(0);
- r = eval4();
+ if(*p && !p[1] && strchr(ops, *p))
+ return *p;
 
- if (!to_integer(l) || !to_integer(r)) {
- errx(2, "non-numeric argument");
- }
-
- if (op == ADD) {
- l->u.i += r->u.i;
- } else {
- l->u.i -= r->u.i;
- }
+ if(strcmp(p, ">=") == 0) return GE;
+ if(strcmp(p, "<=") == 0) return LE;
+ if(strcmp(p, "!=") == 0) return NE;
 
- free_value(r);
- }
-
- return l;
+ yylval = (Val){ p, 0 };
+ return VAL;
 }
 
-/* Parse and evaluate comparison expressions */
-static struct val *
-eval2(void)
-{
- struct val *l, *r;
- enum token op;
- int v = 0, li, ri;
-
- l = eval3();
- while ((op = token) == EQ || op == NE || op == LT || op == GT ||
- op == LE || op == GE) {
- nexttoken(0);
- r = eval3();
-
- if (is_integer(l, &li) && is_integer(r, &ri)) {
- switch (op) {
- case GT:
- v = (li > ri);
- break;
- case GE:
- v = (li >= ri);
- break;
- case LT:
- v = (li < ri);
- break;
- case LE:
- v = (li <= ri);
- break;
- case EQ:
- v = (li == ri);
- break;
- case NE:
- v = (li != ri);
- break;
- default:
- break;
- }
- } else {
- to_string(l);
- to_string(r);
-
- switch (op) {
- case GT:
- v = (strcoll(l->u.s, r->u.s) > 0);
- break;
- case GE:
- v = (strcoll(l->u.s, r->u.s) >= 0);
- break;
- case LT:
- v = (strcoll(l->u.s, r->u.s) < 0);
- break;
- case LE:
- v = (strcoll(l->u.s, r->u.s) <= 0);
- break;
- case EQ:
- v = (strcoll(l->u.s, r->u.s) == 0);
- break;
- case NE:
- v = (strcoll(l->u.s, r->u.s) != 0);
+static int
+yyparse(int argc)
+{
+ Val vals[argc], *vtop = vals;
+ int ops [argc], *otop = ops;
+ int type, last = 0;
+ char prec[] = {
+ ['|'] = 1,
+ ['&'] = 2,
+ ['='] = 3, ['>'] = 3, [GE] = 3, ['<'] = 3, [LE] = 3, [NE] = 3,
+ ['+'] = 4, ['-'] = 4,
+ ['*'] = 5, ['/'] = 5, ['%'] = 5,
+ [':'] = 6,
+ };
+
+ while((type = yylex())) {
+ switch (type) {
+ case VAL: *vtop++ = yylval; break;
+ case '(': *otop++ = '(' ; break;
+ case ')':
+ if(last == '(')
+ enprintf(2, "syntax error: empty ( )\n");
+ while(otop > ops && otop[-1] != '(')
+ doop(ops, &otop, vals, &vtop);
+ if(otop == ops)
+ enprintf(2, "syntax error: extra )\n");
+ otop--;
                                 break;
- default:
+ default :
+ if(prec[last])
+ enprintf(2, "syntax error: extra operator\n");
+ while(otop > ops && prec[otop[-1]] >= prec[type])
+ doop(ops, &otop, vals, &vtop);
+ *otop++ = type;
                                 break;
- }
                 }
-
- free_value(l);
- free_value(r);
- l = make_int(v);
+ last = type;
         }
+ while(otop > ops)
+ doop(ops, &otop, vals, &vtop);
 
- return l;
-}
+ if(vtop == vals)
+ enprintf(2, "syntax error: missing expression\n");
+ if(vtop - vals > 1)
+ enprintf(2, "syntax error: extra expression\n");
 
-/* Parse and evaluate & expressions */
-static struct val *
-eval1(void)
-{
- struct val *l, *r;
-
- l = eval2();
- while (token == AND) {
- nexttoken(0);
- r = eval2();
-
- if (is_zero_or_null(l) || is_zero_or_null(r)) {
- free_value(l);
- free_value(r);
- l = make_int(0);
- } else {
- free_value(r);
- }
- }
+ vtop--;
+ if(vtop->s) printf("%s\n" , vtop->s);
+ else printf("%"PRIdMAX"\n", vtop->n);
 
- return l;
+ return (vtop->s && *vtop->s) || vtop->n;
 }
 
-/* Parse and evaluate | expressions */
-static struct val *
-eval0(void)
-{
- struct val *l, *r;
-
- l = eval1();
- while (token == OR) {
- nexttoken(0);
- r = eval1();
-
- if (is_zero_or_null(l)) {
- free_value(l);
- l = r;
- } else {
- free_value(r);
- }
- }
-
- return l;
-}
-
-
 int
-main(int argc, char *argv[])
+main(int argc, char **argv)
 {
- struct val *vp;
-
- (void) setlocale(LC_ALL, "");
-
- if (argc > 1 && !strcmp(argv[1], "--"))
- argv++;
-
- av = argv + 1;
-
- nexttoken(0);
- vp = eval0();
-
- if (token != EOI) {
- error();
- /* NOTREACHED */
- }
+ if(!(intlen = snprintf(NULL, 0, "%"PRIdMAX, INTMAX_MIN) + 1))
+ enprintf(3, "failed to get max digits\n");
 
- if (vp->type == integer)
- printf("%d\n", vp->u.i);
- else
- printf("%s\n", vp->u.s);
+ args = argv + 1;
+ if(*args && !strcmp("--", *args))
+ ++args;
 
- exit(is_zero_or_null(vp));
+ return !yyparse(argc);
 }
Received on Sun Nov 16 2014 - 13:52:54 CET

This archive was generated by hypermail 2.3.0 : Sun Nov 16 2014 - 14:00:15 CET