[hackers] [scc] Rewrite and simplify the embedded preprocessor || Roberto E. Vargas Caballero

From: <git_AT_suckless.org>
Date: Fri, 17 Jul 2015 19:37:49 +0200 (CEST)

commit e1b218ec5e6cc2745a43c5244bf6c2c0481b3e07
Author: Roberto E. Vargas Caballero <k0ga_AT_shike2.com>
AuthorDate: Fri Jul 17 19:30:37 2015 +0200
Commit: Roberto E. Vargas Caballero <k0ga_AT_shike2.com>
CommitDate: Fri Jul 17 19:30:37 2015 +0200

    Rewrite and simplify the embedded preprocessor
    
    The preprocessor dealt with macro expansions in the same way as
    includes. This was done as a first attempt to avoid
    recursion, but it only avoided direct recursion. Implementing the
    algorithm described in ANSI C is really complex, and the idea
    of the embedded preprocessor was to have a fast and simple
    preprocessor, so it is better to drop any attempt to
    detect recursion. This simplifies the input/output functions
    a lot.
    This version of the preprocessor uses the lexer of the compiler,
    so the code is not as hardcore as the original, which did all
    the parsing directly with char pointers.

diff --git a/cc1/cc1.h b/cc1/cc1.h
index 11bb342..7095cf2 100644
--- a/cc1/cc1.h
+++ b/cc1/cc1.h
@@ -11,6 +11,8 @@ typedef struct type Type;
 typedef struct symbol Symbol;
 typedef struct caselist Caselist;
 typedef struct node Node;
+typedef struct input Input;
+
 
 struct type {
         unsigned char op; /* type builder operator */
@@ -70,6 +72,14 @@ struct yystype {
         unsigned char token;
 };
 
+struct input {
+ char *fname;
+ void *fp;
+ char *line, *begin, *p;
+ struct input *next;
+ unsigned short nline;
+};
+
 /*
  * Definition of enumerations
  */
@@ -96,6 +106,7 @@ enum {
         NS_LABEL,
         NS_CPP,
         NS_KEYWORD,
+ NS_CPPCLAUSES,
         NS_STRUCTS
 };
 
@@ -111,6 +122,13 @@ enum {
         ISEXTERN =128
 };
 
+
+/* lexer mode, compiler or preprocessor directive */
+enum {
+ CCMODE,
+ CPPMODE
+};
+
 /* input tokens */
 enum tokens {
         TQUALIFIER = 128,
@@ -177,6 +195,14 @@ enum tokens {
         CONTINUE,
         BREAK,
         RETURN,
+ DEFINE,
+ INCLUDE,
+ LINE,
+ PRAGMA,
+ ERROR,
+ IFDEF,
+ IFNDEF,
+ UNDEF,
         EOFTOK
 };
 
@@ -275,11 +301,7 @@ extern unsigned next(void);
 extern bool moreinput(void);
 extern void expect(unsigned tok);
 extern void discard(void);
-extern char *getfname(void);
-extern unsigned short getfline(void);
-extern void setfname(char *name);
-extern void setfline(unsigned short line);
-extern bool addinput(char *fname, Symbol *sym, char *str);
+extern bool addinput(char *fname);
 extern void setnamespace(int ns);
 extern void setsafe(int type);
 extern void ilex(char *fname);
@@ -298,8 +320,8 @@ extern Node *expr(void), *negate(Node *np);
 
 /* cpp.c */
 extern void icpp(void);
-extern bool cpp(char *s);
-extern int expand(Symbol *sym);
+extern bool cpp(void);
+extern bool expand(char *begin, Symbol *sym);
 
 /*
  * Definition of global variables
@@ -310,6 +332,8 @@ extern unsigned yytoken;
 extern unsigned short yylen;
 extern int cppoff, disexpand;
 extern unsigned cppctx;
+extern Input *input;
+extern int lexmode;
 
 extern Type *voidtype, *pvoidtype, *booltype,
             *uchartype, *chartype,
diff --git a/cc1/cpp.c b/cc1/cpp.c
index 8552f80..534585a 100644
--- a/cc1/cpp.c
+++ b/cc1/cpp.c
@@ -19,7 +19,7 @@ static char *argp, *macroname;
 static unsigned arglen;
 static Symbol *symline, *symfile;
 static unsigned char ifstatus[NR_COND];
-static int paramerr;
+static Type *charptype;
 
 unsigned cppctx;
 int disexpand;
@@ -56,87 +56,26 @@ icpp(void)
         symfile = defmacro("__FILE__");
 }
 
-static bool
-iden(char **str)
-{
- char c, *bp, *s = *str;
-
- if (!isalpha(c = *s) && c != '_')
- return 0;
- for (bp = yytext; bp < &yytext[IDENTSIZ]; *bp++ = c) {
- if ((c = *s) == '\0' || !isalnum(c) && c != '_')
- break;
- ++s;
- }
- if (bp == &yytext[IDENTSIZ]) {
- printerr("identifier too long in preprocessor");
- return 0;
- }
- *bp = '\0';
-
- while (isspace(*s))
- ++s;
-
- *str = s;
- return 1;
-}
-
-static bool
-string(char **input, char **str, char delim)
-{
- char c, *s = *input;
-
- if (str)
- *str = s;
-
- while ((c = *s) && c != delim)
- ++s;
- if (c == '\0')
- return 0;
- *s++ = '\0';
- *input = s;
-
- return 1;
-}
-
-static void
-cleanup(char *s)
-{
- while (isspace(*s))
- ++s;
- if (*s != '\0')
- printerr("trailing characters after preprocessor directive");
-}
-
 static void
 nextcpp(void)
 {
- next();
- if (yytoken == EOFTOK) {
- printerr("unterminated argument list invoking macro \"%s\"",
- macroname);
- goto mark_error;
- }
- if (yylen + 1 > arglen) {
- printerr("argument overflow invoking macro \"%s\"",
- macroname);
- goto mark_error;
- }
- memcpy(argp, yytext, yylen);
- argp += yylen;
- *argp++ = ' ';
- arglen -= yylen + 1;
- return;
-
-mark_error:
- paramerr = 1;
- yytoken = 0;
+ next();
+ if (yytoken == EOFTOK)
+ error("unterminated argument list invoking macro \"%s\"",
+ macroname);
+ if (yylen + 1 > arglen)
+ error("argument overflow invoking macro \"%s\"",
+ macroname);
+ memcpy(argp, yytext, yylen);
+ argp += yylen;
+ *argp++ = ' ';
+ arglen -= yylen + 1;
 }
 
 static void
 paren(void)
 {
- while (!paramerr) {
+ for (;;) {
                 nextcpp();
                 switch (yytoken) {
                 case ')':
@@ -151,7 +90,7 @@ paren(void)
 static void
 parameter(void)
 {
- while (!paramerr) {
+ for (;;) {
                 nextcpp();
                 switch (yytoken) {
                 case ')':
@@ -172,90 +111,55 @@ parsepars(char *buffer, char **listp, int nargs)
         int n;
 
         if (nargs == -1)
- return 1;
-
- if (ahead() != '(')
+ return -1;
+ if (ahead() != '(' && nargs > 0)
                 return 0;
 
         disexpand = 1;
         next();
- paramerr = n = 0;
+ n = 0;
         argp = buffer;
         arglen = INPUTSIZ;
- if (ahead() != ')') {
+ if (yytoken != ')') {
                 do {
                         *listp++ = argp;
                         parameter();
- } while (!paramerr && ++n < NR_MACROARG && yytoken == ',');
+ } while (++n < NR_MACROARG && yytoken == ',');
         }
+ if (yytoken != ')')
+ error("incorrect macro function alike invocation");
         disexpand = 0;
 
- if (paramerr)
- return -1;
- if (n == NR_MACROARG) {
- printerr("too much parameters in macro \"%s\"", macroname);
- return -1;
- }
+ if (n == NR_MACROARG)
+ error("too much parameters in macro \"%s\"", macroname);
         if (n != nargs) {
- printerr("macro \"%s\" passed %d arguments, but it takes %d",
+ error("macro \"%s\" passed %d arguments, but it takes %d",
                       macroname, n, nargs);
- return -1;
         }
 
         return 1;
 }
 
-/*
- * sym->u.s is a string with the following format:
- * dd#string
- * where dd is the number of arguments of the macro
- * (-1 if it is a macro without arguments), and string
- * is the macro definition, where _AT_dd@ indicates the
- * parameter number dd
- */
-#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? INPUTSIZ : FILENAME_MAX+2)
-int
-expand(Symbol *sym)
+static void
+copymacro(char *bp, char *s, size_t bufsiz, char *arglist[])
 {
- unsigned len;
- int r, n;
- char *arglist[NR_MACROARG], arguments[INPUTSIZ], buffer[BUFSIZE];
- char prevc, c, *bp, *lim, *arg, *s = sym->u.s;
+ char prevc, c, *arg;
 
- fprintf(stderr, "macro %s:%s\n", sym->name, sym->u.s);
- if (sym == symfile) {
- sprintf(buffer, "\"%s\"", getfname());
- goto add_macro;
- }
- if (sym == symline) {
- sprintf(buffer, "%d", getfline());
- goto add_macro;
- }
-
- macroname = sym->name;
- if ((r = parsepars(arguments, arglist, atoi(s))) < 1)
- return r;
-
- for (n = 0; n < atoi(s); ++n)
- fprintf(stderr, "PAR%d:%s\n", n, arglist[n]);
-
- len = INPUTSIZ-1;
- bp = buffer;
- for (prevc = '\0', s += 3; c = *s; prevc = c, ++s) {
+ for (prevc = '\0'; c = *s; prevc = c, ++s) {
                 if (c != '@') {
                         if (c == '#')
                                 continue;
- if (len-- == 0)
+ if (bufsiz-- == 0)
                                 goto expansion_too_long;
                         *bp++ = c;
                 } else {
- unsigned size;
+ size_t size;
 
                         if (prevc == '#')
- len -= 2;
+ bufsiz -= 2;
                         arg = arglist[atoi(++s)];
                         size = strlen(arg);
- if (size > len)
+ if (size > bufsiz)
                                 goto expansion_too_long;
                         if (prevc == '#')
                                 *bp++ = '"';
@@ -263,190 +167,179 @@ expand(Symbol *sym)
                         bp += size;
                         if (prevc == '#')
                                 *bp++ = '"';
- len -= size;
+ bufsiz -= size;
                         s += 2;
                 }
         }
- *bp = '\0';
- fprintf(stderr, "macro expanded:%s\n", buffer);
-add_macro:
- addinput(NULL, sym, buffer);
- return 1;
+ *bp = '\0';
+
+ return;
 
 expansion_too_long:
- printerr("expansion of macro \"%s\" is too long", macroname);
- return -1;
+ error("expansion of macro \"%s\" is too long", macroname);
 }
-#undef BUFSIZE
 
-/*
- * Parse an argument list (par0, par1, ...) and creates
- * an array with pointers to all the arguments in the
- * list
- */
-static char *
-parseargs(char *s, char *args[NR_MACROARG], int *nargs)
+#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? INPUTSIZ : FILENAME_MAX+2)
+bool
+expand(char *begin, Symbol *sym)
 {
- int n;
         size_t len;
- char *endp, c;
+ int n;
+ char *s = sym->u.s;
+ char *arglist[NR_MACROARG], arguments[INPUTSIZ], buffer[BUFSIZE];
 
- n = -1;
- if (*s != '(')
- goto set_nargs;
- n = 0;
- while (isspace(*s++))
- /* nothing */;
- if (*s == ')')
- goto set_nargs;
-
- for (n = 1; n <= NR_MACROARG; ++n) {
- while (isspace(*s))
- ++s;
- if (!isalpha(*s) && *s != '_') {
- printerr("macro arguments must be identifiers");
- return NULL;
- }
- for (endp = s+1; isalnum(*endp) || *endp == '_'; ++endp)
- /* nothing */;
- if ((len = endp - s) > IDENTSIZ) {
- printerr("macro argument too long");
- return NULL;
- }
- *args++ = s;
- for (s = endp; isspace(*s); ++s)
- *s = '\0';
- c = *s;
- *s++ = '\0';
- if (c == ')')
- break;
- if (c == ',') {
- continue;
- } else {
- printerr("macro parameters must be comma-separated");
- return NULL;
- }
+ fprintf(stderr, "macro '%s':%s\n", sym->name, sym->u.s);
+ if (sym == symfile) {
+ sprintf(buffer, "\"%s\"", input->fname);
+ goto print_subs;
         }
- if (n > NR_MACROARG) {
- printerr("too much parameters in macro");
- return NULL;
+ if (sym == symline) {
+ sprintf(buffer, "%d", input->line);
+ goto print_subs;
         }
 
-set_nargs:
- *nargs = n;
- return s;
+ macroname = sym->name;
+ if (!parsepars(arguments, arglist, atoi(s)))
+ return 0;
+ for (n = 0; n < atoi(s); ++n)
+ fprintf(stderr, "PAR%d:%s\n", n, arglist[n]);
+
+ copymacro(buffer, s+3, INPUTSIZ-1, arglist);
+
+print_subs:
+ fprintf(stderr, "macro '%s' expanded to :'%s'\n", macroname, buffer);
+ len = strlen(buffer);
+
+ /* cut macro invocation */
+ memmove(begin, input->p, input->p - begin);
+ memmove(begin + len, begin, len);
+
+ /* paste macro expansion */
+ memcpy(begin, buffer, len);
+ input->p = input->begin = begin;
+
+ return 1;
 }
+#undef BUFSIZE
 
-/*
- * Copy a string define, and substitute formal arguments of the
- * macro into strings in the form @XX@, where XX is the position
- * of the argument in the argument list.
- */
-static bool
-copydefine(char *s, char *args[], char *buff, int bufsiz, int nargs)
+static int
+getpars(Symbol *args[NR_MACROARG])
 {
- int n;
- size_t ncopy;
- char arroba[6], *p, **bp, c, prevc;
-
- for (prevc = '\0'; c = *s++; prevc = c) {
- if (!isalpha(c) && c != '_' || nargs < 1) {
- if (bufsiz-- == 0)
- goto too_long;
- if (prevc == '#')
- goto bad_stringer;
- *buff++ = c;
- if (c != '#')
- continue;
- while (isspace(*++s))
- /* nothing */;
+ int n = -1;
+ char *err;
+
+ if (!accept('('))
+ return n;
+ ++n;
+ if (accept(')'))
+ return n;
+
+ do {
+ if (n == NR_MACROARG) {
+ err = "too much parameters in macro";
+ goto popctx_and_error;
                 }
- /* found an identifier, is it one of the macro arguments? */
- for (p = s; isalnum(c = *p) || c == '_'; ++p)
- /* nothing */;
- ncopy = p - --s;
- bp = args;
- for (n = 0; n < nargs; ++n) {
- if (strncmp(s, *bp++, ncopy))
- continue;
- sprintf(arroba, "@%02d@", n);
- s = arroba, ncopy = 4;
- break;
+ if (yytoken != IDEN) {
+ err = "macro arguments must be identifiers";
+ goto popctx_and_error;
                 }
- if (n == nargs && prevc == '#')
- goto bad_stringer;
- if ((bufsiz -= ncopy) < 0)
- goto too_long;
- memcpy(buff, s, ncopy);
- buff += ncopy, s = p;
- }
- if (bufsiz == 0)
- goto too_long;
- *buff = '\0';
- return 1;
+ args[n++] = yylval.sym;
+ next();
+ } while (accept(','));
+ expect(')');
 
-bad_stringer:
- printerr("'#' is not followed by a macro parameter");
- return 0;
-too_long:
- printerr("macro definition too long");
- return 0;
+ return n;
+
+popctx_and_error:
+ popctx();
+ error(err);
 }
 
-static char *
-mkdefine(char *s)
+static void
+getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz)
 {
- int nargs;
- char *args[NR_MACROARG], buff[LINESIZ+1];
-
- if ((s = parseargs(s, args, &nargs)) == NULL)
- return NULL;
- sprintf(buff, "%02d#", nargs);
+ Symbol **argp;
+ char *err;
+ size_t len;
+ int prevc = 0, ispar;
+
+ for (;;) {
+ ispar = 0;
+ if (yytoken == IDEN) {
+ for (argp = args; argp < &args[nargs]; ++argp) {
+ if (*argp == yylval.sym)
+ break;
+ }
+ if (argp != &args[nargs]) {
+ sprintf(yytext, "@%02d@", argp - args);
+ ispar = 1;
+ }
+ }
+ if (prevc == '#' && !ispar)
+ goto bad_stringer;
+ if (yytoken == EOFTOK)
+ break;
 
- while (isspace(*s))
- ++s;
+ if ((len = strlen(yytext)) >= bufsiz) {
+ err = "too long macro";
+ goto popctx_and_error;
+ }
+ memcpy(bp, yytext, len);
+ bp += len;
+ bufsiz -= len;
+ if ((prevc = yytoken) != '#') {
+ bufsiz;
+ *bp++ = ' ';
+ }
+ next();
+ }
+ *bp = '\0';
+ return;
 
- if (*s == '\0')
- buff[0] = '\0';
- else if (!copydefine(s, args, buff+3, LINESIZ-3, nargs))
- return NULL;
- return xstrdup(buff);
+bad_stringer:
+ err = "'#' is not followed by a macro parameter";
+popctx_and_error:
+ popctx();
+ error(err);
 }
 
 static void
-define(char *s)
+define(void)
 {
- char *t;
- Symbol *sym;
+ Symbol *sym,*args[NR_MACROARG];
+ char buff[LINESIZ+1];
+ int n;
 
         if (cppoff)
                 return;
- if (!iden(&s)) {
- printerr("#define must have an identifier as parameter");
- return;
- }
-
- for (t = s + strlen(s) + 1; isspace(*--t); *t = '\0')
- /* nothing */;
- if ((s = mkdefine(s)) == NULL)
- return;
-
- sym = lookup(NS_CPP);
+ if (yytoken != IDEN)
+ error("macro names must be identifiers");
+ sym = yylval.sym;
         if ((sym->flags & ISDEFINED) && sym->ns == NS_CPP) {
                 warn("'%s' redefined", yytext);
                 free(sym->u.s);
+ } else if (sym->ns != NS_CPP) {
+ sym = lookup(NS_CPP);
         }
         sym->flags |= ISDEFINED;
- sym->ns = NS_CPP;
- sym->ctx = UCHAR_MAX;
- sym->u.s = s;
+
+ pushctx();
+
+ next();
+ n = getpars(args);
+ sprintf(buff, "%02d#", n);
+ getdefs(args, n, buff+3, LINESIZ-3);
+ sym->u.s = xstrdup(buff);
+ fprintf(stderr, "Defining macro '%s'='%s'\n", sym->name, buff);
+
+ popctx();
 }
 
 static void
-include(char *s)
+include(void)
 {
- char **bp, delim, c, *p, *file, path[FILENAME_MAX];
- char *sysinclude[] = {
+ char **bp, *p, file[FILENAME_MAX], path[FILENAME_MAX];
+ static char *sysinclude[] = {
                 PREFIX"/include/",
                 PREFIX"/local/include/",
                 NULL
@@ -455,193 +348,199 @@ include(char *s)
 
         if (cppoff)
                 return;
- if ((c = *s++) == '>')
- delim = '>';
- else if (c == '"')
- delim = '"';
- else
- goto bad_include;
-
- if (!string(&s, &file, delim))
+ switch (*yytext) {
+ case '<':
+ if ((p = strchr(input->begin, '>')) == NULL)
+ goto bad_include;
+ *p = '\0';
+ if (p - input->begin >= FILENAME_MAX)
+ goto too_long;
+ strcpy(file, input->begin);
+ input->begin = input->p = p+1;
+ next();
+ break;
+ case '"':
+ if ((p = strchr(yytext + 1, '"')) == NULL)
+ goto bad_include;
+ *p = '\0';
+ if (p - yytext + 1 >= FILENAME_MAX)
+ goto too_long;
+ strcpy(file, yytext + 1);
+ next();
+ if (addinput(file))
+ return;
+ break;
+ default:
                 goto bad_include;
- if (delim == '"' && addinput(file, NULL, NULL))
- return;
+ }
 
         filelen = strlen(file);
         for (bp = sysinclude; *bp; ++bp) {
                 dirlen = strlen(*bp);
- if (dirlen + filelen > FILENAME_MAX)
+ if (dirlen + filelen > FILENAME_MAX-1)
                         continue;
                 memcpy(path, *bp, dirlen);
                 memcpy(path+dirlen, file, filelen);
- if (addinput(path, NULL, NULL))
+ if (addinput(path))
                         break;
         }
         if (*bp)
- printerr("included file '%s' not found", file);
- cleanup(s);
+ error("included file '%s' not found", file);
+
         return;
 
 bad_include:
- printerr("#include expects \"FILENAME\" or <FILENAME>");
+ error("#include expects \"FILENAME\" or <FILENAME>");
+too_long:
+ error("#include FILENAME too long");
 }
 
 static void
-line(char *s)
+line(void)
 {
- char *file;
+ char *file, *p;
+ Type *tp;
         long n;
 
         if (cppoff)
                 return;
- if ((n = strtol(s, &s, 10)) <= 0 || n > USHRT_MAX) {
- printerr("first parameter of #line is not a positive integer");
+ if ((n = strtol(input->p, &input->p, 10)) <= 0 || n > USHRT_MAX)
+ error("first parameter of #line is not a positive integer");
+
+ if (yytoken != CONSTANT || yylval.sym->type != inttype)
+ error("first parameter of #line is not a positive integer");
+
+ input->nline = yylval.sym->u.i;
+ next();
+ if (yytoken == EOFTOK)
                 return;
- }
 
- switch (*s) {
- case ' ':
- case '\t':
- while (isspace(*s))
- ++s;
- if (*s == '\0')
- goto end_string;
- if (*s++ != '"' && !string(&s, &file, '"'))
- goto bad_file;
- setfname(file);
- case '\0':
- end_string:
- setfline(n-1);
- break;;
- default:
- bad_file:
- printerr("second parameter of #line is not a valid filename");
- break;
- }
- cleanup(s);
+ tp = yylval.sym->type;
+ if (yytoken != CONSTANT || tp->op != ARY && tp->type != chartype)
+ error("second parameter of #line is not a valid filename");
+ free(input->fname);
+ input->fname = xstrdup(yylval.sym->u.s);
 }
 
 static void
-pragma(char *s)
+pragma(void)
 {
         if (cppoff)
                 return;
+ /* TODO: discard input */
 }
 
 static void
-usererr(char *s)
+usererr(void)
 {
         if (cppoff)
                 return;
- printerr("#error %s", s);
- exit(1);
+ printerr("#error %s", input->p);
+ /* TODO: discard input */
 }
 
 static void
-ifclause(char *s, int isdef)
+ifclause(int isdef)
 {
         Symbol *sym;
- unsigned n = cppctx++;
-
- if (cppctx == NR_COND-1) {
- printerr("too much nesting levels of conditional inclusion");
- return;
- }
- if (!iden(&s)) {
- printerr("no macro name given in #%s directive",
- (isdef) ? "ifdef" : "ifndef");
- return;
+ unsigned n;
+
+ if (cppctx == NR_COND-1)
+ error("too much nesting levels of conditional inclusion");
+ n = cppctx++
+ if (yytoken != IDEN) {
+ error("no macro name given in #%s directive",
+ (isdef) ? "ifdef" : "ifndef");
         }
+
         sym = lookup(NS_CPP);
+ next();
         if (!(ifstatus[n] = (sym->flags & ISDEFINED) != 0 == isdef))
                 ++cppoff;
- cleanup(s);
 }
 
 static void
-ifdef(char *s)
+ifdef(void)
 {
- ifclause(s, 1);
+ ifclause(1);
 }
 
 static void
-ifndef(char *s)
+ifndef(void)
 {
- ifclause(s, 0);
+ ifclause(0);
 }
 
 static void
-endif(char *s)
+endif(void)
 {
- if (cppctx == 0) {
- printerr("#endif without #if");
- return;
- }
+ if (cppctx == 0)
+ error("#endif without #if");
+
         if (!ifstatus[--cppctx])
                 --cppoff;
- cleanup(s);
 }
 
 static void
-elseclause(char *s)
+elseclause(void)
 {
         struct ifstatus *ip;
 
- if (cppctx == 0) {
- printerr("#else without #ifdef/ifndef");
- return;
- }
+ if (cppctx == 0)
+ error("#else without #ifdef/ifndef");
+
         cppoff += (ifstatus[cppctx-1] ^= 1) ? -1 : 1;
- cleanup(s);
 }
 
 static void
-undef(char *s)
+undef(void)
 {
         Symbol *sym;
 
- if (!iden(&s)) {
- printerr("no macro name given in #undef directive");
+ if (cppoff)
+ return;
+ if (yytoken != IDEN) {
+ error("no macro name given in #undef directive");
                 return;
         }
         sym = lookup(NS_CPP);
         sym->flags &= ~ISDEFINED;
- cleanup(s);
 }
 
 bool
-cpp(char *s)
+cpp(void)
 {
         static struct {
- char *name;
- void (*fun)(char *);
- } *bp, cmds[] = {
- "define", define,
- "include", include,
- "ifdef", ifdef,
- "ifndef", ifndef,
- "endif", endif,
- "else", elseclause,
- "undef", undef,
- "line", line,
- "pragma", pragma,
- "error", usererr,
- NULL, NULL
+ uint8_t tok;
+ void (*fun)(void);
+ } *bp, clauses [] = {
+ {DEFINE, define},
+ {INCLUDE, include},
+ {LINE, line},
+ {IFDEF, ifdef},
+ {IFNDEF, ifndef},
+ {ELSE, elseclause},
+ {UNDEF, undef},
+ {PRAGMA, pragma},
+ {ERROR, usererr},
+ {0, NULL}
         };
 
- if (*s++ != '#')
+ if (*input->p != '#')
                 return 0;
- while (isspace(*s))
- ++s;
- if (!iden(&s))
- goto incorrect;
- for (bp = cmds; bp->name; ++bp) {
- if (strcmp(bp->name, yytext))
- continue;
- (*bp->fun)(s);
- return 1;
- }
-incorrect:
- printerr("invalid preprocessor directive #%s", yytext);
+ ++input->p;
+ lexmode = CPPMODE;
+ setnamespace(NS_CPPCLAUSES);
+ next();
+ for (bp = clauses; bp->tok && bp->tok != yytoken; ++bp)
+ /* nothing */;
+ if (!bp->tok)
+ error("incorrect preprocessor directive");
+ next();
+ (*bp->fun)();
+
+ if (yytoken != EOFTOK && !cppoff)
+ error("trailing characters after preprocessor directive");
+ lexmode = CCMODE;
         return 1;
 }
diff --git a/cc1/error.c b/cc1/error.c
index 68880d1..9219fde 100644
--- a/cc1/error.c
+++ b/cc1/error.c
@@ -19,8 +19,9 @@ warn_helper(int flag, char *fmt, va_list va)
                 return;
         if (flag < 0)
                 failure = 1;
- fprintf(stderr, "%s:%s:%u: ",
- (flag < 0) ? "error" : "warning", getfname(), getfline());
+ fprintf(stderr, "%s:%u: %s: ",
+ input->fname, input->nline,
+ (flag < 0) ? "error" : "warning");
         vfprintf(stderr, fmt, va);
         putc('\n', stderr);
         if (flag < 0 && nerrors++ == MAXERRNUM) {
diff --git a/cc1/lex.c b/cc1/lex.c
index 111c6f8..9d68717 100644
--- a/cc1/lex.c
+++ b/cc1/lex.c
@@ -11,38 +11,27 @@
 #include "../inc/cc.h"
 #include "cc1.h"
 
-typedef struct input Input;
-
-struct input {
- char *fname;
- unsigned short nline;
- FILE *fp;
- char *line, *begin, *p;
- Symbol *macro;
- struct input *next;
-};
-
 unsigned yytoken;
 struct yystype yylval;
 char yytext[STRINGSIZ+3];
 unsigned short yylen;
 int cppoff;
+int lexmode = CCMODE;
 
 static unsigned lex_ns = NS_IDEN;
 static int safe, eof;
-static Input *input;
+Input *input;
 
 static void
-allocinput(char *fname, FILE *fp, char *buff)
+allocinput(char *fname, FILE *fp)
 {
         Input *ip;
 
         ip = xmalloc(sizeof(Input));
- ip->fname = fname;
+ ip->fname = xstrdup(fname);
+ ip->p = ip->begin = ip->line = xmalloc(INPUTSIZ);
+ ip->nline = 0;
         ip->next = input;
- ip->macro = NULL;
- ip->begin = ip->line = buff;
- ip->nline = (fp) ? 0 : input->nline;
         ip->fp = fp;
         input = ip;
 }
@@ -52,10 +41,6 @@ ilex(char *fname)
 {
         FILE *fp;
 
- /*
- * we can use static file names because this Input is not going
- * to be freed ever
- */
         if (!fname) {
                 fp = stdin;
                 fname = "<stdin>";
@@ -64,93 +49,42 @@ ilex(char *fname)
                         die("error opening output:%s", strerror(errno));
                 fname = fname;
         }
- allocinput(fname, fp, xmalloc(INPUTSIZ));
+ allocinput(fname, fp);
         *input->begin = '\0';
 }
 
 bool
-addinput(char *fname, Symbol *sym, char *str)
+addinput(char *fname)
 {
         FILE *fp;
- char flags = 0;
 
- if (fname) {
- /*
- * this call comes from an include clause, so we reuse
- * the buffer from the calling Input
- */
- if ((fp = fopen(fname, "r")) == NULL)
- return 0;
- fname = xstrdup(fname);
- str = input->line;
- *str = '\0';
- } else {
- /*
- * This call comes from a macro expansion, so we have
- * to duplicate the input string because it is the
- * expansion of the macro in a temporal buffer
- */
- fname = input->fname;
- fp = NULL;
- str = xstrdup(str);
- }
- allocinput(fname, fp, str);
- input->macro = sym;
+ if ((fp = fopen(fname, "r")) == NULL)
+ return 0;
+ allocinput(fname, fp);
         return 1;
 }
 
 static void
 delinput(void)
 {
- Input *ip;
+ Input *ip = input;
 
-repeat:
- if (input->fp) {
- /* include input */
- if (fclose(input->fp))
- die("error reading from input file '%s'", input->fname);
- if (!input->next) {
- eof = 1;
- return;
- }
- free(input->fname);
- } else {
- /* macro input */
- free(input->line);
- }
- ip = input;
- input = input->next;
- free(ip);
-
- if (*input->begin != '\0')
+ if (!ip->next)
+ eof = 1;
+ if (fclose(ip->fp))
+ die("error reading from input file '%s'", ip->fname);
+ if (eof)
                 return;
- if (!input->fp)
- goto repeat;
-}
-
-void
-setfname(char *name)
-{
- free(input->fname);
- input->fname = xstrdup(name);
-}
-
-char *
-getfname(void)
-{
- return input->fname;
+ input = ip->next;
+ free(ip->fname);
+ free(ip->line);
 }
 
-void
-setfline(unsigned short line)
-{
- input->nline = line;
-}
-
-unsigned short
-getfline(void)
+static void
+newline(void)
 {
- return input->nline;
+ if (++input->nline == 0)
+ die("error:input file '%s' too long", input->fname);
 }
 
 static char
@@ -160,22 +94,25 @@ readchar(void)
         FILE *fp;
 
 repeat:
- if (feof(input->fp))
- delinput();
- if (eof)
- return '\0';
         fp = input->fp;
 
- if ((c = getc(fp)) == '\\') {
- if ((c = getc(fp)) == '\n')
+ switch (c = getc(fp)) {
+ case EOF:
+ c = '\0';
+ break;
+ case '\\':
+ if ((c = getc(fp)) == '\n') {
+ newline();
                         goto repeat;
+ }
                 ungetc(c, fp);
                 c = '\\';
- } else if (c == EOF) {
- c = '\n';
- } else if (c == '\n' && ++input->nline == 0) {
- die("error:input file '%s' too long", getfname());
+ break;
+ case '\n':
+ newline();
+ break;
         }
+
         return c;
 }
 
@@ -203,10 +140,17 @@ readline(void)
         char *bp, *lim;
         char c, peekc = 0;
 
+repeat:
+ input->begin = input->p = input->line;
+ *input->line = '\0';
         if (eof)
                 return 0;
+ if (feof(input->fp)) {
+ delinput();
+ goto repeat;
+ }
         lim = &input->line[INPUTSIZ-1];
- for (bp = input->line; bp != lim; *bp++ = c) {
+ for (bp = input->line; bp < lim; *bp++ = c) {
                 c = (peekc) ? peekc : readchar();
                 peekc = 0;
                 if (c == '\n' || c == '\0')
@@ -222,8 +166,10 @@ readline(void)
                 }
         }
 
- if (bp == lim)
- error("line %u too big in file '%s'", getfline(), getfname());
+ if (bp == lim) {
+ error("line %u too big in file '%s'",
+ input->line, input->fname);
+ }
         *bp = '\0';
         return 1;
 }
@@ -231,24 +177,18 @@ readline(void)
 bool
 moreinput(void)
 {
- char *p;
-
 repeat:
- if (!input->fp)
- delinput();
- if (*input->begin)
- return 1;
         if (!readline())
                 return 0;
- p = input->line;
- while (isspace(*p))
- ++p;
- if (*p == '\0' || cpp(p) || cppoff) {
+ while (isspace(*input->p))
+ ++input->p;
+ input->begin = input->p;
+ if (*input->p == '\0' || cpp() || cppoff) {
                 *input->begin = '\0';
                 goto repeat;
         }
 
- input->p = input->begin = p;
+ input->begin = input->p;
         return 1;
 }
 
@@ -403,16 +343,17 @@ string(void)
 
         *bp++ = '"';
 repeat:
- for (++input->p; (c = *input->p) != '\0' && c != '"'; ++input->p) {
+ for (++input->p; (c = *input->p) != '"'; ++input->p) {
+ if (c == '\0')
+ error("missing terminating '\"' character");
                 if (c == '\\')
                         c = escape();
                 if (bp == &yytext[STRINGSIZ+1])
                         error("string too long");
                 *bp++ = c;
         }
- if (c == '\0')
- error("missing terminating '\"' character");
- input->begin = input->p + 1;
+
+ input->begin = ++input->p;
         if (ahead() == '"')
                 goto repeat;
         *bp = '\0';
@@ -430,15 +371,16 @@ static unsigned
 iden(void)
 {
         Symbol *sym;
- char *p, *t, c;
+ char *p, *begin;
 
- for (p = input->p; isalnum(*p) || *p == '_'; ++p)
+ begin = input->p;
+ for (p = begin; isalnum(*p) || *p == '_'; ++p)
                 /* nothing */;
         input->p = p;
         tok2str();
         yylval.sym = sym = lookup(lex_ns);
         if (sym->ns == NS_CPP) {
- if (!disexpand && sym != input->macro && expand(sym))
+ if (!disexpand && expand(begin, sym))
                         return next();
                 /*
                  * it is not a correct macro call, so try to find
@@ -554,17 +496,15 @@ setnamespace(int ns)
 static void
 skipspaces(void)
 {
- char *p;
-
 repeat:
- for (p = input->begin; isspace(*p); ++p)
- /* nothing */;
- if (*p == '\0') {
+ while (isspace(*input->p))
+ ++input->p;
+ if (*input->p == '\0' && lexmode != CPPMODE) {
                 if (!moreinput())
                         return;
                 goto repeat;
         }
- input->begin = input->p = p;
+ input->begin = input->p;
 }
 
 unsigned
@@ -573,14 +513,15 @@ next(void)
         char c;
 
         skipspaces();
- if (eof) {
- if (cppctx)
- error("#endif expected");
+ c = *input->begin;
+ if ((eof || lexmode == CPPMODE) && c == '\0') {
                 strcpy(yytext, "<EOF>");
- return yytoken = EOFTOK;
+ if (cppctx && eof)
+ error("#endif expected");
+ yytoken = EOFTOK;
+ goto exit;
         }
 
- c = *input->begin;
         if (isalpha(c) || c == '_')
                 yytoken = iden();
         else if (isdigit(c))
@@ -592,8 +533,8 @@ next(void)
         else
                 yytoken = operator();
 
- fputs(yytext, stderr);
- putc('\n', stderr);
+exit:
+ fprintf(stderr, "%s\n", yytext);
         lex_ns = NS_IDEN;
         return yytoken;
 }
diff --git a/cc1/symbol.c b/cc1/symbol.c
index c5c6b5e..57eb7ac 100644
--- a/cc1/symbol.c
+++ b/cc1/symbol.c
@@ -191,7 +191,7 @@ ikeywords(void)
         static struct {
                 char *str;
                 unsigned char token, value;
- } *bp, buff[] = {
+ } *bp, keywords[] = {
                 {"auto", SCLASS, AUTO},
                 {"break", BREAK, BREAK},
                 {"_Bool", TYPE, BOOL},
@@ -227,14 +227,32 @@ ikeywords(void)
                 {"volatile", TQUALIFIER, VOLATILE},
                 {"while", WHILE, WHILE},
                 {NULL, 0, 0},
- };
+ }, cppclauses[] = {
+ {"define", DEFINE, DEFINE},
+ {"include", INCLUDE, INCLUDE},
+ {"line", LINE, LINE},
+ {"ifdef", IFDEF, IFDEF},
+ {"else", ELSE, ELSE},
+ {"ifndef", IFNDEF, IFNDEF},
+ {"undef", UNDEF, UNDEF},
+ {"pragma", PRAGMA, PRAGMA},
+ {"error", ERROR, ERROR}
+ }, *list[] = {
+ keywords,
+ cppclauses,
+ NULL
+ }, **lp;
         Symbol *sym;
+ int ns = NS_KEYWORD;
 
- for (bp = buff; bp->str; ++bp) {
- strcpy(yytext, bp->str);
- sym = lookup(NS_KEYWORD);
- sym->token = bp->token;
- sym->u.token = bp->value;
+ for (lp = list; *lp; ++lp) {
+ for (bp = *lp; bp->str; ++bp) {
+ strcpy(yytext, bp->str);
+ sym = lookup(ns);
+ sym->token = bp->token;
+ sym->u.token = bp->value;
+ }
+ ns = NS_CPPCLAUSES;
         }
         globalcnt = 0;
 }
Received on Fri Jul 17 2015 - 19:37:49 CEST

This archive was generated by hypermail 2.3.0 : Fri Jul 17 2015 - 19:48:11 CEST