[hackers] [scc] [cc1] Rewrite the input system || Roberto E. Vargas Caballero

From: <git_AT_suckless.org>
Date: Thu, 19 Jan 2017 10:00:32 +0100 (CET)

commit 490b2ddc64c800327ab07b4b3e5e56ed434fd38e
Author: Roberto E. Vargas Caballero <k0ga_AT_shike2.com>
AuthorDate: Wed Jan 18 11:16:06 2017 +0100
Commit: Roberto E. Vargas Caballero <k0ga_AT_shike2.com>
CommitDate: Thu Jan 19 09:55:40 2017 +0100

    [cc1] Rewrite the input system
    
    The input system was broken for a lot of different reasons. Some of them
    were due to the fact that the responsibility of the different functions
    was not clear, and the kind of input wasn't explicit. This new version
    tries to split clearly the responsibility of every function (for
    example, spaces are skipped only in skipspaces now).

diff --git a/cc1/cc1.h b/cc1/cc1.h
index 3f3020f..13c6d44 100644
--- a/cc1/cc1.h
+++ b/cc1/cc1.h
@@ -400,7 +400,6 @@ extern void decl(void);
 /* lex.c */
 extern char ahead(void);
 extern unsigned next(void);
-extern int moreinput(void);
 extern void expect(unsigned tok);
 extern void discard(void);
 extern int addinput(char *fname, Symbol *hide, char *buffer);
@@ -451,7 +450,7 @@ extern struct yystype yylval;
 extern char yytext[];
 extern unsigned yytoken;
 extern unsigned short yylen;
-extern int cppoff, disexpand;
+extern int disexpand;
 extern unsigned cppctx;
 extern Input *input;
 extern int lexmode, namespace, onlycpp;
diff --git a/cc1/code.c b/cc1/code.c
index 5a48318..38a5f0e 100644
--- a/cc1/code.c
+++ b/cc1/code.c
@@ -159,7 +159,7 @@ emit(unsigned op, void *arg)
 {
         extern int failure;
 
- if (failure)
+ if (failure || onlycpp)
                 return;
         (*opcode[op])(op, arg);
 }
diff --git a/cc1/cpp.c b/cc1/cpp.c
index d8c4124..8c4f8f2 100644
--- a/cc1/cpp.c
+++ b/cc1/cpp.c
@@ -16,7 +16,7 @@ static unsigned arglen;
 static unsigned ncmdlines;
 static Symbol *symline, *symfile;
 static unsigned char ifstatus[NR_COND];
-static int ninclude;
+static int ninclude, cppoff;
 static char **dirinclude;
 
 unsigned cppctx;
@@ -25,7 +25,7 @@ int disexpand;
 void
 defdefine(char *macro, char *val, char *source)
 {
- char *def, *fmt = "#define %s %s";
+ char *def, *fmt = "#define %s %s\n";
         Symbol dummy = {.flags = SDECLARED};
 
         if (!val)
@@ -345,7 +345,7 @@ getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz)
                         cpperror("'#' is not followed by a macro parameter");
                         return 0;
                 }
- if (yytoken == EOFTOK)
+ if (yytoken == '\n')
                         break;
 
                 if ((len = strlen(yytext)) >= bufsiz) {
@@ -449,7 +449,7 @@ includefile(char *dir, char *file, size_t filelen)
 static void
 include(void)
 {
- char *file, *p, **bp;
+ char file[FILENAME_MAX], *p, **bp;
         size_t filelen;
         static char *sysinclude[] = {
                 PREFIX "/include/scc/" ARCH "/",
@@ -467,19 +467,31 @@ include(void)
 
         switch (*yytext) {
         case '<':
- if ((p = strchr(input->begin, '>')) == NULL || p == yytext + 1)
+ if ((p = strchr(input->begin, '>')) == NULL || p[-1] == '<')
                         goto bad_include;
- *p = '\0';
- file = input->begin;
- filelen = strlen(file);
+ filelen = p - input->begin;
+ if (filelen >= FILENAME_MAX)
+ goto too_long;
+ memcpy(file, input->begin, filelen);
+ file[filelen] = '\0';
+
                 input->begin = input->p = p+1;
+ if (next() != '\n')
+ goto trailing_characters;
+
                 break;
         case '"':
- if ((p = strchr(yytext + 1, '"')) == NULL || p == yytext + 1)
+ if (yylen < 3)
                         goto bad_include;
- *p = '\0';
- file = yytext+1;
- filelen = strlen(file);
+ filelen = yylen-2;
+ if (filelen >= FILENAME_MAX)
+ goto too_long;
+ memcpy(file, yytext+1, filelen);
+ file[filelen] = '\0';
+
+ if (next() != '\n')
+ goto trailing_characters;
+
                 if (includefile(NULL, file, filelen))
                         goto its_done;
                 break;
@@ -499,7 +511,14 @@ include(void)
         cpperror("included file '%s' not found", file);
 
 its_done:
- next();
+ return;
+
+trailing_characters:
+ cpperror("trailing characters after preprocessor directive");
+ return;
+
+too_long:
+ cpperror("too long file name in #include");
         return;
 
 bad_include:
@@ -709,10 +728,14 @@ cpp(void)
                 {0, NULL}
         };
         int ns;
+ char *p;
 
- if (*input->p != '#')
- return 0;
- ++input->p;
+ for (p = input->p; isspace(*p); ++p)
+ /* nothing */;
+
+ if (*p != '#')
+ return cppoff;
+ input->p = p+1;
 
         disexpand = 1;
         lexmode = CPPMODE;
@@ -724,15 +747,23 @@ cpp(void)
         for (bp = clauses; bp->token && bp->token != yytoken; ++bp)
                 /* nothing */;
         if (!bp->token) {
- errorp("incorrect preprocessor directive");
+ errorp("incorrect preprocessor directive '%s'", yytext);
                 goto error;
         }
 
+ DBG("CPP %s", yytext);
+
         pushctx(); /* create a new context to avoid polish */
         (*bp->fun)(); /* the current context, and to get all */
         popctx(); /* the symbols freed at the end */
 
- if (yytoken != EOFTOK && !cppoff)
+ /*
+ * #include changes the content of input->line, so the correctness
+ * of the line must be checked in the own include(), and we have
+ * to skip this tests. For the same reason include() is the only
+ * function which does not prepare the next token
+ */
+ if (yytoken != '\n' && !cppoff && bp->token != INCLUDE)
                 errorp("trailing characters after preprocessor directive");
 
 error:
diff --git a/cc1/lex.c b/cc1/lex.c
index 83c0be3..82a3ff2 100644
--- a/cc1/lex.c
+++ b/cc1/lex.c
@@ -1,5 +1,6 @@
 /* See LICENSE file for copyright and license details. */
 static char sccsid[] = "@(#) ./cc1/lex.c";
+#include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #include <limits.h>
@@ -16,11 +17,10 @@ unsigned yytoken;
 struct yystype yylval;
 char yytext[STRINGSIZ+3];
 unsigned short yylen;
-int cppoff;
 int lexmode = CCMODE;
 
 int namespace = NS_IDEN;
-static int safe, eof;
+static int safe;
 Input *input;
 
 void
@@ -83,7 +83,7 @@ addinput(char *fname, Symbol *hide, char *buffer)
                 if (hide->hide == UCHAR_MAX)
                         die("Too many macro expansions");
                 ++hide->hide;
- flags = IMACRO|IEOF;
+ flags = IMACRO;
         } else if (fname) {
                 /* a new file */
                 if ((fp = fopen(fname, "r")) == NULL)
@@ -126,24 +126,12 @@ delinput(void)
                 if (fclose(ip->fp))
                         die("error: failed to read from input file '%s'",
                             ip->fname);
- if (!ip->next)
- eof = 1;
                 break;
         case IMACRO:
+ assert(hide->hide == 1);
                 --hide->hide;
- /*
- * If the symbol is not declared then it was
- * an expansion due to a #if directive with
- * a non declared symbol (expanded to 0),
- * thus we have to kill the symbol
- * TODO: review this comment and code
- */
- if ((hide->flags & SDECLARED) == 0)
- killsym(hide);
                 break;
         }
- if (eof)
- return;
         input = ip->next;
         free(ip->fname);
         free(ip->line);
@@ -156,19 +144,18 @@ newline(void)
                 die("error: input file '%s' too long", input->fname);
 }
 
+/*
+ * Read the next character from the input file, counting number of lines
+ * and joining lines escaped with \
+ */
 static int
 readchar(void)
 {
         FILE *fp = input->fp;
         int c;
 
- if (eof || !fp)
- return 0;
 repeat:
         switch (c = getc(fp)) {
- case EOF:
- c = '\0';
- break;
         case '\\':
                 if ((c = getc(fp)) == '\n') {
                         newline();
@@ -185,85 +172,111 @@ repeat:
         return c;
 }
 
+/*
+ * discard a C comment. This function is only called from readline
+ * because it is impossible to have a comment in a macro, because
+ * comments are always discarded before processing any cpp directive
+ */
 static void
 comment(int type)
 {
         int c;
 
- c = -1;
 repeat:
- do {
- if (!c || eof) {
- errorp("unterminated comment");
- return;
- }
- } while ((c = readchar()) != type);
+ while ((c = readchar()) != EOF && c != type)
+ /* nothing */;
+
+ if (c == EOF) {
+ errorp("unterminated comment");
+ return;
+ }
 
         if (type == '*' && (c = readchar()) != '/')
                 goto repeat;
 }
 
+/*
+ * readline is used to read a full logic line from a file.
+ * It discards comments and check that the line fits in
+ * the input buffer
+ */
 static int
 readline(void)
 {
         char *bp, *lim;
- char c, peekc = 0;
-
-repeat:
+ int c, peekc = 0;
 
- if (eof)
- return 0;
- if (!input->fp) {
- delinput();
- return 1;
- }
         if (feof(input->fp)) {
- delinput();
- goto repeat;
+ input->flags |= IEOF;
+ return 0;
         }
 
         *input->line = '\0';
- input->begin = input->p = input->line;
         lim = &input->line[INPUTSIZ-1];
- for (bp = input->line; bp < lim; *bp++ = c) {
+ for (bp = input->line; bp < lim-1; *bp++ = c) {
                 c = (peekc) ? peekc : readchar();
                 peekc = 0;
- if (c == '\n' || c == '\0')
+ if (c == '\n' || c == EOF)
                         break;
- if (c != '/' || (peekc = readchar()) != '*' && peekc != '/')
+ if (c != '/')
+ continue;
+
+ /* check for /* or // */
+ peekc = readchar();
+ if (peekc != '*' && peekc != '/')
                         continue;
- comment((peekc == '/') ? '\n' : peekc);
+ comment((peekc == '/') ? '\n' : '/');
                 peekc = 0;
                 c = ' ';
         }
 
- if (bp == lim)
- error("line too long");
+ input->begin = input->p = input->line;
+ if (bp == lim-1) {
+ errorp("line too long");
+ --bp;
+ }
+ *bp++ = '\n';
         *bp = '\0';
+
         return 1;
 }
 
-int
+/*
+ * moreinput gets more bytes to be passed to the lexer.
+ * It can take more bytes from macro expansions or
+ * directly reading from files. When a cpp directive
+ * is processed the line is discarded because it must not
+ * be passed to the lexer
+ */
+static int
 moreinput(void)
 {
- static char file[FILENAME_MAX];
- static unsigned nline;
- char *s;
- int wasexpand;
+ int wasexpand = 0;
 
 repeat:
- wasexpand = input->hide != NULL;
- if (!readline())
+ if (!input)
                 return 0;
- while (isspace(*input->p))
- ++input->p;
- input->begin = input->p;
- if (*input->p == '\0' || cpp() || cppoff) {
- *input->begin = '\0';
- goto repeat;
+
+ if (*input->p == '\0') {
+ if ((input->flags&ITYPE) == IMACRO) {
+ wasexpand = 1;
+ input->flags |= IEOF;
+ }
+ if (input->flags & IEOF) {
+ delinput();
+ goto repeat;
+ }
+ if (!readline() || cpp()) {
+ *input->p = '\0';
+ goto repeat;
+ }
         }
 
         if (onlycpp && !wasexpand) {
+ static char file[FILENAME_MAX];
+ static unsigned nline;
+ char *s;
+
                 putchar('\n');
                 if (strcmp(file, input->fname)) {
                         strcpy(file, input->fname);
@@ -276,7 +289,6 @@ repeat:
                 nline = input->nline;
                 printf(s, nline, file);
         }
- input->begin = input->p;
         return 1;
 }
 
@@ -483,7 +495,7 @@ character(void)
                 c = *input->p;
         ++input->p;
         if (*input->p != '\'')
- error("invalid character constant");
+ errorp("invalid character constant");
         else
                 ++input->p;
 
@@ -643,47 +655,50 @@ operator(void)
 
 /* TODO: Ensure that namespace is NS_IDEN after a recovery */
 
-static void
+/*
+ * skip all the spaces until the next token. When we are in
+ * CPPMODE \n is not considered a whitespace
+ */
+static int
 skipspaces(void)
 {
-repeat:
- while (isspace(*input->p))
- ++input->p;
- input->begin = input->p;
-
- if (*input->p != '\0')
- return;
+ int c;
 
- if (lexmode == CPPMODE) {
- /*
- * If we are in cpp mode, we only return eof when
- * we don't have more inputs, or when the next
- * next input is from a file
- */
- if (!input || !input->next || input->next->fp)
- return;
+ for (;;) {
+ switch (c = *input->p) {
+ case '\n':
+ if (lexmode == CPPMODE)
+ goto return_byte;
+ ++input->p;
+ case '\0':
+ if (!moreinput())
+ return EOF;
+ break;
+ case ' ':
+ case '\t':
+ case '\v':
+ case '\r':
+ case '\f':
+ ++input->p;
+ break;
+ default:
+ goto return_byte;
+ }
         }
- if (!moreinput())
- return;
- goto repeat;
+
+return_byte:
+ input->begin = input->p;
+ return c;
 }
 
 unsigned
 next(void)
 {
- char c;
+ int c;
 
- skipspaces();
- c = *input->begin;
- if ((eof || lexmode == CPPMODE) && c == '\0') {
- strcpy(yytext, "<EOF>");
- if (cppctx && eof)
- error("#endif expected");
+ if ((c = skipspaces()) == EOF)
                 yytoken = EOFTOK;
- goto exit;
- }
-
- if (isalpha(c) || c == '_')
+ else if (isalpha(c) || c == '_')
                 yytoken = iden();
         else if (isdigit(c))
                 yytoken = number();
@@ -694,7 +709,12 @@ next(void)
         else
                 yytoken = operator();
 
-exit:
+ if (yytoken == EOF) {
+ strcpy(yytext, "<EOF>");
+ if (cppctx)
+ errorp("#endif expected");
+ }
+
         DBG("TOKEN %s", yytext);
         return yytoken;
 }
Received on Thu Jan 19 2017 - 10:00:32 CET

This archive was generated by hypermail 2.3.0 : Thu Jan 19 2017 - 10:12:35 CET