[hackers] [scc] Rewrite symbol table again || Roberto E. Vargas Caballero

From: <git_AT_suckless.org>
Date: Mon, 24 Aug 2015 20:51:51 +0200 (CEST)

X-DEBUG-UPD: 1c93045ca831a57e47da5ed0c513b1fc987de308
commit 1c93045ca831a57e47da5ed0c513b1fc987de308
Author: Roberto E. Vargas Caballero <k0ga_AT_shike2.com>
AuthorDate: Mon Aug 24 17:55:35 2015 +0200
Commit: Roberto E. Vargas Caballero <k0ga_AT_shike2.com>
CommitDate: Mon Aug 24 17:55:35 2015 +0200

    Rewrite symbol table again
    
    lookup() was inserting a symbol into the table whenever the symbol
    was not defined. That was a bad idea because sometimes the namespace
    is not yet known at lexing time. It created symbols in the wrong
    namespace, made it difficult to reuse those symbols, and in some
    cases they masked other symbols. The solution is to allocate a
    symbol when it is not found, but not to insert it into the symbol
    table.

diff --git a/cc1/cc1.h b/cc1/cc1.h
index 66c8530..4515125 100644
--- a/cc1/cc1.h
+++ b/cc1/cc1.h
_AT_@ -22,7 +22,7 @@ typedef struct input Input;
  */
 struct type {
         unsigned char op; /* type builder operator */
- unsigned char ns; /* namespace for struct members */
+ char ns; /* namespace for struct members */
         short id; /* type id, used in dcls */
         char letter; /* letter of the type */
         bool defined : 1; /* type defined */
_AT_@ -48,7 +48,7 @@ struct symbol {
         Type *type;
         unsigned short id;
         unsigned char ctx;
- unsigned char ns;
+ char ns;
         unsigned char token;
         short flags;
         union {
_AT_@ -311,10 +311,10 @@ extern Type *duptype(Type *base);
 
 /* symbol.c */
 extern void dumpstab(char *msg);
-extern Symbol *lookup(unsigned ns, char *name);
-extern Symbol *nextsym(Symbol *sym, unsigned ns);
-extern Symbol *install(unsigned ns, Symbol *sym);
-extern Symbol *newsym(unsigned ns);
+extern Symbol *lookup(int ns, char *name);
+extern Symbol *nextsym(Symbol *sym, int ns);
+extern Symbol *install(int ns, Symbol *sym);
+extern Symbol *newsym(int ns);
 extern void pushctx(void), popctx(void);
 extern void ikeywords(void);
 extern void delmacro(Symbol *sym);
_AT_@ -334,7 +334,6 @@ extern bool moreinput(void);
 extern void expect(unsigned tok);
 extern void discard(void);
 extern bool addinput(char *fname);
-extern void setnamespace(int ns);
 extern void setsafe(int type);
 extern void ilex(char *fname);
 #define accept(t) ((yytoken == (t)) ? next() : 0)
_AT_@ -369,7 +368,7 @@ extern unsigned short yylen;
 extern int cppoff, disexpand;
 extern unsigned cppctx;
 extern Input *input;
-extern int lexmode;
+extern int lexmode, namespace;
 extern unsigned curctx;
 extern Symbol *curfun, *zero, *one;
 
diff --git a/cc1/cpp.c b/cc1/cpp.c
index 849e18d..dae6c3a 100644
--- a/cc1/cpp.c
+++ b/cc1/cpp.c
_AT_@ -327,8 +327,9 @@ define(void)
         if (cppoff)
                 return;
 
- setnamespace(NS_CPP);
+ namespace = NS_CPP;
         next();
+
         if (yytoken != IDEN) {
                 cpperror("macro names must be identifiers");
                 return;
_AT_@ -342,7 +343,7 @@ define(void)
                 sym->flags |= ISDECLARED;
         }
 
- setnamespace(NS_IDEN); /* Avoid polution in NS_CPP */
+ namespace = NS_IDEN; /* Avoid polution in NS_CPP */
         next();
         if ((n = getpars(args)) == NR_MACROARG)
                 goto delete;
_AT_@ -371,7 +372,7 @@ include(void)
         if (cppoff)
                 return;
 
- setnamespace(NS_IDEN);
+ namespace = NS_IDEN;
         next();
 
         switch (*yytext) {
_AT_@ -488,7 +489,7 @@ ifclause(int negate, int isifdef)
                 error("too much nesting levels of conditional inclusion");
 
         n = cppctx++;
- setnamespace(NS_CPP);
+ namespace = NS_CPP;
         next();
 
         if (isifdef) {
_AT_@ -578,7 +579,7 @@ undef(void)
         if (cppoff)
                 return;
 
- setnamespace(NS_CPP);
+ namespace = NS_CPP;
         next();
         if (yytoken != IDEN) {
                 error("no macro name given in #undef directive");
_AT_@ -609,6 +610,7 @@ cpp(void)
                 {ERROR, usererr},
                 {0, NULL}
         };
+ int ns;
 
         if (*input->p != '#')
                 return 0;
_AT_@ -616,8 +618,11 @@ cpp(void)
 
         disexpand = 1;
         lexmode = CPPMODE;
- setnamespace(NS_CPPCLAUSES);
+ ns = namespace;
+ namespace = NS_CPPCLAUSES;
         next();
+ namespace = NS_IDEN;
+
         for (bp = clauses; bp->token && bp->token != yytoken; ++bp)
                 /* nothing */;
         if (!bp->token)
_AT_@ -629,8 +634,10 @@ cpp(void)
 
         if (yytoken != EOFTOK && !cppoff)
                 errorp("trailing characters after preprocessor directive");
+
         disexpand = 0;
         lexmode = CCMODE;
+ namespace = ns;
 
         return 1;
 }
diff --git a/cc1/decl.c b/cc1/decl.c
index 897659e..44a57cc 100644
--- a/cc1/decl.c
+++ b/cc1/decl.c
_AT_@ -372,14 +372,16 @@ newtag(void)
         int op, tag = yylval.token;
         static unsigned ns = NS_STRUCTS;
 
- setnamespace(NS_TAG);
+ namespace = NS_TAG;
         next();
+
         switch (yytoken) {
         case IDEN:
         case TYPEIDEN:
                 sym = yylval.sym;
                 if ((sym->flags & ISDECLARED) == 0)
                         install(NS_TAG, yylval.sym);
+ namespace = NS_IDEN;
                 next();
                 break;
         default:
_AT_@ -413,15 +415,20 @@ structdcl(void)
         Symbol *sym;
         Type *tp;
         static int nested;
+ int ns;
 
+ ns = namespace;
         sym = newtag();
         tp = sym->type;
+ namespace = tp->ns;
+
         if (!accept('{'))
- return tp;
+ goto restore_name;
 
         if (tp->defined)
                 error("redefinition of struct/union '%s'", sym->name);
         tp->defined = 1;
+ namespace = tp->ns;
 
         if (nested == NR_STRUCT_LEVEL)
                 error("too levels of nested structure or union definitions");
_AT_@ -431,6 +438,8 @@ structdcl(void)
                 fieldlist(tp);
         --nested;
 
+restore_name:
+ namespace = ns;
         return tp;
 }
 
_AT_@ -439,16 +448,19 @@ enumdcl(void)
 {
         Type *tp;
         Symbol *sym, *tagsym;
- int val, nctes;
+ int ns, val, nctes;
 
+ ns = namespace;
         tagsym = newtag();
         tp = tagsym->type;
 
         if (!accept('{'))
- return tp;
+ goto restore_name;
         if (tp->defined)
                 error("redefinition of enumeration '%s'", tagsym->name);
         tp->defined = 1;
+ namespace = NS_IDEN;
+
         for (nctes = val = 0; yytoken != ')'; ++nctes, ++val) {
                 if (yytoken != IDEN)
                         unexpected();
_AT_@ -475,6 +487,8 @@ enumdcl(void)
         }
         expect('}');
 
+restore_name:
+ namespace = ns;
         return tp;
 }
 
diff --git a/cc1/expr.c b/cc1/expr.c
index 34838ff..8403083 100644
--- a/cc1/expr.c
+++ b/cc1/expr.c
_AT_@ -588,8 +588,10 @@ field(Node *np)
         switch (BTYPE(np)) {
         case STRUCT:
         case UNION:
- setnamespace(np->type->ns);
+ namespace = np->type->ns;
                 next();
+ namespace = NS_IDEN;
+
                 if (yytoken != IDEN)
                         unexpected();
                 if ((sym = yylval.sym) == NULL)
_AT_@ -710,7 +712,7 @@ primary(void)
                 next();
                 break;
         case IDEN:
- if (!(yylval.sym->flags & ISDECLARED)) {
+ if ((yylval.sym->flags & ISDECLARED) == 0) {
                         yylval.sym->type = inttype;
                         yylval.sym->flags |= ISDECLARED;
                         error("'%s' undeclared", yytext);
diff --git a/cc1/lex.c b/cc1/lex.c
index d167cb6..58ba415 100644
--- a/cc1/lex.c
+++ b/cc1/lex.c
_AT_@ -18,7 +18,7 @@ unsigned short yylen;
 int cppoff;
 int lexmode = CCMODE;
 
-static unsigned lex_ns = NS_IDEN, saved_ns;
+int namespace = NS_IDEN;
 static int safe, eof;
 Input *input;
 
_AT_@ -378,7 +378,7 @@ iden(void)
                 /* nothing */;
         input->p = p;
         tok2str();
- sym = lookup(lex_ns, yytext);
+ sym = lookup(namespace, yytext);
         if (sym->ns == NS_CPP) {
                 if (!disexpand && expand(begin, sym))
                         return next();
_AT_@ -386,7 +386,7 @@ iden(void)
                  * it is not a correct macro call, so try to find
                  * another definition.
                  */
- sym = nextsym(sym, lex_ns);
+ sym = nextsym(sym, namespace);
         }
         yylval.sym = sym;
         if (sym->flags & ISCONSTANT)
_AT_@ -488,13 +488,7 @@ operator(void)
         return t;
 }
 
-/* TODO: Ensure that lex_ns is NS_IDEN after a recovery */
-void
-setnamespace(int ns)
-{
- saved_ns = (ns == NS_CPPCLAUSES) ? lex_ns : 0;
- lex_ns = ns;
-}
+/* TODO: Ensure that namespace is NS_IDEN after a recovery */
 
 static void
 skipspaces(void)
_AT_@ -518,8 +512,6 @@ next(void)
         skipspaces();
         c = *input->begin;
         if ((eof || lexmode == CPPMODE) && c == '\0') {
- if (lexmode == CPPMODE)
- lex_ns = saved_ns;
                 strcpy(yytext, "<EOF>");
                 if (cppctx && eof)
                         error("#endif expected");
_AT_@ -540,8 +532,6 @@ next(void)
 
 exit:
         DBG(stderr, "TOKEN %s\n", yytext);
- if (lexmode == CCMODE)
- lex_ns = NS_IDEN;
         return yytoken;
 }
 
diff --git a/cc1/stmt.c b/cc1/stmt.c
index 6c0b29a..b4de938 100644
--- a/cc1/stmt.c
+++ b/cc1/stmt.c
_AT_@ -20,8 +20,11 @@ label(void)
         switch (yytoken) {
         case IDEN:
         case TYPEIDEN:
- if ((sym = install(NS_LABEL, yylval.sym)) == NULL)
+ sym = lookup(NS_LABEL, yytext);
+ if (sym->flags & ISDEFINED)
                         error("label '%s' already defined", yytoken);
+ if ((sym->flags & ISDECLARED) == 0)
+ sym = install(NS_LABEL, sym);
                 sym->flags |= ISDEFINED;
                 emit(OLABEL, sym);
                 next();
_AT_@ -169,8 +172,10 @@ Goto(Symbol *lbreak, Symbol *lcont, Caselist *lswitch)
 {
         Symbol *sym;
 
- setnamespace(NS_LABEL);
+ namespace = NS_LABEL;
         next();
+ namespace = NS_IDEN;
+
         if (yytoken != IDEN)
                 unexpected();
         sym = yylval.sym;
diff --git a/cc1/symbol.c b/cc1/symbol.c
index 1e4b673..6d32e51 100644
--- a/cc1/symbol.c
+++ b/cc1/symbol.c
_AT_@ -54,30 +54,6 @@ hash(const char *s)
         return h & NR_SYM_HASH-1;
 }
 
-static Symbol *
-linkhash(Symbol *sym, char *name, unsigned hval)
-{
- Symbol **h, *p, *prev;
-
- sym->name = xstrdup(name);
- h = &htab[hval];
-
- for (prev = p = *h; p; prev = p, p = p->hash) {
- if (p->ctx <= sym->ctx)
- break;
- }
- if (p == prev) {
- sym->hash = *h;
- *h = sym;
- } else {
- p = prev->hash;
- prev->hash = sym;
- sym->hash = p;
- }
-
- return sym;
-}
-
 static void
 unlinkhash(Symbol *sym)
 {
_AT_@ -99,41 +75,47 @@ pushctx(void)
                 error("too much nested blocks");
 }
 
+static void
+killsym(Symbol *sym)
+{
+ short f;
+ char *name;
+
+ f = sym->flags;
+ if (f & ISSTRING)
+ free(sym->u.s);
+ if (sym->ns == NS_TAG)
+ sym->type->defined = 0;
+ if ((name = sym->name) != NULL) {
+ unlinkhash(sym);
+ if ((f & (ISUSED|ISGLOBAL|ISDECLARED)) == ISDECLARED)
+ warn("'%s' defined but not used", name);
+ if ((f & ISDEFINED) == 0 && sym->ns == NS_LABEL)
+ errorp("label '%s' is not defined", name);
+ free(name);
+ }
+ free(sym);
+}
+
 void
 popctx(void)
 {
         Symbol *next, *sym;
+ char *name;
         short f;
 
         if (--curctx == GLOBALCTX) {
                 localcnt = 0;
                 for (sym = labels; sym; sym = next) {
                         next = sym->next;
- f = sym->flags;
- if ((f & (ISUSED|ISDEFINED)) == ISDEFINED)
- warn("'%s' defined but not used", sym->name);
- if ((f & ISDEFINED) == 0)
- errorp("label '%s' is not defined", sym->name);
- free(sym->name);
- free(sym);
+ killsym(sym);
                 }
                 labels = NULL;
         }
 
         for (sym = head; sym && sym->ctx > curctx; sym = next) {
                 next = sym->next;
- f = sym->flags;
- if (sym->ns == NS_TAG)
- sym->type->defined = 0;
- if (sym->name) {
- unlinkhash(sym);
- if ((f & (ISUSED|ISGLOBAL|ISDECLARED)) == ISDECLARED)
- warn("'%s' defined but not used", sym->name);
- }
- free(sym->name);
- if (f & ISSTRING)
- free(sym->u.s);
- free(sym);
+ killsym(sym);
         }
         head = sym;
 }
_AT_@ -141,7 +123,7 @@ popctx(void)
 static unsigned short
 newid(void)
 {
- unsigned id;
+ unsigned short id;
 
         id = (curctx) ? ++localcnt : ++globalcnt;
         if (id == 0) {
_AT_@ -161,41 +143,86 @@ duptype(Type *base)
         return tp;
 }
 
-Symbol *
-newsym(unsigned ns)
+static Symbol *
+allocsym(int ns, char *name)
 {
- Symbol *sym, *p, *prev;
+ Symbol *sym;
 
- sym = malloc(sizeof(*sym));
+ sym = xmalloc(sizeof(*sym));
+ if (name)
+ name = xstrdup(name);
+ sym->name = name;
         sym->id = 0;
         sym->ns = ns;
         sym->ctx = (ns == NS_CPP) ? UCHAR_MAX : curctx;
         sym->token = IDEN;
- sym->flags = ISDECLARED | ISUSED;
- sym->u.s = sym->name = NULL;
+ sym->flags = 0;
+ sym->u.s = NULL;
         sym->type = NULL;
         sym->next = sym->hash = NULL;
+ return sym;
+}
 
- if (ns == NS_CPP)
+static Symbol *
+linksym(Symbol *sym)
+{
+ Symbol *p, *prev;
+
+ sym->flags |= ISDECLARED;
+ switch (sym->ns) {
+ case NS_CPP:
                 return sym;
- if (ns == NS_LABEL) {
+ case NS_LABEL:
                 sym->next = labels;
                 return labels = sym;
+ default:
+ for (prev = p = head; p; prev = p, p = p->next) {
+ if (p->ctx <= sym->ctx)
+ break;
+ }
+ if (p == prev) {
+ sym->next = head;
+ head = sym;
+ } else {
+ p = prev->next;
+ prev->next = sym;
+ sym->next = p;
+ }
+ return sym;
         }
+}
+
+static Symbol *
+linkhash(Symbol *sym)
+{
+ Symbol **h, *p, *prev;
+
+ h = &htab[hash(sym->name)];
 
- for (prev = p = head; p; prev = p, p = p->next) {
+ for (prev = p = *h; p; prev = p, p = p->hash) {
                 if (p->ctx <= sym->ctx)
                         break;
         }
         if (p == prev) {
- sym->next = head;
- head = sym;
+ sym->hash = *h;
+ *h = sym;
         } else {
- p = prev->next;
- prev->next = sym;
- sym->next = p;
+ p = prev->hash;
+ prev->hash = sym;
+ sym->hash = p;
         }
 
+ if (sym->ns != NS_CPP)
+ sym->id = newid();
+ return linksym(sym);
+}
+
+Symbol *
+newsym(int ns)
+{
+ Symbol *sym;
+
+ sym = linksym(allocsym(ns, NULL));
         return sym;
 }
 
_AT_@ -204,33 +231,26 @@ newlabel(void)
 {
         Symbol *sym = newsym(NS_LABEL);
         sym->id = newid();
- sym->flags |= ISDEFINED;
         return sym;
 }
 
 Symbol *
-lookup(unsigned ns, char *name)
+lookup(int ns, char *name)
 {
- Symbol *sym, **h;
- unsigned sns, v;
+ Symbol *sym;
+ int sns;
         char *t, c;
 
- v = hash(name);
- h = &htab[v];
         c = *name;
- for (sym = *h; sym; sym = sym->hash) {
+ for (sym = htab[hash(name)]; sym; sym = sym->hash) {
                 t = sym->name;
                 if (*t != c || strcmp(t, name))
                         continue;
                 sns = sym->ns;
- if (sns == NS_KEYWORD || sns == NS_CPP)
+ if (sns == NS_KEYWORD || sns == NS_CPP || sns == ns)
                         return sym;
- if (sns != ns)
- continue;
- return sym;
         }
- sym = linkhash(newsym(ns), name, v);
- sym->flags &= ~(ISDECLARED | ISUSED);
+ sym = allocsym(ns, name);
 
         return sym;
 }
_AT_@ -245,10 +265,10 @@ delmacro(Symbol *sym)
 }
 
 Symbol *
-nextsym(Symbol *sym, unsigned ns)
+nextsym(Symbol *sym, int ns)
 {
         char *s, *t, c;
- Symbol *new, *p;
+ Symbol *p;
 
         /*
          * This function is only called when a macro with parameters
_AT_@ -263,29 +283,18 @@ nextsym(Symbol *sym, unsigned ns)
                 if (c == *t && !strcmp(s, t))
                         return sym;
         }
- new = linkhash(newsym(ns), s, hash(s));
- new->flags &= ~ISDECLARED;
- return new;
+ return linkhash(allocsym(ns, s));
 }
 
 Symbol *
-install(unsigned ns, Symbol *sym)
+install(int ns, Symbol *sym)
 {
- if (sym->ctx == curctx && ns == sym->ns) {
- if (sym->flags & ISDECLARED)
+ if (sym->flags & ISDECLARED) {
+ if (sym->ctx == curctx && ns == sym->ns)
                         return NULL;
- } else {
- sym = lookup(ns, sym->name);
- if (sym->flags & ISDECLARED)
- return sym;
+ sym = allocsym(ns, sym->name);
         }
-
- sym->flags |= ISDECLARED;
- if (ns == NS_CPP)
- return sym;
- sym->id = newid();
-
- return sym;
+ return linkhash(sym);
 }
 
 void
_AT_@ -354,7 +363,7 @@ ikeywords(void)
 
         for (lp = list; *lp; ++lp) {
                 for (bp = *lp; bp->str; ++bp) {
- sym = lookup(ns, bp->str);
+ sym = linkhash(allocsym(ns, bp->str));
                         sym->token = bp->token;
                         sym->u.token = bp->value;
                 }
_AT_@ -362,8 +371,9 @@ ikeywords(void)
         }
         /*
          * Remove all the predefined symbols from * the symbol list. It
- * will make faster someoperations. There is no problem of memory
+ * will make faster some operations. There is no problem of memory
          * leakeage because this memory is not ever freed
          */
+ globalcnt = 0;
         head = NULL;
 }
diff --git a/cc1/tests/test014.c b/cc1/tests/test014.c
index c8da94a..50d1a0d 100644
--- a/cc1/tests/test014.c
+++ b/cc1/tests/test014.c
_AT_@ -2,21 +2,21 @@
 name: TEST014
 description: Basic storage class test
 output:
-test014.c:22: warning: 'a' defined but not used
-test014.c:22: warning: 'k' defined but not used
-test014.c:22: warning: 'j' defined but not used
-test014.c:22: warning: 'i' defined but not used
-test014.c:22: warning: 'h' defined but not used
-test014.c:28: warning: 'par' defined but not used
-test014.c:28: warning: 'par' defined but not used
-test014.c:33: warning: 'par' defined but not used
-test014.c:35: error: incorrect storage class for file-scope declaration
-test014.c:35: error: invalid storage class for function 'd'
-test014.c:38: error: bad storage class in function parameter
-test014.c:39: error: invalid storage class for function 'func4'
-test014.c:40: error: invalid type specification
-test014.c:41: warning: 'f' defined but not used
-test014.c:44: error: conflicting types for 'd'
+test014.c:16: warning: 'a' defined but not used
+test014.c:16: warning: 'k' defined but not used
+test014.c:16: warning: 'j' defined but not used
+test014.c:16: warning: 'i' defined but not used
+test014.c:16: warning: 'h' defined but not used
+test014.c:22: warning: 'par' defined but not used
+test014.c:22: warning: 'par' defined but not used
+test014.c:27: warning: 'par' defined but not used
+test014.c:29: error: incorrect storage class for file-scope declaration
+test014.c:29: error: invalid storage class for function 'd'
+test014.c:32: error: bad storage class in function parameter
+test014.c:33: error: invalid storage class for function 'func4'
+test014.c:34: error: invalid type specification
+test014.c:35: warning: 'f' defined but not used
+test014.c:38: error: conflicting types for 'd'
 G1 I a
 Y2 M b
 X3 I c
_AT_@ -46,6 +46,7 @@ R1 I par
 ????
 */
 
+#line 1
 int a;
 static char b;
 extern int c;
Received on Mon Aug 24 2015 - 20:51:51 CEST

This archive was generated by hypermail 2.3.0 : Mon Aug 24 2015 - 21:00:13 CEST