Home
       parser.c - scc - simple c99 compiler
  HTML git clone git://git.simple-cc.org/scc
   DIR Log
   DIR Files
   DIR Refs
   DIR Submodules
   DIR README
   DIR LICENSE
       ---
       parser.c (14254B)
       ---
            1 #include <errno.h>
            2 #include <limits.h>
            3 #include <stdio.h>
            4 #include <stdlib.h>
            5 #include <string.h>
            6 
            7 #include <scc/cstd.h>
            8 #include <scc/scc.h>
            9 
           10 #include "cc2.h"
           11 
           12 #define STACKSIZ     50
           13 
           14 Type funetype = {
           15         .flags = FUNF | ELLIPS
           16 };
           17 
           18 Type funtype = {
           19         .flags = FUNF
           20 };
           21 
           22 union tokenop {
           23         void *arg;
           24         unsigned op;
           25 };
           26 
           27 struct swtch {
           28         int nr;
           29         Node *first;
           30         Node *last;
           31 };
           32 
           33 static struct swtch swtbl[NR_BLOCK], *swp = swtbl;
           34 static Symbol *lastfun;
           35 
           36 typedef void parsefun(char *, union tokenop);
           37 static parsefun type, symbol, getname, unary, binary, ternary, call,
           38                 constant, composed, binit, einit,
           39                 jump, oreturn, loop, assign,
           40                 ocase, bswitch, eswitch, builtin;
           41 
           42 typedef void evalfun(void);
           43 static evalfun vardecl, beginfun, endfun, endpars, stmt,
           44                array, aggregate, flddecl, labeldcl;
           45 
           46 static struct decoc {
           47         void (*eval)(void);
           48         void (*parse)(char *token, union tokenop);
           49         union tokenop u;
           50 } optbl[] = {      /*  eval     parse           args */
           51         ['A']   = {  vardecl,  symbol, .u.op  =  SAUTO<<8 | OAUTO},
           52         ['R']   = {  vardecl,  symbol, .u.op  =   SREG<<8 |  OREG},
           53         ['G']   = {  vardecl,  symbol, .u.op  =  SGLOB<<8 |  OMEM},
           54         ['X']   = {  vardecl,  symbol, .u.op  = SEXTRN<<8 |  OMEM},
           55         ['Y']   = {  vardecl,  symbol, .u.op  =  SPRIV<<8 |  OMEM},
           56         ['T']   = {  vardecl,  symbol, .u.op  = SLOCAL<<8 |  OMEM},
           57         ['M']   = {  flddecl,  symbol, .u.op  =  SMEMB<<8 |  OMEM},
           58         ['L']   = { labeldcl,  symbol, .u.op  = SLABEL<<8 | OLABEL},
           59 
           60         ['C']   = {     NULL,    type, .u.arg =    &int8type},
           61         ['I']   = {     NULL,    type, .u.arg =   &int16type},
           62         ['W']   = {     NULL,    type, .u.arg =   &int32type},
           63         ['Q']   = {     NULL,    type, .u.arg =   &int64type},
           64         ['K']   = {     NULL,    type, .u.arg =   &uint8type},
           65         ['N']   = {     NULL,    type, .u.arg =  &uint16type},
           66         ['Z']   = {     NULL,    type, .u.arg =  &uint32type},
           67         ['O']   = {     NULL,    type, .u.arg =  &uint64type},
           68         ['J']   = {     NULL,    type, .u.arg = &float32type},
           69         ['D']   = {     NULL,    type, .u.arg = &float64type},
           70         ['H']   = {     NULL,    type, .u.arg = &float80type},
           71         ['0']   = {     NULL,    type, .u.arg =    &voidtype},
           72         ['B']   = {     NULL,    type, .u.arg =    &booltype},
           73         ['P']   = {     NULL,    type, .u.arg =     &ptrtype},
           74         ['E']   = {     NULL,    type, .u.arg =    &funetype},
           75         ['1']        = {     NULL,    type, .u.arg =    &arg_type},
           76 
           77         ['F']   = {     NULL,    type, .u.arg =     &funtype},
           78         ['V']   = {    array,composed,                     0},
           79         ['U']   = {aggregate,composed,                     0},
           80         ['S']   = {aggregate,composed,                     0},
           81 
           82         ['"']   = {     NULL, getname,                     0},
           83         ['{']   = { beginfun,    NULL,                     0},
           84         ['}']   = {   endfun,    NULL,                     0},
           85         ['(']   = {     NULL,   binit,                     0},
           86         [')']   = {     NULL,   einit,                     0},
           87         ['\\']  = {  endpars,    NULL,                     0},
           88         ['\t']  = {     stmt,    NULL,                     0},
           89 
           90         ['~']   = {     NULL,   unary, .u.op =          OCPL},
           91         ['_']   = {     NULL,   unary, .u.op =         OSNEG},
           92         ['\'']  = {     NULL,   unary, .u.op =         OADDR},
           93         ['@']   = {     NULL,   unary, .u.op =          OPTR},
           94         ['g']   = {     NULL,   unary, .u.op =         OCAST},
           95         ['p']   = {     NULL,   unary, .u.op =          OPAR},
           96         ['n']   = {     NULL,   unary, .u.op =          ONEG},
           97 
           98         ['a']   = {     NULL,  binary, .u.op =          OAND},
           99         ['o']   = {     NULL,  binary, .u.op =           OOR},
          100         ['.']   = {     NULL,  binary, .u.op =        OFIELD},
          101         ['+']   = {     NULL,  binary, .u.op =          OADD},
          102         ['-']   = {     NULL,  binary, .u.op =          OSUB},
          103         ['*']   = {     NULL,  binary, .u.op =          OMUL},
          104         ['%']   = {     NULL,  binary, .u.op =          OMOD},
          105         ['/']   = {     NULL,  binary, .u.op =          ODIV},
          106         ['l']   = {     NULL,  binary, .u.op =          OSHL},
          107         ['r']   = {     NULL,  binary, .u.op =          OSHR},
          108         ['<']   = {     NULL,  binary, .u.op =           OLT},
          109         ['>']   = {     NULL,  binary, .u.op =           OGT},
          110         ['[']   = {     NULL,  binary, .u.op =           OLE},
          111         [']']   = {     NULL,  binary, .u.op =           OGE},
          112         ['=']   = {     NULL,  binary, .u.op =           OEQ},
          113         ['!']   = {     NULL,  binary, .u.op =           ONE},
          114         ['&']   = {     NULL,  binary, .u.op =         OBAND},
          115         ['|']   = {     NULL,  binary, .u.op =          OBOR},
          116         ['^']   = {     NULL,  binary, .u.op =         OBXOR},
          117         [',']   = {     NULL,  binary, .u.op =        OCOMMA},
          118         ['m']   = {     NULL,  builtin,.u.op =      OBUILTIN},
          119 
          120         [':']   = {     NULL,  assign, .u.op =        OASSIG},
          121         ['?']   = {     NULL, ternary, .u.op =          OASK},
          122         ['c']   = {     NULL,    call, .u.op =         OCALL},
          123         ['z']   = {     NULL,    call, .u.op =        OCALLE},
          124 
          125         ['#']   = {     NULL,constant, .u.op =        OCONST},
          126 
          127         ['j']   = {     NULL,    jump, .u.op =          OJMP},
          128         ['y']   = {     NULL,    jump, .u.op =       OBRANCH},
          129         ['h']   = {     NULL, oreturn, .u.op =          ORET},
          130         ['i']   = {     NULL,    NULL, .u.op =          OINC},
          131         ['d']   = {     NULL,    NULL, .u.op =          ODEC},
          132 
          133         ['b']   = {     NULL,    loop, .u.op =        OBLOOP},
          134         ['e']   = {     NULL,    loop, .u.op =        OELOOP},
          135 
          136         ['v']   = {     NULL,   ocase, .u.op =         OCASE},
          137         ['f']   = {     NULL,   ocase, .u.op =      ODEFAULT},
          138         ['t']   = {     NULL, eswitch, .u.op =      OESWITCH},
          139         ['s']   = {     NULL, bswitch, .u.op =      OBSWITCH},
          140 };
          141 
          142 static int sclass, inpars, ininit, endf, lineno;
          143 static void *stack[STACKSIZ], **sp = stack;
          144 
          145 static Node *
          146 push(void *elem)
          147 {
          148         if (sp == &stack[STACKSIZ])
          149                 error(ESTACKO);
          150         return *sp++ = elem;
          151 }
          152 
          153 static void *
          154 pop(void)
          155 {
          156         if (sp == stack)
          157                 error(ESTACKU);
          158         return *--sp;
          159 }
          160 
          161 static int
          162 empty(void)
          163 {
          164         return sp == stack;
          165 }
          166 
          167 static void
          168 type(char *token, union tokenop u)
          169 {
          170         push(u.arg);
          171 }
          172 
          173 static void
          174 composed(char *token, union tokenop u)
          175 {
          176         Symbol *sym;
          177 
          178         sym = getsym(atoi(token+1));
          179         sym->type.id = sym->id;
          180         push(&sym->type);
          181 }
          182 
          183 static void
          184 getname(char *t, union tokenop u)
          185 {
          186         push((*++t) ? xstrdup(t) : NULL);
          187 }
          188 
          189 static void
          190 symbol(char *token, union tokenop u)
          191 {
          192         Node *np = node(u.op & 0xFF);
          193         Symbol *sym = getsym(atoi(token+1));
          194 
          195         sclass = u.op >> 8;
          196         np->u.sym = sym;
          197         np->type = sym->type;
          198         push(np);
          199 }
          200 
          201 static Type *
          202 gettype(char *token)
          203 {
          204         struct decoc *dp;
          205 
          206         dp = &optbl[*token];
          207         if (!dp->parse)
          208                 error(ESYNTAX);
          209         (*dp->parse)(token, dp->u);
          210         return pop();
          211 }
          212 
          213 static void
          214 constant(char *token, union tokenop u)
          215 {
          216         static char letters[] = "0123456789ABCDEF";
          217         Node *np;
          218         TUINT v;
          219         unsigned c;
          220 
          221         ++token;
          222         if (*token == '"') {
          223                 ++token;
          224                 np = node(OSTRING);
          225                 np->type.flags = STRF;
          226                 np->type.size = strlen(token);
          227                 np->type.align = int8type.align;
          228                 np->u.s = xstrdup(token);
          229         } else {
          230                 np = node(OCONST);
          231                 np->type = *gettype(token++);
          232                 for (v = 0; c = *token++; v += c) {
          233                         v <<= 4;
          234                         c = strchr(letters, c) - letters;
          235                 }
          236                 np->u.i = v;
          237         }
          238         push(np);
          239 }
          240 
          241 static void
          242 assign(char *token, union tokenop u)
          243 {
          244         int subop;
          245         Node *np = node(u.op);
          246 
          247         switch (subop = *++token) {
          248         case '+':
          249         case '-':
          250         case '*':
          251         case '%':
          252         case '/':
          253         case 'l':
          254         case 'r':
          255         case '&':
          256         case '|':
          257         case '^':
          258         case 'i':
          259         case 'd':
          260                 ++token;
          261                 subop = optbl[subop].u.op;
          262                 break;
          263         default:
          264                 subop = 0;
          265                 break;
          266         }
          267 
          268         np->u.subop = subop;
          269         np->type = *gettype(token);
          270         np->right = pop();
          271         np->left = pop();
          272         push(np);
          273 }
          274 
          275 static void
          276 ternary(char *token, union tokenop u)
          277 {
          278         Node *ask = node(OASK), *colon = node(OCOLON);
          279         Type *tp = gettype(token+1);
          280 
          281         colon->right = pop();
          282         colon->left = pop();
          283 
          284         ask->type = *tp;
          285         ask->left = pop();
          286         ask->right = colon;
          287         push(ask);
          288 }
          289 
          290 static void
          291 eval(char *tok)
          292 {
          293         struct decoc *dp;
          294 
          295         do {
          296                 dp = &optbl[*tok];
          297                 if (!dp->parse)
          298                         break;
          299                 (*dp->parse)(tok, dp->u);
          300         } while (tok = strtok(NULL, "\t\n"));
          301 }
          302 
          303 static int
          304 nextline(void)
          305 {
          306         static char line[LINESIZ];
          307         size_t len;
          308         int c;
          309         void (*fun)(void);
          310 
          311 repeat:
          312         ++lineno;
          313         if (!fgets(line, sizeof(line), stdin))
          314                 return 0;
          315         if ((len = strlen(line)) == 0 || line[0] == '\n')
          316                 goto repeat;
          317         if (line[len-1] != '\n')
          318                 error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE);
          319         line[len-1] = '\0';
          320 
          321         c = *line;
          322         eval(strtok(line, "\t\n"));
          323         if ((fun = *optbl[c].eval) != NULL)
          324                 (*fun)();
          325         if (sp != stack)
          326                 error(ESTACKA);
          327         return 1;
          328 }
          329 
          330 static void
          331 oreturn(char *token, union tokenop u)
          332 {
          333         Node *np = node(u.op);
          334 
          335         if (token = strtok(NULL, "\t\n"))
          336                 eval(token);
          337         if (!empty())
          338                 np->left = pop();
          339         push(np);
          340 }
          341 
          342 /*
          343  * Move np (which is a OCASE/ODEFAULT/OESWITCH) to be contigous with
          344  * the last switch table. It is a bit ugly to touch directly curstmt
          345  * here, but moving this function to node.c is worse, because we are
          346  * putting knowledge of how the text is parsed into the node
          347  * represtation module.
          348  */
          349 static void
          350 waft(Node *np)
          351 {
          352         Node *lastcase, *next;
          353         struct swtch *cur;
          354         extern Node *curstmt;
          355 
          356         if (swp == swtbl)
          357                 error(EWTACKU);
          358 
          359         cur = swp - 1;
          360         lastcase = cur->last;
          361         next = lastcase->next;
          362 
          363         np->next = next;
          364         np->prev = lastcase;
          365 
          366         if (next)
          367                 next->prev = np;
          368         lastcase->next = np;
          369 
          370         if (curstmt == cur->last)
          371                 curstmt = np;
          372         cur->last = np;
          373         cur->nr++;
          374 }
          375 
          376 static void
          377 bswitch(char *token, union tokenop u)
          378 {
          379         struct swtch *cur;
          380         Node *np = node(u.op);
          381 
          382         if (swp == &swtbl[NR_BLOCK])
          383                 error(EWTACKO);
          384         cur = swp++;
          385         cur->nr = 0;
          386 
          387         eval(strtok(NULL, "\t\n"));
          388         np->left = pop();
          389 
          390         push(cur->first = cur->last = np);
          391 }
          392 
          393 static void
          394 eswitch(char *token, union tokenop u)
          395 {
          396         struct swtch *cur;
          397 
          398         if (swp == swtbl)
          399                 error(EWTACKU);
          400         jump(token, u);
          401         waft(pop());
          402         cur = --swp;
          403         cur->first->u.i = cur->nr;
          404 }
          405 
          406 static void
          407 ocase(char *token, union tokenop u)
          408 {
          409         jump(token, u);
          410         waft(pop());
          411 }
          412 
          413 static void
          414 jump(char *token, union tokenop u)
          415 {
          416         Node *aux, *np = node(u.op);
          417 
          418         eval(strtok(NULL, "\t\n"));
          419 
          420         if (u.op == OBRANCH || u.op == OCASE)
          421                 np->left = pop();
          422         aux = pop();
          423         np->u.sym = aux->u.sym;
          424         delnode(aux);
          425         push(np);
          426 }
          427 
          428 static void
          429 loop(char *token, union tokenop u)
          430 {
          431         push(node(u.op));
          432 }
          433 
          434 static void
          435 unary(char *token, union tokenop u)
          436 {
          437         Node *np = node(u.op);
          438 
          439         np->type = *gettype(token+1);
          440         np->left = pop();
          441         np->right = NULL;
          442         push(np);
          443 }
          444 
          445 static void
          446 call(char *token, union tokenop u)
          447 {
          448         Node *np, *par, *fun = node(u.op);
          449 
          450         for (par = NULL;; par = np) {
          451                 np = pop();
          452                 if (np->op != OPAR)
          453                         break;
          454                 np->right = par;
          455         }
          456 
          457         fun->type = *gettype(token+1);
          458         fun->left = np;
          459         fun->right = par;
          460         push(fun);
          461 }
          462 
          463 static void
          464 builtin(char *token, union tokenop u)
          465 {
          466         Node *np = node(u.op);
          467         char *name;
          468         unsigned subop, nchilds;
          469 
          470         np->type = *gettype(token+1);
          471         name = pop();
          472 
          473         if (!strcmp("__builtin_va_arg", name)) {
          474                 nchilds = 1;
          475                 subop = BVA_ARG;
          476         } else if (!strcmp("__builtin_va_start", name)) {
          477                 nchilds = 2;
          478                 subop = BVA_START;
          479         } else if (!strcmp("__builtin_va_end", name)) {
          480                 nchilds = 1;
          481                 subop = BVA_END;
          482         } else if (!strcmp("__builtin_va_copy", name)) {
          483                 nchilds = 2;
          484                 subop = BVA_COPY;
          485         } else {
          486                 error(EBBUILT);
          487         }
          488 
          489         np->u.subop = subop;
          490         np->right = (nchilds == 2) ? pop() : NULL;
          491         np->left = (nchilds != 0) ? pop() : NULL;
          492 
          493         free(name);
          494         push(np);
          495 }
          496 
          497 static void
          498 binary(char *token, union tokenop u)
          499 {
          500         Node *np = node(u.op);
          501 
          502         np->type = *gettype(token+1);
          503         np->right = pop();
          504         np->left = pop();
          505         push(np);
          506 }
          507 
          508 static void
          509 binit(char *token, union tokenop u)
          510 {
          511         ininit = 1;
          512 }
          513 
          514 static void
          515 einit(char *token, union tokenop u)
          516 {
          517         ininit = 0;
          518         endinit();
          519 }
          520 
          521 static void
          522 endpars(void)
          523 {
          524         if (!curfun || !inpars)
          525                 error(ESYNTAX);
          526         inpars = 0;
          527 }
          528 
          529 static void
          530 aggregate(void)
          531 {
          532         Node *align, *size;
          533         char *name;
          534         Type *tp;
          535         Symbol *sym;
          536 
          537         align = pop();
          538         size = pop();
          539         name = pop();
          540         tp = pop();
          541 
          542         tp->size = size->u.i;
          543         tp->align = align->u.i;
          544         tp->flags = AGGRF;
          545         /*
          546          * type is the first field of Symbol so we can obtain the
          547          * address of the symbol from the address of the type.
          548          * We have to do this because composed returns the pointer
          549          * to the type, but in this function we also need the
          550          * symbol to store the name.
          551          */
          552         sym = (Symbol *) tp;
          553         sym->name = name;
          554         deftype(tp);
          555 
          556         delnode(align);
          557         delnode(size);
          558 }
          559 
          560 static void
          561 array(void)
          562 {
          563         Type *tp, *base;
          564         Node *size;
          565 
          566         size = pop();
          567         base = pop();
          568         tp = pop();
          569         tp->flags = ARRF;
          570 
          571         if (size->u.i > LONG_MAX/base->size)
          572                 error(EOVERFL);
          573 
          574         tp->size = size->u.i * base->size;
          575         tp->align = base->align;
          576 
          577         delnode(size);
          578 }
          579 
          580 static void
          581 decl(Symbol *sym)
          582 {
          583         Type *tp = &sym->type;
          584 
          585         if (tp->flags & FUNF) {
          586                 lastfun = sym;
          587         } else {
          588                 switch (sym->kind) {
          589                 case SEXTRN:
          590                 case SGLOB:
          591                 case SPRIV:
          592                 case SLOCAL:
          593                         defglobal(sym);
          594                         break;
          595                 case SAUTO:
          596                 case SREG:
          597                         if (!curfun)
          598                                 error(ESYNTAX);
          599                         ((inpars) ? defpar : defvar)(sym);
          600                         break;
          601                 default:
          602                         abort();
          603                 }
          604         }
          605 }
          606 
          607 static void
          608 vardecl(void)
          609 {
          610         Type *tp, *rp;
          611         Node *np;
          612         Symbol *sym;
          613         char *name;
          614 
          615         name = pop();
          616         tp = pop();
          617         if (tp->flags & FUNF)
          618                 rp = pop();
          619         np = pop();
          620 
          621         sym = np->u.sym;
          622         /*
          623          * We have to free sym->name because in tentative declarations
          624          * we can have multiple declarations of the same symbol, and in
          625          * this case our parser will allocate twice the memory
          626          */
          627         free(sym->name);
          628         sym->name = name;
          629         sym->type = *tp;
          630         if (tp->flags & FUNF)
          631                 sym->rtype = *rp;
          632         sym->kind = sclass;
          633 
          634         if (ininit)
          635                 sym->type.flags |= INITF;
          636         decl(sym);
          637         delnode(np);
          638 }
          639 
          640 static void
          641 flddecl(void)
          642 {
          643         Node *off, *np;
          644         char *name;
          645         Type *tp;
          646         Symbol *sym;
          647 
          648         off = pop();
          649         name = pop();
          650         tp = pop();
          651         np = pop();
          652 
          653         sym = np->u.sym;
          654         sym->u.off = off->u.i;
          655         sym->name = name;
          656         sym->type = *tp;
          657 
          658         delnode(np);
          659         delnode(off);
          660 }
          661 
          662 static void
          663 labeldcl(void)
          664 {
          665         Node *np;
          666         Symbol *sym;
          667 
          668         np = pop();
          669         np->op = ONOP;
          670         sym = np->u.sym;
          671         sym->kind = SLABEL;
          672         sym->u.stmt = np;
          673         np->label = sym;
          674         addstmt(np, SETCUR);
          675 }
          676 
          677 static void
          678 stmt(void)
          679 {
          680         Node *np;
          681 
          682         if (empty())
          683                 return;
          684         np = pop();
          685         if (ininit) {
          686                 data(np);
          687                 deltree(np);
          688                 return;
          689         }
          690         addstmt(np, SETCUR);
          691 }
          692 
          693 static void
          694 beginfun(void)
          695 {
          696         curfun = lastfun;
          697         inpars = 1;
          698         pushctx();
          699         addstmt(node(OBFUN), SETCUR);
          700 }
          701 
          702 static void
          703 endfun(void)
          704 {
          705         endf = 1;
          706         addstmt(node(OEFUN), SETCUR);
          707 }
          708 
          709 void
          710 parse(void)
          711 {
          712         cleannodes();  /* remove code of previous function */
          713         popctx();  /* remove context of previous function */
          714         curfun = NULL;
          715         endf = 0;
          716 
          717         while (!endf && nextline())
          718                 ;
          719         if (ferror(stdin))
          720                 error(EFERROR, strerror(errno));
          721 }