view lwcc/cc-parse.c @ 504:16bd8effb2f6

Update lwcc driver to actually do something useful sometimes. Make lwcc driver actually find the correct filenames for the preprocessor and compiler. Construct the preprocessor argument list correctly instead of assigning all arguments to argv[0], including the trailing NULL argument. Instruct lwasm to use the object file format. Correct state setting for -S and -c options so they stop after the correct phases of compilation.
author William Astle <>
date Thu, 24 Oct 2019 22:41:04 -0600
parents 14a40f8bb4eb
children 7e8298f7bc0a
line wrap: on
line source


/*
Copyright © 2019 William Astle

This file is part of LWTOOLS.

LWTOOLS is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <string.h>

#include <lw_alloc.h>
#include <lw_string.h>

#include "cpp.h"
#include "tree.h"

/*
Token types assigned by the parser itself (see parse_next()).

TOK_KW_* identify reserved words, TOK_CONST_INT identifies a number made up
solely of decimal digits. TOK_TYPENAME is not assigned anywhere in the code
visible here; presumably it is reserved for registered type names (TODO
confirm). All values are negative, presumably to stay clear of the token
types declared by the preprocessor (confirm against cpp.h).
*/
enum
{
    TOK_KW_IF       = -1,
    TOK_KW_ELSE     = -2,
    TOK_KW_WHILE    = -3,
    TOK_KW_DO       = -4,
    TOK_KW_FOR      = -5,
    TOK_KW_VOID     = -6,
    TOK_KW_INT      = -7,
    TOK_KW_CHAR     = -8,
    TOK_KW_SHORT    = -9,
    TOK_KW_LONG     = -10,
    TOK_KW_UNSIGNED = -11,
    TOK_KW_SIGNED   = -12,
    TOK_KW_FLOAT    = -13,
    TOK_KW_DOUBLE   = -14,
    TOK_KW_STRUCT   = -15,
    TOK_KW_UNION    = -16,
    TOK_KW_TYPEDEF  = -17,
    TOK_KW_STATIC   = -18,
    TOK_KW_SWITCH   = -19,
    TOK_KW_CASE     = -20,
    TOK_KW_DEFAULT  = -21,
    TOK_KW_BREAK    = -22,
    TOK_KW_CONTINUE = -23,
    TOK_KW_CONST    = -24,
    TOK_KW_AUTO     = -25,
    TOK_KW_ENUM     = -26,
    TOK_KW_REGISTER = -27,
    TOK_KW_SIZEOF   = -28,
    TOK_KW_VOLATILE = -29,
    TOK_KW_RETURN   = -30,
    TOK_KW_EXTERN   = -31,
    TOK_KW_GOTO     = -32,
    TOK_TYPENAME    = -100,
    TOK_CONST_INT   = -150
};

/*
Reserved words recognised by parse_next(): each entry maps the spelling of a
keyword to the token type that replaces TOK_IDENT for it. The table is
terminated by an entry whose tok is TOK_NONE.
*/
static struct { int tok; char *word; } keyword_list[] = {
    { TOK_KW_IF, "if" },
    { TOK_KW_ELSE, "else" },
    { TOK_KW_WHILE, "while" },
    { TOK_KW_DO, "do" },
    { TOK_KW_FOR, "for" },
    { TOK_KW_VOID, "void" },
    { TOK_KW_INT, "int" },
    { TOK_KW_CHAR, "char" },
    { TOK_KW_SHORT, "short" },
    { TOK_KW_LONG, "long" },
    { TOK_KW_UNSIGNED, "unsigned" },
    { TOK_KW_SIGNED, "signed" },
    { TOK_KW_FLOAT, "float" },
    { TOK_KW_DOUBLE, "double" },
    { TOK_KW_STRUCT, "struct" },
    { TOK_KW_UNION, "union" },
    { TOK_KW_TYPEDEF, "typedef" },
    { TOK_KW_STATIC, "static" },
    { TOK_KW_SWITCH, "switch" },
    { TOK_KW_CASE, "case" },
    { TOK_KW_DEFAULT, "default" },
    { TOK_KW_BREAK, "break" },
    { TOK_KW_CONTINUE, "continue" },
    { TOK_KW_CONST, "const" },
    { TOK_KW_AUTO, "auto" },
    { TOK_KW_ENUM, "enum" },
    { TOK_KW_REGISTER, "register" },
    { TOK_KW_SIZEOF, "sizeof" },
    { TOK_KW_VOLATILE, "volatile" },
    { TOK_KW_RETURN, "return" },
    { TOK_KW_EXTERN, "extern" },
    { TOK_KW_GOTO, "goto" },
    { TOK_NONE, "" }
};

/*
State shared by all of the parse_*() routines for one parsing run.
*/
struct parser_state
{
    struct preproc_info *pp;                // preprocessor data
    struct token *curtok;                   // the current token
};

struct token *parse_next(struct parser_state *ps)
    struct token *tok;
    int i;
    for (;;)
        tok = preproc_next(ps -> pp);
        if (tok -> ttype == TOK_WSPACE)
        if (tok -> ttype == TOK_EOL)
        if (tok -> ttype == TOK_CHAR)
            // random character
            fprintf(stderr, "Random character %02x\n", tok -> strval[0]);
            if (tok -> strval[0] < 32 || tok -> strval[0] > 126)
    if (tok -> ttype == TOK_IDENT)
        // convert identifier tokens to their respective meanings
        for (i = 0; keyword_list[i].tok != TOK_NONE; i++)
            if (strcmp(keyword_list[i].word, tok -> strval) == 0)
                tok -> ttype = keyword_list[i].tok;
                goto out;
        // check for registered types here
    else if (tok -> ttype == TOK_NUMBER)
        // look for anything that isn't 0-9
        for (i = 0; tok -> strval[i]; i++)
            if (tok -> strval[i] < '0' || tok -> strval[i] > '9')
        if (tok -> strval[i] == 0)
            tok -> ttype = TOK_CONST_INT;
    fprintf(stderr, "Lexed: ");
    token_print(tok, stderr);
    fprintf(stderr, " (%d)\n", tok -> ttype);
    if (ps -> curtok)
        token_free(ps -> curtok);
    ps -> curtok = tok;
    return tok;

void parse_generr(struct parser_state *ps, char *tag)
    fprintf(stderr, "(%s) Unexpected token (%d): ", tag, ps -> curtok -> ttype);
    token_print(ps -> curtok, stderr);
    fprintf(stderr, "\n");


// forward declaration: parse_expr_subscript() and parse_expr_cond() call
// parse_expr_real() ahead of its definition further down
node_t *parse_expr_real(struct parser_state *ps, int prec);

node_t *parse_term_real(struct parser_state *ps)
    node_t *rv;

    switch (ps -> curtok -> ttype)
    case TOK_CONST_INT:
        rv = node_create(NODE_CONST_INT, ps -> curtok -> strval);
        return rv;

    parse_generr(ps, "term");
    return NULL;

/*
Finish parsing a function call whose callee is term1.

The current token must be the closing parenthesis; no argument list is
parsed and the second child of the resulting node is NULL. On success the
parenthesis is consumed and a NODE_OPER_FNCALL node is returned. Otherwise
the error is reported and NULL is returned.
*/
node_t *parse_expr_fncall(struct parser_state *ps, node_t *term1)
{
    if (ps -> curtok -> ttype != TOK_CPAREN)
    {
        parse_generr(ps, "missing )");
        return NULL;
    }
    // consume the ')'
    parse_next(ps);
    return node_create(NODE_OPER_FNCALL, term1, NULL);
}

/*
Build the node for a postfix increment applied to term1. ps is not used; it
is present so the function matches the operator handler signature used by
parse_expr_real().
*/
node_t *parse_expr_postinc(struct parser_state *ps, node_t *term1)
{
    (void)ps;
    return node_create(NODE_OPER_POSTINC, term1);
}

/*
Build the node for a postfix decrement applied to term1. ps is not used; it
is present so the function matches the operator handler signature used by
parse_expr_real().
*/
node_t *parse_expr_postdec(struct parser_state *ps, node_t *term1)
{
    (void)ps;
    return node_create(NODE_OPER_POSTDEC, term1);
}

/*
Finish parsing a subscript applied to term1.

The index is parsed as a complete expression and must be followed by a
closing square bracket, which is consumed. Returns a NODE_OPER_SUBSCRIPT
node, or NULL if the index fails to parse or the bracket is missing (the
latter is reported through parse_generr()).
*/
node_t *parse_expr_subscript(struct parser_state *ps, node_t *term1)
{
    node_t *term2;

    term2 = parse_expr_real(ps, 0);
    if (!term2)
        return NULL;
    if (ps -> curtok -> ttype != TOK_CSQUARE)
    {
        parse_generr(ps, "missing ]");
        return NULL;
    }
    // consume the ']'
    parse_next(ps);
    return node_create(NODE_OPER_SUBSCRIPT, term1, term2);
}

/*
Finish parsing a conditional (?:) expression whose condition is term1.

Returns a NODE_OPER_COND node with the condition and both result operands
as children, or NULL if either operand fails to parse or the ':' is missing
(the latter is reported through parse_generr()).
*/
node_t *parse_expr_cond(struct parser_state *ps, node_t *term1)
{
    node_t *term2, *term3;

    // conditional operator
    // NOTE: the middle operand is evaluated as though it is its own
    // independent expression because the : must appear. The third
    // operand is evaluated at the ternary operator precedence so that
    // subsequent operand binding behaves correctly (if surprisingly). This
    // would be less confusing if the ternary operator was fully bracketed
    // (that is, had a terminator)
    term2 = parse_expr_real(ps, 0);
    if (!term2)
        return NULL;
    if (ps -> curtok -> ttype == TOK_COLON)
    {
        // consume the ':' so the third operand starts on a term
        parse_next(ps);
        term3 = parse_expr_real(ps, 25);
        if (!term3)
            return NULL;
        return node_create(NODE_OPER_COND, term1, term2, term3);
    }
    else
    {
        parse_generr(ps, "missing :");
        return NULL;
    }
}

node_t *parse_expr_real(struct parser_state *ps, int prec)
    static struct { int tok; int nodetype; int prec; int ra; node_t *(*spec)(struct parser_state *, node_t *); } operlist[] = {
//        { TOK_OPAREN, NODE_OPER_FNCALL, 200, 0, parse_expr_fncall },
//        { TOK_OSQUARE, NODE_OPER_SUBSCRIPT, 200, 0, parse_expr_subscript },
//        { TOK_ARROW, NODE_OPER_PTRMEM, 200, 0 },
//        { TOK_DOT, NODE_OPER_OBJMEM, 200, 0 },
//        { TOK_DBLADD, NODE_OPER_POSTINC, 200, 0, parse_expr_postinc },
//        { TOK_DBLSUB, NODE_OPER_POSTDEC, 200, 0, parse_expr_postdec },
        { TOK_STAR, NODE_OPER_TIMES, 150 },
        { TOK_DIV, NODE_OPER_DIVIDE, 150 },
        { TOK_MOD, NODE_OPER_MOD, 150 },
        { TOK_ADD, NODE_OPER_PLUS, 100 },
        { TOK_SUB, NODE_OPER_MINUS, 100 },
        { TOK_LSH, NODE_OPER_LSH, 90 },
        { TOK_RSH, NODE_OPER_RSH, 90 },
        { TOK_LT, NODE_OPER_LT, 80 },
        { TOK_LE, NODE_OPER_LE, 80 },
        { TOK_GT, NODE_OPER_GT, 80 },
        { TOK_GE, NODE_OPER_GE, 80 },
        { TOK_EQ, NODE_OPER_EQ, 70 },
        { TOK_NE, NODE_OPER_NE, 70 },
        { TOK_BWAND, NODE_OPER_BWAND, 60},
        { TOK_XOR, NODE_OPER_BWXOR, 55 },
        { TOK_BWOR, NODE_OPER_BWOR, 50 },
        { TOK_BAND, NODE_OPER_BAND, 40 },
        { TOK_BOR, NODE_OPER_BOR, 35 },
        { TOK_QMARK, NODE_OPER_COND, 25, 1, parse_expr_cond },
//        { TOK_ASS, NODE_OPER_ASS, 20, 1 },
//        { TOK_ADDASS, NODE_OPER_ADDASS, 20, 1 },
//        { TOK_SUBASS, NODE_OPER_SUBASS, 20, 1 },
//        { TOK_MULASS, NODE_OPER_MULASS, 20, 1 },
//        { TOK_DIVASS, NODE_OPER_DIVASS, 20, 1 },
//        { TOK_MODASS, NODE_OPER_MODASS, 20, 1 },
//        { TOK_LSHASS, NODE_OPER_LSHASS, 20, 1 },
//        { TOK_RSHASS, NODE_OPER_RSHASS, 20, 1 },
//        { TOK_BWORASS, NODE_OPER_BWORASS, 20, 1 },
//        { TOK_XORASS, NODE_OPER_BWXORASS, 20, 1 },
        { TOK_COMMA, NODE_OPER_COMMA, 1 },
        { 0, 0, 0 }
    node_t *term1, *term2;
    int i;
    term1 = parse_term_real(ps);
    if (!term1)
        return NULL;

    for (i = 0; operlist[i].tok; i++)
        if (operlist[i].tok == ps -> curtok -> ttype)
    fprintf(stderr, "Matched operator: %d, %d\n", operlist[i].tok, operlist[i].prec);
    // if we hit the end of the expression, return
    if (operlist[i].tok == 0)
        return term1;

    // is previous operation higher precedence? If so, just return the first term
    if (operlist[i].prec < prec)
        return term1;

    // is this operator left associative and previous operation is same precedence?
    // if so, just return the first term
    if (operlist[i].ra == 0 && operlist[i].prec == prec)
        return term1;

    // consume the operator
    // special handling
    if (operlist[i].spec)
        term2 = (operlist[i].spec)(ps, term1);
        if (!term2)
            return NULL;
        term1 = term2;
        goto nextoper;
    term2 = parse_expr_real(ps, operlist[i].prec);
    if (!term2)
        parse_generr(ps, "expr");
    term1 = node_create(operlist[i].nodetype, term1, term2);
    term2 = NULL;
    goto nextoper;

/*
Parse a complete expression starting at the current token. Returns the
expression tree or NULL on failure (see parse_expr_real()).
*/
node_t *parse_expr(struct parser_state *ps)
{
    return parse_expr_real(ps, 0);
}

node_t *parse_statement(struct parser_state *ps)
    node_t *rv;
    node_t *n;

    switch (ps -> curtok -> ttype)
    case TOK_KW_RETURN:
        n = parse_expr(ps);
        if (!n)
            parse_generr(ps, "statement");
            return NULL;
        rv = node_create(NODE_STMT_RETURN);
        node_addchild(rv, n);

        return NULL;
    if (ps -> curtok -> ttype != TOK_EOS)
        parse_generr(ps, "statement");
    return rv;

node_t *parse_globaldecl(struct parser_state *ps)
    node_t *rv = NULL;
    node_t *stmt;
    char *fnname = NULL;
    if (ps -> curtok -> ttype == TOK_KW_INT)
        // variable name
        if (ps -> curtok -> ttype != TOK_IDENT)
            goto error;
        fnname = lw_strdup(ps -> curtok -> strval);
        if (ps -> curtok -> ttype != TOK_OPAREN)
            goto error;
        if (ps -> curtok -> ttype != TOK_CPAREN)
            goto error;
        if (ps -> curtok -> ttype != TOK_OBRACE)
            goto error;
        stmt = parse_statement(ps);
        if (!stmt)
            goto error;
        rv = node_create(NODE_FUNDEF, node_create(NODE_TYPE_INT), node_create(NODE_IDENT, fnname), node_create(NODE_FUNARGS), stmt);
        if (ps -> curtok -> ttype != TOK_CBRACE)
            goto error;
        return rv;

    if (fnname)
    parse_generr(ps, "globaldecl");
    return rv;

node_t *parse_program(struct preproc_info *pp)
    node_t *rv;
    node_t *node;
    struct parser_state ps;
    ps.pp = pp;
    ps.curtok = NULL;

    rv = node_create(NODE_PROGRAM);

    // prime the parser
    while (ps.curtok -> ttype != TOK_EOF)
        node = parse_globaldecl(&ps);
        if (!node)
        node_addchild(rv, node);

    return rv;