comparison lwcc/cc-parse.c @ 498:1bd2d590d734

Rejig parser to eliminate lemon No longer use lemon for building the parser. It adds too much complexity, really, and a hand written recursive descent parser is far more flexible. The current iteration can handle exactly one statement: "return <int>".
author William Astle <lost@l-w.ca>
date Thu, 08 Aug 2019 23:32:23 -0600
parents
children f3e9732973f1
comparison
equal deleted inserted replaced
497:4b865c9d4371 498:1bd2d590d734
1 /*
2 lwcc/cc-parse.c
3
4 Copyright © 2019 William Astle
5
6 This file is part of LWTOOLS.
7
8 LWTOOLS is free software: you can redistribute it and/or modify it under the
9 terms of the GNU General Public License as published by the Free Software
10 Foundation, either version 3 of the License, or (at your option) any later
11 version.
12
13 This program is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 You should have received a copy of the GNU General Public License along with
19 this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include <string.h>
23
24 #include <lw_alloc.h>
25 #include <lw_string.h>
26
27 #include "cpp.h"
28 #include "tree.h"
29
30 #define TOK_KW_IF -1
31 #define TOK_KW_ELSE -2
32 #define TOK_KW_WHILE -3
33 #define TOK_KW_DO -4
34 #define TOK_KW_FOR -5
35 #define TOK_KW_VOID -6
36 #define TOK_KW_INT -7
37 #define TOK_KW_CHAR -8
38 #define TOK_KW_SHORT -9
39 #define TOK_KW_LONG -10
40 #define TOK_KW_UNSIGNED -11
41 #define TOK_KW_SIGNED -12
42 #define TOK_KW_FLOAT -13
43 #define TOK_KW_DOUBLE -14
44 #define TOK_KW_STRUCT -15
45 #define TOK_KW_UNION -16
46 #define TOK_KW_TYPEDEF -17
47 #define TOK_KW_STATIC -18
48 #define TOK_KW_SWITCH -19
49 #define TOK_KW_CASE -20
50 #define TOK_KW_DEFAULT -21
51 #define TOK_KW_BREAK -22
52 #define TOK_KW_CONTINUE -23
53 #define TOK_KW_CONST -24
54 #define TOK_KW_AUTO -25
55 #define TOK_KW_ENUM -26
56 #define TOK_KW_REGISTER -27
57 #define TOK_KW_SIZEOF -28
58 #define TOK_KW_VOLATILE -29
59 #define TOK_KW_RETURN -30
60 #define TOK_KW_EXTERN -31
61 #define TOK_KW_GOTO -32
62 #define TOK_TYPENAME -100
63 #define TOK_CONST_INT -150
64
65 static struct { int tok; char *word; } keyword_list[] = {
66 { TOK_KW_IF, "if" },
67 { TOK_KW_ELSE, "else" },
68 { TOK_KW_WHILE, "while" },
69 { TOK_KW_DO, "do" },
70 { TOK_KW_FOR, "for" },
71 { TOK_KW_VOID, "void" },
72 { TOK_KW_INT, "int" },
73 { TOK_KW_CHAR, "char" },
74 { TOK_KW_SHORT, "short" },
75 { TOK_KW_LONG, "long" },
76 { TOK_KW_UNSIGNED, "unsigned" },
77 { TOK_KW_SIGNED, "signed" },
78 { TOK_KW_FLOAT, "float" },
79 { TOK_KW_DOUBLE, "double" },
80 { TOK_KW_STRUCT, "struct" },
81 { TOK_KW_UNION, "union" },
82 { TOK_KW_TYPEDEF, "typedef" },
83 { TOK_KW_STATIC, "static" },
84 { TOK_KW_SWITCH, "switch" },
85 { TOK_KW_CASE, "case" },
86 { TOK_KW_DEFAULT, "default" },
87 { TOK_KW_BREAK, "break" },
88 { TOK_KW_CONTINUE, "continue" },
89 { TOK_KW_CONST, "const" },
90 { TOK_KW_AUTO, "auto" },
91 { TOK_KW_ENUM, "enum" },
92 { TOK_KW_REGISTER, "register" },
93 { TOK_KW_SIZEOF, "sizeof" },
94 { TOK_KW_VOLATILE, "volatile" },
95 { TOK_KW_RETURN, "return" },
96 { TOK_KW_EXTERN, "extern" },
97 { TOK_KW_GOTO, "goto" },
98 { TOK_NONE, "" }
99 };
100
101
102 struct parser_state
103 {
104 struct preproc_info *pp; // preprocessor data
105 struct token *curtok; // the current token
106 };
107
108
109 struct token *parse_next(struct parser_state *ps)
110 {
111 struct token *tok;
112 int i;
113
114 for (;;)
115 {
116 tok = preproc_next(ps -> pp);
117 if (tok -> ttype == TOK_WSPACE)
118 continue;
119 if (tok -> ttype == TOK_EOL)
120 continue;
121 if (tok -> ttype == TOK_CHAR)
122 {
123 // random character
124 fprintf(stderr, "Random character %02x\n", tok -> strval[0]);
125 if (tok -> strval[0] < 32 || tok -> strval[0] > 126)
126 continue;
127 }
128 break;
129 }
130 if (tok -> ttype == TOK_IDENT)
131 {
132 // convert identifier tokens to their respective meanings
133 for (i = 0; keyword_list[i].tok != TOK_NONE; i++)
134 {
135 if (strcmp(keyword_list[i].word, tok -> strval) == 0)
136 {
137 tok -> ttype = keyword_list[i].tok;
138 goto out;
139 }
140 }
141 // check for registered types here
142 }
143 else if (tok -> ttype == TOK_NUMBER)
144 {
145 // look for anything that isn't 0-9
146 for (i = 0; tok -> strval[i]; i++)
147 {
148 if (tok -> strval[i] < '0' || tok -> strval[i] > '9')
149 break;
150 }
151 if (tok -> strval[i] == 0)
152 tok -> ttype = TOK_CONST_INT;
153 }
154 out:
155 fprintf(stderr, "Lexed: ");
156 token_print(tok, stderr);
157 fprintf(stderr, " (%d)\n", tok -> ttype);
158 if (ps -> curtok)
159 token_free(ps -> curtok);
160 ps -> curtok = tok;
161 return tok;
162 }
163
164 void parse_generr(struct parser_state *ps, char *tag)
165 {
166 fprintf(stderr, "(%s) Unexpected token (%d): ", tag, ps -> curtok -> ttype);
167 token_print(ps -> curtok, stderr);
168 fprintf(stderr, "\n");
169
170 }
171
172 node_t *parse_expr(struct parser_state *ps)
173 {
174 node_t *rv;
175 if (ps -> curtok -> ttype == TOK_CONST_INT)
176 {
177 rv = node_create(NODE_CONST_INT, ps -> curtok -> strval);
178 parse_next(ps);
179 return rv;
180 }
181 parse_generr(ps, "expr");
182 return NULL;
183 }
184
185 node_t *parse_statement(struct parser_state *ps)
186 {
187 node_t *rv;
188 node_t *n;
189
190 switch (ps -> curtok -> ttype)
191 {
192 case TOK_KW_RETURN:
193 parse_next(ps);
194 n = parse_expr(ps);
195 if (!n)
196 {
197 parse_generr(ps, "statement");
198 return NULL;
199 }
200 rv = node_create(NODE_STMT_RETURN);
201 node_addchild(rv, n);
202 break;
203
204 default:
205 return NULL;
206 }
207
208 if (ps -> curtok -> ttype != TOK_EOS)
209 parse_generr(ps, "statement");
210 else
211 parse_next(ps);
212 return rv;
213 }
214
215 node_t *parse_globaldecl(struct parser_state *ps)
216 {
217 node_t *rv = NULL;
218 node_t *stmt;
219 char *fnname = NULL;
220 if (ps -> curtok -> ttype == TOK_KW_INT)
221 {
222 // variable name
223 parse_next(ps);
224 if (ps -> curtok -> ttype != TOK_IDENT)
225 goto error;
226 fnname = lw_strdup(ps -> curtok -> strval);
227 parse_next(ps);
228 if (ps -> curtok -> ttype != TOK_OPAREN)
229 goto error;
230 parse_next(ps);
231 if (ps -> curtok -> ttype != TOK_CPAREN)
232 goto error;
233 parse_next(ps);
234 if (ps -> curtok -> ttype != TOK_OBRACE)
235 goto error;
236 parse_next(ps);
237 stmt = parse_statement(ps);
238 if (!stmt)
239 goto error;
240 rv = node_create(NODE_FUNDEF, node_create(NODE_TYPE_INT), node_create(NODE_IDENT, fnname), node_create(NODE_FUNARGS), stmt);
241 if (ps -> curtok -> ttype != TOK_CBRACE)
242 goto error;
243 parse_next(ps);
244 lw_free(fnname);
245 return rv;
246 }
247
248
249 error:
250 if (fnname)
251 lw_free(fnname);
252 parse_generr(ps, "globaldecl");
253 return rv;
254 }
255
256 node_t *parse_program(struct preproc_info *pp)
257 {
258 node_t *rv;
259 node_t *node;
260 struct parser_state ps;
261
262 ps.pp = pp;
263 ps.curtok = NULL;
264
265 rv = node_create(NODE_PROGRAM);
266
267 // prime the parser
268 parse_next(&ps);
269 while (ps.curtok -> ttype != TOK_EOF)
270 {
271 node = parse_globaldecl(&ps);
272 if (!node)
273 break;
274 node_addchild(rv, node);
275 }
276
277 return rv;
278 }