comparison lwbasic/attic/parser.c @ 185:cca933d32298

Clean up some mess in lwbasic directory
author lost@l-w.ca
date Thu, 22 Dec 2011 18:03:38 -0700
parents lwbasic/parser.c@5325b640424d
children
comparison
equal deleted inserted replaced
184:6433cb024174 185:cca933d32298
1 /*
2 parser.c
3
4 Copyright © 2011 William Astle
5
6 This file is part of LWTOOLS.
7
8 LWTOOLS is free software: you can redistribute it and/or modify it under the
9 terms of the GNU General Public License as published by the Free Software
10 Foundation, either version 3 of the License, or (at your option) any later
11 version.
12
13 This program is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 You should have received a copy of the GNU General Public License along with
19 this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 /*
23 This is the parser; it recognizes declarations, statements and expressions and triggers code generation
24 */
25
26 #include <stdio.h>
27
28 #include <lw_alloc.h>
29 #include <lw_string.h>
30
31 #include "lwbasic.h"
32 #include "symtab.h"
33
34 static void expect(cstate *state, int tt)
35 {
36 if (state -> lexer_token != tt)
37 lwb_error("Expecting %s, got %s\n", lexer_token_name(tt), lexer_return_token(state));
38 lexer(state);
39 }
40
41
/*
Size, in bytes, of a value of the given type.

Every type is currently treated as a 2 byte "int", so the type argument is
not consulted.
*/
static int sizeof_type(int type)
{
	enum { int_size = 2 };

	(void)type;
	return int_size;
}
48
49 /* parse a type; the next token will be acquired as a result */
50 /* the token advancement is to provide consistency */
51 static int parse_type(cstate *state)
52 {
53 int pt = -1;
54
55 switch (state -> lexer_token)
56 {
57 case token_kw_integer:
58 pt = 1;
59 break;
60
61 default:
62 lwb_error("Invalid type specification");
63 }
64 lexer(state);
65 /* look for "unsigned" modifier for integer types */
66 return pt;
67 }
68
69 static void parse_expr(cstate *state, int prec);
70 static void parse_term(cstate *state);
71 static int parse_expression(cstate *state)
72 {
73 state -> expression = 1;
74
75 parse_expr(state, 0);
76
77 state -> expression = 0;
78 return 1;
79 }
80
81 static void parse_decls(cstate *state)
82 {
83 /* declarations */
84 /* the first thing that doesn't look like a declaration is assumed */
85 /* to be a statement and will trigger a bailout */
86 int vt;
87 char *vn;
88 symtab_entry_t *se;
89
90 for (;;)
91 {
92 switch (state -> lexer_token)
93 {
94 /* DIM keyword */
95 case token_kw_dim:
96 lexer(state);
97 if (state -> lexer_token != token_identifier)
98 {
99 lwb_error("Expecting identifier, got %s\n", lexer_return_token(state));
100 }
101 vn = lw_strdup(state -> lexer_token_string);
102 lexer(state);
103 if (state -> lexer_token != token_kw_as)
104 {
105 lwb_error("Expecting AS, got %s\n", lexer_return_token(state));
106 }
107 lexer(state);
108 vt = parse_type(state);
109
110 se = symtab_find(state -> local_syms, vn);
111 if (se)
112 {
113 lwb_error("Multiply defined local variable %s", vn);
114 }
115 state -> framesize += sizeof_type(vt);
116 symtab_register(state -> local_syms, vn, -(state -> framesize), symtype_var, NULL);
117
118 lw_free(vn);
119 break;
120
121 /* blank lines allowed */
122 case token_eol:
123 break;
124
125 default:
126 return;
127 }
128 if (state -> lexer_token != token_eol)
129 lwb_error("Expecting end of line; got %s\n", lexer_return_token(state));
130 lexer(state);
131 }
132 }
133
134 static void parse_statements(cstate *state)
135 {
136 symtab_entry_t *se;
137 int et;
138
139 for (;;)
140 {
141 switch (state -> lexer_token)
142 {
143 /* blank lines allowed */
144 case token_eol:
145 break;
146
147 /* variable assignment */
148 case token_identifier:
149 se = symtab_find(state -> local_syms, state -> lexer_token_string);
150 if (!se)
151 {
152 se = symtab_find(state -> global_syms, state -> lexer_token_string);
153 }
154 if (!se)
155 lwb_error("Unknown variable %s\n", state -> lexer_token_string);
156 lexer(state);
157 /* ensure the first token of the expression will be parsed correctly */
158 state -> expression = 1;
159 expect(state, token_op_assignment);
160
161 /* parse the expression */
162 et = parse_expression(state);
163
164 /* check type compatibility */
165
166 /* actually do the assignment */
167
168 break;
169
170 /* anything we don't recognize as a statement token breaks out */
171 default:
172 return;
173 }
174 if (state -> lexer_token != token_eol)
175 lwb_error("Expecting end of line; got %s\n", lexer_return_token(state));
176 lexer(state);
177 }
178 }
179
180
181 /* issub means RETURNS is not allowed; !issub means RETURNS is required */
182
183 static void parse_subfunc(cstate *state, int issub)
184 {
185 int pt, rt;
186 char *subname, *pn;
187 int vis = 0;
188 symtab_entry_t *se;
189 int paramsize = 0;
190
191 state -> local_syms = symtab_init();
192 state -> framesize = 0;
193
194 lexer(state);
195 if (state -> lexer_token != token_identifier)
196 {
197 lwb_error("Invalid sub name '%s'", state -> lexer_token_string);
198 }
199
200 subname = lw_strdup(state -> lexer_token_string);
201
202 lexer(state);
203 if (state -> lexer_token == token_kw_public || state -> lexer_token == token_kw_private)
204 {
205 if (state -> lexer_token == token_kw_public)
206 vis = 1;
207 lexer(state);
208 }
209
210 /* ignore the "PARAMS" keyword if present */
211 if (state -> lexer_token == token_kw_params)
212 lexer(state);
213
214 if (state -> lexer_token == token_eol || state -> lexer_token == token_kw_returns)
215 goto noparms;
216
217 paramagain:
218 if (state -> lexer_token != token_identifier)
219 {
220 lwb_error("Parameter name expected, got %s\n", lexer_return_token(state));
221 }
222 pn = lw_strdup(state -> lexer_token_string);
223 lexer(state);
224
225 if (state -> lexer_token != token_kw_as)
226 lwb_error("Expecting AS\n");
227 lexer(state);
228
229 pt = parse_type(state);
230
231 se = symtab_find(state -> local_syms, pn);
232 if (se)
233 {
234 lwb_error("Duplicate parameter name %s\n", pn);
235 }
236 symtab_register(state -> local_syms, pn, paramsize, symtype_param, NULL);
237 paramsize += sizeof_type(pt);
238 lw_free(pn);
239
240 if (state -> lexer_token == token_char && state -> lexer_token_string[0] == ',')
241 {
242 lexer(state);
243 goto paramagain;
244 }
245
246 noparms:
247 rt = -1;
248 if (!issub)
249 {
250 if (state -> lexer_token != token_kw_returns)
251 {
252 lwb_error("FUNCTION must have RETURNS\n");
253 }
254 lexer(state);
255 /* if (state -> lexer_token == token_identifier)
256 {
257 printf("Return value named: %s\n", state -> lexer_token_string);
258
259 lexer(state);
260 if (state -> lexer_token != token_kw_as)
261 lwb_error("Execting AS after RETURNS");
262 lexer(state);
263 }
264 */
265 rt = parse_type(state);
266 }
267 else
268 {
269 if (state -> lexer_token == token_kw_returns)
270 {
271 lwb_error("SUB cannot specify RETURNS\n");
272 }
273 }
274
275
276 if (state -> lexer_token != token_eol)
277 {
278 lwb_error("EOL expected; found %s\n", lexer_return_token(state));
279 }
280
281
282 se = symtab_find(state -> global_syms, subname);
283 if (se)
284 {
285 lwb_error("Multiply defined symbol %s\n", subname);
286 }
287
288 symtab_register(state -> global_syms, subname, -1, issub ? symtype_sub : symtype_func, NULL);
289
290 state -> currentsub = subname;
291 state -> returntype = rt;
292 /* consume EOL */
293 lexer(state);
294
295 /* variable declarations */
296 parse_decls(state);
297
298 /* output function/sub prolog */
299 emit_prolog(state, vis);
300
301 /* parse statement block */
302 parse_statements(state);
303
304 if (issub)
305 {
306 if (state -> lexer_token != token_kw_endsub)
307 {
308 lwb_error("Expecting ENDSUB, got %s\n", lexer_return_token(state));
309 }
310 }
311 else
312 {
313 if (state -> lexer_token != token_kw_endfunction)
314 {
315 lwb_error("Expecting ENDFUNCTION, got %s\n", lexer_return_token(state));
316 }
317 }
318 /* output function/sub epilog */
319 emit_epilog(state);
320
321 lw_free(state -> currentsub);
322 state -> currentsub = NULL;
323 symtab_destroy(state -> local_syms);
324 state -> local_syms = NULL;
325 }
326
327 void parser(cstate *state)
328 {
329 state -> lexer_curchar = -1;
330 state -> global_syms = symtab_init();
331
332 /* now look for a global declaration */
333 for (;;)
334 {
335 state -> parser_state = parser_state_global;
336 lexer(state);
337 switch (state -> lexer_token)
338 {
339 case token_kw_function:
340 printf("Function\n");
341 parse_subfunc(state, 0);
342 break;
343
344 case token_kw_sub:
345 printf("Sub\n");
346 parse_subfunc(state, 1);
347 break;
348
349 /* blank lines are allowed */
350 case token_eol:
351 continue;
352
353 /* EOF is allowed - end of parsing */
354 case token_eof:
355 return;
356
357 default:
358 lwb_error("Invalid token '%s' in global state\n", lexer_return_token(state));
359 }
360 }
361 }
362
363 static void parse_expr(cstate *state, int prec)
364 {
365 static const struct operinfo {
366 int opernum;
367 int operprec;
368 } operators[] =
369 {
370 { token_op_plus, 100 },
371 { token_op_minus, 100 },
372 { token_op_times, 150 },
373 { token_op_divide, 150 },
374 { token_op_modulus, 150 },
375 { token_op_and, 25 },
376 { token_op_or, 20 },
377 { token_op_xor, 20 },
378 { token_op_band, 50 },
379 { token_op_bor, 45 },
380 { token_op_bxor, 45 },
381 { -1, -1 }
382 };
383 int opern;
384
385 parse_term(state);
386
387 eval_next:
388 for (opern = 0; operators[opern].opernum != -1; opern++)
389 {
390 if (operators[opern].opernum == state -> lexer_token)
391 break;
392 }
393 if (operators[opern].opernum == -1)
394 return;
395
396 if (operators[opern].operprec <= prec)
397 return;
398
399 lexer(state);
400
401 parse_expr(state, operators[opern].operprec);
402
403 /* push operator */
404
405 goto eval_next;
406 }
407
408 static void parse_term(cstate *state)
409 {
410 eval_next:
411 /* parens */
412 if (state -> lexer_token == token_op_oparen)
413 {
414 lexer(state);
415 parse_expr(state, 0);
416 expect(state, token_op_cparen);
417 return;
418 }
419
420 /* unary plus; ignore it */
421 if (state -> lexer_token == token_op_plus)
422 {
423 lexer(state);
424 goto eval_next;
425 }
426
427 /* unary minus, precision 200 */
428 if (state -> lexer_token == token_op_minus)
429 {
430 lexer(state);
431 parse_expr(state, 200);
432
433 /* push unary negation */
434 }
435
436 /* BNOT, NOT */
437 if (state -> lexer_token == token_op_not || state -> lexer_token == token_op_bnot)
438 {
439 lexer(state);
440 parse_expr(state, 200);
441
442 /* push unary operator */
443 }
444
445 /* integer */
446 if (state -> lexer_token == token_int)
447 {
448 }
449
450 /* unsigned integer */
451 if (state -> lexer_token == token_uint)
452 {
453 }
454
455 /* variable or function call */
456 if (state -> lexer_token == token_identifier)
457 {
458 lexer(state);
459 if (state -> lexer_token == token_op_oparen)
460 {
461 /* function call */
462 return;
463 }
464 /* variable */
465 return;
466 }
467
468 lwb_error("Invalid input in expression; got %s\n", lexer_return_token(state));
469 }