# HG changeset patch # User lost@starbug # Date 1270609399 21600 # Node ID d96c30e60ddff0661d6b7d3440df2683147503b9 # Parent 4867f18c872fc7bd63623e4fa0cf2c6a07b3dc60 Added pass2 and various supporting logic including symbol lookups diff -r 4867f18c872f -r d96c30e60ddf doc/internals.txt --- a/doc/internals.txt Thu Apr 01 20:56:19 2010 -0600 +++ b/doc/internals.txt Tue Apr 06 21:03:19 2010 -0600 @@ -9,14 +9,16 @@ This pass reads the entire source code and parses each line into an internal representation. Macros, file inclusions, and conditional assembly -instructions are resolved at this point as well. +instructions are resolved at this point as well. Instructions with known +sizes will have their sizes resolved at this point. Pass 2 ------ -This pass assigns instruction sizes to all invariate instructions. Invariate -instructions are any instructions with a fixed size, including those with -forced addressing modes. +Check all exported symbols for validity and set them as imports if the +assembler state says so. Also resolve all symbol references in all +expressions to be direct references either to the symbol table or +to the import list. 
Pass 3 ------ diff -r 4867f18c872f -r d96c30e60ddf lwasm/Makefile.am --- a/lwasm/Makefile.am Thu Apr 01 20:56:19 2010 -0600 +++ b/lwasm/Makefile.am Tue Apr 06 21:03:19 2010 -0600 @@ -1,7 +1,7 @@ AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib -I$(top_builddir)/lwlib -I$(top_srcdir)/lwlib bin_PROGRAMS = lwasm lwasm_SOURCES = main.c pragma.c input.c pass1.c lwasm.c \ - instab.c symbol.c macro.c \ + instab.c symbol.c macro.c pass2.c \ insn_inh.c insn_rtor.c insn_tfm.c insn_rlist.c insn_rel.c \ insn_bitbit.c insn_indexed.c insn_gen.c insn_logicmem.c \ pseudo.c section.c os9.c diff -r 4867f18c872f -r d96c30e60ddf lwasm/lwasm.c --- a/lwasm/lwasm.c Thu Apr 01 20:56:19 2010 -0600 +++ b/lwasm/lwasm.c Tue Apr 06 21:03:19 2010 -0600 @@ -33,8 +33,53 @@ #include "lwasm.h" +void lwasm_register_error(asmstate_t *as, line_t *l, const char *msg, ...); + lw_expr_t lwasm_evaluate_var(char *var, void *priv) { + asmstate_t *as = (asmstate_t *)priv; + lw_expr_t e; + importlist_t *im; + struct symtabe *s; + + s = lookup_symbol(as, as -> cl, var); + if (s) + { + e = lw_expr_build(lw_expr_type_special, lwasm_expr_syment, s); + return e; + } + + // undefined here is undefined unless output is object + if (as -> output_format != OUTPUT_OBJ) + goto nomatch; + + // check for import + for (im = as -> importlist; im; im = im -> next) + { + if (!strcmp(im -> symbol, var)) + break; + } + + // check for "undefined" to import automatically + if (!im && CURPRAGMA(as -> cl, PRAGMA_UNDEFEXTERN)) + { + im = lw_alloc(sizeof(importlist_t)); + im -> symbol = lw_strdup(var); + im -> next = as -> importlist; + as -> importlist = im; + } + + if (!im) + goto nomatch; + + e = lw_expr_build(lw_expr_type_special, lwasm_expr_import, im); + return e; + +nomatch: + if (as -> badsymerr) + { + lwasm_register_error(as, as -> cl, "Undefined symbol %s", var); + } return NULL; } @@ -59,6 +104,26 @@ else return NULL; } + + case lwasm_expr_syment: + { + return NULL; + } + + case lwasm_expr_import: + { + return NULL; 
+ } + + case lwasm_expr_nextbp: + { + return NULL; + } + + case lwasm_expr_prevbp: + { + return NULL; + } } return NULL; } @@ -482,6 +547,11 @@ return e; } +int lwasm_reduce_expr(asmstate_t *as, lw_expr_t expr) +{ + lw_expr_simplify(expr, as); +} + void lwasm_save_expr(line_t *cl, int id, lw_expr_t expr) { struct line_expr_s *e; @@ -612,3 +682,8 @@ (*p) += 3; return rval; } + +void lwasm_show_errors(asmstate_t *as) +{ + fprintf(stderr, "Errors encountered. FIXME - print out errors.\n"); +} diff -r 4867f18c872f -r d96c30e60ddf lwasm/lwasm.h --- a/lwasm/lwasm.h Thu Apr 01 20:56:19 2010 -0600 +++ b/lwasm/lwasm.h Tue Apr 06 21:03:19 2010 -0600 @@ -39,7 +39,9 @@ lwasm_expr_linelen = 1, // length of ref'd line lwasm_expr_lineaddr = 2, // addr of ref'd line lwasm_expr_nextbp = 3, // next branch point - lwasm_expr_prevbp = 4 // previous branch point + lwasm_expr_prevbp = 4, // previous branch point + lwasm_expr_syment = 5, // symbol table entry + lwasm_expr_import = 6 // symbol import entry }; enum lwasm_output_e @@ -104,10 +106,13 @@ struct line_expr_s *next; }; +typedef struct line_s line_t; + typedef struct exportlist_s exportlist_t; struct exportlist_s { char *symbol; // symbol to export + line_t *line; // line the export is on exportlist_t *next; // next in the export list }; @@ -118,7 +123,6 @@ importlist_t *next; // next in the import list }; -typedef struct line_s line_t; struct line_s { lw_expr_t addr; // assembly address of the line @@ -143,6 +147,7 @@ int lint2; // another pass forward integer asmstate_t *as; // assembler state data ptr int pragmas; // pragmas in effect for the line + int context; // the symbol context number }; enum @@ -190,6 +195,7 @@ int execaddr; // address from "end" int inmod; // inside an os9 module? 
unsigned char crc[3]; // crc accumulator + int badsymerr; // throw error on undef sym if set line_t *line_head; // start of lines list line_t *line_tail; // tail of lines list @@ -217,7 +223,7 @@ #ifndef ___symbol_c_seen___ extern struct symtabe *register_symbol(asmstate_t *as, line_t *cl, char *sym, lw_expr_t value, int flags); -extern struct symtabe *lookup_symbol(asmstate_t *as, line_t *cl, char *sym, int context, int version); +extern struct symtabe *lookup_symbol(asmstate_t *as, line_t *cl, char *sym); #endif @@ -239,6 +245,10 @@ extern int lwasm_lookupreg2(const char *rlist, char **p); extern int lwasm_lookupreg3(const char *rlist, char **p); +extern void lwasm_show_errors(asmstate_t *as); + +extern int lwasm_reduce_expr(asmstate_t *as, lw_expr_t expr); + #endif #define OPLEN(op) (((op)>0xFF)?2:1) diff -r 4867f18c872f -r d96c30e60ddf lwasm/main.c --- a/lwasm/main.c Thu Apr 01 20:56:19 2010 -0600 +++ b/lwasm/main.c Tue Apr 06 21:03:19 2010 -0600 @@ -165,6 +165,7 @@ assembler on the first file */ extern void do_pass1(asmstate_t *as); +extern void do_pass2(asmstate_t *as); extern lw_expr_t lwasm_evaluate_special(int t, void *ptr, void *priv); extern lw_expr_t lwasm_evaluate_var(char *var, void *priv); extern lw_expr_t lwasm_parse_term(char **p, void *priv); @@ -193,5 +194,12 @@ input_init(&asmstate); do_pass1(&asmstate); + if (asmstate.errorcount > 0) + { + lwasm_show_errors(&asmstate); + exit(1); + } + do_pass2(&asmstate); + exit(0); } diff -r 4867f18c872f -r d96c30e60ddf lwasm/pass1.c --- a/lwasm/pass1.c Thu Apr 01 20:56:19 2010 -0600 +++ b/lwasm/pass1.c Tue Apr 06 21:03:19 2010 -0600 @@ -86,6 +86,7 @@ cl -> inmod = as -> inmod; cl -> csect = as -> csect; cl -> pragmas = as -> pragmas; + cl -> context = as -> context; if (!as -> line_tail) { as -> line_head = cl; diff -r 4867f18c872f -r d96c30e60ddf lwasm/pass2.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwasm/pass2.c Tue Apr 06 21:03:19 2010 -0600 @@ -0,0 +1,95 @@ +/* +pass2.c + +Copyright © 2010 
William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include + +#include +#include + +#include +#include + +#include "lwasm.h" +#include "instab.h" + +/* +pass 2: deal with undefined symbols and do a simplification pass +on all the expressions. Handle PRAGMA_IMPORTUNDEFEXPORT + +*/ +void do_pass2(asmstate_t *as) +{ + line_t *cl; + exportlist_t *ex; + struct symtabe *s; + importlist_t *im; + struct line_expr_s *le; + + // verify the export list + if (as -> output_format == OUTPUT_OBJ) + { + for (ex = as -> exportlist; ex; ex = ex -> next) + { + s = lookup_symbol(as, NULL, ex -> symbol); + if (!s) + { + if (CURPRAGMA(ex -> line, PRAGMA_IMPORTUNDEFEXPORT)) + { + for (im = as -> importlist; im; im = im -> next) + { + if (!strcmp(ex -> symbol, im -> symbol)) + break; + } + if (!im) + { + im = lw_alloc(sizeof(importlist_t)); + im -> symbol = lw_strdup(ex -> symbol); + im -> next = as -> importlist; + as -> importlist = im; + } + } + else + { + // undefined export - register error + lwasm_register_error(as, ex -> line, "Undefined exported symbol"); + } + } + } + if (as -> errorcount > 0) + return; + } + + // we want to throw errors on undefined symbols here + as -> badsymerr = 1; + + // now do some reductions on expressions + for (cl = as -> line_head; cl; cl = cl -> next) + { + as -> cl = cl; + + // simplify address + lwasm_reduce_expr(as, cl -> addr); + + // simplify each expression + for 
(le = cl -> exprs; le; le = le -> next) + lwasm_reduce_expr(as, le -> expr); + } +} diff -r 4867f18c872f -r d96c30e60ddf lwasm/pseudo.c --- a/lwasm/pseudo.c Thu Apr 01 20:56:19 2010 -0600 +++ b/lwasm/pseudo.c Tue Apr 06 21:03:19 2010 -0600 @@ -868,7 +868,7 @@ sym = lw_strndup(*p, i); - s = lookup_symbol(as, l, sym, -1, -1); + s = lookup_symbol(as, l, sym); lw_free(sym); @@ -899,7 +899,7 @@ sym = lw_strndup(*p, i); - s = lookup_symbol(as, l, sym, -1, -1); + s = lookup_symbol(as, l, sym); lw_free(sym); diff -r 4867f18c872f -r d96c30e60ddf lwasm/section.c --- a/lwasm/section.c Thu Apr 01 20:56:19 2010 -0600 +++ b/lwasm/section.c Tue Apr 06 21:03:19 2010 -0600 @@ -202,6 +202,7 @@ e = lw_alloc(sizeof(exportlist_t)); e -> next = as -> exportlist; e -> symbol = lw_strdup(sym); + e -> line = l; as -> exportlist = e; lw_free(sym); diff -r 4867f18c872f -r d96c30e60ddf lwasm/symbol.c --- a/lwasm/symbol.c Thu Apr 01 20:56:19 2010 -0600 +++ b/lwasm/symbol.c Tue Apr 06 21:03:19 2010 -0600 @@ -65,7 +65,7 @@ } if (islocal) - context = as -> context; + context = cl -> context; // first, look up symbol to see if it is already defined for (se = as -> symtab.head; se; se = se -> next) @@ -95,6 +95,10 @@ version++; } + // simplify the symbol expression - replaces "SET" symbols with + // symbol table entries + lwasm_reduce_expr(as, val); + se = lw_alloc(sizeof(struct symtabe)); se -> next = as -> symtab.head; as -> symtab.head = se; @@ -105,7 +109,53 @@ return se; } -struct symtabe * lookup_symbol(asmstate_t *as, line_t *cl, char *sym, int context, int version) +// for "SET" symbols, always returns the LAST definition of the +// symbol. This works because the lwasm_reduce_expr() call in +// register_symbol will ensure there are no lingering "var" references +// to the set symbol anywhere in the symbol table; they will all be +// converted to direct references +// NOTE: this means that for a forward reference to a SET symbol, +// the LAST definition will be the one used. 
+// This arrangement also ensures that any reference to the symbol +// itself inside a "set" definition will refer to the previous version +// of the symbol. +struct symtabe * lookup_symbol(asmstate_t *as, line_t *cl, char *sym) { - return NULL; + int local = 0; + struct symtabe *s, *s2; + + // check if this is a local symbol + if (strchr(sym, '@') || strchr(sym, '?')) + local = 1; + + if (cl && !CURPRAGMA(cl, PRAGMA_DOLLARNOTLOCAL) && strchr(sym, '$')) + local = 1; + if (!cl && !(as -> pragmas & PRAGMA_DOLLARNOTLOCAL) && strchr(sym, '$')) + local = 1; + + // cannot look up local symbol in global context!!!!! + if (!cl && local) + return NULL; + + for (s = as -> symtab.head, s2 = NULL; s; s = s -> next) + { + if (!strcmp(sym, s -> symbol)) + { + if (local && s -> context != cl -> context) + continue; + + if (s -> flags & symbol_flag_set) + { + // look for highest version of symbol; s2 is NULL until the first SET match, so test it before dereferencing + if (!s2 || s -> version > s2 -> version) + s2 = s; + continue; + } + break; + } + } + if (!s && s2) + s = s2; + + return s; }