changeset 363:d96c30e60ddf

Added pass2 and various supporting logic including symbol lookups
author lost@starbug
date Tue, 06 Apr 2010 21:03:19 -0600
parents 4867f18c872f
children 0b5a26bedbe1
files doc/internals.txt lwasm/Makefile.am lwasm/lwasm.c lwasm/lwasm.h lwasm/main.c lwasm/pass1.c lwasm/pass2.c lwasm/pseudo.c lwasm/section.c lwasm/symbol.c
diffstat 10 files changed, 255 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/doc/internals.txt	Thu Apr 01 20:56:19 2010 -0600
+++ b/doc/internals.txt	Tue Apr 06 21:03:19 2010 -0600
@@ -9,14 +9,16 @@
 
 This pass reads the entire source code and parses each line into an internal
 representation. Macros, file inclusions, and conditional assembly
-instructions are resolved at this point as well.
+instructions are resolved at this point as well. Instructions with known
+sizes will have their sizes resolved at this point.
 
 Pass 2
 ------
 
-This pass assigns instruction sizes to all invariate instructions. Invariate
-instructions are any instructions with a fixed size, including those with
-forced addressing modes.
+Check all exported symbols for validity and set them as imports if the
+assembler state says so. Also resolve all symbol references in all
+expressions to be direct references either to the symbol table or
+to the import list.
 
 Pass 3
 ------
--- a/lwasm/Makefile.am	Thu Apr 01 20:56:19 2010 -0600
+++ b/lwasm/Makefile.am	Tue Apr 06 21:03:19 2010 -0600
@@ -1,7 +1,7 @@
 AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib -I$(top_builddir)/lwlib -I$(top_srcdir)/lwlib
 bin_PROGRAMS = lwasm
 lwasm_SOURCES = main.c pragma.c input.c pass1.c lwasm.c \
-	instab.c symbol.c macro.c \
+	instab.c symbol.c macro.c pass2.c \
 	insn_inh.c insn_rtor.c insn_tfm.c insn_rlist.c insn_rel.c \
 	insn_bitbit.c insn_indexed.c insn_gen.c insn_logicmem.c \
 	pseudo.c section.c os9.c
--- a/lwasm/lwasm.c	Thu Apr 01 20:56:19 2010 -0600
+++ b/lwasm/lwasm.c	Tue Apr 06 21:03:19 2010 -0600
@@ -33,8 +33,53 @@
 
 #include "lwasm.h"
 
+void lwasm_register_error(asmstate_t *as, line_t *l, const char *msg, ...);
+
 lw_expr_t lwasm_evaluate_var(char *var, void *priv)
 {
+	asmstate_t *as = (asmstate_t *)priv;	// priv carries the assembler state
+	lw_expr_t e;
+	importlist_t *im;
+	struct symtabe *s;
+	
+	s = lookup_symbol(as, as -> cl, var);	// resolve relative to the line in as -> cl
+	if (s)
+	{
+		e = lw_expr_build(lw_expr_type_special, lwasm_expr_syment, s);
+		return e;
+	}
+	
+	// a symbol undefined here stays undefined unless the output is an object file
+	if (as -> output_format != OUTPUT_OBJ)
+		goto nomatch;
+	
+	// look for an existing import list entry with this name
+	for (im = as -> importlist; im; im = im -> next)
+	{
+		if (!strcmp(im -> symbol, var))
+			break;
+	}
+	
+	// no entry found: add one automatically if CURPRAGMA reports PRAGMA_UNDEFEXTERN for as -> cl
+	if (!im && CURPRAGMA(as -> cl, PRAGMA_UNDEFEXTERN))
+	{
+		im = lw_alloc(sizeof(importlist_t));
+		im -> symbol = lw_strdup(var);
+		im -> next = as -> importlist;	// push onto the head of the import list
+		as -> importlist = im;
+	}
+	
+	if (!im)
+		goto nomatch;
+
+	e = lw_expr_build(lw_expr_type_special, lwasm_expr_import, im);
+	return e;
+
+nomatch:
+	if (as -> badsymerr)	// only report when undefined-symbol errors are enabled
+	{
+		lwasm_register_error(as, as -> cl, "Undefined symbol %s", var);
+	}
 	return NULL;
 }
 
@@ -59,6 +104,26 @@
 			else
 				return NULL;
 		}
+	
+	case lwasm_expr_syment:
+		{
+			return NULL;
+		}
+	
+	case lwasm_expr_import:
+		{
+			return NULL;
+		}
+	
+	case lwasm_expr_nextbp:
+		{
+			return NULL;
+		}
+	
+	case lwasm_expr_prevbp:
+		{
+			return NULL;
+		}
 	}
 	return NULL;
 }
@@ -482,6 +547,11 @@
 	return e;
 }
 
+int lwasm_reduce_expr(asmstate_t *as, lw_expr_t expr)
+{	// passes expr to lw_expr_simplify() with as as its second argument
+	lw_expr_simplify(expr, as);
+	return 0;	// always 0; without this an int function fell off its end
+}
 void lwasm_save_expr(line_t *cl, int id, lw_expr_t expr)
 {
 	struct line_expr_s *e;
@@ -612,3 +682,8 @@
 		(*p) += 3;
 	return rval;
 }
+
+void lwasm_show_errors(asmstate_t *as)
+{	// placeholder: writes one fixed notice to stderr and does not read as
+	fprintf(stderr, "Errors encountered. FIXME - print out errors.\n");
+}
--- a/lwasm/lwasm.h	Thu Apr 01 20:56:19 2010 -0600
+++ b/lwasm/lwasm.h	Tue Apr 06 21:03:19 2010 -0600
@@ -39,7 +39,9 @@
 	lwasm_expr_linelen = 1,			// length of ref'd line
 	lwasm_expr_lineaddr = 2,		// addr of ref'd line
 	lwasm_expr_nextbp = 3,			// next branch point
-	lwasm_expr_prevbp = 4			// previous branch point
+	lwasm_expr_prevbp = 4,			// previous branch point
+	lwasm_expr_syment = 5,			// symbol table entry
+	lwasm_expr_import = 6			// symbol import entry
 };
 
 enum lwasm_output_e
@@ -104,10 +106,13 @@
 	struct line_expr_s *next;
 };
 
+typedef struct line_s line_t;
+
 typedef struct exportlist_s exportlist_t;
 struct exportlist_s
 {
 	char *symbol;						// symbol to export
+	line_t *line;						// line the export is on
 	exportlist_t *next;					// next in the export list
 };
 
@@ -118,7 +123,6 @@
 	importlist_t *next;					// next in the import list
 };
 
-typedef struct line_s line_t;
 struct line_s
 {
 	lw_expr_t addr;						// assembly address of the line
@@ -143,6 +147,7 @@
 	int lint2;							// another pass forward integer
 	asmstate_t *as;						// assembler state data ptr
 	int pragmas;						// pragmas in effect for the line
+	int context;						// the symbol context number
 };
 
 enum
@@ -190,6 +195,7 @@
 	int execaddr;						// address from "end"
 	int inmod;							// inside an os9 module?
 	unsigned char crc[3];				// crc accumulator
+	int badsymerr;						// throw error on undef sym if set
 
 	line_t *line_head;					// start of lines list
 	line_t *line_tail;					// tail of lines list
@@ -217,7 +223,7 @@
 #ifndef ___symbol_c_seen___
 
 extern struct symtabe *register_symbol(asmstate_t *as, line_t *cl, char *sym, lw_expr_t value, int flags);
-extern struct symtabe *lookup_symbol(asmstate_t *as, line_t *cl, char *sym, int context, int version);
+extern struct symtabe *lookup_symbol(asmstate_t *as, line_t *cl, char *sym);
 
 #endif
 
@@ -239,6 +245,10 @@
 extern int lwasm_lookupreg2(const char *rlist, char **p);
 extern int lwasm_lookupreg3(const char *rlist, char **p);
 
+extern void lwasm_show_errors(asmstate_t *as);
+
+extern int lwasm_reduce_expr(asmstate_t *as, lw_expr_t expr);
+
 #endif
 
 #define OPLEN(op) (((op)>0xFF)?2:1)
--- a/lwasm/main.c	Thu Apr 01 20:56:19 2010 -0600
+++ b/lwasm/main.c	Tue Apr 06 21:03:19 2010 -0600
@@ -165,6 +165,7 @@
 assembler on the first file
 */
 extern void do_pass1(asmstate_t *as);
+extern void do_pass2(asmstate_t *as);
 extern lw_expr_t lwasm_evaluate_special(int t, void *ptr, void *priv);
 extern lw_expr_t lwasm_evaluate_var(char *var, void *priv);
 extern lw_expr_t lwasm_parse_term(char **p, void *priv);
@@ -193,5 +194,12 @@
 	input_init(&asmstate);
 
 	do_pass1(&asmstate);
+	if (asmstate.errorcount > 0)
+	{
+		lwasm_show_errors(&asmstate);
+		exit(1);
+	}
+	do_pass2(&asmstate);
+	
 	exit(0);
 }
--- a/lwasm/pass1.c	Thu Apr 01 20:56:19 2010 -0600
+++ b/lwasm/pass1.c	Tue Apr 06 21:03:19 2010 -0600
@@ -86,6 +86,7 @@
 		cl -> inmod = as -> inmod;
 		cl -> csect = as -> csect;
 		cl -> pragmas = as -> pragmas;
+		cl -> context = as -> context;
 		if (!as -> line_tail)
 		{
 			as -> line_head = cl;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwasm/pass2.c	Tue Apr 06 21:03:19 2010 -0600
@@ -0,0 +1,95 @@
+/*
+pass2.c
+
+Copyright © 2010 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <config.h>
+
+#include <stdio.h>
+#include <string.h>
+
+#include <lw_alloc.h>
+#include <lw_string.h>
+
+#include "lwasm.h"
+#include "instab.h"
+
+/*
+pass 2: for object output, check that every exported symbol is defined (adding
+an import entry instead when PRAGMA_IMPORTUNDEFEXPORT applies to the export's
+line), then run lwasm_reduce_expr() on every line's address and expressions.
+*/
+void do_pass2(asmstate_t *as)
+{
+	line_t *cl;
+	exportlist_t *ex;
+	struct symtabe *s;
+	importlist_t *im;
+	struct line_expr_s *le;
+
+	// verify the export list (only done for object file output)
+	if (as -> output_format == OUTPUT_OBJ)
+	{	
+		for (ex = as -> exportlist; ex; ex = ex -> next)
+		{
+			s = lookup_symbol(as, NULL, ex -> symbol);	// NULL line: local symbols are never found
+			if (!s)
+			{
+				if (CURPRAGMA(ex -> line, PRAGMA_IMPORTUNDEFEXPORT))
+				{
+					for (im = as -> importlist; im; im = im -> next)
+					{
+						if (!strcmp(ex -> symbol, im -> symbol))
+							break;
+					}
+					if (!im)	// not imported yet: push a new entry on the import list
+					{
+						im = lw_alloc(sizeof(importlist_t));
+						im -> symbol = lw_strdup(ex -> symbol);
+						im -> next = as -> importlist;
+						as -> importlist = im;
+					}
+				}
+				else
+				{
+					// undefined export - register the error against the export's line
+					lwasm_register_error(as, ex -> line, "Undefined exported symbol");
+				}
+			}
+		}
+		if (as -> errorcount > 0)	// skip the reduction pass once errors have been counted
+			return;
+	}
+
+	// set the flag lwasm_evaluate_var() checks before reporting an undefined symbol
+	as -> badsymerr = 1;
+	
+	// now reduce the expressions of every line, in list order
+	for (cl = as -> line_head; cl; cl = cl -> next)
+	{
+		as -> cl = cl;	// make this the current line for symbol lookups
+		
+		// simplify address
+		lwasm_reduce_expr(as, cl -> addr);
+		
+		// simplify each expression saved on the line
+		for (le = cl -> exprs; le; le = le -> next)
+			lwasm_reduce_expr(as, le -> expr);
+	}	
+}
--- a/lwasm/pseudo.c	Thu Apr 01 20:56:19 2010 -0600
+++ b/lwasm/pseudo.c	Tue Apr 06 21:03:19 2010 -0600
@@ -868,7 +868,7 @@
 	
 	sym = lw_strndup(*p, i);
 	
-	s = lookup_symbol(as, l, sym, -1, -1);
+	s = lookup_symbol(as, l, sym);
 	
 	lw_free(sym);
 	
@@ -899,7 +899,7 @@
 	
 	sym = lw_strndup(*p, i);
 	
-	s = lookup_symbol(as, l, sym, -1, -1);
+	s = lookup_symbol(as, l, sym);
 	
 	lw_free(sym);
 
--- a/lwasm/section.c	Thu Apr 01 20:56:19 2010 -0600
+++ b/lwasm/section.c	Tue Apr 06 21:03:19 2010 -0600
@@ -202,6 +202,7 @@
 	e = lw_alloc(sizeof(exportlist_t));
 	e -> next = as -> exportlist;
 	e -> symbol = lw_strdup(sym);
+	e -> line = l;
 	as -> exportlist = e;
 	lw_free(sym);
 	
--- a/lwasm/symbol.c	Thu Apr 01 20:56:19 2010 -0600
+++ b/lwasm/symbol.c	Tue Apr 06 21:03:19 2010 -0600
@@ -65,7 +65,7 @@
 	}
 
 	if (islocal)
-		context = as -> context;
+		context = cl -> context;
 	
 	// first, look up symbol to see if it is already defined
 	for (se = as -> symtab.head; se; se = se -> next)
@@ -95,6 +95,10 @@
 		version++;
 	}
 	
+	// simplify the symbol expression - replaces "SET" symbols with
+	// symbol table entries
+	lwasm_reduce_expr(as, val);
+	
 	se = lw_alloc(sizeof(struct symtabe));
 	se -> next = as -> symtab.head;
 	as -> symtab.head = se;
@@ -105,7 +109,53 @@
 	return se;
 }
 
-struct symtabe * lookup_symbol(asmstate_t *as, line_t *cl, char *sym, int context, int version)
+// Look up sym in the symbol table; returns the matching entry or NULL.
+// cl is the referencing line (supplies pragmas and the local symbol
+// context); with cl == NULL only non-local symbols can be found.
+// For "SET" symbols the entry with the highest version, i.e. the LAST
+// definition, is returned. This works because the lwasm_reduce_expr()
+// call in register_symbol ensures there are no lingering "var" references
+// to the set symbol in the symbol table; they are all converted to
+// direct references. NOTE: a forward reference to a SET symbol therefore
+// uses the LAST definition, and a reference to the symbol inside its own
+// "set" definition refers to the previous version of the symbol.
+struct symtabe * lookup_symbol(asmstate_t *as, line_t *cl, char *sym)
 {
-	return NULL;
+	int local = 0;
+	struct symtabe *s, *s2;
+
+	// check if this is a local symbol
+	if (strchr(sym, '@') || strchr(sym, '?'))
+		local = 1;
+	
+	if (cl && !CURPRAGMA(cl, PRAGMA_DOLLARNOTLOCAL) && strchr(sym, '$'))
+		local = 1;
+	if (!cl && !(as -> pragmas & PRAGMA_DOLLARNOTLOCAL) && strchr(sym, '$'))
+		local = 1;
+	
+	// cannot look up local symbol in global context!!!!!
+	if (!cl && local)
+		return NULL;
+	
+	for (s = as -> symtab.head, s2 = NULL; s; s = s -> next)
+	{
+		if (!strcmp(sym, s -> symbol))
+		{
+			if (local && s -> context != cl -> context)
+				continue;
+			
+			if (s -> flags & symbol_flag_set)
+			{
+				// keep the highest version; s2 is NULL until the first SET match
+				if (!s2 || s -> version > s2 -> version)
+					s2 = s;
+				continue;
+			}
+			break;
+		}
+	}
+	if (!s && s2)
+		s = s2;
+	
+	return s;
 }