changeset 26:d2e86babd958

Added error tracking infrastructure
author lost
date Fri, 02 Jan 2009 02:38:02 +0000
parents 3b818f05dc2a
children f736579569b4
files src/Makefile.am src/lwasm.c src/lwasm.h src/pass1.c
diffstat 4 files changed, 47 insertions(+), 794 deletions(-) [+]
line wrap: on
line diff
--- a/src/Makefile.am	Fri Jan 02 02:03:48 2009 +0000
+++ b/src/Makefile.am	Fri Jan 02 02:38:02 2009 +0000
@@ -1,3 +1,3 @@
 bin_PROGRAMS = lwasm
-lwasm_SOURCES = main.c expr.c pass1.c pass2.c util.c instab.c
+lwasm_SOURCES = main.c expr.c pass1.c pass2.c util.c instab.c parse.c lwasm.c
 EXTRA_DIST = instab.h lwasm.h expr.h util.h
--- a/src/lwasm.c	Fri Jan 02 02:03:48 2009 +0000
+++ b/src/lwasm.c	Fri Jan 02 02:38:02 2009 +0000
@@ -17,810 +17,42 @@
 You should have received a copy of the GNU General Public License along with
 this program. If not, see <http://www.gnu.org/licenses/>.
 
-Contains the main code for lwasm
+
+Contains random functions used by the assembler
 */
 
-#include <ctype.h>
-#include <errno.h>
-#include <stdio.h>
+#define __lwasm_c_seen__
+
+#include <stdarg.h>
 #include <stdlib.h>
-#include <string.h>
-#define __lwasm_c_seen__
-#include "instab.h"
+#include <stdio.h>
+
 #include "lwasm.h"
-
-void lwasm_read_file(asmstate_t *as, char *fname);
-extern int add_macro_line(asmstate_t *as, sourceline_t *cl, char *optr);
-extern void expand_macro(asmstate_t *as, sourceline_t *cl, char **optr);
+#include "util.h"
 
-#define debug(mess, ...)	do { if (as->debug) { fprintf(stderr, "DEBUG: "); fprintf(stderr, (mess), ## __VA_ARGS__); } } while (0)
-
-void register_error(asmstate_t *as, sourceline_t *cl, int errcode)
+int register_error(asmstate_t *as, lwasm_line_t *l, int pass, const char *fmt, ...)
 {
-	errortab_t *e;
+	lwasm_error_t *e;
+	va_list args;
+	char errbuff[1024];
+	int r;
 	
-	e = malloc(sizeof(errortab_t));
+	if (as -> passnum != pass)
+		return 0;	// error belongs to a different pass: nothing is recorded, so report 0 chars formatted
+	
+	va_start(args, fmt);
 	
-	e -> errnum = errcode;
-	e -> line = cl;
-	e -> next = cl -> errors;
-	cl -> errors = e;
+	e = lwasm_alloc(sizeof(lwasm_error_t));
+	
+	e -> next = l -> err;
+	l -> err = e;
 	
 	as -> errorcount++;
-}
-
-int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val);
-
-int eval_min(int v1, int v2, int v3, int v4)
-{
-	if (v2 < v1)
-		v1 = v2;
-	if (v3 < v1)
-		v1 = v3;
-	if (v4 < v1)
-		v1 = v4;
-	return v1;
-}
-
-int eval_max(int v1, int v2, int v3, int v4)
-{
-	if (v2 > v1)
-		v1 = v2;
-	if (v3 > v1)
-		v1 = v3;
-	if (v4 > v1)
-		v1 = v4;
-	return v1;
-}
-
-int lookupreg3(const char *rlist, char **str)
-{
-	int rval = 0;
-	int f = 0;
-	const char *reglist = rlist;
-		
-	while (*reglist)
-	{
-		if (toupper(**str) == *reglist)
-		{
-			// first char matches
-			if (reglist[1] == ' ')
-			{
-				f = 1;
-				break;
-			}
-			if (toupper(*(*str + 1)) == reglist[1])
-			{
-				// second char matches
-				if (reglist[2] == ' ')
-				{
-					f = 1;
-					break;
-				}
-				if (toupper(*(*str + 2)) == reglist[2])
-				{
-					f = 1;
-					break;
-				}
-			}
-		}
-		reglist += 3;
-		rval++;
-	}
-	if (f == 0)
-		return -1;
 	
-	
-	reglist = rval * 3 + rlist;
-	if (reglist[1] == ' ')
-		(*str) += 1;
-	else if (reglist[2] == ' ')
-		(*str) += 2;
-	else
-		(*str)+=3;
-	return rval;
-}
-
-
-int lookupreg(const char *reglist, char **str)
-{
-	int rval = 0;
-	while (*reglist)
-	{
-		if (toupper(**str) == *reglist)
-		{
-			// first char matches
-			if (reglist[1] == ' ' && !isalpha(*(*str + 1)))
-				break;
-			if (toupper(*(*str + 1)) == reglist[1])
-				break;
-		}
-		reglist += 2;
-		rval++;
-	}
-	if (!*reglist)
-		return -1;
-	if (reglist[1] == ' ')
-		(*str)++;
-	else
-		(*str)+=2;
-	return rval;
-}
-
-void addcodebyte(asmstate_t *as, sourceline_t *cl, int cb)
-{
-	cl -> len += 1;
-	if (as -> passnum != 2)
-		return;
-
-	if (cl -> numcodebytes >= cl -> codesize)
-	{
-		cl -> codebytes = realloc(cl -> codebytes, cl -> codesize + 32);
-		cl -> codesize += 32;
-	}
-	debug("EMIT: %02x\n", cb & 0xff);
-	cl -> codebytes[cl -> numcodebytes++] = cb & 0xFF;
-}
-
-// parse a symble out of the line and return a pointer
-// to a static pointer
-// return NULL if not a symbol or a bad symbol
-char *parse_symbol(asmstate_t *as, char **ptr)
-{
-	static char *symptr = NULL;
-	char *tptr = *ptr;
-	int sl = 0;
-	
-	// symbol can start with _,a-z,A-Z
-	
-	if (!strchr(SYMCHAR_START, **ptr))
-		return NULL;
-	
-	while (*tptr && !isspace(*tptr) && strchr(SYMCHAR, *tptr))
-	{
-		tptr++;
-		sl++;
-	}
-
-	symptr = realloc(symptr, sl + 1);
-	tptr = symptr;
-	while (sl)
-	{
-		*tptr++ = *(*ptr)++;
-		sl--;
-	}
-	*tptr = '\0';
-	return symptr;
-}
-
-// resolve an instruction
-void resolve_insn(asmstate_t *as, sourceline_t *cl)
-{
-	char *optr;
-	char opbuf[MAX_OP_LEN + 1];
-	char *symbol = NULL;
-	int c;
-	
-	cl -> code_symloc = as -> addr;
-	
-	cl -> addrset = 0;
-	cl -> isequ = 0;
-	cl -> len = 0;
-	cl -> undef = 0;
-	
-	// only parse line on first pass
-	if (as -> passnum == 1)
-	{
-		optr = cl -> line;
-		if (!*optr || *optr == '*' || *optr == ';')
-		{
-			cl -> opcode = -1;
-			cl -> remainder = cl -> line;
-			return;
-		}
+	r = vsnprintf(errbuff, 1024, fmt, args);
+	e -> mess = lwasm_strdup(errbuff);
 	
-		if (!isspace(*optr))
-		{
-			symbol = parse_symbol(as, &optr);
-			if (*optr && !isspace(*optr) && !(as -> inmacro))
-			{
-				errorp1(ERR_BADSYM);
-				while (*optr && !isspace(*optr))
-					optr++;
-			}
-			if (symbol)
-			{
-				cl -> symstr = strdup(symbol);
-				cl -> hassym = 1;
-			}
-		}
-
-		while (isspace(*optr))
-			optr++;	
-	
-		// parse opcode
-		if (*optr && *optr != ';')
-		{
-			c = 0;
-			while (c < MAX_OP_LEN && *optr && !isspace(*optr))
-			{
-				opbuf[c++] = *optr++;
-			}
-			opbuf[c] = '\0';
-			if (*optr && !isspace(*optr) && !(as -> inmacro))
-			{
-				errorp1(ERR_BADOP);
-				cl -> opcode = -1;
-			}
-			else
-			{
-				cl -> opcstr = strdup(opbuf);
-				for (c = 0; instab[c].opcode; c++)
-				{
-					if (!strcasecmp(opbuf, instab[c].opcode))
-						break;
-				}
-				if (!instab[c].opcode && opbuf[0] == '*')
-				{
-					cl -> opcode = -1;
-				}
-				else if (!instab[c].opcode && !(as -> inmacro))
-				{
-					cl -> opcode = -1;
-					
-					// look up macro
-					if (as -> macros)
-					{
-						macrotab_t *m;
-						
-						for (m = as -> macros; m; m = m -> next)
-						{
-							if (!strcmp(m -> name, opbuf))
-								break;
-						}
-						if (m)
-						{
-							// we have a macro here
-							cl -> macro = m;
-							while (*optr && isspace(*optr))
-								optr++;
-							expand_macro(as, cl, &optr);
-							return;
-						}
-						else
-						{
-							errorp1(ERR_BADOP);
-						}
-					}
-					else
-					{
-						errorp1(ERR_BADOP);
-					}
-				}
-				else
-					cl -> opcode = c;
-			}
-		}
-		else
-			cl -> opcode = -1;
+	va_end(args);
 	
-		if (as -> inmacro && cl -> opcode >= 0 && instab[cl -> opcode].specialnum != SPECIAL_ENDM)
-		{
-			add_macro_line(as, cl, cl -> line);
-			cl -> opcode = -1;
-			cl -> remainder = cl -> line;
-			cl -> opcstr = NULL;
-			cl -> operstr = NULL;
-			cl -> symstr = NULL;
-			cl -> hassym = 0;
-			cl -> macrodef = 1;
-			return;
-		}
-		// parse operand
-		while (*optr && isspace(*optr))
-			optr++;
-
-		cl -> operstr = optr;
-	}
-	else
-		optr = cl -> operstr;
-
-	if (as -> skipcond)
-	{
-		// if skipping a condition, need to skip a macro
-		if (cl -> opcode >= 0)
-		{
-			if (instab[cl -> opcode].specialnum == SPECIAL_MACRO)
-			{
-				as -> skipmacro = 1;
-			}
-			else if (instab[cl -> opcode].specialnum == SPECIAL_ENDM)
-			{
-				as -> skipmacro = 0;
-			}
-			else if (instab[cl -> opcode].specialnum == SPECIAL_COND && !(as -> skipmacro))
-			{
-				as -> skipcount++;
-			}
-			else if (instab[cl -> opcode].specialnum == SPECIAL_ENDC && !(as -> skipmacro))
-			{
-				as -> skipcount--;
-				if (as -> skipcount <= 0)
-				{
-					as -> skipcond = 0;
-					as -> noelse = 0;
-				}
-			}
-			else if (instab[cl -> opcode].specialnum == SPECIAL_ELSE && !(as -> skipmacro))
-			{
-				if (as -> skipcount == 1)
-				{	
-					as -> skipcount = 0;
-					as -> skipcond = 0;
-					as -> noelse = 1;
-					return;
-				}
-			}
-		}
-		if (as -> skipcond)
-			cl -> skipped = 1;
-		return;
-	}
-		
-	// do the code thing
-	// on pass 1, no code is generated
-	// on pass 2, code is generated using the "emit()" macro
-	if (cl -> opcode >= 0)
-	{
-		if (instab[cl -> opcode].opfn)
-		{
-			(*(instab[cl -> opcode].opfn))(as, cl, &optr);
-			if (as -> passnum == 1)
-			{
-				if (*optr)
-				{
-					char *t = optr;
-					char t2;
-					
-					t2 = *optr;
-					cl -> operstr = strdup(cl -> operstr);
-					*optr = t2;
-					while (*t && isspace(*t))
-						t++;
-					cl -> remainder = strdup(t);
-					
-				}
-				cl -> remainder = optr;
-			}
-		}
-		else
-		{
-			errorp1(ERR_BADOP);
-			cl -> opcode = -1;
-		}
-	}
-	// address of the symbol may have been changed by a pseudo op
-	// so we couldn't register it above
-	// that means it may turn out to be a "forward ref" in pass 1
-	if (cl -> hassym)
-	{
-		register_symbol(as, cl, cl -> symstr, cl -> code_symloc, cl -> isset ? SYMFLAG_SET : SYMFLAG_NONE);
-	}
-
-	as -> addr += cl -> len;
-}
-
-void generate_code(asmstate_t *as)
-{
-	sourceline_t *cl;
-	
-	as -> addr = 0;
-	as -> dpval = 0;
-	as -> passnum = 2;
-	for (cl = as -> source_head; cl; cl = cl -> next)
-	{
-		resolve_insn(as, cl);
-	}
+	return r;
 }
-
-
-/*
-below this point is the expression evaluation package
-
-Supported binary operators: + - / * %
-Supported unary operators: -
-
-<infix>: + | - | * | / | %
-<unary>: -
-<expr>: <term> <infix> <term>
-<term>: <unary> <term>
-<term>: ( <expr> )
-<term>: <symbol>
-<term>: ' <char>
-<term>: " <char> <char>
-<term>: *
-<term>: <number>
-
-<number>: <dec>
-<number>: & <dec>
-
-<number>: $ <hex>
-<number>: <hex> H
-<number>: @ <oct>
-<number>: <oct> O
-<number>: <oct> Q
-
-<number>: % <bin>
-<number>: <bin> B
-
-<bin>: 0 | 1
-<oct>: <bin> | 2 | 3 | 4 | 5 | 6 | 7
-<dec>: <oct> | 8 | 9
-<hex>: <dec> | A | B | C | D | E | F
-
-NOTE: hex values which start with a non-digit will need to be prefixed
-by $ or have a 0 as the leading digit; hence: $CC or 0CCH otherwise the
-assembler cannot tell the difference between CCH as a symbol or CCH as
-the value $CC
-
-*/
-
-// will throw an error and return 0 in tval if there's a problem
-// -1 is problem; cl -> undef set is undefined symbol
-int eval_term(asmstate_t *as, sourceline_t *cl, char **optr, int *tval)
-{
-	char tc;
-	int rval;
-	int binval;
-	int octval;
-	int decval;
-	int hexval;
-	int valtype;
-	int digval;
-	int bindone = 0;
-	
-	*tval = 0;
-
-beginagain:
-	tc = **optr;
-	if (tc == '+')
-	{
-		// unary +, ignored for symetry
-		(*optr)++;
-		goto beginagain;
-	}
-
-	if (tc == '(')
-	{
-		(*optr)++;
-		rval = eval_expr(as, cl, optr, tval);
-		if (rval < 0)
-			return rval;
-		if (**optr != ')')
-		{
-			errorp1(ERR_BADEXPR);
-			return -1;
-		}
-		(*optr)++;
-		return 0;
-	}
-
-	if (tc == '-')
-	{
-		(*optr)++;
-		rval = eval_term(as, cl, optr, tval);
-		if (rval < 0)
-			return rval;
-		*tval = -*tval;
-		return 0;
-	}
-	
-	// current address (of current instruction, not PC)
-	if (tc == '*')
-	{
-		*tval = cl -> addr;
-		(*optr)++;
-		return 0;
-	}
-	
-	if (strchr("abcdefghijklmnopqrstuvwxyz_", tolower(tc)))
-	{
-		// evaluate a symbol
-		char *symbuf;
-
-		symbuf = parse_symbol(as, optr);
-		if (!symbuf)
-		{
-			errorp1(ERR_BADSYM);
-			*tval = 0;
-			return -1;
-		}
-
-		debug(" looking up symbol: %s\n", symbuf);
-		*tval = lookup_symbol(as, symbuf);
-		
-		// if not found, flag forward ref
-		if (*tval == -1)
-		{
-			errorp2(ERR_UNDEF);
-			cl -> undef = 1;
-			*tval = 0;
-			return 0;
-		}
-		return 0;
-	}
-	
-	if (tc == '%')
-	{
-		// binary number
-		int v1 = 0;
-		(*optr)++;
-		while (strchr("01", **optr))
-		{
-			v1 = v1 << 1 | ((*(*optr)++) - '0');
-		}
-		*tval = v1;
-		return 0;
-	}
-	if (tc == '$')
-	{
-		// hex number
-		int v1 = 0;
-		(*optr)++;
-		debug("HEX CONST: %s\n", *optr);
-		while (**optr && strchr("01234567890ABCDEF", toupper(tc = **optr)))
-		{
-			debug("HEX 2: %02x\n", tc);
-			if (**optr >= 'A')
-			{
-				v1 = v1 << 4 | (toupper((*(*optr)++)) - 'A' + 10);
-			}
-			else
-			{
-				v1 = v1 << 4 | ((*(*optr)++) - '0');
-			}
-		}
-		*tval = v1;
-		return 0;
-	}
-	if (tc == '@')
-	{
-		// octal number
-		int v1 = 0;
-		(*optr)++;
-		while (strchr("01234567", **optr))
-		{
-			v1 = v1 << 3 | ((*(*optr)++) - '0');
-		}
-		*tval = v1;
-		return 0;
-	}
-	if (tc == '&')
-	{
-		// decimal number
-		int v1 = 0;
-		(*optr)++;
-		while (strchr("0123456789", **optr))
-		{
-			v1 = v1 * 10 + ((*(*optr)++) - '0');
-		}
-		*tval = v1;
-		return 0;
-	}
-	if (tc == '\'')
-	{
-		(*optr)++;
-		if (!**optr)
-		{
-			errorp1(ERR_BADEXPR);
-			return -2;
-		}
-		*tval = *(*optr)++; 
-		return 0;
-	}
-	if (tc == '"')
-	{
-		(*optr)++;
-		if (!**optr || !*(*optr + 1))
-		{
-			errorp1(ERR_BADEXPR);
-			return -2;
-		}
-		*tval = *(*optr)++ << 8 | *(*optr)++;
-		return 0;
-	}
-	// end of string
-	if (tc == '\0')
-	{
-		// error if at EOS as we are looking for a term
-		errorp1(ERR_BADEXPR);
-		return -1;
-	}
-	
-	// we have a generic number here which may be decimal, hex, binary, or octal
-	// based on a suffix
-
-	// possible data types are binary (1), octal (2), decimal(4), hex (8)
-	valtype = 15;
-	hexval = octval = decval = binval = 0;
-	while (1)
-	{
-		
-//		printf("    %c\n", **optr);
-		if (!**optr || !strchr("ABCDEFabcdefqhoQHO0123456789", **optr))
-		{
-			// end of string, must be decimal or the end of a bin
-			if (bindone == 1)
-			{
-				*tval = binval;
-				return 0;
-			}
-			if (valtype & 4)
-			{
-				*tval = decval;
-				return 0;
-			}
-			else
-			{
-				errorp1(ERR_BADEXPR);
-				return -1;
-			}
-		}
-		tc = toupper(*(*optr)++);
-		
-		if (tc == 'H')
-		{
-			if (valtype & 8)
-			{
-				*tval = hexval;
-				return 0;
-			}
-			else
-			{
-				// syntax error
-				errorp1(ERR_BADEXPR);
-				return -1;
-			}
-		}
-		
-		if (tc == 'Q' || tc == 'O')
-		{
-			if (valtype && 2)
-			{
-				*tval = octval;
-				return 0;
-			}
-			else
-			{
-				errorp1(ERR_BADEXPR);
-				return -1;
-			}
-		}
-		
-		digval = tc - '0';
-		if (digval > 9)
-			digval -= 7;
-		
-		// if it's not in the range of a hex digit, error out
-		if (tc < '0' || (tc > '9' && tc < 'A') || tc > 'F')
-		{
-			(*optr)--;
-			if (valtype & 4)
-			{
-				*tval = decval;
-				return 0;
-			}
-			// if we're in hex/bin mode and run to the end of the number
-			// we must have a binary constant or an error
-			// if the previous character is B, then we have binary
-			// else we have error since hex would require a terminating H
-			// which would be caught above
-			if (valtype == 8 && toupper(*(*optr)) == 'B')
-			{
-				*tval = binval;
-				return 0;
-			}
-			errorp1(ERR_BADEXPR);
-			return -1;
-		}
-		
-		// if we have any characters past the end of the B, it's not binary
-		if (bindone == 1)
-			bindone = 0;
-		if (tc == 'B')
-			bindone = 1;
-		if (digval > 1)
-			valtype &= 14;
-		else if (digval > 7)
-			valtype &= 13;
-		else if (digval > 9)
-			valtype &= 11;
-		
-		if (valtype & 8)
-		{
-			hexval = (hexval << 4) | digval;
-		}
-		if (valtype & 4)
-		{
-			decval = decval * 10 + digval;
-		}
-		if (valtype & 2)
-		{
-			octval = (octval << 3) | digval;
-		}
-		if (valtype & 1 && !bindone)
-		{
-			binval = (binval << 1) | digval;
-		}
-		
-	}	
-	// can't get here from there
-}
-
-// returns -1 if the expression cannot be parsed
-// and returns -2 if there is an undefined symbol reference
-// resulting value will be in *val; undefined symbols are parsed as
-// value 0 but cl -> undef will be set.
-int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val)
-{
-	int left;
-	int right;
-	char oper;
-	int rval;
-	
-	// by default, return 0 in val
-	*val = 0;
-	cl -> undef = 0;
-
-	rval = eval_term(as, cl, optr, &left);
-	if (rval < 0)
-		return rval;
-
-nextop:
-	oper = **optr;
-
-	// end of expr	
-	if (isspace(oper) || oper == ',' || oper == '\0' || oper == ']' || oper == ')')
-		goto retleft;
-
-	// unrecognized chars
-	if (!strchr("+-*/%", oper))
-		goto retleft;
-
-	(*optr)++;
-
-	rval = eval_term(as, cl, optr, &right);
-	// propagate error
-	if (rval < 0)
-		return rval;
-
-	// do the operation and put it in "left"
-	switch (oper)
-	{
-	case '+':
-		left += right;
-		break;
-
-	case '-':
-		left -= right;
-		break;
-	
-	case '*':
-		left *= right;
-		break;
-	
-	case '/':
-		left /= right;
-		break;
-		
-	case '%':
-		left %= right;
-		break;
-	}
-
-	goto nextop;
-
-retleft:
-	*val = left;
-	return 0;
-}
--- a/src/lwasm.h	Fri Jan 02 02:03:48 2009 +0000
+++ b/src/lwasm.h	Fri Jan 02 02:38:02 2009 +0000
@@ -28,6 +28,14 @@
 #define OUTPUT_RAW		1	// raw sequence of bytes
 #define OUTPUT_OBJ		2	// proprietary object file format
 
+// structure for tracking errors
+typedef struct lwasm_error_s lwasm_error_t;
+struct lwasm_error_s
+{
+	char *mess;				// the actual error message
+	lwasm_error_t *next;	// ptr to next error
+};
+
 // structure for keeping track of lines
 typedef struct lwasm_line_s lwasm_line_t;
 struct lwasm_line_s {
@@ -36,6 +44,7 @@
 	char *filename;		// file name reference
 	lwasm_line_t *next;	// next line
 	lwasm_line_t *prev;	// previous line
+	lwasm_error_t *err;	// error messages
 };
 
 // keep track of current assembler state
@@ -61,6 +70,17 @@
 
 #define PRAGMA_NOINDEX0TONONE	1
 
+#ifndef __lwasm_c_seen__
+#define __lwasm_E__ extern
+#else
+#define __lwasm_E__
+#endif
+
+__lwasm_E__ int register_error(asmstate_t *as, lwasm_line_t *l, int pass, const char *fmt, ...);
+
+#undef __lwasm_E__
+
+
 #ifndef __symtab_c_seen__
 //extern void register_symbol(asmstate_t *as, sourceline_t *cl, char *symstr, int val, int flags);
 //extern int lookup_symbol(asmstate_t *as, char *symstr);
--- a/src/pass1.c	Fri Jan 02 02:03:48 2009 +0000
+++ b/src/pass1.c	Fri Jan 02 02:38:02 2009 +0000
@@ -130,6 +130,7 @@
 			nl -> filename = fnref;
 			nl -> next = NULL;
 			nl -> prev = as -> linestail;
+			nl -> err = NULL;
 			if (as -> linestail)
 				as -> linestail -> next = nl;
 			else