diff src/lwasm.c @ 0:57495da01900

Initial checking of LWASM
author lost
date Fri, 03 Oct 2008 02:44:20 +0000
parents
children 34568fab6058
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lwasm.c	Fri Oct 03 02:44:20 2008 +0000
@@ -0,0 +1,879 @@
+/*
+ * lwasm.c
+ *
+ * main code for lwasm
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define __lwasm_c_seen__
+#include "instab.h"
+#include "lwasm.h"
+
+void lwasm_read_file(asmstate_t *as, char *fname);
+extern int add_macro_line(asmstate_t *as, sourceline_t *cl, char *optr);
+extern void expand_macro(asmstate_t *as, sourceline_t *cl, char **optr);
+
+/*
+ * debug(mess, ...): printf-style trace output.  Writes "DEBUG: " followed by
+ * the formatted message to stderr, but only when as->debug is nonzero.
+ *
+ * The expansion refers to a variable named `as` that is not a macro
+ * argument, so the macro can only be used where such a variable is in scope.
+ * `, ## __VA_ARGS__` is the GNU extension that removes the comma when no
+ * variadic arguments are given.
+ */
+#define debug(mess, ...)	do { if (as->debug) { fprintf(stderr, "DEBUG: "); fprintf(stderr, (mess), ## __VA_ARGS__); } } while (0)
+
+/*
+ * Record an error against a source line.
+ *
+ * A new errortab_t node carrying errcode is pushed onto the front of
+ * cl -> errors (so the list is newest-first) and as -> errorcount is
+ * incremented.
+ *
+ * Running out of memory is fatal: the failure is reported and the program
+ * exits, the same way lwasm_read_file() handles a failed allocation,
+ * instead of writing through a NULL pointer.
+ */
+void register_error(asmstate_t *as, sourceline_t *cl, int errcode)
+{
+	errortab_t *e;
+
+	e = malloc(sizeof *e);
+	if (!e)
+	{
+		perror("Malloc");
+		exit(1);
+	}
+
+	e -> errnum = errcode;
+	e -> line = cl;
+	e -> next = cl -> errors;
+	cl -> errors = e;
+
+	as -> errorcount++;
+}
+
+int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val);
+
+/* Return the smallest of the four arguments. */
+int eval_min(int v1, int v2, int v3, int v4)
+{
+	int lowest = v1;
+
+	lowest = (v2 < lowest) ? v2 : lowest;
+	lowest = (v3 < lowest) ? v3 : lowest;
+	lowest = (v4 < lowest) ? v4 : lowest;
+
+	return lowest;
+}
+
+/* Return the largest of the four arguments. */
+int eval_max(int v1, int v2, int v3, int v4)
+{
+	int highest = v1;
+
+	highest = (v2 > highest) ? v2 : highest;
+	highest = (v3 > highest) ? v3 : highest;
+	highest = (v4 > highest) ? v4 : highest;
+
+	return highest;
+}
+
+/*
+ * Look up a register name of one to three characters.
+ *
+ * rlist is a table of fixed-width, three-character entries; names shorter
+ * than three characters are padded on the right with spaces (for example
+ * "X  Y  PCR").  The text at *str is compared case-insensitively against
+ * each entry in turn and the first entry that matches wins.  A one- or
+ * two-character entry matches as soon as its own characters match; the
+ * character that follows in *str is not examined.
+ *
+ * Returns the zero-based index of the matching entry and advances *str past
+ * the matched name, or returns -1 and leaves *str untouched when nothing
+ * matches.
+ */
+int lookupreg3(const char *rlist, char **str)
+{
+	// <ctype.h> functions require values in the unsigned char range;
+	// passing a negative plain char is undefined behaviour
+	const unsigned char *s = (const unsigned char *) *str;
+	const char *entry;
+	int rval = 0;
+
+	for (entry = rlist; *entry; entry += 3, rval++)
+	{
+		int matched;
+
+		if (toupper(s[0]) != entry[0])
+			continue;
+
+		if (entry[1] == ' ')
+			matched = 1;
+		else if (toupper(s[1]) != entry[1])
+			continue;
+		else if (entry[2] == ' ')
+			matched = 2;
+		else if (toupper(s[2]) != entry[2])
+			continue;
+		else
+			matched = 3;
+
+		*str += matched;
+		return rval;
+	}
+
+	return -1;
+}
+
+
+/*
+ * Look up a register name of one or two characters.
+ *
+ * reglist is a table of fixed-width, two-character entries; one-character
+ * names are padded with a space (for example "D X Y U S PC").  The text at
+ * *str is compared case-insensitively.  A one-character entry matches only
+ * when the next input character is not a letter, so "X" does not match the
+ * start of "XY".
+ *
+ * Returns the zero-based index of the matching entry and advances *str past
+ * the matched name, or returns -1 and leaves *str untouched when nothing
+ * matches.
+ */
+int lookupreg(const char *reglist, char **str)
+{
+	// <ctype.h> functions require values in the unsigned char range;
+	// passing a negative plain char is undefined behaviour
+	const unsigned char *s = (const unsigned char *) *str;
+	int rval;
+
+	for (rval = 0; *reglist; reglist += 2, rval++)
+	{
+		if (toupper(s[0]) != *reglist)
+			continue;
+
+		// first char matches
+		if (reglist[1] == ' ' && !isalpha(s[1]))
+			break;
+		if (toupper(s[1]) == reglist[1])
+			break;
+	}
+
+	if (!*reglist)
+		return -1;
+
+	*str += (reglist[1] == ' ') ? 1 : 2;
+	return rval;
+}
+
+/*
+ * Account for, and on pass 2 store, one byte of generated code.
+ *
+ * cl -> len is incremented on every pass so that addresses can be computed.
+ * The byte itself (the low 8 bits of cb) is appended to cl -> codebytes only
+ * when as -> passnum is 2; the buffer grows in steps of 32 bytes.
+ *
+ * Failure to grow the buffer is fatal: it is reported and the program exits
+ * rather than storing through a NULL pointer.
+ */
+void addcodebyte(asmstate_t *as, sourceline_t *cl, int cb)
+{
+	cl -> len += 1;
+	if (as -> passnum != 2)
+		return;
+
+	if (cl -> numcodebytes >= cl -> codesize)
+	{
+		// keep the old pointer until the new one is known to be valid
+		void *grown = realloc(cl -> codebytes, cl -> codesize + 32);
+
+		if (!grown)
+		{
+			perror("Malloc");
+			exit(1);
+		}
+		cl -> codebytes = grown;
+		cl -> codesize += 32;
+	}
+	debug("EMIT: %02x\n", cb & 0xff);
+	cl -> codebytes[cl -> numcodebytes++] = cb & 0xFF;
+}
+
+// parse a symbol out of the line
+//
+// The first character at *ptr must be one of SYMCHAR_START; the symbol then
+// extends over every following non-whitespace character found in SYMCHAR.
+// (Both sets are defined outside this file; the original comment describes
+// the start set as _, a-z, A-Z.)
+//
+// On success *ptr is advanced past the symbol and a pointer to a
+// NUL-terminated copy is returned.  The copy lives in a single static
+// buffer that is reused by the next call, so callers that need to keep the
+// name must duplicate it.
+//
+// Returns NULL, with *ptr unchanged, if the text at *ptr does not start a
+// symbol (including when *ptr is at the end of the string).
+//
+// as is not used.
+char *parse_symbol(asmstate_t *as, char **ptr)
+{
+	static char *symptr = NULL;
+	char *tptr = *ptr;
+	char *grown;
+	int sl = 0;
+
+	(void) as;
+
+	// strchr() treats the terminating NUL as part of the set, so the end
+	// of the input has to be rejected explicitly or it would be accepted
+	// as an empty symbol
+	if (!**ptr || !strchr(SYMCHAR_START, **ptr))
+		return NULL;
+
+	// measure the symbol
+	while (*tptr && !isspace((unsigned char) *tptr) && strchr(SYMCHAR, *tptr))
+	{
+		tptr++;
+		sl++;
+	}
+
+	// keep the old buffer until the new one is known to be valid
+	grown = realloc(symptr, sl + 1);
+	if (!grown)
+	{
+		perror("Malloc");
+		exit(1);
+	}
+	symptr = grown;
+
+	memcpy(symptr, *ptr, sl);
+	symptr[sl] = '\0';
+	*ptr += sl;
+
+	return symptr;
+}
+
+// resolve an instruction
+//
+// Processes one source line; called once per line on each pass.
+//
+// Every call resets the per-line results (addrset, isequ, len, undef) and
+// records the current address in cl -> code_symloc.
+//
+// Pass 1 only: the text in cl -> line is split into an optional symbol
+// (present when the line does not start with whitespace), an opcode and an
+// operand.  Empty lines and lines starting with '*' or ';' are comments and
+// get opcode -1.  The opcode is looked up in instab without regard to case;
+// if it is not found there, the macro list is searched (exact match) and a
+// hit is handed to expand_macro().  While a macro is being defined
+// (as -> inmacro) every line except the ENDM is passed to add_macro_line()
+// and then neutralised.
+//
+// Later passes restart from the operand text saved in cl -> operstr.
+//
+// While a false conditional is being skipped (as -> skipcond) only the
+// directives needed to track nesting are looked at.  Otherwise the
+// opcode's handler is called, the line's symbol (if any) is registered and
+// as -> addr is advanced by cl -> len.
+void resolve_insn(asmstate_t *as, sourceline_t *cl)
+{
+	char *optr;
+	char opbuf[MAX_OP_LEN + 1];
+	char *symbol = NULL;
+	int c;
+
+	cl -> code_symloc = as -> addr;
+
+	cl -> addrset = 0;
+	cl -> isequ = 0;
+	cl -> len = 0;
+	cl -> undef = 0;
+
+	// only parse line on first pass
+	// (the unsigned char casts below are required by <ctype.h>: passing a
+	// negative plain char to isspace() is undefined behaviour)
+	if (as -> passnum == 1)
+	{
+		optr = cl -> line;
+		if (!*optr || *optr == '*' || *optr == ';')
+		{
+			cl -> opcode = -1;
+			cl -> remainder = cl -> line;
+			return;
+		}
+
+		// anything in the first column is a symbol
+		if (!isspace((unsigned char) *optr))
+		{
+			symbol = parse_symbol(as, &optr);
+			if (*optr && !isspace((unsigned char) *optr) && !(as -> inmacro))
+			{
+				errorp1(ERR_BADSYM);
+				while (*optr && !isspace((unsigned char) *optr))
+					optr++;
+			}
+			if (symbol)
+			{
+				// parse_symbol() returns a static buffer; keep a copy
+				cl -> symstr = strdup(symbol);
+				cl -> hassym = 1;
+			}
+		}
+
+		while (isspace((unsigned char) *optr))
+			optr++;
+
+		// parse opcode
+		if (*optr && *optr != ';')
+		{
+			c = 0;
+			while (c < MAX_OP_LEN && *optr && !isspace((unsigned char) *optr))
+			{
+				opbuf[c++] = *optr++;
+			}
+			opbuf[c] = '\0';
+			if (*optr && !isspace((unsigned char) *optr) && !(as -> inmacro))
+			{
+				// opcode text is longer than MAX_OP_LEN
+				errorp1(ERR_BADOP);
+				cl -> opcode = -1;
+			}
+			else
+			{
+				cl -> opcstr = strdup(opbuf);
+				for (c = 0; instab[c].opcode; c++)
+				{
+					if (!strcasecmp(opbuf, instab[c].opcode))
+						break;
+				}
+				if (!instab[c].opcode && opbuf[0] == '*')
+				{
+					// a '*' in the opcode field is not reported as an error
+					cl -> opcode = -1;
+				}
+				else if (!instab[c].opcode && !(as -> inmacro))
+				{
+					cl -> opcode = -1;
+
+					// look up macro
+					if (as -> macros)
+					{
+						macrotab_t *m;
+
+						for (m = as -> macros; m; m = m -> next)
+						{
+							if (!strcmp(m -> name, opbuf))
+								break;
+						}
+						if (m)
+						{
+							// we have a macro here
+							cl -> macro = m;
+							while (*optr && isspace((unsigned char) *optr))
+								optr++;
+							expand_macro(as, cl, &optr);
+							return;
+						}
+						else
+						{
+							errorp1(ERR_BADOP);
+						}
+					}
+					else
+					{
+						errorp1(ERR_BADOP);
+					}
+				}
+				else
+					cl -> opcode = c;
+			}
+		}
+		else
+			cl -> opcode = -1;
+
+		if (as -> inmacro && cl -> opcode >= 0 && instab[cl -> opcode].specialnum != SPECIAL_ENDM)
+		{
+			// inside a macro definition: store the raw line in the macro
+			// and turn this line into a no-op
+			// NOTE(review): opcstr and symstr were strdup()ed above and are
+			// dropped here without free(); confirm add_macro_line() does
+			// not keep them before releasing them here.
+			add_macro_line(as, cl, cl -> line);
+			cl -> opcode = -1;
+			cl -> remainder = cl -> line;
+			cl -> opcstr = NULL;
+			cl -> operstr = NULL;
+			cl -> symstr = NULL;
+			cl -> hassym = 0;
+			cl -> macrodef = 1;
+			return;
+		}
+		// parse operand
+		while (*optr && isspace((unsigned char) *optr))
+			optr++;
+
+		cl -> operstr = optr;
+	}
+	else
+		optr = cl -> operstr;
+
+	if (as -> skipcond)
+	{
+		// if skipping a condition, need to skip a macro
+		if (cl -> opcode >= 0)
+		{
+			if (instab[cl -> opcode].specialnum == SPECIAL_MACRO)
+			{
+				as -> skipmacro = 1;
+			}
+			else if (instab[cl -> opcode].specialnum == SPECIAL_ENDM)
+			{
+				as -> skipmacro = 0;
+			}
+			else if (instab[cl -> opcode].specialnum == SPECIAL_COND && !(as -> skipmacro))
+			{
+				// nested conditional inside the skipped region
+				as -> skipcount++;
+			}
+			else if (instab[cl -> opcode].specialnum == SPECIAL_ENDC && !(as -> skipmacro))
+			{
+				as -> skipcount--;
+				if (as -> skipcount <= 0)
+				{
+					as -> skipcond = 0;
+					as -> noelse = 0;
+				}
+			}
+			else if (instab[cl -> opcode].specialnum == SPECIAL_ELSE && !(as -> skipmacro))
+			{
+				// an ELSE at the outermost skipped level ends the skip
+				if (as -> skipcount == 1)
+				{
+					as -> skipcount = 0;
+					as -> skipcond = 0;
+					as -> noelse = 1;
+					return;
+				}
+			}
+		}
+		if (as -> skipcond)
+			cl -> skipped = 1;
+		return;
+	}
+
+	// do the code thing
+	// on pass 1, no code is generated
+	// on pass 2, code is generated using the "emit()" macro
+	if (cl -> opcode >= 0)
+	{
+		if (instab[cl -> opcode].opfn)
+		{
+			(*(instab[cl -> opcode].opfn))(as, cl, &optr);
+			if (as -> passnum == 1)
+			{
+				// Up to here operstr pointed into cl -> line; when the
+				// handler left text unconsumed, operstr is replaced by
+				// its own copy.  remainder is whatever the handler did
+				// not consume (it still points into cl -> line).
+				//
+				// The original additionally strdup()ed the
+				// whitespace-skipped tail into remainder and then
+				// immediately overwrote that pointer, leaking the copy
+				// on every such line; only the assignment that took
+				// effect is kept.
+				if (*optr)
+					cl -> operstr = strdup(cl -> operstr);
+				cl -> remainder = optr;
+			}
+		}
+		else
+		{
+			errorp1(ERR_BADOP);
+			cl -> opcode = -1;
+		}
+	}
+	// address of the symbol may have been changed by a pseudo op
+	// so we couldn't register it above
+	// that means it may turn out to be a "forward ref" in pass 1
+	if (cl -> hassym)
+	{
+		register_symbol(as, cl, cl -> symstr, cl -> code_symloc, cl -> isset ? SYMFLAG_SET : SYMFLAG_NONE);
+	}
+
+	as -> addr += cl -> len;
+}
+
+/*
+ * Run the second pass: switch the assembler state to pass 2, rewind the
+ * address and direct-page value to 0, and resolve every stored source line
+ * again, in order from as -> source_head.
+ */
+void generate_code(asmstate_t *as)
+{
+	sourceline_t *line;
+
+	as -> passnum = 2;
+	as -> dpval = 0;
+	as -> addr = 0;
+
+	line = as -> source_head;
+	while (line)
+	{
+		resolve_insn(as, line);
+		line = line -> next;
+	}
+}
+
+/*
+ * Read a source file and run pass 1 over it.
+ *
+ * Each line is stripped of its line ending, stored in a new sourceline_t
+ * appended to the list at as -> source_head / as -> source_tail, and passed
+ * to resolve_insn().  Reading stops at end of file or after a line whose
+ * opcode is the END pseudo-op.  Line numbers stored in cl -> lineno start
+ * at 0.
+ *
+ * If the file cannot be opened a message is printed and the function
+ * returns without reading anything.  A read error or an allocation failure
+ * terminates the program.
+ *
+ * fname is stored in every line record by pointer, not copied.
+ * NOTE(review): it presumably has to stay valid for as long as the line
+ * list is used -- confirm against the callers.
+ */
+void lwasm_read_file(asmstate_t *as, char *fname)
+{
+	FILE *f;
+	int cline = 0;
+	sourceline_t *cl;
+	size_t bufflen = 0;
+	char *buff = NULL;
+
+	as -> passnum = 1;
+
+	f = fopen(fname, "r");
+	if (!f)
+	{
+		fprintf(stderr, "Cannot open input file %s: %s\n", fname, strerror(errno));
+		return;
+	}
+
+	for (;;)
+	{
+		// check the result before touching buff: after a failed
+		// getline() its contents are not a freshly read line
+		if (getline(&buff, &bufflen, f) < 0)
+		{
+			if (feof(f))
+				break;
+			fprintf(stderr, "Error reading '%s': %s\n", fname, strerror(errno));
+			exit(1);
+		}
+		debug(" read line (%s:%d): %s\n", fname, cline, buff);
+
+		// cut the line at the first CR or LF
+		buff[strcspn(buff, "\r\n")] = '\0';
+
+		cl = calloc(1, sizeof *cl);
+		if (!cl)
+		{
+			perror("Malloc");
+			exit(1);
+		}
+
+		cl -> lineno = cline++;
+		cl -> sourcefile = fname;
+		cl -> opcode = -1;
+		cl -> addrmode = -1;
+		cl -> addr = as -> addr;
+		cl -> dpval = as -> dpval;
+
+		// append to the doubly linked list of source lines
+		cl -> prev = as -> source_tail;
+		if (as -> source_tail)
+			as -> source_tail -> next = cl;
+		as -> source_tail = cl;
+		if (as -> source_head == NULL)
+			as -> source_head = cl;
+
+		// buff is reused for the next line, so the record needs a copy
+		cl -> line = strdup(buff);
+		if (!cl -> line)
+		{
+			perror("Malloc");
+			exit(1);
+		}
+
+		resolve_insn(as, cl);
+
+		if (cl -> opcode >= 0 && instab[cl -> opcode].instype == INSTYPE_PSEUDO && instab[cl -> opcode].specialnum == SPECIAL_END)
+			break;
+	}
+
+	free(buff);
+	fclose(f);
+}
+
+/*
+below this point is the expression evaluation package
+
+Supported binary operators: + - / * %
+(operators are applied strictly left to right; there is no precedence)
+Supported unary operators: - +
+
+<infix>: + | - | * | / | %
+<unary>: - | +
+<expr>: <term> { <infix> <term> }
+<term>: <unary> <term>
+<term>: ( <expr> )
+<term>: <symbol>
+<term>: ' <char>
+<term>: " <char> <char>
+<term>: *
+<term>: <number>
+
+<number>: <dec>
+<number>: & <dec>
+
+<number>: $ <hex>
+<number>: <hex> H
+<number>: @ <oct>
+<number>: <oct> O
+<number>: <oct> Q
+
+<number>: % <bin>
+<number>: <bin> B
+
+<bin>: 0 | 1
+<oct>: <bin> | 2 | 3 | 4 | 5 | 6 | 7
+<dec>: <oct> | 8 | 9
+<hex>: <dec> | A | B | C | D | E | F
+
+NOTE: hex values which start with a non-digit will need to be prefixed
+by $ or have a 0 as the leading digit; hence: $CC or 0CCH otherwise the
+assembler cannot tell the difference between CCH as a symbol or CCH as
+the value $CC
+
+*/
+
+// evaluate a single term of an expression
+//
+// Parses one term starting at *optr, advances *optr past it and stores its
+// value in *tval (*tval is 0 whenever there is a problem).
+//
+// Recognised terms: unary + and -, a parenthesised expression, '*' (the
+// address of the current line), a symbol, 'c and "cc character constants,
+// and numbers written with a prefix (% binary, @ octal, & decimal, $ hex)
+// or with a suffix (B binary, O or Q octal, H hex, none for decimal).
+//
+// Returns 0 on success.  An undefined symbol is still a success: the value
+// is 0, cl -> undef is set and ERR_UNDEF is reported through errorp2().
+// Returns -1 on a syntax error and -2 when a character constant is missing
+// its character(s); both are reported as ERR_BADEXPR (or ERR_BADSYM).
+//
+// NOTE(review): lookup_symbol() signals "not found" with -1, so a symbol
+// whose value really is -1 looks undefined here.
+int eval_term(asmstate_t *as, sourceline_t *cl, char **optr, int *tval)
+{
+	char tc;
+	int rval;
+	// accumulate in unsigned so that over-long constants wrap instead of
+	// overflowing a signed int
+	unsigned int binval;
+	unsigned int octval;
+	unsigned int decval;
+	unsigned int hexval;
+	int valtype;
+	int digval;
+	int bindone = 0;
+
+	*tval = 0;
+
+	// unary +, ignored for symmetry
+	while (**optr == '+')
+		(*optr)++;
+	tc = **optr;
+
+	if (tc == '(')
+	{
+		// eval_expr() clears cl -> undef on entry; remember whether an
+		// earlier term of the enclosing expression already set it
+		int outer_undef = cl -> undef;
+
+		(*optr)++;
+		rval = eval_expr(as, cl, optr, tval);
+		if (outer_undef)
+			cl -> undef = 1;
+		if (rval < 0)
+			return rval;
+		if (**optr != ')')
+		{
+			errorp1(ERR_BADEXPR);
+			return -1;
+		}
+		(*optr)++;
+		return 0;
+	}
+
+	if (tc == '-')
+	{
+		(*optr)++;
+		rval = eval_term(as, cl, optr, tval);
+		if (rval < 0)
+			return rval;
+		*tval = -*tval;
+		return 0;
+	}
+
+	// current address (of current instruction, not PC)
+	if (tc == '*')
+	{
+		*tval = cl -> addr;
+		(*optr)++;
+		return 0;
+	}
+
+	// strchr() also matches the terminating NUL of its set, so the end of
+	// the string has to be excluded explicitly; otherwise it would be
+	// taken for an (empty) symbol and never reach the check further down
+	if (tc && strchr("abcdefghijklmnopqrstuvwxyz_", tolower((unsigned char) tc)))
+	{
+		// evaluate a symbol
+		char *symbuf;
+
+		symbuf = parse_symbol(as, optr);
+		if (!symbuf)
+		{
+			errorp1(ERR_BADSYM);
+			*tval = 0;
+			return -1;
+		}
+
+		debug(" looking up symbol: %s\n", symbuf);
+		*tval = lookup_symbol(as, symbuf);
+
+		// if not found, flag forward ref
+		if (*tval == -1)
+		{
+			errorp2(ERR_UNDEF);
+			cl -> undef = 1;
+			*tval = 0;
+			return 0;
+		}
+		return 0;
+	}
+
+	if (tc == '%')
+	{
+		// binary number
+		unsigned int v1 = 0;
+		(*optr)++;
+		while (**optr == '0' || **optr == '1')
+		{
+			v1 = v1 << 1 | (unsigned int) ((*(*optr)++) - '0');
+		}
+		*tval = (int) v1;
+		return 0;
+	}
+	if (tc == '$')
+	{
+		// hex number
+		unsigned int v1 = 0;
+		(*optr)++;
+		debug("HEX CONST: %s\n", *optr);
+		while (isxdigit((unsigned char) **optr))
+		{
+			tc = *(*optr)++;
+			debug("HEX 2: %02x\n", tc);
+			if (tc >= 'A')
+			{
+				v1 = v1 << 4 | (unsigned int) (toupper((unsigned char) tc) - 'A' + 10);
+			}
+			else
+			{
+				v1 = v1 << 4 | (unsigned int) (tc - '0');
+			}
+		}
+		*tval = (int) v1;
+		return 0;
+	}
+	if (tc == '@')
+	{
+		// octal number
+		unsigned int v1 = 0;
+		(*optr)++;
+		while (**optr >= '0' && **optr <= '7')
+		{
+			v1 = v1 << 3 | (unsigned int) ((*(*optr)++) - '0');
+		}
+		*tval = (int) v1;
+		return 0;
+	}
+	if (tc == '&')
+	{
+		// decimal number
+		unsigned int v1 = 0;
+		(*optr)++;
+		while (isdigit((unsigned char) **optr))
+		{
+			v1 = v1 * 10 + (unsigned int) ((*(*optr)++) - '0');
+		}
+		*tval = (int) v1;
+		return 0;
+	}
+	if (tc == '\'')
+	{
+		// single character constant
+		(*optr)++;
+		if (!**optr)
+		{
+			errorp1(ERR_BADEXPR);
+			return -2;
+		}
+		*tval = *(*optr)++;
+		return 0;
+	}
+	if (tc == '"')
+	{
+		// two character constant: first character is the high byte
+		(*optr)++;
+		if (!**optr || !*(*optr + 1))
+		{
+			errorp1(ERR_BADEXPR);
+			return -2;
+		}
+		// read both characters before advancing; modifying *optr twice
+		// in one expression is undefined behaviour
+		*tval = ((unsigned char) (*optr)[0] << 8) | (unsigned char) (*optr)[1];
+		*optr += 2;
+		return 0;
+	}
+	// end of string
+	if (tc == '\0')
+	{
+		// error if at EOS as we are looking for a term
+		errorp1(ERR_BADEXPR);
+		return -1;
+	}
+
+	// we have a generic number here which may be decimal, hex, binary, or octal
+	// based on a suffix
+
+	// valtype is the set of bases the digits seen so far still allow:
+	// binary (1), octal (2), decimal (4), hex (8)
+	valtype = 15;
+	hexval = octval = decval = binval = 0;
+	for (;;)
+	{
+		tc = **optr;
+		if (!tc || !strchr("ABCDEFabcdefqhoQHO0123456789", tc))
+		{
+			// end of the number: it is binary if it ended in a valid
+			// B suffix, otherwise it must be decimal
+			if (bindone)
+			{
+				*tval = (int) binval;
+				return 0;
+			}
+			if (valtype & 4)
+			{
+				*tval = (int) decval;
+				return 0;
+			}
+			errorp1(ERR_BADEXPR);
+			return -1;
+		}
+		(*optr)++;
+		tc = toupper((unsigned char) tc);
+
+		if (tc == 'H')
+		{
+			if (valtype & 8)
+			{
+				*tval = (int) hexval;
+				return 0;
+			}
+			// syntax error
+			errorp1(ERR_BADEXPR);
+			return -1;
+		}
+
+		if (tc == 'Q' || tc == 'O')
+		{
+			// octal suffix is only valid if no digit was above 7
+			if (valtype & 2)
+			{
+				*tval = (int) octval;
+				return 0;
+			}
+			errorp1(ERR_BADEXPR);
+			return -1;
+		}
+
+		// the character set tested above leaves only hex digits here
+		digval = (tc <= '9') ? tc - '0' : tc - 'A' + 10;
+
+		// a B counts as the binary suffix only if every digit before it
+		// was 0 or 1; any character after it cancels that again, because
+		// the B then was an ordinary hex digit
+		bindone = (tc == 'B' && (valtype & 1));
+
+		// rule out every base this digit does not fit in
+		if (digval > 1)
+			valtype &= 14;
+		if (digval > 7)
+			valtype &= 13;
+		if (digval > 9)
+			valtype &= 11;
+
+		if (valtype & 8)
+		{
+			hexval = (hexval << 4) | (unsigned int) digval;
+		}
+		if (valtype & 4)
+		{
+			decval = decval * 10 + (unsigned int) digval;
+		}
+		if (valtype & 2)
+		{
+			octval = (octval << 3) | (unsigned int) digval;
+		}
+		if (valtype & 1)
+		{
+			binval = (binval << 1) | (unsigned int) digval;
+		}
+	}
+	// can't get here from there
+}
+
+// evaluate an expression
+//
+// Parses <term> { <infix> <term> } starting at *optr and advances *optr to
+// the first character that is not part of the expression.  Operators are
+// applied strictly left to right; there is no precedence.
+//
+// The result is stored in *val, which is 0 when an error is returned.
+//
+// Returns 0 on success and a negative value (as returned by eval_term())
+// if the expression cannot be parsed.  An undefined symbol is not an error
+// at this level: it is evaluated as 0, cl -> undef is set and 0 is returned.
+// cl -> undef is cleared on entry.
+//
+// Division or modulo by zero would crash the assembler, so it is handled
+// here: when the divisor is only zero because it contains an undefined
+// (possibly forward-referenced) symbol the result is the placeholder 0;
+// otherwise it is reported as ERR_BADEXPR and -1 is returned.
+// NOTE(review): the report goes through errorp1() like every other
+// malformed-expression path in this file; confirm that this is the right
+// macro for a divisor that only turns out to be zero on pass 2.
+int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val)
+{
+	int left;
+	int right;
+	char oper;
+	int rval;
+
+	// by default, return 0 in val
+	*val = 0;
+	cl -> undef = 0;
+
+	rval = eval_term(as, cl, optr, &left);
+	if (rval < 0)
+		return rval;
+
+	for (;;)
+	{
+		oper = **optr;
+
+		// end of expr: end of string or anything that is not an infix
+		// operator (whitespace, ',', ']', ')' and so on)
+		if (oper == '\0' || !strchr("+-*/%", oper))
+			break;
+
+		(*optr)++;
+
+		rval = eval_term(as, cl, optr, &right);
+		// propagate error
+		if (rval < 0)
+			return rval;
+
+		if ((oper == '/' || oper == '%') && right == 0)
+		{
+			if (cl -> undef)
+			{
+				// divisor is a stand-in for an undefined symbol
+				left = 0;
+				continue;
+			}
+			errorp1(ERR_BADEXPR);
+			return -1;
+		}
+
+		// do the operation and put it in "left"
+		switch (oper)
+		{
+		case '+':
+			left += right;
+			break;
+
+		case '-':
+			left -= right;
+			break;
+
+		case '*':
+			left *= right;
+			break;
+
+		case '/':
+			left /= right;
+			break;
+
+		case '%':
+			left %= right;
+			break;
+		}
+	}
+
+	*val = left;
+	return 0;
+}