changeset 18:218aabbc3b1a

First pass at expression evaluator complete
author lost
date Thu, 01 Jan 2009 23:55:18 +0000
parents df0c4a46af8f
children 925105ccf22f
files src/expr.c src/expr.h
diffstat 2 files changed, 650 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/src/expr.c	Thu Jan 01 02:26:26 2009 +0000
+++ b/src/expr.c	Thu Jan 01 23:55:18 2009 +0000
@@ -19,14 +19,15 @@
 */
 
 /*
-This file contains the actual expression evaluator which uses the LWVAL
-mechanism to store results.
+This file contains the actual expression evaluator
 */
 
 #define __expr_c_seen__
 
+#include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "expr.h"
 #include "util.h"
@@ -162,3 +163,626 @@
 	
 	return t;
 }
+
+// the following two functions are co-routines which actually parse
+// an infix expression onto the expression stack, each returns -1
+// if an error is encountered
+
+/*
+parse a term and push it onto the stack
+
+this function handles unary prefix operators (-, +, .not., .com.)
+as well as ()
+*/
+int lwasm_expr_parse_term(lwasm_expr_stack_t *s, const char **p)
+{
+	lwasm_expr_term_t *t;
+
+eval_next:
+	if (**p == '(')
+	{
+		(*p)++;
+		lwasm_expr_parse_expr(s, p, 0);
+		if (**p != ')')
+			return -1;
+		(*p)++;
+		return 0;
+	}
+	
+	if (**p == '+')
+	{
+		(*p)++;
+		goto eval_next;
+	}
+	
+	if (**p == '-')
+	{
+		// parse expression following "-"
+		(*p)++;
+		if (lwasm_expr_parse_expr(s, p, 200) < 0)
+			return -1;
+		t = lwasm_expr_term_create_oper(LWASM_OPER_NEG);
+		lwasm_expr_stack_push(s, t);
+		lwasm_expr_term_free(t);
+		return 0;
+	}
+	
+	/*
+		we have an actual term here so evaluate it
+		
+		it could be one of the following:
+		
+		1. a decimal constant
+		2. a hexadecimal constant
+		3. an octal constant
+		4. a binary constant
+		5. a symbol reference
+		6. the "current" instruction address (*)
+		7. the "current" data address (.)
+		8. a "back reference" (<)
+		9. a "forward reference" (>)
+		
+		items 6 through 9 are stored as symbol references
+		
+		(a . followed by a . or a alpha char or number is a symbol)
+	*/
+	if (**p == '*'
+		|| (
+			**p == '.' 
+			&& (*p)[1] != '.' 
+			&& !((*p)[1] >= 'A' && (*p)[1] <= 'Z') 
+			&& !((*p)[1] >= 'a' && (*p)[1] <= 'z') 
+			&& !((*p)[1] >= '0' && (*p)[1] <= '9')
+			)
+		|| **p == '<'
+		|| **p == '>')
+	{
+		char tstr[2];
+		tstr[0] = **p;
+		tstr[1] = '\0';
+		t = lwasm_expr_term_create_sym(tstr);
+		lwasm_expr_stack_push(s, t);
+		lwasm_expr_term_free(t);
+		(*p)++;
+		return 0;
+	}
+	
+	/*
+		- a symbol will be a string of characters introduced by a letter, ".",
+		  "_" but NOT a number
+		- a decimal constant will consist of only digits, optionally prefixed
+		  with "&"
+		- a binary constant will consist of only 0s and 1s either prefixed with %
+		  or suffixed with "B"
+		- a hex constant will consist of 0-9A-F either prefixed with $ or
+		  suffixed with "H"; a hex number starting with A-F must be prefixed
+		  with $ or start with 0 and end with H
+		- an octal constant will consist of 0-7 either prefixed with @ or
+		  suffixed with "O" or "Q"
+		- an ascii constant will be a single character prefixed with a '
+		- a double ascii constant will be two characters prefixed with a "
+		
+	*/
+	if (**p == '"')
+	{
+		// double ascii constant
+		int val;
+		(*p)++;
+		if (!**p)
+			return -1;
+		if (!*((*p)+1))
+			return -1;
+		val = **p << 8 | *((*p) + 1);
+		(*p) += 2;
+		t = lwasm_expr_term_create_int(val);
+		lwasm_expr_stack_push(s, t);
+		lwasm_expr_term_free(t);
+		return 0;
+	}
+	else if (**p == '\'')
+	{
+		// single ascii constant
+		int val;
+		(*p)++;
+		if (!**p)
+			return -1;
+		val = **p;
+		(*p)++;
+		t = lwasm_expr_term_create_int(val);
+		lwasm_expr_stack_push(s, t);
+		lwasm_expr_term_free(t);
+	}
+	else if (**p == '&')
+	{
+		// decimal constant
+		int val = 0;
+		
+		(*p)++;
+		while (strchr("0123456789", **p))
+		{
+			val = val * 10 + (**p - '0');
+			(*p)++;
+		}
+		t = lwasm_expr_term_create_int(val);
+		lwasm_expr_stack_push(s, t);
+		lwasm_expr_term_free(t);
+		return 0;
+	}
+	else if (**p == '%')
+	{
+		// binary constant
+		int val = 0;
+		
+		(*p)++;
+		while (**p == '0' || **p == '1')
+		{
+			val = val * 2 + (**p - '0');
+			(*p)++;
+		}
+		t = lwasm_expr_term_create_int(val);
+		lwasm_expr_stack_push(s, t);
+		lwasm_expr_term_free(t);
+		return 0;
+	}
+	else if (**p == '$')
+	{
+		// hexadecimal constant
+		int val = 0, val2;
+		
+		(*p)++;
+		while (strchr("0123456789ABCDEFabcdef", **p))
+		{
+			val2 = toupper(**p) - '0';
+			if (val2 > 9)
+				val2 -= 7;
+			val = val * 16 + val2;
+			(*p)++;
+		}
+		t = lwasm_expr_term_create_int(val);
+		lwasm_expr_stack_push(s, t);
+		lwasm_expr_term_free(t);
+		return 0;
+	}
+	else if (**p == '@')
+	{
+		// octal constant
+		int val = 0;
+		
+		(*p)++;
+		while (strchr("01234567", **p))
+		{
+			val = val * 8 + (**p - '0');
+			(*p)++;
+		}
+		t = lwasm_expr_term_create_int(val);
+		lwasm_expr_stack_push(s, t);
+		lwasm_expr_term_free(t);
+		return 0;
+	}
+	
+	// symbol or bare decimal or suffix identified constant here
+	// all numbers will start with a digit at this point
+	if (**p < '0' || **p > '9')
+	{
+		int l = 0;
+		char *sb;
+		
+		// evaluate a symbol here
+		static const char *symchars = "_.$@\\abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+		while (strchr(symchars, (*p)[l]))
+			l++;
+
+		if (l == 0)
+			return -1;
+
+		sb = lwasm_alloc(l + 1);
+		sb[l] = '\0';
+		memcpy(sb, *p, l);
+		t = lwasm_expr_term_create_sym(sb);
+		lwasm_expr_stack_push(s, t);
+		lwasm_expr_term_free(t);
+		lwasm_free(sb);
+		(*p) += l;
+		return 0;
+	}
+	
+	if (!**p)
+		return -1;
+	
+	// evaluate a suffix based constant
+	{
+		int decval = 0, binval = 0, hexval = 0, octval = 0;
+		int valtype = 15;	// 1 = bin, 2 = oct, 4 = dec, 8 = hex
+		int bindone = 0;
+		int val;
+		int dval;
+		
+		while (1)
+		{
+			if (!**p || !strchr("0123456789ABCDEFabcdefqhoQHO", **p))
+			{
+				// we can legally have bin or decimal here
+				if (bindone)
+				{
+					// we just finished a binary value
+					val = binval;
+					break;
+				}
+				else if (valtype & 4)
+				{
+					// otherwise we must be decimal (if we're still allowed one)
+					val = decval;
+					break;
+				}
+				else
+				{
+					// bad value
+					return -1;
+				}
+			}
+			
+			dval = toupper(**p);
+			(*p)++;
+			
+			if (bindone)
+			{
+				// any characters past "B" means it is not binary
+				bindone = 0;
+				valtype &= 14;
+			}
+			
+			switch (dval)
+			{
+			case 'Q':
+			case 'O':
+				if (valtype & 2)
+				{
+					val = octval;
+					valtype = -1;
+					break;
+				}
+				else
+				{
+					// not a valid octal value
+					return -1;
+				}
+				/* can't get here */
+
+			case 'H':
+				if (valtype & 8)
+				{
+					val = hexval;
+					valtype = -1;
+					break;
+				}
+				else
+				{
+					// not a valid hex number
+					return -1;
+				}
+				/* can't get here */
+
+			case 'B':
+				// this is a bit of a sticky one since B is a legit hex
+				// digit so this may or may not be the end of the number
+				// so we fall through to the digit case
+
+				if (valtype & 1)
+				{
+					// could still be binary
+					bindone = 1;
+					valtype = 9;	// hex and binary
+				}
+				/* fall through intentional */
+				
+			default:
+				// digit
+				dval -= '0';
+				if (dval > 9)
+					dval -= 7;
+				
+				if (valtype & 8)
+				{
+					hexval = hexval * 16 + dval;
+				}
+				if (valtype & 4)
+				{
+					if (dval > 9)
+						valtype &= 11;
+					else
+						decval = decval * 10 + dval;
+				}
+				if (valtype & 2)
+				{
+					if (dval > 7)
+						valtype &= 13;
+					else
+						octval = octval * 8 + dval;
+				}
+				if (valtype & 1)
+				{
+					if (dval > 1)
+						valtype &= 14;
+					else
+						binval = binval * 2 + dval;
+				}
+			}
+			// break out if we have a return value
+			if (valtype = -1)
+				break;
+			// return if no more valid possibilities!
+			if (valtype == 0)
+				return -1;
+		}
+		
+		// we get here when we have a value to return
+		t = lwasm_expr_term_create_int(val);
+		lwasm_expr_stack_push(s, t);
+		lwasm_expr_term_free(t);
+		return 0;
+	}
+	/* can't get here */
+}
+
+// parse an expression and push the result onto the stack
+// if an operator of lower precedence than the value of "prec" is found,
+int lwasm_expr_parse_expr(lwasm_expr_stack_t *s, const char **p, int prec)
+{
+	static const struct operinfo
+	{
+		int opernum;
+		char *operstr;
+		int operprec;
+	} operators[] =
+	{
+		{ LWASM_OPER_PLUS, "+", 100 },
+		{ LWASM_OPER_MINUS, "-", 100 },
+		{ LWASM_OPER_TIMES, "*", 150 },
+		{ LWASM_OPER_DIVIDE, "/", 150 },
+		{ LWASM_OPER_MOD, "%", 150 },
+		{ LWASM_OPER_INTDIV, "\\", 150 },
+		
+		{ LWASM_OPER_NONE, "", 0 }
+	};	
+	int opern, i;
+	lwasm_expr_term_t *operterm;
+	
+	// return if we are at the end of the expression or a subexpression
+	if (!**p || isspace(**p) || **p == ')')
+		return 0;
+	
+eval_next:
+	if (lwasm_expr_parse_term(s, p) < 0)
+		return -1;
+	
+	// expecting an operator here
+	for (opern = 0; operators[opern].opernum != LWASM_OPER_NONE; opern++)
+	{
+		for (i = 0; (*p)[i] && operators[opern].operstr[i] && (*p[i] == operators[opern].operstr[i]); i++)
+			/* do nothing */ ;
+		if (operators[opern].operstr[i] == '\0')
+			break;
+	}
+	if (operators[opern].opernum == LWASM_OPER_NONE)
+	{
+		// unrecognized operator
+		return -1;
+	}
+	
+	// the operator number in question is in opern; i is the length of the
+	// operator string
+	
+	// logic:
+	// if the precedence of this operation is <= to the "prec" flag,
+	// we simply return without advancing the input pointer; the operator
+	// will be evaluated again in the enclosing function call
+	if (operators[opern].operprec <= prec)
+		return 0;
+	
+	// logic:
+	// we have a higher precedence operator here so we will advance the
+	// input pointer to the next term and let the expression evaluator
+	// loose on it after which time we will push our operator onto the
+	// stack and then go on with the expression evaluation
+	(*p) += i;	// advance input pointer
+	
+	// evaluate next expression(s) of higher precedence
+	if (lwasm_expr_parse_expr(s, p, operators[opern].operprec) < 0)
+		return -1;
+	
+	operterm = lwasm_expr_term_create_oper(operators[opern].opernum);
+	lwasm_expr_stack_push(s, operterm);
+	lwasm_expr_term_free(operterm);
+	
+	// return if we are at the end of the expression or a subexpression
+	if (!**p || isspace(**p) || **p == ')')
+		return 0;
+
+	// continue evaluating
+	goto eval_next;	 	
+}
+
+/*
+actually evaluate an expression
+
+This happens in two stages. The first stage merely parses the expression into
+a lwasm_expr_stack_t * which is then evaluated as much as possible before the
+result is returned.
+
+Returns NULL on a parse error or otherwise invalid expression. *outp will
+contain the pointer to the next character after the expression if and only
+if there is no error. In the case of an error, *outp is undefined.
+*/
+lwasm_expr_stack_t *lwasm_expr_eval(const char *inp, const char **outp)
+{
+	lwasm_expr_stack_t *s;
+	const char *p;
+	
+	// actually parse the expression
+	p = inp;
+	s = lwasm_expr_stack_create();
+	if (lwasm_expr_parse_expr(s, &p, 0) < 0)
+		goto cleanup_error;
+	
+	// save end of expression
+	if (outp)
+		(*outp) = p;
+	
+	// return potentially partial expression
+	if (lwasm_expr_reval(s) < 0)
+		goto cleanup_error;
+	
+	return s;
+
+cleanup_error:
+	lwasm_expr_stack_free(s);
+	return NULL;
+}
+
+/*
+take an expression stack s and scan for operations that can be completed
+
+return -1 on error, 0 on no error
+
+possible errors are: division by zero or unknown operator
+
+theory of operation:
+
+scan the stack for an operator which has two constants preceding it (binary)
+or 1 constant preceding it (unary) and if found, perform the calculation
+and replace the operator and its operands with the result
+
+repeat the scan until no futher simplications are found or if there are no
+further operators or only a single term remains
+
+*/
+int lwasm_expr_reval(lwasm_expr_stack_t *s)
+{
+	lwasm_expr_stack_node_t *n;
+
+next_iter:	
+	// a single term
+	if (s -> head == s -> tail)
+		return 0;
+	
+	// search for an operator
+	for (n = s -> head; n; n = n -> next)
+	{
+		if (n -> term -> term_type == LWASM_TERM_OPER)
+		{
+			if (n -> term -> value == LWASM_OPER_NEG
+				|| n -> term -> value == LWASM_OPER_COM
+				)
+			{
+				// unary operator
+				if (n -> prev && n -> prev -> term -> term_type == LWASM_TERM_INT)
+				{
+					// a unary operator we can resolve
+					// we do the op then remove the term "n" is pointing at
+					if (n -> term -> value == LWASM_OPER_NEG)
+					{
+						n -> prev -> term -> value = -(n -> prev -> term -> value);
+					}
+					else if (n -> term -> value == LWASM_OPER_COM)
+					{
+						n -> prev -> term -> value = ~(n -> prev -> term -> value);
+					}
+					n -> prev -> next = n -> next;
+					if (n -> next)
+						n -> next -> prev = n -> prev;
+					else
+						s -> tail = n -> prev;	
+					
+					lwasm_expr_term_free(n -> term);
+					lwasm_free(n);
+					break;
+				}
+			}
+			else
+			{
+				// binary operator
+				if (n -> prev && n -> prev -> prev && n -> prev -> term -> term_type == LWASM_TERM_INT && n -> prev -> prev -> term -> term_type == LWASM_TERM_INT)
+				{
+					// a binary operator we can resolve
+					switch (n -> term -> value)
+					{
+					case LWASM_OPER_PLUS:
+						n -> prev -> prev -> term -> value += n -> prev -> term -> value;
+						break;
+
+					case LWASM_OPER_MINUS:
+						n -> prev -> prev -> term -> value -= n -> prev -> term -> value;
+						break;
+
+					case LWASM_OPER_TIMES:
+						n -> prev -> prev -> term -> value *= n -> prev -> term -> value;
+						break;
+
+					case LWASM_OPER_DIVIDE:
+						if (n -> prev -> term -> value == 0)
+							return -1;
+						n -> prev -> prev -> term -> value /= n -> prev -> term -> value;
+						break;
+
+					case LWASM_OPER_MOD:
+						if (n -> prev -> term -> value == 0)
+							return -1;
+						n -> prev -> prev -> term -> value %= n -> prev -> term -> value;
+						break;
+
+					case LWASM_OPER_INTDIV:
+						if (n -> prev -> term -> value == 0)
+							return -1;
+						n -> prev -> prev -> term -> value /= n -> prev -> term -> value;
+						break;
+
+					case LWASM_OPER_BWAND:
+						n -> prev -> prev -> term -> value &= n -> prev -> term -> value;
+						break;
+
+					case LWASM_OPER_BWOR:
+						n -> prev -> prev -> term -> value |= n -> prev -> term -> value;
+						break;
+
+					case LWASM_OPER_BWXOR:
+						n -> prev -> prev -> term -> value ^= n -> prev -> term -> value;
+						break;
+
+					case LWASM_OPER_AND:
+						n -> prev -> prev -> term -> value = (n -> prev -> term -> value && n -> prev -> prev -> term -> value) ? 1 : 0;
+						break;
+
+					case LWASM_OPER_OR:
+						n -> prev -> prev -> term -> value = (n -> prev -> term -> value || n -> prev -> prev -> term -> value) ? 1 : 0;
+						break;
+
+					default:
+						// return error if unknown operator!
+						return -1;
+					}
+
+					// now remove the two unneeded entries from the stack
+					n -> prev -> prev -> next = n -> next;
+					if (n -> next)
+						n -> next -> prev = n -> prev -> prev;
+					else
+						s -> tail = n -> prev -> prev;	
+					
+					lwasm_expr_term_free(n -> term);
+					lwasm_expr_term_free(n -> prev -> term);
+					lwasm_free(n -> prev);
+					lwasm_free(n);
+					break;
+				}
+			}
+		}
+	}
+	// note for the terminally confused about dynamic memory and pointers:
+	// n will not be NULL even after the lwasm_free calls above so
+	// this test will still work (n will be a dangling pointer)
+	// (n will only be NULL if we didn't find any operators to simplify)
+	if (n)
+		goto next_iter;
+	
+	return 0;
+}
--- a/src/expr.h	Thu Jan 01 02:26:26 2009 +0000
+++ b/src/expr.h	Thu Jan 01 23:55:18 2009 +0000
@@ -44,7 +44,7 @@
 #define LWASM_OPER_TIMES	3	// *
 #define LWASM_OPER_DIVIDE	4	// /
 #define LWASM_OPER_MOD		5	// %
-#define LWASM_OPER_INTDIV	6	// \
+#define LWASM_OPER_INTDIV	6	// \ (don't end line with \)
 #define LWASM_OPER_BWAND	7	// bitwise AND
 #define LWASM_OPER_BWOR		8	// bitwise OR
 #define LWASM_OPER_BWXOR	9	// bitwise XOR
@@ -89,6 +89,29 @@
 __expr_E__ void lwasm_expr_stack_push(lwasm_expr_stack_t *s, lwasm_expr_term_t *t);
 __expr_E__ lwasm_expr_term_t *lwasm_expr_stack_pop(lwasm_expr_stack_t *s);
 
+/*
+Evaluate an expression. The result is an lwasm_expr_stack_t pointer. If the
+expression evaluates to a constant result, the stack will contain exactly one
+value which will be a constant. Otherwise, the stack will contain the
+expression with all operations that can be evaluated completely evaluated.
+You must call lwasm_expr_stack_free() on the result when you are finished
+with it.
+*/
+__expr_E__ lwasm_expr_stack_t *lwasm_expr_eval(const char *inp, const char **outp);
+
+// simplify expression
+__expr_E__ int lwasm_expr_reval(lwasm_expr_stack_t *s);
+
+// useful macros
+// is the expression "simple" (one term)?
+#define lwasm_expr_is_simple(s) ((s) -> head == (s) -> tail)
+
+// is the expression constant?
+#define lwasm_expr_is_constant(s) (lwasm_expr_is_simple(s) && (s) -> head -> term -> term_type == LWASM_TERM_INT)
+
+// get the constant value of an expression or 0 if not constant
+#define lwasm_expr_get_value(s) (lwasm_expr_is_constant(s) ? (s) -> head -> term -> value : 0)
+
 #undef __expr_E__
 
 #endif // __expr_h_seen__