changeset 402:b20f14edda5a

Completed initial conversion to new parser allowing spaces in operands. Converted the remaining addressing modes. This required a complete rewrite of a large portion of the indexed addressing parser. Now the entire indexed parsing system is programmatic without cheating with a lookup table. This update also fixes the "force 0,r" by writing a literal 0,r, which is *supposed* to work. There will likely be some pseudo ops that need tweaking for space handling, especially those that take multiple operands of some description which are not expressions. (The expression parser call eats the spaces both before and after the expression, if appropriate.)
author William Astle <lost@l-w.ca>
date Wed, 14 Oct 2015 20:49:41 -0600
parents bbe5401a9bf3
children f5a88f147fae
files lwasm/insn_bitbit.c lwasm/insn_gen.c lwasm/insn_indexed.c lwasm/insn_logicmem.c lwasm/insn_rlist.c lwasm/insn_rtor.c lwasm/insn_tfm.c
diffstat 7 files changed, 274 insertions(+), 138 deletions(-) [+]
line wrap: on
line diff
--- a/lwasm/insn_bitbit.c	Wed Oct 14 19:15:41 2015 -0600
+++ b/lwasm/insn_bitbit.c	Wed Oct 14 20:49:41 2015 -0600
@@ -49,7 +49,7 @@
 		lwasm_register_error(as, l, E_REGISTER_BAD);
 		return;
 	}
-	
+	lwasm_skip_to_next_token(l, p);
 	if (*(*p)++ != ',')
 	{
 		lwasm_register_error(as, l, E_OPERAND_BAD);
@@ -81,7 +81,7 @@
 		lwasm_register_error(as, l, E_OPERAND_BAD);
 		return;
 	}
-
+	lwasm_skip_to_next_token(l, p);
 	// ignore base page address modifier
 	if (**p == '<')
 		(*p)++;
--- a/lwasm/insn_gen.c	Wed Oct 14 19:15:41 2015 -0600
+++ b/lwasm/insn_gen.c	Wed Oct 14 20:49:41 2015 -0600
@@ -37,36 +37,35 @@
 // "extra" is required due to the way OIM, EIM, TIM, and AIM work
 void insn_parse_gen_aux(asmstate_t *as, line_t *l, char **p, int elen)
 {
-	const char *optr2;
+	char *optr2;
 	int v1, tv;
 	lw_expr_t s;
-
+	
 	if (!**p)
 	{
 		lwasm_register_error(as, l, E_OPERAND_BAD);
 		return;
 	}
 
-	optr2 = *p;
-	while (*optr2 && !isspace(*optr2) && *optr2 != ',') optr2++
-		/* do nothing */ ;
-
-	if (*optr2 == ',' || **p == '[')
+	/* this is the easy case - starting with [ or , means indexed */
+	if (**p == ',' || **p == '[')
 	{
+indexed:
 		l -> lint = -1;
-		l -> lint2 = 1;
+		l -> lint2  = 1;
 		insn_parse_indexed_aux(as, l, p);
 		l -> minlen = OPLEN(instab[l -> insn].ops[1]) + 1 + elen;
 		l -> maxlen = OPLEN(instab[l -> insn].ops[1]) + 3 + elen;
 		goto out;
 	}
 
+	/* we have to parse the first expression to find if we have a comma after it */
+	optr2 = *p;
 	if (**p == '<')
 	{
 		(*p)++;
 		l -> lint2 = 0;
 	}
-
 	// for compatibility with asxxxx
 	// * followed by a digit, alpha, or _, or ., or ?, or another * is "f8"
 	else if (**p == '*')
@@ -87,10 +86,17 @@
 	{
 		l -> lint2 = -1;
 	}
-
-	l -> minlen = OPLEN(instab[l -> insn].ops[0]) + 1 + elen;
-	l -> maxlen = OPLEN(instab[l -> insn].ops[2]) + 2 + elen;
+	lwasm_skip_to_next_token(l, p);
+	
 	s = lwasm_parse_expr(as, p);
+	
+	if (**p == ',')
+	{
+		/* we have an indexed mode here - reset and transfer control to indexing mode */
+		lw_expr_destroy(s);
+		*p = optr2;
+		goto indexed;
+	}
 	if (!s)
 	{
 		lwasm_register_error(as, l, E_OPERAND_BAD);
@@ -99,6 +105,8 @@
 	
 	lwasm_save_expr(l, 0, s);
 
+	l -> minlen = OPLEN(instab[l -> insn].ops[0]) + 1 + elen;
+	l -> maxlen = OPLEN(instab[l -> insn].ops[2]) + 2 + elen;
 	if (as -> output_format == OUTPUT_OBJ && l -> lint2 == -1)
 	{
 		l -> lint2 = 2;
--- a/lwasm/insn_indexed.c	Wed Oct 14 19:15:41 2015 -0600
+++ b/lwasm/insn_indexed.c	Wed Oct 14 20:49:41 2015 -0600
@@ -38,147 +38,250 @@
 */
 void insn_parse_indexed_aux(asmstate_t *as, line_t *l, char **p)
 {
-	struct opvals { char *opstr; int pb; };
-	
-	static const char *regs = "X  Y  U  S  W  PCRPC ";
-	static const struct opvals simpleindex[] =
-	{
-		{",x", 0x84},		{",y", 0xa4},		{",u", 0xc4},		{",s", 0xe4},
-		{",x+", 0x80},		{",y+", 0xa0},		{",u+", 0xc0},		{",s+", 0xe0},
-		{",x++", 0x81},		{",y++", 0xa1},		{",u++", 0xc1},		{",s++", 0xe1},
-		{",-x", 0x82},		{",-y", 0xa2},		{",-u", 0xc2},		{",-s", 0xe2},
-		{",--x", 0x83},		{",--y", 0xa3},		{",--u", 0xc3},		{",--s", 0xe3},
-		{"a,x", 0x86},		{"a,y", 0xa6},		{"a,u", 0xc6},		{"a,s", 0xe6},
-		{"b,x", 0x85},		{"b,y", 0xa5},		{"b,u", 0xc5},		{"b,s", 0xe5},
-		{"e,x", 0x87},		{"e,y", 0xa7},		{"e,u", 0xc7},		{"e,s", 0xe7},
-		{"f,x",	0x8a},		{"f,y",	0xaa},		{"f,u", 0xca},		{"f,s", 0xea},
-		{"d,x", 0x8b},		{"d,y", 0xab},		{"d,u", 0xcb},		{"d,s", 0xeb},
-		{"w,x", 0x8e},		{"w,y", 0xae},		{"w,u", 0xce},		{"w,s", 0xee},
-		{",w", 0x8f},							{",w++", 0xcf},		{",--w", 0xef},
-		
-		{"[,x]", 0x94},		{"[,y]", 0xb4},		{"[,u]", 0xd4},		{"[,s]", 0xf4},
-		{"[,x++]", 0x91},	{"[,y++]", 0xb1},	{"[,u++]", 0xd1},	{"[,s++]", 0xf1},
-		{"[,--x]", 0x93},	{"[,--y]", 0xb3},	{"[,--u]", 0xd3},	{"[,--s]", 0xf3},
-		{"[a,x]", 0x96},	{"[a,y]", 0xb6},	{"[a,u]", 0xd6},	{"[a,s]", 0xf6},
-		{"[b,x]", 0x95},	{"[b,y]", 0xb5},	{"[b,u]", 0xd5},	{"[b,s]", 0xf5},
-		{"[e,x]", 0x97},	{"[e,y]", 0xb7},	{"[e,u]", 0xd7},	{"[e,s]", 0xf7},
-		{"[f,x]", 0x9a},	{"[f,y]", 0xba},	{"[f,u]", 0xda},	{"[f,s]", 0xfa},
-		{"[d,x]", 0x9b},	{"[d,y]", 0xbb},	{"[d,u]", 0xdb},	{"[d,s]", 0xfb},
-		{"[w,x]", 0x9e},	{"[w,y]", 0xbe},	{"[w,u]", 0xde},	{"[w,s]", 0xfe},
-		{"[,w]", 0x90},							{"[,w++]", 0xd0},	{"[,--w]", 0xf0},
-		
-		{ "", -1 }
-	};
-
 	static const char *regs9 = "X  Y  U  S     PCRPC ";
-	static const struct opvals simpleindex9[] =
-	{
-		{",x", 0x84},		{",y", 0xa4},		{",u", 0xc4},		{",s", 0xe4},
-		{",x+", 0x80},		{",y+", 0xa0},		{",u+", 0xc0},		{",s+", 0xe0},
-		{",x++", 0x81},		{",y++", 0xa1},		{",u++", 0xc1},		{",s++", 0xe1},
-		{",-x", 0x82},		{",-y", 0xa2},		{",-u", 0xc2},		{",-s", 0xe2},
-		{",--x", 0x83},		{",--y", 0xa3},		{",--u", 0xc3},		{",--s", 0xe3},
-		{"a,x", 0x86},		{"a,y", 0xa6},		{"a,u", 0xc6},		{"a,s", 0xe6},
-		{"b,x", 0x85},		{"b,y", 0xa5},		{"b,u", 0xc5},		{"b,s", 0xe5},
-		{"d,x", 0x8b},		{"d,y", 0xab},		{"d,u", 0xcb},		{"d,s", 0xeb},
-		
-		{"[,x]", 0x94},		{"[,y]", 0xb4},		{"[,u]", 0xd4},		{"[,s]", 0xf4},
-		{"[,x++]", 0x91},	{"[,y++]", 0xb1},	{"[,u++]", 0xd1},	{"[,s++]", 0xf1},
-		{"[,--x]", 0x93},	{"[,--y]", 0xb3},	{"[,--u]", 0xd3},	{"[,--s]", 0xf3},
-		{"[a,x]", 0x96},	{"[a,y]", 0xb6},	{"[a,u]", 0xd6},	{"[a,s]", 0xf6},
-		{"[b,x]", 0x95},	{"[b,y]", 0xb5},	{"[b,u]", 0xd5},	{"[b,s]", 0xf5},
-		{"[d,x]", 0x9b},	{"[d,y]", 0xbb},	{"[d,u]", 0xdb},	{"[d,s]", 0xfb},
-		
-		{ "", -1 }
-	};
-	char stbuf[25];
-	int i, j, rn;
+	static const char *regs  = "X  Y  U  S  W  PCRPC ";
+	int i, rn;
 	int indir = 0;
-	int f0 = 1;
-	const struct opvals *simples;
+	int f0 = 0;
 	const char *reglist;
 	lw_expr_t e;
-		
+	char *tstr;
+	
+
 	if (CURPRAGMA(l, PRAGMA_6809))
 	{
-		simples = simpleindex9;
 		reglist = regs9;
 	}
 	else
 	{
-		simples = simpleindex;
 		reglist = regs;
 	}
-	
-	// fetch out operand for lookup
-	for (i = 0; i < 24; i++)
-	{
-		if (*((*p) + i) && !isspace(*((*p) + i)))
-			stbuf[i] = *((*p) + i);
-		else
-			break;
-	}
-	stbuf[i] = '\0';
-	
-	// now look up operand in "simple" table
-	if (!*((*p) + i) || isspace(*((*p) + i)))
-	{
-		// do simple lookup
-		for (j = 0; simples[j].opstr[0]; j++)
-		{
-			if (!strcasecmp(stbuf, simples[j].opstr))
-				break;
-		}
-		if (simples[j].opstr[0])
-		{
-			l -> pb = simples[j].pb;
-			l -> lint = 0;
-			(*p) += i;
-			return;
-		}
-	}
-
-	// now do the "hard" ones
-
 	// is it indirect?
 	if (**p == '[')
 	{
 		indir = 1;
 		(*p)++;
 	}
-	
-	// look for a "," - all indexed modes have a "," except extended indir
-	rn = 0;
-	for (i = 0; (*p)[i] && !isspace((*p)[i]); i++)
+	lwasm_skip_to_next_token(l, p);
+	if (**p == ',')
 	{
-		if ((*p)[i] == ',')
+		int incdec = 0;
+		/* we have a pre-dec, post-inc, or no offset mode here */
+		(*p)++;
+		lwasm_skip_to_next_token(l, p);
+		if (**p == '-')
 		{
+			incdec = -1;
+			(*p)++;
+			if (**p == '-')
+			{
+				incdec = -2;
+				(*p)++;
+			}
+			lwasm_skip_to_next_token(l, p);
+		}
+		/* allowed registers: X, Y, U, S, or W (6309) */
+		switch (**p)
+		{
+		case 'x':
+		case 'X':
+			rn = 0;
+			break;
+		
+		case 'y':
+		case 'Y':
 			rn = 1;
 			break;
-		}
-	}
-
-	// if no "," and indirect, do extended indir
-	if (!rn && indir)
-	{
-		// eat the extended addressing indicator if present
-		if (**p == '>')
-			(*p)++;
-		// extended indir
-		l -> pb = 0x9f;
-		e = lwasm_parse_expr(as, p);
-		if (!e || **p != ']')
-		{
+			
+		case 'u':
+		case 'U':
+			rn = 2;
+			break;
+			
+		case 's':
+		case 'S':
+			rn = 3;
+			break;
+			
+		case 'w':
+		case 'W':
+			if (CURPRAGMA(l, PRAGMA_6809))
+			{
+				lwasm_register_error(as, l, E_OPERAND_BAD);
+				return;
+			}
+			rn = 4;
+			break;
+			
+		default:
 			lwasm_register_error(as, l, E_OPERAND_BAD);
 			return;
 		}
-		lwasm_save_expr(l, 0, e);
-		
 		(*p)++;
-		l -> lint = 2;
+		lwasm_skip_to_next_token(l, p);
+		if (**p == '+')
+		{
+			if (incdec != 0)
+			{
+				lwasm_register_error(as, l, E_OPERAND_BAD);
+				return;
+			}
+			incdec = 1;
+			(*p)++;
+			if (**p == '+')
+			{
+				incdec = 2;
+				(*p)++;
+			}
+			lwasm_skip_to_next_token(l, p);
+		}
+		if (indir)
+		{
+			if (**p != ']')
+			{
+				lwasm_register_error(as, l, E_OPERAND_BAD);
+				return;
+			}
+			(*p)++;
+		}
+		if (indir || rn == 4)
+		{
+			if (incdec == 1 || incdec == -1)
+			{
+				lwasm_register_error(as, l, E_OPERAND_BAD);
+				return;
+			}
+		}
+		if (rn == 4)
+		{
+			if (indir)
+			{
+				if (incdec == 0)
+					i = 0x90;
+				else if (incdec == -2)
+					i = 0xF0;
+				else
+					i = 0xD0;
+			}
+			else
+			{
+				if (incdec == 0)
+					i = 0x8F;
+				else if (incdec == -2)
+					i = 0xEF;
+				else
+					i = 0xCF;
+			}
+		}
+		else
+		{
+			switch (incdec)
+			{
+			case 0:
+				i = 0x84;
+				break;
+			case 1:
+				i = 0x80;
+				break;
+			case 2:
+				i = 0x81;
+				break;
+			case -1:
+				i = 0x82;
+				break;
+			case -2:
+				i = 0x83;
+				break;
+			}
+			i = (rn << 5) | i | (indir << 4);
+		}
+		l -> pb = i;
+		l -> lint = 0;
 		return;
 	}
-
+	i = toupper(**p);
+	if (
+			(i == 'A' || i == 'B' || i == 'D') ||
+			(!CURPRAGMA(l, PRAGMA_6809) && (i == 'E' || i == 'F' || i == 'W'))
+	   )
+	{
+		tstr = *p + 1;
+		lwasm_skip_to_next_token(l, &tstr);
+		if (*tstr == ',')
+		{
+			*p = tstr + 1;
+			lwasm_skip_to_next_token(l, p);
+			switch (**p)
+			{
+			case 'x':
+			case 'X':
+				rn = 0;
+				break;
+		
+			case 'y':
+			case 'Y':
+				rn = 1;
+				break;
+			
+			case 'u':
+			case 'U':
+				rn = 2;
+				break;
+			
+			case 's':
+			case 'S':
+				rn = 3;
+				break;
+			
+			default:
+				lwasm_register_error(as, l, E_OPERAND_BAD);
+				return;
+			}
+			(*p)++;
+			lwasm_skip_to_next_token(l, p);
+			if (indir)
+			{
+				if (**p != ']')
+				{
+					lwasm_register_error(as, l, E_OPERAND_BAD);
+					return;
+				}
+				(*p)++;
+			}
+			
+			switch (i)
+			{
+			case 'A':
+				i = 0x86;
+				break;
+			
+			case 'B':
+				i = 0x85;
+				break;
+			
+			case 'D':
+				i = 0x8B;
+				break;
+			
+			case 'E':
+				i = 0x87;
+				break;
+			
+			case 'F':
+				i = 0x8A;
+				break;
+			
+			case 'W':
+				i = 0x8E;
+				break;
+			}
+			l -> pb = i | (indir << 4) | (rn << 5);
+			l -> lint = 0;
+			return;
+		}
+	}
+	
+	/* we have the "expression" types now */
 	if (**p == '<')
 	{
 		l -> lint = 1;
@@ -189,12 +292,17 @@
 		l -> lint = 2;
 		(*p)++;
 	}
-
-	if (**p == '0' && *((*p)+1) == ',')
+	lwasm_skip_to_next_token(l, p);
+	if (**p == '0')
 	{
-		f0 = 1;
+		tstr = *p + 1;
+		lwasm_skip_to_next_token(l, &tstr);
+		if (*tstr == ',')
+		{
+			f0 = 1;
+		}
 	}
-	
+
 	// now we have to evaluate the expression
 	e = lwasm_parse_expr(as, p);
 	if (!e)
@@ -203,14 +311,22 @@
 		return;
 	}
 	lwasm_save_expr(l, 0, e);
-
-	// now look for a comma; if not present, explode
-	if (*(*p)++ != ',')
+	
+	if (**p != ',')
 	{
-		lwasm_register_error(as, l, E_OPERAND_BAD);
+		/* if no comma, we have extended indirect */
+		if (l -> lint == 1 || **p != ']')
+		{
+			lwasm_register_error(as, l, E_OPERAND_BAD);
+			return;
+		}
+		(*p)++;
+		l -> lint = 2;
+		l -> pb = 0x9F;
 		return;
 	}
-	
+	(*p)++;
+	lwasm_skip_to_next_token(l, p);
 	// now get the register
 	rn = lwasm_lookupreg3(reglist, p);
 	if (rn < 0)
@@ -462,7 +578,8 @@
 	{
 		// we know how big it is
 		v = lw_expr_intval(e);
-		if (v == 0 && !CURPRAGMA(l, PRAGMA_NOINDEX0TONONE) && (l -> pb & 0x07) <= 4)
+			
+		if (v == 0 && !CURPRAGMA(l, PRAGMA_NOINDEX0TONONE) && (l -> pb & 0x07) <= 4 && ((l -> pb & 0x40) == 0))
 		{
 			if ((l -> pb & 0x07) < 4)
 			{
--- a/lwasm/insn_logicmem.c	Wed Oct 14 19:15:41 2015 -0600
+++ b/lwasm/insn_logicmem.c	Wed Oct 14 20:49:41 2015 -0600
@@ -50,6 +50,7 @@
 	}
 	
 	lwasm_save_expr(l, 100, s);
+	lwasm_skip_to_next_token(l, p);
 	if (**p != ',' && **p != ';')
 	{
 		lwasm_register_error(as, l, E_OPERAND_BAD);
@@ -57,7 +58,7 @@
 	}
 	
 	(*p)++;
-
+	lwasm_skip_to_next_token(l, p);
 	// now we have a general addressing mode - call for it
 	insn_parse_gen_aux(as, l, p, 1);
 }
--- a/lwasm/insn_rlist.c	Wed Oct 14 19:15:41 2015 -0600
+++ b/lwasm/insn_rlist.c	Wed Oct 14 20:49:41 2015 -0600
@@ -41,12 +41,16 @@
 			lwasm_register_error2(as, l, E_REGISTER_BAD, "'%s'", *p);
 			return;
 		}
+		lwasm_skip_to_next_token(l, p);
 		if (**p && **p != ',' && !isspace(**p))
 		{
 			lwasm_register_error(as, l, E_OPERAND_BAD);
 		}
 		if (**p == ',')
+		{
 			(*p)++;
+			lwasm_skip_to_next_token(l, p);
+		}
 		if ((instab[l -> insn].ops[0]) & 2)
 		{
 			// pshu/pulu
--- a/lwasm/insn_rtor.c	Wed Oct 14 19:15:41 2015 -0600
+++ b/lwasm/insn_rtor.c	Wed Oct 14 20:49:41 2015 -0600
@@ -35,6 +35,7 @@
 	// A,B,CC,DP,0,0,E,F
 
 	r0 = lwasm_lookupreg2(!CURPRAGMA(l, PRAGMA_6809) ? regs : regs9, p);
+	lwasm_skip_to_next_token(l, p);
 	if (r0 < 0 || *(*p)++ != ',')
 	{
 		lwasm_register_error(as, l, E_OPERAND_BAD);
@@ -42,6 +43,7 @@
 	}
 	else
 	{
+		lwasm_skip_to_next_token(l, p);
 		r1 = lwasm_lookupreg2(!CURPRAGMA(l, PRAGMA_6809) ? regs : regs9, p);
 		if (r1 < 0)
 		{
--- a/lwasm/insn_tfm.c	Wed Oct 14 19:15:41 2015 -0600
+++ b/lwasm/insn_tfm.c	Wed Oct 14 20:49:41 2015 -0600
@@ -48,11 +48,13 @@
 		(*p)++;
 		tfm = 2;
 	}
+	lwasm_skip_to_next_token(l, p);
 	if (*(*p)++ != ',')
 	{
 		lwasm_register_error(as, l, E_UNKNOWN_OPERATION);
 		return;
 	}
+	lwasm_skip_to_next_token(l, p);
 	c = strchr(reglist, toupper(*(*p)++));
 	if (!c)
 	{
@@ -131,6 +133,7 @@
 	// D,X,Y,U,S,PC,W,V
 	// A,B,CC,DP,0,0,E,F
 	r0 = lwasm_lookupreg2(regs, p);
+	lwasm_skip_to_next_token(l, p);
 	if (r0 < 0 || *(*p)++ != ',')
 	{
 		lwasm_register_error(as, l, E_OPERAND_BAD);
@@ -138,6 +141,7 @@
 	}
 	else
 	{
+		lwasm_skip_to_next_token(l, p);
 		r1 = lwasm_lookupreg2(regs, p);
 		if (r1 < 0)
 		{