# HG changeset patch # User William Astle # Date 1444877381 21600 # Node ID b20f14edda5a45506ed7bb2e71375ecb857ecc0c # Parent bbe5401a9bf34f7eb1e48287a7798d1b66b74dc1 Completed initial conversion to new parser allowing spaces in operands Converted the remaining addressing modes. This required a complete rewrite of a large portion of the indexed addressing parser. Now the entire indexed parsing system is programmatic without cheating with a lookup table. This update also fixes forcing "0,r" by writing a literal 0,r, which is *supposed* to work. There will likely be some pseudo ops that need tweaking for space handling, especially those that take multiple operands of some description which are not expressions. (The expression parser call eats the spaces both before and after the expression, if appropriate.) diff -r bbe5401a9bf3 -r b20f14edda5a lwasm/insn_bitbit.c --- a/lwasm/insn_bitbit.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_bitbit.c Wed Oct 14 20:49:41 2015 -0600 @@ -49,7 +49,7 @@ lwasm_register_error(as, l, E_REGISTER_BAD); return; } - + lwasm_skip_to_next_token(l, p); if (*(*p)++ != ',') { lwasm_register_error(as, l, E_OPERAND_BAD); @@ -81,7 +81,7 @@ lwasm_register_error(as, l, E_OPERAND_BAD); return; } - + lwasm_skip_to_next_token(l, p); // ignore base page address modifier if (**p == '<') (*p)++; diff -r bbe5401a9bf3 -r b20f14edda5a lwasm/insn_gen.c --- a/lwasm/insn_gen.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_gen.c Wed Oct 14 20:49:41 2015 -0600 @@ -37,36 +37,35 @@ // "extra" is required due to the way OIM, EIM, TIM, and AIM work void insn_parse_gen_aux(asmstate_t *as, line_t *l, char **p, int elen) { - const char *optr2; + char *optr2; int v1, tv; lw_expr_t s; - + if (!**p) { lwasm_register_error(as, l, E_OPERAND_BAD); return; } - optr2 = *p; - while (*optr2 && !isspace(*optr2) && *optr2 != ',') optr2++ - /* do nothing */ ; - - if (*optr2 == ',' || **p == '[') + /* this is the easy case - start it [ or , means indexed */ + if (**p == ',' || 
**p == '[') { +indexed: l -> lint = -1; - l -> lint2 = 1; + l -> lint2 = 1; insn_parse_indexed_aux(as, l, p); l -> minlen = OPLEN(instab[l -> insn].ops[1]) + 1 + elen; l -> maxlen = OPLEN(instab[l -> insn].ops[1]) + 3 + elen; goto out; } + /* we have to parse the first expression to find if we have a comma after it */ + optr2 = *p; if (**p == '<') { (*p)++; l -> lint2 = 0; } - // for compatibility with asxxxx // * followed by a digit, alpha, or _, or ., or ?, or another * is "f8" else if (**p == '*') @@ -87,10 +86,17 @@ { l -> lint2 = -1; } - - l -> minlen = OPLEN(instab[l -> insn].ops[0]) + 1 + elen; - l -> maxlen = OPLEN(instab[l -> insn].ops[2]) + 2 + elen; + lwasm_skip_to_next_token(l, p); + s = lwasm_parse_expr(as, p); + + if (**p == ',') + { + /* we have an indexed mode here - reset and transfer control to indexing mode */ + lw_expr_destroy(s); + *p = optr2; + goto indexed; + } if (!s) { lwasm_register_error(as, l, E_OPERAND_BAD); @@ -99,6 +105,8 @@ lwasm_save_expr(l, 0, s); + l -> minlen = OPLEN(instab[l -> insn].ops[0]) + 1 + elen; + l -> maxlen = OPLEN(instab[l -> insn].ops[2]) + 2 + elen; if (as -> output_format == OUTPUT_OBJ && l -> lint2 == -1) { l -> lint2 = 2; diff -r bbe5401a9bf3 -r b20f14edda5a lwasm/insn_indexed.c --- a/lwasm/insn_indexed.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_indexed.c Wed Oct 14 20:49:41 2015 -0600 @@ -38,147 +38,250 @@ */ void insn_parse_indexed_aux(asmstate_t *as, line_t *l, char **p) { - struct opvals { char *opstr; int pb; }; - - static const char *regs = "X Y U S W PCRPC "; - static const struct opvals simpleindex[] = - { - {",x", 0x84}, {",y", 0xa4}, {",u", 0xc4}, {",s", 0xe4}, - {",x+", 0x80}, {",y+", 0xa0}, {",u+", 0xc0}, {",s+", 0xe0}, - {",x++", 0x81}, {",y++", 0xa1}, {",u++", 0xc1}, {",s++", 0xe1}, - {",-x", 0x82}, {",-y", 0xa2}, {",-u", 0xc2}, {",-s", 0xe2}, - {",--x", 0x83}, {",--y", 0xa3}, {",--u", 0xc3}, {",--s", 0xe3}, - {"a,x", 0x86}, {"a,y", 0xa6}, {"a,u", 0xc6}, {"a,s", 0xe6}, - {"b,x", 0x85}, 
{"b,y", 0xa5}, {"b,u", 0xc5}, {"b,s", 0xe5}, - {"e,x", 0x87}, {"e,y", 0xa7}, {"e,u", 0xc7}, {"e,s", 0xe7}, - {"f,x", 0x8a}, {"f,y", 0xaa}, {"f,u", 0xca}, {"f,s", 0xea}, - {"d,x", 0x8b}, {"d,y", 0xab}, {"d,u", 0xcb}, {"d,s", 0xeb}, - {"w,x", 0x8e}, {"w,y", 0xae}, {"w,u", 0xce}, {"w,s", 0xee}, - {",w", 0x8f}, {",w++", 0xcf}, {",--w", 0xef}, - - {"[,x]", 0x94}, {"[,y]", 0xb4}, {"[,u]", 0xd4}, {"[,s]", 0xf4}, - {"[,x++]", 0x91}, {"[,y++]", 0xb1}, {"[,u++]", 0xd1}, {"[,s++]", 0xf1}, - {"[,--x]", 0x93}, {"[,--y]", 0xb3}, {"[,--u]", 0xd3}, {"[,--s]", 0xf3}, - {"[a,x]", 0x96}, {"[a,y]", 0xb6}, {"[a,u]", 0xd6}, {"[a,s]", 0xf6}, - {"[b,x]", 0x95}, {"[b,y]", 0xb5}, {"[b,u]", 0xd5}, {"[b,s]", 0xf5}, - {"[e,x]", 0x97}, {"[e,y]", 0xb7}, {"[e,u]", 0xd7}, {"[e,s]", 0xf7}, - {"[f,x]", 0x9a}, {"[f,y]", 0xba}, {"[f,u]", 0xda}, {"[f,s]", 0xfa}, - {"[d,x]", 0x9b}, {"[d,y]", 0xbb}, {"[d,u]", 0xdb}, {"[d,s]", 0xfb}, - {"[w,x]", 0x9e}, {"[w,y]", 0xbe}, {"[w,u]", 0xde}, {"[w,s]", 0xfe}, - {"[,w]", 0x90}, {"[,w++]", 0xd0}, {"[,--w]", 0xf0}, - - { "", -1 } - }; - static const char *regs9 = "X Y U S PCRPC "; - static const struct opvals simpleindex9[] = - { - {",x", 0x84}, {",y", 0xa4}, {",u", 0xc4}, {",s", 0xe4}, - {",x+", 0x80}, {",y+", 0xa0}, {",u+", 0xc0}, {",s+", 0xe0}, - {",x++", 0x81}, {",y++", 0xa1}, {",u++", 0xc1}, {",s++", 0xe1}, - {",-x", 0x82}, {",-y", 0xa2}, {",-u", 0xc2}, {",-s", 0xe2}, - {",--x", 0x83}, {",--y", 0xa3}, {",--u", 0xc3}, {",--s", 0xe3}, - {"a,x", 0x86}, {"a,y", 0xa6}, {"a,u", 0xc6}, {"a,s", 0xe6}, - {"b,x", 0x85}, {"b,y", 0xa5}, {"b,u", 0xc5}, {"b,s", 0xe5}, - {"d,x", 0x8b}, {"d,y", 0xab}, {"d,u", 0xcb}, {"d,s", 0xeb}, - - {"[,x]", 0x94}, {"[,y]", 0xb4}, {"[,u]", 0xd4}, {"[,s]", 0xf4}, - {"[,x++]", 0x91}, {"[,y++]", 0xb1}, {"[,u++]", 0xd1}, {"[,s++]", 0xf1}, - {"[,--x]", 0x93}, {"[,--y]", 0xb3}, {"[,--u]", 0xd3}, {"[,--s]", 0xf3}, - {"[a,x]", 0x96}, {"[a,y]", 0xb6}, {"[a,u]", 0xd6}, {"[a,s]", 0xf6}, - {"[b,x]", 0x95}, {"[b,y]", 0xb5}, {"[b,u]", 0xd5}, {"[b,s]", 
0xf5}, - {"[d,x]", 0x9b}, {"[d,y]", 0xbb}, {"[d,u]", 0xdb}, {"[d,s]", 0xfb}, - - { "", -1 } - }; - char stbuf[25]; - int i, j, rn; + static const char *regs = "X Y U S W PCRPC "; + int i, rn; int indir = 0; - int f0 = 1; - const struct opvals *simples; + int f0 = 0; const char *reglist; lw_expr_t e; - + char *tstr; + + if (CURPRAGMA(l, PRAGMA_6809)) { - simples = simpleindex9; reglist = regs9; } else { - simples = simpleindex; reglist = regs; } - - // fetch out operand for lookup - for (i = 0; i < 24; i++) - { - if (*((*p) + i) && !isspace(*((*p) + i))) - stbuf[i] = *((*p) + i); - else - break; - } - stbuf[i] = '\0'; - - // now look up operand in "simple" table - if (!*((*p) + i) || isspace(*((*p) + i))) - { - // do simple lookup - for (j = 0; simples[j].opstr[0]; j++) - { - if (!strcasecmp(stbuf, simples[j].opstr)) - break; - } - if (simples[j].opstr[0]) - { - l -> pb = simples[j].pb; - l -> lint = 0; - (*p) += i; - return; - } - } - - // now do the "hard" ones - // is it indirect? if (**p == '[') { indir = 1; (*p)++; } - - // look for a "," - all indexed modes have a "," except extended indir - rn = 0; - for (i = 0; (*p)[i] && !isspace((*p)[i]); i++) + lwasm_skip_to_next_token(l, p); + if (**p == ',') { - if ((*p)[i] == ',') + int incdec = 0; + /* we have a pre-dec, post-inc, or no offset mode here */ + (*p)++; + lwasm_skip_to_next_token(l, p); + if (**p == '-') { + incdec = -1; + (*p)++; + if (**p == '-') + { + incdec = -2; + (*p)++; + } + lwasm_skip_to_next_token(l, p); + } + /* allowed registers: X, Y, U, S, or W (6309) */ + switch (**p) + { + case 'x': + case 'X': + rn = 0; + break; + + case 'y': + case 'Y': rn = 1; break; - } - } - - // if no "," and indirect, do extended indir - if (!rn && indir) - { - // eat the extended addressing indicator if present - if (**p == '>') - (*p)++; - // extended indir - l -> pb = 0x9f; - e = lwasm_parse_expr(as, p); - if (!e || **p != ']') - { + + case 'u': + case 'U': + rn = 2; + break; + + case 's': + case 'S': + rn = 3; + 
break; + + case 'w': + case 'W': + if (CURPRAGMA(l, PRAGMA_6809)) + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + rn = 4; + break; + + default: lwasm_register_error(as, l, E_OPERAND_BAD); return; } - lwasm_save_expr(l, 0, e); - (*p)++; - l -> lint = 2; + lwasm_skip_to_next_token(l, p); + if (**p == '+') + { + if (incdec != 0) + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + incdec = 1; + (*p)++; + if (**p == '+') + { + incdec = 2; + (*p)++; + } + lwasm_skip_to_next_token(l, p); + } + if (indir) + { + if (**p != ']') + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + (*p)++; + } + if (indir || rn == 4) + { + if (incdec == 1 || incdec == -1) + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + } + if (rn == 4) + { + if (indir) + { + if (incdec == 0) + i = 0x90; + else if (incdec == -2) + i = 0xF0; + else + i = 0xD0; + } + else + { + if (incdec == 0) + i = 0x8F; + else if (incdec == -2) + i = 0xEF; + else + i = 0xCF; + } + } + else + { + switch (incdec) + { + case 0: + i = 0x84; + break; + case 1: + i = 0x80; + break; + case 2: + i = 0x81; + break; + case -1: + i = 0x82; + break; + case -2: + i = 0x83; + break; + } + i = (rn << 5) | i | (indir << 4); + } + l -> pb = i; + l -> lint = 0; return; } - + i = toupper(**p); + if ( + (i == 'A' || i == 'B' || i == 'D') || + (!CURPRAGMA(l, PRAGMA_6809) && (i == 'E' || i == 'F' || i == 'W')) + ) + { + tstr = *p + 1; + lwasm_skip_to_next_token(l, &tstr); + if (*tstr == ',') + { + *p = tstr + 1; + lwasm_skip_to_next_token(l, p); + switch (**p) + { + case 'x': + case 'X': + rn = 0; + break; + + case 'y': + case 'Y': + rn = 1; + break; + + case 'u': + case 'U': + rn = 2; + break; + + case 's': + case 'S': + rn = 3; + break; + + default: + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + (*p)++; + lwasm_skip_to_next_token(l, p); + if (indir) + { + if (**p != ']') + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + (*p)++; + } + + switch (i) + { + 
case 'A': + i = 0x86; + break; + + case 'B': + i = 0x85; + break; + + case 'D': + i = 0x8B; + break; + + case 'E': + i = 0x87; + break; + + case 'F': + i = 0x8A; + break; + + case 'W': + i = 0x8E; + break; + } + l -> pb = i | (indir << 4) | (rn << 5); + l -> lint = 0; + return; + } + } + + /* we have the "expression" types now */ if (**p == '<') { l -> lint = 1; @@ -189,12 +292,17 @@ l -> lint = 2; (*p)++; } - - if (**p == '0' && *((*p)+1) == ',') + lwasm_skip_to_next_token(l, p); + if (**p == '0') { - f0 = 1; + tstr = *p + 1; + lwasm_skip_to_next_token(l, &tstr); + if (*tstr == ',') + { + f0 = 1; + } } - + // now we have to evaluate the expression e = lwasm_parse_expr(as, p); if (!e) @@ -203,14 +311,22 @@ return; } lwasm_save_expr(l, 0, e); - - // now look for a comma; if not present, explode - if (*(*p)++ != ',') + + if (**p != ',') { - lwasm_register_error(as, l, E_OPERAND_BAD); + /* if no comma, we have extended indirect */ + if (l -> lint == 1 || **p != ']') + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + (*p)++; + l -> lint = 2; + l -> pb = 0x9F; return; } - + (*p)++; + lwasm_skip_to_next_token(l, p); // now get the register rn = lwasm_lookupreg3(reglist, p); if (rn < 0) @@ -462,7 +578,8 @@ { // we know how big it is v = lw_expr_intval(e); - if (v == 0 && !CURPRAGMA(l, PRAGMA_NOINDEX0TONONE) && (l -> pb & 0x07) <= 4) + + if (v == 0 && !CURPRAGMA(l, PRAGMA_NOINDEX0TONONE) && (l -> pb & 0x07) <= 4 && ((l -> pb & 0x40) == 0)) { if ((l -> pb & 0x07) < 4) { diff -r bbe5401a9bf3 -r b20f14edda5a lwasm/insn_logicmem.c --- a/lwasm/insn_logicmem.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_logicmem.c Wed Oct 14 20:49:41 2015 -0600 @@ -50,6 +50,7 @@ } lwasm_save_expr(l, 100, s); + lwasm_skip_to_next_token(l, p); if (**p != ',' && **p != ';') { lwasm_register_error(as, l, E_OPERAND_BAD); @@ -57,7 +58,7 @@ } (*p)++; - + lwasm_skip_to_next_token(l, p); // now we have a general addressing mode - call for it insn_parse_gen_aux(as, l, p, 1); } diff -r 
bbe5401a9bf3 -r b20f14edda5a lwasm/insn_rlist.c --- a/lwasm/insn_rlist.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_rlist.c Wed Oct 14 20:49:41 2015 -0600 @@ -41,12 +41,16 @@ lwasm_register_error2(as, l, E_REGISTER_BAD, "'%s'", *p); return; } + lwasm_skip_to_next_token(l, p); if (**p && **p != ',' && !isspace(**p)) { lwasm_register_error(as, l, E_OPERAND_BAD); } if (**p == ',') + { (*p)++; + lwasm_skip_to_next_token(l, p); + } if ((instab[l -> insn].ops[0]) & 2) { // pshu/pulu diff -r bbe5401a9bf3 -r b20f14edda5a lwasm/insn_rtor.c --- a/lwasm/insn_rtor.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_rtor.c Wed Oct 14 20:49:41 2015 -0600 @@ -35,6 +35,7 @@ // A,B,CC,DP,0,0,E,F r0 = lwasm_lookupreg2(!CURPRAGMA(l, PRAGMA_6809) ? regs : regs9, p); + lwasm_skip_to_next_token(l, p); if (r0 < 0 || *(*p)++ != ',') { lwasm_register_error(as, l, E_OPERAND_BAD); @@ -42,6 +43,7 @@ } else { + lwasm_skip_to_next_token(l, p); r1 = lwasm_lookupreg2(!CURPRAGMA(l, PRAGMA_6809) ? regs : regs9, p); if (r1 < 0) { diff -r bbe5401a9bf3 -r b20f14edda5a lwasm/insn_tfm.c --- a/lwasm/insn_tfm.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_tfm.c Wed Oct 14 20:49:41 2015 -0600 @@ -48,11 +48,13 @@ (*p)++; tfm = 2; } + lwasm_skip_to_next_token(l, p); if (*(*p)++ != ',') { lwasm_register_error(as, l, E_UNKNOWN_OPERATION); return; } + lwasm_skip_to_next_token(l, p); c = strchr(reglist, toupper(*(*p)++)); if (!c) { @@ -131,6 +133,7 @@ // D,X,Y,U,S,PC,W,V // A,B,CC,DP,0,0,E,F r0 = lwasm_lookupreg2(regs, p); + lwasm_skip_to_next_token(l, p); if (r0 < 0 || *(*p)++ != ',') { lwasm_register_error(as, l, E_OPERAND_BAD); @@ -138,6 +141,7 @@ } else { + lwasm_skip_to_next_token(l, p); r1 = lwasm_lookupreg2(regs, p); if (r1 < 0) {