comparison src/lwasm.c @ 0:57495da01900

Initial checking of LWASM
author lost
date Fri, 03 Oct 2008 02:44:20 +0000
parents
children 34568fab6058
comparison
equal deleted inserted replaced
-1:000000000000 0:57495da01900
1 /*
2 * lwasm.c
3 *
4 * main code for lwasm
5 */
6
7 #include <ctype.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #define __lwasm_c_seen__
13 #include "instab.h"
14 #include "lwasm.h"
15
16 void lwasm_read_file(asmstate_t *as, char *fname);
17 extern int add_macro_line(asmstate_t *as, sourceline_t *cl, char *optr);
18 extern void expand_macro(asmstate_t *as, sourceline_t *cl, char **optr);
19
20 #define debug(mess, ...) do { if (as->debug) { fprintf(stderr, "DEBUG: "); fprintf(stderr, (mess), ## __VA_ARGS__); } } while (0)
21
22 void register_error(asmstate_t *as, sourceline_t *cl, int errcode)
23 {
24 errortab_t *e;
25
26 e = malloc(sizeof(errortab_t));
27
28 e -> errnum = errcode;
29 e -> line = cl;
30 e -> next = cl -> errors;
31 cl -> errors = e;
32
33 as -> errorcount++;
34 }
35
36 int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val);
37
// return the smallest of the four values
int eval_min(int v1, int v2, int v3, int v4)
{
	int lowest = v1;

	lowest = (v2 < lowest) ? v2 : lowest;
	lowest = (v3 < lowest) ? v3 : lowest;
	lowest = (v4 < lowest) ? v4 : lowest;

	return lowest;
}
48
// return the largest of the four values
int eval_max(int v1, int v2, int v3, int v4)
{
	int highest = v1;

	highest = (v2 > highest) ? v2 : highest;
	highest = (v3 > highest) ? v3 : highest;
	highest = (v4 > highest) ? v4 : highest;

	return highest;
}
59
// Look up a register name in a table of three character entries.
//
// rlist is a string of consecutive three character entries in upper case;
// names shorter than three characters are padded on the right with
// spaces. The text at *str is compared case insensitively against each
// entry in turn and the first entry that matches wins.
//
// Returns the zero based index of the matching entry and advances *str
// past the characters of the name (1, 2 or 3), or returns -1 and leaves
// *str untouched if nothing matches.
//
// Note that the character following a short name is not examined, so an
// entry "A  " matches the start of "AB".
int lookupreg3(const char *rlist, char **str)
{
	const char *s = *str;
	const char *entry;
	int rval = 0;

	for (entry = rlist; *entry; entry += 3, rval++)
	{
		int len;

		// the casts keep toupper() defined for characters with the
		// high bit set when plain char is signed
		if (toupper((unsigned char)s[0]) != entry[0])
			continue;

		if (entry[1] == ' ')
			len = 1;
		else if (toupper((unsigned char)s[1]) != entry[1])
			continue;
		else if (entry[2] == ' ')
			len = 2;
		else if (toupper((unsigned char)s[2]) != entry[2])
			continue;
		else
			len = 3;

		*str += len;
		return rval;
	}

	return -1;
}
107
108
// Look up a register name in a table of two character entries.
//
// reglist is a string of consecutive two character entries in upper case;
// single character names are padded on the right with a space. The text
// at *str is compared case insensitively against each entry in turn. A
// single character entry only matches when the next input character is
// not a letter, so "A " does not match the start of "AB".
//
// Returns the zero based index of the matching entry and advances *str
// past the name (1 or 2 characters), or returns -1 and leaves *str
// untouched if nothing matches.
int lookupreg(const char *reglist, char **str)
{
	// viewed as unsigned char so that the ctype calls below are defined
	// for characters with the high bit set
	const unsigned char *s = (const unsigned char *)*str;
	int rval;

	for (rval = 0; *reglist; reglist += 2, rval++)
	{
		if (toupper(s[0]) != reglist[0])
			continue;

		// single character register name
		if (reglist[1] == ' ' && !isalpha(s[1]))
		{
			*str += 1;
			return rval;
		}

		// two character register name
		if (toupper(s[1]) == reglist[1])
		{
			*str += 2;
			return rval;
		}
	}

	return -1;
}
133
134 void addcodebyte(asmstate_t *as, sourceline_t *cl, int cb)
135 {
136 cl -> len += 1;
137 if (as -> passnum != 2)
138 return;
139
140 if (cl -> numcodebytes >= cl -> codesize)
141 {
142 cl -> codebytes = realloc(cl -> codebytes, cl -> codesize + 32);
143 cl -> codesize += 32;
144 }
145 debug("EMIT: %02x\n", cb & 0xff);
146 cl -> codebytes[cl -> numcodebytes++] = cb & 0xFF;
147 }
148
149 // parse a symble out of the line and return a pointer
150 // to a static pointer
151 // return NULL if not a symbol or a bad symbol
152 char *parse_symbol(asmstate_t *as, char **ptr)
153 {
154 static char *symptr = NULL;
155 char *tptr = *ptr;
156 int sl = 0;
157
158 // symbol can start with _,a-z,A-Z
159
160 if (!strchr(SYMCHAR_START, **ptr))
161 return NULL;
162
163 while (*tptr && !isspace(*tptr) && strchr(SYMCHAR, *tptr))
164 {
165 tptr++;
166 sl++;
167 }
168
169 symptr = realloc(symptr, sl + 1);
170 tptr = symptr;
171 while (sl)
172 {
173 *tptr++ = *(*ptr)++;
174 sl--;
175 }
176 *tptr = '\0';
177 return symptr;
178 }
179
180 // resolve an instruction
181 void resolve_insn(asmstate_t *as, sourceline_t *cl)
182 {
183 char *optr;
184 char opbuf[MAX_OP_LEN + 1];
185 char *symbol = NULL;
186 int c;
187
188 cl -> code_symloc = as -> addr;
189
190 cl -> addrset = 0;
191 cl -> isequ = 0;
192 cl -> len = 0;
193 cl -> undef = 0;
194
195 // only parse line on first pass
196 if (as -> passnum == 1)
197 {
198 optr = cl -> line;
199 if (!*optr || *optr == '*' || *optr == ';')
200 {
201 cl -> opcode = -1;
202 cl -> remainder = cl -> line;
203 return;
204 }
205
206 if (!isspace(*optr))
207 {
208 symbol = parse_symbol(as, &optr);
209 if (*optr && !isspace(*optr) && !(as -> inmacro))
210 {
211 errorp1(ERR_BADSYM);
212 while (*optr && !isspace(*optr))
213 optr++;
214 }
215 if (symbol)
216 {
217 cl -> symstr = strdup(symbol);
218 cl -> hassym = 1;
219 }
220 }
221
222 while (isspace(*optr))
223 optr++;
224
225 // parse opcode
226 if (*optr && *optr != ';')
227 {
228 c = 0;
229 while (c < MAX_OP_LEN && *optr && !isspace(*optr))
230 {
231 opbuf[c++] = *optr++;
232 }
233 opbuf[c] = '\0';
234 if (*optr && !isspace(*optr) && !(as -> inmacro))
235 {
236 errorp1(ERR_BADOP);
237 cl -> opcode = -1;
238 }
239 else
240 {
241 cl -> opcstr = strdup(opbuf);
242 for (c = 0; instab[c].opcode; c++)
243 {
244 if (!strcasecmp(opbuf, instab[c].opcode))
245 break;
246 }
247 if (!instab[c].opcode && opbuf[0] == '*')
248 {
249 cl -> opcode = -1;
250 }
251 else if (!instab[c].opcode && !(as -> inmacro))
252 {
253 cl -> opcode = -1;
254
255 // look up macro
256 if (as -> macros)
257 {
258 macrotab_t *m;
259
260 for (m = as -> macros; m; m = m -> next)
261 {
262 if (!strcmp(m -> name, opbuf))
263 break;
264 }
265 if (m)
266 {
267 // we have a macro here
268 cl -> macro = m;
269 while (*optr && isspace(*optr))
270 optr++;
271 expand_macro(as, cl, &optr);
272 return;
273 }
274 else
275 {
276 errorp1(ERR_BADOP);
277 }
278 }
279 else
280 {
281 errorp1(ERR_BADOP);
282 }
283 }
284 else
285 cl -> opcode = c;
286 }
287 }
288 else
289 cl -> opcode = -1;
290
291 if (as -> inmacro && cl -> opcode >= 0 && instab[cl -> opcode].specialnum != SPECIAL_ENDM)
292 {
293 add_macro_line(as, cl, cl -> line);
294 cl -> opcode = -1;
295 cl -> remainder = cl -> line;
296 cl -> opcstr = NULL;
297 cl -> operstr = NULL;
298 cl -> symstr = NULL;
299 cl -> hassym = 0;
300 cl -> macrodef = 1;
301 return;
302 }
303 // parse operand
304 while (*optr && isspace(*optr))
305 optr++;
306
307 cl -> operstr = optr;
308 }
309 else
310 optr = cl -> operstr;
311
312 if (as -> skipcond)
313 {
314 // if skipping a condition, need to skip a macro
315 if (cl -> opcode >= 0)
316 {
317 if (instab[cl -> opcode].specialnum == SPECIAL_MACRO)
318 {
319 as -> skipmacro = 1;
320 }
321 else if (instab[cl -> opcode].specialnum == SPECIAL_ENDM)
322 {
323 as -> skipmacro = 0;
324 }
325 else if (instab[cl -> opcode].specialnum == SPECIAL_COND && !(as -> skipmacro))
326 {
327 as -> skipcount++;
328 }
329 else if (instab[cl -> opcode].specialnum == SPECIAL_ENDC && !(as -> skipmacro))
330 {
331 as -> skipcount--;
332 if (as -> skipcount <= 0)
333 {
334 as -> skipcond = 0;
335 as -> noelse = 0;
336 }
337 }
338 else if (instab[cl -> opcode].specialnum == SPECIAL_ELSE && !(as -> skipmacro))
339 {
340 if (as -> skipcount == 1)
341 {
342 as -> skipcount = 0;
343 as -> skipcond = 0;
344 as -> noelse = 1;
345 return;
346 }
347 }
348 }
349 if (as -> skipcond)
350 cl -> skipped = 1;
351 return;
352 }
353
354 // do the code thing
355 // on pass 1, no code is generated
356 // on pass 2, code is generated using the "emit()" macro
357 if (cl -> opcode >= 0)
358 {
359 if (instab[cl -> opcode].opfn)
360 {
361 (*(instab[cl -> opcode].opfn))(as, cl, &optr);
362 if (as -> passnum == 1)
363 {
364 if (*optr)
365 {
366 char *t = optr;
367 char t2;
368
369 t2 = *optr;
370 cl -> operstr = strdup(cl -> operstr);
371 *optr = t2;
372 while (*t && isspace(*t))
373 t++;
374 cl -> remainder = strdup(t);
375
376 }
377 cl -> remainder = optr;
378 }
379 }
380 else
381 {
382 errorp1(ERR_BADOP);
383 cl -> opcode = -1;
384 }
385 }
386 // address of the symbol may have been changed by a pseudo op
387 // so we couldn't register it above
388 // that means it may turn out to be a "forward ref" in pass 1
389 if (cl -> hassym)
390 {
391 register_symbol(as, cl, cl -> symstr, cl -> code_symloc, cl -> isset ? SYMFLAG_SET : SYMFLAG_NONE);
392 }
393
394 as -> addr += cl -> len;
395 }
396
397 void generate_code(asmstate_t *as)
398 {
399 sourceline_t *cl;
400
401 as -> addr = 0;
402 as -> dpval = 0;
403 as -> passnum = 2;
404 for (cl = as -> source_head; cl; cl = cl -> next)
405 {
406 resolve_insn(as, cl);
407 }
408 }
409
410 void lwasm_read_file(asmstate_t *as, char *fname)
411 {
412 FILE *f;
413 int cline = 0;
414 sourceline_t *cl;
415 size_t bufflen;
416 char *buff = NULL;
417 int retval;
418
419 as -> passnum = 1;
420
421 f = fopen(fname, "r");
422 if (!f)
423 {
424 fprintf(stderr, "Cannot open input file %s: %s\n", fname, strerror(errno));
425 return;
426 }
427
428 while (!feof(f))
429 {
430 retval = getline(&buff, &bufflen, f);
431 debug(" read line (%s:%d): %s\n", fname, cline, buff);
432 if (retval < 0)
433 {
434 if (feof(f))
435 break;
436 fprintf(stderr, "Error reading '%s': %s\n", fname, strerror(errno));
437 exit(1);
438 }
439 if (strchr(buff, '\n'))
440 *strchr(buff, '\n') = '\0';
441 if (strchr(buff, '\r'))
442 *strchr(buff, '\r') = '\0';
443 cl = calloc(sizeof(sourceline_t), 1);
444 if (!cl)
445 {
446 perror("Malloc");
447 exit(1);
448 }
449
450 cl -> lineno = cline++;
451 cl -> sourcefile = fname;
452 cl -> opcode = -1;
453 cl -> addrmode = -1;
454 cl -> addr = as -> addr;
455 cl -> dpval = as -> dpval;
456 cl -> prev = as -> source_tail;
457 if (as -> source_tail)
458 as -> source_tail -> next = cl;
459 as -> source_tail = cl;
460 if (as -> source_head == NULL)
461 as -> source_head = cl;
462 cl -> line = strdup(buff);
463
464 resolve_insn(as, cl);
465
466 if (cl -> opcode >= 0 && instab[cl -> opcode].instype == INSTYPE_PSEUDO && instab[cl -> opcode].specialnum == SPECIAL_END)
467 break;
468
469 *buff = '\0';
470
471 }
472 if (buff)
473 free(buff);
474
475 fclose(f);
476
477 return;
478 }
479
480 /*
481 below this point is the expression evaluation package
482
483 Supported binary operators: + - / * %
484 Supported unary operators: -
485
486 <infix>: + | - | * | / | %
487 <unary>: -
488 <expr>: <term> <infix> <term>
489 <term>: <unary> <term>
490 <term>: ( <expr> )
491 <term>: <symbol>
492 <term>: ' <char>
493 <term>: " <char> <char>
494 <term>: *
495 <term>: <number>
496
497 <number>: <dec>
498 <number>: & <dec>
499
500 <number>: $ <hex>
501 <number>: <hex> H
502 <number>: @ <oct>
503 <number>: <oct> O
504 <number>: <oct> Q
505
506 <number>: % <bin>
507 <number>: <bin> B
508
509 <bin>: 0 | 1
510 <oct>: <bin> | 2 | 3 | 4 | 5 | 6 | 7
511 <dec>: <oct> | 8 | 9
512 <hex>: <dec> | A | B | C | D | E | F
513
514 NOTE: hex values which start with a non-digit will need to be prefixed
515 by $ or have a 0 as the leading digit; hence: $CC or 0CCH otherwise the
516 assembler cannot tell the difference between CCH as a symbol or CCH as
517 the value $CC
518
519 */
520
521 // will throw an error and return 0 in tval if there's a problem
522 // -1 is problem; cl -> undef set is undefined symbol
523 int eval_term(asmstate_t *as, sourceline_t *cl, char **optr, int *tval)
524 {
525 char tc;
526 int rval;
527 int binval;
528 int octval;
529 int decval;
530 int hexval;
531 int valtype;
532 int digval;
533 int bindone = 0;
534
535 *tval = 0;
536
537 beginagain:
538 tc = **optr;
539 if (tc == '+')
540 {
541 // unary +, ignored for symetry
542 (*optr)++;
543 goto beginagain;
544 }
545
546 if (tc == '(')
547 {
548 (*optr)++;
549 rval = eval_expr(as, cl, optr, tval);
550 if (rval < 0)
551 return rval;
552 if (**optr != ')')
553 {
554 errorp1(ERR_BADEXPR);
555 return -1;
556 }
557 (*optr)++;
558 return 0;
559 }
560
561 if (tc == '-')
562 {
563 (*optr)++;
564 rval = eval_term(as, cl, optr, tval);
565 if (rval < 0)
566 return rval;
567 *tval = -*tval;
568 return 0;
569 }
570
571 // current address (of current instruction, not PC)
572 if (tc == '*')
573 {
574 *tval = cl -> addr;
575 (*optr)++;
576 return 0;
577 }
578
579 if (strchr("abcdefghijklmnopqrstuvwxyz_", tolower(tc)))
580 {
581 // evaluate a symbol
582 char *symbuf;
583
584 symbuf = parse_symbol(as, optr);
585 if (!symbuf)
586 {
587 errorp1(ERR_BADSYM);
588 *tval = 0;
589 return -1;
590 }
591
592 debug(" looking up symbol: %s\n", symbuf);
593 *tval = lookup_symbol(as, symbuf);
594
595 // if not found, flag forward ref
596 if (*tval == -1)
597 {
598 errorp2(ERR_UNDEF);
599 cl -> undef = 1;
600 *tval = 0;
601 return 0;
602 }
603 return 0;
604 }
605
606 if (tc == '%')
607 {
608 // binary number
609 int v1 = 0;
610 (*optr)++;
611 while (strchr("01", **optr))
612 {
613 v1 = v1 << 1 | ((*(*optr)++) - '0');
614 }
615 *tval = v1;
616 return 0;
617 }
618 if (tc == '$')
619 {
620 // hex number
621 int v1 = 0;
622 (*optr)++;
623 debug("HEX CONST: %s\n", *optr);
624 while (**optr && strchr("01234567890ABCDEF", toupper(tc = **optr)))
625 {
626 debug("HEX 2: %02x\n", tc);
627 if (**optr >= 'A')
628 {
629 v1 = v1 << 4 | (toupper((*(*optr)++)) - 'A' + 10);
630 }
631 else
632 {
633 v1 = v1 << 4 | ((*(*optr)++) - '0');
634 }
635 }
636 *tval = v1;
637 return 0;
638 }
639 if (tc == '@')
640 {
641 // octal number
642 int v1 = 0;
643 (*optr)++;
644 while (strchr("01234567", **optr))
645 {
646 v1 = v1 << 3 | ((*(*optr)++) - '0');
647 }
648 *tval = v1;
649 return 0;
650 }
651 if (tc == '&')
652 {
653 // decimal number
654 int v1 = 0;
655 (*optr)++;
656 while (strchr("0123456789", **optr))
657 {
658 v1 = v1 * 10 + ((*(*optr)++) - '0');
659 }
660 *tval = v1;
661 return 0;
662 }
663 if (tc == '\'')
664 {
665 (*optr)++;
666 if (!**optr)
667 {
668 errorp1(ERR_BADEXPR);
669 return -2;
670 }
671 *tval = *(*optr)++;
672 return 0;
673 }
674 if (tc == '"')
675 {
676 (*optr)++;
677 if (!**optr || !*(*optr + 1))
678 {
679 errorp1(ERR_BADEXPR);
680 return -2;
681 }
682 *tval = *(*optr)++ << 8 | *(*optr)++;
683 return 0;
684 }
685 // end of string
686 if (tc == '\0')
687 {
688 // error if at EOS as we are looking for a term
689 errorp1(ERR_BADEXPR);
690 return -1;
691 }
692
693 // we have a generic number here which may be decimal, hex, binary, or octal
694 // based on a suffix
695
696 // possible data types are binary (1), octal (2), decimal(4), hex (8)
697 valtype = 15;
698 hexval = octval = decval = binval = 0;
699 while (1)
700 {
701
702 // printf(" %c\n", **optr);
703 if (!**optr || !strchr("ABCDEFabcdefqhoQHO0123456789", **optr))
704 {
705 // end of string, must be decimal or the end of a bin
706 if (bindone == 1)
707 {
708 *tval = binval;
709 return 0;
710 }
711 if (valtype & 4)
712 {
713 *tval = decval;
714 return 0;
715 }
716 else
717 {
718 errorp1(ERR_BADEXPR);
719 return -1;
720 }
721 }
722 tc = toupper(*(*optr)++);
723
724 if (tc == 'H')
725 {
726 if (valtype & 8)
727 {
728 *tval = hexval;
729 return 0;
730 }
731 else
732 {
733 // syntax error
734 errorp1(ERR_BADEXPR);
735 return -1;
736 }
737 }
738
739 if (tc == 'Q' || tc == 'O')
740 {
741 if (valtype && 2)
742 {
743 *tval = octval;
744 return 0;
745 }
746 else
747 {
748 errorp1(ERR_BADEXPR);
749 return -1;
750 }
751 }
752
753 digval = tc - '0';
754 if (digval > 9)
755 digval -= 7;
756
757 // if it's not in the range of a hex digit, error out
758 if (tc < '0' || (tc > '9' && tc < 'A') || tc > 'F')
759 {
760 (*optr)--;
761 if (valtype & 4)
762 {
763 *tval = decval;
764 return 0;
765 }
766 // if we're in hex/bin mode and run to the end of the number
767 // we must have a binary constant or an error
768 // if the previous character is B, then we have binary
769 // else we have error since hex would require a terminating H
770 // which would be caught above
771 if (valtype == 8 && toupper(*(*optr)) == 'B')
772 {
773 *tval = binval;
774 return 0;
775 }
776 errorp1(ERR_BADEXPR);
777 return -1;
778 }
779
780 // if we have any characters past the end of the B, it's not binary
781 if (bindone == 1)
782 bindone = 0;
783 if (tc == 'B')
784 bindone = 1;
785 if (digval > 1)
786 valtype &= 14;
787 else if (digval > 7)
788 valtype &= 13;
789 else if (digval > 9)
790 valtype &= 11;
791
792 if (valtype & 8)
793 {
794 hexval = (hexval << 4) | digval;
795 }
796 if (valtype & 4)
797 {
798 decval = decval * 10 + digval;
799 }
800 if (valtype & 2)
801 {
802 octval = (octval << 3) | digval;
803 }
804 if (valtype & 1 && !bindone)
805 {
806 binval = (binval << 1) | digval;
807 }
808
809 }
810 // can't get here from there
811 }
812
813 // returns -1 if the expression cannot be parsed
814 // and returns -2 if there is an undefined symbol reference
815 // resulting value will be in *val; undefined symbols are parsed as
816 // value 0 but cl -> undef will be set.
817 int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val)
818 {
819 int left;
820 int right;
821 char oper;
822 int rval;
823
824 // by default, return 0 in val
825 *val = 0;
826 cl -> undef = 0;
827
828 rval = eval_term(as, cl, optr, &left);
829 if (rval < 0)
830 return rval;
831
832 nextop:
833 oper = **optr;
834
835 // end of expr
836 if (isspace(oper) || oper == ',' || oper == '\0' || oper == ']' || oper == ')')
837 goto retleft;
838
839 // unrecognized chars
840 if (!strchr("+-*/%", oper))
841 goto retleft;
842
843 (*optr)++;
844
845 rval = eval_term(as, cl, optr, &right);
846 // propagate error
847 if (rval < 0)
848 return rval;
849
850 // do the operation and put it in "left"
851 switch (oper)
852 {
853 case '+':
854 left += right;
855 break;
856
857 case '-':
858 left -= right;
859 break;
860
861 case '*':
862 left *= right;
863 break;
864
865 case '/':
866 left /= right;
867 break;
868
869 case '%':
870 left %= right;
871 break;
872 }
873
874 goto nextop;
875
876 retleft:
877 *val = left;
878 return 0;
879 }