comparison lwcc/lex.c @ 308:670ea8f90212 ccdev

Converted preproc logic to library and moved some utility stuff to lwlib. The strbuf and strpool stuff is generally useful so move it to lwlib where other such things live. Also, package the preprocessor logic into a library for easy use in multiple places.
author William Astle <lost@l-w.ca>
date Sat, 21 Sep 2013 13:33:54 -0600
parents b08787e5b9f3
children ee3e52ab2288
comparison
equal deleted inserted replaced
307:9e342c4e4b66 308:670ea8f90212
21 21
22 #include <ctype.h> 22 #include <ctype.h>
23 #include <stdio.h> 23 #include <stdio.h>
24 24
25 #include <lw_alloc.h> 25 #include <lw_alloc.h>
26 #include <lw_strbuf.h>
26 27
27 #include "cpp.h" 28 #include "cpp.h"
28 #include "strbuf.h"
29 #include "token.h" 29 #include "token.h"
30 30
31 /* fetch a raw input byte from the current file. Will return CPP_EOF if 31 /* fetch a raw input byte from the current file. Will return CPP_EOF if
32 EOF is encountered and CPP_EOL if an end of line sequence is encountered. 32 EOF is encountered and CPP_EOL if an end of line sequence is encountered.
33 End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is 33 End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is
358 int scol = pp -> column; 358 int scol = pp -> column;
359 char *strval = NULL; 359 char *strval = NULL;
360 int ttype = TOK_NONE; 360 int ttype = TOK_NONE;
361 int c, c2; 361 int c, c2;
362 int cl; 362 int cl;
363 struct strbuf *strbuf; 363 struct lw_strbuf *strbuf;
364 struct token *t = NULL; 364 struct token *t = NULL;
365 struct preproc_info *fs; 365 struct preproc_info *fs;
366 366
367 fileagain: 367 fileagain:
368 c = preproc_lex_fetch_byte(pp); 368 c = preproc_lex_fetch_byte(pp);
614 614
615 case '\'': 615 case '\'':
616 /* character constant - turns into a uint */ 616 /* character constant - turns into a uint */
617 chrlit: 617 chrlit:
618 cl = 0; 618 cl = 0;
619 strbuf = strbuf_new(); 619 strbuf = lw_strbuf_new();
620 for (;;) 620 for (;;)
621 { 621 {
622 c = preproc_lex_fetch_byte(pp); 622 c = preproc_lex_fetch_byte(pp);
623 if (c == CPP_EOF || c == CPP_EOL || c == '\'') 623 if (c == CPP_EOF || c == CPP_EOL || c == '\'')
624 break; 624 break;
625 cl++; 625 cl++;
626 if (c == '\\') 626 if (c == '\\')
627 { 627 {
628 strbuf_add(strbuf, '\\'); 628 lw_strbuf_add(strbuf, '\\');
629 c = preproc_lex_fetch_byte(pp); 629 c = preproc_lex_fetch_byte(pp);
630 if (c == CPP_EOF || c == CPP_EOL) 630 if (c == CPP_EOF || c == CPP_EOL)
631 { 631 {
632 if (!pp -> lexstr) 632 if (!pp -> lexstr)
633 preproc_throw_error(pp, "Invalid character constant"); 633 preproc_throw_error(pp, "Invalid character constant");
634 ttype = TOK_ERROR; 634 ttype = TOK_ERROR;
635 strval = strbuf_end(strbuf); 635 strval = lw_strbuf_end(strbuf);
636 goto out; 636 goto out;
637 } 637 }
638 cl++; 638 cl++;
639 strbuf_add(strbuf, c); 639 lw_strbuf_add(strbuf, c);
640 continue; 640 continue;
641 } 641 }
642 strbuf_add(strbuf, c); 642 lw_strbuf_add(strbuf, c);
643 } 643 }
644 strval = strbuf_end(strbuf); 644 strval = lw_strbuf_end(strbuf);
645 if (cl == 0) 645 if (cl == 0)
646 { 646 {
647 ttype = TOK_ERROR; 647 ttype = TOK_ERROR;
648 if (!pp -> lexstr) 648 if (!pp -> lexstr)
649 preproc_throw_error(pp, "Invalid character constant"); 649 preproc_throw_error(pp, "Invalid character constant");
653 goto out; 653 goto out;
654 654
655 case '"': 655 case '"':
656 strlit: 656 strlit:
657 /* string literal */ 657 /* string literal */
658 strbuf = strbuf_new(); 658 strbuf = lw_strbuf_new();
659 strbuf_add(strbuf, '"'); 659 lw_strbuf_add(strbuf, '"');
660 for (;;) 660 for (;;)
661 { 661 {
662 c = preproc_lex_fetch_byte(pp); 662 c = preproc_lex_fetch_byte(pp);
663 if (c == CPP_EOF || c == CPP_EOL) 663 if (c == CPP_EOF || c == CPP_EOL)
664 { 664 {
665 ttype = TOK_ERROR; 665 ttype = TOK_ERROR;
666 strval = strbuf_end(strbuf); 666 strval = lw_strbuf_end(strbuf);
667 if (!pp -> lexstr) 667 if (!pp -> lexstr)
668 preproc_throw_error(pp, "Invalid string constant"); 668 preproc_throw_error(pp, "Invalid string constant");
669 goto out; 669 goto out;
670 } 670 }
671 if (c == '"') 671 if (c == '"')
672 break; 672 break;
673 if (c == '\\') 673 if (c == '\\')
674 { 674 {
675 strbuf_add(strbuf, '\\'); 675 lw_strbuf_add(strbuf, '\\');
676 c = preproc_lex_fetch_byte(pp); 676 c = preproc_lex_fetch_byte(pp);
677 if (c == CPP_EOF || c == CPP_EOL) 677 if (c == CPP_EOF || c == CPP_EOL)
678 { 678 {
679 ttype = TOK_ERROR; 679 ttype = TOK_ERROR;
680 if (!pp -> lexstr) 680 if (!pp -> lexstr)
681 preproc_throw_error(pp, "Invalid string constant"); 681 preproc_throw_error(pp, "Invalid string constant");
682 strval = strbuf_end(strbuf); 682 strval = lw_strbuf_end(strbuf);
683 goto out; 683 goto out;
684 } 684 }
685 cl++; 685 cl++;
686 strbuf_add(strbuf, c); 686 lw_strbuf_add(strbuf, c);
687 continue; 687 continue;
688 } 688 }
689 strbuf_add(strbuf, c); 689 lw_strbuf_add(strbuf, c);
690 } 690 }
691 strbuf_add(strbuf, '"'); 691 lw_strbuf_add(strbuf, '"');
692 strval = strbuf_end(strbuf); 692 strval = lw_strbuf_end(strbuf);
693 ttype = TOK_STR_LIT; 693 ttype = TOK_STR_LIT;
694 goto out; 694 goto out;
695 695
696 case 'L': 696 case 'L':
697 /* check for wide string or wide char const */ 697 /* check for wide string or wide char const */
716 case 'G': case 'H': case 'I': case 'J': case 'K': 716 case 'G': case 'H': case 'I': case 'J': case 'K':
717 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 717 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
718 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 718 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
719 case 'Y': case 'Z': 719 case 'Y': case 'Z':
720 /* we have an identifier here */ 720 /* we have an identifier here */
721 strbuf = strbuf_new(); 721 strbuf = lw_strbuf_new();
722 strbuf_add(strbuf, c); 722 lw_strbuf_add(strbuf, c);
723 for (;;) 723 for (;;)
724 { 724 {
725 c = preproc_lex_fetch_byte(pp); 725 c = preproc_lex_fetch_byte(pp);
726 if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) 726 if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
727 { 727 {
728 strbuf_add(strbuf, c); 728 lw_strbuf_add(strbuf, c);
729 continue; 729 continue;
730 } 730 }
731 else 731 else
732 { 732 {
733 strbuf_add(strbuf, 0); 733 lw_strbuf_add(strbuf, 0);
734 strval = strbuf_end(strbuf); 734 strval = lw_strbuf_end(strbuf);
735 break; 735 break;
736 } 736 }
737 } 737 }
738 preproc_lex_unfetch_byte(pp, c); 738 preproc_lex_unfetch_byte(pp, c);
739 ttype = TOK_IDENT; 739 ttype = TOK_IDENT;
741 741
742 case '.': 742 case '.':
743 c = preproc_lex_fetch_byte(pp); 743 c = preproc_lex_fetch_byte(pp);
744 if (c >= '0' && c <= '9') 744 if (c >= '0' && c <= '9')
745 { 745 {
746 strbuf = strbuf_new(); 746 strbuf = lw_strbuf_new();
747 strbuf_add(strbuf, '.'); 747 lw_strbuf_add(strbuf, '.');
748 goto numlit; 748 goto numlit;
749 } 749 }
750 else if (c == '.') 750 else if (c == '.')
751 { 751 {
752 c = preproc_lex_fetch_byte(pp); 752 c = preproc_lex_fetch_byte(pp);
761 ttype = TOK_DOT; 761 ttype = TOK_DOT;
762 goto out; 762 goto out;
763 763
764 case '0': case '1': case '2': case '3': case '4': 764 case '0': case '1': case '2': case '3': case '4':
765 case '5': case '6': case '7': case '8': case '9': 765 case '5': case '6': case '7': case '8': case '9':
766 strbuf = strbuf_new(); 766 strbuf = lw_strbuf_new();
767 numlit: 767 numlit:
768 ttype = TOK_NUMBER; 768 ttype = TOK_NUMBER;
769 strbuf_add(strbuf, c); 769 lw_strbuf_add(strbuf, c);
770 for (;;) 770 for (;;)
771 { 771 {
772 c = preproc_lex_fetch_byte(pp); 772 c = preproc_lex_fetch_byte(pp);
773 if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) 773 if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')))
774 break; 774 break;
775 strbuf_add(strbuf, c); 775 lw_strbuf_add(strbuf, c);
776 if (c == 'e' || c == 'E' || c == 'p' || c == 'P') 776 if (c == 'e' || c == 'E' || c == 'p' || c == 'P')
777 { 777 {
778 c = preproc_lex_fetch_byte(pp); 778 c = preproc_lex_fetch_byte(pp);
779 if (c == '+' || c == '-') 779 if (c == '+' || c == '-')
780 { 780 {
781 strbuf_add(strbuf, c); 781 lw_strbuf_add(strbuf, c);
782 continue; 782 continue;
783 } 783 }
784 preproc_lex_unfetch_byte(pp, c); 784 preproc_lex_unfetch_byte(pp, c);
785 } 785 }
786 } 786 }
787 strval = strbuf_end(strbuf); 787 strval = lw_strbuf_end(strbuf);
788 preproc_lex_unfetch_byte(pp, c); 788 preproc_lex_unfetch_byte(pp, c);
789 goto out; 789 goto out;
790 790
791 default: 791 default:
792 ttype = TOK_CHAR; 792 ttype = TOK_CHAR;