# HG changeset patch
# User William Astle
# Date 1565062029 21600
# Node ID 5b8871fd7503a694a409d2658c9e79b011e38d69
# Parent 6073f4a334750e68a7843ee1a8ecf97c8a7e9bf0
# Parent 836dc53719805fa8b247a9f0b319d6cea63a47ff
Merged previous lwcc development branch into mainline.

diff -r 6073f4a33475 -r 5b8871fd7503 .hgignore
--- a/.hgignore	Sat Jun 29 21:54:43 2019 -0600
+++ b/.hgignore	Mon Aug 05 21:27:09 2019 -0600
@@ -10,7 +10,8 @@
 /lwar$
 /lwasm$
 /lwcc/lwcc$
-/lwcc/lwcpp$
+/lwcc/lwcc-cpp$
+/lwcc/lwcc-cc$
 
 # for windows
 \.suo$
diff -r 6073f4a33475 -r 5b8871fd7503 COPYING
--- a/COPYING	Sat Jun 29 21:54:43 2019 -0600
+++ b/COPYING	Mon Aug 05 21:27:09 2019 -0600
@@ -1,5 +1,3 @@
 See the file GPL3 for a copy of the GNU General Public License, version 3,
-under which the majority of this distribution is licensed.
-
-Some specific source files, including many of those in the lwcc subdirectory
-are licensed differently. See each individual file for specific details.
+under which the majority of this distribution is licensed. Any exceptions
+will be noted in the relevant source files.
\ No newline at end of file diff -r 6073f4a33475 -r 5b8871fd7503 Makefile --- a/Makefile Sat Jun 29 21:54:43 2019 -0600 +++ b/Makefile Mon Aug 05 21:27:09 2019 -0600 @@ -7,12 +7,29 @@ #PROGSUFFIX := .exe #BUILDTPREFIX=i586-mingw32msvc- +LWTOOLS_VERSION = 4.8 +LWTOOLS_VERSION_SUFFIX = -devcc + +PACKAGE_VERSION = $(LWTOOLS_VERSION)$(LWTOOLS_VERSION_SUFFIX) + +ifeq ($(PREFIX),) ifneq ($(DESTDIR),) -INSTALLDIR = $(DESTDIR)/usr/bin +PREFIX = /usr else -INSTALLDIR ?= /usr/local/bin +PREFIX = /usr/local +endif endif +LIBDIR = $(PREFIX)/lib +BINDIR = $(PREFIX)/bin + +INSTALLDIR = $(DESTDIR)$(PREFIX) +INSTALLBIN = $(DESTDIR)$(BINDIR) +INSTALLLIB = $(DESTDIR)$(LIBDIR) + +LWCC_LIBDIR = $(LIBDIR)/lwcc/$(PACKAGE_VERSION) +LWCC_INSTALLLIBDIR = $(DESTDIR)$(LWCC_LIBDIR) + # this are probably pointless but they will make sure # the variables are set without overriding the environment # or automatic values from make itself. @@ -28,6 +45,8 @@ endif CPPFLAGS += -I lwlib -Icommon +CPPFLAGS += -DPREFIX=$(PREFIX) -DLWCC_LIBDIR=$(LWCC_LIBDIR) +CPPFLAGS += -DPROGSUFFIX=$(PROGSUFFIX) LDFLAGS += -Llwlib -llw CFLAGS ?= -O3 -Wall -Wno-char-subscripts @@ -35,7 +54,14 @@ MAIN_TARGETS := lwasm/lwasm$(PROGSUFFIX) \ lwlink/lwlink$(PROGSUFFIX) \ lwar/lwar$(PROGSUFFIX) \ - lwlink/lwobjdump$(PROGSUFFIX) + lwlink/lwobjdump$(PROGSUFFIX) \ + lwcc/lwcc$(PROGSUFFIX) \ + lwcc/lwcc-cpp$(PROGSUFFIX) \ + lwcc/lwcc-cc$(PROGSUFFIX) + +LWCC_LIBBIN_FILES = lwcc/lwcc-cpp$(PROGSUFFIX) lwcc/lwcc-cc$(PROGSUFFIX) +LWCC_LIBLIB_FILES = +LWCC_LIBINC_FILES = .PHONY: all all: $(MAIN_TARGETS) @@ -44,7 +70,8 @@ lwar_srcs := $(addprefix lwar/,$(lwar_srcs)) lwlib_srcs := lw_alloc.c lw_realloc.c lw_free.c lw_error.c lw_expr.c \ - lw_stack.c lw_string.c lw_stringlist.c lw_cmdline.c + lw_stack.c lw_string.c lw_stringlist.c lw_cmdline.c lw_strbuf.c \ + lw_strpool.c lwlib_srcs := $(addprefix lwlib/,$(lwlib_srcs)) lwlink_srcs := main.c lwlink.c readfiles.c expr.c script.c link.c output.c map.c @@ -71,11 +98,37 @@ lwlib_deps := 
$(lwlib_srcs:.c=.d)
 lwobjdump_deps := $(lwobjdump_srcs:.c=.d)
 
-.PHONY: lwlink lwasm lwar lwobjdump
+lwcc_driver_srcs := driver-main.c
+lwcc_driver_srcs := $(addprefix lwcc/,$(lwcc_driver_srcs))
+lwcc_driver_objs := $(lwcc_driver_srcs:.c=.o)
+lwcc_driver_deps := $(lwcc_driver_srcs:.c=.d)
+
+lwcc_cpp_srcs := cpp-main.c
+lwcc_cpp_srcs := $(addprefix lwcc/,$(lwcc_cpp_srcs))
+lwcc_cpp_objs := $(lwcc_cpp_srcs:.c=.o)
+lwcc_cpp_deps := $(lwcc_cpp_srcs:.c=.d)
+
+# parse_c.c needs to be first here
+lwcc_cc_srcs := parse_c.c cc-main.c tree.c parse.c token_names.c
+lwcc_cc_srcs := $(addprefix lwcc/,$(lwcc_cc_srcs))
+lwcc_cc_objs := $(lwcc_cc_srcs:.c=.o)
+lwcc_cc_deps := $(lwcc_cc_srcs:.c=.d)
+
+lwcc_cpplib_srcs := cpp.c lex.c token.c preproc.c symbol.c
+lwcc_cpplib_srcs := $(addprefix lwcc/,$(lwcc_cpplib_srcs))
+lwcc_cpplib_objs := $(lwcc_cpplib_srcs:.c=.o)
+lwcc_cpplib_deps := $(lwcc_cpplib_srcs:.c=.d)
+
+lwcc_deps := $(lwcc_cpp_deps) $(lwcc_driver_deps) $(lwcc_cpplib_deps) $(lwcc_cc_deps)
+
+.PHONY: lwlink lwasm lwar lwobjdump lwcc lwcc-cpp lwcc-cc
 lwlink: lwlink/lwlink$(PROGSUFFIX)
 lwasm: lwasm/lwasm$(PROGSUFFIX)
 lwar: lwar/lwar$(PROGSUFFIX)
 lwobjdump: lwlink/lwobjdump$(PROGSUFFIX)
+lwcc: lwcc/lwcc$(PROGSUFFIX)
+lwcc-cpp: lwcc/lwcc-cpp$(PROGSUFFIX)
+lwcc-cc: lwcc/lwcc-cc$(PROGSUFFIX)
 
 lwasm/lwasm$(PROGSUFFIX): $(lwasm_objs) lwlib
 	@echo Linking $@
@@ -93,6 +146,25 @@
 	@echo Linking $@
 	@$(CC) -o $@ $(lwar_objs) $(LDFLAGS)
 
+lwcc/lwcc$(PROGSUFFIX): $(lwcc_driver_objs) lwlib
+	@echo Linking $@
+	@$(CC) -o $@ $(lwcc_driver_objs) $(LDFLAGS)
+
+lwcc/lwcc-cpp$(PROGSUFFIX): $(lwcc_cpp_objs) lwlib lwcc-cpplib
+	@echo Linking $@
+	@$(CC) -o $@ $(lwcc_cpp_objs) lwcc/libcpp.a $(LDFLAGS)
+
+lwcc/lwcc-cc$(PROGSUFFIX): $(lwcc_cc_objs) lwlib lwcc-cpplib
+	@echo Linking $@
+	@$(CC) -o $@ $(lwcc_cc_objs) lwcc/libcpp.a $(LDFLAGS)
+
+.INTERMEDIATE: lwcc-cpplib
+lwcc-cpplib: lwcc/libcpp.a
+lwcc/libcpp.a: $(lwcc_cpplib_objs)
+	@echo Linking $@
+	@$(AR) rc $@ $(lwcc_cpplib_objs)
+	@$(RANLIB) $@
+
 #.PHONY: lwlib
.INTERMEDIATE: lwlib
 lwlib: lwlib/liblw.a
@@ -102,15 +174,26 @@
 	@$(AR) rc $@ $(lwlib_objs)
 	@$(RANLIB) $@
 
-alldeps := $(lwasm_deps) $(lwlink_deps) $(lwar_deps) $(lwlib_deps) ($lwobjdump_deps)
+alldeps := $(lwasm_deps) $(lwlink_deps) $(lwar_deps) $(lwlib_deps) $(lwobjdump_deps) $(lwcc_deps)
 
 -include $(alldeps)
 
 extra_clean := $(extra_clean) *~ */*~
 
+lwcc/parse_c.c lwcc/parse_c.h: lwcc/parse_c.y
+	rm -f lwcc/parse_c.h lwcc/parse_c.c
+	lemon -q lwcc/parse_c.y
+
+lwcc/token_names.c: lwcc/parse_c.h
+	echo "char *ptoken_names[] = {" > $@
+	echo '"TOKEN_NONE",' >> $@
+	cat lwcc/parse_c.h | sed -e 's/#define \(.*\) .*$$/"\1",/g' -e 's/ //g' >> $@
+	echo '"" };' >> $@
+
+
 %.o: %.c
 	@echo "Building dependencies for $@"
-	@$(CC) -MM $(CPPFLAGS) -o $*.d $<
+	@$(CC) -MM -MG $(CPPFLAGS) -o $*.d $<
 	@mv -f $*.d $*.d.tmp
 	@sed -e 's|.*:|$*.o $*.d:|' < $*.d.tmp > $*.d
 	@sed -e 's/.*://' -e 's/\\$$//' < $*.d.tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $*.d
@@ -123,6 +206,8 @@
 clean: $(cleantargs)
 	@echo "Cleaning up"
 	@rm -f lwlib/liblw.a lwasm/lwasm$(PROGSUFFIX) lwlink/lwlink$(PROGSUFFIX) lwlink/lwobjdump$(PROGSUFFIX) lwar/lwar$(PROGSUFFIX)
+	@rm -f lwcc/lwcc$(PROGSUFFIX) lwcc/lwcc-cpp$(PROGSUFFIX) lwcc/lwcc-cc$(PROGSUFFIX) lwcc/libcpp.a
+	@rm -f $(lwcc_driver_objs) $(lwcc_cpp_objs) $(lwcc_cpplib_objs) $(lwcc_cc_objs)
 	@rm -f $(lwasm_objs) $(lwlink_objs) $(lwar_objs) $(lwlib_objs) $(lwobjdump_objs)
 	@rm -f $(extra_clean)
 	@rm -f */*.exe
@@ -131,6 +216,7 @@
 realclean: clean $(realcleantargs)
 	@echo "Cleaning up even more"
 	@rm -f $(lwasm_deps) $(lwlink_deps) $(lwar_deps) $(lwlib_deps) $(lwobjdump_deps)
+	@rm -f $(lwcc_driver_deps) $(lwcc_cpp_deps) $(lwcc_cpplib_deps) $(lwcc_cc_deps)
 
 print-%:
 	@echo $* = $($*)
@@ -139,6 +225,19 @@
 install: $(MAIN_TARGETS)
-	install -d $(INSTALLDIR)
-	install $(MAIN_TARGETS) $(INSTALLDIR)
+	install -d $(INSTALLBIN)
+	install $(MAIN_TARGETS) $(INSTALLBIN)
+	install -d $(LWCC_INSTALLLIBDIR)
+	install -d $(LWCC_INSTALLLIBDIR)/bin
+	install -d $(LWCC_INSTALLLIBDIR)/lib
+	install -d $(LWCC_INSTALLLIBDIR)/include
+ifneq ($(LWCC_LIBBIN_FILES),)
+	install
$(LWCC_LIBBIN_FILES) $(LWCC_INSTALLLIBDIR)/bin
+endif
+ifneq ($(LWCC_LIBLIB_FILES),)
+	install $(LWCC_LIBLIB_FILES) $(LWCC_INSTALLLIBDIR)/lib
+endif
+ifneq ($(LWCC_LIBINC_FILES),)
+	install $(LWCC_LIBINC_FILES) $(LWCC_INSTALLLIBDIR)/include
+endif
 
 .PHONY: test
 test: all test/runtests
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/README.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/README.txt	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,100 @@
+This is the lwcc C compiler for lwtools. It was written using various other
+C compilers as guides. Special thanks to the developers of the PCC compiler.
+While none of the code from PCC was actually used, much of the compiler
+itself served as a template for creating lwcc.
+
+
+LIMITATIONS AND DESIGN CHOICES
+==============================
+
+The direct interface to both the compiler proper and the preprocessor is
+specifically undefined. Indeed, the preprocessor may, in fact, not be a
+separate program at all. Relying on the specific format of the output of the
+preprocessor is specifically forbidden even though it is possible to obtain
+preprocessed output from the compiler driver. This is provided for debugging
+purposes only.
+
+The preprocessor supports variadic macros. It also supports stringification
+and token concatenation, but only within a macro expansion. There are
+examples online that use the construct "#__LINE__" to get a string version
+of the current line number; such uses outside a macro are not supported.
+
+The preprocessor defaults to ignoring trigraphs because they are basically a
+stupid idea on any current system. They have their place for systems where
+creating the nine characters specified by the trigraphs is very difficult or
+impossible. It is possible, however, to instruct the preprocessor to decode
+trigraph sequences.
+
+The nonstandard "#pragma once" feature is not supported at all. The effect
+is easily accomplished using standard macros and conditionals. It is,
+therefore, unneeded complexity.
+ +The nonstandard idea of preprocessor assertions is also completely +unsupported. It is just as easy to test predefined macros and such tests are +much more portable. + +The preprocessor supports __LINE__, __FILE__, __DATE__, and __TIME__. The +compiler itself supports __func__ as a predefined string constant if +encountered because there is no way for the preprocessor to determine what +function it occurs within. The preprocessor does not define __STDC__, +__STDC_VERSION__, or __STDC_HOSTED__. I have seen no truly useful purpose +for these and since lwcc does not, at this time, conform to any known C +standard, it would be incorrect to define the first two. + +The compiler driver may define additional macros depending on its idea of +the context. + + +RUNTIME INFORMATION +=================== + +The compiler driver has a built in base directory where it searches for its +various components as needed. In the discussion below, BASEDIR stands for +that directory. + +BASEDIR may be specified by the -B option to the driver. Care must be taken +when doing so, however, because specifying an invalid -B will cause the +compiler to fail completely. It will completely override the built in search +paths for the compiler provided files and programs. + +Because BASEDIR is part of the actual compiler, it is not affected by +--sysroot or -isysroot options. + +If BASEDIR does not exist, compiler component programs will be searched for +in the standard execution paths. This may lead to incorrect results so it is +important to make certain that the specified BASEDIR exists. + +If -B is not specified, the default BASEDIR is +$(PREFIX)/lib/lwcc/$(VERSION)/ where PREFIX is the build prefix from the +Makefile and VERSION is the lwtools version. + +The contents of BASEDIR are as follows: + +BASEDIR/bin + +Various binaries for the parts of the compiler system. Notably, this +includes the preprocessor and compiler proper. 
The specific names and
+contents of this directory cannot be relied upon and these programs should
+not be called directly. Ever. Don't do it.
+
+
+BASEDIR/lib
+
+This directory contains various libraries that provide support for any
+portion of the compiler's output. The driver will arrange to pass the
+appropriate arguments to the linker to include these as required.
+
+The most notable file in this directory is liblwcc.a which contains the
+support routines for the compiler's code generation. Depending on ABI and
+code generation options supported, there may be multiple versions of
+liblwcc.a. The driver will arrange for the correct one to be referenced.
+
+
+BASEDIR/include
+
+This directory contains any C header files that the compiler provides.
+Notably, this includes stdint.h, stdarg.h, and setjmp.h as these are
+specific to the compiler. The driver will arrange for this directory to be
+searched prior to the standard system directories so that these files will
+override any present in those directories.
+
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/cc-main.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/cc-main.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,222 @@
+/*
+lwcc/cc-main.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "cpp.h"
+#include "tree.h"
+
+node_t *process_file(const char *);
+static void do_error(const char *f, ...);
+extern node_t *parse_program(struct preproc_info *pp);
+
+node_t *program_tree = NULL;	/* root node; main() adds one child per parsed input file */
+
+/* command line option handling */
+#define PROGVER "lwcc-cc from " PACKAGE_STRING
+char *program_name;	/* argv[0], recorded by main() */
+
+/* input files */
+lw_stringlist_t input_files;	/* non-option arguments */
+lw_stringlist_t includedirs;	/* -I arguments (user include path) */
+lw_stringlist_t sysincludedirs;	/* -S arguments (system include path) */
+lw_stringlist_t macrolist;	/* -D arguments */
+
+/* various flags */
+int trigraphs = 0;	/* set to 1 by --trigraphs */
+char *output_file = NULL;	/* -o argument; NULL when not given */
+FILE *output_fp = NULL;	/* opened by main(): stdout or output_file */
+
+static struct lw_cmdline_options options[] =
+{
+	{ "output", 'o', "FILE", 0, "Output to FILE"},
+	{ "include", 'i', "FILE", 0, "Pre-include FILE" },	/* NOTE(review): parse_opts() has no case for 'i' */
+	{ "includedir", 'I', "PATH", 0, "Add entry to the user include path" },
+	{ "sincludedir", 'S', "PATH", 0, "Add entry to the system include path" },
+	{ "define", 'D', "SYM[=VAL]",0, "Automatically define SYM to be VAL (or 1)"},
+	{ "trigraphs", 0x100, NULL, 0, "Enable interpretation of trigraphs" },
+	{ 0 }
+};
+
+static int parse_opts(int key, char *arg, void *state)	/* option callback referenced by cmdline_parser below */
+{
+	switch (key)
+	{
+	case 'o':
+		if (output_file)
+			do_error("Output file specified more than once.");
+		output_file = arg;
+		break;
+	
+	case 0x100:	/* --trigraphs */
+		trigraphs = 1;
+		break;
+
+	case 'I':
+		lw_stringlist_addstring(includedirs, arg);
+		break;
+	
+	case 'S':
+		lw_stringlist_addstring(sysincludedirs, arg);
+		break;
+
+	case 'D':
+		lw_stringlist_addstring(macrolist, arg);
+		break;
+		
+	case lw_cmdline_key_end:
+		break;
+	
+	case lw_cmdline_key_arg:	/* a non-option argument: treated as an input file name */
+		lw_stringlist_addstring(input_files, arg);
+		break;
+		
+	default:	/* any key without a case above, including 'i' */
+		return lw_cmdline_err_unknown;
+	}
+	return 0;
+}
+
+static struct lw_cmdline_parser cmdline_parser =
+{
+	options,
+	parse_opts,
+	"INPUTFILE",
+	"lwcc-cc - C compiler for lwcc",
+	PROGVER
+};
+
+int main(int argc, char **argv)
+{
+	program_name = argv[0];
+	int retval = 0;	/* exit status; set to 1 when a file fails to process */
+	node_t *n;
+	
+	input_files =
lw_stringlist_create();
+	includedirs = lw_stringlist_create();
+	sysincludedirs = lw_stringlist_create();
+	macrolist = lw_stringlist_create();
+	
+	/* parse command line arguments */
+	lw_cmdline_parse(&cmdline_parser, argc, argv, 0, 0, NULL);
+
+	/* set up output file */
+	if (output_file == NULL || strcmp(output_file, "-") == 0)
+	{
+		output_fp = stdout;
+	}
+	else
+	{
+		output_fp = fopen(output_file, "wb");
+		if (output_fp == NULL)
+		{
+			do_error("Failed to create output file %s: %s", output_file, strerror(errno)); 
+		}
+	}
+	
+	program_tree = node_create(NODE_PROGRAM);
+	
+	if (lw_stringlist_nstrings(input_files) == 0)
+	{
+		/* if no input files, work on stdin */
+		n = process_file("-");
+		if (!n)
+			retval = 1;
+		else
+			node_addchild(program_tree, n);
+	}
+	else
+	{
+		char *s;
+		lw_stringlist_reset(input_files);
+		for (s = lw_stringlist_current(input_files); s; s = lw_stringlist_next(input_files))
+		{
+			n = process_file(s);
+			if (!n)
+				retval = 1;
+			if (retval != 0)
+				break;	/* stop at the first file that fails */
+			node_addchild(program_tree, n);
+		}
+	}
+	lw_stringlist_destroy(input_files);
+	lw_stringlist_destroy(includedirs);
+	lw_stringlist_destroy(sysincludedirs);
+	lw_stringlist_destroy(macrolist);
+	
+	node_display(program_tree, stdout);	/* NOTE(review): writes to stdout; output_fp is opened above but not used in this function */
+	node_destroy(program_tree);
+	exit(retval);
+}
+
+node_t *process_file(const char *fn)	/* preprocess and parse one input; NULL if preproc_init() fails */
+{
+	struct preproc_info *pp;
+	char *tstr;
+	node_t *n;
+		
+	pp = preproc_init(fn);
+	if (!pp)
+		return NULL;
+
+	/* set up the include paths */
+	lw_stringlist_reset(includedirs);
+	for (tstr = lw_stringlist_current(includedirs); tstr; tstr = lw_stringlist_next(includedirs))
+	{
+		preproc_add_include(pp, tstr, 0);	/* 0: user include path */
+	}
+
+	lw_stringlist_reset(sysincludedirs);
+	for (tstr = lw_stringlist_current(sysincludedirs); tstr; tstr = lw_stringlist_next(sysincludedirs))
+	{
+		preproc_add_include(pp, tstr, 1);	/* 1: system include path */
+	}
+
+	/* set up pre-defined macros */
+	lw_stringlist_reset(macrolist);
+	for (tstr = lw_stringlist_current(macrolist); tstr; tstr = lw_stringlist_next(macrolist))
+	{
+		
preproc_add_macro(pp, tstr); + } + + n = parse_program(pp); + preproc_finish(pp); + return n; +} + +static void do_error(const char *f, ...) +{ + va_list args; + va_start(args, f); + fprintf(stderr, "ERROR: "); + vfprintf(stderr, f, args); + va_end(args); + fprintf(stderr, "\n"); + exit(1); +} diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/cpp-main.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/cpp-main.c Mon Aug 05 21:27:09 2019 -0600 @@ -0,0 +1,259 @@ +/* +lwcc/cpp-main.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . 
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "cpp.h"
+
+int process_file(const char *);
+static void do_error(const char *f, ...);
+
+/* command line option handling */
+#define PROGVER "lwcc-cpp from " PACKAGE_STRING
+char *program_name;	/* argv[0], recorded by main() */
+
+/* input files */
+lw_stringlist_t input_files;	/* non-option arguments */
+lw_stringlist_t includedirs;	/* -I arguments (user include path) */
+lw_stringlist_t sysincludedirs;	/* -S arguments (system include path) */
+lw_stringlist_t macrolist;	/* -D arguments */
+
+/* various flags */
+int trigraphs = 0;	/* set to 1 by --trigraphs */
+char *output_file = NULL;	/* -o argument; NULL when not given */
+FILE *output_fp = NULL;	/* opened by main(): stdout or output_file */
+
+static struct lw_cmdline_options options[] =
+{
+	{ "output", 'o', "FILE", 0, "Output to FILE"},
+	{ "include", 'i', "FILE", 0, "Pre-include FILE" },	/* NOTE(review): parse_opts() has no case for 'i' */
+	{ "includedir", 'I', "PATH", 0, "Add entry to the user include path" },
+	{ "sincludedir", 'S', "PATH", 0, "Add entry to the system include path" },
+	{ "define", 'D', "SYM[=VAL]",0, "Automatically define SYM to be VAL (or 1)"},
+	{ "trigraphs", 0x100, NULL, 0, "Enable interpretation of trigraphs" },
+	{ 0 }
+};
+
+static int parse_opts(int key, char *arg, void *state)	/* option callback referenced by cmdline_parser below */
+{
+	switch (key)
+	{
+	case 'o':
+		if (output_file)
+			do_error("Output file specified more than once.");
+		output_file = arg;
+		break;
+	
+	case 0x100:	/* --trigraphs */
+		trigraphs = 1;
+		break;
+
+	case 'I':
+		lw_stringlist_addstring(includedirs, arg);
+		break;
+	
+	case 'S':
+		lw_stringlist_addstring(sysincludedirs, arg);
+		break;
+
+	case 'D':
+		lw_stringlist_addstring(macrolist, arg);
+		break;
+		
+	case lw_cmdline_key_end:
+		break;
+	
+	case lw_cmdline_key_arg:	/* a non-option argument: treated as an input file name */
+		lw_stringlist_addstring(input_files, arg);
+		break;
+		
+	default:	/* any key without a case above, including 'i' */
+		return lw_cmdline_err_unknown;
+	}
+	return 0;
+}
+
+static struct lw_cmdline_parser cmdline_parser =
+{
+	options,
+	parse_opts,
+	"INPUTFILE",
+	"lwcc-cpp - C preprocessor for lwcc",
+	PROGVER
+};
+
+int main(int argc, char **argv)
+{
+	program_name = argv[0];
+	int retval = 0;	/* exit status; takes the last process_file() result */
+	
+	input_files = lw_stringlist_create();
+	includedirs = lw_stringlist_create();
+	sysincludedirs = lw_stringlist_create();
+	
macrolist = lw_stringlist_create();
+	
+	/* parse command line arguments */
+	lw_cmdline_parse(&cmdline_parser, argc, argv, 0, 0, NULL);
+
+	/* set up output file */
+	if (output_file == NULL || strcmp(output_file, "-") == 0)
+	{
+		output_fp = stdout;
+	}
+	else
+	{
+		output_fp = fopen(output_file, "wb");
+		if (output_fp == NULL)
+		{
+			do_error("Failed to create output file %s: %s", output_file, strerror(errno)); 
+		}
+	}
+	
+	if (lw_stringlist_nstrings(input_files) == 0)
+	{
+		/* if no input files, work on stdin */
+		retval = process_file("-");
+	}
+	else
+	{
+		char *s;
+		lw_stringlist_reset(input_files);
+		for (s = lw_stringlist_current(input_files); s; s = lw_stringlist_next(input_files))
+		{
+			retval = process_file(s);
+			if (retval != 0)
+				break;	/* stop at the first file that fails */
+		}
+	}
+	lw_stringlist_destroy(input_files);
+	lw_stringlist_destroy(includedirs);
+	lw_stringlist_destroy(sysincludedirs);
+	lw_stringlist_destroy(macrolist);
+	exit(retval);
+}
+
+static void print_line_marker(FILE *fp, int line, const char *fn, int flag)	/* emits: newline, # LINE "FN" FLAG */
+{
+	fprintf(fp, "\n# %d \"", line);
+	while (*fn)
+	{
+		if ((unsigned char)*fn < 32 || *fn == 34 || (unsigned char)*fn > 126)	/* plain char may be signed: test the byte value */
+		{
+			fprintf(fp, "\\%03o", (unsigned char)*fn);	/* always three octal digits, never a sign-extended int */
+		}
+		else
+		{
+			fprintf(fp, "%c", *fn);
+		}
+		fn++;
+	}
+	fprintf(fp, "\" %d", flag);
+}
+
+int process_file(const char *fn)	/* preprocess one input to output_fp; -1 if preproc_init() fails */
+{
+	struct preproc_info *pp;
+	struct token *tok = NULL;
+	int last_line = 0;
+	char *last_fn = NULL;
+	char *tstr;
+		
+	pp = preproc_init(fn);
+	if (!pp)
+		return -1;
+
+	/* set up the include paths */
+	lw_stringlist_reset(includedirs);
+	for (tstr = lw_stringlist_current(includedirs); tstr; tstr = lw_stringlist_next(includedirs))
+	{
+		preproc_add_include(pp, tstr, 0);
+	}
+
+	lw_stringlist_reset(sysincludedirs);
+	for (tstr = lw_stringlist_current(sysincludedirs); tstr; tstr = lw_stringlist_next(sysincludedirs))
+	{
+		preproc_add_include(pp, tstr, 1);
+	}
+
+	/* set up pre-defined macros */
+	lw_stringlist_reset(macrolist);
+	for (tstr = lw_stringlist_current(macrolist); tstr; tstr =
lw_stringlist_next(macrolist))
+	{
+		preproc_add_macro(pp, tstr);
+	}
+
+	print_line_marker(output_fp, 1, fn, 1);
+	last_fn = lw_strdup(fn);	
+	for (;;)
+	{
+		tok = preproc_next(pp);
+		if (tok -> ttype == TOK_EOF)
+			break;
+		if (strcmp(tok -> fn, last_fn) != 0)
+		{
+			int lt = 1;
+			if (tok -> lineno != 1)
+			{
+				lt = 2;
+			}
+			lw_free(last_fn);
+			last_fn = lw_strdup(tok -> fn);
+			last_line = tok -> lineno;
+			print_line_marker(output_fp, last_line, last_fn, lt);
+		}
+		else
+		{
+			while (tok -> lineno > last_line)
+			{
+				fprintf(output_fp, "\n");
+				last_line++;
+			}
+		}
+		token_print(tok, output_fp);
+		if (tok -> ttype == TOK_EOL)
+			last_line++;
+		token_free(tok);
+	}
+	token_free(tok);
+	lw_free(last_fn);
+//	symtab_dump(pp);
+	preproc_finish(pp);
+	return 0;
+}
+
+static void do_error(const char *f, ...)
+{
+	va_list args;
+	va_start(args, f);
+	fprintf(stderr, "ERROR: ");
+	vfprintf(stderr, f, args);
+	va_end(args);
+	fprintf(stderr, "\n");
+	exit(1);
+}
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/cpp.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/cpp.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,226 @@
+/*
+lwcc/cpp.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include "cpp.h"
+
+
+struct token *preproc_lex_next_token(struct preproc_info *);
+
+/*
+Create a preprocessor context for the file named fn. A NULL fn or the name
+"-" selects standard input. Returns NULL if the input cannot be opened.
+*/
+struct preproc_info *preproc_init(const char *fn)
+{
+	FILE *fp;
+	struct preproc_info *pp;
+	
+	if (!fn || (fn[0] == '-' && fn[1] == '\0'))
+	{
+		fp = stdin;
+	}
+	else
+	{
+		fp = fopen(fn, "rb");
+	}
+	if (!fp)
+		return NULL;
+	
+	pp = lw_alloc(sizeof(struct preproc_info));
+	memset(pp, 0, sizeof(struct preproc_info));
+	pp -> strpool = lw_strpool_create();
+	pp -> fn = lw_strpool_strdup(pp -> strpool, fn);
+	pp -> fp = fp;
+	pp -> ra = CPP_NOUNG;
+	pp -> unget = CPP_NOUNG;
+	pp -> ppeolseen = 1;
+	pp -> lineno = 1;
+	pp -> n = NULL;
+	pp -> quotelist = lw_stringlist_create();
+	pp -> inclist = lw_stringlist_create();
+	return pp;
+}
+
+void preproc_add_include(struct preproc_info *pp, char *dir, int sys)
+{
+	if (sys)
+		lw_stringlist_addstring(pp -> inclist, dir);
+	else
+		lw_stringlist_addstring(pp -> quotelist, dir);
+}
+
+struct token *preproc_next_token(struct preproc_info *pp)
+{
+	struct token *t;
+	
+	if (pp -> curtok)
+		token_free(pp -> curtok);
+
+	/*
+	If there is a list of tokens to process, move it to the "unget" queue
+	with an EOF marker at the end of it.
+	*/
+	if (pp -> sourcelist)
+	{
+		for (t = pp -> sourcelist; t -> next; t = t -> next)
+			/* do nothing */ ;
+		t -> next = token_create(TOK_EOF, NULL, -1, -1, "");
+		t -> next -> next = pp -> tokqueue;
+		pp -> tokqueue = pp -> sourcelist;
+		pp -> sourcelist = NULL;
+	}
+again:
+	if (pp -> tokqueue)
+	{
+		t = pp -> tokqueue;
+		pp -> tokqueue = t -> next;
+		if (pp -> tokqueue)
+			pp -> tokqueue -> prev = NULL;
+		t -> next = NULL;
+		t -> prev = NULL;
+		pp -> curtok = t;
+		goto ret;
+	}
+	pp -> curtok = preproc_lex_next_token(pp);
+	t = pp -> curtok;
+ret:
+	if (t -> ttype == TOK_ENDEXPAND)
+	{
+		struct expand_e *e;
+		e = pp -> expand_list;
+		pp -> expand_list = e -> next;
+		lw_free(e);
+		/* the marker is consumed here, so release it before fetching again */
+		token_free(t);
+		pp -> curtok = NULL;
+		goto again;
+	}
+	return t;
+}
+
+void preproc_unget_token(struct preproc_info *pp, struct token *t)
+{
+	t -> next = pp -> tokqueue;
+	pp -> tokqueue = t;
+	if (pp -> curtok == t)
+		pp -> curtok = NULL;
+}
+
+/*
+Release a preprocessor context and everything it still owns. Queued tokens
+are unlinked and freed directly rather than via preproc_next_token(), which
+frees pp -> curtok on entry and would release the same token twice.
+*/
+void preproc_finish(struct preproc_info *pp)
+{
+	struct token *t;
+	struct expand_e *e;
+
+	fclose(pp -> fp);
+	lw_stringlist_destroy(pp -> inclist);
+	lw_stringlist_destroy(pp -> quotelist);
+	if (pp -> curtok)
+		token_free(pp -> curtok);
+	pp -> curtok = NULL;
+	while ((t = pp -> tokqueue) != NULL)
+	{
+		pp -> tokqueue = t -> next;
+		t -> next = NULL;
+		t -> prev = NULL;
+		token_free(t);
+	}
+	/* expansion records are normally popped when their TOK_ENDEXPAND is read */
+	while ((e = pp -> expand_list) != NULL)
+	{
+		pp -> expand_list = e -> next;
+		lw_free(e);
+	}
+	lw_strpool_free(pp -> strpool);
+	lw_free(pp);
+}
+
+void preproc_register_error_callback(struct preproc_info *pp, void (*cb)(const char *))
+{
+	pp -> errorcb = cb;
+}
+
+void preproc_register_warning_callback(struct preproc_info *pp, void (*cb)(const char *))
+{
+	pp -> warningcb = cb;
+}
+
+static void preproc_throw_error_default(const char *m)
+{
+	fprintf(stderr, "ERROR: %s\n", m);
+}
+
+static void preproc_throw_warning_default(const char *m)
+{
+	fprintf(stderr, "WARNING: %s\n", m);
+}
+
+/*
+Format m with a "(file:line:column) " prefix and hand the result to cb. The
+arguments are consumed twice (once to size the buffer, once to fill it), so
+the sizing pass uses its own va_copy of args.
+*/
+static void preproc_throw_message(struct preproc_info *pp, void (*cb)(const char *), const char *m, va_list args)
+{
+	int s, s2;
+	char *b;
+	va_list sizing_args;
+	
+	s2 = snprintf(NULL, 0, "(%s:%d:%d) ", pp -> fn, pp -> lineno, pp -> column);
+	va_copy(sizing_args, args);
+	s = vsnprintf(NULL, 0, m, sizing_args);
+	va_end(sizing_args);
+	b = lw_alloc(s + s2 + 1);
+	snprintf(b, s2 + 1, "(%s:%d:%d) ", pp -> fn, pp -> lineno, pp -> column);
+	vsnprintf(b + s2, s + 1, m, args);
+	(*cb)(b);
+	lw_free(b);
+}
+
+void preproc_throw_error(struct preproc_info *pp, const char *m, ...)
+{
+	va_list args;
+	va_start(args, m);
+	preproc_throw_message(pp, pp -> errorcb ? pp -> errorcb : preproc_throw_error_default, m, args);
+	va_end(args);
+	exit(1);
+}
+
+void preproc_throw_warning(struct preproc_info *pp, const char *m, ...)
+{
+	va_list args;
+	va_start(args, m);
+	preproc_throw_message(pp, pp -> warningcb ? pp -> warningcb : preproc_throw_warning_default, m, args);
+	va_end(args);
+}
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/cpp.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/cpp.h	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,90 @@
+/*
+lwcc/cpp.h
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +#ifndef cpp_h_seen___ +#define cpp_h_seen___ + +#include + +#include + +//#include "symbol.h" +#include "token.h" + +#define TOKBUFSIZE 32 + +struct expand_e +{ + struct expand_e *next; + struct symtab_e *s; // symbol table entry of the expanding symbol +}; + +struct preproc_info +{ + const char *fn; + FILE *fp; + struct token *tokqueue; + struct token *curtok; + void (*errorcb)(const char *); + void (*warningcb)(const char *); + int eolstate; // internal for use in handling newlines + int lineno; // the current input line number + int column; // the current input column + int trigraphs; // nonzero if we're going to handle trigraphs + int ra; + int qseen; + int ungetbufl; + int ungetbufs; + int *ungetbuf; + int unget; + int eolseen; + int nlseen; + int ppeolseen; // nonzero if we've seen only whitespace (or nothing) since a newline + int skip_level; // nonzero if we're in a false conditional + int found_level; // nonzero if we're in a true conditional + int else_level; // for counting #else directives + int else_skip_level; // ditto + struct symtab_e *sh; // the preprocessor's symbol table + struct token *sourcelist; // for expanding a list of tokens + struct expand_e *expand_list; // record of which macros are currently being expanded + char *lexstr; // for lexing a string (token pasting) + int lexstrloc; // ditto + struct preproc_info *n; // next in file stack + struct preproc_info *filestack; // stack of saved files during include + struct lw_strpool *strpool; + lw_stringlist_t quotelist; + lw_stringlist_t inclist; +}; + +extern struct preproc_info *preproc_init(const char *); +extern struct token *preproc_next_token(struct preproc_info *); +extern struct token *preproc_next_processed_token(struct preproc_info *); +extern void preproc_finish(struct preproc_info *); +extern void preproc_register_error_callback(struct preproc_info *, void (*)(const char *)); +extern void preproc_register_warning_callback(struct preproc_info *, void (*)(const char *)); 
+extern void preproc_throw_error(struct preproc_info *, const char *, ...);
+extern void preproc_throw_warning(struct preproc_info *, const char *, ...);
+extern void preproc_unget_token(struct preproc_info *, struct token *);
+extern void preproc_add_include(struct preproc_info *, char *, int);
+extern void preproc_add_macro(struct preproc_info *, char *);
+extern struct token *preproc_next(struct preproc_info *);
+
+#endif // cpp_h_seen___
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/driver-main.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/driver-main.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,1072 @@
+/*
+lwcc/driver-main.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#define VERSTRING "lwcc from " PACKAGE_STRING
+#define S(x) S2(x)
+#define S2(x) #x
+
+#define BASEDIR S(LWCC_LIBDIR)
+
+/* list of compilation phases */
+enum phase_t {
+	PHASE_DEFAULT = 0,
+	PHASE_PREPROCESS,
+	PHASE_COMPILE,
+	PHASE_ASSEMBLE,
+	PHASE_LINK
+};
+
+/* these are the names of various programs the compiler calls */
+const char *linker_program_name = "lwlink";
+const char *compiler_program_name = "lwcc1";	// NOTE(review): the Makefile in this changeset builds lwcc/lwcc-cc - confirm this name
+const char *assembler_program_name = "lwasm";
+const char *preprocessor_program_name = "lwcpp";	// NOTE(review): the Makefile in this changeset builds lwcc/lwcc-cpp - confirm this name
+
+/* this will be set to the directory where temporary files get created */
+const char *temp_directory = NULL;
+
+/* these are for book keeping if we get interrupted - the volatile and atomic
+   types are needed because they are accessed in a signal handler */
+static volatile sig_atomic_t sigterm_received = 0;
+static volatile sig_atomic_t child_pid = 0;
+
+/* path specified with --sysroot */
+const char *sysroot = "";
+/* path specified with -isysroot */
+const char *isysroot = NULL;
+
+/* record which phase to stop after for -c, -E, and -S */
+/* default is to stop after PHASE_LINK */
+static int stop_after = PHASE_DEFAULT;
+
+int nostdinc = 0;	// set if -nostdinc is specified
+int nostartfiles = 0;	// set if -nostartfiles is specified
+int nostdlib = 0;	// set if -nostdlib is specified
+int verbose_mode = 0;	// set to number of --verbose arguments
+int save_temps = 0;	// set if -save-temps is specified
+int debug_mode = 0;	// set if -g specified
+int pic_mode = 0;	// set to 1 if -fpic, 2 if -fPIC; last one specified wins
+const char *output_file;	// set to the value of the -o option (output file)
+
+/* compiler base directory - from -B */
+const char *basedir = BASEDIR;	// defaults to LWCC_LIBDIR, stringified by the S() macro above
+
+/* used to ensure a unique temporary file at every stage */
+static int file_counter = 0;
+
+/* these are various string lists used to keep track of
things, mostly
+   command line arguments. */
+
+lw_stringlist_t input_files;		// input files from command line
+lw_stringlist_t runtime_dirs;		// directories to search for runtime files
+lw_stringlist_t lib_dirs;			// directories to search for library files
+lw_stringlist_t program_dirs;		// directories to search for compiler program components
+lw_stringlist_t preproc_args;		// recorded arguments to pass through to the preprocessor
+lw_stringlist_t include_dirs;		// include paths specified with -I
+lw_stringlist_t includes;			// files specified with -include
+lw_stringlist_t user_sysincdirs;	// include paths specified with -isystem
+lw_stringlist_t asm_args;			// recorded arguments to pass through to the assembler
+lw_stringlist_t linker_args;		// recorded arguments (and input files) to pass through to the linker
+lw_stringlist_t sysincdirs;			// the standard system include directories
+lw_stringlist_t tempfiles;			// a list of temporary files created which need to be cleaned up
+lw_stringlist_t compiler_args;		// recorded arguments to pass through to the compiler
+lw_stringlist_t priv_sysincdirs;	// system include directories for lwcc itself
+
+/* forward declarations */
+static void parse_command_line(int, char **);
+
+/* signal handler for SIGTERM - all it does is record the fact that
+   SIGTERM happened and propagate the signal to whatever child process
+   might currently be running */
+static void exit_on_signal(int sig)
+{
+	sigterm_received = 1;
+	if (child_pid)
+		kill(child_pid, SIGTERM);
+}
+
+/* utility function to carp about an error condition and bail; takes a printf style format and exits with status 1 */
+void do_error(const char *f, ...)
+{
+	va_list arg;
+	va_start(arg, f);
+	fprintf(stderr, "ERROR: ");
+	vfprintf(stderr, f, arg);
+	putc('\n', stderr);
+	va_end(arg);
+	exit(1);
+}
+
+/* utility function to carp about some condition; do not bail */
+void do_warning(const char *f, ...)
+{
+	va_list arg;
+	va_start(arg, f);
+	fprintf(stderr, "WARNING: ");
+	vfprintf(stderr, f, arg);
+	putc('\n', stderr);
+	va_end(arg);
+}
+
+/* utility function to print out an array of strings, separated by single
+   spaces, on stdout - stops at the first NULL string pointer. */
+static void print_array(char **arr)
+{
+	int c = 0;		/* becomes nonzero once the first entry has been printed */
+	while (*arr)
+	{
+		if (c)
+			printf(" ");
+		printf("%s", *arr);
+		arr++;
+		c = 1;
+	}
+}
+
+/* expand any search path entries to reflect the sysroot and
+   isysroot settings. Note that it does NOT apply to the compiler
+   program search path */
+static void expand_sysroot(void)
+{
+	/* list of path lists to process for replacements of = */
+	lw_stringlist_t *lists[] = { &sysincdirs, &include_dirs, &user_sysincdirs, &lib_dirs, NULL };
+	/* list of replacement strings for = in the same order */
+	const char *sysroots[] = { isysroot, isysroot, isysroot, sysroot, NULL };
+	size_t i, sysroot_len, value_len;
+	char *path;
+	lw_stringlist_t newlist;
+	lw_stringlist_t working;
+	char *s;
+
+	/* for each list, run through entry by entry, do any needed replacement
+	   and add the entry to a new list. Then replace the old list with the
+	   new one. */
+	for (i = 0; lists[i] != NULL; i++)
+	{
+		working = *lists[i];
+		newlist = lw_stringlist_create();
+
+		lw_stringlist_reset(working);
+		for (s = lw_stringlist_current(working); s; s = lw_stringlist_next(working))
+		{
+			if (s[0] == '=')
+			{
+				sysroot_len = strlen(sysroots[i]);
+				value_len = strlen(s);
+				/* note that the skipped = will make up for the trailing NUL */
+				path = lw_alloc(sysroot_len + value_len);
+				memcpy(path, sysroots[i], sysroot_len);
+				/* s + 1 skips the '='; value_len bytes from there include the trailing NUL */
+				memcpy(path + sysroot_len, s + 1, value_len);
+				lw_stringlist_addstring(newlist, path);
+				lw_free(path);
+			}
+			else
+			{
+				lw_stringlist_addstring(newlist, s);
+			}
+		}
+		lw_stringlist_destroy(working);
+		*lists[i] = newlist;
+	}
+}
+
+/* look for file fn in path list p which is okay for access mode mode.
+   Return a string allocated by lw_alloc.
*/ +static char *find_file(const char *fn, lw_stringlist_t p, int mode) +{ + char *s; + char *f; + size_t lf, lp; + int need_slash; + + lf = strlen(fn); + lw_stringlist_reset(p); + for (s = lw_stringlist_current(p); s; s = lw_stringlist_next(p)) + { + lp = strlen(s); + need_slash = 0; + if (lp && s[lp - 1] == '/') + need_slash = 1; + f = lw_alloc(lp + lf + need_slash + 1); + memcpy(f, s, lp); + if (need_slash) + f[lp] = '/'; + /* +1 gets the NUL */ + memcpy(f + lp + need_slash, fn, lf + 1); + if (access(f, mode) == 0) + return f; + lw_free(f); + } + /* if not found anywhere, try the bare filename - it might work */ + return lw_strdup(fn); +} + +/* take a string list which contains an argv and execute the specified + program */ +static int execute_program(lw_stringlist_t args) +{ + int argc; + char **argv; + int result; + char *s; + + argc = lw_stringlist_nstrings(args); + argv = lw_alloc(sizeof(char *) * (argc + 1)); + lw_stringlist_reset(args); + for (result = 0, s = lw_stringlist_current(args); s; s = lw_stringlist_next(args)) + { + argv[result] = s; + } + argv[result] = NULL; + + if (verbose_mode) + { + printf("Executing "); + print_array(argv); + printf("\n"); + } + + /* bail now if a signal happened */ + if (sigterm_received) + { + lw_free(argv); + return 1; + } + + /* make sure stdio has flushed everything so that output from the + child process doesn't get intermingled */ + fflush(NULL); + + /* now make the child process */ + child_pid = fork(); + if (child_pid == 0) + { + /* child process */ + /* try executing program */ + execvp(argv[0], argv); + /* only way to get here is if execvp() failed so carp about it and exit */ + fprintf(stderr, "Exec of %s failed: %s", argv[0], strerror(errno)); + /* exit with failure but don't call any atexit(), etc., functions */ + _exit(127); + } + else if (child_pid == -1) + { + /* failure to make child process */ + do_error("Failed to execute program %s: %s", argv[0], strerror(errno)); + } + /* clean up argv */ + 
lw_free(argv); + + /* parent process - wait for child to exit */ + while (waitpid(child_pid, &result, 0) == -1 && errno == EINTR) + /* do nothing */; + /* fetch actual return status */ + result = WEXITSTATUS(result); + if (result) + { + /* carp about non-zero return status */ + do_error("%s terminated with status %d", argv[0], result); + } + /* return nonzero if signalled to exit */ + return sigterm_received; +} + +/* +construct an output file name as follows: + +1. if it is the last phase of compilation and an output file name is + specified, use that if not specified +2. if it is the last phase or we are saving temporary files, any suffix + on f is removed and replaced with nsuffix +3. otherwise, a temporary file is created. If necessary, a temporary + directory is created to hold the temporary file. The name of the temporary + file is recorded in the tempfiles string list for later cleanup. The name + of the temporary directory is recorded in temp_directory for later cleanup. +*/ +static char *output_name(const char *f, const char *nsuffix, int last) +{ + const char *osuffix; + char *name; + size_t lf, ls, len; + int counter_len; + + /* get a new file counter */ + file_counter++; + + /* if the output was specified, use it */ + if (last && output_file) + { + return lw_strdup(output_file); + } + + /* find the start of the old suffix */ + osuffix = strrchr(f, '.'); + if (osuffix != NULL && strchr(osuffix, '/') != NULL) + osuffix = NULL; + if (osuffix == NULL) + osuffix = f + strlen(f); + + ls = strlen(nsuffix); + + /* if this is the last stage or we're saving temps, use a name derived + from the original file name by replacing the suffix with nsuffix */ + if (save_temps || last) + { + lf = osuffix - f; + name = lw_alloc(lf + ls + 1); + memcpy(name, f, lf); + /* note that the +1 will copy the trailing NUL */ + memcpy(name + lf, nsuffix, ls + 1); + return name; + } + + /* finally, use a temporary file */ + if (temp_directory == NULL) + { + /* if we haven't already 
made a temporary directory, do so */ + const char *dirtempl; + char *path; + size_t dirtempl_len; + int need_slash; + + /* look for a TMPFIR environment variable and use that if present + but use /tmp as a fallback */ + dirtempl = getenv("TMPDIR"); + if (dirtempl == NULL) + dirtempl = "/tmp"; + dirtempl_len = strlen(dirtempl); + /* work out if we need to add a slash on the end of the directory */ + if (dirtempl_len && dirtempl[dirtempl_len - 1] == '/') + need_slash = 0; + else + need_slash = 1; + /* make a string of the form /lwcc-XXXXXX */ + path = lw_alloc(dirtempl_len + need_slash + 11 + 1); + memcpy(path, dirtempl, dirtempl_len); + if (need_slash) + path[dirtempl_len] = '/'; + memcpy(path + dirtempl_len + need_slash, "lwcc-XXXXXX", 12); + /* now make a temporary directory */ + if (mkdtemp(path) == NULL) + do_error("mkdtemp failed: %s", strerror(errno)); + /* record the temporary directory name */ + temp_directory = path; + } + /* now create a file name in the temporary directory. The strategy here + uses a counter that is passed along and is guaranteed to be unique for + every file requested. */ + lf = strlen(temp_directory); + /* this gets the length of the counter as a string but doesn't actually + allocate anything so we can make a string long enough */ + counter_len = snprintf(NULL, 0, "%d", file_counter); + if (counter_len < 1) + do_error("snprintf failure: %s", strerror(errno)); + len = lf + 1 + (size_t)counter_len + ls + 1; + name = lw_alloc(len); + /* it should be impossible for ths snprintf call to fail */ + snprintf(name, len, "%s/%d%s", temp_directory, file_counter, nsuffix); + + /* record the temporary file name for later */ + lw_stringlist_addstring(tempfiles, name); + return name; +} + +/* this calls the actual compiler, passing the contents of compiler_args + as arguments. It also adds the input file and output file. 
*/
+static int compile_file(const char *file, char *input, char **output, const char *suffix)
+{
+	lw_stringlist_t args;
+	char *out;
+	int retval;
+	char *s;
+
+	args = lw_stringlist_create();
+
+	/* find the compiler executable and make that argv[0] */
+	s = find_file(compiler_program_name, program_dirs, X_OK);
+	lw_stringlist_addstring(args, s);
+	lw_free(s);
+
+	/* add all the saved compiler arguments to argv */
+	lw_stringlist_reset(compiler_args);
+	for (s = lw_stringlist_current(compiler_args); s; s = lw_stringlist_next(compiler_args))
+	{
+		lw_stringlist_addstring(args, s);
+	}
+	/* work out the output file name (derived from the original file name, not from input) and add that to argv */
+	out = output_name(file, suffix, stop_after == PHASE_COMPILE);
+	lw_stringlist_addstring(args, "-o");
+	lw_stringlist_addstring(args, out);
+	/* add the input file to argv */
+	lw_stringlist_addstring(args, input);
+	/* if the caller's output pointer currently refers to the input name,
+	   free the input one since *output is about to be replaced */
+	if (*output == input)
+		lw_free(input);
+	/* tell the caller what the output name is */
+	*output = out;
+	/* actually run the compiler */
+	retval = execute_program(args);
+
+	lw_stringlist_destroy(args);
+	return retval;
+}
+
+/* this calls the actual assembler, passing the contents of asm_args
+   as arguments. It also adds the input file and output file.
*/
+static int assemble_file(const char *file, char *input, char **output, const char *suffix)
+{
+	lw_stringlist_t args;
+	char *out;
+	int retval;
+	char *s;
+
+	args = lw_stringlist_create();
+
+	/* find the assembler binary and add that as argv[0] */
+	s = find_file(assembler_program_name, program_dirs, X_OK);
+	lw_stringlist_addstring(args, s);
+	lw_free(s);
+
+	/* add asm_args to argv */
+	lw_stringlist_reset(asm_args);
+	for (s = lw_stringlist_current(asm_args); s; s = lw_stringlist_next(asm_args))
+	{
+		lw_stringlist_addstring(args, s);
+	}
+	/* get an output file name and add that to argv - NOTE(review): the suffix parameter is ignored here; ".o" is hard-coded */
+	out = output_name(file, ".o", stop_after == PHASE_ASSEMBLE);
+	lw_stringlist_addstring(args, "-o");
+	lw_stringlist_addstring(args, out);
+	/* finally, add the input file */
+	lw_stringlist_addstring(args, input);
+	/* clean up input file name if same as output pointer */
+	if (*output == input)
+		lw_free(input);
+	/* tell caller what file we made */
+	*output = out;
+	/* actually run the assembler */
+	retval = execute_program(args);
+
+	lw_stringlist_destroy(args);
+	return retval;
+}
+
+/* run the preprocessor.
Pass along preproc_args and appropriate options
+   for all the include directories */
+static int preprocess_file(const char *file, char *input, char **output, const char *suffix)
+{
+	lw_stringlist_t args;
+	char *s;
+	char *out;
+	int retval;
+
+	args = lw_stringlist_create();
+
+	/* find the preprocessor binary and make that argv[0] */
+	s = find_file(preprocessor_program_name, program_dirs, X_OK);
+	lw_stringlist_addstring(args, s);
+	lw_free(s);
+
+	/* add preproc_args to argv */
+	lw_stringlist_reset(preproc_args);
+	for (s = lw_stringlist_current(preproc_args); s; s = lw_stringlist_next(preproc_args))
+	{
+		lw_stringlist_addstring(args, s);
+	}
+
+	/* add the files given with -include; they are passed to the preprocessor as -i */
+	lw_stringlist_reset(includes);
+	for (s = lw_stringlist_current(includes); s; s = lw_stringlist_next(includes))
+	{
+		lw_stringlist_addstring(args, "-i");
+		lw_stringlist_addstring(args, s);
+	}
+
+	/* add the include directories specified by -I */
+	lw_stringlist_reset(include_dirs);
+	for (s = lw_stringlist_current(include_dirs); s; s = lw_stringlist_next(include_dirs))
+	{
+		lw_stringlist_addstring(args, "-I");
+		lw_stringlist_addstring(args, s);
+	}
+
+	/* add the user specified system include directories (-isystem); passed to the preprocessor as -S */
+	lw_stringlist_reset(user_sysincdirs);
+	for (s = lw_stringlist_current(user_sysincdirs); s; s = lw_stringlist_next(user_sysincdirs))
+	{
+		lw_stringlist_addstring(args, "-S");
+		lw_stringlist_addstring(args, s);
+	}
+
+	/* and, if not -nostdinc, the standard system include directories */
+	if (!nostdinc)
+	{
+		lw_stringlist_reset(priv_sysincdirs);
+		for (s = lw_stringlist_current(priv_sysincdirs); s; s = lw_stringlist_next(priv_sysincdirs))
+		{
+			lw_stringlist_addstring(args, "-S");
+			lw_stringlist_addstring(args, s);
+		}
+		lw_stringlist_reset(sysincdirs);
+		for (s = lw_stringlist_current(sysincdirs); s; s = lw_stringlist_next(sysincdirs))
+		{
+			lw_stringlist_addstring(args, "-S");
+			lw_stringlist_addstring(args, s);
+		}
+	}
+
+	/* if we stop after preprocessing, output to stdout if no output file */
+	if (stop_after == PHASE_PREPROCESS && output_file == NULL)
+	{
+		out = lw_strdup("-");
+	}
+	else
+	{
+		/* otherwise, make an output file */
+		out = output_name(file, suffix, stop_after == PHASE_PREPROCESS);
+	}
+	/* if not stdout, add the output file to argv */
+	if (strcmp(out, "-") != 0)
+	{
+		lw_stringlist_addstring(args, "-o");
+		lw_stringlist_addstring(args, out);
+	}
+	/* add the input file name to argv */
+	lw_stringlist_addstring(args, input);
+
+	/* if input and output pointers are same, clean up input */
+	if (*output == input)
+		lw_free(input);
+	/* tell caller what our output file is */
+	*output = out;
+	/* finally, actually run the preprocessor */
+	retval = execute_program(args);
+
+	lw_stringlist_destroy(args);
+	return retval;
+}
+
+/*
+handle an input file through the various stages of compilation. If any
+stage decides to handle an input file, that fact is recorded. If control
+reaches the end of the function without a file being handled, that
+fact is mentioned to the user. Unknown files are passed to the linker
+if nothing handles them and linking is to be done. It's possible the linker
+will actually know what to do with them.
+*/
+static int handle_input_file(const char *f)
+{
+	const char *suffix;
+	char *src;
+	int handled = 0, retval = 0;	/* both are read at done: even when no stage ran */
+
+	/* note: this needs to handle -x but for now, assume c for stdin */
+	if (strcmp(f, "-") == 0)
+	{
+		suffix = ".c";
+	}
+	else
+	{
+		/* work out the suffix on the file */
+		suffix = strrchr(f, '.');
+		if (suffix != NULL && strchr(suffix, '/') != NULL)
+			suffix = NULL;
+		if (suffix == NULL)
+			suffix = "";
+	}
+
+	/* make a copy of the file name; each stage replaces src with the name of its output */
+	src = lw_strdup(f);
+
+	/* preprocess if appropriate */
+	if (strcmp(suffix, ".c") == 0)
+	{
+		/* preprocessed c input source goes to .i */
+		suffix = ".i";
+		retval = preprocess_file(f, src, &src, suffix);
+		if (retval)
+			goto done;
+		handled = 1;
+	}
+	else if (strcmp(suffix, ".S") == 0)
+	{
+		/* preprocessed asm source goes to .s */
+		suffix = ".s";
+		retval = preprocess_file(f, src, &src, suffix);
+		if (retval)
+			goto done;
+		handled = 1;
+	}
+	/* if we're only preprocessing, bail */
+	if (stop_after == PHASE_PREPROCESS)
+		goto done;
+
+	/* now on to compile if appropriate */
+	if (strcmp(suffix, ".i") == 0)
+	{
+		/* preprocessed c source goes to .s after compiling */
+		suffix = ".s";
+		retval = compile_file(f, src, &src, suffix);
+		if (retval)
+			goto done;
+		handled = 1;
+	}
+	/* bail if we're only compiling, not assembling */
+	if (stop_after == PHASE_COMPILE)
+		goto done;
+
+	/* assemble if appropriate */
+	if (strcmp(suffix, ".s") == 0)
+	{
+		/* assembler output is an object file */
+		suffix = ".o";
+		retval = assemble_file(f, src, &src, suffix);
+		if (retval)
+			goto done;
+		handled = 1;
+	}
+	/* bail if we're not linking */
+	if (stop_after == PHASE_ASSEMBLE)
+		goto done;
+
+	/* if we get here with a .o unhandled, pretend it is handled */
+	if (strcmp(suffix, ".o") == 0)
+		handled = 1;
+
+	/* add the final file name to the linker args */
+	lw_stringlist_addstring(linker_args, src);
+done:
+	if (!handled && !retval)
+	{
+		/* carp about unhandled files if there is no error */
+		if (stop_after == PHASE_LINK)
+		{
+			do_warning("unknown suffix %s; passing file down to linker", suffix);
+		}
+		else
+		{
+			do_warning("unknown suffix %s; skipped", suffix);
+		}
+	}
+	/* clean up the file name */
+	lw_free(src);
+
+	return retval;
+}
+
+/*
+This actually runs the linker. Along the way, all the files the linker
+is supposed to handle will have been added to linker_args.
+*/
+static int handle_linking(void)
+{
+	lw_stringlist_t linker_flags;
+	char *s;
+	int retval;
+
+	linker_flags = lw_stringlist_create();
+
+	/* find the linker binary and make that argv[0] */
+	s = find_file(linker_program_name, program_dirs, X_OK);
+	lw_stringlist_addstring(linker_flags, s);
+	lw_free(s);
+
+	/* tell the linker about the output file name, if specified */
+	if (output_file)
+	{
+		lw_stringlist_addstring(linker_flags, "-o");
+		lw_stringlist_addstring(linker_flags, (char *)output_file);
+	}
+
+	/* add the standard library options if not -nostdlib (nothing is added yet) */
+	if (!nostdlib)
+	{
+	}
+
+	/* add the standard startup files if not -nostartfiles (nothing is added yet) */
+	if (!nostartfiles)
+	{
+	}
+
+	/* pass along the various input files, etc., to the linker */
+	lw_stringlist_reset(linker_args);
+	for (s = lw_stringlist_current(linker_args); s; s = lw_stringlist_next(linker_args))
+	{
+		lw_stringlist_addstring(linker_flags, s);
+	}
+
+	/* actually run the linker */
+	retval = execute_program(linker_flags);
+
+	lw_stringlist_destroy(linker_flags);
+	return retval;
+}
+
+/*
+Do various setup tasks, process the command line, handle the input files,
+and clean up.
+*/
+int main(int argc, char **argv)
+{
+	char *ap;
+	int retval;
+
+	input_files = lw_stringlist_create();
+	runtime_dirs = lw_stringlist_create();
+	lib_dirs = lw_stringlist_create();
+	program_dirs = lw_stringlist_create();
+	preproc_args = lw_stringlist_create();
+	include_dirs = lw_stringlist_create();
+	user_sysincdirs = lw_stringlist_create();
+	asm_args = lw_stringlist_create();
+	linker_args = lw_stringlist_create();
+	sysincdirs = lw_stringlist_create();
+	includes = lw_stringlist_create();
+	tempfiles = lw_stringlist_create();
+	compiler_args = lw_stringlist_create();
+	priv_sysincdirs = lw_stringlist_create();
+
+	parse_command_line(argc, argv);
+	if (stop_after == PHASE_DEFAULT)
+		stop_after = PHASE_LINK;
+
+	if (verbose_mode)
+		printf("%s\n", VERSTRING);
+
+	if (isysroot == NULL)
+		isysroot = sysroot;
+	expand_sysroot();
+
+	if (stop_after != PHASE_LINK && output_file && lw_stringlist_nstrings(input_files) > 1)
+	{
+		do_error("-o cannot be specified with multiple inputs unless linking");
+	}
+
+	// default to stdout for preprocessing
+	if (stop_after == PHASE_PREPROCESS && output_file == NULL)
+		output_file = "-";
+
+	if (lw_stringlist_nstrings(input_files) == 0)
+		do_error("No input files specified");
+
+	/* handle -B here; the +10 leaves room for the longest suffix ("/include") and the NUL */
+	ap = lw_alloc(strlen(basedir) + 10);
+	strcpy(ap, basedir);
+	strcat(ap, "/bin");
+	lw_stringlist_addstring(program_dirs, ap);
+	strcpy(ap, basedir);
+	strcat(ap, "/lib");
+	lw_stringlist_addstring(runtime_dirs, ap);
+	strcpy(ap, basedir);
+	strcat(ap, "/include");
+	lw_stringlist_addstring(priv_sysincdirs, ap);
+	lw_free(ap);
+
+	retval = 0;
+	/* make sure we exit if interrupted */
+	signal(SIGTERM, exit_on_signal);
+
+	/* handle input files */
+	lw_stringlist_reset(input_files);
+	for (ap = lw_stringlist_current(input_files); ap; ap = lw_stringlist_next(input_files))
+	{
+		if (handle_input_file(ap))
+			retval = 1;
+	}
+
+	if (!retval && stop_after >= PHASE_LINK)
+	{
+		retval = handle_linking();
+	}
+
+	/* if a signal nixed us, mention the fact */
+	if (sigterm_received)
+		do_warning("Terminating on signal");
+
+	/* clean up temporary files */
+	if (!save_temps)
+	{
+		lw_stringlist_reset(tempfiles);
+		for (ap = lw_stringlist_current(tempfiles); ap; ap = lw_stringlist_next(tempfiles))
+		{
+			if (unlink(ap) == -1)
+			{
+				do_warning("Removal of %s failed: %s", ap, strerror(errno));
+			}
+		}
+		if (temp_directory)
+		{
+			if (rmdir(temp_directory) == -1)
+			{
+				do_warning("Removal of temporary directory %s failed: %s", temp_directory, strerror(errno));
+			}
+		}
+	}
+
+	/* be polite and clean up all the string lists */
+	lw_stringlist_destroy(input_files);
+	lw_stringlist_destroy(runtime_dirs);
+	lw_stringlist_destroy(lib_dirs);
+	lw_stringlist_destroy(program_dirs);
+	lw_stringlist_destroy(preproc_args);
+	lw_stringlist_destroy(include_dirs);
+	lw_stringlist_destroy(user_sysincdirs);
+	lw_stringlist_destroy(asm_args);
+	lw_stringlist_destroy(linker_args);
+	lw_stringlist_destroy(sysincdirs);
+	lw_stringlist_destroy(includes);
+	lw_stringlist_destroy(tempfiles);
+	lw_stringlist_destroy(compiler_args);
+	lw_stringlist_destroy(priv_sysincdirs);
+	return retval;
+}
+
+struct option_e
+{
+	char *optbase;				// base name of option, with -
+	int needarg;				// one of enum OPT_ARG: how the option takes its argument
+	int noextra;				// nonzero if there must not be anything after optbase
+	int optcode;				// option code (passed to fn)
+	void *optptr;				// pointer for opt (passed to fn)
+	int (*fn)(char *, char *, int, void *);	// function to handle argument, NULL to ignore it
+};
+
+enum CMD_MISC {
+	CMD_MISC_VERSION,
+	CMD_MISC_OPTIMIZE,
+};
+
+enum OPT_ARG {
+	OPT_ARG_OPT = 0,			// argument is optional
+	OPT_ARG_SEP = 1,			// argument may be separate
+	OPT_ARG_INC = 2,			// argument must not be separate
+};
+
+/* set an integer at *optptr to optcode */
+static int cmdline_set_int(char *opt, char *optarg, int optcode, void *optptr)
+{
+	*((int *)optptr) = optcode;
+	return 0;
+}
+
+/* set a string at *optptr to optarg */
+static int cmdline_set_string(char *opt, char *optarg, int optcode, void *optptr)
+{
+	char **s = (char **)optptr;
+	*s = optarg;
+
+	return 0;
+}
+
+/* set a string at *optptr to optarg; it is a fatal error if it is already set.
+   A nonzero optcode gives the number of characters of opt to show in the error */
+static int cmdline_set_stringifnull(char *opt, char *optarg, int optcode, void *optptr)
+{
+	char **s = (char **)optptr;
+
+	if (*s)
+		do_error("Multiple %.*s options specified", optcode ? optcode : (int)strlen(opt), opt);
+	*s = optarg;
+
+	return 0;
+}
+
+/* split arg on commas and add the results to string list *optptr; arg is modified in place */
+static int cmdline_argsplit(char *opt, char *arg, int optcode, void *optptr)
+{
+	lw_stringlist_t l = *(lw_stringlist_t *)optptr;
+	char *next;
+
+	for (; arg != NULL; arg = next)
+	{
+		next = strchr(arg, ',');
+		if (next != NULL)
+			*next++ = '\0';
+		lw_stringlist_addstring(l, arg);
+	}
+	return 0;
+}
+
+/* add opt to string list *optptr */
+static int cmdline_arglist(char *opt, char *arg, int optcode, void *optptr)
+{
+	lw_stringlist_t l = *(lw_stringlist_t *)optptr;
+
+	lw_stringlist_addstring(l, opt);
+	return 0;
+}
+
+/* add optarg to string list *optptr */
+static int cmdline_optarglist(char *opt, char *optarg, int optcode, void *optptr)
+{
+	lw_stringlist_t l = *(lw_stringlist_t *)optptr;
+
+	lw_stringlist_addstring(l, optarg);
+	return 0;
+}
+
+static int cmdline_misc(char *opt, char *optarg, int optcode, void *optptr)
+{
+	switch (optcode)
+	{
+	case CMD_MISC_VERSION:
+		printf("%s\n", VERSTRING);
+		exit(0);
+
+	case CMD_MISC_OPTIMIZE:
+		if (!optarg)
+			return 0;
+		switch (*optarg)
+		{
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case 's':
+			return 0;
+		}
+		return -1;
+
+	default:
+		return -1;
+	}
+	return 0;
+}
+
+static int cmdline_set_intifzero(char *opt, char *optarg, int optcode, void *optptr)
+{
+	int *iv = (int *)optptr;
+
+	if (*iv && *iv != optcode)
+	{
+		do_error("conflicting compiler option specified: %s", opt);
+	}
+	*iv = optcode;
+	return 0;
+}
+
+struct option_e optionlist[] =
+{
+	{ "--version",		OPT_ARG_OPT,	1,	CMD_MISC_VERSION,	NULL,				cmdline_misc },
+	{ "--sysroot=",		OPT_ARG_INC,	0,	0,					&sysroot,			cmdline_set_string },
+	{ "-B",				OPT_ARG_INC,	0,	0,					&basedir,			cmdline_set_string },
+	{ "-C",				OPT_ARG_OPT,	1,	0,					&preproc_args,		cmdline_arglist },
+	{ "-c",				OPT_ARG_OPT,	1,	PHASE_ASSEMBLE,		&stop_after,		cmdline_set_intifzero },	/* produce object files, do not link */
+	{ "-D",				OPT_ARG_INC,	0,	0,					&preproc_args,		cmdline_arglist },
+	{ "-E",				OPT_ARG_OPT,	1,	PHASE_PREPROCESS,	&stop_after,		cmdline_set_intifzero },
+	{ "-fPIC",			OPT_ARG_OPT,	1,	2,					&pic_mode,			cmdline_set_int },
+	{ "-fpic",			OPT_ARG_OPT,	1,	1,					&pic_mode,			cmdline_set_int },
+	{ "-g",				OPT_ARG_OPT,	1,	1,					&debug_mode,		cmdline_set_int },
+	{ "-I",				OPT_ARG_SEP,	0,	0,					&include_dirs,		cmdline_optarglist },
+	{ "-include",		OPT_ARG_SEP,	1,	0,					&includes,			cmdline_optarglist },
+	{ "-isysroot",		OPT_ARG_SEP,	1,	0,					&isysroot,			cmdline_set_string },
+	{ "-isystem",		OPT_ARG_SEP,	1,	0,					&user_sysincdirs,	cmdline_optarglist },
+	{ "-M",				OPT_ARG_OPT,	1,	0,					&preproc_args,		cmdline_arglist },
+	{ "-nostartfiles",	OPT_ARG_OPT,	1,	1,					&nostartfiles,		cmdline_set_int },
+	{ "-nostdinc",		OPT_ARG_OPT,	1,	1,					&nostdinc,			cmdline_set_int },
+	{ "-nostdlib",		OPT_ARG_OPT,	1,	1,					&nostdlib,			cmdline_set_int },
+	{ "-O",				OPT_ARG_OPT,	0,	CMD_MISC_OPTIMIZE,	NULL,				cmdline_misc },
+	{ "-o",				OPT_ARG_SEP,	0,	2,					&output_file,		cmdline_set_stringifnull },
+	{ "-S",				OPT_ARG_OPT,	1,	PHASE_COMPILE,		&stop_after,		cmdline_set_intifzero },	/* produce assembly, do not assemble */
+	{ "-save-temps",	OPT_ARG_OPT,	1,	1,					&save_temps,		cmdline_set_int },
+	{ "-trigraphs",		OPT_ARG_OPT,	1,	0,					&preproc_args,		cmdline_arglist },
+	{ "-U",				OPT_ARG_INC,	0,	0,					&preproc_args,		cmdline_arglist },
+	{ "-v",				OPT_ARG_OPT,	1,	1,					&verbose_mode,		cmdline_set_int },
+	{ "-Wp,",			OPT_ARG_INC,	0,	0,					&preproc_args,		cmdline_argsplit },
+	{ "-Wa,",			OPT_ARG_INC,	0,	0,					&asm_args,			cmdline_argsplit },
+	{ "-Wl,",			OPT_ARG_INC,	0,	0,					&linker_args,		cmdline_argsplit },
+	{ "-W",				OPT_ARG_INC,	0,	0,					NULL,				NULL }, /* warning options */
+	{ "-x",				OPT_ARG_SEP,	1,	0,					NULL,				NULL }, /* language options */
+	{ NULL, 0, 0 }
+};
+
+static void parse_command_line(int argc, char **argv) +{ + int i, j, olen, ilen; + char *optarg; + + for (i = 1; i < argc; i++) + { + if (argv[i][0] != '-' || argv[i][1] == '\0') + { + /* we have a non-option argument */ + lw_stringlist_addstring(input_files, argv[i]); + continue; + } + olen = strlen(argv[i]); + for (j = 0; optionlist[j].optbase; j++) + { + ilen = strlen(optionlist[j].optbase); + /* if length of optbase is longer than argv[i], it can't match */ + if (ilen > olen) + continue; + /* does the base match? */ + if (strncmp(optionlist[j].optbase, argv[i], ilen) == 0) + break; + } + if (optionlist[j].optbase == NULL) + { + do_error("Unsupported option %s", argv[i]); + } + /* is the option supposed to be exact? */ + if (optionlist[j].noextra && argv[i][ilen] != '\0') + { + do_error("Unsupported option %s", argv[i]); + } + /* is there an argument? */ + optarg = NULL; + if (argv[i][ilen]) + optarg = argv[i] + ilen; + if (!optarg && optionlist[j].needarg == 1) + { + if (i == argc) + { + do_error("Option %s requires an argument", argv[i]); + } + optarg = argv[++i]; + } + if (!optarg && optionlist[j].needarg == 2) + { + do_error("Option %s requires an argument", argv[i]); + } + /* handle the option */ + if (optionlist[j].fn) + { + if ((*(optionlist[j].fn))(argv[i], optarg, optionlist[j].optcode, optionlist[j].optptr) != 0) + do_error("Unsupported option %s %s", argv[i], optarg ? optarg : ""); + } + } +} diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/lex.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/lex.c Mon Aug 05 21:27:09 2019 -0600 @@ -0,0 +1,802 @@ +/* +lwcc/lex.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +*/ + +#include +#include + +#include +#include + +#include "cpp.h" +#include "token.h" + +/* fetch a raw input byte from the current file. Will return CPP_EOF if + EOF is encountered and CPP_EOL if an end of line sequence is encountered. + End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is + returned on the first CR or LF encountered. The complementary CR or LF + is munched, if present, when the *next* character is read. This always + operates on file_stack. + + This function also accounts for line numbers in input files and also + character columns. +*/ +static int fetch_byte_ll(struct preproc_info *pp) +{ + int c; + + if (pp -> eolstate != 0) + { + pp -> lineno++; + pp -> column = 0; + } + c = getc(pp -> fp); + pp -> column++; + if (pp -> eolstate == 1) + { + // just saw CR, munch LF + if (c == 10) + c = getc(pp -> fp); + pp -> eolstate = 0; + } + else if (pp -> eolstate == 2) + { + // just saw LF, much CR + if (c == 13) + c = getc(pp -> fp); + pp -> eolstate = 0; + } + + if (c == 10) + { + // we have LF - end of line, flag to munch CR + pp -> eolstate = 2; + c = CPP_EOL; + } + else if (c == 13) + { + // we have CR - end of line, flag to munch LF + pp -> eolstate = 1; + c = CPP_EOL; + } + else if (c == EOF) + { + c = CPP_EOF; + } + return c; +} + +/* This function takes a sequence of bytes from the _ll function above + and does trigraph interpretation on it, but only if the global + trigraphs is nonzero. 
*/ +static int fetch_byte_tg(struct preproc_info *pp) +{ + int c; + + if (!pp -> trigraphs) + { + c = fetch_byte_ll(pp); + } + else + { + /* we have to do the trigraph shit here */ + if (pp -> ra != CPP_NOUNG) + { + if (pp -> qseen > 0) + { + c = '?'; + pp -> qseen -= 1; + return c; + } + else + { + c = pp -> ra; + pp -> ra = CPP_NOUNG; + return c; + } + } + + c = fetch_byte_ll(pp); + while (c == '?') + { + pp -> qseen++; + c = fetch_byte_ll(pp); + } + + if (pp -> qseen >= 2) + { + // we have a trigraph + switch (c) + { + case '=': + c = '#'; + pp -> qseen -= 2; + break; + + case '/': + c = '\\'; + pp -> qseen -= 2; + break; + + case '\'': + c = '^'; + pp -> qseen -= 2; + break; + + case '(': + c = '['; + pp -> qseen -= 2; + break; + + case ')': + c = ']'; + pp -> qseen -= 2; + break; + + case '!': + c = '|'; + pp -> qseen -= 2; + break; + + case '<': + c = '{'; + pp -> qseen -= 2; + break; + + case '>': + c = '}'; + pp -> qseen -= 2; + break; + + case '-': + c = '~'; + pp -> qseen -= 2; + break; + } + if (pp -> qseen > 0) + { + pp -> ra = c; + c = '?'; + pp -> qseen--; + } + } + else if (pp -> qseen > 0) + { + pp -> ra = c; + c = '?'; + pp -> qseen--; + } + } + return c; +} + +/* This function puts a byte back onto the front of the input stream used + by fetch_byte(). Theoretically, an unlimited number of characters can + be unfetched. Line and column counting may be incorrect if unfetched + characters cross a token boundary. */ +void preproc_lex_unfetch_byte(struct preproc_info *pp, int c) +{ + if (pp -> lexstr) + { + if (c == CPP_EOL) + return; + if (pp -> lexstrloc > 0) + { + pp -> lexstrloc--; + return; + } + } + + if (pp -> ungetbufl >= pp -> ungetbufs) + { + pp -> ungetbufs += 100; + pp -> ungetbuf = lw_realloc(pp -> ungetbuf, pp -> ungetbufs); + } + pp -> ungetbuf[pp -> ungetbufl++] = c; +} + +/* This function retrieves a byte from the input stream. It performs + backslash-newline splicing on the returned bytes. 
Any character
+   retrieved from the unfetch buffer is presumed to have already passed
+   the backslash-newline filter.
+
+   Bytes taken from a lexstr buffer are returned as unsigned char values so
+   that a byte >= 0x80 is never handed to callers (and on to isspace()) as
+   a negative int on signed-char platforms. Once the string is exhausted,
+   CPP_EOL is returned on every call without advancing. */
+static int fetch_byte(struct preproc_info *pp)
+{
+	int c;
+
+	if (pp -> lexstr)
+	{
+		if (pp -> lexstr[pp -> lexstrloc])
+			return (unsigned char)pp -> lexstr[pp -> lexstrloc++];
+		else
+			return CPP_EOL;
+	}
+
+	if (pp -> ungetbufl > 0)
+	{
+		pp -> ungetbufl--;
+		c = pp -> ungetbuf[pp -> ungetbufl];
+		if (pp -> ungetbufl == 0)
+		{
+			lw_free(pp -> ungetbuf);
+			pp -> ungetbuf = NULL;
+			pp -> ungetbufs = 0;
+		}
+		return c;
+	}
+
+again:
+	if (pp -> unget != CPP_NOUNG)
+	{
+		c = pp -> unget;
+		pp -> unget = CPP_NOUNG;
+	}
+	else
+	{
+		c = fetch_byte_tg(pp);
+	}
+	if (c == '\\')
+	{
+		int c2;
+		c2 = fetch_byte_tg(pp);
+		if (c2 == CPP_EOL)
+			goto again;
+		else
+			pp -> unget = c2;
+	}
+	return c;
+}
+
+
+
+/*
+Lex a token off the current input file.
+
+Returned tokens are as follows:
+
+* all words starting with [a-zA-Z_] are returned as TOK_IDENT
+* numbers are returned as their appropriate type
+* all whitespace in a sequence, including comments, is returned as
+  a single instance of TOK_WSPACE
+* TOK_EOL is returned in the case of the end of a line
+* TOK_EOF is returned when the end of the file is reached
+* If no TOK_EOL appears before TOK_EOF, a TOK_EOL will be synthesised
+* Any symbolic operator, etc., recognized by C will be returned as such
+  a token
+* TOK_HASH will be returned for a #
+* trigraphs will be interpreted
+* backslash-newline will be interpreted
+* any instance of CR, LF, CRLF, or LFCR will be interpreted as TOK_EOL
+*/
+
+/* Worker behind preproc_lex_fetch_byte().
+
+   Fetches one byte, synthesises a CPP_EOL (with a warning) when the input
+   ends without a newline, and - unless inlit is nonzero - replaces each
+   comment with a single space. inlit is set by the string and character
+   literal scanners, because comment introducers inside a literal are
+   ordinary characters.
+
+   pp -> eolseen records whether the last byte returned was an end of line.
+   It is left untouched when CPP_EOF is returned so that reading again
+   after the end of input keeps returning CPP_EOF. */
+static int lex_fetch_byte(struct preproc_info *pp, int inlit)
+{
+	int c, c2;
+	int star;
+
+	c = fetch_byte(pp);
+	if (c == CPP_EOF)
+	{
+		if (pp -> eolseen == 0)
+		{
+			preproc_throw_warning(pp, "No newline at end of file");
+			pp -> eolseen = 1;
+			return CPP_EOL;
+		}
+		return c;
+	}
+
+	if (c == CPP_EOL)
+	{
+		pp -> eolseen = 1;
+		return c;
+	}
+
+	pp -> eolseen = 0;
+
+	/* convert comments to a single space here */
+	if (c != '/' || inlit)
+		return c;
+
+	c2 = fetch_byte(pp);
+	if (c2 == '/')
+	{
+		/* single line comment - runs up to, but not including, the
+		   end of the line */
+		do
+		{
+			c2 = fetch_byte(pp);
+		} while (c2 != CPP_EOF && c2 != CPP_EOL);
+		preproc_lex_unfetch_byte(pp, c2);
+		return ' ';
+	}
+	if (c2 == '*')
+	{
+		/* block comment - ends at the first '/' that directly follows
+		   a '*'. The raw fetch_byte() is used so that text inside the
+		   comment is never itself scanned for comments. */
+		star = 0;
+		for (;;)
+		{
+			c2 = fetch_byte(pp);
+			if (c2 == CPP_EOF)
+			{
+				/* unterminated: let the caller see the EOF */
+				preproc_lex_unfetch_byte(pp, c2);
+				break;
+			}
+			if (c2 == CPP_EOL && pp -> lexstr)
+			{
+				/* a lexstr reports CPP_EOL forever once it is
+				   exhausted; stop instead of spinning */
+				break;
+			}
+			if (star && c2 == '/')
+				break;
+			star = (c2 == '*');
+		}
+		return ' ';
+	}
+
+	/* not a comment - restore lookahead character */
+	preproc_lex_unfetch_byte(pp, c2);
+	return c;
+}
+
+/* Fetch the next byte for the tokenizer: end of line handling applied and
+   every comment replaced by a single space. Returns the byte, CPP_EOL or
+   CPP_EOF. */
+int preproc_lex_fetch_byte(struct preproc_info *pp)
+{
+	return lex_fetch_byte(pp, 0);
+}
+
+/* nonzero if c may appear in an identifier after its first character */
+static int lex_is_identchar(int c)
+{
+	return (c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+}
+
+/* consume the next byte if it is "want" (returns 1); otherwise push it
+   back and return 0 */
+static int lex_accept(struct preproc_info *pp, int want)
+{
+	int c;
+
+	c = preproc_lex_fetch_byte(pp);
+	if (c == want)
+		return 1;
+	preproc_lex_unfetch_byte(pp, c);
+	return 0;
+}
+
+/* Lex and return the next token; see the list of token kinds above.
+
+   The token is built with token_create() from the token type, the
+   collected text (NULL for tokens that carry none), the line and column
+   at which lexing started, and pp -> fn. When the end of an included file
+   is reached, the enclosing file's state is restored from pp -> filestack
+   and lexing continues there; TOK_EOF is returned only for the outermost
+   file. */
+struct token *preproc_lex_next_token(struct preproc_info *pp)
+{
+	int sline = pp -> lineno;
+	int scol = pp -> column;
+	char *strval = NULL;
+	int ttype = TOK_NONE;
+	int c, c2;
+	int cl;
+	struct lw_strbuf *strbuf;
+	struct token *t = NULL;
+	struct preproc_info *fs;
+
+fileagain:
+	c = preproc_lex_fetch_byte(pp);
+	if (c == CPP_EOF)
+	{
+		if (pp -> nlseen == 0)
+		{
+			c = CPP_EOL;
+		}
+	}
+
+	if (pp -> lineno != sline)
+	{
+		sline = pp -> lineno;
+		scol = pp -> column;
+	}
+
+	if (c == CPP_EOF)
+	{
+		/* check if we fell off the end of an include file */
+		if (pp -> filestack)
+		{
+			if (pp -> skip_level || pp -> found_level)
+			{
+				preproc_throw_error(pp, "Unbalanced conditionals in include file");
+			}
+			fclose(pp -> fp);
+			fs = pp -> filestack;
+			*pp = *fs;
+			pp -> filestack = fs -> n;
+			/* NOTE(review): fs itself is not released here; if the
+			   include code allocates it, this leaks - confirm */
+			goto fileagain;
+		}
+		else
+		{
+			ttype = TOK_EOF;
+			goto out;
+		}
+	}
+	if (c == CPP_EOL)
+	{
+		pp -> nlseen = 1;
+		ttype = TOK_EOL;
+		goto out;
+	}
+
+	pp -> nlseen = 0;
+	if (isspace(c))
+	{
+		/* the sentinels are excluded explicitly: isspace() is only
+		   defined for unsigned char values and EOF */
+		while (c != CPP_EOF && c != CPP_EOL && isspace(c))
+			c = preproc_lex_fetch_byte(pp);
+		preproc_lex_unfetch_byte(pp, c);
+		ttype = TOK_WSPACE;
+		goto out;
+	}
+
+	switch (c)
+	{
+	case '?':
+		ttype = TOK_QMARK;
+		goto out;
+
+	case ':':
+		ttype = TOK_COLON;
+		goto out;
+
+	case ',':
+		ttype = TOK_COMMA;
+		goto out;
+
+	case '(':
+		ttype = TOK_OPAREN;
+		goto out;
+
+	case ')':
+		ttype = TOK_CPAREN;
+		goto out;
+
+	case '{':
+		ttype = TOK_OBRACE;
+		goto out;
+
+	case '}':
+		ttype = TOK_CBRACE;
+		goto out;
+
+	case '[':
+		ttype = TOK_OSQUARE;
+		goto out;
+
+	case ']':
+		ttype = TOK_CSQUARE;
+		goto out;
+
+	case '~':
+		ttype = TOK_COM;
+		goto out;
+
+	case ';':
+		ttype = TOK_EOS;
+		goto out;
+
+	/* and now for the possible multi character tokens */
+	case '#':
+		ttype = lex_accept(pp, '#') ? TOK_DBLHASH : TOK_HASH;
+		goto out;
+
+	case '^':
+		ttype = lex_accept(pp, '=') ? TOK_XORASS : TOK_XOR;
+		goto out;
+
+	case '!':
+		ttype = lex_accept(pp, '=') ? TOK_NE : TOK_BNOT;
+		goto out;
+
+	case '*':
+		ttype = lex_accept(pp, '=') ? TOK_MULASS : TOK_STAR;
+		goto out;
+
+	case '/':
+		ttype = lex_accept(pp, '=') ? TOK_DIVASS : TOK_DIV;
+		goto out;
+
+	case '=':
+		ttype = lex_accept(pp, '=') ? TOK_EQ : TOK_ASS;
+		goto out;
+
+	case '%':
+		ttype = lex_accept(pp, '=') ? TOK_MODASS : TOK_MOD;
+		goto out;
+
+	case '-':
+		if (lex_accept(pp, '='))
+			ttype = TOK_SUBASS;
+		else if (lex_accept(pp, '-'))
+			ttype = TOK_DBLSUB;
+		else if (lex_accept(pp, '>'))
+			ttype = TOK_ARROW;
+		else
+			ttype = TOK_SUB;
+		goto out;
+
+	case '+':
+		if (lex_accept(pp, '='))
+			ttype = TOK_ADDASS;
+		else if (lex_accept(pp, '+'))
+			ttype = TOK_DBLADD;
+		else
+			ttype = TOK_ADD;
+		goto out;
+
+	case '&':
+		if (lex_accept(pp, '='))
+			ttype = TOK_BWANDASS;
+		else if (lex_accept(pp, '&'))
+			ttype = TOK_BAND;
+		else
+			ttype = TOK_BWAND;
+		goto out;
+
+	case '|':
+		if (lex_accept(pp, '='))
+			ttype = TOK_BWORASS;
+		else if (lex_accept(pp, '|'))
+			ttype = TOK_BOR;
+		else
+			ttype = TOK_BWOR;
+		goto out;
+
+	case '<':
+		if (lex_accept(pp, '='))
+			ttype = TOK_LE;
+		else if (lex_accept(pp, '<'))
+			ttype = lex_accept(pp, '=') ? TOK_LSHASS : TOK_LSH;
+		else
+			ttype = TOK_LT;
+		goto out;
+
+	case '>':
+		if (lex_accept(pp, '='))
+			ttype = TOK_GE;
+		else if (lex_accept(pp, '>'))
+			ttype = lex_accept(pp, '=') ? TOK_RSHASS : TOK_RSH;
+		else
+			ttype = TOK_GT;
+		goto out;
+
+	case '\'':
+		/* character constant - turns into a uint */
+chrlit:
+		/* the token text is the constant's body without the quotes;
+		   cl counts the bytes between the quotes */
+		cl = 0;
+		strbuf = lw_strbuf_new();
+		for (;;)
+		{
+			c = lex_fetch_byte(pp, 1);
+			if (c == CPP_EOF || c == CPP_EOL || c == '\'')
+				break;
+			cl++;
+			if (c == '\\')
+			{
+				lw_strbuf_add(strbuf, '\\');
+				c = lex_fetch_byte(pp, 1);
+				if (c == CPP_EOF || c == CPP_EOL)
+				{
+					if (!pp -> lexstr)
+						preproc_throw_error(pp, "Invalid character constant");
+					ttype = TOK_ERROR;
+					strval = lw_strbuf_end(strbuf);
+					goto out;
+				}
+				cl++;
+				lw_strbuf_add(strbuf, c);
+				continue;
+			}
+			lw_strbuf_add(strbuf, c);
+		}
+		strval = lw_strbuf_end(strbuf);
+		if (cl == 0)
+		{
+			ttype = TOK_ERROR;
+			if (!pp -> lexstr)
+				preproc_throw_error(pp, "Invalid character constant");
+		}
+		else
+			ttype = TOK_CHR_LIT;
+		goto out;
+
+	case '"':
+strlit:
+		/* string literal - the token text includes both quotes */
+		strbuf = lw_strbuf_new();
+		lw_strbuf_add(strbuf, '"');
+		for (;;)
+		{
+			c = lex_fetch_byte(pp, 1);
+			if (c == CPP_EOF || c == CPP_EOL)
+			{
+				ttype = TOK_ERROR;
+				strval = lw_strbuf_end(strbuf);
+				if (!pp -> lexstr)
+					preproc_throw_error(pp, "Invalid string constant");
+				goto out;
+			}
+			if (c == '"')
+				break;
+			if (c == '\\')
+			{
+				lw_strbuf_add(strbuf, '\\');
+				c = lex_fetch_byte(pp, 1);
+				if (c == CPP_EOF || c == CPP_EOL)
+				{
+					ttype = TOK_ERROR;
+					if (!pp -> lexstr)
+						preproc_throw_error(pp, "Invalid string constant");
+					strval = lw_strbuf_end(strbuf);
+					goto out;
+				}
+				lw_strbuf_add(strbuf, c);
+				continue;
+			}
+			lw_strbuf_add(strbuf, c);
+		}
+		lw_strbuf_add(strbuf, '"');
+		strval = lw_strbuf_end(strbuf);
+		ttype = TOK_STR_LIT;
+		goto out;
+
+	case 'L':
+		/* check for wide string or wide char const */
+		/* NOTE(review): the L prefix is consumed here and is not
+		   recorded in the token text or type */
+		c2 = preproc_lex_fetch_byte(pp);
+		if (c2 == '\'')
+		{
+			goto chrlit;
+		}
+		else if (c2 == '"')
+		{
+			goto strlit;
+		}
+		preproc_lex_unfetch_byte(pp, c2);
+		/* fall through for identifier */
+	case '_':
+	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+	case 'y': case 'z':
+	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+	case 'G': case 'H': case 'I': case 'J': case 'K':
+	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+	case 'Y': case 'Z':
+		/* we have an identifier here */
+		strbuf = lw_strbuf_new();
+		lw_strbuf_add(strbuf, c);
+		for (;;)
+		{
+			c = preproc_lex_fetch_byte(pp);
+			if (lex_is_identchar(c))
+			{
+				lw_strbuf_add(strbuf, c);
+				continue;
+			}
+			else
+			{
+				lw_strbuf_add(strbuf, 0);
+				strval = lw_strbuf_end(strbuf);
+				break;
+			}
+		}
+		preproc_lex_unfetch_byte(pp, c);
+		ttype = TOK_IDENT;
+		goto out;
+
+	case '.':
+		c = preproc_lex_fetch_byte(pp);
+		if (c >= '0' && c <= '9')
+		{
+			strbuf = lw_strbuf_new();
+			lw_strbuf_add(strbuf, '.');
+			goto numlit;
+		}
+		else if (c == '.')
+		{
+			c = preproc_lex_fetch_byte(pp);
+			if (c == '.')
+			{
+				ttype = TOK_ELLIPSIS;
+				goto out;
+			}
+			/* ".." is not a token: push back the lookahead and
+			   then the second '.', leaving a single TOK_DOT */
+			preproc_lex_unfetch_byte(pp, c);
+			c = '.';
+		}
+		preproc_lex_unfetch_byte(pp, c);
+		ttype = TOK_DOT;
+		goto out;
+
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7': case '8': case '9':
+		strbuf = lw_strbuf_new();
+numlit:
+		/* a preprocessing number: digits, letters, '_', '.', and a
+		   sign directly after e, E, p or P */
+		ttype = TOK_NUMBER;
+		lw_strbuf_add(strbuf, c);
+		for (;;)
+		{
+			c = preproc_lex_fetch_byte(pp);
+			if (c != '.' && !lex_is_identchar(c))
+				break;
+			lw_strbuf_add(strbuf, c);
+			if (c == 'e' || c == 'E' || c == 'p' || c == 'P')
+			{
+				c = preproc_lex_fetch_byte(pp);
+				if (c == '+' || c == '-')
+				{
+					lw_strbuf_add(strbuf, c);
+					continue;
+				}
+				preproc_lex_unfetch_byte(pp, c);
+			}
+		}
+		strval = lw_strbuf_end(strbuf);
+		preproc_lex_unfetch_byte(pp, c);
+		goto out;
+
+	default:
+		ttype = TOK_CHAR;
+		strval = lw_alloc(2);
+		strval[0] = c;
+		strval[1] = 0;
+		break;
+	}
+out:
+	t = token_create(ttype, strval, sline, scol, pp -> fn);
+	lw_free(strval);
+	return t;
+}
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/parse.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/parse.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,243 @@
+/*
+lwcc/parse.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* NOTE(review): the operands of the four angle-bracket includes below were
+   stripped from this copy of the patch. They are restored from the
+   identifiers this file uses (FILE/fprintf/stderr; strcmp/memset;
+   lw_alloc/lw_free; lw_strdup) - confirm against the repository. */
+#include <stdio.h>
+#include <string.h>
+#include <lw_alloc.h>
+#include <lw_string.h>
+
+#include "cpp.h"
+#include "tree.h"
+#include "parse.h"
+
+#include "parse_c.h"
+
+
+/* entry points of the lemon-generated parser (parse_c.c) */
+void *Parse(void *parser, int tokid, struct tokendata *tdata, struct parserinfo *pi);
+void *ParseAlloc(void *(*alloc)(size_t size));
+void ParseFree(void *parser, void (*free)(void *ptr));
+
+/* Release a tokendata record and the string it owns. NULL is accepted. */
+void tokendata_free(struct tokendata *td)
+{
+	if (td)
+	{
+		if (td -> strval)
+			lw_free(td -> strval);
+		lw_free(td);
+	}
+}
+
+extern char *ptoken_names[];
+/* Name of the parser token carried by td; a fixed placeholder is returned
+   for a negative token id. */
+char *tokendata_name(struct tokendata *td)
+{
+	if (td -> tokid < 0)
+		return "****UNKNOWN****";
+	return ptoken_names[td -> tokid];
+}
+
+/* Write a one line description of td (name, and text if present) to fp. */
+void tokendata_print(FILE *fp, struct tokendata *td)
+{
+	fprintf(fp, "TOKEN: %s", tokendata_name(td));
+	if (td -> strval)
+		fprintf(fp, " \"%s\"", td -> strval);
+	fprintf(fp, "\n");
+}
+
+/* token types assigned to keywords by parse_next() */
+#define TOK_KW_IF -1
+#define TOK_KW_ELSE -2
+#define TOK_KW_WHILE -3
+#define TOK_KW_DO -4
+#define TOK_KW_FOR -5
+#define TOK_KW_VOID -6
+#define TOK_KW_INT -7
+#define TOK_KW_CHAR -8
+#define TOK_KW_SHORT -9
+#define TOK_KW_LONG -10
+#define TOK_KW_UNSIGNED -11
+#define TOK_KW_SIGNED -12
+#define TOK_KW_FLOAT -13
+#define TOK_KW_DOUBLE -14
+#define TOK_KW_STRUCT -15
+#define TOK_KW_UNION -16
+#define TOK_KW_TYPEDEF -17
+#define TOK_KW_STATIC -18
+#define TOK_KW_SWITCH -19
+#define TOK_KW_CASE -20
+#define TOK_KW_DEFAULT -21
+#define TOK_KW_BREAK -22
+#define TOK_KW_CONTINUE -23
+#define TOK_KW_CONST -24
+#define TOK_KW_AUTO -25
+#define TOK_KW_ENUM -26
+#define TOK_KW_REGISTER -27
+#define TOK_KW_SIZEOF -28
+#define TOK_KW_VOLATILE -29
+#define TOK_KW_RETURN -30
+#define TOK_KW_EXTERN -31
+#define TOK_KW_GOTO -32
+#define TOK_TYPENAME -100
+
+/* keyword spellings; the list ends at the entry whose tok is TOK_NONE,
+   which is exactly what the lookup loop in parse_next() tests for */
+static struct { int tok; char *word; } keyword_list[] = {
+	{ TOK_KW_IF, "if" },
+	{ TOK_KW_ELSE, "else" },
+	{ TOK_KW_WHILE, "while" },
+	{ TOK_KW_DO, "do" },
+	{ TOK_KW_FOR, "for" },
+	{ TOK_KW_VOID, "void" },
+	{ TOK_KW_INT, "int" },
+	{ TOK_KW_CHAR, "char" },
+	{ TOK_KW_SHORT, "short" },
+	{ TOK_KW_LONG, "long" },
+	{ TOK_KW_UNSIGNED, "unsigned" },
+	{ TOK_KW_SIGNED, "signed" },
+	{ TOK_KW_FLOAT, "float" },
+	{ TOK_KW_DOUBLE, "double" },
+	{ TOK_KW_STRUCT, "struct" },
+	{ TOK_KW_UNION, "union" },
+	{ TOK_KW_TYPEDEF, "typedef" },
+	{ TOK_KW_STATIC, "static" },
+	{ TOK_KW_SWITCH, "switch" },
+	{ TOK_KW_CASE, "case" },
+	{ TOK_KW_DEFAULT, "default" },
+	{ TOK_KW_BREAK, "break" },
+	{ TOK_KW_CONTINUE, "continue" },
+	{ TOK_KW_CONST, "const" },
+	{ TOK_KW_AUTO, "auto" },
+	{ TOK_KW_ENUM, "enum" },
+	{ TOK_KW_REGISTER, "register" },
+	{ TOK_KW_SIZEOF, "sizeof" },
+	{ TOK_KW_VOLATILE, "volatile" },
+	{ TOK_KW_RETURN, "return" },
+	{ TOK_KW_EXTERN, "extern" },
+	{ TOK_KW_GOTO, "goto" },
+	{ TOK_NONE, "" }
+};
+
+/* Fetch the next token that matters to the parser.
+
+   Whitespace and end of line tokens are dropped, as are TOK_CHAR tokens
+   whose character is outside the printable range 32..126. An identifier
+   that spells a keyword has its type replaced by the matching TOK_KW_*
+   value. Every returned token is also traced on stderr. */
+struct token *parse_next(struct preproc_info *pp)
+{
+	struct token *tok;
+	int i;
+
+	for (;;)
+	{
+		tok = preproc_next(pp);
+		if (tok -> ttype == TOK_WSPACE)
+			continue;
+		if (tok -> ttype == TOK_EOL)
+			continue;
+		if (tok -> ttype == TOK_CHAR)
+		{
+			// random character
+			/* printed as an unsigned byte so that a value >= 0x80 is
+			   not sign extended to eight hex digits */
+			fprintf(stderr, "Random character %02x\n", (unsigned char)tok -> strval[0]);
+			if (tok -> strval[0] < 32 || tok -> strval[0] > 126)
+				continue;
+		}
+		break;
+	}
+	if (tok -> ttype == TOK_IDENT)
+	{
+		/* convert identifier tokens to their respective meanings */
+		for (i = 0; keyword_list[i].tok != TOK_NONE; i++)
+		{
+			if (strcmp(keyword_list[i].word, tok -> strval) == 0)
+			{
+				tok -> ttype = keyword_list[i].tok;
+				goto out;
+			}
+		}
+		/* check for a registered type here */
+	}
+out:
+	fprintf(stderr, "Lexed: ");
+	token_print(tok, stderr);
+	fprintf(stderr, " (%d)\n", tok -> ttype);
+	return tok;
+}
+
+/* maps lexer token types to the token ids of the generated parser; the
+   list ends at the entry whose tokid is 0 */
+static struct {
+	int tokid;
+	int ttype;
+} toktable[] = {
+	{ PTOK_IDENTIFIER, TOK_IDENT },
+	{ PTOK_ENDS, TOK_EOS },
+	{ PTOK_KW_INT, TOK_KW_INT },
+	{ PTOK_KW_LONG, TOK_KW_LONG },
+	{ PTOK_KW_SHORT, TOK_KW_SHORT },
+	{ PTOK_KW_CHAR, TOK_KW_CHAR },
+	{ PTOK_KW_SIGNED, TOK_KW_SIGNED },
+	{ PTOK_KW_UNSIGNED, TOK_KW_UNSIGNED },
+	{ PTOK_STAR, TOK_STAR },
+	{ PTOK_KW_VOID, TOK_KW_VOID },
+	{ PTOK_KW_FLOAT, TOK_KW_FLOAT },
+	{ PTOK_KW_DOUBLE, TOK_KW_DOUBLE },
+	{ PTOK_OBRACE, TOK_OBRACE },
+	{ PTOK_CBRACE, TOK_CBRACE },
+	{ PTOK_OPAREN, TOK_OPAREN },
+	{ PTOK_CPAREN, TOK_CPAREN },
+	{ 0, 0 }
+};
+
+/* Parser token id for lexer token type ttype, or -1 if toktable has no
+   entry for it. */
+static int lookup_ptok(int ttype)
+{
+	int i;
+	for (i = 0; toktable[i].tokid != 0; i++)
+		if (toktable[i].ttype == ttype)
+			return toktable[i].tokid;
+	return -1;
+}
+
+/* Feed every token from pp to the generated parser and return the parse
+   tree it leaves in the parserinfo record.
+
+   Each token is wrapped in a freshly allocated tokendata record (a copy of
+   the token text, zeroed numval) and handed to Parse(); end of input is
+   signalled with token id 0. Each record is also printed on stderr. */
+node_t *parse_program(struct preproc_info *pp)
+{
+	struct token *tok;
+	struct tokendata *td;
+	struct parserinfo pi = { NULL };
+	void *parser;
+
+	/* the cast below shuts up a warning */
+	parser = ParseAlloc((void *)lw_alloc);
+	for (;;)
+	{
+		tok = parse_next(pp);
+		if (tok -> ttype == TOK_EOF)
+			break;
+
+		td = lw_alloc(sizeof(struct tokendata));
+		td -> strval = NULL;
+		memset(td -> numval, 0, sizeof(td -> numval));
+		/* NOTE(review): lookup_ptok() yields -1 for a token type that
+		   toktable does not list, and that value is passed on to
+		   Parse() as is */
+		td -> tokid = lookup_ptok(tok -> ttype);
+		if (tok -> strval)
+			td -> strval = lw_strdup(tok -> strval);
+
+		tokendata_print(stderr, td);
+
+		Parse(parser, td -> tokid, td, &pi);
+	}
+	Parse(parser, 0, NULL, &pi);
+	ParseFree(parser, lw_free);
+	return pi.parsetree;
+}
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/parse.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/parse.h	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,46 @@
+/*
+lwcc/parse.h
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef parse_h_seen__
+#define parse_h_seen__
+
+/* FILE is needed by the tokendata_print() prototype below */
+#include <stdio.h>
+#include "tree.h"
+
+/* the value attached to each token handed to the generated parser */
+struct tokendata
+{
+	int tokid;					// parser token id, as passed to Parse()
+	unsigned char numval[8];	// zeroed by parse_program(); presumably a numeric value - TODO confirm
+	char *strval;				// owned copy of the token text or NULL; released by tokendata_free()
+};
+
+
+extern void tokendata_free(struct tokendata *td);
+
+/* extra argument passed through Parse() */
+struct parserinfo
+{
+	node_t *parsetree;			// returned by parse_program() once parsing ends
+};
+
+extern char *tokendata_name(struct tokendata *td);
+extern void tokendata_print(FILE *fp, struct tokendata *td);
+
+#endif // parse_h_seen__
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/parse_c.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/parse_c.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,1199 @@
+/* Driver template for the LEMON parser generator.
+** The author disclaims copyright to this source code.
+*/
+/* First off, code is included that follows the "include" declaration
+** in the input grammar file. */
+#include
+#line 1 "lwcc/parse_c.y"
+
+#include // only needed due to a bug in lemon
+#include
+#include "parse.h"
+#include "tree.h"
+#line 14 "lwcc/parse_c.c"
+/* Next is all token values, in a form suitable for use by makeheaders.
+** This section will be null unless lemon is run with the -m switch.
+*/
+/*
+** These constants (all generated automatically by the parser generator)
+** specify the various kinds of tokens (terminals) that the parser
+** understands.
+**
+** Each symbol here is a terminal symbol in the grammar.
+*/
+/* Make sure the INTERFACE macro is defined.
+*/
+#ifndef INTERFACE
+# define INTERFACE 1
+#endif
+/* The next thing included is series of defines which control
+** various aspects of the generated parser.
+**    YYCODETYPE         is the data type used for storing terminal
+**                       and nonterminal numbers.  "unsigned char" is
+**                       used if there are fewer than 250 terminals
+**                       and nonterminals.  "int" is used otherwise.
+**    YYNOCODE           is a number of type YYCODETYPE which corresponds
+**                       to no legal terminal or nonterminal number.
This +** number is used to fill in empty slots of the hash +** table. +** YYFALLBACK If defined, this indicates that one or more tokens +** have fall-back values which should be used if the +** original value of the token will not parse. +** YYACTIONTYPE is the data type used for storing terminal +** and nonterminal numbers. "unsigned char" is +** used if there are fewer than 250 rules and +** states combined. "int" is used otherwise. +** ParseTOKENTYPE is the data type used for minor tokens given +** directly to the parser from the tokenizer. +** YYMINORTYPE is the data type used for all minor tokens. +** This is typically a union of many types, one of +** which is ParseTOKENTYPE. The entry in the union +** for base tokens is called "yy0". +** YYSTACKDEPTH is the maximum depth of the parser's stack. If +** zero the stack is dynamically sized using realloc() +** ParseARG_SDECL A static variable declaration for the %extra_argument +** ParseARG_PDECL A parameter declaration for the %extra_argument +** ParseARG_STORE Code to store %extra_argument into yypParser +** ParseARG_FETCH Code to extract %extra_argument from yypParser +** YYNSTATE the combined number of states. +** YYNRULE the number of rules in the grammar +** YYERRORSYMBOL is the code number of the error symbol. If not +** defined, then do no error processing. 
+*/ +#define YYCODETYPE unsigned char +#define YYNOCODE 30 +#define YYACTIONTYPE unsigned char +#define ParseTOKENTYPE struct tokendata * +typedef union { + int yyinit; + ParseTOKENTYPE yy0; + node_t * yy18; +} YYMINORTYPE; +#ifndef YYSTACKDEPTH +#define YYSTACKDEPTH 100 +#endif +#define ParseARG_SDECL struct parserinfo *pinfo ; +#define ParseARG_PDECL , struct parserinfo *pinfo +#define ParseARG_FETCH struct parserinfo *pinfo = yypParser->pinfo +#define ParseARG_STORE yypParser->pinfo = pinfo +#define YYNSTATE 36 +#define YYNRULE 30 +#define YY_NO_ACTION (YYNSTATE+YYNRULE+2) +#define YY_ACCEPT_ACTION (YYNSTATE+YYNRULE+1) +#define YY_ERROR_ACTION (YYNSTATE+YYNRULE) + +/* The yyzerominor constant is used to initialize instances of +** YYMINORTYPE objects to zero. */ +static const YYMINORTYPE yyzerominor = { 0 }; + +/* Define the yytestcase() macro to be a no-op if is not already defined +** otherwise. +** +** Applications can choose to define yytestcase() in the %include section +** to a macro that can assist in verifying code coverage. For production +** code the yytestcase() macro should be turned off. But it is useful +** for testing. +*/ +#ifndef yytestcase +# define yytestcase(X) +#endif + + +/* Next are the tables used to determine what action to take based on the +** current state and lookahead token. These tables are used to implement +** functions that take a state number and lookahead value and return an +** action integer. +** +** Suppose the action integer is N. Then the action is determined as +** follows +** +** 0 <= N < YYNSTATE Shift N. That is, push the lookahead +** token onto the stack and goto state N. +** +** YYNSTATE <= N < YYNSTATE+YYNRULE Reduce by rule N-YYNSTATE. +** +** N == YYNSTATE+YYNRULE A syntax error has occurred. +** +** N == YYNSTATE+YYNRULE+1 The parser accepts its input. +** +** N == YYNSTATE+YYNRULE+2 No such action. Denotes unused +** slots in the yy_action[] table. 
+** +** The action table is constructed as a single large table named yy_action[]. +** Given state S and lookahead X, the action is computed as +** +** yy_action[ yy_shift_ofst[S] + X ] +** +** If the index value yy_shift_ofst[S]+X is out of range or if the value +** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X or if yy_shift_ofst[S] +** is equal to YY_SHIFT_USE_DFLT, it means that the action is not in the table +** and that yy_default[S] should be used instead. +** +** The formula above is for computing the action when the lookahead is +** a terminal symbol. If the lookahead is a non-terminal (as occurs after +** a reduce action) then the yy_reduce_ofst[] array is used in place of +** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of +** YY_SHIFT_USE_DFLT. +** +** The following are the tables generated in this section: +** +** yy_action[] A single table containing all actions. +** yy_lookahead[] A table containing the lookahead for each entry in +** yy_action. Used to detect hash collisions. +** yy_shift_ofst[] For each state, the offset into yy_action for +** shifting terminals. +** yy_reduce_ofst[] For each state, the offset into yy_action for +** shifting non-terminals after a reduce. +** yy_default[] Default action for each state. 
+*/ +#define YY_ACTTAB_COUNT (44) +static const YYACTIONTYPE yy_action[] = { + /* 0 */ 36, 27, 26, 29, 24, 23, 22, 7, 3, 2, + /* 10 */ 16, 9, 14, 35, 34, 33, 6, 8, 25, 18, + /* 20 */ 16, 9, 14, 21, 10, 10, 32, 20, 20, 30, + /* 30 */ 67, 1, 19, 28, 15, 5, 4, 68, 11, 68, + /* 40 */ 17, 31, 13, 12, +}; +static const YYCODETYPE yy_lookahead[] = { + /* 0 */ 0, 2, 3, 16, 4, 5, 6, 7, 8, 9, + /* 10 */ 10, 11, 12, 20, 21, 22, 23, 7, 25, 26, + /* 20 */ 10, 11, 12, 6, 7, 7, 1, 10, 10, 1, + /* 30 */ 18, 19, 10, 14, 10, 24, 27, 29, 13, 29, + /* 40 */ 26, 28, 26, 15, +}; +#define YY_SHIFT_USE_DFLT (-14) +#define YY_SHIFT_COUNT (12) +#define YY_SHIFT_MIN (-13) +#define YY_SHIFT_MAX (28) +static const signed char yy_shift_ofst[] = { + /* 0 */ -14, 0, 10, 10, 28, 25, -1, 17, 18, 24, + /* 10 */ 22, 19, -13, +}; +#define YY_REDUCE_USE_DFLT (-8) +#define YY_REDUCE_COUNT (6) +#define YY_REDUCE_MIN (-7) +#define YY_REDUCE_MAX (16) +static const signed char yy_reduce_ofst[] = { + /* 0 */ 12, -7, 16, 14, 13, 9, 11, +}; +static const YYACTIONTYPE yy_default[] = { + /* 0 */ 38, 66, 51, 50, 66, 66, 66, 55, 55, 58, + /* 10 */ 57, 66, 66, 52, 61, 60, 54, 53, 49, 59, + /* 20 */ 56, 48, 47, 46, 45, 43, 44, 42, 64, 65, + /* 30 */ 63, 62, 41, 40, 39, 37, +}; + +/* The next table maps tokens into fallback tokens. If a construct +** like the following: +** +** %fallback ID X Y Z. +** +** appears in the grammar, then ID becomes a fallback token for X, Y, +** and Z. Whenever one of the tokens X, Y, or Z is input to the parser +** but it does not parse, the type of the token is changed to ID and +** the parse is retried before an error is thrown. +*/ +#ifdef YYFALLBACK +static const YYCODETYPE yyFallback[] = { +}; +#endif /* YYFALLBACK */ + +/* The following structure represents a single element of the +** parser's stack. Information stored includes: +** +** + The state number for the parser at this level of the stack. +** +** + The value of the token stored at this level of the stack. 
+** (In other words, the "major" token.) +** +** + The semantic value stored at this level of the stack. This is +** the information used by the action routines in the grammar. +** It is sometimes called the "minor" token. +*/ +struct yyStackEntry { + YYACTIONTYPE stateno; /* The state-number */ + YYCODETYPE major; /* The major token value. This is the code + ** number for the token at this stack level */ + YYMINORTYPE minor; /* The user-supplied minor token value. This + ** is the value of the token */ +}; +typedef struct yyStackEntry yyStackEntry; + +/* The state of the parser is completely contained in an instance of +** the following structure */ +struct yyParser { + int yyidx; /* Index of top element in stack */ +#ifdef YYTRACKMAXSTACKDEPTH + int yyidxMax; /* Maximum value of yyidx */ +#endif + int yyerrcnt; /* Shifts left before out of the error */ + ParseARG_SDECL /* A place to hold %extra_argument */ +#if YYSTACKDEPTH<=0 + int yystksz; /* Current side of the stack */ + yyStackEntry *yystack; /* The parser's stack */ +#else + yyStackEntry yystack[YYSTACKDEPTH]; /* The parser's stack */ +#endif +}; +typedef struct yyParser yyParser; + +#ifndef NDEBUG +#include +static FILE *yyTraceFILE = 0; +static char *yyTracePrompt = 0; +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* +** Turn parser tracing on by giving a stream to which to write the trace +** and a prompt to preface each trace message. Tracing is turned off +** by making either argument NULL +** +** Inputs: +**
    +**
<ul>
+** <li> A FILE* to which trace output should be written.
+**      If NULL, then tracing is turned off.
+** <li> A prefix string written at the beginning of every
+**      line of trace output.  If NULL, then tracing is
+**      turned off.
+** </ul>
+** +** Outputs: +** None. +*/ +void ParseTrace(FILE *TraceFILE, char *zTracePrompt){ + yyTraceFILE = TraceFILE; + yyTracePrompt = zTracePrompt; + if( yyTraceFILE==0 ) yyTracePrompt = 0; + else if( yyTracePrompt==0 ) yyTraceFILE = 0; +} +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* For tracing shifts, the names of all terminals and nonterminals +** are required. The following table supplies these names */ +static const char *const yyTokenName[] = { + "$", "ENDS", "IDENTIFIER", "STAR", + "KW_VOID", "KW_FLOAT", "KW_DOUBLE", "KW_LONG", + "KW_UNSIGNED", "KW_SIGNED", "KW_INT", "KW_SHORT", + "KW_CHAR", "OPAREN", "CPAREN", "OBRACE", + "CBRACE", "error", "program", "rprogram", + "globaldecl", "vardecl", "fundecl", "datatype", + "ident", "typename", "baseint", "arglist", + "statementblock", +}; +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* For tracing reduce actions, the names of all rules are required. +*/ +static const char *const yyRuleName[] = { + /* 0 */ "program ::= rprogram", + /* 1 */ "rprogram ::= rprogram globaldecl", + /* 2 */ "rprogram ::=", + /* 3 */ "globaldecl ::= vardecl", + /* 4 */ "globaldecl ::= fundecl", + /* 5 */ "vardecl ::= datatype ident ENDS", + /* 6 */ "ident ::= IDENTIFIER", + /* 7 */ "datatype ::= typename", + /* 8 */ "datatype ::= datatype STAR", + /* 9 */ "typename ::= KW_VOID", + /* 10 */ "typename ::= KW_FLOAT", + /* 11 */ "typename ::= KW_DOUBLE", + /* 12 */ "typename ::= KW_LONG KW_DOUBLE", + /* 13 */ "typename ::= baseint", + /* 14 */ "typename ::= KW_UNSIGNED", + /* 15 */ "typename ::= KW_SIGNED", + /* 16 */ "typename ::= KW_SIGNED baseint", + /* 17 */ "typename ::= KW_UNSIGNED baseint", + /* 18 */ "baseint ::= KW_INT", + /* 19 */ "baseint ::= KW_LONG", + /* 20 */ "baseint ::= KW_LONG KW_INT", + /* 21 */ "baseint ::= KW_LONG KW_LONG", + /* 22 */ "baseint ::= KW_SHORT", + /* 23 */ "baseint ::= KW_LONG KW_LONG KW_INT", + /* 24 */ "baseint ::= KW_SHORT KW_INT", + /* 25 */ "baseint ::= KW_CHAR", + /* 26 */ "fundecl ::= datatype ident arglist 
statementblock", + /* 27 */ "fundecl ::= datatype ident arglist ENDS", + /* 28 */ "arglist ::= OPAREN CPAREN", + /* 29 */ "statementblock ::= OBRACE CBRACE", +}; +#endif /* NDEBUG */ + + +#if YYSTACKDEPTH<=0 +/* +** Try to increase the size of the parser stack. +*/ +static void yyGrowStack(yyParser *p){ + int newSize; + yyStackEntry *pNew; + + newSize = p->yystksz*2 + 100; + pNew = realloc(p->yystack, newSize*sizeof(pNew[0])); + if( pNew ){ + p->yystack = pNew; + p->yystksz = newSize; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sStack grows to %d entries!\n", + yyTracePrompt, p->yystksz); + } +#endif + } +} +#endif + +/* +** This function allocates a new parser. +** The only argument is a pointer to a function which works like +** malloc. +** +** Inputs: +** A pointer to the function used to allocate memory. +** +** Outputs: +** A pointer to a parser. This pointer is used in subsequent calls +** to Parse and ParseFree. +*/ +void *ParseAlloc(void *(*mallocProc)(size_t)){ + yyParser *pParser; + pParser = (yyParser*)(*mallocProc)( (size_t)sizeof(yyParser) ); + if( pParser ){ + pParser->yyidx = -1; +#ifdef YYTRACKMAXSTACKDEPTH + pParser->yyidxMax = 0; +#endif +#if YYSTACKDEPTH<=0 + pParser->yystack = NULL; + pParser->yystksz = 0; + yyGrowStack(pParser); +#endif + } + return pParser; +} + +/* The following function deletes the value associated with a +** symbol. The symbol can be either a terminal or nonterminal. +** "yymajor" is the symbol code, and "yypminor" is a pointer to +** the value. +*/ +static void yy_destructor( + yyParser *yypParser, /* The parser */ + YYCODETYPE yymajor, /* Type code for object to destroy */ + YYMINORTYPE *yypminor /* The object to be destroyed */ +){ + ParseARG_FETCH; + switch( yymajor ){ + /* Here is inserted the actions which take place when a + ** terminal or non-terminal is destroyed. 
This can happen + ** when the symbol is popped from the stack during a + ** reduce or during error processing or when a parser is + ** being destroyed before it is finished parsing. + ** + ** Note: during a reduce, the only symbols destroyed are those + ** which appear on the RHS of the rule, but which are not used + ** inside the C code. + */ + /* TERMINAL Destructor */ + case 1: /* ENDS */ + case 2: /* IDENTIFIER */ + case 3: /* STAR */ + case 4: /* KW_VOID */ + case 5: /* KW_FLOAT */ + case 6: /* KW_DOUBLE */ + case 7: /* KW_LONG */ + case 8: /* KW_UNSIGNED */ + case 9: /* KW_SIGNED */ + case 10: /* KW_INT */ + case 11: /* KW_SHORT */ + case 12: /* KW_CHAR */ + case 13: /* OPAREN */ + case 14: /* CPAREN */ + case 15: /* OBRACE */ + case 16: /* CBRACE */ +{ +#line 10 "lwcc/parse_c.y" + tokendata_free((yypminor->yy0)); +#line 420 "lwcc/parse_c.c" +} + break; + default: break; /* If no destructor action specified: do nothing */ + } +} + +/* +** Pop the parser's stack once. +** +** If there is a destructor routine associated with the token which +** is popped from the stack, then call it. +** +** Return the major token number for the symbol popped. +*/ +static int yy_pop_parser_stack(yyParser *pParser){ + YYCODETYPE yymajor; + yyStackEntry *yytos = &pParser->yystack[pParser->yyidx]; + + if( pParser->yyidx<0 ) return 0; +#ifndef NDEBUG + if( yyTraceFILE && pParser->yyidx>=0 ){ + fprintf(yyTraceFILE,"%sPopping %s\n", + yyTracePrompt, + yyTokenName[yytos->major]); + } +#endif + yymajor = yytos->major; + yy_destructor(pParser, yymajor, &yytos->minor); + pParser->yyidx--; + return yymajor; +} + +/* +** Deallocate and destroy a parser. Destructors are all called for +** all stack elements before shutting the parser down. +** +** Inputs: +**
    +**
  • A pointer to the parser. This should be a pointer +** obtained from ParseAlloc. +**
  • A pointer to a function used to reclaim memory obtained +** from malloc. +**
+*/ +void ParseFree( + void *p, /* The parser to be deleted */ + void (*freeProc)(void*) /* Function used to reclaim memory */ +){ + yyParser *pParser = (yyParser*)p; + if( pParser==0 ) return; + while( pParser->yyidx>=0 ) yy_pop_parser_stack(pParser); +#if YYSTACKDEPTH<=0 + free(pParser->yystack); +#endif + (*freeProc)((void*)pParser); +} + +/* +** Return the peak depth of the stack for a parser. +*/ +#ifdef YYTRACKMAXSTACKDEPTH +int ParseStackPeak(void *p){ + yyParser *pParser = (yyParser*)p; + return pParser->yyidxMax; +} +#endif + +/* +** Find the appropriate action for a parser given the terminal +** look-ahead token iLookAhead. +** +** If the look-ahead token is YYNOCODE, then check to see if the action is +** independent of the look-ahead. If it is, return the action, otherwise +** return YY_NO_ACTION. +*/ +static int yy_find_shift_action( + yyParser *pParser, /* The parser */ + YYCODETYPE iLookAhead /* The look-ahead token */ +){ + int i; + int stateno = pParser->yystack[pParser->yyidx].stateno; + + if( stateno>YY_SHIFT_COUNT + || (i = yy_shift_ofst[stateno])==YY_SHIFT_USE_DFLT ){ + return yy_default[stateno]; + } + assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; + if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ + if( iLookAhead>0 ){ +#ifdef YYFALLBACK + YYCODETYPE iFallback; /* Fallback token */ + if( iLookAhead %s\n", + yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[iFallback]); + } +#endif + return yy_find_shift_action(pParser, iFallback); + } +#endif +#ifdef YYWILDCARD + { + int j = i - iLookAhead + YYWILDCARD; + if( +#if YY_SHIFT_MIN+YYWILDCARD<0 + j>=0 && +#endif +#if YY_SHIFT_MAX+YYWILDCARD>=YY_ACTTAB_COUNT + j %s\n", + yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[YYWILDCARD]); + } +#endif /* NDEBUG */ + return yy_action[j]; + } + } +#endif /* YYWILDCARD */ + } + return yy_default[stateno]; + }else{ + return yy_action[i]; + } +} + +/* +** Find the appropriate action for a parser given the non-terminal +** look-ahead 
token iLookAhead. +** +** If the look-ahead token is YYNOCODE, then check to see if the action is +** independent of the look-ahead. If it is, return the action, otherwise +** return YY_NO_ACTION. +*/ +static int yy_find_reduce_action( + int stateno, /* Current state number */ + YYCODETYPE iLookAhead /* The look-ahead token */ +){ + int i; +#ifdef YYERRORSYMBOL + if( stateno>YY_REDUCE_COUNT ){ + return yy_default[stateno]; + } +#else + assert( stateno<=YY_REDUCE_COUNT ); +#endif + i = yy_reduce_ofst[stateno]; + assert( i!=YY_REDUCE_USE_DFLT ); + assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; +#ifdef YYERRORSYMBOL + if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ + return yy_default[stateno]; + } +#else + assert( i>=0 && iyyidx--; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt); + } +#endif + while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will execute if the parser + ** stack every overflows */ +#line 90 "lwcc/parse_c.y" + + fprintf(stderr, "Parser stack overflow\n"); +#line 605 "lwcc/parse_c.c" + ParseARG_STORE; /* Suppress warning about unused %extra_argument var */ +} + +/* +** Perform a shift action. 
+*/ +static void yy_shift( + yyParser *yypParser, /* The parser to be shifted */ + int yyNewState, /* The new state to shift in */ + int yyMajor, /* The major token to shift in */ + YYMINORTYPE *yypMinor /* Pointer to the minor token to shift in */ +){ + yyStackEntry *yytos; + yypParser->yyidx++; +#ifdef YYTRACKMAXSTACKDEPTH + if( yypParser->yyidx>yypParser->yyidxMax ){ + yypParser->yyidxMax = yypParser->yyidx; + } +#endif +#if YYSTACKDEPTH>0 + if( yypParser->yyidx>=YYSTACKDEPTH ){ + yyStackOverflow(yypParser, yypMinor); + return; + } +#else + if( yypParser->yyidx>=yypParser->yystksz ){ + yyGrowStack(yypParser); + if( yypParser->yyidx>=yypParser->yystksz ){ + yyStackOverflow(yypParser, yypMinor); + return; + } + } +#endif + yytos = &yypParser->yystack[yypParser->yyidx]; + yytos->stateno = (YYACTIONTYPE)yyNewState; + yytos->major = (YYCODETYPE)yyMajor; + yytos->minor = *yypMinor; +#ifndef NDEBUG + if( yyTraceFILE && yypParser->yyidx>0 ){ + int i; + fprintf(yyTraceFILE,"%sShift %d\n",yyTracePrompt,yyNewState); + fprintf(yyTraceFILE,"%sStack:",yyTracePrompt); + for(i=1; i<=yypParser->yyidx; i++) + fprintf(yyTraceFILE," %s",yyTokenName[yypParser->yystack[i].major]); + fprintf(yyTraceFILE,"\n"); + } +#endif +} + +/* The following table contains information about every rule that +** is used during the reduce. 
+*/ +static const struct { + YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */ + unsigned char nrhs; /* Number of right-hand side symbols in the rule */ +} yyRuleInfo[] = { + { 18, 1 }, + { 19, 2 }, + { 19, 0 }, + { 20, 1 }, + { 20, 1 }, + { 21, 3 }, + { 24, 1 }, + { 23, 1 }, + { 23, 2 }, + { 25, 1 }, + { 25, 1 }, + { 25, 1 }, + { 25, 2 }, + { 25, 1 }, + { 25, 1 }, + { 25, 1 }, + { 25, 2 }, + { 25, 2 }, + { 26, 1 }, + { 26, 1 }, + { 26, 2 }, + { 26, 2 }, + { 26, 1 }, + { 26, 3 }, + { 26, 2 }, + { 26, 1 }, + { 22, 4 }, + { 22, 4 }, + { 27, 2 }, + { 28, 2 }, +}; + +static void yy_accept(yyParser*); /* Forward Declaration */ + +/* +** Perform a reduce action and the shift that must immediately +** follow the reduce. +*/ +static void yy_reduce( + yyParser *yypParser, /* The parser */ + int yyruleno /* Number of the rule by which to reduce */ +){ + int yygoto; /* The next state */ + int yyact; /* The next action */ + YYMINORTYPE yygotominor; /* The LHS of the rule reduced */ + yyStackEntry *yymsp; /* The top of the parser's stack */ + int yysize; /* Amount to pop the stack */ + ParseARG_FETCH; + yymsp = &yypParser->yystack[yypParser->yyidx]; +#ifndef NDEBUG + if( yyTraceFILE && yyruleno>=0 + && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){ + fprintf(yyTraceFILE, "%sReduce [%s].\n", yyTracePrompt, + yyRuleName[yyruleno]); + } +#endif /* NDEBUG */ + + /* Silence complaints from purify about yygotominor being uninitialized + ** in some cases when it is copied into the stack after the following + ** switch. yygotominor is uninitialized when a rule reduces that does + ** not set the value of its left-hand side nonterminal. Leaving the + ** value of the nonterminal uninitialized is utterly harmless as long + ** as the value is never used. So really the only thing this code + ** accomplishes is to quieten purify. + ** + ** 2007-01-16: The wireshark project (www.wireshark.org) reports that + ** without this code, their parser segfaults. 
I'm not sure what there + ** parser is doing to make this happen. This is the second bug report + ** from wireshark this week. Clearly they are stressing Lemon in ways + ** that it has not been previously stressed... (SQLite ticket #2172) + */ + /*memset(&yygotominor, 0, sizeof(yygotominor));*/ + yygotominor = yyzerominor; + + + switch( yyruleno ){ + /* Beginning here are the reduction cases. A typical example + ** follows: + ** case 0: + ** #line + ** { ... } // User supplied code + ** #line + ** break; + */ + case 0: /* program ::= rprogram */ +#line 14 "lwcc/parse_c.y" +{ yygotominor.yy18 = yymsp[0].minor.yy18; pinfo -> parsetree = yygotominor.yy18; } +#line 749 "lwcc/parse_c.c" + break; + case 1: /* rprogram ::= rprogram globaldecl */ +#line 16 "lwcc/parse_c.y" +{ + yygotominor.yy18 = yymsp[-1].minor.yy18; + node_addchild(yygotominor.yy18, yymsp[0].minor.yy18); +} +#line 757 "lwcc/parse_c.c" + break; + case 2: /* rprogram ::= */ +#line 20 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_PROGRAM); } +#line 762 "lwcc/parse_c.c" + break; + case 3: /* globaldecl ::= vardecl */ + case 4: /* globaldecl ::= fundecl */ yytestcase(yyruleno==4); + case 7: /* datatype ::= typename */ yytestcase(yyruleno==7); + case 13: /* typename ::= baseint */ yytestcase(yyruleno==13); +#line 22 "lwcc/parse_c.y" +{ yygotominor.yy18 = yymsp[0].minor.yy18; } +#line 770 "lwcc/parse_c.c" + break; + case 5: /* vardecl ::= datatype ident ENDS */ +#line 25 "lwcc/parse_c.y" +{ + yygotominor.yy18 = node_create(NODE_DECL, yymsp[-2].minor.yy18, yymsp[-1].minor.yy18); + yy_destructor(yypParser,1,&yymsp[0].minor); +} +#line 778 "lwcc/parse_c.c" + break; + case 6: /* ident ::= IDENTIFIER */ +#line 29 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_IDENT, yymsp[0].minor.yy0 -> strval); } +#line 783 "lwcc/parse_c.c" + break; + case 8: /* datatype ::= datatype STAR */ +#line 32 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_PTR, yymsp[-1].minor.yy18); 
yy_destructor(yypParser,3,&yymsp[0].minor); +} +#line 789 "lwcc/parse_c.c" + break; + case 9: /* typename ::= KW_VOID */ +#line 34 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_VOID); yy_destructor(yypParser,4,&yymsp[0].minor); +} +#line 795 "lwcc/parse_c.c" + break; + case 10: /* typename ::= KW_FLOAT */ +#line 35 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_FLOAT); yy_destructor(yypParser,5,&yymsp[0].minor); +} +#line 801 "lwcc/parse_c.c" + break; + case 11: /* typename ::= KW_DOUBLE */ +#line 36 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_DOUBLE); yy_destructor(yypParser,6,&yymsp[0].minor); +} +#line 807 "lwcc/parse_c.c" + break; + case 12: /* typename ::= KW_LONG KW_DOUBLE */ +#line 37 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_LDOUBLE); yy_destructor(yypParser,7,&yymsp[-1].minor); + yy_destructor(yypParser,6,&yymsp[0].minor); +} +#line 814 "lwcc/parse_c.c" + break; + case 14: /* typename ::= KW_UNSIGNED */ +#line 39 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_UINT); yy_destructor(yypParser,8,&yymsp[0].minor); +} +#line 820 "lwcc/parse_c.c" + break; + case 15: /* typename ::= KW_SIGNED */ +#line 40 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_INT); yy_destructor(yypParser,9,&yymsp[0].minor); +} +#line 826 "lwcc/parse_c.c" + break; + case 16: /* typename ::= KW_SIGNED baseint */ +#line 41 "lwcc/parse_c.y" +{ yygotominor.yy18 = yymsp[0].minor.yy18; if (yygotominor.yy18 -> type == NODE_TYPE_CHAR) yygotominor.yy18 -> type = NODE_TYPE_SCHAR; yy_destructor(yypParser,9,&yymsp[-1].minor); +} +#line 832 "lwcc/parse_c.c" + break; + case 17: /* typename ::= KW_UNSIGNED baseint */ +#line 42 "lwcc/parse_c.y" +{ + yygotominor.yy18 = yymsp[0].minor.yy18; + switch (yygotominor.yy18 -> type) + { + case NODE_TYPE_CHAR: + yygotominor.yy18 -> type = NODE_TYPE_UCHAR; + break; + case NODE_TYPE_SHORT: + yygotominor.yy18 -> type = NODE_TYPE_USHORT; + break; + case NODE_TYPE_INT: + 
yygotominor.yy18 -> type = NODE_TYPE_UINT; + break; + case NODE_TYPE_LONG: + yygotominor.yy18 -> type = NODE_TYPE_ULONG; + break; + case NODE_TYPE_LONGLONG: + yygotominor.yy18 -> type = NODE_TYPE_ULONGLONG; + break; + } + yy_destructor(yypParser,8,&yymsp[-1].minor); +} +#line 858 "lwcc/parse_c.c" + break; + case 18: /* baseint ::= KW_INT */ +#line 64 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_INT); yy_destructor(yypParser,10,&yymsp[0].minor); +} +#line 864 "lwcc/parse_c.c" + break; + case 19: /* baseint ::= KW_LONG */ +#line 65 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_LONG); yy_destructor(yypParser,7,&yymsp[0].minor); +} +#line 870 "lwcc/parse_c.c" + break; + case 20: /* baseint ::= KW_LONG KW_INT */ +#line 66 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_LONG); yy_destructor(yypParser,7,&yymsp[-1].minor); + yy_destructor(yypParser,10,&yymsp[0].minor); +} +#line 877 "lwcc/parse_c.c" + break; + case 21: /* baseint ::= KW_LONG KW_LONG */ +#line 67 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_LONGLONG); yy_destructor(yypParser,7,&yymsp[-1].minor); + yy_destructor(yypParser,7,&yymsp[0].minor); +} +#line 884 "lwcc/parse_c.c" + break; + case 22: /* baseint ::= KW_SHORT */ +#line 68 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_SHORT); yy_destructor(yypParser,11,&yymsp[0].minor); +} +#line 890 "lwcc/parse_c.c" + break; + case 23: /* baseint ::= KW_LONG KW_LONG KW_INT */ +#line 69 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_LONGLONG); yy_destructor(yypParser,7,&yymsp[-2].minor); + yy_destructor(yypParser,7,&yymsp[-1].minor); + yy_destructor(yypParser,10,&yymsp[0].minor); +} +#line 898 "lwcc/parse_c.c" + break; + case 24: /* baseint ::= KW_SHORT KW_INT */ +#line 70 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_SHORT); yy_destructor(yypParser,11,&yymsp[-1].minor); + yy_destructor(yypParser,10,&yymsp[0].minor); +} +#line 905 "lwcc/parse_c.c" + break; + case 25: 
/* baseint ::= KW_CHAR */ +#line 71 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_TYPE_CHAR); yy_destructor(yypParser,12,&yymsp[0].minor); +} +#line 911 "lwcc/parse_c.c" + break; + case 26: /* fundecl ::= datatype ident arglist statementblock */ +#line 74 "lwcc/parse_c.y" +{ + yygotominor.yy18 = node_create(NODE_FUNDEF, yymsp[-3].minor.yy18, yymsp[-2].minor.yy18, yymsp[-1].minor.yy18, yymsp[0].minor.yy18); +} +#line 918 "lwcc/parse_c.c" + break; + case 27: /* fundecl ::= datatype ident arglist ENDS */ +#line 78 "lwcc/parse_c.y" +{ + yygotominor.yy18 = node_create(NODE_FUNDECL, yymsp[-3].minor.yy18, yymsp[-2].minor.yy18, yymsp[-1].minor.yy18); + yy_destructor(yypParser,1,&yymsp[0].minor); +} +#line 926 "lwcc/parse_c.c" + break; + case 28: /* arglist ::= OPAREN CPAREN */ +#line 82 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_FUNARGS); yy_destructor(yypParser,13,&yymsp[-1].minor); + yy_destructor(yypParser,14,&yymsp[0].minor); +} +#line 933 "lwcc/parse_c.c" + break; + case 29: /* statementblock ::= OBRACE CBRACE */ +#line 84 "lwcc/parse_c.y" +{ yygotominor.yy18 = node_create(NODE_BLOCK); yy_destructor(yypParser,15,&yymsp[-1].minor); + yy_destructor(yypParser,16,&yymsp[0].minor); +} +#line 940 "lwcc/parse_c.c" + break; + default: + break; + }; + yygoto = yyRuleInfo[yyruleno].lhs; + yysize = yyRuleInfo[yyruleno].nrhs; + yypParser->yyidx -= yysize; + yyact = yy_find_reduce_action(yymsp[-yysize].stateno,(YYCODETYPE)yygoto); + if( yyact < YYNSTATE ){ +#ifdef NDEBUG + /* If we are not debugging and the reduce action popped at least + ** one element off the stack, then we can push the new element back + ** onto the stack here, and skip the stack overflow test in yy_shift(). + ** That gives a significant speed improvement. 
*/ + if( yysize ){ + yypParser->yyidx++; + yymsp -= yysize-1; + yymsp->stateno = (YYACTIONTYPE)yyact; + yymsp->major = (YYCODETYPE)yygoto; + yymsp->minor = yygotominor; + }else +#endif + { + yy_shift(yypParser,yyact,yygoto,&yygotominor); + } + }else{ + assert( yyact == YYNSTATE + YYNRULE + 1 ); + yy_accept(yypParser); + } +} + +/* +** The following code executes when the parse fails +*/ +#ifndef YYNOERRORRECOVERY +static void yy_parse_failed( + yyParser *yypParser /* The parser */ +){ + ParseARG_FETCH; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt); + } +#endif + while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will be executed whenever the + ** parser fails */ +#line 86 "lwcc/parse_c.y" + + fprintf(stderr, "Parse error\n"); +#line 991 "lwcc/parse_c.c" + ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} +#endif /* YYNOERRORRECOVERY */ + +/* +** The following code executes when a syntax error first occurs. 
+*/ +static void yy_syntax_error( + yyParser *yypParser, /* The parser */ + int yymajor, /* The major type of the error token */ + YYMINORTYPE yyminor /* The minor type of the error token */ +){ + ParseARG_FETCH; +#define TOKEN (yyminor.yy0) +#line 94 "lwcc/parse_c.y" + + fprintf(stderr, "Undexpected token %d: ", TOKEN -> tokid); + tokendata_print(stderr, TOKEN); +#line 1010 "lwcc/parse_c.c" + ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} + +/* +** The following is executed when the parser accepts +*/ +static void yy_accept( + yyParser *yypParser /* The parser */ +){ + ParseARG_FETCH; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sAccept!\n",yyTracePrompt); + } +#endif + while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will be executed whenever the + ** parser accepts */ + ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} + +/* The main parser program. +** The first argument is a pointer to a structure obtained from +** "ParseAlloc" which describes the current state of the parser. +** The second argument is the major token number. The third is +** the minor token. The fourth optional argument is whatever the +** user wants (and specified in the grammar) and is available for +** use by the action routines. +** +** Inputs: +**
    +**
  • A pointer to the parser (an opaque structure.) +**
  • The major token number. +**
  • The minor token number. +**
  • An optional argument of a grammar-specified type. +**
+** +** Outputs: +** None. +*/ +void Parse( + void *yyp, /* The parser */ + int yymajor, /* The major token code number */ + ParseTOKENTYPE yyminor /* The value for the token */ + ParseARG_PDECL /* Optional %extra_argument parameter */ +){ + YYMINORTYPE yyminorunion; + int yyact; /* The parser action. */ + int yyendofinput; /* True if we are at the end of input */ +#ifdef YYERRORSYMBOL + int yyerrorhit = 0; /* True if yymajor has invoked an error */ +#endif + yyParser *yypParser; /* The parser */ + + /* (re)initialize the parser, if necessary */ + yypParser = (yyParser*)yyp; + if( yypParser->yyidx<0 ){ +#if YYSTACKDEPTH<=0 + if( yypParser->yystksz <=0 ){ + /*memset(&yyminorunion, 0, sizeof(yyminorunion));*/ + yyminorunion = yyzerominor; + yyStackOverflow(yypParser, &yyminorunion); + return; + } +#endif + yypParser->yyidx = 0; + yypParser->yyerrcnt = -1; + yypParser->yystack[0].stateno = 0; + yypParser->yystack[0].major = 0; + } + yyminorunion.yy0 = yyminor; + yyendofinput = (yymajor==0); + ParseARG_STORE; + +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sInput %s\n",yyTracePrompt,yyTokenName[yymajor]); + } +#endif + + do{ + yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor); + if( yyactyyerrcnt--; + yymajor = YYNOCODE; + }else if( yyact < YYNSTATE + YYNRULE ){ + yy_reduce(yypParser,yyact-YYNSTATE); + }else{ + assert( yyact == YY_ERROR_ACTION ); +#ifdef YYERRORSYMBOL + int yymx; +#endif +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt); + } +#endif +#ifdef YYERRORSYMBOL + /* A syntax error has occurred. + ** The response to an error depends upon whether or not the + ** grammar defines an error token "ERROR". + ** + ** This is what we do if the grammar does define ERROR: + ** + ** * Call the %syntax_error function. + ** + ** * Begin popping the stack until we enter a state where + ** it is legal to shift the error symbol, then shift + ** the error symbol. + ** + ** * Set the error count to three. 
+ ** + ** * Begin accepting and shifting new tokens. No new error + ** processing will occur until three tokens have been + ** shifted successfully. + ** + */ + if( yypParser->yyerrcnt<0 ){ + yy_syntax_error(yypParser,yymajor,yyminorunion); + } + yymx = yypParser->yystack[yypParser->yyidx].major; + if( yymx==YYERRORSYMBOL || yyerrorhit ){ +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sDiscard input token %s\n", + yyTracePrompt,yyTokenName[yymajor]); + } +#endif + yy_destructor(yypParser, (YYCODETYPE)yymajor,&yyminorunion); + yymajor = YYNOCODE; + }else{ + while( + yypParser->yyidx >= 0 && + yymx != YYERRORSYMBOL && + (yyact = yy_find_reduce_action( + yypParser->yystack[yypParser->yyidx].stateno, + YYERRORSYMBOL)) >= YYNSTATE + ){ + yy_pop_parser_stack(yypParser); + } + if( yypParser->yyidx < 0 || yymajor==0 ){ + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + yy_parse_failed(yypParser); + yymajor = YYNOCODE; + }else if( yymx!=YYERRORSYMBOL ){ + YYMINORTYPE u2; + u2.YYERRSYMDT = 0; + yy_shift(yypParser,yyact,YYERRORSYMBOL,&u2); + } + } + yypParser->yyerrcnt = 3; + yyerrorhit = 1; +#elif defined(YYNOERRORRECOVERY) + /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to + ** do any kind of error recovery. Instead, simply invoke the syntax + ** error routine and continue going as if nothing had happened. + ** + ** Applications can set this macro (for example inside %include) if + ** they intend to abandon the parse upon the first syntax error seen. + */ + yy_syntax_error(yypParser,yymajor,yyminorunion); + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + yymajor = YYNOCODE; + +#else /* YYERRORSYMBOL is not defined */ + /* This is what we do if the grammar does not define ERROR: + ** + ** * Report an error message, and throw away the input token. + ** + ** * If the input token is $, then fail the parse. 
+ ** + ** As before, subsequent error messages are suppressed until + ** three input tokens have been successfully shifted. + */ + if( yypParser->yyerrcnt<=0 ){ + yy_syntax_error(yypParser,yymajor,yyminorunion); + } + yypParser->yyerrcnt = 3; + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + if( yyendofinput ){ + yy_parse_failed(yypParser); + } + yymajor = YYNOCODE; +#endif + } + }while( yymajor!=YYNOCODE && yypParser->yyidx>=0 ); + return; +} diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/parse_c.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/parse_c.h Mon Aug 05 21:27:09 2019 -0600 @@ -0,0 +1,16 @@ +#define PTOK_ENDS 1 +#define PTOK_IDENTIFIER 2 +#define PTOK_STAR 3 +#define PTOK_KW_VOID 4 +#define PTOK_KW_FLOAT 5 +#define PTOK_KW_DOUBLE 6 +#define PTOK_KW_LONG 7 +#define PTOK_KW_UNSIGNED 8 +#define PTOK_KW_SIGNED 9 +#define PTOK_KW_INT 10 +#define PTOK_KW_SHORT 11 +#define PTOK_KW_CHAR 12 +#define PTOK_OPAREN 13 +#define PTOK_CPAREN 14 +#define PTOK_OBRACE 15 +#define PTOK_CBRACE 16 diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/parse_c.y --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/parse_c.y Mon Aug 05 21:27:09 2019 -0600 @@ -0,0 +1,97 @@ +%include { +#include // only needed due to a bug in lemon +#include +#include "parse.h" +#include "tree.h" +} + +%token_type { struct tokendata * } +%token_prefix PTOK_ +%token_destructor { tokendata_free($$); } +%default_type { node_t * } +%extra_argument { struct parserinfo *pinfo } + +program(A) ::= rprogram(B). { A = B; pinfo -> parsetree = A; } + +rprogram(A) ::= rprogram(C) globaldecl(B). { + A = C; + node_addchild(A, B); +} +rprogram(A) ::= . { A = node_create(NODE_PROGRAM); } + +globaldecl(A) ::= vardecl(B). { A = B; } +globaldecl(A) ::= fundecl(B). { A = B; } + +vardecl(A) ::= datatype(B) ident(C) ENDS. { + A = node_create(NODE_DECL, B, C); +} + +ident(A) ::= IDENTIFIER(B). { A = node_create(NODE_IDENT, B -> strval); } + +datatype(A) ::= typename(B). 
{ A = B; } +datatype(A) ::= datatype(B) STAR. { A = node_create(NODE_TYPE_PTR, B); } + +typename(A) ::= KW_VOID. { A = node_create(NODE_TYPE_VOID); } +typename(A) ::= KW_FLOAT. { A = node_create(NODE_TYPE_FLOAT); } +typename(A) ::= KW_DOUBLE. { A = node_create(NODE_TYPE_DOUBLE); } +typename(A) ::= KW_LONG KW_DOUBLE. { A = node_create(NODE_TYPE_LDOUBLE); } +typename(A) ::= baseint(B). { A = B; } +typename(A) ::= KW_UNSIGNED. { A = node_create(NODE_TYPE_UINT); } +typename(A) ::= KW_SIGNED. { A = node_create(NODE_TYPE_INT); } +typename(A) ::= KW_SIGNED baseint(B). { A = B; if (A -> type == NODE_TYPE_CHAR) A -> type = NODE_TYPE_SCHAR; } +typename(A) ::= KW_UNSIGNED baseint(B). { + A = B; + switch (A -> type) + { + case NODE_TYPE_CHAR: + A -> type = NODE_TYPE_UCHAR; + break; + case NODE_TYPE_SHORT: + A -> type = NODE_TYPE_USHORT; + break; + case NODE_TYPE_INT: + A -> type = NODE_TYPE_UINT; + break; + case NODE_TYPE_LONG: + A -> type = NODE_TYPE_ULONG; + break; + case NODE_TYPE_LONGLONG: + A -> type = NODE_TYPE_ULONGLONG; + break; + } +} + +baseint(A) ::= KW_INT. { A = node_create(NODE_TYPE_INT); } +baseint(A) ::= KW_LONG. { A = node_create(NODE_TYPE_LONG); } +baseint(A) ::= KW_LONG KW_INT. { A = node_create(NODE_TYPE_LONG); } +baseint(A) ::= KW_LONG KW_LONG. { A = node_create(NODE_TYPE_LONGLONG); } +baseint(A) ::= KW_SHORT. { A = node_create(NODE_TYPE_SHORT); } +baseint(A) ::= KW_LONG KW_LONG KW_INT. { A = node_create(NODE_TYPE_LONGLONG); } +baseint(A) ::= KW_SHORT KW_INT. { A = node_create(NODE_TYPE_SHORT); } +baseint(A) ::= KW_CHAR. { A = node_create(NODE_TYPE_CHAR); } + + +fundecl(A) ::= datatype(B) ident(C) arglist(D) statementblock(E). { + A = node_create(NODE_FUNDEF, B, C, D, E); +} + +fundecl(A) ::= datatype(B) ident(C) arglist(D) ENDS. { + A = node_create(NODE_FUNDECL, B, C, D); +} + +arglist(A) ::= OPAREN CPAREN. { A = node_create(NODE_FUNARGS); } + +statementblock(A) ::= OBRACE CBRACE. 
{ A = node_create(NODE_BLOCK); } + +%parse_failure { + fprintf(stderr, "Parse error\n"); +} + +%stack_overflow { + fprintf(stderr, "Parser stack overflow\n"); +} + +%syntax_error { + fprintf(stderr, "Undexpected token %d: ", TOKEN -> tokid); + tokendata_print(stderr, TOKEN); +} diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/preproc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/preproc.c Mon Aug 05 21:27:09 2019 -0600 @@ -0,0 +1,1672 @@ +/* +lwcc/preproc.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . 
+*/ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "cpp.h" +#include "symbol.h" +#include "token.h" + +static int expand_macro(struct preproc_info *, char *); +static void process_directive(struct preproc_info *); +static long eval_expr(struct preproc_info *); +extern struct token *preproc_lex_next_token(struct preproc_info *); +static long preproc_numval(struct preproc_info *, struct token *); +static int eval_escape(char **); +extern int preproc_lex_fetch_byte(struct preproc_info *); +extern void preproc_lex_unfetch_byte(struct preproc_info *, int); + + +struct token *preproc_next_processed_token(struct preproc_info *pp) +{ + struct token *ct; + +again: + ct = preproc_next_token(pp); + if (ct -> ttype == TOK_EOF) + return ct; + if (ct -> ttype == TOK_EOL) + { + pp -> ppeolseen = 1; + return ct; + } + + if (ct -> ttype == TOK_HASH && pp -> ppeolseen == 1) + { + // preprocessor directive + process_directive(pp); + goto again; + } + // if we're in a false section, don't return the token; keep scanning + if (pp -> skip_level) + goto again; + + if (ct -> ttype != TOK_WSPACE) + pp -> ppeolseen = 0; + + if (ct -> ttype == TOK_IDENT) + { + // possible macro expansion + if (expand_macro(pp, ct -> strval)) + goto again; + } + + return ct; +} + +static struct token *preproc_next_processed_token_nws(struct preproc_info *pp) +{ + struct token *t; + + do + { + t = preproc_next_processed_token(pp); + } while (t -> ttype == TOK_WSPACE); + return t; +} + +static struct token *preproc_next_token_nws(struct preproc_info *pp) +{ + struct token *t; + + do + { + t = preproc_next_token(pp); + } while (t -> ttype == TOK_WSPACE); + return t; +} + +static void skip_eol(struct preproc_info *pp) +{ + struct token *t; + + if (pp -> curtok && pp -> curtok -> ttype == TOK_EOL) + return; + do + { + t = preproc_next_token(pp); + } while (t -> ttype != TOK_EOL); +} + +static void check_eol(struct preproc_info *pp) +{ + struct token *t; + 
+ t = preproc_next_token_nws(pp); + if (t -> ttype != TOK_EOL) + preproc_throw_warning(pp, "Extra text after preprocessor directive"); + skip_eol(pp); +} + +static void dir_ifdef(struct preproc_info *pp) +{ + struct token *ct; + + if (pp -> skip_level) + { + pp -> skip_level++; + skip_eol(pp); + return; + } + + do + { + ct = preproc_next_token(pp); + } while (ct -> ttype == TOK_WSPACE); + + if (ct -> ttype != TOK_IDENT) + { + preproc_throw_error(pp, "Bad #ifdef"); + skip_eol(pp); + } + + if (symtab_find(pp, ct -> strval) == NULL) + { + pp -> skip_level++; + } + else + { + pp -> found_level++; + } + check_eol(pp); +} + +static void dir_ifndef(struct preproc_info *pp) +{ + struct token *ct; + + if (pp -> skip_level) + { + pp -> skip_level++; + skip_eol(pp); + return; + } + + do + { + ct = preproc_next_token(pp); + } while (ct -> ttype == TOK_WSPACE); + + if (ct -> ttype != TOK_IDENT) + { + preproc_throw_error(pp, "Bad #ifdef"); + skip_eol(pp); + } + + if (symtab_find(pp, ct -> strval) != NULL) + { + pp -> skip_level++; + } + else + { + pp -> found_level++; + } + check_eol(pp); +} + +static void dir_if(struct preproc_info *pp) +{ + if (pp -> skip_level || !eval_expr(pp)) + pp -> skip_level++; + else + pp -> found_level++; +} + +static void dir_elif(struct preproc_info *pp) +{ + if (pp -> skip_level == 0) + pp -> else_skip_level = pp -> found_level; + if (pp -> skip_level) + { + if (pp -> else_skip_level > pp -> found_level) + ; + else if (--(pp -> skip_level) != 0) + pp -> skip_level++; + else if (eval_expr(pp)) + pp -> found_level++; + else + pp -> skip_level++; + } + else if (pp -> found_level) + { + pp -> skip_level++; + pp -> found_level--; + } + else + preproc_throw_error(pp, "#elif in non-conditional section"); +} + +static void dir_else(struct preproc_info *pp) +{ + if (pp -> skip_level) + { + if (pp -> else_skip_level > pp -> found_level) + ; + else if (--(pp -> skip_level) != 0) + pp -> skip_level++; + else + pp -> found_level++; + } + else if (pp -> 
found_level) + { + pp -> skip_level++; + pp -> found_level--; + } + else + { + preproc_throw_error(pp, "#else in non-conditional section"); + } + if (pp -> else_level == pp -> found_level + pp -> skip_level) + { + preproc_throw_error(pp, "Too many #else"); + } + pp -> else_level = pp -> found_level + pp -> skip_level; + check_eol(pp); +} + +static void dir_endif(struct preproc_info *pp) +{ + if (pp -> skip_level) + pp -> skip_level--; + else if (pp -> found_level) + pp -> found_level--; + else + preproc_throw_error(pp, "#endif in non-conditional section"); + if (pp -> skip_level == 0) + pp -> else_skip_level = 0; + pp -> else_level = 0; + check_eol(pp); +} + +static void dir_define(struct preproc_info *pp) +{ + struct token_list *tl = NULL; + struct token *ct; + int nargs = -1; + int vargs = 0; + char *mname = NULL; + + char **arglist = NULL; + + if (pp -> skip_level) + { + skip_eol(pp); + return; + } + + ct = preproc_next_token_nws(pp); + if (ct -> ttype != TOK_IDENT) + goto baddefine; + + mname = lw_strdup(ct -> strval); + ct = preproc_next_token(pp); + if (ct -> ttype == TOK_WSPACE) + { + /* object like macro */ + } + else if (ct -> ttype == TOK_EOL) + { + /* object like macro - empty value */ + goto out; + } + else if (ct -> ttype == TOK_OPAREN) + { + /* function like macro - parse args */ + nargs = 0; + vargs = 0; + for (;;) + { + ct = preproc_next_token_nws(pp); + if (ct -> ttype == TOK_EOL) + { + goto baddefine; + } + if (ct -> ttype == TOK_CPAREN) + break; + + if (ct -> ttype == TOK_IDENT) + { + /* parameter name */ + nargs++; + /* record argument name */ + arglist = lw_realloc(arglist, sizeof(char *) * nargs); + arglist[nargs - 1] = lw_strdup(ct -> strval); + + /* check for end of args or comma */ + ct = preproc_next_token_nws(pp); + if (ct -> ttype == TOK_CPAREN) + break; + else if (ct -> ttype == TOK_COMMA) + continue; + else + goto baddefine; + } + else if (ct -> ttype == TOK_ELLIPSIS) + { + /* variadic macro */ + vargs = 1; + ct = 
preproc_next_token_nws(pp);
+				if (ct -> ttype != TOK_CPAREN)
+					goto baddefine;
+				break;
+			}
+			else
+				goto baddefine;
+		}
+	}
+	else
+	{
+baddefine:
+		preproc_throw_error(pp, "bad #define", ct -> ttype);
+baddefine2:
+		token_list_destroy(tl);
+		skip_eol(pp);
+		lw_free(mname);
+		while (nargs > 0)
+			lw_free(arglist[--nargs]);
+		lw_free(arglist);
+		return;
+	}
+
+	tl = token_list_create();
+	for (;;)
+	{
+		ct = preproc_next_token(pp);
+
+		if (ct -> ttype == TOK_EOL)
+			break;
+		token_list_append(tl, token_dup(ct));
+	}
+out:
+	if (strcmp(mname, "defined") == 0)
+	{
+		preproc_throw_warning(pp, "attempt to define 'defined' as a macro not allowed");
+		goto baddefine2;
+	}
+	else if (symtab_find(pp, mname) != NULL)
+	{
+		/* need to do a token compare between the old value and the new value
+		   to decide whether to complain */
+		preproc_throw_warning(pp, "%s previous defined", mname);
+		symtab_undef(pp, mname);
+	}
+	symtab_define(pp, mname, tl, nargs, arglist, vargs);
+	lw_free(mname);
+	while (nargs > 0)
+		lw_free(arglist[--nargs]);
+	lw_free(arglist);
+	/* no need to check for EOL here */
+}
+
+/*
+Define a macro from a "NAME" or "NAME=VALUE" string by feeding it to
+dir_define() through the lexer's string input (pp -> lexstr). The first '='
+is turned into a space so the text reads like the body of a #define line.
+*/
+void preproc_add_macro(struct preproc_info *pp, char *str)
+{
+	char *s;
+
+	pp -> lexstr = lw_strdup(str);
+	pp -> lexstrloc = 0;
+	s = strchr(pp -> lexstr, '=');
+	if (s)
+		*s = ' ';
+
+	dir_define(pp);
+
+	lw_free(pp -> lexstr);
+	pp -> lexstr = NULL;
+	pp -> lexstrloc = 0;
+}
+
+/* #undef handler: remove the named macro from the symbol table. */
+static void dir_undef(struct preproc_info *pp)
+{
+	struct token *ct;
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+
+	do
+	{
+		ct = preproc_next_token(pp);
+	} while (ct -> ttype == TOK_WSPACE);
+
+	if (ct -> ttype != TOK_IDENT)
+	{
+		preproc_throw_error(pp, "Bad #undef");
+		skip_eol(pp);
+		/* the line is already consumed; do not fall through and try to
+		   undefine whatever non-identifier token was found */
+		return;
+	}
+
+	symtab_undef(pp, ct -> strval);
+	check_eol(pp);
+}
+
+/*
+Collect the text of the remaining tokens on the line (leading whitespace
+skipped) into a newly allocated string. The caller frees it with lw_free().
+NOTE(review): assumes every token on the line carries a non-NULL strval.
+*/
+char *streol(struct preproc_info *pp)
+{
+	struct lw_strbuf *s;
+	struct token *ct;
+	int i;
+
+	s = lw_strbuf_new();
+	do
+	{
+		ct = preproc_next_token(pp);
+	} while (ct -> ttype == TOK_WSPACE);
+
+	while (ct -> ttype != TOK_EOL)
+	{
+		for (i = 0; ct -> strval[i]; i++)
+			lw_strbuf_add(s, ct -> strval[i]);
+		ct = preproc_next_token(pp);
+	}
+	return lw_strbuf_end(s);
+}
+
+/* #error handler: report the rest of the line as an error. */
+static void dir_error(struct preproc_info *pp)
+{
+	char *s;
+
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+
+	s = streol(pp);
+	preproc_throw_error(pp, "%s", s);
+	lw_free(s);
+}
+
+/* #warning handler: report the rest of the line as a warning. */
+static void dir_warning(struct preproc_info *pp)
+{
+	char *s;
+
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+
+	s = streol(pp);
+	preproc_throw_warning(pp, "%s", s);
+	lw_free(s);
+}
+
+/*
+Return a newly allocated "dir/fn" if that path is readable, NULL otherwise.
+*/
+static char *preproc_file_exists_in_dir(char *dir, char *fn)
+{
+	int l;
+	char *f;
+
+	l = snprintf(NULL, 0, "%s/%s", dir, fn);
+	f = lw_alloc(l + 1);
+	snprintf(f, l + 1, "%s/%s", dir, fn);
+
+	if (access(f, R_OK) == 0)
+		return f;
+	lw_free(f);
+	return NULL;
+}
+
+/*
+Locate an include file. For a quoted include (sys == 0) the directory of the
+current file and the "quote" list are tried first; the "include" list is
+tried in every case. Returns a newly allocated path or NULL if not found.
+*/
+static char *preproc_find_file(struct preproc_info *pp, char *fn, int sys)
+{
+	char *tstr;
+	char *pref;
+	char *rfn;
+
+	/* pass through absolute paths, dumb as they are */
+	if (fn[0] == '/')
+		return lw_strdup(fn);
+
+	if (!sys)
+	{
+		/* look in the directory with the current file; that is everything
+		   before the LAST slash, so "a/b/c.c" yields "a/b", not "a" */
+		tstr = strrchr(pp -> fn, '/');
+		if (!tstr)
+			pref = lw_strdup(".");
+		else
+		{
+			pref = lw_alloc(tstr - pp -> fn + 1);
+			memcpy(pref, pp -> fn, tstr - pp -> fn);
+			pref[tstr - pp -> fn] = 0;
+		}
+		rfn = preproc_file_exists_in_dir(pref, fn);
+		lw_free(pref);
+		if (rfn)
+			return rfn;
+
+		/* look in the "quote" dir list */
+		lw_stringlist_reset(pp -> quotelist);
+		for (pref = lw_stringlist_current(pp -> quotelist); pref; pref = lw_stringlist_next(pp -> quotelist))
+		{
+			rfn = preproc_file_exists_in_dir(pref, fn);
+			if (rfn)
+				return rfn;
+		}
+	}
+
+	/* look in the "include" dir list */
+	lw_stringlist_reset(pp -> inclist);
+	for (pref = lw_stringlist_current(pp -> inclist); pref; pref = lw_stringlist_next(pp -> inclist))
+	{
+		rfn = preproc_file_exists_in_dir(pref, fn);
+		if (rfn)
+			return rfn;
+	}
+
+	/* the default search list is provided by the driver program */
+	return NULL;
+}
+
+/*
+#include handler. Accepts "file", <file>, or a macro expanding to either.
+On success the current input state is pushed on pp -> filestack and pp is
+re-pointed at the new file. A file that cannot be opened is fatal.
+*/
+static void dir_include(struct preproc_info *pp)
+{
+	FILE *fp;
+	struct token *ct;
+	int sys = 0;
+	char *fn;
+	char *rfn;
+	struct lw_strbuf *strbuf;
+	int i;
+	struct preproc_info *fs;
+
+	/* like the other directives, do nothing inside a skipped region */
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+
+	ct = preproc_next_token_nws(pp);
+	if (ct -> ttype == TOK_STR_LIT)
+	{
+usrinc:
+		/* strval includes the surrounding quotes; strip them */
+		sys = strlen(ct -> strval);
+		if (sys < 2)
+		{
+			preproc_throw_error(pp, "Bad #include");
+			skip_eol(pp);
+			return;
+		}
+		fn = lw_alloc(sys - 1);
+		memcpy(fn, ct -> strval + 1, sys - 2);
+		fn[sys - 2] = 0;
+		sys = 0;
+		goto doinc;
+	}
+	else if (ct -> ttype == TOK_LT)
+	{
+		strbuf = lw_strbuf_new();
+		for (;;)
+		{
+			int c;
+			c = preproc_lex_fetch_byte(pp);
+			if (c == CPP_EOL || c == CPP_EOF)
+			{
+				/* unterminated <...>: the buffer is released here, so
+				   bail out instead of carrying on and using it again */
+				preproc_lex_unfetch_byte(pp, c);
+				preproc_throw_error(pp, "Bad #include");
+				lw_free(lw_strbuf_end(strbuf));
+				skip_eol(pp);
+				return;
+			}
+			if (c == '>')
+				break;
+			lw_strbuf_add(strbuf, c);
+		}
+		ct = preproc_next_token_nws(pp);
+		if (ct -> ttype != TOK_EOL)
+		{
+			preproc_throw_error(pp, "Bad #include");
+			skip_eol(pp);
+			lw_free(lw_strbuf_end(strbuf));
+			return;
+		}
+		sys = 1;
+		fn = lw_strbuf_end(strbuf);
+		goto doinc;
+	}
+	else
+	{
+		preproc_unget_token(pp, ct);
+		// computed include
+		ct = preproc_next_processed_token_nws(pp);
+		if (ct -> ttype == TOK_STR_LIT)
+			goto usrinc;
+		else if (ct -> ttype == TOK_LT)
+		{
+			strbuf = lw_strbuf_new();
+			for (;;)
+			{
+				ct = preproc_next_processed_token(pp);
+				if (ct -> ttype == TOK_GT)
+					break;
+				if (ct -> ttype == TOK_EOL)
+				{
+					preproc_throw_error(pp, "Bad #include");
+					lw_free(lw_strbuf_end(strbuf));
+					return;
+				}
+				/* walk the characters of the token, not the token pointer */
+				for (i = 0; ct -> strval[i]; i++)
+				{
+					lw_strbuf_add(strbuf, ct -> strval[i]);
+				}
+			}
+			ct = preproc_next_processed_token_nws(pp);
+			if (ct -> ttype != TOK_EOL)
+			{
+				preproc_throw_error(pp, "Bad #include");
+				skip_eol(pp);
+				lw_free(lw_strbuf_end(strbuf));
+				return;
+			}
+			sys = 1;
+			fn = lw_strbuf_end(strbuf);
+			goto doinc;
+		}
+		else
+		{
+			skip_eol(pp);
+			preproc_throw_error(pp, "Bad #include");
+			return;
+		}
+	}
+doinc:
+	/* keep the requested name separate from the resolved path so the
+	   error message can still print it when resolution or fopen fails */
+	rfn = preproc_find_file(pp, fn, sys);
+	fp = rfn ? fopen(rfn, "rb") : NULL;
+	if (!fp)
+	{
+		preproc_throw_error(pp, "Cannot open #include file %s - this is fatal", fn);
+		exit(1);
+	}
+	lw_free(fn);
+	fn = rfn;
+
+	/* save the current include file state, etc. */
+	fs = lw_alloc(sizeof(struct preproc_info));
+	*fs = *pp;
+	fs -> n = pp -> filestack;
+	pp -> curtok = NULL;
+	pp -> filestack = fs;
+	pp -> fn = lw_strpool_strdup(pp -> strpool, fn);
+	lw_free(fn);
+	pp -> fp = fp;
+	pp -> ra = CPP_NOUNG;
+	pp -> ppeolseen = 1;
+	pp -> eolstate = 0;
+	pp -> lineno = 1;
+	pp -> column = 0;
+	pp -> qseen = 0;
+	pp -> ungetbufl = 0;
+	pp -> ungetbufs = 0;
+	pp -> ungetbuf = NULL;
+	pp -> unget = 0;
+	pp -> eolseen = 0;
+	pp -> nlseen = 0;
+	pp -> skip_level = 0;
+	pp -> found_level = 0;
+	pp -> else_level = 0;
+	pp -> else_skip_level = 0;
+	pp -> tokqueue = NULL;
+	// now get on with processing
+}
+
+/* defined further down; repeated here so the helper below can use it */
+static int eval_escape(char **);
+
+/*
+Set pp -> fn from the text of a string literal token. The text includes the
+surrounding double quotes (see dir_include), so the opening quote is skipped,
+escapes are decoded, and decoding stops at the closing quote. The result is
+interned in the string pool like every other file name.
+*/
+static void set_fn_from_strlit(struct preproc_info *pp, char *lit)
+{
+	struct lw_strbuf *sb;
+	char *s;
+
+	sb = lw_strbuf_new();
+	if (*lit == '"')
+		lit++;
+	while (*lit && *lit != '"')
+	{
+		if (*lit == '\\')
+		{
+			/* eval_escape() expects to start after the backslash */
+			lit++;
+			lw_strbuf_add(sb, eval_escape(&lit));
+		}
+		else
+		{
+			lw_strbuf_add(sb, *lit++);
+		}
+	}
+	s = lw_strbuf_end(sb);
+	pp -> fn = lw_strpool_strdup(pp -> strpool, s);
+	lw_free(s);
+}
+
+/* #line handler: "#line number" or "#line number "file"". */
+static void dir_line(struct preproc_info *pp)
+{
+	struct token *ct;
+	long lineno;
+	char *estr;
+
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+
+	lineno = -1;
+
+	ct = preproc_next_processed_token_nws(pp);
+	if (ct -> ttype == TOK_NUMBER)
+	{
+		lineno = strtoul(ct -> strval, &estr, 10);
+		if (*estr)
+		{
+			preproc_throw_error(pp, "Bad #line");
+			skip_eol(pp);
+			return;
+		}
+	}
+	else
+	{
+		preproc_throw_error(pp, "Bad #line");
+		skip_eol(pp);
+		return;
+	}
+	ct = preproc_next_processed_token_nws(pp);
+	if (ct -> ttype == TOK_EOL)
+	{
+		pp -> lineno = lineno;
+		return;
+	}
+	if (ct -> ttype != TOK_STR_LIT)
+	{
+		preproc_throw_error(pp, "Bad #line");
+		skip_eol(pp);
+		return;
+	}
+	/* copy the literal: the token is not kept across the next fetch */
+	estr = lw_strdup(ct -> strval);
+	ct = preproc_next_processed_token_nws(pp);
+	if (ct -> ttype != TOK_EOL)
+	{
+		preproc_throw_error(pp, "Bad #line");
+		skip_eol(pp);
+		lw_free(estr);
+		return;
+	}
+	/* store the decoded name, not the literal with its quotes */
+	set_fn_from_strlit(pp, estr);
+	lw_free(estr);
+	pp -> lineno = lineno;
+}
+
+/* #pragma handler: no pragmas are supported; warn and ignore the line. */
+static void dir_pragma(struct preproc_info *pp)
+{
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+
+	preproc_throw_warning(pp, "Unsupported #pragma");
+	skip_eol(pp);
+}
+
+/* directive name to handler map, terminated by a NULL name */
+struct { char *name; void (*fn)(struct preproc_info *); } dirlist[] =
+{
+	{ "ifdef", dir_ifdef },
+	{ "ifndef", dir_ifndef },
+	{ "if", dir_if },
+	{ "else", dir_else },
+	{ "elif", dir_elif },
+	{ "endif", dir_endif },
+	{ "define", dir_define },
+	{ "undef", dir_undef },
+	{ "include", dir_include },
+	{ "error", dir_error },
+	{ "warning", dir_warning },
+	{ "line", dir_line },
+	{ "pragma", dir_pragma },
+	{ NULL, NULL }
+};
+
+/*
+Dispatch one directive line; the leading '#' has already been consumed.
+Handles the null directive, "# number "file"" markers left by a previous
+preprocessor run, and the named directives in dirlist.
+*/
+static void process_directive(struct preproc_info *pp)
+{
+	struct token *ct;
+	int i;
+
+	do
+	{
+		ct = preproc_next_token(pp);
+	} while (ct -> ttype == TOK_WSPACE);
+
+	// NULL directive
+	if (ct -> ttype == TOK_EOL)
+		return;
+
+	if (ct -> ttype == TOK_NUMBER)
+	{
+		// this is probably a file marker from a previous run of the preprocessor
+		i = preproc_numval(pp, ct);
+		ct = preproc_next_token_nws(pp);
+		if (ct -> ttype != TOK_STR_LIT)
+			goto baddir;
+		pp -> lineno = i;
+		/* the helper skips the opening quote and the escape backslashes */
+		set_fn_from_strlit(pp, ct -> strval);
+		skip_eol(pp);
+		return;
+	}
+
+	if (ct -> ttype != TOK_IDENT)
+		goto baddir;
+
+	for (i = 0; dirlist[i].name; i++)
+	{
+		if (strcmp(dirlist[i].name, ct -> strval) == 0)
+		{
+			(*(dirlist[i].fn))(pp);
+			return;
+		}
+	}
+baddir:
+	preproc_throw_error(pp, "Bad preprocessor directive");
+	while (ct -> ttype != TOK_EOL)
+		ct = preproc_next_token(pp);
+	return;
+}
+
+/*
+Evaluate a preprocessor expression
+*/
+
+/* same as skip_eol() but the EOL token is not consumed */
+static void skip_eoe(struct preproc_info *pp)
+{
+	skip_eol(pp);
+	preproc_unget_token(pp, pp -> curtok);
+}
+
+static long eval_expr_real(struct preproc_info *, int);
+
+/*
+Evaluate one term: a parenthesized expression, a unary operator applied to
+a term, the "defined" operator, a number, or an unknown identifier (zero).
+On error the rest of the line is skipped with the EOL left pending.
+*/
+static long eval_term_real(struct preproc_info *pp)
+{
+	long tval = 0;
+	struct token *ct;
+
+eval_next:
+	ct = preproc_next_processed_token_nws(pp);
+	if (ct -> ttype == TOK_EOL)
+	{
+		preproc_throw_error(pp, "Bad expression");
+		/* leave the EOL pending, as the other error paths do */
+		preproc_unget_token(pp, ct);
+		return 0;
+	}
+
+	switch (ct -> ttype)
+	{
+	case TOK_OPAREN:
+		tval = eval_expr_real(pp, 0);
+		ct = preproc_next_processed_token_nws(pp);
+		if (ct -> ttype != TOK_CPAREN)
+		{
+			preproc_throw_error(pp, "Unbalanced () in expression");
+			skip_eoe(pp);
+			return 0;
+		}
+		return tval;
+
+	case TOK_ADD: // unary +
+		goto eval_next;
+
+	case TOK_SUB: // unary -
+		tval = eval_expr_real(pp, 200);
+		return -tval;
+
+	case TOK_BNOT: // unary !
+		tval = eval_expr_real(pp, 200);
+		return !tval;
+
+	case TOK_COM: // unary ~
+		tval = eval_expr_real(pp, 200);
+		return ~tval;
+
+	/* NOTE: we should only get "TOK_IDENT" from an undefined macro */
+	case TOK_IDENT: // some sort of function, symbol, etc.
+		if (strcmp(ct -> strval, "defined") == 0)
+		{
+			/* the defined operator */
+			/* any number in the "defined" bit will be
+			   treated as a defined symbol, even zero */
+			ct = preproc_next_token_nws(pp);
+			if (ct -> ttype == TOK_OPAREN)
+			{
+				ct = preproc_next_token_nws(pp);
+				if (ct -> ttype != TOK_IDENT)
+				{
+					preproc_throw_error(pp, "Bad expression");
+					skip_eoe(pp);
+					return 0;
+				}
+				if (symtab_find(pp, ct -> strval) == NULL)
+					tval = 0;
+				else
+					tval = 1;
+				ct = preproc_next_token_nws(pp);
+				if (ct -> ttype != TOK_CPAREN)
+				{
+					preproc_throw_error(pp, "Bad expression");
+					skip_eoe(pp);
+					return 0;
+				}
+				return tval;
+			}
+			else if (ct -> ttype == TOK_IDENT)
+			{
+				return (symtab_find(pp, ct -> strval) != NULL) ? 1 : 0;
+			}
+			preproc_throw_error(pp, "Bad expression");
+			skip_eoe(pp);
+			return 0;
+		}
+		/* unknown identifier - it's zero */
+		return 0;
+
+	/* numbers */
+	case TOK_NUMBER:
+		return preproc_numval(pp, ct);
+
+	default:
+		preproc_throw_error(pp, "Bad expression");
+		skip_eoe(pp);
+		return 0;
+	}
+	return 0;
+}
+
+/*
+Precedence climbing evaluator. Consumes binary operators whose precedence
+is strictly greater than p; anything else is pushed back for the caller.
+*/
+static long eval_expr_real(struct preproc_info *pp, int p)
+{
+	static const struct operinfo
+	{
+		int tok;
+		int prec;
+	} operators[] =
+	{
+		{ TOK_ADD, 100 },
+		{ TOK_SUB, 100 },
+		{ TOK_STAR, 150 },
+		{ TOK_DIV, 150 },
+		{ TOK_MOD, 150 },
+		{ TOK_LT, 75 },
+		{ TOK_LE, 75 },
+		{ TOK_GT, 75 },
+		{ TOK_GE, 75 },
+		{ TOK_EQ, 70 },
+		{ TOK_NE, 70 },
+		{ TOK_BAND, 30 },
+		{ TOK_BOR, 25 },
+		{ TOK_NONE, 0 }
+	};
+
+	int op;
+	long term1, term2, term3;
+	struct token *ct;
+
+	term1 = eval_term_real(pp);
+eval_next:
+	ct = preproc_next_processed_token_nws(pp);
+	for (op = 0; operators[op].tok != TOK_NONE; op++)
+	{
+		if (operators[op].tok == ct -> ttype)
+			break;
+	}
+	/* if it isn't a recognized operator, assume end of expression */
+	if (operators[op].tok == TOK_NONE)
+	{
+		preproc_unget_token(pp, ct);
+		return term1;
+	}
+
+	/* if new operation is not higher than the current precedence, let the previous op finish */
+	if (operators[op].prec <= p)
+	{
+		/* the enclosing level still has to see this operator */
+		preproc_unget_token(pp, ct);
+		return term1;
+	}
+
+	/* get the second term */
+	term2 = eval_expr_real(pp, operators[op].prec);
+
+	switch (operators[op].tok)
+	{
+	case TOK_ADD:
+		term3 = term1 + term2;
+		break;
+
+	case TOK_SUB:
+		term3 = term1 - term2;
+		break;
+
+	case TOK_STAR:
+		term3 = term1 * term2;
+		break;
+
+	case TOK_DIV:
+		if (!term2)
+		{
+			preproc_throw_warning(pp, "Division by zero");
+			term3 = 0;
+			break;
+		}
+		term3 = term1 / term2;
+		break;
+
+	case TOK_MOD:
+		if (!term2)
+		{
+			preproc_throw_warning(pp, "Division by zero");
+			term3 = 0;
+			break;
+		}
+		term3 = term1 % term2;
+		break;
+
+	case TOK_BAND:
+		term3 = (term1 && term2);
+		break;
+
+	case TOK_BOR:
+		term3 = (term1 || term2);
+		break;
+
+	case TOK_EQ:
+		term3 = (term1 == term2);
+		break;
+
+	case TOK_NE:
+		term3 = (term1 != term2);
+		break;
+
+	case TOK_GT:
+		term3 = (term1 > term2);
+		break;
+
+	case TOK_GE:
+		term3 = (term1 >= term2);
+		break;
+
+	case TOK_LT:
+		term3 = (term1 < term2);
+		break;
+
+	case TOK_LE:
+		term3 = (term1 <= term2);
+		break;
+
+	default:
+		term3 = 0;
+		break;
+	}
+	term1 = term3;
+	goto eval_next;
+}
+
+/* Evaluate a complete #if/#elif expression; it must run to the EOL. */
+static long eval_expr(struct preproc_info *pp)
+{
+	long rv;
+	struct token *t;
+
+	rv = eval_expr_real(pp, 0);
+	t = preproc_next_token_nws(pp);
+	if (t -> ttype != TOK_EOL)
+	{
+		preproc_throw_error(pp, "Bad expression");
+		skip_eol(pp);
+	}
+	return rv;
+}
+
+/*
+Decode one escape sequence. *t points just past the backslash on entry and
+just past the last character of the escape on return. Returns the value.
+*/
+static int eval_escape(char **t)
+{
+	int c;
+	int c2;
+	int rv = 0;
+
+	if (**t == 0)
+		return 0;
+	c = *(*t)++;
+
+	switch (c)
+	{
+	case 'n':
+		return 10;
+	case 'r':
+		return 13;
+	case 'b':
+		return 8;
+	case 'e':
+		return 27;
+	case 'f':
+		return 12;
+	case 't':
+		return 9;
+	case 'v':
+		return 11;
+	case 'a':
+		return 7;
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7':
+		// octal constant
+		rv = c - '0';
+		for (c2 = 1; c2 < 3; c2++)
+		{
+			/* peek first: the character ending the escape belongs to
+			   the caller and must not be consumed */
+			c = **t;
+			if (c < '0' || c > '7')
+				break;
+			(*t)++;
+			rv = (rv << 3) | (c - '0');
+		}
+		return rv;
+	case 'x':
+		// hex constant
+		for (;;)
+		{
+			c = **t;
+			if (c < '0' || (c > '9' && c < 'A') || (c > 'F' && c < 'a') || c > 'f')
+				break;
+			(*t)++;
+			c = c - '0';
+			if (c > 9)
+				c -= 7;
+			if (c > 15)
+				c -= 32;
+			rv = (rv << 4) | c;
+		}
+		return rv & 0xff;
+	default:
+		return c;
+	}
+}
+
+/* convert a numeric string to a number */
+long preproc_numval(struct preproc_info *pp, struct token *t)
+{
+	unsigned long long rv = 0;
+	unsigned long long rv2 = 0;
+	char *tstr = t -> strval;
+	int radix = 10;
+	int c;
+	int ovf = 0;
+	union { long sv; unsigned long uv; } tv;
+
+	if (t -> ttype == TOK_CHR_LIT)
+	{
+		/* skip the opening quote, then pack each character as one byte */
+		tstr++;
+		while (*tstr && *tstr != '\'')
+		{
+			if (*tstr == '\\')
+			{
+				tstr++;
+				c = eval_escape(&tstr);
+			}
+			else
+				c = *tstr++;
+			/* mask so a negative char does not sign-extend over rv */
+			rv = (rv << 8) | (c & 0xff);
+			if (rv / radix < rv2)
+				ovf = 1;
+			rv2 = rv;
+
+		}
+		goto done;
+	}
+
+
+	if (*tstr == '0')
+	{
+		radix = 8;
+		tstr++;
+		if (*tstr == 'x' || *tstr == 'X')
+		{
+			radix = 16;
+			tstr++;
+		}
+	}
+	while (*tstr)
+	{
+		c = *tstr++;
+		if (c < '0' || (c > '9' && c < 'A') || (c > 'F' && c < 'a') || c > 'f')
+			break;
+		c -= '0';
+		if (c > 9)
+			c -= 7;
+		if (c > 15)
+			c -= 32;
+		if (c >= radix)
+			break;
+		rv = rv * radix + c;
+		if (rv / radix < rv2)
+			ovf = 1;
+		rv2 = rv;
+	}
+	tstr--;
+	while (*tstr == 'l' || *tstr == 'L')
+		tstr++;
+	tv.uv = rv;
+	if (tv.sv < 0 && radix == 10)
+		ovf = 1;
+done:
+	if (ovf)
+		preproc_throw_error(pp, "Constant out of range: %s", t -> strval);
+	return rv;
+}
+
+/*
+Below here is the logic for expanding a macro
+*/
+
+/*
+Build the string literal produced by the # operator for an argument:
+leading and trailing whitespace dropped, interior whitespace runs reduced
+to one space, and " and \ escaped inside string and character literals.
+Returns a newly allocated string including the surrounding quotes.
+NOTE(review): assumes every non-whitespace token has a non-NULL strval.
+*/
+static char *stringify(struct token_list *tli)
+{
+	struct lw_strbuf *s;
+	int ws = 0;
+	struct token *tl = tli -> head;
+
+	s = lw_strbuf_new();
+	lw_strbuf_add(s, '"');
+
+	while (tl && tl -> ttype == TOK_WSPACE)
+		tl = tl -> next;
+
+	for (; tl; tl = tl -> next)
+	{
+		if (tl -> ttype == TOK_WSPACE)
+		{
+			ws = 1;
+			continue;
+		}
+		if (ws)
+		{
+			lw_strbuf_add(s, ' ');
+		}
+		for (ws = 0; tl -> strval[ws]; ws++)
+		{
+			if (tl -> ttype == TOK_STR_LIT || tl -> ttype == TOK_CHR_LIT)
+			{
+				if (tl -> strval[ws] == '"' || tl -> strval[ws] == '\\')
+					lw_strbuf_add(s, '\\');
+			}
+			/* the character itself; without this only the escape
+			   backslashes and separators reach the result */
+			lw_strbuf_add(s, tl -> strval[ws]);
+		}
+		ws = 0;
+	}
+
+	lw_strbuf_add(s, '"');
+	return lw_strbuf_end(s);
+}
+
+/*
+Map an identifier to its argument index for macro s: 0..nargs-1 for a named
+parameter, nargs for __VA_ARGS__ of a variadic macro, -1 for anything else
+(always -1 for object-like macros, whose nargs is -1).
+*/
+static int macro_arg(struct symtab_e *s, char *str)
+{
+	int i;
+
+	if (strcmp(str, "__VA_ARGS__") == 0)
+		return (s -> vargs) ? s -> nargs : -1;
+	for (i = 0; i < s -> nargs; i++)
+		if (strcmp(s -> params[i], str) == 0)
+			return i;
+	return -1;
+}
+
+/* return list to tokens as a result of ## expansion */
+static struct token_list *paste_tokens(struct preproc_info *pp, struct symtab_e *s, struct token_list **arglist, struct token *t1, struct token *t2)
+{
+	struct token_list *left;
+	struct token_list *right;
+	char *tstr;
+	struct token *ttok;
+	int i;
+
+	if (t1 -> ttype == TOK_IDENT)
+	{
+		i = macro_arg(s, t1 -> strval);
+		if (i == -1)
+		{
+			left = token_list_create();
+			token_list_append(left, token_dup(t1));
+		}
+		else
+		{
+			left = token_list_dup(arglist[i]);
+		}
+	}
+	else
+	{
+		left = token_list_create();
+		token_list_append(left, token_dup(t1));
+	}
+	// munch trailing white space
+	while (left -> tail && left -> tail -> ttype == TOK_WSPACE)
+	{
+		token_list_remove(left -> tail);
+	}
+
+	if (t2 -> ttype == TOK_IDENT)
+	{
+		i = macro_arg(s, t2 -> strval);
+		if (i == -1)
+		{
+			right = token_list_create();
+			token_list_append(right, token_dup(t2));
+		}
+		else
+		{
+			right = token_list_dup(arglist[i]);
+		}
+	}
+	else
+	{
+		right = token_list_create();
+		token_list_append(right, token_dup(t2));
+	}
+	// munch leading white space
+	while (right -> head && right -> head -> ttype == TOK_WSPACE)
+	{
+		token_list_remove(right -> head);
+	}
+
+	// nothing to append at all
+	if (left -> head != NULL && right -> head == NULL)
+	{
+		// right arg is empty - use left
+		token_list_destroy(right);
+		return left;
+	}
+	if (left -> head == NULL && right -> head != NULL)
+	{
+		// left arg is empty, use right
+		token_list_destroy(left);
+		return right;
+	}
+	if (left -> head == NULL && right -> head == NULL)
+	{
+		// both empty, use left
+		token_list_destroy(right);
+		return left;
+	}
+
+	// both non-empty - paste left tail with right head
+	// then paste the right list onto the left
+	tstr = lw_alloc(strlen(left -> tail -> strval) + strlen(right -> head -> strval) + 1);
+	strcpy(tstr, left -> tail -> strval);
+	strcat(tstr, right -> head -> strval);
+
+	/* relex the joined text; it only counts if it forms exactly one token */
+	pp -> lexstr = tstr;
+	pp -> lexstrloc = 0;
+
+	ttok = preproc_lex_next_token(pp);
+	if (ttok -> ttype != TOK_ERROR && pp -> lexstr[pp -> lexstrloc] == 0)
+	{
+		// we have a new token here
+		token_list_remove(left -> tail);
+		token_list_remove(right -> head);
+		token_list_append(left, token_dup(ttok));
+	}
+	lw_free(tstr);
+	pp -> lexstr = NULL;
+	pp -> lexstrloc = 0;
+	for (ttok = right -> head; ttok; ttok = ttok -> next)
+	{
+		token_list_append(left, token_dup(ttok));
+	}
+	token_list_destroy(right);
+	return left;
+}
+
+/*
+Try to expand the macro named mname at the current input position.
+Returns 1 if the name was consumed as a macro (its expansion, if any, has
+been pushed back onto the input) and 0 if the caller should treat the name
+as an ordinary identifier.
+*/
+static int expand_macro(struct preproc_info *pp, char *mname)
+{
+	struct symtab_e *s;
+	struct token *t, *t2, *t3;
+	int nargs = 0;
+	int need;
+	struct expand_e *e;
+	struct token_list **exparglist = NULL;
+	struct token_list **arglist = NULL;
+	int i;
+	int pcount;
+	char *tstr;
+	struct token_list *expand_list;
+	int repl;
+	struct token_list *rtl;
+
+	// check for built in macros
+	if (strcmp(mname, "__FILE__") == 0)
+	{
+		struct lw_strbuf *sb;
+
+		sb = lw_strbuf_new();
+		lw_strbuf_add(sb, '"');
+		for (tstr = (char *)(pp -> fn); *tstr; tstr++)
+		{
+			/* unsigned so the octal digits below are right for bytes
+			   above 127; backslash is escaped too or it would start an
+			   escape sequence in the resulting literal */
+			unsigned char c = *tstr;
+
+			if (c != '\\' && (c == 32 || (c > 34 && c < 127)))
+			{
+				lw_strbuf_add(sb, c);
+			}
+			else
+			{
+				lw_strbuf_add(sb, '\\');
+				lw_strbuf_add(sb, (c >> 6) + '0');
+				lw_strbuf_add(sb, ((c >> 3) & 7) + '0');
+				lw_strbuf_add(sb, (c & 7) + '0');
+			}
+		}
+		lw_strbuf_add(sb, '"');
+		tstr = lw_strbuf_end(sb);
+		preproc_unget_token(pp, token_create(TOK_STR_LIT, tstr, pp -> lineno, pp -> column, pp -> fn));
+		lw_free(tstr);
+		return 1;
+	}
+	else if (strcmp(mname, "__LINE__") == 0)
+	{
+		char nbuf[25];
+		snprintf(nbuf, 25, "%d", pp -> lineno);
+		preproc_unget_token(pp, token_create(TOK_NUMBER, nbuf, pp -> lineno, pp -> column, pp -> fn));
+		return 1;
+	}
+	else if (strcmp(mname, "__DATE__") == 0)
+	{
+		char dbuf[14];
+		struct tm *tv;
+		time_t tm;
+		static char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+
+		tm = time(NULL);
+		tv = localtime(&tm);
+		snprintf(dbuf, 14, "\"%s %2d %04d\"", months[tv -> tm_mon], tv -> tm_mday, tv -> tm_year + 1900);
+		preproc_unget_token(pp, token_create(TOK_STR_LIT, dbuf, pp -> lineno, pp -> column, pp -> fn));
+		return 1;
+	}
+	else if (strcmp(mname, "__TIME__") == 0)
+	{
+		char tbuf[11];
+		struct tm *tv;
+		time_t tm;
+
+		tm = time(NULL);
+		tv = localtime(&tm);
+		snprintf(tbuf, 11, "\"%02d:%02d:%02d\"", tv -> tm_hour, tv -> tm_min, tv -> tm_sec);
+		preproc_unget_token(pp, token_create(TOK_STR_LIT, tbuf, pp -> lineno, pp -> column, pp -> fn));
+		return 1;
+	}
+
+	s = symtab_find(pp, mname);
+	if (!s)
+		return 0;
+
+	for (e = pp -> expand_list; e; e = e -> next)
+	{
+		/* don't expand if we're already expanding the same macro */
+		if (e -> s == s)
+			return 0;
+	}
+
+	if (s -> nargs == -1)
+	{
+		/* short circuit NULL expansion */
+		if (s -> tl == NULL)
+			return 1;
+
+		goto expandmacro;
+	}
+
+	// look for opening paren after optional whitespace
+	/* Skipped whitespace is remembered as private copies, newest first, so
+	   it can be pushed back if this is not an invocation after all.
+	   Copies are used because the fetched tokens are not assumed to stay
+	   valid across further fetches. */
+	t2 = NULL;
+	for (;;)
+	{
+		t = preproc_next_token(pp);
+		if (t -> ttype != TOK_WSPACE && t -> ttype != TOK_EOL)
+			break;
+		t3 = token_dup(t);
+		t3 -> next = t2;
+		t2 = t3;
+	}
+	if (t -> ttype != TOK_OPAREN)
+	{
+		// not a function-like invocation
+		/* push back the token that ended the scan, then the whitespace in
+		   front of it, so the input reads exactly as before */
+		preproc_unget_token(pp, t);
+		while (t2)
+		{
+			t = t2 -> next;
+			preproc_unget_token(pp, t2);
+			t2 = t;
+		}
+		return 0;
+	}
+	/* it is an invocation; the remembered whitespace is not needed */
+	while (t2)
+	{
+		t3 = t2 -> next;
+		token_free(t2);
+		t2 = t3;
+	}
+
+	// parse parameters here
+	nargs = 1;
+	arglist = lw_alloc(sizeof(struct token_list *));
+	arglist[0] = token_list_create();
+	pcount = 0;
+
+	/* pcount tracks nested parentheses across the whole invocation; only a
+	   ")" at depth zero ends it and only a "," at depth zero separates
+	   arguments */
+	for (t = preproc_next_token_nws(pp); ; t = preproc_next_token(pp))
+	{
+		if (t -> ttype == TOK_EOF)
+		{
+			preproc_throw_error(pp, "Unexpected EOF in macro call");
+			break;
+		}
+		/* line breaks inside the invocation are not part of any argument */
+		if (t -> ttype == TOK_EOL)
+			continue;
+		if (t -> ttype == TOK_OPAREN)
+			pcount++;
+		else if (t -> ttype == TOK_CPAREN)
+		{
+			if (pcount == 0)
+				break;
+			pcount--;
+		}
+		else if (t -> ttype == TOK_COMMA && pcount == 0)
+		{
+			/* for a variadic macro, commas stop separating once the
+			   named parameters are filled; the rest, commas included,
+			   is the variable argument */
+			if (!(s -> vargs) || (nargs <= s -> nargs))
+			{
+				nargs++;
+				arglist = lw_realloc(arglist, sizeof(struct token_list *) * nargs);
+				arglist[nargs - 1] = token_list_create();
+				continue;
+			}
+		}
+		token_list_append(arglist[nargs - 1], token_dup(t));
+	}
+
+	if (s -> vargs)
+	{
+		if (nargs <= s -> nargs)
+		{
+			preproc_throw_error(pp, "Wrong number of arguments (%d) for variadic macro %s which takes %d arguments", nargs, mname, s -> nargs);
+		}
+	}
+	else
+	{
+		/* "F()" parses as one empty argument, which is fine for a macro
+		   that takes none */
+		if (s -> nargs != nargs && !(s -> nargs == 0 && nargs == 1 && arglist[0] -> head == NULL))
+		{
+			preproc_throw_error(pp, "Wrong number of arguments (%d) for macro %s which takes %d arguments", nargs, mname, s -> nargs);
+		}
+	}
+
+	/* expansion carries on after an arity error, so make sure every index
+	   macro_arg() can return has a (possibly empty) argument list */
+	need = s -> nargs + ((s -> vargs) ? 1 : 0);
+	if (nargs < need)
+	{
+		arglist = lw_realloc(arglist, sizeof(struct token_list *) * need);
+		while (nargs < need)
+			arglist[nargs++] = token_list_create();
+	}
+
+	/* now calculate the pre-expansions of the arguments */
+	exparglist = lw_alloc(nargs * sizeof(struct token_list *));
+	for (i = 0; i < nargs; i++)
+	{
+		exparglist[i] = token_list_create();
+		// NOTE: do nothing if empty argument
+		if (arglist[i] == NULL || arglist[i] -> head == NULL)
+			continue;
+		pp -> sourcelist = arglist[i]->head;
+		for (;;)
+		{
+			t = preproc_next_processed_token(pp);
+			if (t -> ttype == TOK_EOF)
+				break;
+			token_list_append(exparglist[i], token_dup(t));
+		}
+	}
+
+expandmacro:
+	expand_list = (s -> tl) ? token_list_dup(s -> tl) : token_list_create();
+
+	// scan for stringification and handle it
+	/* each replacement edits the list under the scan, so restart from the
+	   head after one and stop once a full pass finds nothing */
+	do
+	{
+		repl = 0;
+		for (t = expand_list -> head; t; t = t -> next)
+		{
+			if (t -> ttype == TOK_HASH && t -> next && t -> next -> ttype == TOK_IDENT)
+			{
+				i = macro_arg(s, t -> next -> strval);
+				if (i != -1)
+				{
+					repl = 1;
+					tstr = stringify(arglist[i]);
+					token_list_remove(t -> next);
+					token_list_insert(expand_list, t, token_create(TOK_STR_LIT, tstr, t -> lineno, t -> column, t -> fn));
+					token_list_remove(t);
+					lw_free(tstr);
+					break;
+				}
+			}
+		}
+	} while (repl);
+
+
+	// scan for concatenation and handle it
+
+	t = expand_list -> head;
+	while (t)
+	{
+		struct token *ins;
+		struct token *resume;
+
+		if (t -> ttype != TOK_DBLHASH)
+		{
+			t = t -> next;
+			continue;
+		}
+		// have a concatenation operator here
+		for (t2 = t -> prev; t2; t2 = t2 -> prev)
+		{
+			if (t2 -> ttype != TOK_WSPACE)
+				break;
+		}
+		for (t3 = t -> next; t3; t3 = t3 -> next)
+		{
+			if (t3 -> ttype != TOK_WSPACE)
+				break;
+		}
+		// if no non-whitespace before or after, ignore it
+		if (!t2 || !t3)
+		{
+			t = t -> next;
+			continue;
+		}
+		// eat the whitespace before and after
+		while (t -> prev != t2)
+			token_list_remove(t -> prev);
+		while (t -> next != t3)
+			token_list_remove(t -> next);
+
+		// paste the operands (t2 ## t3), then replace all three tokens
+		// with the result and carry on after the right operand
+		rtl = paste_tokens(pp, s, arglist, t2, t3);
+		resume = t3 -> next;
+		ins = t2 -> prev;
+		token_list_remove(t3);
+		token_list_remove(t2);
+		token_list_remove(t);
+		for (t = rtl -> head; t; t = t -> next)
+		{
+			/* insert each token after the previous one so the result
+			   keeps its order */
+			t2 = token_dup(t);
+			token_list_insert(expand_list, ins, t2);
+			ins = t2;
+		}
+		token_list_destroy(rtl);
+		/* resume is NULL when the right operand was the last token */
+		t = resume;
+	}
+
+	// now scan for arguments and expand them
+	t = expand_list -> head;
+	while (t)
+	{
+		if (t -> ttype == TOK_IDENT)
+		{
+			/* identifiers might need expansion to arguments */
+			i = macro_arg(s, t -> strval);
+			if (i != -1)
+			{
+				t3 = t -> next;
+				/* inserting back to front right after t leaves the
+				   argument tokens in their original order */
+				for (t2 = exparglist[i] -> tail; t2; t2 = t2 -> prev)
+					token_list_insert(expand_list, t, token_dup(t2));
+				token_list_remove(t);
+				/* t3 is NULL when the parameter was the last token */
+				t = t3;
+				continue;
+			}
+		}
+		t = t -> next;
+	}
+
+	/* put the new expansion in front of the input, if relevant; if we
+	   expanded to nothing, no need to create an expansion record or
+	   put anything into the input queue */
+	if (expand_list -> head)
+	{
+		token_list_append(expand_list, token_create(TOK_ENDEXPAND, "", -1, -1, ""));
+
+		// move the expanded list into the token queue
+		for (t = expand_list -> tail; t; t = t -> prev)
+			preproc_unget_token(pp, token_dup(t));
+
+		/* set up expansion record */
+		e = lw_alloc(sizeof(struct expand_e));
+		e -> next = pp -> expand_list;
+		pp -> expand_list = e;
+		e -> s = s;
+	}
+
+	/* now clean up */
+	token_list_destroy(expand_list);
+	for (i = 0; i < nargs; i++)
+	{
+		token_list_destroy(arglist[i]);
+		token_list_destroy(exparglist[i]);
+	}
+	lw_free(arglist);
+	lw_free(exparglist);
+
+	return 1;
+}
+
+/*
+Public entry point: return the next fully processed token. pp -> curtok is
+cleared before the token is returned.
+*/
+struct token *preproc_next(struct preproc_info *pp)
+{
+	struct token *t;
+
+	t = preproc_next_processed_token(pp);
+	pp -> curtok = NULL;
+	return t;
+}
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/symbol.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/symbol.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,131 @@
+/*
+lwcc/symbol.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see .
+*/
+
+#include
+
+#include
+#include
+
+#include "cpp.h"
+#include "symbol.h"
+#include "token.h"
+
+/*
+Release everything a symbol table entry owns: its name, its parameter names
+and its replacement token list. The entry structure itself is NOT freed.
+*/
+void symbol_free(struct symtab_e *s)
+{
+	int i;
+
+	lw_free(s -> name);
+
+	for (i = 0; i < s -> nargs; i++)
+		lw_free(s -> params[i]);
+	lw_free(s -> params);
+	token_list_destroy(s -> tl);
+}
+
+/* Look up a macro by name; returns its entry or NULL if not defined. */
+struct symtab_e *symtab_find(struct preproc_info *pp, char *name)
+{
+	struct symtab_e *s;
+
+	for (s = pp -> sh; s; s = s -> next)
+	{
+		if (strcmp(s -> name, name) == 0)
+		{
+			return s;
+		}
+	}
+	return NULL;
+}
+
+/*
+Remove the macro called name from the symbol table, if present, and release
+it. Undefining a name that is not defined is a no-op.
+*/
+void symtab_undef(struct preproc_info *pp, char *name)
+{
+	struct symtab_e *s, **p;
+
+	/* p always addresses the link that points at s: first the list head,
+	   then the next field of the entry before s */
+	p = &(pp -> sh);
+	for (s = pp -> sh; s; s = s -> next)
+	{
+		if (strcmp(s -> name, name) == 0)
+		{
+			/* unlink by redirecting that link past s; writing through
+			   (*p) -> next would only assign s -> next to itself and
+			   leave the freed entry on the list */
+			*p = s -> next;
+			symbol_free(s);
+			lw_free(s);
+			return;
+		}
+		p = &(s -> next);
+	}
+}
+
+/*
+Add a macro definition at the head of the symbol table. The name and the
+parameter names are copied; the token list def is stored as given and is
+later released by symbol_free(). nargs is -1 for an object-like macro.
+*/
+void symtab_define(struct preproc_info *pp, char *name, struct token_list *def, int nargs, char **params, int vargs)
+{
+	struct symtab_e *s;
+	int i;
+
+	s = lw_alloc(sizeof(struct symtab_e));
+	s -> name = lw_strdup(name);
+	s -> tl = def;
+	s -> nargs = nargs;
+	s -> params = NULL;
+	if (params)
+	{
+		s -> params = lw_alloc(sizeof(char *) * nargs);
+		for (i = 0; i < nargs; i++)
+			s -> params[i] = lw_strdup(params[i]);
+	}
+	s -> vargs = vargs;
+	s -> next = pp -> sh;
+	pp -> sh = s;
+}
+
+/* Print every defined macro to stdout as "name(params) => replacement". */
+void symtab_dump(struct preproc_info *pp)
+{
+	struct symtab_e *s;
+	struct token *t;
+	int i;
+
+	for (s = pp -> sh; s; s = s -> next)
+	{
+		printf("%s", s -> name);
+		if (s -> nargs >= 0)
+		{
+			printf("(");
+			for (i = 0; i < s -> nargs; i++)
+			{
+				if (i)
+					printf(",");
+				printf("%s", s -> params[i]);
+			}
+			if (s -> vargs)
+			{
+				if (s -> nargs)
+					printf(",");
+				printf("...");
+			}
+			printf(")");
+		}
+		printf(" => ");
+		if (s -> tl)
+		{
+			for (t = s -> tl -> head; t; t = t -> next)
+			{
+				token_print(t, stdout);
+			}
+		}
+		printf("\n");
+	}
+}
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/symbol.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/symbol.h	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,42 @@
+/*
+lwcc/symbol.h
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see .
+*/
+
+#ifndef symbol_h_seen___
+#define symbol_h_seen___
+
+#include "cpp.h"
+#include "token.h"
+
+/* one macro definition; entries form a singly linked list */
+struct symtab_e
+{
+	char *name;				// symbol name
+	struct token_list *tl;	// token list the name is defined as, NULL for none
+	int nargs;				// number of named arguments - -1 for object like macro
+	int vargs;				// set if macro has varargs style
+	char **params;			// the names of the parameters
+	struct symtab_e *next;	// next entry in list
+};
+
+struct symtab_e *symtab_find(struct preproc_info *, char *);
+void symtab_undef(struct preproc_info *, char *);
+void symtab_define(struct preproc_info *, char *, struct token_list *, int, char **, int);
+/* also exported by symbol.c; declared so callers get checked prototypes */
+void symbol_free(struct symtab_e *);
+void symtab_dump(struct preproc_info *);
+
+#endif // symbol_h_seen___
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/token.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/token.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,248 @@
+/*
+lwcc/token.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see .
+*/
+
+#include
+
+#include
+#include
+
+#include "token.h"
+
+/*
+Allocate a token that is not on any list. strval is copied (it may be
+NULL); fn is stored as given, not copied.
+*/
+struct token *token_create(int ttype, char *strval, int row, int col, const char *fn)
+{
+	struct token *t;
+
+	t = lw_alloc(sizeof(struct token));
+	t -> ttype = ttype;
+	if (strval)
+		t -> strval = lw_strdup(strval);
+	else
+		t -> strval = NULL;
+	t -> lineno = row;
+	t -> column = col;
+	t -> fn = fn;
+	t -> next = NULL;
+	t -> prev = NULL;
+	t -> list = NULL;
+	return t;
+}
+
+/* Release a token and its string value. The token is not unlinked first. */
+void token_free(struct token *t)
+{
+	lw_free(t -> strval);
+	lw_free(t);
+}
+
+/* Return an unlinked copy of t with its own copy of the string value. */
+struct token *token_dup(struct token *t)
+{
+	struct token *t2;
+
+	t2 = lw_alloc(sizeof(struct token));
+	t2 -> ttype = t -> ttype;
+	t2 -> lineno = t -> lineno;
+	t2 -> column = t -> column;
+	/* fn is shared, not copied, as in token_create(); leaving it unset
+	   would hand an uninitialised pointer to anything that reads it */
+	t2 -> fn = t -> fn;
+	t2 -> list = NULL;
+	t2 -> next = NULL;
+	t2 -> prev = NULL;
+	if (t -> strval)
+		t2 -> strval = lw_strdup(t -> strval);
+	else
+		t2 -> strval = NULL;
+	return t2;
+}
+
+/* fixed spellings used by token_print(), terminated by TOK_NONE */
+static struct { int ttype; char *tstr; } tok_strs[] =
+{
+	{ TOK_WSPACE, " " },
+	{ TOK_EOL, "\n" },
+	{ TOK_DIV, "/" },
+	{ TOK_ADD, "+" },
+	{ TOK_SUB, "-" },
+	{ TOK_OPAREN, "(" },
+	{ TOK_CPAREN, ")" },
+	{ TOK_NE, "!=" },
+	{ TOK_EQ, "==" },
+	{ TOK_LE, "<=" },
+	{ TOK_LT, "<" },
+	{ TOK_GE, ">=" },
+	{ TOK_GT, ">" },
+	{ TOK_BAND, "&&" },
+	{ TOK_BOR, "||" },
+	{ TOK_BNOT, "!" },
+	{ TOK_MOD, "%"},
+	{ TOK_COMMA, "," },
+	{ TOK_ELLIPSIS, "..." },
+	{ TOK_QMARK, "?" },
+	{ TOK_COLON, ":" },
+	{ TOK_OBRACE, "{" },
+	{ TOK_CBRACE, "}" },
+	{ TOK_OSQUARE, "[" },
+	{ TOK_CSQUARE, "]" },
+	{ TOK_COM, "~" },
+	{ TOK_EOS, ";" },
+	{ TOK_HASH, "#" },
+	{ TOK_DBLHASH, "##" },
+	{ TOK_XOR, "^" },
+	{ TOK_XORASS, "^=" },
+	{ TOK_STAR, "*" },
+	{ TOK_MULASS, "*=" },
+	{ TOK_DIVASS, "/=" },
+	{ TOK_ASS, "=" },
+	{ TOK_MODASS, "%=" },
+	{ TOK_SUBASS, "-=" },
+	{ TOK_DBLSUB, "--" },
+	{ TOK_ADDASS, "+=" },
+	{ TOK_DBLADD, "++" },
+	{ TOK_BWAND, "&" },
+	{ TOK_BWANDASS, "&=" },
+	{ TOK_BWOR, "|" },
+	{ TOK_BWORASS, "|=" },
+	{ TOK_LSH, "<<" },
+	{ TOK_LSHASS, "<<=" },
+	{ TOK_RSH, ">>" },
+	{ TOK_RSHASS, ">>=" },
+	{ TOK_DOT, "." },
+	{ TOK_ARROW, "->" },
+	{ TOK_NONE, "" }
+};
+
+/*
+Write a token to f: the fixed spelling for its type if tok_strs has one,
+followed by its string value if it has one.
+*/
+void token_print(struct token *t, FILE *f)
+{
+	int i;
+	for (i = 0; tok_strs[i].ttype != TOK_NONE; i++)
+	{
+		if (tok_strs[i].ttype == t -> ttype)
+		{
+			fprintf(f, "%s", tok_strs[i].tstr);
+			break;
+		}
+	}
+	if (t -> strval)
+		fprintf(f, "%s", t -> strval);
+}
+
+/* token list management */
+
+/* Allocate an empty token list. */
+struct token_list *token_list_create(void)
+{
+	struct token_list *tl;
+	tl = lw_alloc(sizeof(struct token_list));
+	tl -> head = NULL;
+	tl -> tail = NULL;
+	return tl;
+}
+
+/* Free a list and every token still on it. NULL is accepted. */
+void token_list_destroy(struct token_list *tl)
+{
+	if (tl == NULL)
+		return;
+	while (tl -> head)
+	{
+		/* tail is borrowed as a scratch pointer to the token being freed */
+		tl -> tail = tl -> head;
+		tl -> head = tl -> head -> next;
+		token_free(tl -> tail);
+	}
+	lw_free(tl);
+}
+
+/* Add tok at the end of tl; the list takes ownership of it. */
+void token_list_append(struct token_list *tl, struct token *tok)
+{
+	tok -> list = tl;
+	if (tl -> head == NULL)
+	{
+		tl -> head = tl -> tail = tok;
+		tok -> next = tok -> prev = NULL;
+		return;
+	}
+	tl -> tail -> next = tok;
+	tok -> prev = tl -> tail;
+	tl -> tail = tok;
+	tok -> next = NULL;
+	return;
+}
+
+/*
+Unlink tok from the list it is on. The token is NOT freed and its own
+next/prev pointers are left as they were. A token on no list is ignored.
+*/
+void token_list_remove(struct token *tok)
+{
+	if (tok -> list == NULL)
+		return;
+
+	if (tok -> prev)
+		tok -> prev -> next = tok -> next;
+	if (tok -> next)
+		tok -> next -> prev = tok -> prev;
+	if (tok == tok -> list -> head)
+		tok -> list -> head = tok -> next;
+	if (tok == tok -> list -> tail)
+		tok -> list -> tail = tok -> prev;
+	tok -> list = NULL;
+}
+
+/* Add tok at the front of tl; the list takes ownership of it. */
+void token_list_prepend(struct token_list *tl, struct token *tok)
+{
+	tok -> list = tl;
+	if (tl -> head == NULL)
+	{
+		tl -> head = tl -> tail = tok;
+		tok -> next = tok -> prev = NULL;
+		/* done; falling through would link the token to itself */
+		return;
+	}
+	tl -> head -> prev = tok;
+	tok -> next = tl -> head;
+	tl -> head = tok;
+	tok -> prev = NULL;
+}
+
+/*
+Insert newt into tl immediately after the token "after". A NULL "after" or
+an empty list prepends; an "after" that is not on tl appends.
+*/
+void token_list_insert(struct token_list *tl, struct token *after, struct token *newt)
+{
+	struct token *t;
+
+	if (after == NULL || tl -> head == NULL)
+	{
+		token_list_prepend(tl, newt);
+		return;
+	}
+
+	for (t = tl -> head; t && t != after; t = t -> next)
+		/* do nothing */ ;
+	if (!t)
+	{
+		token_list_append(tl, newt);
+		return;
+	}
+	/* record the owning list; token_list_remove() ignores tokens whose
+	   list is NULL, so without this the token could never be removed */
+	newt -> list = tl;
+	newt -> prev = t;
+	newt -> next = t -> next;
+	if (t -> next)
+		t -> next -> prev = newt;
+	else
+		tl -> tail = newt;
+	t -> next = newt;
+}
+
+/*
+Return a new list holding copies of every token on tl. A NULL tl yields an
+empty list.
+*/
+struct token_list *token_list_dup(struct token_list *tl)
+{
+	struct token_list *nl;
+	struct token *t;
+
+	nl = token_list_create();
+	if (tl == NULL)
+		return nl;
+	for (t = tl -> head; t; t = t -> next)
+	{
+		token_list_append(nl, token_dup(t));
+	}
+	return nl;
+}
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/token.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/token.h	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,132 @@
+/*
+lwcc/token.h
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see .
+*/
+
+#ifndef token_h_seen___
+#define token_h_seen___
+
+#include <stdio.h>
+
+enum
+{
+	CPP_NOUNG = -3,
+	CPP_EOL = -2,
+	CPP_EOF = -1,
+};
+
+#define TOK_NONE 0
+#define TOK_EOF 1
+#define TOK_EOL 2
+#define TOK_WSPACE 3
+#define TOK_IDENT 4
+#define TOK_NUMBER 5
+#define TOK_CHAR 6
+#define TOK_ADD 8
+#define TOK_SUB 9
+#define TOK_OPAREN 10
+#define TOK_CPAREN 11
+#define TOK_NE 12
+#define TOK_EQ 13
+#define TOK_LE 14
+#define TOK_LT 15
+#define TOK_GE 16
+#define TOK_GT 17
+#define TOK_BAND 18
+#define TOK_BOR 19
+#define TOK_BNOT 20
+#define TOK_MOD 21
+#define TOK_COMMA 22
+#define TOK_ELLIPSIS 23
+#define TOK_QMARK 24
+#define TOK_COLON 25
+#define TOK_OBRACE 26
+#define TOK_CBRACE 27
+#define TOK_OSQUARE 28
+#define TOK_CSQUARE 29
+#define TOK_COM 30
+#define TOK_EOS 31
+#define TOK_HASH 32
+#define TOK_DBLHASH 33
+#define TOK_XOR 34
+#define TOK_XORASS 35
+#define TOK_STAR 36
+#define TOK_MULASS 37
+#define TOK_DIV 38
+#define TOK_DIVASS 39
+#define TOK_ASS 40
+#define TOK_MODASS 41
+#define TOK_SUBASS 42
+#define TOK_DBLSUB 43
+#define TOK_ADDASS 44
+#define TOK_DBLADD 45
+#define TOK_BWAND 46
+#define TOK_BWANDASS 47
+#define TOK_BWOR 48
+#define TOK_BWORASS 49
+#define TOK_LSH 50
+#define TOK_LSHASS 51
+#define TOK_RSH 52
+#define TOK_RSHASS 53
+#define TOK_DOT 54
+#define TOK_CHR_LIT 55
+#define TOK_STR_LIT 56
+#define TOK_ARROW 57
+#define TOK_ENDEXPAND 58
+#define TOK_ERROR 59
+#define TOK_MAX 60
+
+struct token
+{
+	int ttype;				// token type
+	char *strval;			// the token value if relevant
+	struct token *prev;		// previous token in a list
+	struct token *next;		// next token in a list
+	struct token_list *list;// list descriptor this token is on, NULL if none
+	int lineno;				// line number token came from
+	int column;				// character column token came from
+	const char *fn;			// file name token came from
+};
+
+struct token_list
+{
+	struct token *head;		// the head of the list
+	struct token *tail;		// the tail of the list
+};
+
+extern void token_free(struct token *);
+extern struct token *token_create(int, char *strval, int, int, const char *);
+extern struct token *token_dup(struct token *);
+/* add a token to the end of a list */
+extern void token_list_append(struct token_list *, struct token *);
+/* add a token to the start of a list */
+extern void token_list_prepend(struct token_list *, struct token *);
+/* remove individual token from whatever list it is on */
+extern void token_list_remove(struct token *);
+/* insert the last argument after the middle one (NULL middle argument: prepend) */
+extern void token_list_insert(struct token_list *, struct token *, struct token *);
+/* duplicate a list to a new list pointer */
+extern struct token_list *token_list_dup(struct token_list *);
+/* create an empty list; destroy a list together with every token on it */
+extern struct token_list *token_list_create(void);
+extern void token_list_destroy(struct token_list *);
+/* print a token out */
+extern void token_print(struct token *, FILE *);
+
+#endif // token_h_seen___
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/token_names.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/token_names.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,19 @@
+char *ptoken_names[] = {
+"TOKEN_NONE",
+"PTOK_ENDS",
+"PTOK_IDENTIFIER",
+"PTOK_STAR",
+"PTOK_KW_VOID",
+"PTOK_KW_FLOAT",
+"PTOK_KW_DOUBLE",
+"PTOK_KW_LONG",
+"PTOK_KW_UNSIGNED",
+"PTOK_KW_SIGNED",
+"PTOK_KW_INT",
+"PTOK_KW_SHORT",
+"PTOK_KW_CHAR",
+"PTOK_OPAREN",
+"PTOK_CPAREN",
+"PTOK_OBRACE",
+"PTOK_CBRACE",
+"" };
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/tree.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/tree.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,186 @@
+/*
+lwcc/tree.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdarg.h>
+#include <string.h>
+#include <lw_alloc.h>
+#include <lw_string.h>
+
+#include "tree.h"
+
+/* printable names for node types, indexed by node type number */
+static char *node_names[] = {
+	"NONE",
+	"PROGRAM",
+	"DECL",
+	"TYPE_CHAR",
+	"TYPE_SHORT",
+	"TYPE_INT",
+	"TYPE_LONG",
+	"TYPE_LONGLONG",
+	"IDENT",
+	"TYPE_PTR",
+	"TYPE_SCHAR",
+	"TYPE_UCHAR",
+	"TYPE_USHORT",
+	"TYPE_UINT",
+	"TYPE_ULONG",
+	"TYPE_ULONGLONG",
+	"TYPE_VOID",
+	"TYPE_FLOAT",
+	"TYPE_DOUBLE",
+	"TYPE_LDOUBLE",
+	"FUNDEF",
+	"FUNDECL",
+	"FUNARGS",
+	"BLOCK",
+};
+
+
+
+/*
+Create a node of the given type. The variable arguments depend on the type:
+NODE_IDENT takes one char * which is duplicated into strval; NODE_DECL,
+NODE_TYPE_PTR, NODE_FUNDEF and NODE_FUNDECL take 2, 1, 4 and 3 node_t *
+children respectively (NULL children are skipped). Other types take none.
+*/
+node_t *node_create(int type, ...)
+{
+	node_t *r;
+	int nargs = 0;
+	va_list args;
+
+	va_start(args, type);
+	r = lw_alloc(sizeof(node_t));
+	memset(r, 0, sizeof(node_t));
+	r -> type = type;
+
+	switch (type)
+	{
+	case NODE_DECL:
+		nargs = 2;
+		break;
+
+	case NODE_TYPE_PTR:
+		nargs = 1;
+		break;
+
+	case NODE_IDENT:
+		r -> strval = lw_strdup(va_arg(args, char *));
+		break;
+
+	case NODE_FUNDEF:
+		nargs = 4;
+		break;
+
+	case NODE_FUNDECL:
+		nargs = 3;
+		break;
+	}
+
+	while (nargs--)
+	{
+		node_addchild(r, va_arg(args, node_t *));
+	}
+	va_end(args);
+	return r;
+}
+
+/* Free a node, its string value and, recursively, all of its children. */
+void node_destroy(node_t *node)
+{
+	node_t *n;
+
+	if (!node)
+		return;
+
+	while (node -> children)
+	{
+		n = node -> children -> next_child;
+		node_destroy(node -> children);
+		node -> children = n;
+	}
+	lw_free(node -> strval);
+	lw_free(node);
+}
+
+/* Append nn to the end of node's child list; a NULL nn is ignored. */
+void node_addchild(node_t *node, node_t *nn)
+{
+	node_t *tmp;
+
+	if (!nn)
+		return;
+
+	nn -> parent = node;
+	nn -> next_child = NULL;
+	if (node -> children)
+	{
+		for (tmp = node -> children; tmp -> next_child; tmp = tmp -> next_child)
+			/* do nothing */ ;
+		tmp -> next_child = nn;
+	}
+	else
+	{
+		node -> children = nn;
+	}
+}
+
+/*
+Unlink nn from node's child list without freeing it. A NULL node means
+"use nn's recorded parent". Nothing happens if nn is not a child of node.
+*/
+void node_removechild(node_t *node, node_t *nn)
+{
+	node_t **pp;
+
+	if (!nn)
+		return;
+	if (!node)
+		node = nn -> parent;
+	if (!node)
+		return;
+
+	/* pp always addresses the link that points at the node being examined */
+	for (pp = &(node -> children); *pp; pp = &((*pp) -> next_child))
+	{
+		if (*pp == nn)
+			break;
+	}
+	if (!*pp)
+		return;
+
+	*pp = nn -> next_child;
+	nn -> parent = NULL;
+	nn -> next_child = NULL;
+}
+
+/* Unlink nn from node's child list and then free it with node_destroy(). */
+void node_removechild_destroy(node_t *node, node_t *nn)
+{
+	node_removechild(node, nn);
+	node_destroy(nn);
+}
+
+/*
+Print node and its subtree to f as nested parenthesised entries, indenting
+four spaces per nesting level.
+*/
+static void node_display_aux(node_t *node, FILE *f, int level)
+{
+	node_t *nn;
+	const char *name = "UNKNOWN";
+	int i;
+
+	/* never index node_names with a type it has no entry for */
+	if (node -> type >= 0 && node -> type < (int)(sizeof(node_names) / sizeof(node_names[0])))
+		name = node_names[node -> type];
+
+	for (i = 0; i < level * 4; i++)
+		fputc(' ', f);
+	fprintf(f, "(%s ", name);
+	if (node -> strval)
+		fprintf(f, "\"%s\" ", node -> strval);
+	fputc('\n', f);
+	for (nn = node -> children; nn; nn = nn -> next_child)
+		node_display_aux(nn, f, level + 1);
+	for (i = 0; i < level * 4; i++)
+		fputc(' ', f);
+	fputc(')', f);
+	fputc('\n', f);
+}
+
+/* Print the tree rooted at node to f. */
+void node_display(node_t *node, FILE *f)
+{
+	node_display_aux(node, f, 0);
+}
diff -r 6073f4a33475 -r 5b8871fd7503 lwcc/tree.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/tree.h	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,73 @@
+/*
+lwcc/tree.h
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef tree_h_seen___
+#define tree_h_seen___
+
+#include <stdio.h>
+
+/* the various node types */
+#define NODE_NONE 0 // a node with no type
+#define NODE_PROGRAM 1 // the whole program
+#define NODE_DECL 2 // a declaration
+#define NODE_TYPE_CHAR 3 // a character type
+#define NODE_TYPE_SHORT 4 // short int
+#define NODE_TYPE_INT 5 // integer
+#define NODE_TYPE_LONG 6 // long int
+#define NODE_TYPE_LONGLONG 7 // long long
+#define NODE_IDENT 8 // an identifier of some kind
+#define NODE_TYPE_PTR 9 // a pointer
+#define NODE_TYPE_SCHAR 10 // signed char
+#define NODE_TYPE_UCHAR 11 // unsigned char
+#define NODE_TYPE_USHORT 12 // unsigned short
+#define NODE_TYPE_UINT 13 // unsigned int
+#define NODE_TYPE_ULONG 14 // unsigned long
+#define NODE_TYPE_ULONGLONG 15 // unsigned long long
+#define NODE_TYPE_VOID 16 // void
+#define NODE_TYPE_FLOAT 17 // float
+#define NODE_TYPE_DOUBLE 18 // double
+#define NODE_TYPE_LDOUBLE 19 // long double
+#define NODE_FUNDEF 20 // function definition
+#define NODE_FUNDECL 21 // function declaration
+#define NODE_FUNARGS 22 // list of function args
+#define NODE_BLOCK 23 // statement block
+#define NODE_NUMTYPES 24 // the number of node types
+
+typedef struct node_s node_t;
+
+struct node_s
+{
+	int type;				// node type
+	char *strval;			// any string value associated with the node
+	unsigned char ival[8];	// any 64 bit integer value associated with the node, signed or unsigned
+	node_t *children;		// pointer to list of child nodes
+	node_t *next_child;		// pointer to next child in the list
+	node_t *parent;			// pointer to parent node
+};
+
+extern node_t *node_create(int, ...);
+extern void node_destroy(node_t *);
+extern void node_addchild(node_t *, node_t *);
+extern void node_removechild(node_t *, node_t *);
+extern void node_display(node_t *, FILE *);
+extern void node_removechild_destroy(node_t *, node_t *);
+
+#endif // tree_h_seen___
diff -r 6073f4a33475 -r 5b8871fd7503 lwlib/lw_strbuf.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwlib/lw_strbuf.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,69 @@
+/*
+lwlib/lw_strbuf.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdlib.h>
+
+#include "lw_alloc.h"
+#include "lw_strbuf.h"
+
+/*
+Create an empty string buffer. No storage is allocated for the string itself
+until the first character is added.
+*/
+struct lw_strbuf *lw_strbuf_new(void)
+{
+	struct lw_strbuf *lw_strbuf;
+
+	lw_strbuf = lw_alloc(sizeof(struct lw_strbuf));
+	lw_strbuf -> str = NULL;
+	lw_strbuf -> bo = 0;
+	lw_strbuf -> bl = 0;
+	return lw_strbuf;
+}
+
+/*
+Append the character c to the buffer, enlarging the storage by 100 bytes
+whenever it is full.
+*/
+void lw_strbuf_add(struct lw_strbuf *lw_strbuf, int c)
+{
+	if (lw_strbuf -> bo >= lw_strbuf -> bl)
+	{
+		lw_strbuf -> bl += 100;
+		lw_strbuf -> str = lw_realloc(lw_strbuf -> str, lw_strbuf -> bl);
+	}
+	lw_strbuf -> str[lw_strbuf -> bo++] = c;
+}
+
+/*
+Terminate the accumulated string with a NUL, free the buffer descriptor and
+return the string. The descriptor must not be used afterwards; the returned
+string is no longer referenced by anything in this module.
+*/
+char *lw_strbuf_end(struct lw_strbuf *lw_strbuf)
+{
+	char *rv;
+
+	lw_strbuf_add(lw_strbuf, 0);
+	rv = lw_strbuf -> str;
+	lw_free(lw_strbuf);
+	return rv;
+}
diff -r 6073f4a33475 -r 5b8871fd7503 lwlib/lw_strbuf.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwlib/lw_strbuf.h	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,36 @@
+/*
+lwlib/lw_strbuf.h
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef ___lw_strbuf_h_seen___
+#define ___lw_strbuf_h_seen___
+
+struct lw_strbuf
+{
+	char *str;	// the characters collected so far (NULL until the first add)
+	int bl;		// number of bytes currently allocated for str
+	int bo;		// offset in str where the next character is stored
+};
+
+extern struct lw_strbuf *lw_strbuf_new(void);
+extern void lw_strbuf_add(struct lw_strbuf *, int);
+extern char *lw_strbuf_end(struct lw_strbuf *);
+
+#endif // ___lw_strbuf_h_seen___
diff -r 6073f4a33475 -r 5b8871fd7503 lwlib/lw_strpool.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwlib/lw_strpool.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,71 @@
+/*
+lwlib/lw_strpool.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "lw_alloc.h"
+#include "lw_string.h"
+#include "lw_strpool.h"
+
+/* Create an empty string pool. */
+struct lw_strpool *lw_strpool_create(void)
+{
+	struct lw_strpool *sp;
+
+	sp = lw_alloc(sizeof(struct lw_strpool));
+	sp -> nstrs = 0;
+	sp -> strs = NULL;
+	return sp;
+}
+
+/*
+Free a pool together with every string stored in it. Pointers previously
+returned by lw_strpool_strdup() for this pool refer to those strings.
+*/
+void lw_strpool_free(struct lw_strpool *sp)
+{
+	int i;
+
+	for (i = 0; i < sp -> nstrs; i++)
+		lw_free(sp -> strs[i]);
+	lw_free(sp -> strs);
+	lw_free(sp);
+}
+
+/*
+Return the pool's copy of string s, adding one if the pool has no equal
+string yet. Equal strings always yield the same pointer. A NULL s yields
+NULL.
+*/
+char *lw_strpool_strdup(struct lw_strpool *sp, const char *s)
+{
+	int i;
+
+	if (!s)
+		return NULL;
+
+	/* first do a fast scan for a pointer match */
+	for (i = 0; i < sp -> nstrs; i++)
+		if (sp -> strs[i] == s)
+			return sp -> strs[i];
+
+	/* no match - do a slow scan for a string match */
+	for (i = 0; i < sp -> nstrs; i++)
+		if (strcmp(sp -> strs[i], s) == 0)
+			return sp -> strs[i];
+
+	/* no match - create a new string entry */
+	sp -> strs = lw_realloc(sp -> strs, sizeof(char *) * (sp -> nstrs + 1));
+	sp -> strs[sp -> nstrs] = lw_strdup(s);
+	sp -> nstrs++;
+	return sp -> strs[sp -> nstrs - 1];
+}
diff -r 6073f4a33475 -r 5b8871fd7503 lwlib/lw_strpool.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwlib/lw_strpool.h	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,35 @@
+/*
+lwlib/lw_strpool.h
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef ___lw_strpool_h_seen___
+#define ___lw_strpool_h_seen___
+
+struct lw_strpool
+{
+	int nstrs;		// number of entries in strs
+	char **strs;	// the pooled strings
+};
+
+extern struct lw_strpool *lw_strpool_create(void);
+extern void lw_strpool_free(struct lw_strpool *);
+extern char *lw_strpool_strdup(struct lw_strpool *, const char *);
+
+#endif // ___lw_strpool_h_seen___