Mercurial > hg > index.cgi

diff lwcc/preproc.c @ 495:5b8871fd7503
Merged previous lwcc development branch into mainline.
author: William Astle <lost@l-w.ca>
date: Mon, 05 Aug 2019 21:27:09 -0600
parents: 836dc5371980
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/preproc.c	Mon Aug 05 21:27:09 2019 -0600
@@ -0,0 +1,1672 @@
+/*
+lwcc/preproc.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <lw_alloc.h>
+#include <lw_string.h>
+#include <lw_strbuf.h>
+#include <lw_strpool.h>
+
+#include "cpp.h"
+#include "symbol.h"
+#include "token.h"
+
+static int expand_macro(struct preproc_info *, char *);
+static void process_directive(struct preproc_info *);
+static long eval_expr(struct preproc_info *);
+extern struct token *preproc_lex_next_token(struct preproc_info *);
+static long preproc_numval(struct preproc_info *, struct token *);
+static int eval_escape(char **);
+extern int preproc_lex_fetch_byte(struct preproc_info *);
+extern void preproc_lex_unfetch_byte(struct preproc_info *, int);
+
+
+struct token *preproc_next_processed_token(struct preproc_info *pp)
+{
+	struct token *ct;
+
+again:
+	ct = preproc_next_token(pp);
+	if (ct -> ttype == TOK_EOF)
+		return ct;
+	if (ct -> ttype == TOK_EOL)
+	{
+		pp -> ppeolseen = 1;
+		return ct;
+	}
+
+	if (ct -> ttype == TOK_HASH && pp -> ppeolseen == 1)
+	{
+		// preprocessor directive 
+		process_directive(pp);
+		goto again;
+	}
+	// if we're in a false section, don't return the token; keep scanning
+	if (pp -> skip_level)
+		goto again;
+
+	if (ct -> ttype != TOK_WSPACE)
+		pp -> ppeolseen = 0;
+	
+	if (ct -> ttype == TOK_IDENT)
+	{
+		// possible macro expansion
+		if (expand_macro(pp, ct -> strval))
+	 		goto again;
+	}
+	
+	return ct;
+}
+
+static struct token *preproc_next_processed_token_nws(struct preproc_info *pp)
+{
+	struct token *t;
+	
+	do
+	{
+		t = preproc_next_processed_token(pp);
+	} while (t -> ttype == TOK_WSPACE);
+	return t;
+}
+
+static struct token *preproc_next_token_nws(struct preproc_info *pp)
+{
+	struct token *t;
+	
+	do
+	{
+		t = preproc_next_token(pp);
+	} while (t -> ttype == TOK_WSPACE);
+	return t;
+}
+
+static void skip_eol(struct preproc_info *pp)
+{
+	struct token *t;
+	
+	if (pp -> curtok && pp -> curtok -> ttype == TOK_EOL)
+		return;
+	do
+	{
+		t = preproc_next_token(pp);
+	} while (t -> ttype != TOK_EOL);
+}
+
+static void check_eol(struct preproc_info *pp)
+{
+	struct token *t;
+
+	t = preproc_next_token_nws(pp);
+	if (t -> ttype != TOK_EOL)
+		preproc_throw_warning(pp, "Extra text after preprocessor directive");
+	skip_eol(pp);
+}
+
+static void dir_ifdef(struct preproc_info *pp)
+{
+	struct token *ct;
+	
+	if (pp -> skip_level)
+	{
+		pp -> skip_level++;
+		skip_eol(pp);
+		return;
+	}
+	
+	do
+	{
+		ct = preproc_next_token(pp);
+	} while (ct -> ttype == TOK_WSPACE);
+	
+	if (ct -> ttype != TOK_IDENT)
+	{
+		preproc_throw_error(pp, "Bad #ifdef");
+		skip_eol(pp);
+	}
+	
+	if (symtab_find(pp, ct -> strval) == NULL)
+	{
+		pp -> skip_level++;
+	}
+	else
+	{
+		pp -> found_level++;
+	}
+	check_eol(pp);
+}
+
+static void dir_ifndef(struct preproc_info *pp)
+{
+	struct token *ct;
+	
+	if (pp -> skip_level)
+	{
+		pp -> skip_level++;
+		skip_eol(pp);
+		return;
+	}
+	
+	do
+	{
+		ct = preproc_next_token(pp);
+	} while (ct -> ttype == TOK_WSPACE);
+	
+	if (ct -> ttype != TOK_IDENT)
+	{
+		preproc_throw_error(pp, "Bad #ifdef");
+		skip_eol(pp);
+	}
+	
+	if (symtab_find(pp, ct -> strval) != NULL)
+	{
+		pp -> skip_level++;
+	}
+	else
+	{
+		pp -> found_level++;
+	}
+	check_eol(pp);
+}
+
+static void dir_if(struct preproc_info *pp)
+{
+	if (pp -> skip_level || !eval_expr(pp))
+		pp -> skip_level++;
+	else
+		pp -> found_level++;
+}
+
+static void dir_elif(struct preproc_info *pp)
+{
+	if (pp -> skip_level == 0)
+		pp -> else_skip_level = pp -> found_level;
+	if (pp -> skip_level)
+	{
+		if (pp -> else_skip_level > pp -> found_level)
+			;
+		else if (--(pp -> skip_level) != 0)
+			pp -> skip_level++;
+		else if (eval_expr(pp))
+			pp -> found_level++;
+		else
+			pp -> skip_level++;
+	}
+	else if (pp -> found_level)
+	{
+		pp -> skip_level++;
+		pp -> found_level--;
+	}
+	else
+		preproc_throw_error(pp, "#elif in non-conditional section");
+}
+
+static void dir_else(struct preproc_info *pp)
+{
+	if (pp -> skip_level)
+	{
+		if (pp -> else_skip_level > pp -> found_level)
+			;
+		else if (--(pp -> skip_level) != 0)
+			pp -> skip_level++;
+		else
+			pp -> found_level++;
+	}
+	else if (pp -> found_level)
+	{
+		pp -> skip_level++;
+		pp -> found_level--;
+	}
+	else
+	{
+		preproc_throw_error(pp, "#else in non-conditional section");
+	}
+	if (pp -> else_level == pp -> found_level + pp -> skip_level)
+	{
+		preproc_throw_error(pp, "Too many #else");
+	}
+	pp -> else_level = pp -> found_level + pp -> skip_level;
+	check_eol(pp);
+}
+
+static void dir_endif(struct preproc_info *pp)
+{
+	if (pp -> skip_level)
+		pp -> skip_level--;
+	else if (pp -> found_level)
+		pp -> found_level--;
+	else
+		preproc_throw_error(pp, "#endif in non-conditional section");
+	if (pp -> skip_level == 0)
+		pp -> else_skip_level = 0;
+	pp -> else_level = 0;
+	check_eol(pp);
+}
+
+static void dir_define(struct preproc_info *pp)
+{
+	struct token_list *tl = NULL;
+	struct token *ct;
+	int nargs = -1;
+	int vargs = 0;
+	char *mname = NULL;
+
+	char **arglist = NULL;
+	
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+
+	ct = preproc_next_token_nws(pp);
+	if (ct -> ttype != TOK_IDENT)
+		goto baddefine;
+	
+	mname = lw_strdup(ct -> strval);
+	ct = preproc_next_token(pp);
+	if (ct -> ttype == TOK_WSPACE)
+	{
+		/* object like macro */
+	}
+	else if (ct -> ttype == TOK_EOL)
+	{
+		/* object like macro - empty value */
+		goto out;
+	}
+	else if (ct -> ttype == TOK_OPAREN)
+	{
+		/* function like macro - parse args */
+		nargs = 0;
+		vargs = 0;
+		for (;;)
+		{
+			ct = preproc_next_token_nws(pp);
+			if (ct -> ttype == TOK_EOL)
+			{
+				goto baddefine;
+			}
+			if (ct -> ttype == TOK_CPAREN)
+				break;
+			
+			if (ct -> ttype == TOK_IDENT)
+			{
+				/* parameter name */
+				nargs++;
+				/* record argument name */
+				arglist = lw_realloc(arglist, sizeof(char *) * nargs);
+				arglist[nargs - 1] = lw_strdup(ct -> strval);
+				
+				/* check for end of args or comma */
+				ct = preproc_next_token_nws(pp);
+				if (ct -> ttype == TOK_CPAREN)
+					break;
+				else if (ct -> ttype == TOK_COMMA)
+					continue;
+				else
+					goto baddefine;
+			}
+			else if (ct -> ttype == TOK_ELLIPSIS)
+			{
+				/* variadic macro */
+				vargs = 1;
+				ct = preproc_next_token_nws(pp);
+				if (ct -> ttype != TOK_CPAREN)
+					goto baddefine;
+				break;
+			}
+			else
+				goto baddefine;
+		}
+	}
+	else
+	{
+baddefine:
+		preproc_throw_error(pp, "bad #define", ct -> ttype);
+baddefine2:
+		token_list_destroy(tl);
+		skip_eol(pp);
+		lw_free(mname);
+		while (nargs > 0)
+			lw_free(arglist[--nargs]);
+		lw_free(arglist);
+		return;
+	}
+
+	tl = token_list_create();	
+	for (;;)
+	{
+		ct = preproc_next_token(pp);
+	
+		if (ct -> ttype == TOK_EOL)
+			break;
+		token_list_append(tl, token_dup(ct));
+	}
+out:
+	if (strcmp(mname, "defined") == 0)
+	{
+		preproc_throw_warning(pp, "attempt to define 'defined' as a macro not allowed");
+		goto baddefine2;
+	}
+	else if (symtab_find(pp, mname) != NULL)
+	{
+		/* need to do a token compare between the old value and the new value
+		   to decide whether to complain */
+		preproc_throw_warning(pp, "%s previous defined", mname);
+		symtab_undef(pp, mname);
+	}
+	symtab_define(pp, mname, tl, nargs, arglist, vargs);
+	lw_free(mname);
+	while (nargs > 0)
+		lw_free(arglist[--nargs]);
+	lw_free(arglist);
+	/* no need to check for EOL here */
+}
+
+void preproc_add_macro(struct preproc_info *pp, char *str)
+{
+	char *s;
+	
+	pp -> lexstr = lw_strdup(str);
+	pp -> lexstrloc = 0;
+	s = strchr(pp -> lexstr, '=');
+	if (s)
+		*s = ' ';
+		
+	dir_define(pp);
+	
+	lw_free(pp -> lexstr);
+	pp -> lexstr = NULL;
+	pp -> lexstrloc = 0;
+}
+
+static void dir_undef(struct preproc_info *pp)
+{
+	struct token *ct;
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+	
+	do
+	{
+		ct = preproc_next_token(pp);
+	} while (ct -> ttype == TOK_WSPACE);
+	
+	if (ct -> ttype != TOK_IDENT)
+	{
+		preproc_throw_error(pp, "Bad #undef");
+		skip_eol(pp);
+	}
+	
+	symtab_undef(pp, ct -> strval);
+	check_eol(pp);
+}
+
+char *streol(struct preproc_info *pp)
+{
+	struct lw_strbuf *s;
+	struct token *ct;
+	int i;
+		
+	s = lw_strbuf_new();
+	do
+	{
+		ct = preproc_next_token(pp);
+	} while (ct -> ttype == TOK_WSPACE);
+	
+	while (ct -> ttype != TOK_EOL)
+	{
+		for (i = 0; ct -> strval[i]; i++)
+			lw_strbuf_add(s, ct -> strval[i]);
+		ct = preproc_next_token(pp);
+	}
+	return lw_strbuf_end(s);
+}
+
+static void dir_error(struct preproc_info *pp)
+{
+	char *s;
+	
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+	
+	s = streol(pp);
+	preproc_throw_error(pp, "%s", s);
+	lw_free(s);
+}
+
+static void dir_warning(struct preproc_info *pp)
+{
+	char *s;
+	
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+	
+	s = streol(pp);
+	preproc_throw_warning(pp, "%s", s);
+	lw_free(s);
+}
+
+static char *preproc_file_exists_in_dir(char *dir, char *fn)
+{
+	int l;
+	char *f;
+	
+	l = snprintf(NULL, 0, "%s/%s", dir, fn);
+	f = lw_alloc(l + 1);
+	snprintf(f, l + 1, "%s/%s", dir, fn);
+	
+	if (access(f, R_OK) == 0)
+		return f;
+	lw_free(f);
+	return NULL;
+}
+
+static char *preproc_find_file(struct preproc_info *pp, char *fn, int sys)
+{
+	char *tstr;
+	char *pref;
+	char *rfn;
+
+	/* pass through absolute paths, dumb as they are */	
+	if (fn[0] == '/')
+		return lw_strdup(fn);
+
+	if (!sys)
+	{
+		/* look in the directory with the current file */
+		tstr = strchr(pp -> fn, '/');
+		if (!tstr)
+			pref = lw_strdup(".");
+		else
+		{
+			pref = lw_alloc(tstr - pp -> fn + 1);
+			memcpy(pref, pp -> fn, tstr - pp -> fn);
+			pref[tstr - pp -> fn] = 0;
+		}
+		rfn = preproc_file_exists_in_dir(pref, fn);
+		lw_free(pref);
+		if (rfn)
+			return rfn;
+		
+		/* look in the "quote" dir list */
+		lw_stringlist_reset(pp -> quotelist);
+		for (pref = lw_stringlist_current(pp -> quotelist); pref; pref = lw_stringlist_next(pp -> quotelist))
+		{
+			rfn = preproc_file_exists_in_dir(pref, fn);
+			if (rfn)
+				return rfn;
+		}
+	}
+	
+	/* look in the "include" dir list */
+	lw_stringlist_reset(pp -> inclist);
+	for (pref = lw_stringlist_current(pp -> inclist); pref; pref = lw_stringlist_next(pp -> inclist))
+	{
+		rfn = preproc_file_exists_in_dir(pref, fn);
+		if (rfn)
+			return rfn;
+	}
+
+	/* the default search list is provided by the driver program */	
+	return NULL;
+}
+
+static void dir_include(struct preproc_info *pp)
+{
+	FILE *fp;
+	struct token *ct;
+	int sys = 0;
+	char *fn;
+	struct lw_strbuf *strbuf;
+	int i;
+	struct preproc_info *fs;
+	
+	ct = preproc_next_token_nws(pp);
+	if (ct -> ttype == TOK_STR_LIT)
+	{
+usrinc:
+		sys = strlen(ct -> strval);
+		fn = lw_alloc(sys - 1);
+		memcpy(fn, ct -> strval + 1, sys - 2);
+		fn[sys - 2] = 0;
+		sys = 0;
+		goto doinc;
+	}
+	else if (ct -> ttype == TOK_LT)
+	{
+		strbuf = lw_strbuf_new();
+		for (;;)
+		{
+			int c;
+			c = preproc_lex_fetch_byte(pp);
+			if (c == CPP_EOL)
+			{
+				preproc_lex_unfetch_byte(pp, c);
+				preproc_throw_error(pp, "Bad #include");
+				lw_free(lw_strbuf_end(strbuf));
+				break;
+			}
+			if (c == '>')
+				break;
+			lw_strbuf_add(strbuf, c);
+		}
+		ct = preproc_next_token_nws(pp);
+		if (ct -> ttype != TOK_EOL)
+		{
+			preproc_throw_error(pp, "Bad #include");
+			skip_eol(pp);
+			lw_free(lw_strbuf_end(strbuf));
+			return;
+		}
+		sys = 1;
+		fn = lw_strbuf_end(strbuf);
+		goto doinc;
+	}
+	else
+	{
+		preproc_unget_token(pp, ct);
+		// computed include
+		ct = preproc_next_processed_token_nws(pp);
+		if (ct -> ttype == TOK_STR_LIT)
+			goto usrinc;
+		else if (ct -> ttype == TOK_LT)
+		{
+			strbuf = lw_strbuf_new();
+			for (;;)
+			{
+				ct = preproc_next_processed_token(pp);
+				if (ct -> ttype == TOK_GT)
+					break;
+				if (ct -> ttype == TOK_EOL)
+				{
+					preproc_throw_error(pp, "Bad #include");
+					lw_free(lw_strbuf_end(strbuf));
+					return;
+				}
+				for (i = 0; ct -> strval[i]; ct++)
+				{
+					lw_strbuf_add(strbuf, ct -> strval[i]);
+				}
+			}
+			ct = preproc_next_processed_token_nws(pp);
+			if (ct -> ttype != TOK_EOL)
+			{
+				preproc_throw_error(pp, "Bad #include");
+				skip_eol(pp);
+				lw_free(lw_strbuf_end(strbuf));
+				return;
+			}
+			sys = 1;
+			fn = lw_strbuf_end(strbuf);
+			goto doinc;
+		}
+		else
+		{
+			skip_eol(pp);
+			preproc_throw_error(pp, "Bad #include");
+			return;
+		}
+	}
+doinc:
+	fn = preproc_find_file(pp, fn, sys);
+	if (!fn)
+		goto badfile;
+	fp = fopen(fn, "rb");
+	if (!fp)
+	{
+		lw_free(fn);
+badfile:
+		preproc_throw_error(pp, "Cannot open #include file %s - this is fatal", fn);
+		exit(1);
+	}
+	
+	/* save the current include file state, etc. */
+	fs = lw_alloc(sizeof(struct preproc_info));
+	*fs = *pp;
+	fs -> n = pp -> filestack;
+	pp -> curtok = NULL;
+	pp -> filestack = fs;
+	pp -> fn = lw_strpool_strdup(pp -> strpool, fn);
+	lw_free(fn);
+	pp -> fp = fp;
+	pp -> ra = CPP_NOUNG;
+	pp -> ppeolseen = 1;
+	pp -> eolstate = 0;
+	pp -> lineno = 1;
+	pp -> column = 0;
+	pp -> qseen = 0;
+	pp -> ungetbufl = 0;
+	pp -> ungetbufs = 0;
+	pp -> ungetbuf = NULL;
+	pp -> unget = 0;
+	pp -> eolseen = 0;
+	pp -> nlseen = 0;
+	pp -> skip_level = 0;
+	pp -> found_level = 0;
+	pp -> else_level = 0;
+	pp -> else_skip_level = 0;
+	pp -> tokqueue = NULL;	
+	// now get on with processing
+}
+
+static void dir_line(struct preproc_info *pp)
+{
+	struct token *ct;
+	long lineno;
+	char *estr;
+	
+	lineno = -1;
+	
+	ct = preproc_next_processed_token_nws(pp);
+	if (ct -> ttype == TOK_NUMBER)
+	{
+		lineno = strtoul(ct -> strval, &estr, 10);
+		if (*estr)
+		{
+			preproc_throw_error(pp, "Bad #line");
+			skip_eol(pp);
+			return;
+		}
+	}
+	else
+	{
+		preproc_throw_error(pp, "Bad #line");
+		skip_eol(pp);
+		return;
+	}
+	ct = preproc_next_processed_token_nws(pp);
+	if (ct -> ttype == TOK_EOL)
+	{
+		pp -> lineno = lineno;
+		return;
+	}
+	if (ct -> ttype != TOK_STR_LIT)
+	{
+		preproc_throw_error(pp, "Bad #line");
+		skip_eol(pp);
+		return;
+	}
+	estr = lw_strdup(ct -> strval);
+	ct = preproc_next_processed_token_nws(pp);
+	if (ct -> ttype != TOK_EOL)
+	{
+		preproc_throw_error(pp, "Bad #line");
+		skip_eol(pp);
+		lw_free(estr);
+		return;
+	}
+	pp -> fn = estr;
+	pp -> lineno = lineno;
+}
+
+static void dir_pragma(struct preproc_info *pp)
+{
+	if (pp -> skip_level)
+	{
+		skip_eol(pp);
+		return;
+	}
+	
+	preproc_throw_warning(pp, "Unsupported #pragma");
+	skip_eol(pp);
+}
+
+struct { char *name; void (*fn)(struct preproc_info *); } dirlist[] =
+{
+	{ "ifdef", dir_ifdef },
+	{ "ifndef", dir_ifndef },
+	{ "if", dir_if },
+	{ "else", dir_else },
+	{ "elif", dir_elif },
+	{ "endif", dir_endif },
+	{ "define", dir_define },
+	{ "undef", dir_undef },
+	{ "include", dir_include },
+	{ "error", dir_error },
+	{ "warning", dir_warning },
+	{ "line", dir_line },
+	{ "pragma", dir_pragma },
+	{ NULL, NULL }
+};
+
+static void process_directive(struct preproc_info *pp)
+{
+	struct token *ct;
+	int i;
+	
+	do
+	{
+		ct = preproc_next_token(pp);
+	} while (ct -> ttype == TOK_WSPACE);
+	
+	// NULL directive
+	if (ct -> ttype == TOK_EOL)
+		return;
+	
+	if (ct -> ttype == TOK_NUMBER)
+	{
+		// this is probably a file marker from a previous run of the preprocessor
+		char *fn;
+		struct lw_strbuf *sb;
+		
+		i = preproc_numval(pp, ct);
+		ct  = preproc_next_token_nws(pp);
+		if (ct -> ttype != TOK_STR_LIT)
+			goto baddir;
+		pp -> lineno = i;
+		sb = lw_strbuf_new();
+		for (fn = ct -> strval; *fn && *fn != '"'; )
+		{
+			if (*fn == '\\')
+			{
+				lw_strbuf_add(sb, eval_escape(&fn));
+			}
+			else
+			{
+				lw_strbuf_add(sb, *fn++);
+			}
+		}
+		fn = lw_strbuf_end(sb);
+		pp -> fn = lw_strpool_strdup(pp -> strpool, fn);
+		lw_free(fn);
+		skip_eol(pp);
+		return;
+	}
+	
+	if (ct -> ttype != TOK_IDENT)
+		goto baddir;
+	
+	for (i = 0; dirlist[i].name; i++)
+	{
+		if (strcmp(dirlist[i].name, ct -> strval) == 0)
+		{
+			(*(dirlist[i].fn))(pp);
+			return;
+		}
+	}
+baddir:
+		preproc_throw_error(pp, "Bad preprocessor directive");
+		while (ct -> ttype != TOK_EOL)
+			ct = preproc_next_token(pp);
+		return;
+}
+
+/*
+Evaluate a preprocessor expression
+*/
+
+/* same as skip_eol() but the EOL token is not consumed */
+static void skip_eoe(struct preproc_info *pp)
+{
+	skip_eol(pp);
+	preproc_unget_token(pp, pp -> curtok);
+}
+
+static long eval_expr_real(struct preproc_info *, int);
+
+static long eval_term_real(struct preproc_info *pp)
+{
+	long tval = 0;
+	struct token *ct;
+	
+eval_next:
+	ct = preproc_next_processed_token_nws(pp);
+	if (ct -> ttype == TOK_EOL)
+	{
+		preproc_throw_error(pp, "Bad expression");
+		return 0;
+	}
+	
+	switch (ct -> ttype)
+	{
+	case TOK_OPAREN:
+		tval = eval_expr_real(pp, 0);
+		ct = preproc_next_processed_token_nws(pp);
+		if (ct -> ttype != ')')
+		{
+			preproc_throw_error(pp, "Unbalanced () in expression");
+			skip_eoe(pp);
+			return 0;
+		}
+		return tval;
+
+	case TOK_ADD: // unary +
+		goto eval_next;
+
+	case TOK_SUB: // unary -	
+		tval = eval_expr_real(pp, 200);
+		return -tval;
+
+	/* NOTE: we should only get "TOK_IDENT" from an undefined macro */		
+	case TOK_IDENT: // some sort of function, symbol, etc.
+		if (strcmp(ct -> strval, "defined"))
+		{
+			/* the defined operator */
+			/* any number in the "defined" bit will be
+			   treated as a defined symbol, even zero */
+			ct = preproc_next_token_nws(pp);
+			if (ct -> ttype == TOK_OPAREN)
+			{
+				ct = preproc_next_token_nws(pp);
+				if (ct -> ttype != TOK_IDENT)
+				{
+					preproc_throw_error(pp, "Bad expression");
+					skip_eoe(pp);
+					return 0;
+				}
+				if (symtab_find(pp, ct -> strval) == NULL)
+					tval = 0;
+				else
+					tval = 1;
+				ct = preproc_next_token_nws(pp);
+				if (ct -> ttype != TOK_CPAREN)
+				{
+					preproc_throw_error(pp, "Bad expression");
+					skip_eoe(pp);
+					return 0;
+				}
+				return tval;
+			}
+			else if (ct -> ttype == TOK_IDENT)
+			{
+				return (symtab_find(pp, ct -> strval) != NULL) ? 1 : 0;
+			}
+			preproc_throw_error(pp, "Bad expression");
+			skip_eoe(pp);
+			return 0;
+		}
+		/* unknown identifier - it's zero */
+		return 0;
+	
+	/* numbers */
+	case TOK_NUMBER:
+		return preproc_numval(pp, ct);	
+		
+	default:
+		preproc_throw_error(pp, "Bad expression");
+		skip_eoe(pp);
+		return 0;
+	}
+	return 0;
+}
+
+static long eval_expr_real(struct preproc_info *pp, int p)
+{
+	static const struct operinfo
+	{
+		int tok;
+		int prec;
+	} operators[] =
+	{
+		{ TOK_ADD, 100 },
+		{ TOK_SUB, 100 },
+		{ TOK_STAR, 150 },
+		{ TOK_DIV, 150 },
+		{ TOK_MOD, 150 },
+		{ TOK_LT, 75 },
+		{ TOK_LE, 75 },
+		{ TOK_GT, 75 },
+		{ TOK_GE, 75 },
+		{ TOK_EQ, 70 },
+		{ TOK_NE, 70 },
+		{ TOK_BAND, 30 },
+		{ TOK_BOR, 25 },
+		{ TOK_NONE, 0 }
+	};
+	
+	int op;
+	long term1, term2, term3;
+	struct token *ct;
+	
+	term1 = eval_term_real(pp);
+eval_next:
+	ct = preproc_next_processed_token_nws(pp);
+	for (op = 0; operators[op].tok != TOK_NONE; op++)
+	{
+		if (operators[op].tok == ct -> ttype)
+			break;
+	}
+	/* if it isn't a recognized operator, assume end of expression */
+	if (operators[op].tok == TOK_NONE)
+	{
+		preproc_unget_token(pp, ct);
+		return term1;
+	}
+
+	/* if new operation is not higher than the current precedence, let the previous op finish */
+	if (operators[op].prec <= p)
+		return term1;
+
+	/* get the second term */
+	term2 = eval_expr_real(pp, operators[op].prec);
+	
+	switch (operators[op].tok)
+	{
+	case TOK_ADD:
+		term3 = term1 + term2;
+		break;
+	
+	case TOK_SUB:
+		term3 = term1 - term2;
+		break;
+	
+	case TOK_STAR:
+		term3 = term1 * term2;
+		break;
+	
+	case TOK_DIV:
+		if (!term2)
+		{
+			preproc_throw_warning(pp, "Division by zero");
+			term3 = 0;
+			break;
+		}
+		term3 = term1 / term2;
+		break;
+	
+	case TOK_MOD:
+		if (!term2)
+		{
+			preproc_throw_warning(pp, "Division by zero");
+			term3 = 0;
+			break;
+		}
+		term3 = term1 % term2;
+		break;
+		
+	case TOK_BAND:
+		term3 = (term1 && term2);
+		break;
+	
+	case TOK_BOR:
+		term3 = (term1 || term2);
+		break;
+	
+	case TOK_EQ:
+		term3 = (term1 == term2);
+		break;
+	
+	case TOK_NE:
+		term3 = (term1 != term2);
+		break;
+	
+	case TOK_GT:
+		term3 = (term1 > term2);
+		break;
+	
+	case TOK_GE:
+		term3 = (term1 >= term2);
+		break;
+	
+	case TOK_LT:
+		term3 = (term1 < term2);
+		break;
+	
+	case TOK_LE:
+		term3 = (term1 <= term2);
+		break;
+
+	default:
+		term3 = 0;
+		break;
+	}
+	term1 = term3;
+	goto eval_next;
+}
+
+static long eval_expr(struct preproc_info *pp)
+{
+	long rv;
+	struct token *t;
+	
+	rv = eval_expr_real(pp, 0);
+	t = preproc_next_token_nws(pp);
+	if (t -> ttype != TOK_EOL)
+	{
+		preproc_throw_error(pp, "Bad expression");
+		skip_eol(pp);
+	}
+	return rv;
+}
+
+static int eval_escape(char **t)
+{
+	int c;
+	int c2;
+	
+	if (**t == 0)
+		return 0;
+	c = *(*t)++;
+	int rv = 0;
+	
+	switch (c)
+	{
+	case 'n':
+		return 10;
+	case 'r':
+		return 13;
+	case 'b':
+		return 8;
+	case 'e':
+		return 27;
+	case 'f':
+		return 12;
+	case 't':
+		return 9;
+	case 'v':
+		return 11;
+	case 'a':
+		return 7;
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7':
+		// octal constant
+		rv = c - '0';
+		c2 = 1;
+		for (; c2 < 3; c2++)
+		{
+			c = *(*t)++;
+			if (c < '0' || c > '7')
+				break;
+			rv = (rv << 3) | (c - '0');
+		}
+		return rv;
+	case 'x':
+		// hex constant
+		for (;;)
+		{
+			c = *(*t)++;
+			if (c < '0' || (c > '9' && c < 'A') || (c > 'F' && c < 'a') || c > 'f')
+				break;
+			c = c - '0';
+			if (c > 9)
+				c -= 7;
+			if (c > 15)
+				c -= 32;
+			rv = (rv << 4) | c;
+		}
+		return rv & 0xff;
+	default:
+		return c;
+	}
+}
+
+/* convert a numeric string to a number */
+long preproc_numval(struct preproc_info *pp, struct token *t)
+{
+	unsigned long long rv = 0;
+	unsigned long long rv2 = 0;
+	char *tstr = t -> strval;
+	int radix = 10;
+	int c;
+	int ovf = 0;
+	union { long sv; unsigned long uv; } tv;
+		
+	if (t -> ttype == TOK_CHR_LIT)
+	{
+		tstr++;
+		while (*tstr && *tstr != '\'')
+		{
+			if (*tstr == '\\')
+			{
+				tstr++;
+				c = eval_escape(&tstr);
+			}
+			else
+				c = *tstr++;
+			rv = (rv << 8) | c;
+			if (rv / radix < rv2)
+				ovf = 1;
+			rv2 = rv;
+			
+		}
+		goto done;
+	}
+	
+	
+	if (*tstr == '0')
+	{
+		radix = 8;
+		tstr++;
+		if (*tstr == 'x')
+		{
+			radix = 16;
+			tstr++;
+		}
+	}
+	while (*tstr)
+	{
+		c = *tstr++;
+		if (c < '0' || (c > '9' && c < 'A') || (c > 'F' && c < 'a') || c > 'f')
+			break;
+		c -= '0';
+		if (c > 9)
+			c -= 7;
+		if (c > 15)
+			c -= 32;
+		if (c >= radix)
+			break;
+		rv = rv * radix + c;
+		if (rv / radix < rv2)
+			ovf = 1;
+		rv2 = rv;
+	}
+	tstr--;
+	while (*tstr == 'l' || *tstr == 'L')
+		tstr++;
+	tv.uv = rv;
+	if (tv.sv < 0 && radix == 10)
+		ovf = 1;
+done:
+	if (ovf)
+		preproc_throw_error(pp, "Constant out of range: %s", t -> strval);
+	return rv;
+}
+
+/*
+Below here is the logic for expanding a macro
+*/
+static char *stringify(struct token_list *tli)
+{
+	struct lw_strbuf *s;
+	int ws = 0;
+	struct token *tl = tli -> head;
+	
+	s = lw_strbuf_new();
+	lw_strbuf_add(s, '"');
+
+	while (tl && tl -> ttype == TOK_WSPACE)
+		tl = tl -> next;
+	
+	for (; tl; tl = tl -> next)
+	{
+		if (tl -> ttype == TOK_WSPACE)
+		{
+			ws = 1;
+			continue;
+		}
+		if (ws)
+		{
+			lw_strbuf_add(s, ' ');
+		}
+		for (ws = 0; tl -> strval[ws]; ws++)
+		{
+			if (tl -> ttype == TOK_STR_LIT || tl -> ttype == TOK_CHR_LIT)
+			{
+				if (tl -> strval[ws] == '"' || tl -> strval[ws] == '\\')
+					lw_strbuf_add(s, '\\');
+			}
+		}
+		ws = 0;
+	}
+	
+	lw_strbuf_add(s, '"');
+	return lw_strbuf_end(s);
+}
+
+static int macro_arg(struct symtab_e *s, char *str)
+{
+	int i;
+	if (strcmp(str, "__VA_ARGS__") == 0)
+		i = s -> nargs;
+	else
+		for (i = 0; i < s -> nargs; i++)
+			if (strcmp(s -> params[i], str) == 0)
+				break;
+	if (i == s -> nargs)
+		if (s -> vargs == 0)
+			return -1;
+	return i;
+}
+
+/* return list to tokens as a result of ## expansion */
+static struct token_list *paste_tokens(struct preproc_info *pp, struct symtab_e *s, struct token_list **arglist, struct token *t1, struct token *t2)
+{
+	struct token_list *left;
+	struct token_list *right;
+	char *tstr;
+	struct token *ttok;
+	int i;
+	
+	if (t1 -> ttype == TOK_IDENT)
+	{
+		i = macro_arg(s, t1 -> strval);
+		if (i == -1)
+		{
+			left = token_list_create();
+			token_list_append(left, token_dup(t1));
+		}
+		else
+		{
+			left = token_list_dup(arglist[i]);
+		}
+	}
+	else
+	{
+		left = token_list_create();
+		token_list_append(left, token_dup(t1));
+	}
+	// munch trailing white space
+	while (left -> tail && left -> tail -> ttype == TOK_WSPACE)
+	{
+		token_list_remove(left -> tail);
+	}
+
+	if (t2 -> ttype == TOK_IDENT)
+	{
+		i = macro_arg(s, t2 -> strval);
+		if (i == -1)
+		{
+			right = token_list_create();
+			token_list_append(right, token_dup(t2));
+		}
+		else
+		{
+			right = token_list_dup(arglist[i]);
+		}
+	}
+	else
+	{
+		right = token_list_create();
+		token_list_append(right, token_dup(t2));
+	}
+	// munch leading white space
+	while (right -> head && right -> head -> ttype == TOK_WSPACE)
+	{
+		token_list_remove(right -> head);
+	}
+
+	// nothing to append at all
+	if (left -> head != NULL && right -> head == NULL)
+	{
+		// right arg is empty - use left
+		token_list_destroy(right);
+		return left;
+	}
+	if (left -> head == NULL && right -> head != NULL)
+	{
+		// left arg is empty, use right
+		token_list_destroy(left);
+		return right;
+	}
+	if (left -> head == NULL && right -> head == NULL)
+	{
+		// both empty, use left
+		token_list_destroy(right);
+		return left;
+	}
+	
+	// both non-empty - past left tail with right head
+	// then past the right list onto the left
+	tstr = lw_alloc(strlen(left -> tail -> strval) + strlen(right -> head -> strval) + 1);
+	strcpy(tstr, left -> tail -> strval);
+	strcat(tstr, right -> head -> strval);
+	
+	pp -> lexstr = tstr;
+	pp -> lexstrloc = 0;
+	
+	ttok = preproc_lex_next_token(pp);
+	if (ttok -> ttype != TOK_ERROR && pp -> lexstr[pp -> lexstrloc] == 0)
+	{
+		// we have a new token here
+		token_list_remove(left -> tail);
+		token_list_remove(right -> head);
+		token_list_append(left, token_dup(ttok));
+	}
+	lw_free(tstr);
+	pp -> lexstr = NULL;
+	pp -> lexstrloc = 0;
+	for (ttok = right -> head; ttok; ttok = ttok -> next)
+	{
+		token_list_append(left, token_dup(ttok));
+	}
+	token_list_destroy(right);
+	return left;
+}
+
+static int expand_macro(struct preproc_info *pp, char *mname)
+{
+	struct symtab_e *s;
+	struct token *t, *t2, *t3;
+	int nargs = 0;
+	struct expand_e *e;
+	struct token_list **exparglist = NULL;
+	struct token_list **arglist = NULL;
+	int i;
+	int pcount;
+	char *tstr;
+	struct token_list *expand_list;
+	int repl;
+	struct token_list *rtl;
+	
+	// check for built in macros
+	if (strcmp(mname, "__FILE__") == 0)
+	{
+		struct lw_strbuf *sb;
+		
+		sb = lw_strbuf_new();
+		lw_strbuf_add(sb, '"');
+		for (tstr = (char *)(pp -> fn); *tstr; tstr++)
+		{
+			if (*tstr == 32 || (*tstr > 34 && *tstr < 127))
+			{
+				lw_strbuf_add(sb, *tstr);
+			}
+			else
+			{
+				lw_strbuf_add(sb, '\\');
+				lw_strbuf_add(sb, (*tstr >> 6) + '0');
+				lw_strbuf_add(sb, ((*tstr >> 3) & 7) + '0');
+				lw_strbuf_add(sb, (*tstr & 7) + '0');
+			}
+		}
+		lw_strbuf_add(sb, '"');
+		tstr = lw_strbuf_end(sb);
+		preproc_unget_token(pp, token_create(TOK_STR_LIT, tstr, pp -> lineno, pp -> column, pp -> fn));
+		lw_free(tstr);
+		return 1;
+	}
+	else if (strcmp(mname, "__LINE__") == 0)
+	{
+		char nbuf[25];
+		snprintf(nbuf, 25, "%d", pp -> lineno);
+		preproc_unget_token(pp, token_create(TOK_NUMBER, nbuf, pp -> lineno, pp -> column, pp -> fn));
+		return 1;
+	}
+	else if (strcmp(mname, "__DATE__") == 0)
+	{
+		char dbuf[14];
+		struct tm *tv;
+		time_t tm;
+		static char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+		
+		tm = time(NULL);
+		tv = localtime(&tm);
+		snprintf(dbuf, 14, "\"%s %2d %04d\"", months[tv -> tm_mon], tv -> tm_mday, tv -> tm_year + 1900);
+		preproc_unget_token(pp, token_create(TOK_STR_LIT, dbuf, pp -> lineno, pp -> column, pp -> fn));
+		return 1;
+	}
+	else if (strcmp(mname, "__TIME__") == 0)
+	{
+		char tbuf[11];
+		struct tm *tv;
+		time_t tm;
+		
+		tm = time(NULL);
+		tv = localtime(&tm);
+		snprintf(tbuf, 11, "\"%02d:%02d:%02d\"", tv -> tm_hour, tv -> tm_min, tv -> tm_sec);
+		preproc_unget_token(pp, token_create(TOK_STR_LIT, tbuf, pp -> lineno, pp -> column, pp -> fn));
+		return 1;
+	}
+	
+	s = symtab_find(pp, mname);
+	if (!s)
+		return 0;
+	
+	for (e = pp -> expand_list; e; e = e -> next)
+	{
+		/* don't expand if we're already expanding the same macro */
+		if (e -> s == s)
+			return 0;
+	}
+
+	if (s -> nargs == -1)
+	{
+		/* short circuit NULL expansion */
+		if (s -> tl == NULL)
+			return 1;
+
+		goto expandmacro;
+	}
+	
+	// look for opening paren after optional whitespace
+	t2 = NULL;
+	t = NULL;
+	for (;;)
+	{
+		t = preproc_next_token(pp);
+		if (t -> ttype != TOK_WSPACE && t -> ttype != TOK_EOL)
+			break;
+		t -> next = t2;
+		t2 = t2;
+	}
+	if (t -> ttype != TOK_OPAREN)
+	{
+		// not a function-like invocation
+		while (t2)
+		{
+			t = t2 -> next;
+			preproc_unget_token(pp, t2);
+			t2 = t;
+		}
+		return 0;
+	}
+	
+	// parse parameters here
+	t = preproc_next_token_nws(pp);
+	nargs = 1;
+	arglist = lw_alloc(sizeof(struct token_list *));
+	arglist[0] = token_list_create();
+	t2 = NULL;
+	
+	while (t -> ttype != TOK_CPAREN)
+	{
+		pcount = 0;
+		if (t -> ttype == TOK_EOF)
+		{
+			preproc_throw_error(pp, "Unexpected EOF in macro call");
+			break;
+		}
+		if (t -> ttype == TOK_EOL)
+			continue;
+		if (t -> ttype == TOK_OPAREN)
+			pcount++;
+		else if (t -> ttype == TOK_CPAREN && pcount)
+			pcount--;
+		if (t -> ttype == TOK_COMMA && pcount == 0)
+		{
+			if (!(s -> vargs) || (nargs > s -> nargs))
+			{
+				nargs++;
+				arglist = lw_realloc(arglist, sizeof(struct token_list *) * nargs);
+				arglist[nargs - 1] = token_list_create();
+				t2 = NULL;
+				continue;
+			}
+		}
+		token_list_append(arglist[nargs - 1], token_dup(t));
+	}
+
+	if (s -> vargs)
+	{
+		if (nargs <= s -> nargs)
+		{
+			preproc_throw_error(pp, "Wrong number of arguments (%d) for variadic macro %s which takes %d arguments", nargs, mname, s -> nargs);
+		}
+	}
+	else
+	{
+		if (s -> nargs != nargs && !(s -> nargs == 0 && nargs == 1 && arglist[nargs - 1]))
+		{
+			preproc_throw_error(pp, "Wrong number of arguments (%d) for macro %s which takes %d arguments", nargs, mname, s -> nargs);
+		}
+	}
+
+	/* now calculate the pre-expansions of the arguments */
+	exparglist = lw_alloc(nargs * sizeof(struct token_list *));
+	for (i = 0; i < nargs; i++)
+	{
+		exparglist[i] = token_list_create();
+		// NOTE: do nothing if empty argument
+		if (arglist[i] == NULL || arglist[i] -> head == NULL)
+			continue;
+		pp -> sourcelist = arglist[i]->head;
+		for (;;)
+		{
+			t = preproc_next_processed_token(pp);
+			if (t -> ttype == TOK_EOF)
+				break;
+			token_list_append(exparglist[i], token_dup(t));
+		}
+	}
+
+expandmacro:
+	expand_list = token_list_dup(s -> tl);
+
+	// scan for stringification and handle it
+	repl = 0;
+	while (repl == 0)
+	{
+		for (t = expand_list -> head; t; t = t -> next)
+		{
+			if (t -> ttype == TOK_HASH && t -> next && t -> next -> ttype == TOK_IDENT)
+			{
+				i = macro_arg(s, t -> next -> strval);
+				if (i != -1)
+				{
+					repl = 1;
+					tstr = stringify(arglist[i]);
+					token_list_remove(t -> next);
+					token_list_insert(expand_list, t, token_create(TOK_STR_LIT, tstr, t -> lineno, t -> column, t -> fn));
+					token_list_remove(t);
+					lw_free(tstr);
+					break;
+				}
+			}
+		}
+		repl = 1;
+	}
+
+
+	// scan for concatenation and handle it	
+	
+	for (t = expand_list -> head; t; t = t -> next)
+	{
+		if (t -> ttype == TOK_DBLHASH)
+		{
+			// have a concatenation operator here
+			for (t2 = t -> prev; t2; t2 = t2 -> prev)
+			{
+				if (t2 -> ttype != TOK_WSPACE)
+					break;
+			}
+			for (t3 = t -> next; t3; t3 = t3 -> next)
+			{
+				if (t3 -> ttype != TOK_WSPACE)
+					break;
+			}
+			// if no non-whitespace before or after, ignore it
+			if (!t2 || !t3)
+				continue;
+			// eat the whitespace before and after
+			while (t -> prev != t2)
+				token_list_remove(t -> prev);
+			while (t -> next != t3)
+				token_list_remove(t -> next);
+			// now paste t -> prev with t -> next and replace t with the result
+			// continue scanning for ## at t -> next -> next
+			t3 = t -> next -> next;
+			
+			rtl = paste_tokens(pp, s, arglist, t -> prev, t -> next);
+			token_list_remove(t -> next);
+			token_list_remove(t -> prev);
+			t2 = t -> prev;
+			token_list_remove(t);
+			for (t = rtl -> head; t; t = t -> next)
+			{
+				token_list_insert(expand_list, t2, token_dup(t));
+			}
+			t = t3 -> prev;
+			token_list_destroy(rtl);
+		}
+	}
+	
+	// now scan for arguments and expand them
+	for (t = expand_list -> head; t; t = t -> next)
+	{
+	again:
+		if (t -> ttype == TOK_IDENT)
+		{
+			/* identifiers might need expansion to arguments */
+			i = macro_arg(s, t -> strval);
+			if (i != -1)
+			{
+				t3 = t -> next;
+				for (t2 = exparglist[i] -> tail; t2; t2 = t2 -> prev)
+					token_list_insert(expand_list, t, token_dup(t2));
+				token_list_remove(t);
+				t = t3;
+				goto again;
+			}
+		}
+	}
+
+	/* put the new expansion in front of the input, if relevant; if we
+	   expanded to nothing, no need to create an expansion record or
+	   put anything into the input queue */
+	if (expand_list -> head)
+	{
+		token_list_append(expand_list, token_create(TOK_ENDEXPAND, "", -1, -1, ""));
+		
+		// move the expanded list into the token queue
+		for (t = expand_list -> tail; t; t = t -> prev)
+			preproc_unget_token(pp, token_dup(t));
+		
+		/* set up expansion record */
+		e = lw_alloc(sizeof(struct expand_e));
+		e -> next = pp -> expand_list;
+		pp -> expand_list = e;
+		e -> s = s;
+	}
+	
+	/* now clean up */
+	token_list_destroy(expand_list);
+	for (i = 0; i < nargs; i++)
+	{
+		token_list_destroy(arglist[i]);
+		token_list_destroy(exparglist[i]);
+	}
+	lw_free(arglist);
+	lw_free(exparglist);
+	
+	return 1;
+}
+
+struct token *preproc_next(struct preproc_info *pp)
+{
+	struct token *t;
+	
+	t = preproc_next_processed_token(pp);
+	pp -> curtok = NULL;
+	return t;
+}
author	William Astle <lost@l-w.ca>
date	Mon, 05 Aug 2019 21:27:09 -0600
parents	836dc5371980
children