# HG changeset patch
# User lost@l-w.ca
# Date 1295924889 25200
# Node ID 87590f43e76d3fce609c64621e255734b0428e97
# Parent 421d7ceb4d8646a4c30ef383c205b6bcfa40cee1
Started lwbasic parser; checkpoint
diff -r 421d7ceb4d86 -r 87590f43e76d Makefile
--- a/Makefile Mon Jan 24 18:31:07 2011 -0700
+++ b/Makefile Mon Jan 24 20:08:09 2011 -0700
@@ -19,6 +19,7 @@
CPPFLAGS += -I lwlib -DPACKAGE_STRING='"lwtools 4.0-pre"'
LDFLAGS += -L$(PWD)/lwlib -llw
+CFLAGS ?= -g -Wall
MAIN_TARGETS := lwasm/lwasm$(PROGSUFFIX) \
lwlink/lwlink$(PROGSUFFIX) \
diff -r 421d7ceb4d86 -r 87590f43e76d lwbasic/compiler.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lwbasic/compiler.c Mon Jan 24 20:08:09 2011 -0700
@@ -0,0 +1,168 @@
+/*
+compiler.c
+
+Copyright © 2011 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+This is the actual compiler bit; it drives the parser and code generation
+*/
+
+#include <stdio.h>
+
+#include "lwbasic.h"
+
+/* Parse the type keyword at the current token and return its type code (1 for INTEGER). */
+/* The lexer is advanced past the type so every caller resumes at the following token. */
+static int parse_type(cstate *state)
+{
+	int pt = -1;
+
+	switch (state -> lexer_token)
+	{
+	case token_kw_integer:
+		pt = 1;
+		break;
+
+	default:
+		lwb_error("Invalid type specification\n");
+	}
+	lexer(state);
+	/* TODO: look for "unsigned" modifier for integer types */
+	return pt;
+}
+
+
+/* Parse the header line of a SUB or FUNCTION declaration: name, optional PUBLIC or */
+/* PRIVATE, optional parameter list, then the RETURNS clause. */
+/* issub means RETURNS is not allowed; !issub means RETURNS is required */
+static void parse_subfunc(cstate *state, int issub)
+{
+	int pt;
+
+	lexer(state);
+	/* EOL and EOF tokens carry no text; print "" rather than passing NULL to %s */
+	if (state -> lexer_token != token_identifier)
+		lwb_error("Invalid sub name '%s'\n", state -> lexer_token_string ? state -> lexer_token_string : "");
+
+	printf(" = %s\n", state -> lexer_token_string);
+
+	lexer(state);
+	if (state -> lexer_token == token_kw_public || state -> lexer_token == token_kw_private)
+	{
+		printf(" = %s\n", state -> lexer_token_string);
+		lexer(state);
+	}
+
+	/* ignore the "PARAMS" keyword if present */
+	if (state -> lexer_token == token_kw_params)
+		lexer(state);
+
+	if (state -> lexer_token == token_eol)
+		goto noparms;
+
+paramagain:
+	if (state -> lexer_token != token_identifier)
+	{
+		lwb_error("Parameter name expected, got %d, %s\n", state -> lexer_token, state -> lexer_token_string ? state -> lexer_token_string : "");
+	}
+	printf("Got = %s\n", state -> lexer_token_string);
+	lexer(state);
+
+	if (state -> lexer_token != token_kw_as)
+		lwb_error("Expecting AS\n");
+	lexer(state);
+
+	pt = parse_type(state);
+	printf("Got = %d\n", pt);
+
+	if (state -> lexer_token == token_char && state -> lexer_token_string[0] == ',')
+	{
+		lexer(state);
+		goto paramagain;
+	}
+
+noparms:
+	if (!issub)
+	{
+		int rt;
+
+		if (state -> lexer_token != token_kw_returns)
+		{
+			lwb_error("FUNCTION must have RETURNS\n");
+		}
+		lexer(state);
+		if (state -> lexer_token == token_identifier)
+		{
+			printf("Return value named: %s\n", state -> lexer_token_string);
+			lexer(state);
+			if (state -> lexer_token != token_kw_as)
+				lwb_error("Expecting AS after RETURNS\n");
+			lexer(state);
+		}
+		rt = parse_type(state);
+		printf("Return type: %d\n", rt);
+	}
+	else
+	{
+		if (state -> lexer_token == token_kw_returns)
+		{
+			lwb_error("SUB cannot specify RETURNS\n");
+		}
+	}
+
+	if (state -> lexer_token != token_eol)
+	{
+		lwb_error("EOL expected; found %d, %s\n", state -> lexer_token, state -> lexer_token_string ? state -> lexer_token_string : "");
+	}
+}
+
+void compiler(cstate *state)
+{
+ state -> lexer_curchar = -1; /* -1 makes lexer_curchar() fetch the first character on demand */
+
+ /* top-level parse loop: one global declaration (SUB or FUNCTION) per pass, until EOF */
+ for (;;)
+ {
+ state -> parser_state = parser_state_global;
+ lexer(state);
+ switch (state -> lexer_token)
+ {
+ case token_kw_function:
+ printf("Function\n");
+ parse_subfunc(state, 0); /* issub = 0: a RETURNS clause is required */
+ break;
+
+ case token_kw_sub:
+ printf("Sub\n");
+ parse_subfunc(state, 1); /* issub = 1: a RETURNS clause is rejected */
+ break;
+
+ /* blank lines are allowed; go straight to the next token */
+ case token_eol:
+ continue;
+
+ /* EOF is allowed - end of parsing */
+ case token_eof:
+ return;
+
+ default:
+ lwb_error("Invalid token %d, %s in global state\n", state -> lexer_token, state -> lexer_token_string);
+ }
+ }
+}
diff -r 421d7ceb4d86 -r 87590f43e76d lwbasic/input.c
--- a/lwbasic/input.c Mon Jan 24 18:31:07 2011 -0700
+++ b/lwbasic/input.c Mon Jan 24 20:08:09 2011 -0700
@@ -28,6 +28,7 @@
#include
#include
+#include
#define __input_c_seen__
#include "lwbasic.h"
@@ -54,8 +55,7 @@
sp -> fp = fopen(state -> input_file, "rb");
if (!(sp -> fp))
{
- fprintf(stderr, "Cannot open input file\n");
- exit(1);
+ lwb_error("Cannot open input file\n");
}
}
diff -r 421d7ceb4d86 -r 87590f43e76d lwbasic/lexer.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lwbasic/lexer.c Mon Jan 24 20:08:09 2011 -0700
@@ -0,0 +1,216 @@
+/*
+lexer.c
+
+Copyright © 2011 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+This handles the gritty details of parsing tokens
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <lw_alloc.h>
+#include <lw_string.h>
+
+#define __lexer_c_seen__
+#include "lwbasic.h"
+
+/*
+A token identifier is stored in state->lexer_token by lexer(). The actual
+string value is found in state->lexer_token_string; if the token has an integer
+value, it will be found in state->lexer_token_number in the appropriate "value"
+slot.
+*/
+
+struct token_list
+{
+ char *string; /* keyword spelling in lowercase; a NULL string terminates a table */
+ int token; /* token_* value stored in state->lexer_token when the word matches */
+};
+
+static struct token_list lexer_global_tokens[] = {	/* keywords, lowercase: lexer_word() lowercases the word before comparing */
+	{ "function", token_kw_function },
+	{ "sub", token_kw_sub },
+	{ "public", token_kw_public },
+	{ "private", token_kw_private },
+	{ "as", token_kw_as },
+	{ "params", token_kw_params },
+	{ "returns", token_kw_returns },
+	{ "integer", token_kw_integer },	/* the only type keyword parse_type() accepts */
+	{ NULL }	/* sentinel: the lookup loop stops at a NULL string */
+};
+
+/* Fetch the next raw character from the input layer. A result of -2 from */
+/* input_getchar() is treated as a read error and is fatal; anything else is passed on. */
+static int lexer_getchar(cstate *state)
+{
+	int c = input_getchar(state);
+
+	if (c == -2)
+		lwb_error("Error reading input stream.\n");
+	return c;
+}
+
+static void lexer_nextchar(cstate *state)
+{
+	state -> lexer_curchar = lexer_getchar(state);	/* advance to the next input character */
+	if (state -> lexer_ignorechar && state -> lexer_curchar == state -> lexer_ignorechar)	/* 0 = nothing pending, so a NUL byte is never dropped */
+		state -> lexer_curchar = lexer_getchar(state);	/* skip the one character the caller asked to ignore */
+	state -> lexer_ignorechar = 0;	/* the request covers a single character only */
+}
+
+/* Return the current character, reading one first while lexer_curchar still holds -1. */
+static int lexer_curchar(cstate *state)
+{
+	if (state -> lexer_curchar != -1)
+		return state -> lexer_curchar;
+
+	lexer_nextchar(state);
+	return state -> lexer_curchar;
+}
+
+/* Consume NUL, space and tab characters; on return the current character is */
+/* the first one that is none of those. */
+static void lexer_skip_white(cstate *state)
+{
+	int ch = lexer_curchar(state);
+
+	while (ch == 0 || ch == ' ' || ch == '\t')
+	{
+		lexer_nextchar(state);
+		ch = lexer_curchar(state);
+	}
+}
+
+/* Lex an identifier or keyword; must not be called unless the word will be non-zero length */
+static void lexer_word(cstate *state)
+{
+	int wordlen = 0;
+	int wordpos = 0;
+	char *word = NULL;
+	int c;
+	struct token_list *tok = NULL;
+
+	for (;;) {
+		c = lexer_curchar(state);
+		if (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80)
+		{
+			/* part of the word; grow one slot early so the terminating NUL always fits */
+			if (wordpos + 1 >= wordlen)
+			{
+				word = lw_realloc(word, wordlen + 32);
+				wordlen += 32;
+			}
+			word[wordpos++] = c;
+		}
+		else
+			break;
+
+		lexer_nextchar(state);
+	}
+
+	word[wordpos] = 0;
+	lw_free(state -> lexer_token_string);
+	state -> lexer_token_string = lw_strdup(word);
+
+	switch (state -> parser_state)
+	{
+	default:
+		tok = lexer_global_tokens;
+	}
+
+	/* check for tokens if appropriate */
+	/* force lowercase for the comparison; lexer_token_string keeps the original spelling */
+	if (tok)
+	{
+		for (c = 0; word[c]; c++)
+			if (word[c] >= 'A' && word[c] <= 'Z')
+				word[c] = word[c] + 0x20;
+
+		while (tok -> string)
+		{
+			if (strcmp(tok -> string, word) == 0)
+				break;
+			tok++;
+		}
+	}
+
+	lw_free(word);
+	if (tok && tok -> string)
+		state -> lexer_token = tok -> token;
+	else
+		state -> lexer_token = token_identifier;
+}
+
+static void lexer_empty_token(cstate *state)
+{
+ lw_free(state -> lexer_token_string); /* discard the previous token's text */
+ state -> lexer_token_string = NULL; /* lexer() leaves this NULL for EOL and EOF tokens */
+}
+
+/* Fetch the next token: sets state->lexer_token and, for words and single */
+/* characters, state->lexer_token_string (left NULL for EOL and EOF). */
+void lexer(cstate *state)
+{
+	int c;
+
+	lexer_skip_white(state);
+	lexer_empty_token(state);
+
+	c = lexer_curchar(state);
+	if (c == -1)
+	{
+		state -> lexer_token = token_eof;
+		return;
+	}
+
+	if (c == '\n')
+	{
+		/* LF; arm the ignore character before advancing so LF CR is a single EOL */
+		state -> lexer_ignorechar = '\r';
+		lexer_nextchar(state);
+		state -> lexer_token = token_eol;
+		return;
+	}
+
+	if (c == '\r')
+	{
+		/* CR; likewise swallow the LF of a CR LF pair */
+		state -> lexer_ignorechar = '\n';
+		lexer_nextchar(state);
+		state -> lexer_token = token_eol;
+		return;
+	}
+
+	if (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80)
+	{
+		/* we have a word here; identifier, keyword, etc. */
+		lexer_word(state);
+		return;
+	}
+
+	/* return the character if all else fails */
+	state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2);
+	state -> lexer_token_string[0] = c;
+	state -> lexer_token_string[1] = 0;
+	lexer_nextchar(state);
+	state -> lexer_token = token_char;
+}
diff -r 421d7ceb4d86 -r 87590f43e76d lwbasic/lwbasic.h
--- a/lwbasic/lwbasic.h Mon Jan 24 18:31:07 2011 -0700
+++ b/lwbasic/lwbasic.h Mon Jan 24 20:08:09 2011 -0700
@@ -26,18 +26,73 @@
#ifndef __lwbasic_h_seen__
#define __lwbasic_h_seen__
+#include <stdint.h>
+
+/* note: integer and uinteger will be the same for positive values from 0
+through 0x7FFFFFFF; the unsigned member should be used while converting ASCII
+digits, and then, if a negative value was discovered, the result should be
+negated IFF it is in range. */
+
+union lexer_numbers
+{
+ uint32_t uinteger; /* the value viewed as unsigned 32 bits */
+ int32_t integer; /* the same storage viewed as signed 32 bits */
+};
+
typedef struct
{
char *output_file;
char *input_file;
int debug_level;
+
+ char *lexer_token_string;
+ union lexer_numbers lexer_token_number;
+ int lexer_token;
+ int lexer_curchar;
+ int lexer_ignorechar;
+
+ int parser_state;
void *input_state;
} cstate;
+/* parser states; stored in cstate.parser_state */
+enum
+{
+ parser_state_global = 0, /* only global decls allowed */
+ parser_state_error /* NOTE(review): not referenced elsewhere in this patch; presumably marks a failed parse - confirm */
+};
+
+/* token types; stored in cstate.lexer_token by lexer() */
+enum
+{
+ token_kw_sub, /* SUB keyword */
+ token_kw_function, /* FUNCTION keyword */
+ token_kw_as, /* AS keyword */
+ token_kw_public, /* PUBLIC keyword */
+ token_kw_private, /* PRIVATE keyword */
+ token_kw_params, /* PARAMS keyword */
+ token_kw_returns, /* RETURNS keyword */
+ token_kw_integer, /* INTEGER keyword */
+ token_identifier, /* an identifier (variable, function, etc.) */
+ token_char, /* single character; fallback */
+ token_uint, /* unsigned integer up to 32 bits (not yet produced by lexer()) */
+ token_int, /* signed integer up to 32 bits (not yet produced by lexer()) */
+ token_eol, /* end of line */
+ token_eof /* end of file */
+};
+
#ifndef __input_c_seen__
extern int input_getchar(cstate *state);
#endif
+#ifndef __main_c_seen__
+extern void lwb_error(const char *fmt, ...);
+#endif
+
+#ifndef __lexer_c_seen__
+extern void lexer(cstate *state);
+#endif
+
#endif /* __lwbasic_h_seen__ */
diff -r 421d7ceb4d86 -r 87590f43e76d lwbasic/main.c
--- a/lwbasic/main.c Mon Jan 24 18:31:07 2011 -0700
+++ b/lwbasic/main.c Mon Jan 24 20:08:09 2011 -0700
@@ -25,11 +25,13 @@
#include
#include
+#include <stdarg.h>
#include
#include
#include
+#define __main_c_seen__
#include "lwbasic.h"
#define PROGVER "lwbasic from " PACKAGE_STRING
@@ -90,11 +92,26 @@
PROGVER
};
+extern void compiler(cstate *state);
+
int main(int argc, char **argv)
{
cstate state = { 0 };
lw_cmdline_parse(&cmdline_parser, argc, argv, 0, 0, &state);
+ compiler(&state);
+
exit(0);
}
+
+void lwb_error(const char *fmt, ...)
+{
+ va_list args;
+ /* fatal error reporter: print the printf-style message to stderr, then exit */
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args); /* no newline is appended; callers supply their own */
+ va_end(args);
+
+ exit(1); /* this function never returns */
+}
diff -r 421d7ceb4d86 -r 87590f43e76d lwbasic/rules.make
--- a/lwbasic/rules.make Mon Jan 24 18:31:07 2011 -0700
+++ b/lwbasic/rules.make Mon Jan 24 20:08:09 2011 -0700
@@ -1,7 +1,7 @@
dirname := $(dir $(lastword $(MAKEFILE_LIST)))
lwbasic_dir := $(dirname)
-lwbasic_lsrcs := main.c input.c
+lwbasic_lsrcs := main.c input.c compiler.c lexer.c
lwbasic_srcs := $(addprefix $(dirname),$(lwbasic_lsrcs))
lwbasic_objs := $(lwbasic_srcs:.c=.o)