Add compiler and vm-translator

2021-01-04 17:00:48 -03:00
commit 553c87029f
46 changed files with 4767 additions and 0 deletions
--- a/tokenizer/tokenizer-tables.h
+++ b/tokenizer/tokenizer-tables.h
@@ -0,0 +1,19 @@
+#ifndef TOKENIZER_TABLES_H
+#define TOKENIZER_TABLES_H
+#include "util.h"
+
+
+// Reserved words recognised by the tokenizer; a token whose text matches one
+// of these entries is classified as a keyword.
+// NOTE(review): this header defines objects rather than declaring them, so it
+// should be included from a single translation unit only.
+const char* keywordsraw[] = {
+	"class", "constructor", "function", "method", "field", "static",
+	"var", "int", "char", "boolean", "void", "true", "false", "null",
+	"this", "let", "do", "if", "else", "while", "return"
+};
+// Defines `keywords`, the object the tokenizer passes to existsinarray().
+// mkstrlist is not defined in this header; presumably it comes from util.h
+// and wraps the raw array together with its length -- confirm there.
+mkstrlist(keywords, keywordsraw);
+
+// Single-character symbols recognised by the tokenizer (brackets,
+// punctuation and operators).
+const char* symbolsraw[] = {
+	"{", "}", "(", ")", "[", "]", ".", ",", ";", "+", "-", "*", "/",
+	"&", "|", "<", ">", "=", "~"
+};
+// Defines `symbols`, the object the tokenizer passes to existsinarray().
+// mkstrlist is not defined in this header; presumably it comes from util.h
+// -- confirm there.
+mkstrlist(symbols, symbolsraw);
+
+#endif 
--- a/tokenizer/tokenizer.c
+++ b/tokenizer/tokenizer.c
@@ -0,0 +1,257 @@
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include "tokenizer.h"
+#include "tokenizer-tables.h"
+
+// Data types
+// Coarse class of a single input character, used to decide where one token
+// ends and the next begins:
+//   common     - letters, digits, '_' and '"' (see getchartype)
+//   charsymbol - any other non-whitespace character
+//   space      - whitespace as reported by isspace()
+typedef enum {
+	common, charsymbol, space
+} CHARTYPE;
+
+// Growable character buffer used to accumulate the text of the token that
+// is currently being read.
+typedef struct {
+	char* str;  // heap buffer holding the characters
+	int size;   // bytes currently allocated for str
+	int count;  // bytes currently stored in str (includes the '\0' once appended)
+} STRING;
+
+// String manipulation
+STRING* mkstring(int size);          // allocate a buffer with `size` bytes of capacity
+void append(STRING* s, char c);      // push one character, growing the buffer if needed
+void freestr(STRING* str);           // release the buffer and the STRING itself
+
+// Token manipulation
+// Both append functions fill `curitem`, link a freshly allocated node after
+// it and return that new node; they also reset token->count to 0.
+TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type);
+TOKEN* appendtoken(TOKEN* curitem, STRING* token, char* file, int definedat);
+// Allocates an uninitialized TOKEN node.
+#define mktoken() (TOKEN*)malloc(sizeof(TOKEN))
+
+// Char types
+CHARTYPE getchartype(unsigned char c);
+bool iskeyword(STRING* tk);
+bool issymbol(STRING* tk);
+bool isint(char* str);
+bool isintcons(STRING* tk);
+bool isidentifier(STRING* tk);
+// Classifies a finished token; prints an error and exits if nothing matches.
+TOKENTYPE gettokentype(STRING* tk, char* file, int definedat);
+
+// Stream handling
+void skipln(FILE* input);                         // discard the rest of the current line
+void skipmultiln(FILE* input, int* lnscount);     // discard up to the closing "*/"
+bool handlecomment(FILE* input, int* lnscount);   // called after a '/' has been read
+void readstr(FILE* input, STRING* tmp, int definedat); // read a string body up to '"'
+
+// String manipulation
+// Allocates an empty growable string with room for `size` bytes.
+//
+// size: initial capacity in bytes; values below 1 are raised to 1 so that
+//       the buffer allocation is never zero-sized.
+// Returns a heap-allocated STRING with count == 0; release it with freestr().
+// Prints a message to stderr and exits with status 1 if memory is exhausted.
+STRING* mkstring(int size) {
+	if(size < 1)
+		size = 1;
+
+	STRING* str = malloc(sizeof *str);
+	if(str == NULL) {
+		fprintf(stderr, "Out of memory\n");
+		exit(1);
+	}
+
+	str->size = size; // initial size, in bytes
+	str->count = 0;
+	str->str = malloc(str->size);
+	if(str->str == NULL) {
+		fprintf(stderr, "Out of memory\n");
+		exit(1);
+	}
+	return str;
+}
+
+// Appends one character to `s`, growing the buffer when it is nearly full.
+//
+// The buffer is grown to twice the required size whenever the capacity is
+// not strictly larger than the new length, so there is always at least one
+// spare byte after the stored characters.
+// Prints a message to stderr and exits with status 1 if the buffer cannot
+// be grown.
+void append(STRING* s, char c) {
+	int needed = s->count + 1;
+	if(s->size <= needed) {
+		int newsize = needed * 2;
+		// Keep the old pointer until realloc is known to have succeeded.
+		char* grown = realloc(s->str, newsize);
+		if(grown == NULL) {
+			fprintf(stderr, "Out of memory\n");
+			exit(1);
+		}
+		s->str = grown;
+		s->size = newsize;
+	}
+
+	s->str[s->count] = c;
+	s->count++;
+}
+
+// Releases a STRING created by mkstring(), including its character buffer.
+// Passing NULL is allowed and does nothing, mirroring free().
+void freestr(STRING* str) {
+	if(str == NULL)
+		return;
+	free(str->str);
+	free(str);
+}
+
+// Token manipulation
+// Stores the accumulated text in `curitem` and links a new empty node after it.
+//
+// curitem:   node to fill in; it receives its own copy of the token text.
+// token:     accumulated text; must already be '\0'-terminated, with
+//            token->count including that terminator. Its count is reset to
+//            0 so the buffer can be reused for the next token.
+// definedat: line number recorded on the node.
+// type:      classification recorded on the node.
+// Returns the newly allocated node that now follows `curitem`. Its `token`
+// and `next` fields are NULL until it is filled in by a later call.
+// Prints a message to stderr and exits with status 1 if memory is exhausted.
+TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type) {
+	curitem->token = malloc(sizeof(char)*token->count);
+	if(curitem->token == NULL) {
+		fprintf(stderr, "Out of memory\n");
+		exit(1);
+	}
+	strcpy(curitem->token, token->str);
+	curitem->definedat = definedat;
+	curitem->type = type;
+
+	TOKEN* nextitem = mktoken();
+	if(nextitem == NULL) {
+		fprintf(stderr, "Out of memory\n");
+		exit(1);
+	}
+	// Leave the placeholder node in a defined state.
+	nextitem->token = NULL;
+	nextitem->next = NULL;
+
+	curitem->next = nextitem;
+	token->count = 0;
+	return nextitem;
+}
+
+// Frees every node of a token list together with each node's text.
+//
+// t: head of the list; NULL is accepted and treated as an empty list.
+// The list is walked iteratively so that very long token lists cannot
+// exhaust the call stack.
+void freetokens(TOKEN* t) {
+	while(t != NULL) {
+		TOKEN* next = t->next;
+		free(t->token);
+		free(t);
+		t = next;
+	}
+}
+
+// Terminates the accumulated text, classifies it and stores it in `curitem`.
+// Returns the new empty node linked after `curitem`. gettokentype() ends the
+// program if the text does not form a valid token.
+TOKEN* appendtoken(TOKEN* curitem, STRING* token, char* file, int definedat) {
+	append(token, '\0');
+	TOKENTYPE kind = gettokentype(token, file, definedat);
+	return appendtokenraw(curitem, token, definedat, kind);
+}
+
+// Char types
+// Maps a character to its coarse class: whitespace is `space`; letters,
+// digits, '_' and '"' are `common`; everything else is `charsymbol`.
+CHARTYPE getchartype(unsigned char c) {
+	if(isspace(c))
+		return space;
+
+	bool wordchar = isalnum(c) || c == '_' || c == '"';
+	return wordchar ? common : charsymbol;
+}
+
+// True when the token text is found in the `keywords` list.
+bool iskeyword(STRING* tk) {
+	char* word = tk->str;
+	return existsinarray(&keywords, word);
+}
+
+// True when the token is exactly one character (count of 2 includes the
+// terminating '\0') and that character is found in the `symbols` list.
+bool issymbol(STRING* tk) {
+	return tk->count == 2 && existsinarray(&symbols, tk->str);
+}
+
+// True when every character of `str` is a decimal digit.
+//
+// str: '\0'-terminated text. An empty string contains no non-digit
+//      character and therefore yields true.
+// Each character is converted to unsigned char before the <ctype.h> call;
+// passing a negative plain char to isdigit() is undefined behaviour.
+bool isint(char* str) {
+	for(char* p = str; *p != '\0'; p++)
+		if(!isdigit((unsigned char)*p))
+			return false;
+	return true;
+}
+
+// True when the token is an integer constant in the range 0..32767.
+//
+// The text must consist solely of decimal digits. strtol() is used instead
+// of atoi() because it saturates at LONG_MAX on overflow, so an arbitrarily
+// long run of digits is rejected instead of causing undefined behaviour.
+bool isintcons(STRING* tk) {
+	if(!isint(tk->str))
+		return false;
+	long val = strtol(tk->str, NULL, 10);
+	return val >= 0 && val <= 32767;
+}
+
+// True when the token is a valid identifier: it does not start with a digit
+// and contains only letters, digits and underscores.
+//
+// tk->count includes the terminating '\0', which is excluded from the scan.
+// Characters are converted to unsigned char before the <ctype.h> calls;
+// passing a negative plain char to them is undefined behaviour.
+bool isidentifier(STRING* tk) {
+	if(isdigit((unsigned char)tk->str[0]))
+		return false;
+
+	int len = tk->count - 1;
+	for(int i = 0; i < len; i++) {
+		unsigned char ch = (unsigned char)tk->str[i];
+		if(!isalnum(ch) && ch != '_')
+			return false;
+	}
+	return true;
+}
+
+// Classifies a finished token, trying keyword, symbol, integer constant and
+// identifier in that order. If none applies, reports the offending text with
+// its file and line and terminates the program with status 1.
+TOKENTYPE gettokentype(STRING* tk, char* file, int definedat) {
+	TOKENTYPE kind;
+
+	if(iskeyword(tk))
+		kind = keyword;
+	else if(issymbol(tk))
+		kind = symbol;
+	else if(isintcons(tk))
+		kind = integer;
+	else if(isidentifier(tk))
+		kind = identifier;
+	else {
+		eprintf("Unexpected token '%s'; file '%s', line %i\n", tk->str, file, definedat);
+		exit(1);
+	}
+
+	return kind;
+}
+
+// Stream handling
+// Discards characters up to and including the next newline.
+//
+// Stops at end of file as well. The result of fgetc() is kept in an int so
+// that EOF can be told apart from a real character; storing it in an
+// unsigned char made the loop spin forever when the last line had no
+// trailing newline.
+void skipln(FILE* input) {
+	int c;
+	while((c = fgetc(input)) != EOF)
+		if(c == '\n')
+			break;
+}
+
+// Discards characters up to and including the closing "*/" of a block
+// comment, counting the newlines that are skipped.
+//
+// input:    stream positioned inside the comment body.
+// lnscount: incremented once for every '\n' consumed.
+// The previous character is remembered instead of reading ahead after each
+// '*', so "**/" closes the comment and a newline directly after a '*' is
+// still counted. Stops at end of file if the comment is never closed.
+void skipmultiln(FILE* input, int* lnscount) {
+	int prev = 0;
+	int c;
+	while((c = fgetc(input)) != EOF) {
+		if(c == '\n')
+			(*lnscount)++;
+		else if(c == '/' && prev == '*')
+			break;
+		prev = c;
+	}
+}
+
+// Decides whether a '/' that was just read starts a comment and, if so,
+// skips the whole comment.
+//
+// input:    stream positioned right after the '/'.
+// lnscount: line counter, advanced for every line the comment covers.
+// Returns true when a comment was consumed. Returns false when the '/' is
+// an ordinary symbol; the character read ahead is then pushed back so the
+// caller sees it next.
+//
+// "//" runs to the end of the line. "/*" opens a block comment, which also
+// covers the "/**" form. Only one character is ever pushed back, which is
+// all that ungetc() guarantees, and EOF is never pushed back as if it were
+// a character.
+bool handlecomment(FILE* input, int* lnscount) {
+	int nextc = fgetc(input);
+	if(nextc == '/') {
+		skipln(input);
+		(*lnscount)++; // skipln consumed the newline
+		return true;
+	}
+	if(nextc == '*') {
+		skipmultiln(input, lnscount);
+		return true;
+	}
+	if(nextc != EOF)
+		ungetc(nextc, input);
+	return false;
+}
+
+// Reads the body of a string constant into `tmp`, up to the closing '"'.
+//
+// input:     stream positioned right after the opening '"'.
+// tmp:       receives the characters followed by a terminating '\0'; the
+//            quotes themselves are not stored.
+// definedat: line number used in error messages.
+// A string may not span lines: reaching a newline or the end of the file
+// before the closing quote prints an error and exits with status 1. The
+// fgetc() result is kept in an int so that end of file is detected instead
+// of being appended as a character forever.
+void readstr(FILE* input, STRING* tmp, int definedat) {
+	int c;
+	while((c = fgetc(input)) != EOF && c != '"') {
+		if(c == '\n') {
+			eprintf("Unexpected end of line; line %i\n", definedat);
+			exit(1);
+		}
+		append(tmp, c);
+	}
+	if(c == EOF) {
+		eprintf("Unexpected end of file; line %i\n", definedat);
+		exit(1);
+	}
+	append(tmp, '\0');
+}
+
+// Splits the contents of `file` into a linked list of tokens.
+//
+// file: path of the source file to read.
+// Returns the head of a heap-allocated token list whose last node has
+// next == NULL; the caller releases it with freetokens().
+// Prints an error and exits with status 1 when the file cannot be opened,
+// contains no tokens, or contains text that is not a valid token.
+//
+// Characters are grouped by class (see getchartype): a run of `common`
+// characters forms one token, while each `charsymbol` character is a token
+// of its own. Whitespace, comments and string constants end the token that
+// is being accumulated.
+TOKEN* tokenize(char* file) {
+	FILE* input = fopen(file, "r");
+	if(input == NULL) {
+		eprintf("Could not open file '%s'\n", file);
+		exit(1);
+	}
+
+	TOKEN* head = mktoken();
+	if(head == NULL) {
+		eprintf("Out of memory\n");
+		exit(1);
+	}
+	TOKEN* lastitem = head; // last node that holds a real token
+	TOKEN* curitem = head;  // empty node waiting for the next token
+
+	STRING* tmptoken = mkstring(200);
+	CHARTYPE lasttype = space;
+	CHARTYPE curtype;
+
+	int lnscount = 1;
+
+	// fgetc() returns an int so that EOF is distinguishable from any byte.
+	int c;
+	while((c = fgetc(input)) != EOF) {
+		if(c == '/') {
+			// Remember where the comment starts: handlecomment advances
+			// lnscount while skipping it.
+			int startline = lnscount;
+			if(handlecomment(input, &lnscount)) {
+				// A comment separates tokens just like whitespace does.
+				if(lasttype != space) {
+					lastitem = curitem;
+					curitem = appendtoken(curitem, tmptoken, file, startline);
+				}
+				lasttype = space;
+				continue;
+			}
+		}
+		else if(c == '"') {
+			if(lasttype != space)
+				curitem = appendtoken(curitem, tmptoken, file, lnscount);
+			readstr(input, tmptoken, lnscount);
+			lastitem = curitem;
+			curitem = appendtokenraw(curitem, tmptoken, lnscount, string);
+			lasttype = space;
+			continue;
+		}
+
+		curtype = getchartype(c);
+
+		if(curtype == common) {
+			if(lasttype == charsymbol) {
+				lastitem = curitem;
+				curitem = appendtoken(curitem, tmptoken, file, lnscount);
+			}
+			append(tmptoken, c);
+		} else {
+			if(lasttype != space){
+				lastitem = curitem;
+				curitem = appendtoken(curitem, tmptoken, file, lnscount);
+			}
+			if(curtype == charsymbol)
+				append(tmptoken, c);
+		}
+
+		// Count the newline only after the token that precedes it has been
+		// stored, so that token is attributed to the line it is on.
+		if(c == '\n')
+			lnscount++;
+		lasttype = curtype;
+	}
+
+	// Store the token still being accumulated when the file ended.
+	if(lasttype != space) {
+		lastitem = curitem;
+		curitem = appendtoken(curitem, tmptoken, file, lnscount);
+	}
+
+	if(curitem == head) {
+		eprintf("File '%s' is empty\n", file);
+		exit(1);
+	}
+
+	// Drop the trailing placeholder node.
+	lastitem->next = NULL;
+	free(curitem);
+	freestr(tmptoken);
+	fclose(input);
+	return head;
+}
--- a/tokenizer/tokenizer.h
+++ b/tokenizer/tokenizer.h
@@ -0,0 +1,21 @@
+#ifndef TOKENIZER_H
+#define TOKENIZER_H
+#include <stdio.h>
+
+/* tokenizer
+ * Simple tool that splits a stream into many tokens. */
+
+/* Category assigned to each token:
+ *   keyword    - reserved word
+ *   identifier - name made of letters, digits and '_', not starting with a digit
+ *   symbol     - single-character punctuation or operator
+ *   integer    - decimal constant in the range 0..32767
+ *   string     - text of a double-quoted string constant, quotes removed */
+typedef enum {
+	keyword, identifier, symbol, integer, string
+} TOKENTYPE;
+
+/* One node of the singly linked token list produced by tokenize(). */
+typedef struct token {
+	char* token;         /* heap-allocated, '\0'-terminated token text */
+	TOKENTYPE type;      /* category of the token */
+	int definedat;       /* line number recorded for the token */
+	struct token* next;  /* following token, or NULL at the end of the list */
+} TOKEN;
+
+/* Reads the file `filename` and returns the head of its token list.
+ * Terminates the program with an error message if the file yields no
+ * tokens or contains text that is not a valid token. */
+TOKEN* tokenize(char* filename);
+/* Frees a token list returned by tokenize(), including each token's text. */
+void freetokens(TOKEN* t);
+#endif