Reorganize tokenizer

2020-12-21 18:11:23 -03:00 · 2020-12-21 18:11:23 -03:00 · c629a01b59
commit c629a01b59
parent c3df97b04b
7 changed files with 92 additions and 57 deletions
--- a/compiler-scopes.c
+++ b/compiler-scopes.c
@ -79,18 +79,18 @@ VARDEC* tovardec(OBJ* obj) {
 void doubledeclaration(char* name, DEBUGINFO* debug, OBJ* other) {
 	DEBUGINFO* debugother = other->getdebug(other);
-	fprintf(stderr, "Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n",
+	eprintf("Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n",
 				name, debug->file, debug->definedat, debugother->file, debugother->definedat);
 	exit(1);
 }
 void notdeclared(char* name, DEBUGINFO* debug) {
-	fprintf(stderr, "'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat);
+	eprintf("'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat);
 	exit(1);
 }
 void invalidparent(SUBROUTCALL* call) {
-	fprintf(stderr, "Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat);
+	eprintf("Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat);
 	exit(1);
 }
--- a/compiler.c
+++ b/compiler.c
@ -63,11 +63,11 @@ LINE* mathopln(char op) {
 		return onetoken("and");
 	if(op == '/') {
 		char* tokens[] = { "call", "Math.divide", "2" };
-		return mksimpleln(tokens, sizeof(tokens) / sizeof(char*));
+		return mksimpleln(tokens, strcount(tokens));
 	}
 	if(op == '*') {
 		char* tokens[] = { "call", "Math.multiply", "2" };
-		return mksimpleln(tokens, sizeof(tokens) / sizeof(char*));
+		return mksimpleln(tokens, strcount(tokens));
 	}
 }
@ -77,7 +77,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) {
 	if(e->type == intconstant) {
 		char* tokens[] = { "push", "constant", itoa(e->integer) };
-		myblk = mklnblk(mksimpleln(tokens, sizeof(tokens) / sizeof(char*)));
+		myblk = mklnblk(mksimpleln(tokens, strcount(tokens)));
 	}
 	else if(e->type == unaryopterm) {
 		myblk = compileexpression(s, e->expression);
@ -88,7 +88,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) {
 		myblk = compileexpression(s, e->expression);
 	}
 	else {
-		fprintf(stderr, "Unsupported term yet %i\n", e->type);
+		eprintf("Unsupported term yet %i\n", e->type);
 		exit(1);
 	}
@ -178,7 +178,7 @@ LINEBLOCK* compilestatement(SCOPE* s, CLASS* c, STATEMENT* st) {
 	else if(st->type == returnstatement)
 		return compileret(s, st->retst);
 	else {
-		fprintf(stderr, "UNSUPPORTED\n");
+		eprintf("UNSUPPORTED\n");
 		exit(1);
 	}
 }
--- a/parser.c
+++ b/parser.c
@ -42,7 +42,7 @@ const char* tokentypes[] = {
 DEBUGINFO* getdebug(PARSER* p) {
 	DEBUGINFO* d = (DEBUGINFO*)malloc(sizeof(DEBUGINFO));
 	d->file = p->file;
-	d->definedat = p->current->truen;
+	d->definedat = p->current->definedat;
 	return d;
 }
@ -59,7 +59,7 @@ void restorecp(PARSER* p) {
 }
 void unexpectedtoken(PARSER* p) {
-	fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->truen, p->file);
+	fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->definedat, p->file);
 }
 void unexpected(PARSER* p) {
@ -75,7 +75,7 @@ void checkcontent(PARSER* p, const char* content) {
 void checktype(PARSER* p, TOKENTYPE type) {
 	if(p->current->type != type) {
-		fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->truen, p->file);
+		fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->definedat, p->file);
 		exit(1);
 	}
 }
--- a/tokenizer-tables.h
+++ b/tokenizer-tables.h
@ -1,17 +1,18 @@
-#ifndef TOKENS_H
+#ifndef TOKENIZER_TABLES_H
-#define TOKENS_H
+#define TOKENIZER_TABLES_H
 #include "util.h"
 const char* keywords[] = {
 	"class", "constructor", "function", "method", "field", "static",
 	"var", "int", "char", "boolean", "void", "true", "false", "null",
 	"this", "let", "do", "if", "else", "while", "return"
 };
-const int keywordssize = sizeof(keywords) / sizeof(char*);
+// Number of entries in keywords[]; strcount needs the array itself, not a pointer to it.
+const int keywordssize = strcount(keywords);
 const char* symbols[] = {
 	"{", "}", "(", ")", "[", "]", ".", ",", ";", "+", "-", "*", "/",
 	"&", "|", "<", ">", "=", "~"
 };
-const int symbolssize = sizeof(symbols) / sizeof(char*);
+const int symbolssize = strcount(symbols);
 #endif 
--- a/tokenizer.c
+++ b/tokenizer.c
@ -2,9 +2,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdbool.h>
 #include "tokens.h"
 #include "tokenizer.h"
 #include "tokenizer-tables.h"
 // Data types
 // CHARTYPE: coarse class of a single input character, as returned by getchartype():
 //   space      - characters for which isspace() is true
 //   common     - alphanumerics, '_' and '"'
 //   charsymbol - every other character
 typedef enum {
 	common, charsymbol, space
 } CHARTYPE;
@ -15,14 +16,38 @@ typedef struct {
 	int count;
 } STRING;
-TOKEN* mktokenlist() {
+// String manipulation
-	return (TOKEN*)malloc(sizeof(TOKEN));
+STRING* mkstring(int size);
-}
+void append(STRING* s, char c);
 void freestr(STRING* str);
-CHARTYPE getchartype(unsigned char c) {
+// Token manipulation
-	if(isspace(c)) return space;
+TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type);
-	if(isalnum(c) || c == '_' || c == '"') return common;
+TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat);
-	return charsymbol;
+// Allocates one TOKEN node with malloc; its fields are left uninitialized.
+// The expansion is fully parenthesized so it stays a single operand wherever it is used.
+#define mktoken() ((TOKEN*)malloc(sizeof(TOKEN)))
 // Char types
 CHARTYPE getchartype(unsigned char c);
 bool iskeyword(STRING* tk);
 bool issymbol(STRING* tk);
 bool isint(char* str);
 bool isintcons(STRING* tk);
 bool isidentifier(STRING* tk);
 TOKENTYPE gettokentype(STRING* tk, int definedat);
 // Stream handling
 void skipln(FILE* input);
 void skipmultiln(FILE* input, int* lnscount);
 bool handlecomment(FILE* input, int* lnscount);
 void readstr(FILE* input, STRING* tmp, int definedat);
 // String manipulation
 // Allocates an empty STRING whose character buffer has room for `size` chars.
 // Both the struct and its buffer come from malloc.
 STRING* mkstring(int size) {
 	STRING* fresh = malloc(sizeof *fresh);
 	fresh->count = 0; // nothing stored yet
 	fresh->size = sizeof(char) * size; // initial size
 	fresh->str = malloc(fresh->size);
 	return fresh;
 }
 void append(STRING* s, char c) {
@ -36,12 +61,33 @@ void append(STRING* s, char c) {
 	s->count++;
 }
-STRING* mkstring(int size) {
+void freestr(STRING* str) {
-	STRING* str = (STRING*)malloc(sizeof(STRING));
+	free(str->str);
-	str->size = sizeof(char) * size; // initial size
+	free(str);
-	str->str = (char*)malloc(str->size);
+}
-	str->count = 0;
+
-	return str;
+// Token manipulation
 // Stores a copy of the text held in `token`, together with its line number and type, in
 // `curitem`; links a newly allocated node after `curitem`; resets `token` so it can be
 // reused; and returns the new node.
 // The copy is sized by token->count and made with strcpy, so token->str is expected to be
 // NUL-terminated with the terminator counted in token->count.
 TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type) {
 	curitem->type = type;
 	curitem->definedat = definedat;
 	char* text = (char*)malloc(sizeof(char) * token->count);
 	strcpy(text, token->str);
 	curitem->token = text;
 	token->count = 0; // the accumulator is empty again for the next token
 	TOKEN* tail = mktoken();
 	curitem->next = tail;
 	return tail;
 }
 // Terminates the text accumulated in `token`, classifies it with gettokentype, and hands
 // it to appendtokenraw to be stored in `curitem`. Returns whatever appendtokenraw returns
 // (the node linked after `curitem`).
 TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat) {
 	append(token, '\0'); // terminate first: the text is read as a C string from here on
 	TOKENTYPE kind = gettokentype(token, definedat);
 	return appendtokenraw(curitem, token, definedat, kind);
 }
 // Char types
 // Classifies one character: whitespace is `space`; alphanumerics, '_' and '"' are
 // `common`; everything else is `charsymbol`.
 CHARTYPE getchartype(unsigned char c) {
 	if(isspace(c))
 		return space;
 	bool wordlike = isalnum(c) || c == '_' || c == '"';
 	return wordlike ? common : charsymbol;
 }
 bool iskeyword(STRING* tk) {
@ -88,31 +134,16 @@ bool isidentifier(STRING* tk) {
 	return true;
 }
-TOKENTYPE gettokentype(STRING* tk, int truen) {
+TOKENTYPE gettokentype(STRING* tk, int definedat) {
 	if(iskeyword(tk)) return keyword;
 	if(issymbol(tk)) return symbol;
 	if(isintcons(tk)) return integer;
 	if(isidentifier(tk)) return identifier;
-	fprintf(stderr, "Unexpected token '%s'; line %i\n", tk->str, truen);
+	eprintf("Unexpected token '%s'; line %i\n", tk->str, definedat);
 	exit(1);
 }
-TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int truen, TOKENTYPE type) {
+// Stream handling
 	curitem->token = (char*)malloc(sizeof(char)*token->count);
 	strcpy(curitem->token, token->str);
 	curitem->truen = truen;
 	curitem->type = type;
 	TOKEN* nextitem = mktokenlist();
 	curitem->next = nextitem;
 	token->count = 0;
 	return nextitem;
 }
 TOKEN* appendtoken(TOKEN* curitem, STRING* token, int truen) {
 	append(token, '\0');
 	return appendtokenraw(curitem, token, truen, gettokentype(token, truen));
 }
 void skipln(FILE* input) {
 	unsigned char c;
 	while(c = fgetc(input), c != '\0')
@ -149,11 +180,11 @@ bool handlecomment(FILE* input, int* lnscount) {
 	return false;
 }
-void readstr(FILE* input, STRING* tmp, int truen) {
+void readstr(FILE* input, STRING* tmp, int definedat) {
 	unsigned char c;
 	while(c = fgetc(input), c != '\0') {
 		if(c == '\n') {
-			fprintf(stderr, "Unexpected end of line; line %i", truen);
+			eprintf("Unexpected end of line; line %i", definedat);
 			exit(1);
 		}
 		if(c == '"')
@ -163,13 +194,8 @@ void readstr(FILE* input, STRING* tmp, int truen) {
 	append(tmp, '\0');
 }
 // Releases a STRING: first its character buffer, then the struct itself.
 // `str` must not be used after this call.
 void freestr(STRING* str) {
 	free(str->str);
 	free(str);
 }
 TOKEN* tokenize(FILE* input) {
-	TOKEN* head = mktokenlist();
+	TOKEN* head = mktoken();
 	TOKEN* lastitem = head;
 	TOKEN* curitem = head;
--- a/tokenizer.h
+++ b/tokenizer.h
@ -2,6 +2,9 @@
 #define TOKENIZER_H
 #include <stdio.h>
 /* tokenizer
 * Simple tool that splits a stream into many tokens. */
 /* Kind of a token; stored in the `type` field of TOKEN.
 * NOTE(review): `string` presumably marks string literals - confirm in tokenizer.c. */
 typedef enum {
 	keyword, identifier, symbol, integer, string
 } TOKENTYPE;
@ -9,10 +12,10 @@ typedef enum {
 typedef struct token {
 	char* token;
 	TOKENTYPE type;
-	int truen;
+	int definedat;
 	struct token* next;
 } TOKEN;
 TOKEN* tokenize(FILE* input);
-void freetokenlist(TOKEN l);
+void freetokenlist(TOKEN* list);
 #endif
--- a/util.h
+++ b/util.h
@ -5,6 +5,11 @@
 /* util
 * Random utilities. */
 // Macros
 // eprintf: printf-style formatted output sent to stderr.
 #define eprintf(...) fprintf (stderr, __VA_ARGS__)
 // count: number of elements in `array`, given its element `type`. It is computed with
 // sizeof, so `array` must be a real array in scope, not a pointer.
 #define count(array, type) ((sizeof(array)) / (sizeof(type)))
 // strcount: count() for arrays whose elements are char*.
 #define strcount(array) count(array, char*)
 typedef struct stringlist {
 	char* content;
 	struct stringlist* next;