From c629a01b593c8fa425d624d94f52acd336afffeb Mon Sep 17 00:00:00 2001 From: Augusto Gunsch Date: Mon, 21 Dec 2020 18:11:23 -0300 Subject: [PATCH] Reorganize tokenizer --- compiler-scopes.c | 6 +- compiler.c | 10 ++-- parser.c | 6 +- tokens.h => tokenizer-tables.h | 9 +-- tokenizer.c | 106 ++++++++++++++++++++------------- tokenizer.h | 7 ++- util.h | 5 ++ 7 files changed, 92 insertions(+), 57 deletions(-) rename tokens.h => tokenizer-tables.h (68%) diff --git a/compiler-scopes.c b/compiler-scopes.c index f7606b4..84d0c8f 100644 --- a/compiler-scopes.c +++ b/compiler-scopes.c @@ -79,18 +79,18 @@ VARDEC* tovardec(OBJ* obj) { void doubledeclaration(char* name, DEBUGINFO* debug, OBJ* other) { DEBUGINFO* debugother = other->getdebug(other); - fprintf(stderr, "Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n", + eprintf("Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n", name, debug->file, debug->definedat, debugother->file, debugother->definedat); exit(1); } void notdeclared(char* name, DEBUGINFO* debug) { - fprintf(stderr, "'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat); + eprintf("'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat); exit(1); } void invalidparent(SUBROUTCALL* call) { - fprintf(stderr, "Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat); + eprintf("Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat); exit(1); } diff --git a/compiler.c b/compiler.c index c63bd4c..e840f84 100644 --- a/compiler.c +++ b/compiler.c @@ -63,11 +63,11 @@ LINE* mathopln(char op) { return onetoken("and"); if(op == '/') { char* tokens[] = { "call", "Math.divide", "2" }; - return mksimpleln(tokens, sizeof(tokens) / sizeof(char*)); + return mksimpleln(tokens, strcount(tokens)); } if(op == '*') { char* tokens[] = { "call", 
"Math.multiply", "2" }; - return mksimpleln(tokens, sizeof(tokens) / sizeof(char*)); + return mksimpleln(tokens, strcount(tokens)); } } @@ -77,7 +77,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) { if(e->type == intconstant) { char* tokens[] = { "push", "constant", itoa(e->integer) }; - myblk = mklnblk(mksimpleln(tokens, sizeof(tokens) / sizeof(char*))); + myblk = mklnblk(mksimpleln(tokens, strcount(tokens))); } else if(e->type == unaryopterm) { myblk = compileexpression(s, e->expression); @@ -88,7 +88,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) { myblk = compileexpression(s, e->expression); } else { - fprintf(stderr, "Unsupported term yet %i\n", e->type); + eprintf("Unsupported term yet %i\n", e->type); exit(1); } @@ -178,7 +178,7 @@ LINEBLOCK* compilestatement(SCOPE* s, CLASS* c, STATEMENT* st) { else if(st->type == returnstatement) return compileret(s, st->retst); else { - fprintf(stderr, "UNSUPPORTED\n"); + eprintf("UNSUPPORTED\n"); exit(1); } } diff --git a/parser.c b/parser.c index f338f7d..d305c00 100644 --- a/parser.c +++ b/parser.c @@ -42,7 +42,7 @@ const char* tokentypes[] = { DEBUGINFO* getdebug(PARSER* p) { DEBUGINFO* d = (DEBUGINFO*)malloc(sizeof(DEBUGINFO)); d->file = p->file; - d->definedat = p->current->truen; + d->definedat = p->current->definedat; return d; } @@ -59,7 +59,7 @@ void restorecp(PARSER* p) { } void unexpectedtoken(PARSER* p) { - fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->truen, p->file); + fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->definedat, p->file); } void unexpected(PARSER* p) { @@ -75,7 +75,7 @@ void checkcontent(PARSER* p, const char* content) { void checktype(PARSER* p, TOKENTYPE type) { if(p->current->type != type) { - fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->truen, 
p->file); + fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->definedat, p->file); exit(1); } } diff --git a/tokens.h b/tokenizer-tables.h similarity index 68% rename from tokens.h rename to tokenizer-tables.h index 5990c9b..768ebc7 100644 --- a/tokens.h +++ b/tokenizer-tables.h @@ -1,17 +1,18 @@ -#ifndef TOKENS_H -#define TOKENS_H +#ifndef TOKENIZER_TABLES_H +#define TOKENIZER_TABLES_H +#include "util.h" const char* keywords[] = { "class", "constructor", "function", "method", "field", "static", "var", "int", "char", "boolean", "void", "true", "false", "null", "this", "let", "do", "if", "else", "while", "return" }; -const int keywordssize = sizeof(keywords) / sizeof(char*); +const int keywordssize = strcount(keywords); const char* symbols[] = { "{", "}", "(", ")", "[", "]", ".", ",", ";", "+", "-", "*", "/", "&", "|", "<", ">", "=", "~" }; -const int symbolssize = sizeof(symbols) / sizeof(char*); +const int symbolssize = strcount(symbols); #endif diff --git a/tokenizer.c b/tokenizer.c index e32c2d8..3838b4b 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -2,9 +2,10 @@ #include #include #include -#include "tokens.h" #include "tokenizer.h" +#include "tokenizer-tables.h" +// Data types typedef enum { common, charsymbol, space } CHARTYPE; @@ -15,14 +16,38 @@ typedef struct { int count; } STRING; -TOKEN* mktokenlist() { - return (TOKEN*)malloc(sizeof(TOKEN)); -} +// String manipulation +STRING* mkstring(int size); +void append(STRING* s, char c); +void freestr(STRING* str); -CHARTYPE getchartype(unsigned char c) { - if(isspace(c)) return space; - if(isalnum(c) || c == '_' || c == '"') return common; - return charsymbol; +// Token manipulation; +TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type); +TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat); +#define mktoken() (TOKEN*)malloc(sizeof(TOKEN)) + +// Char types +CHARTYPE getchartype(unsigned char c); +bool iskeyword(STRING* tk); 
+bool issymbol(STRING* tk); +bool isint(char* str); +bool isintcons(STRING* tk); +bool isidentifier(STRING* tk); +TOKENTYPE gettokentype(STRING* tk, int definedat); + +// Stream handling +void skipln(FILE* input); +void skipmultiln(FILE* input, int* lnscount); +bool handlecomment(FILE* input, int* lnscount); +void readstr(FILE* input, STRING* tmp, int definedat); + +// String manipulation +STRING* mkstring(int size) { + STRING* str = (STRING*)malloc(sizeof(STRING)); + str->size = sizeof(char) * size; // initial size + str->str = (char*)malloc(str->size); + str->count = 0; + return str; } void append(STRING* s, char c) { @@ -36,12 +61,33 @@ void append(STRING* s, char c) { s->count++; } -STRING* mkstring(int size) { - STRING* str = (STRING*)malloc(sizeof(STRING)); - str->size = sizeof(char) * size; // initial size - str->str = (char*)malloc(str->size); - str->count = 0; - return str; +void freestr(STRING* str) { + free(str->str); + free(str); +} + +// Token manipulation; +TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type) { + curitem->token = (char*)malloc(sizeof(char)*token->count); + strcpy(curitem->token, token->str); + curitem->definedat = definedat; + curitem->type = type; + TOKEN* nextitem = mktoken(); + curitem->next = nextitem; + token->count = 0; + return nextitem; +} + +TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat) { + append(token, '\0'); + return appendtokenraw(curitem, token, definedat, gettokentype(token, definedat)); +} + +// Char types +CHARTYPE getchartype(unsigned char c) { + if(isspace(c)) return space; + if(isalnum(c) || c == '_' || c == '"') return common; + return charsymbol; } bool iskeyword(STRING* tk) { @@ -88,31 +134,16 @@ bool isidentifier(STRING* tk) { return true; } -TOKENTYPE gettokentype(STRING* tk, int truen) { +TOKENTYPE gettokentype(STRING* tk, int definedat) { if(iskeyword(tk)) return keyword; if(issymbol(tk)) return symbol; if(isintcons(tk)) return integer; if(isidentifier(tk)) 
return identifier; - fprintf(stderr, "Unexpected token '%s'; line %i\n", tk->str, truen); + eprintf("Unexpected token '%s'; line %i\n", tk->str, definedat); exit(1); } -TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int truen, TOKENTYPE type) { - curitem->token = (char*)malloc(sizeof(char)*token->count); - strcpy(curitem->token, token->str); - curitem->truen = truen; - curitem->type = type; - TOKEN* nextitem = mktokenlist(); - curitem->next = nextitem; - token->count = 0; - return nextitem; -} - -TOKEN* appendtoken(TOKEN* curitem, STRING* token, int truen) { - append(token, '\0'); - return appendtokenraw(curitem, token, truen, gettokentype(token, truen)); -} - +// Stream handling void skipln(FILE* input) { unsigned char c; while(c = fgetc(input), c != '\0') @@ -149,11 +180,11 @@ bool handlecomment(FILE* input, int* lnscount) { return false; } -void readstr(FILE* input, STRING* tmp, int truen) { +void readstr(FILE* input, STRING* tmp, int definedat) { unsigned char c; while(c = fgetc(input), c != '\0') { if(c == '\n') { - fprintf(stderr, "Unexpected end of line; line %i", truen); + eprintf("Unexpected end of line; line %i", definedat); exit(1); } if(c == '"') @@ -163,13 +194,8 @@ void readstr(FILE* input, STRING* tmp, int truen) { append(tmp, '\0'); } -void freestr(STRING* str) { - free(str->str); - free(str); -} - TOKEN* tokenize(FILE* input) { - TOKEN* head = mktokenlist(); + TOKEN* head = mktoken(); TOKEN* lastitem = head; TOKEN* curitem = head; diff --git a/tokenizer.h b/tokenizer.h index e631a42..5c4cda6 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -2,6 +2,9 @@ #define TOKENIZER_H #include +/* tokenizer + * Simple tool that splits a stream into many tokens. 
*/ + typedef enum { keyword, identifier, symbol, integer, string } TOKENTYPE; @@ -9,10 +12,10 @@ typedef enum { typedef struct token { char* token; TOKENTYPE type; - int truen; + int definedat; struct token* next; } TOKEN; TOKEN* tokenize(FILE* input); -void freetokenlist(TOKEN l); +void freetokenlist(TOKEN* list); #endif diff --git a/util.h b/util.h index 61521c6..8122ced 100644 --- a/util.h +++ b/util.h @@ -5,6 +5,11 @@ /* util * Random utilities. */ +// Macros +#define eprintf(...) fprintf (stderr, __VA_ARGS__) +#define count(array, type) ((sizeof(array)) / (sizeof(type))) +#define strcount(array) count(array, char*) + typedef struct stringlist { char* content; struct stringlist* next;