Reorganize tokenizer

Augusto Gunsch 2020-12-21 18:11:23 -03:00
parent c3df97b04b
commit c629a01b59
No known key found for this signature in database
GPG Key ID: F7EEFE29825C72DC
7 changed files with 92 additions and 57 deletions

View File

@@ -79,18 +79,18 @@ VARDEC* tovardec(OBJ* obj) {
 void doubledeclaration(char* name, DEBUGINFO* debug, OBJ* other) {
 	DEBUGINFO* debugother = other->getdebug(other);
-	fprintf(stderr, "Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n",
+	eprintf("Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n",
 			name, debug->file, debug->definedat, debugother->file, debugother->definedat);
 	exit(1);
 }
 void notdeclared(char* name, DEBUGINFO* debug) {
-	fprintf(stderr, "'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat);
+	eprintf("'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat);
 	exit(1);
 }
 void invalidparent(SUBROUTCALL* call) {
-	fprintf(stderr, "Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat);
+	eprintf("Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat);
 	exit(1);
 }
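
The hunk above swaps direct fprintf(stderr, ...) calls for the eprintf macro that this commit adds to util.h (last file in this diff). A minimal standalone sketch of the same pattern; only the macro definition is taken from the commit, the demo values are made up:

    #include <stdio.h>
    #include <stdlib.h>

    /* Definition as added to util.h by this commit. */
    #define eprintf(...) fprintf (stderr, __VA_ARGS__)

    int main(void) {
        /* Hypothetical error report, mirroring notdeclared() above. */
        eprintf("'%s' not declared; file '%s', line %i\n", "x", "Main.jack", 12);
        exit(1);
    }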

View File

@@ -63,11 +63,11 @@ LINE* mathopln(char op) {
 		return onetoken("and");
 	if(op == '/') {
 		char* tokens[] = { "call", "Math.divide", "2" };
-		return mksimpleln(tokens, sizeof(tokens) / sizeof(char*));
+		return mksimpleln(tokens, strcount(tokens));
 	}
 	if(op == '*') {
 		char* tokens[] = { "call", "Math.multiply", "2" };
-		return mksimpleln(tokens, sizeof(tokens) / sizeof(char*));
+		return mksimpleln(tokens, strcount(tokens));
 	}
 }
@@ -77,7 +77,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) {
 	if(e->type == intconstant) {
 		char* tokens[] = { "push", "constant", itoa(e->integer) };
-		myblk = mklnblk(mksimpleln(tokens, sizeof(tokens) / sizeof(char*)));
+		myblk = mklnblk(mksimpleln(tokens, strcount(tokens)));
 	}
 	else if(e->type == unaryopterm) {
 		myblk = compileexpression(s, e->expression);
@@ -88,7 +88,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) {
 		myblk = compileexpression(s, e->expression);
 	}
 	else {
-		fprintf(stderr, "Unsupported term yet %i\n", e->type);
+		eprintf("Unsupported term yet %i\n", e->type);
 		exit(1);
 	}
@@ -178,7 +178,7 @@ LINEBLOCK* compilestatement(SCOPE* s, CLASS* c, STATEMENT* st) {
 	else if(st->type == returnstatement)
 		return compileret(s, st->retst);
 	else {
-		fprintf(stderr, "UNSUPPORTED\n");
+		eprintf("UNSUPPORTED\n");
 		exit(1);
 	}
 }

View File

@@ -42,7 +42,7 @@ const char* tokentypes[] = {
 DEBUGINFO* getdebug(PARSER* p) {
 	DEBUGINFO* d = (DEBUGINFO*)malloc(sizeof(DEBUGINFO));
 	d->file = p->file;
-	d->definedat = p->current->truen;
+	d->definedat = p->current->definedat;
 	return d;
 }
@@ -59,7 +59,7 @@ void restorecp(PARSER* p) {
 }
 void unexpectedtoken(PARSER* p) {
-	fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->truen, p->file);
+	fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->definedat, p->file);
 }
 void unexpected(PARSER* p) {
@@ -75,7 +75,7 @@ void checkcontent(PARSER* p, const char* content) {
 void checktype(PARSER* p, TOKENTYPE type) {
 	if(p->current->type != type) {
-		fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->truen, p->file);
+		fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->definedat, p->file);
 		exit(1);
 	}
 }

View File

@@ -1,17 +1,18 @@
-#ifndef TOKENS_H
-#define TOKENS_H
+#ifndef TOKENIZER_TABLES_H
+#define TOKENIZER_TABLES_H
+#include "util.h"
 const char* keywords[] = {
 	"class", "constructor", "function", "method", "field", "static",
 	"var", "int", "char", "boolean", "void", "true", "false", "null",
 	"this", "let", "do", "if", "else", "while", "return"
 };
-const int keywordssize = sizeof(keywords) / sizeof(char*);
+const int keywordssize = strcount(keywords);
 const char* symbols[] = {
 	"{", "}", "(", ")", "[", "]", ".", ",", ";", "+", "-", "*", "/",
 	"&", "|", "<", ">", "=", "~"
 };
-const int symbolssize = sizeof(symbols) / sizeof(char*);
+const int symbolssize = strcount(symbols);
 #endif
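
The array-size expressions above are folded into the strcount macro from util.h. A small sketch of what it expands to; the caveat in the comment is mine, not the commit's — sizeof only sees the array type when the macro is applied to an actual array in scope, not to a char** parameter:

    #include <stdio.h>

    /* Macros as added to util.h by this commit. */
    #define count(array, type) ((sizeof(array)) / (sizeof(type)))
    #define strcount(array) count(array, char*)

    int main(void) {
        const char* symbols[] = { "{", "}", "(", ")" };
        /* Expands to sizeof(symbols) / sizeof(char*), i.e. 4 here. */
        printf("%d symbols\n", (int)strcount(symbols));
        return 0;
    }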

View File

@@ -2,9 +2,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdbool.h>
-#include "tokens.h"
 #include "tokenizer.h"
+#include "tokenizer-tables.h"
+// Data types
 typedef enum {
 	common, charsymbol, space
 } CHARTYPE;
@@ -15,14 +16,38 @@ typedef struct {
 	int count;
 } STRING;
-TOKEN* mktokenlist() {
-	return (TOKEN*)malloc(sizeof(TOKEN));
-}
-CHARTYPE getchartype(unsigned char c) {
-	if(isspace(c)) return space;
-	if(isalnum(c) || c == '_' || c == '"') return common;
-	return charsymbol;
+// String manipulation
+STRING* mkstring(int size);
+void append(STRING* s, char c);
+void freestr(STRING* str);
+// Token manipulation;
+TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type);
+TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat);
+#define mktoken() (TOKEN*)malloc(sizeof(TOKEN))
+// Char types
+CHARTYPE getchartype(unsigned char c);
+bool iskeyword(STRING* tk);
+bool issymbol(STRING* tk);
+bool isint(char* str);
+bool isintcons(STRING* tk);
+bool isidentifier(STRING* tk);
+TOKENTYPE gettokentype(STRING* tk, int definedat);
+// Stream handling
+void skipln(FILE* input);
+void skipmultiln(FILE* input, int* lnscount);
+bool handlecomment(FILE* input, int* lnscount);
+void readstr(FILE* input, STRING* tmp, int definedat);
+// String manipulation
+STRING* mkstring(int size) {
+	STRING* str = (STRING*)malloc(sizeof(STRING));
+	str->size = sizeof(char) * size; // initial size
+	str->str = (char*)malloc(str->size);
+	str->count = 0;
+	return str;
 }
 void append(STRING* s, char c) {
@@ -36,12 +61,33 @@ void append(STRING* s, char c) {
 	s->count++;
 }
-STRING* mkstring(int size) {
-	STRING* str = (STRING*)malloc(sizeof(STRING));
-	str->size = sizeof(char) * size; // initial size
-	str->str = (char*)malloc(str->size);
-	str->count = 0;
-	return str;
+void freestr(STRING* str) {
+	free(str->str);
+	free(str);
+}
+// Token manipulation;
+TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type) {
+	curitem->token = (char*)malloc(sizeof(char)*token->count);
+	strcpy(curitem->token, token->str);
+	curitem->definedat = definedat;
+	curitem->type = type;
+	TOKEN* nextitem = mktoken();
+	curitem->next = nextitem;
+	token->count = 0;
+	return nextitem;
+}
+TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat) {
+	append(token, '\0');
+	return appendtokenraw(curitem, token, definedat, gettokentype(token, definedat));
+}
+// Char types
+CHARTYPE getchartype(unsigned char c) {
+	if(isspace(c)) return space;
+	if(isalnum(c) || c == '_' || c == '"') return common;
+	return charsymbol;
 }
 bool iskeyword(STRING* tk) {
@@ -88,31 +134,16 @@ bool isidentifier(STRING* tk) {
 	return true;
 }
-TOKENTYPE gettokentype(STRING* tk, int truen) {
+TOKENTYPE gettokentype(STRING* tk, int definedat) {
 	if(iskeyword(tk)) return keyword;
 	if(issymbol(tk)) return symbol;
 	if(isintcons(tk)) return integer;
 	if(isidentifier(tk)) return identifier;
-	fprintf(stderr, "Unexpected token '%s'; line %i\n", tk->str, truen);
+	eprintf("Unexpected token '%s'; line %i\n", tk->str, definedat);
 	exit(1);
 }
-TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int truen, TOKENTYPE type) {
-	curitem->token = (char*)malloc(sizeof(char)*token->count);
-	strcpy(curitem->token, token->str);
-	curitem->truen = truen;
-	curitem->type = type;
-	TOKEN* nextitem = mktokenlist();
-	curitem->next = nextitem;
-	token->count = 0;
-	return nextitem;
-}
-TOKEN* appendtoken(TOKEN* curitem, STRING* token, int truen) {
-	append(token, '\0');
-	return appendtokenraw(curitem, token, truen, gettokentype(token, truen));
-}
+// Stream handling
 void skipln(FILE* input) {
 	unsigned char c;
 	while(c = fgetc(input), c != '\0')
@@ -149,11 +180,11 @@ bool handlecomment(FILE* input, int* lnscount) {
 	return false;
 }
-void readstr(FILE* input, STRING* tmp, int truen) {
+void readstr(FILE* input, STRING* tmp, int definedat) {
 	unsigned char c;
 	while(c = fgetc(input), c != '\0') {
 		if(c == '\n') {
-			fprintf(stderr, "Unexpected end of line; line %i", truen);
+			eprintf("Unexpected end of line; line %i", definedat);
 			exit(1);
 		}
 		if(c == '"')
@@ -163,13 +194,8 @@ void readstr(FILE* input, STRING* tmp, int truen) {
 	append(tmp, '\0');
 }
-void freestr(STRING* str) {
-	free(str->str);
-	free(str);
-}
 TOKEN* tokenize(FILE* input) {
-	TOKEN* head = mktokenlist();
+	TOKEN* head = mktoken();
 	TOKEN* lastitem = head;
 	TOKEN* curitem = head;

View File

@@ -2,6 +2,9 @@
 #define TOKENIZER_H
 #include <stdio.h>
+/* tokenizer
+ * Simple tool that splits a stream into many tokens. */
 typedef enum {
 	keyword, identifier, symbol, integer, string
 } TOKENTYPE;
@@ -9,10 +12,10 @@ typedef enum {
 typedef struct token {
 	char* token;
 	TOKENTYPE type;
-	int truen;
+	int definedat;
 	struct token* next;
 } TOKEN;
 TOKEN* tokenize(FILE* input);
-void freetokenlist(TOKEN l);
+void freetokenlist(TOKEN* list);
 #endif
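
For reference, a hypothetical caller of the interface declared above; the input file name and the assumption that the token list is NULL-terminated are mine, not part of the commit:

    #include <stdio.h>
    #include "tokenizer.h"

    int main(void) {
        FILE* input = fopen("Main.jack", "r"); /* example input file */
        if(input == NULL)
            return 1;

        TOKEN* list = tokenize(input);
        fclose(input);

        /* Walk the singly linked list, reading the renamed definedat field. */
        for(TOKEN* t = list; t != NULL; t = t->next)
            printf("line %i: '%s'\n", t->definedat, t->token);

        freetokenlist(list);
        return 0;
    }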

util.h
View File

@@ -5,6 +5,11 @@
 /* util
  * Random utilities. */
+// Macros
+#define eprintf(...) fprintf (stderr, __VA_ARGS__)
+#define count(array, type) ((sizeof(array)) / (sizeof(type)))
+#define strcount(array) count(array, char*)
 typedef struct stringlist {
 	char* content;
 	struct stringlist* next;