Reorganize tokenizer

This commit is contained in:
Augusto Gunsch 2020-12-21 18:11:23 -03:00
parent c3df97b04b
commit c629a01b59
No known key found for this signature in database
GPG Key ID: F7EEFE29825C72DC
7 changed files with 92 additions and 57 deletions

View File

@ -79,18 +79,18 @@ VARDEC* tovardec(OBJ* obj) {
/* Reports that `name` has been declared twice and terminates the program.
 * The message gives the file and line of this declaration (taken from `debug`)
 * and of the earlier one (obtained through other->getdebug(other)); the
 * function then calls exit(1) and never returns.
 * NOTE(review): the message appears twice below, once as fprintf(stderr, ...)
 * and once as eprintf(...) - presumably the old and new line of the same
 * change; confirm that only one of them is present in the real file. */
void doubledeclaration(char* name, DEBUGINFO* debug, OBJ* other) {
DEBUGINFO* debugother = other->getdebug(other);
fprintf(stderr, "Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n",
eprintf("Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n",
name, debug->file, debug->definedat, debugother->file, debugother->definedat);
exit(1);
}
/* Reports the use of an undeclared name and terminates the program.
 * Prints `name` together with debug->file and debug->definedat to stderr,
 * then calls exit(1).
 * NOTE(review): both the fprintf(stderr, ...) and the eprintf(...) form of the
 * message are shown - presumably old and new line of one change; confirm. */
void notdeclared(char* name, DEBUGINFO* debug) {
fprintf(stderr, "'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat);
eprintf("'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat);
exit(1);
}
/* Reports a subroutine call whose parent name is invalid and terminates.
 * Prints call->parentname plus the file and line stored in call->debug to
 * stderr, then calls exit(1).
 * NOTE(review): both the fprintf(stderr, ...) and the eprintf(...) form of the
 * message are shown - presumably old and new line of one change; confirm. */
void invalidparent(SUBROUTCALL* call) {
fprintf(stderr, "Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat);
eprintf("Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat);
exit(1);
}

View File

@ -63,11 +63,11 @@ LINE* mathopln(char op) {
return onetoken("and");
if(op == '/') {
char* tokens[] = { "call", "Math.divide", "2" };
return mksimpleln(tokens, sizeof(tokens) / sizeof(char*));
return mksimpleln(tokens, strcount(tokens));
}
if(op == '*') {
char* tokens[] = { "call", "Math.multiply", "2" };
return mksimpleln(tokens, sizeof(tokens) / sizeof(char*));
return mksimpleln(tokens, strcount(tokens));
}
}
@ -77,7 +77,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) {
if(e->type == intconstant) {
char* tokens[] = { "push", "constant", itoa(e->integer) };
myblk = mklnblk(mksimpleln(tokens, sizeof(tokens) / sizeof(char*)));
myblk = mklnblk(mksimpleln(tokens, strcount(tokens)));
}
else if(e->type == unaryopterm) {
myblk = compileexpression(s, e->expression);
@ -88,7 +88,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) {
myblk = compileexpression(s, e->expression);
}
else {
fprintf(stderr, "Unsupported term yet %i\n", e->type);
eprintf("Unsupported term yet %i\n", e->type);
exit(1);
}
@ -178,7 +178,7 @@ LINEBLOCK* compilestatement(SCOPE* s, CLASS* c, STATEMENT* st) {
else if(st->type == returnstatement)
return compileret(s, st->retst);
else {
fprintf(stderr, "UNSUPPORTED\n");
eprintf("UNSUPPORTED\n");
exit(1);
}
}

View File

@ -42,7 +42,7 @@ const char* tokentypes[] = {
/* Builds a DEBUGINFO describing the parser's current position.
 * The record is allocated with malloc (the result is not checked here) and
 * filled with p->file and the line number stored on the current token.
 * Returns the new record.
 * NOTE(review): the line number is assigned twice, from `truen` and from
 * `definedat` - presumably the old and new name of the same TOKEN field;
 * confirm that only the `definedat` line remains in the real file. */
DEBUGINFO* getdebug(PARSER* p) {
DEBUGINFO* d = (DEBUGINFO*)malloc(sizeof(DEBUGINFO));
d->file = p->file;
d->definedat = p->current->truen;
d->definedat = p->current->definedat;
return d;
}
@ -59,7 +59,7 @@ void restorecp(PARSER* p) {
}
/* Prints an "Unexpected token" diagnostic for the parser's current token.
 * The message contains the token text, the name of its type (looked up in
 * tokentypes[]), its line number and p->file. This function only prints; it
 * does not call exit itself.
 * NOTE(review): the message is shown twice, reading the line from `truen` and
 * from `definedat` - presumably old and new line of one change; confirm. */
void unexpectedtoken(PARSER* p) {
fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->truen, p->file);
fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->definedat, p->file);
}
void unexpected(PARSER* p) {
@ -75,7 +75,7 @@ void checkcontent(PARSER* p, const char* content) {
/* Verifies that the parser's current token has the expected type.
 * When p->current->type differs from `type`, prints the name of the actual
 * type, the token's line number and p->file to stderr and calls exit(1).
 * Returns normally when the types match.
 * NOTE(review): the message is shown twice, reading the line from `truen` and
 * from `definedat` - presumably old and new line of one change; confirm. */
void checktype(PARSER* p, TOKENTYPE type) {
if(p->current->type != type) {
fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->truen, p->file);
fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->definedat, p->file);
exit(1);
}
}

View File

@ -1,17 +1,18 @@
#ifndef TOKENS_H
#define TOKENS_H
#ifndef TOKENIZER_TABLES_H
#define TOKENIZER_TABLES_H
#include "util.h"
/* Reserved words recognised by the tokenizer. */
const char* keywords[] = {
    "class", "constructor", "function", "method", "field", "static",
    "var", "int", "char", "boolean", "void", "true", "false", "null",
    "this", "let", "do", "if", "else", "while", "return"
};
/* Number of entries in `keywords`.
 * Computed from the array itself: the element count must be taken from
 * `keywords` (plural). `keyword` (singular) is a TOKENTYPE enum constant, so
 * using it here would still compile but produce sizeof(int) / sizeof(char*)
 * instead of the table length. */
const int keywordssize = sizeof(keywords) / sizeof(keywords[0]);
/* Single-character symbols recognised by the tokenizer, each stored as a
 * one-character string. */
const char* symbols[] = {
"{", "}", "(", ")", "[", "]", ".", ",", ";", "+", "-", "*", "/",
"&", "|", "<", ">", "=", "~"
};
/* Number of entries in `symbols`.
 * NOTE(review): two definitions are shown, the explicit sizeof division and
 * the strcount() form - presumably old and new line of one change; only one
 * may remain in the real file. */
const int symbolssize = sizeof(symbols) / sizeof(char*);
const int symbolssize = strcount(symbols);
#endif

View File

@ -2,9 +2,10 @@
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "tokens.h"
#include "tokenizer.h"
#include "tokenizer-tables.h"
// Data types
/* Coarse class of a single input character, as produced by getchartype(). */
typedef enum {
    common,      /* letters, digits, '_' and '"' */
    charsymbol,  /* anything that is neither whitespace nor `common` */
    space        /* whitespace according to isspace() */
} CHARTYPE;
@ -15,14 +16,38 @@ typedef struct {
int count;
} STRING;
/* Allocates storage for one TOKEN node with malloc and returns it.
 * The node's fields are left uninitialised and the malloc result is returned
 * as-is (it may be NULL). */
TOKEN* mktokenlist() {
    TOKEN* node = (TOKEN*)malloc(sizeof(TOKEN));
    return node;
}
// String manipulation
// Allocates a STRING with an initial buffer of `size` bytes and count 0.
STRING* mkstring(int size);
// Adds character `c` to `s` and increments s->count.
void append(STRING* s, char c);
// Releases the character buffer of `str` and then `str` itself.
void freestr(STRING* str);
CHARTYPE getchartype(unsigned char c) {
if(isspace(c)) return space;
if(isalnum(c) || c == '_' || c == '"') return common;
return charsymbol;
// Token manipulation
// Stores a copy of the text in `token`, the line `definedat` and `type` in
// `curitem`, links a freshly allocated node after it, resets token->count to 0
// and returns the new node.
TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type);
// Terminates `token` with '\0', determines its type with gettokentype() and
// forwards to appendtokenraw().
TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat);
// Allocates one uninitialised TOKEN node (NULL if malloc fails).
// The expansion is fully parenthesised so that it behaves as a single
// expression in contexts such as `mktoken()->next` or `sizeof mktoken()`.
#define mktoken() ((TOKEN*)malloc(sizeof(TOKEN)))
// Char types
// Classifies `c` as space, common (alphanumeric, '_' or '"') or charsymbol.
CHARTYPE getchartype(unsigned char c);
// The predicates below are the checks gettokentype() applies, in this order:
// iskeyword, issymbol, isintcons, isidentifier.
// NOTE(review): presumably iskeyword/issymbol look the token up in the
// keywords[]/symbols[] tables - their bodies are not visible here; verify.
bool iskeyword(STRING* tk);
bool issymbol(STRING* tk);
bool isint(char* str);
bool isintcons(STRING* tk);
bool isidentifier(STRING* tk);
// Returns the TOKENTYPE of `tk`; if no predicate matches, prints an
// "Unexpected token" message with the line `definedat` and calls exit(1).
TOKENTYPE gettokentype(STRING* tk, int definedat);
// Stream handling
// NOTE(review): the names suggest skipln/skipmultiln skip single-line and
// multi-line comments and handlecomment dispatches between them - their
// bodies are only partly visible here; verify against the definitions.
void skipln(FILE* input);
void skipmultiln(FILE* input, int* lnscount);
bool handlecomment(FILE* input, int* lnscount);
// Reads characters from `input` into `tmp` and terminates it with '\0';
// a '\n' inside the text is reported as "Unexpected end of line" with the
// line `definedat`, followed by exit(1).
void readstr(FILE* input, STRING* tmp, int definedat);
// String manipulation
/* Creates an empty STRING with an initial buffer of `size` bytes.
 *
 * size: initial capacity of the character buffer, in bytes.
 * Returns the new STRING with count set to 0; release it with freestr().
 *
 * Allocation failures are fatal: a message is printed to stderr and the
 * program exits with status 1, matching how the rest of this file reports
 * unrecoverable errors. */
STRING* mkstring(int size) {
    STRING* str = (STRING*)malloc(sizeof(STRING));
    if(str == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    str->size = sizeof(char) * size; // initial size
    str->str = (char*)malloc(str->size);
    // malloc(0) is allowed to return NULL, so only a non-empty request counts
    // as a failure.
    if(str->str == NULL && size > 0) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    str->count = 0;
    return str;
}
void append(STRING* s, char c) {
@ -36,12 +61,33 @@ void append(STRING* s, char c) {
s->count++;
}
STRING* mkstring(int size) {
STRING* str = (STRING*)malloc(sizeof(STRING));
str->size = sizeof(char) * size; // initial size
str->str = (char*)malloc(str->size);
str->count = 0;
return str;
/* Releases a STRING created by mkstring(): first its character buffer, then
 * the STRING itself. Passing NULL is a no-op, mirroring free(). */
void freestr(STRING* str) {
    if(str == NULL) {
        return;
    }
    free(str->str);
    free(str);
}
// Token manipulation
/* Fills `curitem` from the text accumulated in `token` and links a new, empty
 * node after it.
 *
 * curitem:   node to fill; receives a heap copy of the token text, the line
 *            number and the type.
 * token:     accumulated text; token->count bytes of token->str are copied
 *            and token->count is reset to 0 so the buffer can be reused.
 * definedat: line number stored on the node.
 * type:      token type stored on the node.
 * Returns the newly allocated node that follows `curitem`; its fields are
 * left uninitialised.
 *
 * Allocation failures are fatal (message on stderr, exit status 1). */
TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type) {
    // Copy exactly token->count bytes and terminate explicitly. The buffer is
    // one byte larger than count, so the copy stays inside its allocation and
    // is a valid C string whether or not the caller already appended '\0'.
    curitem->token = (char*)malloc(sizeof(char) * (token->count + 1));
    if(curitem->token == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    memcpy(curitem->token, token->str, token->count);
    curitem->token[token->count] = '\0';

    curitem->definedat = definedat;
    curitem->type = type;

    TOKEN* nextitem = mktoken();
    if(nextitem == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    curitem->next = nextitem;

    token->count = 0;
    return nextitem;
}
/* Finishes the token accumulated in `token` and appends it to the list.
 * The text is terminated with '\0', classified with gettokentype() (which
 * exits on an unrecognised token) and handed to appendtokenraw().
 * Returns the node that follows `curitem`. */
TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat) {
    // The terminator must be in place before the text is classified.
    append(token, '\0');
    TOKENTYPE kind = gettokentype(token, definedat);
    TOKEN* following = appendtokenraw(curitem, token, definedat, kind);
    return following;
}
// Char types
/* Classifies one input character.
 * Returns `space` for whitespace (isspace), `common` for alphanumerics, '_'
 * and '"', and `charsymbol` for every other character. */
CHARTYPE getchartype(unsigned char c) {
    if(isspace(c)) {
        return space;
    }
    // '_' and '"' are grouped with letters and digits.
    const int wordlike = (c == '_') || (c == '"') || isalnum(c);
    return wordlike ? common : charsymbol;
}
bool iskeyword(STRING* tk) {
@ -88,31 +134,16 @@ bool isidentifier(STRING* tk) {
return true;
}
/* Determines the type of the token text in `tk`.
 * The checks run in this order: iskeyword, issymbol, isintcons, isidentifier;
 * the first one that succeeds decides the result. If none succeeds, an
 * "Unexpected token" message with the token text and the line number is
 * printed to stderr and exit(1) is called.
 * NOTE(review): two signatures and two print lines are shown; they differ
 * only in the parameter name (truen vs definedat) and in fprintf(stderr, ...)
 * vs eprintf(...) - presumably old and new lines of one change; confirm that
 * only one of each remains in the real file. */
TOKENTYPE gettokentype(STRING* tk, int truen) {
TOKENTYPE gettokentype(STRING* tk, int definedat) {
if(iskeyword(tk)) return keyword;
if(issymbol(tk)) return symbol;
if(isintcons(tk)) return integer;
if(isidentifier(tk)) return identifier;
fprintf(stderr, "Unexpected token '%s'; line %i\n", tk->str, truen);
eprintf("Unexpected token '%s'; line %i\n", tk->str, definedat);
exit(1);
}
/* Fills `curitem` with a heap copy of the text in `token`, the line number
 * `truen` and `type`, links a newly allocated node after it, resets
 * token->count to 0 and returns the new node.
 * The copy buffer holds token->count bytes and is filled with strcpy, so
 * token->str must be '\0'-terminated within those bytes.
 * NOTE(review): this variant uses the `truen` field and mktokenlist();
 * presumably it is the pre-rename copy of the function - confirm. */
TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int truen, TOKENTYPE type) {
curitem->token = (char*)malloc(sizeof(char)*token->count);
strcpy(curitem->token, token->str);
curitem->truen = truen;
curitem->type = type;
TOKEN* nextitem = mktokenlist();
curitem->next = nextitem;
token->count = 0;
return nextitem;
}
/* Terminates `token` with '\0', classifies it with gettokentype() and appends
 * it after `curitem` through appendtokenraw(). Returns the node that follows
 * `curitem`.
 * NOTE(review): presumably the pre-rename copy (parameter `truen`); confirm. */
TOKEN* appendtoken(TOKEN* curitem, STRING* token, int truen) {
append(token, '\0');
return appendtokenraw(curitem, token, truen, gettokentype(token, truen));
}
// Stream handling
void skipln(FILE* input) {
unsigned char c;
while(c = fgetc(input), c != '\0')
@ -149,11 +180,11 @@ bool handlecomment(FILE* input, int* lnscount) {
return false;
}
void readstr(FILE* input, STRING* tmp, int truen) {
void readstr(FILE* input, STRING* tmp, int definedat) {
unsigned char c;
while(c = fgetc(input), c != '\0') {
if(c == '\n') {
fprintf(stderr, "Unexpected end of line; line %i", truen);
eprintf("Unexpected end of line; line %i", definedat);
exit(1);
}
if(c == '"')
@ -163,13 +194,8 @@ void readstr(FILE* input, STRING* tmp, int truen) {
append(tmp, '\0');
}
/* Releases the character buffer of `str` and then `str` itself.
 * `str` is dereferenced unconditionally, so it must not be NULL. */
void freestr(STRING* str) {
free(str->str);
free(str);
}
TOKEN* tokenize(FILE* input) {
TOKEN* head = mktokenlist();
TOKEN* head = mktoken();
TOKEN* lastitem = head;
TOKEN* curitem = head;

View File

@ -2,6 +2,9 @@
#define TOKENIZER_H
#include <stdio.h>
/* tokenizer
* Simple tool that splits a stream into many tokens. */
/* Kind of a token produced by the tokenizer. */
typedef enum {
    keyword,
    identifier,
    symbol,
    integer,
    string
} TOKENTYPE;
@ -9,10 +12,10 @@ typedef enum {
/* One node of the singly linked token list returned by tokenize().
 * NOTE(review): both `truen` and `definedat` are shown - presumably the old
 * and new name of the same line-number field; confirm only one remains. */
typedef struct token {
char* token; // text of the token
TOKENTYPE type; // kind of the token
int truen; // line number reported in diagnostics
int definedat; // line number reported in diagnostics
struct token* next; // following node in the list
} TOKEN;
/* Reads `input` and returns the head of the resulting token list. */
TOKEN* tokenize(FILE* input);
/* NOTE(review): presumably releases a list returned by tokenize() - the
 * definition is not visible here. Two conflicting prototypes are shown (by
 * value and by pointer), presumably old and new line of one change; confirm
 * that only the pointer form remains. */
void freetokenlist(TOKEN l);
void freetokenlist(TOKEN* list);
#endif

5
util.h
View File

@ -5,6 +5,11 @@
/* util
* Random utilities. */
// Macros

// printf-style output to stderr.
#define eprintf(...) fprintf(stderr, __VA_ARGS__)

// Number of elements of type `type` in `array`. `array` must be a real array,
// not a pointer: the result is derived from sizeof(array).
#define count(array, type) (sizeof(array) / sizeof(type))

// Element count of an array of char* strings.
#define strcount(array) count(array, char*)
typedef struct stringlist {
char* content;
struct stringlist* next;