Reorganize tokenizer
This commit is contained in:
parent
c3df97b04b
commit
c629a01b59
|
@ -79,18 +79,18 @@ VARDEC* tovardec(OBJ* obj) {
|
|||
|
||||
void doubledeclaration(char* name, DEBUGINFO* debug, OBJ* other) {
|
||||
DEBUGINFO* debugother = other->getdebug(other);
|
||||
fprintf(stderr, "Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n",
|
||||
eprintf("Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n",
|
||||
name, debug->file, debug->definedat, debugother->file, debugother->definedat);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
void notdeclared(char* name, DEBUGINFO* debug) {
|
||||
fprintf(stderr, "'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat);
|
||||
eprintf("'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
void invalidparent(SUBROUTCALL* call) {
|
||||
fprintf(stderr, "Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat);
|
||||
eprintf("Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
|
10
compiler.c
10
compiler.c
|
@ -63,11 +63,11 @@ LINE* mathopln(char op) {
|
|||
return onetoken("and");
|
||||
if(op == '/') {
|
||||
char* tokens[] = { "call", "Math.divide", "2" };
|
||||
return mksimpleln(tokens, sizeof(tokens) / sizeof(char*));
|
||||
return mksimpleln(tokens, strcount(tokens));
|
||||
}
|
||||
if(op == '*') {
|
||||
char* tokens[] = { "call", "Math.multiply", "2" };
|
||||
return mksimpleln(tokens, sizeof(tokens) / sizeof(char*));
|
||||
return mksimpleln(tokens, strcount(tokens));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -77,7 +77,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) {
|
|||
|
||||
if(e->type == intconstant) {
|
||||
char* tokens[] = { "push", "constant", itoa(e->integer) };
|
||||
myblk = mklnblk(mksimpleln(tokens, sizeof(tokens) / sizeof(char*)));
|
||||
myblk = mklnblk(mksimpleln(tokens, strcount(tokens)));
|
||||
}
|
||||
else if(e->type == unaryopterm) {
|
||||
myblk = compileexpression(s, e->expression);
|
||||
|
@ -88,7 +88,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) {
|
|||
myblk = compileexpression(s, e->expression);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Unsupported term yet %i\n", e->type);
|
||||
eprintf("Unsupported term yet %i\n", e->type);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
@ -178,7 +178,7 @@ LINEBLOCK* compilestatement(SCOPE* s, CLASS* c, STATEMENT* st) {
|
|||
else if(st->type == returnstatement)
|
||||
return compileret(s, st->retst);
|
||||
else {
|
||||
fprintf(stderr, "UNSUPPORTED\n");
|
||||
eprintf("UNSUPPORTED\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
|
6
parser.c
6
parser.c
|
@ -42,7 +42,7 @@ const char* tokentypes[] = {
|
|||
DEBUGINFO* getdebug(PARSER* p) {
|
||||
DEBUGINFO* d = (DEBUGINFO*)malloc(sizeof(DEBUGINFO));
|
||||
d->file = p->file;
|
||||
d->definedat = p->current->truen;
|
||||
d->definedat = p->current->definedat;
|
||||
return d;
|
||||
}
|
||||
|
||||
|
@ -59,7 +59,7 @@ void restorecp(PARSER* p) {
|
|||
}
|
||||
|
||||
void unexpectedtoken(PARSER* p) {
|
||||
fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->truen, p->file);
|
||||
fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->definedat, p->file);
|
||||
}
|
||||
|
||||
void unexpected(PARSER* p) {
|
||||
|
@ -75,7 +75,7 @@ void checkcontent(PARSER* p, const char* content) {
|
|||
|
||||
void checktype(PARSER* p, TOKENTYPE type) {
|
||||
if(p->current->type != type) {
|
||||
fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->truen, p->file);
|
||||
fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->definedat, p->file);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,17 +1,18 @@
|
|||
#ifndef TOKENS_H
|
||||
#define TOKENS_H
|
||||
#ifndef TOKENIZER_TABLES_H
|
||||
#define TOKENIZER_TABLES_H
|
||||
#include "util.h"
|
||||
|
||||
const char* keywords[] = {
|
||||
"class", "constructor", "function", "method", "field", "static",
|
||||
"var", "int", "char", "boolean", "void", "true", "false", "null",
|
||||
"this", "let", "do", "if", "else", "while", "return"
|
||||
};
|
||||
const int keywordssize = sizeof(keywords) / sizeof(char*);
|
||||
const int keywordssize = strcount(keyword);
|
||||
|
||||
const char* symbols[] = {
|
||||
"{", "}", "(", ")", "[", "]", ".", ",", ";", "+", "-", "*", "/",
|
||||
"&", "|", "<", ">", "=", "~"
|
||||
};
|
||||
const int symbolssize = sizeof(symbols) / sizeof(char*);
|
||||
const int symbolssize = strcount(symbols);
|
||||
|
||||
#endif
|
106
tokenizer.c
106
tokenizer.c
|
@ -2,9 +2,10 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include "tokens.h"
|
||||
#include "tokenizer.h"
|
||||
#include "tokenizer-tables.h"
|
||||
|
||||
// Data types
|
||||
typedef enum {
|
||||
common, charsymbol, space
|
||||
} CHARTYPE;
|
||||
|
@ -15,14 +16,38 @@ typedef struct {
|
|||
int count;
|
||||
} STRING;
|
||||
|
||||
TOKEN* mktokenlist() {
|
||||
return (TOKEN*)malloc(sizeof(TOKEN));
|
||||
}
|
||||
// String manipulation
|
||||
STRING* mkstring(int size);
|
||||
void append(STRING* s, char c);
|
||||
void freestr(STRING* str);
|
||||
|
||||
CHARTYPE getchartype(unsigned char c) {
|
||||
if(isspace(c)) return space;
|
||||
if(isalnum(c) || c == '_' || c == '"') return common;
|
||||
return charsymbol;
|
||||
// Token manipulation
|
||||
TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type);
|
||||
TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat);
|
||||
#define mktoken() (TOKEN*)malloc(sizeof(TOKEN))
|
||||
|
||||
// Char types
|
||||
CHARTYPE getchartype(unsigned char c);
|
||||
bool iskeyword(STRING* tk);
|
||||
bool issymbol(STRING* tk);
|
||||
bool isint(char* str);
|
||||
bool isintcons(STRING* tk);
|
||||
bool isidentifier(STRING* tk);
|
||||
TOKENTYPE gettokentype(STRING* tk, int definedat);
|
||||
|
||||
// Stream handling
|
||||
void skipln(FILE* input);
|
||||
void skipmultiln(FILE* input, int* lnscount);
|
||||
bool handlecomment(FILE* input, int* lnscount);
|
||||
void readstr(FILE* input, STRING* tmp, int definedat);
|
||||
|
||||
// String manipulation
|
||||
STRING* mkstring(int size) {
|
||||
STRING* str = (STRING*)malloc(sizeof(STRING));
|
||||
str->size = sizeof(char) * size; // initial size
|
||||
str->str = (char*)malloc(str->size);
|
||||
str->count = 0;
|
||||
return str;
|
||||
}
|
||||
|
||||
void append(STRING* s, char c) {
|
||||
|
@ -36,12 +61,33 @@ void append(STRING* s, char c) {
|
|||
s->count++;
|
||||
}
|
||||
|
||||
STRING* mkstring(int size) {
|
||||
STRING* str = (STRING*)malloc(sizeof(STRING));
|
||||
str->size = sizeof(char) * size; // initial size
|
||||
str->str = (char*)malloc(str->size);
|
||||
str->count = 0;
|
||||
return str;
|
||||
void freestr(STRING* str) {
|
||||
free(str->str);
|
||||
free(str);
|
||||
}
|
||||
|
||||
// Token manipulation
|
||||
/* Fill `curitem` with a copy of the text in `token` and link a fresh node
 * after it.
 *
 * curitem   - node to fill in; its token, definedat, type and next fields
 *             are all overwritten.
 * token     - scratch buffer holding the NUL-terminated token text; its
 *             count is reset to 0 so the caller can reuse it.
 * definedat - line number recorded on the node.
 * type      - token type recorded on the node.
 *
 * Returns the newly allocated node that follows `curitem`.
 * Allocation failure is fatal (message on stderr, exit status 1).
 */
TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type) {
	// Size the copy from the text itself instead of trusting token->count
	// to already include the terminator that strcpy would write.
	size_t len = strlen(token->str) + 1;
	curitem->token = (char*)malloc(len);
	if(curitem->token == NULL) {
		fprintf(stderr, "Out of memory while storing token; line %i\n", definedat);
		exit(1);
	}
	memcpy(curitem->token, token->str, len);

	curitem->definedat = definedat;
	curitem->type = type;

	TOKEN* nextitem = mktoken();
	if(nextitem == NULL) {
		fprintf(stderr, "Out of memory while allocating token; line %i\n", definedat);
		exit(1);
	}
	// Keep the placeholder node in a defined state until it is filled in.
	nextitem->token = NULL;
	nextitem->next = NULL;
	curitem->next = nextitem;

	token->count = 0;
	return nextitem;
}
|
||||
|
||||
/* Terminate the text collected in `token`, classify it, and store it in
 * `curitem` via appendtokenraw().
 *
 * Returns whatever appendtokenraw() returns (the node following `curitem`).
 */
TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat) {
	// The terminator has to be in place before the text is classified.
	append(token, '\0');

	TOKENTYPE kind = gettokentype(token, definedat);
	return appendtokenraw(curitem, token, definedat, kind);
}
|
||||
|
||||
// Char types
|
||||
CHARTYPE getchartype(unsigned char c) {
|
||||
if(isspace(c)) return space;
|
||||
if(isalnum(c) || c == '_' || c == '"') return common;
|
||||
return charsymbol;
|
||||
}
|
||||
|
||||
bool iskeyword(STRING* tk) {
|
||||
|
@ -88,31 +134,16 @@ bool isidentifier(STRING* tk) {
|
|||
return true;
|
||||
}
|
||||
|
||||
TOKENTYPE gettokentype(STRING* tk, int truen) {
|
||||
TOKENTYPE gettokentype(STRING* tk, int definedat) {
|
||||
if(iskeyword(tk)) return keyword;
|
||||
if(issymbol(tk)) return symbol;
|
||||
if(isintcons(tk)) return integer;
|
||||
if(isidentifier(tk)) return identifier;
|
||||
fprintf(stderr, "Unexpected token '%s'; line %i\n", tk->str, truen);
|
||||
eprintf("Unexpected token '%s'; line %i\n", tk->str, definedat);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int truen, TOKENTYPE type) {
|
||||
curitem->token = (char*)malloc(sizeof(char)*token->count);
|
||||
strcpy(curitem->token, token->str);
|
||||
curitem->truen = truen;
|
||||
curitem->type = type;
|
||||
TOKEN* nextitem = mktokenlist();
|
||||
curitem->next = nextitem;
|
||||
token->count = 0;
|
||||
return nextitem;
|
||||
}
|
||||
|
||||
TOKEN* appendtoken(TOKEN* curitem, STRING* token, int truen) {
|
||||
append(token, '\0');
|
||||
return appendtokenraw(curitem, token, truen, gettokentype(token, truen));
|
||||
}
|
||||
|
||||
// Stream handling
|
||||
void skipln(FILE* input) {
|
||||
unsigned char c;
|
||||
while(c = fgetc(input), c != '\0')
|
||||
|
@ -149,11 +180,11 @@ bool handlecomment(FILE* input, int* lnscount) {
|
|||
return false;
|
||||
}
|
||||
|
||||
void readstr(FILE* input, STRING* tmp, int truen) {
|
||||
void readstr(FILE* input, STRING* tmp, int definedat) {
|
||||
unsigned char c;
|
||||
while(c = fgetc(input), c != '\0') {
|
||||
if(c == '\n') {
|
||||
fprintf(stderr, "Unexpected end of line; line %i", truen);
|
||||
eprintf("Unexpected end of line; line %i", definedat);
|
||||
exit(1);
|
||||
}
|
||||
if(c == '"')
|
||||
|
@ -163,13 +194,8 @@ void readstr(FILE* input, STRING* tmp, int truen) {
|
|||
append(tmp, '\0');
|
||||
}
|
||||
|
||||
void freestr(STRING* str) {
|
||||
free(str->str);
|
||||
free(str);
|
||||
}
|
||||
|
||||
TOKEN* tokenize(FILE* input) {
|
||||
TOKEN* head = mktokenlist();
|
||||
TOKEN* head = mktoken();
|
||||
TOKEN* lastitem = head;
|
||||
TOKEN* curitem = head;
|
||||
|
||||
|
|
|
@ -2,6 +2,9 @@
|
|||
#define TOKENIZER_H
|
||||
#include <stdio.h>
|
||||
|
||||
/* tokenizer
|
||||
* Simple tool that splits a stream into many tokens. */
|
||||
|
||||
typedef enum {
|
||||
keyword, identifier, symbol, integer, string
|
||||
} TOKENTYPE;
|
||||
|
@ -9,10 +12,10 @@ typedef enum {
|
|||
typedef struct token {
|
||||
char* token;
|
||||
TOKENTYPE type;
|
||||
int truen;
|
||||
int definedat;
|
||||
struct token* next;
|
||||
} TOKEN;
|
||||
|
||||
TOKEN* tokenize(FILE* input);
|
||||
void freetokenlist(TOKEN l);
|
||||
void freetokenlist(TOKEN* list);
|
||||
#endif
|
||||
|
|
5
util.h
5
util.h
|
@ -5,6 +5,11 @@
|
|||
/* util
|
||||
* Random utilities. */
|
||||
|
||||
// Macros
|
||||
// eprintf: printf-style output that goes to stderr instead of stdout.
// Expands to an fprintf call, so it needs <stdio.h> at the point of use.
#define eprintf(...) fprintf (stderr, __VA_ARGS__)
|
||||
// count: sizeof(array) divided by sizeof(type).
// This is the element count only when `array` is a real array (not a pointer
// or a decayed parameter) and `type` is its element type; neither is checked.
#define count(array, type) ((sizeof(array)) / (sizeof(type)))
|
||||
// strcount: count() specialised for arrays whose elements are char*.
#define strcount(array) count(array, char*)
|
||||
|
||||
typedef struct stringlist {
|
||||
char* content;
|
||||
struct stringlist* next;
|
||||
|
|
Loading…
Reference in New Issue