Reorganize tokenizer
This commit is contained in:
parent
c3df97b04b
commit
c629a01b59
|
@ -79,18 +79,18 @@ VARDEC* tovardec(OBJ* obj) {
|
||||||
|
|
||||||
void doubledeclaration(char* name, DEBUGINFO* debug, OBJ* other) {
|
void doubledeclaration(char* name, DEBUGINFO* debug, OBJ* other) {
|
||||||
DEBUGINFO* debugother = other->getdebug(other);
|
DEBUGINFO* debugother = other->getdebug(other);
|
||||||
fprintf(stderr, "Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n",
|
eprintf("Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n",
|
||||||
name, debug->file, debug->definedat, debugother->file, debugother->definedat);
|
name, debug->file, debug->definedat, debugother->file, debugother->definedat);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void notdeclared(char* name, DEBUGINFO* debug) {
|
void notdeclared(char* name, DEBUGINFO* debug) {
|
||||||
fprintf(stderr, "'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat);
|
eprintf("'%s' not declared; file '%s', line %i\n", name, debug->file, debug->definedat);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void invalidparent(SUBROUTCALL* call) {
|
void invalidparent(SUBROUTCALL* call) {
|
||||||
fprintf(stderr, "Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat);
|
eprintf("Invalid subroutine parent '%s'; file '%s', line %i\n", call->parentname, call->debug->file, call->debug->definedat);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
10
compiler.c
10
compiler.c
|
@ -63,11 +63,11 @@ LINE* mathopln(char op) {
|
||||||
return onetoken("and");
|
return onetoken("and");
|
||||||
if(op == '/') {
|
if(op == '/') {
|
||||||
char* tokens[] = { "call", "Math.divide", "2" };
|
char* tokens[] = { "call", "Math.divide", "2" };
|
||||||
return mksimpleln(tokens, sizeof(tokens) / sizeof(char*));
|
return mksimpleln(tokens, strcount(tokens));
|
||||||
}
|
}
|
||||||
if(op == '*') {
|
if(op == '*') {
|
||||||
char* tokens[] = { "call", "Math.multiply", "2" };
|
char* tokens[] = { "call", "Math.multiply", "2" };
|
||||||
return mksimpleln(tokens, sizeof(tokens) / sizeof(char*));
|
return mksimpleln(tokens, strcount(tokens));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) {
|
||||||
|
|
||||||
if(e->type == intconstant) {
|
if(e->type == intconstant) {
|
||||||
char* tokens[] = { "push", "constant", itoa(e->integer) };
|
char* tokens[] = { "push", "constant", itoa(e->integer) };
|
||||||
myblk = mklnblk(mksimpleln(tokens, sizeof(tokens) / sizeof(char*)));
|
myblk = mklnblk(mksimpleln(tokens, strcount(tokens)));
|
||||||
}
|
}
|
||||||
else if(e->type == unaryopterm) {
|
else if(e->type == unaryopterm) {
|
||||||
myblk = compileexpression(s, e->expression);
|
myblk = compileexpression(s, e->expression);
|
||||||
|
@ -88,7 +88,7 @@ LINEBLOCK* compileexpression(SCOPE* s, TERM* e) {
|
||||||
myblk = compileexpression(s, e->expression);
|
myblk = compileexpression(s, e->expression);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
fprintf(stderr, "Unsupported term yet %i\n", e->type);
|
eprintf("Unsupported term yet %i\n", e->type);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -178,7 +178,7 @@ LINEBLOCK* compilestatement(SCOPE* s, CLASS* c, STATEMENT* st) {
|
||||||
else if(st->type == returnstatement)
|
else if(st->type == returnstatement)
|
||||||
return compileret(s, st->retst);
|
return compileret(s, st->retst);
|
||||||
else {
|
else {
|
||||||
fprintf(stderr, "UNSUPPORTED\n");
|
eprintf("UNSUPPORTED\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
6
parser.c
6
parser.c
|
@ -42,7 +42,7 @@ const char* tokentypes[] = {
|
||||||
DEBUGINFO* getdebug(PARSER* p) {
|
DEBUGINFO* getdebug(PARSER* p) {
|
||||||
DEBUGINFO* d = (DEBUGINFO*)malloc(sizeof(DEBUGINFO));
|
DEBUGINFO* d = (DEBUGINFO*)malloc(sizeof(DEBUGINFO));
|
||||||
d->file = p->file;
|
d->file = p->file;
|
||||||
d->definedat = p->current->truen;
|
d->definedat = p->current->definedat;
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ void restorecp(PARSER* p) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void unexpectedtoken(PARSER* p) {
|
void unexpectedtoken(PARSER* p) {
|
||||||
fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->truen, p->file);
|
fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", p->current->token, tokentypes[p->current->type], p->current->definedat, p->file);
|
||||||
}
|
}
|
||||||
|
|
||||||
void unexpected(PARSER* p) {
|
void unexpected(PARSER* p) {
|
||||||
|
@ -75,7 +75,7 @@ void checkcontent(PARSER* p, const char* content) {
|
||||||
|
|
||||||
void checktype(PARSER* p, TOKENTYPE type) {
|
void checktype(PARSER* p, TOKENTYPE type) {
|
||||||
if(p->current->type != type) {
|
if(p->current->type != type) {
|
||||||
fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->truen, p->file);
|
fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->definedat, p->file);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,17 +1,18 @@
|
||||||
#ifndef TOKENS_H
|
#ifndef TOKENIZER_TABLES_H
|
||||||
#define TOKENS_H
|
#define TOKENIZER_TABLES_H
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
const char* keywords[] = {
|
const char* keywords[] = {
|
||||||
"class", "constructor", "function", "method", "field", "static",
|
"class", "constructor", "function", "method", "field", "static",
|
||||||
"var", "int", "char", "boolean", "void", "true", "false", "null",
|
"var", "int", "char", "boolean", "void", "true", "false", "null",
|
||||||
"this", "let", "do", "if", "else", "while", "return"
|
"this", "let", "do", "if", "else", "while", "return"
|
||||||
};
|
};
|
||||||
const int keywordssize = sizeof(keywords) / sizeof(char*);
|
const int keywordssize = strcount(keyword);
|
||||||
|
|
||||||
const char* symbols[] = {
|
const char* symbols[] = {
|
||||||
"{", "}", "(", ")", "[", "]", ".", ",", ";", "+", "-", "*", "/",
|
"{", "}", "(", ")", "[", "]", ".", ",", ";", "+", "-", "*", "/",
|
||||||
"&", "|", "<", ">", "=", "~"
|
"&", "|", "<", ">", "=", "~"
|
||||||
};
|
};
|
||||||
const int symbolssize = sizeof(symbols) / sizeof(char*);
|
const int symbolssize = strcount(symbols);
|
||||||
|
|
||||||
#endif
|
#endif
|
106
tokenizer.c
106
tokenizer.c
|
@ -2,9 +2,10 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include "tokens.h"
|
|
||||||
#include "tokenizer.h"
|
#include "tokenizer.h"
|
||||||
|
#include "tokenizer-tables.h"
|
||||||
|
|
||||||
|
// Data types
|
||||||
typedef enum {
|
typedef enum {
|
||||||
common, charsymbol, space
|
common, charsymbol, space
|
||||||
} CHARTYPE;
|
} CHARTYPE;
|
||||||
|
@ -15,14 +16,38 @@ typedef struct {
|
||||||
int count;
|
int count;
|
||||||
} STRING;
|
} STRING;
|
||||||
|
|
||||||
TOKEN* mktokenlist() {
|
// String manipulation
|
||||||
return (TOKEN*)malloc(sizeof(TOKEN));
|
STRING* mkstring(int size);
|
||||||
}
|
void append(STRING* s, char c);
|
||||||
|
void freestr(STRING* str);
|
||||||
|
|
||||||
CHARTYPE getchartype(unsigned char c) {
|
// Token manipulation
|
||||||
if(isspace(c)) return space;
|
TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type);
|
||||||
if(isalnum(c) || c == '_' || c == '"') return common;
|
TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat);
|
||||||
return charsymbol;
|
// Allocates one TOKEN node; malloc leaves its fields uninitialized.
// The expansion is fully parenthesized so it behaves as a single operand
// wherever the macro is used (e.g. followed by a postfix operator).
#define mktoken() ((TOKEN*)malloc(sizeof(TOKEN)))
|
||||||
|
|
||||||
|
// Char types
|
||||||
|
CHARTYPE getchartype(unsigned char c);
|
||||||
|
bool iskeyword(STRING* tk);
|
||||||
|
bool issymbol(STRING* tk);
|
||||||
|
bool isint(char* str);
|
||||||
|
bool isintcons(STRING* tk);
|
||||||
|
bool isidentifier(STRING* tk);
|
||||||
|
TOKENTYPE gettokentype(STRING* tk, int definedat);
|
||||||
|
|
||||||
|
// Stream handling
|
||||||
|
void skipln(FILE* input);
|
||||||
|
void skipmultiln(FILE* input, int* lnscount);
|
||||||
|
bool handlecomment(FILE* input, int* lnscount);
|
||||||
|
void readstr(FILE* input, STRING* tmp, int definedat);
|
||||||
|
|
||||||
|
// String manipulation
|
||||||
|
STRING* mkstring(int size) {
|
||||||
|
STRING* str = (STRING*)malloc(sizeof(STRING));
|
||||||
|
str->size = sizeof(char) * size; // initial size
|
||||||
|
str->str = (char*)malloc(str->size);
|
||||||
|
str->count = 0;
|
||||||
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
void append(STRING* s, char c) {
|
void append(STRING* s, char c) {
|
||||||
|
@ -36,12 +61,33 @@ void append(STRING* s, char c) {
|
||||||
s->count++;
|
s->count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
STRING* mkstring(int size) {
|
void freestr(STRING* str) {
|
||||||
STRING* str = (STRING*)malloc(sizeof(STRING));
|
free(str->str);
|
||||||
str->size = sizeof(char) * size; // initial size
|
free(str);
|
||||||
str->str = (char*)malloc(str->size);
|
}
|
||||||
str->count = 0;
|
|
||||||
return str;
|
// Token manipulation
|
||||||
|
TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int definedat, TOKENTYPE type) {
|
||||||
|
curitem->token = (char*)malloc(sizeof(char)*token->count);
|
||||||
|
strcpy(curitem->token, token->str);
|
||||||
|
curitem->definedat = definedat;
|
||||||
|
curitem->type = type;
|
||||||
|
TOKEN* nextitem = mktoken();
|
||||||
|
curitem->next = nextitem;
|
||||||
|
token->count = 0;
|
||||||
|
return nextitem;
|
||||||
|
}
|
||||||
|
|
||||||
|
TOKEN* appendtoken(TOKEN* curitem, STRING* token, int definedat) {
|
||||||
|
append(token, '\0');
|
||||||
|
return appendtokenraw(curitem, token, definedat, gettokentype(token, definedat));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Char types
|
||||||
|
CHARTYPE getchartype(unsigned char c) {
|
||||||
|
if(isspace(c)) return space;
|
||||||
|
if(isalnum(c) || c == '_' || c == '"') return common;
|
||||||
|
return charsymbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool iskeyword(STRING* tk) {
|
bool iskeyword(STRING* tk) {
|
||||||
|
@ -88,31 +134,16 @@ bool isidentifier(STRING* tk) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
TOKENTYPE gettokentype(STRING* tk, int truen) {
|
TOKENTYPE gettokentype(STRING* tk, int definedat) {
|
||||||
if(iskeyword(tk)) return keyword;
|
if(iskeyword(tk)) return keyword;
|
||||||
if(issymbol(tk)) return symbol;
|
if(issymbol(tk)) return symbol;
|
||||||
if(isintcons(tk)) return integer;
|
if(isintcons(tk)) return integer;
|
||||||
if(isidentifier(tk)) return identifier;
|
if(isidentifier(tk)) return identifier;
|
||||||
fprintf(stderr, "Unexpected token '%s'; line %i\n", tk->str, truen);
|
eprintf("Unexpected token '%s'; line %i\n", tk->str, definedat);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int truen, TOKENTYPE type) {
|
// Stream handling
|
||||||
curitem->token = (char*)malloc(sizeof(char)*token->count);
|
|
||||||
strcpy(curitem->token, token->str);
|
|
||||||
curitem->truen = truen;
|
|
||||||
curitem->type = type;
|
|
||||||
TOKEN* nextitem = mktokenlist();
|
|
||||||
curitem->next = nextitem;
|
|
||||||
token->count = 0;
|
|
||||||
return nextitem;
|
|
||||||
}
|
|
||||||
|
|
||||||
TOKEN* appendtoken(TOKEN* curitem, STRING* token, int truen) {
|
|
||||||
append(token, '\0');
|
|
||||||
return appendtokenraw(curitem, token, truen, gettokentype(token, truen));
|
|
||||||
}
|
|
||||||
|
|
||||||
void skipln(FILE* input) {
|
void skipln(FILE* input) {
|
||||||
unsigned char c;
|
unsigned char c;
|
||||||
while(c = fgetc(input), c != '\0')
|
while(c = fgetc(input), c != '\0')
|
||||||
|
@ -149,11 +180,11 @@ bool handlecomment(FILE* input, int* lnscount) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void readstr(FILE* input, STRING* tmp, int truen) {
|
void readstr(FILE* input, STRING* tmp, int definedat) {
|
||||||
unsigned char c;
|
unsigned char c;
|
||||||
while(c = fgetc(input), c != '\0') {
|
while(c = fgetc(input), c != '\0') {
|
||||||
if(c == '\n') {
|
if(c == '\n') {
|
||||||
fprintf(stderr, "Unexpected end of line; line %i", truen);
|
eprintf("Unexpected end of line; line %i", definedat);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if(c == '"')
|
if(c == '"')
|
||||||
|
@ -163,13 +194,8 @@ void readstr(FILE* input, STRING* tmp, int truen) {
|
||||||
append(tmp, '\0');
|
append(tmp, '\0');
|
||||||
}
|
}
|
||||||
|
|
||||||
void freestr(STRING* str) {
|
|
||||||
free(str->str);
|
|
||||||
free(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
TOKEN* tokenize(FILE* input) {
|
TOKEN* tokenize(FILE* input) {
|
||||||
TOKEN* head = mktokenlist();
|
TOKEN* head = mktoken();
|
||||||
TOKEN* lastitem = head;
|
TOKEN* lastitem = head;
|
||||||
TOKEN* curitem = head;
|
TOKEN* curitem = head;
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,9 @@
|
||||||
#define TOKENIZER_H
|
#define TOKENIZER_H
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/* tokenizer
|
||||||
|
* Simple tool that splits a stream into many tokens. */
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
keyword, identifier, symbol, integer, string
|
keyword, identifier, symbol, integer, string
|
||||||
} TOKENTYPE;
|
} TOKENTYPE;
|
||||||
|
@ -9,10 +12,10 @@ typedef enum {
|
||||||
typedef struct token {
|
typedef struct token {
|
||||||
char* token;
|
char* token;
|
||||||
TOKENTYPE type;
|
TOKENTYPE type;
|
||||||
int truen;
|
int definedat;
|
||||||
struct token* next;
|
struct token* next;
|
||||||
} TOKEN;
|
} TOKEN;
|
||||||
|
|
||||||
TOKEN* tokenize(FILE* input);
|
TOKEN* tokenize(FILE* input);
|
||||||
void freetokenlist(TOKEN l);
|
void freetokenlist(TOKEN* list);
|
||||||
#endif
|
#endif
|
||||||
|
|
5
util.h
5
util.h
|
@ -5,6 +5,11 @@
|
||||||
/* util
|
/* util
|
||||||
* Random utilities. */
|
* Random utilities. */
|
||||||
|
|
||||||
|
// Macros
|
||||||
|
// printf-style output to stderr; evaluates to fprintf's return value.
#define eprintf(...) fprintf(stderr, __VA_ARGS__)
|
||||||
|
// Number of elements in `array`, given its element `type`.
// Only meaningful for real arrays: a pointer yields sizeof(pointer) / sizeof(type).
#define count(array, type) (sizeof(array) / sizeof(type))
|
||||||
|
// Element count of an array whose elements are char* (C strings).
#define strcount(array) count(array, char *)
|
||||||
|
|
||||||
typedef struct stringlist {
|
typedef struct stringlist {
|
||||||
char* content;
|
char* content;
|
||||||
struct stringlist* next;
|
struct stringlist* next;
|
||||||
|
|
Loading…
Reference in New Issue