/* jack-compiler/parser.c */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include "parser.h"
/* Forward declarations for parsing routines that are called before their definitions below. */

// Expression-level parsers.
TERM* parseterm(PARSER* p);
TERM* parseexpression(PARSER* p);
SUBROUTCALL* parsesubroutcall(PARSER* p);

// Statement-level parsers.
STATEMENT* parsestatements(PARSER* p);
char* parseidentifier(PARSER* p);
/*
 * Lookup tables of lexemes recognised by the parser. Each table that is
 * searched at runtime is paired with a `...size` constant holding its
 * element count.
 */

// Keywords that are accepted as a constant term.
const char* keywordconstants[] = { "true", "false", "null", "this" };
const int keywordconstantssize = sizeof(keywordconstants) / sizeof(keywordconstants[0]);

// Binary operators that may join two terms of an expression.
const char* ops[] = { "+", "-", "*", "/", "&", "|", "<", ">", "=" };
const int opssize = sizeof(ops) / sizeof(ops[0]);

// Storage classes of class-level variable declarations.
const char* varclasses[] = { "static", "field" };
const int varclassessize = sizeof(varclasses) / sizeof(varclasses[0]);

// Built-in type names accepted by parsetype().
const char* vardectypes[] = { "int", "char", "boolean" };
const int vardectypessize = sizeof(vardectypes) / sizeof(vardectypes[0]);

// Kinds of subroutine declarations.
const char* subroutclasses[] = { "constructor", "function", "method" };
const int subroutclassessize = sizeof(subroutclasses) / sizeof(subroutclasses[0]);

// Printable token-type names, indexed by a token's `type` in diagnostics.
const char* tokentypes[] = {
    "keyword", "identifier", "symbol", "integerConstant", "stringConstant"
};
2020-12-21 13:05:49 -05:00
/*
 * Allocates a DEBUGINFO describing the parser's current position: the file
 * stored in the parser and the `truen` value of the current token (the value
 * the diagnostics in this file print as the line number).
 *
 * The caller owns the returned object. The malloc result is not checked.
 */
DEBUGINFO* getdebug(PARSER* p) {
    DEBUGINFO* info = malloc(sizeof *info);
    info->definedat = p->current->truen;
    info->file = p->file;
    return info;
}
2020-12-14 14:12:20 -05:00
/* Advances the parser's cursor to the token that follows the current one. */
void next(PARSER* p) {
    TOKEN* following = p->current->next;
    p->current = following;
}
/*
 * Remembers the current token so that restorecp() can return to it.
 * There is a single slot: each call replaces the previously saved position.
 */
void checkpoint(PARSER* p) {
    TOKEN* here = p->current;
    p->checkpoint = here;
}
/* Moves the cursor back to the position most recently saved by checkpoint(). */
void restorecp(PARSER* p) {
    TOKEN* saved = p->checkpoint;
    p->current = saved;
}
/*
 * Prints an "unexpected token" diagnostic for the current token to stderr,
 * including its text, its token-type name, its line and the file name.
 * Does not terminate the program; see unexpected() for that.
 */
void unexpectedtoken(PARSER* p) {
    TOKEN* tok = p->current;
    fprintf(stderr,
            "Unexpected token '%s' (of type %s); line %i, file '%s'\n",
            tok->token, tokentypes[tok->type], tok->truen, p->file);
}
/*
 * Fatal parse error: reports the current token through unexpectedtoken()
 * and then terminates the process with exit status 1. Never returns.
 */
void unexpected(PARSER* p) {
    unexpectedtoken(p);   // diagnostic first, so the user sees where parsing stopped
    exit(1);
}
/*
 * Requires the current token's text to equal `content` and consumes it.
 * On a mismatch the error is reported through unexpected(), which exits.
 */
void checkcontent(PARSER* p, const char* content) {
    bool matches = strcmp(p->current->token, content) == 0;
    if(!matches) {
        unexpected(p);
    }
    next(p);
}
/*
 * Requires the current token to be of the given type. The token is NOT
 * consumed. On a mismatch a diagnostic naming the actual token type is
 * written to stderr and the process exits with status 1.
 */
void checktype(PARSER* p, TOKENTYPE type) {
    if(p->current->type == type)
        return;

    fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[p->current->type], p->current->truen, p->file);
    exit(1);
}
/*
 * Tries to parse one term starting at the current token.
 *
 * Forms are checked in this order: integer constant, string constant,
 * keyword constant (true/false/null/this), unary "-" or "~" applied to a
 * term, parenthesised expression, and identifier-led forms (subroutine call,
 * array item `name[expr]`, or plain variable name).
 *
 * Returns a newly allocated TERM, or NULL (consuming no tokens) when the
 * current token cannot start a term. A keyword that is not a keyword
 * constant, and errors inside nested terms/expressions, are reported through
 * unexpected(), which exits. The node's `next` and `op` fields are not set
 * here; the expression parser fills them in.
 */
TERM* parsetermnullified(PARSER* p) {
    TERM* t = malloc(sizeof *t);
    if(p->current->type == integer) {
        t->type = intconstant;
        t->integer = atoi(p->current->token);
        next(p);
    } else if(p->current->type == string) {
        t->type = stringconstant;
        t->string = p->current->token;
        next(p);
    } else if(p->current->type == keyword) {
        // Only the keywords listed in keywordconstants may appear as a term.
        bool valid = false;
        for(int i = 0; i < keywordconstantssize && !valid; i++)
            valid = !strcmp(p->current->token, keywordconstants[i]);
        if(!valid)
            unexpected(p);
        t->type = keywordconstant;
        t->string = p->current->token;
        next(p);
    } else if(!strcmp(p->current->token, "-") || !strcmp(p->current->token, "~")) {
        // NOTE(review): which operator ('-' or '~') matched is not recorded on
        // the node here -- confirm how later stages tell the two apart.
        t->type = unaryopterm;
        next(p);
        t->expression = parseterm(p);
        t->expression->next = NULL;   // the operand is a single term, not a chain
    } else if(!strcmp(p->current->token, "(")) {
        next(p);
        t->type = innerexpression;
        t->expression = parseexpression(p);
        checkcontent(p, ")");
    } else if(p->current->type == identifier) {
        // Try a subroutine call first; on failure the cursor is back on the
        // identifier, which is then a variable name or an array item.
        SUBROUTCALL* call = parsesubroutcall(p);
        if(call != NULL) {
            t->type = subroutcall;
            t->call = call;
        } else {
            t->string = p->current->token;
            next(p);
            if(!strcmp(p->current->token, "[")) {
                next(p);
                t->arrayexp = parseexpression(p);
                t->type = arrayitem;
                checkcontent(p, "]");
            } else {
                t->type = varname;
            }
        }
    } else {
        // Not the start of a term: release the unused node instead of leaking it.
        free(t);
        return NULL;
    }
    return t;
}
/* Returns true when the token's text is one of the binary operators in `ops`. */
bool isop(TOKEN* t) {
    int i = 0;
    while(i < opssize) {
        if(strcmp(t->token, ops[i]) == 0)
            return true;
        i++;
    }
    return false;
}
/*
 * Parses `term (op term)*` into a singly linked chain of TERM nodes.
 *
 * Each term that is followed by an operator stores the operator's first
 * character in its `op` field and links to the following term through
 * `next`; the last term's `next` is NULL and its `op` is left unset.
 *
 * Every term is obtained through parseterm(), which exits on failure, so the
 * NULL check at the end is purely defensive.
 */
TERM* parseexpressionnullified(PARSER* p) {
    TERM* first = parseterm(p);
    TERM* tail = first;

    while(isop(p->current)) {
        tail->op = p->current->token[0];
        next(p);

        TERM* operand = parseterm(p);
        tail->next = operand;
        tail = operand;
    }

    if(tail != NULL)
        tail->next = NULL;
    return first;
}
/*
 * Parses a mandatory term. If no term starts at the current token the error
 * is reported through unexpected(), which exits.
 */
TERM* parseterm(PARSER* p) {
    TERM* term = parsetermnullified(p);
    if(term != NULL)
        return term;

    unexpected(p);
    return term;
}
/*
 * Parses a mandatory expression. A NULL result from
 * parseexpressionnullified() is reported through unexpected(), which exits.
 */
TERM* parseexpression(PARSER* p) {
    TERM* expr = parseexpressionnullified(p);
    if(expr != NULL)
        return expr;

    unexpected(p);
    return expr;
}
/*
 * Parses a comma-separated list of expressions (the arguments of a call).
 *
 * Returns NULL when the current token is ")" (empty list); otherwise returns
 * the head of a NULL-terminated linked list with one node per expression.
 * The closing ")" itself is not consumed.
 */
EXPRESSIONLIST* parseexpressionlist(PARSER* p) {
    if(strcmp(p->current->token, ")") == 0)
        return NULL;

    EXPRESSIONLIST* first = malloc(sizeof *first);
    first->expression = parseexpressionnullified(p);

    EXPRESSIONLIST* tail = first;
    while(strcmp(p->current->token, ",") == 0) {
        next(p);   // skip the comma
        EXPRESSIONLIST* item = malloc(sizeof *item);
        item->expression = parseexpression(p);
        tail->next = item;
        tail = item;
    }

    tail->next = NULL;
    return first;
}
/*
 * Tries to parse a subroutine call: `name(args)` or `parent.name(args)`.
 *
 * Returns a newly allocated SUBROUTCALL on success, with the cursor just past
 * the closing ")". Returns NULL when the tokens do not form a call; the
 * cursor is then back where it was on entry, so the caller can try another
 * interpretation of the identifier.
 *
 * The start position is kept in a local variable rather than in
 * p->checkpoint: argument parsing can re-enter this function, and the nested
 * call would overwrite the single shared checkpoint slot.
 *
 * On failure the DEBUGINFO allocated for the call is released too. An
 * argument list that was already parsed when the closing ")" turns out to be
 * missing is not freed.
 */
SUBROUTCALL* parsesubroutcall(PARSER* p) {
    TOKEN* start = p->current;

    // A call needs at least `name (`: an identifier followed by another token.
    if(start->type != identifier || start->next == NULL)
        return NULL;

    SUBROUTCALL* call = malloc(sizeof *call);
    call->parentname = NULL;
    call->debug = NULL;

    if(!strcmp(start->next->token, ".")) {
        call->parentname = start->token;
        next(p);   // parent name
        next(p);   // "."
        if(p->current == NULL || p->current->type != identifier)
            goto notacall;
    }

    // Debug info points at the subroutine name token.
    call->debug = getdebug(p);
    call->name = p->current->token;
    next(p);

    if(p->current == NULL || strcmp(p->current->token, "("))
        goto notacall;
    next(p);

    call->parameters = parseexpressionlist(p);
    if(strcmp(p->current->token, ")"))
        goto notacall;
    next(p);
    return call;

notacall:
    free(call->debug);   // free(NULL) is a no-op when getdebug() was not reached
    free(call);
    p->current = start;
    return NULL;
}
/*
 * Parses `( expression ) { statements }`, the part shared by `if` and
 * `while`. The introducing keyword must already have been consumed.
 * Returns a newly allocated CONDSTATEMENT; syntax errors exit.
 */
CONDSTATEMENT* parsecond(PARSER* p) {
    checkcontent(p, "(");
    CONDSTATEMENT* cond = malloc(sizeof *cond);

    // Condition.
    cond->expression = parseexpression(p);
    checkcontent(p, ")");

    // Body.
    checkcontent(p, "{");
    cond->statements = parsestatements(p);
    checkcontent(p, "}");

    return cond;
}
/*
 * Parses the remainder of an `if` statement after the `if` keyword: the
 * condition and body, then an optional `else { statements }`.
 * `elsestatements` is NULL when there is no else branch.
 */
IFSTATEMENT* parseif(PARSER* p) {
    IFSTATEMENT* node = malloc(sizeof *node);
    node->base = parsecond(p);

    bool haselse = strcmp(p->current->token, "else") == 0;
    if(!haselse) {
        node->elsestatements = NULL;
        return node;
    }

    next(p);   // consume "else"
    checkcontent(p, "{");
    node->elsestatements = parsestatements(p);
    checkcontent(p, "}");
    return node;
}
/*
 * Parses the remainder of a `let` statement after the `let` keyword:
 * `name = expr;` or `name[index] = expr;`.
 * `arrayind` is NULL when the target is not an array element.
 */
LETSTATEMENT* parselet(PARSER* p) {
    LETSTATEMENT* let = malloc(sizeof *let);
    let->varname = parseidentifier(p);

    // Optional array subscript on the assignment target.
    let->arrayind = NULL;
    if(strcmp(p->current->token, "[") == 0) {
        next(p);
        let->arrayind = parseexpression(p);
        checkcontent(p, "]");
    }

    checkcontent(p, "=");
    let->expression = parseexpression(p);
    checkcontent(p, ";");
    return let;
}
/*
 * Tries to parse one statement (`let`, `if`, `while`, `do` or `return`).
 *
 * Returns a newly allocated STATEMENT, or NULL (consuming nothing) when the
 * current token does not start a statement. Syntax errors inside a statement
 * are reported through unexpected(), which exits. The node's `next` field is
 * not set here; parsestatements() links the nodes.
 */
STATEMENT* parsestatement(PARSER* p) {
    const char* word = p->current->token;
    STATEMENT* st = malloc(sizeof *st);

    if(!strcmp(word, "let")) {
        next(p);
        st->type = letstatement;
        st->letst = parselet(p);
    } else if(!strcmp(word, "if")) {
        next(p);
        st->type = ifstatement;
        st->ifst = parseif(p);
    } else if(!strcmp(word, "while")) {
        next(p);
        st->type = whilestatement;
        st->whilest = parsecond(p);
    } else if(!strcmp(word, "do")) {
        next(p);
        st->type = dostatement;
        st->dost = parsesubroutcall(p);
        // `do` must be followed by a call; without this check `do ;` would be
        // accepted and leave a NULL call in the tree.
        if(st->dost == NULL)
            unexpected(p);
        checkcontent(p, ";");
    } else if(!strcmp(word, "return")) {
        next(p);
        st->type = returnstatement;
        if(strcmp(p->current->token, ";")) {
            st->retst = parseexpressionnullified(p);
            checkcontent(p, ";");
        } else {
            // Bare `return;` carries no expression.
            st->retst = NULL;
            next(p);
        }
    } else {
        free(st);
        return NULL;
    }
    return st;
}
/*
 * Parses zero or more consecutive statements into a NULL-terminated linked
 * list. Returns the first statement, or NULL when none is present.
 */
STATEMENT* parsestatements(PARSER* p) {
    STATEMENT* first = parsestatement(p);
    STATEMENT* tail = first;

    for(;;) {
        STATEMENT* parsed = parsestatement(p);
        if(parsed == NULL)
            break;
        tail->next = parsed;
        tail = parsed;
    }

    if(tail != NULL)
        tail->next = NULL;
    return first;
}
2020-12-20 13:58:10 -05:00
/*
 * Parses a type name and consumes it.
 *
 * Accepts one of the built-in type keywords in `vardectypes` (setting
 * *primitive to true) or any identifier (setting *primitive to false).
 * Returns the token text of the type.
 *
 * Anything else -- including a keyword that is not a built-in type -- is
 * reported through unexpected(), which exits. Previously an unrecognised
 * keyword fell off the end of the function without returning a value.
 */
char* parsetype(PARSER* p, bool* primitive) {
    char* result = p->current->token;

    if(p->current->type == keyword) {
        for(int i = 0; i < vardectypessize; i++) {
            if(!strcmp(result, vardectypes[i])) {
                next(p);
                *primitive = true;
                return result;
            }
        }
    } else if(p->current->type == identifier) {
        next(p);
        *primitive = false;
        return result;
    }

    unexpected(p);
    return NULL;   // not reached: unexpected() exits
}
/*
 * Looks the current token's text up in `strings` (an array of `sz` entries).
 * Returns the index of the first match, or -1 when there is none.
 * The token is not consumed.
 */
int parsepossibilities(PARSER* p, const char** strings, int sz) {
    int found = -1;
    for(int i = 0; i < sz && found == -1; i++) {
        if(strcmp(p->current->token, strings[i]) == 0)
            found = i;
    }
    return found;
}
/*
 * Classifies the current token as a class-variable storage class by its
 * index in `varclasses`; yields -1 when it is neither. Does not consume it.
 */
VARCLASS parsevarclass(PARSER* p) {
    int index = parsepossibilities(p, varclasses, varclassessize);
    return index;
}
/*
 * Classifies the current token as a subroutine kind by its index in
 * `subroutclasses`; yields -1 when it is none of them. Does not consume it.
 */
SUBROUTCLASS parsesubroutclass(PARSER* p) {
    int index = parsepossibilities(p, subroutclasses, subroutclassessize);
    return index;
}
/*
 * Requires the current token to be an identifier, consumes it and returns
 * its text. checktype() exits the process when the token is something else.
 */
char* parseidentifier(PARSER* p) {
    checktype(p, identifier);

    char* name = p->current->token;
    next(p);
    return name;
}
/*
 * Fills `v` from `type name (, name)* ;` -- the part shared by class-level
 * and local variable declarations. The introducing keyword must already
 * have been consumed.
 *
 * Sets v->typeclass (token type of the type token), v->type, v->primitive,
 * v->debug (taken at the first name) and v->names, a NULL-terminated list of
 * the declared names. v->next is not touched.
 */
void parsevardeccommon(PARSER* p, VARDEC* v) {
    v->typeclass = p->current->type;
    v->type = parsetype(p, &v->primitive);

    STRINGLIST* tail = malloc(sizeof *tail);
    v->names = tail;
    v->debug = getdebug(p);
    tail->content = parseidentifier(p);

    // Further names, each introduced by a comma.
    while(strcmp(p->current->token, ",") == 0) {
        next(p);
        STRINGLIST* item = malloc(sizeof *item);
        item->content = parseidentifier(p);
        tail->next = item;
        tail = item;
    }
    tail->next = NULL;

    checkcontent(p, ";");
}
/*
 * Tries to parse one class-level variable declaration
 * (`static` or `field`, then type and names).
 * Returns NULL, consuming nothing, when the current token is neither keyword.
 */
CLASSVARDEC* parseclassvardec(PARSER* p) {
    VARCLASS kind = parsevarclass(p);
    if(kind == -1)
        return NULL;
    next(p);   // consume the storage-class keyword

    CLASSVARDEC* dec = malloc(sizeof *dec);
    dec->base = malloc(sizeof(VARDEC));
    dec->varclass = kind;
    parsevardeccommon(p, dec->base);
    return dec;
}
/*
 * Parses zero or more class-level variable declarations into a
 * NULL-terminated linked list. Returns NULL when there are none.
 */
CLASSVARDEC* parseclassvardecs(PARSER* p) {
    CLASSVARDEC* first = parseclassvardec(p);
    CLASSVARDEC* tail = first;

    for(;;) {
        CLASSVARDEC* parsed = parseclassvardec(p);
        if(parsed == NULL)
            break;
        tail->next = parsed;
        tail = parsed;
    }

    if(tail != NULL)
        tail->next = NULL;
    return first;
}
/*
 * Tries to parse one local variable declaration (`var type name, ...;`).
 * Returns NULL, consuming nothing, when the current token is not `var`.
 */
VARDEC* parsevardec(PARSER* p) {
    bool isvar = strcmp(p->current->token, "var") == 0;
    if(!isvar)
        return NULL;
    next(p);   // consume "var"

    VARDEC* dec = malloc(sizeof *dec);
    parsevardeccommon(p, dec);
    return dec;
}
/*
 * Parses zero or more local variable declarations into a NULL-terminated
 * linked list. Returns NULL when there are none.
 */
VARDEC* parsevardecs(PARSER* p) {
    VARDEC* first = parsevardec(p);
    VARDEC* tail = first;

    for(;;) {
        VARDEC* parsed = parsevardec(p);
        if(parsed == NULL)
            break;
        tail->next = parsed;
        tail = parsed;
    }

    if(tail != NULL)
        tail->next = NULL;
    return first;
}
/*
 * Tries to parse one `type name` parameter.
 *
 * Returns NULL, consuming nothing, when the current token is ")" (end of the
 * parameter list). Otherwise returns a newly allocated PARAMETER; a bad type
 * or name exits. The node's `next` field is left for the caller to set.
 *
 * The node is allocated only after the ")" check so that the empty-list case
 * does not leak it.
 */
PARAMETER* parseparameter(PARSER* p) {
    if(!strcmp(p->current->token, ")"))
        return NULL;

    PARAMETER* param = malloc(sizeof *param);
    bool dummy;   // parsetype() requires an out-parameter; the value is unused here
    param->type = parsetype(p, &dummy);
    param->name = parseidentifier(p);
    return param;
}
/*
 * Parses a possibly empty, comma-separated parameter list into a
 * NULL-terminated linked list. The closing ")" is not consumed.
 * Returns NULL for an empty list.
 *
 * A comma must be followed by another parameter: a trailing comma such as
 * `(int a,)` is reported through unexpected(), which exits, instead of
 * being silently accepted.
 */
PARAMETER* parseparameters(PARSER* p) {
    PARAMETER* head = parseparameter(p);
    PARAMETER* current = head;

    while(!strcmp(p->current->token, ",")) {
        next(p);
        PARAMETER* following = parseparameter(p);
        if(following == NULL)
            unexpected(p);   // ")" right after "," -- not a valid parameter list
        current->next = following;
        current = following;
    }

    if(current != NULL)
        current->next = NULL;
    return head;
}
/*
 * Parses a subroutine body's contents: local variable declarations followed
 * by statements. The surrounding braces are handled by the caller.
 */
SUBROUTBODY* parsesubroutbody(PARSER* p) {
    SUBROUTBODY* body = malloc(sizeof *body);

    body->vardecs = parsevardecs(p);
    body->statements = parsestatements(p);

    return body;
}
/*
 * Tries to parse one subroutine declaration:
 * `(constructor|function|method) (void|type) name ( parameters ) { body }`.
 *
 * Returns NULL, consuming nothing, when the current token is not one of the
 * subroutine keywords. Syntax errors after that point exit. The node's
 * `next` field is left for the caller to set.
 */
SUBDEC* parsesubroutdec(PARSER* p) {
    SUBROUTCLASS kind = parsesubroutclass(p);
    if(kind == -1)
        return NULL;
    next(p);   // consume the subroutine keyword

    SUBDEC* dec = malloc(sizeof *dec);
    dec->subroutclass = kind;

    // Return type: `void` is accepted here in addition to regular types.
    dec->typeclass = p->current->type;
    bool isvoid = strcmp(p->current->token, "void") == 0;
    if(isvoid) {
        dec->type = p->current->token;
        next(p);
    } else {
        bool dummy;
        dec->type = parsetype(p, &dummy);
    }

    // Debug info is taken at the subroutine's name token.
    dec->debug = getdebug(p);
    dec->name = parseidentifier(p);

    checkcontent(p, "(");
    dec->parameters = parseparameters(p);
    checkcontent(p, ")");

    checkcontent(p, "{");
    dec->body = parsesubroutbody(p);
    checkcontent(p, "}");

    return dec;
}
/*
 * Parses zero or more subroutine declarations into a NULL-terminated linked
 * list. Returns NULL when there are none.
 */
SUBDEC* parsesubroutdecs(PARSER* p) {
    SUBDEC* first = parsesubroutdec(p);
    SUBDEC* tail = first;

    for(;;) {
        SUBDEC* parsed = parsesubroutdec(p);
        if(parsed == NULL)
            break;
        tail->next = parsed;
        tail = parsed;
    }

    if(tail != NULL)
        tail->next = NULL;
    return first;
}
/*
 * Parses a whole class: `class Name { classVarDecs subroutineDecs }`.
 * Returns a newly allocated CLASS whose `next` is NULL; syntax errors exit.
 */
CLASS* parseclass(PARSER* p) {
    checkcontent(p, "class");

    CLASS* cls = malloc(sizeof *cls);
    cls->next = NULL;

    // Debug info is taken at the class name token.
    cls->debug = getdebug(p);
    cls->name = parseidentifier(p);

    checkcontent(p, "{");
    cls->vardecs = parseclassvardecs(p);
    cls->subdecs = parsesubroutdecs(p);
    checkcontent(p, "}");

    return cls;
}
/*
 * Creates a parser positioned at the first token of `tokens`.
 *
 * `file` is stored as-is (not copied) and is used in diagnostics. All fields
 * are initialised: the checkpoint starts at the first token and `output` is
 * NULL until parse() runs. Returns NULL if the allocation fails.
 * The caller owns the returned parser.
 */
PARSER* mkparser(TOKEN* tokens, char* file) {
    PARSER* parser = malloc(sizeof *parser);
    if(parser == NULL)
        return NULL;

    parser->tokens = tokens;
    parser->current = tokens;
    parser->checkpoint = tokens;   // previously left uninitialised
    parser->file = file;
    parser->output = NULL;         // previously left uninitialised
    return parser;
}
/*
 * Parses the parser's token stream as a single class and stores the result
 * in parser->output. Syntax errors terminate the process.
 */
void parse(PARSER* parser) {
    CLASS* parsed = parseclass(parser);
    parser->output = parsed;
}