Add parser
This commit is contained in:
parent
eb7778c267
commit
5b19ab0914
2
Makefile
2
Makefile
|
@ -1,4 +1,4 @@
|
|||
FILES = tokenizer.c main.c parser.c
|
||||
FILES = tokenizer.c main.c parser.c printer.c
|
||||
INCLUDES = -I.
|
||||
CFLAGS = -std=c99 -g
|
||||
OUTFILE = compiler
|
||||
|
|
24
main.c
24
main.c
|
@ -3,19 +3,8 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "tokenizer.h"
|
||||
|
||||
/* XML tag names used when dumping tokens; indexed by a token's type value. */
const char* types[] = {
    "keyword",
    "symbol",
    "integerConstant",
    "stringConstant",
    "identifier"
};
|
||||
|
||||
/*
 * Writes one "<type> token </type>" line per list node to output and frees
 * each node (together with its token text) right after it has been printed.
 * tks is dereferenced unconditionally, so it must not be NULL.
 */
void printtks(TOKENLIST* tks, FILE* output) {
    TOKENLIST* node = tks;
    do {
        const char* tag = types[node->type];
        fprintf(output, "<%s> %s </%s>\r\n", tag, node->token, tag);

        // Grab the successor before the node is released.
        TOKENLIST* following = node->next;
        free(node->token);
        free(node);
        node = following;
    } while(node != NULL);
}
|
||||
#include "printer.h"
|
||||
#include "parser.h"
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if(argc < 2) {
|
||||
|
@ -30,11 +19,10 @@ int main(int argc, char* argv[]) {
|
|||
return errno;
|
||||
}
|
||||
|
||||
FILE* output = fopen("out.xml", "w");
|
||||
fprintf(output, "<tokens>\r\n");
|
||||
printtks(tokenize(input), output);
|
||||
fprintf(output, "</tokens>\r\n");
|
||||
fclose(output);
|
||||
PARSER* p = mkparser(tokenize(input), argv[1]);
|
||||
parse(p);
|
||||
|
||||
printparser(stdout, p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
542
parser.c
542
parser.c
|
@ -0,0 +1,542 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "parser.h"
|
||||
|
||||
char* parseidentifier(PARSER* p);
|
||||
STATEMENT* parsestatements(PARSER* p);
|
||||
SUBROUTCALL* parsesubroutcall(PARSER* p);
|
||||
TERM* parseexpression(PARSER* p);
|
||||
TERM* parseterm(PARSER* p);
|
||||
|
||||
/* Keywords that are accepted as a term on their own. */
const char* keywordconstants[] = {
    "true",
    "false",
    "null",
    "this"
};
const int keywordconstantssize = sizeof(keywordconstants) / sizeof(keywordconstants[0]);

/* Single-character binary operators recognised between terms. */
const char* ops[] = {
    "+", "-", "*", "/",
    "&", "|",
    "<", ">", "="
};
const int opssize = sizeof(ops) / sizeof(ops[0]);

/* Class-level variable kinds; the index is used as the VARCLASS value. */
const char* varclasses[] = {
    "static",
    "field"
};
const int varclassessize = sizeof(varclasses) / sizeof(varclasses[0]);

/* Keywords accepted as a variable/parameter/return type. */
const char* vardectypes[] = {
    "int",
    "char",
    "boolean"
};
const int vardectypessize = sizeof(vardectypes) / sizeof(vardectypes[0]);

/* Subroutine kinds; the index is used as the SUBROUTCLASS value. */
const char* subroutclasses[] = {
    "constructor",
    "function",
    "method"
};
const int subroutclassessize = sizeof(subroutclasses) / sizeof(subroutclasses[0]);

/* Token type names for error messages; indexed by a token's type value. */
const char* tokentypes[] = {
    "keyword",
    "identifier",
    "symbol",
    "integerConstant",
    "stringConstant"
};
|
||||
|
||||
/* Moves the parser cursor to the token following the current one. */
void next(PARSER* p) {
    TOKEN* following = p->current->next;
    p->current = following;
}
|
||||
|
||||
/* Records the current cursor position in the parser's checkpoint slot. */
void checkpoint(PARSER* p) {
    TOKEN* here = p->current;
    p->checkpoint = here;
}
|
||||
|
||||
/* Rewinds the cursor to the position stored in the parser's checkpoint slot. */
void restorecp(PARSER* p) {
    TOKEN* saved = p->checkpoint;
    p->current = saved;
}
|
||||
|
||||
/*
 * Reports the current token on stderr as unexpected, together with its type
 * name, its line number and the file being parsed. Does not terminate.
 */
void unexpectedtoken(PARSER* p) {
    TOKEN* tk = p->current;
    const char* kind = tokentypes[tk->type];
    fprintf(stderr, "Unexpected token '%s' (of type %s); line %i, file '%s'\n", tk->token, kind, tk->truen, p->file);
}
|
||||
|
||||
/*
 * Fatal variant of unexpectedtoken(): prints the diagnostic for the current
 * token and terminates the process with exit status 1.
 */
void unexpected(PARSER* p) {
    unexpectedtoken(p);

    exit(1);
}
|
||||
|
||||
/*
 * Requires the current token's text to equal content, then consumes it.
 * On a mismatch the error is reported through unexpected(), which exits.
 */
void checkcontent(PARSER* p, const char* content) {
    bool matches = strcmp(p->current->token, content) == 0;
    if(!matches)
        unexpected(p);
    next(p);
}
|
||||
|
||||
/*
 * Requires the current token to be of the given type. The token is NOT
 * consumed. On a mismatch a diagnostic is printed and the process exits
 * with status 1.
 */
void checktype(PARSER* p, TOKENTYPE type) {
    TOKEN* tk = p->current;
    if(tk->type == type)
        return;

    fprintf(stderr, "Unexpected %s; line %i, file '%s'\n", tokentypes[tk->type], tk->truen, p->file);
    exit(1);
}
|
||||
|
||||
/*
 * Parses a single term starting at the current token:
 *   integer constant | string constant | keyword constant |
 *   unary-op term | ( expression ) | subroutine call |
 *   variable | variable [ expression ]
 *
 * Returns a newly allocated TERM, or NULL (consuming nothing) when the
 * current token cannot start a term. A keyword that is not a keyword
 * constant is a fatal error.
 *
 * The link/operator fields are given defined values up front because a
 * term is not always part of an operator chain (e.g. the operand of a unary
 * operator), yet the printer inspects `next` on every term.
 */
TERM* parsetermnullified(PARSER* p) {
    TERM* t = (TERM*)malloc(sizeof(TERM));
    t->next = NULL;
    t->arrayexp = NULL;
    t->op = '\0';
    t->unaryop = '\0';

    if(p->current->type == integer) {
        t->type = intconstant;
        t->integer = atoi(p->current->token);
        next(p);
    } else if(p->current->type == string) {
        t->type = stringconstant;
        t->string = p->current->token;
        next(p);
    } else if(p->current->type == keyword) {
        t->type = keywordconstant;
        bool valid = false;
        for(int i = 0; i < keywordconstantssize; i++)
            if(!strcmp(p->current->token, keywordconstants[i]))
                valid = true;
        if(!valid)
            unexpected(p);

        t->string = p->current->token;
        next(p);
    } else if(!strcmp(p->current->token, "-") || !strcmp(p->current->token, "~")) {
        t->type = unaryopterm;
        t->unaryop = p->current->token[0];
        next(p);
        t->expression = parseterm(p);
    } else if(!strcmp(p->current->token, "(")) {
        next(p);
        t->type = innerexpression;
        t->expression = parseexpression(p);
        checkcontent(p, ")");
    } else if(p->current->type == identifier) {
        // Try a call first; parsesubroutcall rewinds the cursor on failure.
        SUBROUTCALL* call = parsesubroutcall(p);
        if(call == NULL) {
            t->string = p->current->token;
            next(p);
            if(!strcmp(p->current->token, "[")) {
                next(p);
                t->arrayexp = parseexpression(p);
                t->type = arrayitem;
                checkcontent(p, "]");
            } else {
                t->type = varname;
            }
        } else {
            t->type = subroutcall;
            t->call = call;
        }
    } else {
        // Not a term: release the node instead of leaking it.
        free(t);
        return NULL;
    }
    return t;
}
|
||||
|
||||
/* Returns true when the token's text is one of the entries in ops[]. */
bool isop(TOKEN* t) {
    int idx = 0;
    while(idx < opssize) {
        if(strcmp(t->token, ops[idx]) == 0)
            return true;
        idx++;
    }
    return false;
}
|
||||
|
||||
/*
 * Parses `term (op term)*` and returns the terms as a singly linked chain.
 * Each term's `op` holds the operator that joins it to `next`; the last
 * term's `next` is NULL.
 *
 * Once an operator has been consumed an operand is mandatory, so the
 * operand is parsed with parseterm() (fatal on failure). Previously a
 * missing operand produced a NULL link that was dereferenced on the next
 * loop iteration.
 */
TERM* parseexpressionnullified(PARSER* p) {
    TERM* head = parseterm(p);
    TERM* current = head;
    while(isop(p->current)) {
        current->op = p->current->token[0];
        next(p);
        TERM* operand = parseterm(p);
        current->next = operand;
        current = operand;
    }
    current->next = NULL;
    return head;
}
|
||||
|
||||
/*
 * Parses a term that must be present: delegates to parsetermnullified()
 * and treats a NULL result as a fatal "unexpected token" error.
 */
TERM* parseterm(PARSER* p) {
    TERM* parsed = parsetermnullified(p);
    if(parsed != NULL)
        return parsed;

    unexpected(p);
    return parsed;
}
|
||||
|
||||
/*
 * Parses an expression that must be present: delegates to
 * parseexpressionnullified() and treats a NULL result as a fatal
 * "unexpected token" error.
 */
TERM* parseexpression(PARSER* p) {
    TERM* parsed = parseexpressionnullified(p);
    if(parsed != NULL)
        return parsed;

    unexpected(p);
    return parsed;
}
|
||||
|
||||
/*
 * Parses a comma-separated list of expressions (call arguments).
 * Returns NULL without consuming anything when the current token is ")",
 * i.e. the list is empty; otherwise returns the head of a NULL-terminated
 * linked list with one node per expression.
 */
EXPRESSIONLIST* parseexpressionlist(PARSER* p) {
    if(strcmp(p->current->token, ")") == 0)
        return NULL;

    EXPRESSIONLIST* first = malloc(sizeof *first);
    first->expression = parseexpressionnullified(p);

    EXPRESSIONLIST* tail = first;
    while(strcmp(p->current->token, ",") == 0) {
        next(p);
        EXPRESSIONLIST* item = malloc(sizeof *item);
        item->expression = parseexpression(p);
        tail->next = item;
        tail = item;
    }
    tail->next = NULL;
    return first;
}
|
||||
|
||||
/*
 * Speculatively parses `name ( args )` or `parent . name ( args )`.
 *
 * Returns a newly allocated SUBROUTCALL on success. On any mismatch the
 * cursor is put back where it was on entry and NULL is returned, so the
 * caller can try another interpretation of the same tokens.
 *
 * The rewind position is kept in a local variable rather than in
 * p->checkpoint: calls nested inside the argument list re-enter this
 * function, and a single shared slot would be overwritten by them, making
 * the outer call rewind to the wrong token.
 *
 * NOTE: when the closing ")" is missing, the already parsed argument list
 * is not released (there is no destructor for expression trees here).
 */
SUBROUTCALL* parsesubroutcall(PARSER* p) {
    TOKEN* start = p->current;

    // Both accepted forms begin with an identifier.
    if(start->type != identifier)
        return NULL;

    SUBROUTCALL* call = (SUBROUTCALL*)malloc(sizeof(SUBROUTCALL));
    call->parentname = NULL;
    call->parameters = NULL;

    if(start->next != NULL && !strcmp(start->next->token, ".")) {
        call->parentname = start->token;
        next(p);    // parent name
        next(p);    // "."
        if(p->current == NULL || p->current->type != identifier)
            goto fail;
    }

    call->name = p->current->token;
    next(p);

    if(p->current == NULL || strcmp(p->current->token, "("))
        goto fail;
    next(p);

    call->parameters = parseexpressionlist(p);

    if(strcmp(p->current->token, ")"))
        goto fail;
    next(p);
    return call;

fail:
    free(call);
    p->current = start;
    return NULL;
}
|
||||
|
||||
/*
 * Parses the shared tail of `if` and `while`:
 *   ( expression ) { statements }
 * The leading keyword must already have been consumed by the caller.
 */
CONDSTATEMENT* parsecond(PARSER* p) {
    checkcontent(p, "(");

    CONDSTATEMENT* cond = malloc(sizeof *cond);
    cond->expression = parseexpression(p);
    checkcontent(p, ")");

    checkcontent(p, "{");
    cond->statements = parsestatements(p);
    checkcontent(p, "}");
    return cond;
}
|
||||
|
||||
/*
 * Parses an if statement after the `if` keyword has been consumed:
 * the condition and body, followed by an optional `else { statements }`.
 * elsestatements is NULL when there is no else branch.
 */
IFSTATEMENT* parseif(PARSER* p) {
    IFSTATEMENT* node = malloc(sizeof *node);
    node->base = parsecond(p);

    if(strcmp(p->current->token, "else") != 0) {
        node->elsestatements = NULL;
        return node;
    }

    next(p);
    checkcontent(p, "{");
    node->elsestatements = parsestatements(p);
    checkcontent(p, "}");
    return node;
}
|
||||
|
||||
/*
 * Parses a let statement after the `let` keyword has been consumed:
 *   varName ( [ expression ] )? = expression ;
 * arrayind is NULL when the target is not indexed.
 */
LETSTATEMENT* parselet(PARSER* p) {
    LETSTATEMENT* let = malloc(sizeof *let);
    let->varname = parseidentifier(p);

    bool indexed = strcmp(p->current->token, "[") == 0;
    if(!indexed) {
        let->arrayind = NULL;
    } else {
        next(p);
        let->arrayind = parseexpression(p);
        checkcontent(p, "]");
    }

    checkcontent(p, "=");
    let->expression = parseexpression(p);
    checkcontent(p, ";");
    return let;
}
|
||||
|
||||
/*
 * Parses one statement (let / if / while / do / return), dispatching on
 * the current token's text.
 *
 * Returns a newly allocated STATEMENT, or NULL (consuming nothing) when
 * the current token does not start a statement. The `next` link is left
 * for the caller to set.
 *
 * A `do` must be followed by a subroutine call; if none can be parsed it
 * is now reported as an error instead of storing a NULL call that the
 * printer would dereference (e.g. for the input `do ;`).
 */
STATEMENT* parsestatement(PARSER* p) {
    STATEMENT* st = (STATEMENT*)malloc(sizeof(STATEMENT));
    if(!strcmp(p->current->token, "let")) {
        next(p);
        st->type = letstatement;
        st->letst = parselet(p);
    } else if(!strcmp(p->current->token, "if")) {
        next(p);
        st->type = ifstatement;
        st->ifst = parseif(p);
    } else if(!strcmp(p->current->token, "while")) {
        next(p);
        st->type = whilestatement;
        st->whilest = parsecond(p);
    } else if(!strcmp(p->current->token, "do")) {
        next(p);
        st->type = dostatement;
        st->dost = parsesubroutcall(p);
        if(st->dost == NULL)
            unexpected(p);
        checkcontent(p, ";");
    } else if(!strcmp(p->current->token, "return")) {
        next(p);
        st->type = returnstatement;
        if(strcmp(p->current->token, ";")) {
            st->retst = parseexpressionnullified(p);
            checkcontent(p, ";");
        }
        else {
            // Bare `return;` carries no expression.
            st->retst = NULL;
            next(p);
        }
    } else {
        free(st);
        return NULL;
    }
    return st;
}
|
||||
|
||||
/*
 * Parses zero or more consecutive statements and links them through
 * `next`. Returns the first statement, or NULL when there is none.
 */
STATEMENT* parsestatements(PARSER* p) {
    STATEMENT* first = parsestatement(p);
    STATEMENT* tail = first;

    for(STATEMENT* st = parsestatement(p); st != NULL; st = parsestatement(p)) {
        tail->next = st;
        tail = st;
    }

    if(tail != NULL)
        tail->next = NULL;
    return first;
}
|
||||
|
||||
/*
 * Parses a type name: one of the keywords in vardectypes[] or any
 * identifier (a class name). Consumes the token and returns its text.
 *
 * Any other token is a fatal error. This now includes keywords that are
 * not type keywords: previously such input fell off the end of the
 * function without reporting an error and without returning a value.
 */
char* parsetype(PARSER* p) {
    char* result = p->current->token;

    if(p->current->type == keyword) {
        for(int i = 0; i < vardectypessize; i++) {
            if(!strcmp(result, vardectypes[i])) {
                next(p);
                return result;
            }
        }
    } else if(p->current->type == identifier) {
        next(p);
        return result;
    }

    unexpected(p);
    return NULL;    // not reached: unexpected() exits
}
|
||||
|
||||
/*
 * Looks up the current token's text in strings[0..sz-1].
 * Returns the index of the first match, or -1 when there is none.
 * The token is not consumed.
 */
int parsepossibilities(PARSER* p, const char** strings, int sz) {
    const char* text = p->current->token;
    int found = -1;
    for(int i = 0; i < sz && found < 0; i++) {
        if(strcmp(text, strings[i]) == 0)
            found = i;
    }
    return found;
}
|
||||
|
||||
/*
 * Maps the current token to a VARCLASS via its index in varclasses[].
 * Yields -1 (converted to VARCLASS) when the token is neither entry.
 * The token is not consumed.
 */
VARCLASS parsevarclass(PARSER* p) {
    int index = parsepossibilities(p, varclasses, varclassessize);
    return index;
}
|
||||
|
||||
/*
 * Maps the current token to a SUBROUTCLASS via its index in
 * subroutclasses[]. Yields -1 (converted to SUBROUTCLASS) when the token
 * matches no entry. The token is not consumed.
 */
SUBROUTCLASS parsesubroutclass(PARSER* p) {
    int index = parsepossibilities(p, subroutclasses, subroutclassessize);
    return index;
}
|
||||
|
||||
/*
 * Requires the current token to be an identifier (fatal otherwise),
 * consumes it and returns its text. The returned pointer is the token's
 * own string, not a copy.
 */
char* parseidentifier(PARSER* p) {
    checktype(p, identifier);

    TOKEN* tk = p->current;
    next(p);
    return tk->token;
}
|
||||
|
||||
/*
 * Parses the part shared by class-level and local variable declarations:
 *   type name (, name)* ;
 * and stores the result in *v. typeclass records the token type of the
 * type token; names becomes a NULL-terminated list of the declared names.
 * v->next is not touched here.
 */
void parsevardeccommon(PARSER* p, VARDEC* v) {
    v->typeclass = p->current->type;
    v->type = parsetype(p);

    STRINGLIST* tail = malloc(sizeof *tail);
    v->names = tail;
    tail->content = parseidentifier(p);

    for(;;) {
        if(strcmp(p->current->token, ",") != 0)
            break;
        next(p);

        STRINGLIST* item = malloc(sizeof *item);
        item->content = parseidentifier(p);
        tail->next = item;
        tail = item;
    }
    tail->next = NULL;

    checkcontent(p, ";");
}
|
||||
|
||||
/*
 * Parses one class-level variable declaration introduced by an entry of
 * varclasses[]. Returns NULL (consuming nothing) when the current token is
 * not such a keyword. The `next` link is left for the caller to set.
 */
CLASSVARDEC* parseclassvardec(PARSER* p) {
    VARCLASS kind = parsevarclass(p);
    if(kind == -1)
        return NULL;
    next(p);

    CLASSVARDEC* dec = malloc(sizeof *dec);
    dec->varclass = kind;
    dec->base = malloc(sizeof *dec->base);
    parsevardeccommon(p, dec->base);
    return dec;
}
|
||||
|
||||
/*
 * Parses zero or more class-level variable declarations and links them
 * through `next`. Returns the first one, or NULL when there is none.
 */
CLASSVARDEC* parseclassvardecs(PARSER* p) {
    CLASSVARDEC* first = parseclassvardec(p);
    CLASSVARDEC* tail = first;

    for(CLASSVARDEC* dec = parseclassvardec(p); dec != NULL; dec = parseclassvardec(p)) {
        tail->next = dec;
        tail = dec;
    }

    if(tail != NULL)
        tail->next = NULL;
    return first;
}
|
||||
|
||||
/*
 * Parses one local declaration: `var type name (, name)* ;`.
 * Returns NULL (consuming nothing) when the current token is not `var`.
 * The `next` link is left for the caller to set.
 */
VARDEC* parsevardec(PARSER* p) {
    bool isvar = strcmp(p->current->token, "var") == 0;
    if(!isvar)
        return NULL;
    next(p);

    VARDEC* dec = malloc(sizeof *dec);
    parsevardeccommon(p, dec);
    return dec;
}
|
||||
|
||||
/*
 * Parses zero or more local `var` declarations and links them through
 * `next`. Returns the first one, or NULL when there is none.
 */
VARDEC* parsevardecs(PARSER* p) {
    VARDEC* first = parsevardec(p);
    VARDEC* tail = first;

    for(VARDEC* dec = parsevardec(p); dec != NULL; dec = parsevardec(p)) {
        tail->next = dec;
        tail = dec;
    }

    if(tail != NULL)
        tail->next = NULL;
    return first;
}
|
||||
|
||||
/*
 * Parses one `type name` parameter.
 * Returns NULL (consuming nothing) when the current token is ")", i.e.
 * there is no parameter to parse.
 *
 * The node is allocated only after that check so the empty case no longer
 * leaks it, and `next` starts out as NULL so the node is a valid
 * one-element list on its own.
 */
PARAMETER* parseparameter(PARSER* p) {
    if(!strcmp(p->current->token, ")"))
        return NULL;

    PARAMETER* param = (PARAMETER*)malloc(sizeof(PARAMETER));
    param->type = parsetype(p);
    param->name = parseidentifier(p);
    param->next = NULL;
    return param;
}
|
||||
|
||||
/*
 * Parses a possibly empty, comma-separated parameter list and returns it
 * as a NULL-terminated linked list (NULL for an empty list). The closing
 * ")" is left for the caller.
 *
 * Fixes: the separating "," is now consumed before the next parameter is
 * parsed (previously every list with more than one parameter aborted with
 * "unexpected token ','"), and a "," that is not followed by a parameter
 * is reported instead of producing a NULL link that was then dereferenced.
 */
PARAMETER* parseparameters(PARSER* p) {
    PARAMETER* head = parseparameter(p);
    if(head == NULL)
        return NULL;

    PARAMETER* current = head;
    while(!strcmp(p->current->token, ",")) {
        next(p);
        PARAMETER* param = parseparameter(p);
        if(param == NULL)
            unexpected(p);
        current->next = param;
        current = param;
    }
    current->next = NULL;
    return head;
}
|
||||
|
||||
/*
 * Parses the inside of a subroutine body: local variable declarations
 * followed by statements. The surrounding braces are handled by the caller.
 */
SUBROUTBODY* parsesubroutbody(PARSER* p) {
    SUBROUTBODY* body = malloc(sizeof *body);

    body->vardecs = parsevardecs(p);
    body->statements = parsestatements(p);
    return body;
}
|
||||
|
||||
/*
 * Parses one subroutine declaration:
 *   (constructor|function|method) (void|type) name ( params ) { body }
 * Returns NULL (consuming nothing) when the current token is not one of
 * the entries in subroutclasses[]. typeclass records the token type of the
 * return-type token. The `next` link is left for the caller to set.
 */
SUBDEC* parsesubroutdec(PARSER* p) {
    SUBROUTCLASS kind = parsesubroutclass(p);
    if(kind == -1)
        return NULL;
    next(p);

    SUBDEC* dec = malloc(sizeof *dec);
    dec->subroutclass = kind;

    TOKEN* rettype = p->current;
    dec->typeclass = rettype->type;
    if(strcmp(rettype->token, "void") == 0) {
        dec->type = rettype->token;
        next(p);
    } else {
        dec->type = parsetype(p);
    }

    dec->name = parseidentifier(p);

    checkcontent(p, "(");
    dec->parameters = parseparameters(p);
    checkcontent(p, ")");

    checkcontent(p, "{");
    dec->body = parsesubroutbody(p);
    checkcontent(p, "}");
    return dec;
}
|
||||
|
||||
/*
 * Parses zero or more subroutine declarations and links them through
 * `next`. Returns the first one, or NULL when there is none.
 */
SUBDEC* parsesubroutdecs(PARSER* p) {
    SUBDEC* first = parsesubroutdec(p);
    SUBDEC* tail = first;

    for(SUBDEC* dec = parsesubroutdec(p); dec != NULL; dec = parsesubroutdec(p)) {
        tail->next = dec;
        tail = dec;
    }

    if(tail != NULL)
        tail->next = NULL;
    return first;
}
|
||||
|
||||
/*
 * Parses a whole class:
 *   class name { classVarDec* subroutineDec* }
 * Any deviation is a fatal error raised by the helpers.
 */
CLASS* parseclass(PARSER* p) {
    checkcontent(p, "class");

    CLASS* result = malloc(sizeof *result);
    result->name = parseidentifier(p);

    checkcontent(p, "{");
    result->vardecs = parseclassvardecs(p);
    result->subdecs = parsesubroutdecs(p);
    checkcontent(p, "}");

    return result;
}
|
||||
|
||||
/*
 * Allocates a parser positioned at the first token of `tokens`.
 * `file` is stored as given (not copied) and is used in diagnostics.
 *
 * Every field is given a defined value: `checkpoint` starts at the first
 * token and `output` is NULL until parse() has run, so neither can be read
 * as an indeterminate pointer.
 */
PARSER* mkparser(TOKEN* tokens, char* file) {
    PARSER* parser = (PARSER*)malloc(sizeof(PARSER));
    parser->tokens = tokens;
    parser->current = tokens;
    parser->checkpoint = tokens;
    parser->file = file;
    parser->output = NULL;
    return parser;
}
|
||||
|
||||
/* Parses one class from the parser's token stream and stores it in output. */
void parse(PARSER* parser) {
    CLASS* parsed = parseclass(parser);
    parser->output = parsed;
}
|
132
parser.h
132
parser.h
|
@ -0,0 +1,132 @@
|
|||
#ifndef PARSER_H
#define PARSER_H
#include "tokenizer.h"

struct statement;
struct explist;

/* Discriminator for STATEMENT's union. */
typedef enum {
    ifstatement,
    whilestatement,
    letstatement,
    dostatement,
    returnstatement
} STATEMENTTYPE;

/* Discriminator for the kind of a TERM. */
typedef enum {
    varname,
    intconstant,
    stringconstant,
    keywordconstant,
    arrayitem,
    subroutcall,
    innerexpression,
    unaryopterm
} TERMTYPE;

/* A call `name(args)` or `parentname.name(args)`. */
typedef struct {
    char* parentname;               /* NULL when the call has no `parent.` prefix */
    char* name;
    struct explist* parameters;     /* NULL when the argument list is empty */
} SUBROUTCALL;

/*
 * One term of an expression. An expression is a chain of terms linked
 * through `next`; `op` is the operator between this term and the next one.
 */
typedef struct term {
    TERMTYPE type;
    union {
        char* string;               /* variable / array name, string or keyword constant */
        int integer;                /* integer constant */
        SUBROUTCALL* call;          /* subroutine call */
        struct term* expression;    /* parenthesised expression or unary-op operand */
    };
    char unaryop;                   /* operator character of a unary-op term */
    struct term* arrayexp;          /* index expression of an array item */
    char op;
    struct term* next;
} TERM;

/* Linked list of expressions (call arguments). */
typedef struct explist {
    TERM* expression;
    struct explist* next;
} EXPRESSIONLIST;

/* `( expression ) { statements }` shared by if and while. */
typedef struct {
    TERM* expression;
    struct statement* statements;
} CONDSTATEMENT;

typedef struct {
    CONDSTATEMENT* base;
    struct statement* elsestatements;   /* NULL when there is no else branch */
} IFSTATEMENT;

typedef struct {
    char* varname;
    TERM* arrayind;                 /* NULL when the target is not indexed */
    TERM* expression;
} LETSTATEMENT;

/* One statement; `type` selects the active union member. */
typedef struct statement {
    STATEMENTTYPE type;
    union {
        CONDSTATEMENT* whilest;
        IFSTATEMENT* ifst;
        LETSTATEMENT* letst;
        SUBROUTCALL* dost;
        TERM* retst;                /* NULL for a bare `return;` */
    };
    struct statement* next;
} STATEMENT;

/* Kind of a class-level variable. */
typedef enum {
    stat,
    field
} VARCLASS;

typedef struct stringlist {
    char* content;
    struct stringlist* next;
} STRINGLIST;

/* `type name (, name)*` declaration. */
typedef struct vardec {
    char* type;
    TOKENTYPE typeclass;            /* token type of the type token */
    STRINGLIST* names;
    struct vardec* next;
} VARDEC;

typedef struct classvardec {
    VARCLASS varclass;
    VARDEC* base;
    struct classvardec* next;
} CLASSVARDEC;

/* Kind of a subroutine. */
typedef enum {
    constructor,
    function,
    method
} SUBROUTCLASS;

typedef struct parameter {
    char* type;
    char* name;
    struct parameter* next;
} PARAMETER;

typedef struct SUBROUTBODY {
    VARDEC* vardecs;
    STATEMENT* statements;
} SUBROUTBODY;

typedef struct subdec {
    SUBROUTCLASS subroutclass;
    char* type;                     /* return type text */
    TOKENTYPE typeclass;            /* token type of the return-type token */
    char* name;
    PARAMETER* parameters;
    SUBROUTBODY* body;
    struct subdec* next;
} SUBDEC;

typedef struct {
    char* name;
    CLASSVARDEC* vardecs;
    SUBDEC* subdecs;
} CLASS;

/* Parser state over a token list. */
typedef struct {
    TOKEN* tokens;                  /* first token of the stream */
    TOKEN* current;                 /* cursor */
    TOKEN* checkpoint;              /* saved cursor position for rewinding */
    char* file;                     /* file name used in diagnostics */
    CLASS* output;                  /* result written by parse() */
} PARSER;

PARSER* mkparser(TOKEN* tokens, char* file);
void parse(PARSER* parser);
#endif
|
|
@ -0,0 +1,443 @@
|
|||
#include "printer.h"
|
||||
void printexpression(TERM* e, FILE* output, int depth);
|
||||
void printstatements(STATEMENT* st, FILE* output, int depth);
|
||||
|
||||
/* Keyword text for each VARCLASS value. */
const char* tmpvarclasses[] = {
    "static",
    "field"
};
const int tmpvarclassessize = sizeof(tmpvarclasses) / sizeof(tmpvarclasses[0]);

/* Keyword text for each SUBROUTCLASS value. */
const char* tmpsubroutclasses[] = {
    "constructor",
    "function",
    "method"
};
const int tmpsubroutclassessize = sizeof(tmpsubroutclasses) / sizeof(tmpsubroutclasses[0]);

/* XML tag name for a type token; indexed by the stored typeclass. */
const char* tmptokentypes[] = {
    "keyword",
    "identifier"
};
|
||||
|
||||
/* Writes `depth` copies of the one-character indent unit to output. */
void printident(FILE* output, int depth) {
    int remaining = depth;
    while(remaining > 0) {
        fputs(" ", output);
        remaining--;
    }
}
|
||||
|
||||
/*
 * Prints every name in ls as an <identifier> element, with a "," symbol
 * element between consecutive names. ls must not be NULL.
 */
void printstringlist(FILE* output, int depth, STRINGLIST* ls) {
    for(STRINGLIST* item = ls; ; item = item->next) {
        printident(output, depth);
        fprintf(output, "<identifier> %s </identifier>\r\n", item->content);

        if(item->next == NULL)
            break;

        printident(output, depth);
        fputs("<symbol> , </symbol>\r\n", output);
    }
}
|
||||
|
||||
/*
 * Prints the `type name (, name)* ;` part of a declaration: the type
 * (tagged according to its typeclass), the names and the closing ";".
 */
void printvardec(VARDEC* vd, FILE* output, int depth) {
    const char* tag = tmptokentypes[vd->typeclass];

    printident(output, depth);
    fprintf(output, "<%s> %s </%s>\r\n", tag, vd->type, tag);

    printstringlist(output, depth, vd->names);

    printident(output, depth);
    fputs("<symbol> ; </symbol>\r\n", output);
}
|
||||
|
||||
/* Prints each local declaration in the list wrapped in a <varDec> element. */
void printvardecs(VARDEC* vd, FILE* output, int depth) {
    for(VARDEC* dec = vd; dec != NULL; dec = dec->next) {
        printident(output, depth);
        fputs("<varDec>\r\n", output);

        printident(output, depth+1);
        fputs("<keyword> var </keyword>\r\n", output);
        printvardec(dec, output, depth+1);

        printident(output, depth);
        fputs("</varDec>\r\n", output);
    }
}
|
||||
|
||||
/* Prints the static/field keyword followed by the declaration itself. */
void printclassvardec(CLASSVARDEC* vd, FILE* output, int depth) {
    const char* kind = tmpvarclasses[vd->varclass];

    printident(output, depth);
    fprintf(output, "<keyword> %s </keyword>\r\n", kind);

    printvardec(vd->base, output, depth);
}
|
||||
|
||||
/* Prints each class-level declaration wrapped in a <classVarDec> element. */
void printclassvardecs(CLASSVARDEC* vd, FILE* output, int depth) {
    for(CLASSVARDEC* dec = vd; dec != NULL; dec = dec->next) {
        printident(output, depth);
        fputs("<classVarDec>\r\n", output);

        printclassvardec(dec, output, depth+1);

        printident(output, depth);
        fputs("</classVarDec>\r\n", output);
    }
}
|
||||
|
||||
/*
 * Prints one parameter: its type (always tagged <keyword>), its name, and
 * a "," symbol when another parameter follows.
 */
void printparameter(PARAMETER* p, FILE* output, int depth) {
    printident(output, depth);
    fprintf(output, "<keyword> %s </keyword>\r\n", p->type);

    printident(output, depth);
    fprintf(output, "<identifier> %s </identifier>\r\n", p->name);

    if(p->next == NULL)
        return;

    printident(output, depth);
    fputs("<symbol> , </symbol>\r\n", output);
}
|
||||
|
||||
/* Prints a (possibly empty) parameter list inside <parameterList>. */
void printparameters(PARAMETER* p, FILE* output, int depth) {
    printident(output, depth);
    fputs("<parameterList>\r\n", output);

    for(PARAMETER* param = p; param != NULL; param = param->next)
        printparameter(param, output, depth+1);

    printident(output, depth);
    fputs("</parameterList>\r\n", output);
}
|
||||
|
||||
/*
 * Prints a (possibly empty) argument list inside <expressionList>, with a
 * "," symbol element between consecutive expressions.
 */
void printexpressionlist(EXPRESSIONLIST* list, FILE* output, int depth) {
    printident(output, depth);
    fputs("<expressionList>\r\n", output);

    for(EXPRESSIONLIST* item = list; item != NULL; item = item->next) {
        printexpression(item->expression, output, depth+1);
        if(item->next != NULL) {
            printident(output, depth+1);
            fputs("<symbol> , </symbol>\r\n", output);
        }
    }

    printident(output, depth);
    fputs("</expressionList>\r\n", output);
}
|
||||
|
||||
/*
 * Prints a call as `[parent .] name ( expressionList )`. The `parent .`
 * part is emitted only when parentname is set.
 */
void printsubroutcall(SUBROUTCALL* c, FILE* output, int depth) {
    bool qualified = c->parentname != NULL;
    if(qualified) {
        printident(output, depth);
        fprintf(output, "<identifier> %s </identifier>\r\n", c->parentname);
        printident(output, depth);
        fputs("<symbol> . </symbol>\r\n", output);
    }

    printident(output, depth);
    fprintf(output, "<identifier> %s </identifier>\r\n", c->name);

    printident(output, depth);
    fputs("<symbol> ( </symbol>\r\n", output);

    printexpressionlist(c->parameters, output, depth);

    printident(output, depth);
    fputs("<symbol> ) </symbol>\r\n", output);
}
|
||||
|
||||
/*
 * Prints one term inside a <term> element, choosing the layout from the
 * term's type; any type without its own case is printed as a unary-op
 * term. When the term has a successor, the operator joining them is
 * printed as a symbol after the closing tag.
 */
void printterm(TERM* e, FILE* output, int depth) {
    int inner = depth + 1;

    printident(output, depth);
    fputs("<term>\r\n", output);

    switch(e->type) {
    case varname:
        printident(output, inner);
        fprintf(output, "<identifier> %s </identifier>\r\n", e->string);
        break;
    case subroutcall:
        printsubroutcall(e->call, output, inner);
        break;
    case stringconstant:
        printident(output, inner);
        fprintf(output, "<stringConstant> %s </stringConstant>\r\n", e->string);
        break;
    case keywordconstant:
        printident(output, inner);
        fprintf(output, "<keyword> %s </keyword>\r\n", e->string);
        break;
    case intconstant:
        printident(output, inner);
        fprintf(output, "<integerConstant> %i </integerConstant>\r\n", e->integer);
        break;
    case arrayitem:
        printident(output, inner);
        fprintf(output, "<identifier> %s </identifier>\r\n", e->string);

        printident(output, inner);
        fputs("<symbol> [ </symbol>\r\n", output);

        printexpression(e->arrayexp, output, inner);

        printident(output, inner);
        fputs("<symbol> ] </symbol>\r\n", output);
        break;
    case innerexpression:
        printident(output, inner);
        fputs("<symbol> ( </symbol>\r\n", output);

        printexpression(e->expression, output, inner);

        printident(output, inner);
        fputs("<symbol> ) </symbol>\r\n", output);
        break;
    default:
        printident(output, inner);
        fprintf(output, "<symbol> %c </symbol>\r\n", e->unaryop);

        printterm(e->expression, output, inner);
        break;
    }

    printident(output, depth);
    fputs("</term>\r\n", output);

    if(e->next != NULL) {
        printident(output, depth);
        fprintf(output, "<symbol> %c </symbol>\r\n", e->op);
    }
}
|
||||
|
||||
/* Prints the chain of terms starting at e inside an <expression> element. */
void printexpression(TERM* e, FILE* output, int depth) {
    printident(output, depth);
    fputs("<expression>\r\n", output);

    for(TERM* term = e; term != NULL; term = term->next)
        printterm(term, output, depth+1);

    printident(output, depth);
    fputs("</expression>\r\n", output);
}
|
||||
|
||||
/* Prints `( expression ) { statements }`, the shared tail of if/while. */
void printcond(CONDSTATEMENT* st, FILE* output, int depth) {
    printident(output, depth);
    fputs("<symbol> ( </symbol>\r\n", output);
    printexpression(st->expression, output, depth);
    printident(output, depth);
    fputs("<symbol> ) </symbol>\r\n", output);

    printident(output, depth);
    fputs("<symbol> { </symbol>\r\n", output);
    printstatements(st->statements, output, depth);
    printident(output, depth);
    fputs("<symbol> } </symbol>\r\n", output);
}
|
||||
|
||||
/*
 * Prints an <ifStatement>: keyword, condition and body, plus the
 * `else { statements }` part when elsestatements is not NULL.
 */
void printif(IFSTATEMENT* st, FILE* output, int depth) {
    int inner = depth + 1;

    printident(output, depth);
    fputs("<ifStatement>\r\n", output);

    printident(output, inner);
    fputs("<keyword> if </keyword>\r\n", output);

    printcond(st->base, output, inner);

    if(st->elsestatements != NULL) {
        printident(output, inner);
        fputs("<keyword> else </keyword>\r\n", output);

        printident(output, inner);
        fputs("<symbol> { </symbol>\r\n", output);

        printstatements(st->elsestatements, output, inner);

        printident(output, inner);
        fputs("<symbol> } </symbol>\r\n", output);
    }

    printident(output, depth);
    fputs("</ifStatement>\r\n", output);
}
|
||||
|
||||
/* Prints a <whileStatement>: keyword followed by condition and body. */
void printwhile(CONDSTATEMENT* st, FILE* output, int depth) {
    int inner = depth + 1;

    printident(output, depth);
    fputs("<whileStatement>\r\n", output);

    printident(output, inner);
    fputs("<keyword> while </keyword>\r\n", output);

    printcond(st, output, inner);

    printident(output, depth);
    fputs("</whileStatement>\r\n", output);
}
|
||||
|
||||
/*
 * Prints a <letStatement>: `let name ([ expression ])? = expression ;`.
 * The index part is printed only when arrayind is not NULL.
 */
void printlet(LETSTATEMENT* st, FILE* output, int depth) {
    int inner = depth + 1;

    printident(output, depth);
    fputs("<letStatement>\r\n", output);

    printident(output, inner);
    fputs("<keyword> let </keyword>\r\n", output);

    printident(output, inner);
    fprintf(output, "<identifier> %s </identifier>\r\n", st->varname);

    if(st->arrayind != NULL) {
        printident(output, inner);
        fputs("<symbol> [ </symbol>\r\n", output);

        printexpression(st->arrayind, output, inner);

        printident(output, inner);
        fputs("<symbol> ] </symbol>\r\n", output);
    }

    printident(output, inner);
    fputs("<symbol> = </symbol>\r\n", output);

    printexpression(st->expression, output, inner);

    printident(output, inner);
    fputs("<symbol> ; </symbol>\r\n", output);

    printident(output, depth);
    fputs("</letStatement>\r\n", output);
}
|
||||
|
||||
/* Prints a <doStatement>: `do call ;`. */
void printdo(SUBROUTCALL* st, FILE* output, int depth) {
    int inner = depth + 1;

    printident(output, depth);
    fputs("<doStatement>\r\n", output);

    printident(output, inner);
    fputs("<keyword> do </keyword>\r\n", output);

    printsubroutcall(st, output, inner);

    printident(output, inner);
    fputs("<symbol> ; </symbol>\r\n", output);

    printident(output, depth);
    fputs("</doStatement>\r\n", output);
}
|
||||
|
||||
/*
 * Prints a <returnStatement>: `return expression? ;`. The expression is
 * omitted when st is NULL.
 */
void printreturn(TERM* st, FILE* output, int depth) {
    int inner = depth + 1;

    printident(output, depth);
    fputs("<returnStatement>\r\n", output);

    printident(output, inner);
    fputs("<keyword> return </keyword>\r\n", output);

    if(st != NULL)
        printexpression(st, output, inner);

    printident(output, inner);
    fputs("<symbol> ; </symbol>\r\n", output);

    printident(output, depth);
    fputs("</returnStatement>\r\n", output);
}
|
||||
|
||||
/*
 * Dispatches on the statement's type to the matching printer. Any type
 * without its own case is printed as a return statement.
 */
void printstatement(STATEMENT* st, FILE* output, int depth) {
    switch(st->type) {
    case ifstatement:
        printif(st->ifst, output, depth);
        break;
    case letstatement:
        printlet(st->letst, output, depth);
        break;
    case whilestatement:
        printwhile(st->whilest, output, depth);
        break;
    case dostatement:
        printdo(st->dost, output, depth);
        break;
    default:
        printreturn(st->retst, output, depth);
        break;
    }
}
|
||||
|
||||
/* Prints a (possibly empty) statement list inside <statements>. */
void printstatements(STATEMENT* st, FILE* output, int depth) {
    printident(output, depth);
    fputs("<statements>\r\n", output);

    for(STATEMENT* item = st; item != NULL; item = item->next)
        printstatement(item, output, depth+1);

    printident(output, depth);
    fputs("</statements>\r\n", output);
}
|
||||
|
||||
/* Prints a <subroutineBody>: `{ varDec* statements }`. */
void printsubroutbody(SUBROUTBODY* bd, FILE* output, int depth) {
    int inner = depth + 1;

    printident(output, depth);
    fputs("<subroutineBody>\r\n", output);

    printident(output, inner);
    fputs("<symbol> { </symbol>\r\n", output);

    printvardecs(bd->vardecs, output, inner);
    printstatements(bd->statements, output, inner);

    printident(output, inner);
    fputs("<symbol> } </symbol>\r\n", output);

    printident(output, depth);
    fputs("</subroutineBody>\r\n", output);
}
|
||||
|
||||
/*
 * Writes the contents of one subroutine declaration (without the enclosing
 * <subroutineDec> tags): subroutine class keyword, return type, name, the
 * parenthesised parameter list, and the body.
 *
 * All pieces, including the parameters and the body, are printed with the
 * same depth value.
 */
void printsubroutdec(SUBDEC* sd, FILE* output, int depth) {
    /* Look up the printable strings once, before emitting anything. */
    const char* classword = tmpsubroutclasses[sd->subroutclass];
    const char* typetag = tmptokentypes[sd->typeclass];

    printident(output, depth);
    fprintf(output, "<keyword> %s </keyword>\r\n", classword);

    printident(output, depth);
    fprintf(output, "<%s> %s </%s>\r\n", typetag, sd->type, typetag);

    printident(output, depth);
    fprintf(output, "<identifier> %s </identifier>\r\n", sd->name);

    printident(output, depth);
    fputs("<symbol> ( </symbol>\r\n", output);

    printparameters(sd->parameters, output, depth);

    printident(output, depth);
    fputs("<symbol> ) </symbol>\r\n", output);

    printsubroutbody(sd->body, output, depth);
}
|
||||
|
||||
/*
 * Writes one <subroutineDec> element per node of the list starting at sd.
 *
 * Nodes are linked through ->next. The wrapper tags use depth and the
 * declaration contents use depth + 1. A NULL list prints nothing.
 */
void printsubroutdecs(SUBDEC* sd, FILE* output, int depth) {
    for(SUBDEC* node = sd; node != NULL; node = node->next) {
        printident(output, depth);
        fputs("<subroutineDec>\r\n", output);

        printsubroutdec(node, output, depth + 1);

        printident(output, depth);
        fputs("</subroutineDec>\r\n", output);
    }
}
|
||||
|
||||
/*
 * Writes a complete <class> element: the `class` keyword, the class name,
 * the opening brace, class-level variable declarations, subroutine
 * declarations, and the closing brace.
 *
 * The <class> tags are printed at depth; all children at depth + 1.
 */
void printclass(CLASS* c, FILE* output, int depth) {
    const int inner = depth + 1;

    printident(output, depth);
    fputs("<class>\r\n", output);

    printident(output, inner);
    fputs("<keyword> class </keyword>\r\n", output);

    printident(output, inner);
    fprintf(output, "<identifier> %s </identifier>\r\n", c->name);

    printident(output, inner);
    fputs("<symbol> { </symbol>\r\n", output);

    printclassvardecs(c->vardecs, output, inner);
    printsubroutdecs(c->subdecs, output, inner);

    printident(output, inner);
    fputs("<symbol> } </symbol>\r\n", output);

    printident(output, depth);
    fputs("</class>\r\n", output);
}
|
||||
|
||||
/*
 * Entry point of the printer: writes the class stored in p->output to the
 * given stream, starting at depth 0.
 */
void printparser(FILE* output, PARSER* p) {
    CLASS* root = p->output;
    printclass(root, output, 0);
}
|
|
@ -0,0 +1,7 @@
|
|||
#ifndef PRINTER_H
#define PRINTER_H
/* FILE is used in the prototype below; include it directly rather than
 * relying on parser.h to pull it in. */
#include <stdio.h>
#include "parser.h"

/*
 * Writes the class held in p->output to the given stream as XML.
 *
 * output - destination stream.
 * p      - parser whose output field holds the class to print.
 */
void printparser(FILE* output, PARSER* p);

#endif
|
18
tokenizer.c
18
tokenizer.c
|
@ -15,8 +15,8 @@ typedef struct {
|
|||
int count;
|
||||
} STRING;
|
||||
|
||||
/*
 * Allocates one list node with malloc and returns it. The node's fields are
 * not initialized here and the malloc result is not checked for NULL.
 *
 * NOTE(review): this span is diff residue: each TOKENLIST line is followed
 * by a TOKEN line of the same shape. Presumably the TOKEN lines are the
 * post-rename version; confirm before compiling this text as-is.
 */
TOKENLIST* mktokenlist() {
|
||||
return (TOKENLIST*)malloc(sizeof(TOKENLIST));
|
||||
TOKEN* mktokenlist() {
|
||||
return (TOKEN*)malloc(sizeof(TOKEN));
|
||||
}
|
||||
|
||||
CHARTYPE getchartype(unsigned char c) {
|
||||
|
@ -97,18 +97,18 @@ TOKENTYPE gettokentype(STRING* tk, int truen) {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
/*
 * Fills curitem with a heap copy of token->str plus the given truen and
 * type, then allocates a new node with mktokenlist(), links it as
 * curitem->next, resets token->count to 0, and returns the new node.
 *
 * NOTE(review): the copy buffer is token->count bytes and is filled with
 * strcpy, so this assumes token->count already covers the terminating NUL
 * of token->str. appendtoken() calls append(token, '\0') first; confirm
 * for any direct caller. The malloc result is not checked.
 *
 * NOTE(review): TOKENLIST/TOKEN line pairs are diff residue (old and new
 * side shown together); presumably the TOKEN lines are current. Confirm.
 */
TOKENLIST* appendtokenraw(TOKENLIST* curitem, STRING* token, int truen, TOKENTYPE type) {
|
||||
TOKEN* appendtokenraw(TOKEN* curitem, STRING* token, int truen, TOKENTYPE type) {
|
||||
curitem->token = (char*)malloc(sizeof(char)*token->count);
|
||||
strcpy(curitem->token, token->str);
|
||||
curitem->truen = truen;
|
||||
curitem->type = type;
|
||||
TOKENLIST* nextitem = mktokenlist();
|
||||
TOKEN* nextitem = mktokenlist();
|
||||
curitem->next = nextitem;
|
||||
token->count = 0;
|
||||
return nextitem;
|
||||
}
|
||||
|
||||
/*
 * Appends '\0' to token via append(), classifies it with gettokentype(),
 * and forwards everything to appendtokenraw(); returns what that returns.
 *
 * NOTE(review): the two signature lines are the old (TOKENLIST) and new
 * (TOKEN) sides of a diff shown together; presumably only the TOKEN line
 * is current. Confirm.
 */
TOKENLIST* appendtoken(TOKENLIST* curitem, STRING* token, int truen) {
|
||||
TOKEN* appendtoken(TOKEN* curitem, STRING* token, int truen) {
|
||||
append(token, '\0');
|
||||
return appendtokenraw(curitem, token, truen, gettokentype(token, truen));
|
||||
}
|
||||
|
@ -168,10 +168,10 @@ void freestr(STRING* str) {
|
|||
free(str);
|
||||
}
|
||||
|
||||
TOKENLIST* tokenize(FILE* input) {
|
||||
TOKENLIST* head = mktokenlist();
|
||||
TOKENLIST* lastitem = head;
|
||||
TOKENLIST* curitem = head;
|
||||
TOKEN* tokenize(FILE* input) {
|
||||
TOKEN* head = mktokenlist();
|
||||
TOKEN* lastitem = head;
|
||||
TOKEN* curitem = head;
|
||||
|
||||
STRING* tmptoken = mkstring(200);
|
||||
CHARTYPE lasttype = space;
|
||||
|
|
12
tokenizer.h
12
tokenizer.h
|
@ -3,16 +3,16 @@
|
|||
#include <stdio.h>
|
||||
|
||||
/*
 * Token categories produced by the tokenizer.
 *
 * NOTE(review): two enumerator lines appear here with different orders
 * (diff residue; presumably the second is the current one). The numeric
 * value of symbol/integer/string/identifier differs between the two, so
 * any table indexed by TOKENTYPE must follow whichever order is in effect.
 * Confirm against the lookup tables that use it.
 */
typedef enum {
|
||||
keyword, symbol, integer, string, identifier
|
||||
keyword, identifier, symbol, integer, string
|
||||
} TOKENTYPE;
|
||||
|
||||
/*
 * One node of the singly linked token list.
 *
 *   token - heap-allocated text of the token
 *   type  - category of the token (TOKENTYPE)
 *   truen - integer recorded alongside the token (meaning not visible here)
 *   next  - following node in the list
 *
 * NOTE(review): both the old (struct tklist / TOKENLIST) and the new
 * (struct token / TOKEN) spellings are shown together as diff residue;
 * presumably only the TOKEN spelling is current. Confirm.
 */
typedef struct tklist {
|
||||
typedef struct token {
|
||||
char* token;
|
||||
TOKENTYPE type;
|
||||
int truen;
|
||||
struct tklist* next;
|
||||
} TOKENLIST;
|
||||
struct token* next;
|
||||
} TOKEN;
|
||||
|
||||
/*
 * Public tokenizer interface: tokenize() reads from input and returns the
 * head of the token list.
 *
 * NOTE(review): freetokenlist takes its argument by value rather than by
 * pointer; confirm that is intended. The TOKENLIST and TOKEN prototypes
 * are the old and new sides of a diff shown together; presumably only the
 * TOKEN pair is current.
 */
TOKENLIST* tokenize(FILE* input);
|
||||
void freetokenlist(TOKENLIST l);
|
||||
TOKEN* tokenize(FILE* input);
|
||||
void freetokenlist(TOKEN l);
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue