From f647a754a715d2f067786ae7779c5a293b74f974 Mon Sep 17 00:00:00 2001
From: Augusto Gunsch
Date: Sun, 20 Dec 2020 15:58:10 -0300
Subject: [PATCH] Start translator

---
 Makefile   |   2 +-
 compiler.c | 649 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 compiler.h |  23 +++
 main.c     |  23 ++-
 parser.c   |  30 ++-
 parser.h   |  14 +-
 printer.c  |   2 +-
 util.c     |  65 ++++++
 util.h     |  26 +++
 9 files changed, 822 insertions(+), 12 deletions(-)
 create mode 100644 compiler.c
 create mode 100644 compiler.h
 create mode 100644 util.c
 create mode 100644 util.h

diff --git a/Makefile b/Makefile
index dbc2b15..e0228a5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-FILES = tokenizer.c main.c parser.c printer.c
+FILES = tokenizer.c main.c parser.c printer.c compiler.c util.c
 INCLUDES = -I.
 CFLAGS = -std=c99 -g
 OUTFILE = compiler
diff --git a/compiler.c b/compiler.c
new file mode 100644
index 0000000..6739752
--- /dev/null
+++ b/compiler.c
@@ -0,0 +1,649 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include "compiler.h"
+
+typedef enum {
+	subdec, classvardec, vardec, cl
+} OBJTYPE;
+
+// Appends 'token' to the line; the caller must have reserved room via mkline().
+void addtoken(LINE* l, char* token) {
+	l->tokens[l->tokenscount] = token;
+	l->tokenscount++;
+}
+
+// Returns true if a class called 'name' is present in the list starting at 'c'.
+bool existclass(CLASS* c, char* name) {
+	CLASS* current = c;
+	while(current != NULL) {
+		if(!strcmp(current->name, name))
+			return true;
+		current = current->next;
+	}
+	return false;
+}
+
+// Reports a name clash on stderr and terminates the program.
+void doubledeclarationmsg(char* name, char* f1, int l1, char* f2, int l2) {
+	fprintf(stderr, "Double declaration of '%s' at '%s', line %i; previously defined at '%s', line %i\n", name, f1, l1, f2, l2);
+	exit(1);
+}
+
+// Reads the source file and line number out of a declaration of the given type.
+void xtractinfo(void* obj, OBJTYPE type, char** file, int* line) {
+	if(type == classvardec) {
+		*file = ((CLASSVARDEC*)obj)->base->file;
+		*line = ((CLASSVARDEC*)obj)->base->definedat;
+	}
+	else if(type == vardec) {
+		*file = ((VARDEC*)obj)->file;
+		*line = ((VARDEC*)obj)->definedat;
+	}
+	else if(type == subdec) {
+		*file = ((SUBDEC*)obj)->file;
+		*line = ((SUBDEC*)obj)->definedat;
+	}
+	else if(type == cl) {
+		*file = ((CLASS*)obj)->file;
+		*line = ((CLASS*)obj)->definedat;
+	}
+}
+
+// Aborts with a double-declaration error for 'name'; o1 is the new object, o2 the earlier one.
+void doubledeclaration(char* name, void* o1, OBJTYPE t1, void* o2, OBJTYPE t2) {
+	char* f1;
+	char* f2;
+	int l1, l2;
+	xtractinfo(o1, t1, &f1, &l1);
+	xtractinfo(o2, t2, &f2, &l2);
+	doubledeclarationmsg(name, f1, l1, f2, l2);
+}
+
+// Allocates an empty scope nested inside 'prev' (NULL for the outermost scope).
+SCOPE* mkscope(SCOPE* prev) {
+	SCOPE* s = (SCOPE*)malloc(sizeof(SCOPE));
+	s->subroutines = NULL;
+	s->classvardecs = NULL;
+	s->vardecs = NULL;
+	s->classes = NULL;
+	s->previous = prev;
+	return s;
+}
+
+// Frees the innermost scope and makes *s point at its parent.
+void popscope(SCOPE** s) {
+	SCOPE* prev = (*s)->previous;
+	free(*s);
+	(*s) = prev;
+}
+
+// Returns true if 'str' occurs in the string list.
+bool existstr(STRINGLIST* strs, char* str) {
+	STRINGLIST* current = strs;
+	while(current != NULL) {
+		if(!strcmp(current->content, str))
+			return true;
+		current = current->next;
+	}
+	return false;
+}
+
+// Finds the variable declaration that introduces 'name', searching outwards from 's'.
+VARDEC* getvardec(SCOPE* s, char* name) {
+	VARDEC* current = s->vardecs;
+	while(current != NULL) {
+		if(existstr(current->names, name))
+			return current;
+		current = current->next;
+	}
+	if(s->previous != NULL)
+		return getvardec(s->previous, name);
+	return NULL;
+}
+
+// Finds the class variable declaration that introduces 'name', searching outwards from 's'.
+CLASSVARDEC* getclassvardec(SCOPE* s, char* name) {
+	CLASSVARDEC* current = s->classvardecs;
+	while(current != NULL) {
+		if(existstr(current->base->names, name))
+			return current;
+		current = current->next;
+	}
+	if(s->previous != NULL)
+		return getclassvardec(s->previous, name);
+	return NULL;
+}
+
+// Finds the subroutine called 'name', searching outwards from 's'.
+SUBDEC* getsubdec(SCOPE* s, char* name) {
+	SUBDEC* current = s->subroutines;
+	while(current != NULL) {
+		if(!strcmp(current->name, name))
+			return current;
+		current = current->next;
+	}
+	if(s->previous != NULL)
+		return getsubdec(s->previous, name);
+	return NULL;
+}
+
+// Finds the subroutine called 'name' among the subroutines of class 'c'.
+SUBDEC* getsubdecfromclass(CLASS* c, char* name) {
+	SUBDEC* current = c->subdecs;
+	while(current != NULL) {
+		if(!strcmp(current->name, name))
+			return current;
+		current = current->next;
+	}
+	return NULL;
+}
+
+// Finds the class called 'name', searching outwards from 's'.
+CLASS* getclass(SCOPE* s, char* name) {
+	CLASS* current = s->classes;
+	while(current != NULL) {
+		if(!strcmp(current->name, name))
+			return current;
+		current = current->next;
+	}
+	if(s->previous != NULL)
+		return getclass(s->previous, name);
+	return NULL;
+}
+
+// Looks 'name' up as a subroutine, class variable, local variable or class, in that order.
+// On success stores the kind of object in *t; on failure returns NULL and leaves *t untouched.
+void* getbyname(SCOPE* s, OBJTYPE* t, char* name) {
+	SUBDEC* sd = getsubdec(s, name);
+	if(sd != NULL) {
+		*t = subdec;
+		return sd;
+	}
+	CLASSVARDEC* cvd = getclassvardec(s, name);
+	if(cvd != NULL) {
+		*t = classvardec;
+		return cvd;
+	}
+	VARDEC* vd = getvardec(s, name);
+	if(vd != NULL) {
+		*t = vardec;
+		return vd;
+	}
+	CLASS* c = getclass(s, name);
+	if(c != NULL) {
+		*t = cl;
+		return c;
+	}
+	return NULL;
+}
+
+// Returns the first already-declared object matching any of 'names' (stored in *name), or NULL.
+void* getbynamelist(SCOPE* s, STRINGLIST* names, OBJTYPE* t, char** name) {
+	STRINGLIST* current = names;
+	while(current != NULL) {
+		void* obj = getbyname(s, t, current->content);
+		if(obj != NULL) {
+			*name = current->content;
+			return obj;
+		}
+		current = current->next;
+	}
+	if(s->previous != NULL)
+		return getbynamelist(s->previous, names, t, name);
+	return NULL;
+}
+
+// The parser's lists and the scope's lists share the same 'next' links. Every add*
+// function therefore appends at the tail, so that registering a parser list element
+// by element rebuilds the original chain instead of truncating or reversing it.
+
+// Registers a class variable declaration in 's'; aborts if one of its names is taken.
+void addclassvardec(SCOPE* s, CLASSVARDEC* v) {
+	OBJTYPE type;
+	char* name;
+	void* tmp = getbynamelist(s, v->base->names, &type, &name);
+	if(tmp != NULL)
+		doubledeclaration(name, v, classvardec, tmp, type);
+	CLASSVARDEC** slot = &s->classvardecs;
+	while(*slot != NULL)
+		slot = &(*slot)->next;
+	v->next = NULL;
+	*slot = v;
+}
+
+// Registers a local variable declaration in 's'; aborts if one of its names is taken.
+void addvardec(SCOPE* s, VARDEC* v) {
+	OBJTYPE type;
+	char* name;
+	void* tmp = getbynamelist(s, v->names, &type, &name);
+	if(tmp != NULL)
+		doubledeclaration(name, v, vardec, tmp, type);
+	VARDEC** slot = &s->vardecs;
+	while(*slot != NULL)
+		slot = &(*slot)->next;
+	v->next = NULL;
+	*slot = v;
+}
+
+// Registers a subroutine in 's'; aborts if its name is taken.
+void addsubdec(SCOPE* s, SUBDEC* sd) {
+	OBJTYPE type;
+	void* tmp = getbyname(s, &type, sd->name);
+	if(tmp != NULL)
+		doubledeclaration(sd->name, sd, subdec, tmp, type);
+	SUBDEC** slot = &s->subroutines;
+	while(*slot != NULL)
+		slot = &(*slot)->next;
+	sd->next = NULL;
+	*slot = sd;
+}
+
+// Registers a class in 's'; aborts if its name is taken.
+void addclass(SCOPE* s, CLASS* c) {
+	OBJTYPE type;
+	void* tmp = getbyname(s, &type, c->name);
+	if(tmp != NULL)
+		doubledeclaration(c->name, c, cl, tmp, type);
+	CLASS** slot = &s->classes;
+	while(*slot != NULL)
+		slot = &(*slot)->next;
+	c->next = NULL;
+	*slot = c;
+}
+
+// Registers every class variable declaration of the list 'vs' in 's'.
+void addclassvardecs(SCOPE* s, CLASSVARDEC* vs) {
+	CLASSVARDEC* current = vs;
+	while(current != NULL) {
+		// addclassvardec() overwrites current->next, so read it first
+		CLASSVARDEC* following = current->next;
+		addclassvardec(s, current);
+		current = following;
+	}
+}
+
+// Registers every local variable declaration of the list 'vs' in 's'.
+void addvardecs(SCOPE* s, VARDEC* vs) {
+	VARDEC* current = vs;
+	while(current != NULL) {
+		// addvardec() overwrites current->next, so read it first
+		VARDEC* following = current->next;
+		addvardec(s, current);
+		current = following;
+	}
+}
+
+// Registers every subroutine of the list 'ss' in 's'.
+void addsubdecs(SCOPE* s, SUBDEC* ss) {
+	SUBDEC* current = ss;
+	while(current != NULL) {
+		// addsubdec() overwrites current->next, so read it first
+		SUBDEC* following = current->next;
+		addsubdec(s, current);
+		current = following;
+	}
+}
+
+// Registers every class of the list 'c' in 's'.
+void addclasses(SCOPE* s, CLASS* c) {
+	CLASS* current = c;
+	while(current != NULL) {
+		// addclass() overwrites current->next, so read it first
+		CLASS* following = current->next;
+		addclass(s, current);
+		current = following;
+	}
+}
+
+// Counts the expressions in an argument list.
+int countparameters(EXPRESSIONLIST* params) {
+	int i = 0;
+	while(params != NULL) {
+		i++;
+		params = params->next;
+	}
+	return i;
+}
+
+// Counts the local variables declared by 'decs'. A single declaration may introduce
+// several names ("var int a, b;"), and each name is a separate local variable.
+int countlocalvars(VARDEC* decs) {
+	int i = 0;
+	while(decs != NULL) {
+		STRINGLIST* name = decs->names;
+		while(name != NULL) {
+			i++;
+			name = name->next;
+		}
+		decs = decs->next;
+	}
+	return i;
+}
+
+// Returns a newly allocated "n1.n2" string; the caller owns it.
+char* dotlabel(char* n1, char* n2) {
+	int sz = (strlen(n1) + strlen(n2) + 2) * sizeof(char);
+	char* result = (char*)malloc(sz);
+	snprintf(result, sz, "%s.%s", n1, n2);
+	return result;
+}
+
+// Returns the VM label "Class.subroutine" of 'sd'; the caller owns it.
+char* subdecname(CLASS* c, SUBDEC* sd) {
+	return dotlabel(c->name, sd->name);
+}
+
+// Resolves the subroutine addressed by a qualified call "parent.name(...)", where the
+// parent is either a class or a variable whose type is a class.
+// Returns NULL when the parent's class has no such subroutine.
+SUBDEC* getsubdecfromparent(SCOPE* s, SUBROUTCALL* call) {
+	OBJTYPE type;
+	void* parent = getbyname(s, &type, call->parentname);
+	// getbyname() leaves 'type' unset on failure, so test the pointer first
+	if(parent == NULL) {
+		fprintf(stderr, "Undefined identifier '%s'; file '%s', line %i\n", call->parentname, call->file, call->definedat);
+		exit(1);
+	}
+	if(type == cl)
+		return getsubdecfromclass((CLASS*)parent, call->name);
+
+	VARDEC* vd;
+	if (type == classvardec)
+		vd = ((CLASSVARDEC*)parent)->base;
+	else if (type == vardec)
+		vd = (VARDEC*)parent;
+	else {
+		fprintf(stderr, "Unexpected subroutine identifier; file '%s', line %i\n", call->file, call->definedat);
+		exit(1);
+	}
+	if(vd->primitive) {
+		fprintf(stderr, "Primitive type doesn't have methods; file '%s', line %i\n", call->file, call->definedat);
+		exit(1);
+	}
+	// The variable holds an object: look the subroutine up in the class named by its type
+	CLASS* c = getclass(s, vd->type);
+	if(c == NULL) {
+		fprintf(stderr, "Undefined class '%s'; file '%s', line %i\n", vd->type, call->file, call->definedat);
+		exit(1);
+	}
+	return getsubdecfromclass(c, call->name);
+}
+
+// Builds a single-token line holding a heap copy of 'str'.
+LINE* onetoken(char* str) {
+	LINE* ln = mkline(1);
+	addtoken(ln, ezheapstr(str));
+	return ln;
+}
+
+// Builds a line from a NULL-terminated array of tokens. The terminator is needed because
+// an array argument decays to a pointer, so sizeof cannot recover the element count.
+LINE* mksimpleln(char** tokens) {
+	int count = 0;
+	while(tokens[count] != NULL)
+		count++;
+
+	LINE* ln = mkline(count);
+	for(int i = 0; i < count; i++)
+		addtoken(ln, ezheapstr(tokens[i]));
+
+	return ln;
+}
+
+// Translates a binary operator into the VM line that applies it; aborts on an unknown one.
+LINE* mathopln(char op) {
+	if(op == '+')
+		return onetoken("add");
+	if(op == '-')
+		return onetoken("sub");
+	if(op == '=')
+		return onetoken("eq");
+	if(op == '>')
+		return onetoken("gt");
+	if(op == '<')
+		return onetoken("lt");
+	if(op == '|')
+		return onetoken("or");
+	if(op == '&')
+		return onetoken("and");
+	if(op == '/') {
+		char* tokens[] = { "call", "Math.divide", "2", NULL };
+		return mksimpleln(tokens);
+	}
+	if(op == '*') {
+		char* tokens[] = { "call", "Math.multiply", "2", NULL };
+		return mksimpleln(tokens);
+	}
+	fprintf(stderr, "Unsupported operator '%c'\n", op);
+	exit(1);
+}
+
+// Compiles the expression starting at term 'e' into a chain of VM lines. Returns the
+// first line and stores the last one in *tail, whose 'next' is always NULL.
+// The code of this term comes first, then the code of the following terms, then the
+// operator joining them.
+LINE* compileexpression(SCOPE* s, TERM* e, LINE** tail) {
+	LINE* nexts = NULL;
+	LINE* nextstail;
+	LINE* r;
+	LINE* rtail;
+
+	if(e->next != NULL) {
+		nexts = compileexpression(s, e->next, &nextstail);
+		LINE* op = mathopln(e->op);
+		nextstail->next = op;
+		nextstail = op;
+		op->next = NULL;
+	}
+
+	if(e->type == intconstant) {
+		r = mkline(3);
+		addtoken(r, ezheapstr("push"));
+		addtoken(r, ezheapstr("constant"));
+		addtoken(r, itoa(e->integer));
+		rtail = r;
+	}
+	else if(e->type == unaryopterm) {
+		// The operand has to be on the stack before it can be negated.
+		// NOTE(review): relies on the parser leaving operand->next NULL - confirm in parseterm()
+		r = compileexpression(s, e->expression, &rtail);
+		rtail->next = onetoken("neg");
+		rtail = rtail->next;
+	}
+	else if(e->type == innerexpression) {
+		r = compileexpression(s, e->expression, &rtail);
+	}
+	else {
+		fprintf(stderr, "Unsupported term type %i\n", e->type);
+		exit(1);
+	}
+
+	// Link after the LAST line of this term; an inner expression spans several lines
+	if(nexts != NULL) {
+		rtail->next = nexts;
+		(*tail) = nextstail;
+	}
+	else {
+		(*tail) = rtail;
+		rtail->next = NULL;
+	}
+	return r;
+}
+
+// Compiles every argument of a call, in order. Returns the first line and stores the
+// last one in *tail; both are NULL when the call has no arguments.
+LINE* compileparameters(SCOPE* s, EXPRESSIONLIST* ps, LINE** tail) {
+	LINE* head = NULL;
+	LINE* last = NULL;
+	EXPRESSIONLIST* current = ps;
+	while(current != NULL) {
+		LINE* exptail;
+		LINE* newln = compileexpression(s, current->expression, &exptail);
+		if(head == NULL)
+			head = newln;
+		else
+			last->next = newln;
+		// continue after the final line of this argument, not after its first
+		last = exptail;
+		current = current->next;
+	}
+	(*tail) = last;
+	return head;
+}
+
+// Compiles a call: the code of its arguments followed by the VM 'call' instruction.
+LINE* compilesubroutcall(SCOPE* s, CLASS* c, SUBROUTCALL* call) {
+	/* FOR NOW THERE IS NO OS SO THIS WILL CAUSE PROBLEMS
+	SUBDEC* sd;
+	if(call->parentname != NULL)
+		sd = getsubdecfromparent(s, call);
+	else
+		sd = getsubdec(s, call->name);
+	if(sd == NULL) {
+		fprintf(stderr, "Method '%s' does not exist; file '%s', line %i\n", call->name, call->file, call->definedat);
+		exit(1);
+	}
+	*/
+
+	// At the moment can only call functions
+	LINE* tail;
+	LINE* head = compileparameters(s, call->parameters, &tail);
+
+	LINE* callvm = mkline(3);
+	addtoken(callvm, ezheapstr("call"));
+	if(call->parentname != NULL)
+		addtoken(callvm, dotlabel(call->parentname, call->name));
+	else
+		addtoken(callvm, dotlabel(c->name, call->name));
+
+	addtoken(callvm, itoa(countparameters(call->parameters)));
+	callvm->next = NULL;
+
+	// Without arguments the call instruction is the whole chain
+	if(head == NULL)
+		return callvm;
+	tail->next = callvm;
+	return head;
+}
+
+// Compiles a return statement: the code of the returned expression, if there is one,
+// followed by the VM 'return' instruction.
+LINE* compileret(SCOPE* s, TERM* e) {
+	LINE* r = mkline(1);
+	addtoken(r, ezheapstr("return"));
+	r->next = NULL;
+	if(e == NULL)
+		return r;
+
+	LINE* tail;
+	LINE* head = compileexpression(s, e, &tail);
+	tail->next = r;
+	return head;
+}
+
+// Compiles one statement; only 'do' and 'return' are supported so far.
+LINE* compilestatement(SCOPE* s, CLASS* c, STATEMENT* st) {
+	if(st->type == dostatement)
+		return compilesubroutcall(s, c, st->dost);
+	else if(st->type == returnstatement)
+		return compileret(s, st->retst);
+	else {
+		fprintf(stderr, "UNSUPPORTED\n");
+		exit(1);
+	}
+}
+
+// Compiles a statement list into one chain of lines; returns NULL for an empty list.
+LINE* compilestatements(SCOPE* s, CLASS* c, STATEMENT* sts) {
+	LINE* head = NULL;
+	LINE* curr = NULL;
+	while(sts != NULL) {
+		LINE* ln = compilestatement(s, c, sts);
+		if(head == NULL)
+			head = ln;
+		else
+			curr->next = ln;
+		// a statement may expand to several lines: move to the end of its chain
+		curr = ln;
+		while(curr->next != NULL)
+			curr = curr->next;
+		sts = sts->next;
+	}
+	return head;
+}
+
+// Compiles the statements of a subroutine body.
+LINE* compilefunbody(SCOPE* s, CLASS* c, SUBROUTBODY* b) {
+	// missing scope and vardecs handling
+	LINE* head = compilestatements(s, c, b->statements);
+	return head;
+}
+
+// Compiles a function: a "function Class.name nlocals" line followed by its body.
+LINE* compilefundec(SCOPE* s, CLASS* c, SUBDEC* f) {
+	LINE* head = mkline(3);
+	addtoken(head, ezheapstr("function"));
+	addtoken(head, subdecname(c, f));
+	addtoken(head, itoa(countlocalvars(f->body->vardecs)));
+
+	head->next = compilefunbody(s, c, f->body);
+	return head;
+}
+
+// Compiles one subroutine declaration; aborts for the kinds not implemented yet.
+LINE* compilesubdec(SCOPE* s, CLASS* c, SUBDEC* sd) {
+	// 'this' and arguments are pushed by caller
+	// Must have a 'return' at the end
+	// Label names must have class name too (see mapping)
+
+	// types: method, function, constructor
+	// must switch all of these
+	if(sd->subroutclass == function)
+		return compilefundec(s, c, sd);
+
+	fprintf(stderr, "Unsupported subroutine kind; file '%s', line %i\n", sd->file, sd->definedat);
+	exit(1);
+}
+
+// Compiles every subroutine of 'class' and appends the generated lines to c->output.
+void compileclass(COMPILER* c, CLASS* class) {
+	SCOPE* topscope = mkscope(c->globalscope);
+	addclassvardecs(topscope, class->vardecs);
+	addsubdecs(topscope, class->subdecs);
+
+	SUBDEC* current = class->subdecs;
+	while(current != NULL) {
+		LINE* ln = compilesubdec(topscope, class, current);
+		if(c->output == NULL)
+			c->output = ln;
+		else
+			c->lastln->next = ln;
+		// lastln must always be the final line that was emitted
+		while(ln->next != NULL)
+			ln = ln->next;
+		c->lastln = ln;
+		current = current->next;
+	}
+	popscope(&topscope);
+}
+
+// Compiles every class registered in the global scope.
+void compile(COMPILER* c) {
+	CLASS* current = c->globalscope->classes;
+	while(current != NULL) {
+		compileclass(c, current);
+		current = current->next;
+	}
+}
+
+// Creates a compiler whose global scope holds 'classes' and whose output is empty.
+COMPILER* mkcompiler(CLASS* classes) {
+	COMPILER* c = (COMPILER*)malloc(sizeof(COMPILER));
+	c->globalscope = mkscope(NULL);
+	c->output = NULL;
+	c->lastln = NULL;
+	addclasses(c->globalscope, classes);
+	return c;
+}
diff --git a/compiler.h b/compiler.h
new file mode 100644
index 0000000..a999b60
--- /dev/null
+++ b/compiler.h
@@ -0,0 +1,23 @@
+#ifndef COMPILER_H
+#define COMPILER_H
+#include "util.h"
+#include "parser.h"
+
+typedef struct scope {
+	SUBDEC* subroutines;
+	CLASSVARDEC* classvardecs;
+	VARDEC* vardecs;
+	CLASS* classes;
+	struct scope* previous;
+} SCOPE;
+
+typedef struct {
+	SCOPE* globalscope;
+	LINE* output;
+	LINE* lastln;
+} COMPILER;
+
+COMPILER* mkcompiler(CLASS* classes);
+void compile(COMPILER* c);
+
+#endif
diff --git a/main.c b/main.c
index b7a3f85..5fd73af 100644
--- a/main.c
+++ b/main.c
@@ -5,6 +5,24 @@
 #include "tokenizer.h"
 #include "printer.h"
 #include "parser.h"
+#include "compiler.h"
+
+void println(LINE* ln) {
+	for(int i = 0; i < ln->tokenscount; i++) {
+		printf("%s", ln->tokens[i]);
+		if(i != ln->tokenscount-1)
+			printf(" ");
+	}
+	printf("\n");
+}
+
+void printcompiler(COMPILER* c) {
+	LINE* current = c->output;
+	while(current != NULL) {
+		println(current);
+		current = current->next;
+	}
+}
 
 int main(int argc, char* argv[]) {
 	if(argc < 2) {
@@ -21,8 +39,9 @@ int main(int argc, char* argv[]) {
 
 	PARSER* p = mkparser(tokenize(input), argv[1]);
 	parse(p);
-
-	printparser(stdout, p);
+	COMPILER* c = mkcompiler(p->output);
+	compile(c);
+	printcompiler(c);
 
 	return 0;
 }
diff --git a/parser.c b/parser.c
index 3ebe23a..932cd6d 100644
--- a/parser.c
+++ b/parser.c
@@ -97,7 +97,6 @@ TERM* parsetermnullified(PARSER* p) {
 		next(p);
 	} else if(!strcmp(p->current->token, "-") || !strcmp(p->current->token, "~")) {
 		t->type = unaryopterm;
-		t->unaryop = p->current->token[0];
 		next(p);
 		t->expression = parseterm(p);
 	} else if(!strcmp(p->current->token, "(")) {
@@ -204,6 +203,10 @@ SUBROUTCALL* parsesubroutcall(PARSER* p) {
 		restorecp(p);
 		return NULL;
 	}
+
+	call->definedat = p->current->truen;
+	call->file = p->file;
+
 	call->name = p->current->token;
 	next(p);
 
@@ -326,17 +329,19 @@ STATEMENT* parsestatements(PARSER* p) {
 	return head;
 }
 
-char* parsetype(PARSER* p) {
+char* parsetype(PARSER* p, bool* primitive) {
 	char* result = p->current->token;
 	if(p->current->type == keyword)
 		for(int i = 0; i < vardectypessize; i++) {
 			if(!strcmp(p->current->token, vardectypes[i])) {
 				next(p);
+				*primitive = true;
 				return result;
 			}
 		}
 	else if (p->current->type == identifier) {
 		next(p);
+		*primitive = false;
 		return result;
 	}
 	else
@@ -367,11 +372,13 @@ char* parseidentifier(PARSER* p) {
 
 void parsevardeccommon(PARSER* p, VARDEC* v) {
 	v->typeclass = p->current->type;
-	v->type = parsetype(p);
+	v->type = parsetype(p, &(v->primitive));
 
 	STRINGLIST* currstr = (STRINGLIST*)malloc(sizeof(STRINGLIST));
 	v->names = currstr;
 
+	v->file = p->file;
+	v->definedat = p->current->truen;
 	v->names->content = parseidentifier(p);
 
 	while(!strcmp(p->current->token, ",")) {
@@ -444,7 +451,8 @@ PARAMETER* parseparameter(PARSER* p) {
 	PARAMETER* param = (PARAMETER*)malloc(sizeof(PARAMETER));
 	if(!strcmp(p->current->token, ")"))
 		return NULL;
-	param->type = parsetype(p);
+	bool dummy;
+	param->type = parsetype(p, &dummy);
 	param->name = parseidentifier(p);
 	return param;
 }
@@ -483,8 +491,13 @@ SUBDEC* parsesubroutdec(PARSER* p) {
 		subdec->type = p->current->token;
 		next(p);
 	}
-	else
-		subdec->type = parsetype(p);
+	else {
+		bool dummy;
+		subdec->type = parsetype(p, &dummy);
+	}
+
+	subdec->file = p->file;
+	subdec->definedat = p->current->truen;
 
 	subdec->name = parseidentifier(p);
 
@@ -517,6 +530,9 @@ CLASS* parseclass(PARSER* p) {
 
 	CLASS* class = (CLASS*)malloc(sizeof(CLASS));
 
+	class->definedat = p->current->truen;
+	class->file = p->file;
+
 	class->name = parseidentifier(p);
 
 	checkcontent(p, "{");
@@ -526,6 +542,8 @@ CLASS* parseclass(PARSER* p) {
 	class->subdecs = parsesubroutdecs(p);
 
 	checkcontent(p, "}");
+
+	class->next = NULL;
 	return class;
 }
 
diff --git a/parser.h b/parser.h
index 12f8c04..8a10a5a 100644
--- a/parser.h
+++ b/parser.h
@@ -1,5 +1,6 @@
 #ifndef PARSER_H
 #define PARSER_H
+#include <stdbool.h>
 #include "tokenizer.h"
 
 struct statement;
@@ -16,6 +17,8 @@ typedef enum {
 typedef struct {
 	char* parentname;
 	char* name;
+	char* file;
+	int definedat;
 	struct explist* parameters;
 } SUBROUTCALL;
 
@@ -27,7 +30,6 @@ typedef struct term {
 		SUBROUTCALL* call;
 		struct term* expression;
 	};
-	char unaryop;
 	struct term* arrayexp;
 	char op;
 	struct term* next;
@@ -76,7 +78,10 @@ typedef struct stringlist {
 } STRINGLIST;
 
 typedef struct vardec {
+	char* file;
+	int definedat;
 	char* type;
+	bool primitive;
 	TOKENTYPE typeclass;
 	STRINGLIST* names;
 	struct vardec* next;
@@ -104,6 +109,8 @@ typedef struct SUBROUTBODY {
 } SUBROUTBODY;
 
 typedef struct subdec {
+	char* file;
+	int definedat;
 	SUBROUTCLASS subroutclass;
 	char* type;
 	TOKENTYPE typeclass;
@@ -113,10 +120,13 @@ typedef struct subdec {
 	struct subdec* next;
 } SUBDEC;
 
-typedef struct {
+typedef struct cl {
 	char* name;
 	CLASSVARDEC* vardecs;
 	SUBDEC* subdecs;
+	char* file;
+	int definedat;
+	struct cl* next;
 } CLASS;
 
 typedef struct {
diff --git a/printer.c b/printer.c
index bdc24e2..8fc81e3 100644
--- a/printer.c
+++ b/printer.c
@@ -184,7 +184,7 @@ void printterm(TERM* e, FILE* output, int depth) {
 		fprintf(output, " ) \r\n");
 	} else {
 		printident(output, depth+1);
-		fprintf(output, " %c \r\n", e->unaryop);
+		fprintf(output, " ~ \r\n");
 		printterm(e->expression, output, depth+1);
 	}
 
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..26fc95f
--- /dev/null
+++ b/util.c
@@ -0,0 +1,65 @@
+#include <stdlib.h>
+#include <string.h>
+#include "util.h"
+
+// Returns a heap copy of at most 'len' characters of 'str'; the caller owns it.
+char* heapstr(char* str, int len) {
+	int sz = sizeof(char) * (len + 1);
+	char* outstr = (char*)malloc(sz);
+	// snprintf never writes more than sz bytes, unlike strcpy, and always terminates
+	snprintf(outstr, sz, "%s", str);
+	return outstr;
+}
+
+// Returns a heap copy of the whole string.
+char* ezheapstr(char* str) {
+	return heapstr(str, strlen(str));
+}
+
+// Number of characters needed to print 'n' in decimal, including a minus sign.
+int countplaces(int n) {
+	int places = 1;
+	if(n < 0)
+		places++;
+	// Dividing instead of negating keeps INT_MIN from overflowing
+	while(n / 10 != 0) {
+		places++;
+		n /= 10;
+	}
+	return places;
+}
+
+// Returns the decimal representation of 'i' as a heap string; the caller owns it.
+char* itoa(int i) {
+	int sz = sizeof(char)*(countplaces(i)+1);
+	char* a = (char*)malloc(sz);
+	snprintf(a, sz, "%i", i);
+	return a;
+}
+
+// Writes every line of the list to 'stream', one per row.
+void printlns(LINELIST* lns, FILE* stream) {
+	LINELIST* curln = lns;
+	while(curln != NULL) {
+		fprintf(stream, "%s\n", curln->content);
+		curln = curln->next;
+	}
+}
+
+// Frees every node of the list (but not the strings they point to); accepts NULL.
+void freelns(LINELIST* lns) {
+	while(lns != NULL) {
+		LINELIST* next = lns->next;
+		free(lns);
+		lns = next;
+	}
+}
+
+// Allocates an unlinked line with room for 'count' tokens and none stored yet.
+LINE* mkline(int count) {
+	LINE* l = (LINE*)malloc(sizeof(LINE));
+	l->tokenscount = 0;
+	l->next = NULL;
+	l->tokens = (char**)malloc(sizeof(char*)*count);
+	return l;
+}
diff --git a/util.h b/util.h
new file mode 100644
index 0000000..d38d7f2
--- /dev/null
+++ b/util.h
@@ -0,0 +1,26 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stdio.h>
+
+char* heapstr(char* str, int len);
+char* ezheapstr(char* str);
+int countplaces(int n);
+
+typedef struct line {
+	char** tokens;
+	int tokenscount;
+	struct line* next;
+} LINE;
+
+typedef struct lnls {
+	char* content;
+	int truen;
+	struct lnls* next;
+} LINELIST;
+
+void printlns(LINELIST* lns, FILE* stream);
+void freelns(LINELIST* lns);
+LINE* mkline(int count);
+char* itoa(int i);
+#endif