Split parsing functionality

This commit is contained in:
Augusto Gunsch 2020-11-20 20:24:02 -03:00
parent 8e490746d1
commit aa1898fd66
No known key found for this signature in database
GPG Key ID: F7EEFE29825C72DC
6 changed files with 280 additions and 237 deletions

View File

@ -1,4 +1,4 @@
FILES = assembler.c main.c util.c FILES = assembler.c main.c util.c parser.c
INCLUDES = -I. INCLUDES = -I.
CFLAGS = -std=c99 CFLAGS = -std=c99
OUTFILE = assembler OUTFILE = assembler

View File

@ -17,17 +17,14 @@ void skipln(ASSEMBLER* a);
void readrest(ASSEMBLER* a, int trueln); void readrest(ASSEMBLER* a, int trueln);
int isvar(char* var); int isvar(char* var);
void initsymbols(SYMBOLARRAY* s); void initsymbols(SYMBOLARRAY* s);
ASSEMBLER* mkassembler(FILE* input);
void populatevars(ASSEMBLER* a); void populatevars(ASSEMBLER* a);
SYMBOL* readlabel(ASSEMBLER* a, int trueln); SYMBOL* readlabel(ASSEMBLER* a, LINELIST* ln, int count);
void chop(ASSEMBLER* a); void replacevar(LINELIST* ln, int val);
void replacevar(SYMBOL* ln, int val);
void preprocess(ASSEMBLER* a); void preprocess(ASSEMBLER* a);
void transa(SYMBOL* ln); void transa(LINELIST* ln);
char* lookctable(TABLE* t, bool cond, char* token, const char* fieldname, int trueln); char* lookctable(TABLE* t, bool cond, char* token, const char* fieldname, int trueln);
void transb(SYMBOL* ln); void transb(LINELIST* ln);
void translate(ASSEMBLER* a); void translate(ASSEMBLER* a);
void gatherinfo(ASSEMBLER* a);
void freeassembler(ASSEMBLER* a); void freeassembler(ASSEMBLER* a);
void expandsymbols(SYMBOLARRAY* a, int toaddn) { void expandsymbols(SYMBOLARRAY* a, int toaddn) {
@ -77,33 +74,6 @@ int getsymbol(ASSEMBLER* a, char* name) {
return -1; return -1;
} }
// Consumes characters from a->input up to and including the next newline,
// or up to end-of-file if no newline remains.
void skipln(ASSEMBLER* a) {
    // int rather than char: fgetc returns EOF as an out-of-band int value,
    // which a char cannot represent reliably on every platform.
    int c;
    while((c = fgetc(a->input)) != EOF)
        if(c == '\n')
            break;
}
// Consumes the remainder of the current line of a->input, accepting only
// whitespace and an optional "//" comment.
// Any other character is reported on stderr together with trueln and the
// program exits with status 1.
void readrest(ASSEMBLER* a, int trueln) {
    // int rather than char so that EOF cannot be confused with a data byte.
    int c;
    while((c = fgetc(a->input)) != EOF) {
        if(c == '\n')
            break;
        if(isspace(c))
            continue;
        if(c == '/') {
            int nc = fgetc(a->input);
            if(nc == '/') {
                // comment: discard everything up to the end of the line
                skipln(a);
                break;
            }
            // lone '/': put the lookahead back and fall through to the error
            ungetc(nc, a->input);
        }
        fprintf(stderr, "Unexpected '%c' at line '%i'\n", c, trueln);
        exit(1);
    }
}
int isvar(char* var) { int isvar(char* var) {
int i = 0; int i = 0;
while(1) { while(1) {
@ -117,27 +87,22 @@ int isvar(char* var) {
} }
void initsymbols(SYMBOLARRAY* s) { void initsymbols(SYMBOLARRAY* s) {
s->size = s->count * sizeof(SYMBOL*); s->size = 150 * sizeof(SYMBOL*);
s->items = (SYMBOL**)malloc(s->size); s->items = (SYMBOL**)malloc(s->size);
s->count = 0; s->count = 0;
} }
ASSEMBLER* mkassembler(FILE* input) { ASSEMBLER* mkassembler(LINELIST* input) {
ASSEMBLER* a = (ASSEMBLER*)malloc(sizeof(ASSEMBLER)); ASSEMBLER* a = (ASSEMBLER*)malloc(sizeof(ASSEMBLER));
a->lns = (SYMBOLARRAY*)malloc(sizeof(SYMBOLARRAY));
a->labels = (SYMBOLARRAY*)malloc(sizeof(SYMBOLARRAY)); a->labels = (SYMBOLARRAY*)malloc(sizeof(SYMBOLARRAY));
a->vars = (SYMBOLARRAY*)malloc(sizeof(SYMBOLARRAY)); a->vars = (SYMBOLARRAY*)malloc(sizeof(SYMBOLARRAY));
a->input = input; a->lns = input;
gatherinfo(a);
initsymbols(a->lns);
initsymbols(a->labels); initsymbols(a->labels);
a->vars->count = 80; // arbitrary number for initial size
initsymbols(a->vars); initsymbols(a->vars);
populatevars(a); populatevars(a);
chop(a); a->varsramind = BOTTOM_VAR;
return a; return a;
} }
@ -183,117 +148,39 @@ void populatevars(ASSEMBLER* a) {
a->vars->items[firstamnt+ramvamnt+1] = mksymbol("KBD", 4, 24576); a->vars->items[firstamnt+ramvamnt+1] = mksymbol("KBD", 4, 24576);
} }
SYMBOL* readlabel(ASSEMBLER* a, int trueln) { SYMBOL* readlabel(ASSEMBLER* a, LINELIST* ln, int count) {
char* name = (char*)malloc(sizeof(char)*(a->maxwidth-1)); int i = 1;
int i = 0;
char c; char c;
int maxind = a->maxwidth-2; while(true) {
while(c = fgetc(a->input), c != -1) { c = ln->content[i];
if(c == ')') if(c == ')')
break; break;
if(i == maxind) { if(c == '\0') {
fprintf(stderr, "Label width bigger than the maximum (%i characters); line %i\n", fprintf(stderr, "Unexpected end of line; line %i\n", ln->truen+1);
maxind, trueln+1);
exit(1);
}
if(c == '\n') {
fprintf(stderr, "Unexpected end of line; line %i\n", trueln+1);
exit(1); exit(1);
} }
if(isspace(c) || c == '(') { if(isspace(c) || c == '(') {
fprintf(stderr, "Unexpected '%c'; line %i\n", c, trueln+1); fprintf(stderr, "Unexpected '%c'; line %i\n", c, ln->truen+1);
exit(1); exit(1);
} }
name[i] = c;
i++; i++;
} }
name[i] = '\0';
readrest(a, trueln); if (i == 1) {
fprintf(stderr, "Label has no content; line %i\n", ln->truen+1);
exit(1);
}
int size = i * sizeof(char);
char* name = (char*)malloc(size);
snprintf(name, size, "%s", ln->content+sizeof(char));
SYMBOL* l = (SYMBOL*)malloc(sizeof(SYMBOL)); SYMBOL* l = (SYMBOL*)malloc(sizeof(SYMBOL));
l->name = name; l->name = name;
l->value = a->lns->count; l->value = count;
return l; return l;
} }
// Splits the stream into an array of strings, stripping comments, white spaces and labels void replacevar(LINELIST* ln, int val) {
// Requires vars array to check for duplicate symbols, but doesn't modify it
// Reads a->input to end-of-file, pushing one symbol per non-empty line into
// a->lns with whitespace and "//" comments removed, and registering every
// "(label)" in a->labels. Closes a->input when done.
// Exits with status 1 on a '(' that is not the first character of a line, on
// a label whose name getsymbol already finds, or on a character that follows
// inner whitespace on a line.
// NOTE(review): a final line that is not terminated by '\n' is never pushed.
void chop(ASSEMBLER* a) {
    // int rather than char so that EOF cannot be confused with a data byte.
    int c;
    char tmpln[a->maxwidth];
    int lnind = 0;
    int lnscount = 0;
    int truelnscount = 1;
    bool comment = false;
    bool spacedln = false;

    while((c = fgetc(a->input)) != EOF) {
        if(c == '\n') {
            if(comment) {
                // leave comment mode and re-read the newline as a line end
                comment = false;
                ungetc(c, a->input);
                continue;
            }
            truelnscount++;
            if(!lnind)
                continue;
            tmpln[lnind] = '\0';
            pushsymbol(a->lns, mksymbol(tmpln, lnind+1, truelnscount));
            lnind = 0;
            spacedln = false;
            lnscount++;
            continue;
        }
        if(comment)
            continue;
        if(isspace(c)) {
            // whitespace inside a line: anything after it is an error
            if(lnind)
                spacedln = true;
            continue;
        }
        if(c == '(') {
            if(lnind) {
                fprintf(stderr, "Unexpected char '%c'; line %i:%i\n", c, truelnscount, lnind+1);
                exit(1);
            }
            SYMBOL* l = readlabel(a, truelnscount);
            if(getsymbol(a, l->name) != -1) {
                fprintf(stderr, "Already defined symbol '%s'; line %i\n", l->name, truelnscount);
                exit(1);
            }
            pushsymbol(a->labels, l);
            truelnscount++;
            continue;
        }
        if(c == '/') {
            int nc = fgetc(a->input);
            if(nc == '/') {
                comment = true;
                continue;
            }
            // lone '/': put the lookahead back and keep the '/' as content
            ungetc(nc, a->input);
        }
        if(spacedln) {
            fprintf(stderr, "Unexpected char '%c'; line %i:%i\n", c, lnscount+1, lnind+1);
            exit(1);
        }
        tmpln[lnind] = (char)c;
        lnind++;
    }
    fclose(a->input);
}
void replacevar(SYMBOL* ln, int val) {
free(ln->content); free(ln->content);
int size = sizeof(char)*(countplaces(val) + 2); int size = sizeof(char)*(countplaces(val) + 2);
char* newln = (char *)malloc(size); char* newln = (char *)malloc(size);
@ -301,30 +188,72 @@ void replacevar(SYMBOL* ln, int val) {
ln->content = newln; ln->content = newln;
} }
void preprocess(ASSEMBLER* a) { void handlevarsymbol(ASSEMBLER* a, LINELIST* ln) {
int varsramind = BOTTOM_VAR; char* afterat = ln->content+sizeof(char);
for(int i = 0; i < a->lncount; i++) { if(isvar(afterat)) {
if(a->lns->items[i]->content[0] == '@') { int val = getsymbol(a, afterat);
char* afterat = a->lns->items[i]->content+sizeof(char); if(val == -1) {
if(isvar(afterat)) { if(a->varsramind == RAM_LIMIT) {
int val = getsymbol(a, afterat); fprintf(stderr, "Variable amount reached RAM limit (%i); line %i\n", RAM_LIMIT, ln->truen);
if(val == -1) { exit(1);
if(varsramind == RAM_LIMIT) {
fprintf(stderr, "Variable amount reached RAM limit (%i); line %i\n", RAM_LIMIT, a->lns->items[i]->truen);
exit(1);
}
SYMBOL* var = mksymbol(afterat, strlen(afterat)+1, varsramind);
varsramind++;
pushsymbol(a->vars, var);
val = var->value;
}
replacevar(a->lns->items[i], val);
} }
SYMBOL* var = mksymbol(afterat, strlen(afterat)+1, a->varsramind);
a->varsramind++;
pushsymbol(a->vars, var);
val = var->value;
}
replacevar(ln, val);
}
}
// Reads the label held by ln (via readlabel, with count as its value) and
// appends it to a->labels. If getsymbol already finds the name, reports the
// duplicate on stderr and exits with status 1.
void handlelabelsymbol(ASSEMBLER* a, LINELIST* ln, int count) {
    SYMBOL* label = readlabel(a, ln, count);

    if(getsymbol(a, label->name) == -1) {
        pushsymbol(a->labels, label);
        return;
    }

    fprintf(stderr, "Already defined symbol '%s'; line %i\n", label->name, ln->truen);
    exit(1);
}
// Walks the line list of a and hands every line whose content starts with
// '@' to handlevarsymbol.
void stripvars(ASSEMBLER* a) {
    for(LINELIST* ln = a->lns; ln != NULL; ln = ln->next) {
        if(ln->content[0] != '@')
            continue;
        handlevarsymbol(a, ln);
    }
}
void striplabels(ASSEMBLER* a) {
LINELIST* curln = a->lns;
LINELIST* lastln;
int count = 0;
while(curln != NULL) {
if(curln->content[0] == '(') {
handlelabelsymbol(a, curln, count);
if(count > 0)
lastln->next = curln->next;
else
a->lns = curln->next;
LINELIST* tmp = curln;
curln = curln->next;
free(tmp);
}
else {
lastln = curln;
curln = curln->next;
count++;
} }
} }
} }
void transa(SYMBOL* ln) { void preprocess(ASSEMBLER* a) {
striplabels(a);
stripvars(a);
}
void transa(LINELIST* ln) {
int add = atoi(ln->content+sizeof(char)); int add = atoi(ln->content+sizeof(char));
if(add >= INST_LIMIT) { if(add >= INST_LIMIT) {
@ -369,7 +298,7 @@ char* lookctable(TABLE* t, bool cond, char* token, const char* fieldname, int tr
exit(1); exit(1);
} }
void transb(SYMBOL* ln) { void transb(LINELIST* ln) {
bool hasjmp = false; bool hasjmp = false;
bool hasdest = false; bool hasdest = false;
bool hascmp = false; bool hascmp = false;
@ -377,7 +306,7 @@ void transb(SYMBOL* ln) {
int tmpi = 0; int tmpi = 0;
char tmp[C_TOKEN_SIZE], dest[C_TOKEN_SIZE], cmp[C_TOKEN_SIZE], jmp[C_TOKEN_SIZE]; char tmp[C_TOKEN_SIZE], dest[C_TOKEN_SIZE], cmp[C_TOKEN_SIZE], jmp[C_TOKEN_SIZE];
while(1) { while(true) {
if(ln->content[i] == '\0') { if(ln->content[i] == '\0') {
tmp[tmpi] = '\0'; tmp[tmpi] = '\0';
if(hasjmp) if(hasjmp)
@ -433,68 +362,17 @@ void transb(SYMBOL* ln) {
} }
void translate(ASSEMBLER* a) { void translate(ASSEMBLER* a) {
for(int i = 0; i < a->lns->count; i++) LINELIST* curln = a->lns;
if(a->lns->items[i]->content[0] == '@') while(curln != NULL) {
transa(a->lns->items[i]); if(curln->content[0] == '@')
transa(curln);
else else
transb(a->lns->items[i]); transb(curln);
} curln = curln->next;
void gatherinfo(ASSEMBLER* a) {
char c;
bool readsmt = false;
bool comment = false;
int lnwidth = 1;
a->truelnscount = 1;
a->maxwidth = 0;
a->labels->count = 0;
a->lns->count = 0;
while(c = fgetc(a->input), c != -1) {
if(c == '\n') {
a->truelnscount++;
comment = false;
if(lnwidth > a->maxwidth)
a->maxwidth = lnwidth;
if(readsmt) {
if(a->lns->count == INST_LIMIT) {
fprintf(stderr, "Reached instruction limit (%i); line %i\n", INST_LIMIT, a->truelnscount);
exit(1);
}
a->lns->count++;
}
readsmt = false;
lnwidth = 1;
continue;
}
if(comment)
continue;
if(c == '(') {
a->labels->count++;
comment = true;
continue;
}
if(c == '/') {
char nc = fgetc(a->input);
if(nc == '/') {
comment = true;
continue;
}
ungetc(nc, a->input);
}
if(isspace(c)) {
continue;
}
readsmt = true;
lnwidth++;
} }
rewind(a->input);
a->lncount = a->lns->count;
} }
void freeassembler(ASSEMBLER* a) { void freeassembler(ASSEMBLER* a) {
freesymbols(a->lns);
freesymbols(a->vars); freesymbols(a->vars);
freesymbols(a->labels); freesymbols(a->labels);
free(a); free(a);

View File

@ -1,23 +1,17 @@
#ifndef ASSEMBLER_H #ifndef ASSEMBLER_H
#define ASSEMBLER_H #define ASSEMBLER_H
#include <stdio.h> #include <stdio.h>
#include "parser.h"
#define RAM_LIMIT 24577 #define RAM_LIMIT 24577
#define TOP_VAR 16383 #define TOP_VAR 16383
#define BOTTOM_VAR 16 #define BOTTOM_VAR 16
#define INST_SIZE 17 #define INST_SIZE 17
#define C_TOKEN_SIZE 4 #define C_TOKEN_SIZE 4
#define INST_LIMIT 1<<15
typedef struct { typedef struct {
union { char* name;
char* name; int value;
char* content;
};
union {
int value;
int truen;
};
} SYMBOL; } SYMBOL;
typedef struct { typedef struct {
@ -27,20 +21,19 @@ typedef struct {
} SYMBOLARRAY; } SYMBOLARRAY;
typedef struct { typedef struct {
FILE* input;
int maxwidth; int maxwidth;
int truelnscount; int truelnscount;
int lncount; int lncount;
SYMBOLARRAY* lns; LINELIST* lns;
SYMBOLARRAY* labels; SYMBOLARRAY* labels;
SYMBOLARRAY* vars; SYMBOLARRAY* vars;
int varsramind;
} ASSEMBLER; } ASSEMBLER;
ASSEMBLER* mkassembler(FILE* input); ASSEMBLER* mkassembler(LINELIST* input);
void preprocess(ASSEMBLER* a); void preprocess(ASSEMBLER* a);
void translate(ASSEMBLER* a); void translate(ASSEMBLER* a);
void freeassembler(ASSEMBLER* a); void freeassembler(ASSEMBLER* a);

17
main.c
View File

@ -12,6 +12,14 @@ char* getoutname(char* fname, int fnamelen) {
return outf; return outf;
} }
// Writes the content of every node of lns to stream, one node per line.
void printlns(LINELIST* lns, FILE* stream) {
    for(LINELIST* node = lns; node != NULL; node = node->next)
        fprintf(stream, "%s\n", node->content);
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
if(argc < 2) { if(argc < 2) {
printf("Usage: %s {input}\n", argv[0]); printf("Usage: %s {input}\n", argv[0]);
@ -31,7 +39,10 @@ int main(int argc, char* argv[]) {
return errno; return errno;
} }
ASSEMBLER* a = mkassembler(input); PARSER* p = mkparser(input);
parse(p);
ASSEMBLER* a = mkassembler(p->output);
// variable substitution // variable substitution
preprocess(a); preprocess(a);
@ -48,9 +59,7 @@ int main(int argc, char* argv[]) {
return errno; return errno;
} }
for(int i = 0; i < a->lns->count; i++) { printlns(a->lns, output);
fprintf(output, "%s\n", a->lns->items[i]->content);
}
free(outf); free(outf);
freeassembler(a); freeassembler(a);

142
parser.c Normal file
View File

@ -0,0 +1,142 @@
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "parser.h"
// Fills the node *curln with a heap copy of the first lnind characters of
// tmpln and with the line number truen, links a freshly allocated empty node
// after it, and advances *curln to that new node.
// The new tail node is fully initialized (content NULL, next NULL) so the
// list is well-formed even before the caller trims it.
// Exits with status 1 if memory cannot be allocated.
void pushln(LINELIST** curln, char* tmpln, int lnind, int truen) {
    size_t size = ((size_t)lnind + 1) * sizeof(char);
    char* newcontent = (char*)malloc(size);
    LINELIST* nextln = (LINELIST*)malloc(sizeof(LINELIST));
    if(newcontent == NULL || nextln == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }

    // bounded copy: never reads or writes past the lnind characters promised
    memcpy(newcontent, tmpln, size - 1);
    newcontent[size - 1] = '\0';

    nextln->content = NULL;
    nextln->truen = 0;
    nextln->next = NULL;

    (*curln)->content = newcontent;
    (*curln)->truen = truen;
    (*curln)->next = nextln;
    (*curln) = nextln;
}
// Reads p->input to end-of-file and builds a NULL-terminated linked list with
// one node per non-empty line, with whitespace and "//" comments removed.
// The list head is stored in p->output (NULL when the input holds no
// content). p->input is closed before returning.
// Exits with status 1 on a character that follows inner whitespace on a
// line, or when memory cannot be allocated.
void chop(PARSER* p) {
    // The line buffer starts at p->maxwidth and grows on demand, so a line
    // that gatherinfo did not measure cannot overflow it.
    size_t cap = p->maxwidth > 1 ? (size_t)p->maxwidth : 2;
    char* tmpln = (char*)malloc(cap);
    LINELIST* firstln = (LINELIST*)malloc(sizeof(LINELIST));
    if(tmpln == NULL || firstln == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }

    LINELIST* lastln = NULL;   // last node that holds a real line, if any
    LINELIST* curln = firstln; // empty node that the next pushln will fill
    int c;                     // int so EOF differs from every byte value
    int lnind = 0;
    int lnscount = 0;
    int truelnscount = 0;
    bool comment = false;
    bool spacedln = false;

    while((c = fgetc(p->input)) != EOF) {
        if(c == '\n') {
            comment = false; // a comment ends with its line
            truelnscount++;
            if(!lnind)
                continue;
            tmpln[lnind] = '\0';
            lastln = curln;
            pushln(&curln, tmpln, lnind, truelnscount);
            lnind = 0;
            spacedln = false;
            lnscount++;
            continue;
        }
        if(comment)
            continue;
        if(isspace(c)) {
            // whitespace inside a line: anything after it is an error
            if(lnind)
                spacedln = true;
            continue;
        }
        if(c == '/') {
            int nc = fgetc(p->input);
            if(nc == '/') {
                comment = true;
                continue;
            }
            // lone '/': put the lookahead back and keep the '/' as content
            ungetc(nc, p->input);
        }
        if(spacedln) {
            // NOTE(review): the reported line is lnscount+1 (lines pushed so
            // far), not the physical line counter truelnscount - confirm.
            fprintf(stderr, "Unexpected char '%c'; line %i:%i\n", c, lnscount+1, lnind+1);
            exit(1);
        }
        // keep room for this character plus the terminating '\0'
        if((size_t)lnind + 2 > cap) {
            cap *= 2;
            char* grown = (char*)realloc(tmpln, cap);
            if(grown == NULL) {
                fprintf(stderr, "Out of memory\n");
                exit(1);
            }
            tmpln = grown;
        }
        tmpln[lnind] = (char)c;
        lnind++;
    }

    // A final line without a trailing newline is still a line.
    if(lnind) {
        tmpln[lnind] = '\0';
        lastln = curln;
        pushln(&curln, tmpln, lnind, truelnscount+1);
    }

    free(tmpln);
    fclose(p->input);

    // curln is the unused trailing node (or firstln itself if nothing was read)
    free(curln);
    if(lastln != NULL) {
        lastln->next = NULL;
        p->output = firstln;
    }
    else
        p->output = NULL;
}
// First pass over p->input: records in p->maxwidth the widest line, counted
// as the number of non-space, non-comment characters plus one, and enforces
// INST_LIMIT on the number of lines that hold content. Rewinds p->input
// before returning.
// A final line without a trailing newline is measured and counted like any
// other line, so p->maxwidth is always at least 1.
// Exits with status 1 when the instruction limit is exceeded.
void gatherinfo(PARSER* p) {
    int c; // int so EOF differs from every byte value
    bool readsmt = false;
    bool comment = false;
    int lnwidth = 1;
    int lnscount = 0;
    int truelnscount = 1;

    p->maxwidth = 0;

    while(true) {
        c = fgetc(p->input);

        if(c == '\n' || c == EOF) {
            // end of a line, either by newline or by end-of-file
            if(c == '\n')
                truelnscount++;
            comment = false;
            if(lnwidth > p->maxwidth)
                p->maxwidth = lnwidth;
            if(readsmt) {
                if(lnscount == INST_LIMIT) {
                    fprintf(stderr, "Reached instruction limit (%i); line %i\n", INST_LIMIT, truelnscount);
                    exit(1);
                }
                lnscount++;
            }
            if(c == EOF)
                break;
            readsmt = false;
            lnwidth = 1;
            continue;
        }
        if(comment)
            continue;
        if(c == '/') {
            int nc = fgetc(p->input);
            if(nc == '/') {
                comment = true;
                continue;
            }
            // lone '/': put the lookahead back and count the '/' as content
            ungetc(nc, p->input);
        }
        if(isspace(c)) {
            continue;
        }
        readsmt = true;
        lnwidth++;
    }
    rewind(p->input);
}
PARSER* mkparser(FILE* input) {
PARSER* p = (PARSER*)malloc(sizeof(PARSER));
p->input = input;
};
// Parses p->input in two passes: gatherinfo measures the input and rewinds
// the stream, then chop builds the line list in p->output and closes the
// stream.
void parse(PARSER* p) {
gatherinfo(p);
chop(p);
}

21
parser.h Normal file
View File

@ -0,0 +1,21 @@
#ifndef PARSER_H
#define PARSER_H
#include <stdio.h>
// Maximum number of content lines gatherinfo accepts.
// Parenthesized so the shift survives being embedded in a larger expression
// (unparenthesized, INST_LIMIT - 1 would expand to 1 << 14).
#define INST_LIMIT (1 << 15)
// One node of the singly linked list of parsed lines.
struct lnls {
    char* content;     // text of the line
    int truen;         // line number recorded for the line
    struct lnls* next; // following node, NULL at the end of the list
};
typedef struct lnls LINELIST;
// State of one parsing run over a single input stream.
typedef struct {
FILE* input; // stream the source text is read from
int maxwidth; // widest line found by gatherinfo, counted as characters plus one
LINELIST* output; // head of the line list built by chop
} PARSER;
// Allocates a PARSER and stores input in it as the stream to read.
PARSER* mkparser(FILE* input);
// Runs the two parsing passes on p; the resulting line list is left in p->output.
void parse(PARSER* p);
#endif