185 lines
7 KiB
C
185 lines
7 KiB
C
#include <cstdlib>
#include <regex>
#include <utility>

#include "Assembler.h"
|
|
|
|
// Expected shape of one source line:
//   label<whitespace>instruction<whitespace>field0<whitespace>field1<whitespace>field2<whitespace>#comments
|
|
// LINE_REGEX matches a whole source line that carries an instruction or directive.
//   group 1: optional label, 1-12 alphanumeric characters starting in column 0
//   (then mandatory whitespace, so an unlabeled line has to begin with whitespace)
//   group 2: the instruction text (lazy match, so a trailing comment is not swallowed)
//   group 3/4: optional trailing comment, i.e. whitespace followed by '#' and the rest of the line
#define LINE_REGEX R"~(^([[:alnum:]]{1,12})?\s+(.+?)(\s+(#.*))?$)~"
|
|
// BLANK_REGEX matches a line that is empty or contains only whitespace.
#define BLANK_REGEX R"~(^\s*$)~"
|
|
// COMMENT_REGEX matches a comment-only line; the '#' must be the first character of the line.
#define COMMENT_REGEX R"~(^#.*$)~"
|
|
|
|
// INST_REGEX matches the instruction text taken from a line.
//   group 1: the mnemonic, alphabetic with an optional ".suffix" that is also alphabetic
//   group 2: optional remainder (the fields), which must start with whitespace
#define INST_REGEX R"~(^([[:alpha:]]+(?:\.[[:alpha:]]+)?)(\s+.*)?$)~"
|
|
// DIRECTIVE_REGEX matches a ".dfill" directive with exactly one argument.
//   group 1: either 1-12 alphanumeric characters, or an optionally negative
//            decimal number with an optional '.' and fractional digits
#define DIRECTIVE_REGEX R"~(^\.dfill\s+([[:alnum:]]{1,12}|-?\d+\.?\d*)$)~"
|
|
|
|
// Construct an assembler over the given source lines.
//   asmLines_ : the assembly source, one entry per line. Taken by value and
//               moved into the member so the caller's vector is not copied twice.
// Also creates the instruction handler and gives it a pointer to this
// assembler's label table.
Assembler::Assembler(std::vector<std::string> asmLines_) :
    asmLines(std::move(asmLines_))
{
    // NOTE(review): ih is allocated with a raw new; confirm that the class
    // declaration (not visible here) releases it in a destructor.
    ih = new InstHandler(&labels);
}
|
|
|
|
void Assembler::parse() {
|
|
std::regex lineRegex(LINE_REGEX);
|
|
std::regex blankRegex(BLANK_REGEX);
|
|
std::regex commentRegex(COMMENT_REGEX);
|
|
std::regex directiveRegex(DIRECTIVE_REGEX);
|
|
std::smatch matches;
|
|
std::smatch dfMatches;
|
|
uint32_t icount = 0;
|
|
|
|
// initial pass: handle dfill directive, track labels, filter out bad formatting, parse instructions
|
|
for (uint64_t i = 0; i < asmLines.size(); i++) {
|
|
if (std::regex_match(asmLines[i], matches, lineRegex)) {
|
|
// handle .dfill directive
|
|
bool dfill = false;
|
|
std::string inst(matches.str(2));
|
|
if (std::regex_match(inst, dfMatches, directiveRegex)) {
|
|
// a .dfill directive means we will need to increment icount extra later, and might have to align to 8 bytes
|
|
dfill = true;
|
|
if (icount % 2 == 1) {
|
|
// alignment placeholder
|
|
instructions.push_back((inst_t){i, {"BLANK"}, icount, false, 0});
|
|
icount++;
|
|
}
|
|
}
|
|
|
|
// record location of label, if we're creating one
|
|
if (matches.str(1).size() > 0 && !newLabel(matches.str(1), icount)) return error(i, "label already exists");
|
|
|
|
// parse and save
|
|
if (dfill) {
|
|
if (!parseDfill(dfMatches.str(1), icount, i)) return error(i, "dfill parse error");
|
|
} else {
|
|
if (!parseInst(inst, icount, i)) return error(i, "inst parse error");
|
|
}
|
|
|
|
icount += (dfill ? 2 : 1);
|
|
} else if (!std::regex_match(asmLines[i], blankRegex) && !std::regex_match(asmLines[i], commentRegex)) {
|
|
return error(i, "invalid syntax");
|
|
}
|
|
}
|
|
|
|
// pass instructions to handler to fill out hex, and resolve dfill labels
|
|
for (uint32_t i = 0; i < instructions.size(); i++) {
|
|
if (instructions[i].tokens[0] == ".dfill" || instructions[i].tokens[0] == "BLANK") {
|
|
// don't do anything with a BLANK
|
|
if (instructions[i].usesLabel) {
|
|
// resolve dfill label
|
|
uint64_t labelLoc = ih->findLabel(instructions[i].tokens[1]);
|
|
if (labelLoc == (uint64_t)-1) return error(instructions[i].lineNumber, "invalid label");
|
|
labelLoc <<= 2;
|
|
instructions[i].hex = (uint32_t)labelLoc;
|
|
instructions[i+1].hex = (uint32_t)(labelLoc>>32);
|
|
i++;
|
|
}
|
|
} else {
|
|
// call handler on potential instruction to do individual format checks and assembling
|
|
if (!ih->handle(&instructions[i])) return error(instructions[i].lineNumber, "couldn't handle");
|
|
}
|
|
}
|
|
}
|
|
|
|
// interpret dfill directive and put appropriate lines of hex into output
|
|
int Assembler::parseDfill(std::string arg, uint32_t icount, uint64_t ln) {
|
|
uint64_t dec = arg.find('.');
|
|
if (dec > 0 && dec < arg.size()-1) {
|
|
// is a floating point
|
|
try {
|
|
// union to make access to underlying bytes easier
|
|
Fp val = {std::stod(arg)};
|
|
|
|
instructions.push_back((inst_t){ln, {".dfill", arg}, icount, false, val.i[0]});
|
|
instructions.push_back((inst_t){ln, {".dfill", arg}, icount+1, false, val.i[1]});
|
|
return 1;
|
|
} catch (...) {
|
|
// if stod fails for whatever reason, probably the user's fault
|
|
return 0;
|
|
}
|
|
} else if (std::regex_match(arg, std::regex("^-?\\d+$"))) {
|
|
// is an integer
|
|
try {
|
|
// have to use stoll to correctly interpret negative!
|
|
uint64_t val = std::stoll(arg);
|
|
|
|
instructions.push_back((inst_t){ln, {".dfill", arg}, icount, false, (uint32_t)val});
|
|
instructions.push_back((inst_t){ln, {".dfill", arg}, icount+1, false, (uint32_t)(val>>32)});
|
|
return 1;
|
|
} catch (...) {
|
|
// if stoll fails for whatever reason, probably the user's fault
|
|
return 0;
|
|
}
|
|
} else if (std::regex_match(arg, std::regex("^[[:alnum:]]{1,12}$"))) {
|
|
// is a label. mark it to resolve later
|
|
instructions.push_back((inst_t){ln, {".dfill", arg}, icount, true, 0});
|
|
instructions.push_back((inst_t){ln, {".dfill", arg}, icount+1, true, 0});
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// tokenize the instruction and make sure the format makes sense
|
|
int Assembler::parseInst(std::string inst, uint32_t icount, uint64_t ln) {
|
|
//printf("%s\n", inst.c_str());
|
|
|
|
// tokenize
|
|
std::vector<std::string> tokens = tokenizeInst(inst);
|
|
|
|
// sanity check. never more than 4 tokens (1 instruction + 3 fields)
|
|
uint64_t numTokens = tokens.size();
|
|
if (numTokens > 4 || numTokens < 1) return 0;
|
|
inst_t i = (inst_t){ln, tokens, icount, false, 0};
|
|
instructions.push_back(i);
|
|
|
|
return 1;
|
|
}
|
|
|
|
// attempt to record a new label, or error if it exists already
|
|
int Assembler::newLabel(std::string name, uint32_t icount) {
|
|
for (uint64_t i = 0; i < labels.size(); i++) {
|
|
if (labels[i].name == name) return 0;
|
|
}
|
|
labels.push_back((label_t){name, icount});
|
|
return 1;
|
|
}
|
|
|
|
// split instruction string into lowercase instruction and all existing fields
|
|
std::vector<std::string> Assembler::tokenizeInst(std::string inst) {
|
|
std::vector<std::string> tks;
|
|
static std::regex emptyRegex("^\\s*$");
|
|
static std::regex instRegex(INST_REGEX);
|
|
static std::regex fieldRegex("^\\s+(-?[[:alnum:]]+)(\\s+.*)?$");
|
|
static std::smatch matches;
|
|
|
|
// if instruction isn't right format, stop
|
|
if (!std::regex_match(inst, matches, instRegex)) return tks;
|
|
tks.push_back(toLowerCase(matches.str(1)));
|
|
|
|
inst = matches.str(2);
|
|
while (!std::regex_match(inst, emptyRegex)) {
|
|
std::regex_match(inst, matches, fieldRegex);
|
|
tks.push_back(matches.str(1));
|
|
inst = matches.str(2);
|
|
}
|
|
return tks;
|
|
}
|
|
|
|
// print a custom error and exit
|
|
void Assembler::error(uint64_t ln, std::string msg) {
|
|
printf("Error on line %ld: \"%s\"\n%s\n", ln+1, asmLines[ln].c_str(), msg.c_str());
|
|
exit(0);
|
|
}
|
|
|
|
// utility to convert string to lower case
|
|
std::string Assembler::toLowerCase(std::string str) {
|
|
for (uint64_t i = 0; i < str.length(); i++) {
|
|
if (str[i] >= 'A' && str[i] <= 'Z') str[i] += 32;
|
|
}
|
|
return str;
|
|
}
|
|
|
|
// output pure hex
|
|
std::vector<uint32_t> Assembler::assemble() {
|
|
std::vector<uint32_t> hexCode;
|
|
for (uint32_t i = 0; i < instructions.size(); i++) {
|
|
hexCode.push_back(instructions[i].hex);
|
|
}
|
|
return hexCode;
|
|
}
|