copy project to git

This commit is contained in:
snedmore 2025-11-04 14:00:16 -05:00
parent 942851fc79
commit 978c1f9fdb
8 changed files with 728 additions and 33 deletions

36
.gitignore vendored
View file

@ -1,34 +1,6 @@
# ---> C++
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o *.o
*.obj ma
.vscode/
# Precompiled Headers Tests/
*.gch run.pl
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app

185
Assembler.C Normal file
View file

@ -0,0 +1,185 @@
#include <regex>
#include "Assembler.h"
// Regular expressions (as raw string literals) used to classify and split source lines.
//
// General shape of an assembly line:
// label<whitespace>instruction<whitespace>field0<whitespace>field1<whitespace>field2<whitespace>#comments
//
// LINE_REGEX: group 1 = optional label (1-12 alphanumerics) at the very start of the line,
// followed by mandatory whitespace; group 2 = the instruction text (lazy match);
// group 4 = optional trailing comment beginning with '#'.
#define LINE_REGEX R"~(^([[:alnum:]]{1,12})?\s+(.+?)(\s+(#.*))?$)~"
// BLANK_REGEX: a line that is empty or contains only whitespace.
#define BLANK_REGEX R"~(^\s*$)~"
// COMMENT_REGEX: a line whose first character is '#'.
#define COMMENT_REGEX R"~(^#.*$)~"
// INST_REGEX: group 1 = mnemonic made of letters with an optional ".letters" suffix;
// group 2 = everything after it (whitespace followed by the operand fields), if present.
#define INST_REGEX R"~(^([[:alpha:]]+(?:\.[[:alpha:]]+)?)(\s+.*)?$)~"
// DIRECTIVE_REGEX: ".dfill" followed by whitespace and one argument (group 1), which is
// either a 1-12 character alphanumeric name or an optionally negative decimal number.
#define DIRECTIVE_REGEX R"~(^\.dfill\s+([[:alnum:]]{1,12}|-?\d+\.?\d*)$)~"
// Keeps a copy of the source lines and creates the instruction handler, which
// receives the address of this assembler's label table so it can look labels up.
Assembler::Assembler(std::vector<std::string> asmLines_)
    : ih(nullptr),
      asmLines(asmLines_)
{
    // created in the body, after the remaining members have been default-constructed
    ih = new InstHandler(&labels);
}
void Assembler::parse() {
std::regex lineRegex(LINE_REGEX);
std::regex blankRegex(BLANK_REGEX);
std::regex commentRegex(COMMENT_REGEX);
std::regex directiveRegex(DIRECTIVE_REGEX);
std::smatch matches;
std::smatch dfMatches;
uint32_t icount = 0;
// initial pass: handle dfill directive, track labels, filter out bad formatting, parse instructions
for (uint64_t i = 0; i < asmLines.size(); i++) {
if (std::regex_match(asmLines[i], matches, lineRegex)) {
// handle .dfill directive
bool dfill = false;
std::string inst(matches.str(2));
if (std::regex_match(inst, dfMatches, directiveRegex)) {
// a .dfill directive means we will need to increment icount extra later, and might have to align to 8 bytes
dfill = true;
if (icount % 2 == 1) {
// alignment placeholder
instructions.push_back((inst_t){i, {"BLANK"}, icount, false, 0});
icount++;
}
}
// record location of label, if we're creating one
if (matches.str(1).size() > 0 && !newLabel(matches.str(1), icount)) return error(i, "label already exists");
// parse and save
if (dfill) {
if (!parseDfill(dfMatches.str(1), icount, i)) return error(i, "dfill parse error");
} else {
if (!parseInst(inst, icount, i)) return error(i, "inst parse error");
}
icount += (dfill ? 2 : 1);
} else if (!std::regex_match(asmLines[i], blankRegex) && !std::regex_match(asmLines[i], commentRegex)) {
return error(i, "invalid syntax");
}
}
// pass instructions to handler to fill out hex, and resolve dfill labels
for (uint32_t i = 0; i < instructions.size(); i++) {
if (instructions[i].tokens[0] == ".dfill" || instructions[i].tokens[0] == "BLANK") {
// don't do anything with a BLANK
if (instructions[i].usesLabel) {
// resolve dfill label
uint64_t labelLoc = ih->findLabel(instructions[i].tokens[1]);
if (labelLoc == (uint64_t)-1) return error(instructions[i].lineNumber, "invalid label");
labelLoc <<= 2;
instructions[i].hex = (uint32_t)labelLoc;
instructions[i+1].hex = (uint32_t)(labelLoc>>32);
i++;
}
} else {
// call handler on potential instruction to do individual format checks and assembling
if (!ih->handle(&instructions[i])) return error(instructions[i].lineNumber, "couldn't handle");
}
}
}
// Interprets the argument of a `.dfill` directive and appends the two 32-bit
// entries that represent it to `instructions`.
//
// arg    - directive argument: a floating-point literal, an integer literal, or a label name
// icount - slot index of the first of the two entries
// ln     - index of the source line, kept for error reporting
//
// Returns 1 on success and 0 if the argument cannot be interpreted.
int Assembler::parseDfill(std::string arg, uint32_t icount, uint64_t ln) {
    // compiled once instead of on every call, like the other parsers in this project
    static const std::regex intRegex("^-?\\d+$");
    static const std::regex labelRegex("^[[:alnum:]]{1,12}$");

    // a '.' that is neither the first nor the last character marks a floating-point literal
    const std::string::size_type dec = arg.find('.');
    if (dec != std::string::npos && dec > 0 && dec < arg.size() - 1) {
        try {
            // union to make access to underlying bytes easier
            // NOTE(review): this reads the union member that was not written, which relies on
            // the compiler permitting union type punning; i[0] is stored as the first word.
            Fp val = {std::stod(arg)};
            instructions.push_back(inst_t{ln, {".dfill", arg}, icount, false, val.i[0]});
            instructions.push_back(inst_t{ln, {".dfill", arg}, icount + 1, false, val.i[1]});
            return 1;
        } catch (...) {
            // if stod fails for whatever reason, probably the user's fault
            return 0;
        }
    }

    if (std::regex_match(arg, intRegex)) {
        try {
            // have to use stoll to correctly interpret negative!
            const uint64_t val = std::stoll(arg);
            instructions.push_back(inst_t{ln, {".dfill", arg}, icount, false, static_cast<uint32_t>(val)});
            instructions.push_back(inst_t{ln, {".dfill", arg}, icount + 1, false, static_cast<uint32_t>(val >> 32)});
            return 1;
        } catch (...) {
            // if stoll fails for whatever reason, probably the user's fault
            return 0;
        }
    }

    if (std::regex_match(arg, labelRegex)) {
        // is a label: both words are flagged so parse() resolves them in its second pass
        instructions.push_back(inst_t{ln, {".dfill", arg}, icount, true, 0});
        instructions.push_back(inst_t{ln, {".dfill", arg}, icount + 1, true, 0});
        return 1;
    }

    return 0;
}
// Splits an instruction into tokens and, if the token count is plausible,
// appends it to `instructions` with an as-yet-unencoded word (hex = 0).
// Returns 1 when the entry was stored, 0 when the text could not be tokenized
// or has more than 4 tokens (1 mnemonic + 3 fields).
int Assembler::parseInst(std::string inst, uint32_t icount, uint64_t ln) {
    const std::vector<std::string> fields = tokenizeInst(inst);

    // tokenizeInst yields no tokens at all when the mnemonic is malformed
    const bool plausible = fields.size() >= 1 && fields.size() <= 4;
    if (!plausible) return 0;

    instructions.push_back(inst_t{ln, fields, icount, false, 0});
    return 1;
}
// attempt to record a new label, or error if it exists already
int Assembler::newLabel(std::string name, uint32_t icount) {
for (uint64_t i = 0; i < labels.size(); i++) {
if (labels[i].name == name) return 0;
}
labels.push_back((label_t){name, icount});
return 1;
}
// Breaks an instruction string into its lowercase mnemonic followed by each
// operand field, in order. Returns an empty vector when the text does not start
// with a well-formed mnemonic.
// If a field does not match the field pattern, the unmatched groups yield empty
// strings, so a single empty token is appended and tokenizing stops.
std::vector<std::string> Assembler::tokenizeInst(std::string inst) {
    static std::regex emptyRegex("^\\s*$");
    static std::regex instRegex(INST_REGEX);
    static std::regex fieldRegex("^\\s+(-?[[:alnum:]]+)(\\s+.*)?$");

    std::vector<std::string> result;
    std::smatch parts;
    if (std::regex_match(inst, parts, instRegex)) {
        result.push_back(toLowerCase(parts.str(1)));
        // peel one field off the front of the remaining text per iteration
        for (inst = parts.str(2); !std::regex_match(inst, emptyRegex); inst = parts.str(2)) {
            std::regex_match(inst, parts, fieldRegex);
            result.push_back(parts.str(1));
        }
    }
    return result;
}
// Prints the 1-based line number, the offending source line and `msg` to
// standard output, then terminates the process.
//
// ln  - zero-based index into asmLines
// msg - short description of the problem
//
// NOTE(review): the process exits with status 0 even though this is an error
// path; left unchanged in case an external test driver relies on it.
void Assembler::error(uint64_t ln, std::string msg) {
    // the line number is unsigned 64-bit, so print it through an explicit
    // unsigned long long conversion instead of the mismatched %ld
    printf("Error on line %llu: \"%s\"\n%s\n",
           static_cast<unsigned long long>(ln + 1), asmLines[ln].c_str(), msg.c_str());
    exit(0);
}
// Returns a copy of `str` in which every ASCII letter 'A'-'Z' has been replaced
// by its lowercase counterpart; all other characters are left untouched.
std::string Assembler::toLowerCase(std::string str) {
    for (char& ch : str) {
        const bool isUpper = ('A' <= ch && ch <= 'Z');
        if (isUpper) ch += 'a' - 'A';
    }
    return str;
}
// output pure hex
std::vector<uint32_t> Assembler::assemble() {
std::vector<uint32_t> hexCode;
for (uint32_t i = 0; i < instructions.size(); i++) {
hexCode.push_back(instructions[i].hex);
}
return hexCode;
}

36
Assembler.h Normal file
View file

@ -0,0 +1,36 @@
#include <string>
#include <vector>
#include <regex>
#include "InstHandler.h"
#ifndef ASSEMBLER_H
#define ASSEMBLER_H
class Assembler {
private:
InstHandler * ih;
std::vector<std::string> asmLines;
std::vector<label_t> labels;
std::vector<inst_t> instructions;
union Fp {
double d;
uint32_t i[2];
};
int parseDfill(std::string arg, uint32_t icount, uint64_t ln);
int parseInst(std::string inst, uint32_t icount, uint64_t ln);
int newLabel(std::string name, uint32_t icount);
std::vector<std::string> tokenizeInst(std::string inst);
std::string toLowerCase(std::string str);
void error(uint64_t ln, std::string msg);
public:
Assembler(std::vector<std::string>);
void parse();
std::vector<uint32_t> assemble();
};
#endif

361
InstHandler.C Normal file
View file

@ -0,0 +1,361 @@
#include "InstHandler.h"
// Helper macros for the instruction handlers below.
//
// HANDLER wraps a member function in a lambda that can be stored in the dispatch map.
#define HANDLER(f) [this](inst_t* inst){return f(inst);}

// Guard macros: each makes the enclosing handler return 0 when its check fails.
// They expect a local `inst` (inst_t*) and are wrapped in do/while(0) so they
// behave as a single statement (safe inside if/else without braces).
#define REQ_TKS(n) do { if (inst->tokens.size() != (n)) return 0; } while (0)
#define PARSE_REG(n, r) do { if (!parseRegister(inst->tokens[(n)], (r))) return 0; } while (0)
#define PARSE_FPREG(n, r) do { if (!parseFPRegister(inst->tokens[(n)], (r))) return 0; } while (0)
#define PARSE_IMM(n, i, d) do { if (!parseImmediate(inst->tokens[(n)], (i), (d))) return 0; } while (0)

// Field placement within the 32-bit word. Arguments are parenthesised so that
// compound expressions are shifted/masked as a whole.
#define OPCODE(o) ((uint32_t)(o) << 26)
#define RS(r) ((r) << 21)
#define RT(r) ((r) << 16)
#define RD(r) ((r) << 11)
#define IMM16(i) ((i) & 0x0000FFFF)

// Whole-word combiners; they read the locals `rs`, `rt`, `rd` and `imm` of the
// calling handler. The outer parentheses keep the result a single operand.
#define COMB_I(o) (0u | OPCODE(o) | RS(rs) | RT(rt) | IMM16(imm))
#define COMB_R(o, f) (0u | OPCODE(o) | RS(rs) | RT(rt) | RD(rd) | ((uint32_t)(f) & 0x0000003F))
// Stores the pointer to the label table and registers one handler per
// supported (lowercase) mnemonic in the dispatch map.
InstHandler::InstHandler(std::vector<label_t>* labels_) :
labels(labels_)
{
    // loads and stores
    funcMap.emplace("ld", HANDLER(handleLD));
    funcMap.emplace("l.d", HANDLER(handleLfD));
    funcMap.emplace("sd", HANDLER(handleSD));
    funcMap.emplace("s.d", HANDLER(handleSfD));

    // immediate arithmetic
    funcMap.emplace("daddi", HANDLER(handleDADDI));
    funcMap.emplace("daddiu", HANDLER(handleDADDIU));

    // conditional branches
    funcMap.emplace("beq", HANDLER(handleBEQ));
    funcMap.emplace("bne", HANDLER(handleBNE));

    // three-register arithmetic on the r-registers
    funcMap.emplace("dadd", HANDLER(handleDADD));
    funcMap.emplace("dsub", HANDLER(handleDSUB));

    // three-register arithmetic on the f-registers
    funcMap.emplace("add.d", HANDLER(handleADDfD));
    funcMap.emplace("sub.d", HANDLER(handleSUBfD));
    funcMap.emplace("mul.d", HANDLER(handleMULfD));
    funcMap.emplace("div.d", HANDLER(handleDIVfD));

    // jump and the operand-less / single-operand instructions
    funcMap.emplace("j", HANDLER(handleJ));
    funcMap.emplace("halt", HANDLER(handleHALT));
    funcMap.emplace("nop", HANDLER(handleNOP));
    funcMap.emplace("dump", HANDLER(handleDUMP));
}
// Dispatches `inst` to the handler registered for its mnemonic (tokens[0]).
// This is where the instruction op is validated.
//
// Returns the handler's result (1 on success, 0 on a malformed instruction),
// or 0 when `inst` is null, has no tokens, or names an unknown mnemonic.
//
// The lookup uses find() rather than at() inside a catch-all, so a missing
// mnemonic no longer costs an exception and an exception escaping a handler is
// no longer silently converted into "unknown instruction".
int InstHandler::handle(inst_t* inst) {
    if (inst == nullptr || inst->tokens.empty()) return 0;

    const auto entry = funcMap.find(inst->tokens[0]);
    if (entry == funcMap.end()) {
        // not found in map
        return 0;
    }
    return entry->second(inst);
}
// Handler for `ld`. Expects 4 tokens: mnemonic, r-register (rt),
// immediate-or-label, r-register (rs). Encodes them with COMB_I and opcode 55.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleLD(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rt = 0, rs = 0, imm = 0;
    PARSE_REG(1, rt);
    PARSE_IMM(2, imm, false);
    PARSE_REG(3, rs);
    inst->hex = COMB_I(55);
    return 1;
}
// Handler for `l.d`. Expects 4 tokens: mnemonic, f-register (rt),
// immediate-or-label, r-register (rs). Encodes them with COMB_I and opcode 53.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleLfD(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rt = 0, rs = 0, imm = 0;
    PARSE_FPREG(1, rt);
    PARSE_IMM(2, imm, false);
    PARSE_REG(3, rs);
    inst->hex = COMB_I(53);
    return 1;
}
// Handler for `sd`. Expects 4 tokens: mnemonic, r-register (rt),
// immediate-or-label, r-register (rs). Encodes them with COMB_I and opcode 63.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleSD(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rt = 0, rs = 0, imm = 0;
    PARSE_REG(1, rt);
    PARSE_IMM(2, imm, false);
    PARSE_REG(3, rs);
    inst->hex = COMB_I(63);
    return 1;
}
// Handler for `s.d`. Expects 4 tokens: mnemonic, f-register (rt),
// immediate-or-label, r-register (rs). Encodes them with COMB_I and opcode 61.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleSfD(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rt = 0, rs = 0, imm = 0;
    PARSE_FPREG(1, rt);
    PARSE_IMM(2, imm, false);
    PARSE_REG(3, rs);
    inst->hex = COMB_I(61);
    return 1;
}
// Handler for `daddi`. Expects 4 tokens: mnemonic, r-register (rt),
// r-register (rs), immediate-or-label. Encodes them with COMB_I and opcode 24.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleDADDI(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rt = 0, rs = 0, imm = 0;
    PARSE_REG(1, rt);
    PARSE_REG(2, rs);
    PARSE_IMM(3, imm, false);
    const uint32_t word = COMB_I(24);
    inst->hex = word;
    return 1;
}
// Handler for `daddiu`. Expects 4 tokens: mnemonic, r-register (rt),
// r-register (rs), immediate-or-label. Encodes them with COMB_I and opcode 25.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleDADDIU(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rt = 0, rs = 0, imm = 0;
    PARSE_REG(1, rt);
    PARSE_REG(2, rs);
    PARSE_IMM(3, imm, false);
    const uint32_t word = COMB_I(25);
    inst->hex = word;
    return 1;
}
// Handler for `beq`. Expects 4 tokens: mnemonic, r-register (rt),
// r-register (rs), branch target. The target is parsed by parseImmediateBranch
// using this instruction's slot index (inst->loc). Encoded with COMB_I and opcode 4.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleBEQ(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rt = 0, rs = 0, imm = 0;
    PARSE_REG(1, rt);
    PARSE_REG(2, rs);
    const bool targetOk = parseImmediateBranch(inst->tokens[3], imm, inst->loc);
    if (!targetOk) return 0;
    inst->hex = COMB_I(4);
    return 1;
}
// Handler for `bne`. Expects 4 tokens: mnemonic, r-register (rt),
// r-register (rs), branch target. The target is parsed by parseImmediateBranch
// using this instruction's slot index (inst->loc). Encoded with COMB_I and opcode 5.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleBNE(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rt = 0, rs = 0, imm = 0;
    PARSE_REG(1, rt);
    PARSE_REG(2, rs);
    const bool targetOk = parseImmediateBranch(inst->tokens[3], imm, inst->loc);
    if (!targetOk) return 0;
    inst->hex = COMB_I(5);
    return 1;
}
// Handler for `dadd`. Expects 4 tokens: mnemonic followed by three r-registers
// (rd, rs, rt in that order). Encoded with COMB_R, opcode 0 and function code 44.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleDADD(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rd = 0, rs = 0, rt = 0;
    PARSE_REG(1, rd);
    PARSE_REG(2, rs);
    PARSE_REG(3, rt);
    const uint32_t word = COMB_R(0, 44);
    inst->hex = word;
    return 1;
}
// Handler for `dsub`. Expects 4 tokens: mnemonic followed by three r-registers
// (rd, rs, rt in that order). Encoded with COMB_R, opcode 0 and function code 46.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleDSUB(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rd = 0, rs = 0, rt = 0;
    PARSE_REG(1, rd);
    PARSE_REG(2, rs);
    PARSE_REG(3, rt);
    const uint32_t word = COMB_R(0, 46);
    inst->hex = word;
    return 1;
}
// Handler for `add.d`. Expects 4 tokens: mnemonic followed by three f-registers
// (rd, rs, rt in that order). Encoded with COMB_R, opcode 0 and function code 47.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleADDfD(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rd = 0, rs = 0, rt = 0;
    PARSE_FPREG(1, rd);
    PARSE_FPREG(2, rs);
    PARSE_FPREG(3, rt);
    const uint32_t word = COMB_R(0, 47);
    inst->hex = word;
    return 1;
}
// Handler for `sub.d`. Expects 4 tokens: mnemonic followed by three f-registers
// (rd, rs, rt in that order). Encoded with COMB_R, opcode 0 and function code 48.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleSUBfD(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rd = 0, rs = 0, rt = 0;
    PARSE_FPREG(1, rd);
    PARSE_FPREG(2, rs);
    PARSE_FPREG(3, rt);
    const uint32_t word = COMB_R(0, 48);
    inst->hex = word;
    return 1;
}
// Handler for `mul.d`. Expects 4 tokens: mnemonic followed by three f-registers
// (rd, rs, rt in that order). Encoded with COMB_R, opcode 0 and function code 49.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleMULfD(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rd = 0, rs = 0, rt = 0;
    PARSE_FPREG(1, rd);
    PARSE_FPREG(2, rs);
    PARSE_FPREG(3, rt);
    const uint32_t word = COMB_R(0, 49);
    inst->hex = word;
    return 1;
}
// Handler for `div.d`. Expects 4 tokens: mnemonic followed by three f-registers
// (rd, rs, rt in that order). Encoded with COMB_R, opcode 0 and function code 50.
// Returns 1 on success, 0 if the token count or any operand is invalid.
int InstHandler::handleDIVfD(inst_t* inst) {
    REQ_TKS(4);
    uint32_t rd = 0, rs = 0, rt = 0;
    PARSE_FPREG(1, rd);
    PARSE_FPREG(2, rs);
    PARSE_FPREG(3, rt);
    const uint32_t word = COMB_R(0, 50);
    inst->hex = word;
    return 1;
}
// Handler for `j`. Expects 2 tokens: mnemonic and a jump target (numeric
// literal or label, parsed by parseImmediateJump). The low 26 bits of the
// target are combined with opcode 2.
// Returns 1 on success, 0 if the token count or the target is invalid.
int InstHandler::handleJ(inst_t* inst) {
    REQ_TKS(2);
    uint32_t target = 0;
    const bool targetOk = parseImmediateJump(inst->tokens[1], target);
    if (!targetOk) return 0;
    inst->hex = 0u | OPCODE(2) | (target & 0x03FFFFFF);
    return 1;
}
// Handler for `halt`. Takes no operands (exactly 1 token); the word consists
// of opcode 1 only. Returns 1 on success, 0 if operands are present.
int InstHandler::handleHALT(inst_t* inst) {
    REQ_TKS(1);
    const uint32_t word = 0u | OPCODE(1);
    inst->hex = word;
    return 1;
}
// Handler for `nop`. Takes no operands (exactly 1 token); the word consists
// of opcode 3 only. Returns 1 on success, 0 if operands are present.
int InstHandler::handleNOP(inst_t* inst) {
    REQ_TKS(1);
    const uint32_t word = 0u | OPCODE(3);
    inst->hex = word;
    return 1;
}
// Handler for `dump`. Expects 2 tokens: mnemonic and a numeric literal (labels
// are rejected because parseImmediate is called with isDump = true). The low
// 26 bits of the value are combined with opcode 44.
// Returns 1 on success, 0 if the token count or the operand is invalid.
int InstHandler::handleDUMP(inst_t* inst) {
    REQ_TKS(2);
    uint32_t code = 0;
    PARSE_IMM(1, code, true);
    inst->hex = 0u | OPCODE(44) | (code & 0x03FFFFFF);
    return 1;
}
// Checks that `arg` is an integer register name ('r' or 'R' followed by one or
// two digits) and stores the register number in `reg`.
// Returns 1 when the number is in 0..31, otherwise 0. Note that `reg` is
// written whenever the text matches, even if the number is out of range.
int InstHandler::parseRegister(std::string arg, uint32_t& reg) {
    static std::regex regRegex("^[rR](\\d{1,2})$");
    if (std::regex_match(arg, matches, regRegex)) {
        reg = std::stoul(matches.str(1));
        return reg <= 31 ? 1 : 0;
    }
    return 0;
}
// Checks that `arg` is an f-register name ('f' or 'F' followed by one or two
// digits) and stores the register number in `reg`.
// Returns 1 when the number is in 0..31, otherwise 0. Note that `reg` is
// written whenever the text matches, even if the number is out of range.
int InstHandler::parseFPRegister(std::string arg, uint32_t& reg) {
    static std::regex regRegex("^[fF](\\d{1,2})$");
    if (std::regex_match(arg, matches, regRegex)) {
        reg = std::stoul(matches.str(1));
        return reg <= 31 ? 1 : 0;
    }
    return 0;
}
// Parses the immediate operand used by most instructions.
// A (possibly negative) decimal literal is stored in `imm` as-is. Otherwise,
// unless `isDump` is set, `arg` is looked up as a label and its location is
// stored shifted left by 2.
// Returns 1 on success; 0 if the literal cannot be converted, the label is
// unknown, or `arg` matches neither form.
int InstHandler::parseImmediate(std::string arg, uint32_t& imm, bool isDump) {
    static std::regex immRegex("^-?\\d+$");
    static std::regex labelRegex("^[[:alnum:]]{1,12}$");

    // numeric literal
    if (std::regex_match(arg, immRegex)) {
        try {
            imm = std::stol(arg);
        } catch (...) {
            return 0;
        }
        return 1;
    }

    // labels are not accepted for dump, and must look like a label at all
    if (isDump || !std::regex_match(arg, labelRegex)) return 0;

    imm = findLabel(arg);
    if (imm == (uint32_t)-1) return 0;
    imm <<= 2;
    return 1;
}
// Parses the target of a jump.
// A (possibly negative) decimal literal is stored in `imm` as-is; a label is
// replaced by its recorded location, without shifting and without making it
// relative to the current instruction.
// Returns 1 on success; 0 if the literal cannot be converted, the label is
// unknown, or `arg` matches neither form.
int InstHandler::parseImmediateJump(std::string arg, uint32_t& imm) {
    static std::regex immRegex("^-?\\d+$");
    static std::regex labelRegex("^[[:alnum:]]{1,12}$");

    // numeric literal
    if (std::regex_match(arg, immRegex)) {
        try {
            imm = std::stol(arg);
        } catch (...) {
            return 0;
        }
        return 1;
    }

    if (!std::regex_match(arg, labelRegex)) return 0;

    imm = findLabel(arg);
    return imm == (uint32_t)-1 ? 0 : 1;
}
// Parses the target of a branch.
// A (possibly negative) decimal literal is stored in `imm` as-is; a label is
// converted to an offset relative to the slot after the branch, i.e.
// label location - (icount + 1).
// Returns 1 on success; 0 if the literal cannot be converted, the label is
// unknown, or `arg` matches neither form.
int InstHandler::parseImmediateBranch(std::string arg, uint32_t& imm, uint32_t icount) {
    static std::regex immRegex("^-?\\d+$");
    static std::regex labelRegex("^[[:alnum:]]{1,12}$");

    // numeric literal
    if (std::regex_match(arg, immRegex)) {
        try {
            imm = std::stol(arg);
        } catch (...) {
            return 0;
        }
        return 1;
    }

    if (!std::regex_match(arg, labelRegex)) return 0;

    imm = findLabel(arg);
    if (imm == (uint32_t)-1) return 0;
    imm -= icount + 1;
    return 1;
}
// looks up label
uint32_t InstHandler::findLabel(std::string l) {
for (uint64_t i = 0; i < labels->size(); i++) {
if (labels->at(i).name == l) return labels->at(i).loc;
}
return -1;
}

60
InstHandler.h Normal file
View file

@ -0,0 +1,60 @@
#include <string>
#include <vector>
#include <map>
#include <regex>
#ifndef INSTHANDLER_H
#define INSTHANDLER_H
// One 32-bit output slot together with the source information it came from.
struct inst_t {
    uint64_t lineNumber;              // zero-based index of the originating source line
    std::vector<std::string> tokens;  // mnemonic (or ".dfill" / "BLANK") followed by its fields
    uint32_t loc;                     // slot index of this entry in the output
    bool usesLabel;                   // set for .dfill words whose value is a label to resolve later
    uint32_t hex;                     // encoded word
    // a per-instruction comment string was tried here at some point and is currently disabled
};
// A label name and the slot index it refers to.
struct label_t {
    std::string name;
    uint32_t loc;
};
class InstHandler {
private:
typedef std::function<int(inst_t*)> lf_t;
std::smatch matches;
std::map<std::string, lf_t> funcMap;
std::vector<label_t>* labels;
int handleLD(inst_t* inst);
int handleLfD(inst_t* inst);
int handleSD(inst_t* inst);
int handleSfD(inst_t* inst);
int handleDADDI(inst_t* inst);
int handleDADDIU(inst_t* inst);
int handleBEQ(inst_t* inst);
int handleBNE(inst_t* inst);
int handleDADD(inst_t* inst);
int handleDSUB(inst_t* inst);
int handleADDfD(inst_t* inst);
int handleSUBfD(inst_t* inst);
int handleMULfD(inst_t* inst);
int handleDIVfD(inst_t* inst);
int handleJ(inst_t* inst);
int handleHALT(inst_t* inst);
int handleNOP(inst_t* inst);
int handleDUMP(inst_t* inst);
int parseRegister(std::string arg, uint32_t& reg);
int parseFPRegister(std::string arg, uint32_t& reg);
int parseImmediate(std::string arg, uint32_t& imm, bool isDump);
int parseImmediateJump(std::string arg, uint32_t& imm);
int parseImmediateBranch(std::string arg, uint32_t& imm, uint32_t icount);
public:
InstHandler(std::vector<label_t>* labels_);
int handle(inst_t* inst);
uint32_t findLabel(std::string l);
};
#endif

View file

@ -1,2 +1,12 @@
# mips64-assembler # CS5483 Computer Architecture Project 1: MIPS Assembler
## Ryan Densmore
## Compiling and Running
1. To compile my assembler, simply run the Makefile via `make`.
2. Run the assembler with `./ma <file.asm>`.
## Tests
While the provided `run.pl` seems a little janky to use, I currently pass 29/29 tests. Running the `run.pl` program currently in my directory will run every test, and clean up the files for the tests that pass.
## Todos
Had I more time, I would actually try to implement outputting comments. There appears to be some sort of memory issue that only occurs when I try to store another string (containing the comment) alongside every instruction. There are a few other ways I could do it, but attaching the comment to its relevant instruction in the data structure makes the most sense.

53
ma.C Normal file
View file

@ -0,0 +1,53 @@
/* CS5483 Project 1: MIPS64 Assembler
* Ryan Densmore
*/
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include "Assembler.h"
std::string parseArgs(int argc, char ** argv);
void usage();
// Entry point: reads the .asm file named on the command line, assembles it and
// writes one 8-digit uppercase hexadecimal word per line to "<name>.hex",
// where <name> is the input path without its ".asm" extension.
// Returns 0 on success and 1 if the output file cannot be opened.
int main(int argc, char ** argv) {
    // parseArgs validates argc/argv and exits via usage() on bad input
    std::string name = parseArgs(argc, argv);
    std::fstream asmFile(argv[1], std::fstream::in);
    if (!asmFile.is_open()) usage();

    // Read until getline itself fails; testing good() before reading appends a
    // bogus empty line once the stream has been exhausted.
    std::vector<std::string> lines;
    std::string line;
    while (std::getline(asmFile, line)) {
        lines.push_back(line);
    }

    Assembler assembler(std::move(lines));
    assembler.parse();

    // assemble
    std::vector<uint32_t> hexCode = assembler.assemble();

    std::fstream hexFile(name + ".hex", std::fstream::out);
    if (!hexFile.is_open()) {
        // previously a failed open was ignored and the output silently discarded
        printf("error: could not open %s.hex for writing\n", name.c_str());
        return 1;
    }

    char hex[9]; // 8 hex digits + terminating NUL
    for (uint32_t word : hexCode) {
        snprintf(hex, sizeof hex, "%.8X", word);
        hexFile << hex << "\n";
    }
    return 0;
}
// Validates the command line and returns the input path with its ".asm"
// extension removed. Calls usage() (which exits) unless exactly one argument
// ending in ".asm" was given.
std::string parseArgs(int argc, char ** argv) {
    // make sure only one arg
    if (argc != 2) usage();

    // make sure it's a .asm
    const std::string path(argv[1]);
    const std::string ext(".asm");
    const bool hasExt = path.length() >= ext.length()
        && path.compare(path.length() - ext.length(), ext.length(), ext) == 0;
    if (!hasExt) usage();

    return path.substr(0, path.length() - ext.length());
}
// Prints the usage line to standard output and terminates the process with status 0.
void usage() {
    fputs("usage: ma <file>.asm\n", stdout);
    exit(0);
}

18
makefile Normal file
View file

@ -0,0 +1,18 @@
# Build rules for the `ma` assembler.
CC = g++
CFLAGS = -c -std=c++11 -O2 -Wall -Werror
OBJS = InstHandler.o Assembler.o ma.o

# link step
ma: $(OBJS)
	$(CC) $(OBJS) -o ma

# ma.C includes Assembler.h, which in turn includes InstHandler.h, so ma.o must
# be rebuilt when either header changes
ma.o: ma.C Assembler.h InstHandler.h
	$(CC) $(CFLAGS) ma.C -o ma.o

Assembler.o: Assembler.C Assembler.h InstHandler.h
	$(CC) $(CFLAGS) Assembler.C -o Assembler.o

InstHandler.o: InstHandler.C InstHandler.h
	$(CC) $(CFLAGS) InstHandler.C -o InstHandler.o

# `clean` is not a file; -f keeps it from failing when nothing is there to remove.
# NOTE(review): this also deletes every *.asm file in the directory - confirm
# that only generated test inputs live here.
.PHONY: clean
clean:
	rm -f *.o ma *.asm *.problems tmp1 tmp2