yaze 0.2.0
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
asm_parser.h
Go to the documentation of this file.
1#pragma once
2
3#include <cstdint>
4#include <regex>
5#include <sstream>
6#include <stdexcept>
7#include <string>
8#include <unordered_map>
9#include <vector>
10
11#include "absl/strings/str_cat.h"
12#include "absl/strings/str_split.h"
14
15namespace yaze {
16namespace app {
17namespace emu {
18
49
50// Key structure for mnemonic and addressing mode
52 std::string mnemonic;
54
55 bool operator==(const MnemonicMode& other) const {
56 return mnemonic == other.mnemonic && mode == other.mode;
57 }
58};
59
60// Custom hash function for the MnemonicMode structure
62 std::size_t operator()(const MnemonicMode& k) const {
63 return std::hash<std::string>()(k.mnemonic) ^
64 (std::hash<int>()(static_cast<int>(k.mode)) << 1);
65 }
66};
67
68class AsmParser {
69 public:
70 std::vector<uint8_t> Parse(const std::string& instruction) {
72 auto tokens = Tokenize(instruction);
73 if (tokens.size() < 1) {
74 throw std::runtime_error("Invalid instruction format: " + instruction);
75 }
76
77 size_t index = 0;
78 std::vector<uint8_t> bytes;
79 while (index < tokens.size()) {
80 // For each "line" worth of tokens, we need to extract the
81 // mnemonic, optional addressing mode qualifier, and operand.
82 // The operand can come in a variety of formats:
83 // - Immediate: #$01
84 // - Immediate Word: #$1234
85 // - Absolute: $1234
86 // - Absolute Long: $123456
87 // This parser is not exhaustive and only supports a subset of
88 // the possible addressing modes and operands.
89 const std::string& mnemonic = tokens[index];
90 index++;
91
92 // Check if addressing mode qualifier is present
93 // Either .b, .w, .l, or nothing, which could mean
94 // it was omitted or the operand is implied
95 std::string qualifier = "";
96 std::string potential_mode = tokens[index];
97 if (absl::StrContains(potential_mode, ".")) {
98 qualifier = potential_mode;
99 index++;
100 }
101
102 // Now we check for either the immediate mode
103 // symbol # or the address symbol $ to determine
104 // the next step
105 std::string operand = tokens[index];
106 if (operand == "#") {
107 index++;
108 // Check if the next token is a # character, in which case it is
109 // a hexadecimal value that needs to be converted to a byte
110 if (tokens[index] == "#") {
111 index++;
112 operand = tokens[index];
113 index++;
114 }
115 } else if (operand == "$") {
116 index++;
117 operand = tokens[index];
118 index++;
119 }
120
121 AddressingMode mode = DetermineMode(tokens);
122
123 MnemonicMode key{mnemonic, mode};
124 auto opcode_entry = mnemonic_to_opcode_.find(key);
125 if (opcode_entry == mnemonic_to_opcode_.end()) {
126 throw std::runtime_error("Opcode not found for mnemonic and mode: " +
127 mnemonic);
128 }
129
130 bytes.push_back(opcode_entry->second);
131 AppendOperandBytes(bytes, operand, mode);
132 }
133
134 return bytes;
135 }
136
// Splits an instruction string into tokens: words (mnemonics and hex
// digits), dot-prefixed size qualifiers, and the single-character markers
// '#' and '$'. Characters that match none of these (such as whitespace) are
// skipped.
//
// Example: ADC.b #$01
// Returns: ["ADC", ".b", "#", "$", "01"]
std::vector<std::string> Tokenize(const std::string& instruction) {
  // Compiling a std::regex is expensive, so the pattern is built once and
  // reused across calls instead of being reconstructed every time.
  static const std::regex kTokenRegex{
      R"((\w+|\.\w+|[\#$]|[0-9a-fA-F]+|[a-zA-Z]+))"};

  std::vector<std::string> tokens;
  const std::sregex_iterator matches_end;
  for (std::sregex_iterator it(instruction.begin(), instruction.end(),
                               kTokenRegex);
       it != matches_end; ++it) {
    tokens.push_back(it->str());
  }
  return tokens;
}
152
153 private:
// Converts the hexadecimal text in `operand` to a number and appends its
// bytes to `bytes`; the number of bytes written depends on
// `addressing_mode`. Multi-byte values are appended least-significant byte
// first.
//
// std::stoi / std::stoul throw std::invalid_argument when `operand` does not
// start with hexadecimal digits and std::out_of_range when the value does
// not fit; neither is caught here.
//
// NOTE(review): the `case` labels of the switch are not present in this
// listing, so which AddressingMode value selects each branch cannot be
// confirmed from here.
154 void AppendOperandBytes(std::vector<uint8_t>& bytes,
155 const std::string& operand,
156 const AddressingMode& addressing_mode) {
157 // Handle different addressing modes
158 switch (addressing_mode) {
// Branch writing a single byte (the value is narrowed to uint8_t).
160 bytes.push_back(static_cast<uint8_t>(std::stoi(operand, nullptr, 16)));
161 break;
162 }
// Branch writing a 16-bit value as two bytes: low byte, then high byte.
164 uint16_t word_operand =
165 static_cast<uint16_t>(std::stoi(operand, nullptr, 16));
166 bytes.push_back(static_cast<uint8_t>(word_operand & 0xFF));
167 bytes.push_back(static_cast<uint8_t>((word_operand >> 8) & 0xFF));
168 break;
169 }
// Branch writing the low 24 bits as three bytes: bits 0-7, 8-15, 16-23.
171 uint32_t long_operand =
172 static_cast<uint32_t>(std::stoul(operand, nullptr, 16));
173 bytes.push_back(static_cast<uint8_t>(long_operand & 0xFF));
174 bytes.push_back(static_cast<uint8_t>((long_operand >> 8) & 0xFF));
175 bytes.push_back(static_cast<uint8_t>((long_operand >> 16) & 0xFF));
176 break;
177 }
// Branch that appends nothing.
179 break;
180 }
181 default:
182 // Unknown, append it anyway
183 bytes.push_back(static_cast<uint8_t>(std::stoi(operand, nullptr, 16)));
184 }
185 }
186
// Chooses an addressing mode from the second token (tokens[1]) by comparing
// it against the size qualifiers ".b", ".w" and ".l"; any other value takes
// the final else branch.
//
// tokens[1] is read without a size check, so `tokens` must hold at least two
// elements.
//
// NOTE(review): the return statement of each branch is not present in this
// listing, so the AddressingMode value produced per qualifier cannot be
// confirmed from here.
187 AddressingMode DetermineMode(const std::vector<std::string>& tokens) {
188 const std::string& addressingMode = tokens[1];
189 if (addressingMode == ".b") {
191 } else if (addressingMode == ".w") {
193 } else if (addressingMode == ".l") {
195 } else {
197 }
198 }
199
// Parses `str` as a hexadecimal byte.
//
// Returns true and stores the result in `value` only when the whole string
// is a hexadecimal number in the range 0x00..0xFF. Returns false, leaving
// `value` untouched, when the string is empty, is not hexadecimal, has
// trailing characters after the number, or is outside the byte range. Never
// throws for malformed or oversized input.
bool TryParseByte(const std::string& str, uint8_t& value) {
  try {
    std::size_t consumed = 0;
    const int parsed = std::stoi(str, &consumed, 16);
    // Reject trailing garbage ("1G") and values that would otherwise be
    // silently truncated when narrowed to a byte ("100", "-1").
    if (consumed != str.size() || parsed < 0 || parsed > 0xFF) {
      return false;
    }
    value = static_cast<uint8_t>(parsed);
    return true;
  } catch (const std::invalid_argument&) {
    return false;
  } catch (const std::out_of_range&) {
    // std::stoi reports values that do not fit in an int this way.
    return false;
  }
}
208
// Parses `str` as an unsigned hexadecimal number of at most 32 bits.
//
// Returns true and stores the result in `value` only when the whole string
// is a non-negative hexadecimal number no larger than 0xFFFFFFFF. Returns
// false, leaving `value` untouched, when the string is empty, is not
// hexadecimal, contains a minus sign, has trailing characters after the
// number, or does not fit in 32 bits. Never throws for malformed or
// oversized input.
bool TryParseHex(const std::string& str, uint32_t& value) {
  // std::stoul accepts a leading '-' and wraps the result around; a negative
  // number is never a valid operand, so reject it up front.
  if (str.find('-') != std::string::npos) {
    return false;
  }
  try {
    std::size_t consumed = 0;
    const unsigned long parsed = std::stoul(str, &consumed, 16);
    // unsigned long may be wider than 32 bits; refuse values that would be
    // silently truncated, as well as trailing garbage ("12G4").
    if (consumed != str.size() || parsed > 0xFFFFFFFFul) {
      return false;
    }
    value = static_cast<uint32_t>(parsed);
    return true;
  } catch (const std::invalid_argument&) {
    return false;
  } catch (const std::out_of_range&) {
    // std::stoul reports values that do not fit in unsigned long this way.
    return false;
  }
}
217
227 0x71;
239 0x71;
251 0x31;
261 0x90;
263 0xB0;
265 0xF0;
270 0x30;
272 0xD0;
274 0x10;
276 0x80;
279 0x82;
281 0x50;
283 0x70;
296 0xD1;
318 0x51;
336 0xB1;
361 0x11;
365 0x62;
401 0xF1;
413 0x91;
440 }
441
442 std::unordered_map<MnemonicMode, uint8_t, MnemonicModeHash>
444};
445
446} // namespace emu
447} // namespace app
448} // namespace yaze
void AppendOperandBytes(std::vector< uint8_t > &bytes, const std::string &operand, const AddressingMode &addressing_mode)
Definition asm_parser.h:154
std::unordered_map< MnemonicMode, uint8_t, MnemonicModeHash > mnemonic_to_opcode_
Definition asm_parser.h:443
bool TryParseHex(const std::string &str, uint32_t &value)
Definition asm_parser.h:209
AddressingMode DetermineMode(const std::vector< std::string > &tokens)
Definition asm_parser.h:187
bool TryParseByte(const std::string &str, uint8_t &value)
Definition asm_parser.h:200
std::vector< std::string > Tokenize(const std::string &instruction)
Definition asm_parser.h:139
std::vector< uint8_t > Parse(const std::string &instruction)
Definition asm_parser.h:70
Definition common.cc:21
std::size_t operator()(const MnemonicMode &k) const
Definition asm_parser.h:62
bool operator==(const MnemonicMode &other) const
Definition asm_parser.h:55