yaze 0.2.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
asm_parser.h
Go to the documentation of this file.
1#pragma once
2
3#include <cstdint>
4#include <regex>
5#include <stdexcept>
6#include <string>
7#include <unordered_map>
8#include <vector>
9
10#include "absl/strings/match.h"
11
12namespace yaze {
13namespace emu {
14
45
46// Key structure for mnemonic and addressing mode
48 std::string mnemonic;
50
51 bool operator==(const MnemonicMode& other) const {
52 return mnemonic == other.mnemonic && mode == other.mode;
53 }
54};
55
56// Custom hash function for the MnemonicMode structure
58 std::size_t operator()(const MnemonicMode& k) const {
59 return std::hash<std::string>()(k.mnemonic) ^
60 (std::hash<int>()(static_cast<int>(k.mode)) << 1);
61 }
62};
63
64class AsmParser {
65 public:
// Assembles the text in `instruction` into a byte sequence.
//
// The input is tokenized, then the loop repeatedly consumes a mnemonic, an
// optional qualifier token (any token containing '.'), and an operand. For
// each group it looks up an opcode in mnemonic_to_opcode_ keyed by
// {mnemonic, mode}, appends that opcode, and then appends the operand bytes
// via AppendOperandBytes().
//
// Returns the accumulated bytes.
// Throws std::runtime_error if tokenization yields no tokens or if no opcode
// is registered for the mnemonic/mode pair.
//
// NOTE(review): listing line 67 is absent from this view, so any statement
// that runs before Tokenize() is not shown here.
66 std::vector<uint8_t> Parse(const std::string& instruction) {
68 auto tokens = Tokenize(instruction);
69 if (tokens.size() < 1) {
70 throw std::runtime_error("Invalid instruction format: " + instruction);
71 }
72
73 size_t index = 0;
74 std::vector<uint8_t> bytes;
75 while (index < tokens.size()) {
76 // For each "line" worth of tokens, we need to extract the
77 // mnemonic, optional addressing mode qualifier, and operand.
78 // The operand can come in a variety of formats:
79 // - Immediate: #$01
80 // - Immediate Word: #$1234
81 // - Absolute: $1234
82 // - Absolute Long: $123456
83 // This parser is not exhaustive and only supports a subset of
84 // the possible addressing modes and operands.
85 const std::string& mnemonic = tokens[index];
86 index++;
87
88 // Check if addressing mode qualifier is present
89 // Either .b, .w, .l, or nothing, which could mean
90 // it was omitted or the operand is implied
// NOTE(review): tokens[index] is read here (and again below) without
// re-checking index < tokens.size(); a mnemonic that is the final token
// would index past the end of the vector.
// NOTE(review): `qualifier` is assigned but never read in the visible code.
91 std::string qualifier = "";
92 std::string potential_mode = tokens[index];
93 if (absl::StrContains(potential_mode, ".")) {
94 qualifier = potential_mode;
95 index++;
96 }
97
98 // Now we check for either the immediate mode
99 // symbol # or the address symbol $ to determine
100 // the next step
101 std::string operand = tokens[index];
102 if (operand == "#") {
103 index++;
104 // Check if the next token is a # character, in which case it is
105 // a hexadecimal value that needs to be converted to a byte
// NOTE(review): Tokenize() documents "#$01" as the tokens "#", "$", "01",
// so comparing against "#" a second time looks like it was meant to be
// "$" -- confirm. As written, operand stays "#" for that input.
106 if (tokens[index] == "#") {
107 index++;
108 operand = tokens[index];
109 index++;
110 }
111 } else if (operand == "$") {
112 index++;
113 operand = tokens[index];
114 index++;
115 }
116
// NOTE(review): DetermineMode() receives the whole token list and inspects
// tokens[1], so every loop iteration derives its mode from the same token
// rather than from the qualifier just consumed -- confirm intent.
117 AddressingMode mode = DetermineMode(tokens);
118
119 MnemonicMode key{mnemonic, mode};
120 auto opcode_entry = mnemonic_to_opcode_.find(key);
121 if (opcode_entry == mnemonic_to_opcode_.end()) {
122 throw std::runtime_error("Opcode not found for mnemonic and mode: " +
123 mnemonic);
124 }
125
// Emit the opcode byte first, then the operand bytes for this mode.
126 bytes.push_back(opcode_entry->second);
127 AppendOperandBytes(bytes, operand, mode);
128 }
129
130 return bytes;
131 }
132
133 // Example: ADC.b #$01
134 // Returns: ["ADC", ".b", "#", "$", "01"]
// Splits `instruction` into lexical tokens, in order of appearance.
//
// A token is a run of word characters, a '.'-prefixed word (size qualifier),
// or a single '#' or '$'. Characters that match none of these (for example
// whitespace) act as separators and are dropped. An empty or all-separator
// input yields an empty vector.
std::vector<std::string> Tokenize(const std::string& instruction) {
  // Compiling a std::regex is expensive, so build the pattern once and reuse
  // it across calls instead of reconstructing it every time. The trailing
  // hex/alpha alternatives are already covered by the leading \w+, but the
  // pattern is kept verbatim to preserve matching behaviour.
  static const std::regex kTokenRegex{
      R"((\w+|\.\w+|[\#$]|[0-9a-fA-F]+|[a-zA-Z]+))"};

  std::vector<std::string> tokens;
  const std::sregex_iterator matches_end;
  for (std::sregex_iterator it(instruction.begin(), instruction.end(),
                               kTokenRegex);
       it != matches_end; ++it) {
    tokens.push_back(it->str());
  }
  return tokens;
}
148
149 private:
// Appends the encoded form of `operand` to `bytes` according to
// `addressing_mode`.
//
// `operand` is parsed as hexadecimal text. The visible branches emit, in
// order: one byte; two bytes, low byte first; three bytes, low byte first;
// and nothing. The default branch emits a single byte.
//
// NOTE(review): the case labels are missing from this listing (lines
// 155/159/166/174), so which AddressingMode value selects which branch
// cannot be confirmed from here.
// NOTE(review): std::stoi / std::stoul throw when `operand` is not parseable
// and nothing here catches that -- confirm callers expect the exception.
150 void AppendOperandBytes(std::vector<uint8_t>& bytes,
151 const std::string& operand,
152 const AddressingMode& addressing_mode) {
153 // Handle different addressing modes
154 switch (addressing_mode) {
// Single-byte operand.
156 bytes.push_back(static_cast<uint8_t>(std::stoi(operand, nullptr, 16)));
157 break;
158 }
// 16-bit operand, written low byte then high byte.
160 uint16_t word_operand =
161 static_cast<uint16_t>(std::stoi(operand, nullptr, 16));
162 bytes.push_back(static_cast<uint8_t>(word_operand & 0xFF));
163 bytes.push_back(static_cast<uint8_t>((word_operand >> 8) & 0xFF));
164 break;
165 }
// 24-bit operand, written lowest byte first; bits above 23 are discarded.
167 uint32_t long_operand =
168 static_cast<uint32_t>(std::stoul(operand, nullptr, 16));
169 bytes.push_back(static_cast<uint8_t>(long_operand & 0xFF));
170 bytes.push_back(static_cast<uint8_t>((long_operand >> 8) & 0xFF));
171 bytes.push_back(static_cast<uint8_t>((long_operand >> 16) & 0xFF));
172 break;
173 }
// This branch appends no operand bytes.
175 break;
176 }
177 default:
178 // Unknown, append it anyway
179 bytes.push_back(static_cast<uint8_t>(std::stoi(operand, nullptr, 16)));
180 }
181 }
182
// Selects an AddressingMode from the qualifier token in `tokens`.
//
// Looks only at tokens[1] and distinguishes ".b", ".w", ".l", and anything
// else.
//
// NOTE(review): the return statements for each branch are missing from this
// listing (lines 186/188/190/192), so the mode chosen per qualifier cannot be
// confirmed from here.
// NOTE(review): tokens[1] is read without checking tokens.size() > 1; a
// single-token input would index past the end.
183 AddressingMode DetermineMode(const std::vector<std::string>& tokens) {
184 const std::string& addressingMode = tokens[1];
185 if (addressingMode == ".b") {
187 } else if (addressingMode == ".w") {
189 } else if (addressingMode == ".l") {
191 } else {
193 }
194 }
195
// Parses `str` as a hexadecimal byte.
//
// On success stores the result in `value` and returns true. Returns false,
// leaving `value` untouched, when `str` is empty, is not entirely hexadecimal
// (trailing characters are rejected), or denotes a number outside 0x00..0xFF.
// Never throws for malformed or oversized input.
bool TryParseByte(const std::string& str, uint8_t& value) {
  try {
    std::size_t consumed = 0;
    const int parsed = std::stoi(str, &consumed, 16);
    // std::stoi stops at the first non-hex character and accepts a sign;
    // require full consumption and a value that actually fits in a byte so
    // nothing is silently truncated.
    if (consumed != str.size() || parsed < 0 || parsed > 0xFF) {
      return false;
    }
    value = static_cast<uint8_t>(parsed);
    return true;
  } catch (const std::logic_error&) {
    // Covers both std::invalid_argument (no digits) and std::out_of_range
    // (value does not fit in int), which std::stoi may throw.
    return false;
  }
}
204
// Parses `str` as an unsigned hexadecimal number of at most 32 bits.
//
// On success stores the result in `value` and returns true. Returns false,
// leaving `value` untouched, when `str` is empty, contains a minus sign, is
// not entirely hexadecimal (trailing characters are rejected), or denotes a
// number larger than 0xFFFFFFFF. Never throws for malformed or oversized
// input.
bool TryParseHex(const std::string& str, uint32_t& value) {
  // std::stoul accepts a leading '-' and wraps the result around; a negative
  // literal is never a valid unsigned operand, so reject it up front.
  if (str.find('-') != std::string::npos) {
    return false;
  }
  try {
    std::size_t consumed = 0;
    const unsigned long parsed = std::stoul(str, &consumed, 16);
    // unsigned long may be wider than 32 bits; refuse values that would be
    // truncated, and refuse partially parsed input.
    if (consumed != str.size() || parsed > UINT32_MAX) {
      return false;
    }
    value = static_cast<uint32_t>(parsed);
    return true;
  } catch (const std::logic_error&) {
    // Covers both std::invalid_argument (no digits) and std::out_of_range
    // (value does not fit in unsigned long), which std::stoul may throw.
    return false;
  }
}
213
223 0x71;
235 0x71;
247 0x31;
257 0x90;
259 0xB0;
261 0xF0;
266 0x30;
268 0xD0;
270 0x10;
272 0x80;
275 0x82;
277 0x50;
279 0x70;
292 0xD1;
314 0x51;
332 0xB1;
357 0x11;
361 0x62;
397 0xF1;
409 0x91;
436 }
437
438 std::unordered_map<MnemonicMode, uint8_t, MnemonicModeHash>
440};
441
442} // namespace emu
443} // namespace yaze
std::vector< uint8_t > Parse(const std::string &instruction)
Definition asm_parser.h:66
bool TryParseHex(const std::string &str, uint32_t &value)
Definition asm_parser.h:205
std::unordered_map< MnemonicMode, uint8_t, MnemonicModeHash > mnemonic_to_opcode_
Definition asm_parser.h:439
std::vector< std::string > Tokenize(const std::string &instruction)
Definition asm_parser.h:135
void CreateInternalOpcodeMap()
Definition asm_parser.h:214
bool TryParseByte(const std::string &str, uint8_t &value)
Definition asm_parser.h:196
AddressingMode DetermineMode(const std::vector< std::string > &tokens)
Definition asm_parser.h:183
void AppendOperandBytes(std::vector< uint8_t > &bytes, const std::string &operand, const AddressingMode &addressing_mode)
Definition asm_parser.h:150
SNES Emulation and debugging tools.
Definition apu.cc:13
Main namespace for the application.
Definition controller.cc:18
std::size_t operator()(const MnemonicMode &k) const
Definition asm_parser.h:58
bool operator==(const MnemonicMode &other) const
Definition asm_parser.h:51
AddressingMode mode
Definition asm_parser.h:49