yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
symbol_provider.cc
Go to the documentation of this file.
2
3#include <algorithm>
4#include <cctype>
5#include <fstream>
6#include <regex>
7#include <sstream>
8
9#ifndef __EMSCRIPTEN__
10#include <filesystem>
11#endif
12
13#include "absl/strings/str_format.h"
14#include "absl/strings/str_split.h"
15#include "absl/strings/strip.h"
16#include "absl/strings/match.h"
17
18namespace yaze {
19namespace emu {
20namespace debug {
21
22namespace {
23
24// Helper to read entire file into string
25absl::StatusOr<std::string> ReadFileContent(const std::string& path) {
26 std::ifstream file(path);
27 if (!file.is_open()) {
28 return absl::NotFoundError(
29 absl::StrFormat("Failed to open file: %s", path));
30 }
31 std::stringstream buffer;
32 buffer << file.rdbuf();
33 return buffer.str();
34}
35
// Parses a hex address of at most 24 bits (1-6 hex digits).
//
// Accepted decorations, each optional and stripped in this order:
//   - a leading '$'            ("$008034")
//   - a leading "0x" / "0X"    ("0x008034")
//   - a trailing ':'           ("008034:")
//
// Returns std::nullopt when nothing is left after stripping, when more than
// six characters remain, or when any remaining character is not a hex digit.
// Digits are validated one by one instead of delegating to std::stoul,
// because std::stoul also accepts leading whitespace, a '+'/'-' sign and a
// second "0x" prefix (so "-1" used to come back as 0xFFFFFFFF).
std::optional<uint32_t> ParseAddress(const std::string& str) {
  std::string clean = str;
  // Remove $ prefix if present
  if (!clean.empty() && clean.front() == '$') {
    clean.erase(0, 1);
  }
  // Remove 0x prefix if present
  if (clean.size() >= 2 && clean[0] == '0' &&
      (clean[1] == 'x' || clean[1] == 'X')) {
    clean.erase(0, 2);
  }
  // Remove any trailing colon
  if (!clean.empty() && clean.back() == ':') {
    clean.pop_back();
  }

  if (clean.empty() || clean.size() > 6) return std::nullopt;

  uint32_t addr = 0;
  for (const char c : clean) {
    uint32_t digit = 0;
    if (c >= '0' && c <= '9') {
      digit = static_cast<uint32_t>(c - '0');
    } else if (c >= 'a' && c <= 'f') {
      digit = static_cast<uint32_t>(c - 'a') + 10;
    } else if (c >= 'A' && c <= 'F') {
      digit = static_cast<uint32_t>(c - 'A') + 10;
    } else {
      return std::nullopt;  // Not a hex digit (sign, whitespace, 'x', ...)
    }
    addr = (addr << 4) | digit;
  }
  return addr;
}
64
// Returns true when `name` is a syntactically valid label name:
//   - non-empty,
//   - first character is a letter, '_' or '.' (dot marks a local label),
//   - every following character is a letter, digit, '_' or '.'.
//
// Characters are converted to unsigned char before being handed to the
// <cctype> classifiers; passing a negative plain `char` (any byte >= 0x80 on
// platforms where char is signed) is undefined behaviour.
bool IsValidLabelName(const std::string& name) {
  if (name.empty()) return false;

  const auto is_punct_allowed = [](unsigned char c) {
    return c == '_' || c == '.';
  };

  const unsigned char first = static_cast<unsigned char>(name.front());
  if (!std::isalpha(first) && !is_punct_allowed(first)) return false;

  return std::all_of(name.begin() + 1, name.end(),
                     [&is_punct_allowed](char raw) {
                       const unsigned char c = static_cast<unsigned char>(raw);
                       return std::isalnum(c) != 0 || is_punct_allowed(c);
                     });
}
78
// Simple glob-style matching of `str` against `pattern`.
//   '*' matches any run of characters (including none),
//   '?' matches exactly one character,
//   every other character matches itself.
// The whole of `str` must be consumed for a match.
//
// Implementation: greedy scan with single-star backtracking. `star_pos`
// remembers the most recent '*' in the pattern and `match_pos` the position
// in `str` that star is currently assumed to extend to.
//
// The '*' test must come before the literal comparison; otherwise a '*' in
// the pattern is consumed as an ordinary character whenever `str` happens to
// contain '*' at the same position (e.g. "*" would fail to match "*abc").
bool WildcardMatch(const std::string& pattern, const std::string& str) {
  size_t p = 0;
  size_t s = 0;
  size_t star_pos = std::string::npos;
  size_t match_pos = 0;

  while (s < str.size()) {
    if (p < pattern.size() && pattern[p] == '*') {
      // Record the star and tentatively let it match nothing.
      star_pos = p++;
      match_pos = s;
    } else if (p < pattern.size() &&
               (pattern[p] == '?' || pattern[p] == str[s])) {
      ++p;
      ++s;
    } else if (star_pos != std::string::npos) {
      // Mismatch: let the last star swallow one more character and retry.
      p = star_pos + 1;
      s = ++match_pos;
    } else {
      return false;
    }
  }

  // Only trailing stars may remain unmatched in the pattern.
  while (p < pattern.size() && pattern[p] == '*') ++p;
  return p == pattern.size();
}
103
// Path helpers implemented with plain string operations so they behave the
// same on every platform (both '/' and '\\' are treated as separators).

// Returns the portion of `path` after the last '/' or '\\'. When `path`
// contains no separator it is returned unchanged.
std::string GetFilename(const std::string& path) {
  const size_t last_separator = path.find_last_of("/\\");
  return last_separator == std::string::npos
             ? path
             : path.substr(last_separator + 1);
}
110
// Returns the extension of the final path component of `path`, including the
// leading dot (e.g. ".asm"), or an empty string when that component contains
// no dot. Dots that appear in directory names are ignored.
std::string GetExtension(const std::string& path) {
  const size_t last_dot = path.find_last_of('.');
  if (last_dot == std::string::npos) return "";

  // A dot located before the last separator belongs to a directory name,
  // not to the file name.
  const size_t last_separator = path.find_last_of("/\\");
  if (last_separator != std::string::npos && last_dot < last_separator) {
    return "";
  }
  return path.substr(last_dot);
}
117
118} // namespace
119
120absl::Status SymbolProvider::LoadAsarAsmFile(const std::string& path) {
121 auto content_or = ReadFileContent(path);
122 if (!content_or.ok()) {
123 return content_or.status();
124 }
125
126 return ParseAsarAsmContent(*content_or, GetFilename(path));
127}
128
130 const std::string& directory_path) {
131#ifdef __EMSCRIPTEN__
132 // Directory iteration not supported in WASM builds
133 // Use LoadAsarAsmFile with explicit file paths instead
134 (void)directory_path;
135 return absl::UnimplementedError(
136 "Directory loading not supported in browser builds. "
137 "Please load individual symbol files.");
138#else
139 std::filesystem::path dir(directory_path);
140 if (!std::filesystem::exists(dir)) {
141 return absl::NotFoundError(
142 absl::StrFormat("Directory not found: %s", directory_path));
143 }
144
145 int files_loaded = 0;
146 for (const auto& entry : std::filesystem::directory_iterator(dir)) {
147 if (entry.is_regular_file()) {
148 auto ext = entry.path().extension().string();
149 if (ext == ".asm" || ext == ".s") {
150 auto status = LoadAsarAsmFile(entry.path().string());
151 if (status.ok()) {
152 ++files_loaded;
153 }
154 }
155 }
156 }
157
158 if (files_loaded == 0) {
159 return absl::NotFoundError("No ASM files found in directory");
160 }
161
162 return absl::OkStatus();
163#endif
164}
165
// Reads the symbol file at `path` and hands its text to the parser that
// corresponds to `format`.
//
// When `format` is SymbolFormat::kAuto the format is chosen by
// DetectFormat() from the file content and the file's extension.
// Returns the read error when the file cannot be read, and
// InvalidArgumentError("Unknown symbol format") when the switch falls
// through to `default`.
//
// NOTE(review): in this listing the `case` labels of the switch below are
// missing (each `return` appears without its label), so which SymbolFormat
// value selects which parser cannot be confirmed from here. Verify against
// the original source before editing this function.
166absl::Status SymbolProvider::LoadSymbolFile(const std::string& path,
167 SymbolFormat format) {
168 auto content_or = ReadFileContent(path);
169 if (!content_or.ok()) {
170 return content_or.status();
171 }
172
173 const std::string& content = *content_or;
174 std::string ext = GetExtension(path);
175
176 // Auto-detect format if needed
177 if (format == SymbolFormat::kAuto) {
178 format = DetectFormat(content, ext);
179 }
180
181 switch (format) {
183 return ParseAsarAsmContent(content, GetFilename(path));
185 return ParseWlaDxSymFile(content);
187 return ParseMesenMlbFile(content);
190 return ParseBsnesSymFile(content);
191 default:
192 return absl::InvalidArgumentError("Unknown symbol format");
193 }
194}
195
197 symbols_by_address_.emplace(symbol.address, symbol);
198 symbols_by_name_[symbol.name] = symbol;
199}
200
201void SymbolProvider::AddAsarSymbols(const std::vector<Symbol>& symbols) {
202 for (const auto& sym : symbols) {
203 AddSymbol(sym);
204 }
205}
206
208 symbols_by_address_.clear();
209 symbols_by_name_.clear();
210}
211
212std::string SymbolProvider::GetSymbolName(uint32_t address) const {
213 auto it = symbols_by_address_.find(address);
214 if (it != symbols_by_address_.end()) {
215 return it->second.name;
216 }
217 return "";
218}
219
220std::optional<Symbol> SymbolProvider::GetSymbol(uint32_t address) const {
221 auto it = symbols_by_address_.find(address);
222 if (it != symbols_by_address_.end()) {
223 return it->second;
224 }
225 return std::nullopt;
226}
227
229 uint32_t address) const {
230 std::vector<Symbol> result;
231 auto range = symbols_by_address_.equal_range(address);
232 for (auto it = range.first; it != range.second; ++it) {
233 result.push_back(it->second);
234 }
235 return result;
236}
237
238std::optional<Symbol> SymbolProvider::FindSymbol(
239 const std::string& name) const {
240 auto it = symbols_by_name_.find(name);
241 if (it != symbols_by_name_.end()) {
242 return it->second;
243 }
244 return std::nullopt;
245}
246
248 const std::string& pattern) const {
249 std::vector<Symbol> result;
250 for (const auto& [name, sym] : symbols_by_name_) {
251 if (WildcardMatch(pattern, name)) {
252 result.push_back(sym);
253 }
254 }
255 return result;
256}
257
258std::vector<Symbol> SymbolProvider::GetSymbolsInRange(uint32_t start,
259 uint32_t end) const {
260 std::vector<Symbol> result;
261 auto it_start = symbols_by_address_.lower_bound(start);
262 auto it_end = symbols_by_address_.upper_bound(end);
263 for (auto it = it_start; it != it_end; ++it) {
264 result.push_back(it->second);
265 }
266 return result;
267}
268
270 uint32_t address) const {
271 if (symbols_by_address_.empty()) return std::nullopt;
272
273 // Find first symbol > address
274 auto it = symbols_by_address_.upper_bound(address);
275
276 if (it == symbols_by_address_.begin()) {
277 // All symbols are > address, no symbol at or before
278 return std::nullopt;
279 }
280
281 // Go back to the symbol at or before address
282 --it;
283 return it->second;
284}
285
286std::string SymbolProvider::FormatAddress(uint32_t address,
287 uint32_t max_offset) const {
288 // Check for exact match first
289 auto exact = GetSymbol(address);
290 if (exact) {
291 return exact->name;
292 }
293
294 // Check for nearest symbol with offset
295 auto nearest = GetNearestSymbol(address);
296 if (nearest) {
297 uint32_t offset = address - nearest->address;
298 if (offset <= max_offset) {
299 return absl::StrFormat("%s+$%X", nearest->name, offset);
300 }
301 }
302
303 // No symbol found, just format as hex
304 return absl::StrFormat("$%06X", address);
305}
306
307std::function<std::string(uint32_t)> SymbolProvider::CreateResolver() const {
308 return [this](uint32_t address) -> std::string {
309 return GetSymbolName(address);
310 };
311}
312
313absl::Status SymbolProvider::ParseAsarAsmContent(const std::string& content,
314 const std::string& filename) {
315 std::istringstream stream(content);
316 std::string line;
317 int line_number = 0;
318
319 std::string current_label; // Current global label (for local label scope)
320 uint32_t last_address = 0;
321
322 // Regex patterns for usdasm format
323 // Label definition: word followed by colon at start of line
324 std::regex label_regex(R"(^([A-Za-z_][A-Za-z0-9_]*):)");
325 // Local label: dot followed by word and colon
326 std::regex local_label_regex(R"(^(\.[A-Za-z_][A-Za-z0-9_]*))");
327 // Address line: #_XXXXXX: instruction
328 std::regex address_regex(R"(^#_([0-9A-Fa-f]{6}):)");
329
330 bool pending_label = false;
331 std::string pending_label_name;
332 bool pending_is_local = false;
333
334 while (std::getline(stream, line)) {
335 ++line_number;
336
337 // Skip empty lines and comment-only lines
338 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
339 if (trimmed.empty() || trimmed[0] == ';') continue;
340
341 std::smatch match;
342
343 // Check for address line
344 if (std::regex_search(line, match, address_regex)) {
345 auto addr = ParseAddress(match[1].str());
346 if (addr) {
347 last_address = *addr;
348
349 // If we have a pending label, associate it with this address
350 if (pending_label) {
351 Symbol sym;
352 sym.name = pending_label_name;
353 sym.address = *addr;
354 sym.file = filename;
355 sym.line = line_number;
356 sym.is_local = pending_is_local;
357
358 AddSymbol(sym);
359 pending_label = false;
360 }
361 }
362 }
363
364 // Check for global label (at start of line, not indented)
365 if (line[0] != ' ' && line[0] != '\t' && line[0] != '#') {
366 if (std::regex_search(line, match, label_regex)) {
367 current_label = match[1].str();
368 pending_label = true;
369 pending_label_name = current_label;
370 pending_is_local = false;
371 }
372 }
373
374 // Check for local label
375 if (std::regex_search(trimmed, match, local_label_regex)) {
376 std::string local_name = match[1].str();
377 // Create fully qualified name: GlobalLabel.local_name
378 std::string full_name = current_label.empty()
379 ? local_name
380 : current_label + local_name;
381 pending_label = true;
382 pending_label_name = full_name;
383 pending_is_local = true;
384 }
385 }
386
387 return absl::OkStatus();
388}
389
390absl::Status SymbolProvider::ParseWlaDxSymFile(const std::string& content) {
391 // WLA-DX format:
392 // [labels]
393 // 00:8000 Reset
394 // 00:8034 MainGameLoop
395
396 std::istringstream stream(content);
397 std::string line;
398 bool in_labels_section = false;
399
400 std::regex label_regex(R"(^([0-9A-Fa-f]{2}):([0-9A-Fa-f]{4})\s+(\S+))");
401
402 while (std::getline(stream, line)) {
403 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
404
405 if (trimmed == "[labels]") {
406 in_labels_section = true;
407 continue;
408 }
409 if (trimmed.empty() || trimmed[0] == '[') {
410 if (trimmed[0] == '[') in_labels_section = false;
411 continue;
412 }
413
414 if (!in_labels_section) continue;
415
416 std::smatch match;
417 if (std::regex_search(trimmed, match, label_regex)) {
418 uint32_t bank = std::stoul(match[1].str(), nullptr, 16);
419 uint32_t offset = std::stoul(match[2].str(), nullptr, 16);
420 uint32_t address = (bank << 16) | offset;
421 std::string name = match[3].str();
422
423 Symbol sym(name, address);
424 AddSymbol(sym);
425 }
426 }
427
428 return absl::OkStatus();
429}
430
431absl::Status SymbolProvider::ParseMesenMlbFile(const std::string& content) {
432 // Mesen .mlb format:
433 // PRG:address:name
434 // or just
435 // address:name
436
437 std::istringstream stream(content);
438 std::string line;
439
440 std::regex label_regex(R"(^(?:PRG:)?([0-9A-Fa-f]+):(\S+))");
441
442 while (std::getline(stream, line)) {
443 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
444 if (trimmed.empty() || trimmed[0] == ';') continue;
445
446 std::smatch match;
447 if (std::regex_search(trimmed, match, label_regex)) {
448 auto addr = ParseAddress(match[1].str());
449 if (addr) {
450 Symbol sym(match[2].str(), *addr);
451 AddSymbol(sym);
452 }
453 }
454 }
455
456 return absl::OkStatus();
457}
458
459absl::Status SymbolProvider::ParseBsnesSymFile(const std::string& content) {
460 // bsnes/No$snes format:
461 // 008000 Reset
462 // 008034 MainGameLoop
463
464 std::istringstream stream(content);
465 std::string line;
466
467 std::regex label_regex(R"(^([0-9A-Fa-f]{6})\s+(\S+))");
468
469 while (std::getline(stream, line)) {
470 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
471 if (trimmed.empty() || trimmed[0] == ';' || trimmed[0] == '#') continue;
472
473 std::smatch match;
474 if (std::regex_search(trimmed, match, label_regex)) {
475 auto addr = ParseAddress(match[1].str());
476 if (addr) {
477 Symbol sym(match[2].str(), *addr);
478 AddSymbol(sym);
479 }
480 }
481 }
482
483 return absl::OkStatus();
484}
485
487 const std::string& extension) const {
// Chooses a SymbolFormat for a symbol file from its extension and, failing
// that, from marker strings found anywhere in `content`.
//
// Visible rules, checked in this order:
//   extension ".asm" or ".s"      -> SymbolFormat::kAsar
//   extension ".mlb"              -> (return statement missing here)
//   content contains "[labels]"   -> (return statement missing here)
//   content contains "PRG:"       -> (return statement missing here)
//   content contains "#_"         -> SymbolFormat::kAsar
//   otherwise                     -> (return statement missing here)
//
// NOTE(review): the first line of the signature and the return statements
// marked above are absent from this listing. Presumably they return the
// Mesen, WLA-DX, Mesen and bsnes formats respectively (the last per the
// "Default to bsnes format" comment) -- confirm against the original source
// before editing.
488 // Check extension first
489 if (extension == ".asm" || extension == ".s") {
490 return SymbolFormat::kAsar;
491 }
492 if (extension == ".mlb") {
494 }
495
496 // Check content for format hints
497 if (content.find("[labels]") != std::string::npos) {
499 }
500 if (content.find("PRG:") != std::string::npos) {
502 }
503 if (content.find("#_") != std::string::npos) {
504 return SymbolFormat::kAsar;
505 }
506
507 // Default to bsnes format (most generic)
509}
510
511} // namespace debug
512} // namespace emu
513} // namespace yaze
std::function< std::string(uint32_t)> CreateResolver() const
Create a symbol resolver function for the disassembler.
std::vector< Symbol > GetSymbolsInRange(uint32_t start, uint32_t end) const
Get all symbols in an address range.
std::string FormatAddress(uint32_t address, uint32_t max_offset=0x100) const
Format an address with symbol info.
void AddSymbol(const Symbol &symbol)
Add a single symbol manually.
std::map< std::string, Symbol > symbols_by_name_
std::vector< Symbol > GetSymbolsAtAddress(uint32_t address) const
Get all symbols at an address (there may be multiple)
void AddAsarSymbols(const std::vector< Symbol > &symbols)
Add symbols from Asar patch results.
absl::Status ParseBsnesSymFile(const std::string &content)
absl::Status LoadAsarAsmDirectory(const std::string &directory_path)
Load symbols from a directory of ASM files.
absl::Status ParseWlaDxSymFile(const std::string &content)
std::multimap< uint32_t, Symbol > symbols_by_address_
absl::Status LoadAsarAsmFile(const std::string &path)
Load symbols from an Asar-style ASM file (usdasm format)
SymbolFormat DetectFormat(const std::string &content, const std::string &extension) const
absl::Status LoadSymbolFile(const std::string &path, SymbolFormat format=SymbolFormat::kAuto)
Load symbols from a .sym file (various formats)
std::string GetSymbolName(uint32_t address) const
Get symbol name for an address.
void Clear()
Clear all loaded symbols.
std::optional< Symbol > GetNearestSymbol(uint32_t address) const
Get nearest symbol at or before an address.
absl::Status ParseAsarAsmContent(const std::string &content, const std::string &filename)
absl::Status ParseMesenMlbFile(const std::string &content)
std::optional< Symbol > GetSymbol(uint32_t address) const
Get full symbol info for an address.
std::optional< Symbol > FindSymbol(const std::string &name) const
Find symbol by name.
std::vector< Symbol > FindSymbolsMatching(const std::string &pattern) const
Find symbols matching a pattern (supports wildcards)
bool WildcardMatch(const std::string &pattern, const std::string &str)
absl::StatusOr< std::string > ReadFileContent(const std::string &path)
std::optional< uint32_t > ParseAddress(const std::string &str)
SymbolFormat
Supported symbol file formats.
Information about a symbol (label, constant, or address)