yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
symbol_provider.cc
Go to the documentation of this file.
2
3#include <algorithm>
4#include <cctype>
5#include <fstream>
6#include <regex>
7#include <sstream>
8
9#ifndef __EMSCRIPTEN__
10#include <filesystem>
11#endif
12
13#include "nlohmann/json.hpp"
14#include "absl/strings/match.h"
15#include "absl/strings/str_format.h"
16#include "absl/strings/str_split.h"
17#include "absl/strings/strip.h"
18
19namespace yaze {
20namespace emu {
21namespace debug {
22
23namespace {
24
25// Helper to read entire file into string
26absl::StatusOr<std::string> ReadFileContent(const std::string& path) {
27 std::ifstream file(path);
28 if (!file.is_open()) {
29 return absl::NotFoundError(
30 absl::StrFormat("Failed to open file: %s", path));
31 }
32 std::stringstream buffer;
33 buffer << file.rdbuf();
34 return buffer.str();
35}
36
// Parses a hexadecimal address of at most six hex digits (24 bits).
//
// Accepted decorations, stripped in this order:
//   * an optional leading '$'            ("$008034")
//   * an optional "0x" / "0X" prefix     ("0x8034")
//   * an optional trailing ':'           ("008034:")
//
// Returns std::nullopt when nothing is left after stripping, when more than
// six characters remain, or when any remaining character is not a hex digit.
std::optional<uint32_t> ParseAddress(const std::string& str) {
  std::string digits = str;
  if (!digits.empty() && digits.front() == '$') {
    digits.erase(0, 1);
  }
  if (digits.size() >= 2 && digits[0] == '0' &&
      (digits[1] == 'x' || digits[1] == 'X')) {
    digits.erase(0, 2);
  }
  if (!digits.empty() && digits.back() == ':') {
    digits.pop_back();
  }

  if (digits.empty() || digits.size() > 6) {
    return std::nullopt;
  }

  // std::stoul alone would also accept leading whitespace, a '+'/'-' sign and
  // a further "0x" prefix (so "-1" would come back as 0xFFFFFFFF). Require
  // plain hex digits up front instead of relying on its end-position output.
  const bool all_hex =
      std::all_of(digits.begin(), digits.end(), [](unsigned char c) {
        return std::isxdigit(c) != 0;
      });
  if (!all_hex) {
    return std::nullopt;
  }

  // One to six validated hex digits: the conversion cannot throw or overflow.
  return static_cast<uint32_t>(std::stoul(digits, nullptr, 16));
}
67
// Returns true when `name` is a syntactically valid label name.
//
// Rules:
//   * the name is not empty;
//   * the first character is a letter, '_' or '.' (leading dot = local label);
//   * every following character is a letter, a digit, '_' or '.'.
bool IsValidLabelName(const std::string& name) {
  if (name.empty()) {
    return false;
  }

  const auto is_punct_ok = [](unsigned char c) {
    return c == '_' || c == '.';
  };

  // The <cctype> classifiers have undefined behaviour for negative arguments,
  // so every character is converted to unsigned char before being classified.
  const unsigned char first = static_cast<unsigned char>(name.front());
  if (!std::isalpha(first) && !is_punct_ok(first)) {
    return false;
  }

  return std::all_of(name.begin() + 1, name.end(),
                     [&is_punct_ok](unsigned char c) {
                       return std::isalnum(c) != 0 || is_punct_ok(c);
                     });
}
84
// Glob-style matching of `str` against `pattern`.
//
//   '*' matches any run of characters (including none)
//   '?' matches exactly one character
//   any other character matches itself
//
// The whole of `str` must be consumed for the match to succeed. Matching is
// iterative: the position of the most recent '*' is remembered and, on a
// mismatch, that '*' is made to absorb one more character of `str`.
bool WildcardMatch(const std::string& pattern, const std::string& str) {
  size_t p = 0;
  size_t s = 0;
  size_t star_pos = std::string::npos;  // index of the last '*' seen
  size_t resume_pos = 0;                // where in `str` that '*' stops absorbing

  while (s < str.size()) {
    // The wildcard test must come before the literal comparison; otherwise a
    // '*' in the pattern facing a literal '*' in `str` is consumed as an
    // ordinary character and can no longer be backtracked to.
    if (p < pattern.size() && pattern[p] == '*') {
      star_pos = p++;
      resume_pos = s;
    } else if (p < pattern.size() &&
               (pattern[p] == '?' || pattern[p] == str[s])) {
      ++p;
      ++s;
    } else if (star_pos != std::string::npos) {
      // Mismatch: let the last '*' swallow one more character and retry.
      p = star_pos + 1;
      s = ++resume_pos;
    } else {
      return false;
    }
  }

  // Trailing '*' in the pattern can match the empty remainder.
  while (p < pattern.size() && pattern[p] == '*') {
    ++p;
  }
  return p == pattern.size();
}
110
// Simple path utilities that work on all platforms.

// Returns the part of `path` after the last '/' or '\\' separator, or the
// whole of `path` when it contains no separator.
std::string GetFilename(const std::string& path) {
  const size_t last_sep = path.find_last_of("/\\");
  return last_sep == std::string::npos ? path : path.substr(last_sep + 1);
}
118
119std::string GetExtension(const std::string& path) {
120 std::string filename = GetFilename(path);
121 size_t pos = filename.find_last_of('.');
122 if (pos == std::string::npos)
123 return "";
124 return filename.substr(pos);
125}
126
127} // namespace
128
129absl::Status SymbolProvider::LoadAsarAsmFile(const std::string& path) {
130 auto content_or = ReadFileContent(path);
131 if (!content_or.ok()) {
132 return content_or.status();
133 }
134
135 return ParseAsarAsmContent(*content_or, GetFilename(path));
136}
137
139 const std::string& directory_path) {
140#ifdef __EMSCRIPTEN__
141 // Directory iteration not supported in WASM builds
142 // Use LoadAsarAsmFile with explicit file paths instead
143 (void)directory_path;
144 return absl::UnimplementedError(
145 "Directory loading not supported in browser builds. "
146 "Please load individual symbol files.");
147#else
148 std::filesystem::path dir(directory_path);
149 if (!std::filesystem::exists(dir)) {
150 return absl::NotFoundError(
151 absl::StrFormat("Directory not found: %s", directory_path));
152 }
153
154 int files_loaded = 0;
155 for (const auto& entry : std::filesystem::directory_iterator(dir)) {
156 if (entry.is_regular_file()) {
157 auto ext = entry.path().extension().string();
158 if (ext == ".asm" || ext == ".s") {
159 auto status = LoadAsarAsmFile(entry.path().string());
160 if (status.ok()) {
161 ++files_loaded;
162 }
163 }
164 }
165 }
166
167 if (files_loaded == 0) {
168 return absl::NotFoundError("No ASM files found in directory");
169 }
170
171 return absl::OkStatus();
172#endif
173}
174
175absl::Status SymbolProvider::LoadSymbolFile(const std::string& path,
176 SymbolFormat format) {
177 auto content_or = ReadFileContent(path);
178 if (!content_or.ok()) {
179 return content_or.status();
180 }
181
182 const std::string& content = *content_or;
183 std::string ext = GetExtension(path);
184
185 // Auto-detect format if needed
186 if (format == SymbolFormat::kAuto) {
187 format = DetectFormat(content, ext);
188 }
189
190 switch (format) {
192 return ParseAsarAsmContent(content, GetFilename(path));
194 return ParseWlaDxSymFile(content);
196 return ParseMesenMlbFile(content);
199 return ParseBsnesSymFile(content);
201 return ParseSourceMapJson(content);
202 default:
203 return absl::InvalidArgumentError("Unknown symbol format");
204 }
205}
206
208 symbols_by_address_.emplace(symbol.address, symbol);
209 symbols_by_name_[symbol.name] = symbol;
210}
211
212void SymbolProvider::AddAsarSymbols(const std::vector<Symbol>& symbols) {
213 for (const auto& sym : symbols) {
214 AddSymbol(sym);
215 }
216}
217
219 symbols_by_address_.clear();
220 symbols_by_name_.clear();
221}
222
223std::string SymbolProvider::GetSymbolName(uint32_t address) const {
224 auto it = symbols_by_address_.find(address);
225 if (it != symbols_by_address_.end()) {
226 return it->second.name;
227 }
228 return "";
229}
230
231std::optional<Symbol> SymbolProvider::GetSymbol(uint32_t address) const {
232 auto it = symbols_by_address_.find(address);
233 if (it != symbols_by_address_.end()) {
234 return it->second;
235 }
236 return std::nullopt;
237}
238
240 uint32_t address) const {
241 std::vector<Symbol> result;
242 auto range = symbols_by_address_.equal_range(address);
243 for (auto it = range.first; it != range.second; ++it) {
244 result.push_back(it->second);
245 }
246 return result;
247}
248
249std::optional<Symbol> SymbolProvider::FindSymbol(
250 const std::string& name) const {
251 auto it = symbols_by_name_.find(name);
252 if (it != symbols_by_name_.end()) {
253 return it->second;
254 }
255 return std::nullopt;
256}
257
259 const std::string& pattern) const {
260 std::vector<Symbol> result;
261 for (const auto& [name, sym] : symbols_by_name_) {
262 if (WildcardMatch(pattern, name)) {
263 result.push_back(sym);
264 }
265 }
266 return result;
267}
268
269std::vector<Symbol> SymbolProvider::GetSymbolsInRange(uint32_t start,
270 uint32_t end) const {
271 std::vector<Symbol> result;
272 auto it_start = symbols_by_address_.lower_bound(start);
273 auto it_end = symbols_by_address_.upper_bound(end);
274 for (auto it = it_start; it != it_end; ++it) {
275 result.push_back(it->second);
276 }
277 return result;
278}
279
280std::optional<Symbol> SymbolProvider::GetNearestSymbol(uint32_t address) const {
281 if (symbols_by_address_.empty())
282 return std::nullopt;
283
284 // Find first symbol > address
285 auto it = symbols_by_address_.upper_bound(address);
286
287 if (it == symbols_by_address_.begin()) {
288 // All symbols are > address, no symbol at or before
289 return std::nullopt;
290 }
291
292 // Go back to the symbol at or before address
293 --it;
294 return it->second;
295}
296
297std::string SymbolProvider::FormatAddress(uint32_t address,
298 uint32_t max_offset) const {
299 // Check for exact match first
300 auto exact = GetSymbol(address);
301 if (exact) {
302 return exact->name;
303 }
304
305 // Check for nearest symbol with offset
306 auto nearest = GetNearestSymbol(address);
307 if (nearest) {
308 uint32_t offset = address - nearest->address;
309 if (offset <= max_offset) {
310 return absl::StrFormat("%s+$%X", nearest->name, offset);
311 }
312 }
313
314 // No symbol found, just format as hex
315 return absl::StrFormat("$%06X", address);
316}
317
318std::string SymbolProvider::GetSourceLocation(uint32_t address) const {
319 auto exact = GetSymbol(address);
320 if (exact && !exact->file.empty()) {
321 return absl::StrFormat("%s:%d", exact->file, exact->line);
322 }
323
324 auto nearest = GetNearestSymbol(address);
325 if (nearest && !nearest->file.empty()) {
326 // We could add the offset too, but file:line is usually what IDEs want
327 return absl::StrFormat("%s:%d", nearest->file, nearest->line);
328 }
329
330 return "";
331}
332
333std::function<std::string(uint32_t)> SymbolProvider::CreateResolver() const {
334 return [this](uint32_t address) -> std::string {
335 return GetSymbolName(address);
336 };
337}
338
339absl::StatusOr<std::string> SymbolProvider::ExportSymbols(
340 SymbolFormat format) const {
341 std::stringstream ss;
342
343 // Collect symbols into a sorted vector
344 std::vector<Symbol> sorted_symbols;
345 sorted_symbols.reserve(symbols_by_address_.size());
346 for (const auto& pair : symbols_by_address_) {
347 sorted_symbols.push_back(pair.second);
348 }
349 // Sort by address then name to ensure deterministic output
350 std::sort(sorted_symbols.begin(), sorted_symbols.end(),
351 [](const Symbol& a, const Symbol& b) {
352 if (a.address != b.address) return a.address < b.address;
353 return a.name < b.name;
354 });
355
356 switch (format) {
358 // Mesen .mlb format:
359 // PRG:start-end:label
360 // or simple address:label
361 // Mesen2 prefers simple address:label or range.
362 // We will use "address:label" for single addresses.
363 for (const auto& sym : sorted_symbols) {
364 ss << absl::StrFormat("PRG:%X:%s\n", sym.address, sym.name);
365 }
366 break;
367 }
369 ss << "[labels]\n";
370 for (const auto& sym : sorted_symbols) {
371 uint32_t bank = (sym.address >> 16) & 0xFF;
372 uint32_t offset = sym.address & 0xFFFF;
373 ss << absl::StrFormat("%02X:%04X %s\n", bank, offset, sym.name);
374 }
375 break;
376 }
377 case SymbolFormat::kAsar: {
378 // Export as simple labels.
379 // Note: This isn't perfect for recreating the full source,
380 // but good enough for symbol tables.
381 for (const auto& sym : sorted_symbols) {
382 ss << absl::StrFormat("org $%06X\n%s:\n", sym.address, sym.name);
383 }
384 break;
385 }
388 for (const auto& sym : sorted_symbols) {
389 ss << absl::StrFormat("%06X %s\n", sym.address, sym.name);
390 }
391 break;
392 }
393 default:
394 return absl::InvalidArgumentError("Unsupported export format");
395 }
396
397 return ss.str();
398}
399
400absl::Status SymbolProvider::ParseAsarAsmContent(const std::string& content,
401 const std::string& filename) {
402 std::istringstream stream(content);
403 std::string line;
404 int line_number = 0;
405
406 std::string current_label; // Current global label (for local label scope)
407 uint32_t last_address = 0;
408
409 // Regex patterns for usdasm format
410 // Label definition: word followed by colon at start of line
411 std::regex label_regex(R"(^([A-Za-z_][A-Za-z0-9_]*):)");
412 // Local label: dot followed by word and colon
413 std::regex local_label_regex(R"(^(\.[A-Za-z_][A-Za-z0-9_]*))");
414 // Address line: #_XXXXXX: instruction
415 std::regex address_regex(R"(^#_([0-9A-Fa-f]{6}):)");
416
417 bool pending_label = false;
418 std::string pending_label_name;
419 bool pending_is_local = false;
420
421 while (std::getline(stream, line)) {
422 ++line_number;
423
424 // Skip empty lines and comment-only lines
425 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
426 if (trimmed.empty() || trimmed[0] == ';')
427 continue;
428
429 std::smatch match;
430
431 // Check for address line
432 if (std::regex_search(line, match, address_regex)) {
433 auto addr = ParseAddress(match[1].str());
434 if (addr) {
435 last_address = *addr;
436
437 // If we have a pending label, associate it with this address
438 if (pending_label) {
439 Symbol sym;
440 sym.name = pending_label_name;
441 sym.address = *addr;
442 sym.file = filename;
443 sym.line = line_number;
444 sym.is_local = pending_is_local;
445
446 AddSymbol(sym);
447 pending_label = false;
448 }
449 }
450 }
451
452 // Check for global label (at start of line, not indented)
453 if (line[0] != ' ' && line[0] != '\t' && line[0] != '#') {
454 if (std::regex_search(line, match, label_regex)) {
455 current_label = match[1].str();
456 pending_label = true;
457 pending_label_name = current_label;
458 pending_is_local = false;
459 }
460 }
461
462 // Check for local label
463 if (std::regex_search(trimmed, match, local_label_regex)) {
464 std::string local_name = match[1].str();
465 // Create fully qualified name: GlobalLabel.local_name
466 std::string full_name =
467 current_label.empty() ? local_name : current_label + local_name;
468 pending_label = true;
469 pending_label_name = full_name;
470 pending_is_local = true;
471 }
472 }
473
474 return absl::OkStatus();
475}
476
477absl::Status SymbolProvider::ParseWlaDxSymFile(const std::string& content) {
478 // WLA-DX format:
479 // [labels]
480 // 00:8000 Reset
481 // 2C:86BA :neg_1_1 (Leading colon is common in Asar output)
482
483 std::istringstream stream(content);
484 std::string line;
485 bool in_labels_section = false;
486
487 // Pattern: bank:offset name
488 // Spacing can be multiple spaces or tabs
489 std::regex label_regex(R"(^([0-9A-Fa-f]{2}):([0-9A-Fa-f]{4})\s+(\S+))");
490
491 while (std::getline(stream, line)) {
492 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
493
494 if (trimmed == "[labels]") {
495 in_labels_section = true;
496 continue;
497 }
498 if (trimmed.empty() || (trimmed[0] == '[' && trimmed != "[labels]")) {
499 if (trimmed[0] == '[')
500 in_labels_section = false;
501 continue;
502 }
503
504 if (!in_labels_section)
505 continue;
506
507 std::smatch match;
508 if (std::regex_search(trimmed, match, label_regex)) {
509 uint32_t bank = std::stoul(match[1].str(), nullptr, 16);
510 uint32_t offset = std::stoul(match[2].str(), nullptr, 16);
511 uint32_t address = (bank << 16) | offset;
512 std::string name = match[3].str();
513
514 // Strip leading colon if present
515 if (!name.empty() && name[0] == ':') {
516 name = name.substr(1);
517 }
518
519 if (!name.empty()) {
520 Symbol sym(name, address);
521 AddSymbol(sym);
522 }
523 }
524 }
525
526 return absl::OkStatus();
527}
528
529absl::Status SymbolProvider::ParseMesenMlbFile(const std::string& content) {
530 // Mesen .mlb format:
531 // MemoryType:Address[:EndAddress]:Name[:Comment]
532 // e.g., PRG:8000:Reset
533 // e.g., SnesWorkRam:7E0010:MODE:@watch fmt=hex
534
535 std::istringstream stream(content);
536 std::string line;
537
538 while (std::getline(stream, line)) {
539 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
540 if (trimmed.empty() || trimmed[0] == ';')
541 continue;
542
543 std::vector<std::string> parts = absl::StrSplit(trimmed, ':');
544 if (parts.size() < 2)
545 continue;
546
547 // Check if first part is a memory type or an address
548 std::string addr_str;
549 std::string name_str;
550
551 auto first_addr = ParseAddress(parts[0]);
552 if (first_addr) {
553 // Format is address:name or address:end:name
554 addr_str = parts[0];
555 name_str = (parts.size() > 2 && ParseAddress(parts[1])) ? parts[2] : parts[1];
556 } else {
557 // Format is MemoryType:address:name or MemoryType:address:end:name
558 if (parts.size() < 3)
559 continue;
560 addr_str = parts[1];
561 name_str = (parts.size() > 3 && ParseAddress(parts[2])) ? parts[3] : parts[2];
562 }
563
564 auto addr = ParseAddress(addr_str);
565 if (addr && !name_str.empty()) {
566 // Remove any Mesen markers like @watch from name
567 size_t marker_pos = name_str.find('@');
568 if (marker_pos != std::string::npos) {
569 name_str = name_str.substr(0, marker_pos);
570 name_str = std::string(absl::StripAsciiWhitespace(name_str));
571 }
572
573 if (!name_str.empty()) {
574 Symbol sym(name_str, *addr);
575 AddSymbol(sym);
576 }
577 }
578 }
579
580 return absl::OkStatus();
581}
582
583absl::Status SymbolProvider::ParseBsnesSymFile(const std::string& content) {
584 // bsnes/No$snes format:
585 // 008000 Reset
586 // 008034 MainGameLoop
587
588 std::istringstream stream(content);
589 std::string line;
590
591 std::regex label_regex(R"(^([0-9A-Fa-f]{6})\s+(\S+))");
592
593 while (std::getline(stream, line)) {
594 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
595 if (trimmed.empty() || trimmed[0] == ';' || trimmed[0] == '#')
596 continue;
597
598 std::smatch match;
599 if (std::regex_search(trimmed, match, label_regex)) {
600 auto addr = ParseAddress(match[1].str());
601 if (addr) {
602 Symbol sym(match[2].str(), *addr);
603 AddSymbol(sym);
604 }
605 }
606 }
607
608 return absl::OkStatus();
609}
610
611absl::Status SymbolProvider::ParseSourceMapJson(const std::string& content) {
612 try {
613 auto j = nlohmann::json::parse(content);
614 if (!j.contains("entries") || !j["entries"].is_array()) {
615 return absl::InvalidArgumentError("Invalid source map: missing entries");
616 }
617
618 // Map file IDs to paths
619 std::map<int, std::string> file_map;
620 if (j.contains("files") && j["files"].is_array()) {
621 for (const auto& f : j["files"]) {
622 int id = f.value("id", -1);
623 std::string path = f.value("path", "");
624 if (id != -1 && !path.empty()) {
625 file_map[id] = path;
626 }
627 }
628 }
629
630 // Process entries
631 for (const auto& entry : j["entries"]) {
632 std::string addr_str = entry.value("address", "");
633 auto addr_opt = ParseAddress(addr_str);
634 if (!addr_opt) continue;
635
636 int file_id = entry.value("file_id", -1);
637 int line = entry.value("line", 0);
638 std::string symbol_name = entry.value("symbol", "");
639
640 Symbol sym;
641 sym.address = *addr_opt;
642 sym.line = line;
643 if (file_map.count(file_id)) {
644 sym.file = file_map[file_id];
645 }
646
647 if (!symbol_name.empty()) {
648 sym.name = symbol_name;
649 AddSymbol(sym);
650 } else {
651 // If it's just a source mapping without a symbol name,
652 // we can still store it, but we might want a special name
653 // or just let it exist for GetSourceLocation.
654 // For now, let's give it a placeholder if we want it in GetSymbol info.
655 // But GetSourceLocation uses symbols_by_address_.
656 // Multi-map allows multiple symbols at same address.
657 sym.name = absl::StrFormat("src_%06X", sym.address);
658 AddSymbol(sym);
659 }
660 }
661 } catch (const std::exception& e) {
662 return absl::InvalidArgumentError(
663 absl::StrFormat("Failed to parse source map JSON: %s", e.what()));
664 }
665
666 return absl::OkStatus();
667}
668
670 const std::string& extension) const {
671 // Check extension first
672 if (extension == ".asm" || extension == ".s") {
673 return SymbolFormat::kAsar;
674 }
675 if (extension == ".mlb") {
677 }
678 if (extension == ".json") {
680 }
681
682 // Check content for format hints
683 if (content.find("[labels]") != std::string::npos) {
685 }
686 if (content.find("PRG:") != std::string::npos) {
688 }
689 if (content.find("#_") != std::string::npos) {
690 return SymbolFormat::kAsar;
691 }
692
693 // Default to bsnes format (most generic)
695}
696
697} // namespace debug
698} // namespace emu
699} // namespace yaze
std::function< std::string(uint32_t)> CreateResolver() const
Create a symbol resolver function for the disassembler.
std::vector< Symbol > GetSymbolsInRange(uint32_t start, uint32_t end) const
Get all symbols in an address range.
std::string FormatAddress(uint32_t address, uint32_t max_offset=0x100) const
Format an address with symbol info.
void AddSymbol(const Symbol &symbol)
Add a single symbol manually.
std::map< std::string, Symbol > symbols_by_name_
std::vector< Symbol > GetSymbolsAtAddress(uint32_t address) const
Get all symbols at an address (there may be multiple)
void AddAsarSymbols(const std::vector< Symbol > &symbols)
Add symbols from Asar patch results.
std::string GetSourceLocation(uint32_t address) const
Get source file and line for an address (for VS Code integration)
absl::Status ParseBsnesSymFile(const std::string &content)
absl::Status LoadAsarAsmDirectory(const std::string &directory_path)
Load symbols from a directory of ASM files.
absl::Status ParseWlaDxSymFile(const std::string &content)
std::multimap< uint32_t, Symbol > symbols_by_address_
absl::StatusOr< std::string > ExportSymbols(SymbolFormat format) const
Export all symbols to a string in the specified format.
absl::Status ParseSourceMapJson(const std::string &content)
absl::Status LoadAsarAsmFile(const std::string &path)
Load symbols from an Asar-style ASM file (usdasm format)
SymbolFormat DetectFormat(const std::string &content, const std::string &extension) const
absl::Status LoadSymbolFile(const std::string &path, SymbolFormat format=SymbolFormat::kAuto)
Load symbols from a .sym file (various formats)
std::string GetSymbolName(uint32_t address) const
Get symbol name for an address.
void Clear()
Clear all loaded symbols.
std::optional< Symbol > GetNearestSymbol(uint32_t address) const
Get nearest symbol at or before an address.
absl::Status ParseAsarAsmContent(const std::string &content, const std::string &filename)
absl::Status ParseMesenMlbFile(const std::string &content)
std::optional< Symbol > GetSymbol(uint32_t address) const
Get full symbol info for an address.
std::optional< Symbol > FindSymbol(const std::string &name) const
Find symbol by name.
std::vector< Symbol > FindSymbolsMatching(const std::string &pattern) const
Find symbols matching a pattern (supports wildcards)
bool WildcardMatch(const std::string &pattern, const std::string &str)
absl::StatusOr< std::string > ReadFileContent(const std::string &path)
std::optional< uint32_t > ParseAddress(const std::string &str)
SymbolFormat
Supported symbol file formats.
Information about a symbol (label, constant, or address)