yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
symbol_provider.cc
Go to the documentation of this file.
2
3#include <algorithm>
4#include <cctype>
5#include <fstream>
6#include <regex>
7#include <sstream>
8
9#ifndef __EMSCRIPTEN__
10#include <filesystem>
11#endif
12
13#include "absl/strings/match.h"
14#include "absl/strings/str_format.h"
15#include "absl/strings/str_split.h"
16#include "absl/strings/strip.h"
17
18namespace yaze {
19namespace emu {
20namespace debug {
21
22namespace {
23
24// Helper to read entire file into string
25absl::StatusOr<std::string> ReadFileContent(const std::string& path) {
26 std::ifstream file(path);
27 if (!file.is_open()) {
28 return absl::NotFoundError(
29 absl::StrFormat("Failed to open file: %s", path));
30 }
31 std::stringstream buffer;
32 buffer << file.rdbuf();
33 return buffer.str();
34}
35
// Parses a hexadecimal address of at most six digits (24 bits).
//
// Accepted decorations, in this order: an optional leading '$', an optional
// "0x"/"0X" prefix and an optional trailing ':' (e.g. "008034", "$008034",
// "0x8034", "008034:"). Whatever remains must consist solely of hex digits.
// Signs, whitespace and nested prefixes are rejected instead of being
// silently accepted, so the result always fits in 24 bits.
//
// Returns std::nullopt when the digit run is empty, longer than six
// characters, or contains a non-hex character. Never throws.
std::optional<uint32_t> ParseAddress(const std::string& str) {
  size_t begin = 0;
  size_t end = str.size();

  if (begin < end && str[begin] == '$') {
    ++begin;
  }
  if (end - begin >= 2 && str[begin] == '0' &&
      (str[begin + 1] == 'x' || str[begin + 1] == 'X')) {
    begin += 2;
  }
  if (begin < end && str[end - 1] == ':') {
    --end;
  }

  const size_t digit_count = end - begin;
  if (digit_count == 0 || digit_count > 6) {
    return std::nullopt;
  }

  uint32_t value = 0;
  for (size_t i = begin; i < end; ++i) {
    // <cctype> functions require a value representable as unsigned char.
    const unsigned char c = static_cast<unsigned char>(str[i]);
    if (!std::isxdigit(c)) {
      return std::nullopt;
    }
    const uint32_t nibble =
        std::isdigit(c) ? static_cast<uint32_t>(c - '0')
                        : static_cast<uint32_t>(std::tolower(c) - 'a' + 10);
    value = (value << 4) | nibble;
  }
  return value;
}
66
// Reports whether `name` is a syntactically valid label.
//
// The first character must be a letter, '_' or '.' (the dot marks a local
// label); every following character must be alphanumeric, '_' or '.'.
// An empty name is never valid.
bool IsValidLabelName(const std::string& name) {
  if (name.empty()) {
    return false;
  }

  // Characters go through unsigned char: passing a negative char (any
  // non-ASCII byte) to the <cctype> classifiers is undefined behaviour.
  const unsigned char first = static_cast<unsigned char>(name.front());
  if (!std::isalpha(first) && first != '_' && first != '.') {
    return false;
  }

  const auto is_tail_char = [](unsigned char c) {
    return std::isalnum(c) != 0 || c == '_' || c == '.';
  };
  return std::all_of(name.begin() + 1, name.end(), is_tail_char);
}
83
// Glob-style matching of `str` against `pattern`.
//
// '*' matches any run of characters (including none) and '?' matches exactly
// one character; everything else must match literally. The whole of `str`
// has to be consumed for the match to succeed.
bool WildcardMatch(const std::string& pattern, const std::string& str) {
  size_t p = 0;
  size_t s = 0;
  size_t star_pos = std::string::npos;  // index of the most recent '*'
  size_t star_match = 0;  // position in str where that '*' currently ends

  while (s < str.size()) {
    // The wildcard test must come before the literal comparison; otherwise a
    // '*' in the pattern facing a '*' in the text is consumed as a literal
    // and the match can no longer backtrack (e.g. "*a" vs "*ba").
    if (p < pattern.size() && pattern[p] == '*') {
      star_pos = p++;
      star_match = s;
    } else if (p < pattern.size() &&
               (pattern[p] == '?' || pattern[p] == str[s])) {
      ++p;
      ++s;
    } else if (star_pos != std::string::npos) {
      // Mismatch: let the last '*' swallow one more character and retry.
      p = star_pos + 1;
      s = ++star_match;
    } else {
      return false;
    }
  }

  // Trailing stars may match the empty remainder.
  while (p < pattern.size() && pattern[p] == '*') {
    ++p;
  }
  return p == pattern.size();
}
109
// Platform-independent path helper: returns the part of `path` after the
// last '/' or '\\' separator, or the whole string when there is none.
std::string GetFilename(const std::string& path) {
  const size_t separator = path.find_last_of("/\\");
  return separator == std::string::npos ? path : path.substr(separator + 1);
}
117
// Returns the extension of the final path component, including the leading
// dot (".asm"), or an empty string when that component contains no dot.
// A dot inside a directory name is ignored.
std::string GetExtension(const std::string& path) {
  // Start of the final component: just past the last '/' or '\\'.
  const size_t separator = path.find_last_of("/\\");
  const size_t name_start =
      separator == std::string::npos ? 0 : separator + 1;

  const size_t dot = path.find_last_of('.');
  const bool dot_in_name = dot != std::string::npos && dot >= name_start;
  if (!dot_in_name) {
    return "";
  }
  return path.substr(dot);
}
125
126} // namespace
127
128absl::Status SymbolProvider::LoadAsarAsmFile(const std::string& path) {
129 auto content_or = ReadFileContent(path);
130 if (!content_or.ok()) {
131 return content_or.status();
132 }
133
134 return ParseAsarAsmContent(*content_or, GetFilename(path));
135}
136
138 const std::string& directory_path) {
139#ifdef __EMSCRIPTEN__
140 // Directory iteration not supported in WASM builds
141 // Use LoadAsarAsmFile with explicit file paths instead
142 (void)directory_path;
143 return absl::UnimplementedError(
144 "Directory loading not supported in browser builds. "
145 "Please load individual symbol files.");
146#else
147 std::filesystem::path dir(directory_path);
148 if (!std::filesystem::exists(dir)) {
149 return absl::NotFoundError(
150 absl::StrFormat("Directory not found: %s", directory_path));
151 }
152
153 int files_loaded = 0;
154 for (const auto& entry : std::filesystem::directory_iterator(dir)) {
155 if (entry.is_regular_file()) {
156 auto ext = entry.path().extension().string();
157 if (ext == ".asm" || ext == ".s") {
158 auto status = LoadAsarAsmFile(entry.path().string());
159 if (status.ok()) {
160 ++files_loaded;
161 }
162 }
163 }
164 }
165
166 if (files_loaded == 0) {
167 return absl::NotFoundError("No ASM files found in directory");
168 }
169
170 return absl::OkStatus();
171#endif
172}
173
// Loads symbols from the file at `path`.
//
// The file is read in full; when `format` is SymbolFormat::kAuto the format is
// chosen by DetectFormat from the content and the file extension. The content
// is then handed to the matching parser. Returns the read error if the file
// cannot be opened, InvalidArgument for a format no case handles, otherwise
// the parser's status.
//
// NOTE(review): the `case` labels of the switch below are missing from this
// listing; only the return statements survived. Restore them from the real
// source before building.
174absl::Status SymbolProvider::LoadSymbolFile(const std::string& path,
175 SymbolFormat format) {
176 auto content_or = ReadFileContent(path);
177 if (!content_or.ok()) {
178 return content_or.status();
179 }
180
181 const std::string& content = *content_or;
182 std::string ext = GetExtension(path);
183
184 // Auto-detect format if needed
185 if (format == SymbolFormat::kAuto) {
186 format = DetectFormat(content, ext);
187 }
188
// Dispatch to the format-specific parser; the ASM parser additionally
// receives the bare file name.
189 switch (format) {
191 return ParseAsarAsmContent(content, GetFilename(path));
193 return ParseWlaDxSymFile(content);
195 return ParseMesenMlbFile(content);
198 return ParseBsnesSymFile(content);
199 default:
200 return absl::InvalidArgumentError("Unknown symbol format");
201 }
202}
203
205 symbols_by_address_.emplace(symbol.address, symbol);
206 symbols_by_name_[symbol.name] = symbol;
207}
208
209void SymbolProvider::AddAsarSymbols(const std::vector<Symbol>& symbols) {
210 for (const auto& sym : symbols) {
211 AddSymbol(sym);
212 }
213}
214
216 symbols_by_address_.clear();
217 symbols_by_name_.clear();
218}
219
220std::string SymbolProvider::GetSymbolName(uint32_t address) const {
221 auto it = symbols_by_address_.find(address);
222 if (it != symbols_by_address_.end()) {
223 return it->second.name;
224 }
225 return "";
226}
227
228std::optional<Symbol> SymbolProvider::GetSymbol(uint32_t address) const {
229 auto it = symbols_by_address_.find(address);
230 if (it != symbols_by_address_.end()) {
231 return it->second;
232 }
233 return std::nullopt;
234}
235
237 uint32_t address) const {
238 std::vector<Symbol> result;
239 auto range = symbols_by_address_.equal_range(address);
240 for (auto it = range.first; it != range.second; ++it) {
241 result.push_back(it->second);
242 }
243 return result;
244}
245
246std::optional<Symbol> SymbolProvider::FindSymbol(
247 const std::string& name) const {
248 auto it = symbols_by_name_.find(name);
249 if (it != symbols_by_name_.end()) {
250 return it->second;
251 }
252 return std::nullopt;
253}
254
256 const std::string& pattern) const {
257 std::vector<Symbol> result;
258 for (const auto& [name, sym] : symbols_by_name_) {
259 if (WildcardMatch(pattern, name)) {
260 result.push_back(sym);
261 }
262 }
263 return result;
264}
265
266std::vector<Symbol> SymbolProvider::GetSymbolsInRange(uint32_t start,
267 uint32_t end) const {
268 std::vector<Symbol> result;
269 auto it_start = symbols_by_address_.lower_bound(start);
270 auto it_end = symbols_by_address_.upper_bound(end);
271 for (auto it = it_start; it != it_end; ++it) {
272 result.push_back(it->second);
273 }
274 return result;
275}
276
277std::optional<Symbol> SymbolProvider::GetNearestSymbol(uint32_t address) const {
278 if (symbols_by_address_.empty())
279 return std::nullopt;
280
281 // Find first symbol > address
282 auto it = symbols_by_address_.upper_bound(address);
283
284 if (it == symbols_by_address_.begin()) {
285 // All symbols are > address, no symbol at or before
286 return std::nullopt;
287 }
288
289 // Go back to the symbol at or before address
290 --it;
291 return it->second;
292}
293
294std::string SymbolProvider::FormatAddress(uint32_t address,
295 uint32_t max_offset) const {
296 // Check for exact match first
297 auto exact = GetSymbol(address);
298 if (exact) {
299 return exact->name;
300 }
301
302 // Check for nearest symbol with offset
303 auto nearest = GetNearestSymbol(address);
304 if (nearest) {
305 uint32_t offset = address - nearest->address;
306 if (offset <= max_offset) {
307 return absl::StrFormat("%s+$%X", nearest->name, offset);
308 }
309 }
310
311 // No symbol found, just format as hex
312 return absl::StrFormat("$%06X", address);
313}
314
315std::function<std::string(uint32_t)> SymbolProvider::CreateResolver() const {
316 return [this](uint32_t address) -> std::string {
317 return GetSymbolName(address);
318 };
319}
320
321absl::Status SymbolProvider::ParseAsarAsmContent(const std::string& content,
322 const std::string& filename) {
323 std::istringstream stream(content);
324 std::string line;
325 int line_number = 0;
326
327 std::string current_label; // Current global label (for local label scope)
328 uint32_t last_address = 0;
329
330 // Regex patterns for usdasm format
331 // Label definition: word followed by colon at start of line
332 std::regex label_regex(R"(^([A-Za-z_][A-Za-z0-9_]*):)");
333 // Local label: dot followed by word and colon
334 std::regex local_label_regex(R"(^(\.[A-Za-z_][A-Za-z0-9_]*))");
335 // Address line: #_XXXXXX: instruction
336 std::regex address_regex(R"(^#_([0-9A-Fa-f]{6}):)");
337
338 bool pending_label = false;
339 std::string pending_label_name;
340 bool pending_is_local = false;
341
342 while (std::getline(stream, line)) {
343 ++line_number;
344
345 // Skip empty lines and comment-only lines
346 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
347 if (trimmed.empty() || trimmed[0] == ';')
348 continue;
349
350 std::smatch match;
351
352 // Check for address line
353 if (std::regex_search(line, match, address_regex)) {
354 auto addr = ParseAddress(match[1].str());
355 if (addr) {
356 last_address = *addr;
357
358 // If we have a pending label, associate it with this address
359 if (pending_label) {
360 Symbol sym;
361 sym.name = pending_label_name;
362 sym.address = *addr;
363 sym.file = filename;
364 sym.line = line_number;
365 sym.is_local = pending_is_local;
366
367 AddSymbol(sym);
368 pending_label = false;
369 }
370 }
371 }
372
373 // Check for global label (at start of line, not indented)
374 if (line[0] != ' ' && line[0] != '\t' && line[0] != '#') {
375 if (std::regex_search(line, match, label_regex)) {
376 current_label = match[1].str();
377 pending_label = true;
378 pending_label_name = current_label;
379 pending_is_local = false;
380 }
381 }
382
383 // Check for local label
384 if (std::regex_search(trimmed, match, local_label_regex)) {
385 std::string local_name = match[1].str();
386 // Create fully qualified name: GlobalLabel.local_name
387 std::string full_name =
388 current_label.empty() ? local_name : current_label + local_name;
389 pending_label = true;
390 pending_label_name = full_name;
391 pending_is_local = true;
392 }
393 }
394
395 return absl::OkStatus();
396}
397
398absl::Status SymbolProvider::ParseWlaDxSymFile(const std::string& content) {
399 // WLA-DX format:
400 // [labels]
401 // 00:8000 Reset
402 // 00:8034 MainGameLoop
403
404 std::istringstream stream(content);
405 std::string line;
406 bool in_labels_section = false;
407
408 std::regex label_regex(R"(^([0-9A-Fa-f]{2}):([0-9A-Fa-f]{4})\s+(\S+))");
409
410 while (std::getline(stream, line)) {
411 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
412
413 if (trimmed == "[labels]") {
414 in_labels_section = true;
415 continue;
416 }
417 if (trimmed.empty() || trimmed[0] == '[') {
418 if (trimmed[0] == '[')
419 in_labels_section = false;
420 continue;
421 }
422
423 if (!in_labels_section)
424 continue;
425
426 std::smatch match;
427 if (std::regex_search(trimmed, match, label_regex)) {
428 uint32_t bank = std::stoul(match[1].str(), nullptr, 16);
429 uint32_t offset = std::stoul(match[2].str(), nullptr, 16);
430 uint32_t address = (bank << 16) | offset;
431 std::string name = match[3].str();
432
433 Symbol sym(name, address);
434 AddSymbol(sym);
435 }
436 }
437
438 return absl::OkStatus();
439}
440
441absl::Status SymbolProvider::ParseMesenMlbFile(const std::string& content) {
442 // Mesen .mlb format:
443 // PRG:address:name
444 // or just
445 // address:name
446
447 std::istringstream stream(content);
448 std::string line;
449
450 std::regex label_regex(R"(^(?:PRG:)?([0-9A-Fa-f]+):(\S+))");
451
452 while (std::getline(stream, line)) {
453 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
454 if (trimmed.empty() || trimmed[0] == ';')
455 continue;
456
457 std::smatch match;
458 if (std::regex_search(trimmed, match, label_regex)) {
459 auto addr = ParseAddress(match[1].str());
460 if (addr) {
461 Symbol sym(match[2].str(), *addr);
462 AddSymbol(sym);
463 }
464 }
465 }
466
467 return absl::OkStatus();
468}
469
470absl::Status SymbolProvider::ParseBsnesSymFile(const std::string& content) {
471 // bsnes/No$snes format:
472 // 008000 Reset
473 // 008034 MainGameLoop
474
475 std::istringstream stream(content);
476 std::string line;
477
478 std::regex label_regex(R"(^([0-9A-Fa-f]{6})\s+(\S+))");
479
480 while (std::getline(stream, line)) {
481 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
482 if (trimmed.empty() || trimmed[0] == ';' || trimmed[0] == '#')
483 continue;
484
485 std::smatch match;
486 if (std::regex_search(trimmed, match, label_regex)) {
487 auto addr = ParseAddress(match[1].str());
488 if (addr) {
489 Symbol sym(match[2].str(), *addr);
490 AddSymbol(sym);
491 }
492 }
493 }
494
495 return absl::OkStatus();
496}
497
// Chooses a SymbolFormat for a symbol file: the extension is examined first,
// then marker strings in the content, then a generic default.
//
// NOTE(review): the first line of the signature and most `return` statements
// are missing from this listing; only the two kAsar returns survived. Restore
// them from the real source before building.
499 const std::string& extension) const {
500 // Check extension first
501 if (extension == ".asm" || extension == ".s") {
502 return SymbolFormat::kAsar;
503 }
504 if (extension == ".mlb") {
506 }
507
508 // Check content for format hints
// "[labels]" is the section header ParseWlaDxSymFile looks for.
509 if (content.find("[labels]") != std::string::npos) {
511 }
// "PRG:" is the optional entry prefix ParseMesenMlbFile accepts.
512 if (content.find("PRG:") != std::string::npos) {
514 }
// "#_" starts the address marker lines ParseAsarAsmContent looks for.
515 if (content.find("#_") != std::string::npos) {
516 return SymbolFormat::kAsar;
517 }
518
519 // Default to bsnes format (most generic)
521}
522
523} // namespace debug
524} // namespace emu
525} // namespace yaze
std::function< std::string(uint32_t)> CreateResolver() const
Create a symbol resolver function for the disassembler.
std::vector< Symbol > GetSymbolsInRange(uint32_t start, uint32_t end) const
Get all symbols in an address range.
std::string FormatAddress(uint32_t address, uint32_t max_offset=0x100) const
Format an address with symbol info.
void AddSymbol(const Symbol &symbol)
Add a single symbol manually.
std::map< std::string, Symbol > symbols_by_name_
std::vector< Symbol > GetSymbolsAtAddress(uint32_t address) const
Get all symbols at an address (there may be multiple)
void AddAsarSymbols(const std::vector< Symbol > &symbols)
Add symbols from Asar patch results.
absl::Status ParseBsnesSymFile(const std::string &content)
absl::Status LoadAsarAsmDirectory(const std::string &directory_path)
Load symbols from a directory of ASM files.
absl::Status ParseWlaDxSymFile(const std::string &content)
std::multimap< uint32_t, Symbol > symbols_by_address_
absl::Status LoadAsarAsmFile(const std::string &path)
Load symbols from an Asar-style ASM file (usdasm format)
SymbolFormat DetectFormat(const std::string &content, const std::string &extension) const
absl::Status LoadSymbolFile(const std::string &path, SymbolFormat format=SymbolFormat::kAuto)
Load symbols from a .sym file (various formats)
std::string GetSymbolName(uint32_t address) const
Get symbol name for an address.
void Clear()
Clear all loaded symbols.
std::optional< Symbol > GetNearestSymbol(uint32_t address) const
Get nearest symbol at or before an address.
absl::Status ParseAsarAsmContent(const std::string &content, const std::string &filename)
absl::Status ParseMesenMlbFile(const std::string &content)
std::optional< Symbol > GetSymbol(uint32_t address) const
Get full symbol info for an address.
std::optional< Symbol > FindSymbol(const std::string &name) const
Find symbol by name.
std::vector< Symbol > FindSymbolsMatching(const std::string &pattern) const
Find symbols matching a pattern (supports wildcards)
bool WildcardMatch(const std::string &pattern, const std::string &str)
absl::StatusOr< std::string > ReadFileContent(const std::string &path)
std::optional< uint32_t > ParseAddress(const std::string &str)
SymbolFormat
Supported symbol file formats.
Information about a symbol (label, constant, or address)