yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
message_data.cc
Go to the documentation of this file.
1#include "message_data.h"
2
3#include <fstream>
4#include <optional>
5#include <sstream>
6#include <string>
7
8#include "absl/strings/ascii.h"
9#include "absl/strings/str_format.h"
10#include "absl/strings/str_split.h"
11#include "core/rom_settings.h"
12#include "rom/snes.h"
13#include "rom/write_fence.h"
14#include "util/hex.h"
15#include "util/log.h"
16#include "util/macro.h"
17
18namespace yaze {
19namespace editor {
20
25
30
31uint8_t FindMatchingCharacter(char value) {
32 // CharEncoder contains duplicate glyph mappings (for example, space), so we
33 // choose the lowest byte value to keep reverse lookups deterministic.
34 uint8_t best_match = 0xFF;
35 const wchar_t target =
36 static_cast<wchar_t>(static_cast<unsigned char>(value));
37 for (const auto& [key, char_value] : CharEncoder) {
38 if (char_value != target) {
39 continue;
40 }
41 if (best_match == 0xFF || key < best_match) {
42 best_match = key;
43 }
44 }
45 return best_match;
46}
47
48int8_t FindDictionaryEntry(uint8_t value) {
49 if (value < DICTOFF || value == 0xFF) {
50 return -1;
51 }
52 return value - DICTOFF;
53}
54
55std::optional<TextElement> FindMatchingCommand(uint8_t b) {
56 for (const auto& text_element : TextCommands) {
57 if (text_element.ID == b) {
58 return text_element;
59 }
60 }
61 return std::nullopt;
62}
63
64std::optional<TextElement> FindMatchingSpecial(uint8_t value) {
65 auto it = std::ranges::find_if(SpecialChars,
66 [value](const TextElement& text_element) {
67 return text_element.ID == value;
68 });
69 if (it != SpecialChars.end()) {
70 return *it;
71 }
72 return std::nullopt;
73}
74
75ParsedElement FindMatchingElement(const std::string& str) {
76 std::smatch match;
77 std::vector<TextElement> commands_and_chars = TextCommands;
78 commands_and_chars.insert(commands_and_chars.end(), SpecialChars.begin(),
79 SpecialChars.end());
80 for (auto& text_element : commands_and_chars) {
81 match = text_element.MatchMe(str);
82 if (match.size() > 0) {
83 if (text_element.HasArgument) {
84 std::string arg = match[1].str().substr(1);
85 try {
86 return ParsedElement(text_element, std::stoi(arg, nullptr, 16));
87 } catch (const std::invalid_argument& e) {
88 util::logf("Error parsing argument for %s: %s",
89 text_element.GenericToken.c_str(), arg.c_str());
90 return ParsedElement(text_element, 0);
91 } catch (const std::out_of_range& e) {
92 util::logf("Argument out of range for %s: %s",
93 text_element.GenericToken.c_str(), arg.c_str());
94 return ParsedElement(text_element, 0);
95 }
96 } else {
97 return ParsedElement(text_element, 0);
98 }
99 }
100 }
101
102 const auto dictionary_element =
103 TextElement(0x80, DICTIONARYTOKEN, true, "Dictionary");
104
105 match = dictionary_element.MatchMe(str);
106 if (match.size() > 0) {
107 try {
108 // match[1] captures ":XX" — strip the leading colon
109 std::string dict_arg = match[1].str().substr(1);
110 return ParsedElement(dictionary_element,
111 DICTOFF + std::stoi(dict_arg, nullptr, 16));
112 } catch (const std::exception& e) {
113 util::logf("Error parsing dictionary token: %s", match[1].str().c_str());
114 return ParsedElement();
115 }
116 }
117 return ParsedElement();
118}
119
120std::string ParseTextDataByte(uint8_t value) {
121 if (CharEncoder.contains(value)) {
122 char c = CharEncoder.at(value);
123 std::string str = "";
124 str.push_back(c);
125 return str;
126 }
127
128 // Check for command.
129 if (auto text_element = FindMatchingCommand(value);
130 text_element != std::nullopt) {
131 return text_element->GenericToken;
132 }
133
134 // Check for special characters.
135 if (auto special_element = FindMatchingSpecial(value);
136 special_element != std::nullopt) {
137 return special_element->GenericToken;
138 }
139
140 // Check for dictionary.
141 int8_t dictionary = FindDictionaryEntry(value);
142 if (dictionary >= 0) {
143 return absl::StrFormat("[%s:%02X]", DICTIONARYTOKEN,
144 static_cast<unsigned char>(dictionary));
145 }
146
147 return "";
148}
149
150std::vector<uint8_t> ParseMessageToData(std::string str) {
151 std::vector<uint8_t> bytes;
152 std::string temp_string = std::move(str);
153 int pos = 0;
154 while (pos < temp_string.size()) {
155 // Get next text fragment.
156 if (temp_string[pos] == '[') {
157 int next = temp_string.find(']', pos);
158 if (next == -1) {
159 break;
160 }
161
162 ParsedElement parsedElement =
163 FindMatchingElement(temp_string.substr(pos, next - pos + 1));
164
165 const auto dictionary_element =
166 TextElement(0x80, DICTIONARYTOKEN, true, "Dictionary");
167
168 if (!parsedElement.Active) {
169 util::logf("Error parsing message: %s", temp_string);
170 break;
171 } else if (parsedElement.Parent == dictionary_element) {
172 bytes.push_back(parsedElement.Value);
173 } else {
174 bytes.push_back(parsedElement.Parent.ID);
175
176 if (parsedElement.Parent.HasArgument) {
177 bytes.push_back(parsedElement.Value);
178 }
179 }
180
181 pos = next + 1;
182 continue;
183 } else {
184 uint8_t bb = FindMatchingCharacter(temp_string[pos++]);
185
186 if (bb != 0xFF) {
187 bytes.push_back(bb);
188 }
189 }
190 }
191
192 return bytes;
193}
194
196 MessageParseResult result;
197 std::string temp_string(str);
198 size_t pos = 0;
199 bool warned_newline = false;
200
201 while (pos < temp_string.size()) {
202 char current = temp_string[pos];
203 if (current == '\r' || current == '\n') {
204 if (!warned_newline) {
205 result.warnings.push_back(
206 "Literal newlines are ignored; use [1], [2], [3], [V], or [K] "
207 "tokens for line breaks.");
208 warned_newline = true;
209 }
210 pos++;
211 continue;
212 }
213
214 if (current == '[') {
215 size_t close = temp_string.find(']', pos);
216 if (close == std::string::npos) {
217 result.errors.push_back(
218 absl::StrFormat("Unclosed token starting at position %zu", pos));
219 break;
220 }
221
222 std::string token = temp_string.substr(pos, close - pos + 1);
223 ParsedElement parsed_element = FindMatchingElement(token);
224 const auto dictionary_element =
225 TextElement(0x80, DICTIONARYTOKEN, true, "Dictionary");
226
227 if (!parsed_element.Active) {
228 result.errors.push_back(absl::StrFormat("Unknown token: %s", token));
229 pos = close + 1;
230 continue;
231 }
232
233 if (!parsed_element.Parent.HasArgument) {
234 if (token != parsed_element.Parent.GetParamToken()) {
235 result.errors.push_back(absl::StrFormat("Unknown token: %s", token));
236 pos = close + 1;
237 continue;
238 }
239 }
240
241 if (parsed_element.Parent == dictionary_element) {
242 result.bytes.push_back(parsed_element.Value);
243 } else {
244 result.bytes.push_back(parsed_element.Parent.ID);
245 if (parsed_element.Parent.HasArgument) {
246 result.bytes.push_back(parsed_element.Value);
247 }
248 }
249
250 pos = close + 1;
251 continue;
252 }
253
254 uint8_t bb = FindMatchingCharacter(current);
255 if (bb == 0xFF) {
256 result.errors.push_back(absl::StrFormat(
257 "Unsupported character '%c' at position %zu", current, pos));
258 pos++;
259 continue;
260 }
261
262 result.bytes.push_back(bb);
263 pos++;
264 }
265
266 return result;
267}
268
270 switch (bank) {
272 return "vanilla";
274 return "expanded";
275 }
276 return "vanilla";
277}
278
279absl::StatusOr<MessageBank> MessageBankFromString(std::string_view value) {
280 const std::string lowered = absl::AsciiStrToLower(std::string(value));
281 if (lowered == "vanilla") {
283 }
284 if (lowered == "expanded") {
286 }
287 return absl::InvalidArgumentError(
288 absl::StrFormat("Unknown message bank: %s", std::string(value)));
289}
290
291std::vector<DictionaryEntry> BuildDictionaryEntries(Rom* rom) {
292 std::vector<DictionaryEntry> AllDictionaries;
293 for (int i = 0; i < kNumDictionaryEntries; i++) {
294 std::vector<uint8_t> bytes;
295 std::stringstream stringBuilder;
296
297 int address = SnesToPc(
298 kTextData + (rom->data()[kPointersDictionaries + (i * 2) + 1] << 8) +
299 rom->data()[kPointersDictionaries + (i * 2)]);
300
301 int temppush_backress =
303 (rom->data()[kPointersDictionaries + ((i + 1) * 2) + 1] << 8) +
304 rom->data()[kPointersDictionaries + ((i + 1) * 2)]);
305
306 while (address < temppush_backress) {
307 uint8_t uint8_tDictionary = rom->data()[address++];
308 bytes.push_back(uint8_tDictionary);
309 stringBuilder << ParseTextDataByte(uint8_tDictionary);
310 }
311
312 AllDictionaries.push_back(DictionaryEntry{(uint8_t)i, stringBuilder.str()});
313 }
314
315 std::ranges::sort(AllDictionaries,
316 [](const DictionaryEntry& a, const DictionaryEntry& b) {
317 return a.Contents.size() > b.Contents.size();
318 });
319
320 return AllDictionaries;
321}
322
324 std::string str, const std::vector<DictionaryEntry>& dictionary) {
325 std::string temp = std::move(str);
326 for (const auto& entry : dictionary) {
327 if (entry.ContainedInString(temp)) {
328 temp = entry.ReplaceInstancesOfIn(temp);
329 }
330 }
331 return temp;
332}
333
335 uint8_t value, const std::vector<DictionaryEntry>& dictionary) {
336 for (const auto& entry : dictionary) {
337 if (entry.ID + DICTOFF == value) {
338 return entry;
339 }
340 }
341 return DictionaryEntry();
342}
343
344absl::StatusOr<MessageData> ParseSingleMessage(
345 const std::vector<uint8_t>& rom_data, int* current_pos) {
346 if (current_pos == nullptr) {
347 return absl::InvalidArgumentError("current_pos is null");
348 }
349 if (*current_pos < 0 ||
350 static_cast<size_t>(*current_pos) >= rom_data.size()) {
351 return absl::OutOfRangeError("current_pos is out of range");
352 }
353
354 MessageData message_data;
355 int pos = *current_pos;
356 uint8_t current_byte;
357 std::vector<uint8_t> temp_bytes_raw;
358 std::vector<uint8_t> temp_bytes_parsed;
359 std::string current_message_raw;
360 std::string current_message_parsed;
361
362 // Read the message data
363 while (pos < static_cast<int>(rom_data.size())) {
364 current_byte = rom_data[pos++];
365
366 if (current_byte == kMessageTerminator) {
367 message_data.ID = message_data.ID + 1;
368 message_data.Address = pos;
369 message_data.RawString = current_message_raw;
370 message_data.Data = temp_bytes_raw;
371 message_data.DataParsed = temp_bytes_parsed;
372 message_data.ContentsParsed = current_message_parsed;
373
374 temp_bytes_raw.clear();
375 temp_bytes_parsed.clear();
376 current_message_raw.clear();
377 current_message_parsed.clear();
378
379 *current_pos = pos;
380 return message_data;
381 } else if (current_byte == 0xFF) {
382 return absl::InvalidArgumentError("message terminator not found");
383 }
384
385 temp_bytes_raw.push_back(current_byte);
386
387 // Check for command.
388 auto text_element = FindMatchingCommand(current_byte);
389 if (text_element != std::nullopt) {
390 temp_bytes_parsed.push_back(current_byte);
391 if (text_element->HasArgument) {
392 if (pos >= static_cast<int>(rom_data.size())) {
393 return absl::OutOfRangeError("message command argument out of range");
394 }
395 uint8_t arg_byte = rom_data[pos++];
396 temp_bytes_raw.push_back(arg_byte);
397 temp_bytes_parsed.push_back(arg_byte);
398 current_message_raw.append(text_element->GetParamToken(arg_byte));
399 current_message_parsed.append(text_element->GetParamToken(arg_byte));
400 } else {
401 current_message_raw.append(text_element->GetParamToken());
402 current_message_parsed.append(text_element->GetParamToken());
403 }
404 continue;
405 }
406
407 // Check for special characters.
408 if (auto special_element = FindMatchingSpecial(current_byte);
409 special_element != std::nullopt) {
410 current_message_raw.append(special_element->GetParamToken());
411 current_message_parsed.append(special_element->GetParamToken());
412 temp_bytes_parsed.push_back(current_byte);
413 continue;
414 }
415
416 // Check for dictionary.
417 int8_t dictionary = FindDictionaryEntry(current_byte);
418 if (dictionary >= 0) {
419 std::string token = absl::StrFormat(
420 "[%s:%02X]", DICTIONARYTOKEN, static_cast<unsigned char>(dictionary));
421 current_message_raw.append(token);
422 current_message_parsed.append(token);
423 temp_bytes_parsed.push_back(current_byte);
424 continue;
425 }
426
427 // Everything else.
428 if (CharEncoder.contains(current_byte)) {
429 std::string str = "";
430 str.push_back(CharEncoder.at(current_byte));
431 current_message_raw.append(str);
432 current_message_parsed.append(str);
433 temp_bytes_parsed.push_back(current_byte);
434 }
435 }
436
437 *current_pos = pos;
438 return absl::InvalidArgumentError("message terminator not found");
439}
440
441std::vector<std::string> ParseMessageData(
442 std::vector<MessageData>& message_data,
443 const std::vector<DictionaryEntry>& dictionary_entries) {
444 std::vector<std::string> parsed_messages;
445
446 for (auto& message : message_data) {
447 std::string parsed_message = "";
448 // Use index-based loop to properly skip argument bytes
449 for (size_t pos = 0; pos < message.Data.size(); ++pos) {
450 uint8_t byte = message.Data[pos];
451
452 // Check for text commands first (they may have arguments to skip)
453 auto text_element = FindMatchingCommand(byte);
454 if (text_element != std::nullopt) {
455 // Add newline for certain commands
456 if (text_element->ID == kScrollVertical || text_element->ID == kLine2 ||
457 text_element->ID == kLine3) {
458 parsed_message.append("\n");
459 }
460 // If command has an argument, get it from next byte and skip it
461 if (text_element->HasArgument && pos + 1 < message.Data.size()) {
462 uint8_t arg_byte = message.Data[pos + 1];
463 parsed_message.append(text_element->GetParamToken(arg_byte));
464 pos++; // Skip the argument byte
465 } else {
466 parsed_message.append(text_element->GetParamToken());
467 }
468 continue; // Move to next byte
469 }
470
471 // Check for special characters
472 auto special_element = FindMatchingSpecial(byte);
473 if (special_element != std::nullopt) {
474 parsed_message.append(special_element->GetParamToken());
475 continue;
476 }
477
478 // Check for dictionary entries
479 if (byte >= DICTOFF && byte < (DICTOFF + 97)) {
480 DictionaryEntry dic_entry;
481 for (const auto& entry : dictionary_entries) {
482 if (entry.ID == byte - DICTOFF) {
483 dic_entry = entry;
484 break;
485 }
486 }
487 parsed_message.append(dic_entry.Contents);
488 continue;
489 }
490
491 // Finally check for regular characters
492 if (CharEncoder.contains(byte)) {
493 parsed_message.push_back(CharEncoder.at(byte));
494 }
495 }
496 parsed_messages.push_back(parsed_message);
497 }
498
499 return parsed_messages;
500}
501
502std::vector<MessageData> ReadAllTextData(uint8_t* rom, int pos, int max_pos) {
503 std::vector<MessageData> list_of_texts;
504 int message_id = 0;
505
506 if (!rom) {
507 return list_of_texts;
508 }
509 if (max_pos > 0 && (pos < 0 || pos >= max_pos)) {
510 return list_of_texts;
511 }
512
513 std::vector<uint8_t> raw_message;
514 std::vector<uint8_t> parsed_message;
515 std::string current_raw_message;
516 std::string current_parsed_message;
517
518 bool did_bank_switch = false;
519 uint8_t current_byte = 0;
520 while (current_byte != 0xFF) {
521 if (max_pos > 0 && (pos < 0 || pos >= max_pos))
522 break;
523 current_byte = rom[pos++];
524 if (current_byte == kMessageTerminator) {
525 list_of_texts.push_back(
526 MessageData(message_id++, pos, current_raw_message, raw_message,
527 current_parsed_message, parsed_message));
528 raw_message.clear();
529 parsed_message.clear();
530 current_raw_message.clear();
531 current_parsed_message.clear();
532 continue;
533 } else if (current_byte == 0xFF) {
534 break;
535 }
536
537 raw_message.push_back(current_byte);
538
539 auto text_element = FindMatchingCommand(current_byte);
540 if (text_element != std::nullopt) {
541 parsed_message.push_back(current_byte);
542 if (text_element->HasArgument) {
543 if (max_pos > 0 && (pos < 0 || pos >= max_pos))
544 break;
545 current_byte = rom[pos++];
546 raw_message.push_back(current_byte);
547 parsed_message.push_back(current_byte);
548 }
549
550 current_raw_message.append(text_element->GetParamToken(current_byte));
551 current_parsed_message.append(text_element->GetParamToken(current_byte));
552
553 if (text_element->Token == kBankToken && !did_bank_switch) {
554 did_bank_switch = true;
555 pos = kTextData2;
556 }
557
558 continue;
559 }
560
561 // Check for special characters.
562 auto special_element = FindMatchingSpecial(current_byte);
563 if (special_element != std::nullopt) {
564 current_raw_message.append(special_element->GetParamToken());
565 current_parsed_message.append(special_element->GetParamToken());
566 parsed_message.push_back(current_byte);
567 continue;
568 }
569
570 // Check for dictionary.
571 int8_t dictionary = FindDictionaryEntry(current_byte);
572 if (dictionary >= 0) {
573 current_raw_message.append(absl::StrFormat(
574 "[%s:%s]", DICTIONARYTOKEN,
575 util::HexByte(static_cast<unsigned char>(dictionary))));
576
577 // Safety: bounds-check dictionary pointer reads and dictionary expansion.
578 // This parser is used by tooling (RomDoctor) that may run on dummy or
579 // partially-initialized ROM buffers.
580 const int ptr_a = kPointersDictionaries + (dictionary * 2);
581 const int ptr_b = kPointersDictionaries + ((dictionary + 1) * 2);
582 if (max_pos > 0) {
583 if (ptr_a < 0 || ptr_a + 1 >= max_pos || ptr_b < 0 ||
584 ptr_b + 1 >= max_pos) {
585 continue;
586 }
587 }
588
589 uint32_t address =
590 Get24LocalFromPC(rom, kPointersDictionaries + (dictionary * 2));
591 uint32_t address_end =
592 Get24LocalFromPC(rom, kPointersDictionaries + ((dictionary + 1) * 2));
593
594 if (max_pos > 0) {
595 const uint32_t max_u = static_cast<uint32_t>(max_pos);
596 if (address >= max_u || address_end > max_u || address_end < address) {
597 continue;
598 }
599 }
600
601 for (uint32_t i = address; i < address_end; i++) {
602 if (max_pos > 0 && i >= static_cast<uint32_t>(max_pos))
603 break;
604 parsed_message.push_back(rom[i]);
605 current_parsed_message.append(ParseTextDataByte(rom[i]));
606 }
607
608 continue;
609 }
610
611 // Everything else.
612 if (CharEncoder.contains(current_byte)) {
613 std::string str = "";
614 str.push_back(CharEncoder.at(current_byte));
615 current_raw_message.append(str);
616 current_parsed_message.append(str);
617 parsed_message.push_back(current_byte);
618 }
619 }
620
621 return list_of_texts;
622}
623
624absl::Status LoadExpandedMessages(std::string& expanded_message_path,
625 std::vector<std::string>& parsed_messages,
626 std::vector<MessageData>& expanded_messages,
627 std::vector<DictionaryEntry>& dictionary) {
628 static Rom expanded_message_rom;
629 if (!expanded_message_rom.LoadFromFile(expanded_message_path).ok()) {
630 return absl::InternalError("Failed to load expanded message ROM");
631 }
632 expanded_messages = ReadAllTextData(expanded_message_rom.mutable_data(), 0);
633 auto parsed_expanded_messages =
634 ParseMessageData(expanded_messages, dictionary);
635 // Insert into parsed_messages
636 for (const auto& expanded_message : expanded_messages) {
637 parsed_messages.push_back(parsed_expanded_messages[expanded_message.ID]);
638 }
639 return absl::OkStatus();
640}
641
643 const std::vector<MessageData>& messages) {
644 nlohmann::json j = nlohmann::json::array();
645 for (const auto& msg : messages) {
646 j.push_back({{"id", msg.ID},
647 {"address", msg.Address},
648 {"raw_string", msg.RawString},
649 {"parsed_string", msg.ContentsParsed}});
650 }
651 return j;
652}
653
654absl::Status ExportMessagesToJson(const std::string& path,
655 const std::vector<MessageData>& messages) {
656 try {
657 nlohmann::json j = SerializeMessagesToJson(messages);
658 std::ofstream file(path);
659 if (!file.is_open()) {
660 return absl::InternalError(
661 absl::StrFormat("Failed to open file for writing: %s", path));
662 }
663 file << j.dump(2); // Pretty print with 2-space indent
664 return absl::OkStatus();
665 } catch (const std::exception& e) {
666 return absl::InternalError(
667 absl::StrFormat("JSON export failed: %s", e.what()));
668 }
669}
670
672 const std::vector<MessageData>& vanilla,
673 const std::vector<MessageData>& expanded) {
674 nlohmann::json j;
675 j["format"] = "yaze-message-bundle";
676 j["version"] = kMessageBundleVersion;
677 j["counts"] = {{"vanilla", vanilla.size()}, {"expanded", expanded.size()}};
678 j["messages"] = nlohmann::json::array();
679
680 auto append_messages = [&j](const std::vector<MessageData>& messages,
681 MessageBank bank) {
682 for (const auto& msg : messages) {
683 nlohmann::json entry;
684 entry["id"] = msg.ID;
685 entry["bank"] = MessageBankToString(bank);
686 entry["address"] = msg.Address;
687 entry["raw"] = msg.RawString;
688 entry["parsed"] = msg.ContentsParsed;
689 entry["text"] =
690 !msg.RawString.empty() ? msg.RawString : msg.ContentsParsed;
691 entry["length"] = msg.Data.size();
692 const std::string validation_text =
693 !msg.RawString.empty() ? msg.RawString : msg.ContentsParsed;
694 auto warnings = ValidateMessageLineWidths(validation_text);
695 if (!warnings.empty()) {
696 entry["line_width_warnings"] = warnings;
697 }
698 j["messages"].push_back(entry);
699 }
700 };
701
702 append_messages(vanilla, MessageBank::kVanilla);
703 append_messages(expanded, MessageBank::kExpanded);
704
705 return j;
706}
707
709 const std::string& path, const std::vector<MessageData>& vanilla,
710 const std::vector<MessageData>& expanded) {
711 try {
712 nlohmann::json j = SerializeMessageBundle(vanilla, expanded);
713 std::ofstream file(path);
714 if (!file.is_open()) {
715 return absl::InternalError(
716 absl::StrFormat("Failed to open file for writing: %s", path));
717 }
718 file << j.dump(2);
719 return absl::OkStatus();
720 } catch (const std::exception& e) {
721 return absl::InternalError(
722 absl::StrFormat("Message bundle export failed: %s", e.what()));
723 }
724}
725
726namespace {
727absl::StatusOr<MessageBundleEntry> ParseMessageBundleEntry(
728 const nlohmann::json& entry, MessageBank default_bank) {
729 if (!entry.is_object()) {
730 return absl::InvalidArgumentError("Message entry must be an object");
731 }
732
733 MessageBundleEntry result;
734 result.id = entry.value("id", -1);
735 if (result.id < 0) {
736 return absl::InvalidArgumentError("Message entry missing valid id");
737 }
738
739 if (entry.contains("bank")) {
740 if (!entry["bank"].is_string()) {
741 return absl::InvalidArgumentError("Message entry bank must be string");
742 }
743 auto bank_or = MessageBankFromString(entry["bank"].get<std::string>());
744 if (!bank_or.ok()) {
745 return bank_or.status();
746 }
747 result.bank = bank_or.value();
748 } else {
749 result.bank = default_bank;
750 }
751
752 if (entry.contains("raw") && entry["raw"].is_string()) {
753 result.raw = entry["raw"].get<std::string>();
754 } else if (entry.contains("raw_string") && entry["raw_string"].is_string()) {
755 result.raw = entry["raw_string"].get<std::string>();
756 }
757
758 if (entry.contains("parsed") && entry["parsed"].is_string()) {
759 result.parsed = entry["parsed"].get<std::string>();
760 } else if (entry.contains("parsed_string") &&
761 entry["parsed_string"].is_string()) {
762 result.parsed = entry["parsed_string"].get<std::string>();
763 }
764
765 if (entry.contains("text") && entry["text"].is_string()) {
766 result.text = entry["text"].get<std::string>();
767 }
768
769 if (result.text.empty()) {
770 if (!result.raw.empty()) {
771 result.text = result.raw;
772 } else if (!result.parsed.empty()) {
773 result.text = result.parsed;
774 }
775 }
776
777 if (result.text.empty()) {
778 return absl::InvalidArgumentError(
779 absl::StrFormat("Message entry %d missing text content", result.id));
780 }
781
782 return result;
783}
784} // namespace
785
786absl::StatusOr<std::vector<MessageBundleEntry>> ParseMessageBundleJson(
787 const nlohmann::json& json) {
788 std::vector<MessageBundleEntry> entries;
789
790 if (json.is_array()) {
791 for (const auto& entry : json) {
792 auto parsed_or = ParseMessageBundleEntry(entry, MessageBank::kVanilla);
793 if (!parsed_or.ok()) {
794 return parsed_or.status();
795 }
796 entries.push_back(parsed_or.value());
797 }
798 return entries;
799 }
800
801 if (!json.is_object()) {
802 return absl::InvalidArgumentError("Message bundle JSON must be object");
803 }
804
805 if (json.contains("version") && json["version"].is_number_integer()) {
806 int version = json["version"].get<int>();
807 if (version != kMessageBundleVersion) {
808 return absl::InvalidArgumentError(
809 absl::StrFormat("Unsupported message bundle version: %d", version));
810 }
811 }
812
813 if (!json.contains("messages") || !json["messages"].is_array()) {
814 return absl::InvalidArgumentError("Message bundle missing messages array");
815 }
816
817 for (const auto& entry : json["messages"]) {
818 auto parsed_or = ParseMessageBundleEntry(entry, MessageBank::kVanilla);
819 if (!parsed_or.ok()) {
820 return parsed_or.status();
821 }
822 entries.push_back(parsed_or.value());
823 }
824
825 return entries;
826}
827
828absl::StatusOr<std::vector<MessageBundleEntry>> LoadMessageBundleFromJson(
829 const std::string& path) {
830 std::ifstream file(path);
831 if (!file.is_open()) {
832 return absl::NotFoundError(
833 absl::StrFormat("Cannot open message bundle: %s", path));
834 }
835
836 nlohmann::json json;
837 try {
838 file >> json;
839 } catch (const std::exception& e) {
840 return absl::InvalidArgumentError(
841 absl::StrFormat("Failed to parse JSON: %s", e.what()));
842 }
843
844 return ParseMessageBundleJson(json);
845}
846
847// ===========================================================================
848// Line Width Validation
849// ===========================================================================
850
851std::vector<std::string> ValidateMessageLineWidths(const std::string& message) {
852 std::vector<std::string> warnings;
853
854 // Split message into lines on line-break tokens: [1], [2], [3], [V], [K]
855 // We walk through the string, counting visible characters per line.
856 int line_num = 1;
857 int visible_chars = 0;
858 bool all_spaces_this_line = true;
859 size_t pos = 0;
860
861 while (pos < message.size()) {
862 if (message[pos] == '[') {
863 // Find the closing bracket
864 size_t close = message.find(']', pos);
865 if (close == std::string::npos)
866 break;
867
868 std::string token = message.substr(pos, close - pos + 1);
869 pos = close + 1;
870
871 // Check if this token is a line-breaking command
872 // Line breaks: [1], [2], [3], [V], [K]
873 if (token == "[1]" || token == "[2]" || token == "[3]" ||
874 token == "[V]" || token == "[K]") {
875 // Check current line width before breaking.
876 // Exempt whitespace-only lines (used as screen clears in ALTTP).
877 if (visible_chars > kMaxLineWidth && !all_spaces_this_line) {
878 warnings.push_back(
879 absl::StrFormat("Line %d: %d visible characters (max %d)",
880 line_num, visible_chars, kMaxLineWidth));
881 }
882 line_num++;
883 visible_chars = 0;
884 all_spaces_this_line = true;
885 }
886 // Other command tokens ([W:02], [S:03], [SFX:2D], [L], [...], etc.)
887 // are not counted as visible characters - they're control codes or
888 // expand to game-rendered content that we can't measure in chars.
889 // Exception: [L] expands to player name but width varies (1-6 chars).
890 // For simplicity, we don't count command tokens.
891 continue;
892 }
893
894 // Regular visible character
895 if (message[pos] != ' ')
896 all_spaces_this_line = false;
897 visible_chars++;
898 pos++;
899 }
900
901 // Check the last line (exempt whitespace-only lines)
902 if (visible_chars > kMaxLineWidth && !all_spaces_this_line) {
903 warnings.push_back(
904 absl::StrFormat("Line %d: %d visible characters (max %d)", line_num,
905 visible_chars, kMaxLineWidth));
906 }
907
908 return warnings;
909}
910
911// ===========================================================================
912// Org Format (.org) Import/Export
913// ===========================================================================
914
// Parses an org-mode message header of the form "** XX - Label Text", where
// XX is the message ID in hexadecimal.
//
// Returns {id, label} on success. Returns std::nullopt when the line does not
// start with "** ", has no " - " separator, or when the ID is not made up
// solely of hex digits (surrounding spaces/tabs are tolerated) or does not
// fit in an int.
std::optional<std::pair<int, std::string>> ParseOrgHeader(
    const std::string& line) {
  if (line.size() < 6 || line.compare(0, 3, "** ") != 0) {
    return std::nullopt;
  }

  const size_t sep = line.find(" - ", 3);
  if (sep == std::string::npos) {
    return std::nullopt;
  }

  // Isolate the ID between "** " and " - " and trim blanks around it.
  std::string hex_id = line.substr(3, sep - 3);
  const size_t first = hex_id.find_first_not_of(" \t");
  if (first == std::string::npos) {
    return std::nullopt;
  }
  const size_t last = hex_id.find_last_not_of(" \t");
  hex_id = hex_id.substr(first, last - first + 1);

  // std::stoi alone is too lenient: it stops at the first bad character
  // ("1G" would parse as 1) and accepts a sign, so validate the digits first.
  if (hex_id.find_first_not_of("0123456789abcdefABCDEF") != std::string::npos) {
    return std::nullopt;
  }

  int message_id = 0;
  try {
    message_id = std::stoi(hex_id, nullptr, 16);
  } catch (const std::exception&) {
    return std::nullopt;  // Too many digits to fit in an int.
  }

  return std::make_pair(message_id, line.substr(sep + 3));
}
943
944std::vector<std::pair<int, std::string>> ParseOrgContent(
945 const std::string& content) {
946 std::vector<std::pair<int, std::string>> messages;
947 std::istringstream stream(content);
948 std::string line;
949
950 int current_id = -1;
951 std::string current_body;
952
953 while (std::getline(stream, line)) {
954 // Check if this is a header line
955 auto header = ParseOrgHeader(line);
956 if (header.has_value()) {
957 // Save previous message if any
958 if (current_id >= 0) {
959 // Trim trailing newline from body
960 while (!current_body.empty() && current_body.back() == '\n') {
961 current_body.pop_back();
962 }
963 messages.push_back({current_id, current_body});
964 }
965
966 current_id = header->first;
967 current_body.clear();
968 continue;
969 }
970
971 // Skip top-level org headers (single *)
972 if (!line.empty() && line[0] == '*' &&
973 (line.size() < 2 || line[1] != '*')) {
974 continue;
975 }
976
977 // Accumulate body text
978 if (current_id >= 0) {
979 if (!current_body.empty()) {
980 current_body += "\n";
981 }
982 current_body += line;
983 }
984 }
985
986 // Save last message
987 if (current_id >= 0) {
988 while (!current_body.empty() && current_body.back() == '\n') {
989 current_body.pop_back();
990 }
991 messages.push_back({current_id, current_body});
992 }
993
994 return messages;
995}
996
998 const std::vector<std::pair<int, std::string>>& messages,
999 const std::vector<std::string>& labels) {
1000 std::string output;
1001 output += "* Oracle of Secrets English Dialogue\n";
1002
1003 for (size_t i = 0; i < messages.size(); ++i) {
1004 const auto& [msg_id, body] = messages[i];
1005 std::string label = (i < labels.size())
1006 ? labels[i]
1007 : absl::StrFormat("Message %02X", msg_id);
1008
1009 output += absl::StrFormat("** %02X - %s\n", msg_id, label);
1010 output += body;
1011 output += "\n\n";
1012 }
1013
1014 return output;
1015}
1016
1017// ===========================================================================
1018// Expanded Message Bank
1019// ===========================================================================
1020
1021std::vector<MessageData> ReadExpandedTextData(uint8_t* rom, int pos) {
1022 // Reuse ReadAllTextData — it already handles 0x7F terminators and 0xFF end
1023 return ReadAllTextData(rom, pos);
1024}
1025
1026absl::Status WriteExpandedTextData(Rom* rom, int start, int end,
1027 const std::vector<std::string>& messages) {
1028 if (rom == nullptr || !rom->is_loaded()) {
1029 return absl::InvalidArgumentError("ROM not loaded");
1030 }
1031 if (start < 0 || end < start) {
1032 return absl::InvalidArgumentError("Invalid expanded message region");
1033 }
1034
1035 const int capacity = end - start + 1;
1036 if (capacity <= 0) {
1037 return absl::InvalidArgumentError(
1038 "Expanded message region has no capacity");
1039 }
1040
1041 const auto& data = rom->vector();
1042 if (end >= static_cast<int>(data.size())) {
1043 return absl::OutOfRangeError("Expanded message region out of ROM range");
1044 }
1045
1046 // Serialize into a contiguous buffer, then do a single ROM write for safety
1047 // and determinism (and to honor write fences).
1048 std::vector<uint8_t> blob;
1049 blob.reserve(static_cast<size_t>(capacity));
1050
1051 int used = 0;
1052 for (size_t i = 0; i < messages.size(); ++i) {
1053 auto bytes = ParseMessageToData(messages[i]);
1054 const int needed = static_cast<int>(bytes.size()) + 1; // +0x7F
1055
1056 // Always reserve space for the final 0xFF.
1057 if (used + needed + 1 > capacity) {
1058 return absl::ResourceExhaustedError(absl::StrFormat(
1059 "Expanded message data exceeds bank boundary "
1060 "(at message %d, used=%d, needed=%d, capacity=%d, end=0x%06X)",
1061 static_cast<int>(i), used, needed, capacity, end));
1062 }
1063
1064 blob.insert(blob.end(), bytes.begin(), bytes.end());
1065 blob.push_back(kMessageTerminator);
1066 used += needed;
1067 }
1068
1069 if (used + 1 > capacity) {
1070 return absl::ResourceExhaustedError(
1071 "No space for end-of-region marker (0xFF)");
1072 }
1073 blob.push_back(0xFF);
1074
1075 // ROM safety: this writer must only touch the expanded message region.
1076 // NOTE: `end` is inclusive; convert to half-open for the fence.
1078 const uint32_t fence_start = static_cast<uint32_t>(start);
1079 const uint32_t fence_end =
1080 static_cast<uint32_t>(static_cast<uint64_t>(end) + 1ULL);
1081 RETURN_IF_ERROR(fence.Allow(fence_start, fence_end, "ExpandedMessageBank"));
1082 yaze::rom::ScopedWriteFence scope(rom, &fence);
1083
1084 return rom->WriteVector(start, std::move(blob));
1085}
1086
1087absl::Status WriteExpandedTextData(uint8_t* rom, int start, int end,
1088 const std::vector<std::string>& messages) {
1089 int pos = start;
1090 int capacity = end - start + 1;
1091
1092 for (size_t i = 0; i < messages.size(); ++i) {
1093 auto bytes = ParseMessageToData(messages[i]);
1094
1095 // Check space: bytes + terminator (0x7F) + final end marker (0xFF)
1096 int needed = static_cast<int>(bytes.size()) + 1; // +1 for 0x7F
1097 if (i == messages.size() - 1) {
1098 needed += 1; // +1 for final 0xFF
1099 }
1100
1101 if (pos + needed - start > capacity) {
1102 return absl::ResourceExhaustedError(
1103 absl::StrFormat("Expanded message data exceeds bank boundary "
1104 "(at message %d, pos 0x%06X, end 0x%06X)",
1105 static_cast<int>(i), pos, end));
1106 }
1107
1108 // Write encoded bytes
1109 for (uint8_t byte : bytes) {
1110 rom[pos++] = byte;
1111 }
1112 // Write message terminator
1113 rom[pos++] = kMessageTerminator;
1114 }
1115
1116 // Write end-of-region marker
1117 if (pos - start >= capacity) {
1118 return absl::ResourceExhaustedError(
1119 "No space for end-of-region marker (0xFF)");
1120 }
1121 rom[pos++] = 0xFF;
1122
1123 return absl::OkStatus();
1124}
1125
1126absl::Status WriteAllTextData(Rom* rom,
1127 const std::vector<MessageData>& messages) {
1128 if (rom == nullptr || !rom->is_loaded()) {
1129 return absl::InvalidArgumentError("ROM not loaded");
1130 }
1131
1132 int pos = kTextData;
1133 bool in_second_bank = false;
1134
1135 for (const auto& message : messages) {
1136 for (uint8_t value : message.Data) {
1137 RETURN_IF_ERROR(rom->WriteByte(pos, value));
1138
1139 if (value == kBankSwitchCommand) {
1140 if (!in_second_bank && pos > kTextDataEnd) {
1141 return absl::ResourceExhaustedError(absl::StrFormat(
1142 "Text data exceeds first bank (pos 0x%06X)", pos));
1143 }
1144 pos = kTextData2 - 1;
1145 in_second_bank = true;
1146 }
1147
1148 pos++;
1149 }
1150
1152 }
1153
1154 if (!in_second_bank && pos > kTextDataEnd) {
1155 return absl::ResourceExhaustedError(
1156 absl::StrFormat("Text data exceeds first bank (pos 0x%06X)", pos));
1157 }
1158
1159 if (in_second_bank && pos > kTextData2End) {
1160 return absl::ResourceExhaustedError(
1161 absl::StrFormat("Text data exceeds second bank (pos 0x%06X)", pos));
1162 }
1163
1164 RETURN_IF_ERROR(rom->WriteByte(pos, 0xFF));
1165 return absl::OkStatus();
1166}
1167
1168} // namespace editor
1169} // namespace yaze
The Rom class is used to load, save, and modify Rom data. This is a generic SNES ROM container and do...
Definition rom.h:28
absl::Status LoadFromFile(const std::string &filename, const LoadOptions &options=LoadOptions::Defaults())
Definition rom.cc:155
absl::Status WriteByte(int addr, uint8_t value)
Definition rom.cc:476
auto mutable_data()
Definition rom.h:140
const auto & vector() const
Definition rom.h:143
absl::Status WriteVector(int addr, std::vector< uint8_t > data)
Definition rom.cc:548
auto data() const
Definition rom.h:139
bool is_loaded() const
Definition rom.h:132
static RomSettings & Get()
uint32_t GetAddressOr(const std::string &key, uint32_t default_value) const
absl::Status Allow(uint32_t start, uint32_t end, std::string_view label)
Definition write_fence.h:32
constexpr char kExpandedMessageEnd[]
constexpr char kExpandedMessageStart[]
absl::StatusOr< MessageBundleEntry > ParseMessageBundleEntry(const nlohmann::json &entry, MessageBank default_bank)
uint8_t FindMatchingCharacter(char value)
const std::string kBankToken
nlohmann::json SerializeMessagesToJson(const std::vector< MessageData > &messages)
absl::StatusOr< MessageBank > MessageBankFromString(std::string_view value)
DictionaryEntry FindRealDictionaryEntry(uint8_t value, const std::vector< DictionaryEntry > &dictionary)
constexpr int kMaxLineWidth
int GetExpandedTextDataStart()
constexpr int kMessageBundleVersion
const std::string DICTIONARYTOKEN
constexpr uint8_t kScrollVertical
std::string ParseTextDataByte(uint8_t value)
absl::Status WriteAllTextData(Rom *rom, const std::vector< MessageData > &messages)
absl::Status LoadExpandedMessages(std::string &expanded_message_path, std::vector< std::string > &parsed_messages, std::vector< MessageData > &expanded_messages, std::vector< DictionaryEntry > &dictionary)
constexpr int kTextData
std::optional< std::pair< int, std::string > > ParseOrgHeader(const std::string &line)
std::string MessageBankToString(MessageBank bank)
constexpr int kExpandedTextDataEndDefault
constexpr int kTextData2
std::string ReplaceAllDictionaryWords(std::string str, const std::vector< DictionaryEntry > &dictionary)
absl::Status WriteExpandedTextData(Rom *rom, int start, int end, const std::vector< std::string > &messages)
nlohmann::json SerializeMessageBundle(const std::vector< MessageData > &vanilla, const std::vector< MessageData > &expanded)
constexpr uint8_t kLine2
constexpr int kPointersDictionaries
absl::StatusOr< std::vector< MessageBundleEntry > > LoadMessageBundleFromJson(const std::string &path)
constexpr int kNumDictionaryEntries
absl::StatusOr< MessageData > ParseSingleMessage(const std::vector< uint8_t > &rom_data, int *current_pos)
absl::StatusOr< std::vector< MessageBundleEntry > > ParseMessageBundleJson(const nlohmann::json &json)
std::vector< std::string > ParseMessageData(std::vector< MessageData > &message_data, const std::vector< DictionaryEntry > &dictionary_entries)
std::optional< TextElement > FindMatchingSpecial(uint8_t value)
constexpr uint8_t kMessageTerminator
std::vector< MessageData > ReadAllTextData(uint8_t *rom, int pos, int max_pos)
constexpr int kTextData2End
std::vector< DictionaryEntry > BuildDictionaryEntries(Rom *rom)
constexpr uint8_t kBankSwitchCommand
std::vector< uint8_t > ParseMessageToData(std::string str)
absl::Status ExportMessagesToJson(const std::string &path, const std::vector< MessageData > &messages)
absl::Status ExportMessageBundleToJson(const std::string &path, const std::vector< MessageData > &vanilla, const std::vector< MessageData > &expanded)
constexpr uint8_t DICTOFF
std::string ExportToOrgFormat(const std::vector< std::pair< int, std::string > > &messages, const std::vector< std::string > &labels)
std::vector< MessageData > ReadExpandedTextData(uint8_t *rom, int pos)
std::optional< TextElement > FindMatchingCommand(uint8_t b)
MessageParseResult ParseMessageToDataWithDiagnostics(std::string_view str)
int GetExpandedTextDataEnd()
ParsedElement FindMatchingElement(const std::string &str)
std::vector< std::string > ValidateMessageLineWidths(const std::string &message)
std::vector< std::pair< int, std::string > > ParseOrgContent(const std::string &content)
constexpr int kExpandedTextDataDefault
constexpr uint8_t kLine3
int8_t FindDictionaryEntry(uint8_t value)
constexpr int kTextDataEnd
std::string HexByte(uint8_t byte, HexStringParams params)
Definition hex.cc:30
void logf(const absl::FormatSpec< Args... > &format, Args &&... args)
Definition log.h:115
uint32_t Get24LocalFromPC(uint8_t *data, int addr, bool pc=true)
Definition snes.h:30
uint32_t SnesToPc(uint32_t addr) noexcept
Definition snes.h:8
#define RETURN_IF_ERROR(expr)
Definition snes.cc:22
std::vector< uint8_t > Data
std::vector< uint8_t > DataParsed
std::vector< uint8_t > bytes
std::vector< std::string > errors
std::vector< std::string > warnings
std::string GetParamToken(uint8_t value=0) const