yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
message_data.h
Go to the documentation of this file.
1#ifndef YAZE_APP_EDITOR_MESSAGE_MESSAGE_DATA_H
2#define YAZE_APP_EDITOR_MESSAGE_MESSAGE_DATA_H
3
4// ===========================================================================
5// Message Data System for Zelda 3 (A Link to the Past)
6// ===========================================================================
7//
8// This system handles the parsing, editing, and serialization of in-game text
9// messages from The Legend of Zelda: A Link to the Past (SNES).
10//
11// ## Architecture Overview
12//
13// The message system consists of several key components:
14//
15// 1. **Character Encoding** (`CharEncoder`):
16// Maps byte values (0x00-0x66) to displayable characters (A-Z, a-z, 0-9,
17// punctuation). This is the basic text representation in the ROM.
18//
19// 2. **Text Commands** (`TextCommands`):
20// Special control codes (0x67-0x80) that control message display behavior:
21// - Window appearance (border, position)
22// - Text flow (line breaks, scrolling, delays)
23// - Interactive elements (choices, player name insertion)
24// - Some commands have arguments (e.g., [W:02] = window border type 2)
25//
26// 3. **Special Characters** (`SpecialChars`):
27// Extended character set (0x43-0x5E) for game-specific symbols:
28// - Directional arrows
29// - Button prompts (A, B, X, Y)
30// - HP indicators
31// - Hieroglyphs
32//
33// 4. **Dictionary System** (`DictionaryEntry`):
34// Compression system using byte values 0x88+ to reference common
35// words/phrases stored separately in ROM. This saves space by replacing
36// frequently-used text with single-byte references.
37//
38// 5. **Message Data** (`MessageData`):
39// Represents a single in-game message with both raw binary data and parsed
40// human-readable text. Each message is terminated by 0x7F in ROM.
41//
42// ## Data Flow
43//
44// ### Reading from ROM:
45// ROM bytes → ReadAllTextData() → MessageData (raw) → ParseMessageData() →
46// Human-readable string with [command] tokens
47//
48// ### Writing to ROM:
49// User edits text → ParseMessageToData() → Binary bytes → ROM
50//
51// ### Dictionary Optimization:
52// Text string → OptimizeMessageForDictionary() → Replace common phrases with
53// [D:XX] tokens → Smaller binary representation
54//
55// ## ROM Memory Layout (SNES)
56//
57// - Text Data Block 1: 0xE0000 - 0xE7FFF (32KB)
58// - Text Data Block 2: 0x75F40 - 0x773FF (5.3KB)
59// - Dictionary Pointers: 0x74703
60// - Character Widths: Table storing pixel widths for proportional font
61// - Font Graphics: 0x70000+ (2bpp tile data)
62//
63// ## Message Format
64//
65// Messages are stored as byte sequences terminated by 0x7F:
66// Example: [0x00, 0x01, 0x02, 0x7F] = "ABC"
67// Example: [0x6A, 0x59, 0x2C, 0x1A, 0x2F, 0x1E, 0x1D, 0x59, 0x07, 0x32, 0x2B,
68// 0x2E, 0x25, 0x1E, 0x7F] = "[L] saved Hyrule" (0x6A = player name command)
69//
70// ## Token Syntax (Human-Readable Format)
71//
72// Commands: [TOKEN:HEX] or [TOKEN]
73// Examples: [W:02] (window border), [K] (wait for key)
74// Dictionary: [D:HEX]
75// Examples: [D:00] (first dictionary entry)
76// Special Chars:[TOKEN]
77// Examples: [A] (A button), [UP] (up arrow)
78//
79// ===========================================================================
80
81#include <optional>
82#include <regex>
83#include <string>
84#include <string_view>
85#include <unordered_map>
86#include <vector>
87
88#include <nlohmann/json.hpp>
89#include "absl/strings/match.h"
90#include "absl/strings/str_format.h"
91#include "absl/strings/str_replace.h"
92#include "rom/rom.h"
93
94namespace yaze {
95namespace editor {
96
97const std::string kBankToken = "BANK";
98const std::string DICTIONARYTOKEN = "D";
99constexpr uint8_t kMessageTerminator = 0x7F; // Marks end of message in ROM
100constexpr uint8_t DICTOFF = 0x88; // Dictionary entries start at byte 0x88
101constexpr uint8_t kWidthArraySize = 100;
102
103// Character encoding table: Maps ROM byte values to displayable characters
104// Used for both parsing ROM data into text and converting text back to bytes
105static const std::unordered_map<uint8_t, wchar_t> CharEncoder = {
106 {0x00, 'A'}, {0x01, 'B'}, {0x02, 'C'}, {0x03, 'D'}, {0x04, 'E'},
107 {0x05, 'F'}, {0x06, 'G'}, {0x07, 'H'}, {0x08, 'I'}, {0x09, 'J'},
108 {0x0A, 'K'}, {0x0B, 'L'}, {0x0C, 'M'}, {0x0D, 'N'}, {0x0E, 'O'},
109 {0x0F, 'P'}, {0x10, 'Q'}, {0x11, 'R'}, {0x12, 'S'}, {0x13, 'T'},
110 {0x14, 'U'}, {0x15, 'V'}, {0x16, 'W'}, {0x17, 'X'}, {0x18, 'Y'},
111 {0x19, 'Z'}, {0x1A, 'a'}, {0x1B, 'b'}, {0x1C, 'c'}, {0x1D, 'd'},
112 {0x1E, 'e'}, {0x1F, 'f'}, {0x20, 'g'}, {0x21, 'h'}, {0x22, 'i'},
113 {0x23, 'j'}, {0x24, 'k'}, {0x25, 'l'}, {0x26, 'm'}, {0x27, 'n'},
114 {0x28, 'o'}, {0x29, 'p'}, {0x2A, 'q'}, {0x2B, 'r'}, {0x2C, 's'},
115 {0x2D, 't'}, {0x2E, 'u'}, {0x2F, 'v'}, {0x30, 'w'}, {0x31, 'x'},
116 {0x32, 'y'}, {0x33, 'z'}, {0x34, '0'}, {0x35, '1'}, {0x36, '2'},
117 {0x37, '3'}, {0x38, '4'}, {0x39, '5'}, {0x3A, '6'}, {0x3B, '7'},
118 {0x3C, '8'}, {0x3D, '9'}, {0x3E, '!'}, {0x3F, '?'}, {0x40, '-'},
119 {0x41, '.'}, {0x42, ','}, {0x44, '>'}, {0x45, '('}, {0x46, ')'},
120 {0x4C, '"'}, {0x51, '\''}, {0x59, ' '}, {0x5A, '<'}, {0x5F, L'¡'},
121 {0x60, L'¡'}, {0x61, L'¡'}, {0x62, L' '}, {0x63, L' '}, {0x64, L' '},
122 {0x65, ' '}, {0x66, '_'},
123};
124
125// Finds the ROM byte value for a given character (reverse lookup in
126// CharEncoder). Returns 0xFF if the character is not found.
127uint8_t FindMatchingCharacter(char value);
128
129// Checks if a byte value represents a dictionary entry
130// Returns dictionary index (0-96) or -1 if not a dictionary entry
131int8_t FindDictionaryEntry(uint8_t value);
132
133// Converts a human-readable message string (with [command] tokens) into ROM
134// bytes. This is the inverse operation of ParseMessageData.
135std::vector<uint8_t> ParseMessageToData(std::string str);
136
137// Represents a single dictionary entry (common word/phrase) used for text
138// compression. Dictionary entries are stored separately in ROM and are
139// referenced by bytes 0x88-0xE8. Example: dictionary entry 0x00 might contain
140// "the" and be referenced as [D:00].
142 uint8_t ID = 0; // Dictionary index (0-96)
143 std::string Contents = ""; // The actual text this entry represents
144 std::vector<uint8_t> Data; // Binary representation of Contents
145 int Length = 0; // Character count
146 std::string Token = ""; // Human-readable token like "[D:00]"
147
148 DictionaryEntry() = default;
149 DictionaryEntry(uint8_t i, std::string_view s)
150 : ID(i), Contents(s), Length(s.length()) {
151 Token = absl::StrFormat("[%s:%02X]", DICTIONARYTOKEN, ID);
153 }
154
155 // Checks if this dictionary entry's text appears in the given string
156 bool ContainedInString(std::string_view s) const {
157 // Convert to std::string to avoid Debian string_view bug with
158 // absl::StrContains
159 return absl::StrContains(std::string(s), Contents);
160 }
161
162 // Replaces all occurrences of this dictionary entry's text with its token
163 // Example: "the cat" with dictionary[0]="the" becomes "[D:00] cat"
164 std::string ReplaceInstancesOfIn(std::string_view s) const {
165 auto replaced_string = std::string(s);
166 size_t pos = replaced_string.find(Contents);
167 while (pos != std::string::npos) {
168 replaced_string.replace(pos, Contents.length(), Token);
169 pos = replaced_string.find(Contents, pos + Token.length());
170 }
171 return replaced_string;
172 }
173};
174
175constexpr int kTextData = 0xE0000;
176constexpr int kTextDataEnd = 0xE7FFF;
177constexpr int kNumDictionaryEntries = 0x61;
178constexpr int kPointersDictionaries = 0x74703;
179constexpr uint8_t kScrollVertical = 0x73;
180constexpr uint8_t kLine1 = 0x74;
181constexpr uint8_t kLine2 = 0x75;
182constexpr uint8_t kLine3 = 0x76;
183
184// Reads all dictionary entries from ROM and builds the dictionary table
185std::vector<DictionaryEntry> BuildDictionaryEntries(Rom* rom);
186
187// Replaces all dictionary words in a string with their [D:XX] tokens
188// Used for text compression when saving messages back to ROM
189std::string ReplaceAllDictionaryWords(
190 std::string str, const std::vector<DictionaryEntry>& dictionary);
191
192// Looks up a dictionary entry by its ROM byte value
194 uint8_t value, const std::vector<DictionaryEntry>& dictionary);
195
196// Special marker inserted into commands to protect them from dictionary
197// replacements during optimization. Removed after dictionary replacement is
198// complete.
199const std::string CHEESE = "\uBEBE";
200
201// Represents a complete in-game message with both raw and parsed
202// representations. Messages can exist in two forms:
203// 1. Raw: Direct ROM bytes with dictionary references as [D:XX] tokens
204// 2. Parsed: Fully expanded with dictionary words replaced by actual text
206 int ID = 0; // Message index in the ROM
207 int Address = 0; // ROM address where this message is stored
208 std::string RawString; // Human-readable with [D:XX] dictionary tokens
209 std::string ContentsParsed; // Fully expanded human-readable text
210 std::vector<uint8_t> Data; // Raw ROM bytes (may contain dict references)
211 std::vector<uint8_t> DataParsed; // Expanded bytes (dict entries expanded)
212
213 MessageData() = default;
214 MessageData(int id, int address, const std::string& rawString,
215 const std::vector<uint8_t>& rawData,
216 const std::string& parsedString,
217 const std::vector<uint8_t>& parsedData)
218 : ID(id),
219 Address(address),
220 RawString(rawString),
221 ContentsParsed(parsedString),
222 Data(rawData),
223 DataParsed(parsedData) {}
224
225 // Copy constructor
226 MessageData(const MessageData& other) {
227 ID = other.ID;
228 Address = other.Address;
229 RawString = other.RawString;
230 Data = other.Data;
231 DataParsed = other.DataParsed;
233 }
234
235 // Optimizes a message by replacing common phrases with dictionary tokens
236 // Inserts CHEESE markers inside commands to prevent dictionary replacement
237 // from corrupting command syntax like [W:02]
238 // Example: "Link saved the day" → "[D:00] saved [D:01] day"
240 std::string_view message_string,
241 const std::vector<DictionaryEntry>& dictionary) {
242 std::stringstream protons;
243 bool command = false;
244 // Insert CHEESE markers inside commands to protect them
245 for (const auto& c : message_string) {
246 if (c == '[') {
247 command = true;
248 } else if (c == ']') {
249 command = false;
250 }
251
252 protons << c;
253 if (command) {
254 protons << CHEESE; // Protect command contents from replacement
255 }
256 }
257
258 std::string protons_string = protons.str();
259 std::string replaced_string =
260 ReplaceAllDictionaryWords(protons_string, dictionary);
261 std::string final_string =
262 absl::StrReplaceAll(replaced_string, {{CHEESE, ""}});
263
264 return final_string;
265 }
266
267 // Updates this message with new text content
268 // Automatically optimizes the message using dictionary compression
269 void SetMessage(const std::string& message,
270 const std::vector<DictionaryEntry>& dictionary) {
271 RawString = message;
272 ContentsParsed = OptimizeMessageForDictionary(message, dictionary);
273 }
274};
275
276// Represents a text command or special character definition
277// Text commands control message display (line breaks, colors, choices, etc.)
278// Special characters are game-specific symbols (arrows, buttons, HP hearts)
280 uint8_t ID; // ROM byte value for this element
281 std::string Token; // Short token like "W" or "UP"
282 std::string GenericToken; // Display format like "[W:##]" or "[UP]"
283 std::string Pattern; // Regex pattern for parsing
284 std::string StrictPattern; // Strict regex pattern for exact matching
285 std::string Description; // Human-readable description
286 bool HasArgument; // True if command takes a parameter byte
287
288 TextElement() = default;
289 TextElement(uint8_t id, const std::string& token, bool arg,
290 const std::string& description) {
291 ID = id;
292 Token = token;
293 if (arg) {
294 GenericToken = absl::StrFormat("[%s:##]", Token);
295 } else {
296 GenericToken = absl::StrFormat("[%s]", Token);
297 }
298 HasArgument = arg;
299 Description = description;
300 if (arg) {
301 Pattern = absl::StrFormat(
302 "\\[%s(:[0-9A-F]{1,2})?\\]",
303 absl::StrReplaceAll(Token, {{"[", "\\["}, {"]", "\\]"}}));
304 } else {
305 Pattern = absl::StrFormat(
306 "\\[%s\\]", absl::StrReplaceAll(Token, {{"[", "\\["}, {"]", "\\]"}}));
307 }
308 StrictPattern = absl::StrFormat("^%s$", Pattern);
309 }
310
311 std::string GetParamToken(uint8_t value = 0) const {
312 if (HasArgument) {
313 return absl::StrFormat("[%s:%02X]", Token, value);
314 } else {
315 return absl::StrFormat("[%s]", Token);
316 }
317 }
318
319 std::smatch MatchMe(const std::string& dfrag) const {
320 std::regex pattern(StrictPattern);
321 std::smatch match;
322 std::regex_match(dfrag, match, pattern);
323 return match;
324 }
325
326 bool Empty() const { return ID == 0; }
327
328 // Comparison operator
329 bool operator==(const TextElement& other) const { return ID == other.ID; }
330};
331
332const static std::string kWindowBorder = "Window border";
333const static std::string kWindowPosition = "Window position";
334const static std::string kScrollSpeed = "Scroll speed";
335const static std::string kTextDrawSpeed = "Text draw speed";
336const static std::string kTextColor = "Text color";
337const static std::string kPlayerName = "Player name";
338const static std::string kLine1Str = "Line 1";
339const static std::string kLine2Str = "Line 2";
340const static std::string kLine3Str = "Line 3";
341const static std::string kWaitForKey = "Wait for key";
342const static std::string kScrollText = "Scroll text";
343const static std::string kDelayX = "Delay X";
344const static std::string kBCDNumber = "BCD number";
345const static std::string kSoundEffect = "Sound effect";
346const static std::string kChoose3 = "Choose 3";
347const static std::string kChoose2High = "Choose 2 high";
348const static std::string kChoose2Low = "Choose 2 low";
349const static std::string kChoose2Indented = "Choose 2 indented";
350const static std::string kChooseItem = "Choose item";
351const static std::string kNextAttractImage = "Next attract image";
352const static std::string kBankMarker = "Bank marker (automatic)";
353const static std::string kCrash = "Crash";
354
355static const std::vector<TextElement> TextCommands = {
356 TextElement(0x6B, "W", true, kWindowBorder),
357 TextElement(0x6D, "P", true, kWindowPosition),
358 TextElement(0x6E, "SPD", true, kScrollSpeed),
359 TextElement(0x7A, "S", true, kTextDrawSpeed),
360 TextElement(0x77, "C", true, kTextColor),
361 TextElement(0x6A, "L", false, kPlayerName),
362 TextElement(0x74, "1", false, kLine1Str),
363 TextElement(0x75, "2", false, kLine2Str),
364 TextElement(0x76, "3", false, kLine3Str),
365 TextElement(0x7E, "K", false, kWaitForKey),
366 TextElement(0x73, "V", false, kScrollText),
367 TextElement(0x78, "WT", true, kDelayX),
368 TextElement(0x6C, "N", true, kBCDNumber),
369 TextElement(0x79, "SFX", true, kSoundEffect),
370 TextElement(0x71, "CH3", false, kChoose3),
371 TextElement(0x72, "CH2", false, kChoose2High),
372 TextElement(0x6F, "CH2L", false, kChoose2Low),
373 TextElement(0x68, "CH2I", false, kChoose2Indented),
374 TextElement(0x69, "CHI", false, kChooseItem),
375 TextElement(0x67, "IMG", false, kNextAttractImage),
376 TextElement(0x80, kBankToken, false, kBankMarker),
377 TextElement(0x70, "NONO", false, kCrash),
378};
379
380// Finds the TextElement definition for a command byte value
381// Returns nullopt if the byte is not a recognized command
382std::optional<TextElement> FindMatchingCommand(uint8_t b);
383
384// Special characters available in Zelda 3 messages
385// These are symbols and game-specific icons that appear in text
386static const std::vector<TextElement> SpecialChars = {
387 TextElement(0x43, "...", false, "Ellipsis …"),
388 TextElement(0x4D, "UP", false, "Arrow ↑"),
389 TextElement(0x4E, "DOWN", false, "Arrow ↓"),
390 TextElement(0x4F, "LEFT", false, "Arrow ←"),
391 TextElement(0x50, "RIGHT", false, "Arrow →"),
392 TextElement(0x5B, "A", false, "Button Ⓐ"),
393 TextElement(0x5C, "B", false, "Button Ⓑ"),
394 TextElement(0x5D, "X", false, "Button ⓧ"),
395 TextElement(0x5E, "Y", false, "Button ⓨ"),
396 TextElement(0x52, "HP1L", false, "1 HP left"),
397 TextElement(0x53, "HP1R", false, "1 HP right"),
398 TextElement(0x54, "HP2L", false, "2 HP left"),
399 TextElement(0x55, "HP3L", false, "3 HP left"),
400 TextElement(0x56, "HP3R", false, "3 HP right"),
401 TextElement(0x57, "HP4L", false, "4 HP left"),
402 TextElement(0x58, "HP4R", false, "4 HP right"),
403 TextElement(0x47, "HY0", false, "Hieroglyph ☥"),
404 TextElement(0x48, "HY1", false, "Hieroglyph 𓈗"),
405 TextElement(0x49, "HY2", false, "Hieroglyph Ƨ"),
406 TextElement(0x4A, "LFL", false, "Link face left"),
407 TextElement(0x4B, "LFR", false, "Link face right"),
408};
409
410// Finds the TextElement definition for a special character byte
411// Returns nullopt if the byte is not a recognized special character
412std::optional<TextElement> FindMatchingSpecial(uint8_t b);
413
414// Result of parsing a text token like "[W:02]"
415// Contains both the command definition and its argument value
417 TextElement Parent; // The command or special character definition
418 uint8_t Value; // Argument value (if command has argument)
419 bool Active = false; // True if parsing was successful
420
421 ParsedElement() = default;
422 ParsedElement(const TextElement& textElement, uint8_t value)
423 : Parent(textElement), Value(value), Active(true) {}
424};
425
426// Parses a token string like "[W:02]" and returns its ParsedElement
427// Returns inactive ParsedElement if token is invalid
428ParsedElement FindMatchingElement(const std::string& str);
429
430// Converts a single ROM byte into its human-readable text representation
431// Handles characters, commands, special chars, and dictionary references
432std::string ParseTextDataByte(uint8_t value);
433
434// Parses a single message from ROM data starting at current_pos
435// Updates current_pos to point after the message terminator
436// Returns error if message is malformed (e.g., missing terminator)
437absl::StatusOr<MessageData> ParseSingleMessage(
438 const std::vector<uint8_t>& rom_data, int* current_pos);
439
440// Converts MessageData objects into human-readable strings with [command]
441// tokens. This is the main function for displaying messages in the editor.
442// Properly handles commands with arguments to avoid parsing errors.
443std::vector<std::string> ParseMessageData(
444 std::vector<MessageData>& message_data,
445 const std::vector<DictionaryEntry>& dictionary_entries);
446
447constexpr int kTextData2 = 0x75F40;
448constexpr int kTextData2End = 0x773FF;
449
450// Reads all text data from the ROM and returns a vector of MessageData objects.
451std::vector<MessageData> ReadAllTextData(uint8_t* rom, int pos = kTextData);
452
453// Calls the file dialog and loads expanded messages from a BIN file.
454absl::Status LoadExpandedMessages(std::string& expanded_message_path,
455 std::vector<std::string>& parsed_messages,
456 std::vector<MessageData>& expanded_messages,
457 std::vector<DictionaryEntry>& dictionary);
458
459// Serializes a vector of MessageData to a JSON object.
460nlohmann::json SerializeMessagesToJson(const std::vector<MessageData>& messages);
461
462// Exports messages to a JSON file at the specified path.
463absl::Status ExportMessagesToJson(const std::string& path,
464 const std::vector<MessageData>& messages);
465
466} // namespace editor
467} // namespace yaze
468
469#endif // YAZE_APP_EDITOR_MESSAGE_MESSAGE_DATA_H
The Rom class is used to load, save, and modify Rom data. This is a generic SNES ROM container and do...
Definition rom.h:24
std::vector< MessageData > ReadAllTextData(uint8_t *rom, int pos)
uint8_t FindMatchingCharacter(char value)
const std::string kBankToken
nlohmann::json SerializeMessagesToJson(const std::vector< MessageData > &messages)
DictionaryEntry FindRealDictionaryEntry(uint8_t value, const std::vector< DictionaryEntry > &dictionary)
const std::string CHEESE
const std::string DICTIONARYTOKEN
constexpr uint8_t kScrollVertical
std::string ParseTextDataByte(uint8_t value)
absl::Status LoadExpandedMessages(std::string &expanded_message_path, std::vector< std::string > &parsed_messages, std::vector< MessageData > &expanded_messages, std::vector< DictionaryEntry > &dictionary)
constexpr uint8_t kLine1
constexpr int kTextData
constexpr int kTextData2
std::string ReplaceAllDictionaryWords(std::string str, const std::vector< DictionaryEntry > &dictionary)
constexpr uint8_t kLine2
constexpr int kPointersDictionaries
constexpr int kNumDictionaryEntries
absl::StatusOr< MessageData > ParseSingleMessage(const std::vector< uint8_t > &rom_data, int *current_pos)
std::vector< std::string > ParseMessageData(std::vector< MessageData > &message_data, const std::vector< DictionaryEntry > &dictionary_entries)
std::optional< TextElement > FindMatchingSpecial(uint8_t value)
constexpr uint8_t kMessageTerminator
constexpr int kTextData2End
std::vector< DictionaryEntry > BuildDictionaryEntries(Rom *rom)
std::vector< uint8_t > ParseMessageToData(std::string str)
absl::Status ExportMessagesToJson(const std::string &path, const std::vector< MessageData > &messages)
constexpr uint8_t kWidthArraySize
constexpr uint8_t DICTOFF
std::optional< TextElement > FindMatchingCommand(uint8_t b)
ParsedElement FindMatchingElement(const std::string &str)
constexpr uint8_t kLine3
int8_t FindDictionaryEntry(uint8_t value)
constexpr int kTextDataEnd
bool ContainedInString(std::string_view s) const
std::string ReplaceInstancesOfIn(std::string_view s) const
DictionaryEntry(uint8_t i, std::string_view s)
std::vector< uint8_t > Data
MessageData(const MessageData &other)
std::vector< uint8_t > Data
std::vector< uint8_t > DataParsed
std::string OptimizeMessageForDictionary(std::string_view message_string, const std::vector< DictionaryEntry > &dictionary)
void SetMessage(const std::string &message, const std::vector< DictionaryEntry > &dictionary)
MessageData(int id, int address, const std::string &rawString, const std::vector< uint8_t > &rawData, const std::string &parsedString, const std::vector< uint8_t > &parsedData)
ParsedElement(const TextElement &textElement, uint8_t value)
std::smatch MatchMe(const std::string &dfrag) const
TextElement(uint8_t id, const std::string &token, bool arg, const std::string &description)
bool operator==(const TextElement &other) const
std::string GetParamToken(uint8_t value=0) const