yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
message_data.h
Go to the documentation of this file.
1#ifndef YAZE_APP_EDITOR_MESSAGE_MESSAGE_DATA_H
2#define YAZE_APP_EDITOR_MESSAGE_MESSAGE_DATA_H
3
4// ===========================================================================
5// Message Data System for Zelda 3 (A Link to the Past)
6// ===========================================================================
7//
8// This system handles the parsing, editing, and serialization of in-game text
9// messages from The Legend of Zelda: A Link to the Past (SNES).
10//
11// ## Architecture Overview
12//
13// The message system consists of several key components:
14//
15// 1. **Character Encoding** (`CharEncoder`):
16// Maps byte values (0x00-0x66) to displayable characters (A-Z, a-z, 0-9,
17// punctuation). This is the basic text representation in the ROM.
18//
19// 2. **Text Commands** (`TextCommands`):
20// Special control codes (0x67-0x80) that control message display behavior:
21// - Window appearance (border, position)
22// - Text flow (line breaks, scrolling, delays)
23// - Interactive elements (choices, player name insertion)
24// - Some commands have arguments (e.g., [W:02] = window border type 2)
25//
26// 3. **Special Characters** (`SpecialChars`):
27// Extended character set (0x43-0x5E) for game-specific symbols:
28// - Directional arrows
29// - Button prompts (A, B, X, Y)
30// - HP indicators
31// - Hieroglyphs
32//
33// 4. **Dictionary System** (`DictionaryEntry`):
34// Compression system using byte values 0x88+ to reference common words/phrases
35// stored separately in ROM. This saves space by replacing frequently-used
36// text with single-byte references.
37//
38// 5. **Message Data** (`MessageData`):
39// Represents a single in-game message with both raw binary data and parsed
40// human-readable text. Each message is terminated by 0x7F in ROM.
41//
42// ## Data Flow
43//
44// ### Reading from ROM:
45// ROM bytes → ReadAllTextData() → MessageData (raw) → ParseMessageData() →
46// Human-readable string with [command] tokens
47//
48// ### Writing to ROM:
49// User edits text → ParseMessageToData() → Binary bytes → ROM
50//
51// ### Dictionary Optimization:
52// Text string → OptimizeMessageForDictionary() → Replace common phrases with
53// [D:XX] tokens → Smaller binary representation
54//
55// ## ROM Memory Layout (SNES)
56//
57// - Text Data Block 1: 0xE0000 - 0xE7FFF (32KB)
58// - Text Data Block 2: 0x75F40 - 0x773FF (5.3KB)
59// - Dictionary Pointers: 0x74703
60// - Character Widths: Table storing pixel widths for proportional font
61// - Font Graphics: 0x70000+ (2bpp tile data)
62//
63// ## Message Format
64//
65// Messages are stored as byte sequences terminated by 0x7F:
66// Example: [0x00, 0x01, 0x02, 0x7F] = "ABC"
67// Example: [0x6A, 0x59, 0x2C, 0x1A, 0x2F, 0x1E, 0x1D, 0x59, 0x07, 0x32, 0x2B,
68// 0x2E, 0x25, 0x1E, 0x7F] = "[L] saved Hyrule" (0x6A = player name command)
69//
70// ## Token Syntax (Human-Readable Format)
71//
72// Commands: [TOKEN:HEX] or [TOKEN]
73// Examples: [W:02] (window border), [K] (wait for key)
74// Dictionary: [D:HEX]
75// Examples: [D:00] (first dictionary entry)
76// Special Chars:[TOKEN]
77// Examples: [A] (A button), [UP] (up arrow)
78//
79// ===========================================================================
80
81#include <optional>
82#include <regex>
83#include <string>
84#include <unordered_map>
85#include <vector>
86#include <string_view>
87
88#include "absl/strings/str_format.h"
89#include "absl/strings/str_replace.h"
90#include "absl/strings/match.h"
91#include "app/rom.h"
92
93namespace yaze {
94namespace editor {
95
96const std::string kBankToken = "BANK";
97const std::string DICTIONARYTOKEN = "D";
98constexpr uint8_t kMessageTerminator = 0x7F; // Marks end of message in ROM
99constexpr uint8_t DICTOFF = 0x88; // Dictionary entries start at byte 0x88
100constexpr uint8_t kWidthArraySize = 100;
101
102// Character encoding table: Maps ROM byte values to displayable characters
103// Used for both parsing ROM data into text and converting text back to bytes
//
// Bytes in the 0x00-0x66 range that are absent from this table (0x43,
// 0x47-0x4B, 0x4D-0x50, 0x52-0x58, 0x5B-0x5E) are exactly the IDs listed in
// SpecialChars.
// The mapping is not one-to-one: 0x5F-0x61 all map to the same glyph, and
// both 0x59 and 0x65 map to ' '.
// NOTE(review): a reverse lookup by character therefore has more than one
// candidate byte for those characters, and std::unordered_map has no defined
// iteration order - confirm which byte callers expect to get back.
104static const std::unordered_map<uint8_t, wchar_t> CharEncoder = {
105 {0x00, 'A'}, {0x01, 'B'}, {0x02, 'C'}, {0x03, 'D'}, {0x04, 'E'},
106 {0x05, 'F'}, {0x06, 'G'}, {0x07, 'H'}, {0x08, 'I'}, {0x09, 'J'},
107 {0x0A, 'K'}, {0x0B, 'L'}, {0x0C, 'M'}, {0x0D, 'N'}, {0x0E, 'O'},
108 {0x0F, 'P'}, {0x10, 'Q'}, {0x11, 'R'}, {0x12, 'S'}, {0x13, 'T'},
109 {0x14, 'U'}, {0x15, 'V'}, {0x16, 'W'}, {0x17, 'X'}, {0x18, 'Y'},
110 {0x19, 'Z'}, {0x1A, 'a'}, {0x1B, 'b'}, {0x1C, 'c'}, {0x1D, 'd'},
111 {0x1E, 'e'}, {0x1F, 'f'}, {0x20, 'g'}, {0x21, 'h'}, {0x22, 'i'},
112 {0x23, 'j'}, {0x24, 'k'}, {0x25, 'l'}, {0x26, 'm'}, {0x27, 'n'},
113 {0x28, 'o'}, {0x29, 'p'}, {0x2A, 'q'}, {0x2B, 'r'}, {0x2C, 's'},
114 {0x2D, 't'}, {0x2E, 'u'}, {0x2F, 'v'}, {0x30, 'w'}, {0x31, 'x'},
115 {0x32, 'y'}, {0x33, 'z'}, {0x34, '0'}, {0x35, '1'}, {0x36, '2'},
116 {0x37, '3'}, {0x38, '4'}, {0x39, '5'}, {0x3A, '6'}, {0x3B, '7'},
117 {0x3C, '8'}, {0x3D, '9'}, {0x3E, '!'}, {0x3F, '?'}, {0x40, '-'},
118 {0x41, '.'}, {0x42, ','}, {0x44, '>'}, {0x45, '('}, {0x46, ')'},
119 {0x4C, '"'}, {0x51, '\''}, {0x59, ' '}, {0x5A, '<'}, {0x5F, L'¡'},
120 {0x60, L'¡'}, {0x61, L'¡'}, {0x62, L' '}, {0x63, L' '}, {0x64, L' '},
121 {0x65, ' '}, {0x66, '_'},
122};
123
124// Finds the ROM byte value for a given character (reverse lookup in CharEncoder)
125// Returns 0xFF if character is not found
126uint8_t FindMatchingCharacter(char value);
127
128// Checks if a byte value represents a dictionary entry
129// Returns dictionary index (0-96) or -1 if not a dictionary entry
130int8_t FindDictionaryEntry(uint8_t value);
131
132// Converts a human-readable message string (with [command] tokens) into ROM bytes
133// This is the inverse operation of ParseMessageData
134std::vector<uint8_t> ParseMessageToData(std::string str);
135
136// Represents a single dictionary entry (common word/phrase) used for text compression
137// Dictionary entries are stored separately in ROM and referenced by bytes 0x88-0xE8
138// Example: Dictionary entry 0x00 might contain "the" and be referenced as [D:00]
140 uint8_t ID = 0; // Dictionary index (0-96)
141 std::string Contents = ""; // The actual text this entry represents
142 std::vector<uint8_t> Data; // Binary representation of Contents
143 int Length = 0; // Character count
144 std::string Token = ""; // Human-readable token like "[D:00]"
145
146 DictionaryEntry() = default;
147 DictionaryEntry(uint8_t i, std::string_view s)
148 : ID(i), Contents(s), Length(s.length()) {
149 Token = absl::StrFormat("[%s:%02X]", DICTIONARYTOKEN, ID);
151 }
152
153 // Checks if this dictionary entry's text appears in the given string
154 bool ContainedInString(std::string_view s) const {
155 // Convert to std::string to avoid Debian string_view bug with absl::StrContains
156 return absl::StrContains(std::string(s), Contents);
157 }
158
159 // Replaces all occurrences of this dictionary entry's text with its token
160 // Example: "the cat" with dictionary[0]="the" becomes "[D:00] cat"
161 std::string ReplaceInstancesOfIn(std::string_view s) const {
162 auto replaced_string = std::string(s);
163 size_t pos = replaced_string.find(Contents);
164 while (pos != std::string::npos) {
165 replaced_string.replace(pos, Contents.length(), Token);
166 pos = replaced_string.find(Contents, pos + Token.length());
167 }
168 return replaced_string;
169 }
170};
171
172constexpr int kTextData = 0xE0000;
173constexpr int kTextDataEnd = 0xE7FFF;
174constexpr int kNumDictionaryEntries = 0x61;
175constexpr int kPointersDictionaries = 0x74703;
176constexpr uint8_t kScrollVertical = 0x73;
177constexpr uint8_t kLine1 = 0x74;
178constexpr uint8_t kLine2 = 0x75;
179constexpr uint8_t kLine3 = 0x76;
180
181// Reads all dictionary entries from ROM and builds the dictionary table
182std::vector<DictionaryEntry> BuildDictionaryEntries(Rom* rom);
183
184// Replaces all dictionary words in a string with their [D:XX] tokens
185// Used for text compression when saving messages back to ROM
186std::string ReplaceAllDictionaryWords(std::string str,
187 const std::vector<DictionaryEntry>& dictionary);
188
189// Looks up a dictionary entry by its ROM byte value
191 uint8_t value, const std::vector<DictionaryEntry>& dictionary);
192
193// Special marker inserted into commands to protect them from dictionary replacements
194// during optimization. Removed after dictionary replacement is complete.
195const std::string CHEESE = "\uBEBE";
196
197// Represents a complete in-game message with both raw and parsed representations
198// Messages can exist in two forms:
199// 1. Raw: Direct ROM bytes with dictionary references as [D:XX] tokens
200// 2. Parsed: Fully expanded with dictionary words replaced by actual text
202 int ID = 0; // Message index in the ROM
203 int Address = 0; // ROM address where this message is stored
204 std::string RawString; // Human-readable with [D:XX] dictionary tokens
205 std::string ContentsParsed; // Fully expanded human-readable text
206 std::vector<uint8_t> Data; // Raw ROM bytes (may contain dict references)
207 std::vector<uint8_t> DataParsed; // Expanded bytes (dict entries expanded)
208
209 MessageData() = default;
210 MessageData(int id, int address, const std::string& rawString,
211 const std::vector<uint8_t>& rawData,
212 const std::string& parsedString,
213 const std::vector<uint8_t>& parsedData)
214 : ID(id),
215 Address(address),
216 RawString(rawString),
217 ContentsParsed(parsedString),
218 Data(rawData),
219 DataParsed(parsedData) {}
220
221 // Copy constructor
222 MessageData(const MessageData& other) {
223 ID = other.ID;
224 Address = other.Address;
225 RawString = other.RawString;
226 Data = other.Data;
227 DataParsed = other.DataParsed;
229 }
230
231 // Optimizes a message by replacing common phrases with dictionary tokens
232 // Inserts CHEESE markers inside commands to prevent dictionary replacement
233 // from corrupting command syntax like [W:02]
234 // Example: "Link saved the day" → "[D:00] saved [D:01] day"
236 std::string_view message_string,
237 const std::vector<DictionaryEntry>& dictionary) {
238 std::stringstream protons;
239 bool command = false;
240 // Insert CHEESE markers inside commands to protect them
241 for (const auto& c : message_string) {
242 if (c == '[') {
243 command = true;
244 } else if (c == ']') {
245 command = false;
246 }
247
248 protons << c;
249 if (command) {
250 protons << CHEESE; // Protect command contents from replacement
251 }
252 }
253
254 std::string protons_string = protons.str();
255 std::string replaced_string =
256 ReplaceAllDictionaryWords(protons_string, dictionary);
257 std::string final_string =
258 absl::StrReplaceAll(replaced_string, {{CHEESE, ""}});
259
260 return final_string;
261 }
262
263 // Updates this message with new text content
264 // Automatically optimizes the message using dictionary compression
265 void SetMessage(const std::string& message,
266 const std::vector<DictionaryEntry>& dictionary) {
267 RawString = message;
268 ContentsParsed = OptimizeMessageForDictionary(message, dictionary);
269 }
270};
271
272// Represents a text command or special character definition
273// Text commands control message display (line breaks, colors, choices, etc.)
274// Special characters are game-specific symbols (arrows, buttons, HP hearts)
// ID and HasArgument carry default initializers so that a default-constructed
// element is in a defined state: Empty() compares ID with 0 and
// GetParamToken() branches on HasArgument.
uint8_t ID = 0;             // ROM byte value for this element
std::string Token;          // Short token like "W" or "UP"
std::string GenericToken;   // Display format like "[W:##]" or "[UP]"
std::string Pattern;        // Regex pattern for parsing
std::string StrictPattern;  // Strict regex pattern for exact matching
std::string Description;    // Human-readable description
bool HasArgument = false;   // True if command takes a parameter byte
283
284 TextElement() = default;
285 TextElement(uint8_t id, const std::string& token, bool arg,
286 const std::string& description) {
287 ID = id;
288 Token = token;
289 if (arg) {
290 GenericToken = absl::StrFormat("[%s:##]", Token);
291 } else {
292 GenericToken = absl::StrFormat("[%s]", Token);
293 }
294 HasArgument = arg;
295 Description = description;
296 if (arg) {
297 Pattern = absl::StrFormat(
298 "\\[%s(:[0-9A-F]{1,2})?\\]",
299 absl::StrReplaceAll(Token, {{"[", "\\["}, {"]", "\\]"}}));
300 } else {
301 Pattern = absl::StrFormat(
302 "\\[%s\\]", absl::StrReplaceAll(Token, {{"[", "\\["}, {"]", "\\]"}}));
303 }
304 StrictPattern = absl::StrFormat("^%s$", Pattern);
305 }
306
307 std::string GetParamToken(uint8_t value = 0) const {
308 if (HasArgument) {
309 return absl::StrFormat("[%s:%02X]", Token, value);
310 } else {
311 return absl::StrFormat("[%s]", Token);
312 }
313 }
314
315 std::smatch MatchMe(const std::string& dfrag) const {
316 std::regex pattern(StrictPattern);
317 std::smatch match;
318 std::regex_match(dfrag, match, pattern);
319 return match;
320 }
321
322 bool Empty() const { return ID == 0; }
323
324 // Comparison operator
325 bool operator==(const TextElement& other) const { return ID == other.ID; }
326};
327
// Description strings for the entries of TextCommands below. They are defined
// ahead of the table because each TextElement copies its description while
// the table is being constructed.
328const static std::string kWindowBorder = "Window border";
329const static std::string kWindowPosition = "Window position";
330const static std::string kScrollSpeed = "Scroll speed";
331const static std::string kTextDrawSpeed = "Text draw speed";
332const static std::string kTextColor = "Text color";
333const static std::string kPlayerName = "Player name";
334const static std::string kLine1Str = "Line 1";
335const static std::string kLine2Str = "Line 2";
336const static std::string kLine3Str = "Line 3";
337const static std::string kWaitForKey = "Wait for key";
338const static std::string kScrollText = "Scroll text";
339const static std::string kDelayX = "Delay X";
340const static std::string kBCDNumber = "BCD number";
341const static std::string kSoundEffect = "Sound effect";
342const static std::string kChoose3 = "Choose 3";
343const static std::string kChoose2High = "Choose 2 high";
344const static std::string kChoose2Low = "Choose 2 low";
345const static std::string kChoose2Indented = "Choose 2 indented";
346const static std::string kChooseItem = "Choose item";
347const static std::string kNextAttractImage = "Next attract image";
348const static std::string kBankMarker = "Bank marker (automatic)";
349const static std::string kCrash = "Crash";
350
// Table of text commands. Each entry is (ROM byte, token, takes-argument
// flag, description).
// The IDs cover 0x67-0x7A plus 0x7E and 0x80; 0x7B-0x7D have no entry, and
// 0x7F is kMessageTerminator rather than a command.
// Entries 0x73-0x76 use the same byte values as the kScrollVertical and
// kLine1..kLine3 constants.
351static const std::vector<TextElement> TextCommands = {
352 TextElement(0x6B, "W", true, kWindowBorder),
353 TextElement(0x6D, "P", true, kWindowPosition),
354 TextElement(0x6E, "SPD", true, kScrollSpeed),
355 TextElement(0x7A, "S", true, kTextDrawSpeed),
356 TextElement(0x77, "C", true, kTextColor),
357 TextElement(0x6A, "L", false, kPlayerName),
358 TextElement(0x74, "1", false, kLine1Str),
359 TextElement(0x75, "2", false, kLine2Str),
360 TextElement(0x76, "3", false, kLine3Str),
361 TextElement(0x7E, "K", false, kWaitForKey),
362 TextElement(0x73, "V", false, kScrollText),
363 TextElement(0x78, "WT", true, kDelayX),
364 TextElement(0x6C, "N", true, kBCDNumber),
365 TextElement(0x79, "SFX", true, kSoundEffect),
366 TextElement(0x71, "CH3", false, kChoose3),
367 TextElement(0x72, "CH2", false, kChoose2High),
368 TextElement(0x6F, "CH2L", false, kChoose2Low),
369 TextElement(0x68, "CH2I", false, kChoose2Indented),
370 TextElement(0x69, "CHI", false, kChooseItem),
371 TextElement(0x67, "IMG", false, kNextAttractImage),
372 TextElement(0x80, kBankToken, false, kBankMarker),
373 TextElement(0x70, "NONO", false, kCrash),
374};
375
376// Finds the TextElement definition for a command byte value
377// Returns nullopt if the byte is not a recognized command
378std::optional<TextElement> FindMatchingCommand(uint8_t b);
379
380// Special characters available in Zelda 3 messages
381// These are symbols and game-specific icons that appear in text
//
// Each entry is (ROM byte, token, takes-argument flag, description); none of
// these entries takes an argument. The IDs are exactly the byte values in the
// 0x43-0x5E range that CharEncoder leaves unmapped.
// NOTE(review): the HP entries run HP1L, HP1R, HP2L, HP3L, HP3R, HP4L, HP4R
// for 0x52-0x58, so there is no HP2R token - confirm this matches the font.
382static const std::vector<TextElement> SpecialChars = {
383 TextElement(0x43, "...", false, "Ellipsis …"),
384 TextElement(0x4D, "UP", false, "Arrow ↑"),
385 TextElement(0x4E, "DOWN", false, "Arrow ↓"),
386 TextElement(0x4F, "LEFT", false, "Arrow ←"),
387 TextElement(0x50, "RIGHT", false, "Arrow →"),
388 TextElement(0x5B, "A", false, "Button Ⓐ"),
389 TextElement(0x5C, "B", false, "Button Ⓑ"),
390 TextElement(0x5D, "X", false, "Button ⓧ"),
391 TextElement(0x5E, "Y", false, "Button ⓨ"),
392 TextElement(0x52, "HP1L", false, "1 HP left"),
393 TextElement(0x53, "HP1R", false, "1 HP right"),
394 TextElement(0x54, "HP2L", false, "2 HP left"),
395 TextElement(0x55, "HP3L", false, "3 HP left"),
396 TextElement(0x56, "HP3R", false, "3 HP right"),
397 TextElement(0x57, "HP4L", false, "4 HP left"),
398 TextElement(0x58, "HP4R", false, "4 HP right"),
399 TextElement(0x47, "HY0", false, "Hieroglyph ☥"),
400 TextElement(0x48, "HY1", false, "Hieroglyph 𓈗"),
401 TextElement(0x49, "HY2", false, "Hieroglyph Ƨ"),
402 TextElement(0x4A, "LFL", false, "Link face left"),
403 TextElement(0x4B, "LFR", false, "Link face right"),
404};
405
406// Finds the TextElement definition for a special character byte
407// Returns nullopt if the byte is not a recognized special character
408std::optional<TextElement> FindMatchingSpecial(uint8_t b);
409
410// Result of parsing a text token like "[W:02]"
411// Contains both the command definition and its argument value
413 TextElement Parent; // The command or special character definition
414 uint8_t Value; // Argument value (if command has argument)
415 bool Active = false; // True if parsing was successful
416
417 ParsedElement() = default;
418 ParsedElement(const TextElement& textElement, uint8_t value)
419 : Parent(textElement), Value(value), Active(true) {}
420};
421
422// Parses a token string like "[W:02]" and returns its ParsedElement
423// Returns inactive ParsedElement if token is invalid
424ParsedElement FindMatchingElement(const std::string& str);
425
426// Converts a single ROM byte into its human-readable text representation
427// Handles characters, commands, special chars, and dictionary references
428std::string ParseTextDataByte(uint8_t value);
429
430// Parses a single message from ROM data starting at current_pos
431// Updates current_pos to point after the message terminator
432// Returns error if message is malformed (e.g., missing terminator)
433absl::StatusOr<MessageData> ParseSingleMessage(
434 const std::vector<uint8_t>& rom_data, int* current_pos);
435
436// Converts MessageData objects into human-readable strings with [command] tokens
437// This is the main function for displaying messages in the editor
438// Properly handles commands with arguments to avoid parsing errors
439std::vector<std::string> ParseMessageData(
440 std::vector<MessageData>& message_data,
441 const std::vector<DictionaryEntry>& dictionary_entries);
442
443constexpr int kTextData2 = 0x75F40;
444constexpr int kTextData2End = 0x773FF;
445
446// Reads all text data from the ROM and returns a vector of MessageData objects.
447std::vector<MessageData> ReadAllTextData(uint8_t* rom, int pos = kTextData);
448
449// Calls the file dialog and loads expanded messages from a BIN file.
450absl::Status LoadExpandedMessages(std::string& expanded_message_path,
451 std::vector<std::string>& parsed_messages,
452 std::vector<MessageData>& expanded_messages,
453 std::vector<DictionaryEntry>& dictionary);
454
455} // namespace editor
456} // namespace yaze
457
458#endif // YAZE_APP_EDITOR_MESSAGE_MESSAGE_DATA_H
The Rom class is used to load, save, and modify Rom data.
Definition rom.h:71
std::vector< MessageData > ReadAllTextData(uint8_t *rom, int pos)
uint8_t FindMatchingCharacter(char value)
const std::string kBankToken
DictionaryEntry FindRealDictionaryEntry(uint8_t value, const std::vector< DictionaryEntry > &dictionary)
const std::string CHEESE
const std::string DICTIONARYTOKEN
constexpr uint8_t kScrollVertical
std::string ParseTextDataByte(uint8_t value)
absl::Status LoadExpandedMessages(std::string &expanded_message_path, std::vector< std::string > &parsed_messages, std::vector< MessageData > &expanded_messages, std::vector< DictionaryEntry > &dictionary)
constexpr uint8_t kLine1
constexpr int kTextData
constexpr int kTextData2
std::string ReplaceAllDictionaryWords(std::string str, const std::vector< DictionaryEntry > &dictionary)
constexpr uint8_t kLine2
constexpr int kPointersDictionaries
constexpr int kNumDictionaryEntries
absl::StatusOr< MessageData > ParseSingleMessage(const std::vector< uint8_t > &rom_data, int *current_pos)
std::vector< std::string > ParseMessageData(std::vector< MessageData > &message_data, const std::vector< DictionaryEntry > &dictionary_entries)
std::optional< TextElement > FindMatchingSpecial(uint8_t value)
constexpr uint8_t kMessageTerminator
constexpr int kTextData2End
std::vector< DictionaryEntry > BuildDictionaryEntries(Rom *rom)
std::vector< uint8_t > ParseMessageToData(std::string str)
constexpr uint8_t kWidthArraySize
constexpr uint8_t DICTOFF
std::optional< TextElement > FindMatchingCommand(uint8_t b)
ParsedElement FindMatchingElement(const std::string &str)
constexpr uint8_t kLine3
int8_t FindDictionaryEntry(uint8_t value)
constexpr int kTextDataEnd
Main namespace for the application.
bool ContainedInString(std::string_view s) const
std::string ReplaceInstancesOfIn(std::string_view s) const
DictionaryEntry(uint8_t i, std::string_view s)
std::vector< uint8_t > Data
MessageData(const MessageData &other)
std::vector< uint8_t > Data
std::vector< uint8_t > DataParsed
std::string OptimizeMessageForDictionary(std::string_view message_string, const std::vector< DictionaryEntry > &dictionary)
void SetMessage(const std::string &message, const std::vector< DictionaryEntry > &dictionary)
MessageData(int id, int address, const std::string &rawString, const std::vector< uint8_t > &rawData, const std::string &parsedString, const std::vector< uint8_t > &parsedData)
ParsedElement(const TextElement &textElement, uint8_t value)
std::smatch MatchMe(const std::string &dfrag) const
TextElement(uint8_t id, const std::string &token, bool arg, const std::string &description)
bool operator==(const TextElement &other) const
std::string GetParamToken(uint8_t value=0) const