yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
compression.cc
Go to the documentation of this file.
1#include "compression.h"
2
3#include <climits>
4#include <iostream>
5#include <memory>
6#include <string>
7
8#include "absl/status/status.h"
9#include "absl/status/statusor.h"
10#include "absl/strings/str_format.h"
11#include "rom/rom.h"
12#include "util/hyrule_magic.h"
13#include "util/macro.h"
14
15#define DEBUG_LOG(msg) std::cout << msg << std::endl
16
17namespace yaze {
18namespace gfx {
19
20std::vector<uint8_t> HyruleMagicCompress(uint8_t const* const src,
21 int const oldsize, int* const size,
22 int const flag) {
23 unsigned char* b2 =
24 (unsigned char*)malloc(0x1000); // allocate a 2^12 sized buffer
25
26 int i, j, k, l, m = 0, n, o = 0, bd = 0, p, q = 0, r;
27
28 for (i = 0; i < oldsize;) {
29 l = src[i]; // grab a char from the buffer.
30
31 k = 0;
32
33 r = !!q; // r = the same logical value (0 or 1) as q, but not the same
34 // value necesarily.
35
36 for (j = 0; j < i - 1; j++) {
37 if (src[j] == l) {
38 m = oldsize - j;
39
40 for (n = 0; n < m; n++)
41 if (src[n + j] != src[n + i])
42 break;
43
44 if (n > k)
45 k = n, o = j;
46 }
47 }
48
49 for (n = i + 1; n < oldsize; n++) {
50 if (src[n] != l) {
51 // look for chars identical to the first one.
52 // stop if we can't find one.
53 // n will reach i+k+1 for some k >= 0.
54
55 break;
56 }
57 }
58
59 n -= i; // offset back by i. i.e. n = k+1 as above.
60
61 if (n > 1 + r)
62 p = 1;
63 else {
64 m = src[i + 1];
65
66 for (n = i + 2; n < oldsize; n++) {
67 if (src[n] != l)
68 break;
69
70 n++;
71
72 if (src[n] != m)
73 break;
74 }
75
76 n -= i;
77
78 if (n > 2 + r)
79 p = 2;
80 else {
81 m = oldsize - i;
82
83 for (n = 1; n < m; n++)
84 if (src[i + n] != l + n)
85 break;
86
87 if (n > 1 + r)
88 p = 3;
89 else
90 p = 0;
91 }
92 }
93
94 if (k > 3 + r && k > n + (p & 1))
95 p = 4, n = k;
96
97 if (!p)
98 q++, i++;
99 else {
100 if (q) {
101 q--;
102
103 if (q > 31) {
104 b2[bd++] = (unsigned char)(224 + (q >> 8));
105 }
106
107 b2[bd++] = (unsigned char)q;
108 q++;
109
110 memcpy(b2 + bd, src + i - q, q);
111
112 bd += q;
113 q = 0;
114 }
115
116 i += n;
117 n--;
118
119 if (n > 31) {
120 b2[bd++] = (unsigned char)(224 + (n >> 8) + (p << 2));
121 b2[bd++] = (unsigned char)n;
122 } else
123 b2[bd++] = (unsigned char)((p << 5) + n);
124
125 switch (p) {
126 case 1:
127 case 3:
128 b2[bd++] = (unsigned char)l;
129 break;
130
131 case 2:
132 b2[bd++] = (unsigned char)l;
133 b2[bd++] = (unsigned char)m;
134
135 break;
136
137 case 4:
138 if (flag) {
139 b2[bd++] = (unsigned char)(o >> 8);
140 b2[bd++] = (unsigned char)o;
141 } else {
142 b2[bd++] = (unsigned char)o;
143 b2[bd++] = (unsigned char)(o >> 8);
144 }
145 }
146
147 continue;
148 }
149 }
150
151 if (q) {
152 q--;
153
154 if (q > 31) {
155 b2[bd++] = (unsigned char)(224 + (q >> 8));
156 }
157
158 b2[bd++] = (unsigned char)q;
159 q++;
160
161 memcpy(b2 + bd, src + i - q, q);
162
163 bd += q;
164 }
165
166 b2[bd++] = 255;
167 b2 = (unsigned char*)realloc(b2, bd);
168 *size = bd;
169
170 std::vector<uint8_t> compressed_data(b2, b2 + bd);
171 free(b2);
172 return compressed_data;
173}
174
175std::vector<uint8_t> HyruleMagicDecompress(uint8_t const* src, int* const size,
176 int const p_big_endian,
177 size_t max_src_size) {
178 unsigned char* b2 = (unsigned char*)malloc(1024);
179 const uint8_t* const src_start = src;
180
181 int bd = 0, bs = 1024;
182
183 unsigned char a;
184 unsigned char b;
185 unsigned short c, d;
186
187 for (;;) {
188 // Bounds check: Ensure we can read at least one byte (command)
189 if (max_src_size != static_cast<size_t>(-1) &&
190 (size_t)(src - src_start) >= max_src_size) {
191 std::cerr << "HyruleMagicDecompress: Reached end of buffer unexpectedly."
192 << std::endl;
193 break;
194 }
195
196 // retrieve a uint8_t from the buffer.
197 a = *(src++);
198
199 // end the decompression routine if we encounter 0xff.
200 if (a == 0xff)
201 break;
202
203 // examine the top 3 bits of a.
204 b = (a >> 5);
205
206 if (b == 7) // i.e. 0b 111
207 {
208 // get bits 0b 0001 1100
209 b = ((a >> 2) & 7);
210
211 // get bits 0b 0000 0011, multiply by 256, OR with the next byte.
212 c = ((a & 0x0003) << 8);
213
214 // Bounds check: Ensure we can read the next byte
215 if (max_src_size != static_cast<size_t>(-1) &&
216 (size_t)(src - src_start) >= max_src_size) {
217 std::cerr << "HyruleMagicDecompress: Reached end of buffer reading "
218 "extended len"
219 << std::endl;
220 break;
221 }
222 c |= *(src++);
223 } else
224 // or get bits 0b 0001 1111
225 c = (uint16_t)(a & 31);
226
227 c++;
228
229 if ((bd + c) > (bs - 512)) {
230 // need to increase the buffer size.
231 bs += 1024;
232 // Safety check for excessive allocation
233 if (bs > 1024 * 1024 * 16) { // 16MB limit
234 std::cerr << "HyruleMagicDecompress: Excessive allocation detected."
235 << std::endl;
236 free(b2);
237 return std::vector<uint8_t>();
238 }
239 unsigned char* new_b2 = (unsigned char*)realloc(b2, bs);
240 if (!new_b2) {
241 free(b2);
242 return std::vector<uint8_t>();
243 }
244 b2 = new_b2;
245 }
246
247 // 7 was handled, here we handle other decompression codes.
248
249 switch (b) {
250 case 0: // 0b 000
251
252 // raw copy
253
254 // Bounds check: Ensure we can read c bytes
255 if (max_src_size != static_cast<size_t>(-1) &&
256 (size_t)(src - src_start + c) > max_src_size) {
257 std::cerr << "HyruleMagicDecompress: Raw copy exceeds buffer."
258 << std::endl;
259 goto end_decompression;
260 }
261
262 // copy info from the src buffer to our new buffer,
263 // at offset bd (which we'll be increasing;
264 memcpy(b2 + bd, src, c);
265
266 // increment the src pointer accordingly.
267 src += c;
268
269 bd += c;
270
271 break;
272
273 case 1: // 0b 001
274
275 // rle copy
276
277 // Bounds check: Ensure we can read 1 byte
278 if (max_src_size != static_cast<size_t>(-1) &&
279 (size_t)(src - src_start) >= max_src_size) {
280 std::cerr << "HyruleMagicDecompress: RLE copy exceeds buffer."
281 << std::endl;
282 goto end_decompression;
283 }
284
285 // make c duplicates of one byte, inc the src pointer.
286 memset(b2 + bd, *(src++), c);
287
288 // increase the b2 offset.
289 bd += c;
290
291 break;
292
293 case 2: // 0b 010
294
295 // rle 16-bit alternating copy
296
297 // Bounds check: Ensure we can read 2 bytes
298 if (max_src_size != static_cast<size_t>(-1) &&
299 (size_t)(src - src_start + 2) > max_src_size) {
300 std::cerr << "HyruleMagicDecompress: RLE 16-bit copy exceeds buffer."
301 << std::endl;
302 goto end_decompression;
303 }
304
305 d = zelda3::ldle16b(src);
306
307 src += 2;
308
309 while (c > 1) {
310 // copy that 16-bit number c/2 times into the b2 buffer.
311 zelda3::stle16b(b2 + bd, d);
312
313 bd += 2;
314 c -= 2; // hence c/2
315 }
316
317 if (c) // if there's a remainder of c/2, this handles it.
318 b2[bd++] = (char)d;
319
320 break;
321
322 case 3: // 0b 011
323
324 // incrementing copy
325
326 // Bounds check: Ensure we can read 1 byte
327 if (max_src_size != static_cast<size_t>(-1) &&
328 (size_t)(src - src_start) >= max_src_size) {
329 std::cerr << "HyruleMagicDecompress: Inc copy exceeds buffer."
330 << std::endl;
331 goto end_decompression;
332 }
333
334 // get the current src byte.
335 a = *(src++);
336
337 while (c--) {
338 // increment that byte and copy to b2 in c iterations.
339 // e.g. a = 4, b2 will have 4,5,6,7,8... written to it.
340 b2[bd++] = a++;
341 }
342
343 break;
344
345 default: // 0b 100, 101, 110
346
347 // lz copy
348
349 // Bounds check: Ensure we can read 2 bytes
350 if (max_src_size != static_cast<size_t>(-1) &&
351 (size_t)(src - src_start + 2) > max_src_size) {
352 std::cerr << "HyruleMagicDecompress: LZ copy exceeds buffer."
353 << std::endl;
354 goto end_decompression;
355 }
356
357 if (p_big_endian) {
358 d = (*src << 8) + src[1];
359 } else {
360 d = zelda3::ldle16b(src);
361 }
362
363 // Safety check for LZ back-reference
364 if (d >= bd) {
365 // This is technically allowed in some LZ variants (referencing future bytes that are 0 initialized)
366 // but usually indicates corruption in this context if d is way out.
367 // However, standard LZ references previous data.
368 // If d is an absolute offset in the buffer, it must be < bd?
369 // Wait, b2[d++] implies d is an index into b2.
370 // If d >= bd, we are reading uninitialized data or data we haven't written yet.
371 // But if it's a sliding window, d could be relative?
372 // The code says `b2[d++]`. This looks like absolute offset in output buffer.
373 // If d > bd, it's definitely suspicious, but maybe valid if it wraps?
374 // But this implementation reallocs b2, so it's a linear buffer.
375 // Let's just check if d + c > bs (buffer size) or something?
376 // Actually, if d points to garbage, we just copy garbage.
377 // But if d exceeds allocated memory, we crash.
378 // We realloc b2 to `bs`. So we must ensure `d + c <= bs`?
379 // No, `d` increments.
380 // We need to ensure `d` stays within valid `b2` range.
381 // Since we are writing to `bd`, `d` should probably be < `bd` usually.
382 // But let's just ensure `d < bs`.
383 }
384
385 while (c--) {
386 // copy from a different location in the buffer.
387 if (d >= bs) {
388 // Expand buffer if needed? Or just clamp?
389 // If we are reading past buffer size, it's bad.
390 // But we might have realloced b2 to be larger than bd.
391 // So d < bs is the hard limit.
392 if (d >= bs) {
393 std::cerr << "HyruleMagicDecompress: LZ ref out of bounds."
394 << std::endl;
395 goto end_decompression;
396 }
397 }
398 b2[bd++] = b2[d++];
399 }
400
401 src += 2;
402 }
403 }
404
405end_decompression:
406 b2 = (unsigned char*)realloc(b2, bd);
407
408 if (size)
409 (*size) = bd;
410
411 // return the unsigned char* buffer b2, which contains the uncompressed data.
412 std::vector<uint8_t> decompressed_data(b2, b2 + bd);
413 free(b2);
414 return decompressed_data;
415}
416
417namespace lc_lz2 {
418
420 std::cout << "Command: " << std::to_string(piece->command) << "\n";
421 std::cout << "Command Length: " << piece->length << "\n";
422 std::cout << "Argument: ";
423 auto arg_size = piece->argument.size();
424 for (int i = 0; i < arg_size; ++i) {
425 printf("%02X ", piece->argument.at(i));
426 }
427 std::cout << "\nArgument Length: " << piece->argument_length << "\n";
428}
429
431 auto compressed_chain = chain_head->next;
432 while (compressed_chain != nullptr) {
433 std::cout << "- Compression Piece -\n";
434 PrintCompressionPiece(compressed_chain);
435 compressed_chain = compressed_chain->next;
436 }
437}
438
439void CheckByteRepeat(const uint8_t* rom_data, DataSizeArray& data_size_taken,
440 CommandArgumentArray& cmd_args, uint& src_data_pos,
441 const unsigned int last_pos) {
442 unsigned int pos = src_data_pos;
443 char byte_to_repeat = rom_data[pos];
444 while (pos <= last_pos && rom_data[pos] == byte_to_repeat) {
445 data_size_taken[kCommandByteFill]++;
446 pos++;
447 }
448 cmd_args[kCommandByteFill][0] = byte_to_repeat;
449}
450
451void CheckWordRepeat(const uint8_t* rom_data, DataSizeArray& data_size_taken,
452 CommandArgumentArray& cmd_args, uint& src_data_pos,
453 const unsigned int last_pos) {
454 if (src_data_pos + 2 <= last_pos &&
455 rom_data[src_data_pos] != rom_data[src_data_pos + 1]) {
456 unsigned int pos = src_data_pos;
457 char byte1 = rom_data[pos];
458 char byte2 = rom_data[pos + 1];
459 pos += 2;
460 data_size_taken[kCommandWordFill] = 2;
461 while (pos + 1 <= last_pos) {
462 if (rom_data[pos] == byte1 && rom_data[pos + 1] == byte2)
463 data_size_taken[kCommandWordFill] += 2;
464 else
465 break;
466 pos += 2;
467 }
468 cmd_args[kCommandWordFill][0] = byte1;
469 cmd_args[kCommandWordFill][1] = byte2;
470 }
471}
472
473void CheckIncByte(const uint8_t* rom_data, DataSizeArray& data_size_taken,
474 CommandArgumentArray& cmd_args, uint& src_data_pos,
475 const unsigned int last_pos) {
476 unsigned int pos = src_data_pos;
477 char byte = rom_data[pos];
478 pos++;
479 data_size_taken[kCommandIncreasingFill] = 1;
480 byte++;
481 while (pos <= last_pos && byte == rom_data[pos]) {
482 data_size_taken[kCommandIncreasingFill]++;
483 byte++;
484 pos++;
485 }
486 cmd_args[kCommandIncreasingFill][0] = rom_data[src_data_pos];
487}
488
489void CheckIntraCopy(const uint8_t* rom_data, DataSizeArray& data_size_taken,
490 CommandArgumentArray& cmd_args, uint& src_data_pos,
491 const unsigned int last_pos, unsigned int start) {
492 if (src_data_pos != start) {
493 unsigned int searching_pos = start;
494 unsigned int current_pos_u = src_data_pos;
495 unsigned int copied_size = 0;
496 unsigned int search_start = start;
497
498 while (searching_pos < src_data_pos && current_pos_u <= last_pos) {
499 while (rom_data[current_pos_u] != rom_data[searching_pos] &&
500 searching_pos < src_data_pos)
501 searching_pos++;
502 search_start = searching_pos;
503 while (current_pos_u <= last_pos &&
504 rom_data[current_pos_u] == rom_data[searching_pos] &&
505 searching_pos < src_data_pos) {
506 copied_size++;
507 current_pos_u++;
508 searching_pos++;
509 }
510 if (copied_size > data_size_taken[kCommandRepeatingBytes]) {
511 search_start -= start;
512 printf("- Found repeat of %d at %d\n", copied_size, search_start);
513 data_size_taken[kCommandRepeatingBytes] = copied_size;
514 cmd_args[kCommandRepeatingBytes][0] = search_start & kSnesByteMax;
515 cmd_args[kCommandRepeatingBytes][1] = search_start >> 8;
516 }
517 current_pos_u = src_data_pos;
518 copied_size = 0;
519 }
520 }
521}
522
523// Check if a command managed to pick up `max_win` or more bytes
524// Avoids being even with copy command, since it's possible to merge copy
525void ValidateForByteGain(const DataSizeArray& data_size_taken,
526 const CommandSizeArray& cmd_size, uint& max_win,
527 uint& cmd_with_max) {
528 for (unsigned int cmd_i = 1; cmd_i < 5; cmd_i++) {
529 unsigned int cmd_size_taken = data_size_taken[cmd_i];
530 // TODO(@scawful): Replace conditional with table of command sizes
531 // "Table that is even with copy but all other cmd are 2"
532 auto table_check =
533 !(cmd_i == kCommandRepeatingBytes && cmd_size_taken == 3);
534 if (cmd_size_taken > max_win && cmd_size_taken > cmd_size[cmd_i] &&
535 table_check) {
536 printf("==> C:%d / S:%d\n", cmd_i, cmd_size_taken);
537 cmd_with_max = cmd_i;
538 max_win = cmd_size_taken;
539 }
540 }
541}
542
543void CompressionCommandAlternative(const uint8_t* rom_data,
544 CompressionPiecePointer& compressed_chain,
545 const CommandSizeArray& cmd_size,
546 const CommandArgumentArray& cmd_args,
547 uint& src_data_pos, uint& comp_accumulator,
548 uint& cmd_with_max, uint& max_win) {
549 printf("- Ok we get a gain from %d\n", cmd_with_max);
550 std::string buffer;
551 buffer.push_back(cmd_args[cmd_with_max][0]);
552 if (cmd_size[cmd_with_max] == 2) {
553 buffer.push_back(cmd_args[cmd_with_max][1]);
554 }
555
556 auto new_comp_piece = std::make_shared<CompressionPiece>(
557 cmd_with_max, max_win, buffer, cmd_size[cmd_with_max]);
558 PrintCompressionPiece(new_comp_piece);
559 // If we let non compressed stuff, we need to add a copy chunk before
560 if (comp_accumulator != 0) {
561 std::string copy_buff;
562 copy_buff.resize(comp_accumulator);
563 for (int i = 0; i < comp_accumulator; ++i) {
564 copy_buff[i] = rom_data[i + src_data_pos - comp_accumulator];
565 }
566 auto copy_chunk = std::make_shared<CompressionPiece>(
567 kCommandDirectCopy, comp_accumulator, copy_buff, comp_accumulator);
568 compressed_chain->next = copy_chunk;
569 compressed_chain = copy_chunk;
570 } else {
571 compressed_chain->next = new_comp_piece;
572 compressed_chain = new_comp_piece;
573 }
574 src_data_pos += max_win;
575 comp_accumulator = 0;
576}
577
578void CheckByteRepeatV2(const uint8_t* data, uint& src_pos,
579 const unsigned int last_pos, CompressionCommand& cmd) {
580 unsigned int i = 0;
581 while (src_pos + i < last_pos && data[src_pos] == data[src_pos + i]) {
582 ++i;
583 }
585 cmd.arguments[kCommandByteFill][0] = data[src_pos];
586}
587
588void CheckWordRepeatV2(const uint8_t* data, uint& src_pos,
589 const unsigned int last_pos, CompressionCommand& cmd) {
590 if (src_pos + 2 <= last_pos && data[src_pos] != data[src_pos + 1]) {
591 unsigned int pos = src_pos;
592 char byte1 = data[pos];
593 char byte2 = data[pos + 1];
594 pos += 2;
596 while (pos + 1 <= last_pos) {
597 if (data[pos] == byte1 && data[pos + 1] == byte2)
598 cmd.data_size[kCommandWordFill] += 2;
599 else
600 break;
601 pos += 2;
602 }
603 cmd.arguments[kCommandWordFill][0] = byte1;
604 cmd.arguments[kCommandWordFill][1] = byte2;
605 }
606}
607
608void CheckIncByteV2(const uint8_t* rom_data, uint& src_data_pos,
609 const unsigned int last_pos, CompressionCommand& cmd) {
610 unsigned int pos = src_data_pos;
611 char byte = rom_data[pos];
612 pos++;
614 byte++;
615 while (pos <= last_pos && byte == rom_data[pos]) {
617 byte++;
618 pos++;
619 }
620 cmd.arguments[kCommandIncreasingFill][0] = rom_data[src_data_pos];
621}
622
623void CheckIntraCopyV2(const uint8_t* rom_data, uint& src_data_pos,
624 const unsigned int last_pos, unsigned int start,
625 CompressionCommand& cmd) {
626 if (src_data_pos != start) {
627 unsigned int searching_pos = start;
628 unsigned int current_pos_u = src_data_pos;
629 unsigned int copied_size = 0;
630 unsigned int search_start = start;
631
632 while (searching_pos < src_data_pos && current_pos_u <= last_pos) {
633 while (rom_data[current_pos_u] != rom_data[searching_pos] &&
634 searching_pos < src_data_pos)
635 searching_pos++;
636 search_start = searching_pos;
637 while (current_pos_u <= last_pos &&
638 rom_data[current_pos_u] == rom_data[searching_pos] &&
639 searching_pos < src_data_pos) {
640 copied_size++;
641 current_pos_u++;
642 searching_pos++;
643 }
644 if (copied_size > cmd.data_size[kCommandRepeatingBytes]) {
645 search_start -= start;
646 printf("- Found repeat of %d at %d\n", copied_size, search_start);
647 cmd.data_size[kCommandRepeatingBytes] = copied_size;
648 cmd.arguments[kCommandRepeatingBytes][0] = search_start & kSnesByteMax;
649 cmd.arguments[kCommandRepeatingBytes][1] = search_start >> 8;
650 }
651 current_pos_u = src_data_pos;
652 copied_size = 0;
653 }
654 }
655}
656
657// Table indicating command sizes, in bytes
658const std::array<int, 5> kCommandSizes = {1, 2, 2, 2, 3};
659
660// TODO(@scawful): TEST ME
662 uint& cmd_with_max) {
663 for (unsigned int cmd_i = 1; cmd_i < 5; cmd_i++) {
664 unsigned int cmd_size_taken = cmd.data_size[cmd_i];
665 // Check if the command size exceeds the maximum win and the size in the
666 // command sizes table, except for the repeating bytes command when the size
667 // taken is 3
668 if (cmd_size_taken > max_win && cmd_size_taken > kCommandSizes[cmd_i] &&
669 !(cmd_i == kCommandRepeatingBytes && cmd_size_taken == 3)) {
670 printf("==> C:%d / S:%d\n", cmd_i, cmd_size_taken);
671 cmd_with_max = cmd_i;
672 max_win = cmd_size_taken;
673 }
674 }
675}
676
677void CompressionCommandAlternativeV2(const uint8_t* rom_data,
678 const CompressionCommand& cmd,
679 CompressionPiecePointer& compressed_chain,
680 uint& src_data_pos, uint& comp_accumulator,
681 uint& cmd_with_max, uint& max_win) {
682 printf("- Ok we get a gain from %d\n", cmd_with_max);
683 std::string buffer;
684 buffer.push_back(cmd.arguments[cmd_with_max][0]);
685 if (cmd.cmd_size[cmd_with_max] == 2) {
686 buffer.push_back(cmd.arguments[cmd_with_max][1]);
687 }
688
689 auto new_comp_piece = std::make_shared<CompressionPiece>(
690 cmd_with_max, max_win, buffer, cmd.cmd_size[cmd_with_max]);
691 PrintCompressionPiece(new_comp_piece);
692 // If we let non compressed stuff, we need to add a copy chunk before
693 if (comp_accumulator != 0) {
694 std::string copy_buff;
695 copy_buff.resize(comp_accumulator);
696 for (int i = 0; i < comp_accumulator; ++i) {
697 copy_buff[i] = rom_data[i + src_data_pos - comp_accumulator];
698 }
699 auto copy_chunk = std::make_shared<CompressionPiece>(
700 kCommandDirectCopy, comp_accumulator, copy_buff, comp_accumulator);
701 compressed_chain->next = copy_chunk;
702 compressed_chain = copy_chunk;
703 } else {
704 compressed_chain->next = new_comp_piece;
705 compressed_chain = new_comp_piece;
706 }
707 src_data_pos += max_win;
708 comp_accumulator = 0;
709}
710
712 const uint8_t* rom_data, CompressionPiecePointer& compressed_chain,
713 const CompressionCommand& command, uint& source_data_position,
714 uint& uncompressed_data_size, uint& best_command, uint& best_command_gain) {
715 std::cout << "- Identified a gain from command: " << best_command
716 << std::endl;
717
718 // Create a buffer to store the arguments for the best command.
719 std::string argument_buffer;
720 argument_buffer.push_back(command.arguments[best_command][0]);
721 if (command.cmd_size[best_command] == 2) {
722 argument_buffer.push_back(command.arguments[best_command][1]);
723 }
724
725 // Create a new compression piece for the best command.
726 auto new_compression_piece = std::make_shared<CompressionPiece>(
727 best_command, best_command_gain, argument_buffer,
728 command.cmd_size[best_command]);
729 PrintCompressionPiece(new_compression_piece);
730
731 // If there is uncompressed data, create a direct copy compression piece for
732 // it.
733 if (uncompressed_data_size != 0) {
734 std::string copy_buffer(uncompressed_data_size, 0);
735 for (int i = 0; i < uncompressed_data_size; ++i) {
736 copy_buffer[i] =
737 rom_data[i + source_data_position - uncompressed_data_size];
738 }
739 auto direct_copy_piece = std::make_shared<CompressionPiece>(
740 kCommandDirectCopy, uncompressed_data_size, copy_buffer,
741 uncompressed_data_size);
742
743 // Append the direct copy piece to the chain.
744 compressed_chain->next = direct_copy_piece;
745 compressed_chain = direct_copy_piece;
746 }
747
748 // Append the new compression piece to the chain.
749 compressed_chain->next = new_compression_piece;
750 compressed_chain = new_compression_piece;
751
752 // Update the position in the source data and reset the uncompressed data
753 // size.
754 source_data_position += best_command_gain;
755 uncompressed_data_size = 0;
756}
757
758absl::StatusOr<CompressionPiecePointer> SplitCompressionPiece(
759 CompressionPiecePointer& piece, int mode) {
760 CompressionPiecePointer new_piece;
761 unsigned int length_left = piece->length - kMaxLengthCompression;
762 piece->length = kMaxLengthCompression;
763
764 switch (piece->command) {
765 case kCommandByteFill:
766 case kCommandWordFill:
767 new_piece = std::make_shared<CompressionPiece>(
768 piece->command, length_left, piece->argument, piece->argument_length);
769 break;
771 new_piece = std::make_shared<CompressionPiece>(
772 piece->command, length_left, piece->argument, piece->argument_length);
773 new_piece->argument[0] =
774 (char)(piece->argument[0] + kMaxLengthCompression);
775 break;
776 case kCommandDirectCopy: {
777 std::string empty;
778 piece->argument_length = kMaxLengthCompression;
779 new_piece = std::make_shared<CompressionPiece>(
780 piece->command, length_left, empty, length_left);
781 // MEMCPY
782 for (int i = 0; i < length_left; ++i) {
783 new_piece->argument[i] = piece->argument[i + kMaxLengthCompression];
784 }
785 break;
786 }
788 piece->argument_length = kMaxLengthCompression;
789 unsigned int offset = piece->argument[0] + (piece->argument[1] << 8);
790 new_piece = std::make_shared<CompressionPiece>(
791 piece->command, length_left, piece->argument, piece->argument_length);
792 if (mode == kNintendoMode2) {
793 new_piece->argument[0] =
795 new_piece->argument[1] = (offset + kMaxLengthCompression) >> 8;
796 }
797 if (mode == kNintendoMode1) {
798 new_piece->argument[1] =
800 new_piece->argument[0] = (offset + kMaxLengthCompression) >> 8;
801 }
802 } break;
803 default: {
804 return absl::InvalidArgumentError(
805 "SplitCompressionCommand: Invalid Command");
806 }
807 }
808 return new_piece;
809}
810
812 int mode) {
813 unsigned int pos = 0;
814 auto piece = start;
815 std::vector<uint8_t> output;
816
817 while (piece != nullptr) {
818 if (piece->length <= kMaxLengthNormalHeader) { // Normal header
819 output.push_back(BUILD_HEADER(piece->command, piece->length));
820 pos++;
821 } else {
822 if (piece->length <= kMaxLengthCompression) {
823 output.push_back(kCompressionStringMod |
824 ((uint8_t)piece->command << 2) |
825 (((piece->length - 1) & 0xFF00) >> 8));
826 pos++;
827 printf("Building extended header : cmd: %d, length: %d - %02X\n",
828 piece->command, piece->length, output[pos - 1]);
829 output.push_back(((piece->length - 1) & 0x00FF)); // (char)
830 pos++;
831 } else {
832 // We need to split the command
833 auto new_piece = SplitCompressionPiece(piece, mode);
834 if (!new_piece.ok()) {
835 std::cout << new_piece.status().ToString() << std::endl;
836 }
837 printf("New added piece\n");
838 auto piece_data = new_piece.value();
839 PrintCompressionPiece(piece_data);
840 piece_data->next = piece->next;
841 piece->next = piece_data;
842 continue;
843 }
844 }
845
846 if (piece->command == kCommandRepeatingBytes) {
847 char tmp[2];
848 tmp[0] = piece->argument[0];
849 tmp[1] = piece->argument[1];
850 if (mode == kNintendoMode1) {
851 tmp[0] = piece->argument[1];
852 tmp[1] = piece->argument[0];
853 }
854 for (const auto& each : tmp) {
855 output.push_back(each);
856 pos++;
857 }
858 } else {
859 for (int i = 0; i < piece->argument_length; ++i) {
860 output.push_back(piece->argument[i]);
861 pos++;
862 }
863 }
864 pos += piece->argument_length;
865 piece = piece->next;
866 }
867 output.push_back(kSnesByteMax);
868 return output;
869}
870
872 int mode, int start, int src_data_pos) {
873 if (chain_head->next != nullptr) {
874 Rom temp_rom;
876 temp_rom.LoadFromData(CreateCompressionString(chain_head->next, mode)))
878 auto decomp_data,
879 DecompressV2(temp_rom.data(), 0, temp_rom.size(), 1, temp_rom.size()));
880 if (!std::equal(decomp_data.begin() + start, decomp_data.end(),
881 temp_rom.begin())) {
882 return absl::InternalError(absl::StrFormat(
883 "Compressed data does not match uncompressed data at %d\n",
884 (uint)(src_data_pos - start)));
885 }
886 }
887 return absl::OkStatus();
888}
889
890// Merge consecutive copy if possible
892 CompressionPiecePointer piece = start;
893
894 while (piece != nullptr) {
895 if (piece->command == kCommandDirectCopy && piece->next != nullptr &&
896 piece->next->command == kCommandDirectCopy &&
897 piece->length + piece->next->length <= kMaxLengthCompression) {
898 unsigned int previous_length = piece->length;
899 piece->length = piece->length + piece->next->length;
900
901 for (int i = 0; i < piece->next->argument_length; ++i) {
902 piece->argument[i + previous_length] = piece->next->argument[i];
903 }
904 piece->argument_length = piece->length;
906
907 auto p_next_next = piece->next->next;
908 piece->next = p_next_next;
909 continue; // Next could be another copy
910 }
911 piece = piece->next;
912 }
913 return start;
914}
915
916absl::StatusOr<std::vector<uint8_t>> CompressV2(const uint8_t* data,
917 const int start,
918 const int length, int mode,
919 bool check) {
920 // Surely there's no need to compress zero...
921 if (length == 0) {
922 return std::vector<uint8_t>();
923 }
924
925 // Worst case should be a copy of the string with extended header
926 std::string worst_case = "aaa";
927 auto compressed_chain =
928 std::make_shared<CompressionPiece>(1, 1, worst_case, 2);
929 auto compressed_chain_start = compressed_chain;
930
931 CompressionCommand current_cmd = {/*argument*/ {{}},
932 /*cmd_size*/ {0, 1, 2, 1, 2},
933 /*data_size*/ {0, 0, 0, 0, 0}};
934
935 unsigned int src_pos = start;
936 unsigned int last_pos = start + length - 1;
937 unsigned int comp_accumulator = 0; // Used when skipping using copy
938
939 while (true) {
940 current_cmd.data_size.fill({});
941 current_cmd.arguments.fill({{}});
942
943 CheckByteRepeatV2(data, src_pos, last_pos, current_cmd);
944 CheckWordRepeatV2(data, src_pos, last_pos, current_cmd);
945 CheckIncByteV2(data, src_pos, last_pos, current_cmd);
946 CheckIntraCopyV2(data, src_pos, last_pos, start, current_cmd);
947
948 unsigned int max_win = 2;
949 unsigned int cmd_with_max = kCommandDirectCopy;
950 ValidateForByteGain(current_cmd.data_size, current_cmd.cmd_size, max_win,
951 cmd_with_max);
952 // ValidateForByteGainV2(current_cmd, max_win, cmd_with_max);
953
954 if (cmd_with_max == kCommandDirectCopy) {
955 // This is the worst case scenario
956 // Progress through the next byte, in case there's a different
957 // compression command we can implement before we hit 32 bytes.
958 src_pos++;
959 comp_accumulator++;
960
961 // Arbitrary choice to do a 32 bytes grouping for copy.
962 if (comp_accumulator == 32 || src_pos > last_pos) {
963 std::string buffer = SetBuffer(data, src_pos, comp_accumulator);
964 auto new_comp_piece = std::make_shared<CompressionPiece>(
965 kCommandDirectCopy, comp_accumulator, buffer, comp_accumulator);
966 compressed_chain->next = new_comp_piece;
967 comp_accumulator = 0;
968 }
969 } else {
970 AddAlternativeCompressionCommand(data, compressed_chain, current_cmd,
971 src_pos, comp_accumulator, cmd_with_max,
972 max_win);
973 }
974
975 if (src_pos > last_pos) {
976 printf("Breaking compression loop\n");
977 break;
978 }
979
980 if (check) {
981 RETURN_IF_ERROR(ValidateCompressionResult(compressed_chain_start, mode,
982 start, src_pos))
983 }
984 }
985
986 // Skipping compression chain header
987 MergeCopy(compressed_chain_start->next);
988 PrintCompressionChain(compressed_chain_start);
989 return CreateCompressionString(compressed_chain_start->next, mode);
990}
991
992absl::StatusOr<std::vector<uint8_t>> CompressGraphics(const uint8_t* data,
993 const int pos,
994 const int length) {
995 return CompressV2(data, pos, length, kNintendoMode2);
996}
997
998absl::StatusOr<std::vector<uint8_t>> CompressOverworld(const uint8_t* data,
999 const int pos,
1000 const int length) {
1001 return CompressV2(data, pos, length, kNintendoMode1);
1002}
1003
1004absl::StatusOr<std::vector<uint8_t>> CompressOverworld(
1005 const std::vector<uint8_t> data, const int pos, const int length) {
1006 return CompressV3(data, pos, length, kNintendoMode1);
1007}
1008
1010 unsigned int pos = context.src_pos;
1011
1012 // Ensure the sequence does not start with an uncompressable byte
1013 if (pos == 0 || context.data[pos - 1] != context.data[pos]) {
1014 char byte_to_repeat = context.data[pos];
1015 while (pos <= context.last_pos && context.data[pos] == byte_to_repeat) {
1017 pos++;
1018 }
1019
1020 context.current_cmd.arguments[kCommandByteFill][0] = byte_to_repeat;
1021
1022 // Added debug log
1023 DEBUG_LOG("CheckByteRepeatV3: byte_to_repeat = "
1024 << (int)byte_to_repeat << ", size = "
1026 }
1027}
1028
1030 if (context.src_pos + 1 <= context.last_pos) { // Changed the condition here
1031 unsigned int pos = context.src_pos;
1032 char byte1 = context.data[pos];
1033 char byte2 = context.data[pos + 1];
1034 pos += 2;
1036 while (pos + 1 <= context.last_pos) {
1037 if (context.data[pos] == byte1 && context.data[pos + 1] == byte2)
1039 else
1040 break;
1041 pos += 2;
1042 }
1043
1044 context.current_cmd.arguments[kCommandWordFill][0] = byte1;
1045 context.current_cmd.arguments[kCommandWordFill][1] = byte2;
1046 }
1047
1048 DEBUG_LOG("CheckWordRepeatV3: byte1 = "
1049 << (int)context.current_cmd.arguments[kCommandWordFill][0]
1050 << ", byte2 = "
1051 << (int)context.current_cmd.arguments[kCommandWordFill][1]
1052 << ", size = " << context.current_cmd.data_size[kCommandWordFill]);
1053}
1054
1056 unsigned int pos = context.src_pos;
1057 uint8_t byte = context.data[pos];
1058 pos++;
1060 byte++;
1061
1062 while (pos <= context.last_pos && byte == context.data[pos]) {
1064 byte++;
1065 pos++;
1066 }
1067
1068 // Let's see if the sequence is surrounded by identical bytes and if so,
1069 // consider if a direct copy is better.
1070 if (context.current_cmd.data_size[kCommandIncreasingFill] == 3 &&
1071 context.src_pos > 0 && pos < context.data.size() &&
1072 context.data[context.src_pos - 1] == context.data[pos]) {
1074 0; // Reset the size to 0 to prioritize direct copy
1075 return;
1076 }
1077
1079 context.data[context.src_pos];
1080
1081 DEBUG_LOG("CheckIncByteV3: byte = "
1082 << (int)context.current_cmd.arguments[kCommandIncreasingFill][0]
1083 << ", size = "
1085}
1086
1088 const int window_size =
1089 32; // This can be adjusted for optimal performance and results
1090
1091 // We'll only search for repeating sequences if we're not at the very
1092 // beginning
1093 if (context.src_pos > 0 &&
1094 context.src_pos + window_size <= context.data.size()) {
1095 unsigned int max_copied_size = 0;
1096 unsigned int best_search_start = 0;
1097
1098 // Slide the window over the source data
1099 for (int win_pos = 1; win_pos < window_size && win_pos < context.src_pos;
1100 ++win_pos) {
1101 auto start_search_from = context.data.begin() + context.src_pos - win_pos;
1102 auto search_end = context.data.begin() + context.src_pos;
1103
1104 // Use std::search to find the sequence in the window in the previous
1105 // source data
1106 auto found_pos = std::search(
1107 start_search_from, search_end, context.data.begin() + context.src_pos,
1108 context.data.begin() + context.src_pos + win_pos);
1109
1110 if (found_pos != search_end) {
1111 // Check the entire length of the match
1112 unsigned int len = 0;
1113 while (context.src_pos + len < context.data.size() &&
1114 context.data[context.src_pos + len] == *(found_pos + len)) {
1115 len++;
1116 }
1117
1118 if (len > max_copied_size) {
1119 max_copied_size = len;
1120 best_search_start = found_pos - context.data.begin();
1121 }
1122 }
1123 }
1124
1125 if (max_copied_size >
1127 DEBUG_LOG("CheckIntraCopyV3: Detected repeating sequence of length "
1128 << max_copied_size << " starting from " << best_search_start);
1129 context.current_cmd.data_size[kCommandRepeatingBytes] = max_copied_size;
1131 best_search_start & kSnesByteMax;
1133 best_search_start >> 8;
1134 }
1135
1136 DEBUG_LOG("CheckIntraCopyV3: max_copied_size = " << max_copied_size
1137 << ", best_search_start = "
1138 << best_search_start);
1139 }
1140}
1141
1143 // Initialize the current_cmd with default values.
1144 context.current_cmd = {/*argument*/ {{}},
1145 /*cmd_size*/ {0, 1, 2, 1, 2},
1146 /*data_size*/ {0, 0, 0, 0, 0}};
1147}
1148
1150 // Reset the data_size and arguments for a fresh check.
1151 context.current_cmd.data_size.fill({});
1152 context.current_cmd.arguments.fill({{}});
1153
1154 CheckByteRepeatV3(context);
1155 CheckWordRepeatV3(context);
1156 CheckIncByteV3(context);
1157 CheckIntraCopyV3(context);
1158
1159 DEBUG_LOG("CheckAvailableCompressionCommands: src_pos = " << context.src_pos);
1160}
1161
1163 int max_net_savings = -1; // Adjusted the bias to consider any savings
1164
1165 // Start with the default scenario.
1167
1168 for (unsigned int cmd_i = 1; cmd_i < 5; cmd_i++) {
1169 unsigned int cmd_size_taken = context.current_cmd.data_size[cmd_i];
1170 int net_savings = cmd_size_taken - context.current_cmd.cmd_size[cmd_i];
1171
1172 // Skip commands that aren't efficient.
1173 if (cmd_size_taken <= 2 && cmd_i != kCommandDirectCopy) {
1174 continue;
1175 }
1176
1177 // Check surrounding data for optimization.
1178 if (context.src_pos > 0 &&
1179 context.src_pos + cmd_size_taken < context.data.size()) {
1180 char prev_byte = context.data[context.src_pos - 1];
1181 char next_byte = context.data[context.src_pos + cmd_size_taken];
1182 if (prev_byte != next_byte && cmd_size_taken == 3) {
1183 continue;
1184 }
1185 }
1186
1187 // Check if the current command offers more net savings.
1188 if (net_savings > max_net_savings) {
1189 context.cmd_with_max = cmd_i;
1190 max_net_savings = net_savings;
1191 }
1192 }
1193
1194 DEBUG_LOG("DetermineBestCompression: cmd_with_max = "
1195 << context.cmd_with_max << ", data_size = "
1196 << context.current_cmd.data_size[context.cmd_with_max]);
1197}
1198
1200 // If the next best compression method isn't direct copy and we have bytes
1201 // accumulated for direct copy, flush them out.
1202 if (context.cmd_with_max != kCommandDirectCopy &&
1203 context.comp_accumulator > 0) {
1204 uint8_t header = BUILD_HEADER(kCommandDirectCopy, context.comp_accumulator);
1205 context.compressed_data.push_back(header);
1206 std::vector<uint8_t> uncompressed_data(
1207 context.data.begin() + context.src_pos - context.comp_accumulator,
1208 context.data.begin() + context.src_pos);
1209 context.compressed_data.insert(context.compressed_data.end(),
1210 uncompressed_data.begin(),
1211 uncompressed_data.end());
1212 context.comp_accumulator = 0;
1213 return;
1214 }
1215
1216 // If the next best compression method is not direct copy and we haven't
1217 // accumulated any bytes, treat it as a single byte direct copy.
1218 if (context.cmd_with_max != kCommandDirectCopy &&
1219 context.comp_accumulator == 0) {
1220 context.compressed_data.push_back(
1221 0x00); // Command for a single byte direct copy
1222 context.compressed_data.push_back(
1223 context.data[context.src_pos]); // The single byte
1224 context.src_pos++;
1225 return;
1226 }
1227
1228 // If we reach here, accumulate bytes for a direct copy.
1229 context.src_pos++;
1230 context.comp_accumulator++;
1231
1232 // If we've accumulated the maximum bytes for a direct copy command or
1233 // reached the end, flush them.
1234 if (context.comp_accumulator >= 32 || context.src_pos > context.last_pos) {
1235 uint8_t header = BUILD_HEADER(kCommandDirectCopy, context.comp_accumulator);
1236 context.compressed_data.push_back(header);
1237 std::vector<uint8_t> uncompressed_data(
1238 context.data.begin() + context.src_pos - context.comp_accumulator,
1239 context.data.begin() + context.src_pos);
1240 context.compressed_data.insert(context.compressed_data.end(),
1241 uncompressed_data.begin(),
1242 uncompressed_data.end());
1243 context.comp_accumulator = 0;
1244 }
1245
1246 DEBUG_LOG("HandleDirectCopy: src_pos = " << context.src_pos
1247 << ", compressed_data size = "
1248 << context.compressed_data.size());
1249}
1250
1252 DEBUG_LOG("AddCompressionToChain: Adding command arguments: ");
1253
1254 // If there's uncompressed data, add a copy chunk before the compression
1255 // command
1256 if (context.comp_accumulator != 0) {
1257 uint8_t header = BUILD_HEADER(kCommandDirectCopy, context.comp_accumulator);
1258 context.compressed_data.push_back(header);
1259 std::vector<uint8_t> uncompressed_data(
1260 context.data.begin() + context.src_pos - context.comp_accumulator,
1261 context.data.begin() + context.src_pos);
1262 context.compressed_data.insert(context.compressed_data.end(),
1263 uncompressed_data.begin(),
1264 uncompressed_data.end());
1265 context.comp_accumulator = 0;
1266 }
1267
1268 // Now, add the compression command
1269 uint8_t header =
1270 BUILD_HEADER(context.cmd_with_max,
1271 context.current_cmd.data_size[context.cmd_with_max]);
1272 context.compressed_data.push_back(header);
1273
1274 DEBUG_LOG("AddCompressionToChain: (Before) src_pos = "
1275 << context.src_pos
1276 << ", compressed_data size = " << context.compressed_data.size());
1277
1278 // Add the command arguments to the compressed_data vector
1279 context.compressed_data.push_back(
1280 context.current_cmd.arguments[context.cmd_with_max][0]);
1281 if (context.current_cmd.cmd_size[context.cmd_with_max] == 2) {
1282 context.compressed_data.push_back(
1283 context.current_cmd.arguments[context.cmd_with_max][1]);
1284 }
1285
1286 context.src_pos += context.current_cmd.data_size[context.cmd_with_max];
1287 context.comp_accumulator = 0;
1288
1289 DEBUG_LOG("AddCompressionToChain: (After) src_pos = "
1290 << context.src_pos
1291 << ", compressed_data size = " << context.compressed_data.size());
1292}
1293
1295 if (!context.compressed_data.empty()) {
1296 Rom temp_rom;
1297 RETURN_IF_ERROR(temp_rom.LoadFromData(context.compressed_data));
1299 auto decomp_data,
1300 DecompressV2(temp_rom.data(), 0, temp_rom.size(), 1, temp_rom.size()));
1301
1302 if (!std::equal(decomp_data.begin() + context.start, decomp_data.end(),
1303 temp_rom.begin())) {
1304 return absl::InternalError(absl::StrFormat(
1305 "Compressed data does not match uncompressed data at %d\n",
1306 (context.src_pos - context.start)));
1307 }
1308 }
1309 return absl::OkStatus();
1310}
1311
1312absl::StatusOr<CompressionPiece> SplitCompressionPieceV3(
1313 CompressionPiece& piece, int mode) {
1314 CompressionPiece new_piece;
1315 unsigned int length_left = piece.length - kMaxLengthCompression;
1317
1318 switch (piece.command) {
1319 case kCommandByteFill:
1320 case kCommandWordFill:
1321 new_piece = CompressionPiece(piece.command, length_left, piece.argument,
1322 piece.argument_length);
1323 break;
1325 new_piece = CompressionPiece(piece.command, length_left, piece.argument,
1326 piece.argument_length);
1327 new_piece.argument[0] = (char)(piece.argument[0] + kMaxLengthCompression);
1328 break;
1329 case kCommandDirectCopy: {
1331 std::string empty_string = "";
1332 new_piece = CompressionPiece(piece.command, length_left, empty_string,
1333 length_left);
1334 // MEMCPY
1335 for (int i = 0; i < length_left; ++i) {
1336 new_piece.argument[i] = piece.argument[i + kMaxLengthCompression];
1337 }
1338 break;
1339 }
1342 unsigned int offset = piece.argument[0] + (piece.argument[1] << 8);
1343 new_piece = CompressionPiece(piece.command, length_left, piece.argument,
1344 piece.argument_length);
1345 if (mode == kNintendoMode2) {
1346 new_piece.argument[0] = (offset + kMaxLengthCompression) & kSnesByteMax;
1347 new_piece.argument[1] = (offset + kMaxLengthCompression) >> 8;
1348 }
1349 if (mode == kNintendoMode1) {
1350 new_piece.argument[1] = (offset + kMaxLengthCompression) & kSnesByteMax;
1351 new_piece.argument[0] = (offset + kMaxLengthCompression) >> 8;
1352 }
1353 } break;
1354 default: {
1355 return absl::InvalidArgumentError(
1356 "SplitCompressionCommand: Invalid Command");
1357 }
1358 }
1359
1360 return new_piece;
1361}
1362
1364 unsigned int pos = 0;
1365
1366 for (CompressionPiece& piece : context.compression_pieces) {
1367 if (piece.length <= kMaxLengthNormalHeader) { // Normal Header
1368 context.compression_string.push_back(
1369 BUILD_HEADER(piece.command, piece.length));
1370 pos++;
1371 } else {
1372 if (piece.length <= kMaxLengthCompression) {
1373 context.compression_string.push_back(
1374 kCompressionStringMod | ((uint8_t)piece.command << 2) |
1375 (((piece.length - 1) & 0xFF00) >> 8));
1376 pos++;
1377 std::cout << "Building extended header : cmd: " << piece.command
1378 << ", length: " << piece.length << " - "
1379 << (int)context.compression_string[pos - 1] << std::endl;
1380 context.compression_string.push_back(
1381 ((piece.length - 1) & 0x00FF)); // (char)
1382 } else {
1383 // We need to split the command
1384 auto new_piece = SplitCompressionPieceV3(piece, context.mode);
1385 if (!new_piece.ok()) {
1386 std::cout << new_piece.status().ToString() << std::endl;
1387 }
1388 context.compression_pieces.insert(
1389 context.compression_pieces.begin() + pos + 1, new_piece.value());
1390 continue;
1391 }
1392 }
1393
1394 if (piece.command == kCommandRepeatingBytes) {
1395 char tmp[2];
1396 tmp[0] = piece.argument[0];
1397 tmp[1] = piece.argument[1];
1398 if (context.mode == kNintendoMode1) {
1399 tmp[0] = piece.argument[1];
1400 tmp[1] = piece.argument[0];
1401 }
1402 for (const auto& each : tmp) {
1403 context.compression_string.push_back(each);
1404 pos++;
1405 }
1406 } else {
1407 for (int i = 0; i < piece.argument_length; ++i) {
1408 context.compression_string.push_back(piece.argument[i]);
1409 pos++;
1410 }
1411 }
1412 pos += piece.argument_length;
1413 }
1414
1415 // Add any remaining uncompressed data
1416 if (context.comp_accumulator > 0) {
1417 context.compressed_data.insert(
1418 context.compressed_data.end(),
1419 context.data.begin() + context.src_pos - context.comp_accumulator,
1420 context.data.begin() + context.src_pos);
1421 context.comp_accumulator = 0;
1422 }
1423
1424 // Add the end marker to the compressed data
1425 context.compressed_data.push_back(kSnesByteMax);
1426 DEBUG_LOG("FinalizeCompression: compressed_data size = "
1427 << context.compressed_data.size());
1428}
1429
1430absl::StatusOr<std::vector<uint8_t>> CompressV3(
1431 const std::vector<uint8_t>& data, const int start, const int length,
1432 int mode, bool check) {
1433 if (length == 0) {
1434 return std::vector<uint8_t>();
1435 }
1436
1437 CompressionContext context(data, start, length, mode);
1438 InitializeCompression(context);
1439
1440 while (context.src_pos <= context.last_pos) {
1442 DetermineBestCompression(context);
1443
1444 DEBUG_LOG("CompressV3 Loop: cmd_with_max = " << context.cmd_with_max);
1445
1446 if (context.cmd_with_max == kCommandDirectCopy) {
1447 HandleDirectCopy(context);
1448 } else {
1449 AddCompressionToChain(context);
1450 }
1451
1452 if (check) {
1454 }
1455 }
1456
1457 FinalizeCompression(context);
1458 return std::vector<uint8_t>(context.compressed_data.begin(),
1459 context.compressed_data.end());
1460}
1461
1462std::string SetBuffer(const uint8_t* data, int src_pos, int comp_accumulator) {
1463 std::string buffer;
1464 for (int i = 0; i < comp_accumulator; ++i) {
1465 buffer.push_back(data[i + src_pos - comp_accumulator]);
1466 }
1467 return buffer;
1468}
1469
1470std::string SetBuffer(const std::vector<uint8_t>& data, int src_pos,
1471 int comp_accumulator) {
1472 std::string buffer;
1473 for (int i = 0; i < comp_accumulator; ++i) {
1474 buffer.push_back(data[i + src_pos - comp_accumulator]);
1475 }
1476 return buffer;
1477}
1478
1479void memfill(const uint8_t* data, std::vector<uint8_t>& buffer, int buffer_pos,
1480 int offset, int length) {
1481 auto a = data[offset];
1482 auto b = data[offset + 1];
1483 for (int i = 0; i < length; i = i + 2) {
1484 buffer[buffer_pos + i] = a;
1485 if ((i + 1) < length)
1486 buffer[buffer_pos + i + 1] = b;
1487 }
1488}
1489
1490absl::StatusOr<std::vector<uint8_t>> DecompressV2(const uint8_t* data,
1491 int offset, int size,
1492 int mode, size_t rom_size) {
1493 if (size == 0) {
1494 return std::vector<uint8_t>();
1495 }
1496
1497 // Validate initial offset is within bounds (if rom_size provided)
1498 // rom_size == static_cast<size_t>(-1) means "no bounds checking"
1499 if (rom_size != static_cast<size_t>(-1) &&
1500 (offset < 0 || static_cast<size_t>(offset) >= rom_size)) {
1501 return absl::OutOfRangeError(
1502 absl::StrFormat("DecompressV2: Initial offset %d exceeds ROM size %zu",
1503 offset, rom_size));
1504 }
1505
1506 std::vector<uint8_t> buffer(size, 0);
1507 unsigned int length = 0;
1508 unsigned int buffer_pos = 0;
1509 uint8_t command = 0;
1510
1511 // Bounds check before initial header read
1512 if (rom_size != static_cast<size_t>(-1) &&
1513 static_cast<size_t>(offset) >= rom_size) {
1514 return absl::OutOfRangeError(
1515 absl::StrFormat("DecompressV2: Initial offset %d exceeds ROM size %zu",
1516 offset, rom_size));
1517 }
1518 uint8_t header = data[offset];
1519
1520 while (header != kSnesByteMax) {
1521 // Bounds check before reading command (if rom_size provided)
1522 if (rom_size != static_cast<size_t>(-1) &&
1523 static_cast<size_t>(offset) >= rom_size) {
1524 return absl::OutOfRangeError(absl::StrFormat(
1525 "DecompressV2: Offset %d exceeds ROM size %zu while reading command",
1526 offset, rom_size));
1527 }
1528
1529 if ((header & kExpandedMod) == kExpandedMod) {
1530 // Expanded Command - needs 2 bytes
1531 if (rom_size != static_cast<size_t>(-1) &&
1532 static_cast<size_t>(offset + 1) >= rom_size) {
1533 return absl::OutOfRangeError(
1534 absl::StrFormat("DecompressV2: Offset %d+1 exceeds ROM size %zu "
1535 "for expanded command",
1536 offset, rom_size));
1537 }
1538 command = ((header >> 2) & kCommandMod);
1539 length = (((header << 8) | data[offset + 1]) & kExpandedLengthMod);
1540 offset += 2; // Advance 2 bytes in ROM
1541 } else {
1542 // Normal Command
1543 command = ((header >> 5) & kCommandMod);
1544 length = (header & kNormalLengthMod);
1545 offset += 1; // Advance 1 byte in ROM
1546 }
1547 length += 1; // each commands is at least of size 1 even if index 00
1548
1549 // CRITICAL: Check and resize buffer BEFORE any command writes
1550 // This prevents WASM "index out of bounds" errors
1551 // The buffer may need to grow if decompressed data exceeds initial size
1552 if (buffer_pos + length > static_cast<unsigned int>(size)) {
1553 // Double the buffer size (with overflow protection)
1554 int new_size = size;
1555 while (buffer_pos + length > static_cast<unsigned int>(new_size)) {
1556 if (new_size > INT_MAX / 2) {
1557 return absl::ResourceExhaustedError(absl::StrFormat(
1558 "DecompressV2: Buffer size overflow at pos %u, length %u",
1559 buffer_pos, length));
1560 }
1561 new_size *= 2;
1562 }
1563 size = new_size;
1564 buffer.resize(size);
1565 }
1566
1567 switch (command) {
1568 case kCommandDirectCopy: // Does not advance in the ROM
1569 // Bounds check for direct copy
1570 if (rom_size != static_cast<size_t>(-1) &&
1571 static_cast<size_t>(offset + length) > rom_size) {
1572 return absl::OutOfRangeError(
1573 absl::StrFormat("DecompressV2: DirectCopy offset %d + length %u "
1574 "exceeds ROM size %zu",
1575 offset, length, rom_size));
1576 }
1577 memcpy(buffer.data() + buffer_pos, data + offset, length);
1578 buffer_pos += length;
1579 offset += length;
1580 break;
1581 case kCommandByteFill:
1582 // Bounds check for byte fill
1583 if (rom_size != static_cast<size_t>(-1) &&
1584 static_cast<size_t>(offset) >= rom_size) {
1585 return absl::OutOfRangeError(absl::StrFormat(
1586 "DecompressV2: ByteFill offset %d exceeds ROM size %zu", offset,
1587 rom_size));
1588 }
1589 memset(buffer.data() + buffer_pos, (int)(data[offset]), length);
1590 buffer_pos += length;
1591 offset += 1; // Advances 1 byte in the ROM
1592 break;
1593 case kCommandWordFill:
1594 // Bounds check for word fill (needs 2 bytes)
1595 if (rom_size != static_cast<size_t>(-1) &&
1596 static_cast<size_t>(offset + 1) >= rom_size) {
1597 return absl::OutOfRangeError(absl::StrFormat(
1598 "DecompressV2: WordFill offset %d+1 exceeds ROM size %zu", offset,
1599 rom_size));
1600 }
1601 memfill(data, buffer, buffer_pos, offset, length);
1602 buffer_pos += length;
1603 offset += 2; // Advance 2 byte in the ROM
1604 break;
1606 // Bounds check for increasing fill
1607 if (rom_size != static_cast<size_t>(-1) &&
1608 static_cast<size_t>(offset) >= rom_size) {
1609 return absl::OutOfRangeError(absl::StrFormat(
1610 "DecompressV2: IncreasingFill offset %d exceeds ROM size %zu",
1611 offset, rom_size));
1612 }
1613 auto inc_byte = data[offset];
1614 for (unsigned int i = 0; i < length; i++) {
1615 buffer[buffer_pos] = inc_byte++;
1616 buffer_pos++;
1617 }
1618 offset += 1; // Advance 1 byte in the ROM
1619 } break;
1621 // Bounds check for repeating bytes (needs 2 bytes)
1622 if (rom_size != static_cast<size_t>(-1) &&
1623 static_cast<size_t>(offset + 1) >= rom_size) {
1624 return absl::OutOfRangeError(absl::StrFormat(
1625 "DecompressV2: RepeatingBytes offset %d+1 exceeds ROM size %zu",
1626 offset, rom_size));
1627 }
1628 uint16_t s1 = ((data[offset + 1] & kSnesByteMax) << 8);
1629 uint16_t s2 = (data[offset] & kSnesByteMax);
1630 int addr = (s1 | s2);
1631 if (mode == kNintendoMode1) { // Reversed byte order for
1632 // overworld maps
1633 addr = (data[offset + 1] & kSnesByteMax) |
1634 ((data[offset] & kSnesByteMax) << 8);
1635 }
1636 if (addr > offset) {
1637 return absl::InternalError(
1638 absl::StrFormat("Decompress: Offset for command copy exceeds "
1639 "current position "
1640 "(Offset : %#04x | Pos : %#06x)\n",
1641 addr, offset));
1642 }
1643 // Buffer resize already done above, no need to check again
1644 memcpy(buffer.data() + buffer_pos, buffer.data() + addr, length);
1645 buffer_pos += length;
1646 offset += 2;
1647 } break;
1648 default: {
1649 std::cout << absl::StrFormat(
1650 "Decompress: Invalid header (Offset : %#06x, Command: %#04x)\n",
1651 offset, command);
1652 } break;
1653 }
1654 // Bounds check before reading next header byte
1655 if (rom_size != static_cast<size_t>(-1) &&
1656 static_cast<size_t>(offset) >= rom_size) {
1657 return absl::OutOfRangeError(
1658 absl::StrFormat("DecompressV2: Offset %d exceeds ROM size %zu while "
1659 "reading next header",
1660 offset, rom_size));
1661 }
1662 header = data[offset];
1663 }
1664
1665 return buffer;
1666}
1667
1668absl::StatusOr<std::vector<uint8_t>> DecompressGraphics(const uint8_t* data,
1669 int pos, int size) {
1670 return DecompressV2(data, pos, size, kNintendoMode2);
1671}
1672
1673absl::StatusOr<std::vector<uint8_t>> DecompressOverworld(const uint8_t* data,
1674 int pos, int size) {
1675 return DecompressV2(data, pos, size, kNintendoMode1);
1676}
1677
1678absl::StatusOr<std::vector<uint8_t>> DecompressOverworld(
1679 const std::vector<uint8_t> data, int pos, int size) {
1680 return DecompressV2(data.data(), pos, size, kNintendoMode1);
1681}
1682
1683} // namespace lc_lz2
1684} // namespace gfx
1685} // namespace yaze
The Rom class is used to load, save, and modify Rom data. This is a generic SNES ROM container and do...
Definition rom.h:24
auto begin()
Definition rom.h:137
auto data() const
Definition rom.h:135
auto size() const
Definition rom.h:134
absl::Status LoadFromData(const std::vector< uint8_t > &data, const LoadOptions &options=LoadOptions::Defaults())
Definition rom.cc:148
#define DEBUG_LOG(msg)
#define BUILD_HEADER(command, length)
Definition compression.h:13
unsigned int uint
Definition macro.h:4
#define ASSIGN_OR_RETURN(type_variable_name, expression)
Definition macro.h:62
void CheckIntraCopyV3(CompressionContext &context)
absl::StatusOr< std::vector< uint8_t > > CompressGraphics(const uint8_t *data, const int pos, const int length)
absl::StatusOr< std::vector< uint8_t > > CompressV3(const std::vector< uint8_t > &data, const int start, const int length, int mode, bool check)
Compresses a buffer of data using the LC_LZ2 algorithm.
constexpr int kExpandedLengthMod
Definition compression.h:59
constexpr int kCommandDirectCopy
Definition compression.h:46
void memfill(const uint8_t *data, std::vector< uint8_t > &buffer, int buffer_pos, int offset, int length)
void FinalizeCompression(CompressionContext &context)
void CheckWordRepeatV2(const uint8_t *data, uint &src_pos, const unsigned int last_pos, CompressionCommand &cmd)
void InitializeCompression(CompressionContext &context)
absl::StatusOr< std::vector< uint8_t > > DecompressV2(const uint8_t *data, int offset, int size, int mode, size_t rom_size)
Decompresses a buffer of data using the LC_LZ2 algorithm.
struct CompressionPiece CompressionPiece
Definition compression.h:90
absl::StatusOr< std::vector< uint8_t > > CompressOverworld(const uint8_t *data, const int pos, const int length)
constexpr int kSnesByteMax
Definition compression.h:56
constexpr int kNintendoMode1
Definition compression.h:54
void AddAlternativeCompressionCommand(const uint8_t *rom_data, CompressionPiecePointer &compressed_chain, const CompressionCommand &command, uint &source_data_position, uint &uncompressed_data_size, uint &best_command, uint &best_command_gain)
std::shared_ptr< CompressionPiece > CompressionPiecePointer
Definition compression.h:91
void CheckIncByteV3(CompressionContext &context)
absl::Status ValidateCompressionResultV3(const CompressionContext &context)
constexpr int kNintendoMode2
Definition compression.h:55
std::string SetBuffer(const uint8_t *data, int src_pos, int comp_accumulator)
constexpr int kCommandRepeatingBytes
Definition compression.h:50
std::array< std::array< char, 2 >, 5 > CommandArgumentArray
Definition compression.h:75
void CheckByteRepeat(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos)
void CheckByteRepeatV2(const uint8_t *data, uint &src_pos, const unsigned int last_pos, CompressionCommand &cmd)
void HandleDirectCopy(CompressionContext &context)
void CheckIncByteV2(const uint8_t *rom_data, uint &src_data_pos, const unsigned int last_pos, CompressionCommand &cmd)
absl::StatusOr< std::vector< uint8_t > > DecompressGraphics(const uint8_t *data, int pos, int size)
constexpr int kCommandMod
Definition compression.h:57
void PrintCompressionPiece(const CompressionPiecePointer &piece)
constexpr int kCommandWordFill
Definition compression.h:48
constexpr int kMaxLengthNormalHeader
Definition compression.h:52
void CheckAvailableCompressionCommands(CompressionContext &context)
constexpr int kMaxLengthCompression
Definition compression.h:53
void CompressionCommandAlternativeV2(const uint8_t *rom_data, const CompressionCommand &cmd, CompressionPiecePointer &compressed_chain, uint &src_data_pos, uint &comp_accumulator, uint &cmd_with_max, uint &max_win)
void CheckIntraCopyV2(const uint8_t *rom_data, uint &src_data_pos, const unsigned int last_pos, unsigned int start, CompressionCommand &cmd)
std::vector< uint8_t > CreateCompressionString(CompressionPiecePointer &start, int mode)
absl::StatusOr< std::vector< uint8_t > > DecompressOverworld(const uint8_t *data, int pos, int size)
std::array< uint, 5 > DataSizeArray
Definition compression.h:77
constexpr int kCommandIncreasingFill
Definition compression.h:49
void ValidateForByteGainV2(const CompressionCommand &cmd, uint &max_win, uint &cmd_with_max)
void ValidateForByteGain(const DataSizeArray &data_size_taken, const CommandSizeArray &cmd_size, uint &max_win, uint &cmd_with_max)
constexpr int kCommandByteFill
Definition compression.h:47
void PrintCompressionChain(const CompressionPiecePointer &chain_head)
void CheckIncByte(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos)
constexpr int kExpandedMod
Definition compression.h:58
const std::array< int, 5 > kCommandSizes
void AddCompressionToChain(CompressionContext &context)
absl::StatusOr< CompressionPiecePointer > SplitCompressionPiece(CompressionPiecePointer &piece, int mode)
void CompressionCommandAlternative(const uint8_t *rom_data, CompressionPiecePointer &compressed_chain, const CommandSizeArray &cmd_size, const CommandArgumentArray &cmd_args, uint &src_data_pos, uint &comp_accumulator, uint &cmd_with_max, uint &max_win)
constexpr int kNormalLengthMod
Definition compression.h:60
constexpr int kCompressionStringMod
Definition compression.h:61
void CheckWordRepeat(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos)
void CheckByteRepeatV3(CompressionContext &context)
CompressionPiecePointer MergeCopy(CompressionPiecePointer &start)
std::array< uint, 5 > CommandSizeArray
Definition compression.h:76
void DetermineBestCompression(CompressionContext &context)
absl::StatusOr< CompressionPiece > SplitCompressionPieceV3(CompressionPiece &piece, int mode)
absl::StatusOr< std::vector< uint8_t > > CompressV2(const uint8_t *data, const int start, const int length, int mode, bool check)
Compresses a buffer of data using the LC_LZ2 algorithm.
void CheckWordRepeatV3(CompressionContext &context)
void CheckIntraCopy(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos, unsigned int start)
absl::Status ValidateCompressionResult(CompressionPiecePointer &chain_head, int mode, int start, int src_data_pos)
std::vector< uint8_t > HyruleMagicDecompress(uint8_t const *src, int *const size, int const p_big_endian, size_t max_src_size)
std::vector< uint8_t > HyruleMagicCompress(uint8_t const *const src, int const oldsize, int *const size, int const flag)
uint16_t ldle16b(uint8_t const *const p_arr)
void stle16b(uint8_t *const p_arr, uint16_t const p_val)
#define RETURN_IF_ERROR(expr)
Definition snes.cc:22
std::array< std::array< char, 2 >, 5 > arguments
Definition compression.h:66
std::vector< CompressionPiece > compression_pieces
std::vector< uint8_t > compression_string
std::vector< uint8_t > compressed_data