yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
compression.cc
Go to the documentation of this file.
1#include "compression.h"
2
3#include <climits>
4#include <iostream>
5#include <memory>
6#include <string>
7
8#include "absl/status/status.h"
9#include "absl/status/statusor.h"
10#include "absl/strings/str_format.h"
11#include "rom/rom.h"
12#include "util/hyrule_magic.h"
13#include "util/macro.h"
14
15#define DEBUG_LOG(msg) std::cout << msg << std::endl
16
17namespace yaze {
18namespace gfx {
19
20std::vector<uint8_t> HyruleMagicCompress(uint8_t const* const src,
21 int const oldsize, int* const size,
22 int const flag) {
23 // Allocate buffer large enough for worst-case output
24 std::vector<uint8_t> b2(std::max(0x1000, oldsize * 2));
25
26 int i, j, k, l, m = 0, n, o = 0, bd = 0, p, q = 0, r;
27
28 for (i = 0; i < oldsize;) {
29 l = src[i]; // grab a char from the buffer.
30
31 k = 0;
32
33 r = !!q; // r = the same logical value (0 or 1) as q, but not the same
34 // value necesarily.
35
36 for (j = 0; j < i - 1; j++) {
37 if (src[j] == l) {
38 m = oldsize - j;
39
40 for (n = 0; n < m; n++)
41 if (src[n + j] != src[n + i])
42 break;
43
44 if (n > k)
45 k = n, o = j;
46 }
47 }
48
49 for (n = i + 1; n < oldsize; n++) {
50 if (src[n] != l) {
51 break;
52 }
53 }
54
55 n -= i; // offset back by i. i.e. n = k+1 as above.
56
57 if (n > 1 + r)
58 p = 1;
59 else {
60 m = src[i + 1];
61
62 for (n = i + 2; n < oldsize; n++) {
63 if (src[n] != l)
64 break;
65
66 n++;
67
68 if (src[n] != m)
69 break;
70 }
71
72 n -= i;
73
74 if (n > 2 + r)
75 p = 2;
76 else {
77 m = oldsize - i;
78
79 for (n = 1; n < m; n++)
80 if (src[i + n] != l + n)
81 break;
82
83 if (n > 1 + r)
84 p = 3;
85 else
86 p = 0;
87 }
88 }
89
90 if (k > 3 + r && k > n + (p & 1))
91 p = 4, n = k;
92
93 if (!p)
94 q++, i++;
95 else {
96 if (q) {
97 q--;
98
99 if (q > 31) {
100 b2[bd++] = (unsigned char)(224 + (q >> 8));
101 }
102
103 b2[bd++] = (unsigned char)q;
104 q++;
105
106 memcpy(b2.data() + bd, src + i - q, q);
107
108 bd += q;
109 q = 0;
110 }
111
112 i += n;
113 n--;
114
115 if (n > 31) {
116 b2[bd++] = (unsigned char)(224 + (n >> 8) + (p << 2));
117 b2[bd++] = (unsigned char)n;
118 } else
119 b2[bd++] = (unsigned char)((p << 5) + n);
120
121 switch (p) {
122 case 1:
123 case 3:
124 b2[bd++] = (unsigned char)l;
125 break;
126
127 case 2:
128 b2[bd++] = (unsigned char)l;
129 b2[bd++] = (unsigned char)m;
130
131 break;
132
133 case 4:
134 if (flag) {
135 b2[bd++] = (unsigned char)(o >> 8);
136 b2[bd++] = (unsigned char)o;
137 } else {
138 b2[bd++] = (unsigned char)o;
139 b2[bd++] = (unsigned char)(o >> 8);
140 }
141 }
142
143 continue;
144 }
145 }
146
147 if (q) {
148 q--;
149
150 if (q > 31) {
151 b2[bd++] = (unsigned char)(224 + (q >> 8));
152 }
153
154 b2[bd++] = (unsigned char)q;
155 q++;
156
157 memcpy(b2.data() + bd, src + i - q, q);
158
159 bd += q;
160 }
161
162 b2[bd++] = 255;
163 b2.resize(bd);
164 *size = bd;
165
166 return b2;
167}
168
169std::vector<uint8_t> HyruleMagicDecompress(uint8_t const* src, int* const size,
170 int const p_big_endian,
171 size_t max_src_size) {
172 std::vector<uint8_t> b2(1024);
173 const uint8_t* const src_start = src;
174
175 int bd = 0, bs = 1024;
176
177 unsigned char a;
178 unsigned char b;
179 unsigned short c, d;
180
181 // Use a lambda to allow early returns instead of goto end_decompression
182 auto decompress_loop = [&]() {
183 for (;;) {
184 // Bounds check: Ensure we can read at least one byte (command)
185 if (max_src_size != static_cast<size_t>(-1) &&
186 (size_t)(src - src_start) >= max_src_size) {
187 std::cerr
188 << "HyruleMagicDecompress: Reached end of buffer unexpectedly."
189 << std::endl;
190 return;
191 }
192
193 // retrieve a uint8_t from the buffer.
194 a = *(src++);
195
196 // end the decompression routine if we encounter 0xff.
197 if (a == 0xff)
198 return;
199
200 // examine the top 3 bits of a.
201 b = (a >> 5);
202
203 if (b == 7) // i.e. 0b 111
204 {
205 // get bits 0b 0001 1100
206 b = ((a >> 2) & 7);
207
208 // get bits 0b 0000 0011, multiply by 256, OR with the next byte.
209 c = ((a & 0x0003) << 8);
210
211 // Bounds check: Ensure we can read the next byte
212 if (max_src_size != static_cast<size_t>(-1) &&
213 (size_t)(src - src_start) >= max_src_size) {
214 std::cerr << "HyruleMagicDecompress: Reached end of buffer reading "
215 "extended len"
216 << std::endl;
217 return;
218 }
219 c |= *(src++);
220 } else
221 // or get bits 0b 0001 1111
222 c = (uint16_t)(a & 31);
223
224 c++;
225
226 if ((bd + c) > (bs - 512)) {
227 // need to increase the buffer size.
228 bs += 1024;
229 // Safety check for excessive allocation
230 if (bs > 1024 * 1024 * 16) { // 16MB limit
231 std::cerr << "HyruleMagicDecompress: Excessive allocation detected."
232 << std::endl;
233 bd = 0;
234 return;
235 }
236 b2.resize(bs);
237 }
238
239 // 7 was handled, here we handle other decompression codes.
240
241 switch (b) {
242 case 0: // 0b 000
243
244 // raw copy
245
246 // Bounds check: Ensure we can read c bytes
247 if (max_src_size != static_cast<size_t>(-1) &&
248 (size_t)(src - src_start + c) > max_src_size) {
249 std::cerr << "HyruleMagicDecompress: Raw copy exceeds buffer."
250 << std::endl;
251 return;
252 }
253
254 // copy info from the src buffer to our new buffer
255 memcpy(b2.data() + bd, src, c);
256
257 // increment the src pointer accordingly.
258 src += c;
259
260 bd += c;
261
262 break;
263
264 case 1: // 0b 001
265
266 // rle copy
267
268 // Bounds check: Ensure we can read 1 byte
269 if (max_src_size != static_cast<size_t>(-1) &&
270 (size_t)(src - src_start) >= max_src_size) {
271 std::cerr << "HyruleMagicDecompress: RLE copy exceeds buffer."
272 << std::endl;
273 return;
274 }
275
276 // make c duplicates of one byte, inc the src pointer.
277 memset(b2.data() + bd, *(src++), c);
278
279 // increase the b2 offset.
280 bd += c;
281
282 break;
283
284 case 2: // 0b 010
285
286 // rle 16-bit alternating copy
287
288 // Bounds check: Ensure we can read 2 bytes
289 if (max_src_size != static_cast<size_t>(-1) &&
290 (size_t)(src - src_start + 2) > max_src_size) {
291 std::cerr
292 << "HyruleMagicDecompress: RLE 16-bit copy exceeds buffer."
293 << std::endl;
294 return;
295 }
296
297 d = zelda3::ldle16b(src);
298
299 src += 2;
300
301 while (c > 1) {
302 // copy that 16-bit number c/2 times into the b2 buffer.
303 zelda3::stle16b(b2.data() + bd, d);
304
305 bd += 2;
306 c -= 2; // hence c/2
307 }
308
309 if (c) // if there's a remainder of c/2, this handles it.
310 b2[bd++] = (char)d;
311
312 break;
313
314 case 3: // 0b 011
315
316 // incrementing copy
317
318 // Bounds check: Ensure we can read 1 byte
319 if (max_src_size != static_cast<size_t>(-1) &&
320 (size_t)(src - src_start) >= max_src_size) {
321 std::cerr << "HyruleMagicDecompress: Inc copy exceeds buffer."
322 << std::endl;
323 return;
324 }
325
326 // get the current src byte.
327 a = *(src++);
328
329 while (c--) {
330 b2[bd++] = a++;
331 }
332
333 break;
334
335 default: // 0b 100, 101, 110
336
337 // lz copy
338
339 // Bounds check: Ensure we can read 2 bytes
340 if (max_src_size != static_cast<size_t>(-1) &&
341 (size_t)(src - src_start + 2) > max_src_size) {
342 std::cerr << "HyruleMagicDecompress: LZ copy exceeds buffer."
343 << std::endl;
344 return;
345 }
346
347 if (p_big_endian) {
348 d = (*src << 8) + src[1];
349 } else {
350 d = zelda3::ldle16b(src);
351 }
352
353 while (c--) {
354 if (d >= static_cast<unsigned short>(bs)) {
355 std::cerr << "HyruleMagicDecompress: LZ ref out of bounds."
356 << std::endl;
357 return;
358 }
359 b2[bd++] = b2[d++];
360 }
361
362 src += 2;
363 }
364 }
365 };
366
367 decompress_loop();
368
369 b2.resize(bd);
370
371 if (size)
372 (*size) = bd;
373
374 return b2;
375}
376
377namespace lc_lz2 {
378
380 std::cout << "Command: " << std::to_string(piece->command) << "\n";
381 std::cout << "Command Length: " << piece->length << "\n";
382 std::cout << "Argument: ";
383 auto arg_size = piece->argument.size();
384 for (int i = 0; i < arg_size; ++i) {
385 printf("%02X ", piece->argument.at(i));
386 }
387 std::cout << "\nArgument Length: " << piece->argument_length << "\n";
388}
389
391 auto compressed_chain = chain_head->next;
392 while (compressed_chain != nullptr) {
393 std::cout << "- Compression Piece -\n";
394 PrintCompressionPiece(compressed_chain);
395 compressed_chain = compressed_chain->next;
396 }
397}
398
399void CheckByteRepeat(const uint8_t* rom_data, DataSizeArray& data_size_taken,
400 CommandArgumentArray& cmd_args, uint& src_data_pos,
401 const unsigned int last_pos) {
402 unsigned int pos = src_data_pos;
403 char byte_to_repeat = rom_data[pos];
404 while (pos <= last_pos && rom_data[pos] == byte_to_repeat) {
405 data_size_taken[kCommandByteFill]++;
406 pos++;
407 }
408 cmd_args[kCommandByteFill][0] = byte_to_repeat;
409}
410
411void CheckWordRepeat(const uint8_t* rom_data, DataSizeArray& data_size_taken,
412 CommandArgumentArray& cmd_args, uint& src_data_pos,
413 const unsigned int last_pos) {
414 if (src_data_pos + 2 <= last_pos &&
415 rom_data[src_data_pos] != rom_data[src_data_pos + 1]) {
416 unsigned int pos = src_data_pos;
417 char byte1 = rom_data[pos];
418 char byte2 = rom_data[pos + 1];
419 pos += 2;
420 data_size_taken[kCommandWordFill] = 2;
421 while (pos + 1 <= last_pos) {
422 if (rom_data[pos] == byte1 && rom_data[pos + 1] == byte2)
423 data_size_taken[kCommandWordFill] += 2;
424 else
425 break;
426 pos += 2;
427 }
428 cmd_args[kCommandWordFill][0] = byte1;
429 cmd_args[kCommandWordFill][1] = byte2;
430 }
431}
432
433void CheckIncByte(const uint8_t* rom_data, DataSizeArray& data_size_taken,
434 CommandArgumentArray& cmd_args, uint& src_data_pos,
435 const unsigned int last_pos) {
436 unsigned int pos = src_data_pos;
437 char byte = rom_data[pos];
438 pos++;
439 data_size_taken[kCommandIncreasingFill] = 1;
440 byte++;
441 while (pos <= last_pos && byte == rom_data[pos]) {
442 data_size_taken[kCommandIncreasingFill]++;
443 byte++;
444 pos++;
445 }
446 cmd_args[kCommandIncreasingFill][0] = rom_data[src_data_pos];
447}
448
449void CheckIntraCopy(const uint8_t* rom_data, DataSizeArray& data_size_taken,
450 CommandArgumentArray& cmd_args, uint& src_data_pos,
451 const unsigned int last_pos, unsigned int start) {
452 if (src_data_pos != start) {
453 unsigned int searching_pos = start;
454 unsigned int current_pos_u = src_data_pos;
455 unsigned int copied_size = 0;
456 unsigned int search_start = start;
457
458 while (searching_pos < src_data_pos && current_pos_u <= last_pos) {
459 while (rom_data[current_pos_u] != rom_data[searching_pos] &&
460 searching_pos < src_data_pos)
461 searching_pos++;
462 search_start = searching_pos;
463 while (current_pos_u <= last_pos &&
464 rom_data[current_pos_u] == rom_data[searching_pos] &&
465 searching_pos < src_data_pos) {
466 copied_size++;
467 current_pos_u++;
468 searching_pos++;
469 }
470 if (copied_size > data_size_taken[kCommandRepeatingBytes]) {
471 search_start -= start;
472 printf("- Found repeat of %d at %d\n", copied_size, search_start);
473 data_size_taken[kCommandRepeatingBytes] = copied_size;
474 cmd_args[kCommandRepeatingBytes][0] = search_start & kSnesByteMax;
475 cmd_args[kCommandRepeatingBytes][1] = search_start >> 8;
476 }
477 current_pos_u = src_data_pos;
478 copied_size = 0;
479 }
480 }
481}
482
483// Check if a command managed to pick up `max_win` or more bytes
484// Avoids being even with copy command, since it's possible to merge copy
485void ValidateForByteGain(const DataSizeArray& data_size_taken,
486 const CommandSizeArray& cmd_size, uint& max_win,
487 uint& cmd_with_max) {
488 for (unsigned int cmd_i = 1; cmd_i < 5; cmd_i++) {
489 unsigned int cmd_size_taken = data_size_taken[cmd_i];
490 // TODO(@scawful): Replace conditional with table of command sizes
491 // "Table that is even with copy but all other cmd are 2"
492 auto table_check =
493 !(cmd_i == kCommandRepeatingBytes && cmd_size_taken == 3);
494 if (cmd_size_taken > max_win && cmd_size_taken > cmd_size[cmd_i] &&
495 table_check) {
496 printf("==> C:%d / S:%d\n", cmd_i, cmd_size_taken);
497 cmd_with_max = cmd_i;
498 max_win = cmd_size_taken;
499 }
500 }
501}
502
503void CompressionCommandAlternative(const uint8_t* rom_data,
504 CompressionPiecePointer& compressed_chain,
505 const CommandSizeArray& cmd_size,
506 const CommandArgumentArray& cmd_args,
507 uint& src_data_pos, uint& comp_accumulator,
508 uint& cmd_with_max, uint& max_win) {
509 printf("- Ok we get a gain from %d\n", cmd_with_max);
510 std::string buffer;
511 buffer.push_back(cmd_args[cmd_with_max][0]);
512 if (cmd_size[cmd_with_max] == 2) {
513 buffer.push_back(cmd_args[cmd_with_max][1]);
514 }
515
516 auto new_comp_piece = std::make_shared<CompressionPiece>(
517 cmd_with_max, max_win, buffer, cmd_size[cmd_with_max]);
518 PrintCompressionPiece(new_comp_piece);
519 // If we let non compressed stuff, we need to add a copy chunk before
520 if (comp_accumulator != 0) {
521 std::string copy_buff;
522 copy_buff.resize(comp_accumulator);
523 for (int i = 0; i < comp_accumulator; ++i) {
524 copy_buff[i] = rom_data[i + src_data_pos - comp_accumulator];
525 }
526 auto copy_chunk = std::make_shared<CompressionPiece>(
527 kCommandDirectCopy, comp_accumulator, copy_buff, comp_accumulator);
528 compressed_chain->next = copy_chunk;
529 compressed_chain = copy_chunk;
530 } else {
531 compressed_chain->next = new_comp_piece;
532 compressed_chain = new_comp_piece;
533 }
534 src_data_pos += max_win;
535 comp_accumulator = 0;
536}
537
538void CheckByteRepeatV2(const uint8_t* data, uint& src_pos,
539 const unsigned int last_pos, CompressionCommand& cmd) {
540 unsigned int i = 0;
541 while (src_pos + i < last_pos && data[src_pos] == data[src_pos + i]) {
542 ++i;
543 }
545 cmd.arguments[kCommandByteFill][0] = data[src_pos];
546}
547
548void CheckWordRepeatV2(const uint8_t* data, uint& src_pos,
549 const unsigned int last_pos, CompressionCommand& cmd) {
550 if (src_pos + 2 <= last_pos && data[src_pos] != data[src_pos + 1]) {
551 unsigned int pos = src_pos;
552 char byte1 = data[pos];
553 char byte2 = data[pos + 1];
554 pos += 2;
556 while (pos + 1 <= last_pos) {
557 if (data[pos] == byte1 && data[pos + 1] == byte2)
558 cmd.data_size[kCommandWordFill] += 2;
559 else
560 break;
561 pos += 2;
562 }
563 cmd.arguments[kCommandWordFill][0] = byte1;
564 cmd.arguments[kCommandWordFill][1] = byte2;
565 }
566}
567
568void CheckIncByteV2(const uint8_t* rom_data, uint& src_data_pos,
569 const unsigned int last_pos, CompressionCommand& cmd) {
570 unsigned int pos = src_data_pos;
571 char byte = rom_data[pos];
572 pos++;
574 byte++;
575 while (pos <= last_pos && byte == rom_data[pos]) {
577 byte++;
578 pos++;
579 }
580 cmd.arguments[kCommandIncreasingFill][0] = rom_data[src_data_pos];
581}
582
583void CheckIntraCopyV2(const uint8_t* rom_data, uint& src_data_pos,
584 const unsigned int last_pos, unsigned int start,
585 CompressionCommand& cmd) {
586 if (src_data_pos != start) {
587 unsigned int searching_pos = start;
588 unsigned int current_pos_u = src_data_pos;
589 unsigned int copied_size = 0;
590 unsigned int search_start = start;
591
592 while (searching_pos < src_data_pos && current_pos_u <= last_pos) {
593 while (rom_data[current_pos_u] != rom_data[searching_pos] &&
594 searching_pos < src_data_pos)
595 searching_pos++;
596 search_start = searching_pos;
597 while (current_pos_u <= last_pos &&
598 rom_data[current_pos_u] == rom_data[searching_pos] &&
599 searching_pos < src_data_pos) {
600 copied_size++;
601 current_pos_u++;
602 searching_pos++;
603 }
604 if (copied_size > cmd.data_size[kCommandRepeatingBytes]) {
605 search_start -= start;
606 printf("- Found repeat of %d at %d\n", copied_size, search_start);
607 cmd.data_size[kCommandRepeatingBytes] = copied_size;
608 cmd.arguments[kCommandRepeatingBytes][0] = search_start & kSnesByteMax;
609 cmd.arguments[kCommandRepeatingBytes][1] = search_start >> 8;
610 }
611 current_pos_u = src_data_pos;
612 copied_size = 0;
613 }
614 }
615}
616
617// Table indicating command sizes, in bytes
618const std::array<int, 5> kCommandSizes = {1, 2, 2, 2, 3};
619
620// TODO(@scawful): TEST ME
622 uint& cmd_with_max) {
623 for (unsigned int cmd_i = 1; cmd_i < 5; cmd_i++) {
624 unsigned int cmd_size_taken = cmd.data_size[cmd_i];
625 // Check if the command size exceeds the maximum win and the size in the
626 // command sizes table, except for the repeating bytes command when the size
627 // taken is 3
628 if (cmd_size_taken > max_win && cmd_size_taken > kCommandSizes[cmd_i] &&
629 !(cmd_i == kCommandRepeatingBytes && cmd_size_taken == 3)) {
630 printf("==> C:%d / S:%d\n", cmd_i, cmd_size_taken);
631 cmd_with_max = cmd_i;
632 max_win = cmd_size_taken;
633 }
634 }
635}
636
637void CompressionCommandAlternativeV2(const uint8_t* rom_data,
638 const CompressionCommand& cmd,
639 CompressionPiecePointer& compressed_chain,
640 uint& src_data_pos, uint& comp_accumulator,
641 uint& cmd_with_max, uint& max_win) {
642 printf("- Ok we get a gain from %d\n", cmd_with_max);
643 std::string buffer;
644 buffer.push_back(cmd.arguments[cmd_with_max][0]);
645 if (cmd.cmd_size[cmd_with_max] == 2) {
646 buffer.push_back(cmd.arguments[cmd_with_max][1]);
647 }
648
649 auto new_comp_piece = std::make_shared<CompressionPiece>(
650 cmd_with_max, max_win, buffer, cmd.cmd_size[cmd_with_max]);
651 PrintCompressionPiece(new_comp_piece);
652 // If we let non compressed stuff, we need to add a copy chunk before
653 if (comp_accumulator != 0) {
654 std::string copy_buff;
655 copy_buff.resize(comp_accumulator);
656 for (int i = 0; i < comp_accumulator; ++i) {
657 copy_buff[i] = rom_data[i + src_data_pos - comp_accumulator];
658 }
659 auto copy_chunk = std::make_shared<CompressionPiece>(
660 kCommandDirectCopy, comp_accumulator, copy_buff, comp_accumulator);
661 compressed_chain->next = copy_chunk;
662 compressed_chain = copy_chunk;
663 } else {
664 compressed_chain->next = new_comp_piece;
665 compressed_chain = new_comp_piece;
666 }
667 src_data_pos += max_win;
668 comp_accumulator = 0;
669}
670
672 const uint8_t* rom_data, CompressionPiecePointer& compressed_chain,
673 const CompressionCommand& command, uint& source_data_position,
674 uint& uncompressed_data_size, uint& best_command, uint& best_command_gain) {
675 std::cout << "- Identified a gain from command: " << best_command
676 << std::endl;
677
678 // Create a buffer to store the arguments for the best command.
679 std::string argument_buffer;
680 argument_buffer.push_back(command.arguments[best_command][0]);
681 if (command.cmd_size[best_command] == 2) {
682 argument_buffer.push_back(command.arguments[best_command][1]);
683 }
684
685 // Create a new compression piece for the best command.
686 auto new_compression_piece = std::make_shared<CompressionPiece>(
687 best_command, best_command_gain, argument_buffer,
688 command.cmd_size[best_command]);
689 PrintCompressionPiece(new_compression_piece);
690
691 // If there is uncompressed data, create a direct copy compression piece for
692 // it.
693 if (uncompressed_data_size != 0) {
694 std::string copy_buffer(uncompressed_data_size, 0);
695 for (int i = 0; i < uncompressed_data_size; ++i) {
696 copy_buffer[i] =
697 rom_data[i + source_data_position - uncompressed_data_size];
698 }
699 auto direct_copy_piece = std::make_shared<CompressionPiece>(
700 kCommandDirectCopy, uncompressed_data_size, copy_buffer,
701 uncompressed_data_size);
702
703 // Append the direct copy piece to the chain.
704 compressed_chain->next = direct_copy_piece;
705 compressed_chain = direct_copy_piece;
706 }
707
708 // Append the new compression piece to the chain.
709 compressed_chain->next = new_compression_piece;
710 compressed_chain = new_compression_piece;
711
712 // Update the position in the source data and reset the uncompressed data
713 // size.
714 source_data_position += best_command_gain;
715 uncompressed_data_size = 0;
716}
717
718absl::StatusOr<CompressionPiecePointer> SplitCompressionPiece(
719 CompressionPiecePointer& piece, int mode) {
720 CompressionPiecePointer new_piece;
721 unsigned int length_left = piece->length - kMaxLengthCompression;
722 piece->length = kMaxLengthCompression;
723
724 switch (piece->command) {
725 case kCommandByteFill:
726 case kCommandWordFill:
727 new_piece = std::make_shared<CompressionPiece>(
728 piece->command, length_left, piece->argument, piece->argument_length);
729 break;
731 new_piece = std::make_shared<CompressionPiece>(
732 piece->command, length_left, piece->argument, piece->argument_length);
733 new_piece->argument[0] =
734 (char)(piece->argument[0] + kMaxLengthCompression);
735 break;
736 case kCommandDirectCopy: {
737 std::string empty;
738 piece->argument_length = kMaxLengthCompression;
739 new_piece = std::make_shared<CompressionPiece>(
740 piece->command, length_left, empty, length_left);
741 // MEMCPY
742 for (int i = 0; i < length_left; ++i) {
743 new_piece->argument[i] = piece->argument[i + kMaxLengthCompression];
744 }
745 break;
746 }
748 piece->argument_length = kMaxLengthCompression;
749 unsigned int offset = piece->argument[0] + (piece->argument[1] << 8);
750 new_piece = std::make_shared<CompressionPiece>(
751 piece->command, length_left, piece->argument, piece->argument_length);
752 if (mode == kNintendoMode2) {
753 new_piece->argument[0] =
755 new_piece->argument[1] = (offset + kMaxLengthCompression) >> 8;
756 }
757 if (mode == kNintendoMode1) {
758 new_piece->argument[1] =
760 new_piece->argument[0] = (offset + kMaxLengthCompression) >> 8;
761 }
762 } break;
763 default: {
764 return absl::InvalidArgumentError(
765 "SplitCompressionCommand: Invalid Command");
766 }
767 }
768 return new_piece;
769}
770
772 int mode) {
773 unsigned int pos = 0;
774 auto piece = start;
775 std::vector<uint8_t> output;
776
777 while (piece != nullptr) {
778 if (piece->length <= kMaxLengthNormalHeader) { // Normal header
779 output.push_back(BUILD_HEADER(piece->command, piece->length));
780 pos++;
781 } else {
782 if (piece->length <= kMaxLengthCompression) {
783 output.push_back(kCompressionStringMod |
784 ((uint8_t)piece->command << 2) |
785 (((piece->length - 1) & 0xFF00) >> 8));
786 pos++;
787 printf("Building extended header : cmd: %d, length: %d - %02X\n",
788 piece->command, piece->length, output[pos - 1]);
789 output.push_back(((piece->length - 1) & 0x00FF)); // (char)
790 pos++;
791 } else {
792 // We need to split the command
793 auto new_piece = SplitCompressionPiece(piece, mode);
794 if (!new_piece.ok()) {
795 std::cout << new_piece.status().ToString() << std::endl;
796 }
797 printf("New added piece\n");
798 auto piece_data = new_piece.value();
799 PrintCompressionPiece(piece_data);
800 piece_data->next = piece->next;
801 piece->next = piece_data;
802 continue;
803 }
804 }
805
806 if (piece->command == kCommandRepeatingBytes) {
807 char tmp[2];
808 tmp[0] = piece->argument[0];
809 tmp[1] = piece->argument[1];
810 if (mode == kNintendoMode1) {
811 tmp[0] = piece->argument[1];
812 tmp[1] = piece->argument[0];
813 }
814 for (const auto& each : tmp) {
815 output.push_back(each);
816 pos++;
817 }
818 } else {
819 for (int i = 0; i < piece->argument_length; ++i) {
820 output.push_back(piece->argument[i]);
821 pos++;
822 }
823 }
824 pos += piece->argument_length;
825 piece = piece->next;
826 }
827 output.push_back(kSnesByteMax);
828 return output;
829}
830
832 int mode, int start, int src_data_pos) {
833 if (chain_head->next != nullptr) {
834 Rom temp_rom;
836 temp_rom.LoadFromData(CreateCompressionString(chain_head->next, mode)))
838 auto decomp_data,
839 DecompressV2(temp_rom.data(), 0, temp_rom.size(), 1, temp_rom.size()));
840 if (!std::equal(decomp_data.begin() + start, decomp_data.end(),
841 temp_rom.begin())) {
842 return absl::InternalError(absl::StrFormat(
843 "Compressed data does not match uncompressed data at %d\n",
844 (uint)(src_data_pos - start)));
845 }
846 }
847 return absl::OkStatus();
848}
849
850// Merge consecutive copy if possible
852 CompressionPiecePointer piece = start;
853
854 while (piece != nullptr) {
855 if (piece->command == kCommandDirectCopy && piece->next != nullptr &&
856 piece->next->command == kCommandDirectCopy &&
857 piece->length + piece->next->length <= kMaxLengthCompression) {
858 unsigned int previous_length = piece->length;
859 piece->length = piece->length + piece->next->length;
860
861 for (int i = 0; i < piece->next->argument_length; ++i) {
862 piece->argument[i + previous_length] = piece->next->argument[i];
863 }
864 piece->argument_length = piece->length;
866
867 auto p_next_next = piece->next->next;
868 piece->next = p_next_next;
869 continue; // Next could be another copy
870 }
871 piece = piece->next;
872 }
873 return start;
874}
875
876absl::StatusOr<std::vector<uint8_t>> CompressV2(const uint8_t* data,
877 const int start,
878 const int length, int mode,
879 bool check) {
880 // Surely there's no need to compress zero...
881 if (length == 0) {
882 return std::vector<uint8_t>();
883 }
884
885 // Worst case should be a copy of the string with extended header
886 std::string worst_case = "aaa";
887 auto compressed_chain =
888 std::make_shared<CompressionPiece>(1, 1, worst_case, 2);
889 auto compressed_chain_start = compressed_chain;
890
891 CompressionCommand current_cmd = {/*argument*/ {{}},
892 /*cmd_size*/ {0, 1, 2, 1, 2},
893 /*data_size*/ {0, 0, 0, 0, 0}};
894
895 unsigned int src_pos = start;
896 unsigned int last_pos = start + length - 1;
897 unsigned int comp_accumulator = 0; // Used when skipping using copy
898
899 while (true) {
900 current_cmd.data_size.fill({});
901 current_cmd.arguments.fill({{}});
902
903 CheckByteRepeatV2(data, src_pos, last_pos, current_cmd);
904 CheckWordRepeatV2(data, src_pos, last_pos, current_cmd);
905 CheckIncByteV2(data, src_pos, last_pos, current_cmd);
906 CheckIntraCopyV2(data, src_pos, last_pos, start, current_cmd);
907
908 unsigned int max_win = 2;
909 unsigned int cmd_with_max = kCommandDirectCopy;
910 ValidateForByteGain(current_cmd.data_size, current_cmd.cmd_size, max_win,
911 cmd_with_max);
912 // ValidateForByteGainV2(current_cmd, max_win, cmd_with_max);
913
914 if (cmd_with_max == kCommandDirectCopy) {
915 // This is the worst case scenario
916 // Progress through the next byte, in case there's a different
917 // compression command we can implement before we hit 32 bytes.
918 src_pos++;
919 comp_accumulator++;
920
921 // Arbitrary choice to do a 32 bytes grouping for copy.
922 if (comp_accumulator == 32 || src_pos > last_pos) {
923 std::string buffer = SetBuffer(data, src_pos, comp_accumulator);
924 auto new_comp_piece = std::make_shared<CompressionPiece>(
925 kCommandDirectCopy, comp_accumulator, buffer, comp_accumulator);
926 compressed_chain->next = new_comp_piece;
927 comp_accumulator = 0;
928 }
929 } else {
930 AddAlternativeCompressionCommand(data, compressed_chain, current_cmd,
931 src_pos, comp_accumulator, cmd_with_max,
932 max_win);
933 }
934
935 if (src_pos > last_pos) {
936 printf("Breaking compression loop\n");
937 break;
938 }
939
940 if (check) {
941 RETURN_IF_ERROR(ValidateCompressionResult(compressed_chain_start, mode,
942 start, src_pos))
943 }
944 }
945
946 // Skipping compression chain header
947 MergeCopy(compressed_chain_start->next);
948 PrintCompressionChain(compressed_chain_start);
949 return CreateCompressionString(compressed_chain_start->next, mode);
950}
951
952absl::StatusOr<std::vector<uint8_t>> CompressGraphics(const uint8_t* data,
953 const int pos,
954 const int length) {
955 return CompressV2(data, pos, length, kNintendoMode2);
956}
957
958absl::StatusOr<std::vector<uint8_t>> CompressOverworld(const uint8_t* data,
959 const int pos,
960 const int length) {
961 return CompressV2(data, pos, length, kNintendoMode1);
962}
963
964absl::StatusOr<std::vector<uint8_t>> CompressOverworld(
965 const std::vector<uint8_t> data, const int pos, const int length) {
966 return CompressV3(data, pos, length, kNintendoMode1);
967}
968
970 unsigned int pos = context.src_pos;
971
972 // Ensure the sequence does not start with an uncompressable byte
973 if (pos == 0 || context.data[pos - 1] != context.data[pos]) {
974 char byte_to_repeat = context.data[pos];
975 while (pos <= context.last_pos && context.data[pos] == byte_to_repeat) {
977 pos++;
978 }
979
980 context.current_cmd.arguments[kCommandByteFill][0] = byte_to_repeat;
981
982 // Added debug log
983 DEBUG_LOG("CheckByteRepeatV3: byte_to_repeat = "
984 << (int)byte_to_repeat << ", size = "
986 }
987}
988
990 if (context.src_pos + 1 <= context.last_pos) { // Changed the condition here
991 unsigned int pos = context.src_pos;
992 char byte1 = context.data[pos];
993 char byte2 = context.data[pos + 1];
994 pos += 2;
996 while (pos + 1 <= context.last_pos) {
997 if (context.data[pos] == byte1 && context.data[pos + 1] == byte2)
999 else
1000 break;
1001 pos += 2;
1002 }
1003
1004 context.current_cmd.arguments[kCommandWordFill][0] = byte1;
1005 context.current_cmd.arguments[kCommandWordFill][1] = byte2;
1006 }
1007
1008 DEBUG_LOG("CheckWordRepeatV3: byte1 = "
1009 << (int)context.current_cmd.arguments[kCommandWordFill][0]
1010 << ", byte2 = "
1011 << (int)context.current_cmd.arguments[kCommandWordFill][1]
1012 << ", size = " << context.current_cmd.data_size[kCommandWordFill]);
1013}
1014
1016 unsigned int pos = context.src_pos;
1017 uint8_t byte = context.data[pos];
1018 pos++;
1020 byte++;
1021
1022 while (pos <= context.last_pos && byte == context.data[pos]) {
1024 byte++;
1025 pos++;
1026 }
1027
1028 // Let's see if the sequence is surrounded by identical bytes and if so,
1029 // consider if a direct copy is better.
1030 if (context.current_cmd.data_size[kCommandIncreasingFill] == 3 &&
1031 context.src_pos > 0 && pos < context.data.size() &&
1032 context.data[context.src_pos - 1] == context.data[pos]) {
1034 0; // Reset the size to 0 to prioritize direct copy
1035 return;
1036 }
1037
1039 context.data[context.src_pos];
1040
1041 DEBUG_LOG("CheckIncByteV3: byte = "
1042 << (int)context.current_cmd.arguments[kCommandIncreasingFill][0]
1043 << ", size = "
1045}
1046
1048 const int window_size =
1049 32; // This can be adjusted for optimal performance and results
1050
1051 // We'll only search for repeating sequences if we're not at the very
1052 // beginning
1053 if (context.src_pos > 0 &&
1054 context.src_pos + window_size <= context.data.size()) {
1055 unsigned int max_copied_size = 0;
1056 unsigned int best_search_start = 0;
1057
1058 // Slide the window over the source data
1059 for (int win_pos = 1; win_pos < window_size && win_pos < context.src_pos;
1060 ++win_pos) {
1061 auto start_search_from = context.data.begin() + context.src_pos - win_pos;
1062 auto search_end = context.data.begin() + context.src_pos;
1063
1064 // Use std::search to find the sequence in the window in the previous
1065 // source data
1066 auto found_pos = std::search(
1067 start_search_from, search_end, context.data.begin() + context.src_pos,
1068 context.data.begin() + context.src_pos + win_pos);
1069
1070 if (found_pos != search_end) {
1071 // Check the entire length of the match
1072 unsigned int len = 0;
1073 while (context.src_pos + len < context.data.size() &&
1074 context.data[context.src_pos + len] == *(found_pos + len)) {
1075 len++;
1076 }
1077
1078 if (len > max_copied_size) {
1079 max_copied_size = len;
1080 best_search_start = found_pos - context.data.begin();
1081 }
1082 }
1083 }
1084
1085 if (max_copied_size >
1087 DEBUG_LOG("CheckIntraCopyV3: Detected repeating sequence of length "
1088 << max_copied_size << " starting from " << best_search_start);
1089 context.current_cmd.data_size[kCommandRepeatingBytes] = max_copied_size;
1091 best_search_start & kSnesByteMax;
1093 best_search_start >> 8;
1094 }
1095
1096 DEBUG_LOG("CheckIntraCopyV3: max_copied_size = " << max_copied_size
1097 << ", best_search_start = "
1098 << best_search_start);
1099 }
1100}
1101
1103 // Initialize the current_cmd with default values.
1104 context.current_cmd = {/*argument*/ {{}},
1105 /*cmd_size*/ {0, 1, 2, 1, 2},
1106 /*data_size*/ {0, 0, 0, 0, 0}};
1107}
1108
1110 // Reset the data_size and arguments for a fresh check.
1111 context.current_cmd.data_size.fill({});
1112 context.current_cmd.arguments.fill({{}});
1113
1114 CheckByteRepeatV3(context);
1115 CheckWordRepeatV3(context);
1116 CheckIncByteV3(context);
1117 CheckIntraCopyV3(context);
1118
1119 DEBUG_LOG("CheckAvailableCompressionCommands: src_pos = " << context.src_pos);
1120}
1121
1123 int max_net_savings = -1; // Adjusted the bias to consider any savings
1124
1125 // Start with the default scenario.
1127
1128 for (unsigned int cmd_i = 1; cmd_i < 5; cmd_i++) {
1129 unsigned int cmd_size_taken = context.current_cmd.data_size[cmd_i];
1130 int net_savings = cmd_size_taken - context.current_cmd.cmd_size[cmd_i];
1131
1132 // Skip commands that aren't efficient.
1133 if (cmd_size_taken <= 2 && cmd_i != kCommandDirectCopy) {
1134 continue;
1135 }
1136
1137 // Check surrounding data for optimization.
1138 if (context.src_pos > 0 &&
1139 context.src_pos + cmd_size_taken < context.data.size()) {
1140 char prev_byte = context.data[context.src_pos - 1];
1141 char next_byte = context.data[context.src_pos + cmd_size_taken];
1142 if (prev_byte != next_byte && cmd_size_taken == 3) {
1143 continue;
1144 }
1145 }
1146
1147 // Check if the current command offers more net savings.
1148 if (net_savings > max_net_savings) {
1149 context.cmd_with_max = cmd_i;
1150 max_net_savings = net_savings;
1151 }
1152 }
1153
1154 DEBUG_LOG("DetermineBestCompression: cmd_with_max = "
1155 << context.cmd_with_max << ", data_size = "
1156 << context.current_cmd.data_size[context.cmd_with_max]);
1157}
1158
1160 // If the next best compression method isn't direct copy and we have bytes
1161 // accumulated for direct copy, flush them out.
1162 if (context.cmd_with_max != kCommandDirectCopy &&
1163 context.comp_accumulator > 0) {
1164 uint8_t header = BUILD_HEADER(kCommandDirectCopy, context.comp_accumulator);
1165 context.compressed_data.push_back(header);
1166 std::vector<uint8_t> uncompressed_data(
1167 context.data.begin() + context.src_pos - context.comp_accumulator,
1168 context.data.begin() + context.src_pos);
1169 context.compressed_data.insert(context.compressed_data.end(),
1170 uncompressed_data.begin(),
1171 uncompressed_data.end());
1172 context.comp_accumulator = 0;
1173 return;
1174 }
1175
1176 // If the next best compression method is not direct copy and we haven't
1177 // accumulated any bytes, treat it as a single byte direct copy.
1178 if (context.cmd_with_max != kCommandDirectCopy &&
1179 context.comp_accumulator == 0) {
1180 context.compressed_data.push_back(
1181 0x00); // Command for a single byte direct copy
1182 context.compressed_data.push_back(
1183 context.data[context.src_pos]); // The single byte
1184 context.src_pos++;
1185 return;
1186 }
1187
1188 // If we reach here, accumulate bytes for a direct copy.
1189 context.src_pos++;
1190 context.comp_accumulator++;
1191
1192 // If we've accumulated the maximum bytes for a direct copy command or
1193 // reached the end, flush them.
1194 if (context.comp_accumulator >= 32 || context.src_pos > context.last_pos) {
1195 uint8_t header = BUILD_HEADER(kCommandDirectCopy, context.comp_accumulator);
1196 context.compressed_data.push_back(header);
1197 std::vector<uint8_t> uncompressed_data(
1198 context.data.begin() + context.src_pos - context.comp_accumulator,
1199 context.data.begin() + context.src_pos);
1200 context.compressed_data.insert(context.compressed_data.end(),
1201 uncompressed_data.begin(),
1202 uncompressed_data.end());
1203 context.comp_accumulator = 0;
1204 }
1205
1206 DEBUG_LOG("HandleDirectCopy: src_pos = " << context.src_pos
1207 << ", compressed_data size = "
1208 << context.compressed_data.size());
1209}
1210
1212 DEBUG_LOG("AddCompressionToChain: Adding command arguments: ");
1213
1214 // If there's uncompressed data, add a copy chunk before the compression
1215 // command
1216 if (context.comp_accumulator != 0) {
1217 uint8_t header = BUILD_HEADER(kCommandDirectCopy, context.comp_accumulator);
1218 context.compressed_data.push_back(header);
1219 std::vector<uint8_t> uncompressed_data(
1220 context.data.begin() + context.src_pos - context.comp_accumulator,
1221 context.data.begin() + context.src_pos);
1222 context.compressed_data.insert(context.compressed_data.end(),
1223 uncompressed_data.begin(),
1224 uncompressed_data.end());
1225 context.comp_accumulator = 0;
1226 }
1227
1228 // Now, add the compression command
1229 uint8_t header =
1230 BUILD_HEADER(context.cmd_with_max,
1231 context.current_cmd.data_size[context.cmd_with_max]);
1232 context.compressed_data.push_back(header);
1233
1234 DEBUG_LOG("AddCompressionToChain: (Before) src_pos = "
1235 << context.src_pos
1236 << ", compressed_data size = " << context.compressed_data.size());
1237
1238 // Add the command arguments to the compressed_data vector
1239 context.compressed_data.push_back(
1240 context.current_cmd.arguments[context.cmd_with_max][0]);
1241 if (context.current_cmd.cmd_size[context.cmd_with_max] == 2) {
1242 context.compressed_data.push_back(
1243 context.current_cmd.arguments[context.cmd_with_max][1]);
1244 }
1245
1246 context.src_pos += context.current_cmd.data_size[context.cmd_with_max];
1247 context.comp_accumulator = 0;
1248
1249 DEBUG_LOG("AddCompressionToChain: (After) src_pos = "
1250 << context.src_pos
1251 << ", compressed_data size = " << context.compressed_data.size());
1252}
1253
1255 if (!context.compressed_data.empty()) {
1256 Rom temp_rom;
1257 RETURN_IF_ERROR(temp_rom.LoadFromData(context.compressed_data));
1259 auto decomp_data,
1260 DecompressV2(temp_rom.data(), 0, temp_rom.size(), 1, temp_rom.size()));
1261
1262 if (!std::equal(decomp_data.begin() + context.start, decomp_data.end(),
1263 temp_rom.begin())) {
1264 return absl::InternalError(absl::StrFormat(
1265 "Compressed data does not match uncompressed data at %d\n",
1266 (context.src_pos - context.start)));
1267 }
1268 }
1269 return absl::OkStatus();
1270}
1271
1272absl::StatusOr<CompressionPiece> SplitCompressionPieceV3(
1273 CompressionPiece& piece, int mode) {
1274 CompressionPiece new_piece;
1275 unsigned int length_left = piece.length - kMaxLengthCompression;
1277
1278 switch (piece.command) {
1279 case kCommandByteFill:
1280 case kCommandWordFill:
1281 new_piece = CompressionPiece(piece.command, length_left, piece.argument,
1282 piece.argument_length);
1283 break;
1285 new_piece = CompressionPiece(piece.command, length_left, piece.argument,
1286 piece.argument_length);
1287 new_piece.argument[0] = (char)(piece.argument[0] + kMaxLengthCompression);
1288 break;
1289 case kCommandDirectCopy: {
1291 std::string empty_string = "";
1292 new_piece = CompressionPiece(piece.command, length_left, empty_string,
1293 length_left);
1294 // MEMCPY
1295 for (int i = 0; i < length_left; ++i) {
1296 new_piece.argument[i] = piece.argument[i + kMaxLengthCompression];
1297 }
1298 break;
1299 }
1302 unsigned int offset = piece.argument[0] + (piece.argument[1] << 8);
1303 new_piece = CompressionPiece(piece.command, length_left, piece.argument,
1304 piece.argument_length);
1305 if (mode == kNintendoMode2) {
1306 new_piece.argument[0] = (offset + kMaxLengthCompression) & kSnesByteMax;
1307 new_piece.argument[1] = (offset + kMaxLengthCompression) >> 8;
1308 }
1309 if (mode == kNintendoMode1) {
1310 new_piece.argument[1] = (offset + kMaxLengthCompression) & kSnesByteMax;
1311 new_piece.argument[0] = (offset + kMaxLengthCompression) >> 8;
1312 }
1313 } break;
1314 default: {
1315 return absl::InvalidArgumentError(
1316 "SplitCompressionCommand: Invalid Command");
1317 }
1318 }
1319
1320 return new_piece;
1321}
1322
1324 unsigned int pos = 0;
1325
1326 for (CompressionPiece& piece : context.compression_pieces) {
1327 if (piece.length <= kMaxLengthNormalHeader) { // Normal Header
1328 context.compression_string.push_back(
1329 BUILD_HEADER(piece.command, piece.length));
1330 pos++;
1331 } else {
1332 if (piece.length <= kMaxLengthCompression) {
1333 context.compression_string.push_back(
1334 kCompressionStringMod | ((uint8_t)piece.command << 2) |
1335 (((piece.length - 1) & 0xFF00) >> 8));
1336 pos++;
1337 std::cout << "Building extended header : cmd: " << piece.command
1338 << ", length: " << piece.length << " - "
1339 << (int)context.compression_string[pos - 1] << std::endl;
1340 context.compression_string.push_back(
1341 ((piece.length - 1) & 0x00FF)); // (char)
1342 } else {
1343 // We need to split the command
1344 auto new_piece = SplitCompressionPieceV3(piece, context.mode);
1345 if (!new_piece.ok()) {
1346 std::cout << new_piece.status().ToString() << std::endl;
1347 }
1348 context.compression_pieces.insert(
1349 context.compression_pieces.begin() + pos + 1, new_piece.value());
1350 continue;
1351 }
1352 }
1353
1354 if (piece.command == kCommandRepeatingBytes) {
1355 char tmp[2];
1356 tmp[0] = piece.argument[0];
1357 tmp[1] = piece.argument[1];
1358 if (context.mode == kNintendoMode1) {
1359 tmp[0] = piece.argument[1];
1360 tmp[1] = piece.argument[0];
1361 }
1362 for (const auto& each : tmp) {
1363 context.compression_string.push_back(each);
1364 pos++;
1365 }
1366 } else {
1367 for (int i = 0; i < piece.argument_length; ++i) {
1368 context.compression_string.push_back(piece.argument[i]);
1369 pos++;
1370 }
1371 }
1372 pos += piece.argument_length;
1373 }
1374
1375 // Add any remaining uncompressed data
1376 if (context.comp_accumulator > 0) {
1377 context.compressed_data.insert(
1378 context.compressed_data.end(),
1379 context.data.begin() + context.src_pos - context.comp_accumulator,
1380 context.data.begin() + context.src_pos);
1381 context.comp_accumulator = 0;
1382 }
1383
1384 // Add the end marker to the compressed data
1385 context.compressed_data.push_back(kSnesByteMax);
1386 DEBUG_LOG("FinalizeCompression: compressed_data size = "
1387 << context.compressed_data.size());
1388}
1389
1390absl::StatusOr<std::vector<uint8_t>> CompressV3(
1391 const std::vector<uint8_t>& data, const int start, const int length,
1392 int mode, bool check) {
1393 if (length == 0) {
1394 return std::vector<uint8_t>();
1395 }
1396
1397 CompressionContext context(data, start, length, mode);
1398 InitializeCompression(context);
1399
1400 while (context.src_pos <= context.last_pos) {
1402 DetermineBestCompression(context);
1403
1404 DEBUG_LOG("CompressV3 Loop: cmd_with_max = " << context.cmd_with_max);
1405
1406 if (context.cmd_with_max == kCommandDirectCopy) {
1407 HandleDirectCopy(context);
1408 } else {
1409 AddCompressionToChain(context);
1410 }
1411
1412 if (check) {
1414 }
1415 }
1416
1417 FinalizeCompression(context);
1418 return std::vector<uint8_t>(context.compressed_data.begin(),
1419 context.compressed_data.end());
1420}
1421
1422std::string SetBuffer(const uint8_t* data, int src_pos, int comp_accumulator) {
1423 std::string buffer;
1424 for (int i = 0; i < comp_accumulator; ++i) {
1425 buffer.push_back(data[i + src_pos - comp_accumulator]);
1426 }
1427 return buffer;
1428}
1429
1430std::string SetBuffer(const std::vector<uint8_t>& data, int src_pos,
1431 int comp_accumulator) {
1432 std::string buffer;
1433 for (int i = 0; i < comp_accumulator; ++i) {
1434 buffer.push_back(data[i + src_pos - comp_accumulator]);
1435 }
1436 return buffer;
1437}
1438
1439void memfill(const uint8_t* data, std::vector<uint8_t>& buffer, int buffer_pos,
1440 int offset, int length) {
1441 auto a = data[offset];
1442 auto b = data[offset + 1];
1443 for (int i = 0; i < length; i = i + 2) {
1444 buffer[buffer_pos + i] = a;
1445 if ((i + 1) < length)
1446 buffer[buffer_pos + i + 1] = b;
1447 }
1448}
1449
1450absl::StatusOr<std::vector<uint8_t>> DecompressV2(const uint8_t* data,
1451 int offset, int size,
1452 int mode, size_t rom_size) {
1453 if (size == 0) {
1454 return std::vector<uint8_t>();
1455 }
1456
1457 // Validate initial offset is within bounds (if rom_size provided)
1458 // rom_size == static_cast<size_t>(-1) means "no bounds checking"
1459 if (rom_size != static_cast<size_t>(-1) &&
1460 (offset < 0 || static_cast<size_t>(offset) >= rom_size)) {
1461 return absl::OutOfRangeError(
1462 absl::StrFormat("DecompressV2: Initial offset %d exceeds ROM size %zu",
1463 offset, rom_size));
1464 }
1465
1466 std::vector<uint8_t> buffer(size, 0);
1467 unsigned int length = 0;
1468 unsigned int buffer_pos = 0;
1469 uint8_t command = 0;
1470
1471 // Bounds check before initial header read
1472 if (rom_size != static_cast<size_t>(-1) &&
1473 static_cast<size_t>(offset) >= rom_size) {
1474 return absl::OutOfRangeError(
1475 absl::StrFormat("DecompressV2: Initial offset %d exceeds ROM size %zu",
1476 offset, rom_size));
1477 }
1478 uint8_t header = data[offset];
1479
1480 while (header != kSnesByteMax) {
1481 // Bounds check before reading command (if rom_size provided)
1482 if (rom_size != static_cast<size_t>(-1) &&
1483 static_cast<size_t>(offset) >= rom_size) {
1484 return absl::OutOfRangeError(absl::StrFormat(
1485 "DecompressV2: Offset %d exceeds ROM size %zu while reading command",
1486 offset, rom_size));
1487 }
1488
1489 if ((header & kExpandedMod) == kExpandedMod) {
1490 // Expanded Command - needs 2 bytes
1491 if (rom_size != static_cast<size_t>(-1) &&
1492 static_cast<size_t>(offset + 1) >= rom_size) {
1493 return absl::OutOfRangeError(
1494 absl::StrFormat("DecompressV2: Offset %d+1 exceeds ROM size %zu "
1495 "for expanded command",
1496 offset, rom_size));
1497 }
1498 command = ((header >> 2) & kCommandMod);
1499 length = (((header << 8) | data[offset + 1]) & kExpandedLengthMod);
1500 offset += 2; // Advance 2 bytes in ROM
1501 } else {
1502 // Normal Command
1503 command = ((header >> 5) & kCommandMod);
1504 length = (header & kNormalLengthMod);
1505 offset += 1; // Advance 1 byte in ROM
1506 }
1507 length += 1; // each commands is at least of size 1 even if index 00
1508
1509 // CRITICAL: Check and resize buffer BEFORE any command writes
1510 // This prevents WASM "index out of bounds" errors
1511 // The buffer may need to grow if decompressed data exceeds initial size
1512 if (buffer_pos + length > static_cast<unsigned int>(size)) {
1513 // Double the buffer size (with overflow protection)
1514 int new_size = size;
1515 while (buffer_pos + length > static_cast<unsigned int>(new_size)) {
1516 if (new_size > INT_MAX / 2) {
1517 return absl::ResourceExhaustedError(absl::StrFormat(
1518 "DecompressV2: Buffer size overflow at pos %u, length %u",
1519 buffer_pos, length));
1520 }
1521 new_size *= 2;
1522 }
1523 size = new_size;
1524 buffer.resize(size);
1525 }
1526
1527 switch (command) {
1528 case kCommandDirectCopy: // Does not advance in the ROM
1529 // Bounds check for direct copy
1530 if (rom_size != static_cast<size_t>(-1) &&
1531 static_cast<size_t>(offset + length) > rom_size) {
1532 return absl::OutOfRangeError(
1533 absl::StrFormat("DecompressV2: DirectCopy offset %d + length %u "
1534 "exceeds ROM size %zu",
1535 offset, length, rom_size));
1536 }
1537 memcpy(buffer.data() + buffer_pos, data + offset, length);
1538 buffer_pos += length;
1539 offset += length;
1540 break;
1541 case kCommandByteFill:
1542 // Bounds check for byte fill
1543 if (rom_size != static_cast<size_t>(-1) &&
1544 static_cast<size_t>(offset) >= rom_size) {
1545 return absl::OutOfRangeError(absl::StrFormat(
1546 "DecompressV2: ByteFill offset %d exceeds ROM size %zu", offset,
1547 rom_size));
1548 }
1549 memset(buffer.data() + buffer_pos, (int)(data[offset]), length);
1550 buffer_pos += length;
1551 offset += 1; // Advances 1 byte in the ROM
1552 break;
1553 case kCommandWordFill:
1554 // Bounds check for word fill (needs 2 bytes)
1555 if (rom_size != static_cast<size_t>(-1) &&
1556 static_cast<size_t>(offset + 1) >= rom_size) {
1557 return absl::OutOfRangeError(absl::StrFormat(
1558 "DecompressV2: WordFill offset %d+1 exceeds ROM size %zu", offset,
1559 rom_size));
1560 }
1561 memfill(data, buffer, buffer_pos, offset, length);
1562 buffer_pos += length;
1563 offset += 2; // Advance 2 byte in the ROM
1564 break;
1566 // Bounds check for increasing fill
1567 if (rom_size != static_cast<size_t>(-1) &&
1568 static_cast<size_t>(offset) >= rom_size) {
1569 return absl::OutOfRangeError(absl::StrFormat(
1570 "DecompressV2: IncreasingFill offset %d exceeds ROM size %zu",
1571 offset, rom_size));
1572 }
1573 auto inc_byte = data[offset];
1574 for (unsigned int i = 0; i < length; i++) {
1575 buffer[buffer_pos] = inc_byte++;
1576 buffer_pos++;
1577 }
1578 offset += 1; // Advance 1 byte in the ROM
1579 } break;
1581 // Bounds check for repeating bytes (needs 2 bytes)
1582 if (rom_size != static_cast<size_t>(-1) &&
1583 static_cast<size_t>(offset + 1) >= rom_size) {
1584 return absl::OutOfRangeError(absl::StrFormat(
1585 "DecompressV2: RepeatingBytes offset %d+1 exceeds ROM size %zu",
1586 offset, rom_size));
1587 }
1588 uint16_t s1 = ((data[offset + 1] & kSnesByteMax) << 8);
1589 uint16_t s2 = (data[offset] & kSnesByteMax);
1590 int addr = (s1 | s2);
1591 if (mode == kNintendoMode1) { // Reversed byte order for
1592 // overworld maps
1593 addr = (data[offset + 1] & kSnesByteMax) |
1594 ((data[offset] & kSnesByteMax) << 8);
1595 }
1596 if (addr > offset) {
1597 return absl::InternalError(
1598 absl::StrFormat("Decompress: Offset for command copy exceeds "
1599 "current position "
1600 "(Offset : %#04x | Pos : %#06x)\n",
1601 addr, offset));
1602 }
1603 // Buffer resize already done above, no need to check again
1604 memcpy(buffer.data() + buffer_pos, buffer.data() + addr, length);
1605 buffer_pos += length;
1606 offset += 2;
1607 } break;
1608 default: {
1609 std::cout << absl::StrFormat(
1610 "Decompress: Invalid header (Offset : %#06x, Command: %#04x)\n",
1611 offset, command);
1612 } break;
1613 }
1614 // Bounds check before reading next header byte
1615 if (rom_size != static_cast<size_t>(-1) &&
1616 static_cast<size_t>(offset) >= rom_size) {
1617 return absl::OutOfRangeError(
1618 absl::StrFormat("DecompressV2: Offset %d exceeds ROM size %zu while "
1619 "reading next header",
1620 offset, rom_size));
1621 }
1622 header = data[offset];
1623 }
1624
1625 return buffer;
1626}
1627
1628absl::StatusOr<std::vector<uint8_t>> DecompressGraphics(const uint8_t* data,
1629 int pos, int size) {
1630 return DecompressV2(data, pos, size, kNintendoMode2);
1631}
1632
1633absl::StatusOr<std::vector<uint8_t>> DecompressOverworld(const uint8_t* data,
1634 int pos, int size) {
1635 return DecompressV2(data, pos, size, kNintendoMode1);
1636}
1637
1638absl::StatusOr<std::vector<uint8_t>> DecompressOverworld(
1639 const std::vector<uint8_t> data, int pos, int size) {
1640 return DecompressV2(data.data(), pos, size, kNintendoMode1);
1641}
1642
1643} // namespace lc_lz2
1644} // namespace gfx
1645} // namespace yaze
The Rom class is used to load, save, and modify Rom data. This is a generic SNES ROM container and do...
Definition rom.h:28
auto begin()
Definition rom.h:141
auto data() const
Definition rom.h:139
auto size() const
Definition rom.h:138
absl::Status LoadFromData(const std::vector< uint8_t > &data, const LoadOptions &options=LoadOptions::Defaults())
Definition rom.cc:255
#define DEBUG_LOG(msg)
#define BUILD_HEADER(command, length)
Definition compression.h:13
unsigned int uint
Definition macro.h:4
#define ASSIGN_OR_RETURN(type_variable_name, expression)
Definition macro.h:62
void CheckIntraCopyV3(CompressionContext &context)
absl::StatusOr< std::vector< uint8_t > > CompressGraphics(const uint8_t *data, const int pos, const int length)
absl::StatusOr< std::vector< uint8_t > > CompressV3(const std::vector< uint8_t > &data, const int start, const int length, int mode, bool check)
Compresses a buffer of data using the LC_LZ2 algorithm.
constexpr int kExpandedLengthMod
Definition compression.h:59
constexpr int kCommandDirectCopy
Definition compression.h:46
void memfill(const uint8_t *data, std::vector< uint8_t > &buffer, int buffer_pos, int offset, int length)
void FinalizeCompression(CompressionContext &context)
void CheckWordRepeatV2(const uint8_t *data, uint &src_pos, const unsigned int last_pos, CompressionCommand &cmd)
void InitializeCompression(CompressionContext &context)
absl::StatusOr< std::vector< uint8_t > > DecompressV2(const uint8_t *data, int offset, int size, int mode, size_t rom_size)
Decompresses a buffer of data using the LC_LZ2 algorithm.
struct CompressionPiece CompressionPiece
Definition compression.h:90
absl::StatusOr< std::vector< uint8_t > > CompressOverworld(const uint8_t *data, const int pos, const int length)
constexpr int kSnesByteMax
Definition compression.h:56
constexpr int kNintendoMode1
Definition compression.h:54
void AddAlternativeCompressionCommand(const uint8_t *rom_data, CompressionPiecePointer &compressed_chain, const CompressionCommand &command, uint &source_data_position, uint &uncompressed_data_size, uint &best_command, uint &best_command_gain)
std::shared_ptr< CompressionPiece > CompressionPiecePointer
Definition compression.h:91
void CheckIncByteV3(CompressionContext &context)
absl::Status ValidateCompressionResultV3(const CompressionContext &context)
constexpr int kNintendoMode2
Definition compression.h:55
std::string SetBuffer(const uint8_t *data, int src_pos, int comp_accumulator)
constexpr int kCommandRepeatingBytes
Definition compression.h:50
std::array< std::array< char, 2 >, 5 > CommandArgumentArray
Definition compression.h:75
void CheckByteRepeat(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos)
void CheckByteRepeatV2(const uint8_t *data, uint &src_pos, const unsigned int last_pos, CompressionCommand &cmd)
void HandleDirectCopy(CompressionContext &context)
void CheckIncByteV2(const uint8_t *rom_data, uint &src_data_pos, const unsigned int last_pos, CompressionCommand &cmd)
absl::StatusOr< std::vector< uint8_t > > DecompressGraphics(const uint8_t *data, int pos, int size)
constexpr int kCommandMod
Definition compression.h:57
void PrintCompressionPiece(const CompressionPiecePointer &piece)
constexpr int kCommandWordFill
Definition compression.h:48
constexpr int kMaxLengthNormalHeader
Definition compression.h:52
void CheckAvailableCompressionCommands(CompressionContext &context)
constexpr int kMaxLengthCompression
Definition compression.h:53
void CompressionCommandAlternativeV2(const uint8_t *rom_data, const CompressionCommand &cmd, CompressionPiecePointer &compressed_chain, uint &src_data_pos, uint &comp_accumulator, uint &cmd_with_max, uint &max_win)
void CheckIntraCopyV2(const uint8_t *rom_data, uint &src_data_pos, const unsigned int last_pos, unsigned int start, CompressionCommand &cmd)
std::vector< uint8_t > CreateCompressionString(CompressionPiecePointer &start, int mode)
absl::StatusOr< std::vector< uint8_t > > DecompressOverworld(const uint8_t *data, int pos, int size)
std::array< uint, 5 > DataSizeArray
Definition compression.h:77
constexpr int kCommandIncreasingFill
Definition compression.h:49
void ValidateForByteGainV2(const CompressionCommand &cmd, uint &max_win, uint &cmd_with_max)
void ValidateForByteGain(const DataSizeArray &data_size_taken, const CommandSizeArray &cmd_size, uint &max_win, uint &cmd_with_max)
constexpr int kCommandByteFill
Definition compression.h:47
void PrintCompressionChain(const CompressionPiecePointer &chain_head)
void CheckIncByte(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos)
constexpr int kExpandedMod
Definition compression.h:58
const std::array< int, 5 > kCommandSizes
void AddCompressionToChain(CompressionContext &context)
absl::StatusOr< CompressionPiecePointer > SplitCompressionPiece(CompressionPiecePointer &piece, int mode)
void CompressionCommandAlternative(const uint8_t *rom_data, CompressionPiecePointer &compressed_chain, const CommandSizeArray &cmd_size, const CommandArgumentArray &cmd_args, uint &src_data_pos, uint &comp_accumulator, uint &cmd_with_max, uint &max_win)
constexpr int kNormalLengthMod
Definition compression.h:60
constexpr int kCompressionStringMod
Definition compression.h:61
void CheckWordRepeat(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos)
void CheckByteRepeatV3(CompressionContext &context)
CompressionPiecePointer MergeCopy(CompressionPiecePointer &start)
std::array< uint, 5 > CommandSizeArray
Definition compression.h:76
void DetermineBestCompression(CompressionContext &context)
absl::StatusOr< CompressionPiece > SplitCompressionPieceV3(CompressionPiece &piece, int mode)
absl::StatusOr< std::vector< uint8_t > > CompressV2(const uint8_t *data, const int start, const int length, int mode, bool check)
Compresses a buffer of data using the LC_LZ2 algorithm.
void CheckWordRepeatV3(CompressionContext &context)
void CheckIntraCopy(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos, unsigned int start)
absl::Status ValidateCompressionResult(CompressionPiecePointer &chain_head, int mode, int start, int src_data_pos)
std::vector< uint8_t > HyruleMagicDecompress(uint8_t const *src, int *const size, int const p_big_endian, size_t max_src_size)
std::vector< uint8_t > HyruleMagicCompress(uint8_t const *const src, int const oldsize, int *const size, int const flag)
uint16_t ldle16b(uint8_t const *const p_arr)
void stle16b(uint8_t *const p_arr, uint16_t const p_val)
#define RETURN_IF_ERROR(expr)
Definition snes.cc:22
std::array< std::array< char, 2 >, 5 > arguments
Definition compression.h:66
std::vector< CompressionPiece > compression_pieces
std::vector< uint8_t > compression_string
std::vector< uint8_t > compressed_data