yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
compression.cc
Go to the documentation of this file.
1#include "compression.h"
2
3#include <climits>
4#include <iostream>
5#include <memory>
6#include <string>
7
8#include "absl/status/status.h"
9#include "absl/status/statusor.h"
10#include "rom/rom.h"
11#include "util/hyrule_magic.h"
12#include "util/macro.h"
13
14#define DEBUG_LOG(msg) std::cout << msg << std::endl
15
16namespace yaze {
17namespace gfx {
18
19std::vector<uint8_t> HyruleMagicCompress(uint8_t const* const src,
20 int const oldsize, int* const size,
21 int const flag) {
22 unsigned char* b2 =
23 (unsigned char*)malloc(0x1000); // allocate a 2^12 sized buffer
24
25 int i, j, k, l, m = 0, n, o = 0, bd = 0, p, q = 0, r;
26
27 for (i = 0; i < oldsize;) {
28 l = src[i]; // grab a char from the buffer.
29
30 k = 0;
31
32 r = !!q; // r = the same logical value (0 or 1) as q, but not the same
33 // value necesarily.
34
35 for (j = 0; j < i - 1; j++) {
36 if (src[j] == l) {
37 m = oldsize - j;
38
39 for (n = 0; n < m; n++)
40 if (src[n + j] != src[n + i])
41 break;
42
43 if (n > k)
44 k = n, o = j;
45 }
46 }
47
48 for (n = i + 1; n < oldsize; n++) {
49 if (src[n] != l) {
50 // look for chars identical to the first one.
51 // stop if we can't find one.
52 // n will reach i+k+1 for some k >= 0.
53
54 break;
55 }
56 }
57
58 n -= i; // offset back by i. i.e. n = k+1 as above.
59
60 if (n > 1 + r)
61 p = 1;
62 else {
63 m = src[i + 1];
64
65 for (n = i + 2; n < oldsize; n++) {
66 if (src[n] != l)
67 break;
68
69 n++;
70
71 if (src[n] != m)
72 break;
73 }
74
75 n -= i;
76
77 if (n > 2 + r)
78 p = 2;
79 else {
80 m = oldsize - i;
81
82 for (n = 1; n < m; n++)
83 if (src[i + n] != l + n)
84 break;
85
86 if (n > 1 + r)
87 p = 3;
88 else
89 p = 0;
90 }
91 }
92
93 if (k > 3 + r && k > n + (p & 1))
94 p = 4, n = k;
95
96 if (!p)
97 q++, i++;
98 else {
99 if (q) {
100 q--;
101
102 if (q > 31) {
103 b2[bd++] = (unsigned char)(224 + (q >> 8));
104 }
105
106 b2[bd++] = (unsigned char)q;
107 q++;
108
109 memcpy(b2 + bd, src + i - q, q);
110
111 bd += q;
112 q = 0;
113 }
114
115 i += n;
116 n--;
117
118 if (n > 31) {
119 b2[bd++] = (unsigned char)(224 + (n >> 8) + (p << 2));
120 b2[bd++] = (unsigned char)n;
121 } else
122 b2[bd++] = (unsigned char)((p << 5) + n);
123
124 switch (p) {
125 case 1:
126 case 3:
127 b2[bd++] = (unsigned char)l;
128 break;
129
130 case 2:
131 b2[bd++] = (unsigned char)l;
132 b2[bd++] = (unsigned char)m;
133
134 break;
135
136 case 4:
137 if (flag) {
138 b2[bd++] = (unsigned char)(o >> 8);
139 b2[bd++] = (unsigned char)o;
140 } else {
141 b2[bd++] = (unsigned char)o;
142 b2[bd++] = (unsigned char)(o >> 8);
143 }
144 }
145
146 continue;
147 }
148 }
149
150 if (q) {
151 q--;
152
153 if (q > 31) {
154 b2[bd++] = (unsigned char)(224 + (q >> 8));
155 }
156
157 b2[bd++] = (unsigned char)q;
158 q++;
159
160 memcpy(b2 + bd, src + i - q, q);
161
162 bd += q;
163 }
164
165 b2[bd++] = 255;
166 b2 = (unsigned char*)realloc(b2, bd);
167 *size = bd;
168
169 std::vector<uint8_t> compressed_data(b2, b2 + bd);
170 free(b2);
171 return compressed_data;
172}
173
174std::vector<uint8_t> HyruleMagicDecompress(uint8_t const* src, int* const size,
175 int const p_big_endian,
176 size_t max_src_size) {
177 unsigned char* b2 = (unsigned char*)malloc(1024);
178 const uint8_t* const src_start = src;
179
180 int bd = 0, bs = 1024;
181
182 unsigned char a;
183 unsigned char b;
184 unsigned short c, d;
185
186 for (;;) {
187 // Bounds check: Ensure we can read at least one byte (command)
188 if (max_src_size != static_cast<size_t>(-1) &&
189 (size_t)(src - src_start) >= max_src_size) {
190 std::cerr << "HyruleMagicDecompress: Reached end of buffer unexpectedly."
191 << std::endl;
192 break;
193 }
194
195 // retrieve a uint8_t from the buffer.
196 a = *(src++);
197
198 // end the decompression routine if we encounter 0xff.
199 if (a == 0xff)
200 break;
201
202 // examine the top 3 bits of a.
203 b = (a >> 5);
204
205 if (b == 7) // i.e. 0b 111
206 {
207 // get bits 0b 0001 1100
208 b = ((a >> 2) & 7);
209
210 // get bits 0b 0000 0011, multiply by 256, OR with the next byte.
211 c = ((a & 0x0003) << 8);
212
213 // Bounds check: Ensure we can read the next byte
214 if (max_src_size != static_cast<size_t>(-1) &&
215 (size_t)(src - src_start) >= max_src_size) {
216 std::cerr
217 << "HyruleMagicDecompress: Reached end of buffer reading extended len"
218 << std::endl;
219 break;
220 }
221 c |= *(src++);
222 } else
223 // or get bits 0b 0001 1111
224 c = (uint16_t)(a & 31);
225
226 c++;
227
228 if ((bd + c) > (bs - 512)) {
229 // need to increase the buffer size.
230 bs += 1024;
231 // Safety check for excessive allocation
232 if (bs > 1024 * 1024 * 16) { // 16MB limit
233 std::cerr << "HyruleMagicDecompress: Excessive allocation detected." << std::endl;
234 free(b2);
235 return std::vector<uint8_t>();
236 }
237 unsigned char* new_b2 = (unsigned char*)realloc(b2, bs);
238 if (!new_b2) {
239 free(b2);
240 return std::vector<uint8_t>();
241 }
242 b2 = new_b2;
243 }
244
245 // 7 was handled, here we handle other decompression codes.
246
247 switch (b) {
248 case 0: // 0b 000
249
250 // raw copy
251
252 // Bounds check: Ensure we can read c bytes
253 if (max_src_size != static_cast<size_t>(-1) &&
254 (size_t)(src - src_start + c) > max_src_size) {
255 std::cerr << "HyruleMagicDecompress: Raw copy exceeds buffer."
256 << std::endl;
257 goto end_decompression;
258 }
259
260 // copy info from the src buffer to our new buffer,
261 // at offset bd (which we'll be increasing;
262 memcpy(b2 + bd, src, c);
263
264 // increment the src pointer accordingly.
265 src += c;
266
267 bd += c;
268
269 break;
270
271 case 1: // 0b 001
272
273 // rle copy
274
275 // Bounds check: Ensure we can read 1 byte
276 if (max_src_size != static_cast<size_t>(-1) &&
277 (size_t)(src - src_start) >= max_src_size) {
278 std::cerr << "HyruleMagicDecompress: RLE copy exceeds buffer."
279 << std::endl;
280 goto end_decompression;
281 }
282
283 // make c duplicates of one byte, inc the src pointer.
284 memset(b2 + bd, *(src++), c);
285
286 // increase the b2 offset.
287 bd += c;
288
289 break;
290
291 case 2: // 0b 010
292
293 // rle 16-bit alternating copy
294
295 // Bounds check: Ensure we can read 2 bytes
296 if (max_src_size != static_cast<size_t>(-1) &&
297 (size_t)(src - src_start + 2) > max_src_size) {
298 std::cerr << "HyruleMagicDecompress: RLE 16-bit copy exceeds buffer."
299 << std::endl;
300 goto end_decompression;
301 }
302
303 d = zelda3::ldle16b(src);
304
305 src += 2;
306
307 while (c > 1) {
308 // copy that 16-bit number c/2 times into the b2 buffer.
309 zelda3::stle16b(b2 + bd, d);
310
311 bd += 2;
312 c -= 2; // hence c/2
313 }
314
315 if (c) // if there's a remainder of c/2, this handles it.
316 b2[bd++] = (char)d;
317
318 break;
319
320 case 3: // 0b 011
321
322 // incrementing copy
323
324 // Bounds check: Ensure we can read 1 byte
325 if (max_src_size != static_cast<size_t>(-1) &&
326 (size_t)(src - src_start) >= max_src_size) {
327 std::cerr << "HyruleMagicDecompress: Inc copy exceeds buffer."
328 << std::endl;
329 goto end_decompression;
330 }
331
332 // get the current src byte.
333 a = *(src++);
334
335 while (c--) {
336 // increment that byte and copy to b2 in c iterations.
337 // e.g. a = 4, b2 will have 4,5,6,7,8... written to it.
338 b2[bd++] = a++;
339 }
340
341 break;
342
343 default: // 0b 100, 101, 110
344
345 // lz copy
346
347 // Bounds check: Ensure we can read 2 bytes
348 if (max_src_size != static_cast<size_t>(-1) &&
349 (size_t)(src - src_start + 2) > max_src_size) {
350 std::cerr << "HyruleMagicDecompress: LZ copy exceeds buffer."
351 << std::endl;
352 goto end_decompression;
353 }
354
355 if (p_big_endian) {
356 d = (*src << 8) + src[1];
357 } else {
358 d = zelda3::ldle16b(src);
359 }
360
361 // Safety check for LZ back-reference
362 if (d >= bd) {
363 // This is technically allowed in some LZ variants (referencing future bytes that are 0 initialized)
364 // but usually indicates corruption in this context if d is way out.
365 // However, standard LZ references previous data.
366 // If d is an absolute offset in the buffer, it must be < bd?
367 // Wait, b2[d++] implies d is an index into b2.
368 // If d >= bd, we are reading uninitialized data or data we haven't written yet.
369 // But if it's a sliding window, d could be relative?
370 // The code says `b2[d++]`. This looks like absolute offset in output buffer.
371 // If d > bd, it's definitely suspicious, but maybe valid if it wraps?
372 // But this implementation reallocs b2, so it's a linear buffer.
373 // Let's just check if d + c > bs (buffer size) or something?
374 // Actually, if d points to garbage, we just copy garbage.
375 // But if d exceeds allocated memory, we crash.
376 // We realloc b2 to `bs`. So we must ensure `d + c <= bs`?
377 // No, `d` increments.
378 // We need to ensure `d` stays within valid `b2` range.
379 // Since we are writing to `bd`, `d` should probably be < `bd` usually.
380 // But let's just ensure `d < bs`.
381 }
382
383 while (c--) {
384 // copy from a different location in the buffer.
385 if (d >= bs) {
386 // Expand buffer if needed? Or just clamp?
387 // If we are reading past buffer size, it's bad.
388 // But we might have realloced b2 to be larger than bd.
389 // So d < bs is the hard limit.
390 if (d >= bs) {
391 std::cerr << "HyruleMagicDecompress: LZ ref out of bounds." << std::endl;
392 goto end_decompression;
393 }
394 }
395 b2[bd++] = b2[d++];
396 }
397
398 src += 2;
399 }
400 }
401
402end_decompression:
403 b2 = (unsigned char*)realloc(b2, bd);
404
405 if (size)
406 (*size) = bd;
407
408 // return the unsigned char* buffer b2, which contains the uncompressed data.
409 std::vector<uint8_t> decompressed_data(b2, b2 + bd);
410 free(b2);
411 return decompressed_data;
412}
413
414namespace lc_lz2 {
415
417 std::cout << "Command: " << std::to_string(piece->command) << "\n";
418 std::cout << "Command Length: " << piece->length << "\n";
419 std::cout << "Argument: ";
420 auto arg_size = piece->argument.size();
421 for (int i = 0; i < arg_size; ++i) {
422 printf("%02X ", piece->argument.at(i));
423 }
424 std::cout << "\nArgument Length: " << piece->argument_length << "\n";
425}
426
428 auto compressed_chain = chain_head->next;
429 while (compressed_chain != nullptr) {
430 std::cout << "- Compression Piece -\n";
431 PrintCompressionPiece(compressed_chain);
432 compressed_chain = compressed_chain->next;
433 }
434}
435
436void CheckByteRepeat(const uint8_t* rom_data, DataSizeArray& data_size_taken,
437 CommandArgumentArray& cmd_args, uint& src_data_pos,
438 const unsigned int last_pos) {
439 unsigned int pos = src_data_pos;
440 char byte_to_repeat = rom_data[pos];
441 while (pos <= last_pos && rom_data[pos] == byte_to_repeat) {
442 data_size_taken[kCommandByteFill]++;
443 pos++;
444 }
445 cmd_args[kCommandByteFill][0] = byte_to_repeat;
446}
447
448void CheckWordRepeat(const uint8_t* rom_data, DataSizeArray& data_size_taken,
449 CommandArgumentArray& cmd_args, uint& src_data_pos,
450 const unsigned int last_pos) {
451 if (src_data_pos + 2 <= last_pos &&
452 rom_data[src_data_pos] != rom_data[src_data_pos + 1]) {
453 unsigned int pos = src_data_pos;
454 char byte1 = rom_data[pos];
455 char byte2 = rom_data[pos + 1];
456 pos += 2;
457 data_size_taken[kCommandWordFill] = 2;
458 while (pos + 1 <= last_pos) {
459 if (rom_data[pos] == byte1 && rom_data[pos + 1] == byte2)
460 data_size_taken[kCommandWordFill] += 2;
461 else
462 break;
463 pos += 2;
464 }
465 cmd_args[kCommandWordFill][0] = byte1;
466 cmd_args[kCommandWordFill][1] = byte2;
467 }
468}
469
470void CheckIncByte(const uint8_t* rom_data, DataSizeArray& data_size_taken,
471 CommandArgumentArray& cmd_args, uint& src_data_pos,
472 const unsigned int last_pos) {
473 unsigned int pos = src_data_pos;
474 char byte = rom_data[pos];
475 pos++;
476 data_size_taken[kCommandIncreasingFill] = 1;
477 byte++;
478 while (pos <= last_pos && byte == rom_data[pos]) {
479 data_size_taken[kCommandIncreasingFill]++;
480 byte++;
481 pos++;
482 }
483 cmd_args[kCommandIncreasingFill][0] = rom_data[src_data_pos];
484}
485
486void CheckIntraCopy(const uint8_t* rom_data, DataSizeArray& data_size_taken,
487 CommandArgumentArray& cmd_args, uint& src_data_pos,
488 const unsigned int last_pos, unsigned int start) {
489 if (src_data_pos != start) {
490 unsigned int searching_pos = start;
491 unsigned int current_pos_u = src_data_pos;
492 unsigned int copied_size = 0;
493 unsigned int search_start = start;
494
495 while (searching_pos < src_data_pos && current_pos_u <= last_pos) {
496 while (rom_data[current_pos_u] != rom_data[searching_pos] &&
497 searching_pos < src_data_pos)
498 searching_pos++;
499 search_start = searching_pos;
500 while (current_pos_u <= last_pos &&
501 rom_data[current_pos_u] == rom_data[searching_pos] &&
502 searching_pos < src_data_pos) {
503 copied_size++;
504 current_pos_u++;
505 searching_pos++;
506 }
507 if (copied_size > data_size_taken[kCommandRepeatingBytes]) {
508 search_start -= start;
509 printf("- Found repeat of %d at %d\n", copied_size, search_start);
510 data_size_taken[kCommandRepeatingBytes] = copied_size;
511 cmd_args[kCommandRepeatingBytes][0] = search_start & kSnesByteMax;
512 cmd_args[kCommandRepeatingBytes][1] = search_start >> 8;
513 }
514 current_pos_u = src_data_pos;
515 copied_size = 0;
516 }
517 }
518}
519
520// Check if a command managed to pick up `max_win` or more bytes
521// Avoids being even with copy command, since it's possible to merge copy
522void ValidateForByteGain(const DataSizeArray& data_size_taken,
523 const CommandSizeArray& cmd_size, uint& max_win,
524 uint& cmd_with_max) {
525 for (unsigned int cmd_i = 1; cmd_i < 5; cmd_i++) {
526 unsigned int cmd_size_taken = data_size_taken[cmd_i];
527 // TODO(@scawful): Replace conditional with table of command sizes
528 // "Table that is even with copy but all other cmd are 2"
529 auto table_check =
530 !(cmd_i == kCommandRepeatingBytes && cmd_size_taken == 3);
531 if (cmd_size_taken > max_win && cmd_size_taken > cmd_size[cmd_i] &&
532 table_check) {
533 printf("==> C:%d / S:%d\n", cmd_i, cmd_size_taken);
534 cmd_with_max = cmd_i;
535 max_win = cmd_size_taken;
536 }
537 }
538}
539
540void CompressionCommandAlternative(const uint8_t* rom_data,
541 CompressionPiecePointer& compressed_chain,
542 const CommandSizeArray& cmd_size,
543 const CommandArgumentArray& cmd_args,
544 uint& src_data_pos, uint& comp_accumulator,
545 uint& cmd_with_max, uint& max_win) {
546 printf("- Ok we get a gain from %d\n", cmd_with_max);
547 std::string buffer;
548 buffer.push_back(cmd_args[cmd_with_max][0]);
549 if (cmd_size[cmd_with_max] == 2) {
550 buffer.push_back(cmd_args[cmd_with_max][1]);
551 }
552
553 auto new_comp_piece = std::make_shared<CompressionPiece>(
554 cmd_with_max, max_win, buffer, cmd_size[cmd_with_max]);
555 PrintCompressionPiece(new_comp_piece);
556 // If we let non compressed stuff, we need to add a copy chunk before
557 if (comp_accumulator != 0) {
558 std::string copy_buff;
559 copy_buff.resize(comp_accumulator);
560 for (int i = 0; i < comp_accumulator; ++i) {
561 copy_buff[i] = rom_data[i + src_data_pos - comp_accumulator];
562 }
563 auto copy_chunk = std::make_shared<CompressionPiece>(
564 kCommandDirectCopy, comp_accumulator, copy_buff, comp_accumulator);
565 compressed_chain->next = copy_chunk;
566 compressed_chain = copy_chunk;
567 } else {
568 compressed_chain->next = new_comp_piece;
569 compressed_chain = new_comp_piece;
570 }
571 src_data_pos += max_win;
572 comp_accumulator = 0;
573}
574
575void CheckByteRepeatV2(const uint8_t* data, uint& src_pos,
576 const unsigned int last_pos, CompressionCommand& cmd) {
577 unsigned int i = 0;
578 while (src_pos + i < last_pos && data[src_pos] == data[src_pos + i]) {
579 ++i;
580 }
582 cmd.arguments[kCommandByteFill][0] = data[src_pos];
583}
584
585void CheckWordRepeatV2(const uint8_t* data, uint& src_pos,
586 const unsigned int last_pos, CompressionCommand& cmd) {
587 if (src_pos + 2 <= last_pos && data[src_pos] != data[src_pos + 1]) {
588 unsigned int pos = src_pos;
589 char byte1 = data[pos];
590 char byte2 = data[pos + 1];
591 pos += 2;
593 while (pos + 1 <= last_pos) {
594 if (data[pos] == byte1 && data[pos + 1] == byte2)
595 cmd.data_size[kCommandWordFill] += 2;
596 else
597 break;
598 pos += 2;
599 }
600 cmd.arguments[kCommandWordFill][0] = byte1;
601 cmd.arguments[kCommandWordFill][1] = byte2;
602 }
603}
604
605void CheckIncByteV2(const uint8_t* rom_data, uint& src_data_pos,
606 const unsigned int last_pos, CompressionCommand& cmd) {
607 unsigned int pos = src_data_pos;
608 char byte = rom_data[pos];
609 pos++;
611 byte++;
612 while (pos <= last_pos && byte == rom_data[pos]) {
614 byte++;
615 pos++;
616 }
617 cmd.arguments[kCommandIncreasingFill][0] = rom_data[src_data_pos];
618}
619
620void CheckIntraCopyV2(const uint8_t* rom_data, uint& src_data_pos,
621 const unsigned int last_pos, unsigned int start,
622 CompressionCommand& cmd) {
623 if (src_data_pos != start) {
624 unsigned int searching_pos = start;
625 unsigned int current_pos_u = src_data_pos;
626 unsigned int copied_size = 0;
627 unsigned int search_start = start;
628
629 while (searching_pos < src_data_pos && current_pos_u <= last_pos) {
630 while (rom_data[current_pos_u] != rom_data[searching_pos] &&
631 searching_pos < src_data_pos)
632 searching_pos++;
633 search_start = searching_pos;
634 while (current_pos_u <= last_pos &&
635 rom_data[current_pos_u] == rom_data[searching_pos] &&
636 searching_pos < src_data_pos) {
637 copied_size++;
638 current_pos_u++;
639 searching_pos++;
640 }
641 if (copied_size > cmd.data_size[kCommandRepeatingBytes]) {
642 search_start -= start;
643 printf("- Found repeat of %d at %d\n", copied_size, search_start);
644 cmd.data_size[kCommandRepeatingBytes] = copied_size;
645 cmd.arguments[kCommandRepeatingBytes][0] = search_start & kSnesByteMax;
646 cmd.arguments[kCommandRepeatingBytes][1] = search_start >> 8;
647 }
648 current_pos_u = src_data_pos;
649 copied_size = 0;
650 }
651 }
652}
653
654// Table indicating command sizes, in bytes
655const std::array<int, 5> kCommandSizes = {1, 2, 2, 2, 3};
656
657// TODO(@scawful): TEST ME
659 uint& cmd_with_max) {
660 for (unsigned int cmd_i = 1; cmd_i < 5; cmd_i++) {
661 unsigned int cmd_size_taken = cmd.data_size[cmd_i];
662 // Check if the command size exceeds the maximum win and the size in the
663 // command sizes table, except for the repeating bytes command when the size
664 // taken is 3
665 if (cmd_size_taken > max_win && cmd_size_taken > kCommandSizes[cmd_i] &&
666 !(cmd_i == kCommandRepeatingBytes && cmd_size_taken == 3)) {
667 printf("==> C:%d / S:%d\n", cmd_i, cmd_size_taken);
668 cmd_with_max = cmd_i;
669 max_win = cmd_size_taken;
670 }
671 }
672}
673
674void CompressionCommandAlternativeV2(const uint8_t* rom_data,
675 const CompressionCommand& cmd,
676 CompressionPiecePointer& compressed_chain,
677 uint& src_data_pos, uint& comp_accumulator,
678 uint& cmd_with_max, uint& max_win) {
679 printf("- Ok we get a gain from %d\n", cmd_with_max);
680 std::string buffer;
681 buffer.push_back(cmd.arguments[cmd_with_max][0]);
682 if (cmd.cmd_size[cmd_with_max] == 2) {
683 buffer.push_back(cmd.arguments[cmd_with_max][1]);
684 }
685
686 auto new_comp_piece = std::make_shared<CompressionPiece>(
687 cmd_with_max, max_win, buffer, cmd.cmd_size[cmd_with_max]);
688 PrintCompressionPiece(new_comp_piece);
689 // If we let non compressed stuff, we need to add a copy chunk before
690 if (comp_accumulator != 0) {
691 std::string copy_buff;
692 copy_buff.resize(comp_accumulator);
693 for (int i = 0; i < comp_accumulator; ++i) {
694 copy_buff[i] = rom_data[i + src_data_pos - comp_accumulator];
695 }
696 auto copy_chunk = std::make_shared<CompressionPiece>(
697 kCommandDirectCopy, comp_accumulator, copy_buff, comp_accumulator);
698 compressed_chain->next = copy_chunk;
699 compressed_chain = copy_chunk;
700 } else {
701 compressed_chain->next = new_comp_piece;
702 compressed_chain = new_comp_piece;
703 }
704 src_data_pos += max_win;
705 comp_accumulator = 0;
706}
707
709 const uint8_t* rom_data, CompressionPiecePointer& compressed_chain,
710 const CompressionCommand& command, uint& source_data_position,
711 uint& uncompressed_data_size, uint& best_command, uint& best_command_gain) {
712 std::cout << "- Identified a gain from command: " << best_command
713 << std::endl;
714
715 // Create a buffer to store the arguments for the best command.
716 std::string argument_buffer;
717 argument_buffer.push_back(command.arguments[best_command][0]);
718 if (command.cmd_size[best_command] == 2) {
719 argument_buffer.push_back(command.arguments[best_command][1]);
720 }
721
722 // Create a new compression piece for the best command.
723 auto new_compression_piece = std::make_shared<CompressionPiece>(
724 best_command, best_command_gain, argument_buffer,
725 command.cmd_size[best_command]);
726 PrintCompressionPiece(new_compression_piece);
727
728 // If there is uncompressed data, create a direct copy compression piece for
729 // it.
730 if (uncompressed_data_size != 0) {
731 std::string copy_buffer(uncompressed_data_size, 0);
732 for (int i = 0; i < uncompressed_data_size; ++i) {
733 copy_buffer[i] =
734 rom_data[i + source_data_position - uncompressed_data_size];
735 }
736 auto direct_copy_piece = std::make_shared<CompressionPiece>(
737 kCommandDirectCopy, uncompressed_data_size, copy_buffer,
738 uncompressed_data_size);
739
740 // Append the direct copy piece to the chain.
741 compressed_chain->next = direct_copy_piece;
742 compressed_chain = direct_copy_piece;
743 }
744
745 // Append the new compression piece to the chain.
746 compressed_chain->next = new_compression_piece;
747 compressed_chain = new_compression_piece;
748
749 // Update the position in the source data and reset the uncompressed data
750 // size.
751 source_data_position += best_command_gain;
752 uncompressed_data_size = 0;
753}
754
755absl::StatusOr<CompressionPiecePointer> SplitCompressionPiece(
756 CompressionPiecePointer& piece, int mode) {
757 CompressionPiecePointer new_piece;
758 unsigned int length_left = piece->length - kMaxLengthCompression;
759 piece->length = kMaxLengthCompression;
760
761 switch (piece->command) {
762 case kCommandByteFill:
763 case kCommandWordFill:
764 new_piece = std::make_shared<CompressionPiece>(
765 piece->command, length_left, piece->argument, piece->argument_length);
766 break;
768 new_piece = std::make_shared<CompressionPiece>(
769 piece->command, length_left, piece->argument, piece->argument_length);
770 new_piece->argument[0] =
771 (char)(piece->argument[0] + kMaxLengthCompression);
772 break;
773 case kCommandDirectCopy: {
774 std::string empty;
775 piece->argument_length = kMaxLengthCompression;
776 new_piece = std::make_shared<CompressionPiece>(
777 piece->command, length_left, empty, length_left);
778 // MEMCPY
779 for (int i = 0; i < length_left; ++i) {
780 new_piece->argument[i] = piece->argument[i + kMaxLengthCompression];
781 }
782 break;
783 }
785 piece->argument_length = kMaxLengthCompression;
786 unsigned int offset = piece->argument[0] + (piece->argument[1] << 8);
787 new_piece = std::make_shared<CompressionPiece>(
788 piece->command, length_left, piece->argument, piece->argument_length);
789 if (mode == kNintendoMode2) {
790 new_piece->argument[0] =
792 new_piece->argument[1] = (offset + kMaxLengthCompression) >> 8;
793 }
794 if (mode == kNintendoMode1) {
795 new_piece->argument[1] =
797 new_piece->argument[0] = (offset + kMaxLengthCompression) >> 8;
798 }
799 } break;
800 default: {
801 return absl::InvalidArgumentError(
802 "SplitCompressionCommand: Invalid Command");
803 }
804 }
805 return new_piece;
806}
807
809 int mode) {
810 unsigned int pos = 0;
811 auto piece = start;
812 std::vector<uint8_t> output;
813
814 while (piece != nullptr) {
815 if (piece->length <= kMaxLengthNormalHeader) { // Normal header
816 output.push_back(BUILD_HEADER(piece->command, piece->length));
817 pos++;
818 } else {
819 if (piece->length <= kMaxLengthCompression) {
820 output.push_back(kCompressionStringMod |
821 ((uint8_t)piece->command << 2) |
822 (((piece->length - 1) & 0xFF00) >> 8));
823 pos++;
824 printf("Building extended header : cmd: %d, length: %d - %02X\n",
825 piece->command, piece->length, output[pos - 1]);
826 output.push_back(((piece->length - 1) & 0x00FF)); // (char)
827 pos++;
828 } else {
829 // We need to split the command
830 auto new_piece = SplitCompressionPiece(piece, mode);
831 if (!new_piece.ok()) {
832 std::cout << new_piece.status().ToString() << std::endl;
833 }
834 printf("New added piece\n");
835 auto piece_data = new_piece.value();
836 PrintCompressionPiece(piece_data);
837 piece_data->next = piece->next;
838 piece->next = piece_data;
839 continue;
840 }
841 }
842
843 if (piece->command == kCommandRepeatingBytes) {
844 char tmp[2];
845 tmp[0] = piece->argument[0];
846 tmp[1] = piece->argument[1];
847 if (mode == kNintendoMode1) {
848 tmp[0] = piece->argument[1];
849 tmp[1] = piece->argument[0];
850 }
851 for (const auto& each : tmp) {
852 output.push_back(each);
853 pos++;
854 }
855 } else {
856 for (int i = 0; i < piece->argument_length; ++i) {
857 output.push_back(piece->argument[i]);
858 pos++;
859 }
860 }
861 pos += piece->argument_length;
862 piece = piece->next;
863 }
864 output.push_back(kSnesByteMax);
865 return output;
866}
867
869 int mode, int start, int src_data_pos) {
870 if (chain_head->next != nullptr) {
871 Rom temp_rom;
873 temp_rom.LoadFromData(CreateCompressionString(chain_head->next, mode)))
874 ASSIGN_OR_RETURN(auto decomp_data,
875 DecompressV2(temp_rom.data(), 0, temp_rom.size(), 1,
876 temp_rom.size()));
877 if (!std::equal(decomp_data.begin() + start, decomp_data.end(),
878 temp_rom.begin())) {
879 return absl::InternalError(absl::StrFormat(
880 "Compressed data does not match uncompressed data at %d\n",
881 (uint)(src_data_pos - start)));
882 }
883 }
884 return absl::OkStatus();
885}
886
887// Merge consecutive copy if possible
889 CompressionPiecePointer piece = start;
890
891 while (piece != nullptr) {
892 if (piece->command == kCommandDirectCopy && piece->next != nullptr &&
893 piece->next->command == kCommandDirectCopy &&
894 piece->length + piece->next->length <= kMaxLengthCompression) {
895 unsigned int previous_length = piece->length;
896 piece->length = piece->length + piece->next->length;
897
898 for (int i = 0; i < piece->next->argument_length; ++i) {
899 piece->argument[i + previous_length] = piece->next->argument[i];
900 }
901 piece->argument_length = piece->length;
903
904 auto p_next_next = piece->next->next;
905 piece->next = p_next_next;
906 continue; // Next could be another copy
907 }
908 piece = piece->next;
909 }
910 return start;
911}
912
913absl::StatusOr<std::vector<uint8_t>> CompressV2(const uint8_t* data,
914 const int start,
915 const int length, int mode,
916 bool check) {
917 // Surely there's no need to compress zero...
918 if (length == 0) {
919 return std::vector<uint8_t>();
920 }
921
922 // Worst case should be a copy of the string with extended header
923 std::string worst_case = "aaa";
924 auto compressed_chain =
925 std::make_shared<CompressionPiece>(1, 1, worst_case, 2);
926 auto compressed_chain_start = compressed_chain;
927
928 CompressionCommand current_cmd = {/*argument*/ {{}},
929 /*cmd_size*/ {0, 1, 2, 1, 2},
930 /*data_size*/ {0, 0, 0, 0, 0}};
931
932 unsigned int src_pos = start;
933 unsigned int last_pos = start + length - 1;
934 unsigned int comp_accumulator = 0; // Used when skipping using copy
935
936 while (true) {
937 current_cmd.data_size.fill({});
938 current_cmd.arguments.fill({{}});
939
940 CheckByteRepeatV2(data, src_pos, last_pos, current_cmd);
941 CheckWordRepeatV2(data, src_pos, last_pos, current_cmd);
942 CheckIncByteV2(data, src_pos, last_pos, current_cmd);
943 CheckIntraCopyV2(data, src_pos, last_pos, start, current_cmd);
944
945 unsigned int max_win = 2;
946 unsigned int cmd_with_max = kCommandDirectCopy;
947 ValidateForByteGain(current_cmd.data_size, current_cmd.cmd_size, max_win,
948 cmd_with_max);
949 // ValidateForByteGainV2(current_cmd, max_win, cmd_with_max);
950
951 if (cmd_with_max == kCommandDirectCopy) {
952 // This is the worst case scenario
953 // Progress through the next byte, in case there's a different
954 // compression command we can implement before we hit 32 bytes.
955 src_pos++;
956 comp_accumulator++;
957
958 // Arbitrary choice to do a 32 bytes grouping for copy.
959 if (comp_accumulator == 32 || src_pos > last_pos) {
960 std::string buffer = SetBuffer(data, src_pos, comp_accumulator);
961 auto new_comp_piece = std::make_shared<CompressionPiece>(
962 kCommandDirectCopy, comp_accumulator, buffer, comp_accumulator);
963 compressed_chain->next = new_comp_piece;
964 comp_accumulator = 0;
965 }
966 } else {
967 AddAlternativeCompressionCommand(data, compressed_chain, current_cmd,
968 src_pos, comp_accumulator, cmd_with_max,
969 max_win);
970 }
971
972 if (src_pos > last_pos) {
973 printf("Breaking compression loop\n");
974 break;
975 }
976
977 if (check) {
978 RETURN_IF_ERROR(ValidateCompressionResult(compressed_chain_start, mode,
979 start, src_pos))
980 }
981 }
982
983 // Skipping compression chain header
984 MergeCopy(compressed_chain_start->next);
985 PrintCompressionChain(compressed_chain_start);
986 return CreateCompressionString(compressed_chain_start->next, mode);
987}
988
989absl::StatusOr<std::vector<uint8_t>> CompressGraphics(const uint8_t* data,
990 const int pos,
991 const int length) {
992 return CompressV2(data, pos, length, kNintendoMode2);
993}
994
995absl::StatusOr<std::vector<uint8_t>> CompressOverworld(const uint8_t* data,
996 const int pos,
997 const int length) {
998 return CompressV2(data, pos, length, kNintendoMode1);
999}
1000
1001absl::StatusOr<std::vector<uint8_t>> CompressOverworld(
1002 const std::vector<uint8_t> data, const int pos, const int length) {
1003 return CompressV3(data, pos, length, kNintendoMode1);
1004}
1005
1007 unsigned int pos = context.src_pos;
1008
1009 // Ensure the sequence does not start with an uncompressable byte
1010 if (pos == 0 || context.data[pos - 1] != context.data[pos]) {
1011 char byte_to_repeat = context.data[pos];
1012 while (pos <= context.last_pos && context.data[pos] == byte_to_repeat) {
1014 pos++;
1015 }
1016
1017 context.current_cmd.arguments[kCommandByteFill][0] = byte_to_repeat;
1018
1019 // Added debug log
1020 DEBUG_LOG("CheckByteRepeatV3: byte_to_repeat = "
1021 << (int)byte_to_repeat << ", size = "
1023 }
1024}
1025
1027 if (context.src_pos + 1 <= context.last_pos) { // Changed the condition here
1028 unsigned int pos = context.src_pos;
1029 char byte1 = context.data[pos];
1030 char byte2 = context.data[pos + 1];
1031 pos += 2;
1033 while (pos + 1 <= context.last_pos) {
1034 if (context.data[pos] == byte1 && context.data[pos + 1] == byte2)
1036 else
1037 break;
1038 pos += 2;
1039 }
1040
1041 context.current_cmd.arguments[kCommandWordFill][0] = byte1;
1042 context.current_cmd.arguments[kCommandWordFill][1] = byte2;
1043 }
1044
1045 DEBUG_LOG("CheckWordRepeatV3: byte1 = "
1046 << (int)context.current_cmd.arguments[kCommandWordFill][0]
1047 << ", byte2 = "
1048 << (int)context.current_cmd.arguments[kCommandWordFill][1]
1049 << ", size = " << context.current_cmd.data_size[kCommandWordFill]);
1050}
1051
1053 unsigned int pos = context.src_pos;
1054 uint8_t byte = context.data[pos];
1055 pos++;
1057 byte++;
1058
1059 while (pos <= context.last_pos && byte == context.data[pos]) {
1061 byte++;
1062 pos++;
1063 }
1064
1065 // Let's see if the sequence is surrounded by identical bytes and if so,
1066 // consider if a direct copy is better.
1067 if (context.current_cmd.data_size[kCommandIncreasingFill] == 3 &&
1068 context.src_pos > 0 && pos < context.data.size() &&
1069 context.data[context.src_pos - 1] == context.data[pos]) {
1071 0; // Reset the size to 0 to prioritize direct copy
1072 return;
1073 }
1074
1076 context.data[context.src_pos];
1077
1078 DEBUG_LOG("CheckIncByteV3: byte = "
1079 << (int)context.current_cmd.arguments[kCommandIncreasingFill][0]
1080 << ", size = "
1082}
1083
1085 const int window_size =
1086 32; // This can be adjusted for optimal performance and results
1087
1088 // We'll only search for repeating sequences if we're not at the very
1089 // beginning
1090 if (context.src_pos > 0 &&
1091 context.src_pos + window_size <= context.data.size()) {
1092 unsigned int max_copied_size = 0;
1093 unsigned int best_search_start = 0;
1094
1095 // Slide the window over the source data
1096 for (int win_pos = 1; win_pos < window_size && win_pos < context.src_pos;
1097 ++win_pos) {
1098 auto start_search_from = context.data.begin() + context.src_pos - win_pos;
1099 auto search_end = context.data.begin() + context.src_pos;
1100
1101 // Use std::search to find the sequence in the window in the previous
1102 // source data
1103 auto found_pos = std::search(
1104 start_search_from, search_end, context.data.begin() + context.src_pos,
1105 context.data.begin() + context.src_pos + win_pos);
1106
1107 if (found_pos != search_end) {
1108 // Check the entire length of the match
1109 unsigned int len = 0;
1110 while (context.src_pos + len < context.data.size() &&
1111 context.data[context.src_pos + len] == *(found_pos + len)) {
1112 len++;
1113 }
1114
1115 if (len > max_copied_size) {
1116 max_copied_size = len;
1117 best_search_start = found_pos - context.data.begin();
1118 }
1119 }
1120 }
1121
1122 if (max_copied_size >
1124 DEBUG_LOG("CheckIntraCopyV3: Detected repeating sequence of length "
1125 << max_copied_size << " starting from " << best_search_start);
1126 context.current_cmd.data_size[kCommandRepeatingBytes] = max_copied_size;
1128 best_search_start & kSnesByteMax;
1130 best_search_start >> 8;
1131 }
1132
1133 DEBUG_LOG("CheckIntraCopyV3: max_copied_size = " << max_copied_size
1134 << ", best_search_start = "
1135 << best_search_start);
1136 }
1137}
1138
1140 // Initialize the current_cmd with default values.
1141 context.current_cmd = {/*argument*/ {{}},
1142 /*cmd_size*/ {0, 1, 2, 1, 2},
1143 /*data_size*/ {0, 0, 0, 0, 0}};
1144}
1145
1147 // Reset the data_size and arguments for a fresh check.
1148 context.current_cmd.data_size.fill({});
1149 context.current_cmd.arguments.fill({{}});
1150
1151 CheckByteRepeatV3(context);
1152 CheckWordRepeatV3(context);
1153 CheckIncByteV3(context);
1154 CheckIntraCopyV3(context);
1155
1156 DEBUG_LOG("CheckAvailableCompressionCommands: src_pos = " << context.src_pos);
1157}
1158
1160 int max_net_savings = -1; // Adjusted the bias to consider any savings
1161
1162 // Start with the default scenario.
1164
1165 for (unsigned int cmd_i = 1; cmd_i < 5; cmd_i++) {
1166 unsigned int cmd_size_taken = context.current_cmd.data_size[cmd_i];
1167 int net_savings = cmd_size_taken - context.current_cmd.cmd_size[cmd_i];
1168
1169 // Skip commands that aren't efficient.
1170 if (cmd_size_taken <= 2 && cmd_i != kCommandDirectCopy) {
1171 continue;
1172 }
1173
1174 // Check surrounding data for optimization.
1175 if (context.src_pos > 0 &&
1176 context.src_pos + cmd_size_taken < context.data.size()) {
1177 char prev_byte = context.data[context.src_pos - 1];
1178 char next_byte = context.data[context.src_pos + cmd_size_taken];
1179 if (prev_byte != next_byte && cmd_size_taken == 3) {
1180 continue;
1181 }
1182 }
1183
1184 // Check if the current command offers more net savings.
1185 if (net_savings > max_net_savings) {
1186 context.cmd_with_max = cmd_i;
1187 max_net_savings = net_savings;
1188 }
1189 }
1190
1191 DEBUG_LOG("DetermineBestCompression: cmd_with_max = "
1192 << context.cmd_with_max << ", data_size = "
1193 << context.current_cmd.data_size[context.cmd_with_max]);
1194}
1195
1197 // If the next best compression method isn't direct copy and we have bytes
1198 // accumulated for direct copy, flush them out.
1199 if (context.cmd_with_max != kCommandDirectCopy &&
1200 context.comp_accumulator > 0) {
1201 uint8_t header = BUILD_HEADER(kCommandDirectCopy, context.comp_accumulator);
1202 context.compressed_data.push_back(header);
1203 std::vector<uint8_t> uncompressed_data(
1204 context.data.begin() + context.src_pos - context.comp_accumulator,
1205 context.data.begin() + context.src_pos);
1206 context.compressed_data.insert(context.compressed_data.end(),
1207 uncompressed_data.begin(),
1208 uncompressed_data.end());
1209 context.comp_accumulator = 0;
1210 return;
1211 }
1212
1213 // If the next best compression method is not direct copy and we haven't
1214 // accumulated any bytes, treat it as a single byte direct copy.
1215 if (context.cmd_with_max != kCommandDirectCopy &&
1216 context.comp_accumulator == 0) {
1217 context.compressed_data.push_back(
1218 0x00); // Command for a single byte direct copy
1219 context.compressed_data.push_back(
1220 context.data[context.src_pos]); // The single byte
1221 context.src_pos++;
1222 return;
1223 }
1224
1225 // If we reach here, accumulate bytes for a direct copy.
1226 context.src_pos++;
1227 context.comp_accumulator++;
1228
1229 // If we've accumulated the maximum bytes for a direct copy command or
1230 // reached the end, flush them.
1231 if (context.comp_accumulator >= 32 || context.src_pos > context.last_pos) {
1232 uint8_t header = BUILD_HEADER(kCommandDirectCopy, context.comp_accumulator);
1233 context.compressed_data.push_back(header);
1234 std::vector<uint8_t> uncompressed_data(
1235 context.data.begin() + context.src_pos - context.comp_accumulator,
1236 context.data.begin() + context.src_pos);
1237 context.compressed_data.insert(context.compressed_data.end(),
1238 uncompressed_data.begin(),
1239 uncompressed_data.end());
1240 context.comp_accumulator = 0;
1241 }
1242
1243 DEBUG_LOG("HandleDirectCopy: src_pos = " << context.src_pos
1244 << ", compressed_data size = "
1245 << context.compressed_data.size());
1246}
1247
1249 DEBUG_LOG("AddCompressionToChain: Adding command arguments: ");
1250
1251 // If there's uncompressed data, add a copy chunk before the compression
1252 // command
1253 if (context.comp_accumulator != 0) {
1254 uint8_t header = BUILD_HEADER(kCommandDirectCopy, context.comp_accumulator);
1255 context.compressed_data.push_back(header);
1256 std::vector<uint8_t> uncompressed_data(
1257 context.data.begin() + context.src_pos - context.comp_accumulator,
1258 context.data.begin() + context.src_pos);
1259 context.compressed_data.insert(context.compressed_data.end(),
1260 uncompressed_data.begin(),
1261 uncompressed_data.end());
1262 context.comp_accumulator = 0;
1263 }
1264
1265 // Now, add the compression command
1266 uint8_t header =
1267 BUILD_HEADER(context.cmd_with_max,
1268 context.current_cmd.data_size[context.cmd_with_max]);
1269 context.compressed_data.push_back(header);
1270
1271 DEBUG_LOG("AddCompressionToChain: (Before) src_pos = "
1272 << context.src_pos
1273 << ", compressed_data size = " << context.compressed_data.size());
1274
1275 // Add the command arguments to the compressed_data vector
1276 context.compressed_data.push_back(
1277 context.current_cmd.arguments[context.cmd_with_max][0]);
1278 if (context.current_cmd.cmd_size[context.cmd_with_max] == 2) {
1279 context.compressed_data.push_back(
1280 context.current_cmd.arguments[context.cmd_with_max][1]);
1281 }
1282
1283 context.src_pos += context.current_cmd.data_size[context.cmd_with_max];
1284 context.comp_accumulator = 0;
1285
1286 DEBUG_LOG("AddCompressionToChain: (After) src_pos = "
1287 << context.src_pos
1288 << ", compressed_data size = " << context.compressed_data.size());
1289}
1290
1292 if (!context.compressed_data.empty()) {
1293 Rom temp_rom;
1294 RETURN_IF_ERROR(temp_rom.LoadFromData(context.compressed_data));
1295 ASSIGN_OR_RETURN(auto decomp_data,
1296 DecompressV2(temp_rom.data(), 0, temp_rom.size(), 1,
1297 temp_rom.size()));
1298
1299 if (!std::equal(decomp_data.begin() + context.start, decomp_data.end(),
1300 temp_rom.begin())) {
1301 return absl::InternalError(absl::StrFormat(
1302 "Compressed data does not match uncompressed data at %d\n",
1303 (context.src_pos - context.start)));
1304 }
1305 }
1306 return absl::OkStatus();
1307}
1308
1309absl::StatusOr<CompressionPiece> SplitCompressionPieceV3(
1310 CompressionPiece& piece, int mode) {
1311 CompressionPiece new_piece;
1312 unsigned int length_left = piece.length - kMaxLengthCompression;
1314
1315 switch (piece.command) {
1316 case kCommandByteFill:
1317 case kCommandWordFill:
1318 new_piece = CompressionPiece(piece.command, length_left, piece.argument,
1319 piece.argument_length);
1320 break;
1322 new_piece = CompressionPiece(piece.command, length_left, piece.argument,
1323 piece.argument_length);
1324 new_piece.argument[0] = (char)(piece.argument[0] + kMaxLengthCompression);
1325 break;
1326 case kCommandDirectCopy: {
1328 std::string empty_string = "";
1329 new_piece = CompressionPiece(piece.command, length_left, empty_string,
1330 length_left);
1331 // MEMCPY
1332 for (int i = 0; i < length_left; ++i) {
1333 new_piece.argument[i] = piece.argument[i + kMaxLengthCompression];
1334 }
1335 break;
1336 }
1339 unsigned int offset = piece.argument[0] + (piece.argument[1] << 8);
1340 new_piece = CompressionPiece(piece.command, length_left, piece.argument,
1341 piece.argument_length);
1342 if (mode == kNintendoMode2) {
1343 new_piece.argument[0] = (offset + kMaxLengthCompression) & kSnesByteMax;
1344 new_piece.argument[1] = (offset + kMaxLengthCompression) >> 8;
1345 }
1346 if (mode == kNintendoMode1) {
1347 new_piece.argument[1] = (offset + kMaxLengthCompression) & kSnesByteMax;
1348 new_piece.argument[0] = (offset + kMaxLengthCompression) >> 8;
1349 }
1350 } break;
1351 default: {
1352 return absl::InvalidArgumentError(
1353 "SplitCompressionCommand: Invalid Command");
1354 }
1355 }
1356
1357 return new_piece;
1358}
1359
1361 unsigned int pos = 0;
1362
1363 for (CompressionPiece& piece : context.compression_pieces) {
1364 if (piece.length <= kMaxLengthNormalHeader) { // Normal Header
1365 context.compression_string.push_back(
1366 BUILD_HEADER(piece.command, piece.length));
1367 pos++;
1368 } else {
1369 if (piece.length <= kMaxLengthCompression) {
1370 context.compression_string.push_back(
1371 kCompressionStringMod | ((uint8_t)piece.command << 2) |
1372 (((piece.length - 1) & 0xFF00) >> 8));
1373 pos++;
1374 std::cout << "Building extended header : cmd: " << piece.command
1375 << ", length: " << piece.length << " - "
1376 << (int)context.compression_string[pos - 1] << std::endl;
1377 context.compression_string.push_back(
1378 ((piece.length - 1) & 0x00FF)); // (char)
1379 } else {
1380 // We need to split the command
1381 auto new_piece = SplitCompressionPieceV3(piece, context.mode);
1382 if (!new_piece.ok()) {
1383 std::cout << new_piece.status().ToString() << std::endl;
1384 }
1385 context.compression_pieces.insert(
1386 context.compression_pieces.begin() + pos + 1, new_piece.value());
1387 continue;
1388 }
1389 }
1390
1391 if (piece.command == kCommandRepeatingBytes) {
1392 char tmp[2];
1393 tmp[0] = piece.argument[0];
1394 tmp[1] = piece.argument[1];
1395 if (context.mode == kNintendoMode1) {
1396 tmp[0] = piece.argument[1];
1397 tmp[1] = piece.argument[0];
1398 }
1399 for (const auto& each : tmp) {
1400 context.compression_string.push_back(each);
1401 pos++;
1402 }
1403 } else {
1404 for (int i = 0; i < piece.argument_length; ++i) {
1405 context.compression_string.push_back(piece.argument[i]);
1406 pos++;
1407 }
1408 }
1409 pos += piece.argument_length;
1410 }
1411
1412 // Add any remaining uncompressed data
1413 if (context.comp_accumulator > 0) {
1414 context.compressed_data.insert(
1415 context.compressed_data.end(),
1416 context.data.begin() + context.src_pos - context.comp_accumulator,
1417 context.data.begin() + context.src_pos);
1418 context.comp_accumulator = 0;
1419 }
1420
1421 // Add the end marker to the compressed data
1422 context.compressed_data.push_back(kSnesByteMax);
1423 DEBUG_LOG("FinalizeCompression: compressed_data size = "
1424 << context.compressed_data.size());
1425}
1426
1427absl::StatusOr<std::vector<uint8_t>> CompressV3(
1428 const std::vector<uint8_t>& data, const int start, const int length,
1429 int mode, bool check) {
1430 if (length == 0) {
1431 return std::vector<uint8_t>();
1432 }
1433
1434 CompressionContext context(data, start, length, mode);
1435 InitializeCompression(context);
1436
1437 while (context.src_pos <= context.last_pos) {
1439 DetermineBestCompression(context);
1440
1441 DEBUG_LOG("CompressV3 Loop: cmd_with_max = " << context.cmd_with_max);
1442
1443 if (context.cmd_with_max == kCommandDirectCopy) {
1444 HandleDirectCopy(context);
1445 } else {
1446 AddCompressionToChain(context);
1447 }
1448
1449 if (check) {
1451 }
1452 }
1453
1454 FinalizeCompression(context);
1455 return std::vector<uint8_t>(context.compressed_data.begin(),
1456 context.compressed_data.end());
1457}
1458
1459std::string SetBuffer(const uint8_t* data, int src_pos, int comp_accumulator) {
1460 std::string buffer;
1461 for (int i = 0; i < comp_accumulator; ++i) {
1462 buffer.push_back(data[i + src_pos - comp_accumulator]);
1463 }
1464 return buffer;
1465}
1466
1467std::string SetBuffer(const std::vector<uint8_t>& data, int src_pos,
1468 int comp_accumulator) {
1469 std::string buffer;
1470 for (int i = 0; i < comp_accumulator; ++i) {
1471 buffer.push_back(data[i + src_pos - comp_accumulator]);
1472 }
1473 return buffer;
1474}
1475
1476void memfill(const uint8_t* data, std::vector<uint8_t>& buffer, int buffer_pos,
1477 int offset, int length) {
1478 auto a = data[offset];
1479 auto b = data[offset + 1];
1480 for (int i = 0; i < length; i = i + 2) {
1481 buffer[buffer_pos + i] = a;
1482 if ((i + 1) < length)
1483 buffer[buffer_pos + i + 1] = b;
1484 }
1485}
1486
1487absl::StatusOr<std::vector<uint8_t>> DecompressV2(const uint8_t* data,
1488 int offset, int size,
1489 int mode, size_t rom_size) {
1490 if (size == 0) {
1491 return std::vector<uint8_t>();
1492 }
1493
1494 // Validate initial offset is within bounds (if rom_size provided)
1495 // rom_size == static_cast<size_t>(-1) means "no bounds checking"
1496 if (rom_size != static_cast<size_t>(-1) && (offset < 0 || static_cast<size_t>(offset) >= rom_size)) {
1497 return absl::OutOfRangeError(
1498 absl::StrFormat("DecompressV2: Initial offset %d exceeds ROM size %zu",
1499 offset, rom_size));
1500 }
1501
1502 std::vector<uint8_t> buffer(size, 0);
1503 unsigned int length = 0;
1504 unsigned int buffer_pos = 0;
1505 uint8_t command = 0;
1506
1507 // Bounds check before initial header read
1508 if (rom_size != static_cast<size_t>(-1) && static_cast<size_t>(offset) >= rom_size) {
1509 return absl::OutOfRangeError(
1510 absl::StrFormat("DecompressV2: Initial offset %d exceeds ROM size %zu",
1511 offset, rom_size));
1512 }
1513 uint8_t header = data[offset];
1514
1515 while (header != kSnesByteMax) {
1516 // Bounds check before reading command (if rom_size provided)
1517 if (rom_size != static_cast<size_t>(-1) && static_cast<size_t>(offset) >= rom_size) {
1518 return absl::OutOfRangeError(
1519 absl::StrFormat("DecompressV2: Offset %d exceeds ROM size %zu while reading command",
1520 offset, rom_size));
1521 }
1522
1523 if ((header & kExpandedMod) == kExpandedMod) {
1524 // Expanded Command - needs 2 bytes
1525 if (rom_size != static_cast<size_t>(-1) && static_cast<size_t>(offset + 1) >= rom_size) {
1526 return absl::OutOfRangeError(
1527 absl::StrFormat("DecompressV2: Offset %d+1 exceeds ROM size %zu for expanded command",
1528 offset, rom_size));
1529 }
1530 command = ((header >> 2) & kCommandMod);
1531 length = (((header << 8) | data[offset + 1]) & kExpandedLengthMod);
1532 offset += 2; // Advance 2 bytes in ROM
1533 } else {
1534 // Normal Command
1535 command = ((header >> 5) & kCommandMod);
1536 length = (header & kNormalLengthMod);
1537 offset += 1; // Advance 1 byte in ROM
1538 }
1539 length += 1; // each commands is at least of size 1 even if index 00
1540
1541 // CRITICAL: Check and resize buffer BEFORE any command writes
1542 // This prevents WASM "index out of bounds" errors
1543 // The buffer may need to grow if decompressed data exceeds initial size
1544 if (buffer_pos + length > static_cast<unsigned int>(size)) {
1545 // Double the buffer size (with overflow protection)
1546 int new_size = size;
1547 while (buffer_pos + length > static_cast<unsigned int>(new_size)) {
1548 if (new_size > INT_MAX / 2) {
1549 return absl::ResourceExhaustedError(
1550 absl::StrFormat("DecompressV2: Buffer size overflow at pos %u, length %u",
1551 buffer_pos, length));
1552 }
1553 new_size *= 2;
1554 }
1555 size = new_size;
1556 buffer.resize(size);
1557 }
1558
1559 switch (command) {
1560 case kCommandDirectCopy: // Does not advance in the ROM
1561 // Bounds check for direct copy
1562 if (rom_size != static_cast<size_t>(-1) &&
1563 static_cast<size_t>(offset + length) > rom_size) {
1564 return absl::OutOfRangeError(
1565 absl::StrFormat("DecompressV2: DirectCopy offset %d + length %u exceeds ROM size %zu",
1566 offset, length, rom_size));
1567 }
1568 memcpy(buffer.data() + buffer_pos, data + offset, length);
1569 buffer_pos += length;
1570 offset += length;
1571 break;
1572 case kCommandByteFill:
1573 // Bounds check for byte fill
1574 if (rom_size != static_cast<size_t>(-1) &&
1575 static_cast<size_t>(offset) >= rom_size) {
1576 return absl::OutOfRangeError(
1577 absl::StrFormat("DecompressV2: ByteFill offset %d exceeds ROM size %zu",
1578 offset, rom_size));
1579 }
1580 memset(buffer.data() + buffer_pos, (int)(data[offset]), length);
1581 buffer_pos += length;
1582 offset += 1; // Advances 1 byte in the ROM
1583 break;
1584 case kCommandWordFill:
1585 // Bounds check for word fill (needs 2 bytes)
1586 if (rom_size != static_cast<size_t>(-1) &&
1587 static_cast<size_t>(offset + 1) >= rom_size) {
1588 return absl::OutOfRangeError(
1589 absl::StrFormat("DecompressV2: WordFill offset %d+1 exceeds ROM size %zu",
1590 offset, rom_size));
1591 }
1592 memfill(data, buffer, buffer_pos, offset, length);
1593 buffer_pos += length;
1594 offset += 2; // Advance 2 byte in the ROM
1595 break;
1597 // Bounds check for increasing fill
1598 if (rom_size != static_cast<size_t>(-1) &&
1599 static_cast<size_t>(offset) >= rom_size) {
1600 return absl::OutOfRangeError(
1601 absl::StrFormat("DecompressV2: IncreasingFill offset %d exceeds ROM size %zu",
1602 offset, rom_size));
1603 }
1604 auto inc_byte = data[offset];
1605 for (unsigned int i = 0; i < length; i++) {
1606 buffer[buffer_pos] = inc_byte++;
1607 buffer_pos++;
1608 }
1609 offset += 1; // Advance 1 byte in the ROM
1610 } break;
1612 // Bounds check for repeating bytes (needs 2 bytes)
1613 if (rom_size != static_cast<size_t>(-1) &&
1614 static_cast<size_t>(offset + 1) >= rom_size) {
1615 return absl::OutOfRangeError(
1616 absl::StrFormat("DecompressV2: RepeatingBytes offset %d+1 exceeds ROM size %zu",
1617 offset, rom_size));
1618 }
1619 uint16_t s1 = ((data[offset + 1] & kSnesByteMax) << 8);
1620 uint16_t s2 = (data[offset] & kSnesByteMax);
1621 int addr = (s1 | s2);
1622 if (mode == kNintendoMode1) { // Reversed byte order for
1623 // overworld maps
1624 addr = (data[offset + 1] & kSnesByteMax) |
1625 ((data[offset] & kSnesByteMax) << 8);
1626 }
1627 if (addr > offset) {
1628 return absl::InternalError(
1629 absl::StrFormat("Decompress: Offset for command copy exceeds "
1630 "current position "
1631 "(Offset : %#04x | Pos : %#06x)\n",
1632 addr, offset));
1633 }
1634 // Buffer resize already done above, no need to check again
1635 memcpy(buffer.data() + buffer_pos, buffer.data() + addr, length);
1636 buffer_pos += length;
1637 offset += 2;
1638 } break;
1639 default: {
1640 std::cout << absl::StrFormat(
1641 "Decompress: Invalid header (Offset : %#06x, Command: %#04x)\n",
1642 offset, command);
1643 } break;
1644 }
1645 // Bounds check before reading next header byte
1646 if (rom_size != static_cast<size_t>(-1) &&
1647 static_cast<size_t>(offset) >= rom_size) {
1648 return absl::OutOfRangeError(
1649 absl::StrFormat("DecompressV2: Offset %d exceeds ROM size %zu while reading next header",
1650 offset, rom_size));
1651 }
1652 header = data[offset];
1653 }
1654
1655 return buffer;
1656}
1657
1658absl::StatusOr<std::vector<uint8_t>> DecompressGraphics(const uint8_t* data,
1659 int pos, int size) {
1660 return DecompressV2(data, pos, size, kNintendoMode2);
1661}
1662
1663absl::StatusOr<std::vector<uint8_t>> DecompressOverworld(const uint8_t* data,
1664 int pos, int size) {
1665 return DecompressV2(data, pos, size, kNintendoMode1);
1666}
1667
1668absl::StatusOr<std::vector<uint8_t>> DecompressOverworld(
1669 const std::vector<uint8_t> data, int pos, int size) {
1670 return DecompressV2(data.data(), pos, size, kNintendoMode1);
1671}
1672
1673} // namespace lc_lz2
1674} // namespace gfx
1675} // namespace yaze
The Rom class is used to load, save, and modify Rom data. This is a generic SNES ROM container and do...
Definition rom.h:24
auto begin()
Definition rom.h:137
auto data() const
Definition rom.h:135
auto size() const
Definition rom.h:134
absl::Status LoadFromData(const std::vector< uint8_t > &data, const LoadOptions &options=LoadOptions::Defaults())
Definition rom.cc:147
#define DEBUG_LOG(msg)
#define BUILD_HEADER(command, length)
Definition compression.h:13
unsigned int uint
Definition macro.h:4
#define ASSIGN_OR_RETURN(type_variable_name, expression)
Definition macro.h:62
void CheckIntraCopyV3(CompressionContext &context)
absl::StatusOr< std::vector< uint8_t > > CompressGraphics(const uint8_t *data, const int pos, const int length)
absl::StatusOr< std::vector< uint8_t > > CompressV3(const std::vector< uint8_t > &data, const int start, const int length, int mode, bool check)
Compresses a buffer of data using the LC_LZ2 algorithm.
constexpr int kExpandedLengthMod
Definition compression.h:59
constexpr int kCommandDirectCopy
Definition compression.h:46
void memfill(const uint8_t *data, std::vector< uint8_t > &buffer, int buffer_pos, int offset, int length)
void FinalizeCompression(CompressionContext &context)
void CheckWordRepeatV2(const uint8_t *data, uint &src_pos, const unsigned int last_pos, CompressionCommand &cmd)
void InitializeCompression(CompressionContext &context)
absl::StatusOr< std::vector< uint8_t > > DecompressV2(const uint8_t *data, int offset, int size, int mode, size_t rom_size)
Decompresses a buffer of data using the LC_LZ2 algorithm.
struct CompressionPiece CompressionPiece
Definition compression.h:90
absl::StatusOr< std::vector< uint8_t > > CompressOverworld(const uint8_t *data, const int pos, const int length)
constexpr int kSnesByteMax
Definition compression.h:56
constexpr int kNintendoMode1
Definition compression.h:54
void AddAlternativeCompressionCommand(const uint8_t *rom_data, CompressionPiecePointer &compressed_chain, const CompressionCommand &command, uint &source_data_position, uint &uncompressed_data_size, uint &best_command, uint &best_command_gain)
std::shared_ptr< CompressionPiece > CompressionPiecePointer
Definition compression.h:91
void CheckIncByteV3(CompressionContext &context)
absl::Status ValidateCompressionResultV3(const CompressionContext &context)
constexpr int kNintendoMode2
Definition compression.h:55
std::string SetBuffer(const uint8_t *data, int src_pos, int comp_accumulator)
constexpr int kCommandRepeatingBytes
Definition compression.h:50
std::array< std::array< char, 2 >, 5 > CommandArgumentArray
Definition compression.h:75
void CheckByteRepeat(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos)
void CheckByteRepeatV2(const uint8_t *data, uint &src_pos, const unsigned int last_pos, CompressionCommand &cmd)
void HandleDirectCopy(CompressionContext &context)
void CheckIncByteV2(const uint8_t *rom_data, uint &src_data_pos, const unsigned int last_pos, CompressionCommand &cmd)
absl::StatusOr< std::vector< uint8_t > > DecompressGraphics(const uint8_t *data, int pos, int size)
constexpr int kCommandMod
Definition compression.h:57
void PrintCompressionPiece(const CompressionPiecePointer &piece)
constexpr int kCommandWordFill
Definition compression.h:48
constexpr int kMaxLengthNormalHeader
Definition compression.h:52
void CheckAvailableCompressionCommands(CompressionContext &context)
constexpr int kMaxLengthCompression
Definition compression.h:53
void CompressionCommandAlternativeV2(const uint8_t *rom_data, const CompressionCommand &cmd, CompressionPiecePointer &compressed_chain, uint &src_data_pos, uint &comp_accumulator, uint &cmd_with_max, uint &max_win)
void CheckIntraCopyV2(const uint8_t *rom_data, uint &src_data_pos, const unsigned int last_pos, unsigned int start, CompressionCommand &cmd)
std::vector< uint8_t > CreateCompressionString(CompressionPiecePointer &start, int mode)
absl::StatusOr< std::vector< uint8_t > > DecompressOverworld(const uint8_t *data, int pos, int size)
std::array< uint, 5 > DataSizeArray
Definition compression.h:77
constexpr int kCommandIncreasingFill
Definition compression.h:49
void ValidateForByteGainV2(const CompressionCommand &cmd, uint &max_win, uint &cmd_with_max)
void ValidateForByteGain(const DataSizeArray &data_size_taken, const CommandSizeArray &cmd_size, uint &max_win, uint &cmd_with_max)
constexpr int kCommandByteFill
Definition compression.h:47
void PrintCompressionChain(const CompressionPiecePointer &chain_head)
void CheckIncByte(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos)
constexpr int kExpandedMod
Definition compression.h:58
const std::array< int, 5 > kCommandSizes
void AddCompressionToChain(CompressionContext &context)
absl::StatusOr< CompressionPiecePointer > SplitCompressionPiece(CompressionPiecePointer &piece, int mode)
void CompressionCommandAlternative(const uint8_t *rom_data, CompressionPiecePointer &compressed_chain, const CommandSizeArray &cmd_size, const CommandArgumentArray &cmd_args, uint &src_data_pos, uint &comp_accumulator, uint &cmd_with_max, uint &max_win)
constexpr int kNormalLengthMod
Definition compression.h:60
constexpr int kCompressionStringMod
Definition compression.h:61
void CheckWordRepeat(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos)
void CheckByteRepeatV3(CompressionContext &context)
CompressionPiecePointer MergeCopy(CompressionPiecePointer &start)
std::array< uint, 5 > CommandSizeArray
Definition compression.h:76
void DetermineBestCompression(CompressionContext &context)
absl::StatusOr< CompressionPiece > SplitCompressionPieceV3(CompressionPiece &piece, int mode)
absl::StatusOr< std::vector< uint8_t > > CompressV2(const uint8_t *data, const int start, const int length, int mode, bool check)
Compresses a buffer of data using the LC_LZ2 algorithm.
void CheckWordRepeatV3(CompressionContext &context)
void CheckIntraCopy(const uint8_t *rom_data, DataSizeArray &data_size_taken, CommandArgumentArray &cmd_args, uint &src_data_pos, const unsigned int last_pos, unsigned int start)
absl::Status ValidateCompressionResult(CompressionPiecePointer &chain_head, int mode, int start, int src_data_pos)
std::vector< uint8_t > HyruleMagicDecompress(uint8_t const *src, int *const size, int const p_big_endian, size_t max_src_size)
std::vector< uint8_t > HyruleMagicCompress(uint8_t const *const src, int const oldsize, int *const size, int const flag)
uint16_t ldle16b(uint8_t const *const p_arr)
void stle16b(uint8_t *const p_arr, uint16_t const p_val)
#define RETURN_IF_ERROR(expr)
Definition snes.cc:22
std::array< std::array< char, 2 >, 5 > arguments
Definition compression.h:66
std::vector< CompressionPiece > compression_pieces
std::vector< uint8_t > compression_string
std::vector< uint8_t > compressed_data