yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
gfx_optimization_benchmarks.cc
Go to the documentation of this file.
1#include <gtest/gtest.h>
2#include <chrono>
3#include <vector>
4#include <random>
5
6#include "app/gfx/bitmap.h"
7#include "app/gfx/arena.h"
12
13namespace yaze {
14namespace gfx {
15
// Test fixture shared by the TEST_F benchmarks in this file. It touches the
// Arena singleton before each test and provides helpers that build synthetic
// bitmap and palette inputs.
16class GraphicsOptimizationBenchmarks : public ::testing::Test {
17 protected:
// Runs before each test; the visible body only calls Arena::Get().
// NOTE(review): listing lines 21-22 are absent from this text, so any further
// setup done there cannot be confirmed from here.
18 void SetUp() override {
19 // Initialize graphics systems
20 Arena::Get();
23 }
24
// Runs after each test.
// NOTE(review): listing line 27 is absent from this text; no teardown
// statement is visible.
25 void TearDown() override {
26 // Cleanup
28 }
29
30 // Helper methods for creating test data
// Returns a vector of width * height bytes, each drawn uniformly from [0, 15].
// The element count is computed as an int product before it is handed to the
// vector constructor. The generator is seeded from std::random_device, so the
// contents are not expected to be reproducible between runs.
31 std::vector<uint8_t> CreateTestBitmapData(int width, int height) {
32 std::vector<uint8_t> data(width * height);
33 std::random_device rd;
34 std::mt19937 gen(rd());
35 std::uniform_int_distribution<> dis(0, 15); // 4-bit color indices
36
37 for (auto& pixel : data) {
38 pixel = static_cast<uint8_t>(dis(gen));
39 }
40 return data;
41 }
42
// Builds and returns a SnesPalette with 16 entries; entry i is constructed as
// SnesColor(i * 16, i * 16, i * 16).
// NOTE(review): the listing jumps from line 42 to line 44, so the declaration
// line of this helper is missing here. The tests call it as
// CreateTestPalette() with no arguments.
44 SnesPalette palette;
45 for (int i = 0; i < 16; ++i) {
46 palette.AddColor(SnesColor(i * 16, i * 16, i * 16));
47 }
48 return palette;
49 }
50};
51
52// Benchmark palette lookup optimization
53TEST_F(GraphicsOptimizationBenchmarks, PaletteLookupPerformance) {
54 const int kIterations = 10000;
55 const int kBitmapSize = 128;
56
57 auto test_data = CreateTestBitmapData(kBitmapSize, kBitmapSize);
58 auto test_palette = CreateTestPalette();
59
60 Bitmap bitmap(kBitmapSize, kBitmapSize, 8, test_data, test_palette);
61
62 // Benchmark palette lookup
63 auto start = std::chrono::high_resolution_clock::now();
64
65 for (int i = 0; i < kIterations; ++i) {
66 SnesColor test_color(i % 16, (i + 1) % 16, (i + 2) % 16);
67 uint8_t index = bitmap.FindColorIndex(test_color);
68 (void)index; // Prevent optimization
69 }
70
71 auto end = std::chrono::high_resolution_clock::now();
72 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
73
74 double avg_time_us = static_cast<double>(duration.count()) / kIterations;
75
76 // Verify optimization is working (should be < 1μs per lookup)
77 EXPECT_LT(avg_time_us, 1.0) << "Palette lookup should be optimized to < 1μs";
78
79 std::cout << "Palette lookup average time: " << avg_time_us << " μs" << std::endl;
80}
81
82// Benchmark dirty region tracking
83TEST_F(GraphicsOptimizationBenchmarks, DirtyRegionTrackingPerformance) {
84 const int kBitmapSize = 256;
85 const int kPixelUpdates = 1000;
86
87 auto test_data = CreateTestBitmapData(kBitmapSize, kBitmapSize);
88 auto test_palette = CreateTestPalette();
89
90 Bitmap bitmap(kBitmapSize, kBitmapSize, 8, test_data, test_palette);
91
92 // Benchmark pixel updates with dirty region tracking
93 auto start = std::chrono::high_resolution_clock::now();
94
95 for (int i = 0; i < kPixelUpdates; ++i) {
96 int x = i % kBitmapSize;
97 int y = (i * 7) % kBitmapSize; // Spread updates across bitmap
98 SnesColor color(i % 16, (i + 1) % 16, (i + 2) % 16);
99 bitmap.SetPixel(x, y, color);
100 }
101
102 auto end = std::chrono::high_resolution_clock::now();
103 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
104
105 double avg_time_us = static_cast<double>(duration.count()) / kPixelUpdates;
106
107 // Verify dirty region tracking is efficient
108 EXPECT_LT(avg_time_us, 10.0) << "Pixel updates should be < 10μs with dirty region tracking";
109
110 std::cout << "Pixel update average time: " << avg_time_us << " μs" << std::endl;
111}
112
113// Benchmark memory pool allocation
114TEST_F(GraphicsOptimizationBenchmarks, MemoryPoolAllocationPerformance) {
115 const int kAllocations = 10000;
116 const size_t kAllocationSize = 1024; // 1KB blocks
117
118 auto& memory_pool = MemoryPool::Get();
119
120 std::vector<void*> allocations;
121 allocations.reserve(kAllocations);
122
123 // Benchmark allocations
124 auto start = std::chrono::high_resolution_clock::now();
125
126 for (int i = 0; i < kAllocations; ++i) {
127 void* ptr = memory_pool.Allocate(kAllocationSize);
128 allocations.push_back(ptr);
129 }
130
131 auto end = std::chrono::high_resolution_clock::now();
132 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
133
134 double avg_time_us = static_cast<double>(duration.count()) / kAllocations;
135
136 // Verify memory pool is faster than system malloc
137 EXPECT_LT(avg_time_us, 1.0) << "Memory pool allocation should be < 1μs";
138
139 std::cout << "Memory pool allocation average time: " << avg_time_us << " μs" << std::endl;
140
141 // Benchmark deallocations
142 start = std::chrono::high_resolution_clock::now();
143
144 for (void* ptr : allocations) {
145 memory_pool.Deallocate(ptr);
146 }
147
148 end = std::chrono::high_resolution_clock::now();
149 duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
150
151 avg_time_us = static_cast<double>(duration.count()) / kAllocations;
152
153 EXPECT_LT(avg_time_us, 1.0) << "Memory pool deallocation should be < 1μs";
154
155 std::cout << "Memory pool deallocation average time: " << avg_time_us << " μs" << std::endl;
156}
157
158// Benchmark batch texture updates
// Compares the elapsed time of two passes over 100 64x64 bitmaps: an
// "individual" pass and a "batch" pass that ends with a single
// Arena::ProcessTextureQueue(nullptr) call. The test expects the batch pass to
// have the smaller per-bitmap average.
// NOTE(review): the bodies of both timed loops (listing lines 180-181 and
// 191-192) are absent from this text, so what each pass does per bitmap cannot
// be confirmed from here.
158// Benchmark batch texture updates
159TEST_F(GraphicsOptimizationBenchmarks, BatchTextureUpdatePerformance) {
160 const int kTextureUpdates = 100;
161 const int kBitmapSize = 64;
162
163 auto test_data = CreateTestBitmapData(kBitmapSize, kBitmapSize);
164 auto test_palette = CreateTestPalette();
165
166 std::vector<Bitmap> bitmaps;
167 bitmaps.reserve(kTextureUpdates);
168
169 // Create test bitmaps
// Every bitmap is built from the same pixel data and palette.
170 for (int i = 0; i < kTextureUpdates; ++i) {
171 bitmaps.emplace_back(kBitmapSize, kBitmapSize, 8, test_data, test_palette);
172 }
173
// NOTE(review): `arena` is not referenced by any line visible in this listing;
// the batch pass below calls gfx::Arena::Get() directly instead.
174 auto& arena = Arena::Get();
175
176 // Benchmark individual texture updates
177 auto start = std::chrono::high_resolution_clock::now();
178
179 for (auto& bitmap : bitmaps) {
182 }
183
184 auto end = std::chrono::high_resolution_clock::now();
185 auto individual_duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
186
187 // Benchmark batch texture updates
188 start = std::chrono::high_resolution_clock::now();
189
190 for (auto& bitmap : bitmaps) {
193 }
194 gfx::Arena::Get().ProcessTextureQueue(nullptr); // Process all at once
195
196 end = std::chrono::high_resolution_clock::now();
197 auto batch_duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
198
199 // Verify batch updates are faster
// Both averages come from durations truncated to whole microseconds.
200 double individual_avg = static_cast<double>(individual_duration.count()) / kTextureUpdates;
201 double batch_avg = static_cast<double>(batch_duration.count()) / kTextureUpdates;
202
203 EXPECT_LT(batch_avg, individual_avg) << "Batch updates should be faster than individual updates";
204
205 std::cout << "Individual texture update average: " << individual_avg << " μs" << std::endl;
206 std::cout << "Batch texture update average: " << batch_avg << " μs" << std::endl;
// If batch_avg is 0 (batch pass under one microsecond), this floating-point
// division does not yield a finite speedup figure.
207 std::cout << "Speedup: " << (individual_avg / batch_avg) << "x" << std::endl;
208}
209
210// Benchmark atlas rendering
211TEST_F(GraphicsOptimizationBenchmarks, AtlasRenderingPerformance) {
212 const int kBitmaps = 50;
213 const int kBitmapSize = 32;
214
215 auto test_data = CreateTestBitmapData(kBitmapSize, kBitmapSize);
216 auto test_palette = CreateTestPalette();
217
218 std::vector<Bitmap> bitmaps;
219 bitmaps.reserve(kBitmaps);
220
221 // Create test bitmaps
222 for (int i = 0; i < kBitmaps; ++i) {
223 bitmaps.emplace_back(kBitmapSize, kBitmapSize, 8, test_data, test_palette);
224 }
225
226 auto& atlas_renderer = AtlasRenderer::Get();
227 atlas_renderer.Initialize(nullptr, 512); // Initialize with 512x512 atlas
228
229 // Add bitmaps to atlas
230 std::vector<int> atlas_ids;
231 for (auto& bitmap : bitmaps) {
232 int atlas_id = atlas_renderer.AddBitmap(bitmap);
233 if (atlas_id >= 0) {
234 atlas_ids.push_back(atlas_id);
235 }
236 }
237
238 // Create render commands
239 std::vector<RenderCommand> render_commands;
240 for (size_t i = 0; i < atlas_ids.size(); ++i) {
241 render_commands.emplace_back(atlas_ids[i], i * 10.0f, i * 10.0f);
242 }
243
244 // Benchmark atlas rendering
245 auto start = std::chrono::high_resolution_clock::now();
246
247 for (int i = 0; i < 1000; ++i) {
248 atlas_renderer.RenderBatch(render_commands);
249 }
250
251 auto end = std::chrono::high_resolution_clock::now();
252 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
253
254 double avg_time_us = static_cast<double>(duration.count()) / 1000.0;
255
256 // Verify atlas rendering is efficient
257 EXPECT_LT(avg_time_us, 100.0) << "Atlas rendering should be < 100μs per batch";
258
259 std::cout << "Atlas rendering average time: " << avg_time_us << " μs per batch" << std::endl;
260
261 // Get atlas statistics
262 auto stats = atlas_renderer.GetStats();
263 std::cout << "Atlas utilization: " << stats.utilization_percent << "%" << std::endl;
264}
265
266// Benchmark performance profiler overhead
267TEST_F(GraphicsOptimizationBenchmarks, PerformanceProfilerOverhead) {
268 const int kOperations = 100000;
269
270 auto& profiler = PerformanceProfiler::Get();
271
272 // Benchmark operations without profiling
273 auto start = std::chrono::high_resolution_clock::now();
274
275 for (int i = 0; i < kOperations; ++i) {
276 // Simulate some work
277 volatile int result = i * i;
278 (void)result;
279 }
280
281 auto end = std::chrono::high_resolution_clock::now();
282 auto no_profiling_duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
283
284 // Benchmark operations with profiling
285 start = std::chrono::high_resolution_clock::now();
286
287 for (int i = 0; i < kOperations; ++i) {
288 profiler.StartTimer("test_operation");
289 // Simulate some work
290 volatile int result = i * i;
291 (void)result;
292 profiler.EndTimer("test_operation");
293 }
294
295 end = std::chrono::high_resolution_clock::now();
296 auto with_profiling_duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
297
298 // Calculate profiling overhead
299 double no_profiling_avg = static_cast<double>(no_profiling_duration.count()) / kOperations;
300 double with_profiling_avg = static_cast<double>(with_profiling_duration.count()) / kOperations;
301 double overhead = with_profiling_avg - no_profiling_avg;
302
303 // Verify profiling overhead is minimal
304 EXPECT_LT(overhead, 1.0) << "Profiling overhead should be < 1μs per operation";
305
306 std::cout << "No profiling average: " << no_profiling_avg << " μs" << std::endl;
307 std::cout << "With profiling average: " << with_profiling_avg << " μs" << std::endl;
308 std::cout << "Profiling overhead: " << overhead << " μs" << std::endl;
309}
310
311// Benchmark atlas rendering performance
312TEST_F(GraphicsOptimizationBenchmarks, AtlasRenderingPerformance2) {
313 const int kNumTiles = 100;
314 const int kTileSize = 16;
315
316 auto& atlas_renderer = AtlasRenderer::Get();
317 auto& profiler = PerformanceProfiler::Get();
318
319 // Create test tiles
320 std::vector<Bitmap> test_tiles;
321 std::vector<int> atlas_ids;
322
323 for (int i = 0; i < kNumTiles; ++i) {
324 auto tile_data = CreateTestBitmapData(kTileSize, kTileSize);
325 auto tile_palette = CreateTestPalette();
326
327 test_tiles.emplace_back(kTileSize, kTileSize, 8, tile_data, tile_palette);
328
329 // Add to atlas
330 int atlas_id = atlas_renderer.AddBitmap(test_tiles.back());
331 if (atlas_id >= 0) {
332 atlas_ids.push_back(atlas_id);
333 }
334 }
335
336 // Benchmark individual tile rendering
337 auto start = std::chrono::high_resolution_clock::now();
338
339 for (int i = 0; i < kNumTiles; ++i) {
340 if (i < atlas_ids.size()) {
341 atlas_renderer.RenderBitmap(atlas_ids[i], i * 20.0f, 0.0f);
342 }
343 }
344
345 auto end = std::chrono::high_resolution_clock::now();
346 auto individual_duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
347
348 // Benchmark batch rendering
349 std::vector<RenderCommand> render_commands;
350 for (size_t i = 0; i < atlas_ids.size(); ++i) {
351 render_commands.emplace_back(atlas_ids[i], i * 20.0f, 100.0f);
352 }
353
354 start = std::chrono::high_resolution_clock::now();
355 atlas_renderer.RenderBatch(render_commands);
356 end = std::chrono::high_resolution_clock::now();
357 auto batch_duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
358
359 // Verify batch rendering is faster
360 EXPECT_LT(batch_duration.count(), individual_duration.count())
361 << "Batch rendering should be faster than individual rendering";
362
363 // Get atlas statistics
364 auto stats = atlas_renderer.GetStats();
365 EXPECT_GT(stats.total_entries, 0) << "Atlas should contain entries";
366 EXPECT_GT(stats.used_entries, 0) << "Atlas should have used entries";
367
368 std::cout << "Individual rendering: " << individual_duration.count() << " μs" << std::endl;
369 std::cout << "Batch rendering: " << batch_duration.count() << " μs" << std::endl;
370 std::cout << "Atlas entries: " << stats.used_entries << "/" << stats.total_entries << std::endl;
371 std::cout << "Atlas utilization: " << stats.utilization_percent << "%" << std::endl;
372}
373
374// Integration test for overall performance
375TEST_F(GraphicsOptimizationBenchmarks, OverallPerformanceIntegration) {
376 const int kGraphicsSheets = 10;
377 const int kTilesPerSheet = 100;
378 const int kTileSize = 16;
379
380 auto& memory_pool = MemoryPool::Get();
381 auto& arena = Arena::Get();
382 auto& profiler = PerformanceProfiler::Get();
383
384 // Simulate loading graphics sheets
385 auto start = std::chrono::high_resolution_clock::now();
386
387 std::vector<Bitmap> graphics_sheets;
388 for (int sheet = 0; sheet < kGraphicsSheets; ++sheet) {
389 auto sheet_data = CreateTestBitmapData(kTileSize * 10, kTileSize * 10);
390 auto sheet_palette = CreateTestPalette();
391
392 graphics_sheets.emplace_back(kTileSize * 10, kTileSize * 10, 8, sheet_data, sheet_palette);
393 }
394
395 auto end = std::chrono::high_resolution_clock::now();
396 auto load_duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
397
398 // Simulate tile operations
399 start = std::chrono::high_resolution_clock::now();
400
401 for (int sheet = 0; sheet < kGraphicsSheets; ++sheet) {
402 for (int tile = 0; tile < kTilesPerSheet; ++tile) {
403 int x = (tile % 10) * kTileSize;
404 int y = (tile / 10) * kTileSize;
405
406 SnesColor color(tile % 16, (tile + 1) % 16, (tile + 2) % 16);
407 graphics_sheets[sheet].SetPixel(x, y, color);
408 }
409 }
410
411 end = std::chrono::high_resolution_clock::now();
412 auto tile_duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
413
414 // Simulate batch texture updates
415 start = std::chrono::high_resolution_clock::now();
416
417 for (auto& sheet : graphics_sheets) {
418 arena.QueueTextureCommand(gfx::Arena::TextureCommandType::UPDATE, &sheet);
419 }
420 arena.ProcessTextureQueue(nullptr);
421
422 end = std::chrono::high_resolution_clock::now();
423 auto batch_duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
424
425 // Verify overall performance
426 double load_time_ms = static_cast<double>(load_duration.count()) / 1000.0;
427 double tile_time_ms = static_cast<double>(tile_duration.count()) / 1000.0;
428 double batch_time_ms = static_cast<double>(batch_duration.count()) / 1000.0;
429
430 EXPECT_LT(load_time_ms, 100.0) << "Graphics sheet loading should be < 100ms";
431 EXPECT_LT(tile_time_ms, 50.0) << "Tile operations should be < 50ms";
432 EXPECT_LT(batch_time_ms, 10.0) << "Batch updates should be < 10ms";
433
434 std::cout << "Graphics sheet loading: " << load_time_ms << " ms" << std::endl;
435 std::cout << "Tile operations: " << tile_time_ms << " ms" << std::endl;
436 std::cout << "Batch updates: " << batch_time_ms << " ms" << std::endl;
437
438 // Get performance summary
439 auto summary = PerformanceDashboard::Get().GetSummary();
440 std::cout << "Optimization score: " << summary.optimization_score << "/100" << std::endl;
441 std::cout << "Status: " << summary.status_message << std::endl;
442}
443
444} // namespace gfx
445} // namespace yaze
void QueueTextureCommand(TextureCommandType type, Bitmap *bitmap)
Definition arena.cc:32
void ProcessTextureQueue(IRenderer *renderer)
Definition arena.cc:36
static Arena & Get()
Definition arena.cc:15
static AtlasRenderer & Get()
Represents a bitmap image optimized for SNES ROM hacking.
Definition bitmap.h:66
void SetPixel(int x, int y, const SnesColor &color)
Set a pixel at the given x,y coordinates with SNES color.
Definition bitmap.cc:519
uint8_t FindColorIndex(const SnesColor &color)
Find color index in palette using optimized hash map lookup.
Definition bitmap.cc:638
std::vector< uint8_t > CreateTestBitmapData(int width, int height)
static MemoryPool & Get()
static PerformanceDashboard & Get()
PerformanceSummary GetSummary() const
Get current performance summary.
void Clear()
Clear all timing data.
static PerformanceProfiler & Get()
SNES Color container.
Definition snes_color.h:38
Represents a palette of colors for the Super Nintendo Entertainment System (SNES).
void AddColor(const SnesColor &color)
TEST_F(GraphicsOptimizationBenchmarks, PaletteLookupPerformance)
Main namespace for the application.