yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
test_gemini_vision.cc
Go to the documentation of this file.
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"
7
8#ifdef YAZE_WITH_GRPC
10#endif
11
12namespace yaze {
13namespace test {
14
15class GeminiVisionTest : public ::testing::Test {
16 protected:
17 void SetUp() override {
18 // Check if GEMINI_API_KEY is set
19 const char* api_key = std::getenv("GEMINI_API_KEY");
20 if (!api_key || std::string(api_key).empty()) {
21 GTEST_SKIP() << "GEMINI_API_KEY not set. Skipping multimodal tests.";
22 }
23
24 api_key_ = api_key;
25
26 // Create test data directory
27 test_dir_ = std::filesystem::temp_directory_path() / "yaze_multimodal_test";
28 std::filesystem::create_directories(test_dir_);
29 }
30
31 void TearDown() override {
32 // Clean up test directory
33 if (std::filesystem::exists(test_dir_)) {
34 std::filesystem::remove_all(test_dir_);
35 }
36 }
37
38 // Helper: Create a simple test image (16x16 PNG)
39 std::filesystem::path CreateTestImage() {
40 auto image_path = test_dir_ / "test_image.png";
41
42 // Create a minimal PNG file (16x16 red square)
43 // PNG signature + IHDR + IDAT + IEND
44 const unsigned char png_data[] = {
45 // PNG signature
46 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,
47 // IHDR chunk
48 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52,
49 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x10,
50 0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x91, 0x68,
51 0x36,
52 // IDAT chunk (minimal data)
53 0x00, 0x00, 0x00, 0x0C, 0x49, 0x44, 0x41, 0x54,
54 0x08, 0x99, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00,
55 0x03, 0x01, 0x01, 0x00, 0x18, 0xDD, 0x8D, 0xB4,
56 // IEND chunk
57 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4E, 0x44,
58 0xAE, 0x42, 0x60, 0x82
59 };
60
61 std::ofstream file(image_path, std::ios::binary);
62 file.write(reinterpret_cast<const char*>(png_data), sizeof(png_data));
63 file.close();
64
65 return image_path;
66 }
67
68 std::string api_key_;
69 std::filesystem::path test_dir_;
70};
71
72TEST_F(GeminiVisionTest, BasicImageAnalysis) {
73 cli::GeminiConfig config;
74 config.api_key = api_key_;
75 config.model = "gemini-2.5-flash"; // Vision-capable model
76 config.verbose = false;
77
78 cli::GeminiAIService service(config);
79
80 // Create test image
81 auto image_path = CreateTestImage();
82 ASSERT_TRUE(std::filesystem::exists(image_path));
83
84 // Send multimodal request
85 auto response = service.GenerateMultimodalResponse(
86 image_path.string(),
87 "Describe this image in one sentence."
88 );
89
90 ASSERT_TRUE(response.ok()) << response.status().message();
91 EXPECT_FALSE(response->text_response.empty());
92
93 std::cout << "Vision API response: " << response->text_response << std::endl;
94}
95
96TEST_F(GeminiVisionTest, ImageWithSpecificPrompt) {
97 cli::GeminiConfig config;
98 config.api_key = api_key_;
99 config.model = "gemini-2.5-flash";
100 config.verbose = false;
101
102 cli::GeminiAIService service(config);
103
104 auto image_path = CreateTestImage();
105
106 // Ask specific question about the image
107 auto response = service.GenerateMultimodalResponse(
108 image_path.string(),
109 "What color is the dominant color in this image? Answer with just the color name."
110 );
111
112 ASSERT_TRUE(response.ok()) << response.status().message();
113 EXPECT_FALSE(response->text_response.empty());
114
115 // Response should mention "red" since we created a red square
116 std::string response_lower = response->text_response;
117 std::transform(response_lower.begin(), response_lower.end(),
118 response_lower.begin(), ::tolower);
119 EXPECT_TRUE(response_lower.find("red") != std::string::npos ||
120 response_lower.find("pink") != std::string::npos)
121 << "Expected color 'red' or 'pink' in response: " << response->text_response;
122}
123
124TEST_F(GeminiVisionTest, InvalidImagePath) {
125 cli::GeminiConfig config;
126 config.api_key = api_key_;
127 config.model = "gemini-2.5-flash";
128
129 cli::GeminiAIService service(config);
130
131 // Try with non-existent image
132 auto response = service.GenerateMultimodalResponse(
133 "/nonexistent/image.png",
134 "Describe this image."
135 );
136
137 EXPECT_FALSE(response.ok());
138 EXPECT_TRUE(absl::IsNotFound(response.status()) ||
139 absl::IsInternal(response.status()));
140}
141
142#ifdef YAZE_WITH_GRPC
143// Integration test with screenshot capture
144TEST_F(GeminiVisionTest, ScreenshotCaptureIntegration) {
145 // Note: This test requires a running YAZE instance with gRPC test harness
146 // Skip if we can't connect
147
148 cli::GeminiConfig config;
149 config.api_key = api_key_;
150 config.model = "gemini-2.5-flash";
151 config.verbose = false;
152
153 cli::GeminiAIService service(config);
154
155 // Attempt to capture a screenshot
156 auto screenshot_result = yaze::test::CaptureHarnessScreenshot(
157 (test_dir_ / "screenshot.png").string());
158
159 if (!screenshot_result.ok()) {
160 GTEST_SKIP() << "Screenshot capture failed (YAZE may not be running): "
161 << screenshot_result.status().message();
162 }
163
164 // Analyze the captured screenshot
165 auto response = service.GenerateMultimodalResponse(
166 screenshot_result->file_path,
167 "What UI elements are visible in this screenshot? List them."
168 );
169
170 ASSERT_TRUE(response.ok()) << response.status().message();
171 EXPECT_FALSE(response->text_response.empty());
172
173 std::cout << "Screenshot analysis: " << response->text_response << std::endl;
174}
175#endif
176
177// Performance test
178TEST_F(GeminiVisionTest, MultipleRequestsSequential) {
179 cli::GeminiConfig config;
180 config.api_key = api_key_;
181 config.model = "gemini-2.5-flash";
182 config.verbose = false;
183
184 cli::GeminiAIService service(config);
185
186 auto image_path = CreateTestImage();
187
188 // Make 3 sequential requests
189 const int num_requests = 3;
190 for (int i = 0; i < num_requests; ++i) {
191 auto response = service.GenerateMultimodalResponse(
192 image_path.string(),
193 absl::StrCat("Request ", i + 1, ": Describe this image briefly.")
194 );
195
196 ASSERT_TRUE(response.ok()) << "Request " << i + 1 << " failed: "
197 << response.status().message();
198 EXPECT_FALSE(response->text_response.empty());
199 }
200}
201
202// Rate limiting test (should handle gracefully)
203TEST_F(GeminiVisionTest, RateLimitHandling) {
204 cli::GeminiConfig config;
205 config.api_key = api_key_;
206 config.model = "gemini-2.5-flash";
207 config.verbose = false;
208
209 cli::GeminiAIService service(config);
210
211 auto image_path = CreateTestImage();
212
213 // Make many rapid requests (may hit rate limit)
214 int successful = 0;
215 int rate_limited = 0;
216
217 for (int i = 0; i < 10; ++i) {
218 auto response = service.GenerateMultimodalResponse(
219 image_path.string(),
220 "Describe this image."
221 );
222
223 if (response.ok()) {
224 successful++;
225 } else if (absl::IsResourceExhausted(response.status()) ||
226 response.status().message().find("429") != std::string::npos) {
227 rate_limited++;
228 }
229 }
230
231 // At least some requests should succeed
232 EXPECT_GT(successful, 0) << "No successful requests out of 10";
233
234 // If we hit rate limits, that's expected behavior (not a failure)
235 if (rate_limited > 0) {
236 std::cout << "Note: Hit rate limit on " << rate_limited << " out of 10 requests (expected)" << std::endl;
237 }
238}
239
240} // namespace test
241} // namespace yaze
242
243// Note: main() is provided by yaze_test.cc for the unified test runner
absl::StatusOr< AgentResponse > GenerateMultimodalResponse(const std::string &image_path, const std::string &prompt)
std::filesystem::path test_dir_
std::filesystem::path CreateTestImage()
TEST_F(DungeonObjectRenderingE2ETests, RunAllTests)
Main namespace for the application.