8#include "absl/strings/str_cat.h"
9#include "absl/strings/str_format.h"
13#ifdef YAZE_AI_RUNTIME_AVAILABLE
26 const std::string& condition) {
27 auto start = std::chrono::steady_clock::now();
31 if (!screenshot_result.ok()) {
32 return screenshot_result.status();
36 std::string prompt = absl::StrFormat(
37 "Analyze this screenshot and verify the following condition:\n\n"
41 "2. Confidence level (0.0 to 1.0)\n"
42 "3. Brief explanation of what you observe\n"
43 "4. Any discrepancies if FAIL\n\n"
44 "Format your response as:\n"
45 "RESULT: [PASS/FAIL]\n"
46 "CONFIDENCE: [0.0-1.0]\n"
47 "OBSERVATIONS: [what you see]\n"
48 "DISCREPANCIES: [if any]",
53 if (!ai_response.ok()) {
54 return ai_response.status();
60 auto end = std::chrono::steady_clock::now();
61 result.latency = std::chrono::duration_cast<std::chrono::milliseconds>(
68 const std::vector<std::string>& conditions) {
69 if (conditions.empty()) {
70 return absl::InvalidArgumentError(
"No conditions provided");
73 auto start = std::chrono::steady_clock::now();
76 if (!screenshot_result.ok()) {
77 return screenshot_result.status();
81 std::ostringstream prompt;
82 prompt <<
"Analyze this screenshot and verify ALL of the following conditions:\n\n";
83 for (
size_t i = 0; i < conditions.size(); ++i) {
84 prompt << (i + 1) <<
". " << conditions[i] <<
"\n";
86 prompt <<
"\nFor EACH condition, respond with:\n"
88 <<
"- Brief explanation\n\n"
89 <<
"Then provide an OVERALL result (PASS only if ALL conditions pass).\n"
91 <<
"CONDITION 1: [PASS/FAIL] - [explanation]\n"
93 <<
"OVERALL: [PASS/FAIL]\n"
94 <<
"CONFIDENCE: [0.0-1.0]";
97 if (!ai_response.ok()) {
98 return ai_response.status();
103 auto end = std::chrono::steady_clock::now();
104 result.latency = std::chrono::duration_cast<std::chrono::milliseconds>(
111 const std::string& reference_path,
float tolerance) {
112 auto start = std::chrono::steady_clock::now();
115 if (!screenshot_result.ok()) {
116 return screenshot_result.status();
120 std::string prompt = absl::StrFormat(
121 "Compare this screenshot to the reference image.\n"
122 "Tolerance level: %.0f%% (lower = stricter)\n\n"
123 "Describe any visual differences you observe.\n"
124 "Consider: layout, colors, text, UI elements, game state.\n\n"
127 "SIMILARITY: [0.0-1.0]\n"
128 "DIFFERENCES: [list any differences found]",
132 if (!ai_response.ok()) {
133 return ai_response.status();
138 auto end = std::chrono::steady_clock::now();
139 result.latency = std::chrono::duration_cast<std::chrono::milliseconds>(
146 const std::string& question) {
148 if (!screenshot_result.ok()) {
149 return screenshot_result.status();
152 std::string prompt = absl::StrFormat(
153 "Based on this screenshot of the yaze ROM editor, please answer:\n\n%s",
160 int x,
int y,
int expected_tile_id) {
161 std::string condition = absl::StrFormat(
162 "The tile at canvas position (%d, %d) should be tile ID 0x%04X",
163 x, y, expected_tile_id);
168 const std::string& panel_name) {
169 std::string condition = absl::StrFormat(
170 "The '%s' panel/window should be visible and not obscured",
176 const std::string& state_description) {
177 std::string condition = absl::StrFormat(
178 "In the emulator view, verify: %s", state_description);
183 int x,
int y,
const std::string& sprite_description) {
184 std::string condition = absl::StrFormat(
185 "At position (%d, %d), there should be a sprite matching: %s",
186 x, y, sprite_description);
191 const std::string& name) {
193 return absl::FailedPreconditionError(
"Screenshot callback not set");
198 return result.status();
206 LOG_DEBUG(
"AIVisionVerifier",
"Screenshot captured: %s (%dx%d)",
228 return absl::FailedPreconditionError(
"Not in iterative session");
232 return absl::ResourceExhaustedError(
"Max iterations reached");
238 auto result =
Verify(condition);
243 return absl::OkStatus();
250 return absl::NotFoundError(
"No results in iterative session");
256 float total_confidence = 0.0f;
259 if (!result.passed) {
262 total_confidence += result.confidence;
264 result.observations.begin(),
265 result.observations.end());
267 result.discrepancies.begin(),
268 result.discrepancies.end());
278 return absl::FailedPreconditionError(
"Screenshot callback not set");
283 return result.status();
289 return "base64_encoded_screenshot_placeholder";
293 const std::string& prompt,
const std::string& image_base64) {
294 LOG_DEBUG(
"AIVisionVerifier",
"Calling vision model: %s",
297#ifdef YAZE_AI_RUNTIME_AVAILABLE
301 std::string temp_image_path =
309 std::ofstream temp_file(temp_image_path, std::ios::binary);
312 temp_file.write(
reinterpret_cast<const char*
>(&
last_width_),
314 temp_file.write(
reinterpret_cast<const char*
>(&
last_height_),
324 auto* gemini_service =
326 if (gemini_service) {
330 return response->text_response;
332 LOG_DEBUG(
"AIVisionVerifier",
"Gemini multimodal failed: %s",
333 response.status().message().data());
339 return response->text_response;
341 return response.status();
346 LOG_DEBUG(
"AIVisionVerifier",
"No AI service configured, using placeholder");
347 return absl::StrFormat(
350 "OBSERVATIONS: Placeholder response - no AI service configured. "
351 "Set AI service with SetAIService() for real vision verification.\n"
352 "DISCREPANCIES: None");
356 const std::string& response,
const std::string& screenshot_path) {
362 if (response.find(
"RESULT: PASS") != std::string::npos ||
363 response.find(
"PASS") != std::string::npos) {
368 auto conf_pos = response.find(
"CONFIDENCE:");
369 if (conf_pos != std::string::npos) {
370 std::string conf_str = response.substr(conf_pos + 11, 4);
381 auto obs_pos = response.find(
"OBSERVATIONS:");
382 if (obs_pos != std::string::npos) {
383 auto end_pos = response.find(
'\n', obs_pos);
384 if (end_pos == std::string::npos) end_pos = response.length();
386 response.substr(obs_pos + 13, end_pos - obs_pos - 13));
390 auto disc_pos = response.find(
"DISCREPANCIES:");
391 if (disc_pos != std::string::npos) {
392 auto end_pos = response.find(
'\n', disc_pos);
393 if (end_pos == std::string::npos) end_pos = response.length();
394 std::string disc = response.substr(disc_pos + 14, end_pos - disc_pos - 14);
395 if (disc !=
"None" && !disc.empty()) {
virtual absl::StatusOr< AgentResponse > GenerateResponse(const std::string &prompt)=0
absl::StatusOr< AgentResponse > GenerateMultimodalResponse(const std::string &, const std::string &)
absl::Status AddIterativeCheck(const std::string &condition)
Add a verification to the iterative session.
absl::StatusOr< std::string > CallVisionModel(const std::string &prompt, const std::string &image_base64)
std::vector< VisionVerificationResult > iterative_results_
void BeginIterativeSession(int max_iterations=5)
Begin an iterative verification session.
AIVisionVerifier(const VisionVerifierConfig &config={})
absl::StatusOr< VisionVerificationResult > VerifySpriteAt(int x, int y, const std::string &sprite_description)
Verify sprite rendering at specific location.
absl::StatusOr< std::string > AskAboutState(const std::string &question)
Ask the AI an open-ended question about the current state.
VisionVerificationResult ParseAIResponse(const std::string &response, const std::string &screenshot_path)
absl::StatusOr< VisionVerificationResult > VerifyPanelVisible(const std::string &panel_name)
Verify that a specific editor panel is visible.
std::vector< std::string > iterative_conditions_
absl::StatusOr< VisionVerificationResult > VerifyConditions(const std::vector< std::string > &conditions)
Verify multiple conditions in a single screenshot.
absl::StatusOr< VisionVerificationResult > VerifyTileAt(int x, int y, int expected_tile_id)
Verify tile at canvas position matches expected tile ID.
VisionVerifierConfig config_
absl::StatusOr< VisionVerificationResult > CompareToReference(const std::string &reference_path, float tolerance=0.1f)
Compare current state against a reference screenshot.
int iterative_max_iterations_
absl::StatusOr< VisionVerificationResult > VerifyEmulatorState(const std::string &state_description)
Verify game state in emulator matches expected values.
ScreenshotCaptureCallback screenshot_callback_
absl::StatusOr< std::string > CaptureAndEncodeScreenshot()
absl::StatusOr< std::string > CaptureScreenshot(const std::string &name)
Capture and save a screenshot.
int iterative_current_iteration_
std::vector< uint8_t > last_screenshot_data_
bool in_iterative_session_
absl::StatusOr< VisionVerificationResult > CompleteIterativeSession()
Complete the iterative session and get results.
cli::AIService * ai_service_
void ClearScreenshotCache()
Clear cached screenshots to free memory.
absl::StatusOr< VisionVerificationResult > Verify(const std::string &condition)
Verify a single condition using AI vision.
#define LOG_DEBUG(category, format,...)
Result of an AI vision verification check.
std::string screenshot_path
std::vector< std::string > observations
std::vector< std::string > discrepancies
Configuration for vision verification.
std::string screenshot_dir