8#include "absl/strings/str_cat.h"
9#include "absl/strings/str_format.h"
13#ifdef YAZE_AI_RUNTIME_AVAILABLE
26 const std::string& condition) {
27 auto start = std::chrono::steady_clock::now();
31 if (!screenshot_result.ok()) {
32 return screenshot_result.status();
36 std::string prompt = absl::StrFormat(
37 "Analyze this screenshot and verify the following condition:\n\n"
41 "2. Confidence level (0.0 to 1.0)\n"
42 "3. Brief explanation of what you observe\n"
43 "4. Any discrepancies if FAIL\n\n"
44 "Format your response as:\n"
45 "RESULT: [PASS/FAIL]\n"
46 "CONFIDENCE: [0.0-1.0]\n"
47 "OBSERVATIONS: [what you see]\n"
48 "DISCREPANCIES: [if any]",
53 if (!ai_response.ok()) {
54 return ai_response.status();
60 auto end = std::chrono::steady_clock::now();
62 std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
68 const std::vector<std::string>& conditions) {
69 if (conditions.empty()) {
70 return absl::InvalidArgumentError(
"No conditions provided");
73 auto start = std::chrono::steady_clock::now();
76 if (!screenshot_result.ok()) {
77 return screenshot_result.status();
81 std::ostringstream prompt;
82 prompt <<
"Analyze this screenshot and verify ALL of the following "
84 for (
size_t i = 0; i < conditions.size(); ++i) {
85 prompt << (i + 1) <<
". " << conditions[i] <<
"\n";
88 <<
"\nFor EACH condition, respond with:\n"
90 <<
"- Brief explanation\n\n"
91 <<
"Then provide an OVERALL result (PASS only if ALL conditions pass).\n"
93 <<
"CONDITION 1: [PASS/FAIL] - [explanation]\n"
95 <<
"OVERALL: [PASS/FAIL]\n"
96 <<
"CONFIDENCE: [0.0-1.0]";
99 if (!ai_response.ok()) {
100 return ai_response.status();
105 auto end = std::chrono::steady_clock::now();
107 std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
113 const std::string& reference_path,
float tolerance) {
114 auto start = std::chrono::steady_clock::now();
117 if (!screenshot_result.ok()) {
118 return screenshot_result.status();
122 std::string prompt = absl::StrFormat(
123 "Compare this screenshot to the reference image.\n"
124 "Tolerance level: %.0f%% (lower = stricter)\n\n"
125 "Describe any visual differences you observe.\n"
126 "Consider: layout, colors, text, UI elements, game state.\n\n"
129 "SIMILARITY: [0.0-1.0]\n"
130 "DIFFERENCES: [list any differences found]",
134 if (!ai_response.ok()) {
135 return ai_response.status();
140 auto end = std::chrono::steady_clock::now();
142 std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
148 const std::string& question) {
150 if (!screenshot_result.ok()) {
151 return screenshot_result.status();
154 std::string prompt = absl::StrFormat(
155 "Based on this screenshot of the yaze ROM editor, please answer:\n\n%s",
162 int x,
int y,
int expected_tile_id) {
163 std::string condition = absl::StrFormat(
164 "The tile at canvas position (%d, %d) should be tile ID 0x%04X", x, y,
170 const std::string& panel_name) {
171 std::string condition = absl::StrFormat(
172 "The '%s' panel/window should be visible and not obscured", panel_name);
177 const std::string& state_description) {
178 std::string condition =
179 absl::StrFormat(
"In the emulator view, verify: %s", state_description);
184 int x,
int y,
const std::string& sprite_description) {
185 std::string condition = absl::StrFormat(
186 "At position (%d, %d), there should be a sprite matching: %s", x, y,
192 const std::string& name) {
194 return absl::FailedPreconditionError(
"Screenshot callback not set");
199 return result.status();
207 LOG_DEBUG(
"AIVisionVerifier",
"Screenshot captured: %s (%dx%d)", path.c_str(),
229 return absl::FailedPreconditionError(
"Not in iterative session");
233 return absl::ResourceExhaustedError(
"Max iterations reached");
239 auto result =
Verify(condition);
244 return absl::OkStatus();
247absl::StatusOr<VisionVerificationResult>
252 return absl::NotFoundError(
"No results in iterative session");
258 float total_confidence = 0.0f;
261 if (!result.passed) {
264 total_confidence += result.confidence;
266 result.observations.begin(),
267 result.observations.end());
269 result.discrepancies.begin(),
270 result.discrepancies.end());
280 return absl::FailedPreconditionError(
"Screenshot callback not set");
285 return result.status();
291 return "base64_encoded_screenshot_placeholder";
295 const std::string& prompt,
const std::string& image_base64) {
296 LOG_DEBUG(
"AIVisionVerifier",
"Calling vision model: %s",
299#ifdef YAZE_AI_RUNTIME_AVAILABLE
303 std::string temp_image_path =
311 std::ofstream temp_file(temp_image_path, std::ios::binary);
314 temp_file.write(
reinterpret_cast<const char*
>(&
last_width_),
316 temp_file.write(
reinterpret_cast<const char*
>(&
last_height_),
327 if (gemini_service) {
331 return response->text_response;
333 LOG_DEBUG(
"AIVisionVerifier",
"Gemini multimodal failed: %s",
334 response.status().message().data());
340 return response->text_response;
342 return response.status();
347 LOG_DEBUG(
"AIVisionVerifier",
"No AI service configured, using placeholder");
348 return absl::StrFormat(
351 "OBSERVATIONS: Placeholder response - no AI service configured. "
352 "Set AI service with SetAIService() for real vision verification.\n"
353 "DISCREPANCIES: None");
357 const std::string& response,
const std::string& screenshot_path) {
363 if (response.find(
"RESULT: PASS") != std::string::npos ||
364 response.find(
"PASS") != std::string::npos) {
369 auto conf_pos = response.find(
"CONFIDENCE:");
370 if (conf_pos != std::string::npos) {
371 std::string conf_str = response.substr(conf_pos + 11, 4);
382 auto obs_pos = response.find(
"OBSERVATIONS:");
383 if (obs_pos != std::string::npos) {
384 auto end_pos = response.find(
'\n', obs_pos);
385 if (end_pos == std::string::npos)
386 end_pos = response.length();
388 response.substr(obs_pos + 13, end_pos - obs_pos - 13));
392 auto disc_pos = response.find(
"DISCREPANCIES:");
393 if (disc_pos != std::string::npos) {
394 auto end_pos = response.find(
'\n', disc_pos);
395 if (end_pos == std::string::npos)
396 end_pos = response.length();
397 std::string disc = response.substr(disc_pos + 14, end_pos - disc_pos - 14);
398 if (disc !=
"None" && !disc.empty()) {
virtual absl::StatusOr< AgentResponse > GenerateResponse(const std::string &prompt)=0
absl::StatusOr< AgentResponse > GenerateMultimodalResponse(const std::string &, const std::string &)
absl::Status AddIterativeCheck(const std::string &condition)
Add a verification to the iterative session.
absl::StatusOr< std::string > CallVisionModel(const std::string &prompt, const std::string &image_base64)
std::vector< VisionVerificationResult > iterative_results_
void BeginIterativeSession(int max_iterations=5)
Begin an iterative verification session.
AIVisionVerifier(const VisionVerifierConfig &config={})
absl::StatusOr< VisionVerificationResult > VerifySpriteAt(int x, int y, const std::string &sprite_description)
Verify sprite rendering at a specific location.
absl::StatusOr< std::string > AskAboutState(const std::string &question)
Ask the AI an open-ended question about the current state.
VisionVerificationResult ParseAIResponse(const std::string &response, const std::string &screenshot_path)
absl::StatusOr< VisionVerificationResult > VerifyPanelVisible(const std::string &panel_name)
Verify that a specific editor panel is visible.
std::vector< std::string > iterative_conditions_
absl::StatusOr< VisionVerificationResult > VerifyConditions(const std::vector< std::string > &conditions)
Verify multiple conditions in a single screenshot.
absl::StatusOr< VisionVerificationResult > VerifyTileAt(int x, int y, int expected_tile_id)
Verify that the tile at a canvas position matches the expected tile ID.
VisionVerifierConfig config_
absl::StatusOr< VisionVerificationResult > CompareToReference(const std::string &reference_path, float tolerance=0.1f)
Compare current state against a reference screenshot.
int iterative_max_iterations_
absl::StatusOr< VisionVerificationResult > VerifyEmulatorState(const std::string &state_description)
Verify that the game state in the emulator matches the expected values.
ScreenshotCaptureCallback screenshot_callback_
absl::StatusOr< std::string > CaptureAndEncodeScreenshot()
absl::StatusOr< std::string > CaptureScreenshot(const std::string &name)
Capture and save a screenshot.
int iterative_current_iteration_
std::vector< uint8_t > last_screenshot_data_
bool in_iterative_session_
absl::StatusOr< VisionVerificationResult > CompleteIterativeSession()
Complete the iterative session and get results.
cli::AIService * ai_service_
void ClearScreenshotCache()
Clear cached screenshots to free memory.
absl::StatusOr< VisionVerificationResult > Verify(const std::string &condition)
Verify a single condition using AI vision.
#define LOG_DEBUG(category, format,...)
Result of an AI vision verification check.
std::string screenshot_path
std::vector< std::string > observations
std::vector< std::string > discrepancies
Configuration for vision verification.
std::string screenshot_dir