yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
ai_vision_verifier.h
Go to the documentation of this file.
1#ifndef YAZE_APP_TEST_AI_VISION_VERIFIER_H
2#define YAZE_APP_TEST_AI_VISION_VERIFIER_H
3
4#include <chrono>
5#include <functional>
6#include <memory>
7#include <string>
8#include <vector>
9
10#include "absl/status/status.h"
11#include "absl/status/statusor.h"
12
13// Forward declare AI service types (avoid circular dependency)
14namespace yaze {
15namespace cli {
16class AIService;
17}
18} // namespace yaze
19
20namespace yaze {
21namespace test {
22
29 bool passed = false;
30 float confidence = 0.0f; // 0.0 to 1.0
31 std::string ai_response;
32 std::string screenshot_path;
33 std::chrono::milliseconds latency{0};
34 std::string error_message;
35
36 // Detailed findings from the AI
37 std::vector<std::string> observations;
38 std::vector<std::string> discrepancies;
39};
40
45 // AI model settings
46 std::string model_provider = "gemini"; // "gemini", "ollama", "openai"
47 std::string model_name = "gemini-1.5-flash";
48 float temperature = 0.1f; // Low temperature for consistent verification
49
50 // Screenshot settings
51 std::string screenshot_dir = "/tmp/yaze_test_screenshots";
54
55 // Verification settings
57 int max_retries = 2;
58 std::chrono::seconds timeout{30};
59
60 // Prompt templates
61 std::string system_prompt =
62 "You are a visual verification assistant for a SNES ROM editor called "
63 "yaze. "
64 "Your task is to analyze screenshots and verify that UI elements, game "
65 "states, "
66 "and visual properties match expected conditions. Be precise and "
67 "objective.";
68};
69
76 std::function<absl::StatusOr<std::vector<uint8_t>>(int* width, int* height)>;
77
// AI-powered visual verification for GUI testing.
//
// Public entry points return absl::StatusOr / absl::Status. The class keeps a
// VisionVerifierConfig, a screenshot-capture callback, a raw pointer to an
// optional cli::AIService, the last screenshot buffer with its dimensions,
// and the state of an iterative verification session.
//
// NOTE(review): this listing has gaps (some declaration lines are missing),
// so the comments below only describe what is visible here.
103class AIVisionVerifier {
104 public:
 // Construct with the given configuration; defaults to a
 // default-constructed VisionVerifierConfig.
105 explicit AIVisionVerifier(const VisionVerifierConfig& config = {});
108 // Configuration
 // Replace the stored configuration with a copy of `config`.
109 void SetConfig(const VisionVerifierConfig& config) { config_ = config; }
 // Read-only access to the stored configuration.
110 const VisionVerifierConfig& GetConfig() const { return config_; }
112 // Screenshot capture setup
 // NOTE(review): the signature line for this body is missing from the
 // listing; presumably `void SetScreenshotCallback(ScreenshotCaptureCallback
 // callback) {` — confirm against the real header. The body moves the
 // callback into screenshot_callback_.
114 screenshot_callback_ = std::move(callback);
115 }
116
 // Set the AI service to use for vision verification.
 // Only the raw pointer is stored; presumably non-owning, so the caller must
 // keep the service alive while it is in use — TODO confirm.
125 void SetAIService(cli::AIService* service) { ai_service_ = service; }
126
127 // --- Core Verification Methods ---
128
 // Verify a single condition using AI vision.
134 absl::StatusOr<VisionVerificationResult> Verify(const std::string& condition);
135
 // Verify multiple conditions in a single screenshot.
141 absl::StatusOr<VisionVerificationResult> VerifyConditions(
142 const std::vector<std::string>& conditions);
143
 // Compare current state against a reference screenshot.
 // `tolerance` defaults to 0.1f; its exact meaning is not visible here.
150 absl::StatusOr<VisionVerificationResult> CompareToReference(
151 const std::string& reference_path, float tolerance = 0.1f);
152
 // Ask the AI an open-ended question about the current state.
158 absl::StatusOr<std::string> AskAboutState(const std::string& question);
159
160 // --- Specialized Verifications for yaze ---
161
 // Verify tile at canvas position matches expected tile ID.
165 absl::StatusOr<VisionVerificationResult> VerifyTileAt(int x, int y,
166 int expected_tile_id);
167
 // Verify that a specific editor panel is visible.
171 absl::StatusOr<VisionVerificationResult> VerifyPanelVisible(
172 const std::string& panel_name);
173
 // Verify game state in emulator matches expected values.
177 absl::StatusOr<VisionVerificationResult> VerifyEmulatorState(
178 const std::string& state_description);
179
 // Verify sprite rendering at specific location.
183 absl::StatusOr<VisionVerificationResult> VerifySpriteAt(
184 int x, int y, const std::string& sprite_description);
185
186 // --- Screenshot Management ---
187
 // Capture and save a screenshot.
 // NOTE(review): the returned string is presumably the saved file path —
 // confirm against the implementation.
193 absl::StatusOr<std::string> CaptureScreenshot(const std::string& name);
194
 // Get the last captured screenshot data.
 // NOTE(review): the body line (listing line 199) is missing here;
 // presumably it returns last_screenshot_data_ — confirm.
198 const std::vector<uint8_t>& GetLastScreenshotData() const {
200 }
201
 // NOTE(review): listing lines 202-205 are missing; the page's member index
 // lists `void ClearScreenshotCache()` ("Clear cached screenshots to free
 // memory"), which may belong here — confirm.
206
207 // --- Iterative Refinement ---
208
 // Begin an iterative verification session (max_iterations defaults to 5).
215 void BeginIterativeSession(int max_iterations = 5);
216
 // Add a verification to the iterative session.
220 absl::Status AddIterativeCheck(const std::string& condition);
221
 // Complete the iterative session and get results.
225 absl::StatusOr<VisionVerificationResult> CompleteIterativeSession();
226
227 private:
228 // Internal helpers
229 absl::StatusOr<std::string> CaptureAndEncodeScreenshot();
230 absl::StatusOr<std::string> CallVisionModel(const std::string& prompt,
231 const std::string& image_base64);
232 VisionVerificationResult ParseAIResponse(const std::string& response,
233 const std::string& screenshot_path);
 // NOTE(review): listing lines 234-236 are missing; config_ and
 // screenshot_callback_ are used above, so they are presumably declared
 // there — confirm.
237 cli::AIService* ai_service_ = nullptr; // Optional AI service for real API calls
 // NOTE(review): uint8_t is used here, but <cstdint> is not among the
 // includes visible in this listing — confirm it is not only pulled in
 // transitively.
238 std::vector<uint8_t> last_screenshot_data_;
239 int last_width_ = 0;
240 int last_height_ = 0;
242 // Iterative session state
 // NOTE(review): listing lines 243-245 are missing, so further session
 // fields may exist.
246 std::vector<std::string> iterative_conditions_;
247 std::vector<VisionVerificationResult> iterative_results_;
248};
249
254 public:
256 int max_iterations = 5)
257 : verifier_(verifier) {
258 verifier_.BeginIterativeSession(max_iterations);
264 }
266
267 absl::Status Check(const std::string& condition) {
268 return verifier_.AddIterativeCheck(condition);
270
271 absl::StatusOr<VisionVerificationResult> Complete() {
272 completed_ = true;
274 }
276 private:
278 bool completed_ = false;
279};
280
281} // namespace test
282} // namespace yaze
283
284#endif // YAZE_APP_TEST_AI_VISION_VERIFIER_H
AI-powered visual verification for GUI testing.
absl::Status AddIterativeCheck(const std::string &condition)
Add a verification to the iterative session.
absl::StatusOr< std::string > CallVisionModel(const std::string &prompt, const std::string &image_base64)
std::vector< VisionVerificationResult > iterative_results_
void BeginIterativeSession(int max_iterations=5)
Begin an iterative verification session.
AIVisionVerifier(const VisionVerifierConfig &config={})
void SetAIService(cli::AIService *service)
Set the AI service to use for vision verification.
absl::StatusOr< VisionVerificationResult > VerifySpriteAt(int x, int y, const std::string &sprite_description)
Verify sprite rendering at specific location.
absl::StatusOr< std::string > AskAboutState(const std::string &question)
Ask the AI an open-ended question about the current state.
VisionVerificationResult ParseAIResponse(const std::string &response, const std::string &screenshot_path)
absl::StatusOr< VisionVerificationResult > VerifyPanelVisible(const std::string &panel_name)
Verify that a specific editor panel is visible.
void SetConfig(const VisionVerifierConfig &config)
const std::vector< uint8_t > & GetLastScreenshotData() const
Get the last captured screenshot data.
std::vector< std::string > iterative_conditions_
absl::StatusOr< VisionVerificationResult > VerifyConditions(const std::vector< std::string > &conditions)
Verify multiple conditions in a single screenshot.
absl::StatusOr< VisionVerificationResult > VerifyTileAt(int x, int y, int expected_tile_id)
Verify tile at canvas position matches expected tile ID.
absl::StatusOr< VisionVerificationResult > CompareToReference(const std::string &reference_path, float tolerance=0.1f)
Compare current state against a reference screenshot.
absl::StatusOr< VisionVerificationResult > VerifyEmulatorState(const std::string &state_description)
Verify game state in emulator matches expected values.
ScreenshotCaptureCallback screenshot_callback_
absl::StatusOr< std::string > CaptureAndEncodeScreenshot()
absl::StatusOr< std::string > CaptureScreenshot(const std::string &name)
Capture and save a screenshot.
std::vector< uint8_t > last_screenshot_data_
const VisionVerifierConfig & GetConfig() const
absl::StatusOr< VisionVerificationResult > CompleteIterativeSession()
Complete the iterative session and get results.
void SetScreenshotCallback(ScreenshotCaptureCallback callback)
void ClearScreenshotCache()
Clear cached screenshots to free memory.
absl::StatusOr< VisionVerificationResult > Verify(const std::string &condition)
Verify a single condition using AI vision.
RAII helper for iterative verification sessions.
absl::Status Check(const std::string &condition)
ScopedIterativeVerification(AIVisionVerifier &verifier, int max_iterations=5)
absl::StatusOr< VisionVerificationResult > Complete()
std::function< absl::StatusOr< std::vector< uint8_t > >(int *width, int *height)> ScreenshotCaptureCallback
Callback for custom screenshot capture.
Result of an AI vision verification check.
std::vector< std::string > observations
std::vector< std::string > discrepancies
Configuration for vision verification.