yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
ai_gui_controller.h
Go to the documentation of this file.
1#ifndef YAZE_CLI_SERVICE_AI_AI_GUI_CONTROLLER_H_
2#define YAZE_CLI_SERVICE_AI_AI_GUI_CONTROLLER_H_
3
4#include <filesystem>
5#include <memory>
6#include <string>
7#include <vector>
8
9#include "absl/status/status.h"
10#include "absl/status/statusor.h"
14
15namespace yaze {
16namespace cli {
17
18// Forward declarations
19class GeminiAIService;
20class GuiAutomationClient; // In cli namespace, not cli::gui
21
22namespace ai {
23
29 int max_iterations = 10; // Max attempts before giving up
30 int screenshot_delay_ms = 500; // Delay before taking screenshots
31 bool enable_vision_verification = true; // Use vision to verify actions
32 bool enable_iterative_refinement = true; // Retry with refined actions
33 int max_retries_per_action = 3; // Max retries for a single action
34 std::string screenshots_dir = "/tmp/yaze/ai_gui_control";
35};
36
42 bool success = false;
44 std::vector<ai::AIAction> actions_executed;
45 std::vector<VisionAnalysisResult> vision_analyses;
46 std::vector<std::filesystem::path> screenshots_taken;
47 std::string error_message;
49};
50
81 public:
// Constructs the controller from a Gemini AI service and a GUI automation
// client, both supplied as raw pointers.
// NOTE(review): presumably non-owning, so the pointees would need to outlive
// this controller -- confirm against the implementation.
87 AIGUIController(GeminiAIService* gemini_service,
88 GuiAutomationClient* gui_client);
89
// Defaulted destructor; no custom teardown logic is declared here.
90 ~AIGUIController() = default;
91
// Initializes the controller with the given control-loop configuration.
// Returns an absl::Status reporting the outcome.
95 absl::Status Initialize(const ControlLoopConfig& config);
96
// Executes a natural language command with AI vision guidance.
// Returns a ControlResult on success, or an error status otherwise.
102 absl::StatusOr<ControlResult> ExecuteCommand(const std::string& command);
103
// Executes a sequence of pre-parsed actions.
// Returns a ControlResult on success, or an error status otherwise.
109 absl::StatusOr<ControlResult> ExecuteActions(
110 const std::vector<ai::AIAction>& actions);
111
// Executes a single action with optional vision verification.
// `verify_with_vision` defaults to true; pass false to skip verification.
// Returns a VisionAnalysisResult on success, or an error status otherwise.
118 absl::StatusOr<VisionAnalysisResult> ExecuteSingleAction(
119 const AIAction& action,
120 bool verify_with_vision = true);
121
// Analyzes the current GUI state without executing any actions.
// `context` is an optional string (empty by default).
// NOTE(review): presumably forwarded to the vision analysis as extra
// context -- confirm in the implementation.
127 absl::StatusOr<VisionAnalysisResult> AnalyzeCurrentGUIState(
128 const std::string& context = "");
129
// Returns a read-only reference to the current configuration (config_).
133 const ControlLoopConfig& config() const { return config_; }
134
139
140 private:
143 std::unique_ptr<VisionActionRefiner> vision_refiner_;
146 std::filesystem::path screenshots_dir_;
147
148 // Helper methods
// Takes a free-form description and yields a filesystem path, or an error
// status on failure.
// NOTE(review): presumably captures a screenshot of the current GUI state and
// returns where it was saved -- confirm in the implementation.
149 absl::StatusOr<std::filesystem::path> CaptureCurrentState(
150 const std::string& description);
151
// Executes one AIAction and reports the outcome as an absl::Status.
// NOTE(review): the name suggests the action is dispatched through the gRPC
// GUI automation client -- confirm in the implementation.
152 absl::Status ExecuteGRPCAction(const AIAction& action);
153
// Takes an action plus the paths of a "before" and an "after" screenshot and
// yields a VisionAnalysisResult, or an error status on failure.
// NOTE(review): presumably compares the two screenshots to judge whether the
// action took effect -- confirm in the implementation.
154 absl::StatusOr<VisionAnalysisResult> VerifyActionSuccess(
155 const AIAction& action,
156 const std::filesystem::path& before_screenshot,
157 const std::filesystem::path& after_screenshot);
158
// Takes the original action and a vision analysis result and yields an
// AIAction, or an error status on failure.
// NOTE(review): presumably the returned action is an adjusted version of
// `original_action` intended for a retry -- confirm in the implementation.
159 absl::StatusOr<AIAction> RefineActionWithVision(
160 const AIAction& original_action,
161 const VisionAnalysisResult& analysis);
162
// Produces a filesystem path for a screenshot from the given suffix.
// NOTE(review): presumably rooted at screenshots_dir_ -- confirm in the
// implementation.
164 std::filesystem::path GenerateScreenshotPath(const std::string& suffix);
165};
166
167} // namespace ai
168} // namespace cli
169} // namespace yaze
170
171#endif // YAZE_CLI_SERVICE_AI_AI_GUI_CONTROLLER_H_
Client for automating YAZE GUI through gRPC.
High-level controller for AI-driven GUI automation with vision feedback.
std::unique_ptr< VisionActionRefiner > vision_refiner_
absl::StatusOr< AIAction > RefineActionWithVision(const AIAction &original_action, const VisionAnalysisResult &analysis)
absl::Status ExecuteGRPCAction(const AIAction &action)
absl::Status Initialize(const ControlLoopConfig &config)
Initialize the controller with configuration.
std::filesystem::path GenerateScreenshotPath(const std::string &suffix)
std::filesystem::path screenshots_dir_
absl::StatusOr< std::filesystem::path > CaptureCurrentState(const std::string &description)
absl::StatusOr< ControlResult > ExecuteActions(const std::vector< ai::AIAction > &actions)
Execute a sequence of pre-parsed actions.
absl::StatusOr< ControlResult > ExecuteCommand(const std::string &command)
Execute a natural language command with AI vision guidance.
gui::GuiActionGenerator action_generator_
absl::StatusOr< VisionAnalysisResult > ExecuteSingleAction(const AIAction &action, bool verify_with_vision=true)
Execute a single action with optional vision verification.
GuiAutomationClient * gui_client_
const ControlLoopConfig & config() const
Get the current configuration.
absl::StatusOr< VisionAnalysisResult > AnalyzeCurrentGUIState(const std::string &context="")
Analyze the current GUI state without executing actions.
void SetConfig(const ControlLoopConfig &config)
Update configuration.
absl::StatusOr< VisionAnalysisResult > VerifyActionSuccess(const AIAction &action, const std::filesystem::path &before_screenshot, const std::filesystem::path &after_screenshot)
Converts high-level AI actions into executable GUI test scripts.
Main namespace for the application.
Definition controller.cc:20
Represents a single action to be performed in the GUI.
Configuration for the AI GUI control loop.
Result of AI-controlled GUI automation.
std::vector< std::filesystem::path > screenshots_taken
std::vector< ai::AIAction > actions_executed
std::vector< VisionAnalysisResult > vision_analyses
Result of analyzing a screenshot with Gemini Vision.