yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
ai_gui_controller.h
Go to the documentation of this file.
1#ifndef YAZE_CLI_SERVICE_AI_AI_GUI_CONTROLLER_H_
2#define YAZE_CLI_SERVICE_AI_AI_GUI_CONTROLLER_H_
3
4#include <filesystem>
5#include <memory>
6#include <string>
7#include <vector>
8
9#include "absl/status/status.h"
10#include "absl/status/statusor.h"
14
15namespace yaze {
16namespace cli {
17
18// Forward declares
19class GeminiAIService;
20class GuiAutomationClient; // In cli namespace, not cli::gui
21
22namespace ai {
23
// Configuration for the AI GUI control loop.
// NOTE(review): the struct header line is absent from this listing; these are
// presumably the fields of ControlLoopConfig — confirm against the real header.
29 int max_iterations = 10; // Max attempts before giving up
30 int screenshot_delay_ms = 500; // Delay before taking screenshots
31 bool enable_vision_verification = true; // Use vision to verify actions
32 bool enable_iterative_refinement = true; // Retry with refined actions
33 int max_retries_per_action = 3; // Max retries for a single action
34 std::string screenshots_dir = "/tmp/yaze/ai_gui_control"; // Screenshot directory; default is under /tmp
35};
36
// Result of AI-controlled GUI automation.
// NOTE(review): the struct header line is absent from this listing; these are
// presumably the fields of ControlResult. The listing's numbering also skips
// original lines 43 and 48, so further fields may exist — confirm against the
// real header.
42 bool success = false; // Defaults to false
44 std::vector<ai::AIAction> actions_executed; // Actions recorded as executed
45 std::vector<VisionAnalysisResult> vision_analyses; // Vision analysis results collected
46 std::vector<std::filesystem::path> screenshots_taken; // Paths of screenshots taken
47 std::string error_message; // Error text; presumably empty on success — confirm
49};
50
82 public:
// Construct controller with required services.
// Both services are passed as raw pointers.
// NOTE(review): ownership and required lifetime of the pointees are not
// visible in this listing — confirm in the implementation file.
88 AIGUIController(GeminiAIService* gemini_service,
89 GuiAutomationClient* gui_client);
90
// Compiler-generated (defaulted) destructor.
91 ~AIGUIController() = default;
92
// Initialize the controller with configuration.
// Takes the configuration by const reference and reports the outcome as an
// absl::Status.
96 absl::Status Initialize(const ControlLoopConfig& config);
97
// Execute a natural language command with AI vision guidance.
// Returns a ControlResult on success, or an error status.
103 absl::StatusOr<ControlResult> ExecuteCommand(const std::string& command);
104
// Execute a sequence of pre-parsed actions.
// Returns a ControlResult on success, or an error status.
110 absl::StatusOr<ControlResult> ExecuteActions(
111 const std::vector<ai::AIAction>& actions);
112
// Execute a single action with optional vision verification.
// verify_with_vision defaults to true. Returns a VisionAnalysisResult on
// success, or an error status.
119 absl::StatusOr<VisionAnalysisResult> ExecuteSingleAction(
120 const AIAction& action, bool verify_with_vision = true);
121
// Analyze the current GUI state without executing actions.
// context defaults to an empty string. Returns a VisionAnalysisResult on
// success, or an error status.
127 absl::StatusOr<VisionAnalysisResult> AnalyzeCurrentGUIState(
128 const std::string& context = "");
129
// Get the current configuration.
// Returns a const reference to the config_ member.
133 const ControlLoopConfig& config() const { return config_; }
134
139
140 private:
// NOTE(review): the listing's numbering skips original lines here, so other
// private members may exist that are not shown — confirm against the real
// header.
// VisionActionRefiner instance held through std::unique_ptr.
143 std::unique_ptr<VisionActionRefiner> vision_refiner_;
// Filesystem path for screenshots; presumably derived from the configured
// screenshots_dir — confirm in the implementation file.
146 std::filesystem::path screenshots_dir_;
147
148 // Helper methods
// Takes a textual description and returns a filesystem path on success, or an
// error status.
// NOTE(review): presumably captures a screenshot and returns where it was
// saved — confirm in the implementation file.
149 absl::StatusOr<std::filesystem::path> CaptureCurrentState(
150 const std::string& description);
151
// Takes one AIAction and reports the outcome as an absl::Status.
// NOTE(review): presumably dispatches the action through the gRPC GUI
// automation client — confirm in the implementation file.
152 absl::Status ExecuteGRPCAction(const AIAction& action);
153
// Takes an action plus the paths of a "before" and an "after" screenshot and
// returns a VisionAnalysisResult on success, or an error status.
// NOTE(review): presumably compares the two screenshots to decide whether the
// action took effect — confirm in the implementation file.
154 absl::StatusOr<VisionAnalysisResult> VerifyActionSuccess(
155 const AIAction& action, const std::filesystem::path& before_screenshot,
156 const std::filesystem::path& after_screenshot);
157
// Takes the original action and a vision analysis result and returns an
// AIAction on success, or an error status.
// NOTE(review): presumably yields an adjusted action for a retry — confirm in
// the implementation file.
158 absl::StatusOr<AIAction> RefineActionWithVision(
159 const AIAction& original_action, const VisionAnalysisResult& analysis);
160
// Builds a filesystem path from the given suffix.
// NOTE(review): presumably rooted at screenshots_dir_; the naming scheme is
// not visible in this listing — confirm in the implementation file.
162 std::filesystem::path GenerateScreenshotPath(const std::string& suffix);
163};
164
165} // namespace ai
166} // namespace cli
167} // namespace yaze
168
169#endif // YAZE_CLI_SERVICE_AI_AI_GUI_CONTROLLER_H_
Client for automating YAZE GUI through gRPC.
High-level controller for AI-driven GUI automation with vision feedback.
std::unique_ptr< VisionActionRefiner > vision_refiner_
absl::StatusOr< AIAction > RefineActionWithVision(const AIAction &original_action, const VisionAnalysisResult &analysis)
absl::Status ExecuteGRPCAction(const AIAction &action)
absl::Status Initialize(const ControlLoopConfig &config)
Initialize the controller with configuration.
std::filesystem::path GenerateScreenshotPath(const std::string &suffix)
std::filesystem::path screenshots_dir_
absl::StatusOr< std::filesystem::path > CaptureCurrentState(const std::string &description)
AIGUIController(GeminiAIService *gemini_service, GuiAutomationClient *gui_client)
Construct controller with required services.
absl::StatusOr< ControlResult > ExecuteActions(const std::vector< ai::AIAction > &actions)
Execute a sequence of pre-parsed actions.
absl::StatusOr< ControlResult > ExecuteCommand(const std::string &command)
Execute a natural language command with AI vision guidance.
gui::GuiActionGenerator action_generator_
absl::StatusOr< VisionAnalysisResult > ExecuteSingleAction(const AIAction &action, bool verify_with_vision=true)
Execute a single action with optional vision verification.
GuiAutomationClient * gui_client_
const ControlLoopConfig & config() const
Get the current configuration.
absl::StatusOr< VisionAnalysisResult > AnalyzeCurrentGUIState(const std::string &context="")
Analyze the current GUI state without executing actions.
void SetConfig(const ControlLoopConfig &config)
Update configuration.
absl::StatusOr< VisionAnalysisResult > VerifyActionSuccess(const AIAction &action, const std::filesystem::path &before_screenshot, const std::filesystem::path &after_screenshot)
Converts high-level AI actions into executable GUI test scripts.
Represents a single action to be performed in the GUI.
Configuration for the AI GUI control loop.
Result of AI-controlled GUI automation.
std::vector< std::filesystem::path > screenshots_taken
std::vector< ai::AIAction > actions_executed
std::vector< VisionAnalysisResult > vision_analyses
Result of analyzing a screenshot with Gemini Vision.