6#include "absl/strings/str_cat.h"
7#include "absl/strings/str_format.h"
8#include "absl/time/clock.h"
9#include "absl/time/time.h"
23 : gemini_service_(gemini_service),
24 gui_client_(gui_client),
27 throw std::invalid_argument(
"Gemini service cannot be null");
31 throw std::invalid_argument(
"GUI client cannot be null");
41 return absl::OkStatus();
45 const std::string& command) {
48 if (!actions_result.ok()) {
49 return actions_result.status();
56 const std::vector<AIAction>& actions) {
60 for (
const auto& action : actions) {
62 bool action_succeeded =
false;
77 if (!execute_result.ok()) {
78 result.
error_message = std::string(execute_result.status().message());
85 if (execute_result->action_successful) {
86 action_succeeded =
true;
92 if (!refinement.ok()) {
93 result.
error_message = absl::StrCat(
"Failed to refine action: ",
94 refinement.status().message());
98 if (refinement->needs_different_approach) {
100 "Action requires different approach: ", refinement->reasoning);
104 if (refinement->needs_retry) {
106 for (
const auto& [key, value] : refinement->adjusted_parameters) {
119 if (!action_succeeded) {
121 absl::StrFormat(
"Action failed after %d retries", retry_count);
130 if (final_screenshot.ok()) {
135 *final_screenshot,
"Verify all actions completed successfully");
137 if (final_analysis.ok()) {
146 const AIAction& action,
bool verify_with_vision) {
150 std::filesystem::path before_screenshot;
151 if (verify_with_vision) {
153 if (!before_result.ok()) {
154 return before_result.status();
156 before_screenshot = *before_result;
161 std::this_thread::sleep_for(
167 if (!execute_status.ok()) {
169 result.
error_message = std::string(execute_status.message());
174 std::this_thread::sleep_for(
177 if (verify_with_vision) {
180 if (!after_result.ok()) {
181 return after_result.status();
189 result.
description =
"Action executed (no vision verification)";
195 const std::string& context) {
197 if (!screenshot.ok()) {
198 return screenshot.status();
207 const std::string& description) {
211 auto result = yaze::test::CaptureHarnessScreenshot(path.string());
213 return result.status();
216 return std::filesystem::path(result->file_path);
218 return absl::UnimplementedError(
"Screenshot capture requires gRPC support");
226 if (!test_script_result.ok()) {
227 return test_script_result.status();
232 return absl::FailedPreconditionError(
233 "GUI automation client not initialized");
239 std::string target =
"button:Unknown";
246 if (action.
parameters.count(
"click_type") > 0) {
247 const std::string& type = action.
parameters.at(
"click_type");
248 if (type ==
"right") {
250 }
else if (type ==
"middle") {
252 }
else if (type ==
"double") {
259 return result.status();
262 if (!result->success) {
263 return absl::InternalError(
264 absl::StrCat(
"Click action failed: ", result->message));
267 return absl::OkStatus();
271 std::string target =
"input:Unknown";
272 std::string text =
"";
273 bool clear_first =
true;
281 if (action.
parameters.count(
"clear_first") > 0) {
282 clear_first = (action.
parameters.at(
"clear_first") ==
"true");
287 return result.status();
290 if (!result->success) {
291 return absl::InternalError(
292 absl::StrCat(
"Type action failed: ", result->message));
295 return absl::OkStatus();
298 std::string condition =
"visible";
299 int timeout_ms = 5000;
300 int poll_interval_ms = 100;
302 if (action.
parameters.count(
"condition") > 0) {
303 condition = action.
parameters.at(
"condition");
305 if (action.
parameters.count(
"timeout_ms") > 0) {
306 timeout_ms = std::stoi(action.
parameters.at(
"timeout_ms"));
308 if (action.
parameters.count(
"poll_interval_ms") > 0) {
309 poll_interval_ms = std::stoi(action.
parameters.at(
"poll_interval_ms"));
312 auto result =
gui_client_->
Wait(condition, timeout_ms, poll_interval_ms);
314 return result.status();
317 if (!result->success) {
318 return absl::InternalError(
319 absl::StrCat(
"Wait action failed: ", result->message));
322 return absl::OkStatus();
325 std::string condition =
"";
326 if (action.
parameters.count(
"condition") > 0) {
327 condition = action.
parameters.at(
"condition");
332 return result.status();
335 if (!result->success) {
336 return absl::InternalError(absl::StrCat(
337 "Assert action failed: ", result->message,
" (expected: ",
338 result->expected_value,
", actual: ", result->actual_value,
")"));
341 return absl::OkStatus();
349 return absl::InvalidArgumentError(
350 "set_tile action requires map_id, x, y, and tile parameters");
353 int map_id = std::stoi(action.
parameters.at(
"map_id"));
356 std::string tile_str = action.
parameters.at(
"tile");
360 if (!click_result.ok() || !click_result->success) {
361 return absl::InternalError(
"Failed to open Overworld editor");
365 auto wait_result =
gui_client_->
Wait(
"window:Overworld Editor", 2000, 100);
366 if (!wait_result.ok() || !wait_result->success) {
367 return absl::InternalError(
"Overworld editor did not appear");
373 if (!type_result.ok() || !type_result->success) {
374 return absl::InternalError(
"Failed to set map ID");
379 std::string target = absl::StrCat(
"canvas:overworld@", x * 16,
",", y * 16);
381 if (!click_result.ok() || !click_result->success) {
382 return absl::InternalError(
"Failed to click tile position");
385 return absl::OkStatus();
387 return absl::UnimplementedError(absl::StrCat(
388 "Action type not implemented: ",
static_cast<int>(action.
type)));
391 return absl::UnimplementedError(
392 "gRPC GUI automation requires building with -DYAZE_WITH_GRPC=ON");
397 const AIAction& action,
const std::filesystem::path& before_screenshot,
398 const std::filesystem::path& after_screenshot) {
405 auto refinement =
vision_refiner_->RefineAction(original_action, analysis);
406 if (!refinement.ok()) {
407 return refinement.status();
410 AIAction refined_action = original_action;
413 for (
const auto& [key, value] : refinement->adjusted_parameters) {
417 return refined_action;
425 std::cerr <<
"Warning: Failed to create screenshots directory: "
426 << ec.message() << std::endl;
431 const std::string& suffix) {
432 int64_t timestamp = absl::ToUnixMillis(absl::Now());
434 std::string filename = absl::StrFormat(
"ai_gui_%s_%lld.png", suffix,
435 static_cast<long long>(timestamp));
Client for automating YAZE GUI through gRPC.
absl::StatusOr< AutomationResult > Type(const std::string &target, const std::string &text, bool clear_first=false)
Type text into an input field.
absl::StatusOr< AutomationResult > Wait(const std::string &condition, int timeout_ms=5000, int poll_interval_ms=100)
Wait for a condition to be met.
absl::StatusOr< AutomationResult > Assert(const std::string &condition)
Assert a GUI state condition.
absl::StatusOr< AutomationResult > Click(const std::string &target, ClickType type=ClickType::kLeft)
Click a GUI element.
static absl::StatusOr< std::vector< AIAction > > ParseCommand(const std::string &command)
std::unique_ptr< VisionActionRefiner > vision_refiner_
absl::StatusOr< AIAction > RefineActionWithVision(const AIAction &original_action, const VisionAnalysisResult &analysis)
absl::Status ExecuteGRPCAction(const AIAction &action)
GeminiAIService * gemini_service_
absl::Status Initialize(const ControlLoopConfig &config)
Initialize the controller with configuration.
std::filesystem::path GenerateScreenshotPath(const std::string &suffix)
std::filesystem::path screenshots_dir_
absl::StatusOr< std::filesystem::path > CaptureCurrentState(const std::string &description)
AIGUIController(GeminiAIService *gemini_service, GuiAutomationClient *gui_client)
Construct controller with required services.
absl::StatusOr< ControlResult > ExecuteActions(const std::vector< ai::AIAction > &actions)
Execute a sequence of pre-parsed actions.
absl::StatusOr< ControlResult > ExecuteCommand(const std::string &command)
Execute a natural language command with AI vision guidance.
gui::GuiActionGenerator action_generator_
absl::StatusOr< VisionAnalysisResult > ExecuteSingleAction(const AIAction &action, bool verify_with_vision=true)
Execute a single action with optional vision verification.
GuiAutomationClient * gui_client_
const ControlLoopConfig & config() const
Get the current configuration.
ControlLoopConfig config_
absl::StatusOr< VisionAnalysisResult > AnalyzeCurrentGUIState(const std::string &context="")
Analyze the current GUI state without executing actions.
void EnsureScreenshotsDirectory()
absl::StatusOr< VisionAnalysisResult > VerifyActionSuccess(const AIAction &action, const std::filesystem::path &before_screenshot, const std::filesystem::path &after_screenshot)
Uses Gemini Vision to analyze GUI screenshots and refine AI actions.
absl::StatusOr< std::string > GenerateTestScript(const std::vector< ai::AIAction > &actions)
ClickType
Type of click action to perform.
Represents a single action to be performed in the GUI.
std::map< std::string, std::string > parameters
Configuration for the AI GUI control loop.
std::string screenshots_dir
bool enable_vision_verification
bool enable_iterative_refinement
int max_retries_per_action
Result of AI-controlled GUI automation.
std::vector< std::filesystem::path > screenshots_taken
std::vector< ai::AIAction > actions_executed
std::vector< VisionAnalysisResult > vision_analyses
std::string error_message
std::string final_state_description
Result of analyzing a screenshot with Gemini Vision.
std::string error_message