1 #ifndef YAZE_CLI_SERVICE_AI_VISION_ACTION_REFINER_H_
2 #define YAZE_CLI_SERVICE_AI_VISION_ACTION_REFINER_H_
8 #include "absl/status/statusor.h"
93 const std::filesystem::path& screenshot_path,
94 const std::string& context = "");
105 const std::filesystem::path& before_screenshot,
106 const std::filesystem::path& after_screenshot);
125 const std::filesystem::path& screenshot_path,
126 const std::string& element_name);
134 const std::filesystem::path& screenshot_path);
148 const std::string& response,
const AIAction& action);
Uses Gemini Vision to analyze GUI screenshots and refine AI actions.
std::string BuildWidgetExtractionPrompt()
VisionAnalysisResult ParseAnalysisResponse(const std::string &response)
absl::StatusOr< VisionAnalysisResult > VerifyAction(const AIAction &action, const std::filesystem::path &before_screenshot, const std::filesystem::path &after_screenshot)
Verify an action was successful by comparing before/after screenshots.
GeminiAIService * gemini_service_
std::string BuildVerificationPrompt(const AIAction &action)
absl::StatusOr< std::vector< std::string > > ExtractVisibleWidgets(const std::filesystem::path &screenshot_path)
Extract all visible widgets from a screenshot.
absl::StatusOr< ActionRefinement > RefineAction(const AIAction &original_action, const VisionAnalysisResult &analysis)
Refine an action based on vision analysis feedback.
absl::StatusOr< std::map< std::string, std::string > > LocateUIElement(const std::filesystem::path &screenshot_path, const std::string &element_name)
Find a specific UI element in a screenshot.
std::string BuildAnalysisPrompt(const std::string &context)
std::string BuildElementLocationPrompt(const std::string &element_name)
absl::StatusOr< VisionAnalysisResult > AnalyzeScreenshot(const std::filesystem::path &screenshot_path, const std::string &context="")
Analyze the current GUI state from a screenshot.
VisionAnalysisResult ParseVerificationResponse(const std::string &response, const AIAction &action)
Main namespace for the application.
Represents a single action to be performed in the GUI.
Refined action parameters based on vision analysis.
std::map< std::string, std::string > adjusted_parameters
bool needs_different_approach
Result of analyzing a screenshot with Gemini Vision.
std::vector< std::string > widgets
std::vector< std::string > suggestions
std::string error_message