11#include "absl/strings/str_cat.h"
12#include "absl/strings/str_format.h"
13#include "absl/strings/str_split.h"
14#include "absl/strings/strip.h"
15#include "absl/time/clock.h"
16#include "absl/time/time.h"
21#include <TargetConditionals.h>
24#if defined(__APPLE__) && (TARGET_OS_IPHONE == 1 || TARGET_IPHONE_SIMULATOR == 1)
26#define YAZE_AI_IOS_URLSESSION 1
34#include "nlohmann/json.hpp"
37#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
38#include <openssl/crypto.h>
39#include <openssl/err.h>
40#include <openssl/ssl.h>
43static std::atomic<bool> g_openssl_initialized{
false};
44static std::mutex g_openssl_init_mutex;
46static void InitializeOpenSSL() {
47 std::lock_guard<std::mutex> lock(g_openssl_init_mutex);
48 if (!g_openssl_initialized.exchange(
true)) {
50 OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS,
52 std::cerr <<
"✓ OpenSSL initialized for HTTPS support" << std::endl;
// --- GeminiAIService constructor (FRAGMENT) ---
// NOTE(review): the signature line and several interior lines are missing
// from this extraction chunk — confirm against the original file before
// editing. Visible flow: cache config, optionally log, load the prompt
// catalogue, then resolve/build the system instruction.
// Member-init list: caches the function-calling flag and copies the config.
62   : function_calling_enabled_(config.use_function_calling), config_(config) {
63   if (config_.verbose) {
64     std::cerr <<
"[DEBUG] Initializing Gemini service..." << std::endl;
65     std::cerr <<
"[DEBUG] Function calling: "
66               << (function_calling_enabled_ ?
"enabled" :
"disabled")
68     std::cerr <<
"[DEBUG] Prompt version: " << config_.prompt_version
// When built with OpenSSL, HTTPS support is initialized eagerly here
// (presumably via InitializeOpenSSL() — the call line is not visible).
72#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
75   if (config_.verbose) {
76     std::cerr <<
"[DEBUG] OpenSSL initialized for HTTPS" << std::endl;
// Prompt catalogue selection: the "v2" prompt version gets its own YAML;
// every other version falls back to the default catalogue.
81   std::string catalogue_path = config_.prompt_version ==
"v2"
82                                    ?
"assets/agent/prompt_catalogue_v2.yaml"
83                                    :
"assets/agent/prompt_catalogue.yaml";
// Catalogue load failure is non-fatal: a warning is printed and startup
// continues (the service can still work without the catalogue).
84   if (
auto status = prompt_builder_.LoadResourceCatalogue(catalogue_path);
86     std::cerr <<
"⚠️ Failed to load agent prompt catalogue: "
87               << status.message() << std::endl;
90   if (config_.verbose) {
91     std::cerr <<
"[DEBUG] Loaded prompt catalogue" << std::endl;
// Only synthesize a system instruction when the caller did not provide one.
94   if (config_.system_instruction.empty()) {
95     if (config_.verbose) {
96       std::cerr <<
"[DEBUG] Building system instruction..." << std::endl;
// Pick the prompt file matching the configured prompt version; anything
// other than "v3"/"v2" falls through to the default system prompt.
100     std::string prompt_file;
101     if (config_.prompt_version ==
"v3") {
102       prompt_file =
"agent/system_prompt_v3.txt";
103     }
else if (config_.prompt_version ==
"v2") {
104       prompt_file =
"agent/system_prompt_v2.txt";
106       prompt_file =
"agent/system_prompt.txt";
109     auto prompt_path = util::PlatformPaths::FindAsset(prompt_file);
// If the asset resolves, slurp the whole prompt file into the config.
112     if (prompt_path.ok()) {
113       std::ifstream file(prompt_path->string());
115         std::stringstream buffer;
116         buffer << file.rdbuf();
117         config_.system_instruction = buffer.str();
118         if (config_.verbose) {
119           std::cerr <<
"[DEBUG] Loaded prompt: " << prompt_path->string()
// Fallback path (asset missing): build the instruction programmatically,
// optionally with few-shot examples when enhanced prompting is enabled.
128       if (config_.use_enhanced_prompting) {
129         config_.system_instruction =
130             prompt_builder_.BuildSystemInstructionWithExamples();
132         config_.system_instruction = BuildSystemInstruction();
137   if (config_.verbose) {
138     std::cerr <<
"[DEBUG] Gemini service initialized" << std::endl;
142void GeminiAIService::EnableFunctionCalling(
bool enable) {
143 function_calling_enabled_ = enable;
146std::vector<std::string> GeminiAIService::GetAvailableTools()
const {
147 return {
"resource-list",
"resource-search",
148 "dungeon-list-sprites",
"dungeon-describe-room",
149 "overworld-find-tile",
"overworld-describe-map",
150 "overworld-list-warps"};
// Builds the JSON function-call schema string sent to Gemini (FRAGMENT —
// several interior lines, including early returns and the enclosing
// try/#else structure, are missing from this extraction).
// Resolution order visible here:
//   1) prompt_builder_-generated schemas, if non-empty and not "[]";
//   2) the bundled "agent/function_schemas.json" asset, parsed and re-dumped.
153std::string GeminiAIService::BuildFunctionCallSchemas() {
154#ifndef YAZE_WITH_JSON
// Preferred source: schemas generated by the prompt builder.
159   std::string schemas = prompt_builder_.BuildFunctionCallSchemas();
160   if (!schemas.empty() && schemas !=
"[]") {
// Fallback: load the static schema asset shipped with the app.
165   auto schema_path_or =
166       util::PlatformPaths::FindAsset(
"agent/function_schemas.json");
168   if (!schema_path_or.ok()) {
169     if (config_.verbose) {
170       std::cerr <<
"⚠️ Function schemas file not found: "
171                 << schema_path_or.status().message() << std::endl;
177   std::ifstream file(schema_path_or->string());
178   if (!file.is_open()) {
179     std::cerr <<
"⚠️ Failed to open function schemas file: "
180               << schema_path_or->string() << std::endl;
// Parse and re-serialize so callers always get canonical JSON text.
185     nlohmann::json schemas_json;
186     file >> schemas_json;
187     return schemas_json.dump();
188   }
// Parse failures are reported but non-fatal (the return on failure is not
// visible in this chunk — TODO(review): confirm fallback value).
catch (
const nlohmann::json::exception& e) {
189     std::cerr <<
"⚠️ Failed to parse function schemas JSON: " << e.what()
196std::string GeminiAIService::BuildSystemInstruction() {
199 return prompt_builder_.BuildSystemInstruction();
202void GeminiAIService::SetRomContext(Rom* rom) {
203 prompt_builder_.SetRom(rom);
// Lists the Gemini models available to the configured API key (FRAGMENT —
// interior lines such as early returns, #else branches, the try block, and
// pipe cleanup are missing from this extraction).
// With no API key, returns a hard-coded default list; otherwise queries the
// models endpoint over HTTPS (URLSession on iOS, curl-via-popen elsewhere).
206absl::StatusOr<std::vector<ModelInfo>> GeminiAIService::ListAvailableModels() {
207#ifndef YAZE_WITH_JSON
208   return absl::UnimplementedError(
"Gemini AI service requires JSON support");
// No key configured: return a static catalogue rather than failing.
210   if (config_.api_key.empty()) {
212     std::vector<ModelInfo> defaults = {
213         {.name =
"gemini-3.0-preview",
214          .display_name =
"Gemini 3.0 Preview",
215          .provider =
"gemini",
216          .description =
"Cutting-edge model, currently in preview"},
217         {.name =
"gemini-3.0-flash-preview",
218          .display_name =
"Gemini 3.0 Flash Preview",
219          .provider =
"gemini",
220          .description =
"Fastest preview model"},
221         {.name =
"gemini-2.5-pro",
222          .display_name =
"Gemini 2.5 Pro",
223          .provider =
"gemini",
224          .description =
"High intelligence for complex tasks"},
225         {.name =
"gemini-2.5-flash",
226          .display_name =
"Gemini 2.5 Flash",
227          .provider =
"gemini",
228          .description =
"Fastest multimodal model"}};
// NOTE(review): the API key is embedded in the URL query string here; it
// will leak into verbose logs and (on the curl path) process listings.
233   std::string endpoint =
234       "https://generativelanguage.googleapis.com/v1beta/models?key=" +
237   if (config_.verbose) {
238     std::cerr <<
"[DEBUG] Listing models: "
239               << endpoint.substr(0, endpoint.find(
"key=")) <<
"...'"
243   std::string response_str;
// iOS: use the native URLSession bridge with an 8-second timeout.
244#if defined(YAZE_AI_IOS_URLSESSION)
245   auto resp_or = ios::UrlSessionHttpRequest(
"GET", endpoint, {},
"", 8000);
247     if (config_.verbose) {
248       std::cerr <<
"[DEBUG] Gemini models request failed: "
249                 << resp_or.status().message() << std::endl;
251     return absl::InternalError(
"Failed to list Gemini models");
253   response_str = resp_or->body;
// Desktop: shell out to curl and read the response from the pipe.
256   std::string curl_cmd =
"curl -s -X GET '" + endpoint +
"' 2>&1";
259   FILE* pipe = _popen(curl_cmd.c_str(),
"r");
261   FILE* pipe = popen(curl_cmd.c_str(),
"r");
264     return absl::InternalError(
"Failed to execute curl command");
268   while (fgets(buffer,
sizeof(buffer), pipe) !=
nullptr) {
269     response_str += buffer;
// Non-throwing parse; a discarded result signals malformed JSON.
279   auto models_json = nlohmann::json::parse(response_str,
nullptr,
false);
280   if (models_json.is_discarded()) {
281     return absl::InternalError(
"Failed to parse Gemini models JSON");
// Response lacks a "models" array: fall back to a minimal default list.
// NOTE(review): name "gemini-2.5-flash" is paired with display name
// "Gemini 2.0 Flash" — looks like a copy/paste inconsistency; confirm.
284   if (!models_json.contains(
"models")) {
286     std::vector<ModelInfo> defaults = {{.name =
"gemini-2.5-flash",
287                                         .display_name =
"Gemini 2.0 Flash",
288                                         .provider =
"gemini"},
289                                        {.name =
"gemini-1.5-flash",
290                                         .display_name =
"Gemini 1.5 Flash",
291                                         .provider =
"gemini"},
292                                        {.name =
"gemini-1.5-pro",
293                                         .display_name =
"Gemini 1.5 Pro",
294                                         .provider =
"gemini"}};
// Convert each returned model entry into a ModelInfo, keeping only
// gemini-family models and stripping the "models/" name prefix.
298   std::vector<ModelInfo> models;
299   for (
const auto& m : models_json[
"models"]) {
300     std::string
name = m.value(
"name",
"");
302     if (absl::StartsWith(name,
"models/")) {
307     if (absl::StartsWith(name,
"gemini")) {
310       info.display_name = m.value(
"displayName", name);
311       info.provider =
"gemini";
312       info.description = m.value(
"description",
"");
313       info.family =
"gemini";
314       info.is_local =
false;
315       models.push_back(std::move(info));
320   }
catch (
const std::exception& e) {
321     return absl::InternalError(
322         absl::StrCat(
"Failed to list models: ", e.what()));
// Verifies that the Gemini API is reachable and the configured key/model are
// valid (FRAGMENT — interior lines including the try block, some braces and
// an #endif are missing from this extraction). Performs a GET on the model's
// metadata endpoint and maps HTTP status codes to absl::Status values.
327absl::Status GeminiAIService::CheckAvailability() {
328#ifndef YAZE_WITH_JSON
329   return absl::UnimplementedError(
330       "Gemini AI service requires JSON support. Build with "
331       "-DYAZE_WITH_JSON=ON");
334   if (config_.verbose) {
335     std::cerr <<
"[DEBUG] CheckAvailability: start" << std::endl;
// Precondition: an API key must be configured before any network call.
338   if (config_.api_key.empty()) {
339     return absl::FailedPreconditionError(
340         "❌ Gemini API key not configured\n"
341         "   Set GEMINI_API_KEY environment variable\n"
342         "   Get your API key at: https://makersuite.google.com/app/apikey");
345   if (config_.verbose) {
346     std::cerr <<
"[DEBUG] CheckAvailability: creating HTTPS client"
// Uses httplib directly here (unlike the curl path in GenerateResponse),
// with a short 5-second connection timeout for a quick health check.
350   httplib::Client cli(
"https://generativelanguage.googleapis.com");
351   if (config_.verbose) {
352     std::cerr <<
"[DEBUG] CheckAvailability: client created" << std::endl;
355   cli.set_connection_timeout(5, 0);
357   if (config_.verbose) {
358     std::cerr <<
"[DEBUG] CheckAvailability: building endpoint" << std::endl;
// Probe the configured model's metadata; auth via x-goog-api-key header.
360   std::string test_endpoint =
"/v1beta/models/" + config_.model;
361   httplib::Headers headers = {
362       {
"x-goog-api-key", config_.api_key},
365   if (config_.verbose) {
366     std::cerr <<
"[DEBUG] CheckAvailability: making request to "
367               << test_endpoint << std::endl;
369   auto res = cli.Get(test_endpoint.c_str(), headers);
371   if (config_.verbose) {
372     std::cerr <<
"[DEBUG] CheckAvailability: got response" << std::endl;
// No response object at all: network-level failure.
376     return absl::UnavailableError(
377         "❌ Cannot reach Gemini API\n"
378         "   Check your internet connection");
// 401/403: the key was rejected.
381   if (res->status == 401 || res->status == 403) {
382     return absl::PermissionDeniedError(
383         "❌ Invalid Gemini API key\n"
384         "   Verify your key at: https://makersuite.google.com/app/apikey");
// 404: the configured model name does not exist.
387   if (res->status == 404) {
388     return absl::NotFoundError(
389         absl::StrCat(
"❌ Model '", config_.model,
"' not found\n",
390                      "   Try: gemini-2.5-flash or gemini-1.5-pro"));
// Any other non-200 is surfaced verbatim with the response body.
393   if (res->status != 200) {
394     return absl::InternalError(absl::StrCat(
395         "❌ Gemini API error: ", res->status,
"\n   ", res->body));
398   return absl::OkStatus();
399 }
// Exception safety: convert thrown exceptions into InternalError so the
// health check never propagates exceptions to callers.
catch (
const std::exception& e) {
400   if (config_.verbose) {
401     std::cerr <<
"[DEBUG] CheckAvailability: EXCEPTION: " << e.what()
404   return absl::InternalError(
405       absl::StrCat(
"Exception during availability check: ", e.what()));
407   if (config_.verbose) {
408     std::cerr <<
"[DEBUG] CheckAvailability: UNKNOWN EXCEPTION" << std::endl;
410   return absl::InternalError(
"Unknown exception during availability check");
415absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
416 const std::string& prompt) {
417 return GenerateResponse(
418 {{{agent::ChatMessage::Sender::kUser, prompt, absl::Now()}}});
// Core chat entry point: converts the recent history into a Gemini
// generateContent request, sends it (URLSession on iOS, curl elsewhere),
// and parses the reply into an AgentResponse (FRAGMENT — interior lines
// including the try block, #else/#endif markers, and several statements are
// missing from this extraction; confirm against the original file).
421absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
422     const std::vector<agent::ChatMessage>& history) {
423#ifndef YAZE_WITH_JSON
424   return absl::UnimplementedError(
425       "Gemini AI service requires JSON support. Build with "
426       "-DYAZE_WITH_JSON=ON");
428   if (history.empty()) {
429     return absl::InvalidArgumentError(
"History cannot be empty.");
434   std::string prompt = prompt_builder_.BuildPromptFromHistory(history);
442   if (config_.api_key.empty()) {
443     return absl::FailedPreconditionError(
"Gemini API key not configured");
// Latency measurement starts before any network work.
446   absl::Time request_start = absl::Now();
449   if (config_.verbose) {
450     std::cerr <<
"[DEBUG] Using curl for HTTPS request" << std::endl;
451     std::cerr <<
"[DEBUG] Processing " << history.size()
452               <<
" messages in history" << std::endl;
// Build the "contents" array from at most the last 10 history messages.
// NOTE(review): signed int start_idx is compared against size_t i below —
// works because start_idx >= 0, but it's an avoidable signed/unsigned mix.
457   nlohmann::json contents = nlohmann::json::array();
460   int start_idx = std::max(0,
static_cast<int>(history.size()) - 10);
461   for (
size_t i = start_idx; i < history.size(); ++i) {
462     const auto& msg = history[i];
// Gemini roles are "user"/"model"; anything non-user maps to "model".
464         (msg.sender == agent::ChatMessage::Sender::kUser) ?
"user" :
"model";
466     nlohmann::json message = {{
"role", role},
467                               {
"parts", {{{
"text", msg.message}}}}};
468     contents.push_back(message);
// Gemini requires the conversation to end on a user turn; if the last
// message was from the agent, append a synthetic user continuation.
473   if (!history.empty() &&
474       history.back().sender == agent::ChatMessage::Sender::kAgent) {
476     nlohmann::json user_continuation = {
478         {
"parts", {{{
"text",
"Please continue or clarify your response."}}}}};
479     contents.push_back(user_continuation);
// Assemble the request: system instruction + contents + generation config.
483   nlohmann::json request_body = {
484       {
"system_instruction",
485        {{
"parts", {{
"text", config_.system_instruction}}}}},
486       {
"contents", contents},
488        {{
"temperature", config_.temperature},
489         {
"maxOutputTokens", config_.max_output_tokens}}}};
491   if (config_.verbose) {
492     std::cerr <<
"[DEBUG] Sending " << contents.size()
493               <<
" conversation turns to Gemini" << std::endl;
// Without function calling, ask for a structured JSON body instead.
498   if (!function_calling_enabled_) {
499     request_body[
"generationConfig"][
"responseMimeType"] =
"application/json";
// With function calling, attach tool declarations; the schema string may be
// a bare array, an object with "function_declarations", or a single object.
503   if (function_calling_enabled_) {
505     std::string schemas_str = BuildFunctionCallSchemas();
506     if (config_.verbose) {
507       std::cerr <<
"[DEBUG] Function calling schemas: "
508                 << schemas_str.substr(0, 200) <<
"..." << std::endl;
511       nlohmann::json schemas = nlohmann::json::parse(schemas_str);
515       if (schemas.is_array()) {
517         request_body[
"tools"] = {{{
"function_declarations", schemas}}};
518       }
else if (schemas.is_object() &&
519                  schemas.contains(
"function_declarations")) {
521         request_body[
"tools"] = {
522             {{
"function_declarations", schemas[
"function_declarations"]}}};
525         request_body[
"tools"] = {
526             {{
"function_declarations", nlohmann::json::array({schemas})}}};
528       }
// Schema parse failure is logged but non-fatal: the request proceeds
// without tools rather than aborting the whole generation.
catch (
const nlohmann::json::exception& e) {
529         std::cerr <<
"⚠️ Failed to parse function schemas: " << e.what()
534   std::string endpoint =
535       "https://generativelanguage.googleapis.com/v1beta/models/" +
536       config_.model +
":generateContent";
537   std::string response_str;
// iOS transport: native URLSession with a 60-second timeout.
538#if defined(YAZE_AI_IOS_URLSESSION)
539   std::map<std::string, std::string> headers;
540   headers.emplace(
"Content-Type",
"application/json");
541   headers.emplace(
"x-goog-api-key", config_.api_key);
542   auto resp_or = ios::UrlSessionHttpRequest(
543       "POST", endpoint, headers, request_body.dump(), 60000);
545     return resp_or.status();
547   if (resp_or->status_code != 200) {
548     return absl::InternalError(
549         absl::StrCat(
"Gemini API error: ", resp_or->status_code,
"\n",
552   response_str = resp_or->body;
// Desktop transport: write the body to a temp file and POST it via curl.
// NOTE(review): the API key ends up on the curl command line and the body
// in a world-readable /tmp file — both are visible to other local users.
555   std::string temp_file =
"/tmp/gemini_request.json";
556   std::ofstream out(temp_file);
557   out << request_body.dump();
561   std::string curl_cmd =
"curl -s -X POST '" + endpoint +
563       "-H 'Content-Type: application/json' "
564       "-H 'x-goog-api-key: " +
570   if (config_.verbose) {
571     std::cerr <<
"[DEBUG] Executing API request..." << std::endl;
575   FILE* pipe = _popen(curl_cmd.c_str(),
"r");
577   FILE* pipe = popen(curl_cmd.c_str(),
"r");
580     return absl::InternalError(
"Failed to execute curl command");
584   while (fgets(buffer,
sizeof(buffer), pipe) !=
nullptr) {
585     response_str += buffer;
589   int status = _pclose(pipe);
591   int status = pclose(pipe);
// Best-effort cleanup of the temp request file before checking status.
593   std::remove(temp_file.c_str());
596     return absl::InternalError(
597         absl::StrCat(
"Curl failed with status ", status));
601   if (response_str.empty()) {
602     return absl::InternalError(
"Empty response from Gemini API");
// Verbose mode: dump the first 500 bytes of the raw response (dimmed).
606   if (config_.verbose) {
609         <<
"🔍 Raw Gemini API Response:"
612         <<
"\033[2m" << response_str.substr(0, 500) <<
"\033[0m"
616   if (config_.verbose) {
617     std::cerr <<
"[DEBUG] Parsing response..." << std::endl;
619   auto parsed_or = ParseGeminiResponse(response_str);
620   if (!parsed_or.ok()) {
621     return parsed_or.status();
// Annotate the parsed response with provenance and request parameters.
623   AgentResponse agent_response = std::move(parsed_or.value());
624   agent_response.provider =
"gemini";
625   agent_response.model = config_.model;
626   agent_response.latency_seconds =
627       absl::ToDoubleSeconds(absl::Now() - request_start);
628   agent_response.parameters[
"prompt_version"] = config_.prompt_version;
629   agent_response.parameters[
"temperature"] =
630       absl::StrFormat(
"%.2f", config_.temperature);
631   agent_response.parameters[
"max_output_tokens"] =
632       absl::StrFormat(
"%d", config_.max_output_tokens);
633   agent_response.parameters[
"function_calling"] =
634       function_calling_enabled_ ?
"true" :
"false";
635   return agent_response;
637 }
// All exceptions are converted into InternalError; the function never
// propagates exceptions to callers.
catch (
const std::exception& e) {
638   if (config_.verbose) {
639     std::cerr <<
"[ERROR] Exception: " << e.what() << std::endl;
641   return absl::InternalError(
642       absl::StrCat(
"Exception during generation: ", e.what()));
644   if (config_.verbose) {
645     std::cerr <<
"[ERROR] Unknown exception" << std::endl;
647   return absl::InternalError(
"Unknown exception during generation");
// Parses the raw generateContent JSON into an AgentResponse (FRAGMENT —
// interior lines, including some declarations such as the ToolCall
// `tool_call` locals and several braces, are missing from this extraction).
// Extracts, per candidate part: structured JSON text (text_response /
// reasoning / commands / tool_calls), free-form command lines, and native
// "functionCall" parts.
652absl::StatusOr<AgentResponse> GeminiAIService::ParseGeminiResponse(
653     const std::string& response_body) {
654#ifndef YAZE_WITH_JSON
655   return absl::UnimplementedError(
"JSON support required");
657   AgentResponse agent_response;
// Non-throwing parse of the envelope; discarded == malformed JSON.
659   auto response_json = nlohmann::json::parse(response_body,
nullptr,
false);
660   if (response_json.is_discarded()) {
661     return absl::InternalError(
"❌ Failed to parse Gemini response JSON");
665   if (!response_json.contains(
"candidates") ||
666       response_json[
"candidates"].empty()) {
667     return absl::InternalError(
"❌ No candidates in Gemini response");
// Walk every candidate part; candidates without content/parts are skipped.
670   for (
const auto& candidate : response_json[
"candidates"]) {
671     if (!candidate.contains(
"content") ||
672         !candidate[
"content"].contains(
"parts")) {
676     for (
const auto& part : candidate[
"content"][
"parts"]) {
// Text parts: may carry a JSON payload, optionally fenced in markdown.
677       if (part.contains(
"text")) {
678         std::string text_content = part[
"text"].get<std::string>();
681         if (config_.verbose) {
684               <<
"🔍 Raw LLM Response:"
687               <<
"\033[2m" << text_content <<
"\033[0m"
// Strip surrounding whitespace and any ```json / ``` code fences before
// attempting to parse the text as structured JSON.
692         text_content = std::string(absl::StripAsciiWhitespace(text_content));
693         if (absl::StartsWith(text_content,
"```json")) {
694           text_content = text_content.substr(7);
695         }
else if (absl::StartsWith(text_content,
"```")) {
696           text_content = text_content.substr(3);
698         if (absl::EndsWith(text_content,
"```")) {
699           text_content = text_content.substr(0, text_content.length() - 3);
701         text_content = std::string(absl::StripAsciiWhitespace(text_content));
704         auto parsed_text = nlohmann::json::parse(text_content,
nullptr,
false);
// Structured path: pull the known fields out of the JSON object.
705         if (!parsed_text.is_discarded()) {
707           if (parsed_text.contains(
"text_response") &&
708               parsed_text[
"text_response"].is_string()) {
709             agent_response.text_response =
710                 parsed_text[
"text_response"].get<std::string>();
714           if (parsed_text.contains(
"reasoning") &&
715               parsed_text[
"reasoning"].is_string()) {
716             agent_response.reasoning =
717                 parsed_text[
"reasoning"].get<std::string>();
// Commands: strings, with any leading "z3ed " prefix removed.
721           if (parsed_text.contains(
"commands") &&
722               parsed_text[
"commands"].is_array()) {
723             for (
const auto& cmd : parsed_text[
"commands"]) {
724               if (cmd.is_string()) {
725                 std::string command = cmd.get<std::string>();
726                 if (absl::StartsWith(command,
"z3ed ")) {
727                   command = command.substr(5);
729                 agent_response.commands.push_back(command);
// Tool calls from the structured JSON body: args are normalized to
// strings (numbers via std::to_string(double), booleans to "true"/"false").
735           if (parsed_text.contains(
"tool_calls") &&
736               parsed_text[
"tool_calls"].is_array()) {
737             for (
const auto& call : parsed_text[
"tool_calls"]) {
738               if (call.contains(
"tool_name") && call[
"tool_name"].is_string()) {
740                 tool_call.tool_name = call[
"tool_name"].get<std::string>();
742                 if (call.contains(
"args") && call[
"args"].is_object()) {
743                   for (
auto& [key, value] : call[
"args"].items()) {
744                     if (value.is_string()) {
745                       tool_call.args[
key] = value.get<std::string>();
746                     }
else if (value.is_number()) {
747                       tool_call.args[
key] = std::to_string(value.get<
double>());
748                     }
else if (value.is_boolean()) {
749                       tool_call.args[
key] =
750                           value.get<
bool>() ?
"true" :
"false";
754                 agent_response.tool_calls.push_back(tool_call);
// Fallback path (text was not valid JSON): scan line-by-line for strings
// that look like CLI commands and collect them.
761           std::vector<std::string> lines = absl::StrSplit(text_content,
'\n');
762           for (
const auto& line : lines) {
763             std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
764             if (!trimmed.empty() && (absl::StartsWith(trimmed,
"z3ed ") ||
765                                      absl::StartsWith(trimmed,
"palette ") ||
766                                      absl::StartsWith(trimmed,
"overworld ") ||
767                                      absl::StartsWith(trimmed,
"sprite ") ||
768                                      absl::StartsWith(trimmed,
"dungeon "))) {
769               if (absl::StartsWith(trimmed,
"z3ed ")) {
770                 trimmed = trimmed.substr(5);
772               agent_response.commands.push_back(trimmed);
776       }
// Native Gemini function-call parts (used when tools were declared).
else if (part.contains(
"functionCall")) {
777         const auto& call = part[
"functionCall"];
778         if (call.contains(
"name") && call[
"name"].is_string()) {
780           tool_call.tool_name = call[
"name"].get<std::string>();
781           if (call.contains(
"args") && call[
"args"].is_object()) {
782             for (
auto& [key, value] : call[
"args"].items()) {
783               if (value.is_string()) {
784                 tool_call.args[
key] = value.get<std::string>();
785               }
else if (value.is_number()) {
786                 tool_call.args[
key] = std::to_string(value.get<
double>());
790           agent_response.tool_calls.push_back(tool_call);
// An entirely empty extraction is treated as a protocol error.
796   if (agent_response.text_response.empty() && agent_response.commands.empty() &&
797       agent_response.tool_calls.empty()) {
798     return absl::InternalError(
799         "❌ No valid response extracted from Gemini\n"
800         "   Expected at least one of: text_response, commands, or tool_calls\n"
805   return agent_response;
809absl::StatusOr<std::string> GeminiAIService::EncodeImageToBase64(
810 const std::string& image_path)
const {
811#ifndef YAZE_WITH_JSON
813 return absl::UnimplementedError(
814 "Gemini AI service requires JSON support. Build with "
815 "-DYAZE_WITH_JSON=ON");
817 std::ifstream file(image_path, std::ios::binary);
818 if (!file.is_open()) {
819 return absl::NotFoundError(
820 absl::StrCat(
"Failed to open image file: ", image_path));
824 file.seekg(0, std::ios::end);
825 size_t size = file.tellg();
826 file.seekg(0, std::ios::beg);
828 std::vector<unsigned char> buffer(size);
829 if (!file.read(
reinterpret_cast<char*
>(buffer.data()), size)) {
830 return absl::InternalError(
"Failed to read image file");
834 static const char* base64_chars =
835 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
838 encoded.reserve(((size + 2) / 3) * 4);
842 unsigned char char_array_3[3];
843 unsigned char char_array_4[4];
845 for (
size_t idx = 0; idx < size; idx++) {
846 char_array_3[i++] = buffer[idx];
848 char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
850 ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
852 ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
853 char_array_4[3] = char_array_3[2] & 0x3f;
855 for (i = 0; i < 4; i++)
856 encoded += base64_chars[char_array_4[i]];
862 for (j = i; j < 3; j++)
863 char_array_3[j] =
'\0';
865 char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
867 ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
869 ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
871 for (j = 0; j < i + 1; j++)
872 encoded += base64_chars[char_array_4[j]];
// Sends an image plus a text prompt to Gemini generateContent and parses the
// reply (FRAGMENT — interior lines including the try block, parts of the
// request-body literal, and #else/#endif markers are missing from this
// extraction). Mirrors GenerateResponse's transport split: URLSession on
// iOS, curl-via-popen elsewhere.
882absl::StatusOr<AgentResponse> GeminiAIService::GenerateMultimodalResponse(
883     const std::string& image_path,
const std::string& prompt) {
884#ifndef YAZE_WITH_JSON
887   return absl::UnimplementedError(
888       "Gemini AI service requires JSON support. Build with "
889       "-DYAZE_WITH_JSON=ON");
891   if (config_.api_key.empty()) {
892     return absl::FailedPreconditionError(
"Gemini API key not configured");
// MIME type is inferred from the file extension; default is image/png.
896   std::string mime_type =
"image/png";
897   if (image_path.ends_with(
".jpg") || image_path.ends_with(
".jpeg")) {
898     mime_type =
"image/jpeg";
899   }
else if (image_path.ends_with(
".bmp")) {
900     mime_type =
"image/bmp";
901   }
else if (image_path.ends_with(
".webp")) {
902     mime_type =
"image/webp";
// Encode the image as base64 for the inline_data part.
906   auto encoded_or = EncodeImageToBase64(image_path);
907   if (!encoded_or.ok()) {
908     return encoded_or.status();
910   std::string encoded_image = std::move(encoded_or.value());
913   if (config_.verbose) {
914     std::cerr <<
"[DEBUG] Preparing multimodal request with image"
// Request body: an inline_data part (mime type + base64 data) followed by
// the text prompt, plus the usual generation config.
919   nlohmann::json request_body = {
923            {{
"mime_type", mime_type}, {
"data", encoded_image}}}},
924          {{
"text", prompt}}}}}}},
926        {{
"temperature", config_.temperature},
927         {
"maxOutputTokens", config_.max_output_tokens}}}};
929   std::string endpoint =
930       "https://generativelanguage.googleapis.com/v1beta/models/" +
931       config_.model +
":generateContent";
932   std::string response_str;
// iOS transport: URLSession with a 60-second timeout.
933#if defined(YAZE_AI_IOS_URLSESSION)
934   std::map<std::string, std::string> headers;
935   headers.emplace(
"Content-Type",
"application/json");
936   headers.emplace(
"x-goog-api-key", config_.api_key);
937   auto resp_or = ios::UrlSessionHttpRequest(
938       "POST", endpoint, headers, request_body.dump(), 60000);
940     return resp_or.status();
942   if (resp_or->status_code != 200) {
943     return absl::InternalError(
944         absl::StrCat(
"Gemini API error: ", resp_or->status_code,
"\n",
947   response_str = resp_or->body;
// Desktop transport: temp file + curl, same caveats as GenerateResponse
// (key on the command line, request body in a /tmp file).
950   std::string temp_file =
"/tmp/gemini_multimodal_request.json";
951   std::ofstream out(temp_file);
952   out << request_body.dump();
956   std::string curl_cmd =
"curl -s -X POST '" + endpoint +
958       "-H 'Content-Type: application/json' "
959       "-H 'x-goog-api-key: " +
965   if (config_.verbose) {
966     std::cerr <<
"[DEBUG] Executing multimodal API request..." << std::endl;
970   FILE* pipe = _popen(curl_cmd.c_str(),
"r");
972   FILE* pipe = popen(curl_cmd.c_str(),
"r");
975     return absl::InternalError(
"Failed to execute curl command");
979   while (fgets(buffer,
sizeof(buffer), pipe) !=
nullptr) {
980     response_str += buffer;
984   int status = _pclose(pipe);
986   int status = pclose(pipe);
// Clean up the temp request file before evaluating curl's exit status.
988   std::remove(temp_file.c_str());
991     return absl::InternalError(
992         absl::StrCat(
"Curl failed with status ", status));
996   if (response_str.empty()) {
997     return absl::InternalError(
"Empty response from Gemini API");
1000   if (config_.verbose) {
1003         <<
"🔍 Raw Gemini Multimodal Response:"
1006         <<
"\033[2m" << response_str.substr(0, 500) <<
"\033[0m"
// Delegate envelope parsing to the shared response parser.
1010   return ParseGeminiResponse(response_str);
1012 }
// Exceptions are converted to InternalError; nothing propagates.
catch (
const std::exception& e) {
1013   if (config_.verbose) {
1014     std::cerr <<
"[ERROR] Exception: " << e.what() << std::endl;
1016   return absl::InternalError(
1017       absl::StrCat(
"Exception during multimodal generation: ", e.what()));
GeminiAIService(const GeminiConfig &)