#include <algorithm>
#include <atomic>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>

#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_split.h"
#include "absl/strings/strip.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "httplib.h"
#include "nlohmann/json.hpp"
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
#include <openssl/crypto.h>
#include <openssl/err.h>
#include <openssl/ssl.h>

// Guards one-time OpenSSL initialization across all service instances.
static std::atomic<bool> g_openssl_initialized{false};
static std::mutex g_openssl_init_mutex;

static void InitializeOpenSSL() {
  std::lock_guard<std::mutex> lock(g_openssl_init_mutex);
  if (!g_openssl_initialized.exchange(true)) {
    OPENSSL_init_ssl(
        OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS,
        nullptr);
    std::cerr << "✓ OpenSSL initialized for HTTPS support" << std::endl;
  }
}
#endif  // CPPHTTPLIB_OPENSSL_SUPPORT
GeminiAIService::GeminiAIService(const GeminiConfig& config)
    : function_calling_enabled_(config.use_function_calling), config_(config) {
  if (config_.verbose) {
    std::cerr << "[DEBUG] Initializing Gemini service..." << std::endl;
    std::cerr << "[DEBUG] Function calling: "
              << (function_calling_enabled_ ? "enabled" : "disabled")
              << std::endl;
    std::cerr << "[DEBUG] Prompt version: " << config_.prompt_version
              << std::endl;
  }

#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
  InitializeOpenSSL();
  if (config_.verbose) {
    std::cerr << "[DEBUG] OpenSSL initialized for HTTPS" << std::endl;
  }
#endif

  // Pick the prompt catalogue matching the configured prompt version.
  std::string catalogue_path = config_.prompt_version == "v2"
                                   ? "assets/agent/prompt_catalogue_v2.yaml"
                                   : "assets/agent/prompt_catalogue.yaml";
  if (auto status = prompt_builder_.LoadResourceCatalogue(catalogue_path);
      !status.ok()) {
    std::cerr << "⚠️ Failed to load agent prompt catalogue: "
              << status.message() << std::endl;
  } else if (config_.verbose) {
    std::cerr << "[DEBUG] Loaded prompt catalogue" << std::endl;
  }

  // Resolve the system instruction: prefer an explicit config value, then a
  // versioned prompt file on disk, then a programmatically built fallback.
  if (config_.system_instruction.empty()) {
    if (config_.verbose) {
      std::cerr << "[DEBUG] Building system instruction..." << std::endl;
    }

    std::string prompt_file;
    if (config_.prompt_version == "v3") {
      prompt_file = "agent/system_prompt_v3.txt";
    } else if (config_.prompt_version == "v2") {
      prompt_file = "agent/system_prompt_v2.txt";
    } else {
      prompt_file = "agent/system_prompt.txt";
    }

    auto prompt_path = util::PlatformPaths::FindAsset(prompt_file);
    if (prompt_path.ok()) {
      std::ifstream file(prompt_path->string());
      std::stringstream buffer;
      buffer << file.rdbuf();
      config_.system_instruction = buffer.str();
      if (config_.verbose) {
        std::cerr << "[DEBUG] Loaded prompt: " << prompt_path->string()
                  << std::endl;
      }
    } else if (config_.use_enhanced_prompting) {
      config_.system_instruction =
          prompt_builder_.BuildSystemInstructionWithExamples();
    } else {
      config_.system_instruction = BuildSystemInstruction();
    }
  }

  if (config_.verbose) {
    std::cerr << "[DEBUG] Gemini service initialized" << std::endl;
  }
}
void GeminiAIService::EnableFunctionCalling(bool enable) {
  function_calling_enabled_ = enable;
}

std::vector<std::string> GeminiAIService::GetAvailableTools() const {
  return {"resource-list",        "resource-search",
          "dungeon-list-sprites", "dungeon-describe-room",
          "overworld-find-tile",  "overworld-describe-map",
          "overworld-list-warps"};
}
std::string GeminiAIService::BuildFunctionCallSchemas() {
#ifndef YAZE_WITH_JSON
  return "[]";
#else
  std::string schemas = prompt_builder_.BuildFunctionCallSchemas();
  if (!schemas.empty() && schemas != "[]") {
    return schemas;
  }

  auto schema_path_or =
      util::PlatformPaths::FindAsset("agent/function_schemas.json");
  if (!schema_path_or.ok()) {
    if (config_.verbose) {
      std::cerr << "⚠️ Function schemas file not found: "
                << schema_path_or.status().message() << std::endl;
    }
    return "[]";
  }

  std::ifstream file(schema_path_or->string());
  if (!file.is_open()) {
    std::cerr << "⚠️ Failed to open function schemas file: "
              << schema_path_or->string() << std::endl;
    return "[]";
  }

  try {
    nlohmann::json schemas_json;
    file >> schemas_json;
    return schemas_json.dump();
  } catch (const nlohmann::json::exception& e) {
    std::cerr << "⚠️ Failed to parse function schemas JSON: " << e.what()
              << std::endl;
    return "[]";
  }
#endif
}
std::string GeminiAIService::BuildSystemInstruction() {
  return prompt_builder_.BuildSystemInstruction();
}

void GeminiAIService::SetRomContext(Rom* rom) {
  prompt_builder_.SetRom(rom);
}
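// Queries the Gemini REST API for the model list via curl. Without an API key
// it returns a hard-coded default list instead. The reply is expected to look
// roughly like this (illustrative, abbreviated):
//
//   {"models": [{"name": "models/gemini-2.5-flash",
//                "displayName": "Gemini 2.5 Flash",
//                "description": "..."}]}
//
// Only entries whose name begins with "gemini" are kept, with the "models/"
// prefix stripped.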
absl::StatusOr<std::vector<ModelInfo>> GeminiAIService::ListAvailableModels() {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError("Gemini AI service requires JSON support");
#else
  if (config_.api_key.empty()) {
    // No key: return a hard-coded default list instead of querying the API.
    std::vector<ModelInfo> defaults = {
        {.name = "gemini-3.0-preview",
         .display_name = "Gemini 3.0 Preview",
         .provider = "gemini",
         .description = "Cutting-edge model, currently in preview"},
        {.name = "gemini-3.0-flash-preview",
         .display_name = "Gemini 3.0 Flash Preview",
         .provider = "gemini",
         .description = "Fastest preview model"},
        {.name = "gemini-2.5-pro",
         .display_name = "Gemini 2.5 Pro",
         .provider = "gemini",
         .description = "High intelligence for complex tasks"},
        {.name = "gemini-2.5-flash",
         .display_name = "Gemini 2.5 Flash",
         .provider = "gemini",
         .description = "Fastest multimodal model"}};
    return defaults;
  }

  std::string endpoint =
      "https://generativelanguage.googleapis.com/v1beta/models?key=" +
      config_.api_key;
  std::string curl_cmd = "curl -s -X GET '" + endpoint + "' 2>&1";

  if (config_.verbose) {
    // Truncate before "key=" so the API key is not written to the log.
    std::cerr << "[DEBUG] Listing models: "
              << curl_cmd.substr(0, curl_cmd.find("key=")) << "...'"
              << std::endl;
  }

#ifdef _WIN32
  FILE* pipe = _popen(curl_cmd.c_str(), "r");
#else
  FILE* pipe = popen(curl_cmd.c_str(), "r");
#endif
  if (!pipe) {
    return absl::InternalError("Failed to execute curl command");
  }

  std::string response_str;
  char buffer[4096];
  while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
    response_str += buffer;
  }
#ifdef _WIN32
  _pclose(pipe);
#else
  pclose(pipe);
#endif

  try {
    auto models_json = nlohmann::json::parse(response_str, nullptr, false);
    if (models_json.is_discarded()) {
      return absl::InternalError("Failed to parse Gemini models JSON");
    }

    if (!models_json.contains("models")) {
      // Unexpected payload: fall back to a small known-good set.
      std::vector<ModelInfo> defaults = {{.name = "gemini-2.5-flash",
                                          .display_name = "Gemini 2.5 Flash",
                                          .provider = "gemini"},
                                         {.name = "gemini-1.5-flash",
                                          .display_name = "Gemini 1.5 Flash",
                                          .provider = "gemini"},
                                         {.name = "gemini-1.5-pro",
                                          .display_name = "Gemini 1.5 Pro",
                                          .provider = "gemini"}};
      return defaults;
    }

    std::vector<ModelInfo> models;
    for (const auto& m : models_json["models"]) {
      std::string name = m.value("name", "");
      // Names arrive as "models/<id>"; strip the prefix.
      if (absl::StartsWith(name, "models/")) {
        name = name.substr(7);
      }
      if (absl::StartsWith(name, "gemini")) {
        ModelInfo info;
        info.name = name;
        info.display_name = m.value("displayName", name);
        info.provider = "gemini";
        info.description = m.value("description", "");
        info.family = "gemini";
        info.is_local = false;
        models.push_back(std::move(info));
      }
    }
    return models;
  } catch (const std::exception& e) {
    return absl::InternalError(
        absl::StrCat("Failed to list models: ", e.what()));
  }
#endif
}
absl::Status GeminiAIService::CheckAvailability() {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError(
      "Gemini AI service requires JSON support. Build with "
      "-DYAZE_WITH_JSON=ON");
#else
  if (config_.verbose) {
    std::cerr << "[DEBUG] CheckAvailability: start" << std::endl;
  }

  if (config_.api_key.empty()) {
    return absl::FailedPreconditionError(
        "❌ Gemini API key not configured\n"
        "   Set GEMINI_API_KEY environment variable\n"
        "   Get your API key at: https://makersuite.google.com/app/apikey");
  }

  try {
    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: creating HTTPS client"
                << std::endl;
    }
    httplib::Client cli("https://generativelanguage.googleapis.com");
    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: client created" << std::endl;
    }
    cli.set_connection_timeout(5, 0);

    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: building endpoint" << std::endl;
    }
    std::string test_endpoint = "/v1beta/models/" + config_.model;
    httplib::Headers headers = {
        {"x-goog-api-key", config_.api_key},
    };

    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: making request to "
                << test_endpoint << std::endl;
    }
    auto res = cli.Get(test_endpoint.c_str(), headers);
    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: got response" << std::endl;
    }

    if (!res) {
      return absl::UnavailableError(
          "❌ Cannot reach Gemini API\n"
          "   Check your internet connection");
    }

    if (res->status == 401 || res->status == 403) {
      return absl::PermissionDeniedError(
          "❌ Invalid Gemini API key\n"
          "   Verify your key at: https://makersuite.google.com/app/apikey");
    }

    if (res->status == 404) {
      return absl::NotFoundError(
          absl::StrCat("❌ Model '", config_.model, "' not found\n",
                       "   Try: gemini-2.5-flash or gemini-1.5-pro"));
    }

    if (res->status != 200) {
      return absl::InternalError(absl::StrCat(
          "❌ Gemini API error: ", res->status, "\n   ", res->body));
    }

    return absl::OkStatus();
  } catch (const std::exception& e) {
    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: EXCEPTION: " << e.what()
                << std::endl;
    }
    return absl::InternalError(
        absl::StrCat("Exception during availability check: ", e.what()));
  } catch (...) {
    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: UNKNOWN EXCEPTION" << std::endl;
    }
    return absl::InternalError("Unknown exception during availability check");
  }
#endif
}
absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
    const std::string& prompt) {
  return GenerateResponse(
      {{agent::ChatMessage::Sender::kUser, prompt, absl::Now()}});
}
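// Core generation path: converts recent chat history into a Gemini
// generateContent request, POSTs it with curl, and parses the reply into an
// AgentResponse (text, CLI commands, and/or tool calls).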
absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
    const std::vector<agent::ChatMessage>& history) {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError(
      "Gemini AI service requires JSON support. Build with "
      "-DYAZE_WITH_JSON=ON");
#else
  if (history.empty()) {
    return absl::InvalidArgumentError("History cannot be empty.");
  }

  std::string prompt = prompt_builder_.BuildPromptFromHistory(history);

  if (config_.api_key.empty()) {
    return absl::FailedPreconditionError("Gemini API key not configured");
  }

  absl::Time request_start = absl::Now();

  try {
    if (config_.verbose) {
      std::cerr << "[DEBUG] Using curl for HTTPS request" << std::endl;
      std::cerr << "[DEBUG] Processing " << history.size()
                << " messages in history" << std::endl;
    }

    nlohmann::json contents = nlohmann::json::array();

    // Keep the request bounded: only the 10 most recent messages are sent.
    int start_idx = std::max(0, static_cast<int>(history.size()) - 10);
    for (size_t i = start_idx; i < history.size(); ++i) {
      const auto& msg = history[i];
      std::string role =
          (msg.sender == agent::ChatMessage::Sender::kUser) ? "user" : "model";
      nlohmann::json message = {{"role", role},
                                {"parts", {{{"text", msg.message}}}}};
      contents.push_back(message);
    }

    // Gemini expects the conversation to end on a user turn; append a
    // synthetic user message if the last message came from the agent.
    if (!history.empty() &&
        history.back().sender == agent::ChatMessage::Sender::kAgent) {
      nlohmann::json user_continuation = {
          {"role", "user"},
          {"parts",
           {{{"text", "Please continue or clarify your response."}}}}};
      contents.push_back(user_continuation);
    }

    nlohmann::json request_body = {
        {"system_instruction",
         {{"parts", {{"text", config_.system_instruction}}}}},
        {"contents", contents},
        {"generationConfig",
         {{"temperature", config_.temperature},
          {"maxOutputTokens", config_.max_output_tokens}}}};
    if (config_.verbose) {
      std::cerr << "[DEBUG] Sending " << contents.size()
                << " conversation turns to Gemini" << std::endl;
    }

    // Without function calling, force a structured JSON reply.
    if (!function_calling_enabled_) {
      request_body["generationConfig"]["responseMimeType"] =
          "application/json";
    }

    if (function_calling_enabled_) {
      std::string schemas_str = BuildFunctionCallSchemas();
      if (config_.verbose) {
        std::cerr << "[DEBUG] Function calling schemas: "
                  << schemas_str.substr(0, 200) << "..." << std::endl;
      }
      try {
        nlohmann::json schemas = nlohmann::json::parse(schemas_str);
        // Normalize to the shape Gemini expects:
        //   "tools": [{"function_declarations": [...]}]
        if (schemas.is_array()) {
          request_body["tools"] = {{{"function_declarations", schemas}}};
        } else if (schemas.is_object() &&
                   schemas.contains("function_declarations")) {
          request_body["tools"] = {
              {{"function_declarations", schemas["function_declarations"]}}};
        } else {
          request_body["tools"] = {
              {{"function_declarations", nlohmann::json::array({schemas})}}};
        }
      } catch (const nlohmann::json::exception& e) {
        std::cerr << "⚠️ Failed to parse function schemas: " << e.what()
                  << std::endl;
      }
    }

    // Write the request body to a temp file and POST it with curl.
    std::string temp_file = "/tmp/gemini_request.json";
    std::ofstream out(temp_file);
    out << request_body.dump();
    out.close();

    std::string endpoint =
        "https://generativelanguage.googleapis.com/v1beta/models/" +
        config_.model + ":generateContent";
    std::string curl_cmd = "curl -s -X POST '" + endpoint +
                           "' "
                           "-H 'Content-Type: application/json' "
                           "-H 'x-goog-api-key: " +
                           config_.api_key +
                           "' "
                           "-d @" + temp_file + " 2>&1";

    if (config_.verbose) {
      std::cerr << "[DEBUG] Executing API request..." << std::endl;
    }

#ifdef _WIN32
    FILE* pipe = _popen(curl_cmd.c_str(), "r");
#else
    FILE* pipe = popen(curl_cmd.c_str(), "r");
#endif
    if (!pipe) {
      return absl::InternalError("Failed to execute curl command");
    }

    std::string response_str;
    char buffer[4096];
    while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
      response_str += buffer;
    }

#ifdef _WIN32
    int status = _pclose(pipe);
#else
    int status = pclose(pipe);
#endif

    std::remove(temp_file.c_str());

    if (status != 0) {
      return absl::InternalError(
          absl::StrCat("Curl failed with status ", status));
    }

    if (response_str.empty()) {
      return absl::InternalError("Empty response from Gemini API");
    }
    if (config_.verbose) {
      std::cerr << "🔍 Raw Gemini API Response:" << std::endl
                << "\033[2m" << response_str.substr(0, 500) << "\033[0m"
                << std::endl;
    }

    if (config_.verbose) {
      std::cerr << "[DEBUG] Parsing response..." << std::endl;
    }
    auto parsed_or = ParseGeminiResponse(response_str);
    if (!parsed_or.ok()) {
      return parsed_or.status();
    }

    // Attach provenance metadata before returning.
    AgentResponse agent_response = std::move(parsed_or.value());
    agent_response.provider = "gemini";
    agent_response.model = config_.model;
    agent_response.latency_seconds =
        absl::ToDoubleSeconds(absl::Now() - request_start);
    agent_response.parameters["prompt_version"] = config_.prompt_version;
    agent_response.parameters["temperature"] =
        absl::StrFormat("%.2f", config_.temperature);
    agent_response.parameters["max_output_tokens"] =
        absl::StrFormat("%d", config_.max_output_tokens);
    agent_response.parameters["function_calling"] =
        function_calling_enabled_ ? "true" : "false";
    return agent_response;
  } catch (const std::exception& e) {
    if (config_.verbose) {
      std::cerr << "[ERROR] Exception: " << e.what() << std::endl;
    }
    return absl::InternalError(
        absl::StrCat("Exception during generation: ", e.what()));
  } catch (...) {
    if (config_.verbose) {
      std::cerr << "[ERROR] Unknown exception" << std::endl;
    }
    return absl::InternalError("Unknown exception during generation");
  }
#endif
}
absl::StatusOr<AgentResponse> GeminiAIService::ParseGeminiResponse(
    const std::string& response_body) {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError("JSON support required");
#else
  AgentResponse agent_response;

  auto response_json = nlohmann::json::parse(response_body, nullptr, false);
  if (response_json.is_discarded()) {
    return absl::InternalError("❌ Failed to parse Gemini response JSON");
  }

  if (!response_json.contains("candidates") ||
      response_json["candidates"].empty()) {
    return absl::InternalError("❌ No candidates in Gemini response");
  }

  for (const auto& candidate : response_json["candidates"]) {
    if (!candidate.contains("content") ||
        !candidate["content"].contains("parts")) {
      continue;
    }

    for (const auto& part : candidate["content"]["parts"]) {
      if (part.contains("text")) {
        std::string text_content = part["text"].get<std::string>();

        if (config_.verbose) {
          std::cerr << "🔍 Raw LLM Response:" << std::endl
                    << "\033[2m" << text_content << "\033[0m" << std::endl;
        }

        // Strip markdown code fences the model sometimes wraps around JSON.
        text_content = std::string(absl::StripAsciiWhitespace(text_content));
        if (absl::StartsWith(text_content, "```json")) {
          text_content = text_content.substr(7);
        } else if (absl::StartsWith(text_content, "```")) {
          text_content = text_content.substr(3);
        }
        if (absl::EndsWith(text_content, "```")) {
          text_content = text_content.substr(0, text_content.length() - 3);
        }
        text_content = std::string(absl::StripAsciiWhitespace(text_content));

        auto parsed_text = nlohmann::json::parse(text_content, nullptr, false);
        if (!parsed_text.is_discarded()) {
          if (parsed_text.contains("text_response") &&
              parsed_text["text_response"].is_string()) {
            agent_response.text_response =
                parsed_text["text_response"].get<std::string>();
          }
          if (parsed_text.contains("reasoning") &&
              parsed_text["reasoning"].is_string()) {
            agent_response.reasoning =
                parsed_text["reasoning"].get<std::string>();
          }
          if (parsed_text.contains("commands") &&
              parsed_text["commands"].is_array()) {
            for (const auto& cmd : parsed_text["commands"]) {
              if (cmd.is_string()) {
                std::string command = cmd.get<std::string>();
                // Commands are stored without the leading "z3ed ".
                if (absl::StartsWith(command, "z3ed ")) {
                  command = command.substr(5);
                }
                agent_response.commands.push_back(command);
              }
            }
          }
          if (parsed_text.contains("tool_calls") &&
              parsed_text["tool_calls"].is_array()) {
            for (const auto& call : parsed_text["tool_calls"]) {
              if (call.contains("tool_name") &&
                  call["tool_name"].is_string()) {
                ToolCall tool_call;
                tool_call.tool_name = call["tool_name"].get<std::string>();
                if (call.contains("args") && call["args"].is_object()) {
                  for (auto& [key, value] : call["args"].items()) {
                    if (value.is_string()) {
                      tool_call.args[key] = value.get<std::string>();
                    } else if (value.is_number()) {
                      tool_call.args[key] =
                          std::to_string(value.get<double>());
                    } else if (value.is_boolean()) {
                      tool_call.args[key] =
                          value.get<bool>() ? "true" : "false";
                    }
                  }
                }
                agent_response.tool_calls.push_back(tool_call);
              }
            }
          }
        } else {
          // Not JSON: scan plain text for lines that look like CLI commands.
          std::vector<std::string> lines = absl::StrSplit(text_content, '\n');
          for (const auto& line : lines) {
            std::string trimmed =
                std::string(absl::StripAsciiWhitespace(line));
            if (!trimmed.empty() &&
                (absl::StartsWith(trimmed, "z3ed ") ||
                 absl::StartsWith(trimmed, "palette ") ||
                 absl::StartsWith(trimmed, "overworld ") ||
                 absl::StartsWith(trimmed, "sprite ") ||
                 absl::StartsWith(trimmed, "dungeon "))) {
              if (absl::StartsWith(trimmed, "z3ed ")) {
                trimmed = trimmed.substr(5);
              }
              agent_response.commands.push_back(trimmed);
            }
          }
        }
      } else if (part.contains("functionCall")) {
        // Native Gemini function call.
        const auto& call = part["functionCall"];
        if (call.contains("name") && call["name"].is_string()) {
          ToolCall tool_call;
          tool_call.tool_name = call["name"].get<std::string>();
          if (call.contains("args") && call["args"].is_object()) {
            for (auto& [key, value] : call["args"].items()) {
              if (value.is_string()) {
                tool_call.args[key] = value.get<std::string>();
              } else if (value.is_number()) {
                tool_call.args[key] = std::to_string(value.get<double>());
              }
            }
          }
          agent_response.tool_calls.push_back(tool_call);
        }
      }
    }
  }

  if (agent_response.text_response.empty() &&
      agent_response.commands.empty() && agent_response.tool_calls.empty()) {
    return absl::InternalError(
        "❌ No valid response extracted from Gemini\n"
        "   Expected at least one of: text_response, commands, or "
        "tool_calls\n");
  }

  return agent_response;
#endif
}
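// Standard base64: each 3-byte input group maps to 4 characters of the
// 64-symbol alphabet, with '=' padding when the input length is not a
// multiple of 3. For example, "Man" encodes to "TWFu" and "Ma" to "TWE=".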
absl::StatusOr<std::string> GeminiAIService::EncodeImageToBase64(
    const std::string& image_path) const {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError(
      "Gemini AI service requires JSON support. Build with "
      "-DYAZE_WITH_JSON=ON");
#else
  std::ifstream file(image_path, std::ios::binary);
  if (!file.is_open()) {
    return absl::NotFoundError(
        absl::StrCat("Failed to open image file: ", image_path));
  }

  // Read the entire file into memory.
  file.seekg(0, std::ios::end);
  size_t size = file.tellg();
  file.seekg(0, std::ios::beg);

  std::vector<unsigned char> buffer(size);
  if (!file.read(reinterpret_cast<char*>(buffer.data()), size)) {
    return absl::InternalError("Failed to read image file");
  }

  static const char* base64_chars =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  std::string encoded;
  encoded.reserve(((size + 2) / 3) * 4);

  int i = 0;
  int j = 0;
  unsigned char char_array_3[3];
  unsigned char char_array_4[4];

  for (size_t idx = 0; idx < size; idx++) {
    char_array_3[i++] = buffer[idx];
    if (i == 3) {
      char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
      char_array_4[1] =
          ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
      char_array_4[2] =
          ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
      char_array_4[3] = char_array_3[2] & 0x3f;

      for (i = 0; i < 4; i++)
        encoded += base64_chars[char_array_4[i]];
      i = 0;
    }
  }

  // Handle the final 1- or 2-byte remainder with '=' padding.
  if (i) {
    for (j = i; j < 3; j++)
      char_array_3[j] = '\0';

    char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
    char_array_4[1] =
        ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
    char_array_4[2] =
        ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);

    for (j = 0; j < i + 1; j++)
      encoded += base64_chars[char_array_4[j]];

    while (i++ < 3)
      encoded += '=';
  }

  return encoded;
#endif
}
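// Sends an image plus a text prompt in one generateContent request. The image
// travels as an "inline_data" part (MIME type + base64 payload) alongside the
// "text" part, so no separate file-upload step is required.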
absl::StatusOr<AgentResponse> GeminiAIService::GenerateMultimodalResponse(
    const std::string& image_path, const std::string& prompt) {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError(
      "Gemini AI service requires JSON support. Build with "
      "-DYAZE_WITH_JSON=ON");
#else
  if (config_.api_key.empty()) {
    return absl::FailedPreconditionError("Gemini API key not configured");
  }

  // Infer the MIME type from the file extension (PNG by default).
  std::string mime_type = "image/png";
  if (image_path.ends_with(".jpg") || image_path.ends_with(".jpeg")) {
    mime_type = "image/jpeg";
  } else if (image_path.ends_with(".bmp")) {
    mime_type = "image/bmp";
  } else if (image_path.ends_with(".webp")) {
    mime_type = "image/webp";
  }

  auto encoded_or = EncodeImageToBase64(image_path);
  if (!encoded_or.ok()) {
    return encoded_or.status();
  }
  std::string encoded_image = std::move(encoded_or.value());

  if (config_.verbose) {
    std::cerr << "[DEBUG] Preparing multimodal request with image"
              << std::endl;
  }

  try {
    nlohmann::json request_body = {
        {"contents",
         {{{"parts",
            {{{"inline_data",
               {{"mime_type", mime_type}, {"data", encoded_image}}}},
             {{"text", prompt}}}}}}},
        {"generationConfig",
         {{"temperature", config_.temperature},
          {"maxOutputTokens", config_.max_output_tokens}}}};

    std::string temp_file = "/tmp/gemini_multimodal_request.json";
    std::ofstream out(temp_file);
    out << request_body.dump();
    out.close();

    std::string endpoint =
        "https://generativelanguage.googleapis.com/v1beta/models/" +
        config_.model + ":generateContent";
    std::string curl_cmd = "curl -s -X POST '" + endpoint +
                           "' "
                           "-H 'Content-Type: application/json' "
                           "-H 'x-goog-api-key: " +
                           config_.api_key +
                           "' "
                           "-d @" + temp_file + " 2>&1";

    if (config_.verbose) {
      std::cerr << "[DEBUG] Executing multimodal API request..." << std::endl;
    }

#ifdef _WIN32
    FILE* pipe = _popen(curl_cmd.c_str(), "r");
#else
    FILE* pipe = popen(curl_cmd.c_str(), "r");
#endif
    if (!pipe) {
      return absl::InternalError("Failed to execute curl command");
    }

    std::string response_str;
    char buffer[4096];
    while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
      response_str += buffer;
    }

#ifdef _WIN32
    int status = _pclose(pipe);
#else
    int status = pclose(pipe);
#endif

    std::remove(temp_file.c_str());

    if (status != 0) {
      return absl::InternalError(
          absl::StrCat("Curl failed with status ", status));
    }

    if (response_str.empty()) {
      return absl::InternalError("Empty response from Gemini API");
    }

    if (config_.verbose) {
      std::cerr << "🔍 Raw Gemini Multimodal Response:" << std::endl
                << "\033[2m" << response_str.substr(0, 500) << "\033[0m"
                << std::endl;
    }

    return ParseGeminiResponse(response_str);
  } catch (const std::exception& e) {
    if (config_.verbose) {
      std::cerr << "[ERROR] Exception: " << e.what() << std::endl;
    }
    return absl::InternalError(
        absl::StrCat("Exception during multimodal generation: ", e.what()));
  }
#endif
}