10#include "absl/strings/str_cat.h"
11#include "absl/strings/str_split.h"
12#include "absl/strings/strip.h"
13#include "absl/time/clock.h"
14#include "absl/time/time.h"
23#include "nlohmann/json.hpp"
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
#include <openssl/crypto.h>
#include <openssl/err.h>
#include <openssl/ssl.h>

// One-time OpenSSL initialization state, shared across all service instances.
static std::atomic<bool> g_openssl_initialized{false};
static std::mutex g_openssl_init_mutex;

static void InitializeOpenSSL() {
  std::lock_guard<std::mutex> lock(g_openssl_init_mutex);
  if (!g_openssl_initialized.exchange(true)) {
    OPENSSL_init_ssl(
        OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS,
        nullptr);
    std::cerr << "✓ OpenSSL initialized for HTTPS support" << std::endl;
  }
}
#endif  // CPPHTTPLIB_OPENSSL_SUPPORT
GeminiAIService::GeminiAIService(const GeminiConfig& config)
    : function_calling_enabled_(config.use_function_calling), config_(config) {
  if (config_.verbose) {
    std::cerr << "[DEBUG] Initializing Gemini service..." << std::endl;
    std::cerr << "[DEBUG] Function calling: "
              << (function_calling_enabled_ ? "enabled" : "disabled")
              << std::endl;
    std::cerr << "[DEBUG] Prompt version: " << config_.prompt_version
              << std::endl;
  }

#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
  InitializeOpenSSL();
  if (config_.verbose) {
    std::cerr << "[DEBUG] OpenSSL initialized for HTTPS" << std::endl;
  }
#endif
  // Load the prompt catalogue that backs tool and resource descriptions.
  std::string catalogue_path = config_.prompt_version == "v2"
                                   ? "assets/agent/prompt_catalogue_v2.yaml"
                                   : "assets/agent/prompt_catalogue.yaml";
  if (auto status = prompt_builder_.LoadResourceCatalogue(catalogue_path);
      !status.ok()) {
    std::cerr << "⚠️ Failed to load agent prompt catalogue: "
              << status.message() << std::endl;
  }

  if (config_.verbose) {
    std::cerr << "[DEBUG] Loaded prompt catalogue" << std::endl;
  }
  if (config_.system_instruction.empty()) {
    if (config_.verbose) {
      std::cerr << "[DEBUG] Building system instruction..." << std::endl;
    }

    // Pick the prompt file matching the configured prompt version.
    std::string prompt_file;
    if (config_.prompt_version == "v3") {
      prompt_file = "agent/system_prompt_v3.txt";
    } else if (config_.prompt_version == "v2") {
      prompt_file = "agent/system_prompt_v2.txt";
    } else {
      prompt_file = "agent/system_prompt.txt";
    }

    auto prompt_path = util::PlatformPaths::FindAsset(prompt_file);
    if (prompt_path.ok()) {
      std::ifstream file(prompt_path->string());
      std::stringstream buffer;
      buffer << file.rdbuf();
      config_.system_instruction = buffer.str();
      if (config_.verbose) {
        std::cerr << "[DEBUG] Loaded prompt: " << prompt_path->string()
                  << std::endl;
      }
    }

    // Fall back to a programmatically built instruction if no file loaded.
    if (config_.system_instruction.empty()) {
      if (config_.use_enhanced_prompting) {
        config_.system_instruction =
            prompt_builder_.BuildSystemInstructionWithExamples();
      } else {
        config_.system_instruction = BuildSystemInstruction();
      }
    }
  }

  if (config_.verbose) {
    std::cerr << "[DEBUG] Gemini service initialized" << std::endl;
  }
}
void GeminiAIService::EnableFunctionCalling(bool enable) {
  function_calling_enabled_ = enable;
}

std::vector<std::string> GeminiAIService::GetAvailableTools() const {
  return {"resource-list",        "resource-search",
          "dungeon-list-sprites", "dungeon-describe-room",
          "overworld-find-tile",  "overworld-describe-map",
          "overworld-list-warps"};
}
std::string GeminiAIService::BuildFunctionCallSchemas() {
#ifndef YAZE_WITH_JSON
  return "[]";
#endif
  // Prefer schemas generated by the prompt builder.
  std::string schemas = prompt_builder_.BuildFunctionCallSchemas();
  if (!schemas.empty() && schemas != "[]") {
    return schemas;
  }

  // Otherwise fall back to the bundled schema asset.
  auto schema_path_or =
      util::PlatformPaths::FindAsset("agent/function_schemas.json");
  if (!schema_path_or.ok()) {
    if (config_.verbose) {
      std::cerr << "⚠️ Function schemas file not found: "
                << schema_path_or.status().message() << std::endl;
    }
    return "[]";
  }

  std::ifstream file(schema_path_or->string());
  if (!file.is_open()) {
    std::cerr << "⚠️ Failed to open function schemas file: "
              << schema_path_or->string() << std::endl;
    return "[]";
  }

  try {
    nlohmann::json schemas_json;
    file >> schemas_json;
    return schemas_json.dump();
  } catch (const nlohmann::json::exception& e) {
    std::cerr << "⚠️ Failed to parse function schemas JSON: " << e.what()
              << std::endl;
    return "[]";
  }
}
std::string GeminiAIService::BuildSystemInstruction() {
  return prompt_builder_.BuildSystemInstruction();
}

void GeminiAIService::SetRomContext(Rom* rom) {
  prompt_builder_.SetRom(rom);
}
195absl::StatusOr<std::vector<ModelInfo>> GeminiAIService::ListAvailableModels() {
196#ifndef YAZE_WITH_JSON
197 return absl::UnimplementedError(
"Gemini AI service requires JSON support");
199 if (config_.api_key.empty()) {
201 std::vector<ModelInfo> defaults = {
202 {.name =
"gemini-3.0-preview",
203 .display_name =
"Gemini 3.0 Preview",
204 .provider =
"gemini",
205 .description =
"Cutting-edge model, currently in preview"},
206 {.name =
"gemini-2.5-pro",
207 .display_name =
"Gemini 2.5 Pro",
208 .provider =
"gemini",
209 .description =
"High intelligence for complex tasks"},
210 {.name =
"gemini-2.5-flash",
211 .display_name =
"Gemini 2.5 Flash",
212 .provider =
"gemini",
213 .description =
"Fastest multimodal model"}};
  std::string endpoint =
      "https://generativelanguage.googleapis.com/v1beta/models?key=" +
      config_.api_key;
  std::string curl_cmd = "curl -s -X GET '" + endpoint + "' 2>&1";

  if (config_.verbose) {
    // Truncate before "key=" so the API key is never logged.
    std::cerr << "[DEBUG] Listing models: "
              << curl_cmd.substr(0, curl_cmd.find("key=")) << "...'"
              << std::endl;
  }

#ifdef _WIN32
  FILE* pipe = _popen(curl_cmd.c_str(), "r");
#else
  FILE* pipe = popen(curl_cmd.c_str(), "r");
#endif
  if (!pipe) {
    return absl::InternalError("Failed to execute curl command");
  }

  std::string response_str;
  char buffer[4096];
  while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
    response_str += buffer;
  }
#ifdef _WIN32
  _pclose(pipe);
#else
  pclose(pipe);
#endif
  try {
    auto models_json = nlohmann::json::parse(response_str, nullptr, false);
    if (models_json.is_discarded()) {
      return absl::InternalError("Failed to parse Gemini models JSON");
    }

    if (!models_json.contains("models")) {
      // Unexpected payload; fall back to a static list.
      std::vector<ModelInfo> defaults = {{.name = "gemini-2.5-flash",
                                          .display_name = "Gemini 2.5 Flash",
                                          .provider = "gemini"},
                                         {.name = "gemini-1.5-flash",
                                          .display_name = "Gemini 1.5 Flash",
                                          .provider = "gemini"},
                                         {.name = "gemini-1.5-pro",
                                          .display_name = "Gemini 1.5 Pro",
                                          .provider = "gemini"}};
      return defaults;
    }

    std::vector<ModelInfo> models;
    for (const auto& m : models_json["models"]) {
      std::string name = m.value("name", "");
      // The API returns qualified names like "models/gemini-2.5-flash".
      if (absl::StartsWith(name, "models/")) {
        name = name.substr(7);
      }
      if (absl::StartsWith(name, "gemini")) {
        ModelInfo info;
        info.name = name;
        info.display_name = m.value("displayName", name);
        info.provider = "gemini";
        info.description = m.value("description", "");
        info.family = "gemini";
        info.is_local = false;
        models.push_back(std::move(info));
      }
    }
    return models;
  } catch (const std::exception& e) {
    return absl::InternalError(
        absl::StrCat("Failed to list models: ", e.what()));
  }
}
absl::Status GeminiAIService::CheckAvailability() {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError(
      "Gemini AI service requires JSON support. Build with "
      "-DYAZE_WITH_JSON=ON");
#endif
  if (config_.verbose) {
    std::cerr << "[DEBUG] CheckAvailability: start" << std::endl;
  }

  if (config_.api_key.empty()) {
    return absl::FailedPreconditionError(
        "❌ Gemini API key not configured\n"
        "   Set GEMINI_API_KEY environment variable\n"
        "   Get your API key at: https://makersuite.google.com/app/apikey");
  }

  if (config_.verbose) {
    std::cerr << "[DEBUG] CheckAvailability: creating HTTPS client"
              << std::endl;
  }

  try {
    httplib::Client cli("https://generativelanguage.googleapis.com");
    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: client created" << std::endl;
    }
    cli.set_connection_timeout(5, 0);

    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: building endpoint" << std::endl;
    }
    std::string test_endpoint = "/v1beta/models/" + config_.model;
    httplib::Headers headers = {
        {"x-goog-api-key", config_.api_key},
    };

    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: making request to "
                << test_endpoint << std::endl;
    }
    auto res = cli.Get(test_endpoint.c_str(), headers);

    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: got response" << std::endl;
    }
    if (!res) {
      return absl::UnavailableError(
          "❌ Cannot reach Gemini API\n"
          "   Check your internet connection");
    }

    if (res->status == 401 || res->status == 403) {
      return absl::PermissionDeniedError(
          "❌ Invalid Gemini API key\n"
          "   Verify your key at: https://makersuite.google.com/app/apikey");
    }

    if (res->status == 404) {
      return absl::NotFoundError(
          absl::StrCat("❌ Model '", config_.model, "' not found\n",
                       "   Try: gemini-2.5-flash or gemini-1.5-pro"));
    }

    if (res->status != 200) {
      return absl::InternalError(absl::StrCat(
          "❌ Gemini API error: ", res->status, "\n  ", res->body));
    }

    return absl::OkStatus();
  } catch (const std::exception& e) {
    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: EXCEPTION: " << e.what()
                << std::endl;
    }
    return absl::InternalError(
        absl::StrCat("Exception during availability check: ", e.what()));
  } catch (...) {
    if (config_.verbose) {
      std::cerr << "[DEBUG] CheckAvailability: UNKNOWN EXCEPTION" << std::endl;
    }
    return absl::InternalError("Unknown exception during availability check");
  }
}
absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
    const std::string& prompt) {
  return GenerateResponse(
      {{{agent::ChatMessage::Sender::kUser, prompt, absl::Now()}}});
}
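// Minimal usage sketch (hypothetical caller code):
//   GeminiConfig config;             // assumes api_key, model, etc. are set
//   GeminiAIService service(config);
//   auto response_or = service.GenerateResponse("List all dungeon sprites");
//   if (response_or.ok()) { /* inspect response_or->text_response, ... */ }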
absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
    const std::vector<agent::ChatMessage>& history) {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError(
      "Gemini AI service requires JSON support. Build with "
      "-DYAZE_WITH_JSON=ON");
#endif
  if (history.empty()) {
    return absl::InvalidArgumentError("History cannot be empty.");
  }

  std::string prompt = prompt_builder_.BuildPromptFromHistory(history);

  if (config_.api_key.empty()) {
    return absl::FailedPreconditionError("Gemini API key not configured");
  }

  absl::Time request_start = absl::Now();

  try {
    if (config_.verbose) {
      std::cerr << "[DEBUG] Using curl for HTTPS request" << std::endl;
      std::cerr << "[DEBUG] Processing " << history.size()
                << " messages in history" << std::endl;
    }
    nlohmann::json contents = nlohmann::json::array();

    // Bound the request size by sending only the last 10 messages.
    int start_idx = std::max(0, static_cast<int>(history.size()) - 10);
    for (size_t i = start_idx; i < history.size(); ++i) {
      const auto& msg = history[i];
      std::string role =
          (msg.sender == agent::ChatMessage::Sender::kUser) ? "user" : "model";
      nlohmann::json message = {{"role", role},
                                {"parts", {{{"text", msg.message}}}}};
      contents.push_back(message);
    }

    // Gemini expects the conversation to end with a user turn; append one if
    // the agent spoke last.
    if (!history.empty() &&
        history.back().sender == agent::ChatMessage::Sender::kAgent) {
      nlohmann::json user_continuation = {
          {"role", "user"},
          {"parts",
           {{{"text", "Please continue or clarify your response."}}}}};
      contents.push_back(user_continuation);
    }
    nlohmann::json request_body = {
        {"system_instruction",
         {{"parts", {{"text", config_.system_instruction}}}}},
        {"contents", contents},
        {"generationConfig",
         {{"temperature", config_.temperature},
          {"maxOutputTokens", config_.max_output_tokens}}}};

    if (config_.verbose) {
      std::cerr << "[DEBUG] Sending " << contents.size()
                << " conversation turns to Gemini" << std::endl;
    }
    // Without function calling we ask the model for raw JSON output and
    // parse it ourselves in ParseGeminiResponse().
    if (!function_calling_enabled_) {
      request_body["generationConfig"]["responseMimeType"] =
          "application/json";
    }

    if (function_calling_enabled_) {
      std::string schemas_str = BuildFunctionCallSchemas();
      if (config_.verbose) {
        std::cerr << "[DEBUG] Function calling schemas: "
                  << schemas_str.substr(0, 200) << "..." << std::endl;
      }

      try {
        nlohmann::json schemas = nlohmann::json::parse(schemas_str);
        // Normalize the accepted shapes into tools/function_declarations.
        if (schemas.is_array()) {
          request_body["tools"] = {{{"function_declarations", schemas}}};
        } else if (schemas.is_object() &&
                   schemas.contains("function_declarations")) {
          request_body["tools"] = {
              {{"function_declarations", schemas["function_declarations"]}}};
        } else {
          request_body["tools"] = {
              {{"function_declarations", nlohmann::json::array({schemas})}}};
        }
      } catch (const nlohmann::json::exception& e) {
        std::cerr << "⚠️ Failed to parse function schemas: " << e.what()
                  << std::endl;
      }
    }
    // curl reads the body from a temp file to avoid shell-escaping issues.
    std::string temp_file = "/tmp/gemini_request.json";
    std::ofstream out(temp_file);
    out << request_body.dump();
    out.close();

    std::string endpoint =
        "https://generativelanguage.googleapis.com/v1beta/models/" +
        config_.model + ":generateContent";
    std::string curl_cmd = "curl -s -X POST '" + endpoint +
                           "' "
                           "-H 'Content-Type: application/json' "
                           "-H 'x-goog-api-key: " +
                           config_.api_key +
                           "' "
                           "-d @" +
                           temp_file + " 2>&1";

    if (config_.verbose) {
      std::cerr << "[DEBUG] Executing API request..." << std::endl;
    }

#ifdef _WIN32
    FILE* pipe = _popen(curl_cmd.c_str(), "r");
#else
    FILE* pipe = popen(curl_cmd.c_str(), "r");
#endif
    if (!pipe) {
      return absl::InternalError("Failed to execute curl command");
    }

    std::string response_str;
    char buffer[4096];
    while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
      response_str += buffer;
    }

#ifdef _WIN32
    int status = _pclose(pipe);
#else
    int status = pclose(pipe);
#endif
    std::remove(temp_file.c_str());

    if (status != 0) {
      return absl::InternalError(
          absl::StrCat("Curl failed with status ", status));
    }
    if (response_str.empty()) {
      return absl::InternalError("Empty response from Gemini API");
    }

    if (config_.verbose) {
      std::cerr << "\033[35m" << "🔍 Raw Gemini API Response:" << "\033[0m"
                << "\n"
                << "\033[2m" << response_str.substr(0, 500) << "\033[0m"
                << std::endl;
    }

    if (config_.verbose) {
      std::cerr << "[DEBUG] Parsing response..." << std::endl;
    }
    auto parsed_or = ParseGeminiResponse(response_str);
    if (!parsed_or.ok()) {
      return parsed_or.status();
    }

    // Attach provenance and timing metadata to the parsed response.
    AgentResponse agent_response = std::move(parsed_or.value());
    agent_response.provider = "gemini";
    agent_response.model = config_.model;
    agent_response.latency_seconds =
        absl::ToDoubleSeconds(absl::Now() - request_start);
    agent_response.parameters["prompt_version"] = config_.prompt_version;
    agent_response.parameters["temperature"] =
        absl::StrFormat("%.2f", config_.temperature);
    agent_response.parameters["max_output_tokens"] =
        absl::StrFormat("%d", config_.max_output_tokens);
    agent_response.parameters["function_calling"] =
        function_calling_enabled_ ? "true" : "false";
    return agent_response;
  } catch (const std::exception& e) {
    if (config_.verbose) {
      std::cerr << "[ERROR] Exception: " << e.what() << std::endl;
    }
    return absl::InternalError(
        absl::StrCat("Exception during generation: ", e.what()));
  } catch (...) {
    if (config_.verbose) {
      std::cerr << "[ERROR] Unknown exception" << std::endl;
    }
    return absl::InternalError("Unknown exception during generation");
  }
}
absl::StatusOr<AgentResponse> GeminiAIService::ParseGeminiResponse(
    const std::string& response_body) {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError("JSON support required");
#endif
  AgentResponse agent_response;

  auto response_json = nlohmann::json::parse(response_body, nullptr, false);
  if (response_json.is_discarded()) {
    return absl::InternalError("❌ Failed to parse Gemini response JSON");
  }

  if (!response_json.contains("candidates") ||
      response_json["candidates"].empty()) {
    return absl::InternalError("❌ No candidates in Gemini response");
  }
  for (const auto& candidate : response_json["candidates"]) {
    if (!candidate.contains("content") ||
        !candidate["content"].contains("parts")) {
      continue;
    }

    for (const auto& part : candidate["content"]["parts"]) {
      if (part.contains("text")) {
        std::string text_content = part["text"].get<std::string>();

        if (config_.verbose) {
          std::cerr << "\033[35m" << "🔍 Raw LLM Response:" << "\033[0m"
                    << "\n"
                    << "\033[2m" << text_content << "\033[0m" << "\n\n";
        }

        // Strip surrounding markdown code fences before parsing.
        text_content = std::string(absl::StripAsciiWhitespace(text_content));
        if (absl::StartsWith(text_content, "```json")) {
          text_content = text_content.substr(7);
        } else if (absl::StartsWith(text_content, "```")) {
          text_content = text_content.substr(3);
        }
        if (absl::EndsWith(text_content, "```")) {
          text_content = text_content.substr(0, text_content.length() - 3);
        }
        text_content = std::string(absl::StripAsciiWhitespace(text_content));
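        // Example: a reply of "```json\n{\"commands\": []}\n```" is reduced
        // to "{\"commands\": []}" before the JSON parse below.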
        auto parsed_text = nlohmann::json::parse(text_content, nullptr, false);
        if (!parsed_text.is_discarded()) {
          // Structured output: extract the known fields.
          if (parsed_text.contains("text_response") &&
              parsed_text["text_response"].is_string()) {
            agent_response.text_response =
                parsed_text["text_response"].get<std::string>();
          }

          if (parsed_text.contains("reasoning") &&
              parsed_text["reasoning"].is_string()) {
            agent_response.reasoning =
                parsed_text["reasoning"].get<std::string>();
          }

          if (parsed_text.contains("commands") &&
              parsed_text["commands"].is_array()) {
            for (const auto& cmd : parsed_text["commands"]) {
              if (cmd.is_string()) {
                std::string command = cmd.get<std::string>();
                // Store commands without the "z3ed " prefix.
                if (absl::StartsWith(command, "z3ed ")) {
                  command = command.substr(5);
                }
                agent_response.commands.push_back(command);
              }
            }
          }

          if (parsed_text.contains("tool_calls") &&
              parsed_text["tool_calls"].is_array()) {
            for (const auto& call : parsed_text["tool_calls"]) {
              if (call.contains("tool_name") &&
                  call["tool_name"].is_string()) {
                ToolCall tool_call;
                tool_call.tool_name = call["tool_name"].get<std::string>();
                if (call.contains("args") && call["args"].is_object()) {
                  for (auto& [key, value] : call["args"].items()) {
                    if (value.is_string()) {
                      tool_call.args[key] = value.get<std::string>();
                    } else if (value.is_number()) {
                      tool_call.args[key] =
                          std::to_string(value.get<double>());
                    } else if (value.is_boolean()) {
                      tool_call.args[key] =
                          value.get<bool>() ? "true" : "false";
                    }
                  }
                }
                agent_response.tool_calls.push_back(tool_call);
              }
            }
          }
        } else {
          // Not JSON: fall back to scanning for command-like lines.
          std::vector<std::string> lines = absl::StrSplit(text_content, '\n');
          for (const auto& line : lines) {
            std::string trimmed =
                std::string(absl::StripAsciiWhitespace(line));
            if (!trimmed.empty() &&
                (absl::StartsWith(trimmed, "z3ed ") ||
                 absl::StartsWith(trimmed, "palette ") ||
                 absl::StartsWith(trimmed, "overworld ") ||
                 absl::StartsWith(trimmed, "sprite ") ||
                 absl::StartsWith(trimmed, "dungeon "))) {
              if (absl::StartsWith(trimmed, "z3ed ")) {
                trimmed = trimmed.substr(5);
              }
              agent_response.commands.push_back(trimmed);
            }
          }
        }
      } else if (part.contains("functionCall")) {
        // Native Gemini function-calling part.
        const auto& call = part["functionCall"];
        if (call.contains("name") && call["name"].is_string()) {
          ToolCall tool_call;
          tool_call.tool_name = call["name"].get<std::string>();
          if (call.contains("args") && call["args"].is_object()) {
            for (auto& [key, value] : call["args"].items()) {
              if (value.is_string()) {
                tool_call.args[key] = value.get<std::string>();
              } else if (value.is_number()) {
                tool_call.args[key] = std::to_string(value.get<double>());
              }
            }
          }
          agent_response.tool_calls.push_back(tool_call);
        }
      }
    }
  }
  if (agent_response.text_response.empty() &&
      agent_response.commands.empty() && agent_response.tool_calls.empty()) {
    return absl::InternalError(
        "❌ No valid response extracted from Gemini\n"
        "   Expected at least one of: text_response, commands, or tool_calls");
  }

  return agent_response;
}
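// ParseGeminiResponse() accepts model text shaped like the following
// (illustrative example):
//   {"text_response": "Here is what I found...",
//    "reasoning": "The user asked about sprites...",
//    "commands": ["dungeon list-sprites"],
//    "tool_calls": [{"tool_name": "resource-search",
//                    "args": {"query": "soldier"}}]}
// Plain text is also tolerated: any line starting with a known command verb
// is harvested into `commands`.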
758absl::StatusOr<std::string> GeminiAIService::EncodeImageToBase64(
759 const std::string& image_path)
const {
760#ifndef YAZE_WITH_JSON
762 return absl::UnimplementedError(
763 "Gemini AI service requires JSON support. Build with "
764 "-DYAZE_WITH_JSON=ON");
766 std::ifstream file(image_path, std::ios::binary);
767 if (!file.is_open()) {
768 return absl::NotFoundError(
769 absl::StrCat(
"Failed to open image file: ", image_path));
773 file.seekg(0, std::ios::end);
774 size_t size = file.tellg();
775 file.seekg(0, std::ios::beg);
777 std::vector<unsigned char> buffer(size);
778 if (!file.read(
reinterpret_cast<char*
>(buffer.data()), size)) {
779 return absl::InternalError(
"Failed to read image file");
783 static const char* base64_chars =
784 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
787 encoded.reserve(((size + 2) / 3) * 4);
  int i = 0;
  int j = 0;
  unsigned char char_array_3[3];
  unsigned char char_array_4[4];

  // Process the input in 3-byte groups, emitting 4 base64 characters each.
  for (size_t idx = 0; idx < size; idx++) {
    char_array_3[i++] = buffer[idx];
    if (i == 3) {
      char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
      char_array_4[1] =
          ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
      char_array_4[2] =
          ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
      char_array_4[3] = char_array_3[2] & 0x3f;

      for (i = 0; i < 4; i++)
        encoded += base64_chars[char_array_4[i]];
      i = 0;
    }
  }

  // Handle the final partial group (1 or 2 leftover bytes) with '=' padding.
  if (i) {
    for (j = i; j < 3; j++)
      char_array_3[j] = '\0';

    char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
    char_array_4[1] =
        ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
    char_array_4[2] =
        ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);

    for (j = 0; j < i + 1; j++)
      encoded += base64_chars[char_array_4[j]];

    while (i++ < 3)
      encoded += '=';
  }

  return encoded;
}
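// Worked example of the encoding above: the bytes "Man" (0x4D 0x61 0x6E)
// form the 24-bit group 010011010110000101101110, which splits into the
// 6-bit values 19, 22, 5, 46 -> "TWFu". A 2-byte tail like "Ma" yields
// "TWE=" via the padding branch.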
absl::StatusOr<AgentResponse> GeminiAIService::GenerateMultimodalResponse(
    const std::string& image_path, const std::string& prompt) {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError(
      "Gemini AI service requires JSON support. Build with "
      "-DYAZE_WITH_JSON=ON");
#endif
  if (config_.api_key.empty()) {
    return absl::FailedPreconditionError("Gemini API key not configured");
  }

  // Infer the MIME type from the file extension; PNG is the default.
  std::string mime_type = "image/png";
  if (image_path.ends_with(".jpg") || image_path.ends_with(".jpeg")) {
    mime_type = "image/jpeg";
  } else if (image_path.ends_with(".bmp")) {
    mime_type = "image/bmp";
  } else if (image_path.ends_with(".webp")) {
    mime_type = "image/webp";
  }

  auto encoded_or = EncodeImageToBase64(image_path);
  if (!encoded_or.ok()) {
    return encoded_or.status();
  }
  std::string encoded_image = std::move(encoded_or.value());

  if (config_.verbose) {
    std::cerr << "[DEBUG] Preparing multimodal request with image"
              << std::endl;
  }
  try {
    nlohmann::json request_body = {
        {"contents",
         {{{"parts",
            {{{"inline_data",
               {{"mime_type", mime_type}, {"data", encoded_image}}}},
             {{"text", prompt}}}}}}},
        {"generationConfig",
         {{"temperature", config_.temperature},
          {"maxOutputTokens", config_.max_output_tokens}}}};
    std::string temp_file = "/tmp/gemini_multimodal_request.json";
    std::ofstream out(temp_file);
    out << request_body.dump();
    out.close();

    std::string endpoint =
        "https://generativelanguage.googleapis.com/v1beta/models/" +
        config_.model + ":generateContent";
    std::string curl_cmd = "curl -s -X POST '" + endpoint +
                           "' "
                           "-H 'Content-Type: application/json' "
                           "-H 'x-goog-api-key: " +
                           config_.api_key +
                           "' "
                           "-d @" +
                           temp_file + " 2>&1";

    if (config_.verbose) {
      std::cerr << "[DEBUG] Executing multimodal API request..." << std::endl;
    }

#ifdef _WIN32
    FILE* pipe = _popen(curl_cmd.c_str(), "r");
#else
    FILE* pipe = popen(curl_cmd.c_str(), "r");
#endif
    if (!pipe) {
      return absl::InternalError("Failed to execute curl command");
    }

    std::string response_str;
    char buffer[4096];
    while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
      response_str += buffer;
    }

#ifdef _WIN32
    int status = _pclose(pipe);
#else
    int status = pclose(pipe);
#endif
    std::remove(temp_file.c_str());

    if (status != 0) {
      return absl::InternalError(
          absl::StrCat("Curl failed with status ", status));
    }

    if (response_str.empty()) {
      return absl::InternalError("Empty response from Gemini API");
    }

    if (config_.verbose) {
      std::cerr << "\033[35m"
                << "🔍 Raw Gemini Multimodal Response:" << "\033[0m" << "\n"
                << "\033[2m" << response_str.substr(0, 500) << "\033[0m"
                << std::endl;
    }

    return ParseGeminiResponse(response_str);
  } catch (const std::exception& e) {
    if (config_.verbose) {
      std::cerr << "[ERROR] Exception: " << e.what() << std::endl;
    }
    return absl::InternalError(
        absl::StrCat("Exception during multimodal generation: ", e.what()));
  }
}