yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
gemini_ai_service.cc
Go to the documentation of this file.
2
#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>

#include "absl/strings/ascii.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_split.h"
#include "absl/strings/strip.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"

#include "util/platform_paths.h"
19
20#if defined(__APPLE__)
21#include <TargetConditionals.h>
22#endif
23
24#if defined(__APPLE__) && (TARGET_OS_IPHONE == 1 || TARGET_IPHONE_SIMULATOR == 1)
26#define YAZE_AI_IOS_URLSESSION 1
27#endif
28
29#ifdef YAZE_WITH_JSON
30#include <filesystem>
31#include <fstream>
32
33#include "httplib.h"
34#include "nlohmann/json.hpp"
35
36// OpenSSL initialization for HTTPS support
37#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
38#include <openssl/crypto.h>
39#include <openssl/err.h>
40#include <openssl/ssl.h>
41
42// Global flag to track OpenSSL initialization
43static std::atomic<bool> g_openssl_initialized{false};
44static std::mutex g_openssl_init_mutex;
45
46static void InitializeOpenSSL() {
47 std::lock_guard<std::mutex> lock(g_openssl_init_mutex);
48 if (!g_openssl_initialized.exchange(true)) {
49 OPENSSL_init_ssl(
50 OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS,
51 nullptr);
52 std::cerr << "✓ OpenSSL initialized for HTTPS support" << std::endl;
53 }
54}
55#endif
56#endif
57
58namespace yaze {
59namespace cli {
60
61GeminiAIService::GeminiAIService(const GeminiConfig& config)
62 : function_calling_enabled_(config.use_function_calling), config_(config) {
63 if (config_.verbose) {
64 std::cerr << "[DEBUG] Initializing Gemini service..." << std::endl;
65 std::cerr << "[DEBUG] Function calling: "
66 << (function_calling_enabled_ ? "enabled" : "disabled")
67 << std::endl;
68 std::cerr << "[DEBUG] Prompt version: " << config_.prompt_version
69 << std::endl;
70 }
71
72#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
73 // Initialize OpenSSL for HTTPS support
74 InitializeOpenSSL();
75 if (config_.verbose) {
76 std::cerr << "[DEBUG] OpenSSL initialized for HTTPS" << std::endl;
77 }
78#endif
79
80 // Load command documentation into prompt builder with specified version
81 std::string catalogue_path = config_.prompt_version == "v2"
82 ? "assets/agent/prompt_catalogue_v2.yaml"
83 : "assets/agent/prompt_catalogue.yaml";
84 if (auto status = prompt_builder_.LoadResourceCatalogue(catalogue_path);
85 !status.ok()) {
86 std::cerr << "⚠️ Failed to load agent prompt catalogue: "
87 << status.message() << std::endl;
88 }
89
90 if (config_.verbose) {
91 std::cerr << "[DEBUG] Loaded prompt catalogue" << std::endl;
92 }
93
94 if (config_.system_instruction.empty()) {
95 if (config_.verbose) {
96 std::cerr << "[DEBUG] Building system instruction..." << std::endl;
97 }
98
99 // Try to load version-specific system prompt file using FindAsset
100 std::string prompt_file;
101 if (config_.prompt_version == "v3") {
102 prompt_file = "agent/system_prompt_v3.txt";
103 } else if (config_.prompt_version == "v2") {
104 prompt_file = "agent/system_prompt_v2.txt";
105 } else {
106 prompt_file = "agent/system_prompt.txt";
107 }
108
109 auto prompt_path = util::PlatformPaths::FindAsset(prompt_file);
110 bool loaded = false;
111
112 if (prompt_path.ok()) {
113 std::ifstream file(prompt_path->string());
114 if (file.good()) {
115 std::stringstream buffer;
116 buffer << file.rdbuf();
117 config_.system_instruction = buffer.str();
118 if (config_.verbose) {
119 std::cerr << "[DEBUG] Loaded prompt: " << prompt_path->string()
120 << std::endl;
121 }
122 loaded = true;
123 }
124 }
125
126 if (!loaded) {
127 // Fallback to builder
128 if (config_.use_enhanced_prompting) {
129 config_.system_instruction =
130 prompt_builder_.BuildSystemInstructionWithExamples();
131 } else {
132 config_.system_instruction = BuildSystemInstruction();
133 }
134 }
135 }
136
137 if (config_.verbose) {
138 std::cerr << "[DEBUG] Gemini service initialized" << std::endl;
139 }
140}
141
142void GeminiAIService::EnableFunctionCalling(bool enable) {
143 function_calling_enabled_ = enable;
144}
145
146std::vector<std::string> GeminiAIService::GetAvailableTools() const {
147 return {"resource-list", "resource-search",
148 "dungeon-list-sprites", "dungeon-describe-room",
149 "overworld-find-tile", "overworld-describe-map",
150 "overworld-list-warps"};
151}
152
153std::string GeminiAIService::BuildFunctionCallSchemas() {
154#ifndef YAZE_WITH_JSON
155 return "{}"; // Empty object if JSON not available
156#else
157 // Use the prompt builder's schema generation which reads from
158 // prompt_catalogue.yaml
159 std::string schemas = prompt_builder_.BuildFunctionCallSchemas();
160 if (!schemas.empty() && schemas != "[]") {
161 return schemas;
162 }
163
164 // Fallback: Search for function_schemas.json using FindAsset
165 auto schema_path_or =
166 util::PlatformPaths::FindAsset("agent/function_schemas.json");
167
168 if (!schema_path_or.ok()) {
169 if (config_.verbose) {
170 std::cerr << "⚠️ Function schemas file not found: "
171 << schema_path_or.status().message() << std::endl;
172 }
173 return "[]"; // Return empty array as fallback
174 }
175
176 // Load and parse the JSON file
177 std::ifstream file(schema_path_or->string());
178 if (!file.is_open()) {
179 std::cerr << "⚠️ Failed to open function schemas file: "
180 << schema_path_or->string() << std::endl;
181 return "[]";
182 }
183
184 try {
185 nlohmann::json schemas_json;
186 file >> schemas_json;
187 return schemas_json.dump();
188 } catch (const nlohmann::json::exception& e) {
189 std::cerr << "⚠️ Failed to parse function schemas JSON: " << e.what()
190 << std::endl;
191 return "[]";
192 }
193#endif
194}
195
196std::string GeminiAIService::BuildSystemInstruction() {
197 // Fallback prompt if enhanced prompting is disabled
198 // Use PromptBuilder's basic system instruction
199 return prompt_builder_.BuildSystemInstruction();
200}
201
202void GeminiAIService::SetRomContext(Rom* rom) {
203 prompt_builder_.SetRom(rom);
204}
205
206absl::StatusOr<std::vector<ModelInfo>> GeminiAIService::ListAvailableModels() {
207#ifndef YAZE_WITH_JSON
208 return absl::UnimplementedError("Gemini AI service requires JSON support");
209#else
210 if (config_.api_key.empty()) {
211 // Return default known models if API key is missing
212 std::vector<ModelInfo> defaults = {
213 {.name = "gemini-3.0-preview",
214 .display_name = "Gemini 3.0 Preview",
215 .provider = "gemini",
216 .description = "Cutting-edge model, currently in preview"},
217 {.name = "gemini-3.0-flash-preview",
218 .display_name = "Gemini 3.0 Flash Preview",
219 .provider = "gemini",
220 .description = "Fastest preview model"},
221 {.name = "gemini-2.5-pro",
222 .display_name = "Gemini 2.5 Pro",
223 .provider = "gemini",
224 .description = "High intelligence for complex tasks"},
225 {.name = "gemini-2.5-flash",
226 .display_name = "Gemini 2.5 Flash",
227 .provider = "gemini",
228 .description = "Fastest multimodal model"}};
229 return defaults;
230 }
231
232 try {
233 std::string endpoint =
234 "https://generativelanguage.googleapis.com/v1beta/models?key=" +
235 config_.api_key;
236
237 if (config_.verbose) {
238 std::cerr << "[DEBUG] Listing models: "
239 << endpoint.substr(0, endpoint.find("key=")) << "...'"
240 << std::endl;
241 }
242
243 std::string response_str;
244#if defined(YAZE_AI_IOS_URLSESSION)
245 auto resp_or = ios::UrlSessionHttpRequest("GET", endpoint, {}, "", 8000);
246 if (!resp_or.ok()) {
247 if (config_.verbose) {
248 std::cerr << "[DEBUG] Gemini models request failed: "
249 << resp_or.status().message() << std::endl;
250 }
251 return absl::InternalError("Failed to list Gemini models");
252 }
253 response_str = resp_or->body;
254#else
255 // Use curl to list models from the API
256 std::string curl_cmd = "curl -s -X GET '" + endpoint + "' 2>&1";
257
258#ifdef _WIN32
259 FILE* pipe = _popen(curl_cmd.c_str(), "r");
260#else
261 FILE* pipe = popen(curl_cmd.c_str(), "r");
262#endif
263 if (!pipe) {
264 return absl::InternalError("Failed to execute curl command");
265 }
266
267 char buffer[4096];
268 while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
269 response_str += buffer;
270 }
271
272#ifdef _WIN32
273 _pclose(pipe);
274#else
275 pclose(pipe);
276#endif
277#endif // YAZE_AI_IOS_URLSESSION
278
279 auto models_json = nlohmann::json::parse(response_str, nullptr, false);
280 if (models_json.is_discarded()) {
281 return absl::InternalError("Failed to parse Gemini models JSON");
282 }
283
284 if (!models_json.contains("models")) {
285 // Return defaults on error
286 std::vector<ModelInfo> defaults = {{.name = "gemini-2.5-flash",
287 .display_name = "Gemini 2.0 Flash",
288 .provider = "gemini"},
289 {.name = "gemini-1.5-flash",
290 .display_name = "Gemini 1.5 Flash",
291 .provider = "gemini"},
292 {.name = "gemini-1.5-pro",
293 .display_name = "Gemini 1.5 Pro",
294 .provider = "gemini"}};
295 return defaults;
296 }
297
298 std::vector<ModelInfo> models;
299 for (const auto& m : models_json["models"]) {
300 std::string name = m.value("name", "");
301 // Name comes as "models/gemini-pro", strip prefix
302 if (absl::StartsWith(name, "models/")) {
303 name = name.substr(7);
304 }
305
306 // Filter for gemini models
307 if (absl::StartsWith(name, "gemini")) {
308 ModelInfo info;
309 info.name = name;
310 info.display_name = m.value("displayName", name);
311 info.provider = "gemini";
312 info.description = m.value("description", "");
313 info.family = "gemini";
314 info.is_local = false;
315 models.push_back(std::move(info));
316 }
317 }
318 return models;
319
320 } catch (const std::exception& e) {
321 return absl::InternalError(
322 absl::StrCat("Failed to list models: ", e.what()));
323 }
324#endif
325}
326
327absl::Status GeminiAIService::CheckAvailability() {
328#ifndef YAZE_WITH_JSON
329 return absl::UnimplementedError(
330 "Gemini AI service requires JSON support. Build with "
331 "-DYAZE_WITH_JSON=ON");
332#else
333 try {
334 if (config_.verbose) {
335 std::cerr << "[DEBUG] CheckAvailability: start" << std::endl;
336 }
337
338 if (config_.api_key.empty()) {
339 return absl::FailedPreconditionError(
340 "❌ Gemini API key not configured\n"
341 " Set GEMINI_API_KEY environment variable\n"
342 " Get your API key at: https://makersuite.google.com/app/apikey");
343 }
344
345 if (config_.verbose) {
346 std::cerr << "[DEBUG] CheckAvailability: creating HTTPS client"
347 << std::endl;
348 }
349 // Test API connectivity with a simple request
350 httplib::Client cli("https://generativelanguage.googleapis.com");
351 if (config_.verbose) {
352 std::cerr << "[DEBUG] CheckAvailability: client created" << std::endl;
353 }
354
355 cli.set_connection_timeout(5, 0); // 5 seconds timeout
356
357 if (config_.verbose) {
358 std::cerr << "[DEBUG] CheckAvailability: building endpoint" << std::endl;
359 }
360 std::string test_endpoint = "/v1beta/models/" + config_.model;
361 httplib::Headers headers = {
362 {"x-goog-api-key", config_.api_key},
363 };
364
365 if (config_.verbose) {
366 std::cerr << "[DEBUG] CheckAvailability: making request to "
367 << test_endpoint << std::endl;
368 }
369 auto res = cli.Get(test_endpoint.c_str(), headers);
370
371 if (config_.verbose) {
372 std::cerr << "[DEBUG] CheckAvailability: got response" << std::endl;
373 }
374
375 if (!res) {
376 return absl::UnavailableError(
377 "❌ Cannot reach Gemini API\n"
378 " Check your internet connection");
379 }
380
381 if (res->status == 401 || res->status == 403) {
382 return absl::PermissionDeniedError(
383 "❌ Invalid Gemini API key\n"
384 " Verify your key at: https://makersuite.google.com/app/apikey");
385 }
386
387 if (res->status == 404) {
388 return absl::NotFoundError(
389 absl::StrCat("❌ Model '", config_.model, "' not found\n",
390 " Try: gemini-2.5-flash or gemini-1.5-pro"));
391 }
392
393 if (res->status != 200) {
394 return absl::InternalError(absl::StrCat(
395 "❌ Gemini API error: ", res->status, "\n ", res->body));
396 }
397
398 return absl::OkStatus();
399 } catch (const std::exception& e) {
400 if (config_.verbose) {
401 std::cerr << "[DEBUG] CheckAvailability: EXCEPTION: " << e.what()
402 << std::endl;
403 }
404 return absl::InternalError(
405 absl::StrCat("Exception during availability check: ", e.what()));
406 } catch (...) {
407 if (config_.verbose) {
408 std::cerr << "[DEBUG] CheckAvailability: UNKNOWN EXCEPTION" << std::endl;
409 }
410 return absl::InternalError("Unknown exception during availability check");
411 }
412#endif
413}
414
415absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
416 const std::string& prompt) {
417 return GenerateResponse(
418 {{{agent::ChatMessage::Sender::kUser, prompt, absl::Now()}}});
419}
420
421absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
422 const std::vector<agent::ChatMessage>& history) {
423#ifndef YAZE_WITH_JSON
424 return absl::UnimplementedError(
425 "Gemini AI service requires JSON support. Build with "
426 "-DYAZE_WITH_JSON=ON");
427#else
428 if (history.empty()) {
429 return absl::InvalidArgumentError("History cannot be empty.");
430 }
431
432 // Build a structured conversation history for better context
433 // Gemini supports multi-turn conversations via the contents array
434 std::string prompt = prompt_builder_.BuildPromptFromHistory(history);
435
436 // Skip availability check - causes segfault with current SSL setup
437 // TODO: Fix SSL/TLS initialization issue
438 // if (auto status = CheckAvailability(); !status.ok()) {
439 // return status;
440 // }
441
442 if (config_.api_key.empty()) {
443 return absl::FailedPreconditionError("Gemini API key not configured");
444 }
445
446 absl::Time request_start = absl::Now();
447
448 try {
449 if (config_.verbose) {
450 std::cerr << "[DEBUG] Using curl for HTTPS request" << std::endl;
451 std::cerr << "[DEBUG] Processing " << history.size()
452 << " messages in history" << std::endl;
453 }
454
455 // Build conversation history for multi-turn context
456 // Gemini supports alternating user/model messages for better context
457 nlohmann::json contents = nlohmann::json::array();
458
459 // Add conversation history (up to last 10 messages for context window)
460 int start_idx = std::max(0, static_cast<int>(history.size()) - 10);
461 for (size_t i = start_idx; i < history.size(); ++i) {
462 const auto& msg = history[i];
463 std::string role =
464 (msg.sender == agent::ChatMessage::Sender::kUser) ? "user" : "model";
465
466 nlohmann::json message = {{"role", role},
467 {"parts", {{{"text", msg.message}}}}};
468 contents.push_back(message);
469 }
470
471 // If the last message is from the model, we need to ensure the conversation
472 // ends with a user message for Gemini
473 if (!history.empty() &&
474 history.back().sender == agent::ChatMessage::Sender::kAgent) {
475 // Add a continuation prompt
476 nlohmann::json user_continuation = {
477 {"role", "user"},
478 {"parts", {{{"text", "Please continue or clarify your response."}}}}};
479 contents.push_back(user_continuation);
480 }
481
482 // Build request with proper Gemini API v1beta format
483 nlohmann::json request_body = {
484 {"system_instruction",
485 {{"parts", {{"text", config_.system_instruction}}}}},
486 {"contents", contents},
487 {"generationConfig",
488 {{"temperature", config_.temperature},
489 {"maxOutputTokens", config_.max_output_tokens}}}};
490
491 if (config_.verbose) {
492 std::cerr << "[DEBUG] Sending " << contents.size()
493 << " conversation turns to Gemini" << std::endl;
494 }
495
496 // Only add responseMimeType if NOT using function calling
497 // (Gemini doesn't support both at the same time)
498 if (!function_calling_enabled_) {
499 request_body["generationConfig"]["responseMimeType"] = "application/json";
500 }
501
502 // Add function calling tools if enabled
503 if (function_calling_enabled_) {
504 try {
505 std::string schemas_str = BuildFunctionCallSchemas();
506 if (config_.verbose) {
507 std::cerr << "[DEBUG] Function calling schemas: "
508 << schemas_str.substr(0, 200) << "..." << std::endl;
509 }
510
511 nlohmann::json schemas = nlohmann::json::parse(schemas_str);
512
513 // Build tools array - schemas might be an array of tools or a
514 // function_declarations object
515 if (schemas.is_array()) {
516 // If it's already an array of tools, use it directly
517 request_body["tools"] = {{{"function_declarations", schemas}}};
518 } else if (schemas.is_object() &&
519 schemas.contains("function_declarations")) {
520 // If it's a wrapper object with function_declarations
521 request_body["tools"] = {
522 {{"function_declarations", schemas["function_declarations"]}}};
523 } else {
524 // Treat as single tool object
525 request_body["tools"] = {
526 {{"function_declarations", nlohmann::json::array({schemas})}}};
527 }
528 } catch (const nlohmann::json::exception& e) {
529 std::cerr << "⚠️ Failed to parse function schemas: " << e.what()
530 << std::endl;
531 }
532 }
533
534 std::string endpoint =
535 "https://generativelanguage.googleapis.com/v1beta/models/" +
536 config_.model + ":generateContent";
537 std::string response_str;
538#if defined(YAZE_AI_IOS_URLSESSION)
539 std::map<std::string, std::string> headers;
540 headers.emplace("Content-Type", "application/json");
541 headers.emplace("x-goog-api-key", config_.api_key);
542 auto resp_or = ios::UrlSessionHttpRequest(
543 "POST", endpoint, headers, request_body.dump(), 60000);
544 if (!resp_or.ok()) {
545 return resp_or.status();
546 }
547 if (resp_or->status_code != 200) {
548 return absl::InternalError(
549 absl::StrCat("Gemini API error: ", resp_or->status_code, "\n",
550 resp_or->body));
551 }
552 response_str = resp_or->body;
553#else
554 // Write request body to temp file
555 std::string temp_file = "/tmp/gemini_request.json";
556 std::ofstream out(temp_file);
557 out << request_body.dump();
558 out.close();
559
560 // Use curl to make the request (avoiding httplib SSL issues)
561 std::string curl_cmd = "curl -s -X POST '" + endpoint +
562 "' "
563 "-H 'Content-Type: application/json' "
564 "-H 'x-goog-api-key: " +
565 config_.api_key +
566 "' "
567 "-d @" +
568 temp_file + " 2>&1";
569
570 if (config_.verbose) {
571 std::cerr << "[DEBUG] Executing API request..." << std::endl;
572 }
573
574#ifdef _WIN32
575 FILE* pipe = _popen(curl_cmd.c_str(), "r");
576#else
577 FILE* pipe = popen(curl_cmd.c_str(), "r");
578#endif
579 if (!pipe) {
580 return absl::InternalError("Failed to execute curl command");
581 }
582
583 char buffer[4096];
584 while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
585 response_str += buffer;
586 }
587
588#ifdef _WIN32
589 int status = _pclose(pipe);
590#else
591 int status = pclose(pipe);
592#endif
593 std::remove(temp_file.c_str());
594
595 if (status != 0) {
596 return absl::InternalError(
597 absl::StrCat("Curl failed with status ", status));
598 }
599#endif // YAZE_AI_IOS_URLSESSION
600
601 if (response_str.empty()) {
602 return absl::InternalError("Empty response from Gemini API");
603 }
604
605 // Debug: print response
606 if (config_.verbose) {
607 std::cout << "\n"
608 << "\033[35m"
609 << "🔍 Raw Gemini API Response:"
610 << "\033[0m"
611 << "\n"
612 << "\033[2m" << response_str.substr(0, 500) << "\033[0m"
613 << "\n\n";
614 }
615
616 if (config_.verbose) {
617 std::cerr << "[DEBUG] Parsing response..." << std::endl;
618 }
619 auto parsed_or = ParseGeminiResponse(response_str);
620 if (!parsed_or.ok()) {
621 return parsed_or.status();
622 }
623 AgentResponse agent_response = std::move(parsed_or.value());
624 agent_response.provider = "gemini";
625 agent_response.model = config_.model;
626 agent_response.latency_seconds =
627 absl::ToDoubleSeconds(absl::Now() - request_start);
628 agent_response.parameters["prompt_version"] = config_.prompt_version;
629 agent_response.parameters["temperature"] =
630 absl::StrFormat("%.2f", config_.temperature);
631 agent_response.parameters["max_output_tokens"] =
632 absl::StrFormat("%d", config_.max_output_tokens);
633 agent_response.parameters["function_calling"] =
634 function_calling_enabled_ ? "true" : "false";
635 return agent_response;
636
637 } catch (const std::exception& e) {
638 if (config_.verbose) {
639 std::cerr << "[ERROR] Exception: " << e.what() << std::endl;
640 }
641 return absl::InternalError(
642 absl::StrCat("Exception during generation: ", e.what()));
643 } catch (...) {
644 if (config_.verbose) {
645 std::cerr << "[ERROR] Unknown exception" << std::endl;
646 }
647 return absl::InternalError("Unknown exception during generation");
648 }
649#endif
650}
651
652absl::StatusOr<AgentResponse> GeminiAIService::ParseGeminiResponse(
653 const std::string& response_body) {
654#ifndef YAZE_WITH_JSON
655 return absl::UnimplementedError("JSON support required");
656#else
657 AgentResponse agent_response;
658
659 auto response_json = nlohmann::json::parse(response_body, nullptr, false);
660 if (response_json.is_discarded()) {
661 return absl::InternalError("❌ Failed to parse Gemini response JSON");
662 }
663
664 // Navigate Gemini's response structure
665 if (!response_json.contains("candidates") ||
666 response_json["candidates"].empty()) {
667 return absl::InternalError("❌ No candidates in Gemini response");
668 }
669
670 for (const auto& candidate : response_json["candidates"]) {
671 if (!candidate.contains("content") ||
672 !candidate["content"].contains("parts")) {
673 continue;
674 }
675
676 for (const auto& part : candidate["content"]["parts"]) {
677 if (part.contains("text")) {
678 std::string text_content = part["text"].get<std::string>();
679
680 // Debug: Print raw LLM output when verbose mode is enabled
681 if (config_.verbose) {
682 std::cout << "\n"
683 << "\033[35m"
684 << "🔍 Raw LLM Response:"
685 << "\033[0m"
686 << "\n"
687 << "\033[2m" << text_content << "\033[0m"
688 << "\n\n";
689 }
690
691 // Strip markdown code blocks if present (```json ... ```)
692 text_content = std::string(absl::StripAsciiWhitespace(text_content));
693 if (absl::StartsWith(text_content, "```json")) {
694 text_content = text_content.substr(7); // Remove ```json
695 } else if (absl::StartsWith(text_content, "```")) {
696 text_content = text_content.substr(3); // Remove ```
697 }
698 if (absl::EndsWith(text_content, "```")) {
699 text_content = text_content.substr(0, text_content.length() - 3);
700 }
701 text_content = std::string(absl::StripAsciiWhitespace(text_content));
702
703 // Try to parse as JSON object
704 auto parsed_text = nlohmann::json::parse(text_content, nullptr, false);
705 if (!parsed_text.is_discarded()) {
706 // Extract text_response
707 if (parsed_text.contains("text_response") &&
708 parsed_text["text_response"].is_string()) {
709 agent_response.text_response =
710 parsed_text["text_response"].get<std::string>();
711 }
712
713 // Extract reasoning
714 if (parsed_text.contains("reasoning") &&
715 parsed_text["reasoning"].is_string()) {
716 agent_response.reasoning =
717 parsed_text["reasoning"].get<std::string>();
718 }
719
720 // Extract commands
721 if (parsed_text.contains("commands") &&
722 parsed_text["commands"].is_array()) {
723 for (const auto& cmd : parsed_text["commands"]) {
724 if (cmd.is_string()) {
725 std::string command = cmd.get<std::string>();
726 if (absl::StartsWith(command, "z3ed ")) {
727 command = command.substr(5);
728 }
729 agent_response.commands.push_back(command);
730 }
731 }
732 }
733
734 // Extract tool_calls from the parsed JSON
735 if (parsed_text.contains("tool_calls") &&
736 parsed_text["tool_calls"].is_array()) {
737 for (const auto& call : parsed_text["tool_calls"]) {
738 if (call.contains("tool_name") && call["tool_name"].is_string()) {
739 ToolCall tool_call;
740 tool_call.tool_name = call["tool_name"].get<std::string>();
741
742 if (call.contains("args") && call["args"].is_object()) {
743 for (auto& [key, value] : call["args"].items()) {
744 if (value.is_string()) {
745 tool_call.args[key] = value.get<std::string>();
746 } else if (value.is_number()) {
747 tool_call.args[key] = std::to_string(value.get<double>());
748 } else if (value.is_boolean()) {
749 tool_call.args[key] =
750 value.get<bool>() ? "true" : "false";
751 }
752 }
753 }
754 agent_response.tool_calls.push_back(tool_call);
755 }
756 }
757 }
758 } else {
759 // If parsing the full object fails, fallback to extracting commands
760 // from text
761 std::vector<std::string> lines = absl::StrSplit(text_content, '\n');
762 for (const auto& line : lines) {
763 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
764 if (!trimmed.empty() && (absl::StartsWith(trimmed, "z3ed ") ||
765 absl::StartsWith(trimmed, "palette ") ||
766 absl::StartsWith(trimmed, "overworld ") ||
767 absl::StartsWith(trimmed, "sprite ") ||
768 absl::StartsWith(trimmed, "dungeon "))) {
769 if (absl::StartsWith(trimmed, "z3ed ")) {
770 trimmed = trimmed.substr(5);
771 }
772 agent_response.commands.push_back(trimmed);
773 }
774 }
775 }
776 } else if (part.contains("functionCall")) {
777 const auto& call = part["functionCall"];
778 if (call.contains("name") && call["name"].is_string()) {
779 ToolCall tool_call;
780 tool_call.tool_name = call["name"].get<std::string>();
781 if (call.contains("args") && call["args"].is_object()) {
782 for (auto& [key, value] : call["args"].items()) {
783 if (value.is_string()) {
784 tool_call.args[key] = value.get<std::string>();
785 } else if (value.is_number()) {
786 tool_call.args[key] = std::to_string(value.get<double>());
787 }
788 }
789 }
790 agent_response.tool_calls.push_back(tool_call);
791 }
792 }
793 }
794 }
795
796 if (agent_response.text_response.empty() && agent_response.commands.empty() &&
797 agent_response.tool_calls.empty()) {
798 return absl::InternalError(
799 "❌ No valid response extracted from Gemini\n"
800 " Expected at least one of: text_response, commands, or tool_calls\n"
801 " Raw response: " +
802 response_body);
803 }
804
805 return agent_response;
806#endif
807}
808
809absl::StatusOr<std::string> GeminiAIService::EncodeImageToBase64(
810 const std::string& image_path) const {
811#ifndef YAZE_WITH_JSON
812 (void)image_path; // Suppress unused parameter warning
813 return absl::UnimplementedError(
814 "Gemini AI service requires JSON support. Build with "
815 "-DYAZE_WITH_JSON=ON");
816#else
817 std::ifstream file(image_path, std::ios::binary);
818 if (!file.is_open()) {
819 return absl::NotFoundError(
820 absl::StrCat("Failed to open image file: ", image_path));
821 }
822
823 // Read file into buffer
824 file.seekg(0, std::ios::end);
825 size_t size = file.tellg();
826 file.seekg(0, std::ios::beg);
827
828 std::vector<unsigned char> buffer(size);
829 if (!file.read(reinterpret_cast<char*>(buffer.data()), size)) {
830 return absl::InternalError("Failed to read image file");
831 }
832
833 // Base64 encode
834 static const char* base64_chars =
835 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
836
837 std::string encoded;
838 encoded.reserve(((size + 2) / 3) * 4);
839
840 int i = 0;
841 int j = 0;
842 unsigned char char_array_3[3];
843 unsigned char char_array_4[4];
844
845 for (size_t idx = 0; idx < size; idx++) {
846 char_array_3[i++] = buffer[idx];
847 if (i == 3) {
848 char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
849 char_array_4[1] =
850 ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
851 char_array_4[2] =
852 ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
853 char_array_4[3] = char_array_3[2] & 0x3f;
854
855 for (i = 0; i < 4; i++)
856 encoded += base64_chars[char_array_4[i]];
857 i = 0;
858 }
859 }
860
861 if (i) {
862 for (j = i; j < 3; j++)
863 char_array_3[j] = '\0';
864
865 char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
866 char_array_4[1] =
867 ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
868 char_array_4[2] =
869 ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
870
871 for (j = 0; j < i + 1; j++)
872 encoded += base64_chars[char_array_4[j]];
873
874 while (i++ < 3)
875 encoded += '=';
876 }
877
878 return encoded;
879#endif
880}
881
882absl::StatusOr<AgentResponse> GeminiAIService::GenerateMultimodalResponse(
883 const std::string& image_path, const std::string& prompt) {
884#ifndef YAZE_WITH_JSON
885 (void)image_path; // Suppress unused parameter warnings
886 (void)prompt;
887 return absl::UnimplementedError(
888 "Gemini AI service requires JSON support. Build with "
889 "-DYAZE_WITH_JSON=ON");
890#else
891 if (config_.api_key.empty()) {
892 return absl::FailedPreconditionError("Gemini API key not configured");
893 }
894
895 // Determine MIME type from file extension
896 std::string mime_type = "image/png";
897 if (image_path.ends_with(".jpg") || image_path.ends_with(".jpeg")) {
898 mime_type = "image/jpeg";
899 } else if (image_path.ends_with(".bmp")) {
900 mime_type = "image/bmp";
901 } else if (image_path.ends_with(".webp")) {
902 mime_type = "image/webp";
903 }
904
905 // Encode image to base64
906 auto encoded_or = EncodeImageToBase64(image_path);
907 if (!encoded_or.ok()) {
908 return encoded_or.status();
909 }
910 std::string encoded_image = std::move(encoded_or.value());
911
912 try {
913 if (config_.verbose) {
914 std::cerr << "[DEBUG] Preparing multimodal request with image"
915 << std::endl;
916 }
917
918 // Build multimodal request with image and text
919 nlohmann::json request_body = {
920 {"contents",
921 {{{"parts",
922 {{{"inline_data",
923 {{"mime_type", mime_type}, {"data", encoded_image}}}},
924 {{"text", prompt}}}}}}},
925 {"generationConfig",
926 {{"temperature", config_.temperature},
927 {"maxOutputTokens", config_.max_output_tokens}}}};
928
929 std::string endpoint =
930 "https://generativelanguage.googleapis.com/v1beta/models/" +
931 config_.model + ":generateContent";
932 std::string response_str;
933#if defined(YAZE_AI_IOS_URLSESSION)
934 std::map<std::string, std::string> headers;
935 headers.emplace("Content-Type", "application/json");
936 headers.emplace("x-goog-api-key", config_.api_key);
937 auto resp_or = ios::UrlSessionHttpRequest(
938 "POST", endpoint, headers, request_body.dump(), 60000);
939 if (!resp_or.ok()) {
940 return resp_or.status();
941 }
942 if (resp_or->status_code != 200) {
943 return absl::InternalError(
944 absl::StrCat("Gemini API error: ", resp_or->status_code, "\n",
945 resp_or->body));
946 }
947 response_str = resp_or->body;
948#else
949 // Write request body to temp file
950 std::string temp_file = "/tmp/gemini_multimodal_request.json";
951 std::ofstream out(temp_file);
952 out << request_body.dump();
953 out.close();
954
955 // Use curl to make the request
956 std::string curl_cmd = "curl -s -X POST '" + endpoint +
957 "' "
958 "-H 'Content-Type: application/json' "
959 "-H 'x-goog-api-key: " +
960 config_.api_key +
961 "' "
962 "-d @" +
963 temp_file + " 2>&1";
964
965 if (config_.verbose) {
966 std::cerr << "[DEBUG] Executing multimodal API request..." << std::endl;
967 }
968
969#ifdef _WIN32
970 FILE* pipe = _popen(curl_cmd.c_str(), "r");
971#else
972 FILE* pipe = popen(curl_cmd.c_str(), "r");
973#endif
974 if (!pipe) {
975 return absl::InternalError("Failed to execute curl command");
976 }
977
978 char buffer[4096];
979 while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
980 response_str += buffer;
981 }
982
983#ifdef _WIN32
984 int status = _pclose(pipe);
985#else
986 int status = pclose(pipe);
987#endif
988 std::remove(temp_file.c_str());
989
990 if (status != 0) {
991 return absl::InternalError(
992 absl::StrCat("Curl failed with status ", status));
993 }
994#endif // YAZE_AI_IOS_URLSESSION
995
996 if (response_str.empty()) {
997 return absl::InternalError("Empty response from Gemini API");
998 }
999
1000 if (config_.verbose) {
1001 std::cout << "\n"
1002 << "\033[35m"
1003 << "🔍 Raw Gemini Multimodal Response:"
1004 << "\033[0m"
1005 << "\n"
1006 << "\033[2m" << response_str.substr(0, 500) << "\033[0m"
1007 << "\n\n";
1008 }
1009
1010 return ParseGeminiResponse(response_str);
1011
1012 } catch (const std::exception& e) {
1013 if (config_.verbose) {
1014 std::cerr << "[ERROR] Exception: " << e.what() << std::endl;
1015 }
1016 return absl::InternalError(
1017 absl::StrCat("Exception during multimodal generation: ", e.what()));
1018 }
1019#endif
1020}
1021
1022} // namespace cli
1023} // namespace yaze
GeminiAIService(const GeminiConfig &)