46 : function_calling_enabled_(config.use_function_calling), config_(config) {
48 std::cerr <<
"[DEBUG] Initializing Gemini service..." << std::endl;
53#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
57 std::cerr <<
"[DEBUG] OpenSSL initialized for HTTPS" << std::endl;
63 ?
"assets/agent/prompt_catalogue_v2.yaml"
64 :
"assets/agent/prompt_catalogue.yaml";
66 std::cerr <<
"⚠️ Failed to load agent prompt catalogue: "
67 << status.message() << std::endl;
71 std::cerr <<
"[DEBUG] Loaded prompt catalogue" << std::endl;
76 std::cerr <<
"[DEBUG] Building system instruction..." << std::endl;
80 std::string prompt_file;
82 prompt_file =
"agent/system_prompt_v3.txt";
84 prompt_file =
"agent/system_prompt_v2.txt";
86 prompt_file =
"agent/system_prompt.txt";
92 if (prompt_path.ok()) {
93 std::ifstream file(prompt_path->string());
95 std::stringstream buffer;
96 buffer << file.rdbuf();
99 std::cerr <<
"[DEBUG] Loaded prompt: " << prompt_path->string() << std::endl;
116 std::cerr <<
"[DEBUG] Gemini service initialized" << std::endl;
137#ifndef YAZE_WITH_JSON
142 if (!schemas.empty() && schemas !=
"[]") {
149 if (!schema_path_or.ok()) {
151 std::cerr <<
"⚠️ Function schemas file not found: "
152 << schema_path_or.status().message() << std::endl;
158 std::ifstream file(schema_path_or->string());
159 if (!file.is_open()) {
160 std::cerr <<
"⚠️ Failed to open function schemas file: "
161 << schema_path_or->string() << std::endl;
166 nlohmann::json schemas_json;
167 file >> schemas_json;
168 return schemas_json.dump();
169 }
catch (
const nlohmann::json::exception& e) {
170 std::cerr <<
"⚠️ Failed to parse function schemas JSON: "
171 << e.what() << std::endl;
188#ifndef YAZE_WITH_JSON
189 return absl::UnimplementedError(
190 "Gemini AI service requires JSON support. Build with -DYAZE_WITH_JSON=ON");
194 std::cerr <<
"[DEBUG] CheckAvailability: start" << std::endl;
198 return absl::FailedPreconditionError(
199 "❌ Gemini API key not configured\n"
200 " Set GEMINI_API_KEY environment variable\n"
201 " Get your API key at: https://makersuite.google.com/app/apikey");
205 std::cerr <<
"[DEBUG] CheckAvailability: creating HTTPS client" << std::endl;
208 httplib::Client cli(
"https://generativelanguage.googleapis.com");
210 std::cerr <<
"[DEBUG] CheckAvailability: client created" << std::endl;
213 cli.set_connection_timeout(5, 0);
216 std::cerr <<
"[DEBUG] CheckAvailability: building endpoint" << std::endl;
218 std::string test_endpoint =
"/v1beta/models/" +
config_.
model;
219 httplib::Headers headers = {
224 std::cerr <<
"[DEBUG] CheckAvailability: making request to " << test_endpoint << std::endl;
226 auto res = cli.Get(test_endpoint.c_str(), headers);
229 std::cerr <<
"[DEBUG] CheckAvailability: got response" << std::endl;
233 return absl::UnavailableError(
234 "❌ Cannot reach Gemini API\n"
235 " Check your internet connection");
238 if (res->status == 401 || res->status == 403) {
239 return absl::PermissionDeniedError(
240 "❌ Invalid Gemini API key\n"
241 " Verify your key at: https://makersuite.google.com/app/apikey");
244 if (res->status == 404) {
245 return absl::NotFoundError(
246 absl::StrCat(
"❌ Model '",
config_.
model,
"' not found\n",
247 " Try: gemini-2.5-flash or gemini-1.5-pro"));
250 if (res->status != 200) {
251 return absl::InternalError(
252 absl::StrCat(
"❌ Gemini API error: ", res->status,
"\n ", res->body));
255 return absl::OkStatus();
256 }
catch (
const std::exception& e) {
258 std::cerr <<
"[DEBUG] CheckAvailability: EXCEPTION: " << e.what() << std::endl;
260 return absl::InternalError(absl::StrCat(
"Exception during availability check: ", e.what()));
263 std::cerr <<
"[DEBUG] CheckAvailability: UNKNOWN EXCEPTION" << std::endl;
265 return absl::InternalError(
"Unknown exception during availability check");
276 const std::vector<agent::ChatMessage>& history) {
277#ifndef YAZE_WITH_JSON
278 return absl::UnimplementedError(
279 "Gemini AI service requires JSON support. Build with -DYAZE_WITH_JSON=ON");
281 if (history.empty()) {
282 return absl::InvalidArgumentError(
"History cannot be empty.");
296 return absl::FailedPreconditionError(
"Gemini API key not configured");
301 std::cerr <<
"[DEBUG] Using curl for HTTPS request" << std::endl;
302 std::cerr <<
"[DEBUG] Processing " << history.size() <<
" messages in history" << std::endl;
307 nlohmann::json contents = nlohmann::json::array();
310 int start_idx = std::max(0,
static_cast<int>(history.size()) - 10);
311 for (
size_t i = start_idx; i < history.size(); ++i) {
312 const auto& msg = history[i];
315 nlohmann::json message = {
318 {
"text", msg.message}
321 contents.push_back(message);
326 if (!history.empty() &&
329 nlohmann::json user_continuation = {
332 {
"text",
"Please continue or clarify your response."}
335 contents.push_back(user_continuation);
339 nlohmann::json request_body = {
340 {
"system_instruction", {
345 {
"contents", contents},
346 {
"generationConfig", {
353 std::cerr <<
"[DEBUG] Sending " << contents.size() <<
" conversation turns to Gemini" << std::endl;
359 request_body[
"generationConfig"][
"responseMimeType"] =
"application/json";
367 std::cerr <<
"[DEBUG] Function calling schemas: " << schemas_str.substr(0, 200) <<
"..." << std::endl;
370 nlohmann::json schemas = nlohmann::json::parse(schemas_str);
373 if (schemas.is_array()) {
375 request_body[
"tools"] = {{
376 {
"function_declarations", schemas}
378 }
else if (schemas.is_object() && schemas.contains(
"function_declarations")) {
380 request_body[
"tools"] = {{
381 {
"function_declarations", schemas[
"function_declarations"]}
385 request_body[
"tools"] = {{
386 {
"function_declarations", nlohmann::json::array({schemas})}
389 }
catch (
const nlohmann::json::exception& e) {
390 std::cerr <<
"⚠️ Failed to parse function schemas: " << e.what() << std::endl;
395 std::string temp_file =
"/tmp/gemini_request.json";
396 std::ofstream out(temp_file);
397 out << request_body.dump();
401 std::string endpoint =
"https://generativelanguage.googleapis.com/v1beta/models/" +
403 std::string curl_cmd =
"curl -s -X POST '" + endpoint +
"' "
404 "-H 'Content-Type: application/json' "
406 "-d @" + temp_file +
" 2>&1";
409 std::cerr <<
"[DEBUG] Executing API request..." << std::endl;
413 FILE* pipe = _popen(curl_cmd.c_str(),
"r");
415 FILE* pipe = popen(curl_cmd.c_str(),
"r");
418 return absl::InternalError(
"Failed to execute curl command");
421 std::string response_str;
423 while (fgets(buffer,
sizeof(buffer), pipe) !=
nullptr) {
424 response_str += buffer;
428 int status = _pclose(pipe);
430 int status = pclose(pipe);
432 std::remove(temp_file.c_str());
435 return absl::InternalError(absl::StrCat(
"Curl failed with status ", status));
438 if (response_str.empty()) {
439 return absl::InternalError(
"Empty response from Gemini API");
444 std::cout <<
"\n" <<
"\033[35m" <<
"🔍 Raw Gemini API Response:" <<
"\033[0m" <<
"\n"
445 <<
"\033[2m" << response_str.substr(0, 500) <<
"\033[0m" <<
"\n\n";
449 std::cerr <<
"[DEBUG] Parsing response..." << std::endl;
453 }
catch (
const std::exception& e) {
455 std::cerr <<
"[ERROR] Exception: " << e.what() << std::endl;
457 return absl::InternalError(absl::StrCat(
"Exception during generation: ", e.what()));
460 std::cerr <<
"[ERROR] Unknown exception" << std::endl;
462 return absl::InternalError(
"Unknown exception during generation");
468 const std::string& response_body) {
469#ifndef YAZE_WITH_JSON
470 return absl::UnimplementedError(
"JSON support required");
474 auto response_json = nlohmann::json::parse(response_body,
nullptr,
false);
475 if (response_json.is_discarded()) {
476 return absl::InternalError(
"❌ Failed to parse Gemini response JSON");
480 if (!response_json.contains(
"candidates") ||
481 response_json[
"candidates"].empty()) {
482 return absl::InternalError(
"❌ No candidates in Gemini response");
485 for (
const auto& candidate : response_json[
"candidates"]) {
486 if (!candidate.contains(
"content") ||
487 !candidate[
"content"].contains(
"parts")) {
491 for (
const auto& part : candidate[
"content"][
"parts"]) {
492 if (part.contains(
"text")) {
493 std::string text_content = part[
"text"].get<std::string>();
497 std::cout <<
"\n" <<
"\033[35m" <<
"🔍 Raw LLM Response:" <<
"\033[0m" <<
"\n"
498 <<
"\033[2m" << text_content <<
"\033[0m" <<
"\n\n";
502 text_content = std::string(absl::StripAsciiWhitespace(text_content));
503 if (absl::StartsWith(text_content,
"```json")) {
504 text_content = text_content.substr(7);
505 }
else if (absl::StartsWith(text_content,
"```")) {
506 text_content = text_content.substr(3);
508 if (absl::EndsWith(text_content,
"```")) {
509 text_content = text_content.substr(0, text_content.length() - 3);
511 text_content = std::string(absl::StripAsciiWhitespace(text_content));
514 auto parsed_text = nlohmann::json::parse(text_content,
nullptr,
false);
515 if (!parsed_text.is_discarded()) {
517 if (parsed_text.contains(
"text_response") &&
518 parsed_text[
"text_response"].is_string()) {
520 parsed_text[
"text_response"].get<std::string>();
524 if (parsed_text.contains(
"reasoning") &&
525 parsed_text[
"reasoning"].is_string()) {
527 parsed_text[
"reasoning"].get<std::string>();
531 if (parsed_text.contains(
"commands") &&
532 parsed_text[
"commands"].is_array()) {
533 for (
const auto& cmd : parsed_text[
"commands"]) {
534 if (cmd.is_string()) {
535 std::string command = cmd.get<std::string>();
536 if (absl::StartsWith(command,
"z3ed ")) {
537 command = command.substr(5);
539 agent_response.
commands.push_back(command);
545 if (parsed_text.contains(
"tool_calls") &&
546 parsed_text[
"tool_calls"].is_array()) {
547 for (
const auto& call : parsed_text[
"tool_calls"]) {
548 if (call.contains(
"tool_name") && call[
"tool_name"].is_string()) {
550 tool_call.
tool_name = call[
"tool_name"].get<std::string>();
552 if (call.contains(
"args") && call[
"args"].is_object()) {
553 for (
auto& [key, value] : call[
"args"].items()) {
554 if (value.is_string()) {
555 tool_call.
args[key] = value.get<std::string>();
556 }
else if (value.is_number()) {
557 tool_call.
args[key] = std::to_string(value.get<
double>());
558 }
else if (value.is_boolean()) {
559 tool_call.
args[key] = value.get<
bool>() ?
"true" :
"false";
563 agent_response.
tool_calls.push_back(tool_call);
569 std::vector<std::string> lines = absl::StrSplit(text_content,
'\n');
570 for (
const auto& line : lines) {
571 std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
572 if (!trimmed.empty() &&
573 (absl::StartsWith(trimmed,
"z3ed ") ||
574 absl::StartsWith(trimmed,
"palette ") ||
575 absl::StartsWith(trimmed,
"overworld ") ||
576 absl::StartsWith(trimmed,
"sprite ") ||
577 absl::StartsWith(trimmed,
"dungeon "))) {
578 if (absl::StartsWith(trimmed,
"z3ed ")) {
579 trimmed = trimmed.substr(5);
581 agent_response.
commands.push_back(trimmed);
585 }
else if (part.contains(
"functionCall")) {
586 const auto& call = part[
"functionCall"];
587 if (call.contains(
"name") && call[
"name"].is_string()) {
589 tool_call.
tool_name = call[
"name"].get<std::string>();
590 if (call.contains(
"args") && call[
"args"].is_object()) {
591 for (
auto& [key, value] : call[
"args"].items()) {
592 if (value.is_string()) {
593 tool_call.
args[key] = value.get<std::string>();
594 }
else if (value.is_number()) {
595 tool_call.
args[key] = std::to_string(value.get<
double>());
599 agent_response.
tool_calls.push_back(tool_call);
608 return absl::InternalError(
609 "❌ No valid response extracted from Gemini\n"
610 " Expected at least one of: text_response, commands, or tool_calls\n"
611 " Raw response: " + response_body);
614 return agent_response;
619 const std::string& image_path)
const {
620#ifndef YAZE_WITH_JSON
622 return absl::UnimplementedError(
623 "Gemini AI service requires JSON support. Build with -DYAZE_WITH_JSON=ON");
625 std::ifstream file(image_path, std::ios::binary);
626 if (!file.is_open()) {
627 return absl::NotFoundError(
628 absl::StrCat(
"Failed to open image file: ", image_path));
632 file.seekg(0, std::ios::end);
633 size_t size = file.tellg();
634 file.seekg(0, std::ios::beg);
636 std::vector<unsigned char> buffer(size);
637 if (!file.read(
reinterpret_cast<char*
>(buffer.data()), size)) {
638 return absl::InternalError(
"Failed to read image file");
642 static const char* base64_chars =
643 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
646 encoded.reserve(((size + 2) / 3) * 4);
650 unsigned char char_array_3[3];
651 unsigned char char_array_4[4];
653 for (
size_t idx = 0; idx < size; idx++) {
654 char_array_3[i++] = buffer[idx];
656 char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
657 char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
658 char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
659 char_array_4[3] = char_array_3[2] & 0x3f;
661 for (i = 0; i < 4; i++)
662 encoded += base64_chars[char_array_4[i]];
668 for (j = i; j < 3; j++)
669 char_array_3[j] =
'\0';
671 char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
672 char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
673 char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
675 for (j = 0; j < i + 1; j++)
676 encoded += base64_chars[char_array_4[j]];
687 const std::string& image_path,
const std::string& prompt) {
688#ifndef YAZE_WITH_JSON
691 return absl::UnimplementedError(
692 "Gemini AI service requires JSON support. Build with -DYAZE_WITH_JSON=ON");
695 return absl::FailedPreconditionError(
"Gemini API key not configured");
699 std::string mime_type =
"image/png";
700 if (image_path.ends_with(
".jpg") || image_path.ends_with(
".jpeg")) {
701 mime_type =
"image/jpeg";
702 }
else if (image_path.ends_with(
".bmp")) {
703 mime_type =
"image/bmp";
704 }
else if (image_path.ends_with(
".webp")) {
705 mime_type =
"image/webp";
710 if (!encoded_or.ok()) {
711 return encoded_or.status();
713 std::string encoded_image = std::move(encoded_or.value());
717 std::cerr <<
"[DEBUG] Preparing multimodal request with image" << std::endl;
721 nlohmann::json request_body = {
726 {
"mime_type", mime_type},
727 {
"data", encoded_image}
733 {
"generationConfig", {
740 std::string temp_file =
"/tmp/gemini_multimodal_request.json";
741 std::ofstream out(temp_file);
742 out << request_body.dump();
746 std::string endpoint =
"https://generativelanguage.googleapis.com/v1beta/models/" +
748 std::string curl_cmd =
"curl -s -X POST '" + endpoint +
"' "
749 "-H 'Content-Type: application/json' "
751 "-d @" + temp_file +
" 2>&1";
754 std::cerr <<
"[DEBUG] Executing multimodal API request..." << std::endl;
758 FILE* pipe = _popen(curl_cmd.c_str(),
"r");
760 FILE* pipe = popen(curl_cmd.c_str(),
"r");
763 return absl::InternalError(
"Failed to execute curl command");
766 std::string response_str;
768 while (fgets(buffer,
sizeof(buffer), pipe) !=
nullptr) {
769 response_str += buffer;
773 int status = _pclose(pipe);
775 int status = pclose(pipe);
777 std::remove(temp_file.c_str());
780 return absl::InternalError(absl::StrCat(
"Curl failed with status ", status));
783 if (response_str.empty()) {
784 return absl::InternalError(
"Empty response from Gemini API");
788 std::cout <<
"\n" <<
"\033[35m" <<
"🔍 Raw Gemini Multimodal Response:" <<
"\033[0m" <<
"\n"
789 <<
"\033[2m" << response_str.substr(0, 500) <<
"\033[0m" <<
"\n\n";
794 }
catch (
const std::exception& e) {
796 std::cerr <<
"[ERROR] Exception: " << e.what() << std::endl;
798 return absl::InternalError(absl::StrCat(
"Exception during multimodal generation: ", e.what()));