8#include "absl/strings/ascii.h"
9#include "absl/strings/str_format.h"
10#include "absl/strings/str_join.h"
// Escapes a string for safe embedding inside a JSON string literal.
// Emits the two mandatory escapes (backslash, double quote), the common
// short escapes for whitespace controls, and "\u00XX" for any other
// control character (< 0x20) per RFC 8259. Bytes >= 0x20 pass through
// unchanged, so UTF-8 payloads are preserved byte-for-byte.
//
// NOTE(review): the interior of this function was lost in extraction; this
// is a reconstruction consistent with the surviving "\\u" + hex fragment.
std::string EscapeJson(const std::string& str) {
  std::ostringstream ss;
  // Iterate as unsigned char so bytes >= 0x80 never sign-extend into a
  // negative int (which would print as a huge hex value below).
  for (const unsigned char c : str) {
    switch (c) {
      case '"':  ss << "\\\""; break;
      case '\\': ss << "\\\\"; break;
      case '\b': ss << "\\b";  break;
      case '\f': ss << "\\f";  break;
      case '\n': ss << "\\n";  break;
      case '\r': ss << "\\r";  break;
      case '\t': ss << "\\t";  break;
      default:
        if (c < 0x20) {
          // Remaining control characters must be \u-escaped.
          ss << "\\u" << std::hex << std::setw(4) << std::setfill('0')
             << static_cast<int>(c) << std::dec;
        } else {
          ss << static_cast<char>(c);
        }
        break;
    }
  }
  return ss.str();
}
59std::string ConvertHistoryToGeminiFormat(
60 const std::vector<agent::ChatMessage>& history) {
61 nlohmann::json contents = nlohmann::json::array();
63 for (
const auto& msg : history) {
65 part[
"text"] = msg.message;
67 nlohmann::json content;
68 content[
"parts"] = nlohmann::json::array({part});
72 contents.push_back(content);
75 return contents.dump();
80BrowserAIService::BrowserAIService(
81 const BrowserAIConfig& config,
82 std::unique_ptr<net::IHttpClient> http_client)
83 : config_(config), http_client_(std::move(http_client)) {
85 config_.provider = absl::AsciiStrToLower(config_.provider);
86 if (config_.provider.empty()) {
87 config_.provider =
"gemini";
90 if (config_.provider ==
"openai") {
91 if (config_.model.empty())
92 config_.model =
"gpt-4o-mini";
93 if (config_.api_base.empty())
94 config_.api_base = kOpenAIApiBaseUrl;
96 if (config_.model.empty())
97 config_.model =
"gemini-2.5-flash";
102 LogDebug(
"Warning: No HTTP client provided to BrowserAIService");
107 http_client_->SetTimeout(config_.timeout_seconds);
110 LogDebug(absl::StrFormat(
"BrowserAIService initialized with model: %s",
114void BrowserAIService::SetRomContext(Rom* rom) {
115 std::lock_guard<std::mutex> lock(mutex_);
117 if (rom_ && rom_->is_loaded()) {
119 config_.system_instruction = absl::StrFormat(
120 "You are assisting with ROM hacking for The Legend of Zelda: A Link to "
122 "The ROM file '%s' is currently loaded. %s",
123 rom_->filename(), config_.system_instruction);
127absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
128 const std::string& prompt) {
129 std::lock_guard<std::mutex> lock(mutex_);
131 return absl::FailedPreconditionError(
"HTTP client not initialized");
134 if (RequiresApiKey() && config_.api_key.empty()) {
135 if (config_.provider ==
"openai") {
136 return absl::InvalidArgumentError(
137 "OpenAI API key not set. Provide a key for https://api.openai.com, "
138 "or use a local OpenAI-compatible endpoint.");
140 return absl::InvalidArgumentError(
141 "API key not set. Please provide a Gemini API key.");
144 LogDebug(absl::StrFormat(
"Generating response for prompt: %s", prompt));
147 std::string url = BuildApiUrl(
"generateContent");
150 std::string request_body;
151 if (config_.provider ==
"openai") {
152 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
153 url +=
"/chat/completions";
154 request_body = BuildOpenAIRequestBody(prompt,
nullptr);
156 request_body = BuildRequestBody(prompt);
160 net::Headers headers;
161 headers[
"Content-Type"] =
"application/json";
162 if (config_.provider ==
"openai" && !config_.api_key.empty()) {
163 headers[
"Authorization"] =
"Bearer " + config_.api_key;
167 auto response_or = http_client_->Post(url, request_body, headers);
168 if (!response_or.ok()) {
169 return absl::InternalError(absl::StrFormat(
"Failed to make API request: %s",
170 response_or.status().message()));
173 const auto& response = response_or.value();
176 if (!response.IsSuccess()) {
177 if (response.IsClientError()) {
178 return absl::InvalidArgumentError(
179 absl::StrFormat(
"API request failed with status %d: %s",
180 response.status_code, response.body));
182 return absl::InternalError(absl::StrFormat(
183 "API server error %d: %s", response.status_code, response.body));
188 if (config_.provider ==
"openai") {
189 return ParseOpenAIResponse(response.body);
191 return ParseGeminiResponse(response.body);
194absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
195 const std::vector<agent::ChatMessage>& history) {
196 std::lock_guard<std::mutex> lock(mutex_);
198 return absl::FailedPreconditionError(
"HTTP client not initialized");
201 if (RequiresApiKey() && config_.api_key.empty()) {
202 if (config_.provider ==
"openai") {
203 return absl::InvalidArgumentError(
204 "OpenAI API key not set. Provide a key for https://api.openai.com, "
205 "or use a local OpenAI-compatible endpoint.");
207 return absl::InvalidArgumentError(
208 "API key not set. Please provide a Gemini API key.");
211 if (history.empty()) {
212 return absl::InvalidArgumentError(
"Chat history cannot be empty");
216 absl::StrFormat(
"Generating response from %zu messages", history.size()));
219 std::string url = BuildApiUrl(
"generateContent");
221 std::string request_body;
222 if (config_.provider ==
"openai") {
223 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
224 url +=
"/chat/completions";
225 request_body = BuildOpenAIRequestBody(
"", &history);
228 nlohmann::json request;
229 request[
"contents"] =
230 nlohmann::json::parse(ConvertHistoryToGeminiFormat(history));
233 request[
"generationConfig"][
"temperature"] = config_.temperature;
234 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
237 if (!config_.system_instruction.empty()) {
238 request[
"systemInstruction"][
"parts"][0][
"text"] =
239 config_.system_instruction;
242 request_body = request.dump();
246 net::Headers headers;
247 headers[
"Content-Type"] =
"application/json";
248 if (config_.provider ==
"openai" && !config_.api_key.empty()) {
249 headers[
"Authorization"] =
"Bearer " + config_.api_key;
253 auto response_or = http_client_->Post(url, request_body, headers);
254 if (!response_or.ok()) {
255 return absl::InternalError(absl::StrFormat(
"Failed to make API request: %s",
256 response_or.status().message()));
259 const auto& response = response_or.value();
262 if (!response.IsSuccess()) {
263 return absl::InternalError(
264 absl::StrFormat(
"API request failed with status %d: %s",
265 response.status_code, response.body));
269 if (config_.provider ==
"openai") {
270 return ParseOpenAIResponse(response.body);
272 return ParseGeminiResponse(response.body);
275absl::StatusOr<std::vector<ModelInfo>> BrowserAIService::ListAvailableModels() {
276 std::lock_guard<std::mutex> lock(mutex_);
278 std::vector<ModelInfo> models;
280 const std::string provider =
281 config_.provider.empty() ?
"gemini" : config_.provider;
283 if (provider ==
"openai") {
284 models.push_back({.name =
"gpt-4o-mini",
285 .display_name =
"GPT-4o Mini",
286 .provider =
"openai",
287 .description =
"Fast/cheap OpenAI model",
290 models.push_back({.name =
"gpt-4o",
291 .display_name =
"GPT-4o",
292 .provider =
"openai",
293 .description =
"Balanced OpenAI flagship model",
296 models.push_back({.name =
"gpt-4.1-mini",
297 .display_name =
"GPT-4.1 Mini",
298 .provider =
"openai",
299 .description =
"Lightweight 4.1 variant",
304 {.name =
"gemini-2.5-flash",
305 .display_name =
"Gemini 2.0 Flash (Experimental)",
306 .provider =
"gemini",
307 .description =
"Fastest Gemini model with experimental features",
311 models.push_back({.name =
"gemini-1.5-flash",
312 .display_name =
"Gemini 1.5 Flash",
313 .provider =
"gemini",
314 .description =
"Fast and efficient for most tasks",
318 models.push_back({.name =
"gemini-1.5-flash-8b",
319 .display_name =
"Gemini 1.5 Flash 8B",
320 .provider =
"gemini",
321 .description =
"Smaller, faster variant of Flash",
323 .parameter_size =
"8B",
326 models.push_back({.name =
"gemini-1.5-pro",
327 .display_name =
"Gemini 1.5 Pro",
328 .provider =
"gemini",
329 .description =
"Most capable model for complex tasks",
337absl::StatusOr<AgentResponse> BrowserAIService::AnalyzeImage(
338 const std::string& image_data,
const std::string& prompt) {
339 std::lock_guard<std::mutex> lock(mutex_);
341 return absl::FailedPreconditionError(
"HTTP client not initialized");
344 if (config_.provider ==
"openai") {
345 return absl::UnimplementedError(
346 "Image analysis not yet supported for OpenAI in WASM build");
349 if (config_.api_key.empty()) {
350 return absl::InvalidArgumentError(
351 "API key not set. Please provide a Gemini API key.");
354 LogDebug(absl::StrFormat(
"Analyzing image with prompt: %s", prompt));
357 std::string url = BuildApiUrl(
"generateContent");
360 std::string mime_type =
"image/png";
361 if (image_data.find(
"data:image/jpeg") == 0 ||
362 image_data.find(
"data:image/jpg") == 0) {
363 mime_type =
"image/jpeg";
367 std::string clean_image_data = image_data;
368 size_t comma_pos = image_data.find(
',');
369 if (comma_pos != std::string::npos && image_data.find(
"data:") == 0) {
370 clean_image_data = image_data.substr(comma_pos + 1);
374 std::string request_body =
375 BuildMultimodalRequestBody(prompt, clean_image_data, mime_type);
378 net::Headers headers;
379 headers[
"Content-Type"] =
"application/json";
382 auto response_or = http_client_->Post(url, request_body, headers);
383 if (!response_or.ok()) {
384 return absl::InternalError(absl::StrFormat(
"Failed to make API request: %s",
385 response_or.status().message()));
388 const auto& response = response_or.value();
391 if (!response.IsSuccess()) {
392 return absl::InternalError(
393 absl::StrFormat(
"API request failed with status %d: %s",
394 response.status_code, response.body));
398 return ParseGeminiResponse(response.body);
401absl::Status BrowserAIService::CheckAvailability() {
402 std::lock_guard<std::mutex> lock(mutex_);
404 return absl::FailedPreconditionError(
"HTTP client not initialized");
407 if (RequiresApiKey() && config_.api_key.empty()) {
408 if (config_.provider ==
"openai") {
409 return absl::InvalidArgumentError(
410 "OpenAI API key not set. Provide a key for https://api.openai.com, "
411 "or use a local OpenAI-compatible endpoint.");
413 return absl::InvalidArgumentError(
"Gemini API key not set");
416 net::Headers headers;
419 if (config_.provider ==
"openai") {
420 url = GetOpenAIApiBase();
422 if (!config_.api_key.empty()) {
423 headers[
"Authorization"] =
"Bearer " + config_.api_key;
426 url = absl::StrFormat(
"%s%s?key=%s", kGeminiApiBaseUrl, config_.model,
430 auto response_or = http_client_->Get(url, headers);
432 if (!response_or.ok()) {
433 return absl::UnavailableError(
434 absl::StrFormat(
"Cannot reach %s API: %s", config_.provider,
435 response_or.status().message()));
438 const auto& response = response_or.value();
439 if (!response.IsSuccess()) {
440 if (response.status_code == 401 || response.status_code == 403) {
441 return absl::PermissionDeniedError(
"Invalid API key");
443 return absl::UnavailableError(absl::StrFormat(
444 "%s API returned error %d", config_.provider, response.status_code));
447 return absl::OkStatus();
450void BrowserAIService::UpdateApiKey(
const std::string& api_key) {
451 std::lock_guard<std::mutex> lock(mutex_);
452 config_.api_key = api_key;
456 LogDebug(
"API key updated");
459bool BrowserAIService::RequiresApiKey()
const {
460 if (config_.provider ==
"openai") {
461 return GetOpenAIApiBase() == kOpenAIApiBaseUrl;
466std::string BrowserAIService::GetOpenAIApiBase()
const {
468 config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
469 if (!base.empty() && base.back() ==
'/') {
475std::string BrowserAIService::BuildApiUrl(
const std::string& endpoint)
const {
476 if (config_.provider ==
"openai") {
477 std::string base = GetOpenAIApiBase();
478 return absl::StrFormat(
"%s/%s", base, endpoint);
481 return absl::StrFormat(
"%s%s:%s?key=%s", kGeminiApiBaseUrl, config_.model,
482 endpoint, config_.api_key);
485std::string BrowserAIService::BuildRequestBody(
const std::string& prompt,
486 bool include_system)
const {
487 nlohmann::json request;
490 nlohmann::json user_part;
491 user_part[
"text"] = prompt;
493 nlohmann::json user_content;
494 user_content[
"parts"] = nlohmann::json::array({user_part});
495 user_content[
"role"] =
"user";
497 request[
"contents"] = nlohmann::json::array({user_content});
500 request[
"generationConfig"][
"temperature"] = config_.temperature;
501 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
504 if (include_system && !config_.system_instruction.empty()) {
505 nlohmann::json system_part;
506 system_part[
"text"] = config_.system_instruction;
507 request[
"systemInstruction"][
"parts"] =
508 nlohmann::json::array({system_part});
511 return request.dump();
514std::string BrowserAIService::BuildMultimodalRequestBody(
515 const std::string& prompt,
const std::string& image_data,
516 const std::string& mime_type)
const {
517 nlohmann::json request;
520 nlohmann::json text_part;
521 text_part[
"text"] = prompt;
523 nlohmann::json image_part;
524 image_part[
"inline_data"][
"mime_type"] = mime_type;
525 image_part[
"inline_data"][
"data"] = image_data;
527 nlohmann::json content;
528 content[
"parts"] = nlohmann::json::array({text_part, image_part});
529 content[
"role"] =
"user";
531 request[
"contents"] = nlohmann::json::array({content});
534 request[
"generationConfig"][
"temperature"] = config_.temperature;
535 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
538 if (!config_.system_instruction.empty()) {
539 nlohmann::json system_part;
540 system_part[
"text"] = config_.system_instruction;
541 request[
"systemInstruction"][
"parts"] =
542 nlohmann::json::array({system_part});
545 return request.dump();
548std::string BrowserAIService::BuildOpenAIRequestBody(
549 const std::string& prompt,
550 const std::vector<agent::ChatMessage>* history)
const {
551 nlohmann::json request;
552 request[
"model"] = config_.model.empty() ?
"gpt-4o-mini" : config_.model;
554 nlohmann::json messages = nlohmann::json::array();
555 if (!config_.system_instruction.empty()) {
557 {{
"role",
"system"}, {
"content", config_.system_instruction}});
560 if (history && !history->empty()) {
561 for (
const auto& msg : *history) {
563 {{
"role", msg.sender == agent::ChatMessage::Sender::kUser
566 {
"content", msg.message}});
568 }
else if (!prompt.empty()) {
569 messages.push_back({{
"role",
"user"}, {
"content", prompt}});
572 request[
"messages"] = messages;
573 request[
"temperature"] = config_.temperature;
574 request[
"max_tokens"] = config_.max_output_tokens;
576 return request.dump();
579absl::StatusOr<AgentResponse> BrowserAIService::ParseGeminiResponse(
580 const std::string& response_body)
const {
582 nlohmann::json
json = nlohmann::json::parse(response_body);
585 auto error_status = CheckForApiError(json);
586 if (!error_status.ok()) {
591 std::string text_content = ExtractTextFromCandidates(json);
593 if (text_content.empty()) {
594 return absl::InternalError(
"Empty response from Gemini API");
598 AgentResponse response;
599 response.text_response = text_content;
600 response.provider =
"gemini";
601 response.model = config_.model;
604 if (
json.contains(
"promptFeedback") &&
605 json[
"promptFeedback"].contains(
"safetyRatings")) {
606 for (
const auto& rating :
json[
"promptFeedback"][
"safetyRatings"]) {
607 if (rating.contains(
"probability") &&
608 rating[
"probability"] !=
"NEGLIGIBLE" &&
609 rating[
"probability"] !=
"LOW") {
610 response.warnings.push_back(absl::StrFormat(
611 "Content flagged: %s (%s)", rating.value(
"category",
"unknown"),
612 rating.value(
"probability",
"unknown")));
617 LogDebug(absl::StrFormat(
"Successfully parsed response with %zu characters",
618 text_content.length()));
622 }
catch (
const nlohmann::json::exception& e) {
623 return absl::InternalError(
624 absl::StrFormat(
"Failed to parse Gemini response: %s", e.what()));
628absl::StatusOr<AgentResponse> BrowserAIService::ParseOpenAIResponse(
629 const std::string& response_body)
const {
631 nlohmann::json
json = nlohmann::json::parse(response_body);
633 if (
json.contains(
"error")) {
634 const auto& err =
json[
"error"];
635 std::string message = err.value(
"message",
"Unknown error");
636 int code = err.value(
"code", 0);
637 if (code == 401 || code == 403)
638 return absl::UnauthenticatedError(message);
640 return absl::ResourceExhaustedError(message);
641 return absl::InternalError(message);
644 if (!
json.contains(
"choices") || !json[
"choices"].is_array() ||
645 json[
"choices"].empty()) {
646 return absl::InternalError(
"Empty response from OpenAI API");
649 const auto& choice =
json[
"choices"][0];
650 if (!choice.contains(
"message") || !choice[
"message"].contains(
"content")) {
651 return absl::InternalError(
"Malformed OpenAI response");
654 std::string text = choice[
"message"][
"content"].get<std::string>();
656 return absl::InternalError(
"OpenAI returned empty content");
659 AgentResponse response;
660 response.text_response = text;
661 response.provider =
"openai";
662 response.model = config_.model;
664 }
catch (
const nlohmann::json::exception& e) {
665 return absl::InternalError(
666 absl::StrFormat(
"Failed to parse OpenAI response: %s", e.what()));
670std::string BrowserAIService::ExtractTextFromCandidates(
671 const nlohmann::json& json)
const {
672 if (!
json.contains(
"candidates") || !json[
"candidates"].is_array() ||
673 json[
"candidates"].empty()) {
677 const auto& candidate =
json[
"candidates"][0];
679 if (!candidate.contains(
"content") ||
680 !candidate[
"content"].contains(
"parts") ||
681 !candidate[
"content"][
"parts"].is_array() ||
682 candidate[
"content"][
"parts"].empty()) {
687 for (
const auto& part : candidate[
"content"][
"parts"]) {
688 if (part.contains(
"text")) {
689 result += part[
"text"].get<std::string>();
696absl::Status BrowserAIService::CheckForApiError(
697 const nlohmann::json& json)
const {
698 if (
json.contains(
"error")) {
699 const auto& error =
json[
"error"];
700 int code = error.value(
"code", 0);
701 std::string message = error.value(
"message",
"Unknown error");
702 std::string status = error.value(
"status",
"");
705 if (code == 400 || status ==
"INVALID_ARGUMENT") {
706 return absl::InvalidArgumentError(message);
707 }
else if (code == 401 || status ==
"UNAUTHENTICATED") {
708 return absl::UnauthenticatedError(message);
709 }
else if (code == 403 || status ==
"PERMISSION_DENIED") {
710 return absl::PermissionDeniedError(message);
711 }
else if (code == 429 || status ==
"RESOURCE_EXHAUSTED") {
712 return absl::ResourceExhaustedError(message);
713 }
else if (code == 503 || status ==
"UNAVAILABLE") {
714 return absl::UnavailableError(message);
716 return absl::InternalError(message);
720 return absl::OkStatus();
723void BrowserAIService::LogDebug(
const std::string& message)
const {
724 if (config_.verbose) {
726 EM_ASM({ console.log(
'[BrowserAIService] ' + UTF8ToString($0)); },
std::string EscapeJson(const std::string &input)
Rom * rom()
Get the current ROM instance.