8#include "absl/strings/ascii.h"
9#include "absl/strings/str_format.h"
10#include "absl/strings/str_join.h"
// Escapes a string for safe embedding inside a JSON string literal.
// Handles the two mandatory escapes (quote, backslash), the common
// whitespace controls, and emits any other control character (< 0x20)
// as a \uXXXX sequence, matching the visible hex-formatting fragment.
// NOTE(review): the original escape table was truncated in this chunk;
// this reconstruction follows RFC 8259 — confirm against the original.
std::string EscapeJson(
    const std::string& str) {
  std::ostringstream ss;
  for (char c : str) {
    switch (c) {
      case '"':
        ss << "\\\"";
        break;
      case '\\':
        ss << "\\\\";
        break;
      case '\b':
        ss << "\\b";
        break;
      case '\f':
        ss << "\\f";
        break;
      case '\n':
        ss << "\\n";
        break;
      case '\r':
        ss << "\\r";
        break;
      case '\t':
        ss << "\\t";
        break;
      default:
        if (static_cast<unsigned char>(c) < 0x20) {
          // Control characters must be \u-escaped; value is always < 0x20
          // here, so the cast to int cannot produce a negative number.
          ss << "\\u" << std::hex << std::setw(4) << std::setfill('0')
             << static_cast<int>(c);
        } else {
          ss << c;
        }
    }
  }
  return ss.str();
}
59std::string ConvertHistoryToGeminiFormat(
60 const std::vector<agent::ChatMessage>& history) {
61 nlohmann::json contents = nlohmann::json::array();
63 for (
const auto& msg : history) {
65 part[
"text"] = msg.message;
67 nlohmann::json content;
68 content[
"parts"] = nlohmann::json::array({part});
72 contents.push_back(content);
75 return contents.dump();
80BrowserAIService::BrowserAIService(
81 const BrowserAIConfig& config,
82 std::unique_ptr<net::IHttpClient> http_client)
83 : config_(config), http_client_(std::move(http_client)) {
85 config_.provider = absl::AsciiStrToLower(config_.provider);
86 if (config_.provider.empty()) {
87 config_.provider =
"gemini";
90 if (config_.provider ==
"openai") {
91 if (config_.model.empty())
92 config_.model =
"gpt-4o-mini";
93 if (config_.api_base.empty())
94 config_.api_base = kOpenAIApiBaseUrl;
96 if (config_.model.empty())
97 config_.model =
"gemini-2.5-flash";
102 LogDebug(
"Warning: No HTTP client provided to BrowserAIService");
107 http_client_->SetTimeout(config_.timeout_seconds);
110 LogDebug(absl::StrFormat(
"BrowserAIService initialized with model: %s",
114void BrowserAIService::SetRomContext(Rom* rom) {
115 std::lock_guard<std::mutex> lock(mutex_);
117 if (rom_ && rom_->is_loaded()) {
119 config_.system_instruction = absl::StrFormat(
120 "You are assisting with ROM hacking for The Legend of Zelda: A Link to "
122 "The ROM file '%s' is currently loaded. %s",
123 rom_->filename(), config_.system_instruction);
127absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
128 const std::string& prompt) {
129 std::lock_guard<std::mutex> lock(mutex_);
131 return absl::FailedPreconditionError(
"HTTP client not initialized");
134 if (config_.api_key.empty()) {
135 return absl::InvalidArgumentError(
136 "API key not set. Please provide an API key.");
139 LogDebug(absl::StrFormat(
"Generating response for prompt: %s", prompt));
142 std::string url = BuildApiUrl(
"generateContent");
145 std::string request_body;
146 if (config_.provider ==
"openai") {
147 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
148 url +=
"/chat/completions";
149 request_body = BuildOpenAIRequestBody(prompt,
nullptr);
151 request_body = BuildRequestBody(prompt);
155 net::Headers headers;
156 headers[
"Content-Type"] =
"application/json";
157 if (config_.provider ==
"openai") {
158 headers[
"Authorization"] =
"Bearer " + config_.api_key;
162 auto response_or = http_client_->Post(url, request_body, headers);
163 if (!response_or.ok()) {
164 return absl::InternalError(absl::StrFormat(
"Failed to make API request: %s",
165 response_or.status().message()));
168 const auto& response = response_or.value();
171 if (!response.IsSuccess()) {
172 if (response.IsClientError()) {
173 return absl::InvalidArgumentError(
174 absl::StrFormat(
"API request failed with status %d: %s",
175 response.status_code, response.body));
177 return absl::InternalError(absl::StrFormat(
178 "API server error %d: %s", response.status_code, response.body));
183 if (config_.provider ==
"openai") {
184 return ParseOpenAIResponse(response.body);
186 return ParseGeminiResponse(response.body);
189absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
190 const std::vector<agent::ChatMessage>& history) {
191 std::lock_guard<std::mutex> lock(mutex_);
193 return absl::FailedPreconditionError(
"HTTP client not initialized");
196 if (config_.api_key.empty()) {
197 return absl::InvalidArgumentError(
198 "API key not set. Please provide an API key.");
201 if (history.empty()) {
202 return absl::InvalidArgumentError(
"Chat history cannot be empty");
206 absl::StrFormat(
"Generating response from %zu messages", history.size()));
209 std::string url = BuildApiUrl(
"generateContent");
211 std::string request_body;
212 if (config_.provider ==
"openai") {
213 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
214 url +=
"/chat/completions";
215 request_body = BuildOpenAIRequestBody(
"", &history);
218 nlohmann::json request;
219 request[
"contents"] =
220 nlohmann::json::parse(ConvertHistoryToGeminiFormat(history));
223 request[
"generationConfig"][
"temperature"] = config_.temperature;
224 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
227 if (!config_.system_instruction.empty()) {
228 request[
"systemInstruction"][
"parts"][0][
"text"] =
229 config_.system_instruction;
232 request_body = request.dump();
236 net::Headers headers;
237 headers[
"Content-Type"] =
"application/json";
238 if (config_.provider ==
"openai") {
239 headers[
"Authorization"] =
"Bearer " + config_.api_key;
243 auto response_or = http_client_->Post(url, request_body, headers);
244 if (!response_or.ok()) {
245 return absl::InternalError(absl::StrFormat(
"Failed to make API request: %s",
246 response_or.status().message()));
249 const auto& response = response_or.value();
252 if (!response.IsSuccess()) {
253 return absl::InternalError(
254 absl::StrFormat(
"API request failed with status %d: %s",
255 response.status_code, response.body));
259 if (config_.provider ==
"openai") {
260 return ParseOpenAIResponse(response.body);
262 return ParseGeminiResponse(response.body);
265absl::StatusOr<std::vector<ModelInfo>> BrowserAIService::ListAvailableModels() {
266 std::lock_guard<std::mutex> lock(mutex_);
268 std::vector<ModelInfo> models;
270 const std::string provider =
271 config_.provider.empty() ?
"gemini" : config_.provider;
273 if (provider ==
"openai") {
274 models.push_back({.name =
"gpt-4o-mini",
275 .display_name =
"GPT-4o Mini",
276 .provider =
"openai",
277 .description =
"Fast/cheap OpenAI model",
280 models.push_back({.name =
"gpt-4o",
281 .display_name =
"GPT-4o",
282 .provider =
"openai",
283 .description =
"Balanced OpenAI flagship model",
286 models.push_back({.name =
"gpt-4.1-mini",
287 .display_name =
"GPT-4.1 Mini",
288 .provider =
"openai",
289 .description =
"Lightweight 4.1 variant",
294 {.name =
"gemini-2.5-flash",
295 .display_name =
"Gemini 2.0 Flash (Experimental)",
296 .provider =
"gemini",
297 .description =
"Fastest Gemini model with experimental features",
301 models.push_back({.name =
"gemini-1.5-flash",
302 .display_name =
"Gemini 1.5 Flash",
303 .provider =
"gemini",
304 .description =
"Fast and efficient for most tasks",
308 models.push_back({.name =
"gemini-1.5-flash-8b",
309 .display_name =
"Gemini 1.5 Flash 8B",
310 .provider =
"gemini",
311 .description =
"Smaller, faster variant of Flash",
313 .parameter_size =
"8B",
316 models.push_back({.name =
"gemini-1.5-pro",
317 .display_name =
"Gemini 1.5 Pro",
318 .provider =
"gemini",
319 .description =
"Most capable model for complex tasks",
327absl::StatusOr<AgentResponse> BrowserAIService::AnalyzeImage(
328 const std::string& image_data,
const std::string& prompt) {
329 std::lock_guard<std::mutex> lock(mutex_);
331 return absl::FailedPreconditionError(
"HTTP client not initialized");
334 if (config_.provider ==
"openai") {
335 return absl::UnimplementedError(
336 "Image analysis not yet supported for OpenAI in WASM build");
339 if (config_.api_key.empty()) {
340 return absl::InvalidArgumentError(
341 "API key not set. Please provide a Gemini API key.");
344 LogDebug(absl::StrFormat(
"Analyzing image with prompt: %s", prompt));
347 std::string url = BuildApiUrl(
"generateContent");
350 std::string mime_type =
"image/png";
351 if (image_data.find(
"data:image/jpeg") == 0 ||
352 image_data.find(
"data:image/jpg") == 0) {
353 mime_type =
"image/jpeg";
357 std::string clean_image_data = image_data;
358 size_t comma_pos = image_data.find(
',');
359 if (comma_pos != std::string::npos && image_data.find(
"data:") == 0) {
360 clean_image_data = image_data.substr(comma_pos + 1);
364 std::string request_body =
365 BuildMultimodalRequestBody(prompt, clean_image_data, mime_type);
368 net::Headers headers;
369 headers[
"Content-Type"] =
"application/json";
372 auto response_or = http_client_->Post(url, request_body, headers);
373 if (!response_or.ok()) {
374 return absl::InternalError(absl::StrFormat(
"Failed to make API request: %s",
375 response_or.status().message()));
378 const auto& response = response_or.value();
381 if (!response.IsSuccess()) {
382 return absl::InternalError(
383 absl::StrFormat(
"API request failed with status %d: %s",
384 response.status_code, response.body));
388 return ParseGeminiResponse(response.body);
391absl::Status BrowserAIService::CheckAvailability() {
392 std::lock_guard<std::mutex> lock(mutex_);
394 return absl::FailedPreconditionError(
"HTTP client not initialized");
397 if (config_.api_key.empty()) {
398 return absl::InvalidArgumentError(
"API key not set");
401 net::Headers headers;
404 if (config_.provider ==
"openai") {
405 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
406 if (!url.empty() && url.back() ==
'/')
409 headers[
"Authorization"] =
"Bearer " + config_.api_key;
411 url = absl::StrFormat(
"%s%s?key=%s", kGeminiApiBaseUrl, config_.model,
415 auto response_or = http_client_->Get(url, headers);
417 if (!response_or.ok()) {
418 return absl::UnavailableError(
419 absl::StrFormat(
"Cannot reach %s API: %s", config_.provider,
420 response_or.status().message()));
423 const auto& response = response_or.value();
424 if (!response.IsSuccess()) {
425 if (response.status_code == 401 || response.status_code == 403) {
426 return absl::PermissionDeniedError(
"Invalid API key");
428 return absl::UnavailableError(absl::StrFormat(
429 "%s API returned error %d", config_.provider, response.status_code));
432 return absl::OkStatus();
435void BrowserAIService::UpdateApiKey(
const std::string& api_key) {
436 std::lock_guard<std::mutex> lock(mutex_);
437 config_.api_key = api_key;
441 LogDebug(
"API key updated");
444std::string BrowserAIService::BuildApiUrl(
const std::string& endpoint)
const {
445 if (config_.provider ==
"openai") {
447 config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
448 if (!base.empty() && base.back() ==
'/') {
451 return absl::StrFormat(
"%s/%s", base, endpoint);
454 return absl::StrFormat(
"%s%s:%s?key=%s", kGeminiApiBaseUrl, config_.model,
455 endpoint, config_.api_key);
458std::string BrowserAIService::BuildRequestBody(
const std::string& prompt,
459 bool include_system)
const {
460 nlohmann::json request;
463 nlohmann::json user_part;
464 user_part[
"text"] = prompt;
466 nlohmann::json user_content;
467 user_content[
"parts"] = nlohmann::json::array({user_part});
468 user_content[
"role"] =
"user";
470 request[
"contents"] = nlohmann::json::array({user_content});
473 request[
"generationConfig"][
"temperature"] = config_.temperature;
474 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
477 if (include_system && !config_.system_instruction.empty()) {
478 nlohmann::json system_part;
479 system_part[
"text"] = config_.system_instruction;
480 request[
"systemInstruction"][
"parts"] =
481 nlohmann::json::array({system_part});
484 return request.dump();
487std::string BrowserAIService::BuildMultimodalRequestBody(
488 const std::string& prompt,
const std::string& image_data,
489 const std::string& mime_type)
const {
490 nlohmann::json request;
493 nlohmann::json text_part;
494 text_part[
"text"] = prompt;
496 nlohmann::json image_part;
497 image_part[
"inline_data"][
"mime_type"] = mime_type;
498 image_part[
"inline_data"][
"data"] = image_data;
500 nlohmann::json content;
501 content[
"parts"] = nlohmann::json::array({text_part, image_part});
502 content[
"role"] =
"user";
504 request[
"contents"] = nlohmann::json::array({content});
507 request[
"generationConfig"][
"temperature"] = config_.temperature;
508 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
511 if (!config_.system_instruction.empty()) {
512 nlohmann::json system_part;
513 system_part[
"text"] = config_.system_instruction;
514 request[
"systemInstruction"][
"parts"] =
515 nlohmann::json::array({system_part});
518 return request.dump();
521std::string BrowserAIService::BuildOpenAIRequestBody(
522 const std::string& prompt,
523 const std::vector<agent::ChatMessage>* history)
const {
524 nlohmann::json request;
525 request[
"model"] = config_.model.empty() ?
"gpt-4o-mini" : config_.model;
527 nlohmann::json messages = nlohmann::json::array();
528 if (!config_.system_instruction.empty()) {
530 {{
"role",
"system"}, {
"content", config_.system_instruction}});
533 if (history && !history->empty()) {
534 for (
const auto& msg : *history) {
536 {{
"role", msg.sender == agent::ChatMessage::Sender::kUser
539 {
"content", msg.message}});
541 }
else if (!prompt.empty()) {
542 messages.push_back({{
"role",
"user"}, {
"content", prompt}});
545 request[
"messages"] = messages;
546 request[
"temperature"] = config_.temperature;
547 request[
"max_tokens"] = config_.max_output_tokens;
549 return request.dump();
552absl::StatusOr<AgentResponse> BrowserAIService::ParseGeminiResponse(
553 const std::string& response_body)
const {
555 nlohmann::json
json = nlohmann::json::parse(response_body);
558 auto error_status = CheckForApiError(json);
559 if (!error_status.ok()) {
564 std::string text_content = ExtractTextFromCandidates(json);
566 if (text_content.empty()) {
567 return absl::InternalError(
"Empty response from Gemini API");
571 AgentResponse response;
572 response.text_response = text_content;
573 response.provider =
"gemini";
574 response.model = config_.model;
577 if (
json.contains(
"promptFeedback") &&
578 json[
"promptFeedback"].contains(
"safetyRatings")) {
579 for (
const auto& rating :
json[
"promptFeedback"][
"safetyRatings"]) {
580 if (rating.contains(
"probability") &&
581 rating[
"probability"] !=
"NEGLIGIBLE" &&
582 rating[
"probability"] !=
"LOW") {
583 response.warnings.push_back(absl::StrFormat(
584 "Content flagged: %s (%s)", rating.value(
"category",
"unknown"),
585 rating.value(
"probability",
"unknown")));
590 LogDebug(absl::StrFormat(
"Successfully parsed response with %zu characters",
591 text_content.length()));
595 }
catch (
const nlohmann::json::exception& e) {
596 return absl::InternalError(
597 absl::StrFormat(
"Failed to parse Gemini response: %s", e.what()));
601absl::StatusOr<AgentResponse> BrowserAIService::ParseOpenAIResponse(
602 const std::string& response_body)
const {
604 nlohmann::json
json = nlohmann::json::parse(response_body);
606 if (
json.contains(
"error")) {
607 const auto& err =
json[
"error"];
608 std::string message = err.value(
"message",
"Unknown error");
609 int code = err.value(
"code", 0);
610 if (code == 401 || code == 403)
611 return absl::UnauthenticatedError(message);
613 return absl::ResourceExhaustedError(message);
614 return absl::InternalError(message);
617 if (!
json.contains(
"choices") || !json[
"choices"].is_array() ||
618 json[
"choices"].empty()) {
619 return absl::InternalError(
"Empty response from OpenAI API");
622 const auto& choice =
json[
"choices"][0];
623 if (!choice.contains(
"message") || !choice[
"message"].contains(
"content")) {
624 return absl::InternalError(
"Malformed OpenAI response");
627 std::string text = choice[
"message"][
"content"].get<std::string>();
629 return absl::InternalError(
"OpenAI returned empty content");
632 AgentResponse response;
633 response.text_response = text;
634 response.provider =
"openai";
635 response.model = config_.model;
637 }
catch (
const nlohmann::json::exception& e) {
638 return absl::InternalError(
639 absl::StrFormat(
"Failed to parse OpenAI response: %s", e.what()));
643std::string BrowserAIService::ExtractTextFromCandidates(
644 const nlohmann::json& json)
const {
645 if (!
json.contains(
"candidates") || !json[
"candidates"].is_array() ||
646 json[
"candidates"].empty()) {
650 const auto& candidate =
json[
"candidates"][0];
652 if (!candidate.contains(
"content") ||
653 !candidate[
"content"].contains(
"parts") ||
654 !candidate[
"content"][
"parts"].is_array() ||
655 candidate[
"content"][
"parts"].empty()) {
660 for (
const auto& part : candidate[
"content"][
"parts"]) {
661 if (part.contains(
"text")) {
662 result += part[
"text"].get<std::string>();
669absl::Status BrowserAIService::CheckForApiError(
670 const nlohmann::json& json)
const {
671 if (
json.contains(
"error")) {
672 const auto& error =
json[
"error"];
673 int code = error.value(
"code", 0);
674 std::string message = error.value(
"message",
"Unknown error");
675 std::string status = error.value(
"status",
"");
678 if (code == 400 || status ==
"INVALID_ARGUMENT") {
679 return absl::InvalidArgumentError(message);
680 }
else if (code == 401 || status ==
"UNAUTHENTICATED") {
681 return absl::UnauthenticatedError(message);
682 }
else if (code == 403 || status ==
"PERMISSION_DENIED") {
683 return absl::PermissionDeniedError(message);
684 }
else if (code == 429 || status ==
"RESOURCE_EXHAUSTED") {
685 return absl::ResourceExhaustedError(message);
686 }
else if (code == 503 || status ==
"UNAVAILABLE") {
687 return absl::UnavailableError(message);
689 return absl::InternalError(message);
693 return absl::OkStatus();
696void BrowserAIService::LogDebug(
const std::string& message)
const {
697 if (config_.verbose) {
699 EM_ASM({ console.log(
'[BrowserAIService] ' + UTF8ToString($0)); },
std::string EscapeJson(const std::string &input)