8#include "absl/strings/ascii.h"
9#include "absl/strings/str_format.h"
10#include "absl/strings/str_join.h"
// Escapes a string for safe embedding in a JSON document.
// NOTE(review): interior lines of this function are missing from this view
// (including the declaration of `ss` and the loop over characters `c`);
// only the \uXXXX hex-escape emission is visible below — confirm against
// the full source before relying on this summary.
std::string
EscapeJson(
const std::string& str) {
47 ss <<
// Emit a zero-padded 4-digit hex escape (e.g. "\u001f") for the character.
"\\u" << std::hex << std::setw(4) << std::setfill(
'0')
48 <<
static_cast<int>(c);
// Serializes a chat history into the Gemini-style "contents" JSON array:
// one content object per message, each holding a single text part.
// Returns the array serialized as a string (see contents.dump()).
// NOTE(review): some interior lines are missing from this view (e.g. the
// declaration of `part` and any per-message role assignment) — verify
// against the full source.
59std::string ConvertHistoryToGeminiFormat(
60 const std::vector<agent::ChatMessage>& history) {
61 nlohmann::json contents = nlohmann::json::array();
63 for (
const auto& msg : history) {
// Each message contributes {"parts": [{"text": <message>}]}.
65 part[
"text"] = msg.message;
67 nlohmann::json content;
68 content[
"parts"] = nlohmann::json::array({part});
71 contents.push_back(content);
// Serialize the accumulated array for embedding in a request body.
74 return contents.dump();
// Constructs the service, taking ownership of the HTTP client and filling
// in provider-specific defaults: provider name is lower-cased (comparisons
// elsewhere use lowercase literals), empty provider defaults to "gemini",
// and empty model/api_base get per-provider defaults.
79BrowserAIService::BrowserAIService(
const BrowserAIConfig& config,
80 std::unique_ptr<net::IHttpClient> http_client)
81 : config_(config), http_client_(std::move(http_client)) {
// Normalize provider for case-insensitive matching.
83 config_.provider = absl::AsciiStrToLower(config_.provider);
84 if (config_.provider.empty()) {
85 config_.provider =
"gemini";
88 if (config_.provider ==
"openai") {
89 if (config_.model.empty()) config_.model =
"gpt-4o-mini";
90 if (config_.api_base.empty()) config_.api_base = kOpenAIApiBaseUrl;
// Default Gemini model when none is configured.
92 if (config_.model.empty()) config_.model =
"gemini-2.5-flash";
// A missing HTTP client is tolerated here; request methods later return
// FailedPrecondition ("HTTP client not initialized").
97 LogDebug(
"Warning: No HTTP client provided to BrowserAIService");
102 http_client_->SetTimeout(config_.timeout_seconds);
105 LogDebug(absl::StrFormat(
"BrowserAIService initialized with model: %s",
// Records the active ROM and, when one is loaded, prefixes the system
// instruction with a note naming the loaded ROM file. Thread-safe via mutex_.
// NOTE(review): the format string re-embeds the existing system_instruction,
// so calling this twice stacks the prefix — confirm callers invoke it once,
// or that a missing line resets the instruction first.
109void BrowserAIService::SetRomContext(Rom* rom) {
110 std::lock_guard<std::mutex> lock(mutex_);
// rom_ is presumably assigned from `rom` on a line missing from this view.
112 if (rom_ && rom_->is_loaded()) {
114 config_.system_instruction = absl::StrFormat(
115 "You are assisting with ROM hacking for The Legend of Zelda: A Link to the Past. "
116 "The ROM file '%s' is currently loaded. %s",
117 rom_->filename(), config_.system_instruction);
// Generates a single-turn response for `prompt`.
// Flow: validate client + API key, build the provider-specific URL and
// request body, POST, map HTTP failures to absl statuses, then parse the
// provider-specific response JSON.
// Returns FailedPrecondition (no client), InvalidArgument (no key / 4xx),
// InternalError (transport failure / 5xx), or the parsed AgentResponse.
121absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
122 const std::string& prompt) {
123 std::lock_guard<std::mutex> lock(mutex_);
125 return absl::FailedPreconditionError(
"HTTP client not initialized");
128 if (config_.api_key.empty()) {
129 return absl::InvalidArgumentError(
130 "API key not set. Please provide an API key.");
// NOTE(review): this logs the full user prompt when verbose — consider
// whether that is acceptable for privacy/log-size.
133 LogDebug(absl::StrFormat(
"Generating response for prompt: %s", prompt));
136 std::string url = BuildApiUrl(
"generateContent");
139 std::string request_body;
140 if (config_.provider ==
"openai") {
// OpenAI overrides the Gemini-style URL built above.
141 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
142 url +=
"/chat/completions";
143 request_body = BuildOpenAIRequestBody(prompt,
nullptr);
145 request_body = BuildRequestBody(prompt);
149 net::Headers headers;
150 headers[
"Content-Type"] =
"application/json";
// OpenAI authenticates via bearer header; Gemini uses ?key= in the URL.
151 if (config_.provider ==
"openai") {
152 headers[
"Authorization"] =
"Bearer " + config_.api_key;
156 auto response_or = http_client_->Post(url, request_body, headers);
157 if (!response_or.ok()) {
158 return absl::InternalError(
159 absl::StrFormat(
"Failed to make API request: %s",
160 response_or.status().message()));
163 const auto& response = response_or.value();
// Map HTTP status classes: 4xx -> InvalidArgument, otherwise InternalError.
166 if (!response.IsSuccess()) {
167 if (response.IsClientError()) {
168 return absl::InvalidArgumentError(
169 absl::StrFormat(
"API request failed with status %d: %s",
170 response.status_code, response.body));
172 return absl::InternalError(
173 absl::StrFormat(
"API server error %d: %s",
174 response.status_code, response.body));
179 if (config_.provider ==
"openai") {
180 return ParseOpenAIResponse(response.body);
182 return ParseGeminiResponse(response.body);
// Multi-turn overload: generates a response from a full chat history.
// For Gemini the request is assembled inline here (contents from
// ConvertHistoryToGeminiFormat, generationConfig, optional
// systemInstruction) rather than via BuildRequestBody.
185absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
186 const std::vector<agent::ChatMessage>& history) {
187 std::lock_guard<std::mutex> lock(mutex_);
189 return absl::FailedPreconditionError(
"HTTP client not initialized");
192 if (config_.api_key.empty()) {
193 return absl::InvalidArgumentError(
194 "API key not set. Please provide an API key.");
197 if (history.empty()) {
198 return absl::InvalidArgumentError(
"Chat history cannot be empty");
201 LogDebug(absl::StrFormat(
"Generating response from %zu messages", history.size()));
204 std::string url = BuildApiUrl(
"generateContent");
206 std::string request_body;
207 if (config_.provider ==
"openai") {
208 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
209 url +=
"/chat/completions";
// Empty prompt: the history supplies all messages.
210 request_body = BuildOpenAIRequestBody(
"", &history);
213 nlohmann::json request;
// Round-trips through a string; parse() re-ingests the serialized array.
214 request[
"contents"] = nlohmann::json::parse(ConvertHistoryToGeminiFormat(history));
217 request[
"generationConfig"][
"temperature"] = config_.temperature;
218 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
221 if (!config_.system_instruction.empty()) {
222 request[
"systemInstruction"][
"parts"][0][
"text"] = config_.system_instruction;
225 request_body = request.dump();
229 net::Headers headers;
230 headers[
"Content-Type"] =
"application/json";
231 if (config_.provider ==
"openai") {
232 headers[
"Authorization"] =
"Bearer " + config_.api_key;
236 auto response_or = http_client_->Post(url, request_body, headers);
237 if (!response_or.ok()) {
238 return absl::InternalError(
239 absl::StrFormat(
"Failed to make API request: %s",
240 response_or.status().message()));
243 const auto& response = response_or.value();
// NOTE(review): unlike the single-prompt overload, 4xx responses are NOT
// mapped to InvalidArgument here — everything non-success becomes
// InternalError. Confirm whether this divergence is intentional.
246 if (!response.IsSuccess()) {
247 return absl::InternalError(
248 absl::StrFormat(
"API request failed with status %d: %s",
249 response.status_code, response.body));
253 if (config_.provider ==
"openai") {
254 return ParseOpenAIResponse(response.body);
256 return ParseGeminiResponse(response.body);
// Returns a hard-coded catalogue of models for the active provider
// (config_.provider, defaulting to "gemini" when empty). No network call
// is made; this is a static list. Thread-safe via mutex_.
// FIX: the "gemini-2.5-flash" entry carried the stale display name
// "Gemini 2.0 Flash (Experimental)" — corrected to match the model id.
259absl::StatusOr<std::vector<ModelInfo>> BrowserAIService::ListAvailableModels() {
260 std::lock_guard<std::mutex> lock(mutex_);
262 std::vector<ModelInfo> models;
264 const std::string provider = config_.provider.empty() ?
"gemini" : config_.provider;
266 if (provider ==
"openai") {
268 .name =
"gpt-4o-mini",
269 .display_name =
"GPT-4o Mini",
270 .provider =
"openai",
271 .description =
"Fast/cheap OpenAI model",
277 .display_name =
"GPT-4o",
278 .provider =
"openai",
279 .description =
"Balanced OpenAI flagship model",
284 .name =
"gpt-4.1-mini",
285 .display_name =
"GPT-4.1 Mini",
286 .provider =
"openai",
287 .description =
"Lightweight 4.1 variant",
293 .name =
"gemini-2.5-flash",
294 .display_name =
"Gemini 2.5 Flash",
295 .provider =
"gemini",
// NOTE(review): description still mentions "experimental features",
// inherited from the old 2.0-exp entry — confirm it still applies.
296 .description =
"Fastest Gemini model with experimental features",
302 .name =
"gemini-1.5-flash",
303 .display_name =
"Gemini 1.5 Flash",
304 .provider =
"gemini",
305 .description =
"Fast and efficient for most tasks",
311 .name =
"gemini-1.5-flash-8b",
312 .display_name =
"Gemini 1.5 Flash 8B",
313 .provider =
"gemini",
314 .description =
"Smaller, faster variant of Flash",
316 .parameter_size =
"8B",
321 .name =
"gemini-1.5-pro",
322 .display_name =
"Gemini 1.5 Pro",
323 .provider =
"gemini",
324 .description =
"Most capable model for complex tasks",
// Sends an image plus text prompt to the Gemini multimodal endpoint.
// `image_data` may be a raw base64 payload or a data URL
// ("data:image/...;base64,<payload>"); the data-URL header is sniffed for
// the MIME type and stripped before building the request.
// OpenAI provider is explicitly unsupported (Unimplemented).
333absl::StatusOr<AgentResponse> BrowserAIService::AnalyzeImage(
334 const std::string& image_data,
335 const std::string& prompt) {
336 std::lock_guard<std::mutex> lock(mutex_);
338 return absl::FailedPreconditionError(
"HTTP client not initialized");
341 if (config_.provider ==
"openai") {
342 return absl::UnimplementedError(
343 "Image analysis not yet supported for OpenAI in WASM build");
346 if (config_.api_key.empty()) {
347 return absl::InvalidArgumentError(
348 "API key not set. Please provide a Gemini API key.");
351 LogDebug(absl::StrFormat(
"Analyzing image with prompt: %s", prompt));
354 std::string url = BuildApiUrl(
"generateContent");
// Default to PNG; detect JPEG from a data-URL prefix.
357 std::string mime_type =
"image/png";
358 if (image_data.find(
"data:image/jpeg") == 0 ||
359 image_data.find(
"data:image/jpg") == 0) {
360 mime_type =
"image/jpeg";
// Strip the "data:...;base64," header, keeping only the payload.
364 std::string clean_image_data = image_data;
365 size_t comma_pos = image_data.find(
',');
366 if (comma_pos != std::string::npos && image_data.find(
"data:") == 0) {
367 clean_image_data = image_data.substr(comma_pos + 1);
371 std::string request_body = BuildMultimodalRequestBody(prompt, clean_image_data, mime_type);
374 net::Headers headers;
375 headers[
"Content-Type"] =
"application/json";
// Gemini auth travels in the URL query (?key=), so no auth header here.
378 auto response_or = http_client_->Post(url, request_body, headers);
379 if (!response_or.ok()) {
380 return absl::InternalError(
381 absl::StrFormat(
"Failed to make API request: %s",
382 response_or.status().message()));
385 const auto& response = response_or.value();
388 if (!response.IsSuccess()) {
389 return absl::InternalError(
390 absl::StrFormat(
"API request failed with status %d: %s",
391 response.status_code, response.body));
395 return ParseGeminiResponse(response.body);
// Probes the configured provider with a lightweight GET to verify
// connectivity and credentials.
// Returns OkStatus on success; PermissionDenied for 401/403;
// Unavailable for transport failures or other HTTP errors;
// FailedPrecondition / InvalidArgument for missing client / key.
398absl::Status BrowserAIService::CheckAvailability() {
399 std::lock_guard<std::mutex> lock(mutex_);
401 return absl::FailedPreconditionError(
"HTTP client not initialized");
404 if (config_.api_key.empty()) {
405 return absl::InvalidArgumentError(
"API key not set");
408 net::Headers headers;
411 if (config_.provider ==
"openai") {
// Normalize the base URL (drop a trailing slash) and use bearer auth.
412 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
413 if (!url.empty() && url.back() ==
'/') url.pop_back();
415 headers[
"Authorization"] =
"Bearer " + config_.api_key;
// Gemini: key is passed as a URL query parameter instead of a header.
417 url = absl::StrFormat(
"%s%s?key=%s",
423 auto response_or = http_client_->Get(url, headers);
425 if (!response_or.ok()) {
426 return absl::UnavailableError(
427 absl::StrFormat(
"Cannot reach %s API: %s",
429 response_or.status().message()));
432 const auto& response = response_or.value();
433 if (!response.IsSuccess()) {
// 401/403 indicate a credential problem rather than an outage.
434 if (response.status_code == 401 || response.status_code == 403) {
435 return absl::PermissionDeniedError(
"Invalid API key");
437 return absl::UnavailableError(
438 absl::StrFormat(
"%s API returned error %d",
440 response.status_code));
443 return absl::OkStatus();
// Replaces the stored API key under the service mutex. The key value is
// deliberately never logged — only the fact that it changed.
446void BrowserAIService::UpdateApiKey(
const std::string& api_key) {
447 std::lock_guard<std::mutex> lock(mutex_);
448 config_.api_key = api_key;
452 LogDebug(
"API key updated");
// Builds the request URL for `endpoint`.
// OpenAI: "<base>/<endpoint>" with any trailing slash on base removed.
// Gemini: formatted from base + model + endpoint + "?key=<api_key>"
// (the argument lines for the Gemini StrFormat are missing from this view).
455std::string BrowserAIService::BuildApiUrl(
const std::string& endpoint)
const {
456 if (config_.provider ==
"openai") {
457 std::string base = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
// Avoid a double slash when joining base and endpoint.
458 if (!base.empty() && base.back() ==
'/') {
461 return absl::StrFormat(
"%s/%s", base, endpoint);
464 return absl::StrFormat(
"%s%s:%s?key=%s",
// Builds a single-turn Gemini request body:
// contents: [{role:"user", parts:[{text:prompt}]}], plus generationConfig
// (temperature, maxOutputTokens) and, when `include_system` is set and a
// system instruction exists, a systemInstruction block.
// Returns the serialized JSON string.
471std::string BrowserAIService::BuildRequestBody(
const std::string& prompt,
472 bool include_system)
const {
473 nlohmann::json request;
476 nlohmann::json user_part;
477 user_part[
"text"] = prompt;
479 nlohmann::json user_content;
480 user_content[
"parts"] = nlohmann::json::array({user_part});
481 user_content[
"role"] =
"user";
483 request[
"contents"] = nlohmann::json::array({user_content});
486 request[
"generationConfig"][
"temperature"] = config_.temperature;
487 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
// System instruction is optional and caller-suppressible.
490 if (include_system && !config_.system_instruction.empty()) {
491 nlohmann::json system_part;
492 system_part[
"text"] = config_.system_instruction;
493 request[
"systemInstruction"][
"parts"] = nlohmann::json::array({system_part});
496 return request.dump();
// Builds a Gemini multimodal request body: one user content holding a text
// part followed by an inline_data image part (base64 `image_data` with
// `mime_type`), plus generationConfig and optional systemInstruction.
// Returns the serialized JSON string.
499std::string BrowserAIService::BuildMultimodalRequestBody(
500 const std::string& prompt,
501 const std::string& image_data,
502 const std::string& mime_type)
const {
503 nlohmann::json request;
506 nlohmann::json text_part;
507 text_part[
"text"] = prompt;
// inline_data carries the base64 payload directly in the request.
509 nlohmann::json image_part;
510 image_part[
"inline_data"][
"mime_type"] = mime_type;
511 image_part[
"inline_data"][
"data"] = image_data;
513 nlohmann::json content;
514 content[
"parts"] = nlohmann::json::array({text_part, image_part});
515 content[
"role"] =
"user";
517 request[
"contents"] = nlohmann::json::array({content});
520 request[
"generationConfig"][
"temperature"] = config_.temperature;
521 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
524 if (!config_.system_instruction.empty()) {
525 nlohmann::json system_part;
526 system_part[
"text"] = config_.system_instruction;
527 request[
"systemInstruction"][
"parts"] = nlohmann::json::array({system_part});
530 return request.dump();
// Builds an OpenAI chat-completions request body.
// Messages: optional system message first, then either the full `history`
// (when non-null and non-empty) or the single `prompt` as a user message.
// Also sets model (falling back to "gpt-4o-mini"), temperature and
// max_tokens. Returns the serialized JSON string.
533std::string BrowserAIService::BuildOpenAIRequestBody(
534 const std::string& prompt,
535 const std::vector<agent::ChatMessage>* history)
const {
536 nlohmann::json request;
537 request[
"model"] = config_.model.empty() ?
"gpt-4o-mini" : config_.model;
539 nlohmann::json messages = nlohmann::json::array();
540 if (!config_.system_instruction.empty()) {
541 messages.push_back({{
"role",
"system"},
542 {
"content", config_.system_instruction}});
// History takes precedence over the single prompt when both are given.
545 if (history && !history->empty()) {
546 for (
const auto& msg : *history) {
548 {
// Non-user senders map to the alternate role (the literal for the
// false branch of this ternary is on a line missing from this view —
// presumably "assistant"; confirm).
"role", msg.sender == agent::ChatMessage::Sender::kUser ?
"user"
550 {
"content", msg.message}});
552 }
else if (!prompt.empty()) {
553 messages.push_back({{
"role",
"user"}, {
"content", prompt}});
556 request[
"messages"] = messages;
557 request[
"temperature"] = config_.temperature;
558 request[
"max_tokens"] = config_.max_output_tokens;
560 return request.dump();
// Parses a Gemini generateContent response body into an AgentResponse.
// Checks the "error" object first (CheckForApiError), extracts the text
// from candidates[0].content.parts, and collects non-NEGLIGIBLE/non-LOW
// promptFeedback safety ratings as warnings.
// Returns InternalError on empty text or JSON parse failure.
563absl::StatusOr<AgentResponse> BrowserAIService::ParseGeminiResponse(
564 const std::string& response_body)
const {
566 nlohmann::json
json = nlohmann::json::parse(response_body);
// API-level errors are reported in-body even on HTTP 200.
569 auto error_status = CheckForApiError(json);
570 if (!error_status.ok()) {
575 std::string text_content = ExtractTextFromCandidates(json);
577 if (text_content.empty()) {
578 return absl::InternalError(
"Empty response from Gemini API");
582 AgentResponse response;
583 response.text_response = text_content;
584 response.provider =
"gemini";
585 response.model = config_.model;
// Surface meaningful safety flags (anything above LOW probability).
588 if (
json.contains(
"promptFeedback") &&
589 json[
"promptFeedback"].contains(
"safetyRatings")) {
590 for (
const auto& rating :
json[
"promptFeedback"][
"safetyRatings"]) {
591 if (rating.contains(
"probability") &&
592 rating[
"probability"] !=
"NEGLIGIBLE" &&
593 rating[
"probability"] !=
"LOW") {
594 response.warnings.push_back(
595 absl::StrFormat(
"Content flagged: %s (%s)",
596 rating.value(
"category",
"unknown"),
597 rating.value(
"probability",
"unknown")));
602 LogDebug(absl::StrFormat(
"Successfully parsed response with %zu characters",
603 text_content.length()));
607 }
// Malformed JSON (or unexpected types) surfaces as InternalError.
catch (
const nlohmann::json::exception& e) {
608 return absl::InternalError(
609 absl::StrFormat(
"Failed to parse Gemini response: %s", e.what()));
// Parses an OpenAI chat-completions response body into an AgentResponse.
// Maps in-body "error" codes to statuses (401/403 -> Unauthenticated,
// 429 -> ResourceExhausted, else InternalError), then extracts
// choices[0].message.content. Returns InternalError for missing/empty
// content or JSON parse failure.
613absl::StatusOr<AgentResponse> BrowserAIService::ParseOpenAIResponse(
614 const std::string& response_body)
const {
616 nlohmann::json
json = nlohmann::json::parse(response_body);
618 if (
json.contains(
"error")) {
619 const auto& err =
json[
"error"];
620 std::string message = err.value(
"message",
"Unknown error");
621 int code = err.value(
"code", 0);
622 if (code == 401 || code == 403)
return absl::UnauthenticatedError(message);
623 if (code == 429)
return absl::ResourceExhaustedError(message);
624 return absl::InternalError(message);
// Defensive shape checks before indexing into choices[0].
627 if (!
json.contains(
"choices") || !json[
"choices"].is_array() ||
628 json[
"choices"].empty()) {
629 return absl::InternalError(
"Empty response from OpenAI API");
632 const auto& choice =
json[
"choices"][0];
633 if (!choice.contains(
"message") || !choice[
"message"].contains(
"content")) {
634 return absl::InternalError(
"Malformed OpenAI response");
637 std::string text = choice[
"message"][
"content"].get<std::string>();
639 return absl::InternalError(
"OpenAI returned empty content");
642 AgentResponse response;
643 response.text_response = text;
644 response.provider =
"openai";
645 response.model = config_.model;
647 }
catch (
const nlohmann::json::exception& e) {
648 return absl::InternalError(
649 absl::StrFormat(
"Failed to parse OpenAI response: %s", e.what()));
// Concatenates the "text" fields of candidates[0].content.parts from a
// Gemini response into `result` (declaration not visible in this view).
// Shape checks guard every level; the early-return values for the guard
// branches are on lines missing from this view (presumably empty strings).
653std::string BrowserAIService::ExtractTextFromCandidates(
654 const nlohmann::json& json)
const {
655 if (!
json.contains(
"candidates") || !json[
"candidates"].is_array() ||
656 json[
"candidates"].empty()) {
660 const auto& candidate =
json[
"candidates"][0];
662 if (!candidate.contains(
"content") ||
663 !candidate[
"content"].contains(
"parts") ||
664 !candidate[
"content"][
"parts"].is_array() ||
665 candidate[
"content"][
"parts"].empty()) {
// Only parts that actually carry text contribute to the result.
670 for (
const auto& part : candidate[
"content"][
"parts"]) {
671 if (part.contains(
"text")) {
672 result += part[
"text"].get<std::string>();
// Translates a Gemini in-body "error" object into an absl::Status.
// Matches on either the numeric HTTP-style code or the canonical status
// string; unknown combinations fall through to InternalError. Returns
// OkStatus when no "error" key is present.
679absl::Status BrowserAIService::CheckForApiError(
680 const nlohmann::json& json)
const {
681 if (
json.contains(
"error")) {
682 const auto& error =
json[
"error"];
683 int code = error.value(
"code", 0);
684 std::string message = error.value(
"message",
"Unknown error");
685 std::string status = error.value(
"status",
"");
688 if (code == 400 || status ==
"INVALID_ARGUMENT") {
689 return absl::InvalidArgumentError(message);
690 }
else if (code == 401 || status ==
"UNAUTHENTICATED") {
691 return absl::UnauthenticatedError(message);
692 }
else if (code == 403 || status ==
"PERMISSION_DENIED") {
693 return absl::PermissionDeniedError(message);
694 }
else if (code == 429 || status ==
"RESOURCE_EXHAUSTED") {
695 return absl::ResourceExhaustedError(message);
696 }
else if (code == 503 || status ==
"UNAVAILABLE") {
697 return absl::UnavailableError(message);
// Anything unrecognized is treated as an internal API error.
699 return absl::InternalError(message);
703 return absl::OkStatus();
// Emits a debug message, gated on config_.verbose.
// NOTE(review): the visible body is JavaScript (console.log + UTF8ToString),
// presumably inside an Emscripten EM_ASM block whose wrapper lines are
// missing from this view — confirm against the full source.
706void BrowserAIService::LogDebug(
const std::string& message)
const {
707 if (config_.verbose) {
710 console.log(
'[BrowserAIService] ' + UTF8ToString($0));
std::string EscapeJson(const std::string &str)