yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
browser_ai_service.cc
Go to the documentation of this file.
1#ifdef __EMSCRIPTEN__
2
4
5#include <emscripten.h>
6#include <sstream>
7
8#include "absl/strings/ascii.h"
9#include "absl/strings/str_format.h"
10#include "absl/strings/str_join.h"
12#include "rom/rom.h" // Full definition needed for Rom member access
13
14namespace yaze {
15namespace cli {
16
17namespace {
18
// Escapes a UTF-8 string for safe embedding inside a JSON string literal.
//
// The JSON two-character escapes (\" \\ \b \f \n \r \t) are emitted for
// their respective characters, any remaining C0 control character
// (0x00-0x1F) becomes a \uXXXX escape, and all other bytes — including
// UTF-8 continuation bytes >= 0x80 — pass through unchanged.
//
// Fix: the byte must be widened through `unsigned char` before the
// control-character comparison. With a signed `char`, bytes >= 0x80 are
// negative, so `c < 0x20` matched them and the sign-extended value was
// printed as e.g. "\uffffff80" — invalid JSON that corrupted UTF-8 text.
std::string EscapeJson(const std::string& str) {
  std::stringstream ss;
  for (char c : str) {
    switch (c) {
      case '"':
        ss << "\\\"";
        break;
      case '\\':
        ss << "\\\\";
        break;
      case '\b':
        ss << "\\b";
        break;
      case '\f':
        ss << "\\f";
        break;
      case '\n':
        ss << "\\n";
        break;
      case '\r':
        ss << "\\r";
        break;
      case '\t':
        ss << "\\t";
        break;
      default: {
        const unsigned char uc = static_cast<unsigned char>(c);
        if (uc < 0x20) {
          // Remaining control characters require the \uXXXX form.
          char buf[8];
          std::snprintf(buf, sizeof(buf), "\\u%04x",
                        static_cast<unsigned>(uc));
          ss << buf;
        } else {
          ss << c;
        }
        break;
      }
    }
  }
  return ss.str();
}
57
58// Helper to convert chat history to Gemini format
59std::string ConvertHistoryToGeminiFormat(
60 const std::vector<agent::ChatMessage>& history) {
61 nlohmann::json contents = nlohmann::json::array();
62
63 for (const auto& msg : history) {
64 nlohmann::json part;
65 part["text"] = msg.message;
66
67 nlohmann::json content;
68 content["parts"] = nlohmann::json::array({part});
69 content["role"] =
70 (msg.sender == agent::ChatMessage::Sender::kUser) ? "user" : "model";
71
72 contents.push_back(content);
73 }
74
75 return contents.dump();
76}
77
78} // namespace
79
80BrowserAIService::BrowserAIService(
81 const BrowserAIConfig& config,
82 std::unique_ptr<net::IHttpClient> http_client)
83 : config_(config), http_client_(std::move(http_client)) {
84 // Normalize provider name
85 config_.provider = absl::AsciiStrToLower(config_.provider);
86 if (config_.provider.empty()) {
87 config_.provider = "gemini";
88 }
89 // Set sensible defaults per provider
90 if (config_.provider == "openai") {
91 if (config_.model.empty())
92 config_.model = "gpt-4o-mini";
93 if (config_.api_base.empty())
94 config_.api_base = kOpenAIApiBaseUrl;
95 } else {
96 if (config_.model.empty())
97 config_.model = "gemini-2.5-flash";
98 }
99
100 if (!http_client_) {
101 // This shouldn't happen in normal usage but handle gracefully
102 LogDebug("Warning: No HTTP client provided to BrowserAIService");
103 }
104
105 // Set timeout on HTTP client
106 if (http_client_) {
107 http_client_->SetTimeout(config_.timeout_seconds);
108 }
109
110 LogDebug(absl::StrFormat("BrowserAIService initialized with model: %s",
111 config_.model));
112}
113
// Installs (or clears, when `rom` is null) the ROM pointer used to enrich
// the system instruction with project context. Thread-safe via mutex_.
//
// NOTE(review): every call with a loaded ROM prepends another copy of the
// preamble onto config_.system_instruction, so calling this repeatedly (or
// after switching ROMs) accumulates nested preambles — confirm callers
// invoke this at most once per service instance.
void BrowserAIService::SetRomContext(Rom* rom) {
  std::lock_guard<std::mutex> lock(mutex_);
  rom_ = rom;
  if (rom_ && rom_->is_loaded()) {
    // Add ROM-specific context to system instruction
    config_.system_instruction = absl::StrFormat(
        "You are assisting with ROM hacking for The Legend of Zelda: A Link to "
        "the Past. "
        "The ROM file '%s' is currently loaded. %s",
        rom_->filename(), config_.system_instruction);
  }
}
126
// Sends a single user prompt to the configured provider and returns the
// parsed model reply.
//
// Flow: validate preconditions -> build provider-specific URL and request
// body -> POST via http_client_ -> map HTTP failures to absl statuses ->
// delegate JSON parsing to the provider-specific parser.
//
// Returns:
//   FailedPreconditionError - no HTTP client was injected.
//   InvalidArgumentError    - required API key missing, or server 4xx.
//   InternalError           - transport failure or server 5xx.
absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
    const std::string& prompt) {
  // Serialize access to config_/http_client_ for thread safety.
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  // A key is mandatory for Gemini and for the hosted OpenAI endpoint;
  // local OpenAI-compatible endpoints may be keyless (see RequiresApiKey).
  if (RequiresApiKey() && config_.api_key.empty()) {
    if (config_.provider == "openai") {
      return absl::InvalidArgumentError(
          "OpenAI API key not set. Provide a key for https://api.openai.com, "
          "or use a local OpenAI-compatible endpoint.");
    }
    return absl::InvalidArgumentError(
        "API key not set. Please provide a Gemini API key.");
  }

  LogDebug(absl::StrFormat("Generating response for prompt: %s", prompt));

  // Build API URL (Gemini form; overwritten below for OpenAI).
  std::string url = BuildApiUrl("generateContent");

  // Build request body
  std::string request_body;
  if (config_.provider == "openai") {
    // NOTE(review): this builds the base URL directly instead of going
    // through GetOpenAIApiBase(), so a trailing slash in api_base is not
    // stripped here (it is in CheckAvailability) — confirm intended.
    url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
    url += "/chat/completions";
    request_body = BuildOpenAIRequestBody(prompt, nullptr);
  } else {
    request_body = BuildRequestBody(prompt);
  }

  // Set headers. Gemini authenticates via the ?key= query parameter,
  // OpenAI via the Authorization header.
  net::Headers headers;
  headers["Content-Type"] = "application/json";
  if (config_.provider == "openai" && !config_.api_key.empty()) {
    headers["Authorization"] = "Bearer " + config_.api_key;
  }

  // Make API request
  auto response_or = http_client_->Post(url, request_body, headers);
  if (!response_or.ok()) {
    return absl::InternalError(absl::StrFormat("Failed to make API request: %s",
                                               response_or.status().message()));
  }

  const auto& response = response_or.value();

  // Check HTTP status: 4xx maps to InvalidArgument (caller's fault),
  // everything else non-success to InternalError.
  if (!response.IsSuccess()) {
    if (response.IsClientError()) {
      return absl::InvalidArgumentError(
          absl::StrFormat("API request failed with status %d: %s",
                          response.status_code, response.body));
    } else {
      return absl::InternalError(absl::StrFormat(
          "API server error %d: %s", response.status_code, response.body));
    }
  }

  // Parse response with the provider-specific parser.
  if (config_.provider == "openai") {
    return ParseOpenAIResponse(response.body);
  }
  return ParseGeminiResponse(response.body);
}
193
194absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
195 const std::vector<agent::ChatMessage>& history) {
196 std::lock_guard<std::mutex> lock(mutex_);
197 if (!http_client_) {
198 return absl::FailedPreconditionError("HTTP client not initialized");
199 }
200
201 if (RequiresApiKey() && config_.api_key.empty()) {
202 if (config_.provider == "openai") {
203 return absl::InvalidArgumentError(
204 "OpenAI API key not set. Provide a key for https://api.openai.com, "
205 "or use a local OpenAI-compatible endpoint.");
206 }
207 return absl::InvalidArgumentError(
208 "API key not set. Please provide a Gemini API key.");
209 }
210
211 if (history.empty()) {
212 return absl::InvalidArgumentError("Chat history cannot be empty");
213 }
214
215 LogDebug(
216 absl::StrFormat("Generating response from %zu messages", history.size()));
217
218 // Build API URL
219 std::string url = BuildApiUrl("generateContent");
220
221 std::string request_body;
222 if (config_.provider == "openai") {
223 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
224 url += "/chat/completions";
225 request_body = BuildOpenAIRequestBody("", &history);
226 } else {
227 // Convert history to Gemini format and build request
228 nlohmann::json request;
229 request["contents"] =
230 nlohmann::json::parse(ConvertHistoryToGeminiFormat(history));
231
232 // Add generation config
233 request["generationConfig"]["temperature"] = config_.temperature;
234 request["generationConfig"]["maxOutputTokens"] = config_.max_output_tokens;
235
236 // Add system instruction if provided
237 if (!config_.system_instruction.empty()) {
238 request["systemInstruction"]["parts"][0]["text"] =
239 config_.system_instruction;
240 }
241
242 request_body = request.dump();
243 }
244
245 // Set headers
246 net::Headers headers;
247 headers["Content-Type"] = "application/json";
248 if (config_.provider == "openai" && !config_.api_key.empty()) {
249 headers["Authorization"] = "Bearer " + config_.api_key;
250 }
251
252 // Make API request
253 auto response_or = http_client_->Post(url, request_body, headers);
254 if (!response_or.ok()) {
255 return absl::InternalError(absl::StrFormat("Failed to make API request: %s",
256 response_or.status().message()));
257 }
258
259 const auto& response = response_or.value();
260
261 // Check HTTP status
262 if (!response.IsSuccess()) {
263 return absl::InternalError(
264 absl::StrFormat("API request failed with status %d: %s",
265 response.status_code, response.body));
266 }
267
268 // Parse response
269 if (config_.provider == "openai") {
270 return ParseOpenAIResponse(response.body);
271 }
272 return ParseGeminiResponse(response.body);
273}
274
275absl::StatusOr<std::vector<ModelInfo>> BrowserAIService::ListAvailableModels() {
276 std::lock_guard<std::mutex> lock(mutex_);
277 // For browser context, return curated lists for configured provider
278 std::vector<ModelInfo> models;
279
280 const std::string provider =
281 config_.provider.empty() ? "gemini" : config_.provider;
282
283 if (provider == "openai") {
284 models.push_back({.name = "gpt-4o-mini",
285 .display_name = "GPT-4o Mini",
286 .provider = "openai",
287 .description = "Fast/cheap OpenAI model",
288 .family = "gpt-4o",
289 .is_local = false});
290 models.push_back({.name = "gpt-4o",
291 .display_name = "GPT-4o",
292 .provider = "openai",
293 .description = "Balanced OpenAI flagship model",
294 .family = "gpt-4o",
295 .is_local = false});
296 models.push_back({.name = "gpt-4.1-mini",
297 .display_name = "GPT-4.1 Mini",
298 .provider = "openai",
299 .description = "Lightweight 4.1 variant",
300 .family = "gpt-4.1",
301 .is_local = false});
302 } else {
303 models.push_back(
304 {.name = "gemini-2.5-flash",
305 .display_name = "Gemini 2.0 Flash (Experimental)",
306 .provider = "gemini",
307 .description = "Fastest Gemini model with experimental features",
308 .family = "gemini",
309 .is_local = false});
310
311 models.push_back({.name = "gemini-1.5-flash",
312 .display_name = "Gemini 1.5 Flash",
313 .provider = "gemini",
314 .description = "Fast and efficient for most tasks",
315 .family = "gemini",
316 .is_local = false});
317
318 models.push_back({.name = "gemini-1.5-flash-8b",
319 .display_name = "Gemini 1.5 Flash 8B",
320 .provider = "gemini",
321 .description = "Smaller, faster variant of Flash",
322 .family = "gemini",
323 .parameter_size = "8B",
324 .is_local = false});
325
326 models.push_back({.name = "gemini-1.5-pro",
327 .display_name = "Gemini 1.5 Pro",
328 .provider = "gemini",
329 .description = "Most capable model for complex tasks",
330 .family = "gemini",
331 .is_local = false});
332 }
333
334 return models;
335}
336
// Sends a multimodal (text + image) request to Gemini and returns the
// parsed reply. OpenAI image analysis is not implemented in this build.
//
// `image_data` may be either raw base64 or a data URL
// ("data:image/...;base64,<payload>"); any data-URL prefix is stripped
// before sending.
//
// Returns:
//   FailedPreconditionError - no HTTP client was injected.
//   UnimplementedError      - provider is "openai".
//   InvalidArgumentError    - Gemini API key missing.
//   InternalError           - transport failure or non-success HTTP status.
absl::StatusOr<AgentResponse> BrowserAIService::AnalyzeImage(
    const std::string& image_data, const std::string& prompt) {
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  if (config_.provider == "openai") {
    return absl::UnimplementedError(
        "Image analysis not yet supported for OpenAI in WASM build");
  }

  if (config_.api_key.empty()) {
    return absl::InvalidArgumentError(
        "API key not set. Please provide a Gemini API key.");
  }

  LogDebug(absl::StrFormat("Analyzing image with prompt: %s", prompt));

  // Build API URL
  std::string url = BuildApiUrl("generateContent");

  // Determine MIME type from image data prefix if present; anything that
  // is not an explicit JPEG data URL is reported to the API as PNG.
  std::string mime_type = "image/png";  // Default
  if (image_data.find("data:image/jpeg") == 0 ||
      image_data.find("data:image/jpg") == 0) {
    mime_type = "image/jpeg";
  }

  // Strip data URL prefix if present (keep only the base64 payload after
  // the first comma).
  std::string clean_image_data = image_data;
  size_t comma_pos = image_data.find(',');
  if (comma_pos != std::string::npos && image_data.find("data:") == 0) {
    clean_image_data = image_data.substr(comma_pos + 1);
  }

  // Build multimodal request
  std::string request_body =
      BuildMultimodalRequestBody(prompt, clean_image_data, mime_type);

  // Set headers (Gemini authenticates via the ?key= query parameter).
  net::Headers headers;
  headers["Content-Type"] = "application/json";

  // Make API request
  auto response_or = http_client_->Post(url, request_body, headers);
  if (!response_or.ok()) {
    return absl::InternalError(absl::StrFormat("Failed to make API request: %s",
                                               response_or.status().message()));
  }

  const auto& response = response_or.value();

  // Check HTTP status
  if (!response.IsSuccess()) {
    return absl::InternalError(
        absl::StrFormat("API request failed with status %d: %s",
                        response.status_code, response.body));
  }

  // Parse response
  return ParseGeminiResponse(response.body);
}
400
// Performs a lightweight GET against the configured provider to verify
// connectivity and credentials without generating any content.
//
// OpenAI: GET <base>/models with a Bearer header.
// Gemini: GET <base><model>?key=<api_key> (model metadata endpoint).
//
// Returns:
//   OkStatus               - endpoint reachable and key accepted.
//   FailedPreconditionError- no HTTP client was injected.
//   InvalidArgumentError   - required API key missing.
//   PermissionDeniedError  - endpoint returned 401/403.
//   UnavailableError       - transport failure or other non-success status.
absl::Status BrowserAIService::CheckAvailability() {
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  if (RequiresApiKey() && config_.api_key.empty()) {
    if (config_.provider == "openai") {
      return absl::InvalidArgumentError(
          "OpenAI API key not set. Provide a key for https://api.openai.com, "
          "or use a local OpenAI-compatible endpoint.");
    }
    return absl::InvalidArgumentError("Gemini API key not set");
  }

  net::Headers headers;
  std::string url;

  if (config_.provider == "openai") {
    url = GetOpenAIApiBase();
    url += "/models";
    if (!config_.api_key.empty()) {
      headers["Authorization"] = "Bearer " + config_.api_key;
    }
  } else {
    // Gemini authenticates via the ?key= query parameter.
    url = absl::StrFormat("%s%s?key=%s", kGeminiApiBaseUrl, config_.model,
                          config_.api_key);
  }

  auto response_or = http_client_->Get(url, headers);

  if (!response_or.ok()) {
    return absl::UnavailableError(
        absl::StrFormat("Cannot reach %s API: %s", config_.provider,
                        response_or.status().message()));
  }

  const auto& response = response_or.value();
  if (!response.IsSuccess()) {
    // 401/403 means we reached the API but the key was rejected.
    if (response.status_code == 401 || response.status_code == 403) {
      return absl::PermissionDeniedError("Invalid API key");
    }
    return absl::UnavailableError(absl::StrFormat(
        "%s API returned error %d", config_.provider, response.status_code));
  }

  return absl::OkStatus();
}
449
450void BrowserAIService::UpdateApiKey(const std::string& api_key) {
451 std::lock_guard<std::mutex> lock(mutex_);
452 config_.api_key = api_key;
453
454 // Store in sessionStorage for this session
455 // Note: This is handled by the secure storage module
456 LogDebug("API key updated");
457}
458
459bool BrowserAIService::RequiresApiKey() const {
460 if (config_.provider == "openai") {
461 return GetOpenAIApiBase() == kOpenAIApiBaseUrl;
462 }
463 return true;
464}
465
466std::string BrowserAIService::GetOpenAIApiBase() const {
467 std::string base =
468 config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
469 if (!base.empty() && base.back() == '/') {
470 base.pop_back();
471 }
472 return base;
473}
474
475std::string BrowserAIService::BuildApiUrl(const std::string& endpoint) const {
476 if (config_.provider == "openai") {
477 std::string base = GetOpenAIApiBase();
478 return absl::StrFormat("%s/%s", base, endpoint);
479 }
480
481 return absl::StrFormat("%s%s:%s?key=%s", kGeminiApiBaseUrl, config_.model,
482 endpoint, config_.api_key);
483}
484
485std::string BrowserAIService::BuildRequestBody(const std::string& prompt,
486 bool include_system) const {
487 nlohmann::json request;
488
489 // Build contents array with user prompt
490 nlohmann::json user_part;
491 user_part["text"] = prompt;
492
493 nlohmann::json user_content;
494 user_content["parts"] = nlohmann::json::array({user_part});
495 user_content["role"] = "user";
496
497 request["contents"] = nlohmann::json::array({user_content});
498
499 // Add generation config
500 request["generationConfig"]["temperature"] = config_.temperature;
501 request["generationConfig"]["maxOutputTokens"] = config_.max_output_tokens;
502
503 // Add system instruction if provided and requested
504 if (include_system && !config_.system_instruction.empty()) {
505 nlohmann::json system_part;
506 system_part["text"] = config_.system_instruction;
507 request["systemInstruction"]["parts"] =
508 nlohmann::json::array({system_part});
509 }
510
511 return request.dump();
512}
513
514std::string BrowserAIService::BuildMultimodalRequestBody(
515 const std::string& prompt, const std::string& image_data,
516 const std::string& mime_type) const {
517 nlohmann::json request;
518
519 // Build parts array with text and image
520 nlohmann::json text_part;
521 text_part["text"] = prompt;
522
523 nlohmann::json image_part;
524 image_part["inline_data"]["mime_type"] = mime_type;
525 image_part["inline_data"]["data"] = image_data;
526
527 nlohmann::json content;
528 content["parts"] = nlohmann::json::array({text_part, image_part});
529 content["role"] = "user";
530
531 request["contents"] = nlohmann::json::array({content});
532
533 // Add generation config
534 request["generationConfig"]["temperature"] = config_.temperature;
535 request["generationConfig"]["maxOutputTokens"] = config_.max_output_tokens;
536
537 // Add system instruction if provided
538 if (!config_.system_instruction.empty()) {
539 nlohmann::json system_part;
540 system_part["text"] = config_.system_instruction;
541 request["systemInstruction"]["parts"] =
542 nlohmann::json::array({system_part});
543 }
544
545 return request.dump();
546}
547
548std::string BrowserAIService::BuildOpenAIRequestBody(
549 const std::string& prompt,
550 const std::vector<agent::ChatMessage>* history) const {
551 nlohmann::json request;
552 request["model"] = config_.model.empty() ? "gpt-4o-mini" : config_.model;
553
554 nlohmann::json messages = nlohmann::json::array();
555 if (!config_.system_instruction.empty()) {
556 messages.push_back(
557 {{"role", "system"}, {"content", config_.system_instruction}});
558 }
559
560 if (history && !history->empty()) {
561 for (const auto& msg : *history) {
562 messages.push_back(
563 {{"role", msg.sender == agent::ChatMessage::Sender::kUser
564 ? "user"
565 : "assistant"},
566 {"content", msg.message}});
567 }
568 } else if (!prompt.empty()) {
569 messages.push_back({{"role", "user"}, {"content", prompt}});
570 }
571
572 request["messages"] = messages;
573 request["temperature"] = config_.temperature;
574 request["max_tokens"] = config_.max_output_tokens;
575
576 return request.dump();
577}
578
// Parses a Gemini generateContent JSON response into an AgentResponse.
//
// Checks the top-level "error" object first (mapped to an absl status by
// CheckForApiError), then concatenates the text parts of the first
// candidate. Safety ratings above NEGLIGIBLE/LOW in promptFeedback are
// surfaced as warnings rather than failures.
//
// Returns:
//   InternalError - no extractable text, or the body was not valid JSON
//                   (nlohmann exceptions are caught and wrapped).
absl::StatusOr<AgentResponse> BrowserAIService::ParseGeminiResponse(
    const std::string& response_body) const {
  try {
    nlohmann::json json = nlohmann::json::parse(response_body);

    // Check for API errors
    auto error_status = CheckForApiError(json);
    if (!error_status.ok()) {
      return error_status;
    }

    // Extract text from candidates
    std::string text_content = ExtractTextFromCandidates(json);

    if (text_content.empty()) {
      return absl::InternalError("Empty response from Gemini API");
    }

    // Build agent response
    AgentResponse response;
    response.text_response = text_content;
    response.provider = "gemini";
    response.model = config_.model;

    // Add any safety ratings or filters as warnings
    if (json.contains("promptFeedback") &&
        json["promptFeedback"].contains("safetyRatings")) {
      for (const auto& rating : json["promptFeedback"]["safetyRatings"]) {
        // Only probabilities above LOW are worth surfacing to the caller.
        if (rating.contains("probability") &&
            rating["probability"] != "NEGLIGIBLE" &&
            rating["probability"] != "LOW") {
          response.warnings.push_back(absl::StrFormat(
              "Content flagged: %s (%s)", rating.value("category", "unknown"),
              rating.value("probability", "unknown")));
        }
      }
    }

    LogDebug(absl::StrFormat("Successfully parsed response with %zu characters",
                             text_content.length()));

    return response;

  } catch (const nlohmann::json::exception& e) {
    return absl::InternalError(
        absl::StrFormat("Failed to parse Gemini response: %s", e.what()));
  }
}
627
628absl::StatusOr<AgentResponse> BrowserAIService::ParseOpenAIResponse(
629 const std::string& response_body) const {
630 try {
631 nlohmann::json json = nlohmann::json::parse(response_body);
632
633 if (json.contains("error")) {
634 const auto& err = json["error"];
635 std::string message = err.value("message", "Unknown error");
636 int code = err.value("code", 0);
637 if (code == 401 || code == 403)
638 return absl::UnauthenticatedError(message);
639 if (code == 429)
640 return absl::ResourceExhaustedError(message);
641 return absl::InternalError(message);
642 }
643
644 if (!json.contains("choices") || !json["choices"].is_array() ||
645 json["choices"].empty()) {
646 return absl::InternalError("Empty response from OpenAI API");
647 }
648
649 const auto& choice = json["choices"][0];
650 if (!choice.contains("message") || !choice["message"].contains("content")) {
651 return absl::InternalError("Malformed OpenAI response");
652 }
653
654 std::string text = choice["message"]["content"].get<std::string>();
655 if (text.empty()) {
656 return absl::InternalError("OpenAI returned empty content");
657 }
658
659 AgentResponse response;
660 response.text_response = text;
661 response.provider = "openai";
662 response.model = config_.model;
663 return response;
664 } catch (const nlohmann::json::exception& e) {
665 return absl::InternalError(
666 absl::StrFormat("Failed to parse OpenAI response: %s", e.what()));
667 }
668}
669
670std::string BrowserAIService::ExtractTextFromCandidates(
671 const nlohmann::json& json) const {
672 if (!json.contains("candidates") || !json["candidates"].is_array() ||
673 json["candidates"].empty()) {
674 return "";
675 }
676
677 const auto& candidate = json["candidates"][0];
678
679 if (!candidate.contains("content") ||
680 !candidate["content"].contains("parts") ||
681 !candidate["content"]["parts"].is_array() ||
682 candidate["content"]["parts"].empty()) {
683 return "";
684 }
685
686 std::string result;
687 for (const auto& part : candidate["content"]["parts"]) {
688 if (part.contains("text")) {
689 result += part["text"].get<std::string>();
690 }
691 }
692
693 return result;
694}
695
696absl::Status BrowserAIService::CheckForApiError(
697 const nlohmann::json& json) const {
698 if (json.contains("error")) {
699 const auto& error = json["error"];
700 int code = error.value("code", 0);
701 std::string message = error.value("message", "Unknown error");
702 std::string status = error.value("status", "");
703
704 // Map common error codes to appropriate status codes
705 if (code == 400 || status == "INVALID_ARGUMENT") {
706 return absl::InvalidArgumentError(message);
707 } else if (code == 401 || status == "UNAUTHENTICATED") {
708 return absl::UnauthenticatedError(message);
709 } else if (code == 403 || status == "PERMISSION_DENIED") {
710 return absl::PermissionDeniedError(message);
711 } else if (code == 429 || status == "RESOURCE_EXHAUSTED") {
712 return absl::ResourceExhaustedError(message);
713 } else if (code == 503 || status == "UNAVAILABLE") {
714 return absl::UnavailableError(message);
715 } else {
716 return absl::InternalError(message);
717 }
718 }
719
720 return absl::OkStatus();
721}
722
// Writes a prefixed debug line to the browser's JS console via EM_ASM
// when config_.verbose is set; a no-op otherwise.
void BrowserAIService::LogDebug(const std::string& message) const {
  if (config_.verbose) {
    // Use console.log for browser debugging
    EM_ASM({ console.log('[BrowserAIService] ' + UTF8ToString($0)); },
           message.c_str());
  }
}
730
731} // namespace cli
732} // namespace yaze
733
734#endif // __EMSCRIPTEN__
std::string EscapeJson(const std::string &input)
Rom * rom()
Get the current ROM instance.