yaze 0.3.2
Link to the Past ROM Editor
 
browser_ai_service.cc
#ifdef __EMSCRIPTEN__

#include "browser_ai_service.h"  // NOTE: include path assumed; the class
                                 // declaration is elided from this listing.

#include <emscripten.h>

#include <iomanip>  // std::setw / std::setfill, used by EscapeJson below
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>

#include "absl/strings/ascii.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "nlohmann/json.hpp"  // nlohmann::json is used throughout this file
#include "rom/rom.h"          // Full definition needed for Rom member access

namespace yaze {
namespace cli {

namespace {

// Helper function to escape JSON strings
std::string EscapeJson(const std::string& str) {
  std::stringstream ss;
  for (char c : str) {
    switch (c) {
      case '"':
        ss << "\\\"";
        break;
      case '\\':
        ss << "\\\\";
        break;
      case '\b':
        ss << "\\b";
        break;
      case '\f':
        ss << "\\f";
        break;
      case '\n':
        ss << "\\n";
        break;
      case '\r':
        ss << "\\r";
        break;
      case '\t':
        ss << "\\t";
        break;
      default:
        // Compare as unsigned: with a signed char, bytes >= 0x80 would also
        // satisfy `c < 0x20` and be emitted as bogus \u escapes.
        if (static_cast<unsigned char>(c) < 0x20) {
          ss << "\\u" << std::hex << std::setw(4) << std::setfill('0')
             << static_cast<int>(static_cast<unsigned char>(c));
        } else {
          ss << c;
        }
        break;
    }
  }
  return ss.str();
}
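// Illustrative example (not in the original source): escaping a string with a
// quote, a newline, and a raw control byte.
//   EscapeJson("say \"hi\"\n\x01")  =>  say \"hi\"\n\u0001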

// Helper to convert chat history to Gemini format
std::string ConvertHistoryToGeminiFormat(
    const std::vector<agent::ChatMessage>& history) {
  nlohmann::json contents = nlohmann::json::array();

  for (const auto& msg : history) {
    nlohmann::json part;
    part["text"] = msg.message;

    nlohmann::json content;
    content["parts"] = nlohmann::json::array({part});
    content["role"] =
        (msg.sender == agent::ChatMessage::Sender::kUser) ? "user" : "model";

    contents.push_back(content);
  }

  return contents.dump();
}
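// Illustrative example: a two-message history serializes to
//   [{"parts":[{"text":"hello"}],"role":"user"},
//    {"parts":[{"text":"hi there"}],"role":"model"}]
// The caller re-parses this string with nlohmann::json::parse() before
// embedding it in the request, so the round trip through a string is an
// implementation convenience rather than a requirement of the API.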

}  // namespace

BrowserAIService::BrowserAIService(
    const BrowserAIConfig& config,
    std::unique_ptr<net::IHttpClient> http_client)
    : config_(config), http_client_(std::move(http_client)) {
  // Normalize the provider name.
  config_.provider = absl::AsciiStrToLower(config_.provider);
  if (config_.provider.empty()) {
    config_.provider = "gemini";
  }
  // Set sensible defaults per provider.
  if (config_.provider == "openai") {
    if (config_.model.empty()) config_.model = "gpt-4o-mini";
    if (config_.api_base.empty()) config_.api_base = kOpenAIApiBaseUrl;
  } else {
    if (config_.model.empty()) config_.model = "gemini-2.5-flash";
  }

  if (!http_client_) {
    // This shouldn't happen in normal usage, but handle it gracefully.
    LogDebug("Warning: No HTTP client provided to BrowserAIService");
  } else {
    // Propagate the configured timeout to the HTTP client.
    http_client_->SetTimeout(config_.timeout_seconds);
  }

  LogDebug(absl::StrFormat("BrowserAIService initialized with model: %s",
                           config_.model));
}
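// Illustrative usage sketch (not part of the original source). The
// `WasmHttpClient` name is hypothetical; substitute whatever
// net::IHttpClient implementation the build provides.
//
//   BrowserAIConfig config;
//   config.provider = "gemini";           // or "openai"
//   config.api_key = user_supplied_key;   // never hard-code keys
//   BrowserAIService service(config, std::make_unique<WasmHttpClient>());
//   auto response = service.GenerateResponse("Summarize this ROM hack.");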

void BrowserAIService::SetRomContext(Rom* rom) {
  std::lock_guard<std::mutex> lock(mutex_);
  rom_ = rom;
  if (rom_ && rom_->is_loaded()) {
    // Prepend ROM-specific context to the system instruction.
    config_.system_instruction = absl::StrFormat(
        "You are assisting with ROM hacking for The Legend of Zelda: A Link "
        "to the Past. "
        "The ROM file '%s' is currently loaded. %s",
        rom_->filename(), config_.system_instruction);
  }
}

absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
    const std::string& prompt) {
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  if (config_.api_key.empty()) {
    return absl::InvalidArgumentError(
        "API key not set. Please provide an API key.");
  }

  LogDebug(absl::StrFormat("Generating response for prompt: %s", prompt));

  // Build the provider-specific URL and request body.
  std::string url;
  std::string request_body;
  if (config_.provider == "openai") {
    url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
    url += "/chat/completions";
    request_body = BuildOpenAIRequestBody(prompt, nullptr);
  } else {
    url = BuildApiUrl("generateContent");
    request_body = BuildRequestBody(prompt);
  }

  // Set headers
  net::Headers headers;
  headers["Content-Type"] = "application/json";
  if (config_.provider == "openai") {
    headers["Authorization"] = "Bearer " + config_.api_key;
  }

  // Make API request
  auto response_or = http_client_->Post(url, request_body, headers);
  if (!response_or.ok()) {
    return absl::InternalError(absl::StrFormat(
        "Failed to make API request: %s", response_or.status().message()));
  }

  const auto& response = response_or.value();

  // Check HTTP status
  if (!response.IsSuccess()) {
    if (response.IsClientError()) {
      return absl::InvalidArgumentError(
          absl::StrFormat("API request failed with status %d: %s",
                          response.status_code, response.body));
    }
    return absl::InternalError(absl::StrFormat(
        "API server error %d: %s", response.status_code, response.body));
  }

  // Parse response
  if (config_.provider == "openai") {
    return ParseOpenAIResponse(response.body);
  }
  return ParseGeminiResponse(response.body);
}

absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
    const std::vector<agent::ChatMessage>& history) {
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  if (config_.api_key.empty()) {
    return absl::InvalidArgumentError(
        "API key not set. Please provide an API key.");
  }

  if (history.empty()) {
    return absl::InvalidArgumentError("Chat history cannot be empty");
  }

  LogDebug(
      absl::StrFormat("Generating response from %zu messages", history.size()));

  // Build the provider-specific URL and request body.
  std::string url;
  std::string request_body;
  if (config_.provider == "openai") {
    url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
    url += "/chat/completions";
    request_body = BuildOpenAIRequestBody("", &history);
  } else {
    url = BuildApiUrl("generateContent");

    // Convert history to Gemini format and build the request.
    nlohmann::json request;
    request["contents"] =
        nlohmann::json::parse(ConvertHistoryToGeminiFormat(history));

    // Add generation config
    request["generationConfig"]["temperature"] = config_.temperature;
    request["generationConfig"]["maxOutputTokens"] = config_.max_output_tokens;

    // Add system instruction if provided
    if (!config_.system_instruction.empty()) {
      request["systemInstruction"]["parts"][0]["text"] =
          config_.system_instruction;
    }

    request_body = request.dump();
  }

  // Set headers
  net::Headers headers;
  headers["Content-Type"] = "application/json";
  if (config_.provider == "openai") {
    headers["Authorization"] = "Bearer " + config_.api_key;
  }

  // Make API request
  auto response_or = http_client_->Post(url, request_body, headers);
  if (!response_or.ok()) {
    return absl::InternalError(absl::StrFormat(
        "Failed to make API request: %s", response_or.status().message()));
  }

  const auto& response = response_or.value();

  // Check HTTP status
  if (!response.IsSuccess()) {
    return absl::InternalError(
        absl::StrFormat("API request failed with status %d: %s",
                        response.status_code, response.body));
  }

  // Parse response
  if (config_.provider == "openai") {
    return ParseOpenAIResponse(response.body);
  }
  return ParseGeminiResponse(response.body);
}

absl::StatusOr<std::vector<ModelInfo>> BrowserAIService::ListAvailableModels() {
  std::lock_guard<std::mutex> lock(mutex_);
  // In the browser context, return a curated list for the configured provider.
  std::vector<ModelInfo> models;

  const std::string provider =
      config_.provider.empty() ? "gemini" : config_.provider;

  if (provider == "openai") {
    models.push_back({.name = "gpt-4o-mini",
                      .display_name = "GPT-4o Mini",
                      .provider = "openai",
                      .description = "Fast, low-cost OpenAI model",
                      .family = "gpt-4o",
                      .is_local = false});
    models.push_back({.name = "gpt-4o",
                      .display_name = "GPT-4o",
                      .provider = "openai",
                      .description = "Balanced OpenAI flagship model",
                      .family = "gpt-4o",
                      .is_local = false});
    models.push_back({.name = "gpt-4.1-mini",
                      .display_name = "GPT-4.1 Mini",
                      .provider = "openai",
                      .description = "Lightweight 4.1 variant",
                      .family = "gpt-4.1",
                      .is_local = false});
  } else {
    models.push_back({.name = "gemini-2.5-flash",
                      .display_name = "Gemini 2.5 Flash",
                      .provider = "gemini",
                      .description = "Fast, capable default Gemini model",
                      .family = "gemini",
                      .is_local = false});

    models.push_back({.name = "gemini-1.5-flash",
                      .display_name = "Gemini 1.5 Flash",
                      .provider = "gemini",
                      .description = "Fast and efficient for most tasks",
                      .family = "gemini",
                      .is_local = false});

    models.push_back({.name = "gemini-1.5-flash-8b",
                      .display_name = "Gemini 1.5 Flash 8B",
                      .provider = "gemini",
                      .description = "Smaller, faster variant of Flash",
                      .family = "gemini",
                      .parameter_size = "8B",
                      .is_local = false});

    models.push_back({.name = "gemini-1.5-pro",
                      .display_name = "Gemini 1.5 Pro",
                      .provider = "gemini",
                      .description = "Most capable model for complex tasks",
                      .family = "gemini",
                      .is_local = false});
  }

  return models;
}

absl::StatusOr<AgentResponse> BrowserAIService::AnalyzeImage(
    const std::string& image_data, const std::string& prompt) {
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  if (config_.provider == "openai") {
    return absl::UnimplementedError(
        "Image analysis not yet supported for OpenAI in WASM build");
  }

  if (config_.api_key.empty()) {
    return absl::InvalidArgumentError(
        "API key not set. Please provide a Gemini API key.");
  }

  LogDebug(absl::StrFormat("Analyzing image with prompt: %s", prompt));

  // Build API URL
  std::string url = BuildApiUrl("generateContent");

  // Determine the MIME type from the data URL prefix, if present.
  std::string mime_type = "image/png";  // Default
  if (image_data.find("data:image/jpeg") == 0 ||
      image_data.find("data:image/jpg") == 0) {
    mime_type = "image/jpeg";
  }

  // Strip the data URL prefix, if present.
  std::string clean_image_data = image_data;
  size_t comma_pos = image_data.find(',');
  if (comma_pos != std::string::npos && image_data.find("data:") == 0) {
    clean_image_data = image_data.substr(comma_pos + 1);
  }

  // Build multimodal request
  std::string request_body =
      BuildMultimodalRequestBody(prompt, clean_image_data, mime_type);

  // Set headers
  net::Headers headers;
  headers["Content-Type"] = "application/json";

  // Make API request
  auto response_or = http_client_->Post(url, request_body, headers);
  if (!response_or.ok()) {
    return absl::InternalError(absl::StrFormat(
        "Failed to make API request: %s", response_or.status().message()));
  }

  const auto& response = response_or.value();

  // Check HTTP status
  if (!response.IsSuccess()) {
    return absl::InternalError(
        absl::StrFormat("API request failed with status %d: %s",
                        response.status_code, response.body));
  }

  // Parse response
  return ParseGeminiResponse(response.body);
}

absl::Status BrowserAIService::CheckAvailability() {
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  if (config_.api_key.empty()) {
    return absl::InvalidArgumentError("API key not set");
  }

  net::Headers headers;
  std::string url;

  if (config_.provider == "openai") {
    url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
    if (!url.empty() && url.back() == '/') url.pop_back();
    url += "/models";
    headers["Authorization"] = "Bearer " + config_.api_key;
  } else {
    url = absl::StrFormat("%s%s?key=%s", kGeminiApiBaseUrl, config_.model,
                          config_.api_key);
  }

  auto response_or = http_client_->Get(url, headers);

  if (!response_or.ok()) {
    return absl::UnavailableError(
        absl::StrFormat("Cannot reach %s API: %s", config_.provider,
                        response_or.status().message()));
  }

  const auto& response = response_or.value();
  if (!response.IsSuccess()) {
    if (response.status_code == 401 || response.status_code == 403) {
      return absl::PermissionDeniedError("Invalid API key");
    }
    return absl::UnavailableError(absl::StrFormat(
        "%s API returned error %d", config_.provider, response.status_code));
  }

  return absl::OkStatus();
}

void BrowserAIService::UpdateApiKey(const std::string& api_key) {
  std::lock_guard<std::mutex> lock(mutex_);
  config_.api_key = api_key;

  // Persisting the key in sessionStorage for this session is handled by the
  // secure storage module, not here.
  LogDebug("API key updated");
}

std::string BrowserAIService::BuildApiUrl(const std::string& endpoint) const {
  if (config_.provider == "openai") {
    std::string base =
        config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
    if (!base.empty() && base.back() == '/') {
      base.pop_back();
    }
    return absl::StrFormat("%s/%s", base, endpoint);
  }

  return absl::StrFormat("%s%s:%s?key=%s", kGeminiApiBaseUrl, config_.model,
                         endpoint, config_.api_key);
}
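// Illustrative example: assuming kGeminiApiBaseUrl ends in ".../models/", the
// Gemini branch produces a URL of the form
//   <kGeminiApiBaseUrl>gemini-2.5-flash:generateContent?key=<API_KEY>
// while the OpenAI branch produces <api_base>/<endpoint>.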

std::string BrowserAIService::BuildRequestBody(const std::string& prompt,
                                               bool include_system) const {
  nlohmann::json request;

  // Build contents array with the user prompt.
  nlohmann::json user_part;
  user_part["text"] = prompt;

  nlohmann::json user_content;
  user_content["parts"] = nlohmann::json::array({user_part});
  user_content["role"] = "user";

  request["contents"] = nlohmann::json::array({user_content});

  // Add generation config
  request["generationConfig"]["temperature"] = config_.temperature;
  request["generationConfig"]["maxOutputTokens"] = config_.max_output_tokens;

  // Add system instruction if provided and requested
  if (include_system && !config_.system_instruction.empty()) {
    nlohmann::json system_part;
    system_part["text"] = config_.system_instruction;
    request["systemInstruction"]["parts"] =
        nlohmann::json::array({system_part});
  }

  return request.dump();
}
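// Illustrative example: with prompt "hi" and a system instruction set, the
// serialized body has the shape
//   {"contents":[{"parts":[{"text":"hi"}],"role":"user"}],
//    "generationConfig":{"maxOutputTokens":1024,"temperature":0.7},
//    "systemInstruction":{"parts":[{"text":"..."}]}}
// (numeric values shown are placeholders for the configured settings).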

std::string BrowserAIService::BuildMultimodalRequestBody(
    const std::string& prompt, const std::string& image_data,
    const std::string& mime_type) const {
  nlohmann::json request;

  // Build parts array with text and image.
  nlohmann::json text_part;
  text_part["text"] = prompt;

  nlohmann::json image_part;
  image_part["inline_data"]["mime_type"] = mime_type;
  image_part["inline_data"]["data"] = image_data;

  nlohmann::json content;
  content["parts"] = nlohmann::json::array({text_part, image_part});
  content["role"] = "user";

  request["contents"] = nlohmann::json::array({content});

  // Add generation config
  request["generationConfig"]["temperature"] = config_.temperature;
  request["generationConfig"]["maxOutputTokens"] = config_.max_output_tokens;

  // Add system instruction if provided
  if (!config_.system_instruction.empty()) {
    nlohmann::json system_part;
    system_part["text"] = config_.system_instruction;
    request["systemInstruction"]["parts"] =
        nlohmann::json::array({system_part});
  }

  return request.dump();
}
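// Illustrative example: the multimodal body matches the text-only one except
// that "parts" carries two entries:
//   "parts":[{"text":"<prompt>"},
//            {"inline_data":{"mime_type":"image/png","data":"<base64>"}}]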

std::string BrowserAIService::BuildOpenAIRequestBody(
    const std::string& prompt,
    const std::vector<agent::ChatMessage>* history) const {
  nlohmann::json request;
  request["model"] = config_.model.empty() ? "gpt-4o-mini" : config_.model;

  nlohmann::json messages = nlohmann::json::array();
  if (!config_.system_instruction.empty()) {
    messages.push_back(
        {{"role", "system"}, {"content", config_.system_instruction}});
  }

  if (history && !history->empty()) {
    for (const auto& msg : *history) {
      messages.push_back(
          {{"role", msg.sender == agent::ChatMessage::Sender::kUser
                        ? "user"
                        : "assistant"},
           {"content", msg.message}});
    }
  } else if (!prompt.empty()) {
    messages.push_back({{"role", "user"}, {"content", prompt}});
  }

  request["messages"] = messages;
  request["temperature"] = config_.temperature;
  request["max_tokens"] = config_.max_output_tokens;

  return request.dump();
}
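// Illustrative example: a single-prompt call serializes to the familiar
// chat-completions shape
//   {"model":"gpt-4o-mini",
//    "messages":[{"role":"system","content":"..."},
//                {"role":"user","content":"<prompt>"}],
//    "temperature":0.7,"max_tokens":1024}
// (numeric values shown are placeholders for the configured settings).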

absl::StatusOr<AgentResponse> BrowserAIService::ParseGeminiResponse(
    const std::string& response_body) const {
  try {
    nlohmann::json json = nlohmann::json::parse(response_body);

    // Check for API errors
    auto error_status = CheckForApiError(json);
    if (!error_status.ok()) {
      return error_status;
    }

    // Extract text from candidates
    std::string text_content = ExtractTextFromCandidates(json);

    if (text_content.empty()) {
      return absl::InternalError("Empty response from Gemini API");
    }

    // Build agent response
    AgentResponse response;
    response.text_response = text_content;
    response.provider = "gemini";
    response.model = config_.model;

    // Surface any non-trivial safety ratings as warnings.
    if (json.contains("promptFeedback") &&
        json["promptFeedback"].contains("safetyRatings")) {
      for (const auto& rating : json["promptFeedback"]["safetyRatings"]) {
        if (rating.contains("probability") &&
            rating["probability"] != "NEGLIGIBLE" &&
            rating["probability"] != "LOW") {
          response.warnings.push_back(absl::StrFormat(
              "Content flagged: %s (%s)", rating.value("category", "unknown"),
              rating.value("probability", "unknown")));
        }
      }
    }

    LogDebug(absl::StrFormat(
        "Successfully parsed response with %zu characters",
        text_content.length()));

    return response;
  } catch (const nlohmann::json::exception& e) {
    return absl::InternalError(
        absl::StrFormat("Failed to parse Gemini response: %s", e.what()));
  }
}

absl::StatusOr<AgentResponse> BrowserAIService::ParseOpenAIResponse(
    const std::string& response_body) const {
  try {
    nlohmann::json json = nlohmann::json::parse(response_body);

    if (json.contains("error")) {
      const auto& err = json["error"];
      std::string message = err.value("message", "Unknown error");
      // OpenAI may report "code" as a string (e.g. "invalid_api_key");
      // err.value("code", 0) would throw on a string value, so only read it
      // when it is actually numeric.
      int code = 0;
      if (err.contains("code") && err["code"].is_number_integer()) {
        code = err["code"].get<int>();
      }
      if (code == 401 || code == 403)
        return absl::UnauthenticatedError(message);
      if (code == 429)
        return absl::ResourceExhaustedError(message);
      return absl::InternalError(message);
    }

    if (!json.contains("choices") || !json["choices"].is_array() ||
        json["choices"].empty()) {
      return absl::InternalError("Empty response from OpenAI API");
    }

    const auto& choice = json["choices"][0];
    if (!choice.contains("message") ||
        !choice["message"].contains("content")) {
      return absl::InternalError("Malformed OpenAI response");
    }

    std::string text = choice["message"]["content"].get<std::string>();
    if (text.empty()) {
      return absl::InternalError("OpenAI returned empty content");
    }

    AgentResponse response;
    response.text_response = text;
    response.provider = "openai";
    response.model = config_.model;
    return response;
  } catch (const nlohmann::json::exception& e) {
    return absl::InternalError(
        absl::StrFormat("Failed to parse OpenAI response: %s", e.what()));
  }
}

std::string BrowserAIService::ExtractTextFromCandidates(
    const nlohmann::json& json) const {
  if (!json.contains("candidates") || !json["candidates"].is_array() ||
      json["candidates"].empty()) {
    return "";
  }

  const auto& candidate = json["candidates"][0];

  if (!candidate.contains("content") ||
      !candidate["content"].contains("parts") ||
      !candidate["content"]["parts"].is_array() ||
      candidate["content"]["parts"].empty()) {
    return "";
  }

  std::string result;
  for (const auto& part : candidate["content"]["parts"]) {
    if (part.contains("text")) {
      result += part["text"].get<std::string>();
    }
  }

  return result;
}
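// Illustrative example: the Gemini response shape this helper walks is
//   {"candidates":[{"content":{"parts":[{"text":"..."}],"role":"model"}}],
//    "promptFeedback":{"safetyRatings":[...]}}
// Text from every part of the first candidate is concatenated; additional
// candidates, if any, are ignored.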

absl::Status BrowserAIService::CheckForApiError(
    const nlohmann::json& json) const {
  if (json.contains("error")) {
    const auto& error = json["error"];
    int code = error.value("code", 0);
    std::string message = error.value("message", "Unknown error");
    std::string status = error.value("status", "");

    // Map common error codes to appropriate absl status codes.
    if (code == 400 || status == "INVALID_ARGUMENT") {
      return absl::InvalidArgumentError(message);
    } else if (code == 401 || status == "UNAUTHENTICATED") {
      return absl::UnauthenticatedError(message);
    } else if (code == 403 || status == "PERMISSION_DENIED") {
      return absl::PermissionDeniedError(message);
    } else if (code == 429 || status == "RESOURCE_EXHAUSTED") {
      return absl::ResourceExhaustedError(message);
    } else if (code == 503 || status == "UNAVAILABLE") {
      return absl::UnavailableError(message);
    } else {
      return absl::InternalError(message);
    }
  }

  return absl::OkStatus();
}

void BrowserAIService::LogDebug(const std::string& message) const {
  if (config_.verbose) {
    // Use console.log for browser debugging
    EM_ASM({ console.log('[BrowserAIService] ' + UTF8ToString($0)); },
           message.c_str());
  }
}

}  // namespace cli
}  // namespace yaze

#endif  // __EMSCRIPTEN__