yaze 0.3.2
Link to the Past ROM Editor
 
browser_ai_service.cc
#ifdef __EMSCRIPTEN__

#include <emscripten.h>
#include <iomanip>  // std::setw, std::setfill (used by EscapeJson)
#include <sstream>

#include "absl/strings/ascii.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "nlohmann/json.hpp"
#include "rom/rom.h"  // Full definition needed for Rom member access

namespace yaze {
namespace cli {

namespace {

// Helper function to escape a string for embedding in a JSON document.
std::string EscapeJson(const std::string& str) {
  std::stringstream ss;
  for (char c : str) {
    switch (c) {
      case '"':
        ss << "\\\"";
        break;
      case '\\':
        ss << "\\\\";
        break;
      case '\b':
        ss << "\\b";
        break;
      case '\f':
        ss << "\\f";
        break;
      case '\n':
        ss << "\\n";
        break;
      case '\r':
        ss << "\\r";
        break;
      case '\t':
        ss << "\\t";
        break;
      default:
        // Cast through unsigned char so bytes >= 0x80 are not misread as
        // negative control characters where char is signed.
        if (static_cast<unsigned char>(c) < 0x20) {
          ss << "\\u" << std::hex << std::setw(4) << std::setfill('0')
             << static_cast<int>(static_cast<unsigned char>(c));
        } else {
          ss << c;
        }
        break;
    }
  }
  return ss.str();
}

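// Illustrative behavior: for an input holding the five characters `a`, `"`,
// `b`, newline, `c`, EscapeJson returns the seven characters a\"b\nc, which
// can be embedded verbatim between double quotes in a JSON document.
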
// Helper to convert chat history to the Gemini "contents" format.
std::string ConvertHistoryToGeminiFormat(
    const std::vector<agent::ChatMessage>& history) {
  nlohmann::json contents = nlohmann::json::array();

  for (const auto& msg : history) {
    nlohmann::json part;
    part["text"] = msg.message;

    nlohmann::json content;
    content["parts"] = nlohmann::json::array({part});
    content["role"] =
        (msg.sender == agent::ChatMessage::Sender::kUser) ? "user" : "model";

    contents.push_back(content);
  }

  return contents.dump();
}

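// The serialized history has this shape (illustrative messages):
//   [{"parts": [{"text": "Open the overworld editor"}], "role": "user"},
//    {"parts": [{"text": "Opening it now."}], "role": "model"}]
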
}  // namespace

BrowserAIService::BrowserAIService(const BrowserAIConfig& config,
                                   std::unique_ptr<net::IHttpClient> http_client)
    : config_(config), http_client_(std::move(http_client)) {
  // Normalize the provider name, defaulting to Gemini.
  config_.provider = absl::AsciiStrToLower(config_.provider);
  if (config_.provider.empty()) {
    config_.provider = "gemini";
  }
  // Set sensible per-provider defaults.
  if (config_.provider == "openai") {
    if (config_.model.empty()) config_.model = "gpt-4o-mini";
    if (config_.api_base.empty()) config_.api_base = kOpenAIApiBaseUrl;
  } else {
    if (config_.model.empty()) config_.model = "gemini-2.5-flash";
  }

  if (!http_client_) {
    // This shouldn't happen in normal usage, but handle it gracefully.
    LogDebug("Warning: No HTTP client provided to BrowserAIService");
  } else {
    http_client_->SetTimeout(config_.timeout_seconds);
  }

  LogDebug(absl::StrFormat("BrowserAIService initialized with model: %s",
                           config_.model));
}

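// Construction sketch (illustrative; MakeFetchHttpClient is a hypothetical
// factory for whatever net::IHttpClient implementation the WASM build uses):
//   BrowserAIConfig cfg;
//   cfg.provider = "gemini";
//   cfg.api_key = "<key from secure storage>";
//   BrowserAIService service(cfg, MakeFetchHttpClient());
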
void BrowserAIService::SetRomContext(Rom* rom) {
  std::lock_guard<std::mutex> lock(mutex_);
  rom_ = rom;
  if (rom_ && rom_->is_loaded()) {
    // Prepend ROM-specific context to the system instruction. Note that each
    // call prepends again, so this is intended to run once per loaded ROM.
    config_.system_instruction = absl::StrFormat(
        "You are assisting with ROM hacking for The Legend of Zelda: A Link "
        "to the Past. The ROM file '%s' is currently loaded. %s",
        rom_->filename(), config_.system_instruction);
  }
}

absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
    const std::string& prompt) {
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  if (config_.api_key.empty()) {
    return absl::InvalidArgumentError(
        "API key not set. Please provide an API key.");
  }

  LogDebug(absl::StrFormat("Generating response for prompt: %s", prompt));

  // Build the provider-specific URL and request body.
  std::string url;
  std::string request_body;
  if (config_.provider == "openai") {
    url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
    url += "/chat/completions";
    request_body = BuildOpenAIRequestBody(prompt, nullptr);
  } else {
    url = BuildApiUrl("generateContent");
    request_body = BuildRequestBody(prompt);
  }

  // Set headers. Gemini carries its API key in the URL query string instead
  // of an Authorization header.
  net::Headers headers;
  headers["Content-Type"] = "application/json";
  if (config_.provider == "openai") {
    headers["Authorization"] = "Bearer " + config_.api_key;
  }

  // Make the API request.
  auto response_or = http_client_->Post(url, request_body, headers);
  if (!response_or.ok()) {
    return absl::InternalError(
        absl::StrFormat("Failed to make API request: %s",
                        response_or.status().message()));
  }

  const auto& response = response_or.value();

  // Map HTTP failures to client/server error statuses.
  if (!response.IsSuccess()) {
    if (response.IsClientError()) {
      return absl::InvalidArgumentError(
          absl::StrFormat("API request failed with status %d: %s",
                          response.status_code, response.body));
    } else {
      return absl::InternalError(
          absl::StrFormat("API server error %d: %s",
                          response.status_code, response.body));
    }
  }

  // Parse the provider-specific response.
  if (config_.provider == "openai") {
    return ParseOpenAIResponse(response.body);
  }
  return ParseGeminiResponse(response.body);
}

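// Usage sketch (illustrative):
//   auto result = service.GenerateResponse("Where does the Master Sword sit?");
//   if (result.ok()) {
//     // result->text_response holds the model's reply text.
//   }
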
absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
    const std::vector<agent::ChatMessage>& history) {
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  if (config_.api_key.empty()) {
    return absl::InvalidArgumentError(
        "API key not set. Please provide an API key.");
  }

  if (history.empty()) {
    return absl::InvalidArgumentError("Chat history cannot be empty");
  }

  LogDebug(
      absl::StrFormat("Generating response from %zu messages", history.size()));

  // Build the provider-specific URL and request body.
  std::string url;
  std::string request_body;
  if (config_.provider == "openai") {
    url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
    url += "/chat/completions";
    request_body = BuildOpenAIRequestBody("", &history);
  } else {
    url = BuildApiUrl("generateContent");

    // Convert the history to Gemini format and assemble the request.
    nlohmann::json request;
    request["contents"] =
        nlohmann::json::parse(ConvertHistoryToGeminiFormat(history));

    // Add generation config.
    request["generationConfig"]["temperature"] = config_.temperature;
    request["generationConfig"]["maxOutputTokens"] = config_.max_output_tokens;

    // Add the system instruction if provided.
    if (!config_.system_instruction.empty()) {
      request["systemInstruction"]["parts"][0]["text"] =
          config_.system_instruction;
    }

    request_body = request.dump();
  }

  // Set headers.
  net::Headers headers;
  headers["Content-Type"] = "application/json";
  if (config_.provider == "openai") {
    headers["Authorization"] = "Bearer " + config_.api_key;
  }

  // Make the API request.
  auto response_or = http_client_->Post(url, request_body, headers);
  if (!response_or.ok()) {
    return absl::InternalError(
        absl::StrFormat("Failed to make API request: %s",
                        response_or.status().message()));
  }

  const auto& response = response_or.value();

  // Check the HTTP status.
  if (!response.IsSuccess()) {
    return absl::InternalError(
        absl::StrFormat("API request failed with status %d: %s",
                        response.status_code, response.body));
  }

  // Parse the provider-specific response.
  if (config_.provider == "openai") {
    return ParseOpenAIResponse(response.body);
  }
  return ParseGeminiResponse(response.body);
}

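// History sketch (illustrative; ChatMessage members as used in this file):
//   agent::ChatMessage msg;
//   msg.sender = agent::ChatMessage::Sender::kUser;
//   msg.message = "List the dungeon entrances.";
//   auto result = service.GenerateResponse(std::vector<agent::ChatMessage>{msg});
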
absl::StatusOr<std::vector<ModelInfo>> BrowserAIService::ListAvailableModels() {
  std::lock_guard<std::mutex> lock(mutex_);
  // In the browser context, return a curated list for the configured provider.
  std::vector<ModelInfo> models;

  const std::string provider =
      config_.provider.empty() ? "gemini" : config_.provider;

  if (provider == "openai") {
    models.push_back({.name = "gpt-4o-mini",
                      .display_name = "GPT-4o Mini",
                      .provider = "openai",
                      .description = "Fast, inexpensive OpenAI model",
                      .family = "gpt-4o",
                      .is_local = false});
    models.push_back({.name = "gpt-4o",
                      .display_name = "GPT-4o",
                      .provider = "openai",
                      .description = "Balanced OpenAI flagship model",
                      .family = "gpt-4o",
                      .is_local = false});
    models.push_back({.name = "gpt-4.1-mini",
                      .display_name = "GPT-4.1 Mini",
                      .provider = "openai",
                      .description = "Lightweight 4.1 variant",
                      .family = "gpt-4.1",
                      .is_local = false});
  } else {
    models.push_back({.name = "gemini-2.5-flash",
                      .display_name = "Gemini 2.5 Flash",
                      .provider = "gemini",
                      .description = "Fast, latest-generation Gemini model",
                      .family = "gemini",
                      .is_local = false});
    models.push_back({.name = "gemini-1.5-flash",
                      .display_name = "Gemini 1.5 Flash",
                      .provider = "gemini",
                      .description = "Fast and efficient for most tasks",
                      .family = "gemini",
                      .is_local = false});
    models.push_back({.name = "gemini-1.5-flash-8b",
                      .display_name = "Gemini 1.5 Flash 8B",
                      .provider = "gemini",
                      .description = "Smaller, faster variant of Flash",
                      .family = "gemini",
                      .parameter_size = "8B",
                      .is_local = false});
    models.push_back({.name = "gemini-1.5-pro",
                      .display_name = "Gemini 1.5 Pro",
                      .provider = "gemini",
                      .description = "Most capable model for complex tasks",
                      .family = "gemini",
                      .is_local = false});
  }

  return models;
}

absl::StatusOr<AgentResponse> BrowserAIService::AnalyzeImage(
    const std::string& image_data, const std::string& prompt) {
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  if (config_.provider == "openai") {
    return absl::UnimplementedError(
        "Image analysis not yet supported for OpenAI in WASM build");
  }

  if (config_.api_key.empty()) {
    return absl::InvalidArgumentError(
        "API key not set. Please provide a Gemini API key.");
  }

  LogDebug(absl::StrFormat("Analyzing image with prompt: %s", prompt));

  // Build the API URL.
  std::string url = BuildApiUrl("generateContent");

  // Determine the MIME type from the data URL prefix, if present.
  std::string mime_type = "image/png";  // Default
  if (image_data.find("data:image/jpeg") == 0 ||
      image_data.find("data:image/jpg") == 0) {
    mime_type = "image/jpeg";
  }

  // Strip the data URL prefix if present, leaving the raw base64 payload.
  std::string clean_image_data = image_data;
  size_t comma_pos = image_data.find(',');
  if (comma_pos != std::string::npos && image_data.find("data:") == 0) {
    clean_image_data = image_data.substr(comma_pos + 1);
  }

  // Build the multimodal request.
  std::string request_body =
      BuildMultimodalRequestBody(prompt, clean_image_data, mime_type);

  // Set headers.
  net::Headers headers;
  headers["Content-Type"] = "application/json";

  // Make the API request.
  auto response_or = http_client_->Post(url, request_body, headers);
  if (!response_or.ok()) {
    return absl::InternalError(
        absl::StrFormat("Failed to make API request: %s",
                        response_or.status().message()));
  }

  const auto& response = response_or.value();

  // Check the HTTP status.
  if (!response.IsSuccess()) {
    return absl::InternalError(
        absl::StrFormat("API request failed with status %d: %s",
                        response.status_code, response.body));
  }

  // Parse the response.
  return ParseGeminiResponse(response.body);
}

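// AnalyzeImage accepts either raw base64 or a browser data URL; the prefix
// stripping above handles both (payloads truncated here):
//   service.AnalyzeImage("iVBORw0KGgo...", "Which sprite is this?");
//   service.AnalyzeImage("data:image/png;base64,iVBORw0KGgo...",
//                        "Which sprite is this?");
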
absl::Status BrowserAIService::CheckAvailability() {
  std::lock_guard<std::mutex> lock(mutex_);
  if (!http_client_) {
    return absl::FailedPreconditionError("HTTP client not initialized");
  }

  if (config_.api_key.empty()) {
    return absl::InvalidArgumentError("API key not set");
  }

  net::Headers headers;
  std::string url;

  if (config_.provider == "openai") {
    url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
    if (!url.empty() && url.back() == '/') url.pop_back();
    url += "/models";
    headers["Authorization"] = "Bearer " + config_.api_key;
  } else {
    url = absl::StrFormat("%s%s?key=%s", kGeminiApiBaseUrl, config_.model,
                          config_.api_key);
  }

  auto response_or = http_client_->Get(url, headers);

  if (!response_or.ok()) {
    return absl::UnavailableError(
        absl::StrFormat("Cannot reach %s API: %s", config_.provider,
                        response_or.status().message()));
  }

  const auto& response = response_or.value();
  if (!response.IsSuccess()) {
    if (response.status_code == 401 || response.status_code == 403) {
      return absl::PermissionDeniedError("Invalid API key");
    }
    return absl::UnavailableError(absl::StrFormat(
        "%s API returned error %d", config_.provider, response.status_code));
  }

  return absl::OkStatus();
}

void BrowserAIService::UpdateApiKey(const std::string& api_key) {
  std::lock_guard<std::mutex> lock(mutex_);
  config_.api_key = api_key;

  // Storing the key in sessionStorage for this session is handled by the
  // secure storage module; only the in-memory config is updated here.
  LogDebug("API key updated");
}

std::string BrowserAIService::BuildApiUrl(const std::string& endpoint) const {
  if (config_.provider == "openai") {
    std::string base =
        config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
    if (!base.empty() && base.back() == '/') {
      base.pop_back();
    }
    return absl::StrFormat("%s/%s", base, endpoint);
  }

  return absl::StrFormat("%s%s:%s?key=%s", kGeminiApiBaseUrl, config_.model,
                         endpoint, config_.api_key);
}

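// Example Gemini output, with kGeminiApiBaseUrl left symbolic and the key
// redacted:
//   <kGeminiApiBaseUrl>gemini-2.5-flash:generateContent?key=<API_KEY>
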
std::string BrowserAIService::BuildRequestBody(const std::string& prompt,
                                               bool include_system) const {
  nlohmann::json request;

  // Build the contents array with the user prompt.
  nlohmann::json user_part;
  user_part["text"] = prompt;

  nlohmann::json user_content;
  user_content["parts"] = nlohmann::json::array({user_part});
  user_content["role"] = "user";

  request["contents"] = nlohmann::json::array({user_content});

  // Add generation config.
  request["generationConfig"]["temperature"] = config_.temperature;
  request["generationConfig"]["maxOutputTokens"] = config_.max_output_tokens;

  // Add the system instruction if provided and requested.
  if (include_system && !config_.system_instruction.empty()) {
    nlohmann::json system_part;
    system_part["text"] = config_.system_instruction;
    request["systemInstruction"]["parts"] =
        nlohmann::json::array({system_part});
  }

  return request.dump();
}

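// Resulting payload shape (temperature and token values are illustrative):
//   {"contents": [{"parts": [{"text": "<prompt>"}], "role": "user"}],
//    "generationConfig": {"temperature": 0.7, "maxOutputTokens": 2048},
//    "systemInstruction": {"parts": [{"text": "<system instruction>"}]}}
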
std::string BrowserAIService::BuildMultimodalRequestBody(
    const std::string& prompt, const std::string& image_data,
    const std::string& mime_type) const {
  nlohmann::json request;

  // Build the parts array with text and image.
  nlohmann::json text_part;
  text_part["text"] = prompt;

  nlohmann::json image_part;
  image_part["inline_data"]["mime_type"] = mime_type;
  image_part["inline_data"]["data"] = image_data;

  nlohmann::json content;
  content["parts"] = nlohmann::json::array({text_part, image_part});
  content["role"] = "user";

  request["contents"] = nlohmann::json::array({content});

  // Add generation config.
  request["generationConfig"]["temperature"] = config_.temperature;
  request["generationConfig"]["maxOutputTokens"] = config_.max_output_tokens;

  // Add the system instruction if provided.
  if (!config_.system_instruction.empty()) {
    nlohmann::json system_part;
    system_part["text"] = config_.system_instruction;
    request["systemInstruction"]["parts"] =
        nlohmann::json::array({system_part});
  }

  return request.dump();
}

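// The multimodal content entry pairs the text part with an inline image
// (base64 payload truncated):
//   {"parts": [{"text": "<prompt>"},
//              {"inline_data": {"mime_type": "image/png", "data": "iVBORw..."}}],
//    "role": "user"}
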
std::string BrowserAIService::BuildOpenAIRequestBody(
    const std::string& prompt,
    const std::vector<agent::ChatMessage>* history) const {
  nlohmann::json request;
  request["model"] = config_.model.empty() ? "gpt-4o-mini" : config_.model;

  nlohmann::json messages = nlohmann::json::array();
  if (!config_.system_instruction.empty()) {
    messages.push_back(
        {{"role", "system"}, {"content", config_.system_instruction}});
  }

  if (history && !history->empty()) {
    for (const auto& msg : *history) {
      messages.push_back(
          {{"role", msg.sender == agent::ChatMessage::Sender::kUser
                        ? "user"
                        : "assistant"},
           {"content", msg.message}});
    }
  } else if (!prompt.empty()) {
    messages.push_back({{"role", "user"}, {"content", prompt}});
  }

  request["messages"] = messages;
  request["temperature"] = config_.temperature;
  request["max_tokens"] = config_.max_output_tokens;

  return request.dump();
}

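// Resulting Chat Completions payload (temperature and token values are
// illustrative):
//   {"model": "gpt-4o-mini",
//    "messages": [{"role": "system", "content": "<system instruction>"},
//                 {"role": "user", "content": "<prompt>"}],
//    "temperature": 0.7, "max_tokens": 2048}
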
absl::StatusOr<AgentResponse> BrowserAIService::ParseGeminiResponse(
    const std::string& response_body) const {
  try {
    nlohmann::json json = nlohmann::json::parse(response_body);

    // Check for API errors.
    auto error_status = CheckForApiError(json);
    if (!error_status.ok()) {
      return error_status;
    }

    // Extract the text from candidates.
    std::string text_content = ExtractTextFromCandidates(json);

    if (text_content.empty()) {
      return absl::InternalError("Empty response from Gemini API");
    }

    // Build the agent response.
    AgentResponse response;
    response.text_response = text_content;
    response.provider = "gemini";
    response.model = config_.model;

    // Add any safety ratings or filters as warnings.
    if (json.contains("promptFeedback") &&
        json["promptFeedback"].contains("safetyRatings")) {
      for (const auto& rating : json["promptFeedback"]["safetyRatings"]) {
        if (rating.contains("probability") &&
            rating["probability"] != "NEGLIGIBLE" &&
            rating["probability"] != "LOW") {
          response.warnings.push_back(
              absl::StrFormat("Content flagged: %s (%s)",
                              rating.value("category", "unknown"),
                              rating.value("probability", "unknown")));
        }
      }
    }

    LogDebug(absl::StrFormat("Successfully parsed response with %zu characters",
                             text_content.length()));

    return response;

  } catch (const nlohmann::json::exception& e) {
    return absl::InternalError(
        absl::StrFormat("Failed to parse Gemini response: %s", e.what()));
  }
}

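// A successful Gemini response this parser consumes looks like (trimmed):
//   {"candidates": [{"content": {"parts": [{"text": "..."}], "role": "model"}}],
//    "promptFeedback": {"safetyRatings": [
//        {"category": "HARM_CATEGORY_HARASSMENT", "probability": "NEGLIGIBLE"}]}}
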
absl::StatusOr<AgentResponse> BrowserAIService::ParseOpenAIResponse(
    const std::string& response_body) const {
  try {
    nlohmann::json json = nlohmann::json::parse(response_body);

    if (json.contains("error")) {
      const auto& err = json["error"];
      std::string message = err.value("message", "Unknown error");
      // OpenAI's error "code" field is often a string (or null), so only read
      // it when it is actually numeric; otherwise err.value("code", 0) throws.
      int code = 0;
      if (err.contains("code") && err["code"].is_number_integer()) {
        code = err["code"].get<int>();
      }
      if (code == 401 || code == 403) return absl::UnauthenticatedError(message);
      if (code == 429) return absl::ResourceExhaustedError(message);
      return absl::InternalError(message);
    }

    if (!json.contains("choices") || !json["choices"].is_array() ||
        json["choices"].empty()) {
      return absl::InternalError("Empty response from OpenAI API");
    }

    const auto& choice = json["choices"][0];
    if (!choice.contains("message") || !choice["message"].contains("content")) {
      return absl::InternalError("Malformed OpenAI response");
    }

    std::string text = choice["message"]["content"].get<std::string>();
    if (text.empty()) {
      return absl::InternalError("OpenAI returned empty content");
    }

    AgentResponse response;
    response.text_response = text;
    response.provider = "openai";
    response.model = config_.model;
    return response;
  } catch (const nlohmann::json::exception& e) {
    return absl::InternalError(
        absl::StrFormat("Failed to parse OpenAI response: %s", e.what()));
  }
}

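// A successful Chat Completions response looks like (trimmed):
//   {"model": "gpt-4o-mini",
//    "choices": [{"message": {"role": "assistant", "content": "..."},
//                 "finish_reason": "stop"}]}
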
std::string BrowserAIService::ExtractTextFromCandidates(
    const nlohmann::json& json) const {
  if (!json.contains("candidates") || !json["candidates"].is_array() ||
      json["candidates"].empty()) {
    return "";
  }

  const auto& candidate = json["candidates"][0];

  if (!candidate.contains("content") ||
      !candidate["content"].contains("parts") ||
      !candidate["content"]["parts"].is_array() ||
      candidate["content"]["parts"].empty()) {
    return "";
  }

  std::string result;
  for (const auto& part : candidate["content"]["parts"]) {
    if (part.contains("text")) {
      result += part["text"].get<std::string>();
    }
  }

  return result;
}

absl::Status BrowserAIService::CheckForApiError(
    const nlohmann::json& json) const {
  if (json.contains("error")) {
    const auto& error = json["error"];
    int code = error.value("code", 0);
    std::string message = error.value("message", "Unknown error");
    std::string status = error.value("status", "");

    // Map common error codes to the appropriate absl status codes.
    if (code == 400 || status == "INVALID_ARGUMENT") {
      return absl::InvalidArgumentError(message);
    } else if (code == 401 || status == "UNAUTHENTICATED") {
      return absl::UnauthenticatedError(message);
    } else if (code == 403 || status == "PERMISSION_DENIED") {
      return absl::PermissionDeniedError(message);
    } else if (code == 429 || status == "RESOURCE_EXHAUSTED") {
      return absl::ResourceExhaustedError(message);
    } else if (code == 503 || status == "UNAVAILABLE") {
      return absl::UnavailableError(message);
    } else {
      return absl::InternalError(message);
    }
  }

  return absl::OkStatus();
}

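// A Gemini error payload this maps from looks like (trimmed):
//   {"error": {"code": 429, "message": "<human-readable description>",
//              "status": "RESOURCE_EXHAUSTED"}}
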
void BrowserAIService::LogDebug(const std::string& message) const {
  if (config_.verbose) {
    // Use console.log for browser debugging.
    EM_ASM({ console.log('[BrowserAIService] ' + UTF8ToString($0)); },
           message.c_str());
  }
}

}  // namespace cli
}  // namespace yaze

#endif  // __EMSCRIPTEN__