yaze 0.3.2
Link to the Past ROM Editor
 
ollama_ai_service.cc
#include <cstdlib>
#include <iostream>

#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"

#ifdef YAZE_WITH_JSON
#include "httplib.h"
#include "nlohmann/json.hpp"
#endif

namespace yaze {
namespace cli {

OllamaAIService::OllamaAIService(const OllamaConfig& config) : config_(config) {
  // Load command documentation into prompt builder
  if (auto status = prompt_builder_.LoadResourceCatalogue(""); !status.ok()) {
    std::cerr << "⚠️ Failed to load agent prompt catalogue: "
              << status.message() << std::endl;
  }

  if (config_.system_prompt.empty()) {
    // Use enhanced prompting by default
    if (config_.use_enhanced_prompting) {
      config_.system_prompt =
          prompt_builder_.BuildSystemInstructionWithExamples();
    } else {
      config_.system_prompt = BuildSystemPrompt();
    }
  }
}
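
// Illustrative caller-side sketch (not part of this translation unit). The
// header path and config defaults are assumptions; every field set below is
// one this file actually reads:
//
//   OllamaConfig config;
//   config.base_url = "http://localhost:11434";  // assumed default Ollama host
//   config.model = "llama3.2";                   // hypothetical model name
//   config.use_enhanced_prompting = true;
//   OllamaAIService service(config);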

std::string OllamaAIService::BuildSystemPrompt() {
  // Fallback prompt if enhanced prompting is disabled
  // Use PromptBuilder's basic system instruction
  return prompt_builder_.BuildSystemInstruction();
}

void OllamaAIService::SetRomContext(Rom* rom) {
  prompt_builder_.SetRom(rom);
}

absl::Status OllamaAIService::CheckAvailability() {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError(
      "Ollama service requires JSON support. "
      "Build with -DZ3ED_AI=ON or -DYAZE_WITH_JSON=ON");
#else
  try {
    httplib::Client cli(config_.base_url);
    cli.set_connection_timeout(5);  // 5 second timeout

    auto res = cli.Get("/api/tags");
    if (!res) {
      return absl::UnavailableError(
          absl::StrFormat("Cannot connect to Ollama server at %s.\n"
                          "Make sure Ollama is installed and running:\n"
                          "  1. Install: brew install ollama (macOS) or "
                          "https://ollama.com/download\n"
                          "  2. Start: ollama serve\n"
                          "  3. Verify: curl http://localhost:11434/api/tags",
                          config_.base_url));
    }

    if (res->status != 200) {
      return absl::InternalError(
          absl::StrFormat("Ollama server error: HTTP %d\nResponse: %s",
                          res->status, res->body));
    }

    // Check if requested model is available
    nlohmann::json models_json = nlohmann::json::parse(res->body);
    bool model_found = false;

    if (models_json.contains("models") && models_json["models"].is_array()) {
      for (const auto& model : models_json["models"]) {
        if (model.contains("name")) {
          std::string model_name = model["name"].get<std::string>();
          if (model_name.find(config_.model) != std::string::npos) {
            model_found = true;
            break;
          }
        }
      }
    }

    if (!model_found) {
      return absl::NotFoundError(
          absl::StrFormat("Model '%s' not found on Ollama server.\n"
                          "Pull it with: ollama pull %s\n"
                          "Available models: ollama list",
                          config_.model, config_.model));
    }

    return absl::OkStatus();
  } catch (const std::exception& e) {
    return absl::InternalError(
        absl::StrCat("Ollama health check failed: ", e.what()));
  }
#endif
}
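
// For reference, a trimmed /api/tags payload of the shape this check and
// ListAvailableModels() below rely on. Only the fields read here are shown,
// and the concrete values are illustrative, not taken from a real server:
//
//   {
//     "models": [
//       {
//         "name": "llama3.2:latest",
//         "size": 2019393189,
//         "details": {
//           "family": "llama",
//           "parameter_size": "3.2B",
//           "quantization_level": "Q4_K_M"
//         }
//       }
//     ]
//   }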

absl::StatusOr<std::vector<ModelInfo>> OllamaAIService::ListAvailableModels() {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError("Requires httplib and JSON support");
#else
  try {
    httplib::Client cli(config_.base_url);
    cli.set_connection_timeout(5);

    auto res = cli.Get("/api/tags");

    if (!res || res->status != 200) {
      return absl::UnavailableError(
          "Cannot list Ollama models. Is the server running?");
    }

    nlohmann::json models_json = nlohmann::json::parse(res->body);
    std::vector<ModelInfo> models;

    if (models_json.contains("models") && models_json["models"].is_array()) {
      for (const auto& model : models_json["models"]) {
        ModelInfo info;
        info.provider = "ollama";
        info.is_local = true;

        if (model.contains("name") && model["name"].is_string()) {
          info.name = model["name"].get<std::string>();
          info.display_name = info.name;
        }

        if (model.contains("size")) {
          if (model["size"].is_string()) {
            info.size_bytes = std::strtoull(
                model["size"].get<std::string>().c_str(), nullptr, 10);
          } else if (model["size"].is_number_unsigned()) {
            info.size_bytes = model["size"].get<uint64_t>();
          }
        }

        if (model.contains("details") && model["details"].is_object()) {
          const auto& details = model["details"];
          info.parameter_size = details.value("parameter_size", "");
          info.quantization = details.value("quantization_level", "");
          info.family = details.value("family", "");

          // Build description
          std::string desc;
          if (!info.family.empty())
            desc += info.family + " ";
          if (!info.parameter_size.empty())
            desc += info.parameter_size + " ";
          if (!info.quantization.empty())
            desc += "(" + info.quantization + ")";
          info.description = desc;
        }
        models.push_back(std::move(info));
      }
    }

    return models;
  } catch (const std::exception& e) {
    return absl::InternalError(
        absl::StrCat("Failed to list models: ", e.what()));
  }
#endif
}
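
// Minimal caller-side sketch for the listing API, assuming a constructed
// `service` as above; only ModelInfo fields populated in this function are
// used:
//
//   if (auto models = service.ListAvailableModels(); models.ok()) {
//     for (const auto& info : *models) {
//       std::cout << info.name << "  " << info.description << "\n";
//     }
//   }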

absl::StatusOr<std::string> OllamaAIService::ParseOllamaResponse(
    const std::string& json_response) {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError("Requires JSON support");
#else
  try {
    nlohmann::json response_json = nlohmann::json::parse(json_response);

    if (!response_json.contains("response")) {
      return absl::InvalidArgumentError(
          "Ollama response missing 'response' field");
    }

    return response_json["response"].get<std::string>();
  } catch (const nlohmann::json::exception& e) {
    return absl::InternalError(
        absl::StrCat("Failed to parse Ollama response: ", e.what()));
  }
#endif
}

absl::StatusOr<AgentResponse> OllamaAIService::GenerateResponse(
    const std::string& prompt) {
  return GenerateResponse(
      {{{agent::ChatMessage::Sender::kUser, prompt, absl::Now()}}});
}

absl::StatusOr<AgentResponse> OllamaAIService::GenerateResponse(
    const std::vector<agent::ChatMessage>& history) {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError(
      "Ollama service requires httplib and JSON support. "
      "Install vcpkg dependencies or use bundled libraries.");
#else
  if (history.empty()) {
    return absl::InvalidArgumentError("History cannot be empty.");
  }

  nlohmann::json messages = nlohmann::json::array();
  for (const auto& chat_msg : history) {
    if (chat_msg.is_internal) {
      continue;
    }
    nlohmann::json entry;
    entry["role"] = chat_msg.sender == agent::ChatMessage::Sender::kUser
                        ? "user"
                        : "assistant";
    entry["content"] = chat_msg.message;
    messages.push_back(std::move(entry));
  }

  if (messages.empty()) {
    return absl::InvalidArgumentError(
        "History does not contain any user/assistant messages.");
  }

  std::string fallback_prompt = prompt_builder_.BuildPromptFromHistory(history);

  nlohmann::json request_body;
  request_body["model"] = config_.model;
  request_body["system"] = config_.system_prompt;
  request_body["stream"] = config_.stream;
  request_body["format"] = "json";

  if (config_.use_chat_completions) {
    request_body["messages"] = messages;
  } else {
    request_body["prompt"] = fallback_prompt;
  }

  nlohmann::json options = {{"temperature", config_.temperature},
                            {"top_p", config_.top_p},
                            {"top_k", config_.top_k},
                            {"num_predict", config_.max_tokens},
                            {"num_ctx", config_.num_ctx}};
  request_body["options"] = options;

  AgentResponse agent_response;
  agent_response.provider = "ollama";

  try {
    httplib::Client cli(config_.base_url);
    cli.set_read_timeout(60);  // Longer timeout for inference

    const char* endpoint =
        config_.use_chat_completions ? "/api/chat" : "/api/generate";
    absl::Time request_start = absl::Now();
    auto res = cli.Post(endpoint, request_body.dump(), "application/json");

    if (!res) {
      return absl::UnavailableError(
          "Failed to connect to Ollama. Is 'ollama serve' running?\n"
          "Start with: ollama serve");
    }

    if (res->status != 200) {
      return absl::InternalError(absl::StrFormat(
          "Ollama API error: HTTP %d\nResponse: %s", res->status, res->body));
    }

    // Parse Ollama's wrapper JSON
    nlohmann::json ollama_wrapper;
    try {
      ollama_wrapper = nlohmann::json::parse(res->body);
    } catch (const nlohmann::json::exception& e) {
      return absl::InternalError(
          absl::StrFormat("Failed to parse Ollama response: %s\nBody: %s",
                          e.what(), res->body));
    }

    // Extract the LLM's response from Ollama's "response" field.
    // For the chat completions API, it's inside "message" -> "content".
    std::string llm_output;
    if (config_.use_chat_completions) {
      if (ollama_wrapper.contains("message") &&
          ollama_wrapper["message"].is_object() &&
          ollama_wrapper["message"].contains("content")) {
        llm_output = ollama_wrapper["message"]["content"].get<std::string>();
      } else {
        return absl::InvalidArgumentError(
            "Ollama chat response missing 'message.content'");
      }
    } else {
      if (ollama_wrapper.contains("response") &&
          ollama_wrapper["response"].is_string()) {
        llm_output = ollama_wrapper["response"].get<std::string>();
      } else {
        return absl::InvalidArgumentError(
            "Ollama response missing 'response' field");
      }
    }

    // Debug: print raw LLM output when verbose mode is enabled
    const char* verbose_env = std::getenv("Z3ED_VERBOSE");
    if (verbose_env && std::string(verbose_env) == "1") {
      std::cout << "\n"
                << "\033[35m" << "🔍 Raw LLM Response:" << "\033[0m" << "\n"
                << "\033[2m" << llm_output << "\033[0m" << "\n\n";
    }

    // Parse the LLM's JSON response (the agent structure)
    nlohmann::json response_json;
    try {
      response_json = nlohmann::json::parse(llm_output);
    } catch (const nlohmann::json::exception& e) {
      // Sometimes the LLM includes extra text - try to extract the JSON object
      size_t start = llm_output.find('{');
      size_t end = llm_output.rfind('}');

      if (start != std::string::npos && end != std::string::npos &&
          end > start) {
        std::string json_only = llm_output.substr(start, end - start + 1);
        try {
          response_json = nlohmann::json::parse(json_only);
        } catch (const nlohmann::json::exception&) {
          agent_response.warnings.push_back(
              "LLM response was not valid JSON; returning raw text.");
          agent_response.text_response = llm_output;
          return agent_response;
        }
      } else {
        agent_response.warnings.push_back(
            "LLM response did not contain a JSON object; returning raw text.");
        agent_response.text_response = llm_output;
        return agent_response;
      }
    }

    agent_response.model = ollama_wrapper.value("model", config_.model);
    agent_response.latency_seconds =
        absl::ToDoubleSeconds(absl::Now() - request_start);
    agent_response.parameters["temperature"] =
        absl::StrFormat("%.2f", config_.temperature);
    agent_response.parameters["top_p"] = absl::StrFormat("%.2f", config_.top_p);
    agent_response.parameters["top_k"] = absl::StrFormat("%d", config_.top_k);
    agent_response.parameters["num_predict"] =
        absl::StrFormat("%d", config_.max_tokens);
    agent_response.parameters["num_ctx"] =
        absl::StrFormat("%d", config_.num_ctx);
    agent_response.parameters["endpoint"] = endpoint;

    if (response_json.contains("text_response") &&
        response_json["text_response"].is_string()) {
      agent_response.text_response =
          response_json["text_response"].get<std::string>();
    }
    if (response_json.contains("reasoning") &&
        response_json["reasoning"].is_string()) {
      agent_response.reasoning = response_json["reasoning"].get<std::string>();
    }
    if (response_json.contains("tool_calls") &&
        response_json["tool_calls"].is_array()) {
      for (const auto& call : response_json["tool_calls"]) {
        if (call.contains("tool_name") && call["tool_name"].is_string()) {
          ToolCall tool_call;
          tool_call.tool_name = call["tool_name"].get<std::string>();
          if (call.contains("args") && call["args"].is_object()) {
            for (auto& [key, value] : call["args"].items()) {
              if (value.is_string()) {
                tool_call.args[key] = value.get<std::string>();
              }
            }
          }
          agent_response.tool_calls.push_back(tool_call);
        }
      }
    }
    if (response_json.contains("commands") &&
        response_json["commands"].is_array()) {
      for (const auto& cmd : response_json["commands"]) {
        if (cmd.is_string()) {
          agent_response.commands.push_back(cmd.get<std::string>());
        }
      }
    }

    return agent_response;

  } catch (const std::exception& e) {
    return absl::InternalError(
        absl::StrCat("Ollama request failed: ", e.what()));
  }
#endif
}
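
// For reference, the agent-structured JSON this parser looks for in the LLM
// output. The field names come from the parsing code above; the concrete
// values are made up for illustration:
//
//   {
//     "text_response": "Here is what I found.",
//     "reasoning": "The user asked for a summary.",
//     "tool_calls": [{"tool_name": "example-tool", "args": {"id": "0"}}],
//     "commands": ["example command"]
//   }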

}  // namespace cli
}  // namespace yaze