6#include "absl/flags/declare.h"
7#include "absl/flags/flag.h"
8#include "absl/status/status.h"
9#include "absl/strings/str_cat.h"
15#include "nlohmann/json.hpp"
29 return ::absl::OkStatus();
33 bool use_mock = ::absl::GetFlag(FLAGS_mock_rom);
40 std::cout <<
"✅ Mock ROM initialized with embedded Zelda3 labels\n";
41 return ::absl::OkStatus();
45 std::string rom_path = ::absl::GetFlag(FLAGS_rom);
46 if (rom_path.empty()) {
47 return ::absl::InvalidArgumentError(
48 "No ROM loaded. Pass --rom=<path> or use --mock-rom for testing.");
53 return ::absl::FailedPreconditionError(::absl::StrCat(
54 "Failed to load ROM from '", rom_path,
"': ", status.message()));
57 return ::absl::OkStatus();
64 std::vector<std::string>
66 bool expect_tool_calls =
false;
67 bool expect_commands =
false;
73 .name =
"embedded_labels_room_query",
74 .description =
"Ask about room names using embedded labels",
75 .user_prompts = {
"What is the name of room 5?"},
76 .expected_keywords = {
"room",
"Tower of Hera",
"Moldorm"},
77 .expect_tool_calls =
false,
78 .expect_commands =
false,
81 .name =
"embedded_labels_sprite_query",
82 .description =
"Ask about sprite names using embedded labels",
83 .user_prompts = {
"What is sprite 9?"},
84 .expected_keywords = {
"sprite",
"Moldorm",
"Boss"},
85 .expect_tool_calls =
false,
86 .expect_commands =
false,
89 .name =
"embedded_labels_entrance_query",
90 .description =
"Ask about entrance names using embedded labels",
91 .user_prompts = {
"What is entrance 0?"},
92 .expected_keywords = {
"entrance",
"Link",
"House"},
93 .expect_tool_calls =
false,
94 .expect_commands =
false,
97 .name =
"simple_question",
98 .description =
"Ask about dungeons in the ROM",
99 .user_prompts = {
"What dungeons are in this ROM?"},
100 .expected_keywords = {
"dungeon",
"palace",
"castle"},
101 .expect_tool_calls =
true,
102 .expect_commands =
false,
105 .name =
"list_all_rooms",
106 .description =
"List all room names with embedded labels",
107 .user_prompts = {
"List the first 10 dungeon rooms"},
108 .expected_keywords = {
"room",
"Ganon",
"Hyrule",
"Palace"},
109 .expect_tool_calls =
true,
110 .expect_commands =
false,
113 .name =
"overworld_tile_search",
114 .description =
"Find specific tiles in overworld",
115 .user_prompts = {
"Find all trees on the overworld"},
116 .expected_keywords = {
"tree",
"tile",
"map"},
117 .expect_tool_calls =
true,
118 .expect_commands =
false,
121 .name =
"multi_step_query",
122 .description =
"Ask multiple questions in sequence",
125 "What is the name of room 0?",
126 "What sprites are defined in the game?",
128 .expected_keywords = {
"Ganon",
"sprite",
"room"},
129 .expect_tool_calls =
true,
130 .expect_commands =
false,
133 .name =
"map_description",
134 .description =
"Get information about a specific map",
135 .user_prompts = {
"Describe overworld map 0"},
136 .expected_keywords = {
"map",
"light world",
"tile"},
137 .expect_tool_calls =
true,
138 .expect_commands =
false,
144 std::cout <<
"\n===========================================\n";
145 std::cout <<
"Test: " << test_case.
name <<
"\n";
146 std::cout <<
"Description: " << test_case.
description <<
"\n";
147 std::cout <<
"===========================================\n\n";
151 std::cout <<
"๐ค User: " << prompt <<
"\n\n";
155 std::cout <<
"๐ค Agent: " << response.
message <<
"\n\n";
158 std::cout <<
"🧾 JSON Output:\n" << *response.
json_pretty <<
"\n\n";
162 std::cout <<
"๐ Table Output:\n";
163 const auto& table = response.
table_data.value();
167 for (
size_t i = 0; i < table.headers.size(); ++i) {
168 std::cout << table.headers[i];
169 if (i < table.headers.size() - 1) {
174 for (
size_t i = 0; i < table.headers.size(); ++i) {
175 std::cout << std::string(table.headers[i].length(),
'-');
176 if (i < table.headers.size() - 1) {
183 const size_t max_rows = std::min<size_t>(10, table.rows.size());
184 for (
size_t i = 0; i < max_rows; ++i) {
186 for (
size_t j = 0; j < table.rows[i].size(); ++j) {
187 std::cout << table.rows[i][j];
188 if (j < table.rows[i].size() - 1) {
195 if (!verbose && table.rows.size() > max_rows) {
196 std::cout <<
" ... (" << (table.rows.size() - max_rows)
200 if (verbose && table.rows.size() > max_rows) {
201 for (
size_t i = max_rows; i < table.rows.size(); ++i) {
203 for (
size_t j = 0; j < table.rows[i].size(); ++j) {
204 std::cout << table.rows[i][j];
205 if (j < table.rows[i].size() - 1) {
222 if (response.
message.find(keyword) == std::string::npos) {
223 std::cout <<
"⚠️ Warning: Expected keyword '" << keyword
224 <<
"' not found in response\n";
231 std::cout <<
"⚠️ Warning: Expected tool calls but no table data found\n";
237 response.
message.find(
"overworld") != std::string::npos ||
238 response.
message.find(
"dungeon") != std::string::npos ||
239 response.
message.find(
"set-tile") != std::string::npos;
241 std::cout <<
"⚠️ Warning: Expected commands but none found\n";
252 bool all_passed =
true;
260 if (!response_or.ok()) {
261 std::cout <<
"❌ FAILED: " << response_or.status().message() <<
"\n\n";
266 const auto& response = response_or.value();
276 std::cout <<
"๐ Conversation Summary (" << history.size() <<
" message"
277 << (history.size() == 1 ?
"" :
"s") <<
")\n";
278 for (
const auto& message : history) {
281 std::cout <<
" [" << sender <<
"] " << message.message <<
"\n";
287 std::cout <<
"✅ Test PASSED: " << test_case.
name <<
"\n";
288 return absl::OkStatus();
291 std::cout <<
"⚠️ Test completed with warnings: " << test_case.
name <<
"\n";
292 return absl::InternalError(
293 absl::StrCat(
"Conversation test failed validation: ", test_case.
name));
297 const std::string& file_path,
298 std::vector<ConversationTestCase>* test_cases) {
299 std::ifstream file(file_path);
300 if (!file.is_open()) {
301 return absl::NotFoundError(
302 absl::StrCat(
"Could not open test file: ", file_path));
305 nlohmann::json test_json;
308 }
catch (
const nlohmann::json::parse_error& e) {
309 return absl::InvalidArgumentError(
310 absl::StrCat(
"Failed to parse test file: ", e.what()));
313 if (!test_json.is_array()) {
314 return absl::InvalidArgumentError(
315 "Test file must contain a JSON array of test cases");
318 for (
const auto& test_obj : test_json) {
320 test_case.
name = test_obj.value(
"name",
"unnamed_test");
321 test_case.
description = test_obj.value(
"description",
"");
323 if (test_obj.contains(
"prompts") && test_obj[
"prompts"].is_array()) {
324 for (
const auto& prompt : test_obj[
"prompts"]) {
325 if (prompt.is_string()) {
326 test_case.
user_prompts.push_back(prompt.get<std::string>());
331 if (test_obj.contains(
"expected_keywords") &&
332 test_obj[
"expected_keywords"].is_array()) {
333 for (
const auto& keyword : test_obj[
"expected_keywords"]) {
334 if (keyword.is_string()) {
343 test_cases->push_back(test_case);
346 return absl::OkStatus();
352 const std::vector<std::string>& arg_vec) {
353 std::string test_file;
354 bool use_defaults =
true;
355 bool verbose =
false;
357 for (
size_t i = 0; i < arg_vec.size(); ++i) {
358 const std::string& arg = arg_vec[i];
359 if (arg ==
"--file" && i + 1 < arg_vec.size()) {
360 test_file = arg_vec[i + 1];
361 use_defaults =
false;
363 }
else if (arg ==
"--verbose") {
368 std::cout <<
"๐ Debug: Starting test-conversation handler...\n";
372 std::cout <<
"๐ Debug: Loading ROM...\n";
373 auto load_status = LoadRomForAgent(rom);
374 if (!load_status.ok()) {
375 std::cerr <<
"❌ Error loading ROM: " << load_status.message() <<
"\n";
379 std::cout <<
"✅ ROM loaded: " << rom.
title() <<
"\n";
382 std::cout <<
"๐ Debug: Initializing embedded labels...\n";
386 if (!labels_status.ok()) {
387 std::cerr <<
"⚠️ Warning: Could not initialize embedded labels: "
388 << labels_status.message() <<
"\n";
390 std::cout <<
"✅ Embedded labels initialized successfully\n";
394 std::cout <<
"๐ Debug: Checking resource label manager...\n";
396 std::cout <<
"๐ Debug: Associating labels with ROM...\n";
399 std::cout <<
"✅ Embedded labels loaded and associated with ROM\n";
401 std::cout <<
"⚠️ ROM has no resource label manager\n";
405 std::cout <<
"๐ Debug: Creating conversational agent service...\n";
406 std::cout <<
"๐ Debug: About to construct service object...\n";
409 std::cout <<
"✅ Service object created\n";
411 std::cout <<
"๐ Debug: Setting ROM context...\n";
413 std::cout <<
"✅ Service initialized\n";
416 std::vector<ConversationTestCase> test_cases;
418 test_cases = GetDefaultTestCases();
419 std::cout <<
"Using default test cases (" << test_cases.size()
422 auto status = LoadTestCasesFromFile(test_file, &test_cases);
426 std::cout <<
"Loaded " << test_cases.size() <<
" test cases from "
427 << test_file <<
"\n";
430 if (test_cases.empty()) {
431 return absl::InvalidArgumentError(
"No test cases to run");
438 for (
const auto& test_case : test_cases) {
439 auto status = RunTestCase(test_case, service, verbose);
444 std::cerr <<
"Test case '" << test_case.name
445 <<
"' failed: " << status.message() <<
"\n";
450 std::cout <<
"\n===========================================\n";
451 std::cout <<
"Test Summary\n";
452 std::cout <<
"===========================================\n";
453 std::cout <<
"Total tests: " << test_cases.size() <<
"\n";
454 std::cout <<
"Passed: " << passed <<
"\n";
455 std::cout <<
"Failed: " << failed <<
"\n";
458 std::cout <<
"\n✅ All tests passed!\n";
460 std::cout <<
"\n⚠️ Some tests failed\n";
464 return absl::OkStatus();
467 return absl::InternalError(
468 absl::StrCat(failed,
" conversation test(s) reported failures"));
The Rom class is used to load, save, and modify Rom data. This is a generic SNES ROM container and do...
project::ResourceLabelManager * resource_label()
absl::Status LoadFromFile(const std::string &filename, const LoadOptions &options=LoadOptions::Defaults())
absl::StatusOr< ChatMessage > SendMessage(const std::string &message)
void SetRomContext(Rom *rom)
const std::vector< ChatMessage > & GetHistory() const
ABSL_DECLARE_FLAG(std::string, rom)
bool ValidateResponse(const ChatMessage &response, const ConversationTestCase &test_case)
void PrintTestHeader(const ConversationTestCase &test_case)
void PrintUserPrompt(const std::string &prompt)
absl::Status LoadTestCasesFromFile(const std::string &file_path, std::vector< ConversationTestCase > *test_cases)
void PrintAgentResponse(const ChatMessage &response, bool verbose)
std::vector< ConversationTestCase > GetDefaultTestCases()
absl::Status RunTestCase(const ConversationTestCase &test_case, ConversationalAgentService &service, bool verbose)
absl::Status LoadRomForAgent(Rom &rom)
absl::Status HandleTestConversationCommand(const std::vector< std::string > &args)
absl::Status InitializeMockRom(Rom &rom)
Initialize a mock ROM for testing without requiring an actual ROM file.
std::optional< TableData > table_data
std::optional< std::string > json_pretty
std::vector< std::string > expected_keywords
std::vector< std::string > user_prompts
std::unordered_map< std::string, std::unordered_map< std::string, std::string > > labels_
Modern project structure with comprehensive settings consolidation.
std::unordered_map< std::string, std::unordered_map< std::string, std::string > > resource_labels
absl::Status InitializeEmbeddedLabels(const std::unordered_map< std::string, std::unordered_map< std::string, std::string > > &labels)
static std::unordered_map< std::string, std::unordered_map< std::string, std::string > > ToResourceLabels()
Convert all labels to a structured map for project embedding.