yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
build_cleaner.py
Go to the documentation of this file.
1#!/usr/bin/env python3
2"""Automate source list maintenance and self-header includes for YAZE."""
3
4from __future__ import annotations
5
6import argparse
7from dataclasses import dataclass, field
8import re
9from pathlib import Path
10from typing import Any, Iterable, List, Optional, Sequence, Set
11
12try:
13 import pathspec
14 HAS_PATHSPEC = True
15except ImportError:
16 HAS_PATHSPEC = False
17 print("Warning: 'pathspec' module not found. Install with: pip3 install pathspec")
18 print(" .gitignore support will be disabled.")
19
# PROJECT_ROOT is the directory two levels above this script; sources live in
# its "src" subdirectory.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
SOURCE_ROOT = PROJECT_ROOT / "src"

# File suffixes treated as compilable sources and as headers, respectively.
SUPPORTED_EXTENSIONS = (".cc", ".c", ".cpp", ".cxx", ".mm")
HEADER_EXTENSIONS = (".h", ".hh", ".hpp", ".hxx")
# A source file whose first 256 characters contain this token is left alone.
BUILD_CLEANER_IGNORE_TOKEN = "build_cleaner:ignore"

# Symbol prefix -> headers suggested by the IWYU-style pass when a source file
# uses a symbol with that prefix but does not include the header.
COMMON_HEADERS = {
    'std::': ['<memory>', '<string>', '<vector>', '<map>', '<set>', '<algorithm>', '<functional>'],
    'absl::': ['<absl/status/status.h>', '<absl/status/statusor.h>', '<absl/strings/str_format.h>'],
    'ImGui::': ['<imgui.h>'],
    'SDL_': ['<SDL.h>'],
}
34
35
@dataclass(frozen=True)
class DirectorySpec:
    """A directory to scan for source files.

    Attributes are documented inline below. Instances are immutable.
    """

    # Directory to scan.
    path: Path
    # When True the whole tree below ``path`` is walked; otherwise only the
    # directory's direct children are considered.
    recursive: bool = True
    # File suffixes (including the leading dot) that count as sources.
    extensions: Sequence[str] = SUPPORTED_EXTENSIONS

    def iter_files(self) -> Iterable[Path]:
        """Yield each regular file under ``path`` whose suffix is in ``extensions``.

        Yields nothing when ``path`` does not exist.
        """
        if not self.path.exists():
            # This is a generator, so a bare return simply ends the iteration.
            return
        iterator = self.path.rglob("*") if self.recursive else self.path.glob("*")
        for candidate in iterator:
            if candidate.is_file() and candidate.suffix in self.extensions:
                yield candidate
52
53
@dataclass
class CMakeSourceBlock:
    """One CMake ``set(<variable> ...)`` source list managed by this tool."""

    # Name of the CMake variable holding the source list.
    variable: str
    # CMake file in which the variable is set.
    cmake_path: Path
    # Directories whose source files belong in the list.
    directories: Sequence[DirectorySpec]
    # Paths (relative to SOURCE_ROOT) that must never be added to the list.
    exclude: Set[Path] = field(default_factory=set)
60
61
def load_gitignore():
    """Load .gitignore patterns into a pathspec matcher.

    Returns:
        A ``pathspec.PathSpec`` built from the project's ``.gitignore``, or
        ``None`` when the ``pathspec`` module is unavailable, the file does
        not exist, or it could not be loaded (a warning is printed then).
    """
    if not HAS_PATHSPEC:
        return None

    gitignore_path = PROJECT_ROOT / ".gitignore"
    if not gitignore_path.exists():
        return None

    try:
        with gitignore_path.open('r', encoding='utf-8') as f:
            # Keep non-blank lines that are not comments, with surrounding
            # whitespace removed.
            patterns = [line.strip() for line in f if line.strip() and not line.startswith('#')]
        return pathspec.PathSpec.from_lines('gitwildmatch', patterns)
    except Exception as e:
        # Best effort: an unreadable or unparsable .gitignore only disables filtering.
        print(f"Warning: Could not load .gitignore: {e}")
        return None
78
79
def is_ignored(path: Path, gitignore_spec) -> bool:
    """Report whether ``path`` matches the loaded .gitignore patterns.

    Returns False when no matcher is available or when ``path`` is not located
    under ``PROJECT_ROOT``.
    """
    matcher_unavailable = gitignore_spec is None or not HAS_PATHSPEC
    if matcher_unavailable:
        return False

    try:
        project_relative = str(path.relative_to(PROJECT_ROOT))
        return gitignore_spec.match_file(project_relative)
    except ValueError:
        # relative_to() raises ValueError for paths outside the project.
        return False
90
91
def discover_cmake_libraries() -> List[CMakeSourceBlock]:
    """
    Auto-discover CMake source lists that explicitly opt in to auto-maintenance.

    A ``set(<NAME>_SRC|_SOURCES|_SOURCE ...)`` line in a ``*.cmake`` file under
    SOURCE_ROOT is picked up when one of the five lines before it contains a
    marker, matched case-insensitively: any text containing "auto-maintain" or
    "auto_maintain", for example:
    - "# build_cleaner:auto-maintain"
    - "# auto-maintained by build_cleaner.py"
    - "# AUTO-MAINTAINED"

    Files below a directory named "lib" or "third_party" (relative to
    SOURCE_ROOT) are skipped.

    Supports decomposed libraries where one cmake file defines multiple
    PREFIX_SUBDIR_SRC variables (e.g., GFX_TYPES_SRC, GFX_BACKEND_SRC): when a
    prefix occurs at least twice, each variable is mapped to the lower-cased
    SUBDIR directory next to the cmake file if that directory exists. Plain
    SUBDIR_SRC variables in such a file are mapped the same way. Everything
    else falls back to scanning the cmake file's own directory.

    Returns:
        One CMakeSourceBlock per discovered variable.
    """
    set_pattern = re.compile(r'set\s*\(\s*(\w+(?:_SRC|_SOURCES|_SOURCE))(?:\s|$|\))')
    subdir_pattern = re.compile(r'([A-Z]+)_([A-Z_]+)_(?:SRC|SOURCES|SOURCE)$')
    simple_pattern = re.compile(r'([A-Z]+)_(?:SRC|SOURCES|SOURCE)$')
    excluded_dirs = {"lib", "third_party"}

    def has_marker(text: str) -> bool:
        lowered = text.lower()
        return 'auto-maintain' in lowered or 'auto_maintain' in lowered

    # First pass: collect all opted-in variables per cmake file.
    file_variables: dict[Path, list[str]] = {}

    for cmake_file in SOURCE_ROOT.rglob("*.cmake"):
        # Compare whole directory names relative to SOURCE_ROOT so that a
        # checkout living under e.g. /usr/lib/ or a folder such as "zlib" is
        # not excluded by accident.
        parent_parts = cmake_file.relative_to(SOURCE_ROOT).parts[:-1]
        if excluded_dirs.intersection(parent_parts):
            continue

        try:
            lines = cmake_file.read_text(encoding='utf-8').splitlines()
        except (OSError, UnicodeDecodeError) as e:
            print(f"Warning: Could not process {cmake_file}: {e}")
            continue

        for i, line in enumerate(lines):
            # Extract variable name (allow for line breaks or closing paren).
            match = set_pattern.search(line)
            if not match:
                continue
            # Only the five preceding lines may carry the opt-in marker.
            if not any(has_marker(previous) for previous in lines[max(0, i - 5):i]):
                continue
            known = file_variables.setdefault(cmake_file, [])
            var_name = match.group(1)
            if var_name not in known:
                known.append(var_name)

    # Second pass: create blocks with proper subdirectory detection.
    blocks: List[CMakeSourceBlock] = []
    for cmake_file, variables in file_variables.items():
        cmake_dir = cmake_file.parent
        is_recursive = cmake_dir != SOURCE_ROOT / "app/core"

        # Count PREFIX_SUBDIR_SRC variables per prefix (e.g., GFX_*, GUI_*);
        # a prefix used at least twice marks a decomposed library.
        prefix_counts: dict[str, int] = {}
        for var_name in variables:
            prefixed = subdir_pattern.match(var_name)
            if prefixed:
                prefix = prefixed.group(1)
                prefix_counts[prefix] = prefix_counts.get(prefix, 0) + 1
        decomposed_prefixes = {p for p, count in prefix_counts.items() if count >= 2}
        is_likely_decomposed = bool(decomposed_prefixes)

        for var_name in variables:
            target_dir: Optional[Path] = None

            # PREFIX_SUBDIR_SRC of a decomposed library -> <cmake_dir>/<subdir>.
            prefixed = subdir_pattern.match(var_name)
            if prefixed and prefixed.group(1) in decomposed_prefixes:
                candidate = cmake_dir / prefixed.group(2).lower()
                if candidate.is_dir():
                    target_dir = candidate

            # Special case such as CANVAS_SRC: no prefix, just the subdirectory name.
            if target_dir is None and is_likely_decomposed:
                simple = simple_pattern.match(var_name)
                if simple:
                    candidate = cmake_dir / simple.group(1).lower()
                    if candidate.is_dir():
                        target_dir = candidate

            if target_dir is not None:
                spec = DirectorySpec(target_dir, recursive=True)
            else:
                # Fallback: scan the entire cmake directory.
                spec = DirectorySpec(cmake_dir, recursive=is_recursive)

            blocks.append(CMakeSourceBlock(
                variable=var_name,
                cmake_path=cmake_file,
                directories=(spec,),
            ))

    return blocks
210
211
# Static configuration for all library source lists
# The script now auto-maintains all libraries while preserving conditional sections
STATIC_CONFIG: Sequence[CMakeSourceBlock] = (
    CMakeSourceBlock(
        variable="YAZE_APP_EMU_SRC",
        cmake_path=SOURCE_ROOT / "CMakeLists.txt",
        directories=(DirectorySpec(SOURCE_ROOT / "app/emu"),),
    ),
    CMakeSourceBlock(
        variable="YAZE_APP_CORE_SRC",
        cmake_path=SOURCE_ROOT / "app/core/core_library.cmake",
        directories=(DirectorySpec(SOURCE_ROOT / "app/core", recursive=False),),
    ),
    CMakeSourceBlock(
        variable="YAZE_APP_EDITOR_SRC",
        cmake_path=SOURCE_ROOT / "app/editor/editor_library.cmake",
        directories=(DirectorySpec(SOURCE_ROOT / "app/editor"),),
    ),
    CMakeSourceBlock(
        variable="YAZE_APP_ZELDA3_SRC",
        cmake_path=SOURCE_ROOT / "zelda3/zelda3_library.cmake",
        directories=(DirectorySpec(SOURCE_ROOT / "zelda3"),),
    ),
    CMakeSourceBlock(
        variable="YAZE_NET_SRC",
        cmake_path=SOURCE_ROOT / "app/net/net_library.cmake",
        directories=(DirectorySpec(SOURCE_ROOT / "app/net"),),
        exclude={Path("app/net/rom_service_impl.cc")},
    ),
    CMakeSourceBlock(
        variable="YAZE_UTIL_SRC",
        cmake_path=SOURCE_ROOT / "util/util.cmake",
        directories=(DirectorySpec(SOURCE_ROOT / "util"),),
    ),
    # Note: the decomposed GFX and GUI libraries are intentionally not listed
    # here; they are picked up by auto-discovery via markers instead:
    #   app/gfx/gfx_library.cmake: GFX_TYPES_SRC, GFX_BACKEND_SRC,
    #     GFX_RESOURCE_SRC, GFX_CORE_SRC, GFX_UTIL_SRC, GFX_RENDER_SRC,
    #     GFX_DEBUG_SRC
    #   app/gui/gui_library.cmake: GUI_CORE_SRC, CANVAS_SRC, GUI_WIDGETS_SRC,
    #     GUI_AUTOMATION_SRC, GUI_APP_SRC
    CMakeSourceBlock(
        variable="YAZE_AGENT_SOURCES",
        cmake_path=SOURCE_ROOT / "cli/agent.cmake",
        directories=(
            DirectorySpec(SOURCE_ROOT / "cli", recursive=False),  # For flags.cc
            DirectorySpec(SOURCE_ROOT / "cli/service"),
            DirectorySpec(SOURCE_ROOT / "cli/handlers"),
        ),
        exclude={
            Path("cli/cli.cc"),  # Part of z3ed executable
            Path("cli/cli_main.cc"),  # Part of z3ed executable
        },
    ),
    CMakeSourceBlock(
        variable="YAZE_TEST_SOURCES",
        cmake_path=SOURCE_ROOT / "app/test/test.cmake",
        directories=(DirectorySpec(SOURCE_ROOT / "app/test"),),
    ),
)
327
328
def relative_to_source(path: Path) -> Path:
    """Return ``path`` expressed relative to ``SOURCE_ROOT``.

    Raises:
        ValueError: if ``path`` is not located under ``SOURCE_ROOT``.
    """
    source_relative = path.relative_to(SOURCE_ROOT)
    return source_relative
331
332
def parse_block(lines: List[str], start_idx: int) -> int:
    """Locate the line that closes a set/list block.

    Scans the lines after ``start_idx`` and returns the index of the first one
    whose first non-blank character is ``)``.

    Raises:
        ValueError: if no such line exists.
    """
    cursor = start_idx + 1
    line_count = len(lines)
    while cursor < line_count:
        if lines[cursor].lstrip().startswith(")"):
            return cursor
        cursor += 1
    raise ValueError(f"Unterminated set/list block starting at line {start_idx}")
339
340
def parse_entry(line: str) -> Optional[str]:
    """Extract the list entry from one line of a CMake set/list block.

    Returns the line's text with surrounding whitespace and any trailing
    ``#`` comment removed, or None for blank lines, comment-only lines and
    entries starting with ``$`` (variable references).
    """
    text = line.strip()
    if text == "" or text[0] == "#":
        return None
    # Drop a trailing inline comment.
    text = text.partition("#")[0].strip()
    if text == "" or text[0] == "$":
        return None
    return text
352
353
def extract_conditional_files(cmake_path: Path, variable: str) -> Set[str]:
    """Extract files that are added to the variable via conditional blocks (if/endif).

    Args:
        cmake_path: CMake file to scan.
        variable: Exact name of the list variable; ``APPEND FOO_SRC_EXTRA`` is
            not treated as an append to ``FOO_SRC``.

    Returns:
        The entries appended to ``variable`` with ``APPEND`` while inside an
        ``if`` block. Empty when the file cannot be read.
    """
    conditional_files: Set[str] = set()

    try:
        lines = cmake_path.read_text(encoding='utf-8').splitlines()
    except (OSError, UnicodeDecodeError):
        return conditional_files

    # (?!\w) keeps a longer variable name sharing this prefix from matching.
    append_pattern = re.compile(rf'APPEND\s+{re.escape(variable)}(?!\w)')
    single_line_pattern = re.compile(rf'APPEND\s+{re.escape(variable)}\s+(.+?)\)')

    in_conditional = False
    conditional_depth = 0

    for i, line in enumerate(lines):
        stripped = line.strip()

        # Track if/endif blocks; conditional_depth counts nested ifs only.
        if stripped.startswith(('if(', 'if ')):
            if in_conditional:
                conditional_depth += 1
            else:
                in_conditional = True
                conditional_depth = 0
        elif stripped.startswith('endif('):
            if conditional_depth > 0:
                conditional_depth -= 1
            else:
                in_conditional = False

        if not in_conditional or not append_pattern.search(line):
            continue

        if ')' in line:
            # Single-line list(APPEND VAR file.cc [more files...])
            match = single_line_pattern.search(line)
            if match:
                for token in match.group(1).split():
                    # Accept every supported source suffix, not only .cc, so
                    # conditional .mm/.cpp files are recognised as well.
                    if (not token.startswith('$') and '/' in token
                            and token.endswith(SUPPORTED_EXTENSIONS)):
                        conditional_files.add(token)
        else:
            # Multi-line list(APPEND): entries follow until the closing ')'.
            for following in lines[i + 1:]:
                if following.strip().startswith(')'):
                    break
                entry = parse_entry(following)
                if entry:
                    conditional_files.add(entry)

    return conditional_files
405
406
def gather_expected_sources(block: CMakeSourceBlock, gitignore_spec: Any = None) -> List[str]:
    """Compute the sorted source list that ``block``'s CMake variable should hold.

    Walks every directory of the block and keeps files that are not opted out
    via the ignore token, not matched by .gitignore, not listed in
    ``block.exclude`` and not already appended inside a conditional block of
    the CMake file. Entries are SOURCE_ROOT-relative with forward slashes.
    """
    # Files appended under if()/endif() stay where they are.
    conditional_files = extract_conditional_files(block.cmake_path, block.variable)

    entries: Set[str] = set()
    for directory in block.directories:
        for source_file in directory.iter_files():
            if should_ignore_path(source_file) or is_ignored(source_file, gitignore_spec):
                continue

            # Exclude paths are relative to SOURCE_ROOT, and so are the
            # generated entries (project-wide convention).
            rel_path = relative_to_source(source_file)
            if rel_path in block.exclude:
                continue

            rel_path_str = str(rel_path).replace("\\", "/")
            # This check is imperfect if the conditional blocks do not use
            # SOURCE_ROOT-relative paths.
            if rel_path_str in conditional_files:
                continue
            entries.add(rel_path_str)

    return sorted(entries)
434
435
def should_ignore_path(path: Path) -> bool:
    """Report whether the file opts out via the ignore token.

    Only the first 256 characters are inspected. Files that cannot be opened
    or decoded as UTF-8 are reported as not ignored.
    """
    try:
        with path.open("r", encoding="utf-8") as stream:
            prefix = stream.read(256)
    except (OSError, UnicodeDecodeError):
        return False
    return prefix.find(BUILD_CLEANER_IGNORE_TOKEN) != -1
443
444
def extract_includes(file_path: Path) -> Set[str]:
    """Collect the targets of all ``#include`` lines in a source file.

    Both ``#include "..."`` and ``#include <...>`` are recognised; the returned
    strings are the paths without the delimiters. Read or decode errors end the
    scan and whatever was collected so far is returned.
    """
    include_re = re.compile(r'^\s*#include\s+[<"]([^>"]+)[>"]')
    found = set()
    try:
        with file_path.open('r', encoding='utf-8') as stream:
            for text in stream:
                hit = include_re.match(text)
                if hit is not None:
                    found.add(hit.group(1))
    except (OSError, UnicodeDecodeError):
        pass
    return found
458
459
def extract_symbols(file_path: Path) -> Set[str]:
    """Collect identifiers from a source file that may require headers.

    The result contains namespace qualifiers (``name::``) and the names of
    capitalised identifiers that are directly followed by ``(``. An unreadable
    or undecodable file yields an empty set.
    """
    found = set()
    try:
        text = file_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError):
        return found

    # Namespace-qualified usage (e.g., std::, absl::, ImGui::).
    found.update(re.findall(r'\b([a-zA-Z_]\w*::)', text))
    # Call-like usage of capitalised names.
    found.update(re.findall(r'\b([A-Z][a-zA-Z0-9_]*)\s*\(', text))
    return found
478
479
def find_missing_headers(source: Path) -> List[str]:
    """Analyze a source file and suggest missing headers based on symbol usage.

    For every prefix in COMMON_HEADERS that some symbol of the file starts
    with, each associated header that the file does not already include is
    reported. An include counts as present when it names the header exactly or
    ends with ``/<header>`` (e.g. ``SDL2/SDL.h`` satisfies ``<SDL.h>``).

    Returns:
        Headers in ``<...>`` form, in COMMON_HEADERS order; empty for files
        carrying the ignore token.
    """
    if should_ignore_path(source):
        return []

    current_includes = extract_includes(source)
    symbols = extract_symbols(source)

    def already_included(header_name: str) -> bool:
        # Compare whole path components; a plain substring test would treat
        # "unordered_map" or "bitmap.h" as satisfying <map>.
        suffix = '/' + header_name
        return any(inc == header_name or inc.endswith(suffix) for inc in current_includes)

    missing: List[str] = []
    for symbol_prefix, headers in COMMON_HEADERS.items():
        if not any(sym.startswith(symbol_prefix) for sym in symbols):
            continue
        for header in headers:
            # Strip the angle brackets to get the bare header path.
            if not already_included(header.strip('<>')):
                missing.append(header)

    return missing
499
500
def find_conditional_blocks_after(cmake_lines: List[str], end_idx: int, variable: str) -> List[str]:
    """
    Find conditional blocks (if/endif) that append to the variable after the main set() block.

    Scanning starts on the line after ``end_idx``. Blank lines are skipped.
    Consecutive ``if`` blocks are collected as long as each one contains an
    ``APPEND <variable>``, either on one line or split as ``APPEND`` at the end
    of a line with the variable starting the next line. Scanning stops at the
    first non-blank line that is not such a block.

    Returns:
        The lines of all collected blocks (from ``if`` through the matching
        ``endif``), in file order.
    """
    conditional_lines: List[str] = []
    total = len(cmake_lines)
    idx = end_idx + 1

    while idx < total:
        stripped = cmake_lines[idx].strip()

        # Blank lines between blocks do not end the scan.
        if not stripped:
            idx += 1
            continue

        # Anything other than an if-block ends the scan.
        if not stripped.startswith(('if(', 'if (')):
            break

        # Walk to the matching endif, noting whether the block touches our variable.
        block_start = idx
        block_depth = 1
        modifies_var = False

        temp_idx = idx + 1
        while temp_idx < total and block_depth > 0:
            temp_line = cmake_lines[temp_idx].strip()
            if temp_line.startswith(('if(', 'if ')):
                block_depth += 1
            elif temp_line.startswith('endif('):
                block_depth -= 1

            if f'APPEND {variable}' in temp_line:
                modifies_var = True
            elif temp_line.endswith('APPEND') and temp_idx + 1 < total:
                # list(APPEND with the variable name on the following line.
                if cmake_lines[temp_idx + 1].strip().startswith(variable):
                    modifies_var = True

            temp_idx += 1

        if not modifies_var:
            # This conditional doesn't touch our variable, stop scanning.
            break

        # Include the entire conditional block and continue after it.
        conditional_lines.extend(cmake_lines[block_start:temp_idx])
        idx = temp_idx

    return conditional_lines
554
555
def update_cmake_block(block: CMakeSourceBlock, dry_run: bool, gitignore_spec: Any = None) -> bool:
    """Bring one CMake source list in line with the files found on disk.

    Locates ``set(<block.variable> ...)`` in ``block.cmake_path``, compares its
    entries with ``gather_expected_sources`` and, when they differ, replaces
    the entries with the sorted expected list. Non-entry lines before the
    first entry are kept in front, later ones are moved behind the entries.

    Args:
        block: The source list to maintain.
        dry_run: When True, only report that an update would happen.
        gitignore_spec: Optional .gitignore matcher passed on to the scan.

    Returns:
        True when the list differs from the expected one (file rewritten unless
        ``dry_run``), False when it is already up to date.

    Raises:
        ValueError: if the ``set(...)`` block cannot be found or is unterminated.
    """
    cmake_lines = block.cmake_path.read_text(encoding="utf-8").splitlines()
    opener = re.compile(rf"\s*set\(\s*{re.escape(block.variable)}\b")

    # Preferred form: "set(VAR" on a single line.
    start_idx: Optional[int] = next(
        (number for number, text in enumerate(cmake_lines) if opener.match(text)),
        None,
    )

    if start_idx is None:
        # Fallback: "set(" whose variable name follows on the same line after
        # whitespace, or on the next significant line.
        for idx, line in enumerate(cmake_lines):
            stripped = line.strip()
            if not stripped.startswith("set("):
                continue
            remainder = stripped[4:].strip()
            if remainder:
                if remainder.startswith(block.variable):
                    start_idx = idx
                    break
                continue
            lookahead = idx + 1
            while lookahead < len(cmake_lines):
                next_line = cmake_lines[lookahead].strip()
                if not next_line or next_line.startswith("#"):
                    lookahead += 1
                    continue
                # Only the first significant line after a bare "set(" counts.
                if next_line == block.variable:
                    start_idx = idx
                break
            if start_idx is not None:
                break

    if start_idx is None:
        raise ValueError(f"Could not locate set({block.variable}) in {block.cmake_path}")

    end_idx = parse_block(cmake_lines, start_idx)
    block_slice = cmake_lines[start_idx + 1 : end_idx]

    prelude: List[str] = []
    postlude: List[str] = []
    existing_entries: List[str] = []
    first_entry_idx: Optional[int] = None

    for offset, raw in enumerate(block_slice):
        entry = parse_entry(raw)
        if not entry:
            # Comments/blank lines stay before or after the entries.
            (prelude if first_entry_idx is None else postlude).append(raw)
            continue
        if entry == block.variable and not existing_entries:
            # Variable name on its own line belongs to the block header.
            prelude.append(raw)
            continue
        existing_entries.append(entry)
        if first_entry_idx is None:
            first_entry_idx = offset

    expected_entries = gather_expected_sources(block, gitignore_spec)
    expected_set = set(expected_entries)
    existing_set = set(existing_entries)

    if existing_set == expected_set:
        return False

    # Reuse the indentation of the first existing entry when there is one.
    indent = " "
    if first_entry_idx is not None:
        sample_line = block_slice[first_entry_idx]
        indent = sample_line[: len(sample_line) - len(sample_line.lstrip())]

    rebuilt_block = prelude + [f"{indent}{entry}" for entry in expected_entries] + postlude

    if dry_run:
        print(f"[DRY-RUN] Would update {block.cmake_path.relative_to(PROJECT_ROOT)} :: {block.variable}")
        return True

    cmake_lines[start_idx + 1 : end_idx] = rebuilt_block
    block.cmake_path.write_text("\n".join(cmake_lines) + "\n", encoding="utf-8")
    print(f"Updated {block.cmake_path.relative_to(PROJECT_ROOT)} :: {block.variable}")
    missing = sorted(expected_set - existing_set)
    removed = sorted(existing_set - expected_set)
    if missing:
        print(f" Added: {', '.join(missing)}")
    if removed:
        print(f" Removed: {', '.join(removed)}")
    return True
643
644
def find_self_header(source: Path) -> Optional[Path]:
    """Return the header sitting next to ``source`` with the same stem, if any.

    Suffixes are tried in ``HEADER_EXTENSIONS`` order and the first existing
    file wins; None is returned when none exists.
    """
    siblings = (source.with_suffix(ext) for ext in HEADER_EXTENSIONS)
    return next((header for header in siblings if header.exists()), None)
651
652
def has_include(lines: Sequence[str], header_variants: Iterable[str]) -> bool:
    """Report whether any ``#include`` line names one of the given headers.

    Only the final path component is compared, so the include may use any
    directory prefix and either quote style.
    """
    wanted_names = {Path(variant).name for variant in header_variants}
    include_re = re.compile(r'^\s*#include\s+[<"]([^>"]+)[>"]')

    for raw in lines:
        text = raw.strip()
        if not text.startswith('#include'):
            continue
        found = include_re.match(text)
        if found and Path(found.group(1)).name in wanted_names:
            return True

    return False
674
675
def find_insert_index(lines: List[str]) -> int:
    """Return the line index at which a new ``#include`` should be inserted.

    If some line starts with ``#include`` (at column 0), the index of the first
    such line is returned. Otherwise leading blank lines, ``//`` comments and
    ``/* ... */`` comments (single- or multi-line) are skipped and the index
    of the first remaining line is returned (``len(lines)`` if there is none).
    """
    for idx, line in enumerate(lines):
        if line.startswith("#include"):
            return idx

    # No includes yet; skip leading comments/blank lines.
    index = 0
    in_block_comment = False
    while index < len(lines):
        stripped = lines[index].strip()
        if not stripped:
            index += 1
            continue
        if stripped.startswith("/*") and not stripped.endswith("*/"):
            in_block_comment = True
            index += 1
            continue
        if in_block_comment:
            if "*/" in stripped:
                in_block_comment = False
            index += 1
            continue
        # A block comment opened and closed on the same line is skipped like a
        # "//" comment; the length check excludes the degenerate "/*/".
        is_one_line_block = (
            len(stripped) >= 4 and stripped.startswith("/*") and stripped.endswith("*/")
        )
        if stripped.startswith("//") or is_one_line_block:
            index += 1
            continue
        break
    return index
708
709
def ensure_self_header_include(source: Path, dry_run: bool) -> bool:
    """
    Ensure a source file includes its corresponding header file.

    Skips files that:
    - Are explicitly ignored
    - Are named ``main.cc`` or end in ``_test.cc``, ``_main.cc`` or
      ``_benchmark.cc`` (compared case-insensitively)
    - Have no corresponding header
    - Cannot be decoded as UTF-8
    - Already include their header (in any path format)
    - Have fewer than three non-blank, non-comment lines

    Args:
        source: Source file to check.
        dry_run: When True, only report the change instead of writing it.

    Returns:
        True when an include was inserted (or would be, in dry-run mode).
    """
    if should_ignore_path(source):
        return False

    # Skip test files and main entry points (they typically don't need
    # self-includes). Match the end of the name so that files such as
    # "domain.cc" are not mistaken for "main.cc".
    source_name = source.name.lower()
    if source_name == 'main.cc' or source_name.endswith(('_test.cc', '_main.cc', '_benchmark.cc')):
        return False

    header = find_self_header(source)
    if header is None:
        # No corresponding header found - this is OK, not all sources have headers.
        return False

    try:
        lines = source.read_text(encoding="utf-8").splitlines()
    except UnicodeDecodeError:
        return False

    # Generate header path relative to SOURCE_ROOT (project convention).
    try:
        header_path_str = str(header.relative_to(SOURCE_ROOT)).replace("\\", "/")
    except ValueError:
        # Header is outside SOURCE_ROOT, just use filename.
        header_path_str = header.name

    # has_include() compares file names only, so these variants cover an
    # include written with any directory prefix.
    if has_include(lines, {header.name, header_path_str}):
        return False

    # Skip stubs/templates: too few lines that are neither blank nor comments.
    code_lines = [
        text for text in lines
        if text.strip() and not text.strip().startswith(('//', '/*'))
    ]
    if len(code_lines) < 3:
        return False

    # Use SOURCE_ROOT-relative path (project convention).
    include_line = f'#include "{header_path_str}"'
    lines.insert(find_insert_index(lines), include_line)

    if dry_run:
        rel = source.relative_to(PROJECT_ROOT)
        print(f"[DRY-RUN] Would insert self-header include into {rel}")
        return True

    source.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(f"Inserted self-header include into {source.relative_to(PROJECT_ROOT)}")
    return True
777
778
def add_missing_headers(source: Path, dry_run: bool, iwyu_mode: bool) -> bool:
    """Insert the headers suggested by ``find_missing_headers`` into ``source``.

    Does nothing unless ``iwyu_mode`` is set. New ``#include`` lines are placed
    directly after the run of include lines that starts at the index returned
    by ``find_insert_index``.

    Returns:
        True when headers were added (or would be, in dry-run mode); False when
        the mode is off, the file is ignored or undecodable, or nothing is missing.
    """
    if not iwyu_mode or should_ignore_path(source):
        return False

    missing_headers = find_missing_headers(source)
    if not missing_headers:
        return False

    try:
        lines = source.read_text(encoding="utf-8").splitlines()
    except UnicodeDecodeError:
        return False

    # Start at the regular insertion point, then step over existing includes.
    position = find_insert_index(lines)
    while position < len(lines) and lines[position].strip().startswith('#include'):
        position += 1

    lines[position:position] = [f'#include {header}' for header in missing_headers]
    header_list = ', '.join(missing_headers)

    if dry_run:
        rel = source.relative_to(PROJECT_ROOT)
        print(f"[DRY-RUN] Would add missing headers to {rel}: {header_list}")
        return True

    source.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(f"Added missing headers to {source.relative_to(PROJECT_ROOT)}: {header_list}")
    return True
813
814
815def collect_source_files(config: List[CMakeSourceBlock], gitignore_spec: Any = None) -> Set[Path]:
816 """Collect all source files from the given configuration, respecting .gitignore patterns."""
817 managed_dirs: Set[Path] = set()
818
819 for block in config:
820 for directory in block.directories:
821 managed_dirs.add(directory.path)
822
823 result: Set[Path] = set()
824 for directory in managed_dirs:
825 if not directory.exists():
826 continue
827 for file_path in directory.rglob("*"):
828 if file_path.is_file() and file_path.suffix in SUPPORTED_EXTENSIONS:
829 if not is_ignored(file_path, gitignore_spec):
830 result.add(file_path)
831 return result
832
833
def get_config(auto_discover: bool = False) -> List[CMakeSourceBlock]:
    """Build the list of source blocks to maintain.

    The result always starts with ``STATIC_CONFIG``. With ``auto_discover``,
    blocks from ``discover_cmake_libraries`` are appended (and announced)
    unless their variable is already covered by the static configuration.
    """
    config = list(STATIC_CONFIG)
    if not auto_discover:
        return config

    static_vars = {entry.variable for entry in STATIC_CONFIG}
    for candidate in discover_cmake_libraries():
        if candidate.variable in static_vars:
            # The static definition takes precedence.
            continue
        config.append(candidate)
        print(f" Auto-discovered: {candidate.variable} in {candidate.cmake_path.name}")

    return config
850
851
def run(dry_run: bool, cmake_only: bool, includes_only: bool, iwyu_mode: bool, auto_discover: bool) -> int:
    """Run the selected maintenance passes and print a summary.

    Args:
        dry_run: Report prospective changes without editing files.
        cmake_only: Skip the include passes.
        includes_only: Skip the CMake source-list pass.
        iwyu_mode: Additionally run the IWYU-style header pass (only when the
            include passes run).
        auto_discover: Also maintain source lists found via opt-in markers.

    Returns:
        0 on completion.

    Raises:
        ValueError: if both ``cmake_only`` and ``includes_only`` are set.
    """
    if cmake_only and includes_only:
        raise ValueError("Cannot use --cmake-only and --includes-only together")

    # Load .gitignore patterns
    gitignore_spec = load_gitignore()
    if gitignore_spec:
        print("✓ Loaded .gitignore patterns")

    changed = False

    # Get configuration (all libraries by default, with optional auto-discovery)
    config = get_config(auto_discover)

    if auto_discover:
        print(f"✓ Using {len(config)} library configurations (with auto-discovery)")
    else:
        print(f"✓ Using {len(config)} library configurations")

    if not includes_only:
        print("\n📋 Updating CMake source lists...")
        for block in config:
            changed |= update_cmake_block(block, dry_run, gitignore_spec)

    if not cmake_only:
        print("\n📝 Checking self-header includes...")
        source_files = collect_source_files(config, gitignore_spec)
        print(f" Scanning {len(source_files)} source files")

        for source in source_files:
            changed |= ensure_self_header_include(source, dry_run)

        if iwyu_mode:
            print("\n🔍 Running IWYU-style header analysis...")
            for source in source_files:
                changed |= add_missing_headers(source, dry_run, iwyu_mode)

    if dry_run and not changed:
        print("\n✅ No changes required (dry-run)")
    elif not dry_run and not changed:
        print("\n✅ No changes required")
    elif dry_run:
        print("\n✅ Dry-run complete - use without --dry-run to apply changes")
    else:
        print("\n✅ All changes applied successfully")

    return 0
899
900
def main() -> int:
    """Parse the command line and run the tool.

    Returns:
        The result of ``run`` on success, or 1 when it raised; the error is
        printed, with a traceback in dry-run mode.
    """
    parser = argparse.ArgumentParser(
        description="Maintain CMake source lists and ensure proper header includes (IWYU-style).",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
 # Dry-run to see what would change:
 %(prog)s --dry-run

 # Auto-discover libraries and update CMake files:
 %(prog)s --auto-discover

 # Run IWYU-style header analysis:
 %(prog)s --iwyu

 # Update only CMake source lists:
 %(prog)s --cmake-only

 # Update only header includes:
 %(prog)s --includes-only
 """
    )
    parser.add_argument("--dry-run", action="store_true",
                        help="Report prospective changes without editing files")
    parser.add_argument("--cmake-only", action="store_true",
                        help="Only update CMake source lists")
    parser.add_argument("--includes-only", action="store_true",
                        help="Only ensure self-header includes")
    parser.add_argument("--iwyu", action="store_true",
                        help="Run IWYU-style analysis to add missing headers")
    parser.add_argument("--auto-discover", action="store_true",
                        help="Auto-discover CMake library files (*.cmake, *_library.cmake)")
    args = parser.parse_args()

    try:
        return run(args.dry_run, args.cmake_only, args.includes_only, args.iwyu, args.auto_discover)
    except Exception as exc:  # pylint: disable=broad-except
        # Top-level boundary: turn any failure into a non-zero exit status.
        import traceback
        print(f"❌ build_cleaner failed: {exc}")
        if args.dry_run:  # Show traceback in dry-run mode for debugging
            traceback.print_exc()
        return 1
943
944
# Script entry point: exit with the status returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
Iterable[Path] iter_files(self)
bool ensure_self_header_include(Path source, bool dry_run)
Set[str] extract_symbols(Path file_path)
List[str] gather_expected_sources(CMakeSourceBlock block, Any gitignore_spec=None)
Path relative_to_source(Path path)
bool has_include(Sequence[str] lines, Iterable[str] header_variants)
bool update_cmake_block(CMakeSourceBlock block, bool dry_run, Any gitignore_spec=None)
Optional[Path] find_self_header(Path source)
List[str] find_conditional_blocks_after(List[str] cmake_lines, int end_idx, str variable)
Optional[str] parse_entry(str line)
List[CMakeSourceBlock] get_config(bool auto_discover=False)
bool is_ignored(Path path, gitignore_spec)
Set[str] extract_includes(Path file_path)
int parse_block(List[str] lines, int start_idx)
bool add_missing_headers(Path source, bool dry_run, bool iwyu_mode)
bool should_ignore_path(Path path)
List[str] find_missing_headers(Path source)
List[CMakeSourceBlock] discover_cmake_libraries()
Set[str] extract_conditional_files(Path cmake_path, str variable)
int run(bool dry_run, bool cmake_only, bool includes_only, bool iwyu_mode, bool auto_discover)
Set[Path] collect_source_files(List[CMakeSourceBlock] config, Any gitignore_spec=None)
int find_insert_index(List[str] lines)