icb_clang/project.rs
1//! Project‑level traversal of C/C++ source trees.
2//!
3//! # Entry points
4//!
5//! * [`parse_project`] – processes every translation unit listed in a
6//! [`compile_commands.json`](https://clang.llvm.org/docs/JSONCompilationDatabase.html)
7//! compilation database.
8//! * [`parse_directory`] – recursively discovers C/C++ files under a root
9//! directory and parses each one with uniform compiler flags.
10//!
//! Both functions distribute work across available CPU cores via
//! [`rayon`] parallel iterators when `parallel` is `true`; otherwise the
//! files are processed one after another on the calling thread.
13//!
14//! # File filtering
15//!
16//! Only files whose extension matches one of `c`, `cpp`, `cc`, `cxx`, `h`,
17//! `hpp` are considered. Symbolic links are *not* followed to avoid
18//! infinite loops on recursive directory structures.
19//!
20//! # Error handling
21//!
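//! Any file that cannot be read or parsed aborts the entire operation with
//! an [`IcbError`]. Partial results are discarded. In parallel mode, when
//! several files fail, which of their errors is returned is not deterministic.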
24//!
25//! # Memory usage
26//!
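//! Each translation unit’s facts are collected independently and then
//! returned as a flat vector, so the facts of *every* translation unit stay
//! in memory until the call returns. Only the source text is transient: it
//! is dropped as soon as its file has been parsed, and in parallel mode at
//! most one file per Rayon worker thread is being parsed at any given time.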
30//!
31//! # Example
32//!
33//! ```rust,no_run
34//! use std::path::Path;
35//! let facts = icb_clang::project::parse_directory(
36//! Path::new("src"),
37//! &["-std=c++17".into()],
38//! true,
39//! None,
40//! false,
41//! ).unwrap();
42//! ```
43
44use icb_common::IcbError;
45use icb_parser::facts::RawNode;
46use rayon::prelude::*;
47use serde::Deserialize;
48use std::fs;
49use std::path::{Path, PathBuf};
50use walkdir::WalkDir;
51
52use crate::parser::parse_cpp_file;
53
54/// A single entry in a Clang compilation database.
55///
56/// Deserialised from `compile_commands.json`; the schema follows the
57/// [Clang JSON Compilation Database Format
58/// Specification](https://clang.llvm.org/docs/JSONCompilationDatabase.html).
59#[derive(Debug, Deserialize)]
60struct CompileCommandEntry {
61 /// The main source file processed by this compilation step.
62 file: String,
63 /// The full compiler command line as a single string (optional).
64 #[serde(default)]
65 command: Option<String>,
66 /// The compiler command line split into an argument list (optional).
67 #[serde(default)]
68 arguments: Option<Vec<String>>,
69}
70
71/// Parse every source file listed in a compilation database.
72///
73/// Each entry is processed independently; results are collected in the order
74/// they complete.
75///
76/// # Arguments
77///
78/// * `compile_commands` – Path to `compile_commands.json`.
79/// * `base_dir` – Base directory for resolving relative file paths.
80/// * `parallel` – Distribute work across threads if `true`.
81/// * `allow_system` – Forwarded to [`parse_cpp_file`].
82///
83/// # Errors
84///
85/// Returns [`IcbError::Io`] if the database cannot be read, or
86/// [`IcbError::Parse`] for the first file that fails.
87pub fn parse_project(
88 compile_commands: &Path,
89 base_dir: &Path,
90 parallel: bool,
91 allow_system: bool,
92) -> Result<Vec<(String, Vec<RawNode>)>, IcbError> {
93 let data = fs::read_to_string(compile_commands).map_err(IcbError::Io)?;
94 let entries: Vec<CompileCommandEntry> =
95 serde_json::from_str(&data).map_err(|e| IcbError::Parse(e.to_string()))?;
96
97 let process = |entry: CompileCommandEntry| -> Result<(String, Vec<RawNode>), IcbError> {
98 let file_path = resolve_file_path(&entry.file, base_dir);
99 let source = fs::read_to_string(&file_path).map_err(|e| {
100 IcbError::Io(std::io::Error::new(
101 e.kind(),
102 format!("failed to read {}: {}", file_path.display(), e),
103 ))
104 })?;
105 let args = extract_args(&entry);
106 let facts = parse_cpp_file(
107 &source,
108 &args,
109 Some(file_path.to_str().unwrap()),
110 allow_system,
111 )?;
112 Ok((file_path.to_string_lossy().into_owned(), facts))
113 };
114
115 if parallel {
116 entries
117 .into_par_iter()
118 .map(process)
119 .collect::<Result<Vec<_>, _>>()
120 } else {
121 entries
122 .into_iter()
123 .map(process)
124 .collect::<Result<Vec<_>, _>>()
125 }
126}
127
128/// Recursively discover C/C++ files under `root` and parse each one.
129///
130/// Only files with an extension in `{c, cpp, cc, cxx, h, hpp}` are
131/// processed. Symlinks are ignored to prevent infinite recursion.
132///
133/// # Arguments
134///
135/// * `root` – Root directory for the walk.
136/// * `args` – Clang command‑line arguments shared by all files.
137/// * `parallel` – Distribute work across threads if `true`.
138/// * `max_depth` – Maximum directory depth (`None` for unlimited).
139/// * `allow_system` – Forwarded to [`parse_cpp_file`].
140///
141/// # Errors
142///
143/// Returns [`IcbError::Io`] if the directory walk fails, or
144/// [`IcbError::Parse`] for the first file that fails.
145pub fn parse_directory(
146 root: &Path,
147 args: &[String],
148 parallel: bool,
149 max_depth: Option<usize>,
150 allow_system: bool,
151) -> Result<Vec<(String, Vec<RawNode>)>, IcbError> {
152 let mut files = Vec::new();
153 collect_cpp_files(root, &mut files, max_depth)?;
154
155 let process = |path: PathBuf| -> Result<(String, Vec<RawNode>), IcbError> {
156 let source = fs::read_to_string(&path).map_err(IcbError::Io)?;
157 let facts = parse_cpp_file(&source, args, Some(path.to_str().unwrap()), allow_system)?;
158 let rel = path.strip_prefix(root).unwrap_or(&path);
159 Ok((rel.display().to_string(), facts))
160 };
161
162 if parallel {
163 files
164 .into_par_iter()
165 .map(process)
166 .collect::<Result<Vec<_>, _>>()
167 } else {
168 files
169 .into_iter()
170 .map(process)
171 .collect::<Result<Vec<_>, _>>()
172 }
173}
174
/// Turn a `file` value from the compilation database into a usable path.
///
/// Surrounding whitespace in `file` is stripped first. An absolute path is
/// then returned as is; anything else is appended to `base`.
fn resolve_file_path(file: &str, base: &Path) -> PathBuf {
    let candidate = Path::new(file.trim());
    if !candidate.is_absolute() {
        return base.join(candidate);
    }
    candidate.to_path_buf()
}
187
188/// Extract compiler arguments from a compilation database entry.
189///
190/// Prefers the `arguments` field if present; otherwise splits `command` on
191/// whitespace. Returns an empty vector if neither field is set.
192fn extract_args(entry: &CompileCommandEntry) -> Vec<String> {
193 if let Some(ref arguments) = entry.arguments {
194 return arguments.clone();
195 }
196 if let Some(ref command) = entry.command {
197 return command.split_whitespace().map(|s| s.to_string()).collect();
198 }
199 Vec::new()
200}
201
202/// Walk the directory tree and collect C/C++ source files.
203///
204/// Symlinks are not followed, and the optional `max_depth` limits recursion.
205/// Walk the directory tree and collect C/C++ source files.
206///
207/// Symlinks are not followed, and the optional `max_depth` limits recursion.
208/// File extensions are matched case‑insensitively.
209fn collect_cpp_files(
210 dir: &Path,
211 files: &mut Vec<PathBuf>,
212 max_depth: Option<usize>,
213) -> Result<(), IcbError> {
214 let cpp_extensions: &[&str] = &["c", "cpp", "cc", "cxx", "h", "hpp"];
215 for entry in WalkDir::new(dir).follow_links(false) {
216 let entry = entry.map_err(|e| IcbError::Parse(e.to_string()))?;
217 if let Some(max) = max_depth {
218 if entry.depth() > max {
219 continue;
220 }
221 }
222 if !entry.file_type().is_file() {
223 continue;
224 }
225 if let Some(ext) = entry.path().extension().and_then(|s| s.to_str()) {
226 if cpp_extensions.iter().any(|e| e.eq_ignore_ascii_case(ext)) {
227 files.push(entry.path().to_path_buf());
228 }
229 }
230 }
231 Ok(())
232}
233
234#[doc(hidden)]
235pub fn collect_cpp_files_for_preview(
236 dir: &Path,
237 files: &mut Vec<PathBuf>,
238 max_depth: Option<usize>,
239) -> Result<(), IcbError> {
240 collect_cpp_files(dir, files, max_depth)
241}