Skip to main content

icb_clang/
project.rs

1//! Project‑level traversal of C/C++ source trees.
2//!
3//! # Entry points
4//!
5//! * [`parse_project`] – processes every translation unit listed in a
6//!   [`compile_commands.json`](https://clang.llvm.org/docs/JSONCompilationDatabase.html)
7//!   compilation database.
8//! * [`parse_directory`] – recursively discovers C/C++ files under a root
9//!   directory and parses each one with uniform compiler flags.
10//!
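//! Both functions distribute work across available CPU cores via Rayon's
//! parallel iterators
//! ([`rayon::iter::IntoParallelIterator::into_par_iter`]) when `parallel`
//! is `true`; otherwise the files are processed sequentially.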
13//!
14//! # File filtering
15//!
16//! Only files whose extension matches one of `c`, `cpp`, `cc`, `cxx`, `h`,
17//! `hpp` are considered.  Symbolic links are *not* followed to avoid
18//! infinite loops on recursive directory structures.
19//!
20//! # Error handling
21//!
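//! A file that fails to parse aborts the entire operation with an
//! [`IcbError`].  Partial results are discarded.  In sequential mode the
//! error belongs to the first failing file; in parallel mode several files
//! may fail concurrently and any one of their errors may be returned.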
24//!
25//! # Memory usage
26//!
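//! Each translation unit's facts are collected independently and then
//! returned as a flat vector, so the facts of *all* files are resident in
//! memory by the time the call returns.  Only the source text of a file is
//! transient: it is read immediately before parsing and dropped as soon as
//! that file's facts have been produced, so in parallel mode roughly one
//! source buffer per Rayon worker thread is alive at any given time.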
30//!
31//! # Example
32//!
33//! ```rust,no_run
34//! use std::path::Path;
35//! let facts = icb_clang::project::parse_directory(
36//!     Path::new("src"),
37//!     &["-std=c++17".into()],
38//!     true,
39//!     None,
40//!     false,
41//! ).unwrap();
42//! ```
43
44use icb_common::IcbError;
45use icb_parser::facts::RawNode;
46use rayon::prelude::*;
47use serde::Deserialize;
48use std::fs;
49use std::path::{Path, PathBuf};
50use walkdir::WalkDir;
51
52use crate::parser::parse_cpp_file;
53
/// A single entry in a Clang compilation database.
///
/// Deserialised from `compile_commands.json`; the schema follows the
/// [Clang JSON Compilation Database Format
/// Specification](https://clang.llvm.org/docs/JSONCompilationDatabase.html).
///
/// Only the fields this module reads are modelled.  Other keys defined by
/// the specification (for example `directory` and `output`) are not
/// declared here, so they are skipped during deserialisation.
#[derive(Debug, Deserialize)]
struct CompileCommandEntry {
    /// The main source file processed by this compilation step.
    ///
    /// May be absolute or relative; it is resolved by `resolve_file_path`.
    file: String,
    /// The full compiler command line as a single string (optional).
    ///
    /// `None` when the key is absent from the JSON object.
    #[serde(default)]
    command: Option<String>,
    /// The compiler command line split into an argument list (optional).
    ///
    /// `None` when the key is absent from the JSON object.  When both this
    /// and `command` are present, `extract_args` uses this field.
    #[serde(default)]
    arguments: Option<Vec<String>>,
}
70
71/// Parse every source file listed in a compilation database.
72///
73/// Each entry is processed independently; results are collected in the order
74/// they complete.
75///
76/// # Arguments
77///
78/// * `compile_commands` – Path to `compile_commands.json`.
79/// * `base_dir` – Base directory for resolving relative file paths.
80/// * `parallel` – Distribute work across threads if `true`.
81/// * `allow_system` – Forwarded to [`parse_cpp_file`].
82///
83/// # Errors
84///
85/// Returns [`IcbError::Io`] if the database cannot be read, or
86/// [`IcbError::Parse`] for the first file that fails.
87pub fn parse_project(
88    compile_commands: &Path,
89    base_dir: &Path,
90    parallel: bool,
91    allow_system: bool,
92) -> Result<Vec<(String, Vec<RawNode>)>, IcbError> {
93    let data = fs::read_to_string(compile_commands).map_err(IcbError::Io)?;
94    let entries: Vec<CompileCommandEntry> =
95        serde_json::from_str(&data).map_err(|e| IcbError::Parse(e.to_string()))?;
96
97    let process = |entry: CompileCommandEntry| -> Result<(String, Vec<RawNode>), IcbError> {
98        let file_path = resolve_file_path(&entry.file, base_dir);
99        let source = fs::read_to_string(&file_path).map_err(|e| {
100            IcbError::Io(std::io::Error::new(
101                e.kind(),
102                format!("failed to read {}: {}", file_path.display(), e),
103            ))
104        })?;
105        let args = extract_args(&entry);
106        let facts = parse_cpp_file(
107            &source,
108            &args,
109            Some(file_path.to_str().unwrap()),
110            allow_system,
111        )?;
112        Ok((file_path.to_string_lossy().into_owned(), facts))
113    };
114
115    if parallel {
116        entries
117            .into_par_iter()
118            .map(process)
119            .collect::<Result<Vec<_>, _>>()
120    } else {
121        entries
122            .into_iter()
123            .map(process)
124            .collect::<Result<Vec<_>, _>>()
125    }
126}
127
128/// Recursively discover C/C++ files under `root` and parse each one.
129///
130/// Only files with an extension in `{c, cpp, cc, cxx, h, hpp}` are
131/// processed.  Symlinks are ignored to prevent infinite recursion.
132///
133/// # Arguments
134///
135/// * `root` – Root directory for the walk.
136/// * `args` – Clang command‑line arguments shared by all files.
137/// * `parallel` – Distribute work across threads if `true`.
138/// * `max_depth` – Maximum directory depth (`None` for unlimited).
139/// * `allow_system` – Forwarded to [`parse_cpp_file`].
140///
141/// # Errors
142///
143/// Returns [`IcbError::Io`] if the directory walk fails, or
144/// [`IcbError::Parse`] for the first file that fails.
145pub fn parse_directory(
146    root: &Path,
147    args: &[String],
148    parallel: bool,
149    max_depth: Option<usize>,
150    allow_system: bool,
151) -> Result<Vec<(String, Vec<RawNode>)>, IcbError> {
152    let mut files = Vec::new();
153    collect_cpp_files(root, &mut files, max_depth)?;
154
155    let process = |path: PathBuf| -> Result<(String, Vec<RawNode>), IcbError> {
156        let source = fs::read_to_string(&path).map_err(IcbError::Io)?;
157        let facts = parse_cpp_file(&source, args, Some(path.to_str().unwrap()), allow_system)?;
158        let rel = path.strip_prefix(root).unwrap_or(&path);
159        Ok((rel.display().to_string(), facts))
160    };
161
162    if parallel {
163        files
164            .into_par_iter()
165            .map(process)
166            .collect::<Result<Vec<_>, _>>()
167    } else {
168        files
169            .into_iter()
170            .map(process)
171            .collect::<Result<Vec<_>, _>>()
172    }
173}
174
/// Turn a `file` entry into a concrete path, anchored at `base` when relative.
///
/// Whitespace surrounding `file` is stripped before resolution.  An absolute
/// `file` is returned as‑is and `base` plays no part in the result.
fn resolve_file_path(file: &str, base: &Path) -> PathBuf {
    // `Path::join` replaces the receiver entirely when its argument is
    // absolute, so a single call covers both the relative and absolute case.
    base.join(file.trim())
}
187
188/// Extract compiler arguments from a compilation database entry.
189///
190/// Prefers the `arguments` field if present; otherwise splits `command` on
191/// whitespace.  Returns an empty vector if neither field is set.
192fn extract_args(entry: &CompileCommandEntry) -> Vec<String> {
193    if let Some(ref arguments) = entry.arguments {
194        return arguments.clone();
195    }
196    if let Some(ref command) = entry.command {
197        return command.split_whitespace().map(|s| s.to_string()).collect();
198    }
199    Vec::new()
200}
201
202/// Walk the directory tree and collect C/C++ source files.
203///
204/// Symlinks are not followed, and the optional `max_depth` limits recursion.
205/// Walk the directory tree and collect C/C++ source files.
206///
207/// Symlinks are not followed, and the optional `max_depth` limits recursion.
208/// File extensions are matched case‑insensitively.
209fn collect_cpp_files(
210    dir: &Path,
211    files: &mut Vec<PathBuf>,
212    max_depth: Option<usize>,
213) -> Result<(), IcbError> {
214    let cpp_extensions: &[&str] = &["c", "cpp", "cc", "cxx", "h", "hpp"];
215    for entry in WalkDir::new(dir).follow_links(false) {
216        let entry = entry.map_err(|e| IcbError::Parse(e.to_string()))?;
217        if let Some(max) = max_depth {
218            if entry.depth() > max {
219                continue;
220            }
221        }
222        if !entry.file_type().is_file() {
223            continue;
224        }
225        if let Some(ext) = entry.path().extension().and_then(|s| s.to_str()) {
226            if cpp_extensions.iter().any(|e| e.eq_ignore_ascii_case(ext)) {
227                files.push(entry.path().to_path_buf());
228            }
229        }
230    }
231    Ok(())
232}
233
/// Public, documentation‑hidden entry point to the private
/// `collect_cpp_files` helper.
///
/// All three arguments are forwarded unchanged and the helper's result is
/// returned as‑is: matching files are appended to `files`, and any error the
/// helper reports is propagated.
// NOTE(review): judging by the name, this exists so other crates can list
// the files a directory parse would visit without parsing them — confirm
// against the callers.
#[doc(hidden)]
pub fn collect_cpp_files_for_preview(
    dir: &Path,
    files: &mut Vec<PathBuf>,
    max_depth: Option<usize>,
) -> Result<(), IcbError> {
    collect_cpp_files(dir, files, max_depth)
}