icb_server/incremental_cache.rs
1//! Incremental fact cache for the ICB server.
2//!
3//! # Purpose
4//!
5//! Avoid re‑parsing source files that have not changed since the previous
6//! analysis. The first run extracts facts from every file and saves them
7//! into a cache directory together with a SHA‑256 hash of the file content.
8//! On subsequent runs the hash is compared; if it matches, the facts are
9//! loaded directly from the cache, **skipping the parser entirely**.
10//!
11//! # Cache layout
12//!
13//! Given a cache directory (e.g. `.icb_cache`) and a file
14//! `modules/api/src/admin_controller.cpp`, two files are created:
15//!
16//! ```text
17//! .icb_cache/
//!     modules_api_src_admin_controller.cpp.facts.bincode
//!     modules_api_src_admin_controller.cpp.hash
20//! ```
21//!
//! The sanitised name replaces every `/`, `\` and `:` with `_`; all other
//! characters (including `.`) are kept unchanged.
23//!
24//! # Safety
25//!
26//! The hash is computed with SHA‑256, making accidental collisions
27//! practically impossible. The facts are serialised with `bincode`, which
28//! is fast and produces a compact binary representation.
29
30use anyhow::anyhow;
31use icb_parser::facts::RawNode;
32use sha2::{Digest, Sha256};
33use std::fs;
34use std::path::{Path, PathBuf};
35
36/// A boxed closure that takes a source string and returns parsed facts.
37pub type ParseFn = Box<dyn FnOnce(&str) -> anyhow::Result<Vec<RawNode>>>;
38
39/// Stores facts for a single file, along with the relative path.
40pub struct FileFacts {
41 pub relative_path: String,
42 pub facts: Vec<RawNode>,
43}
44
/// Manages the cache directory and provides the core `process_file` method.
///
/// The value only holds the directory path, so it is cheap to clone and can
/// be printed with `{:?}` for diagnostics.
#[derive(Debug, Clone)]
pub struct IncrementalCache {
    /// Directory in which the `*.facts.bincode` and `*.hash` files are stored.
    cache_dir: PathBuf,
}
49
50impl IncrementalCache {
51 /// Create a new cache manager.
52 ///
53 /// `cache_dir` will be created if it does not exist.
54 pub fn new(cache_dir: &Path) -> anyhow::Result<Self> {
55 fs::create_dir_all(cache_dir)?;
56 Ok(Self {
57 cache_dir: cache_dir.to_path_buf(),
58 })
59 }
60
61 /// Process a single source file.
62 ///
63 /// * `file_path` – absolute path to the source file.
64 /// * `relative_path` – the path that will be used in the cache name
65 /// (usually the path relative to the project root).
66 /// * `parse_fn` – a boxed closure that parses the source and returns facts;
67 /// it is called **only** if the file has changed or is not cached.
68 ///
69 /// Returns [`FileFacts`] containing the extracted or cached facts.
70 pub fn process_file(
71 &self,
72 file_path: &Path,
73 relative_path: &str,
74 parse_fn: ParseFn,
75 ) -> anyhow::Result<FileFacts> {
76 let (facts_path, hash_path) = self.cache_paths(relative_path);
77
78 let source = fs::read_to_string(file_path)
79 .map_err(|e| anyhow!("cannot read {}: {}", file_path.display(), e))?;
80 let current_hash = hex::encode(Sha256::digest(source.as_bytes()));
81
82 if let Ok(saved_hash) = fs::read_to_string(&hash_path) {
83 if saved_hash.trim() == current_hash && facts_path.exists() {
84 let data = fs::read(&facts_path)?;
85 let facts: Vec<RawNode> = bincode::deserialize(&data)
86 .map_err(|e| anyhow!("cache deserialisation error: {}", e))?;
87 return Ok(FileFacts {
88 relative_path: relative_path.to_string(),
89 facts,
90 });
91 }
92 }
93
94 let facts = parse_fn(&source)?;
95
96 let data = bincode::serialize(&facts)
97 .map_err(|e| anyhow!("bincode serialisation error: {}", e))?;
98 fs::write(&facts_path, data)?;
99 fs::write(&hash_path, current_hash)?;
100
101 Ok(FileFacts {
102 relative_path: relative_path.to_string(),
103 facts,
104 })
105 }
106
107 fn cache_paths(&self, relative_path: &str) -> (PathBuf, PathBuf) {
108 let sanitised = relative_path.replace(['/', '\\'], "_").replace(':', "_");
109 let facts_path = self.cache_dir.join(format!("{}.facts.bincode", sanitised));
110 let hash_path = self.cache_dir.join(format!("{}.hash", sanitised));
111 (facts_path, hash_path)
112 }
113}