Skip to main content

icb_server/
incremental_cache.rs

1//! Incremental fact cache for the ICB server.
2//!
3//! # Purpose
4//!
5//! Avoid re‑parsing source files that have not changed since the previous
6//! analysis.  The first run extracts facts from every file and saves them
7//! into a cache directory together with a SHA‑256 hash of the file content.
8//! On subsequent runs the hash is compared; if it matches, the facts are
9//! loaded directly from the cache, **skipping the parser entirely**.
10//!
11//! # Cache layout
12//!
13//! Given a cache directory (e.g. `.icb_cache`) and a file
14//! `modules/api/src/admin_controller.cpp`, two files are created:
15//!
16//! ```text
17//! .icb_cache/
18//!   modules_api_src_admin_controller.cpp.facts.bincode
19//!   modules_api_src_admin_controller.cpp.hash
20//! ```
21//!
22//! The sanitised name replaces every `/`, `\` and `:` with `_`.
23//!
24//! # Safety
25//!
26//! The hash is computed with SHA‑256, making accidental collisions
27//! practically impossible.  The facts are serialised with `bincode`, which
28//! is fast and produces a compact binary representation.
29
30use anyhow::anyhow;
31use icb_parser::facts::RawNode;
32use sha2::{Digest, Sha256};
33use std::fs;
34use std::path::{Path, PathBuf};
35
36/// A boxed closure that takes a source string and returns parsed facts.
37pub type ParseFn = Box<dyn FnOnce(&str) -> anyhow::Result<Vec<RawNode>>>;
38
39/// Stores facts for a single file, along with the relative path.
40pub struct FileFacts {
41    pub relative_path: String,
42    pub facts: Vec<RawNode>,
43}
44
/// Owner of the on-disk cache directory; its `process_file` method is the
/// entry point for cached fact extraction.
pub struct IncrementalCache {
    /// Directory that holds the per-file facts and hash entries.
    cache_dir: PathBuf,
}
49
50impl IncrementalCache {
51    /// Create a new cache manager.
52    ///
53    /// `cache_dir` will be created if it does not exist.
54    pub fn new(cache_dir: &Path) -> anyhow::Result<Self> {
55        fs::create_dir_all(cache_dir)?;
56        Ok(Self {
57            cache_dir: cache_dir.to_path_buf(),
58        })
59    }
60
61    /// Process a single source file.
62    ///
63    /// * `file_path` – absolute path to the source file.
64    /// * `relative_path` – the path that will be used in the cache name
65    ///   (usually the path relative to the project root).
66    /// * `parse_fn` – a boxed closure that parses the source and returns facts;
67    ///   it is called **only** if the file has changed or is not cached.
68    ///
69    /// Returns [`FileFacts`] containing the extracted or cached facts.
70    pub fn process_file(
71        &self,
72        file_path: &Path,
73        relative_path: &str,
74        parse_fn: ParseFn,
75    ) -> anyhow::Result<FileFacts> {
76        let (facts_path, hash_path) = self.cache_paths(relative_path);
77
78        let source = fs::read_to_string(file_path)
79            .map_err(|e| anyhow!("cannot read {}: {}", file_path.display(), e))?;
80        let current_hash = hex::encode(Sha256::digest(source.as_bytes()));
81
82        if let Ok(saved_hash) = fs::read_to_string(&hash_path) {
83            if saved_hash.trim() == current_hash && facts_path.exists() {
84                let data = fs::read(&facts_path)?;
85                let facts: Vec<RawNode> = bincode::deserialize(&data)
86                    .map_err(|e| anyhow!("cache deserialisation error: {}", e))?;
87                return Ok(FileFacts {
88                    relative_path: relative_path.to_string(),
89                    facts,
90                });
91            }
92        }
93
94        let facts = parse_fn(&source)?;
95
96        let data = bincode::serialize(&facts)
97            .map_err(|e| anyhow!("bincode serialisation error: {}", e))?;
98        fs::write(&facts_path, data)?;
99        fs::write(&hash_path, current_hash)?;
100
101        Ok(FileFacts {
102            relative_path: relative_path.to_string(),
103            facts,
104        })
105    }
106
107    fn cache_paths(&self, relative_path: &str) -> (PathBuf, PathBuf) {
108        let sanitised = relative_path.replace(['/', '\\'], "_").replace(':', "_");
109        let facts_path = self.cache_dir.join(format!("{}.facts.bincode", sanitised));
110        let hash_path = self.cache_dir.join(format!("{}.hash", sanitised));
111        (facts_path, hash_path)
112    }
113}