Skip to main content

icb_graph/
analysis.rs

1//! Analysis algorithms for the Code Property Graph.
2//!
3//! This module provides functions that operate on a [`CodePropertyGraph`] to
4//! detect call cycles, dead code, and compute function complexity based on
5//! AST subtree sizes.
6//!
7//! # Cycle detection
8//!
9//! [`detect_call_cycles`] builds a directed graph of function nodes
10//! connected by [`Edge::Call`] edges and runs Kosaraju's algorithm for
11//! strongly connected components (SCC).  Every SCC with more than one node
12//! is reported as a cycle. Self‑loops (single‑node SCCs with a call to
13//! themselves) are also reported.
14//!
15//! # Dead code detection
16//!
17//! [`detect_dead_code`] finds functions that are not reachable from a given
18//! set of entry points (e.g., `main`) via call edges.
19//!
20//! # Complexity analysis
21//!
22//! [`detect_complex_functions`] traverses the `AstChild` subtree of each
23//! function to count the number of AST nodes it contains.  This is a
24//! lightweight proxy for cyclomatic complexity.  The traversal uses a
25//! visited set and a per‑function node limit to prevent infinite loops on
26//! structurally cyclic graphs (e.g., when a class contains methods that
27//! reference the class itself).
28
29use crate::graph::{CodePropertyGraph, Edge, Node};
30use icb_common::NodeKind;
31use petgraph::algo::kosaraju_scc;
32use petgraph::graph::Graph;
33use petgraph::visit::{EdgeRef, IntoEdgeReferences};
34use std::collections::{HashMap, HashSet, VecDeque};
35
36/// A cycle of function calls.
37#[derive(Debug, Clone)]
38pub struct CallCycle {
39    /// Function names involved in the cycle, in arbitrary order.
40    pub functions: Vec<String>,
41    /// Number of distinct functions in the cycle.
42    pub length: usize,
43}
44
45/// Detects all call cycles involving one or more functions.
46///
47/// Builds a directed graph of function nodes connected by [`Edge::Call`] edges
48/// and runs Kosaraju's algorithm for strongly connected components (SCC).
49/// Every SCC with more than one node is reported as a cycle. Self‑loops
50/// (single‑node SCCs with a call to themselves) are also reported.
51///
52/// # Examples
53///
54/// ```rust
55/// use icb_graph::analysis::detect_call_cycles;
56/// use icb_graph::graph::{CodePropertyGraph, Node, Edge};
57/// use icb_common::NodeKind;
58///
59/// let mut cpg = CodePropertyGraph::new();
60/// let a = cpg.graph.add_node(Node {
61///     kind: NodeKind::Function,
62///     name: Some("a".into()),
63///     usr: Some("a".into()),
64///     start_line: 1, end_line: 1,
65/// });
66/// let b = cpg.graph.add_node(Node {
67///     kind: NodeKind::Function,
68///     name: Some("b".into()),
69///     usr: Some("b".into()),
70///     start_line: 2, end_line: 2,
71/// });
72/// cpg.graph.add_edge(a, b, Edge::Call);
73/// cpg.graph.add_edge(b, a, Edge::Call);
74///
75/// let cycles = detect_call_cycles(&cpg);
76/// assert_eq!(cycles.len(), 1);
77/// assert!(cycles[0].functions.contains(&"a".into()));
78/// ```
79pub fn detect_call_cycles(cpg: &CodePropertyGraph) -> Vec<CallCycle> {
80    let mut call_graph = Graph::<String, (), petgraph::Directed>::new();
81    let mut node_map: HashMap<petgraph::stable_graph::NodeIndex, petgraph::graph::NodeIndex> =
82        HashMap::new();
83
84    for node_idx in cpg.graph.node_indices() {
85        let node = &cpg.graph[node_idx];
86        if node.kind == NodeKind::Function || node.kind == NodeKind::Class {
87            let name = node.name.clone().unwrap_or_else(|| "?".into());
88            let gidx = call_graph.add_node(name);
89            node_map.insert(node_idx, gidx);
90        }
91    }
92
93    for edge_ref in cpg.graph.edge_references() {
94        if *edge_ref.weight() != Edge::Call {
95            continue;
96        }
97        let source = edge_ref.source();
98        let target = edge_ref.target();
99        if let (Some(&src_g), Some(&tgt_g)) = (node_map.get(&source), node_map.get(&target)) {
100            call_graph.add_edge(src_g, tgt_g, ());
101        }
102    }
103
104    let sccs = kosaraju_scc(&call_graph);
105    let mut cycles = Vec::new();
106
107    for scc in sccs {
108        if scc.len() > 1 {
109            let names: Vec<String> = scc.iter().map(|&i| call_graph[i].clone()).collect();
110            cycles.push(CallCycle {
111                length: names.len(),
112                functions: names,
113            });
114        } else if scc.len() == 1 {
115            let gidx = scc[0];
116            if call_graph.contains_edge(gidx, gidx) {
117                cycles.push(CallCycle {
118                    functions: vec![call_graph[gidx].clone()],
119                    length: 1,
120                });
121            }
122        }
123    }
124
125    cycles
126}
127
128/// A record produced by [`detect_complex_functions`].
129#[derive(Debug, Clone)]
130pub struct ComplexityRecord {
131    /// Name of the function.
132    pub function_name: String,
133    /// Number of AST nodes inside the function body (via `AstChild` edges).
134    pub ast_node_count: usize,
135    /// Starting line of the function in source.
136    pub start_line: usize,
137    /// Whether the count was truncated at the per‑function node limit.
138    pub truncated: bool,
139}
140
141/// Computes the AST‑node count for every function (and method) in the graph.
142///
143/// The traversal follows `AstChild` edges **once** from each function root,
144/// using a visited set to prevent infinite loops caused by structural cycles
145/// (e.g. a class containing a method that references the class). A hard
146/// limit of `MAX_NODES_PER_FUNCTION` (100 000) is enforced per function;
147/// nodes beyond the limit are not visited.
148///
149/// # Arguments
150///
151/// * `cpg` – The Code Property Graph.
152/// * `_threshold` – Unused; retained for API compatibility.
153///
154/// # Returns
155///
156/// A vector of [`ComplexityRecord`] for every `Function` node.
157pub fn detect_complex_functions(
158    cpg: &CodePropertyGraph,
159    _threshold: usize,
160) -> Vec<ComplexityRecord> {
161    let mut records = Vec::new();
162
163    for idx in cpg.graph.node_indices() {
164        let node = &cpg.graph[idx];
165        if node.kind != NodeKind::Function {
166            continue;
167        }
168
169        let mut visited = HashSet::new();
170        let mut queue = VecDeque::new();
171        queue.push_back(idx);
172        visited.insert(idx.index());
173
174        let mut count = 0;
175        const MAX_NODES_PER_FUNCTION: usize = 100_000;
176
177        while let Some(current) = queue.pop_front() {
178            count += 1;
179            if count >= MAX_NODES_PER_FUNCTION {
180                break;
181            }
182
183            for edge_ref in cpg.graph.edges(current) {
184                if *edge_ref.weight() == Edge::AstChild {
185                    let child = edge_ref.target();
186                    if visited.insert(child.index()) {
187                        queue.push_back(child);
188                    }
189                }
190            }
191        }
192
193        let name = node.name.clone().unwrap_or_default();
194        records.push(ComplexityRecord {
195            function_name: name,
196            ast_node_count: count,
197            start_line: node.start_line,
198            truncated: count >= MAX_NODES_PER_FUNCTION,
199        });
200    }
201
202    records
203}
204
205/// Finds functions that are not reachable from any of the specified entry
206/// functions via call edges.
207///
208/// # Arguments
209///
210/// * `cpg` - The Code Property Graph.
211/// * `entry_names` - Names of functions that serve as entry points (e.g. `["main"]`).
212///
213/// # Returns
214///
215/// A vector of references to unreachable function nodes. If no entry function
216/// is found, all functions are considered unreachable.
217pub fn detect_dead_code<'a>(cpg: &'a CodePropertyGraph, entry_names: &[String]) -> Vec<&'a Node> {
218    let mut name_to_idx: HashMap<String, Vec<petgraph::stable_graph::NodeIndex>> = HashMap::new();
219    for idx in cpg.graph.node_indices() {
220        let node = &cpg.graph[idx];
221        if node.kind == NodeKind::Function || node.kind == NodeKind::Class {
222            if let Some(name) = &node.name {
223                name_to_idx.entry(name.clone()).or_default().push(idx);
224            }
225        }
226    }
227
228    let entry_indices: Vec<petgraph::stable_graph::NodeIndex> = entry_names
229        .iter()
230        .filter_map(|name| name_to_idx.get(name).and_then(|v| v.first().copied()))
231        .collect();
232
233    if entry_indices.is_empty() {
234        return cpg
235            .graph
236            .node_weights()
237            .filter(|n| n.kind == NodeKind::Function)
238            .collect();
239    }
240
241    let mut reachable = HashSet::new();
242    let mut queue = VecDeque::from(entry_indices);
243    while let Some(current) = queue.pop_front() {
244        if !reachable.insert(current) {
245            continue;
246        }
247        for edge_ref in cpg.graph.edges(current) {
248            if *edge_ref.weight() == Edge::Call {
249                queue.push_back(edge_ref.target());
250            }
251        }
252    }
253
254    cpg.graph
255        .node_indices()
256        .filter_map(|idx| {
257            let node = &cpg.graph[idx];
258            if node.kind == NodeKind::Function && !reachable.contains(&idx) {
259                Some(node)
260            } else {
261                None
262            }
263        })
264        .collect()
265}
266
267#[cfg(test)]
268mod tests {
269    use super::*;
270    use crate::graph::Node;
271    use icb_common::NodeKind;
272
273    fn build_test_cpg() -> CodePropertyGraph {
274        let mut cpg = CodePropertyGraph::new();
275        let a = cpg.graph.add_node(Node {
276            kind: NodeKind::Function,
277            name: Some("a".into()),
278            usr: Some("a".into()),
279            start_line: 1,
280            end_line: 1,
281        });
282        let b = cpg.graph.add_node(Node {
283            kind: NodeKind::Function,
284            name: Some("b".into()),
285            usr: Some("b".into()),
286            start_line: 2,
287            end_line: 2,
288        });
289        let c = cpg.graph.add_node(Node {
290            kind: NodeKind::Function,
291            name: Some("c".into()),
292            usr: Some("c".into()),
293            start_line: 3,
294            end_line: 3,
295        });
296        cpg.graph.add_edge(a, b, Edge::Call);
297        cpg.graph.add_edge(b, c, Edge::Call);
298        cpg.graph.add_edge(c, a, Edge::Call);
299
300        let d = cpg.graph.add_node(Node {
301            kind: NodeKind::Function,
302            name: Some("d".into()),
303            usr: Some("d".into()),
304            start_line: 4,
305            end_line: 4,
306        });
307        cpg.graph.add_edge(d, d, Edge::Call);
308
309        cpg.graph.add_node(Node {
310            kind: NodeKind::Function,
311            name: Some("e".into()),
312            usr: Some("e".into()),
313            start_line: 5,
314            end_line: 5,
315        });
316
317        let f = cpg.graph.add_node(Node {
318            kind: NodeKind::Function,
319            name: Some("f".into()),
320            usr: Some("f".into()),
321            start_line: 6,
322            end_line: 6,
323        });
324        let mut last = f;
325        for _ in 0..10 {
326            let child = cpg.graph.add_node(Node {
327                kind: NodeKind::Variable,
328                name: None,
329                usr: None,
330                start_line: 6,
331                end_line: 6,
332            });
333            cpg.graph.add_edge(last, child, Edge::AstChild);
334            last = child;
335        }
336
337        cpg
338    }
339
340    #[test]
341    fn test_detect_call_cycles() {
342        let cpg = build_test_cpg();
343        let cycles = detect_call_cycles(&cpg);
344        assert_eq!(cycles.len(), 2);
345        let three_cycle = cycles.iter().find(|c| c.length == 3).unwrap();
346        assert!(three_cycle.functions.contains(&"a".into()));
347        assert!(three_cycle.functions.contains(&"b".into()));
348        assert!(three_cycle.functions.contains(&"c".into()));
349        let self_loop = cycles.iter().find(|c| c.length == 1).unwrap();
350        assert_eq!(self_loop.functions, vec!["d"]);
351    }
352
353    #[test]
354    fn test_dead_code_from_entry() {
355        let cpg = build_test_cpg();
356        let dead = detect_dead_code(&cpg, &["a".to_string()]);
357        assert!(dead.iter().any(|n| n.name.as_deref() == Some("d")));
358        assert!(dead.iter().any(|n| n.name.as_deref() == Some("e")));
359        assert!(dead.iter().any(|n| n.name.as_deref() == Some("f")));
360        assert!(!dead.iter().any(|n| n.name.as_deref() == Some("a")));
361        assert!(!dead.iter().any(|n| n.name.as_deref() == Some("b")));
362        assert!(!dead.iter().any(|n| n.name.as_deref() == Some("c")));
363    }
364
365    #[test]
366    fn test_complex_functions_limit() {
367        let cpg = build_test_cpg();
368        let complex = detect_complex_functions(&cpg, 0);
369        let f_record = complex.iter().find(|r| r.function_name == "f").unwrap();
370        assert_eq!(f_record.ast_node_count, 11);
371        assert!(!f_record.truncated);
372    }
373}