Skip to main content

icb_graph/
builder.rs

1use crate::graph::{CodePropertyGraph, Edge, Node};
2use icb_common::NodeKind;
3use icb_parser::facts::RawNode;
4use petgraph::stable_graph::NodeIndex;
5use petgraph::visit::{EdgeRef, IntoEdgeReferences};
6use std::collections::HashMap;
7
8/// Incrementally builds a [`CodePropertyGraph`] from parser facts.
9///
10/// Supports ingesting facts from multiple files and merging local graphs
11/// (e.g., from parallel parsing) into a single global graph with symbol
12/// deduplication. Call [`Self::resolve_calls`] after all facts are ingested
13/// to wire up call sites to their definitions.
14///
15/// # Example
16///
17/// ```rust
18/// use icb_graph::builder::GraphBuilder;
19/// use icb_parser::facts::RawNode;
20/// use icb_common::{Language, NodeKind};
21///
22/// let mut builder = GraphBuilder::new();
23/// let facts = vec![RawNode {
24///     language: Language::Python,
25///     kind: NodeKind::Function,
26///     name: Some("main".into()),
27///     usr: None,
28///     start_line: 1,
29///     start_col: 0,
30///     end_line: 1,
31///     end_col: 5,
32///     children: vec![],
33///     source_file: None,
34/// }];
35/// builder.ingest_file_facts(&facts);
36/// builder.resolve_calls();
37/// assert_eq!(builder.cpg.node_count(), 1);
38/// ```
39#[derive(Default)]
40pub struct GraphBuilder {
41    pub cpg: CodePropertyGraph,
42    symbol_index: HashMap<String, NodeIndex>,
43    function_defs: HashMap<String, Vec<NodeIndex>>,
44    call_sites: HashMap<String, Vec<NodeIndex>>,
45}
46
47impl GraphBuilder {
48    pub fn new() -> Self {
49        Self::default()
50    }
51
52    /// Ingest facts from a single file.
53    ///
54    /// This method may be called multiple times (even from different threads
55    /// after merging). Each call enriches the same graph.
56    pub fn ingest_file_facts(&mut self, facts: &[RawNode]) {
57        let mut map: HashMap<usize, NodeIndex> = HashMap::new();
58        for (i, raw) in facts.iter().enumerate() {
59            let usr: String = raw
60                .usr
61                .clone()
62                .or_else(|| raw.name.clone())
63                .unwrap_or_default();
64            let node_idx = if let Some(&existing) = self.symbol_index.get(&usr) {
65                existing
66            } else {
67                let idx = self.cpg.graph.add_node(Node {
68                    kind: raw.kind,
69                    name: raw.name.clone(),
70                    usr: Some(usr.clone()),
71                    start_line: raw.start_line,
72                    end_line: raw.end_line,
73                });
74                self.symbol_index.insert(usr, idx);
75                idx
76            };
77            map.insert(i, node_idx);
78
79            if let Some(name) = &raw.name {
80                match raw.kind {
81                    NodeKind::Function | NodeKind::Class => {
82                        self.function_defs
83                            .entry(name.clone())
84                            .or_default()
85                            .push(node_idx);
86                    }
87                    NodeKind::CallSite => {
88                        self.call_sites
89                            .entry(name.clone())
90                            .or_default()
91                            .push(node_idx);
92                    }
93                    _ => {}
94                }
95            }
96        }
97
98        for (i, raw) in facts.iter().enumerate() {
99            let from_idx = map[&i];
100            for &child_raw_idx in &raw.children {
101                if let Some(&to_idx) = map.get(&child_raw_idx) {
102                    self.cpg.graph.add_edge(from_idx, to_idx, Edge::AstChild);
103                }
104            }
105        }
106    }
107
108    /// Resolve calls: for every call site with a matching function/class
109    /// definition, add a [`Edge::Call`] edge from the call site to the definition.
110    pub fn resolve_calls(&mut self) {
111        for (name, call_indices) in &self.call_sites {
112            if let Some(def_indices) = self.function_defs.get(name) {
113                for &call_idx in call_indices {
114                    for &def_idx in def_indices {
115                        self.cpg.graph.add_edge(call_idx, def_idx, Edge::Call);
116                    }
117                }
118            }
119        }
120    }
121
122    /// Merge another `GraphBuilder` into this one.
123    ///
124    /// All nodes from `other` are transferred to `self`. Nodes with the
125    /// same symbolic key (USR or name) that already exist in `self` are
126    /// reused, and edges are rewired accordingly. The temporary call/def
127    /// maps are also merged.
128    pub fn merge(&mut self, other: GraphBuilder) {
129        let mut node_map: HashMap<NodeIndex, NodeIndex> = HashMap::new();
130        for old_idx in other.cpg.graph.node_indices() {
131            let node = &other.cpg.graph[old_idx];
132            let usr = node.usr.clone().unwrap_or_default();
133            let new_idx = if let Some(&existing) = self.symbol_index.get(&usr) {
134                existing
135            } else {
136                let idx = self.cpg.graph.add_node(node.clone());
137                self.symbol_index.insert(usr, idx);
138                idx
139            };
140            node_map.insert(old_idx, new_idx);
141        }
142        for edge_ref in other.cpg.graph.edge_references() {
143            let src = edge_ref.source();
144            let tgt = edge_ref.target();
145            if let (Some(&new_src), Some(&new_tgt)) = (node_map.get(&src), node_map.get(&tgt)) {
146                self.cpg
147                    .graph
148                    .add_edge(new_src, new_tgt, edge_ref.weight().clone());
149            }
150        }
151        for (name, defs) in other.function_defs {
152            let entry = self.function_defs.entry(name).or_default();
153            for idx in defs {
154                if let Some(&new_idx) = node_map.get(&idx) {
155                    entry.push(new_idx);
156                }
157            }
158        }
159        for (name, calls) in other.call_sites {
160            let entry = self.call_sites.entry(name).or_default();
161            for idx in calls {
162                if let Some(&new_idx) = node_map.get(&idx) {
163                    entry.push(new_idx);
164                }
165            }
166        }
167    }
168}
169
170#[cfg(test)]
171mod tests {
172    use super::*;
173    use icb_common::{Language, NodeKind};
174
175    fn make_func_node(name: &str, line: usize) -> RawNode {
176        RawNode {
177            language: Language::Python,
178            kind: NodeKind::Function,
179            name: Some(name.into()),
180            usr: None,
181            start_line: line,
182            start_col: 0,
183            end_line: line,
184            end_col: 10,
185            children: vec![],
186            source_file: None,
187        }
188    }
189
190    fn make_call_node(name: &str, line: usize) -> RawNode {
191        RawNode {
192            language: Language::Python,
193            kind: NodeKind::CallSite,
194            name: Some(name.into()),
195            usr: None,
196            start_line: line,
197            start_col: 0,
198            end_line: line,
199            end_col: 5,
200            children: vec![],
201            source_file: None,
202        }
203    }
204
205    #[test]
206    fn test_deduplication_by_name() {
207        let mut builder = GraphBuilder::new();
208        let facts = vec![make_func_node("foo", 1), make_func_node("foo", 2)];
209        builder.ingest_file_facts(&facts);
210        assert_eq!(builder.cpg.node_count(), 1);
211    }
212
213    #[test]
214    fn test_multiple_different_nodes() {
215        let mut builder = GraphBuilder::new();
216        let facts = vec![make_func_node("foo", 1), make_func_node("bar", 2)];
217        builder.ingest_file_facts(&facts);
218        assert_eq!(builder.cpg.node_count(), 2);
219    }
220
221    #[test]
222    fn test_merge_two_builders() {
223        let mut b1 = GraphBuilder::new();
224        b1.ingest_file_facts(&[make_func_node("foo", 1)]);
225        let mut b2 = GraphBuilder::new();
226        b2.ingest_file_facts(&[make_func_node("bar", 2), make_func_node("foo", 3)]);
227        b1.merge(b2);
228        assert_eq!(b1.cpg.node_count(), 2);
229    }
230
231    #[test]
232    fn test_resolve_calls_creates_edges() {
233        let mut builder = GraphBuilder::new();
234        let facts = vec![make_func_node("foo", 1), make_call_node("foo", 2)];
235        builder.ingest_file_facts(&facts);
236        builder.resolve_calls();
237        let call_edges: Vec<_> = builder
238            .cpg
239            .graph
240            .edge_references()
241            .filter(|e| *e.weight() == Edge::Call)
242            .collect();
243        assert!(!call_edges.is_empty());
244    }
245}