Skip to main content

icb_parser/lang/
python.rs

//! Python language parser using tree-sitter-python.
//!
//! Extracts function definitions (including `async def`), class definitions,
//! call expressions, lambdas as anonymous functions, and optionally
//! identifiers as variables.
6
7use crate::facts::RawNode;
8use icb_common::{IcbError, Language, NodeKind};
9use tree_sitter::Parser;
10
11use super::common::traverse_node;
12
13/// Parse Python source code and return the extracted facts.
14///
15/// By default variables (identifiers) are **not** included to keep the
16/// graph small.  Use [`parse_python_detailed`] if you need them.
17pub fn parse_python(source: &str) -> Result<Vec<RawNode>, IcbError> {
18    parse_python_impl(source, false)
19}
20
21/// Parse Python source code and return facts **including** variable nodes.
22pub fn parse_python_detailed(source: &str) -> Result<Vec<RawNode>, IcbError> {
23    parse_python_impl(source, true)
24}
25
26fn parse_python_impl(source: &str, include_variables: bool) -> Result<Vec<RawNode>, IcbError> {
27    let mut parser = Parser::new();
28    parser
29        .set_language(&tree_sitter_python::language())
30        .map_err(|e| IcbError::Parse(format!("cannot set tree-sitter-python language: {e}")))?;
31
32    let tree = parser.parse(source, None).ok_or_else(|| {
33        IcbError::Parse("tree-sitter parse returned None for Python source".into())
34    })?;
35
36    let mut facts = Vec::new();
37
38    let classifier =
39        move |node: &tree_sitter::Node, source: &str| -> Option<(NodeKind, Option<String>, bool)> {
40            match node.kind() {
41                "function_definition" | "async_function_definition" => {
42                    let name = node
43                        .child_by_field_name("name")
44                        .and_then(|n| n.utf8_text(source.as_bytes()).ok())
45                        .map(|s| s.to_string());
46                    Some((NodeKind::Function, name, true))
47                }
48                "class_definition" => {
49                    let name = node
50                        .child_by_field_name("name")
51                        .and_then(|n| n.utf8_text(source.as_bytes()).ok())
52                        .map(|s| s.to_string());
53                    Some((NodeKind::Class, name, true))
54                }
55                "call" => {
56                    let name = node
57                        .child_by_field_name("function")
58                        .and_then(|n| n.utf8_text(source.as_bytes()).ok())
59                        .map(|s| s.to_string());
60                    Some((NodeKind::CallSite, name, false))
61                }
62                "lambda" => Some((NodeKind::Function, Some("lambda".into()), true)),
63                "identifier" if include_variables => {
64                    let name = node
65                        .utf8_text(source.as_bytes())
66                        .ok()
67                        .map(|s| s.to_string());
68                    Some((NodeKind::Variable, name, false))
69                }
70                _ => None,
71            }
72        };
73
74    traverse_node(
75        tree.root_node(),
76        source,
77        &mut facts,
78        None,
79        Language::Python,
80        &classifier,
81    );
82    Ok(facts)
83}
84
#[cfg(test)]
mod tests {
    use super::*;
    use icb_common::NodeKind;

    /// Collects references to every fact in `facts` whose kind equals `kind`.
    fn facts_of_kind<'a>(facts: &'a [RawNode], kind: &NodeKind) -> Vec<&'a RawNode> {
        facts.iter().filter(|fact| fact.kind == *kind).collect()
    }

    /// Reports whether any fact in `facts` carries exactly the name `name`.
    fn has_named(facts: &[RawNode], name: &str) -> bool {
        facts.iter().any(|fact| fact.name.as_deref() == Some(name))
    }

    /// A single `def` yields exactly one function fact with its name.
    #[test]
    fn test_parse_simple_function() {
        let facts = parse_python("def hello(): pass").expect("parsing should succeed");
        let functions = facts_of_kind(&facts, &NodeKind::Function);
        assert_eq!(functions.len(), 1);
        assert_eq!(functions[0].name.as_deref(), Some("hello"));
    }

    /// Both the enclosing and the nested function are reported.
    #[test]
    fn test_parse_nested_function() {
        let facts =
            parse_python("def outer():\n    def inner(): pass").expect("parsing should succeed");
        let found_outer = has_named(&facts, "outer");
        let found_inner = has_named(&facts, "inner");
        assert!(found_outer, "outer function not found");
        assert!(found_inner, "inner function not found");
    }

    /// A bare call produces one call-site fact named after the callee.
    #[test]
    fn test_call_site_has_name() {
        let facts = parse_python("foo()").expect("parsing should succeed");
        let calls = facts_of_kind(&facts, &NodeKind::CallSite);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name.as_deref(), Some("foo"));
    }

    /// `async def` is reported as an ordinary function fact.
    #[test]
    fn test_async_function() {
        let facts = parse_python("async def bar(): pass").unwrap();
        let functions = facts_of_kind(&facts, &NodeKind::Function);
        assert_eq!(functions.len(), 1);
        assert_eq!(functions[0].name.as_deref(), Some("bar"));
    }

    /// A lambda expression is reported under the fixed name `lambda`.
    #[test]
    fn test_lambda() {
        let facts = parse_python("lambda x: x").unwrap();
        // A lambda may yield more than one fact (the lambda itself plus its
        // body), so only require at least one match.
        assert!(has_named(&facts, "lambda"), "expected at least one lambda");
    }

    /// Variables appear only when the detailed entry point is used.
    #[test]
    fn test_include_variables() {
        let source = "x = 1";
        let detailed = parse_python_detailed(source).unwrap();
        assert!(!facts_of_kind(&detailed, &NodeKind::Variable).is_empty());
        let facts_no_vars = parse_python(source).unwrap();
        assert!(facts_of_kind(&facts_no_vars, &NodeKind::Variable).is_empty());
    }
}
160}