1use crate::facts::RawNode;
44use icb_common::{IcbError, Language};
45use std::path::{Path, PathBuf};
46use walkdir::WalkDir;
47
/// Stateless handle used to parse single source strings or whole directory
/// trees into `RawNode` facts.
///
/// The type has no fields, so it is free to copy and to construct via
/// `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct ParserManager;
51
52impl ParserManager {
53 pub fn new() -> Self {
55 Self
56 }
57
58 pub fn parse_file(&self, lang: Language, source: &str) -> Result<Vec<RawNode>, IcbError> {
72 match lang {
73 Language::Python => crate::lang::python::parse_python(source),
74 Language::CppTreeSitter => crate::cpp_tree_sitter::parse_cpp_file(source),
75 Language::Go => crate::lang::go::parse_go(source),
76 Language::Ruby => crate::lang::ruby::parse_ruby(source),
77 Language::Rust => crate::lang::rust::parse_rust(source),
78
79 Language::JavaScript | Language::Unknown => {
80 Ok(crate::heuristic_parser::parse_universal(source, ""))
81 }
82
83 Language::Cpp => Ok(crate::heuristic_parser::parse_universal(source, "")),
84
85 _ => Ok(crate::heuristic_parser::parse_universal(source, "")),
86 }
87 }
88
89 pub fn parse_directory(
106 &self,
107 lang: Language,
108 root: &Path,
109 ) -> Result<Vec<(String, Vec<RawNode>)>, IcbError> {
110 let files = discover_files(root, lang)?;
111 let base = normalize_root(root);
112 let mut results = Vec::with_capacity(files.len());
113
114 for path in files {
115 match process_file(self, lang, &path, &base) {
116 Ok(Some(entry)) => results.push(entry),
117 Ok(None) | Err(_) => continue,
118 }
119 }
120
121 Ok(results)
122 }
123}
124
125fn discover_files(root: &Path, lang: Language) -> Result<Vec<PathBuf>, IcbError> {
130 let extensions = extensions_for_language(lang);
131 let mut out = Vec::new();
132
133 for entry in WalkDir::new(root).follow_links(false) {
134 let entry = match entry {
135 Ok(e) => e,
136 Err(e) => return Err(IcbError::Parse(e.to_string())),
137 };
138 if !entry.file_type().is_file() {
139 continue;
140 }
141 let path = entry.path();
142 if should_include(path, &extensions) {
143 out.push(path.to_path_buf());
144 }
145 }
146
147 Ok(out)
148}
149
150fn process_file(
152 manager: &ParserManager,
153 lang: Language,
154 path: &Path,
155 base: &Path,
156) -> Result<Option<(String, Vec<RawNode>)>, IcbError> {
157 let source = match std::fs::read_to_string(path) {
158 Ok(s) => s,
159 Err(_) => return Ok(None),
160 };
161 let facts = match manager.parse_file(lang, &source) {
162 Ok(f) => f,
163 Err(_) => return Ok(None),
164 };
165 if facts.is_empty() {
166 return Ok(None);
167 }
168 let rel = relative_path(path, base);
169 Ok(Some((rel, facts)))
170}
171
/// Returns the canonical form of `root`, or `root` unchanged when it cannot
/// be canonicalized (for example because it does not exist).
fn normalize_root(root: &Path) -> PathBuf {
    match root.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
176
/// Renders `path` relative to `base` as a (lossily converted) string.
///
/// When `path` does not start with `base`, the full `path` is rendered
/// instead.
fn relative_path(path: &Path, base: &Path) -> String {
    let shown = match path.strip_prefix(base) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    shown.to_string_lossy().into_owned()
}
184
/// Decides whether `path` passes the extension filter `exts`.
///
/// An empty filter accepts every path. Otherwise the path's extension is
/// lower-cased and must equal one of the entries in `exts`; paths without a
/// UTF-8 extension are rejected.
fn should_include(path: &Path, exts: &[&str]) -> bool {
    if exts.is_empty() {
        return true;
    }
    path.extension()
        .and_then(|raw| raw.to_str())
        .map_or(false, |raw| exts.contains(&raw.to_lowercase().as_str()))
}
200
201fn extensions_for_language(lang: Language) -> Vec<&'static str> {
203 match lang {
204 Language::Python => vec!["py"],
205 Language::Cpp | Language::CppTreeSitter => vec![
206 "c", "cpp", "cc", "cxx", "h", "hpp", "hxx", "hh", "inl", "inc",
207 ],
208 Language::Rust => vec!["rs"],
209 Language::JavaScript => vec!["js", "jsx", "ts", "tsx"],
210 Language::Go => vec!["go"],
211 Language::Java => vec!["java"],
212 Language::Ruby => vec!["rb"],
213 Language::Php => vec!["php"],
214 Language::Swift => vec!["swift"],
215 Language::Kotlin => vec!["kt", "kts"],
216 Language::Scala => vec!["scala"],
217 Language::CSharp => vec!["cs"],
218 Language::Lua => vec!["lua"],
219 Language::R => vec!["r"],
220 Language::Bash => vec!["sh", "bash"],
221 Language::Perl => vec!["pl", "pm"],
222 Language::Tcl => vec!["tcl"],
223 Language::Dart => vec!["dart"],
224 Language::Unknown => vec![],
225 }
226}