1use anyhow::anyhow;
51use icb_common::Language;
52use icb_graph::{cache, graph::CodePropertyGraph};
53use icb_parser::facts::RawNode;
54
55use std::collections::{HashMap, HashSet};
56use std::fs;
57use std::path::{Path, PathBuf};
58use std::sync::Arc;
59
60use walkdir::WalkDir;
61
62use crate::display_name;
63use crate::incremental_cache::IncrementalCache;
64
65#[derive(Debug, Clone)]
66struct PipelineConfig {
67 pub languages: HashSet<Language>,
68 pub strict_extensions: bool,
69 pub strip_comments: bool,
70 pub no_system_headers: bool,
71 pub inc_cache_dir: Option<PathBuf>,
72}
73
74impl Default for PipelineConfig {
75 fn default() -> Self {
76 Self {
77 languages: HashSet::new(),
78 strict_extensions: true,
79 strip_comments: true,
80 no_system_headers: true,
81 inc_cache_dir: None,
82 }
83 }
84}
85
86pub fn build_or_load_graph(
88 project: &Path,
89 language: &str,
90 graph_cache_path: Option<&PathBuf>,
91 inc_cache_dir: Option<&PathBuf>,
92 no_system_headers: bool,
93) -> anyhow::Result<CodePropertyGraph> {
94 let lang = resolve_language(project, language)?;
95 let strict = lang != Language::Unknown;
96
97 let cfg = PipelineConfig {
98 languages: {
99 let mut set = HashSet::new();
100 set.insert(lang);
101 set
102 },
103 no_system_headers,
104 strict_extensions: strict,
105 inc_cache_dir: inc_cache_dir.cloned(),
106 ..Default::default()
107 };
108
109 run_pipeline(project, cfg, graph_cache_path)
110}
111
112pub fn build_or_load_graph_multi(
114 project: &Path,
115 languages: &[String],
116 graph_cache_path: Option<&PathBuf>,
117 inc_cache_dir: Option<&PathBuf>,
118 no_system_headers: bool,
119) -> anyhow::Result<CodePropertyGraph> {
120 if languages.is_empty() || languages.iter().any(|l| l == "auto") {
121 return build_or_load_graph(
122 project,
123 "auto",
124 graph_cache_path,
125 inc_cache_dir,
126 no_system_headers,
127 );
128 }
129
130 let cfg = PipelineConfig {
131 languages: {
132 let mut set = HashSet::new();
133 for l in languages {
134 if let Some(lang) = parse_language(l) {
135 set.insert(lang);
136 }
137 }
138 set
139 },
140 no_system_headers,
141 strict_extensions: !languages
142 .iter()
143 .any(|l| parse_language(l) == Some(Language::Unknown)),
144 inc_cache_dir: inc_cache_dir.cloned(),
145 ..Default::default()
146 };
147
148 run_pipeline(project, cfg, graph_cache_path)
149}
150
151fn run_pipeline(
153 project: &Path,
154 cfg: PipelineConfig,
155 graph_cache_path: Option<&PathBuf>,
156) -> anyhow::Result<CodePropertyGraph> {
157 if let Some(cache_file) = graph_cache_path {
158 if cache_file.exists() {
159 if let Ok(mut g) = cache::load_graph(cache_file) {
160 display_name::cleanup_node_names(&mut g);
161 return Ok(g);
162 }
163 }
164 }
165
166 let inc_cache = cfg
167 .inc_cache_dir
168 .as_ref()
169 .map(|dir| {
170 if dir.extension().is_some() {
171 let mut d = dir.clone();
172 d.set_extension("");
173 IncrementalCache::new(&d)
174 } else {
175 IncrementalCache::new(dir)
176 }
177 })
178 .transpose()?
179 .or_else(|| IncrementalCache::new(&project.join(".icb_cache")).ok());
180
181 if cfg.languages.contains(&Language::CppTreeSitter) {
182 if let Some(cpg) = try_clang_pipeline(project, &cfg, graph_cache_path, inc_cache.as_ref()) {
183 return Ok(cpg);
184 }
185 }
186
187 let manager = Arc::new(icb_parser::manager::ParserManager::new());
188 let mut facts: Vec<(String, Vec<RawNode>)> = Vec::new();
189
190 for entry in WalkDir::new(project)
191 .into_iter()
192 .filter_map(|e| e.ok())
193 .filter(|e| e.file_type().is_file())
194 {
195 let path = entry.path();
196 let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("");
197
198 if cfg.strict_extensions {
199 let lang = detect_language_from_extension(ext);
200 if !cfg.languages.contains(&lang) {
201 continue;
202 }
203 let allowed = extensions_for_language(lang);
204 if !allowed.contains(&ext) {
205 continue;
206 }
207 }
208
209 let rel = path
210 .strip_prefix(project)
211 .unwrap_or(path)
212 .display()
213 .to_string();
214
215 let lang = if cfg.languages.len() == 1 {
216 *cfg.languages.iter().next().unwrap()
217 } else {
218 detect_language_from_extension(ext)
219 };
220
221 if let Some(ref cache) = inc_cache {
222 let manager = Arc::clone(&manager);
223 let file_facts = cache.process_file(
224 path,
225 &rel,
226 Box::new(move |source: &str| -> anyhow::Result<Vec<RawNode>> {
227 manager.parse_file(lang, source).map_err(|e| anyhow!(e))
228 }),
229 )?;
230 facts.push((file_facts.relative_path, file_facts.facts));
231 } else {
232 let raw_source = fs::read_to_string(path).unwrap_or_default();
233 let source = if cfg.strip_comments {
234 strip_comments(&raw_source)
235 } else {
236 raw_source
237 };
238
239 let file_facts =
240 match icb_parser::manager::ParserManager::new().parse_file(lang, &source) {
241 Ok(f) => f,
242 Err(_) => continue,
243 };
244
245 facts.push((rel, file_facts));
246 }
247 }
248
249 let mut builder = icb_graph::builder::GraphBuilder::new();
250 for (_, file_facts) in facts {
251 let mut local = icb_graph::builder::GraphBuilder::new();
252 local.ingest_file_facts(&file_facts);
253 builder.merge(local);
254 }
255
256 display_name::cleanup_node_names(&mut builder.cpg);
257 builder.resolve_calls();
258
259 let mut cpg = builder.cpg;
260 display_name::cleanup_node_names(&mut cpg);
261
262 if let Some(cache_file) = graph_cache_path {
263 let _ = cache::save_graph(&cpg, cache_file);
264 }
265
266 Ok(cpg)
267}
268
269fn try_clang_pipeline(
270 project: &Path,
271 cfg: &PipelineConfig,
272 graph_cache_path: Option<&PathBuf>,
273 inc_cache: Option<&IncrementalCache>,
274) -> Option<CodePropertyGraph> {
275 #[cfg(feature = "clang")]
276 {
277 log::info!("Attempting Clang graph construction with incremental cache...");
278 let allow_system = !cfg.no_system_headers;
279
280 let mut facts: Vec<(String, Vec<RawNode>)> = Vec::new();
281
282 for entry in WalkDir::new(project)
283 .into_iter()
284 .filter_map(|e| e.ok())
285 .filter(|e| e.file_type().is_file())
286 {
287 let path = entry.path();
288 let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("");
289
290 let allowed = extensions_for_language(Language::CppTreeSitter);
291 if !allowed.contains(&ext) {
292 continue;
293 }
294
295 let rel = path
296 .strip_prefix(project)
297 .unwrap_or(path)
298 .display()
299 .to_string();
300
301 if let Some(cache) = inc_cache {
302 let file_facts = cache
303 .process_file(
304 path,
305 &rel,
306 Box::new(move |source: &str| -> anyhow::Result<Vec<RawNode>> {
307 icb_clang::parser::parse_cpp_file(
308 source,
309 &["-std=c++17".to_string()],
310 None,
311 allow_system,
312 )
313 .map_err(|e| anyhow!(e))
314 }),
315 )
316 .ok()?;
317 facts.push((file_facts.relative_path, file_facts.facts));
318 } else {
319 let source = std::fs::read_to_string(path).ok()?;
320 let file_facts = icb_clang::parser::parse_cpp_file(
321 &source,
322 &["-std=c++17".to_string()],
323 None,
324 allow_system,
325 )
326 .ok()?;
327 facts.push((rel, file_facts));
328 }
329 }
330
331 log::info!("Clang processed {} files", facts.len());
332
333 let mut builder = icb_graph::builder::GraphBuilder::new();
334 for (_, file_facts) in facts {
335 let mut local = icb_graph::builder::GraphBuilder::new();
336 local.ingest_file_facts(&file_facts);
337 builder.merge(local);
338 }
339
340 display_name::cleanup_node_names(&mut builder.cpg);
341 builder.resolve_calls();
342 let mut cpg = builder.cpg;
343 display_name::cleanup_node_names(&mut cpg);
344
345 if let Some(cache_file) = graph_cache_path {
346 let _ = cache::save_graph(&cpg, cache_file);
347 }
348 log::info!("Clang graph built successfully");
349 Some(cpg)
350 }
351 #[cfg(not(feature = "clang"))]
352 {
353 log::debug!("Clang feature not compiled in");
354 None
355 }
356}
357
358fn resolve_language(project: &Path, input: &str) -> anyhow::Result<Language> {
359 if input == "auto" {
360 Ok(detect_language_from_project(project))
361 } else {
362 parse_language(input).ok_or_else(|| anyhow!("unknown language"))
363 }
364}
365
366fn parse_language(s: &str) -> Option<Language> {
367 match s {
368 "cpp" | "c++" => Some(Language::CppTreeSitter),
369 "python" => Some(Language::Python),
370 "go" => Some(Language::Go),
371 "ruby" => Some(Language::Ruby),
372 "rust" => Some(Language::Rust),
373 "javascript" => Some(Language::JavaScript),
374 _ => None,
375 }
376}
377
378fn detect_language_from_extension(ext: &str) -> Language {
379 match ext {
380 "cpp" | "cc" | "cxx" | "h" | "hpp" => Language::CppTreeSitter,
381 "py" => Language::Python,
382 "rs" => Language::Rust,
383 "go" => Language::Go,
384 "rb" => Language::Ruby,
385 "js" | "ts" | "tsx" | "jsx" => Language::JavaScript,
386 _ => Language::Unknown,
387 }
388}
389
390fn detect_language_from_project(path: &Path) -> Language {
391 let mut counts: HashMap<Language, usize> = HashMap::new();
392
393 for entry in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
394 if let Some(ext) = entry.path().extension().and_then(|s| s.to_str()) {
395 let lang = detect_language_from_extension(ext);
396 *counts.entry(lang).or_insert(0) += 1;
397 }
398 }
399
400 counts
401 .into_iter()
402 .max_by_key(|(_, c)| *c)
403 .map(|(l, _)| l)
404 .unwrap_or(Language::Unknown)
405}
406
407fn extensions_for_language(lang: Language) -> &'static [&'static str] {
408 match lang {
409 Language::CppTreeSitter => &["cpp", "cc", "cxx", "h", "hpp"],
410 Language::Python => &["py"],
411 Language::Rust => &["rs"],
412 Language::Go => &["go"],
413 Language::Ruby => &["rb"],
414 Language::JavaScript => &["js", "ts", "tsx", "jsx"],
415 _ => &[],
416 }
417}
418
/// Removes C-style comments from `s`.
///
/// * `// ...` is removed up to, but not including, the end of the line.
/// * `/* ... */` is replaced by a single space so neighbouring tokens stay
///   separated; newlines inside the comment are kept so line numbers of the
///   remaining code do not shift. An unterminated block comment runs to the
///   end of the input.
/// * Text inside `"..."` and `` `...` `` literals (which may span lines) and
///   inside `'...'` literals that close on the same line is copied verbatim,
///   so something like `"http://host"` is left intact.
///
/// The scan is language-agnostic: in languages where `//` is an operator it is
/// still treated as the start of a comment.
fn strip_comments(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;

    while !rest.is_empty() {
        // Copy the run of ordinary text up to the next character that could
        // start a comment or a quoted literal.
        let stop = rest
            .find(|c: char| matches!(c, '/' | '"' | '\'' | '`'))
            .unwrap_or(rest.len());
        out.push_str(&rest[..stop]);
        rest = &rest[stop..];

        if let Some(after) = rest.strip_prefix("//") {
            // Line comment: skip to the newline, which is left for the next
            // iteration to copy.
            let eol = after.find('\n').unwrap_or(after.len());
            rest = &after[eol..];
        } else if let Some(body) = rest.strip_prefix("/*") {
            let (comment, after) = match body.find("*/") {
                Some(end) => (&body[..end], &body[end + 2..]),
                None => (body, ""),
            };
            out.push(' ');
            out.extend(comment.chars().filter(|&c| c == '\n'));
            rest = after;
        } else if let Some(first) = rest.chars().next() {
            // Either a lone '/' (e.g. division) or the start of a literal.
            let len = if first == '/' {
                1
            } else {
                quoted_literal_len(rest, first)
            };
            out.push_str(&rest[..len]);
            rest = &rest[len..];
        }
    }

    out
}

/// Returns the byte length of the quoted literal that starts at the beginning
/// of `text`, whose first character is `quote`.
///
/// A backslash escapes the following character. Double-quoted and backtick
/// literals may span lines and run to the end of the input when unterminated.
/// A single quote only opens a literal when it is closed on the same line;
/// otherwise it is treated as an ordinary character (length 1), which keeps
/// apostrophes and lifetime-style `'a` markers from swallowing the code that
/// follows them.
fn quoted_literal_len(text: &str, quote: char) -> usize {
    let same_line_only = quote == '\'';
    let mut escaped = false;

    for (idx, ch) in text.char_indices().skip(1) {
        if escaped {
            escaped = false;
        } else if ch == '\\' {
            escaped = true;
        } else if ch == quote {
            return idx + ch.len_utf8();
        } else if same_line_only && ch == '\n' {
            return quote.len_utf8();
        }
    }

    if same_line_only {
        quote.len_utf8()
    } else {
        text.len()
    }
}
422
423#[allow(dead_code)]
424fn is_valid_identifier(name: &str, lang: Language) -> bool {
425 if matches!(lang, Language::CppTreeSitter | Language::Cpp) && name.contains("::") {
426 return true;
427 }
428 if name.len() == 1 && name.chars().all(|c| c.is_ascii_alphabetic()) {
429 return true;
430 }
431 if name.len() < 2 {
432 return false;
433 }
434 let first = name.chars().next().unwrap();
435 if !first.is_ascii_alphabetic() && first != '_' && first != '~' {
436 return false;
437 }
438 let allowed = |c: char| {
439 c.is_ascii_alphanumeric()
440 || c == '_'
441 || (matches!(lang, Language::CppTreeSitter | Language::Cpp) && (c == ':' || c == '~'))
442 };
443 if !name.chars().all(allowed) {
444 return false;
445 }
446 if name.chars().all(|c| c.is_ascii_digit()) {
447 return false;
448 }
449 if name.starts_with("class")
450 && name.len() > 5
451 && name[5..].chars().next().unwrap().is_uppercase()
452 {
453 return false;
454 }
455 if name.contains("_1_1") || name.contains("_8cpp") || name.contains("_8h") {
456 return false;
457 }
458 if name.len() > 40 && name.contains('_') {
459 return false;
460 }
461 if name.starts_with("dir_") && name.len() > 30 {
462 return false;
463 }
464 true
465}
466
/// Reports whether `name` is on the fixed list of JavaScript names that are
/// treated as noise. The comparison is exact and case-sensitive.
#[allow(dead_code)]
fn is_javascript_noise(name: &str) -> bool {
    const NOISE_NAMES: &[&str] = &[
        // Language built-ins and global objects.
        "isNaN", "eval", "parseInt", "parseFloat", "undefined", "NaN", "Infinity",
        "Object", "Array", "String", "Number", "Boolean", "Function", "RegExp",
        "Math", "Date", "JSON", "Promise", "Symbol", "Map", "Set", "WeakMap",
        "WeakSet", "Proxy", "Reflect",
        // Browser globals.
        "console", "window", "document", "navigator", "location", "history",
        "localStorage", "sessionStorage", "alert", "confirm", "prompt", "fetch",
        "XMLHttpRequest",
        // DOM methods.
        "getElementById", "getElementsByClassName", "getElementsByTagName",
        "querySelector", "querySelectorAll", "addEventListener",
        "removeEventListener", "appendChild", "removeChild",
        // Search / navigation helper names.
        // NOTE(review): these look like identifiers from generated
        // documentation front-end scripts — confirm the origin.
        "srChild", "srResult", "srEntry", "srScope", "srLink", "srChildren",
        "clipboard_div", "clipboard_icon", "clipboard_successIcon",
        "clipboard_successDuration", "clipboard_title",
        "pagenav", "navtree", "menudata", "resizeHeight", "resizeWidth",
        "domSearchBox", "domPopupSearchResults", "domPopupSearchResultsWindow",
        "domSearchClose", "searchData", "searchResults", "resultsPath",
        "topOffset", "footerHeight", "headerHeight", "sidenavWidth",
        "pagenavWidth", "navSync", "navtreeHeight",
        "PAGENAV_COOKIE_NAME", "RESIZE_COOKIE_NAME", "SEARCH_COOKIE_NAME",
        "NAVPATH_COOKIE_NAME",
        "NAVTREE", "NAVTREEINDEX", "NAVTREEINDEX0", "NAVTREEINDEX1",
        "NAVTREEINDEX2", "NAVTREEINDEX3", "NAVTREEINDEX4", "NAVTREEINDEX5",
        "NAVTREEINDEX6", "NAVTREEINDEX7", "navTreeSubIndices",
        "entityMap", "htmlToNode", "codefold", "dynsection",
        "showHideNavBar", "showSyncOff", "showSyncOn", "SYNCOFFMSG", "SYNCONMSG",
        "toggleVisibility", "toggleClass", "focusItem", "focusName",
        "expandNode", "gotoNode", "gotoAnchor", "showNode", "showRoot",
        "selectAndHighlight", "highlightAnchor", "highlightAdjacentNodes",
        "highlightEdges", "loadJS", "createIndent",
        "makeTree", "makeAbsolut", "makeMorphable", "makeInstance",
        "makeSetterGetter",
        "getClass", "getClassForType", "getMethodNames", "getMethodsFor",
        "getEvents", "getEventTarget", "getEventPoint",
        "createResults", "SearchResults", "handleResults",
    ];

    NOISE_NAMES.iter().any(|&noise| noise == name)
}
604
/// Reports whether `name` is one of the listed C/C++ fundamental type names.
/// The comparison is exact and case-sensitive.
#[allow(dead_code)]
fn is_type_keyword(name: &str) -> bool {
    const TYPE_KEYWORDS: &[&str] = &[
        "void", "int", "long", "short", "char", "float", "double", "signed", "unsigned", "bool",
        "wchar_t", "size_t",
    ];

    TYPE_KEYWORDS.iter().any(|&keyword| keyword == name)
}