1use anyhow::anyhow;
58use icb_common::Language;
59use icb_graph::{cache, graph::CodePropertyGraph};
60use icb_parser::facts::RawNode;
61
62#[cfg(feature = "rustc")]
63use icb_rustc;
64
65use std::collections::{HashMap, HashSet};
66use std::fs;
67use std::path::{Path, PathBuf};
68use std::sync::Arc;
69
70use walkdir::WalkDir;
71
72use crate::display_name;
73use crate::incremental_cache::IncrementalCache;
74
75#[derive(Debug, Clone)]
76struct PipelineConfig {
77 pub languages: HashSet<Language>,
78 pub strict_extensions: bool,
79 pub strip_comments: bool,
80 pub no_system_headers: bool,
81 pub inc_cache_dir: Option<PathBuf>,
82}
83
84impl Default for PipelineConfig {
85 fn default() -> Self {
86 Self {
87 languages: HashSet::new(),
88 strict_extensions: true,
89 strip_comments: true,
90 no_system_headers: true,
91 inc_cache_dir: None,
92 }
93 }
94}
95
96pub fn build_or_load_graph(
98 project: &Path,
99 language: &str,
100 graph_cache_path: Option<&PathBuf>,
101 inc_cache_dir: Option<&PathBuf>,
102 no_system_headers: bool,
103) -> anyhow::Result<CodePropertyGraph> {
104 let lang = resolve_language(project, language)?;
105 let strict = lang != Language::Unknown;
106
107 let cfg = PipelineConfig {
108 languages: {
109 let mut set = HashSet::new();
110 set.insert(lang);
111 set
112 },
113 no_system_headers,
114 strict_extensions: strict,
115 inc_cache_dir: inc_cache_dir.cloned(),
116 ..Default::default()
117 };
118
119 run_pipeline(project, cfg, graph_cache_path)
120}
121
122pub fn build_or_load_graph_multi(
124 project: &Path,
125 languages: &[String],
126 graph_cache_path: Option<&PathBuf>,
127 inc_cache_dir: Option<&PathBuf>,
128 no_system_headers: bool,
129) -> anyhow::Result<CodePropertyGraph> {
130 if languages.is_empty() || languages.iter().any(|l| l == "auto") {
131 return build_or_load_graph(
132 project,
133 "auto",
134 graph_cache_path,
135 inc_cache_dir,
136 no_system_headers,
137 );
138 }
139
140 let cfg = PipelineConfig {
141 languages: {
142 let mut set = HashSet::new();
143 for l in languages {
144 if let Some(lang) = parse_language(l) {
145 set.insert(lang);
146 }
147 }
148 set
149 },
150 no_system_headers,
151 strict_extensions: !languages
152 .iter()
153 .any(|l| parse_language(l) == Some(Language::Unknown)),
154 inc_cache_dir: inc_cache_dir.cloned(),
155 ..Default::default()
156 };
157
158 run_pipeline(project, cfg, graph_cache_path)
159}
160
161fn run_pipeline(
163 project: &Path,
164 cfg: PipelineConfig,
165 graph_cache_path: Option<&PathBuf>,
166) -> anyhow::Result<CodePropertyGraph> {
167 if let Some(cache_file) = graph_cache_path {
168 if cache_file.exists() {
169 if let Ok(mut g) = cache::load_graph(cache_file) {
170 display_name::cleanup_node_names(&mut g);
171 return Ok(g);
172 }
173 }
174 }
175
176 let inc_cache = cfg
177 .inc_cache_dir
178 .as_ref()
179 .map(|dir| {
180 if dir.extension().is_some() {
181 let mut d = dir.clone();
182 d.set_extension("");
183 IncrementalCache::new(&d)
184 } else {
185 IncrementalCache::new(dir)
186 }
187 })
188 .transpose()?
189 .or_else(|| IncrementalCache::new(&project.join(".icb_cache")).ok());
190
191 if cfg.languages.contains(&Language::Rust) {
192 if let Some(cpg) = try_rustc_pipeline(project, &cfg, graph_cache_path) {
193 return Ok(cpg);
194 }
195 }
196
197 if cfg.languages.contains(&Language::CppTreeSitter) {
198 if let Some(cpg) = try_clang_pipeline(project, &cfg, graph_cache_path, inc_cache.as_ref()) {
199 return Ok(cpg);
200 }
201 }
202
203 let manager = Arc::new(icb_parser::manager::ParserManager::new());
204 let mut facts: Vec<(String, Vec<RawNode>)> = Vec::new();
205
206 for entry in WalkDir::new(project)
207 .into_iter()
208 .filter_map(|e| e.ok())
209 .filter(|e| e.file_type().is_file())
210 {
211 let path = entry.path();
212 let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("");
213
214 if cfg.strict_extensions {
215 let lang = detect_language_from_extension(ext);
216 if !cfg.languages.contains(&lang) {
217 continue;
218 }
219 let allowed = extensions_for_language(lang);
220 if !allowed.contains(&ext) {
221 continue;
222 }
223 }
224
225 let rel = path
226 .strip_prefix(project)
227 .unwrap_or(path)
228 .display()
229 .to_string();
230
231 let lang = if cfg.languages.len() == 1 {
232 *cfg.languages.iter().next().unwrap()
233 } else {
234 detect_language_from_extension(ext)
235 };
236
237 if let Some(ref cache) = inc_cache {
238 let manager = Arc::clone(&manager);
239 let file_facts = cache.process_file(
240 path,
241 &rel,
242 Box::new(move |source: &str| -> anyhow::Result<Vec<RawNode>> {
243 manager.parse_file(lang, source).map_err(|e| anyhow!(e))
244 }),
245 )?;
246 facts.push((file_facts.relative_path, file_facts.facts));
247 } else {
248 let raw_source = fs::read_to_string(path).unwrap_or_default();
249 let source = if cfg.strip_comments {
250 strip_comments(&raw_source)
251 } else {
252 raw_source
253 };
254
255 let file_facts =
256 match icb_parser::manager::ParserManager::new().parse_file(lang, &source) {
257 Ok(f) => f,
258 Err(_) => continue,
259 };
260
261 facts.push((rel, file_facts));
262 }
263 }
264
265 let mut builder = icb_graph::builder::GraphBuilder::new();
266 for (_, file_facts) in facts {
267 let mut local = icb_graph::builder::GraphBuilder::new();
268 local.ingest_file_facts(&file_facts);
269 builder.merge(local);
270 }
271
272 display_name::cleanup_node_names(&mut builder.cpg);
273 builder.resolve_calls();
274
275 let mut cpg = builder.cpg;
276 display_name::cleanup_node_names(&mut cpg);
277
278 if let Some(cache_file) = graph_cache_path {
279 let _ = cache::save_graph(&cpg, cache_file);
280 }
281
282 Ok(cpg)
283}
284
285fn try_rustc_pipeline(
290 _project: &Path,
291 _cfg: &PipelineConfig,
292 _graph_cache_path: Option<&PathBuf>,
293) -> Option<CodePropertyGraph> {
294 #[cfg(feature = "rustc")]
295 {
296 log::info!("Attempting rustc graph construction...");
297
298 let cargo_toml = _project.join("Cargo.toml");
299 if !cargo_toml.exists() {
300 log::warn!(
301 "Cargo.toml not found in {:?}, falling back to tree-sitter",
302 _project
303 );
304 return None;
305 }
306
307 let main_rs = _project.join("src/main.rs");
308 let lib_rs = _project.join("src/lib.rs");
309 let entry = if main_rs.exists() {
310 main_rs
311 } else if lib_rs.exists() {
312 lib_rs
313 } else {
314 log::warn!("No main.rs or lib.rs found, falling back");
315 return None;
316 };
317
318 let args: Vec<String> = vec!["--edition".to_string(), "2021".to_string()];
319 let facts = match icb_rustc::parse_rust_crate(&entry, &args) {
320 Ok(f) => f,
321 Err(e) => {
322 log::warn!("rustc analysis failed: {}, falling back to tree-sitter", e);
323 return None;
324 }
325 };
326
327 log::info!("rustc produced {} facts", facts.len());
328
329 let mut builder = icb_graph::builder::GraphBuilder::new();
330 let mut local = icb_graph::builder::GraphBuilder::new();
331 local.ingest_file_facts(&facts);
332 builder.merge(local);
333
334 display_name::cleanup_node_names(&mut builder.cpg);
335 builder.resolve_calls();
336 let mut cpg = builder.cpg;
337 display_name::cleanup_node_names(&mut cpg);
338
339 if let Some(cache_file) = _graph_cache_path {
340 let _ = cache::save_graph(&cpg, cache_file);
341 }
342 log::info!("rustc graph built successfully");
343 Some(cpg)
344 }
345 #[cfg(not(feature = "rustc"))]
346 {
347 log::debug!("rustc feature not compiled in");
348 None
349 }
350}
351
352fn try_clang_pipeline(
353 project: &Path,
354 cfg: &PipelineConfig,
355 graph_cache_path: Option<&PathBuf>,
356 inc_cache: Option<&IncrementalCache>,
357) -> Option<CodePropertyGraph> {
358 #[cfg(feature = "clang")]
359 {
360 log::info!("Attempting Clang graph construction with incremental cache...");
361 let allow_system = !cfg.no_system_headers;
362
363 let mut facts: Vec<(String, Vec<RawNode>)> = Vec::new();
364
365 for entry in WalkDir::new(project)
366 .into_iter()
367 .filter_map(|e| e.ok())
368 .filter(|e| e.file_type().is_file())
369 {
370 let path = entry.path();
371 let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("");
372
373 let allowed = extensions_for_language(Language::CppTreeSitter);
374 if !allowed.contains(&ext) {
375 continue;
376 }
377
378 let rel = path
379 .strip_prefix(project)
380 .unwrap_or(path)
381 .display()
382 .to_string();
383
384 if let Some(cache) = inc_cache {
385 let file_facts = cache
386 .process_file(
387 path,
388 &rel,
389 Box::new(move |source: &str| -> anyhow::Result<Vec<RawNode>> {
390 icb_clang::parser::parse_cpp_file(
391 source,
392 &["-std=c++17".to_string()],
393 None,
394 allow_system,
395 )
396 .map_err(|e| anyhow!(e))
397 }),
398 )
399 .ok()?;
400 facts.push((file_facts.relative_path, file_facts.facts));
401 } else {
402 let source = std::fs::read_to_string(path).ok()?;
403 let file_facts = icb_clang::parser::parse_cpp_file(
404 &source,
405 &["-std=c++17".to_string()],
406 None,
407 allow_system,
408 )
409 .ok()?;
410 facts.push((rel, file_facts));
411 }
412 }
413
414 log::info!("Clang processed {} files", facts.len());
415
416 let mut builder = icb_graph::builder::GraphBuilder::new();
417 for (_, file_facts) in facts {
418 let mut local = icb_graph::builder::GraphBuilder::new();
419 local.ingest_file_facts(&file_facts);
420 builder.merge(local);
421 }
422
423 display_name::cleanup_node_names(&mut builder.cpg);
424 builder.resolve_calls();
425 let mut cpg = builder.cpg;
426 display_name::cleanup_node_names(&mut cpg);
427
428 if let Some(cache_file) = graph_cache_path {
429 let _ = cache::save_graph(&cpg, cache_file);
430 }
431 log::info!("Clang graph built successfully");
432 Some(cpg)
433 }
434 #[cfg(not(feature = "clang"))]
435 {
436 log::debug!("Clang feature not compiled in");
437 None
438 }
439}
440
441fn resolve_language(project: &Path, input: &str) -> anyhow::Result<Language> {
442 if input == "auto" {
443 Ok(detect_language_from_project(project))
444 } else {
445 parse_language(input).ok_or_else(|| anyhow!("unknown language"))
446 }
447}
448
449fn parse_language(s: &str) -> Option<Language> {
450 match s {
451 "cpp" | "c++" => Some(Language::CppTreeSitter),
452 "python" => Some(Language::Python),
453 "go" => Some(Language::Go),
454 "ruby" => Some(Language::Ruby),
455 "rust" => Some(Language::Rust),
456 "javascript" => Some(Language::JavaScript),
457 _ => None,
458 }
459}
460
461fn detect_language_from_extension(ext: &str) -> Language {
462 match ext {
463 "cpp" | "cc" | "cxx" | "h" | "hpp" => Language::CppTreeSitter,
464 "py" => Language::Python,
465 "rs" => Language::Rust,
466 "go" => Language::Go,
467 "rb" => Language::Ruby,
468 "js" | "ts" | "tsx" | "jsx" => Language::JavaScript,
469 _ => Language::Unknown,
470 }
471}
472
473fn detect_language_from_project(path: &Path) -> Language {
474 let mut counts: HashMap<Language, usize> = HashMap::new();
475
476 for entry in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
477 if let Some(ext) = entry.path().extension().and_then(|s| s.to_str()) {
478 let lang = detect_language_from_extension(ext);
479 *counts.entry(lang).or_insert(0) += 1;
480 }
481 }
482
483 counts
484 .into_iter()
485 .max_by_key(|(_, c)| *c)
486 .map(|(l, _)| l)
487 .unwrap_or(Language::Unknown)
488}
489
490fn extensions_for_language(lang: Language) -> &'static [&'static str] {
491 match lang {
492 Language::CppTreeSitter => &["cpp", "cc", "cxx", "h", "hpp"],
493 Language::Python => &["py"],
494 Language::Rust => &["rs"],
495 Language::Go => &["go"],
496 Language::Ruby => &["rb"],
497 Language::JavaScript => &["js", "ts", "tsx", "jsx"],
498 _ => &[],
499 }
500}
501
/// Blanks out C-style comments (`// …` and `/* … */`) in `s`.
///
/// Every byte of a comment, markers included, is replaced by a space, while
/// line breaks inside comments are kept. The result therefore has the same
/// byte length and the same line structure as the input, so positions computed
/// on the stripped text still refer to the original source.
///
/// Comment markers inside quoted literals are left alone. Double-quoted and
/// backtick-quoted literals may span lines; a single-quoted literal ends at
/// the end of its line at the latest, so that a lone apostrophe (for example a
/// Rust lifetime) cannot swallow the rest of the file. Backslash escapes are
/// honoured inside all three.
///
/// Limitations: the scanner is language-agnostic. It does not know `#`
/// comments, raw-string syntax or regex literals, and it treats `//` as a
/// comment start even in languages where it is an operator.
fn strip_comments(s: &str) -> String {
    #[derive(Clone, Copy)]
    enum State {
        Code,
        LineComment,
        BlockComment,
        /// Inside a quoted literal; holds the quote character that closes it.
        Literal(char),
    }

    /// Appends the blanked form of `c`: line breaks survive, everything else
    /// becomes one space per UTF-8 byte.
    fn blank(out: &mut String, c: char) {
        if c == '\n' || c == '\r' {
            out.push(c);
        } else {
            out.extend(std::iter::repeat(' ').take(c.len_utf8()));
        }
    }

    let mut out = String::with_capacity(s.len());
    let mut state = State::Code;
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        match state {
            State::Code => match c {
                '/' if chars.peek() == Some(&'/') => {
                    chars.next();
                    out.push_str("  ");
                    state = State::LineComment;
                }
                '/' if chars.peek() == Some(&'*') => {
                    chars.next();
                    out.push_str("  ");
                    state = State::BlockComment;
                }
                '"' | '\'' | '`' => {
                    out.push(c);
                    state = State::Literal(c);
                }
                _ => out.push(c),
            },
            State::LineComment => {
                if c == '\n' {
                    out.push(c);
                    state = State::Code;
                } else {
                    blank(&mut out, c);
                }
            }
            State::BlockComment => {
                if c == '*' && chars.peek() == Some(&'/') {
                    chars.next();
                    out.push_str("  ");
                    state = State::Code;
                } else {
                    blank(&mut out, c);
                }
            }
            State::Literal(quote) => {
                out.push(c);
                if c == '\\' {
                    // Copy the escaped character verbatim so an escaped quote
                    // does not end the literal.
                    if let Some(escaped) = chars.next() {
                        out.push(escaped);
                    }
                } else if c == quote || (c == '\n' && quote == '\'') {
                    state = State::Code;
                }
            }
        }
    }

    out
}
505
506#[allow(dead_code)]
507fn is_valid_identifier(name: &str, lang: Language) -> bool {
508 if matches!(lang, Language::CppTreeSitter | Language::Cpp) && name.contains("::") {
509 return true;
510 }
511 if name.len() == 1 && name.chars().all(|c| c.is_ascii_alphabetic()) {
512 return true;
513 }
514 if name.len() < 2 {
515 return false;
516 }
517 let first = name.chars().next().unwrap();
518 if !first.is_ascii_alphabetic() && first != '_' && first != '~' {
519 return false;
520 }
521 let allowed = |c: char| {
522 c.is_ascii_alphanumeric()
523 || c == '_'
524 || (matches!(lang, Language::CppTreeSitter | Language::Cpp) && (c == ':' || c == '~'))
525 };
526 if !name.chars().all(allowed) {
527 return false;
528 }
529 if name.chars().all(|c| c.is_ascii_digit()) {
530 return false;
531 }
532 if name.starts_with("class")
533 && name.len() > 5
534 && name[5..].chars().next().unwrap().is_uppercase()
535 {
536 return false;
537 }
538 if name.contains("_1_1") || name.contains("_8cpp") || name.contains("_8h") {
539 return false;
540 }
541 if name.len() > 40 && name.contains('_') {
542 return false;
543 }
544 if name.starts_with("dir_") && name.len() > 30 {
545 return false;
546 }
547 true
548}
549
/// Reports whether `name` is one of the JavaScript identifiers treated as noise.
///
/// The match is exact and case-sensitive. The list covers language built-ins,
/// browser globals and DOM methods, followed by a block of helper names that
/// look like they come from generated documentation pages (presumably Doxygen
/// HTML output; verify before extending).
#[allow(dead_code)]
fn is_javascript_noise(name: &str) -> bool {
    // Language built-ins and global constructors.
    const BUILTINS: &[&str] = &[
        "isNaN", "eval", "parseInt", "parseFloat", "undefined", "NaN", "Infinity",
        "Object", "Array", "String", "Number", "Boolean", "Function", "RegExp",
        "Math", "Date", "JSON", "Promise", "Symbol", "Map", "Set", "WeakMap",
        "WeakSet", "Proxy", "Reflect",
    ];
    // Browser globals and DOM methods.
    const BROWSER: &[&str] = &[
        "console", "window", "document", "navigator", "location", "history",
        "localStorage", "sessionStorage", "alert", "confirm", "prompt", "fetch",
        "XMLHttpRequest", "getElementById", "getElementsByClassName",
        "getElementsByTagName", "querySelector", "querySelectorAll",
        "addEventListener", "removeEventListener", "appendChild", "removeChild",
    ];
    // Names from generated page scripts (search, clipboard, navigation tree, ...).
    const GENERATED: &[&str] = &[
        "srChild", "srResult", "srEntry", "srScope", "srLink", "srChildren",
        "clipboard_div", "clipboard_icon", "clipboard_successIcon",
        "clipboard_successDuration", "clipboard_title",
        "pagenav", "navtree", "menudata", "resizeHeight", "resizeWidth",
        "domSearchBox", "domPopupSearchResults", "domPopupSearchResultsWindow",
        "domSearchClose", "searchData", "searchResults", "resultsPath",
        "topOffset", "footerHeight", "headerHeight", "sidenavWidth", "pagenavWidth",
        "navSync", "navtreeHeight",
        "PAGENAV_COOKIE_NAME", "RESIZE_COOKIE_NAME", "SEARCH_COOKIE_NAME",
        "NAVPATH_COOKIE_NAME",
        "NAVTREE", "NAVTREEINDEX", "NAVTREEINDEX0", "NAVTREEINDEX1", "NAVTREEINDEX2",
        "NAVTREEINDEX3", "NAVTREEINDEX4", "NAVTREEINDEX5", "NAVTREEINDEX6",
        "NAVTREEINDEX7", "navTreeSubIndices",
        "entityMap", "htmlToNode", "codefold", "dynsection",
        "showHideNavBar", "showSyncOff", "showSyncOn", "SYNCOFFMSG", "SYNCONMSG",
        "toggleVisibility", "toggleClass", "focusItem", "focusName",
        "expandNode", "gotoNode", "gotoAnchor", "showNode", "showRoot",
        "selectAndHighlight", "highlightAnchor", "highlightAdjacentNodes",
        "highlightEdges", "loadJS", "createIndent",
        "makeTree", "makeAbsolut", "makeMorphable", "makeInstance", "makeSetterGetter",
        "getClass", "getClassForType", "getMethodNames", "getMethodsFor",
        "getEvents", "getEventTarget", "getEventPoint",
        "createResults", "SearchResults", "handleResults",
    ];

    [BUILTINS, BROWSER, GENERATED]
        .iter()
        .any(|group| group.iter().any(|candidate| *candidate == name))
}
687
/// Reports whether `name` is one of the built-in type names this module
/// recognises (`void`, the integer and floating-point keywords, `bool`,
/// `wchar_t`, `size_t`). The match is exact and case-sensitive.
#[allow(dead_code)]
fn is_type_keyword(name: &str) -> bool {
    const TYPE_KEYWORDS: [&str; 12] = [
        "void", "int", "long", "short", "char", "float", "double", "signed", "unsigned",
        "bool", "wchar_t", "size_t",
    ];
    TYPE_KEYWORDS.contains(&name)
}