1use crate::facts::RawNode;
25use icb_common::{Language, NodeKind};
26use std::collections::HashSet;
27
28pub fn parse_universal(source: &str, file: &str) -> Vec<RawNode> {
30 if looks_like_markup(source) {
31 return Vec::new();
32 }
33
34 let tokens = tokenize(source);
35 if tokens.is_empty() {
36 return Vec::new();
37 }
38
39 let scopes = build_scope_map(&tokens);
40 let mut out = Vec::with_capacity(tokens.len() / 5);
41
42 extract_structures(&tokens, &scopes, file, &mut out);
43 extract_calls(&tokens, &mut out);
44 extract_namespaces(&tokens, &mut out);
45
46 dedup(&mut out);
47 out
48}
49
50fn build_scope_map(tokens: &[Token]) -> Vec<u32> {
51 let mut depth: u32 = 0;
52 let mut map = Vec::with_capacity(tokens.len());
53
54 for t in tokens {
55 match t.kind {
56 TokenKind::OpenBrace => depth = depth.saturating_add(1),
57 TokenKind::CloseBrace => depth = depth.saturating_sub(1),
58 _ => {}
59 }
60 map.push(depth);
61 }
62
63 map
64}
65
66fn extract_structures(tokens: &[Token], _scopes: &[u32], file: &str, out: &mut Vec<RawNode>) {
67 let mut i: usize = 0;
68
69 while i < tokens.len() {
70 let t = &tokens[i];
71
72 if t.kind != TokenKind::Ident {
73 i += 1;
74 continue;
75 }
76
77 let word = t.text.as_str();
78
79 if is_class_keyword(word) {
80 if let Some((name, col)) = find_structural_name(tokens, i + 1) {
81 out.push(make_node(NodeKind::Class, name, t.line, col, file));
82 i += 1;
83 continue;
84 }
85 }
86
87 if is_function_keyword(word) {
88 if let Some((name, col)) = find_structural_name(tokens, i + 1) {
89 out.push(make_node(NodeKind::Function, name, t.line, col, file));
90 i += 1;
91 continue;
92 }
93 }
94
95 i += 1;
96 }
97}
98
99fn find_structural_name(tokens: &[Token], mut i: usize) -> Option<(String, usize)> {
100 let skip_noise = true;
101
102 while i < tokens.len() {
103 let t = &tokens[i];
104
105 match t.kind {
106 TokenKind::Ident if skip_noise => {
107 let s = t.text.as_str();
108
109 if is_noise_word(s) || is_modifier_word(s) {
110 i += 1;
111 continue;
112 }
113
114 return Some((t.text.clone(), t.col));
115 }
116
117 TokenKind::Ident => {
118 return Some((t.text.clone(), t.col));
119 }
120
121 TokenKind::LessThan => {
122 i = skip_template(tokens, i);
123 }
124
125 TokenKind::Colon => {
126 i += 1;
127 }
128
129 _ => i += 1,
130 }
131 }
132
133 None
134}
135
136fn skip_template(tokens: &[Token], mut i: usize) -> usize {
137 let mut depth = 0usize;
138
139 while i < tokens.len() {
140 match tokens[i].kind {
141 TokenKind::LessThan => depth += 1,
142 TokenKind::GreaterThan => {
143 if depth == 0 {
144 break;
145 }
146 depth -= 1;
147 if depth == 0 {
148 return i + 1;
149 }
150 }
151 _ => {}
152 }
153
154 i += 1;
155 }
156
157 i
158}
159
160fn extract_calls(tokens: &[Token], out: &mut Vec<RawNode>) {
161 let mut i = 0;
162
163 while i + 1 < tokens.len() {
164 if tokens[i].kind == TokenKind::Ident && tokens[i + 1].kind == TokenKind::OpenParen {
165 let name = build_qualified(tokens, i);
166
167 if is_valid_call(&name) {
168 out.push(RawNode {
169 language: Language::Unknown,
170 kind: NodeKind::CallSite,
171 name: Some(name),
172 usr: None,
173 start_line: tokens[i].line,
174 start_col: tokens[i].col,
175 end_line: tokens[i].line,
176 end_col: tokens[i].col + 1,
177 children: Vec::new(),
178 source_file: None,
179 });
180 }
181 }
182
183 i += 1;
184 }
185}
186
187fn extract_namespaces(tokens: &[Token], out: &mut Vec<RawNode>) {
188 let mut i = 0;
189
190 while i < tokens.len() {
191 if tokens[i].text == "namespace" {
192 if let Some((name, col)) = find_structural_name(tokens, i + 1) {
193 out.push(make_node(NodeKind::Class, name, tokens[i].line, col, ""));
194 }
195 }
196
197 i += 1;
198 }
199}
200
201fn build_qualified(tokens: &[Token], mut pos: usize) -> String {
202 let mut parts = Vec::new();
203
204 while pos > 0 {
205 let t = &tokens[pos];
206
207 if t.kind != TokenKind::Ident {
208 break;
209 }
210
211 parts.push(t.text.clone());
212
213 if pos >= 2 && tokens[pos - 1].kind == TokenKind::Dot {
214 pos -= 2;
215 } else {
216 break;
217 }
218 }
219
220 parts.reverse();
221 parts.join(".")
222}
223
224fn make_node(kind: NodeKind, name: String, line: usize, col: usize, file: &str) -> RawNode {
225 RawNode {
226 language: Language::Unknown,
227 kind,
228 name: Some(name),
229 usr: None,
230 start_line: line,
231 start_col: col,
232 end_line: line,
233 end_col: col + 1,
234 children: Vec::new(),
235 source_file: Some(file.to_string()),
236 }
237}
238
239fn dedup(facts: &mut Vec<RawNode>) {
240 let mut seen = HashSet::new();
241
242 facts.retain(|f| {
243 let key = (
244 f.name.clone().unwrap_or_default(),
245 f.start_line,
246 match f.kind {
247 NodeKind::Function => 1,
248 NodeKind::Class => 2,
249 NodeKind::CallSite => 3,
250 _ => 0,
251 },
252 );
253
254 seen.insert(key)
255 });
256}
257
/// Returns `true` when `s` is a keyword that introduces a type-like
/// declaration. The comparison is exact and case-sensitive.
fn is_class_keyword(s: &str) -> bool {
    const CLASS_KEYWORDS: [&str; 8] = [
        "class",
        "struct",
        "interface",
        "trait",
        "enum",
        "namespace",
        "union",
        "object",
    ];

    CLASS_KEYWORDS.iter().any(|&keyword| keyword == s)
}
264
/// Returns `true` when `s` is a keyword that introduces a function-like
/// declaration. The comparison is exact and case-sensitive.
fn is_function_keyword(s: &str) -> bool {
    const FUNCTION_KEYWORDS: [&str; 5] = ["fn", "def", "func", "function", "method"];

    FUNCTION_KEYWORDS.iter().any(|&keyword| keyword == s)
}
268
/// Returns `true` when `s` is a declaration modifier that may sit between a
/// keyword and the declared name. The comparison is exact and case-sensitive.
fn is_modifier_word(s: &str) -> bool {
    const MODIFIERS: [&str; 10] = [
        "public",
        "private",
        "protected",
        "static",
        "final",
        "abstract",
        "virtual",
        "override",
        "const",
        "inline",
    ];

    MODIFIERS.iter().any(|&modifier| modifier == s)
}
284
/// Returns `true` when `s` is a control-flow keyword or literal-like word
/// that must never be treated as a declaration name or a callee.
/// The comparison is exact and case-sensitive.
fn is_noise_word(s: &str) -> bool {
    const NOISE_WORDS: [&str; 9] = [
        "if", "for", "while", "return", "true", "false", "null", "this", "self",
    ];

    NOISE_WORDS.iter().any(|&word| word == s)
}
291
292fn is_valid_call(name: &str) -> bool {
293 !name.is_empty() && !is_noise_word(name)
294}
295
/// Returns `true` when `src` appears to be an HTML or XML document.
///
/// A leading byte-order mark and leading whitespace are ignored, and the
/// opening marker (`<html`, `<?xml` or `<!doctype`) is matched without regard
/// to ASCII case, so `<!doctype html>` and `<HTML>` are recognized as well.
fn looks_like_markup(src: &str) -> bool {
    const MARKERS: [&[u8]; 3] = [b"<html", b"<?xml", b"<!doctype"];

    let head = src
        .trim_start_matches('\u{feff}')
        .trim_start()
        .as_bytes();

    MARKERS.iter().any(|marker| {
        head.len() >= marker.len() && head[..marker.len()].eq_ignore_ascii_case(marker)
    })
}
300
/// Lexical category of a [`Token`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenKind {
    /// An ASCII identifier or keyword.
    Ident,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `.`
    Dot,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
}

/// A single lexical token together with its source position.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Token {
    /// Category of the token.
    kind: TokenKind,
    /// Exact source text of the token.
    text: String,
    /// Line on which the token starts.
    line: usize,
    /// Column at which the token starts.
    col: usize,
}
326
327fn tokenize(src: &str) -> Vec<Token> {
328 let bytes = src.as_bytes();
329 let mut out = Vec::with_capacity(src.len() / 4);
330
331 let mut i = 0;
332 let mut line = 1;
333 let mut col = 1;
334
335 while i < bytes.len() {
336 match bytes[i] {
337 b'\n' => {
338 line += 1;
339 col = 1;
340 i += 1;
341 }
342
343 b'(' => push(
344 &mut out,
345 TokenKind::OpenParen,
346 "(",
347 line,
348 col,
349 &mut i,
350 &mut col,
351 ),
352 b')' => push(
353 &mut out,
354 TokenKind::CloseParen,
355 ")",
356 line,
357 col,
358 &mut i,
359 &mut col,
360 ),
361 b'{' => push(
362 &mut out,
363 TokenKind::OpenBrace,
364 "{",
365 line,
366 col,
367 &mut i,
368 &mut col,
369 ),
370 b'}' => push(
371 &mut out,
372 TokenKind::CloseBrace,
373 "}",
374 line,
375 col,
376 &mut i,
377 &mut col,
378 ),
379 b'.' => push(&mut out, TokenKind::Dot, ".", line, col, &mut i, &mut col),
380 b',' => push(&mut out, TokenKind::Comma, ",", line, col, &mut i, &mut col),
381 b':' => push(&mut out, TokenKind::Colon, ":", line, col, &mut i, &mut col),
382 b'<' => push(
383 &mut out,
384 TokenKind::LessThan,
385 "<",
386 line,
387 col,
388 &mut i,
389 &mut col,
390 ),
391 b'>' => push(
392 &mut out,
393 TokenKind::GreaterThan,
394 ">",
395 line,
396 col,
397 &mut i,
398 &mut col,
399 ),
400
401 c if c.is_ascii_alphabetic() || c == b'_' => {
402 let start = i;
403
404 while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
405 i += 1;
406 }
407
408 let text = &src[start..i];
409
410 out.push(Token {
411 kind: TokenKind::Ident,
412 text: text.to_string(),
413 line,
414 col,
415 });
416
417 col += i - start;
418 }
419
420 _ => {
421 i += 1;
422 col += 1;
423 }
424 }
425 }
426
427 out
428}
429
430fn push(
431 out: &mut Vec<Token>,
432 kind: TokenKind,
433 text: &str,
434 line: usize,
435 col: usize,
436 i: &mut usize,
437 c: &mut usize,
438) {
439 out.push(Token {
440 kind,
441 text: text.into(),
442 line,
443 col,
444 });
445
446 *i += 1;
447 *c += 1;
448}