Skip to main content

AirLibrary/Indexing/Process/
ExtractSymbols.rs

1//! # ExtractSymbols
2//!
3//! ## File: Indexing/Process/ExtractSymbols.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides symbol extraction functionality for the File Indexer service,
8//! extracting classes, functions, and other code constructs for VSCode
9//! Outline View and Go to Symbol features.
10//!
11//! ## Primary Responsibility
12//!
13//! Extract code symbols from file content based on detected language,
14//! including functions, classes, structs, enums, traits, and more.
15//!
16//! ## Secondary Responsibilities
17//!
18//! - Language-specific symbol extraction
19//! - Line and column tracking for symbols
20//! - Symbol kind classification
21//! - Cross-file symbol reference support
22//!
23//! ## Dependencies
24//!
25//! **External Crates:**
26//! - None (uses std library)
27//!
28//! **Internal Modules:**
29//! - `crate::Result` - Error handling type
30//! - `super::Language` - Language-specific parsers
31//!
32//! ## Dependents
33//!
34//! - `Indexing::Scan::ScanFile` - Symbol extraction during file scan
35//! - `Indexing::mod::FileIndexer` - Symbol search operations
36//!
37//! ## VSCode Pattern Reference
38//!
39//! Inspired by VSCode's symbol extraction in
40//! `src/vs/workbench/services/search/common/`
41//!
42//! ## Security Considerations
43//!
44//! - Line-by-line parsing without eval
45//! - No code execution during extraction
46//! - Safe string handling
47//!
48//! ## Performance Considerations
49//!
50//! - Efficient line-based parsing
51//! - Minimal allocations per file
52//! - Early termination for non-code files
53//!
54//! ## Error Handling Strategy
55//!
56//! Symbol extraction returns empty vectors on parse errors rather than
57//! failures, allowing indexing to continue for other languages.
58//!
59//! ## Thread Safety
60//!
61//! Symbol extraction functions are pure and safe to call from
62//! parallel indexing tasks.
63
64use std::path::PathBuf;
65
66use crate::{
67	Indexing::{
68		Language::{ParseRust::ExtractRustSymbols, ParseTypeScript::ExtractTypeScriptSymbols},
69		State::CreateState::{SymbolInfo, SymbolKind},
70	},
71	Result,
72};
73
74/// Extract symbols from code for VSCode Outline View and Go to Symbol
75///
76/// Supports multiple programming languages:
77/// - Rust: struct, impl, fn, mod, enum, trait, type
78/// - TypeScript/JavaScript: class, interface, function, const, let, var
79/// - Python: class, def
80/// - Go: type, func, struct, interface
81pub async fn ExtractSymbols(file_path:&PathBuf, content:&[u8], language:&str) -> Result<Vec<SymbolInfo>> {
82	let content_str = String::from_utf8_lossy(content);
83
84	let mut symbols = Vec::new();
85
86	match language.to_lowercase().as_str() {
87		"rust" => symbols.extend(ExtractRustSymbols(&content_str, file_path)),
88
89		"typescript" | "javascript" => symbols.extend(ExtractTypeScriptSymbols(&content_str, file_path)),
90
91		_ => {},
92	}
93
94	Ok(symbols)
95}
96
97/// Group symbols by kind for organization
98pub fn GroupSymbolsByKind(symbols:&[SymbolInfo]) -> std::collections::HashMap<SymbolKind, Vec<&SymbolInfo>> {
99	let mut grouped = std::collections::HashMap::new();
100
101	for symbol in symbols {
102		grouped.entry(symbol.kind.clone()).or_insert_with(Vec::new).push(symbol);
103	}
104
105	grouped
106}
107
108/// Sort symbols by line number
109pub fn SortSymbolsByLine(symbols:&mut Vec<SymbolInfo>) { symbols.sort_by(|a, b| a.line.cmp(&b.line)); }
110
111/// Filter symbols by name pattern
112pub fn FilterSymbolsByName<'a>(symbols:&'a [SymbolInfo], pattern:&str) -> Vec<&'a SymbolInfo> {
113	let pattern_lower = pattern.to_lowercase();
114
115	symbols
116		.iter()
117		.filter(|s| s.name.to_lowercase().contains(&pattern_lower))
118		.collect()
119}
120
121/// Get symbols of a specific kind
122pub fn GetSymbolsByKind(symbols:&[SymbolInfo], kind:SymbolKind) -> Vec<&SymbolInfo> {
123	symbols.iter().filter(|s| s.kind == kind).collect()
124}
125
126/// Find symbol at specific line
127pub fn FindSymbolAtLine(symbols:&[SymbolInfo], line:u32) -> Option<&SymbolInfo> {
128	symbols.iter().find(|s| s.line == line)
129}
130
131/// Find symbols in line range
132pub fn FindSymbolsInRange(symbols:&[SymbolInfo], start_line:u32, end_line:u32) -> Vec<&SymbolInfo> {
133	symbols.iter().filter(|s| s.line >= start_line && s.line <= end_line).collect()
134}
135
136/// Create symbol summary statistics
137pub fn GetSymbolStatistics(symbols:&[SymbolInfo]) -> SymbolStatistics {
138	let mut stats = SymbolStatistics { total:symbols.len(), by_kind:std::collections::HashMap::new() };
139
140	for symbol in symbols {
141		*stats.by_kind.entry(symbol.kind.clone()).or_insert(0) += 1;
142	}
143
144	stats
145}
146
147/// Symbol statistics
148#[derive(Debug, Clone)]
149pub struct SymbolStatistics {
150	pub total:usize,
151
152	pub by_kind:std::collections::HashMap<SymbolKind, usize>,
153}
154
155impl std::fmt::Display for SymbolStatistics {
156	fn fmt(&self, f:&mut std::fmt::Formatter<'_>) -> std::fmt::Result {
157		write!(f, "Total symbols: {}", self.total)?;
158
159		for (kind, count) in &self.by_kind {
160			write!(f, ", {:?}: {}", kind, count)?;
161		}
162
163		Ok(())
164	}
165}
166
167/// Validate symbol information
168pub fn ValidateSymbol(symbol:&SymbolInfo) -> bool {
169	!symbol.name.is_empty() && symbol.line > 0 && !symbol.full_path.is_empty()
170}
171
172/// Deduplicate symbols by name and line
173pub fn DeduplicateSymbols(symbols:Vec<SymbolInfo>) -> Vec<SymbolInfo> {
174	let mut seen = std::collections::HashSet::new();
175
176	symbols.into_iter().filter(|s| seen.insert((s.name.clone(), s.line))).collect()
177}
178
179/// Merge symbol lists from multiple files
180pub fn MergeSymbolLists(symbol_lists:Vec<Vec<SymbolInfo>>) -> Vec<SymbolInfo> {
181	let mut merged = Vec::new();
182
183	for symbols in symbol_lists {
184		merged.extend(symbols);
185	}
186
187	DeduplicateSymbols(merged)
188}
189
190/// Deduplicate multiple symbol lists
191pub fn DeduplicateLists(symbol_lists:Vec<Vec<SymbolInfo>>) -> Vec<Vec<SymbolInfo>> {
192	symbol_lists.into_iter().map(|list| DeduplicateSymbols(list)).collect()
193}
194
195/// Create a symbol search index (name -> symbols)
196pub fn CreateSymbolIndex(symbols:&[SymbolInfo]) -> std::collections::HashMap<String, Vec<usize>> {
197	let mut index = std::collections::HashMap::new();
198
199	for (idx, symbol) in symbols.iter().enumerate() {
200		index.entry(symbol.name.to_lowercase()).or_insert_with(Vec::new).push(idx);
201	}
202
203	index
204}
205
206/// Find symbols matching multiple criteria
207pub fn FindSymbolsMatching<'a>(
208	symbols:&'a [SymbolInfo],
209
210	name_pattern:Option<&'a str>,
211
212	kind:&Option<SymbolKind>,
213
214	line_range:Option<(u32, u32)>,
215) -> Vec<&'a SymbolInfo> {
216	symbols
217		.iter()
218		.filter(|s| {
219			if let Some(pattern) = name_pattern {
220				if !s.name.to_lowercase().contains(&pattern.to_lowercase()) {
221					return false;
222				}
223			}
224			if let Some(k) = kind {
225				if s.kind != *k {
226					return false;
227				}
228			}
229			if let Some((start, end)) = line_range {
230				if s.line < start || s.line > end {
231					return false;
232				}
233			}
234			true
235		})
236		.collect()
237}