Skip to main content

AirLibrary/Indexing/
mod.rs

1//! # File Indexing and Search Service
2//!
3//! ## File: Indexing/mod.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides comprehensive file indexing, search, and content analysis
8//! capabilities for the Land ecosystem, inspired by and compatible with
9//! Visual Studio Code's search service.
10//!
11//! ## Primary Responsibility
12//!
13//! Facade module for the Indexing service, exposing the public API for
14//! file indexing, search, and symbol extraction operations.
15//!
16//! ## Secondary Responsibilities
17//!
//! - Re-export public types from submodules
19//! - Provide unified FileIndexer API
20//! - Coordinate between indexing subsystems
21//!
22//! ## Dependencies
23//!
24//! **External Crates:**
25//! - `regex` - Regular expression search patterns
26//! - `serde` - Serialization for index storage
27//! - `tokio` - Async runtime for all operations
28//! - `notify` - File system watching
29//! - `chrono` - Timestamp management
30//!
31//! **Internal Modules:**
32//! - `crate::Result` - Error handling type
33//! - `crate::AirError` - Error types
34//! - `crate::ApplicationState::ApplicationState` - Application state
35//! - `crate::Configuration::ConfigurationManager` - Configuration management
36//!
37//! ## Dependents
38//!
39//! - `Indexing::FileIndexer` - Main indexer implementation
40//! - `Vine::Server::AirVinegRPCService` - gRPC integration
41//!
42//! ## VSCode Integration
43//!
44//! This service integrates with VSCode's search and file service architecture:
45//!
46//! - References: vs/workbench/services/search
47//! - File Service: vs/workbench/services/files
48//!
49//! The indexing system supports VSCode features:
50//! - **Outline View**: Symbol extraction for class/function navigation
51//! - **Go to Symbol**: Cross-file symbol search and lookup
52//! - **Search Integration**: File content and name search with regex support
53//! - **Workspace Search**: Multi-workspace index sharing
54//!
55//! ## FUTURE Enhancements
56//!
57//! - [ ] Implement full ripgrep integration for ultra-fast text search
58//! - [ ] Add project-level search with workspace awareness
59//! - [ ] Implement search query caching
//! - [ ] Add fuzzy search with typo tolerance
61//! - [ ] Implement search history and recent queries
62//! - [ ] Add search result preview with context
63//! - [ ] Implement parallel indexing for large directories
64
65// Modules - file-based (no inline definitions)
66pub mod State;
67
68pub mod Scan;
69
70pub mod Process;
71
72pub mod Language;
73
74pub mod Store;
75
76pub mod Watch;
77
78pub mod Background;
79
80// Import types and functions needed for the FileIndexer implementation
81use std::{collections::HashMap, path::PathBuf, sync::Arc};
82
83use tokio::sync::{Mutex, RwLock};
84
85use crate::{
86	AirError,
87	ApplicationState::ApplicationState,
88	Configuration::ConfigurationManager,
89	Indexing::{
90		Scan::{
91			ScanDirectory::{ScanAndRemoveDeleted, ScanDirectoriesParallel},
92			ScanFile::IndexFileInternal,
93		},
94		State::UpdateState::{UpdateIndexMetadata, ValidateIndexConsistency},
95		Store::{
96			QueryIndex::{PaginatedSearchResults, QueryIndexSearch, SearchQuery},
97			StoreEntry::{BackupCorruptedIndex, EnsureIndexDirectory, LoadOrCreateIndex, SaveIndex},
98			UpdateIndex::UpdateFileContent,
99		},
100	},
101	Result,
102	dev_log,
103};
104// Import types from submodules with explicit full paths
105use crate::Indexing::State::CreateState::{CreateNewIndex, FileIndex, FileMetadata, SymbolInfo, SymbolLocation};
106
/// Upper bound on concurrently running indexing operations.
///
/// This value is used as the permit count of the indexing semaphore and is
/// also handed to `ScanDirectoriesParallel` when scanning directories.
const MAX_PARALLEL_INDEXING:usize = 10;
109
/// Summary statistics describing the outcome of one indexing run.
#[derive(Debug, Clone)]
pub struct IndexResult {
	/// Count of files that were indexed without error.
	pub files_indexed:u32,

	/// Combined size, in bytes, of the files that were indexed.
	pub total_size:u64,

	/// Wall-clock duration of the run, in seconds.
	pub duration_seconds:f64,

	/// Count of symbols extracted from the indexed files.
	pub symbols_extracted:u32,

	/// Count of files whose indexing failed.
	pub files_with_errors:u32,
}
128
129/// Index statistics
130#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
131pub struct IndexStatistics {
132	pub file_count:u32,
133
134	pub total_size:u64,
135
136	pub total_symbols:u32,
137
138	pub language_counts:HashMap<String, u32>,
139
140	pub last_updated:chrono::DateTime<chrono::Utc>,
141
142	pub index_version:String,
143}
144
145/// File indexer implementation with comprehensive search capabilities
146///
147/// This indexer provides:
148/// - Incremental file watching with real-time updates
149/// - Multi-mode search (literal, regex, fuzzy)
150/// - Symbol extraction for VSCode Outline View
151/// - Language detection for syntax highlighting
152/// - Index corruption detection and recovery
153/// - Parallel indexing with resource limits
154pub struct FileIndexer {
155	/// Application state
156	AppState:Arc<ApplicationState>,
157
158	/// File index with metadata and symbols
159	file_index:Arc<RwLock<FileIndex>>,
160
161	/// Index storage directory
162	index_directory:PathBuf,
163
164	/// File watcher for incremental updates
165	file_watcher:Arc<Mutex<Option<notify::RecommendedWatcher>>>,
166
167	/// Semaphore for limiting parallel indexing operations
168	indexing_semaphore:Arc<tokio::sync::Semaphore>,
169
170	/// Index corruption detection state
171	corruption_detected:Arc<Mutex<bool>>,
172}
173
174impl FileIndexer {
175	/// Create a new file indexer with comprehensive setup
176	///
177	/// Initializes the indexer with:
178	/// - Index directory creation
179	/// - Existing index loading or fresh creation
180	/// - Index corruption detection
181	/// - Service status initialization
182	pub async fn new(AppState:Arc<ApplicationState>) -> Result<Self> {
183		let config = &AppState.Configuration.Indexing;
184
185		// Expand index directory path with validation
186		let index_directory = Self::ValidateAndExpandPath(&config.IndexDirectory)?;
187
188		// Create index directory if it doesn't exist with error handling
189		EnsureIndexDirectory(&index_directory).await?;
190
191		// Load or create index with corruption detection
192		let file_index = LoadOrCreateIndex(&index_directory).await?;
193
194		let indexer = Self {
195			AppState:AppState.clone(),
196
197			file_index:Arc::new(RwLock::new(file_index)),
198
199			index_directory:index_directory.clone(),
200
201			file_watcher:Arc::new(Mutex::new(None)),
202
203			indexing_semaphore:Arc::new(tokio::sync::Semaphore::new(MAX_PARALLEL_INDEXING)),
204
205			corruption_detected:Arc::new(Mutex::new(false)),
206		};
207
208		// Verify index integrity
209		indexer.VerifyIndexIntegrity().await?;
210
211		// Initialize service status
212		indexer
213			.AppState
214			.UpdateServiceStatus("indexing", crate::ApplicationState::ServiceStatus::Running)
215			.await
216			.map_err(|e| AirError::Internal(e.to_string()))?;
217
218		dev_log!(
219			"indexing",
220			"[FileIndexer] Initialized with index directory: {}",
221			index_directory.display()
222		);
223
224		Ok(indexer)
225	}
226
227	/// Validate and expand path with traversal protection
228	fn ValidateAndExpandPath(path:&str) -> Result<PathBuf> {
229		let expanded = ConfigurationManager::ExpandPath(path)?;
230
231		// Prevent path traversal attacks
232		let path_str = expanded.to_string_lossy();
233
234		if path_str.contains("..") {
235			return Err(AirError::FileSystem("Path contains invalid traversal sequence".to_string()));
236		}
237
238		Ok(expanded)
239	}
240
241	/// Verify index integrity and detect corruption
242	async fn VerifyIndexIntegrity(&self) -> Result<()> {
243		let index = self.file_index.read().await;
244
245		// Check consistency
246		ValidateIndexConsistency(&index)?;
247
248		// Verify all indexed files exist
249		let mut missing_files = 0;
250
251		for file_path in index.files.keys() {
252			if !file_path.exists() {
253				missing_files += 1;
254			}
255		}
256
257		if missing_files > 0 {
258			dev_log!("indexing", "warn: [FileIndexer] Found {} missing files in index", missing_files);
259		}
260
261		dev_log!("indexing", "[FileIndexer] Index integrity verified successfully");
262
263		Ok(())
264	}
265
266	/// Index a directory with comprehensive validation and parallel processing
267	pub async fn IndexDirectory(&self, path:String, patterns:Vec<String>) -> Result<IndexResult> {
268		let start_time = std::time::Instant::now();
269
270		dev_log!("indexing", "[FileIndexer] Starting directory index: {}", path);
271
272		let config = &self.AppState.Configuration.Indexing;
273
274		// Scan directory
275		let (files_to_index, _scan_result) =
276			ScanDirectoriesParallel(vec![path.clone()], patterns.clone(), config, MAX_PARALLEL_INDEXING).await?;
277
278		// Index files in parallel
279		// Variables cloned for use in async task
280		let _index_arc = self.file_index.clone();
281
282		let semaphore = self.indexing_semaphore.clone();
283
284		let config_clone = config.clone();
285
286		let mut index_tasks = Vec::new();
287
288		for file_path in files_to_index {
289			let permit = semaphore.clone().acquire_owned().await.unwrap();
290
291			let config_for_task = config_clone.clone();
292
293			let task = tokio::spawn(async move {
294				let _permit = permit;
295				IndexFileInternal(&file_path, &config_for_task, &[]).await
296			});
297
298			index_tasks.push(task);
299		}
300
301		// Collect results
302		let mut index = self.file_index.write().await;
303
304		let mut indexed_paths = std::collections::HashSet::new();
305
306		let mut files_indexed = 0u32;
307
308		let mut total_size = 0u64;
309
310		let mut symbols_extracted = 0u32;
311
312		let mut files_with_errors = 0u32;
313
314		for task in index_tasks {
315			match task.await {
316				Ok(Ok((metadata, symbols))) => {
317					let file_path = metadata.path.clone();
318
319					index.files.insert(file_path.clone(), metadata.clone());
320
321					indexed_paths.insert(file_path.clone());
322
323					// Index content for search
324					if let Err(e) = UpdateFileContent(&mut index, &file_path, &metadata).await {
325						dev_log!(
326							"indexing",
327							"warn: [FileIndexer] Failed to index content for {}: {}",
328							file_path.display(),
329							e
330						);
331					}
332
333					// Index symbols
334					index.file_symbols.insert(file_path.clone(), symbols.clone());
335
336					symbols_extracted += symbols.len() as u32;
337
338					// Update symbol index
339					for symbol in symbols {
340						index
341							.symbol_index
342							.entry(symbol.name.clone())
343							.or_insert_with(Vec::new)
344							.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
345					}
346
347					files_indexed += 1;
348
349					total_size += metadata.size;
350				},
351
352				Ok(Err(_)) => {
353					files_with_errors += 1;
354				},
355
356				Err(e) => {
357					dev_log!("indexing", "error: [FileIndexer] Indexing task failed: {}", e);
358
359					files_with_errors += 1;
360				},
361			}
362		}
363
364		// Remove files that were indexed before but no longer exist
365		ScanAndRemoveDeleted(&mut index, &Self::ValidateAndExpandPath(&path)?).await?;
366
367		// Update index metadata
368		UpdateIndexMetadata(&mut index)?;
369
370		// Save index to disk
371		SaveIndex(&self.index_directory, &index).await?;
372
373		let duration = start_time.elapsed().as_secs_f64();
374
375		dev_log!(
376			"indexing",
377			"[FileIndexer] Indexing completed: {} files, {} bytes, {} symbols, {} errors in {:.2}s",
378			files_indexed,
379			total_size,
380			symbols_extracted,
381			files_with_errors,
382			duration
383		);
384
385		Ok(IndexResult {
386			files_indexed,
387			total_size,
388			duration_seconds:duration,
389			symbols_extracted,
390			files_with_errors,
391		})
392	}
393
394	/// Search files with multiple modes
395	pub async fn SearchFiles(
396		&self,
397
398		query:SearchQuery,
399
400		path:Option<String>,
401
402		language:Option<String>,
403	) -> Result<PaginatedSearchResults> {
404		let index = self.file_index.read().await;
405
406		QueryIndexSearch(&index, query, path, language).await
407	}
408
409	/// Search symbols across all files (for VSCode Go to Symbol)
410	pub async fn SearchSymbols(&self, query:&str, max_results:u32) -> Result<Vec<SymbolInfo>> {
411		let index = self.file_index.read().await;
412
413		let query_lower = query.to_lowercase();
414
415		let mut results = Vec::new();
416
417		for (symbol_name, locations) in &index.symbol_index {
418			if symbol_name.to_lowercase().contains(&query_lower) {
419				for loc in locations.iter().take(max_results as usize) {
420					results.push(loc.symbol.clone());
421
422					if results.len() >= max_results as usize {
423						break;
424					}
425				}
426			}
427		}
428
429		Ok(results)
430	}
431
432	/// Get symbols for a specific file (for VSCode Outline View)
433	pub async fn GetFileSymbols(&self, file_path:&PathBuf) -> Result<Vec<SymbolInfo>> {
434		let index = self.file_index.read().await;
435
436		Ok(index.file_symbols.get(file_path).cloned().unwrap_or_default())
437	}
438
439	/// Get file information
440	pub async fn GetFileInfo(&self, path:String) -> Result<Option<FileMetadata>> {
441		let file_path = Self::ValidateAndExpandPath(&path)?;
442
443		let index = self.file_index.read().await;
444
445		Ok(index.files.get(&file_path).cloned())
446	}
447
448	/// Get index statistics
449	pub async fn GetIndexStatistics(&self) -> Result<IndexStatistics> {
450		let index = self.file_index.read().await;
451
452		let mut language_counts:HashMap<String, u32> = HashMap::new();
453
454		let total_size = index.files.values().map(|m| m.size).sum();
455
456		let total_symbols = index.files.values().map(|m| m.symbol_count).sum();
457
458		for metadata in index.files.values() {
459			if let Some(lang) = &metadata.language {
460				*language_counts.entry(lang.clone()).or_insert(0) += 1;
461			}
462		}
463
464		Ok(IndexStatistics {
465			file_count:index.files.len() as u32,
466			total_size,
467			total_symbols,
468			language_counts,
469			last_updated:index.last_updated,
470			index_version:index.index_version.clone(),
471		})
472	}
473
474	/// Recover corrupted index
475	pub async fn recover_from_corruption(&self) -> Result<()> {
476		dev_log!("indexing", "[FileIndexer] Recovering from corrupted index...");
477
478		// Backup corrupted index
479		BackupCorruptedIndex(&self.index_directory).await?;
480
481		// Create new index
482		let new_index = CreateNewIndex();
483
484		*self.file_index.write().await = new_index;
485
486		// Clear corruption flag
487		*self.corruption_detected.lock().await = false;
488
489		dev_log!("indexing", "[FileIndexer] Index recovery completed");
490
491		Ok(())
492	}
493}
494
495impl Clone for FileIndexer {
496	fn clone(&self) -> Self {
497		Self {
498			AppState:self.AppState.clone(),
499
500			file_index:self.file_index.clone(),
501
502			index_directory:self.index_directory.clone(),
503
504			file_watcher:self.file_watcher.clone(),
505
506			indexing_semaphore:self.indexing_semaphore.clone(),
507
508			corruption_detected:self.corruption_detected.clone(),
509		}
510	}
511}