// Submodule declarations for the indexing subsystem. Each `pub mod` below is
// only declared here; its contents are defined in a separate source file.

// Index state; this file imports items from `State::CreateState` and
// `State::UpdateState`.
pub mod State;

// Scanning; this file imports items from `Scan::ScanDirectory` and
// `Scan::ScanFile`.
pub mod Scan;

// Not referenced by the code visible in this file.
// NOTE(review): presumably per-file processing helpers - confirm.
pub mod Process;

// Not referenced by the code visible in this file.
// NOTE(review): presumably language detection/handling - confirm.
pub mod Language;

// Index persistence and querying; this file imports items from
// `Store::QueryIndex`, `Store::StoreEntry` and `Store::UpdateIndex`.
pub mod Store;

// Not referenced by the code visible in this file.
// NOTE(review): presumably file-system watching - confirm.
pub mod Watch;

// Not referenced by the code visible in this file.
// NOTE(review): presumably background indexing tasks - confirm.
pub mod Background;
79
80use std::{collections::HashMap, path::PathBuf, sync::Arc};
82
83use tokio::sync::{Mutex, RwLock};
84
85use crate::{
86 AirError,
87 ApplicationState::ApplicationState,
88 Configuration::ConfigurationManager,
89 Indexing::{
90 Scan::{
91 ScanDirectory::{ScanAndRemoveDeleted, ScanDirectoriesParallel},
92 ScanFile::IndexFileInternal,
93 },
94 State::UpdateState::{UpdateIndexMetadata, ValidateIndexConsistency},
95 Store::{
96 QueryIndex::{PaginatedSearchResults, QueryIndexSearch, SearchQuery},
97 StoreEntry::{BackupCorruptedIndex, EnsureIndexDirectory, LoadOrCreateIndex, SaveIndex},
98 UpdateIndex::UpdateFileContent,
99 },
100 },
101 Result,
102 dev_log,
103};
104use crate::Indexing::State::CreateState::{CreateNewIndex, FileIndex, FileMetadata, SymbolInfo, SymbolLocation};
106
/// Concurrency limit used by the indexer: it is the number of permits given
/// to the indexing semaphore in `FileIndexer::new`, and it is also passed to
/// `ScanDirectoriesParallel` by `FileIndexer::IndexDirectory`.
const MAX_PARALLEL_INDEXING:usize = 10;
109
/// Summary returned by `FileIndexer::IndexDirectory` for a single indexing run.
#[derive(Debug, Clone)]
pub struct IndexResult {
	/// Number of files whose indexing task succeeded and whose results were
	/// merged into the index.
	pub files_indexed:u32,

	/// Sum of `metadata.size` over the successfully indexed files.
	pub total_size:u64,

	/// Wall-clock duration of the run in seconds, measured from the start of
	/// `IndexDirectory` until after the index has been saved.
	pub duration_seconds:f64,

	/// Total number of symbols returned by the successful indexing tasks.
	pub symbols_extracted:u32,

	/// Number of files whose indexing task returned an error or whose spawned
	/// task could not be joined.
	pub files_with_errors:u32,
}
128
/// Snapshot of aggregate index figures produced by
/// `FileIndexer::GetIndexStatistics`. Serializable via serde.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexStatistics {
	/// Number of entries in the index's `files` map.
	pub file_count:u32,

	/// Sum of the `size` field over all indexed files.
	pub total_size:u64,

	/// Sum of the `symbol_count` field over all indexed files.
	pub total_symbols:u32,

	/// Number of indexed files per language; files without a language are
	/// not counted here.
	pub language_counts:HashMap<String, u32>,

	/// Copied from the index's `last_updated` field.
	pub last_updated:chrono::DateTime<chrono::Utc>,

	/// Copied from the index's `index_version` field.
	pub index_version:String,
}
144
/// Owns the file index and the handles needed to build and query it.
///
/// Every field except `index_directory` is an `Arc`, so values produced by the
/// hand-written `Clone` impl share the same index, semaphore and flags.
pub struct FileIndexer {
	/// Shared application state; used for the indexing configuration and for
	/// reporting service status.
	AppState:Arc<ApplicationState>,

	/// The index itself, behind an async read/write lock.
	file_index:Arc<RwLock<FileIndex>>,

	/// Directory the index is loaded from and saved to.
	index_directory:PathBuf,

	/// Slot for a file-system watcher. `new` initialises it to `None`; nothing
	/// in the code visible here assigns it.
	file_watcher:Arc<Mutex<Option<notify::RecommendedWatcher>>>,

	/// Limits how many file-indexing tasks run at once
	/// (`MAX_PARALLEL_INDEXING` permits).
	indexing_semaphore:Arc<tokio::sync::Semaphore>,

	/// Flag initialised to `false` in `new` and reset to `false` by
	/// `recover_from_corruption`; the code that sets it is not visible here.
	corruption_detected:Arc<Mutex<bool>>,
}
173
174impl FileIndexer {
175 pub async fn new(AppState:Arc<ApplicationState>) -> Result<Self> {
183 let config = &AppState.Configuration.Indexing;
184
185 let index_directory = Self::ValidateAndExpandPath(&config.IndexDirectory)?;
187
188 EnsureIndexDirectory(&index_directory).await?;
190
191 let file_index = LoadOrCreateIndex(&index_directory).await?;
193
194 let indexer = Self {
195 AppState:AppState.clone(),
196
197 file_index:Arc::new(RwLock::new(file_index)),
198
199 index_directory:index_directory.clone(),
200
201 file_watcher:Arc::new(Mutex::new(None)),
202
203 indexing_semaphore:Arc::new(tokio::sync::Semaphore::new(MAX_PARALLEL_INDEXING)),
204
205 corruption_detected:Arc::new(Mutex::new(false)),
206 };
207
208 indexer.VerifyIndexIntegrity().await?;
210
211 indexer
213 .AppState
214 .UpdateServiceStatus("indexing", crate::ApplicationState::ServiceStatus::Running)
215 .await
216 .map_err(|e| AirError::Internal(e.to_string()))?;
217
218 dev_log!(
219 "indexing",
220 "[FileIndexer] Initialized with index directory: {}",
221 index_directory.display()
222 );
223
224 Ok(indexer)
225 }
226
227 fn ValidateAndExpandPath(path:&str) -> Result<PathBuf> {
229 let expanded = ConfigurationManager::ExpandPath(path)?;
230
231 let path_str = expanded.to_string_lossy();
233
234 if path_str.contains("..") {
235 return Err(AirError::FileSystem("Path contains invalid traversal sequence".to_string()));
236 }
237
238 Ok(expanded)
239 }
240
241 async fn VerifyIndexIntegrity(&self) -> Result<()> {
243 let index = self.file_index.read().await;
244
245 ValidateIndexConsistency(&index)?;
247
248 let mut missing_files = 0;
250
251 for file_path in index.files.keys() {
252 if !file_path.exists() {
253 missing_files += 1;
254 }
255 }
256
257 if missing_files > 0 {
258 dev_log!("indexing", "warn: [FileIndexer] Found {} missing files in index", missing_files);
259 }
260
261 dev_log!("indexing", "[FileIndexer] Index integrity verified successfully");
262
263 Ok(())
264 }
265
266 pub async fn IndexDirectory(&self, path:String, patterns:Vec<String>) -> Result<IndexResult> {
268 let start_time = std::time::Instant::now();
269
270 dev_log!("indexing", "[FileIndexer] Starting directory index: {}", path);
271
272 let config = &self.AppState.Configuration.Indexing;
273
274 let (files_to_index, _scan_result) =
276 ScanDirectoriesParallel(vec![path.clone()], patterns.clone(), config, MAX_PARALLEL_INDEXING).await?;
277
278 let _index_arc = self.file_index.clone();
281
282 let semaphore = self.indexing_semaphore.clone();
283
284 let config_clone = config.clone();
285
286 let mut index_tasks = Vec::new();
287
288 for file_path in files_to_index {
289 let permit = semaphore.clone().acquire_owned().await.unwrap();
290
291 let config_for_task = config_clone.clone();
292
293 let task = tokio::spawn(async move {
294 let _permit = permit;
295 IndexFileInternal(&file_path, &config_for_task, &[]).await
296 });
297
298 index_tasks.push(task);
299 }
300
301 let mut index = self.file_index.write().await;
303
304 let mut indexed_paths = std::collections::HashSet::new();
305
306 let mut files_indexed = 0u32;
307
308 let mut total_size = 0u64;
309
310 let mut symbols_extracted = 0u32;
311
312 let mut files_with_errors = 0u32;
313
314 for task in index_tasks {
315 match task.await {
316 Ok(Ok((metadata, symbols))) => {
317 let file_path = metadata.path.clone();
318
319 index.files.insert(file_path.clone(), metadata.clone());
320
321 indexed_paths.insert(file_path.clone());
322
323 if let Err(e) = UpdateFileContent(&mut index, &file_path, &metadata).await {
325 dev_log!(
326 "indexing",
327 "warn: [FileIndexer] Failed to index content for {}: {}",
328 file_path.display(),
329 e
330 );
331 }
332
333 index.file_symbols.insert(file_path.clone(), symbols.clone());
335
336 symbols_extracted += symbols.len() as u32;
337
338 for symbol in symbols {
340 index
341 .symbol_index
342 .entry(symbol.name.clone())
343 .or_insert_with(Vec::new)
344 .push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
345 }
346
347 files_indexed += 1;
348
349 total_size += metadata.size;
350 },
351
352 Ok(Err(_)) => {
353 files_with_errors += 1;
354 },
355
356 Err(e) => {
357 dev_log!("indexing", "error: [FileIndexer] Indexing task failed: {}", e);
358
359 files_with_errors += 1;
360 },
361 }
362 }
363
364 ScanAndRemoveDeleted(&mut index, &Self::ValidateAndExpandPath(&path)?).await?;
366
367 UpdateIndexMetadata(&mut index)?;
369
370 SaveIndex(&self.index_directory, &index).await?;
372
373 let duration = start_time.elapsed().as_secs_f64();
374
375 dev_log!(
376 "indexing",
377 "[FileIndexer] Indexing completed: {} files, {} bytes, {} symbols, {} errors in {:.2}s",
378 files_indexed,
379 total_size,
380 symbols_extracted,
381 files_with_errors,
382 duration
383 );
384
385 Ok(IndexResult {
386 files_indexed,
387 total_size,
388 duration_seconds:duration,
389 symbols_extracted,
390 files_with_errors,
391 })
392 }
393
394 pub async fn SearchFiles(
396 &self,
397
398 query:SearchQuery,
399
400 path:Option<String>,
401
402 language:Option<String>,
403 ) -> Result<PaginatedSearchResults> {
404 let index = self.file_index.read().await;
405
406 QueryIndexSearch(&index, query, path, language).await
407 }
408
409 pub async fn SearchSymbols(&self, query:&str, max_results:u32) -> Result<Vec<SymbolInfo>> {
411 let index = self.file_index.read().await;
412
413 let query_lower = query.to_lowercase();
414
415 let mut results = Vec::new();
416
417 for (symbol_name, locations) in &index.symbol_index {
418 if symbol_name.to_lowercase().contains(&query_lower) {
419 for loc in locations.iter().take(max_results as usize) {
420 results.push(loc.symbol.clone());
421
422 if results.len() >= max_results as usize {
423 break;
424 }
425 }
426 }
427 }
428
429 Ok(results)
430 }
431
432 pub async fn GetFileSymbols(&self, file_path:&PathBuf) -> Result<Vec<SymbolInfo>> {
434 let index = self.file_index.read().await;
435
436 Ok(index.file_symbols.get(file_path).cloned().unwrap_or_default())
437 }
438
439 pub async fn GetFileInfo(&self, path:String) -> Result<Option<FileMetadata>> {
441 let file_path = Self::ValidateAndExpandPath(&path)?;
442
443 let index = self.file_index.read().await;
444
445 Ok(index.files.get(&file_path).cloned())
446 }
447
448 pub async fn GetIndexStatistics(&self) -> Result<IndexStatistics> {
450 let index = self.file_index.read().await;
451
452 let mut language_counts:HashMap<String, u32> = HashMap::new();
453
454 let total_size = index.files.values().map(|m| m.size).sum();
455
456 let total_symbols = index.files.values().map(|m| m.symbol_count).sum();
457
458 for metadata in index.files.values() {
459 if let Some(lang) = &metadata.language {
460 *language_counts.entry(lang.clone()).or_insert(0) += 1;
461 }
462 }
463
464 Ok(IndexStatistics {
465 file_count:index.files.len() as u32,
466 total_size,
467 total_symbols,
468 language_counts,
469 last_updated:index.last_updated,
470 index_version:index.index_version.clone(),
471 })
472 }
473
474 pub async fn recover_from_corruption(&self) -> Result<()> {
476 dev_log!("indexing", "[FileIndexer] Recovering from corrupted index...");
477
478 BackupCorruptedIndex(&self.index_directory).await?;
480
481 let new_index = CreateNewIndex();
483
484 *self.file_index.write().await = new_index;
485
486 *self.corruption_detected.lock().await = false;
488
489 dev_log!("indexing", "[FileIndexer] Index recovery completed");
490
491 Ok(())
492 }
493}
494
495impl Clone for FileIndexer {
496 fn clone(&self) -> Self {
497 Self {
498 AppState:self.AppState.clone(),
499
500 file_index:self.file_index.clone(),
501
502 index_directory:self.index_directory.clone(),
503
504 file_watcher:self.file_watcher.clone(),
505
506 indexing_semaphore:self.indexing_semaphore.clone(),
507
508 corruption_detected:self.corruption_detected.clone(),
509 }
510 }
511}