Skip to main content

AirLibrary/Indexing/State/
UpdateState.rs

1//! # UpdateState
2//!
3//! ## File: Indexing/State/UpdateState.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides state update operations for the File Indexer service, handling
8//! modification of index structures including adding, removing, and updating
9//! entries in the file index.
10//!
11//! ## Primary Responsibility
12//!
13//! Update file index state by adding/removing files, symbols, and content
14//! entries in a thread-safe manner.
15//!
16//! ## Secondary Responsibilities
17//!
18//! - Remove deleted files from all indexes
19//! - Update symbol index with new symbol locations
20//! - Update content index with new file paths
21//! - Maintain index version and checksum on updates
22//!
23//! ## Dependencies
24//!
25//! **External Crates:**
//! - `tokio` - Async runtime for update operations
//! - `chrono` - UTC timestamps for `last_updated` and index age checks
27//!
28//! **Internal Modules:**
29//! - `crate::Result` - Error handling type
30//! - `crate::AirError` - Error types
31//! - `super::CreateState` - State structure definitions
32//!
33//! ## Dependents
34//!
35//! - `Indexing::Scan::ScanDirectory` - Updates index after directory scan
36//! - `Indexing::Scan::ScanFile` - Updates index after file scan
37//! - `Indexing::Store::UpdateIndex` - Incremental index updates
38//! - `Indexing::Watch::WatchFile` - Updates index on file changes
39//!
40//! ## VSCode Pattern Reference
41//!
42//! Inspired by VSCode's index update operations in
43//! `src/vs/workbench/services/search/common/`
44//!
45//! ## Security Considerations
46//!
47//! - Thread-safe updates prevent race conditions
48//! - Path validation before state updates
49//! - Size limits enforced on all update operations
50//!
51//! ## Performance Considerations
52//!
53//! - Incremental updates minimize reindexing
54//! - Batch updates for multiple files
55//! - Efficient hash lookups for O(1) updates
56//!
57//! ## Error Handling Strategy
58//!
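//! Removal operations are no-ops for paths that are not indexed (idempotent).
//! `UpdateFileMetadata` and `UpdateFileSymbols` return `AirError::Internal`
//! when the file is not in the index, and errors from checksum calculation
//! are propagated to the caller.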
61//!
62//! ## Thread Safety
63//!
64//! All update operations are designed to work within RwLock write
65//! guards and should be called while holding appropriate locks.
66
67use std::path::PathBuf;
68
69use crate::{
70	AirError,
71	Indexing::State::CreateState::{FileIndex, FileMetadata, SymbolInfo, SymbolLocation},
72	Result,
73};
74
75/// Add a file to the index with its metadata and symbols
76pub fn AddFileToIndex(
77	index:&mut FileIndex,
78
79	file_path:PathBuf,
80
81	metadata:FileMetadata,
82
83	symbols:Vec<SymbolInfo>,
84) -> Result<()> {
85	// Check if file already exists and update accordingly
86	let is_new = !index.files.contains_key(&file_path);
87
88	// Add or update file metadata
89	index.files.insert(file_path.clone(), metadata.clone());
90
91	// Update symbol index
92	if is_new {
93		// Clear old symbols for this file if any
94		index.file_symbols.remove(&file_path);
95	}
96
97	// Add new symbols
98	index.file_symbols.insert(file_path.clone(), symbols.clone());
99
100	// Update symbol index for cross-referencing
101	for symbol in symbols {
102		index
103			.symbol_index
104			.entry(symbol.name.clone())
105			.or_insert_with(Vec::new)
106			.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
107	}
108
109	Ok(())
110}
111
112/// Remove a file from all indexes (content, symbols, files)
113pub fn RemoveFileFromIndex(index:&mut FileIndex, file_path:&PathBuf) -> Result<()> {
114	// Remove from files index
115	index.files.remove(file_path);
116
117	// Remove from file_symbols
118	index.file_symbols.remove(file_path);
119
120	// Remove from symbol index
121	for (_, locations) in index.symbol_index.iter_mut() {
122		locations.retain(|loc| loc.file_path != *file_path);
123	}
124
125	// Remove from content index
126	for (_, files) in index.content_index.iter_mut() {
127		files.retain(|p| p != file_path);
128	}
129
130	Ok(())
131}
132
133/// Remove multiple files from the index in a batch operation
134pub fn RemoveFilesFromIndex(index:&mut FileIndex, file_paths:&[PathBuf]) -> Result<()> {
135	for file_path in file_paths {
136		RemoveFileFromIndex(index, file_path)?;
137	}
138
139	Ok(())
140}
141
142/// Update index metadata (version, timestamp, checksum)
143pub fn UpdateIndexMetadata(index:&mut FileIndex) -> Result<()> {
144	use crate::Indexing::State::CreateState::{CalculateIndexChecksum, GenerateIndexVersion};
145
146	index.last_updated = chrono::Utc::now();
147
148	index.index_version = GenerateIndexVersion();
149
150	index.index_checksum = CalculateIndexChecksum(index)?;
151
152	Ok(())
153}
154
155/// Update file metadata for an existing file
156pub fn UpdateFileMetadata(index:&mut FileIndex, file_path:&PathBuf, metadata:FileMetadata) -> Result<()> {
157	if !index.files.contains_key(file_path) {
158		return Err(AirError::Internal(format!(
159			"Cannot update metadata for file not in index: {}",
160			file_path.display()
161		)));
162	}
163
164	index.files.insert(file_path.clone(), metadata);
165
166	Ok(())
167}
168
169/// Update symbols for a file
170pub fn UpdateFileSymbols(index:&mut FileIndex, file_path:&PathBuf, symbols:Vec<SymbolInfo>) -> Result<()> {
171	if !index.files.contains_key(file_path) {
172		return Err(AirError::Internal(format!(
173			"Cannot update symbols for file not in index: {}",
174			file_path.display()
175		)));
176	}
177
178	// Remove old symbols from symbol index
179	if let Some(old_symbols) = index.file_symbols.get(file_path) {
180		for old_symbol in old_symbols {
181			if let Some(locations) = index.symbol_index.get_mut(&old_symbol.name) {
182				locations.retain(|loc| loc.file_path != *file_path);
183			}
184		}
185	}
186
187	// Add new symbols
188	index.file_symbols.insert(file_path.clone(), symbols.clone());
189
190	for symbol in symbols {
191		index
192			.symbol_index
193			.entry(symbol.name.clone())
194			.or_insert_with(Vec::new)
195			.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
196	}
197
198	Ok(())
199}
200
201/// Update content index for a file
202pub fn UpdateContentIndex(index:&mut FileIndex, file_path:&PathBuf, tokens:Vec<String>) -> Result<()> {
203	// Remove file from existing content index entries
204	for (_, files) in index.content_index.iter_mut() {
205		files.retain(|p| p != file_path);
206	}
207
208	// Add new tokens
209	for token in tokens {
210		if token.len() > 2 {
211			// Only index tokens longer than 2 characters
212			index
213				.content_index
214				.entry(token)
215				.or_insert_with(Vec::new)
216				.push(file_path.clone());
217		}
218	}
219
220	Ok(())
221}
222
223/// Clean up orphaned entries (files with no matching content/symbols)
224pub fn CleanupOrphanedEntries(index:&mut FileIndex) -> Result<u32> {
225	let mut removed_count = 0;
226
227	// Clean up content index entries with no files
228	let orphaned_tokens:Vec<_> = index
229		.content_index
230		.iter()
231		.filter(|(_, files)| files.is_empty())
232		.map(|(token, _)| token.clone())
233		.collect();
234
235	for token in orphaned_tokens {
236		index.content_index.remove(&token);
237
238		removed_count += 1;
239	}
240
241	// Clean up symbol index entries with no locations
242	let orphaned_symbols:Vec<_> = index
243		.symbol_index
244		.iter()
245		.filter(|(_, locations)| locations.is_empty())
246		.map(|(symbol, _)| symbol.clone())
247		.collect();
248
249	for symbol in orphaned_symbols {
250		index.symbol_index.remove(&symbol);
251
252		removed_count += 1;
253	}
254
255	Ok(removed_count)
256}
257
258/// Merge another index into this one
259pub fn MergeIndexes(target:&mut FileIndex, source:FileIndex) -> Result<u32> {
260	let mut merged_files = 0;
261
262	// Merge files
263	for (path, metadata) in source.files {
264		if !target.files.contains_key(&path) {
265			target.files.insert(path.clone(), metadata);
266
267			merged_files += 1;
268		}
269	}
270
271	// Merge content index
272	for (token, mut files) in source.content_index {
273		target.content_index.entry(token).or_insert_with(Vec::new).append(&mut files);
274	}
275
276	// Merge symbol index
277	for (symbol, mut locations) in source.symbol_index {
278		target
279			.symbol_index
280			.entry(symbol)
281			.or_insert_with(Vec::new)
282			.append(&mut locations);
283	}
284
285	// Merge file symbols
286	for (path, symbols) in source.file_symbols {
287		if !target.file_symbols.contains_key(&path) {
288			target.file_symbols.insert(path, symbols);
289		}
290	}
291
292	// Update metadata
293	UpdateIndexMetadata(target)?;
294
295	Ok(merged_files)
296}
297
298/// Validate that index is in a consistent state
299pub fn ValidateIndexConsistency(index:&FileIndex) -> Result<()> {
300	// Check that all files in content_index exist in files
301	for (_, files) in &index.content_index {
302		for file_path in files {
303			if !index.files.contains_key(file_path) {
304				return Err(AirError::Internal(format!(
305					"Content index references non-existent file: {}",
306					file_path.display()
307				)));
308			}
309		}
310	}
311
312	// Check that all files in symbol_index exist in files
313	for (_, locations) in &index.symbol_index {
314		for location in locations {
315			if !index.files.contains_key(&location.file_path) {
316				return Err(AirError::Internal(format!(
317					"Symbol index references non-existent file: {}",
318					location.file_path.display()
319				)));
320			}
321		}
322	}
323
324	// Check that all files in file_symbols exist in files
325	for (file_path, _) in &index.file_symbols {
326		if !index.files.contains_key(file_path) {
327			return Err(AirError::Internal(format!(
328				"File symbols references non-existent file: {}",
329				file_path.display()
330			)));
331		}
332	}
333
334	Ok(())
335}
336
337/// Get index size estimate in bytes
338pub fn GetIndexSizeEstimate(index:&FileIndex) -> usize {
339	let mut size = 0;
340
341	// File metadata
342	for (path, _metadata) in &index.files {
343		size += path.as_os_str().len();
344
345		size += std::mem::size_of::<FileMetadata>();
346	}
347
348	// Content index
349	for (token, files) in &index.content_index {
350		size += token.len();
351
352		size += files.len() * std::mem::size_of::<PathBuf>();
353	}
354
355	// Symbol index
356	for (symbol, locations) in &index.symbol_index {
357		size += symbol.len();
358
359		size += locations.len() * std::mem::size_of::<SymbolLocation>();
360	}
361
362	// File symbols
363	for (path, symbols) in &index.file_symbols {
364		size += path.as_os_str().len();
365
366		size += symbols.len() * std::mem::size_of::<SymbolInfo>();
367	}
368
369	size
370}
371
372/// Check if periodic update is needed based on age
373pub fn NeedsUpdate(index:&FileIndex, max_age_minutes:u64) -> bool {
374	let age_minutes = (chrono::Utc::now() - index.last_updated).num_minutes().abs() as u64;
375
376	age_minutes >= max_age_minutes
377}