Skip to main content

AirLibrary/Indexing/Store/
StoreEntry.rs

1//! # StoreEntry
2//!
3//! ## File: Indexing/Store/StoreEntry.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides index storage functionality for the File Indexer service,
8//! handling serialization and persistence of the file index to disk.
9//!
10//! ## Primary Responsibility
11//!
12//! Store the file index to disk with atomic writes and corruption recovery
13//! mechanisms.
14//!
15//! ## Secondary Responsibilities
16//!
17//! - Load index from disk with validation
18//! - Backup corrupted indexes automatically
19//! - Atomic writes using temp files
20//! - Index integrity verification
21//!
22//! ## Dependencies
23//!
24//! **External Crates:**
25//! - `serde_json` - JSON serialization/deserialization
26//! - `tokio` - Async file I/O operations
27//!
28//! **Internal Modules:**
29//! - `crate::Result` - Error handling type
30//! - `crate::AirError` - Error types
31//! - `super::super::FileIndex` - Index structure definitions
32//! - `super::super::State::CreateState` - State creation utilities
33//!
34//! ## Dependents
35//!
36//! - `Indexing::mod::FileIndexer` - Main file indexer implementation
37//!
38//! ## VSCode Pattern Reference
39//!
40//! Inspired by VSCode's index storage in
41//! `src/vs/workbench/services/search/common/`
42//!
43//! ## Security Considerations
44//!
45//! - Atomic writes prevent partial index corruption
46//! - Permission checking on index directory
47//! - Path traversal protection
48//!
49//! ## Performance Considerations
50//!
51//! - Temp file pattern for atomic writes
52//! - Lazy loading of in-memory index
53//! - Efficient serialization with serde
54//!
55//! ## Error Handling Strategy
56//!
57//! Storage operations return detailed error messages for failures and
58//! automatically backup corrupted indexes when loading fails.
59//!
60//! ## Thread Safety
61//!
62//! Storage operations use async file I/O and return results that can be
63//! safely merged into shared Arc<RwLock<>> state.
64
65use std::path::{Path, PathBuf};
66
67use crate::{AirError, Indexing::State::CreateState::FileIndex, Result, dev_log};
68
69/// Save index to disk with atomic write
70pub async fn SaveIndex(index_directory:&Path, index:&FileIndex) -> Result<()> {
71	let index_file = index_directory.join("file_index.json");
72
73	let temp_file = index_directory.join("file_index.json.tmp");
74
75	let content = serde_json::to_string_pretty(index)
76		.map_err(|e| AirError::Serialization(format!("Failed to serialize index: {}", e)))?;
77
78	// Write to temp file first
79	tokio::fs::write(&temp_file, content)
80		.await
81		.map_err(|e| AirError::FileSystem(format!("Failed to write temp index file: {}", e)))?;
82
83	// Atomic rename
84	tokio::fs::rename(&temp_file, &index_file)
85		.await
86		.map_err(|e| AirError::FileSystem(format!("Failed to rename index file: {}", e)))?;
87
88	dev_log!(
89		"indexing",
90		"[StoreEntry] Index saved to: {} ({} files, {} symbols)",
91		index_file.display(),
92		index.files.len(),
93		index.symbol_index.len()
94	);
95
96	Ok(())
97}
98
99/// Load index from disk with corruption detection
100pub async fn LoadIndex(index_directory:&Path) -> Result<FileIndex> {
101	let index_file = index_directory.join("file_index.json");
102
103	if !index_file.exists() {
104		return Err(AirError::FileSystem(format!(
105			"Index file does not exist: {}",
106			index_file.display()
107		)));
108	}
109
110	let content = tokio::fs::read_to_string(&index_file)
111		.await
112		.map_err(|e| AirError::FileSystem(format!("Failed to read index file: {}", e)))?;
113
114	let index:FileIndex = serde_json::from_str(&content)
115		.map_err(|e| AirError::Serialization(format!("Failed to parse index file: {}", e)))?;
116
117	// Verify index structure
118	if index.index_version.is_empty() || index.index_checksum.is_empty() {
119		return Err(AirError::Serialization("Index missing version or checksum".to_string()));
120	}
121
122	// Verify index checksum
123	use crate::Indexing::State::CreateState::CalculateIndexChecksum;
124
125	let expected_checksum = CalculateIndexChecksum(&index)?;
126
127	if index.index_checksum != expected_checksum {
128		return Err(AirError::Serialization(format!(
129			"Index checksum mismatch: expected {}, got {}",
130			expected_checksum, index.index_checksum
131		)));
132	}
133
134	Ok(index)
135}
136
137/// Load or create index with corruption detection
138pub async fn LoadOrCreateIndex(index_directory:&Path) -> Result<FileIndex> {
139	let index_file = index_directory.join("file_index.json");
140
141	if index_file.exists() {
142		// Try to load existing index
143		match LoadIndex(index_directory).await {
144			Ok(index) => {
145				dev_log!("indexing", "[StoreEntry] Loaded index with {} files", index.files.len());
146
147				Ok(index)
148			},
149
150			Err(e) => {
151				dev_log!(
152					"indexing",
153					"warn: [StoreEntry] Failed to load index (may be corrupted): {}. Creating new index.",
154					e
155				);
156
157				// Backup corrupted index
158				BackupCorruptedIndex(index_directory).await?;
159
160				Ok(CreateNewIndex())
161			},
162		}
163	} else {
164		// Create new index
165		Ok(CreateNewIndex())
166	}
167}
168
169/// Create a new empty index
170fn CreateNewIndex() -> FileIndex {
171	use crate::Indexing::State::CreateState::CreateNewIndex as StateCreateNewIndex;
172
173	StateCreateNewIndex()
174}
175
176/// Ensure index directory exists with proper error handling
177pub async fn EnsureIndexDirectory(index_directory:&Path) -> Result<()> {
178	tokio::fs::create_dir_all(index_directory).await.map_err(|e| {
179		AirError::Configuration(format!("Failed to create index directory {}: {}", index_directory.display(), e))
180	})?;
181
182	Ok(())
183}
184
185/// Backup corrupted index before creating new one
186pub async fn BackupCorruptedIndex(index_directory:&Path) -> Result<()> {
187	let index_file = index_directory.join("file_index.json");
188
189	let backup_file = index_directory.join(format!("file_index.corrupted.{}.json", chrono::Utc::now().timestamp()));
190
191	if !index_file.exists() {
192		return Ok(());
193	}
194
195	// Rename corrupted file to backup
196	tokio::fs::rename(&index_file, &backup_file)
197		.await
198		.map_err(|e| AirError::FileSystem(format!("Failed to backup corrupted index: {}", e)))?;
199
200	dev_log!(
201		"indexing",
202		"[StoreEntry] Backed up corrupted index to: {}",
203		backup_file.display()
204	);
205
206	Ok(())
207}
208
209/// Load index with automatic recovery on corruption
210pub async fn LoadIndexWithRecovery(index_directory:&Path, max_retries:usize) -> Result<FileIndex> {
211	let mut last_error = None;
212
213	for attempt in 0..max_retries {
214		match LoadOrCreateIndex(index_directory).await {
215			Ok(index) => {
216				if attempt > 0 {
217					dev_log!(
218						"indexing",
219						"[StoreEntry] Successfully loaded index after {} attempts",
220						attempt + 1
221					);
222				}
223
224				return Ok(index);
225			},
226
227			Err(e) => {
228				last_error = Some(e);
229
230				dev_log!("indexing", "warn: [StoreEntry] Load attempt {} failed", attempt + 1);
231
232				// Wait before retry
233				if attempt < max_retries - 1 {
234					tokio::time::sleep(tokio::time::Duration::from_millis(100 * (attempt + 1) as u64)).await;
235				}
236			},
237		}
238	}
239
240	Err(last_error.unwrap_or_else(|| AirError::Internal("Failed to load index after retries".to_string())))
241}
242
/// Build the path of the index file: `index_directory` followed by
/// `file_index.json`.
///
/// This is pure path manipulation; the file system is not touched.
pub fn GetIndexFilePath(index_directory:&Path) -> PathBuf {
	let mut location = index_directory.to_path_buf();

	location.push("file_index.json");

	location
}
245
246/// Check if index file exists and is readable
247pub async fn IndexFileExists(index_directory:&Path) -> Result<bool> {
248	let index_file = index_directory.join("file_index.json");
249
250	if !index_file.exists() {
251		return Ok(false);
252	}
253
254	// Try to read metadata to verify accessibility
255	match tokio::fs::metadata(&index_file).await {
256		Ok(_) => Ok(true),
257
258		Err(_) => Ok(false),
259	}
260}
261
262/// Get index file size in bytes
263pub async fn GetIndexFileSize(index_directory:&Path) -> Result<u64> {
264	let index_file = index_directory.join("file_index.json");
265
266	let metadata = tokio::fs::metadata(&index_file)
267		.await
268		.map_err(|e| AirError::FileSystem(format!("Failed to get index file metadata: {}", e)))?;
269
270	Ok(metadata.len())
271}
272
273/// Clean up old backup files
274pub async fn CleanupOldBackups(index_directory:&Path, keep_count:usize) -> Result<usize> {
275	let mut entries = tokio::fs::read_dir(index_directory)
276		.await
277		.map_err(|e| AirError::FileSystem(format!("Failed to read index directory: {}", e)))?;
278
279	let mut backups = Vec::new();
280
281	while let Some(entry) = entries
282		.next_entry()
283		.await
284		.map_err(|e| AirError::FileSystem(format!("Failed to read directory entry: {}", e)))?
285	{
286		let file_name = entry.file_name().to_string_lossy().to_string();
287
288		if file_name.starts_with("file_index.corrupted.") && file_name.ends_with(".json") {
289			if let Ok(metadata) = entry.metadata().await {
290				if let Ok(modified) = metadata.modified() {
291					backups.push((entry.path(), modified));
292				}
293			}
294		}
295	}
296
297	// Sort by modified time (oldest first)
298	backups.sort_by_key(|b| b.1);
299
300	let mut removed_count = 0;
301
302	// Remove old backups beyond keep_count
303	for (path, _) in backups.iter().take(backups.len().saturating_sub(keep_count)) {
304		match tokio::fs::remove_file(path).await {
305			Ok(_) => {
306				dev_log!("indexing", "[StoreEntry] Removed old backup: {}", path.display());
307
308				removed_count += 1;
309			},
310
311			Err(e) => {
312				dev_log!(
313					"indexing",
314					"warn: [StoreEntry] Failed to remove backup {}: {}",
315					path.display(),
316					e
317				);
318			},
319		}
320	}
321
322	Ok(removed_count)
323}
324
325/// Validate index file format before loading
326pub async fn ValidateIndexFormat(index_directory:&Path) -> Result<()> {
327	let index_file = index_directory.join("file_index.json");
328
329	let content = tokio::fs::read_to_string(&index_file)
330		.await
331		.map_err(|e| AirError::FileSystem(format!("Failed to read index file: {}", e)))?;
332
333	// Try to parse as JSON
334	let _:serde_json::Value = serde_json::from_str(&content)
335		.map_err(|e| AirError::Serialization(format!("Index file is not valid JSON: {}", e)))?;
336
337	Ok(())
338}