AirLibrary/Indexing/Scan/
ScanDirectory.rs1use std::{path::Path, sync::Arc};
68
69use tokio::sync::Semaphore;
70
71use crate::{
72 AirError,
73 Configuration::IndexingConfig,
74 Indexing::{Scan::ScanFile::ValidateFileAccess, State::CreateState::FileIndex},
75 Result,
76 dev_log,
77};
78
/// Summary counters describing the outcome of a directory scan.
#[derive(Debug, Clone)]
pub struct ScanDirectoryResult {
	/// Count of files that were accepted and queued for scanning.
	pub files_found:u32,

	/// Count of files that were deliberately passed over.
	pub files_skipped:u32,

	/// Count of failures encountered while scanning.
	pub errors:u32,

	/// Sum, in bytes, of the sizes of the accepted files.
	pub total_size:u64,
}
94
95pub async fn ScanDirectory(
105 path:&str,
106
107 patterns:Vec<String>,
108
109 config:&IndexingConfig,
110
111 _max_parallel:usize,
112) -> Result<(Vec<std::path::PathBuf>, ScanDirectoryResult)> {
113 let directory_path = crate::Configuration::ConfigurationManager::ExpandPath(path)?;
114
115 if !directory_path.exists() {
117 return Err(AirError::FileSystem(format!("Directory does not exist: {}", path)));
118 }
119
120 if !directory_path.is_dir() {
121 return Err(AirError::FileSystem(format!("Path is not a directory: {}", path)));
122 }
123
124 CheckDirectoryPermissions(&directory_path).await?;
126
127 let include_patterns = if patterns.is_empty() { config.FileTypes.clone() } else { patterns };
129
130 let walker = ignore::WalkBuilder::new(&directory_path)
132 .max_depth(Some(10)) .hidden(false)
134 .follow_links(false) .build();
136
137 let mut files_to_scan:Vec<std::path::PathBuf> = Vec::new();
138
139 let mut files_found = 0u32;
140
141 let mut files_skipped = 0u32;
142
143 let mut errors = 0u32;
144
145 let mut total_size = 0u64;
146
147 for result in walker {
149 match result {
150 Ok(entry) => {
151 if entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
153 let file_path = entry.path().to_path_buf();
154
155 if entry.path_is_symlink() {
157 dev_log!("indexing", "[ScanDirectory] Skipping symlink: {}", file_path.display());
158
159 files_skipped += 1;
160
161 continue;
162 }
163
164 if let Ok(metadata) = entry.metadata() {
166 let file_size = metadata.len();
167
168 if file_size > config.MaxFileSizeMb as u64 * 1024 * 1024 {
169 dev_log!(
170 "indexing",
171 "warn: [ScanDirectory] Skipping oversized file: {} ({} bytes)",
172 file_path.display(),
173 file_size
174 );
175
176 files_skipped += 1;
177
178 continue;
179 }
180
181 if MatchesPatterns(&file_path, &include_patterns) {
183 if ValidateFileAccess(&file_path).await {
185 files_to_scan.push(file_path);
186
187 files_found += 1;
188
189 total_size += file_size;
190 } else {
191 dev_log!(
192 "indexing",
193 "warn: [ScanDirectory] Cannot access file (permission denied): {}",
194 file_path.display()
195 );
196
197 errors += 1;
198 }
199 } else {
200 files_skipped += 1;
201 }
202 } else {
203 errors += 1;
204 }
205 }
206 },
207
208 Err(e) => {
209 dev_log!("indexing", "warn: [ScanDirectory] Error walking directory: {}", e);
210
211 errors += 1;
212 },
213 }
214 }
215
216 dev_log!(
217 "indexing",
218 "[ScanDirectory] Directory scan completed: {} files, {} skipped, {} errors, {} bytes",
219 files_found,
220 files_skipped,
221 errors,
222 total_size
223 );
224
225 Ok((
226 files_to_scan,
227 ScanDirectoryResult { files_found, files_skipped, errors, total_size },
228 ))
229}
230
231pub async fn ScanAndRemoveDeleted(index:&mut FileIndex, directory_path:&Path) -> Result<u32> {
233 let mut paths_to_remove = Vec::new();
234
235 let all_paths:Vec<_> = index.files.keys().cloned().collect();
236
237 for path in all_paths {
238 if !path.exists() && path.starts_with(directory_path) {
239 paths_to_remove.push(path.clone());
240 }
241 }
242
243 let removed_count = paths_to_remove.len();
244
245 for path in paths_to_remove {
246 index.files.remove(&path);
247
248 index.file_symbols.remove(&path);
249
250 for (_, locations) in index.symbol_index.iter_mut() {
252 locations.retain(|loc| loc.file_path != path);
253 }
254
255 for (_, files) in index.content_index.iter_mut() {
257 files.retain(|p| p != &path);
258 }
259 }
260
261 Ok(removed_count as u32)
262}
263
264async fn CheckDirectoryPermissions(path:&Path) -> Result<()> {
266 tokio::task::spawn_blocking({
267 let path = path.to_path_buf();
268 move || {
269 std::fs::read_dir(&path)
270 .map_err(|e| AirError::FileSystem(format!("Cannot read directory {}: {}", path.display(), e)))?;
271 Ok(())
272 }
273 })
274 .await?
275}
276
277pub fn MatchesPatterns(file_path:&std::path::Path, patterns:&[String]) -> bool {
279 if patterns.is_empty() {
280 return true;
281 }
282
283 let file_name = file_path.file_name().unwrap_or_default().to_string_lossy().to_string();
284
285 for pattern in patterns {
286 if MatchesPattern(&file_name, pattern) {
287 return true;
288 }
289 }
290
291 false
292}
293
/// Tests a single file name against a single pattern.
///
/// Two pattern forms are understood:
/// - `*.ext` matches any name that ends with `.ext` (the dot is part of the
///   comparison, so `*.rs` matches `main.rs` but not `cars`);
/// - anything else must equal the file name exactly.
pub fn MatchesPattern(filename:&str, pattern:&str) -> bool {
	match pattern.strip_prefix('*') {
		// Keep the leading dot in the suffix so only a real extension matches.
		Some(suffix) if suffix.starts_with('.') => filename.ends_with(suffix),

		_ => filename == pattern,
	}
}
304
/// Returns the built-in list of names and patterns that are excluded by
/// default: dependency and build output directories, version-control
/// metadata, Python caches and virtual environments, editor settings and
/// operating-system artefacts.
pub fn GetDefaultExcludePatterns() -> Vec<String> {
	const DEFAULTS:&[&str] = &[
		"node_modules",
		"target",
		".git",
		".svn",
		".hg",
		".bzr",
		"dist",
		"build",
		".next",
		".nuxt",
		"__pycache__",
		"*.pyc",
		".venv",
		"venv",
		"env",
		".env",
		".idea",
		".vscode",
		".DS_Store",
		"Thumbs.db",
	];

	DEFAULTS.iter().map(|entry| String::from(*entry)).collect()
}
330
331pub async fn ScanDirectoriesParallel(
333 directories:Vec<String>,
334
335 patterns:Vec<String>,
336
337 config:&IndexingConfig,
338
339 max_parallel:usize,
340) -> Result<(Vec<std::path::PathBuf>, ScanDirectoryResult)> {
341 let semaphore = Arc::new(Semaphore::new(max_parallel));
342
343 let mut all_files = Vec::new();
344
345 let mut total_result = ScanDirectoryResult { files_found:0, files_skipped:0, errors:0, total_size:0 };
346
347 let mut scan_tasks = Vec::new();
348
349 for directory in directories {
350 let permit = semaphore.clone().acquire_owned().await.unwrap();
351
352 let config_clone = config.clone();
353
354 let patterns_clone = patterns.clone();
355
356 let task = tokio::spawn(async move {
357 let _permit = permit;
358 ScanDirectory(&directory, patterns_clone, &config_clone, max_parallel).await
359 });
360
361 scan_tasks.push(task);
362 }
363
364 for task in scan_tasks {
366 match task.await {
367 Ok(Ok((files, result))) => {
368 all_files.extend(files);
369
370 total_result.files_found += result.files_found;
371
372 total_result.files_skipped += result.files_skipped;
373
374 total_result.errors += result.errors;
375
376 total_result.total_size += result.total_size;
377 },
378
379 Ok(Err(e)) => {
380 dev_log!("indexing", "error: [ScanDirectory] Parallel scan failed: {}", e);
381
382 total_result.errors += 1;
383 },
384
385 Err(e) => {
386 dev_log!("indexing", "error: [ScanDirectory] Parallel task panicked: {}", e);
387
388 total_result.errors += 1;
389 },
390 }
391 }
392
393 Ok((all_files, total_result))
394}
395
396pub async fn GetDirectoryStatistics(path:&str, max_depth:Option<usize>) -> Result<DirectoryStatistics> {
398 let directory_path = crate::Configuration::ConfigurationManager::ExpandPath(path)?;
399
400 if !directory_path.exists() || !directory_path.is_dir() {
401 return Err(AirError::FileSystem(format!("Invalid directory: {}", path)));
402 }
403
404 let mut file_count = 0u64;
405
406 let mut total_size = 0u64;
407
408 let mut directory_count = 0u64;
409
410 let mut hidden_count = 0u64;
411
412 let walker = ignore::WalkBuilder::new(&directory_path)
413 .max_depth(max_depth)
414 .hidden(true)
415 .follow_links(false)
416 .build();
417
418 for entry in walker.flatten() {
419 let file_type = entry.file_type().expect("Failed to get file type");
420
421 if file_type.is_file() {
422 file_count += 1;
423
424 if let Ok(metadata) = entry.metadata() {
425 total_size += metadata.len();
426 }
427 } else if file_type.is_dir() {
428 directory_count += 1;
429 }
430
431 if entry.depth() > 0
432 && entry
433 .path()
434 .components()
435 .any(|c| c.as_os_str().to_string_lossy().starts_with('.'))
436 {
437 hidden_count += 1;
438 }
439 }
440
441 Ok(DirectoryStatistics { file_count, directory_count, hidden_count, total_size })
442}
443
/// Counters gathered while walking a directory tree.
#[derive(Debug, Clone)]
pub struct DirectoryStatistics {
	/// Number of regular files visited.
	pub file_count:u64,

	/// Number of directories visited.
	pub directory_count:u64,

	/// Number of visited entries counted as hidden (a path component starts
	/// with `.`).
	pub hidden_count:u64,

	/// Sum, in bytes, of the sizes of the visited files whose metadata was
	/// readable.
	pub total_size:u64,
}