0
# Utilities
1
2
File system traversal, path normalization, and helper functions for working with file paths and pattern matching results. These utilities provide the foundation for pathspec's file system operations.
3
4
## Imports
5
6
```python
7
from pathspec import Pattern, RecursionError
from pathspec.util import iter_tree_files, iter_tree_entries, normalize_file
8
from pathspec.util import CheckResult, MatchDetail, TreeEntry, match_file, check_match_file
9
from typing import Any, Callable, Collection, Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union
10
import os
11
import pathlib
12
```
13
14
## Type Definitions
15
16
```python { .api }
17
StrPath = Union[str, os.PathLike[str]] # Python 3.9+
18
TStrPath = TypeVar("TStrPath", bound=StrPath)
19
```
20
21
## Capabilities
22
23
### Directory Tree Traversal
24
25
Functions for walking directory trees and yielding file information.
26
27
```python { .api }
28
def iter_tree_files(
29
root: Union[str, os.PathLike],
30
on_error: Optional[Callable[[OSError], None]] = None,
31
follow_links: Optional[bool] = None
32
) -> Iterator[str]:
33
"""
34
Walk directory tree yielding file paths.
35
36
Parameters:
37
- root: Root directory path to traverse
38
- on_error: Optional callback for handling OS errors during traversal
39
- follow_links: Whether to follow symbolic links that resolve to directories (None, the default, means True)
40
41
Yields:
42
File paths relative to root directory
43
44
Raises:
45
RecursionError: If directory recursion is detected
46
"""
47
48
def iter_tree_entries(
49
root: Union[str, os.PathLike],
50
on_error: Optional[Callable[[OSError], None]] = None,
51
follow_links: Optional[bool] = None
52
) -> Iterator[TreeEntry]:
53
"""
54
Walk directory tree yielding TreeEntry objects with detailed file information.
55
56
Parameters:
57
- root: Root directory path to traverse
58
- on_error: Optional callback for handling OS errors during traversal
59
- follow_links: Whether to follow symbolic links that resolve to directories (None, the default, means True)
60
61
Yields:
62
TreeEntry objects containing file system information
63
64
Raises:
65
RecursionError: If directory recursion is detected
66
"""
67
68
def iter_tree(
69
root: Union[str, os.PathLike],
70
on_error: Optional[Callable[[OSError], None]] = None,
71
follow_links: Optional[bool] = None
72
) -> Iterator[str]:
73
"""
74
DEPRECATED: Alias for iter_tree_files.
75
76
Walk directory tree yielding file paths.
77
Use iter_tree_files instead.
78
"""
79
```
80
81
### Path Normalization
82
83
Functions for normalizing and processing file paths for cross-platform compatibility.
84
85
```python { .api }
86
def normalize_file(
87
file: Union[str, os.PathLike],
88
separators: Optional[Collection[str]] = None
89
) -> str:
90
"""
91
Normalize file path to use POSIX separators and ensure relative paths.
92
93
Parameters:
94
- file: File path to normalize
95
- separators: Collection of path separators to replace with "/" (None, the default, uses os.sep and os.altsep, excluding "/" itself)
96
97
Returns:
98
Normalized file path using forward slashes and relative to current directory
99
"""
100
101
def normalize_files(
102
files: Iterable[Union[str, os.PathLike]],
103
separators: Optional[Collection[str]] = None
104
) -> Dict[str, List[Union[str, os.PathLike]]]:
105
"""
106
DEPRECATED: Normalize multiple file paths.
107
108
Parameters:
109
- files: Iterable of file paths to normalize
110
- separators: Collection of path separators to replace
111
112
Returns:
113
Dictionary mapping normalized paths to lists of original paths
114
"""
115
116
def append_dir_sep(path: pathlib.Path) -> str:
117
"""
118
Append directory separator to path if it's a directory.
119
Used to distinguish files from directories in pattern matching.
120
121
Parameters:
122
- path: Path object to process
123
124
Returns:
125
String path with trailing separator if directory, unchanged if file
126
"""
127
```
128
129
### Pattern Matching Utilities
130
131
Helper functions for working with pattern matching operations.
132
133
```python { .api }
134
def match_file(patterns: Iterable[Pattern], file: str) -> bool:
135
"""
136
Test if file matches any pattern in a collection.
137
Simple boolean matching without detailed results.
138
139
Parameters:
140
- patterns: Iterable of Pattern instances to test against
141
- file: File path to test
142
143
Returns:
144
True if the last pattern that matches the file is an include pattern; False if it is an exclude pattern or if no pattern matches
145
"""
146
147
def match_files(
148
patterns: Iterable[Pattern],
149
files: Iterable[str]
150
) -> Set[str]:
151
"""
152
DEPRECATED: Match files against patterns.
153
Use PathSpec.match_files instead.
154
155
Parameters:
156
- patterns: Iterable of Pattern instances
157
- files: Iterable of file paths to test
158
159
Returns:
160
Set of file paths that match the patterns
161
"""
162
163
def check_match_file(
164
patterns: Iterable[Tuple[int, Pattern]],
165
file: str
166
) -> Tuple[Optional[bool], Optional[int]]:
167
"""
168
Check file against indexed patterns with detailed results.
169
170
Parameters:
171
- patterns: Iterable of (index, Pattern) tuples
172
- file: File path to test
173
174
Returns:
175
Tuple of (match_result, pattern_index) where match_result is:
176
- True: file matches and should be included
177
- False: file matches and should be excluded
178
- None: file doesn't match any patterns
179
"""
180
181
def detailed_match_files(
182
patterns: Iterable[Pattern],
183
files: Iterable[str],
184
all_matches: Optional[bool] = None
185
) -> Dict[str, MatchDetail]:
186
"""
187
Match files against patterns with detailed information about which patterns matched.
188
189
Parameters:
190
- patterns: Iterable of Pattern instances
191
- files: Iterable of file paths to test
192
- all_matches: If True, record every pattern that matched each file; if False or None (the default), record only the last matching pattern
193
194
Returns:
195
Dictionary mapping file paths to MatchDetail objects
196
"""
197
```
198
199
### Data Classes
200
201
Classes for containing file system and match result information.
202
203
```python { .api }
204
class TreeEntry:
205
"""
206
Contains file system entry information from directory traversal.
207
208
Attributes:
209
- name (str): Entry name (basename)
210
- path (str): Full path relative to traversal root
211
- stat (os.stat_result): File system stat information
212
"""
213
name: str
214
path: str
215
stat: os.stat_result
216
217
def is_dir(self, follow_links: Optional[bool] = None) -> bool:
218
"""
219
Get whether the entry is a directory.
220
221
Parameters:
222
- follow_links: Whether to follow symbolic links when determining type
223
224
Returns:
225
True if entry is a directory
226
"""
227
228
def is_file(self, follow_links: Optional[bool] = None) -> bool:
229
"""
230
Get whether the entry is a regular file.
231
232
Parameters:
233
- follow_links: Whether to follow symbolic links when determining type
234
235
Returns:
236
True if entry is a regular file
237
"""
238
239
def is_symlink(self) -> bool:
240
"""
241
Get whether the entry is a symbolic link.
242
243
Returns:
244
True if entry is a symbolic link
245
"""
246
247
class CheckResult(Generic[TStrPath]):
248
"""
249
Contains detailed results from pattern matching operations.
250
251
Type Parameters:
252
- TStrPath: Type of the file path (str or os.PathLike[str])
253
254
Attributes:
255
- file (TStrPath): File path that was tested
256
- include (Optional[bool]): Match result - True (include), False (exclude), None (no match)
257
- index (Optional[int]): Index of the pattern that produced the result
258
"""
259
file: TStrPath
260
include: Optional[bool]
261
index: Optional[int]
262
263
class MatchDetail:
264
"""
265
Contains information about which patterns matched during detailed matching.
266
267
Attributes:
- patterns (Sequence[Pattern]): Patterns that matched the file, in the order they were encountered
268
"""
269
```
270
271
### Exception Classes
272
273
Specialized exceptions for utility operations.
274
275
```python { .api }
276
class RecursionError(Exception):
277
"""
278
Raised when directory recursion is detected during tree traversal.
279
Prevents infinite loops from circular symbolic links.
280
"""
281
282
class AlreadyRegisteredError(Exception):
283
"""
284
Raised when attempting to register a pattern factory name that already exists.
285
"""
286
```
287
288
### Internal Utilities
289
290
Internal helper functions used by the pathspec system.
291
292
```python { .api }
293
def _filter_check_patterns(
294
patterns: Iterable[Tuple[int, Pattern]]
295
) -> List[Tuple[int, Pattern]]:
296
"""
297
Internal: Filter and prepare patterns for checking operations.
298
"""
299
300
def _is_iterable(value: Any) -> bool:
301
"""
302
Internal: Test if a value is iterable but not a string.
303
"""
304
305
def _iter_tree_entries_next(
306
dir_entry: os.DirEntry,
307
root_full: str,
308
memo: Set[int]
309
) -> Iterator[TreeEntry]:
310
"""
311
Internal: Process directory entries during tree traversal.
312
"""
313
```
314
315
## Usage Examples
316
317
### Directory Tree Traversal
318
319
```python
320
import pathspec
321
322
# Basic file iteration
323
for file_path in pathspec.util.iter_tree_files("/path/to/project"):
324
print(f"Found file: {file_path}")
325
326
# Detailed file information
327
for entry in pathspec.util.iter_tree_entries("/path/to/project"):
328
print(f"Entry: {entry.name}")
329
print(f" Path: {entry.path}")
330
print(f" Type: {'dir' if entry.is_dir() else 'file'}")
331
print(f" Size: {entry.stat.st_size}")
332
print(f" Modified: {entry.stat.st_mtime}")
333
334
# Handle errors during traversal
335
def handle_error(error):
336
print(f"Warning: Cannot access {error.filename}: {error}")
337
338
for file_path in pathspec.util.iter_tree_files(
339
"/path/to/project",
340
on_error=handle_error
341
):
342
print(f"Accessible file: {file_path}")
343
344
# Follow symbolic links
345
for file_path in pathspec.util.iter_tree_files(
346
"/path/to/project",
347
follow_links=True
348
):
349
print(f"File (including symlinks): {file_path}")
350
```
351
352
### Path Normalization
353
354
```python
355
import pathspec
356
357
# Normalize paths for cross-platform compatibility
358
windows_path = "src\\utils\\helper.py"
359
normalized = pathspec.util.normalize_file(windows_path)
360
print(normalized) # "src/utils/helper.py" on Windows; unchanged on POSIX, where backslash is not a path separator
361
362
# Normalize with custom separators
363
weird_path = "src|utils|helper.py"
364
normalized = pathspec.util.normalize_file(weird_path, separators=["|"])
365
print(normalized) # "src/utils/helper.py"
366
367
# Directory path handling
368
import pathlib
369
dir_path = pathlib.Path("/project/src")
370
dir_with_sep = pathspec.util.append_dir_sep(dir_path)
371
print(dir_with_sep) # "/project/src/" (if directory)
372
```
373
374
### Direct Pattern Matching
375
376
```python
377
import pathspec
378
from pathspec.patterns import GitWildMatchPattern
379
380
# Create patterns
381
patterns = [
382
GitWildMatchPattern("*.py"),
383
GitWildMatchPattern("!test_*.py"),
384
]
385
386
# Simple boolean matching
387
files = ["main.py", "test_main.py", "utils.py"]
388
for file in files:
389
matches = pathspec.util.match_file(patterns, file)
390
print(f"{file}: {'matches' if matches else 'no match'}")
391
392
# Detailed matching with pattern information
393
indexed_patterns = list(enumerate(patterns))
394
for file in files:
395
result, index = pathspec.util.check_match_file(indexed_patterns, file)
396
if result is True:
397
print(f"{file}: included by pattern {index}")
398
elif result is False:
399
print(f"{file}: excluded by pattern {index}")
400
else:
401
print(f"{file}: no match")
402
```
403
404
### Working with CheckResult
405
406
```python
407
import pathspec
408
409
# Create PathSpec and check files
410
spec = pathspec.PathSpec.from_lines('gitwildmatch', [
411
"*.py", # Pattern 0
412
"!test_*.py" # Pattern 1
413
])
414
415
files = ["main.py", "test_main.py", "utils.py"]
416
417
# Process detailed results
418
for result in spec.check_files(files):
419
print(f"File: {result.file}")
420
421
if result.include is True:
422
print(f" Status: INCLUDED (pattern {result.index})")
423
elif result.include is False:
424
print(f" Status: EXCLUDED (pattern {result.index})")
425
else:
426
print(f" Status: NO MATCH")
427
```
428
429
### Working with TreeEntry
430
431
```python
432
import pathspec
433
434
# Filter tree entries by type
435
for entry in pathspec.util.iter_tree_entries("/project"):
436
if entry.is_dir():
437
print(f"Directory: {entry.path}")
438
elif entry.is_file():
439
print(f"File: {entry.path} ({entry.stat.st_size} bytes)")
440
elif entry.is_symlink():
441
print(f"Symlink: {entry.path}")
442
443
# Use TreeEntry with PathSpec
444
spec = pathspec.PathSpec.from_lines('gitwildmatch', ["*.py"])
445
446
matching_entries = list(spec.match_entries(
447
pathspec.util.iter_tree_entries("/project")
448
))
449
450
for entry in matching_entries:
451
print(f"Matching file: {entry.path}")
452
```
453
454
### Error Handling
455
456
```python
457
import pathspec
458
459
try:
460
# This might raise RecursionError if there are circular symlinks
461
files = list(pathspec.util.iter_tree_files("/project", follow_links=True))
462
except pathspec.RecursionError as e:
463
print(f"Circular symlink detected: {e}")
464
465
# Custom error handling
466
def log_and_continue(error):
467
"""Log errors but continue traversal."""
468
print(f"Error accessing {error.filename}: {error}")
469
470
# Traverse with error handling
471
safe_files = list(pathspec.util.iter_tree_files(
472
"/project",
473
on_error=log_and_continue,
474
follow_links=True
475
))
476
```
477
478
### Advanced TreeEntry Usage
479
480
```python
481
import pathspec
482
import os
483
import time
484
485
# Analyze project structure
486
total_size = 0
487
file_types = {}
488
recent_files = []
489
cutoff_time = time.time() - (7 * 24 * 60 * 60) # 7 days ago
490
491
for entry in pathspec.util.iter_tree_entries("/project"):
492
if entry.is_file():
493
# Track size
494
total_size += entry.stat.st_size
495
496
# Track file types
497
ext = os.path.splitext(entry.name)[1].lower()
498
file_types[ext] = file_types.get(ext, 0) + 1
499
500
# Track recent files
501
if entry.stat.st_mtime > cutoff_time:
502
recent_files.append(entry.path)
503
504
print(f"Total size: {total_size} bytes")
505
print(f"File types: {file_types}")
506
print(f"Recent files: {len(recent_files)}")
507
```