# Directory Operations

Directory management capabilities including creation, deletion, listing, traversal, and pattern matching. These operations provide cloud-native directory handling that behaves consistently across cloud storage services, even object stores that have no true directory concept and instead emulate directories through key prefixes.

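The same directory code runs against any supported provider: `CloudPath` dispatches to a provider-specific subclass based on the URI scheme. A minimal sketch (the bucket and container names are placeholders, and credentials for each provider are assumed to be configured):

```python
from cloudpathlib import CloudPath

# CloudPath("s3://...") returns an S3Path, "gs://..." a GSPath, and "az://..."
# an AzureBlobPath; the directory API below is the same for all of them.
for uri in [
    "s3://example-bucket/data/",
    "gs://example-bucket/data/",
    "az://example-container/data/",
]:
    root = CloudPath(uri)
    print(type(root).__name__, root.exists())
```
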
## Capabilities

### Directory Existence and Type Checking

Check if paths exist and determine their types.

```python { .api }
def exists(self) -> bool:
    """
    Check if path exists in cloud storage.

    Returns:
        True if path exists
    """

def is_file(self) -> bool:
    """
    Check if path is a file.

    Returns:
        True if path points to a file
    """

def is_dir(self) -> bool:
    """
    Check if path is a directory.

    Returns:
        True if path points to a directory
    """
```

### Directory Creation

Create directories with flexible parent handling.

```python { .api }
def mkdir(
    self,
    parents: bool = False,
    exist_ok: bool = False
) -> None:
    """
    Create directory.

    Args:
        parents: Create parent directories if needed
        exist_ok: Don't raise error if directory exists

    Raises:
        CloudPathFileExistsError: Directory exists and exist_ok=False
        CloudPathNotExistsError: Parent doesn't exist and parents=False
    """
```

### Directory Listing

List directory contents and iterate over files and subdirectories.

```python { .api }
def iterdir(self) -> typing.Iterator["CloudPath"]:
    """
    Iterate over directory contents.

    Returns:
        Iterator of CloudPath objects for directory contents

    Raises:
        CloudPathNotADirectoryError: Path is not a directory
    """
```

### Directory Removal

Remove directories and directory trees.

```python { .api }
def rmdir(self) -> None:
    """
    Remove empty directory.

    Raises:
        DirectoryNotEmptyError: Directory contains files
        CloudPathNotExistsError: Directory doesn't exist
    """

def rmtree(self) -> None:
    """
    Remove directory tree recursively.
    Removes all files and subdirectories.
    """
```

### Pattern Matching

Find files and directories using glob patterns.

```python { .api }
def glob(self, pattern: str) -> typing.Iterator["CloudPath"]:
    """
    Find paths matching glob pattern.

    Args:
        pattern: Glob pattern (e.g., "*.txt", "data/*")

    Returns:
        Iterator of matching CloudPath objects
    """

def rglob(self, pattern: str) -> typing.Iterator["CloudPath"]:
    """
    Find paths matching pattern recursively.

    Args:
        pattern: Glob pattern to match

    Returns:
        Iterator of matching CloudPath objects in all subdirectories
    """
```

### Directory Walking

Traverse directory trees with full control over traversal order.

```python { .api }
def walk(
    self,
    top_down: bool = True
) -> typing.Iterator[typing.Tuple["CloudPath", typing.List[str], typing.List[str]]]:
    """
    Walk directory tree.

    Args:
        top_down: Visit directories top-down if True, bottom-up if False

    Returns:
        Iterator of (directory_path, subdirectory_names, file_names) tuples
    """
```

## Usage Examples

### Basic Directory Operations

```python
from cloudpathlib import CloudPath

# Check if directory exists
dir_path = CloudPath("s3://my-bucket/data/")
if dir_path.exists():
    print("Directory exists")

# Check path type
if dir_path.is_dir():
    print("This is a directory")
elif dir_path.is_file():
    print("This is a file")
```

### Creating Directories

```python
# Create single directory
dir_path = CloudPath("s3://my-bucket/new-folder/")
dir_path.mkdir(exist_ok=True)

# Create nested directories
nested_path = CloudPath("s3://my-bucket/level1/level2/level3/")
nested_path.mkdir(parents=True, exist_ok=True)

# Handle creation errors
try:
    dir_path.mkdir()
except CloudPathFileExistsError:
    print("Directory already exists")
```

### Listing Directory Contents

```python
# List all items in directory
dir_path = CloudPath("s3://my-bucket/data/")

for item in dir_path.iterdir():
    if item.is_file():
        print(f"File: {item.name}")
    elif item.is_dir():
        print(f"Directory: {item.name}")

# Get lists of files and directories
files = [item for item in dir_path.iterdir() if item.is_file()]
dirs = [item for item in dir_path.iterdir() if item.is_dir()]
```

### Pattern Matching with Glob

```python
# Find all text files
base_path = CloudPath("s3://my-bucket/")

# Non-recursive glob
txt_files = list(base_path.glob("*.txt"))
print(f"Found {len(txt_files)} .txt files")

# Recursive glob
all_txt_files = list(base_path.rglob("*.txt"))
print(f"Found {len(all_txt_files)} .txt files recursively")

# Complex patterns
csv_files = list(base_path.glob("data/**/*.csv"))
log_files = list(base_path.rglob("logs/*.log"))
```

### Advanced Pattern Matching

```python
# Multiple file extensions
base_path = CloudPath("s3://my-bucket/")

# Find multiple types
data_files = []
for pattern in ["*.csv", "*.json", "*.parquet"]:
    data_files.extend(base_path.rglob(pattern))

# Find files with specific naming
report_files = list(base_path.glob("reports/report-*.pdf"))
dated_logs = list(base_path.glob("logs/2024-*/access.log"))
```

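Glob patterns also support `fnmatch`-style character classes, which can narrow a range without issuing multiple calls. A small sketch (the directory layout is illustrative):

```python
# Bracket character class: access logs for January through June 2024
first_half_logs = list(base_path.glob("logs/2024-0[1-6]/access.log"))
```
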
### Directory Walking

```python
# Walk entire directory tree
base_path = CloudPath("s3://my-bucket/data/")

for root, dirs, files in base_path.walk():
    print(f"Directory: {root}")
    print(f"  Subdirectories: {dirs}")
    print(f"  Files: {files}")
    print()

# Process all files recursively
for root, dirs, files in base_path.walk():
    for filename in files:
        file_path = root / filename
        if file_path.suffix == '.txt':
            process_text_file(file_path)
```

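`walk()` also supports bottom-up traversal through the `top_down` flag described above, which is useful when leaf directories should be handled before their parents. A brief sketch (the bucket path and the reporting condition are illustrative):

```python
# Visit the deepest directories first, then work back toward the root
base_path = CloudPath("s3://my-bucket/data/")

for root, dirs, files in base_path.walk(top_down=False):
    # Example: report directories that contain no files, deepest first
    if not files:
        print(f"No files directly under: {root}")
```
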
### Directory Tree Operations

```python
# Create directory structure
base = CloudPath("s3://my-bucket/project/")
(base / "src").mkdir(parents=True, exist_ok=True)
(base / "tests").mkdir(exist_ok=True)
(base / "docs").mkdir(exist_ok=True)
(base / "data" / "raw").mkdir(parents=True, exist_ok=True)
(base / "data" / "processed").mkdir(exist_ok=True)

# Remove directory tree
old_project = CloudPath("s3://my-bucket/old-project/")
if old_project.exists():
    old_project.rmtree()
```

### Safe Directory Operations

```python
from cloudpathlib import DirectoryNotEmptyError, CloudPathNotExistsError

dir_path = CloudPath("s3://my-bucket/temp/")

# Safe directory removal
try:
    dir_path.rmdir()  # Remove empty directory
except DirectoryNotEmptyError:
    print("Directory not empty, use rmtree() to remove recursively")
    dir_path.rmtree()
except CloudPathNotExistsError:
    print("Directory doesn't exist")

# Check before operations
if dir_path.exists() and dir_path.is_dir():
    # Safe to perform directory operations
    for item in dir_path.iterdir():
        print(item)
```

### Finding Specific Files

```python
# Find files by extension
base_path = CloudPath("s3://my-bucket/")

# All Python files
py_files = list(base_path.rglob("*.py"))

# All image files
image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.gif"]
images = []
for ext in image_extensions:
    images.extend(base_path.rglob(ext))

# Find configuration files
config_files = list(base_path.rglob("config.*"))
```

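Glob patterns match object keys case-sensitively, so the patterns above would miss files like `photo.JPG`. One way to match extensions case-insensitively is to filter on the lowercased suffix instead of globbing per pattern; a sketch (the extension set is illustrative):

```python
# Case-insensitive extension matching: compare lowercased suffixes
base_path = CloudPath("s3://my-bucket/")
wanted = {".jpg", ".jpeg", ".png", ".gif"}

images = [
    p for p in base_path.rglob("*")
    if p.is_file() and p.suffix.lower() in wanted
]
```
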
### Directory Size and Statistics

```python
def get_directory_size(dir_path):
    """Calculate the total size and file count of a directory tree."""
    total_size = 0
    file_count = 0

    for root, dirs, files in dir_path.walk():
        for filename in files:
            file_path = root / filename
            try:
                stats = file_path.stat()
                total_size += stats.st_size
                file_count += 1
            except Exception:
                continue

    return total_size, file_count

# Usage
dir_path = CloudPath("s3://my-bucket/data/")
size, count = get_directory_size(dir_path)
print(f"Directory contains {count} files totaling {size} bytes")
```

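A similar traversal can summarize what kinds of files a directory holds, for example counting files per extension. A short sketch using `rglob` and `collections.Counter` (the bucket path is a placeholder):

```python
from collections import Counter

base_path = CloudPath("s3://my-bucket/data/")

# Count files per (lowercased) extension across the whole tree
extension_counts = Counter(
    p.suffix.lower() or "<no extension>"
    for p in base_path.rglob("*")
    if p.is_file()
)

for ext, count in extension_counts.most_common():
    print(f"{ext}: {count}")
```
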
### Organizing Files

```python
# Organize files by type
source_dir = CloudPath("s3://my-bucket/uploads/")
target_base = CloudPath("s3://my-bucket/organized/")

# Create organization structure
(target_base / "images").mkdir(parents=True, exist_ok=True)
(target_base / "documents").mkdir(exist_ok=True)
(target_base / "data").mkdir(exist_ok=True)

# Organize by file type
for file_path in source_dir.rglob("*"):
    if file_path.is_file():
        if file_path.suffix.lower() in ['.jpg', '.png', '.gif']:
            target = target_base / "images" / file_path.name
        elif file_path.suffix.lower() in ['.pdf', '.doc', '.txt']:
            target = target_base / "documents" / file_path.name
        elif file_path.suffix.lower() in ['.csv', '.json', '.xml']:
            target = target_base / "data" / file_path.name
        else:
            continue

        file_path.copy(target)
```

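The snippet above copies files and leaves the originals in place under `uploads/`. If the goal is to move them instead, one approach is to copy and then delete the source; a sketch that assumes `unlink()` removes the source object after a successful copy (paths and extensions are illustrative):

```python
source_dir = CloudPath("s3://my-bucket/uploads/")
target_base = CloudPath("s3://my-bucket/organized/")

# Move instead of copy: copy each file to its target, then remove the source
for file_path in source_dir.rglob("*"):
    if file_path.is_file() and file_path.suffix.lower() in ['.jpg', '.png', '.gif']:
        target = target_base / "images" / file_path.name
        file_path.copy(target)
        file_path.unlink()  # assumed to delete the source file object
```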