0
# Dataset Manipulation
1
2
Comprehensive dataset management providing dict-like access to DICOM elements with full support for the DICOM data model, validation, serialization, and advanced dataset operations.
3
4
## Capabilities
5
6
### Core Dataset Class
7
8
The primary container for DICOM data elements, providing dictionary-like access with DICOM-specific functionality.
9
10
```python { .api }
11
class Dataset:
12
"""
13
A dictionary-like container for DICOM data elements.
14
15
Supports standard dictionary operations plus DICOM-specific functionality
16
for element management, validation, and serialization.
17
"""
18
19
def __init__(self):
20
"""Initialize empty dataset."""
21
22
def __getitem__(self, key):
23
"""
24
Get data element by tag or keyword.
25
26
Parameters:
27
- key: int, tuple, or str - DICOM tag or keyword
28
29
Returns:
30
DataElement object (use its .value attribute for the stored value)
31
"""
32
33
def __setitem__(self, key, value):
34
"""
35
Set data element value by tag or keyword.
36
37
Parameters:
38
- key: int, tuple, or str - DICOM tag or keyword
39
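- value: DataElement - Element to store; plain values raise TypeError (use attribute assignment such as ds.PatientID = "12345" to set a value directly)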
40
"""
41
42
def __delitem__(self, key):
43
"""
44
Delete data element by tag or keyword.
45
46
Parameters:
47
- key: int, tuple, or str - DICOM tag or keyword
48
"""
49
50
def __contains__(self, key):
51
"""
52
Check if dataset contains element.
53
54
Parameters:
55
- key: int, tuple, or str - DICOM tag or keyword
56
57
Returns:
58
bool - True if element exists
59
"""
60
61
def keys(self):
62
"""Return iterator over dataset tags."""
63
64
def values(self):
65
"""Return view of the dataset's DataElement objects."""
66
67
def items(self):
68
"""Return iterator over (tag, data_element) pairs."""
69
70
def get(self, key, default=None):
71
"""
72
Get element value with default.
73
74
Parameters:
75
- key: int, tuple, or str - DICOM tag or keyword
76
- default: Any - Default value if element not found
77
78
Returns:
79
Element value (when key is a keyword str) or DataElement object (when key is a tag), or default if not found
80
"""
81
82
def pop(self, key, *args):
83
"""
84
Remove element and return its value.
85
86
Parameters:
87
- key: int, tuple, or str - DICOM tag or keyword
88
- *args: Any - Optional single default returned instead of raising KeyError if element not found
89
90
Returns:
91
The removed DataElement, or the default if one was given and the element is not present
92
"""
93
```
94
95
### Element Management
96
97
Methods for adding, modifying, and managing DICOM data elements with proper validation and type handling.
98
99
```python { .api }
100
class Dataset:
101
def add(self, data_element):
102
"""
103
Add a DataElement to the dataset.
104
105
Parameters:
106
- data_element: DataElement - Element to add
107
"""
108
109
def add_new(self, tag, VR, value):
110
"""
111
Create and add new data element.
112
113
Parameters:
114
- tag: int or tuple - DICOM tag
115
- VR: str - Value Representation
116
- value: Any - Element value
117
"""
118
119
def data_element(self, tag):
120
"""
121
Return the full DataElement object.
122
123
Parameters:
124
- tag: int, tuple, or str - DICOM tag or keyword
125
126
Returns:
127
DataElement object
128
"""
129
130
def get_private_item(self, group, element_offset, private_creator):
131
"""
132
Get private data element.
133
134
Parameters:
135
- group: int - Private group number
136
- element_offset: int - Low byte (0x00-0xFF) of the element number within the creator's block
137
- private_creator: str - Private creator identification
138
139
Returns:
140
DataElement object
141
"""
142
143
def private_block(self, group, private_creator, create=False):
144
"""
145
Return private block for managing private elements.
146
147
Parameters:
148
- group: int - Private group number
149
- private_creator: str - Private creator identification
150
- create: bool - Create block if it doesn't exist
151
152
Returns:
153
PrivateBlock object
154
"""
155
```
156
157
### Pixel Data Operations
158
159
Methods for accessing and manipulating pixel data with support for various formats and processing operations.
160
161
```python { .api }
162
class Dataset:
163
@property
164
def pixel_array(self):
165
"""
166
Return pixel data as NumPy array.
167
168
Returns:
169
ndarray - Pixel data array with appropriate shape and dtype
170
171
Raises:
172
AttributeError - If no pixel data present
173
ImportError - If NumPy not available
174
"""
175
176
def compress(self, transfer_syntax_uid, encoding_plugin=None):
177
"""
178
Compress pixel data using specified transfer syntax.
179
180
Parameters:
181
- transfer_syntax_uid: str - Target transfer syntax UID
182
- encoding_plugin: str - Specific encoder to use
183
184
Returns:
185
None - Modifies dataset in place
186
"""
187
188
def decompress(self, handler_name=None):
189
"""
190
Decompress pixel data to uncompressed format.
191
192
Parameters:
193
- handler_name: str - Specific decoder to use
194
195
Returns:
196
None - Modifies dataset in place
197
"""
198
199
def convert_pixel_data(self, handler_name=None):
200
"""
201
Convert pixel data using available handlers.
202
203
Parameters:
204
- handler_name: str - Specific handler to use
205
206
Returns:
207
None - Modifies dataset in place
208
"""
209
```
210
211
### Overlay and Waveform Data
212
213
Methods for accessing overlay graphics and waveform data embedded in DICOM files.
214
215
```python { .api }
216
class Dataset:
217
def overlay_array(self, group):
218
"""
219
Return overlay data as NumPy array.
220
221
Parameters:
222
- group: int - Overlay group number (0x6000-0x60FF range)
223
224
Returns:
225
ndarray - Overlay data as binary array
226
"""
227
228
def waveform_array(self, index=0):
229
"""
230
Return waveform data as NumPy array.
231
232
Parameters:
233
- index: int - Waveform sequence index
234
235
Returns:
236
ndarray - Waveform data array
237
"""
238
```
239
240
### Serialization and Export
241
242
Methods for converting datasets to various formats including JSON, and saving to files.
243
244
```python { .api }
245
class Dataset:
246
def to_json(self, bulk_data_threshold=1024, bulk_data_element_handler=None):
247
"""
248
Convert dataset to JSON representation.
249
250
Parameters:
251
- bulk_data_threshold: int - Size threshold for bulk data handling
252
- bulk_data_element_handler: callable - Accepts a bulk data element and returns its JSON representation (e.g. a dict with a BulkDataURI)
253
254
Returns:
255
str - JSON representation of dataset
256
"""
257
258
@classmethod
259
def from_json(cls, json_dataset, bulk_data_uri_handler=None):
260
"""
261
Create dataset from JSON representation.
262
263
Parameters:
264
- json_dataset: str or dict - JSON representation
265
- bulk_data_uri_handler: callable - Handler for bulk data URIs
266
267
Returns:
268
Dataset object
269
"""
270
271
def save_as(self, filename, write_like_original=True):
272
"""
273
Save dataset to DICOM file.
274
275
Parameters:
276
- filename: str or PathLike - Output filename
277
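- write_like_original: bool - If True, write the dataset as-is (preamble and File Meta Information are not enforced); if False, write a file conforming to the DICOM File Format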
278
"""
279
```
280
281
### Validation and Metadata
282
283
Methods for validating datasets and managing file metadata.
284
285
```python { .api }
286
class Dataset:
287
def ensure_file_meta(self):
288
"""
289
Ensure File Meta Information is present and valid.
290
291
Creates missing required File Meta Information elements.
292
"""
293
294
def validate(self):
295
"""
296
Validate dataset according to DICOM standard.
297
298
Returns:
299
list - Validation errors and warnings
300
"""
301
302
def remove_private_tags(self):
303
"""Remove all private data elements from dataset."""
304
305
@property
306
def is_implicit_VR(self):
307
"""bool: Whether dataset uses implicit VR encoding."""
308
309
@property
310
def is_little_endian(self):
311
"""bool: Whether dataset uses little endian byte order."""
312
313
@property
314
def is_original_encoding(self):
315
"""bool: Whether dataset retains original encoding."""
316
```
317
318
### FileDataset Subclass
319
320
Enhanced dataset class for file-based DICOM data with additional file-specific metadata.
321
322
```python { .api }
323
class FileDataset(Dataset):
324
"""
325
Dataset subclass for DICOM files with file-specific metadata.
326
"""
327
328
def __init__(self, filename, dataset, preamble=None, file_meta=None,
329
is_implicit_VR=True, is_little_endian=True):
330
"""
331
Initialize FileDataset.
332
333
Parameters:
334
- filename: str - Source filename
335
- dataset: dict - Dataset elements
336
- preamble: bytes - DICOM file preamble
337
- file_meta: FileMetaDataset - File Meta Information
338
- is_implicit_VR: bool - VR encoding type
339
- is_little_endian: bool - Byte order
340
"""
341
342
@property
343
def filename(self):
344
"""str: Source filename."""
345
346
@property
347
def preamble(self):
348
"""bytes: DICOM file preamble."""
349
350
@property
351
def file_meta(self):
352
"""FileMetaDataset: File Meta Information."""
353
```
354
355
### FileMetaDataset Class
356
357
Specialized dataset for DICOM File Meta Information with validation and required elements.
358
359
```python { .api }
360
class FileMetaDataset(Dataset):
361
"""
362
Specialized dataset for DICOM File Meta Information.
363
"""
364
365
def __init__(self):
366
"""Initialize with required File Meta Information elements."""
367
368
def validate(self):
369
"""
370
Validate File Meta Information completeness.
371
372
Returns:
373
list - Validation errors for missing required elements
374
"""
375
```
376
377
### Private Block Management
378
379
Helper class for managing private DICOM elements with creator identification.
380
381
```python { .api }
382
class PrivateBlock:
383
"""
384
Helper for managing private DICOM elements.
385
"""
386
387
def __init__(self, key, dataset, private_creator):
388
"""
389
Initialize private block.
390
391
Parameters:
392
- key: tuple - (group, creator_tag) identifying block
393
- dataset: Dataset - Parent dataset
394
- private_creator: str - Private creator identification
395
"""
396
397
def add_new(self, tag, VR, value):
398
"""
399
Add new private element to block.
400
401
Parameters:
402
- tag: int - Private tag (element part only)
403
- VR: str - Value Representation
404
- value: Any - Element value
405
"""
406
407
def __contains__(self, tag):
408
"""
409
Check if private element exists in block.
410
411
Parameters:
412
- tag: int - Private tag
413
414
Returns:
415
bool - True if element exists
416
"""
417
418
def __getitem__(self, tag):
419
"""
420
Get private element value.
421
422
Parameters:
423
- tag: int - Private tag
424
425
Returns:
426
DataElement object
427
"""
428
```
429
430
## Usage Examples
431
432
### Basic Dataset Operations
433
434
```python
435
from pydicom import Dataset, DataElement
436
from pydicom.tag import Tag
437
438
# Create new dataset
439
ds = Dataset()
440
441
# Add elements using different methods
442
ds.PatientName = "John Doe"
443
ds[0x00100020] = DataElement(0x00100020, "LO", "12345")  # Patient ID (item assignment requires a DataElement)
444
ds.add_new(0x00101030, "DS", "75.5") # Patient Weight
445
446
# Access elements
447
patient_name = ds.PatientName
448
patient_id = ds[0x00100020]
449
weight = ds.get(0x00101030, "Unknown")
450
451
# Check element existence
452
if 'PatientName' in ds:
453
print(f"Patient: {ds.PatientName}")
454
455
# Iterate over elements
456
for tag, elem in ds.items():
457
print(f"{elem.keyword}: {elem.value}")
458
```
459
460
### Working with Sequences
461
462
```python
463
from pydicom import Dataset, Sequence
464
465
# Create dataset with sequence
466
ds = Dataset()
467
ds.PatientName = "Test Patient"
468
469
# Create sequence of datasets
470
seq = Sequence()
471
for i in range(3):
472
item = Dataset()
473
item.ReferencedSOPInstanceUID = f"1.2.3.{i}"
474
item.ReferencedSOPClassUID = "1.2.840.10008.5.1.4.1.1.2"
475
seq.append(item)
476
477
ds.ReferencedImageSequence = seq
478
479
# Access sequence items
480
for item in ds.ReferencedImageSequence:
481
print(f"SOP Instance: {item.ReferencedSOPInstanceUID}")
482
```
483
484
### Private Elements
485
486
```python
487
from pydicom import Dataset
488
489
ds = Dataset()
490
491
# Add private elements using private block
492
private_block = ds.private_block(0x0011, "MyCompany", create=True)
493
private_block.add_new(0x01, "LO", "Custom Value")
494
private_block.add_new(0x02, "DS", "123.45")
495
496
# Access private elements
497
custom_value = ds.get_private_item(0x0011, 0x01, "MyCompany").value
498
print(f"Custom value: {custom_value}")
499
```
500
501
### Dataset Validation and Cleanup
502
503
```python
504
from pydicom import dcmread
505
506
# Read and validate dataset
507
ds = dcmread("image.dcm")
508
509
# Ensure proper file meta information
510
ds.ensure_file_meta()
511
512
# Validate dataset
513
errors = ds.validate()
514
if errors:
515
print("Validation errors:")
516
for error in errors:
517
print(f" {error}")
518
519
# Remove private tags for anonymization
520
ds.remove_private_tags()
521
522
# Save cleaned dataset
523
ds.save_as("cleaned.dcm")
524
```
525
526
### JSON Serialization
527
528
```python
529
from pydicom import Dataset, dcmread
530
import json
531
532
# Read dataset and convert to JSON
533
ds = dcmread("image.dcm")
534
json_str = ds.to_json()
535
536
# Save JSON representation
537
with open("dataset.json", "w") as f:
538
f.write(json_str)
539
540
# Load from JSON
541
with open("dataset.json", "r") as f:
542
json_data = f.read()
543
544
restored_ds = Dataset.from_json(json_data)
545
```
546
547
### Pixel Data Operations
548
549
```python
550
import numpy as np
551
from pydicom import dcmread
552
553
# Read dataset with pixel data
554
ds = dcmread("image.dcm")
555
556
# Get pixel array
557
if hasattr(ds, 'pixel_array'):
558
pixels = ds.pixel_array
559
print(f"Shape: {pixels.shape}")
560
print(f"Data type: {pixels.dtype}")
561
562
# Modify pixels
563
modified_pixels = (pixels * 0.8).astype(pixels.dtype)  # Reduce brightness; cast back so the byte length still matches the image attributes
564
565
# Save modified dataset
566
ds.PixelData = modified_pixels.tobytes()
567
ds.save_as("dimmed.dcm")
568
569
# Compress pixel data
570
ds.compress("1.2.840.10008.1.2.4.90") # JPEG 2000 Lossless
571
```