or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration-utilities.md data-elements.md dataset-manipulation.md file-operations.md index.md pixel-data-processing.md sequences-collections.md tags-and-uids.md value-representations.md

docs/dataset-manipulation.md

0

# Dataset Manipulation

1

2

Comprehensive dataset management providing dict-like access to DICOM elements with full support for the DICOM data model, validation, serialization, and advanced dataset operations.

3

4

## Capabilities

5

6

### Core Dataset Class

7

8

The primary container for DICOM data elements, providing dictionary-like access with DICOM-specific functionality.

9

10

```python { .api }

11

class Dataset:

12

"""

13

A dictionary-like container for DICOM data elements.

14

15

Supports standard dictionary operations plus DICOM-specific functionality

16

for element management, validation, and serialization.

17

"""

18

19

def __init__(self):

20

"""Initialize empty dataset."""

21

22

def __getitem__(self, key):

23

"""

24

Get data element by tag or keyword.

25

26

Parameters:

27

- key: int, tuple, or str - DICOM tag or keyword

28

29

Returns:

30

DataElement - The full element object (read its `.value` attribute for the stored value)

31

"""

32

33

def __setitem__(self, key, value):

34

"""

35

Set data element value by tag or keyword.

36

37

Parameters:

38

- key: int, tuple, or str - DICOM tag or keyword

39

- value: DataElement - Element to store; its tag must match key

40

"""

41

42

def __delitem__(self, key):

43

"""

44

Delete data element by tag or keyword.

45

46

Parameters:

47

- key: int, tuple, or str - DICOM tag or keyword

48

"""

49

50

def __contains__(self, key):

51

"""

52

Check if dataset contains element.

53

54

Parameters:

55

- key: int, tuple, or str - DICOM tag or keyword

56

57

Returns:

58

bool - True if element exists

59

"""

60

61

def keys(self):

62

"""Return iterator over dataset tags."""

63

64

def values(self):

65

"""Return the DataElement objects stored in the dataset."""

66

67

def items(self):

68

"""Return iterator over (tag, data_element) pairs."""

69

70

def get(self, key, default=None):

71

"""

72

Get element value with default.

73

74

Parameters:

75

- key: int, tuple, or str - DICOM tag or keyword

76

- default: Any - Default value if element not found

77

78

Returns:

79

DataElement value or default

80

"""

81

82

def pop(self, key, *args):

83

"""

84

Remove element and return its value.

85

86

Parameters:

87

- key: int, tuple, or str - DICOM tag or keyword

88

- default: Any - Default value if element not found

89

90

Returns:

91

DataElement value

92

"""

93

```

94

95

### Element Management

96

97

Methods for adding, modifying, and managing DICOM data elements with proper validation and type handling.

98

99

```python { .api }

100

class Dataset:

101

def add(self, data_element):

102

"""

103

Add a DataElement to the dataset.

104

105

Parameters:

106

- data_element: DataElement - Element to add

107

"""

108

109

def add_new(self, tag, VR, value):

110

"""

111

Create and add new data element.

112

113

Parameters:

114

- tag: int or tuple - DICOM tag

115

- VR: str - Value Representation

116

- value: Any - Element value

117

"""

118

119

def data_element(self, tag):

120

"""

121

Return the full DataElement object.

122

123

Parameters:

124

- tag: int, tuple, or str - DICOM tag or keyword

125

126

Returns:

127

DataElement object

128

"""

129

130

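def get_private_item(self, group, element_offset, private_creator):

131

"""

132

Get private data element.

133

134

Parameters:

135

- group: int - Private group number

136

- element_offset: int - Offset of the element within the private block (e.g. 0x01)

137

- private_creator: str - Private creator identification

138

139

Returns:

140

DataElement - The matching private element (read its `.value` attribute for the stored value)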

141

"""

142

143

def private_block(self, group, private_creator, create=False):

144

"""

145

Return private block for managing private elements.

146

147

Parameters:

148

- group: int - Private group number

149

- private_creator: str - Private creator identification

150

- create: bool - Create block if it doesn't exist

151

152

Returns:

153

PrivateBlock object

154

"""

155

```

156

157

### Pixel Data Operations

158

159

Methods for accessing and manipulating pixel data with support for various formats and processing operations.

160

161

```python { .api }

162

class Dataset:

163

@property

164

def pixel_array(self):

165

"""

166

Return pixel data as NumPy array.

167

168

Returns:

169

ndarray - Pixel data array with appropriate shape and dtype

170

171

Raises:

172

AttributeError - If no pixel data present

173

ImportError - If NumPy not available

174

"""

175

176

def compress(self, transfer_syntax_uid, encoding_plugin=None):

177

"""

178

Compress pixel data using specified transfer syntax.

179

180

Parameters:

181

- transfer_syntax_uid: str - Target transfer syntax UID

182

- encoding_plugin: str - Specific encoder to use

183

184

Returns:

185

None - Modifies dataset in place

186

"""

187

188

def decompress(self, handler_name=None):

189

"""

190

Decompress pixel data to uncompressed format.

191

192

Parameters:

193

- handler_name: str - Specific decoder to use

194

195

Returns:

196

None - Modifies dataset in place

197

"""

198

199

def convert_pixel_data(self, handler_name=None):

200

"""

201

Convert pixel data using available handlers.

202

203

Parameters:

204

- handler_name: str - Specific handler to use

205

206

Returns:

207

None - Modifies dataset in place

208

"""

209

```

210

211

### Overlay and Waveform Data

212

213

Methods for accessing overlay graphics and waveform data embedded in DICOM files.

214

215

```python { .api }

216

class Dataset:

217

def overlay_array(self, group):

218

"""

219

Return overlay data as NumPy array.

220

221

Parameters:

222

- group: int - Overlay group number (0x6000-0x60FF range)

223

224

Returns:

225

ndarray - Overlay data as binary array

226

"""

227

228

def waveform_array(self, index=0):

229

"""

230

Return waveform data as NumPy array.

231

232

Parameters:

233

- index: int - Waveform sequence index

234

235

Returns:

236

ndarray - Waveform data array

237

"""

238

```

239

240

### Serialization and Export

241

242

Methods for converting datasets to and from JSON and for saving them to DICOM files.

243

244

```python { .api }

245

class Dataset:

246

def to_json(self, bulk_data_threshold=1024, bulk_data_element_handler=None):

247

"""

248

Convert dataset to JSON representation.

249

250

Parameters:

251

- bulk_data_threshold: int - Size threshold (in bytes) above which an element is passed to the handler instead of being inlined

252

- bulk_data_element_handler: callable - Handler invoked with each data element that exceeds the threshold

253

254

Returns:

255

str - JSON representation of dataset

256

"""

257

258

@classmethod

259

def from_json(cls, json_dataset, bulk_data_uri_handler=None):

260

"""

261

Create dataset from JSON representation.

262

263

Parameters:

264

- json_dataset: str or dict - JSON representation

265

- bulk_data_uri_handler: callable - Handler for bulk data URIs

266

267

Returns:

268

Dataset object

269

"""

270

271

def save_as(self, filename, write_like_original=True):

272

"""

273

Save dataset to DICOM file.

274

275

Parameters:

276

- filename: str or PathLike - Output filename

277

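- write_like_original: bool - If True, write the dataset as-is (preamble and File Meta are not enforced, so the file may be non-conformant); if False, write a conformant DICOM File Format file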

278

"""

279

```

280

281

### Validation and Metadata

282

283

Methods for validating datasets and managing file metadata.

284

285

```python { .api }

286

class Dataset:

287

def ensure_file_meta(self):

288

"""

289

Ensure File Meta Information is present and valid.

290

291

Creates missing required File Meta Information elements.

292

"""

293

294

def validate(self):

295

"""

296

Validate dataset according to DICOM standard.

297

298

Returns:

299

list - Validation errors and warnings

300

"""

301

302

def remove_private_tags(self):

303

"""Remove all private data elements from dataset."""

304

305

@property

306

def is_implicit_VR(self):

307

"""bool: Whether dataset uses implicit VR encoding."""

308

309

@property

310

def is_little_endian(self):

311

"""bool: Whether dataset uses little endian byte order."""

312

313

@property

314

def is_original_encoding(self):

315

"""bool: Whether dataset retains original encoding."""

316

```

317

318

### FileDataset Subclass

319

320

Enhanced dataset class for file-based DICOM data with additional file-specific metadata.

321

322

```python { .api }

323

class FileDataset(Dataset):

324

"""

325

Dataset subclass for DICOM files with file-specific metadata.

326

"""

327

328

def __init__(self, filename, dataset, preamble=None, file_meta=None,

329

is_implicit_VR=True, is_little_endian=True):

330

"""

331

Initialize FileDataset.

332

333

Parameters:

334

- filename: str - Source filename

335

- dataset: dict - Dataset elements

336

- preamble: bytes - DICOM file preamble

337

- file_meta: FileMetaDataset - File Meta Information

338

- is_implicit_VR: bool - VR encoding type

339

- is_little_endian: bool - Byte order

340

"""

341

342

@property

343

def filename(self):

344

"""str: Source filename."""

345

346

@property

347

def preamble(self):

348

"""bytes: DICOM file preamble."""

349

350

@property

351

def file_meta(self):

352

"""FileMetaDataset: File Meta Information."""

353

```

354

355

### FileMetaDataset Class

356

357

Specialized dataset for DICOM File Meta Information with validation and required elements.

358

359

```python { .api }

360

class FileMetaDataset(Dataset):

361

"""

362

Specialized dataset for DICOM File Meta Information.

363

"""

364

365

def __init__(self):

366

"""Initialize with required File Meta Information elements."""

367

368

def validate(self):

369

"""

370

Validate File Meta Information completeness.

371

372

Returns:

373

list - Validation errors for missing required elements

374

"""

375

```

376

377

### Private Block Management

378

379

Helper class for managing private DICOM elements with creator identification.

380

381

```python { .api }

382

class PrivateBlock:

383

"""

384

Helper for managing private DICOM elements.

385

"""

386

387

def __init__(self, key, dataset, private_creator):

388

"""

389

Initialize private block.

390

391

Parameters:

392

- key: tuple - (group, creator_tag) identifying block

393

- dataset: Dataset - Parent dataset

394

- private_creator: str - Private creator identification

395

"""

396

397

def add_new(self, tag, VR, value):

398

"""

399

Add new private element to block.

400

401

Parameters:

402

- tag: int - Private tag (element part only)

403

- VR: str - Value Representation

404

- value: Any - Element value

405

"""

406

407

def __contains__(self, tag):

408

"""

409

Check if private element exists in block.

410

411

Parameters:

412

- tag: int - Private tag

413

414

Returns:

415

bool - True if element exists

416

"""

417

418

def __getitem__(self, tag):

419

"""

420

Get private element value.

421

422

Parameters:

423

- tag: int - Private tag

424

425

Returns:

426

DataElement value

427

"""

428

```

429

430

## Usage Examples

431

432

### Basic Dataset Operations

433

434

```python

435

from pydicom import Dataset, DataElement

436

from pydicom.tag import Tag

437

438

# Create new dataset

439

ds = Dataset()

440

441

# Add elements using different methods

442

ds.PatientName = "John Doe"

443

ds[0x00100020] = DataElement(0x00100020, "LO", "12345") # Patient ID (item assignment requires a DataElement)

444

ds.add_new(0x00101030, "DS", "75.5") # Patient Weight

445

446

# Access elements

447

patient_name = ds.PatientName

448

patient_id = ds[0x00100020]

449

weight = ds.get(0x00101030, "Unknown")

450

451

# Check element existence

452

if 'PatientName' in ds:

453

print(f"Patient: {ds.PatientName}")

454

455

# Iterate over elements

456

for tag, elem in ds.items():

457

print(f"{elem.keyword}: {elem.value}")

458

```

459

460

### Working with Sequences

461

462

```python

463

from pydicom import Dataset, Sequence

464

465

# Create dataset with sequence

466

ds = Dataset()

467

ds.PatientName = "Test Patient"

468

469

# Create sequence of datasets

470

seq = Sequence()

471

for i in range(3):

472

item = Dataset()

473

item.ReferencedSOPInstanceUID = f"1.2.3.{i}"

474

item.ReferencedSOPClassUID = "1.2.840.10008.5.1.4.1.1.2"

475

seq.append(item)

476

477

ds.ReferencedImageSequence = seq

478

479

# Access sequence items

480

for item in ds.ReferencedImageSequence:

481

print(f"SOP Instance: {item.ReferencedSOPInstanceUID}")

482

```

483

484

### Private Elements

485

486

```python

487

from pydicom import Dataset

488

489

ds = Dataset()

490

491

# Add private elements using private block

492

private_block = ds.private_block(0x0011, "MyCompany", create=True)

493

private_block.add_new(0x01, "LO", "Custom Value")

494

private_block.add_new(0x02, "DS", "123.45")

495

496

# Access private elements

497

custom_value = ds.get_private_item(0x0011, 0x01, "MyCompany").value

498

print(f"Custom value: {custom_value}")

499

```

500

501

### Dataset Validation and Cleanup

502

503

```python

504

from pydicom import dcmread

505

506

# Read and validate dataset

507

ds = dcmread("image.dcm")

508

509

# Ensure proper file meta information

510

ds.ensure_file_meta()

511

512

# Validate dataset

513

errors = ds.validate()

514

if errors:

515

print("Validation errors:")

516

for error in errors:

517

print(f" {error}")

518

519

# Remove private tags for anonymization

520

ds.remove_private_tags()

521

522

# Save cleaned dataset

523

ds.save_as("cleaned.dcm")

524

```

525

526

### JSON Serialization

527

528

```python

529

from pydicom import Dataset, dcmread

530

import json

531

532

# Read dataset and convert to JSON

533

ds = dcmread("image.dcm")

534

json_str = ds.to_json()

535

536

# Save JSON representation

537

with open("dataset.json", "w") as f:

538

f.write(json_str)

539

540

# Load from JSON

541

with open("dataset.json", "r") as f:

542

json_data = f.read()

543

544

restored_ds = Dataset.from_json(json_data)

545

```

546

547

### Pixel Data Operations

548

549

```python

550

import numpy as np

551

from pydicom import dcmread

552

553

# Read dataset with pixel data

554

ds = dcmread("image.dcm")

555

556

# Get pixel array

557

if 'PixelData' in ds:

558

pixels = ds.pixel_array

559

print(f"Shape: {pixels.shape}")

560

print(f"Data type: {pixels.dtype}")

561

562

# Modify pixels

563

modified_pixels = (pixels * 0.8).astype(pixels.dtype) # Reduce brightness, keeping the original dtype so the byte length still matches

564

565

# Save modified dataset

566

ds.PixelData = modified_pixels.tobytes()

567

ds.save_as("dimmed.dcm")

568

569

# Compress pixel data

570

ds.compress("1.2.840.10008.1.2.4.90") # JPEG 2000 Lossless

571

```