CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-pydicom

A pure Python package for reading and writing DICOM data

Pending
Overview
Eval results
Files

docs/dataset-manipulation.md

Dataset Manipulation

Comprehensive dataset management providing dict-like access to DICOM elements with full support for the DICOM data model, validation, serialization, and advanced dataset operations.

Capabilities

Core Dataset Class

The primary container for DICOM data elements, providing dictionary-like access with DICOM-specific functionality.

class Dataset:
    """
    A dictionary-like container for DICOM data elements.

    Supports standard dictionary operations plus DICOM-specific functionality
    for element management, validation, and serialization.

    This block is an API summary: every method body is a docstring only.
    Keys are given either as a DICOM tag (int or tuple) or as an element
    keyword (str), as listed for each method.
    """
    
    def __init__(self):
        """Initialize empty dataset."""
    
    def __getitem__(self, key):
        """
        Get data element by tag or keyword.

        Parameters:
        - key: int, tuple, or str - DICOM tag or keyword

        Returns:
        DataElement value or DataElement object

        NOTE(review): pydicom's API reference describes ``ds[key]`` as
        returning the DataElement itself (read ``.value`` for the stored
        value); attribute access such as ``ds.PatientName`` is what yields
        the bare value. Confirm and narrow the "Returns" text accordingly.
        """
    
    def __setitem__(self, key, value):
        """
        Set data element value by tag or keyword.

        Parameters:
        - key: int, tuple, or str - DICOM tag or keyword
        - value: Any - Value to set

        NOTE(review): pydicom appears to require ``value`` to be a
        DataElement (or RawDataElement) here and to raise TypeError for a
        bare value such as a str. Confirm against the pydicom API reference;
        use attribute assignment or ``add_new`` to store plain values.
        """
    
    def __delitem__(self, key):
        """
        Delete data element by tag or keyword.

        Parameters:
        - key: int, tuple, or str - DICOM tag or keyword
        """
    
    def __contains__(self, key):
        """
        Check if dataset contains element.

        Enables the ``key in ds`` membership test.

        Parameters:
        - key: int, tuple, or str - DICOM tag or keyword

        Returns:
        bool - True if element exists
        """
    
    def keys(self):
        """Return iterator over dataset tags."""
    
    def values(self):
        """
        Return iterator over data element values.

        NOTE(review): presumably the iterated items are the DataElement
        objects held by the dataset rather than their bare ``.value``
        contents - confirm against the pydicom API reference.
        """
    
    def items(self):
        """Return iterator over (tag, data_element) pairs."""
    
    def get(self, key, default=None):
        """
        Get element value with default.

        Parameters:
        - key: int, tuple, or str - DICOM tag or keyword
        - default: Any - Default value if element not found

        Returns:
        DataElement value or default

        NOTE(review): pydicom documents a split behaviour - a keyword (str)
        key yields the element's value, while a tag key yields the
        DataElement object. Confirm and document both cases.
        """
    
    def pop(self, key, *args):
        """
        Remove element and return its value.

        Parameters:
        - key: int, tuple, or str - DICOM tag or keyword
        - default: Any - Default value if element not found; it is not a
          named parameter and is passed as the optional second positional
          argument (collected by ``*args``)

        Returns:
        DataElement value

        NOTE(review): pydicom documents the return as the removed
        DataElement (or the supplied default) - confirm.
        """

Element Management

Methods for adding, modifying, and managing DICOM data elements with proper validation and type handling.

class Dataset:
    def add(self, data_element):
        """
        Add a DataElement to the dataset.

        Parameters:
        - data_element: DataElement - Element to add
        """
    
    def add_new(self, tag, VR, value):
        """
        Create and add new data element.

        Parameters:
        - tag: int or tuple - DICOM tag
        - VR: str - Value Representation
        - value: Any - Element value
        """
    
    def data_element(self, tag):
        """
        Return the full DataElement object.

        Parameters:
        - tag: int, tuple, or str - DICOM tag or keyword

        Returns:
        DataElement object

        NOTE(review): pydicom documents this method as taking the element
        keyword (str) and returning None when the element is absent -
        confirm the accepted key types and the missing-element behaviour.
        """
    
    def get_private_item(self, group, creator, tag):
        """
        Get private data element.

        Parameters:
        - group: int - Private group number
        - creator: str - Private creator identification
        - tag: int - Private tag

        Returns:
        DataElement value

        NOTE(review): pydicom's published signature is
        ``get_private_item(group, element_offset, private_creator)`` - the
        element offset comes BEFORE the creator string - and the documented
        return is the DataElement, not its value. Confirm and align the
        parameter order shown here.
        """
    
    def private_block(self, group, private_creator, create=False):
        """
        Return private block for managing private elements.

        Parameters:
        - group: int - Private group number
        - private_creator: str - Private creator identification
        - create: bool - Create block if it doesn't exist

        Returns:
        PrivateBlock object

        NOTE(review): presumably raises KeyError when the block is missing
        and ``create`` is False - confirm against the pydicom API reference.
        """

Pixel Data Operations

Methods for accessing and manipulating pixel data with support for various formats and processing operations.

class Dataset:
    @property
    def pixel_array(self):
        """
        Return pixel data as NumPy array.

        Exposed as a property, so it is read as ``ds.pixel_array`` without
        a call.

        Returns:
        ndarray - Pixel data array with appropriate shape and dtype

        Raises:
        AttributeError - If no pixel data present
        ImportError - If NumPy not available
        """
    
    def compress(self, transfer_syntax_uid, encoding_plugin=None):
        """
        Compress pixel data using specified transfer syntax.

        Parameters:
        - transfer_syntax_uid: str - Target transfer syntax UID
        - encoding_plugin: str - Specific encoder to use

        Returns:
        None - Modifies dataset in place

        NOTE(review): pydicom's published ``compress`` signature has
        further parameters (an optional array argument precedes
        ``encoding_plugin``); pass ``encoding_plugin`` by keyword - confirm.
        """
    
    def decompress(self, handler_name=None):
        """
        Decompress pixel data to uncompressed format.

        Parameters:
        - handler_name: str - Specific decoder to use

        Returns:
        None - Modifies dataset in place
        """
    
    def convert_pixel_data(self, handler_name=None):
        """
        Convert pixel data using available handlers.

        Parameters:
        - handler_name: str - Specific handler to use

        Returns:
        None - Modifies dataset in place
        """

Overlay and Waveform Data

Methods for accessing overlay graphics and waveform data embedded in DICOM files.

class Dataset:
    def overlay_array(self, group):
        """
        Return overlay data as NumPy array.

        Parameters:
        - group: int - Overlay group number (0x6000-0x60FF range)

        Returns:
        ndarray - Overlay data as binary array
        """
    
    def waveform_array(self, index=0):
        """
        Return waveform data as NumPy array.

        Parameters:
        - index: int - Waveform sequence index (defaults to 0 in this
          summary)

        Returns:
        ndarray - Waveform data array

        NOTE(review): presumably ``index`` selects an item of the dataset's
        Waveform Sequence; pydicom's published signature may not define a
        default for it - confirm.
        """

Serialization and Export

Methods for converting datasets to and from JSON and for saving them to DICOM files.

class Dataset:
    def to_json(self, bulk_data_threshold=1024, bulk_data_uri_handler=None):
        """
        Convert dataset to JSON representation.

        Parameters:
        - bulk_data_threshold: int - Size threshold for bulk data handling
          (default 1024)
        - bulk_data_uri_handler: callable - Handler for bulk data URIs

        Returns:
        str - JSON representation of dataset

        NOTE(review): pydicom's published ``to_json`` names its handler
        parameter ``bulk_data_element_handler``; ``bulk_data_uri_handler``
        is the ``from_json`` counterpart. Confirm the keyword before
        relying on it.
        """
    
    @classmethod
    def from_json(cls, json_dataset, bulk_data_uri_handler=None):
        """
        Create dataset from JSON representation.

        Alternate constructor: call it on the class, e.g.
        ``Dataset.from_json(...)``.

        Parameters:
        - json_dataset: str or dict - JSON representation
        - bulk_data_uri_handler: callable - Handler for bulk data URIs

        Returns:
        Dataset object
        """
    
    def save_as(self, filename, write_like_original=True):
        """
        Save dataset to DICOM file.

        Parameters:
        - filename: str or PathLike - Output filename
        - write_like_original: bool - Preserve original transfer syntax

        NOTE(review): in pydicom, ``write_like_original`` appears to control
        whether the dataset is written as-is (without adding a preamble or
        completing the File Meta Information) rather than which transfer
        syntax is used - confirm and reword the parameter description.
        """

Validation and Metadata

Methods for validating datasets and managing file metadata.

class Dataset:
    def ensure_file_meta(self):
        """
        Ensure File Meta Information is present and valid.

        Creates missing required File Meta Information elements.

        NOTE(review): pydicom documents this as creating an empty
        ``file_meta`` dataset when none exists; whether it also fills in
        required elements should be confirmed.
        """
    
    def validate(self):
        """
        Validate dataset according to DICOM standard.

        Returns:
        list - Validation errors and warnings

        NOTE(review): no ``validate()`` method appears in pydicom's
        published Dataset API; File Meta validation is exposed as
        ``pydicom.dataset.validate_file_meta``. Confirm this entry exists
        before relying on it.
        """
    
    def remove_private_tags(self):
        """Remove all private data elements from dataset."""
    
    @property
    def is_implicit_VR(self):
        """bool: Whether dataset uses implicit VR encoding."""
    
    @property
    def is_little_endian(self):
        """bool: Whether dataset uses little endian byte order."""
    
    @property
    def is_original_encoding(self):
        """bool: Whether dataset retains original encoding."""

FileDataset Subclass

Enhanced dataset class for file-based DICOM data with additional file-specific metadata.

class FileDataset(Dataset):
    """
    Dataset subclass for DICOM files with file-specific metadata.

    Adds the source filename, the file preamble and the File Meta
    Information to the element container inherited from Dataset.
    """
    
    def __init__(self, filename, dataset, preamble=None, file_meta=None, 
                 is_implicit_VR=True, is_little_endian=True):
        """
        Initialize FileDataset.

        Parameters:
        - filename: str - Source filename
        - dataset: dict - Dataset elements
        - preamble: bytes - DICOM file preamble (default None)
        - file_meta: FileMetaDataset - File Meta Information (default None)
        - is_implicit_VR: bool - VR encoding type; True (the default)
          means implicit VR
        - is_little_endian: bool - Byte order; True (the default) means
          little endian

        NOTE(review): pydicom's first parameter also accepts a file-like
        object and ``dataset`` may be a Dataset as well as a dict - confirm.
        """
    
    @property
    def filename(self):
        """str: Source filename."""
    
    @property
    def preamble(self):
        """bytes: DICOM file preamble."""
    
    @property
    def file_meta(self):
        """FileMetaDataset: File Meta Information."""

FileMetaDataset Class

Specialized dataset for DICOM File Meta Information with validation and required elements.

class FileMetaDataset(Dataset):
    """
    Specialized dataset for DICOM File Meta Information.

    Subclass of Dataset; it overrides ``__init__`` and ``validate``.
    """
    
    def __init__(self):
        """Initialize with required File Meta Information elements."""
    
    def validate(self):
        """
        Validate File Meta Information completeness.

        Returns:
        list - Validation errors for missing required elements

        NOTE(review): pydicom exposes File Meta validation through
        ``pydicom.dataset.validate_file_meta``, which signals problems by
        raising rather than returning a list - confirm this method's
        existence and return contract.
        """

Private Block Management

Helper class for managing private DICOM elements with creator identification.

class PrivateBlock:
    """
    Helper for managing private DICOM elements.

    Instances are normally obtained from ``Dataset.private_block`` rather
    than constructed directly.
    """
    
    def __init__(self, key, dataset, private_creator):
        """
        Initialize private block.

        Parameters:
        - key: tuple - (group, creator_tag) identifying block
        - dataset: Dataset - Parent dataset
        - private_creator: str - Private creator identification

        NOTE(review): pydicom documents ``key`` as the
        (group, private creator string) pair and the third argument as the
        integer element number of the private creator tag - confirm the
        names and types given here.
        """
    
    def add_new(self, tag, VR, value):
        """
        Add new private element to block.

        Parameters:
        - tag: int - Private tag (element part only)
        - VR: str - Value Representation
        - value: Any - Element value
        """
    
    def __contains__(self, tag):
        """
        Check if private element exists in block.

        Enables the ``tag in block`` membership test.

        Parameters:
        - tag: int - Private tag

        Returns:
        bool - True if element exists
        """
    
    def __getitem__(self, tag):
        """
        Get private element value.

        Parameters:
        - tag: int - Private tag

        Returns:
        DataElement value

        NOTE(review): pydicom documents the return as the DataElement
        object (read ``.value`` for its contents) - confirm.
        """

Usage Examples

Basic Dataset Operations

from pydicom import Dataset, DataElement
from pydicom.tag import Tag

# Create new dataset
ds = Dataset()

# Add elements using different methods
ds.PatientName = "John Doe"  # keyword attribute: assign the plain value

# Item assignment stores a DataElement; assigning a bare value such as a
# str raises TypeError, so wrap the value together with its tag and VR.
patient_id_tag = Tag(0x0010, 0x0020)  # Patient ID
ds[patient_id_tag] = DataElement(patient_id_tag, "LO", "12345")

ds.add_new(0x00101030, "DS", "75.5")  # Patient Weight

# Access elements
patient_name = ds.PatientName       # attribute access returns the value
patient_id = ds[0x00100020].value   # item access returns the DataElement
# get() with a keyword returns the value (or the default when absent);
# get() with a tag would return the DataElement instead.
weight = ds.get("PatientWeight", "Unknown")

# Check element existence
if 'PatientName' in ds:
    print(f"Patient: {ds.PatientName}")

# Iterate over elements (a Dataset yields its DataElements in tag order)
for elem in ds:
    print(f"{elem.keyword}: {elem.value}")

Working with Sequences

from pydicom import Dataset, Sequence

# Top-level dataset that will own the sequence
ds = Dataset()
ds.PatientName = "Test Patient"

# Build three referenced-image items and collect them in a Sequence
seq = Sequence()
for index in range(3):
    ref = Dataset()
    ref.ReferencedSOPClassUID = "1.2.840.10008.5.1.4.1.1.2"
    ref.ReferencedSOPInstanceUID = f"1.2.3.{index}"
    seq.append(ref)

# Attach the sequence to the dataset under its keyword
ds.ReferencedImageSequence = seq

# Each sequence item is itself a Dataset, so keyword access works on it
for ref in ds.ReferencedImageSequence:
    print(f"SOP Instance: {ref.ReferencedSOPInstanceUID}")

Private Elements

from pydicom import Dataset

ds = Dataset()

# Add private elements using private block
private_block = ds.private_block(0x0011, "MyCompany", create=True)
private_block.add_new(0x01, "LO", "Custom Value")
private_block.add_new(0x02, "DS", "123.45")

# Access private elements.
# pydicom's argument order is (group, element_offset, private_creator),
# and the call returns the DataElement - read .value for the stored value.
custom_value = ds.get_private_item(0x0011, 0x01, "MyCompany").value
print(f"Custom value: {custom_value}")

Dataset Validation and Cleanup

from pydicom import dcmread
from pydicom.dataset import validate_file_meta

# Read dataset
ds = dcmread("image.dcm")

# Ensure the dataset has a File Meta Information container
ds.ensure_file_meta()

# Validate the File Meta Information.
# pydicom's Dataset has no validate() method; validate_file_meta() is the
# public check. It reports problems by raising (ValueError or
# AttributeError, depending on the pydicom version) rather than by
# returning a list.
try:
    validate_file_meta(ds.file_meta, enforce_standard=True)
except (ValueError, AttributeError) as error:
    print("Validation errors:")
    print(f"  {error}")

# Remove private tags for anonymization
ds.remove_private_tags()

# Save cleaned dataset
ds.save_as("cleaned.dcm")

JSON Serialization

from pydicom import Dataset, dcmread
import json

# Load the DICOM file and serialize it to a JSON string
ds = dcmread("image.dcm")
json_str = ds.to_json()

# Persist the JSON text to disk
with open("dataset.json", "w") as out_file:
    out_file.write(json_str)

# Read the JSON text back ...
with open("dataset.json", "r") as in_file:
    json_data = in_file.read()

# ... and rebuild a Dataset from it
restored_ds = Dataset.from_json(json_data)

Pixel Data Operations

import numpy as np
from pydicom import dcmread

# Read dataset with pixel data
ds = dcmread("image.dcm")

# Test for the Pixel Data element itself: hasattr(ds, 'pixel_array') would
# decode the whole image just to answer the question.
if "PixelData" in ds:
    pixels = ds.pixel_array
    print(f"Shape: {pixels.shape}")
    print(f"Data type: {pixels.dtype}")

    # Reduce brightness. Multiplying by a float promotes the array to
    # float64, so cast back to the original dtype; otherwise tobytes()
    # would emit samples of the wrong width for the dataset.
    modified_pixels = (pixels * 0.8).astype(pixels.dtype)

    # Save modified dataset
    # NOTE(review): writing raw bytes back assumes the file uses an
    # uncompressed transfer syntax - for compressed input, decompress the
    # dataset first (confirm against the pydicom pixel data guide).
    ds.PixelData = modified_pixels.tobytes()
    ds.save_as("dimmed.dcm")

# Compress pixel data
ds.compress("1.2.840.10008.1.2.4.90")  # JPEG 2000 Lossless

Install with Tessl CLI

npx tessl i tessl/pypi-pydicom

docs

configuration-utilities.md

data-elements.md

dataset-manipulation.md

file-operations.md

index.md

pixel-data-processing.md

sequences-collections.md

tags-and-uids.md

value-representations.md

tile.json