CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-laspy

Native Python ASPRS LAS read/write library for processing LiDAR point cloud data

Pending
Overview
Eval results
Files

docs/data-containers.md

Data Container Classes

Primary data container classes for managing LAS file components including headers, point data, and metadata integration. These classes provide the core data structures that synchronize all aspects of LAS files.

Capabilities

LAS Data Container

Main container class that synchronizes header, points, and VLRs into a cohesive LAS file representation.

class LasData:
    """In-memory representation of a LAS file.

    Keeps the header, the point record, and the (extended) Variable
    Length Records synchronized, so that edits to one component
    (e.g. adding an extra dimension) stay consistent with the others.
    """

    def __init__(self, header: LasHeader, points=None):
        """
        Create LAS data container.
        
        Parameters:
        - header: LasHeader - LAS file header
        - points: PackedPointRecord or ScaleAwarePointRecord - Point data (optional)
        """
    
    @property
    def point_format(self) -> PointFormat:
        """Point format definition."""
    
    @property  
    def xyz(self) -> np.ndarray:
        """XYZ coordinates as Nx3 array."""
    
    @property
    def points(self) -> PackedPointRecord:
        """Point record data."""
    
    @property
    def vlrs(self) -> VLRList:
        """Variable Length Records."""
    
    @property
    def evlrs(self) -> Optional[VLRList]:
        """Extended Variable Length Records."""
    
    @property
    def header(self) -> LasHeader:
        """LAS file header."""
    
    def add_extra_dim(self, params: ExtraBytesParams) -> None:
        """
        Add single extra dimension to point format.
        
        Parameters:
        - params: ExtraBytesParams - Extra dimension parameters
        """
    
    def add_extra_dims(self, params: List[ExtraBytesParams]) -> None:
        """
        Add multiple extra dimensions to point format.
        
        Parameters:
        - params: List[ExtraBytesParams] - List of extra dimension parameters
        """
    
    def remove_extra_dim(self, name: str) -> None:
        """
        Remove extra dimension by name.
        
        Parameters:
        - name: str - Name of dimension to remove
        """
    
    def remove_extra_dims(self, names: Iterable[str]) -> None:
        """
        Remove multiple extra dimensions.
        
        Parameters:
        - names: Iterable[str] - Names of dimensions to remove
        """
    
    def update_header(self) -> None:
        """Update header statistics from current point data."""
    
    def write(self, destination, do_compress=None, laz_backend=None) -> None:
        """
        Write LAS data to file.
        
        Parameters:
        - destination: str, Path, or file-like - Output destination
        - do_compress: bool - Force compression on/off (optional)
        - laz_backend: LazBackend - Compression backend (optional)
        """
    
    def change_scaling(self, scales=None, offsets=None) -> None:
        """
        Change coordinate scaling factors.
        
        Parameters:
        - scales: array-like - New scale factors for X,Y,Z (optional)
        - offsets: array-like - New offset values for X,Y,Z (optional)
        """
    
    def __getattr__(self, name):
        """Access point dimensions as attributes (e.g., las.x, las.classification)."""
    
    def __setattr__(self, name, value):
        """Set point dimension values as attributes."""
    
    def __getitem__(self, key):
        """Access point dimensions by name or index."""
    
    def __setitem__(self, key, value):
        """Set point dimension values by name or index."""
    
    def __len__(self) -> int:
        """Get number of points."""

Usage Examples:

import laspy
import numpy as np

# Create new LAS data
header = laspy.LasHeader(point_format=3, version=(1, 2))
las = laspy.LasData(header)

# Add point data via attributes (every dimension array must have the same length)
las.x = np.random.uniform(0, 1000, 5000)
las.y = np.random.uniform(0, 1000, 5000)  
las.z = np.random.uniform(0, 100, 5000)
las.classification = np.random.choice([1, 2, 3, 4], 5000)

# Access XYZ as single array
coordinates = las.xyz  # Shape: (5000, 3)
print(f"Point cloud bounds: {coordinates.min(axis=0)} to {coordinates.max(axis=0)}")

# Update header with current data statistics
las.update_header()
print(f"Header bounds: {las.header.mins} to {las.header.maxs}")

# Add custom extra dimension
extra_param = laspy.ExtraBytesParams(
    name="intensity_normalized",
    type="f4",
    description="Normalized intensity values"
)
las.add_extra_dim(extra_param)
las.intensity_normalized = np.random.uniform(0, 1, len(las))

# Write to file (.laz output; do_compress=True forces compression explicitly)
las.write('output.laz', do_compress=True)

LAS Header Management

Comprehensive header management including metadata, coordinate systems, and format specifications.

class LasHeader:
    """LAS file header: version/point-format info, file metadata,
    coordinate scaling (scales/offsets), bounds statistics, and the
    attached (extended) Variable Length Records."""

    def __init__(self, *, version=None, point_format=None):
        """
        Create LAS header.
        
        Parameters:
        - version: Version or tuple - LAS version (default: (1, 2))
        - point_format: PointFormat or int - Point format (default: 3)
        """
    
    @property
    def version(self) -> Version: ...
    @property  
    def point_format(self) -> PointFormat: ...
    @property
    def file_source_id(self) -> int: ...
    @property
    def global_encoding(self) -> GlobalEncoding: ...
    @property
    def uuid(self) -> UUID: ...
    @property
    def system_identifier(self) -> str: ...
    @property
    def generating_software(self) -> str: ...
    @property
    def creation_date(self) -> date: ...
    @property
    def point_count(self) -> int: ...
    @property
    def scales(self) -> np.ndarray: ...  # X,Y,Z scale factors for stored integer coordinates
    @property
    def offsets(self) -> np.ndarray: ...  # X,Y,Z offsets applied with the scale factors
    @property
    def maxs(self) -> np.ndarray: ...  # per-axis maximum coordinates (X,Y,Z)
    @property
    def mins(self) -> np.ndarray: ...  # per-axis minimum coordinates (X,Y,Z)
    @property
    def x_scale(self) -> float: ...
    @property
    def y_scale(self) -> float: ...
    @property
    def z_scale(self) -> float: ...
    @property
    def x_offset(self) -> float: ...
    @property
    def y_offset(self) -> float: ...
    @property
    def z_offset(self) -> float: ...
    @property
    def x_max(self) -> float: ...
    @property
    def y_max(self) -> float: ...
    @property
    def z_max(self) -> float: ...
    @property
    def x_min(self) -> float: ...
    @property
    def y_min(self) -> float: ...
    @property
    def z_min(self) -> float: ...
    @property
    def vlrs(self) -> VLRList: ...
    @property
    def evlrs(self) -> Optional[VLRList]: ...
    @property
    def number_of_points_by_return(self) -> np.ndarray: ...
    
    def add_extra_dims(self, params: List[ExtraBytesParams]) -> None:
        """Add multiple extra dimensions to point format."""
    
    def add_extra_dim(self, params: ExtraBytesParams) -> None:
        """Add single extra dimension to point format."""
    
    def add_crs(self, crs, keep_compatibility=True):
        """
        Add coordinate reference system information.
        
        Parameters:
        - crs: pyproj.CRS or CRS-like - Coordinate reference system
        - keep_compatibility: bool - Maintain compatibility with older software
        """
    
    def remove_extra_dim(self, name: str) -> None:
        """Remove extra dimension by name."""
    
    def remove_extra_dims(self, names: Iterable[str]) -> None:
        """Remove multiple extra dimensions."""
    
    def set_version_and_point_format(self, version: Version, point_format: PointFormat):
        """
        Set version and point format together (ensures compatibility).
        
        Parameters:
        - version: Version - Target LAS version
        - point_format: PointFormat - Target point format
        """
    
    def partial_reset(self):
        """Reset header statistics (keeps metadata)."""
    
    def update(self, points: PackedPointRecord):
        """
        Update header statistics from point data.
        
        Parameters:
        - points: PackedPointRecord - Point data to analyze
        """
    
    def grow(self, points: PackedPointRecord):
        """
        Grow header bounds to include new points.
        
        Parameters:
        - points: PackedPointRecord - Additional points to include
        """
    
    def set_compressed(self, state: bool):
        """
        Set compression state in header.
        
        Parameters:
        - state: bool - True for compressed, False for uncompressed
        """
    
    def max_point_count(self) -> int:
        """Get maximum point count for this LAS version."""
    
    def copy(self) -> LasHeader:
        """Create deep copy of header."""
    
    def parse_crs(self, prefer_wkt=True) -> Optional[pyproj.CRS]:
        """
        Parse coordinate reference system from VLRs.
        
        Parameters:
        - prefer_wkt: bool - Prefer WKT over GeoTIFF keys if both present
        
        Returns:
        Optional[pyproj.CRS]: Parsed CRS or None if not found
        """
    
    def read_evlrs(self, stream):
        """Read Extended VLRs from stream."""
    
    def write_to(self, stream, ensure_same_size=False, encoding_errors="strict"):
        """
        Write header to stream.
        
        Parameters:
        - stream: BinaryIO - Output stream
        - ensure_same_size: bool - Ensure header stays same size
        - encoding_errors: str - How to handle encoding errors
        """
    
    @classmethod
    def read_from(cls, original_stream: BinaryIO, read_evlrs=False) -> LasHeader:
        """
        Read header from stream.
        
        Parameters:
        - original_stream: BinaryIO - Input stream
        - read_evlrs: bool - Whether to read Extended VLRs
        
        Returns:
        LasHeader: Parsed header
        """

Usage Examples:

import laspy
from datetime import date
import numpy as np

# Create header with specific settings
header = laspy.LasHeader(
    version=(1, 4),  # LAS 1.4
    point_format=6   # Point format 6 (includes GPS time; RGB requires format 7+)
)

# Set metadata
header.system_identifier = "My LiDAR System"
header.generating_software = "My Processing Software v1.0"
header.creation_date = date.today()

# Set coordinate system scaling
header.scales = np.array([0.01, 0.01, 0.001])  # 1cm XY, 1mm Z
header.offsets = np.array([500000, 4000000, 0])  # UTM zone offsets

# Add CRS information (requires pyproj)
try:
    import pyproj
    crs = pyproj.CRS.from_epsg(32633)  # UTM Zone 33N
    header.add_crs(crs)
    print("Added CRS information")
except ImportError:
    print("pyproj not available, skipping CRS")

# Create data and update header statistics
las = laspy.LasData(header)
# ... add point data ...
las.update_header()

print(f"Header bounds: {header.mins} to {header.maxs}")
print(f"Point count: {header.point_count}")

Header Version and Encoding

Version management and global encoding settings for LAS files.

class Version:
    """LAS specification version (e.g. 1.2, 1.4) as a major/minor pair."""

    major: int  # major LAS version number
    minor: int  # minor LAS version number
    
    @classmethod
    def from_str(cls, string: str) -> Version:
        """Parse version from string (e.g., '1.4')."""
    
    def __str__(self) -> str: ...

class GpsTimeType(IntEnum):
    """How GPS timestamps in point records are interpreted (LAS global encoding)."""

    WEEK_TIME = 0  # GPS week time (seconds within the GPS week)
    STANDARD = 1   # Adjusted standard GPS time

class GlobalEncoding:
    """Bit field of LAS header global encoding flags: GPS time type,
    waveform packet location, synthetic return numbers, and WKT CRS."""

    def __init__(self, value=0):
        """
        Create global encoding flags.
        
        Parameters:
        - value: int - Raw encoding value
        """
    
    @property
    def gps_time_type(self) -> GpsTimeType: ...
    @property
    def waveform_data_packets_internal(self) -> bool: ...
    @property  
    def waveform_data_packets_external(self) -> bool: ...
    @property
    def synthetic_return_numbers(self) -> bool: ...
    @property
    def wkt(self) -> bool: ...
    
    @classmethod
    def read_from(cls, stream: BinaryIO) -> GlobalEncoding: ...
    
    def write_to(self, stream: BinaryIO): ...

Advanced Data Container Usage

Working with Large Datasets

import laspy
import numpy as np

def process_large_dataset(input_file, output_file, chunk_size=1000000):
    """Stream a large LAS file through process_chunk() in fixed-size chunks.

    Reads ``input_file`` chunk by chunk so the whole point cloud never
    has to fit in memory, runs each chunk through ``process_chunk``,
    and appends the result to ``output_file``.

    Parameters:
    - input_file: str or Path - Source LAS/LAZ file
    - output_file: str or Path - Destination LAS/LAZ file
    - chunk_size: int - Number of points read per iteration
    """
    with laspy.open(input_file) as source:
        # Reuse the source header so the output keeps the same
        # point format, version, and scaling metadata.
        out_header = source.header.copy()

        with laspy.open(output_file, mode='w', header=out_header) as sink:
            written = 0

            for raw_chunk in source.chunk_iterator(chunk_size):
                cleaned = process_chunk(raw_chunk)
                sink.write_points(cleaned)
                written += len(cleaned)
                print(f"Processed {written} points")

            print(f"Processing complete: {written} total points")

def process_chunk(points):
    """Normalize a chunk's intensity values to the full uint16 range.

    If the chunk exposes an ``intensity`` attribute and its peak value
    is positive, the intensities are rescaled so the maximum maps to
    65535 and stored back as uint16. Chunks without intensity (or with
    an all-zero intensity array) are returned unchanged.
    """
    if hasattr(points, 'intensity'):
        raw = points.intensity
        peak = raw.max()
        # Guard against division by zero on all-zero intensity data.
        if peak > 0:
            scaled = raw / peak * 65535
            points.intensity = scaled.astype(np.uint16)

    return points

Multi-Scale Coordinate Handling

import laspy
import numpy as np

def handle_multi_scale_data(las_data):
    """Inspect a dataset's coordinate scaling and tighten it if too coarse.

    In the LAS format a coordinate is stored as an integer and recovered
    as ``int * scale + offset``, so the scale factor IS the coordinate
    resolution: a scale of 0.01 means 1 cm precision. The original
    example computed ``1.0 / scale``, which inverted the relationship
    (reporting 100 m precision for a 1 cm scale) and made the rescaling
    check below fire for every dataset.

    Parameters:
    - las_data: LasData - Data container whose header scaling is inspected

    Returns:
    LasData: The same object, rescaled in place if its precision was
    coarser than 1 mm on the X axis.
    """
    # Get current scaling
    current_scales = las_data.header.scales
    current_offsets = las_data.header.offsets

    print(f"Current scales: {current_scales}")
    print(f"Current offsets: {current_offsets}")

    # The scale factor is the smallest representable coordinate step.
    x_precision = current_scales[0]
    y_precision = current_scales[1]
    z_precision = current_scales[2]

    print(f"Coordinate precision: X={x_precision}m, Y={y_precision}m, Z={z_precision}m")

    # Increase precision if needed (e.g., for high-accuracy surveys)
    if x_precision > 0.001:  # If precision coarser than 1mm
        new_scales = np.array([0.001, 0.001, 0.0001])  # 1mm XY, 0.1mm Z
        las_data.change_scaling(scales=new_scales)
        print(f"Updated to higher precision: {new_scales}")

    return las_data

Header Validation and Repair

import laspy
from datetime import date

def validate_and_repair_header(las_data):
    """Validate and repair common LAS header issues in place.

    Checks the header attached to ``las_data`` for missing metadata
    (creation date, generating software), a stale point count, stale
    bounds, and non-positive scale factors, fixing each problem found
    and printing a summary of the repairs.

    Parameters:
    - las_data: LasData - Data container whose header is validated

    Returns:
    LasData: The same object, with its header repaired.
    """
    # The surrounding example only imports laspy and date; numpy is
    # required for the scale-factor checks below, so import it here.
    import numpy as np

    header = las_data.header
    changes_made = []

    # Check creation date
    if header.creation_date is None:
        header.creation_date = date.today()
        changes_made.append("Set creation date to today")

    # Check software identifier
    if not header.generating_software.strip():
        header.generating_software = "laspy"
        changes_made.append("Set generating software")

    # Validate point count
    actual_count = len(las_data.points)
    if header.point_count != actual_count:
        header.point_count = actual_count
        changes_made.append(f"Updated point count to {actual_count}")

    # Check bounds consistency
    if len(las_data.points) > 0:
        las_data.update_header()
        changes_made.append("Updated header bounds from point data")

    # Validate scales (prevent zero or negative scales)
    if np.any(header.scales <= 0):
        # Replace only the invalid entries; keep valid scales untouched.
        header.scales = np.where(header.scales <= 0, 0.01, header.scales)
        changes_made.append("Fixed invalid scale factors")

    if changes_made:
        print(f"Header repairs made: {'; '.join(changes_made)}")
    else:
        print("Header validation passed")

    return las_data

Install with Tessl CLI

npx tessl i tessl/pypi-laspy

docs

compression.md

copc.md

core-io.md

data-containers.md

index.md

io-handlers.md

point-data.md

vlr.md

tile.json