CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-ubelt

A Python utility belt containing simple tools, a stdlib-like feel, and extra batteries

Overview
Eval results
Files

docs/text-processing.md

String and Text Processing

String manipulation utilities including indentation, formatting, and text processing functions for code and document formatting.

Capabilities

Text Formatting

Functions for formatting and manipulating text with proper indentation and whitespace handling.

def indent(text, prefix='    '):
    """
    Indent text by adding prefix to the start of each line.

    Args:
        text (str): Text to indent; may contain several lines
        prefix (str): Prefix to add to each line (default: 4 spaces).
            Any string works, e.g. 8 spaces or a tab character.

    Returns:
        str: Indented text

    Example:
        ``ub.indent('x = 1', prefix='> ')`` gives ``'> x = 1'``.
    """

def codeblock(text):
    """
    Remove common leading whitespace from text block.

    Only the indentation shared by all lines is removed, so the relative
    indentation between lines (e.g. nested code) is preserved.
    Similar to textwrap.dedent.

    NOTE(review): the original wording said "more robust" than
    textwrap.dedent without saying how; presumably this refers to also
    trimming leading/trailing newlines -- confirm against the ubelt source.

    Args:
        text (str): Text block to dedent

    Returns:
        str: Text with common indentation removed
    """

def paragraph(text):
    """
    Format text as a single paragraph by joining its lines.

    Line breaks are replaced by single spaces, so a multi-line string
    becomes one line of text.

    NOTE(review): the usage examples pass indented, newline-padded text;
    presumably runs of whitespace are collapsed and the ends are trimmed --
    confirm against the ubelt source.

    Args:
        text (str): Text to format as paragraph

    Returns:
        str: Single-line paragraph text
    """

Text Concatenation

Functions for combining and arranging text horizontally and vertically.

def hzcat(args, sep='', **kwargs):
    """
    Horizontally concatenate strings, placing them side by side.

    Each argument is treated as a column: the first lines of all arguments
    form the first output line, the second lines the second, and so on,
    with ``sep`` inserted between adjacent columns.

    Args:
        args: List of strings or string-like objects (each may be multi-line)
        sep (str): Separator between columns (default: empty string)
        **kwargs: Additional formatting options
            NOTE(review): no keyword options are shown anywhere in this
            document -- verify which, if any, ubelt actually accepts.

    Returns:
        str: Horizontally concatenated text
    """

Color and Highlighting

Text coloring and syntax highlighting utilities.

def color_text(text, color):
    """
    Color text with ANSI color codes.

    Args:
        text (str): Text to color
        color (str): Color name or ANSI code. The usage examples in this
            document use the names 'red', 'green', 'blue', 'yellow' and
            'white'.

    Returns:
        str: Colored text with ANSI codes

    Note:
        Respects NO_COLOR environment variable
    """

def highlight_code(text, lexer_name='python', **kwargs):
    """
    Syntax highlight code with ANSI colors.

    Args:
        text (str): Code to highlight
        lexer_name (str): Language lexer ('python', 'bash', 'json', etc.);
            defaults to 'python'
        **kwargs: Additional highlighting options
            NOTE(review): the accepted options are not listed in this
            document -- confirm against the ubelt source.

    Returns:
        str: Syntax highlighted text

    Note:
        Requires pygments package for full functionality
        Falls back to plain text if pygments unavailable
    """

# Global color control
NO_COLOR: bool  # Global flag to disable ANSI coloring

Deprecated String Functions

def ensure_unicode(text):
    """
    Ensure text is unicode string.

    DEPRECATED: Python 3 strings are unicode by default, so new code
    should not need this function.

    Args:
        text: Text to convert
            NOTE(review): how non-str input (e.g. bytes) is decoded is not
            stated in this document -- confirm before relying on it.

    Returns:
        str: Unicode string
    """

Usage Examples

Text Indentation and Formatting

import ubelt as ub

# Basic indentation
code = """def hello():
print("Hello, World!")
return True"""

indented = ub.indent(code)
print("Indented code:")
print(indented)
# Output:
#     def hello():
#     print("Hello, World!")
#     return True

# Custom indentation prefix
double_indented = ub.indent(code, prefix='        ')  # 8 spaces
tab_indented = ub.indent(code, prefix='\t')          # Tab

# Remove common indentation
messy_code = """    def function():
        print("hello")
        if True:
            return 42"""

clean_code = ub.codeblock(messy_code)
print("Cleaned code:")
print(clean_code)
# Output:
# def function():
#     print("hello")
#     if True:
#         return 42

Code Block Processing

import ubelt as ub

# Process multi-line string literals
def format_docstring(docstring):
    """Format a docstring by removing its common indentation.

    Blank lines at the start and end of the text are dropped, then the
    indentation shared by the remaining lines is removed with
    ``ub.codeblock``.

    Args:
        docstring (str): Raw docstring text, typically a triple-quoted
            literal whose body is indented.

    Returns:
        str: The text without surrounding blank lines, trailing whitespace
        or common leading indentation.
    """
    lines = docstring.split('\n')

    # Trim blank lines at both ends. Do NOT use docstring.strip() for this:
    # it would also eat the first content line's indentation, leaving that
    # line at column 0 so there is no common indentation left to remove.
    while lines and not lines[0].strip():
        lines = lines[1:]
    while lines and not lines[-1].strip():
        lines = lines[:-1]

    # Trailing whitespace is safe to drop; it never affects indentation.
    text = '\n'.join(lines).rstrip()

    # Remove common indentation
    return ub.codeblock(text)

example_docstring = """
    This is a function that does something.
    
    Args:
        param1: First parameter
        param2: Second parameter
        
    Returns:
        The result of the operation
    """

formatted = format_docstring(example_docstring)
print(formatted)

Paragraph Formatting

import ubelt as ub

# Convert multi-line text to paragraph
long_text = """This is a long piece of text
that spans multiple lines
but should be formatted
as a single paragraph."""

paragraph_text = ub.paragraph(long_text)
print(paragraph_text)
# Output: "This is a long piece of text that spans multiple lines but should be formatted as a single paragraph."

# Useful for documentation formatting
def format_description(desc):
    """Turn an indented, multi-line description into a one-line paragraph.

    Args:
        desc (str): Description text, possibly indented and spread over
            several lines.

    Returns:
        str: Result of dedenting ``desc`` with ``ub.codeblock`` and then
        passing it through ``ub.paragraph``.
    """
    dedented = ub.codeblock(desc)
    return ub.paragraph(dedented)

description = """
    This function performs complex operations
    on the input data and returns
    a processed result.
    """

formatted_desc = format_description(description)
print(formatted_desc)

Horizontal Text Concatenation

import ubelt as ub

# Side-by-side text display
left_text = """Line 1
Line 2
Line 3"""

right_text = """Column A
Column B
Column C"""

combined = ub.hzcat([left_text, right_text], sep=' | ')
print(combined)
# Output:
# Line 1 | Column A
# Line 2 | Column B
# Line 3 | Column C

# Multiple columns
col1 = "A\nB\nC"
col2 = "1\n2\n3"
col3 = "X\nY\nZ"

table = ub.hzcat([col1, col2, col3], sep=' ')
print(table)
# Output:
# A 1 X
# B 2 Y
# C 3 Z

Text Coloring

import ubelt as ub

# Basic text coloring
red_text = ub.color_text("Error: Something went wrong", 'red')
green_text = ub.color_text("Success: Operation completed", 'green')
blue_text = ub.color_text("Info: Processing data", 'blue')

print(red_text)
print(green_text)
print(blue_text)

# Conditional coloring based on status
def status_message(message, status):
    """Build a colored status line of the form ``STATUS: message``.

    The string is returned, not printed; callers print it themselves.

    Args:
        message (str): Text to show after the status label.
        status (str): One of 'error', 'success', 'warning' or 'info'.
            Any other value is rendered in white.

    Returns:
        str: The labelled message wrapped by ``ub.color_text``.
    """
    palette = dict(error='red', success='green', warning='yellow', info='blue')
    try:
        chosen = palette[status]
    except KeyError:
        # Unknown status: fall back to a neutral color.
        chosen = 'white'
    label = f"{status.upper()}: {message}"
    return ub.color_text(label, chosen)

print(status_message("File not found", 'error'))
print(status_message("Data saved successfully", 'success'))
print(status_message("Memory usage high", 'warning'))

Code Syntax Highlighting

import ubelt as ub

# Highlight Python code
python_code = '''
def fibonacci(n):
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)

# Calculate first 10 fibonacci numbers
for i in range(10):
    print(f"fib({i}) = {fibonacci(i)}")
'''

highlighted = ub.highlight_code(python_code, lexer_name='python')
print(highlighted)

# Highlight other languages
json_data = '''
{
    "name": "John Doe",
    "age": 30,
    "city": "New York",
    "hobbies": ["reading", "swimming", "coding"]
}
'''

highlighted_json = ub.highlight_code(json_data, lexer_name='json')
print(highlighted_json)

# Bash highlighting
bash_script = '''
#!/bin/bash
for file in *.txt; do
    echo "Processing $file"
    wc -l "$file"
done
'''

highlighted_bash = ub.highlight_code(bash_script, lexer_name='bash')
print(highlighted_bash)

Advanced Text Processing

import ubelt as ub

# Create formatted code documentation
def create_code_doc(title, code, description):
    """Assemble a small text document: title, description and code.

    Args:
        title (str): Heading text; shown in blue and underlined with '='
            characters, one per character of the title.
        code (str): Python source, possibly indented; it is dedented and
            then syntax highlighted.
        description (str): Free-form description, possibly indented and
            multi-line; it is dedented and collapsed into one paragraph.

    Returns:
        str: The sections joined with newlines, in the order heading,
        underline, blank, description, blank, 'Code:', highlighted code,
        and a trailing empty line.
    """
    heading = ub.color_text(title, 'blue')
    underline = len(title) * '='

    # Dedent before highlighting so the code starts at column 0.
    source = ub.highlight_code(ub.codeblock(code), lexer_name='python')

    summary = ub.paragraph(ub.codeblock(description))

    sections = (heading, underline, '', summary, '', 'Code:', source, '')
    return '\n'.join(sections)

# Example usage
example_code = '''
    def quicksort(arr):
        if len(arr) <= 1:
            return arr
        pivot = arr[len(arr) // 2]
        left = [x for x in arr if x < pivot]
        middle = [x for x in arr if x == pivot]
        right = [x for x in arr if x > pivot]
        return quicksort(left) + middle + quicksort(right)
'''

description = '''
    This function implements the quicksort algorithm
    using a divide-and-conquer approach. It selects
    a pivot element and partitions the array accordingly.
'''

doc = create_code_doc("Quicksort Implementation", example_code, description)
print(doc)

Text Layout and Alignment

import ubelt as ub

# Create aligned columns for data display
def create_table(headers, rows):
    """Create an aligned, pipe-separated text table from headers and rows.

    Every value is converted with ``str`` and left-justified to the width
    of the widest cell in its column. A line of dashes separates the
    header from the data rows.

    Args:
        headers: Iterable of column titles; its length fixes the number
            of columns.
        rows: Iterable of rows, each an iterable of cell values. Rows
            shorter than ``headers`` are padded with empty cells; cells
            beyond the last header are ignored.

    Returns:
        str: The table as newline-joined lines: header, separator, then
        one line per row. Columns are joined with ' | '.
    """
    str_headers = [str(h) for h in headers]
    n_cols = len(str_headers)

    # Normalise each row to exactly n_cols string cells so a short row
    # cannot raise IndexError while the column widths are computed.
    str_rows = []
    for row in rows:
        cells = [str(cell) for cell in row][:n_cols]
        cells += [''] * (n_cols - len(cells))
        str_rows.append(cells)

    all_rows = [str_headers] + str_rows

    # zip(*all_rows) walks the table column by column.
    col_widths = [
        max(len(cell) for cell in column) for column in zip(*all_rows)
    ]

    formatted_rows = [
        ' | '.join(cell.ljust(width) for cell, width in zip(row, col_widths))
        for row in all_rows
    ]

    # The dashed separator goes between the header and the data rows.
    separator = ' | '.join('-' * width for width in col_widths)
    return '\n'.join([formatted_rows[0], separator] + formatted_rows[1:])

# Example data
headers = ['Name', 'Age', 'City']
data = [
    ['Alice', 25, 'New York'],
    ['Bob', 30, 'San Francisco'],
    ['Charlie', 35, 'Chicago']
]

table = create_table(headers, data)
print(table)

Install with Tessl CLI

npx tessl i tessl/pypi-ubelt

docs

dict-operations.md

download-caching.md

function-utilities.md

hashing-imports.md

index.md

list-operations.md

path-operations.md

progress-timing.md

system-integration.md

text-processing.md

tile.json