Simple yet flexible natural sorting in Python that enables developers to sort strings containing numbers in a natural, human-expected order rather than lexicographical order.
—
Utility functions for text processing, decoding, regular expression selection, and command-line interface functionality. These functions provide supporting capabilities for the core natsort functionality.
Functions for handling text encoding and decoding, particularly useful when working with mixed string and bytes data.
def decoder(encoding):
"""
Return a function that can be used to decode bytes to unicode.
Parameters:
- encoding: str - The codec to use for decoding (must be valid unicode codec)
Returns:
Callable - A function that decodes bytes using the supplied codec
Examples:
>>> decode_utf8 = decoder('utf8')
>>> decode_utf8(b'hello') == 'hello'
True
>>> decode_utf8('already string') == 'already string'
True
"""
def as_ascii(s):
"""
Function to decode an input with the ASCII codec, or return as-is.
Parameters:
- s: object - Input to potentially decode
Returns:
object - Decoded string if input was bytes, otherwise input unchanged
Examples:
>>> as_ascii(b'hello')
'hello'
>>> as_ascii('hello')
'hello'
>>> as_ascii(123)
123
"""
def as_utf8(s):
"""
Function to decode an input with the UTF-8 codec, or return as-is.
Parameters:
- s: object - Input to potentially decode
Returns:
object - Decoded string if input was bytes, otherwise input unchanged
Examples:
>>> as_utf8(b'hello')
'hello'
>>> as_utf8('hello')
'hello'
>>> as_utf8(123)
123
"""
Utility for chaining multiple single-argument functions together.
def chain_functions(functions):
"""
Chain a list of single-argument functions together and return.
Functions are applied in list order, with the output of each function
passed as input to the next function.
Parameters:
- functions: Iterable[Callable] - List of single-argument functions to chain
Returns:
Callable - A single argument function that applies all chained functions
Examples:
>>> funcs = [lambda x: x * 4, len, lambda x: x + 5]
>>> chained = chain_functions(funcs)
>>> chained('hey') # 'hey' -> 'heyheyheyhey' -> 12 -> 17
17
"""
Function for selecting appropriate regular expressions for number matching based on algorithm settings.
def numeric_regex_chooser(alg):
"""
Select an appropriate regex for the type of number of interest.
Parameters:
- alg: ns enum - Algorithm flags indicating the desired number type
Returns:
str - Regular expression string that matches the desired number type
Examples:
>>> numeric_regex_chooser(ns.INT)
r'(\d+|[unicode_digits])'
>>> numeric_regex_chooser(ns.FLOAT | ns.SIGNED)
r'([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|[unicode_numeric])'
"""
Main entry point for the natsort command-line tool.
def main(*arguments):
"""
Performs a natural sort on entries given on the command-line.
Entry point for the 'natsort' console script. Provides command-line
access to natural sorting with various options for number types,
filtering, and output formatting.
Parameters:
- *arguments: str - Command line arguments (uses sys.argv if not provided)
Command Line Options:
- -p, --paths: Interpret input as file paths
- -f, --filter LOW HIGH: Keep entries with numbers in range
- -F, --reverse-filter LOW HIGH: Exclude entries with numbers in range
- -e, --exclude NUMBER: Exclude entries containing specific number
- -r, --reverse: Return results in reversed order
- -t, --number-type {int,float,real}: Choose number interpretation
- -s, --sign: Consider +/- as part of numbers
- --noexp: Don't parse scientific notation
- -l, --locale: Use locale-aware sorting
Examples:
# Sort lines from stdin
$ echo -e "item10\nitem2\nitem1" | natsort
# Sort file paths
$ natsort --paths file10.txt file2.txt file1.txt
# Sort with real numbers and filtering
$ natsort --number-type real --filter -5 10 < data.txt
"""
from natsort import natsorted, decoder, as_utf8, as_ascii
# Mixed bytes and string data
mixed_data = [b'file10.txt', 'file2.txt', b'file1.txt', 'file20.txt']

# Method 1: Using decoder function
# decoder() returns a callable that decodes bytes with the given codec; the
# list is converted up front, so the sorted result contains only str.
utf8_decoder = decoder('utf-8')
decoded_data = [utf8_decoder(item) for item in mixed_data]
sorted_decoded = natsorted(decoded_data)
print(f"Decoded and sorted: {sorted_decoded}")

# Method 2: Using as_utf8 directly in key function
# Only the sort key is decoded; the returned list keeps the original
# bytes/str objects.
sorted_mixed = natsorted(mixed_data, key=as_utf8)
print(f"Sorted with UTF-8 key: {sorted_mixed}")

# Method 3: Using as_ascii for ASCII-only data
ascii_mixed = [b'fileA.txt', 'fileB.txt', b'file1.txt']
sorted_ascii = natsorted(ascii_mixed, key=as_ascii)
print(f"Sorted with ASCII key: {sorted_ascii}")
from natsort import natsorted, chain_functions
from pathlib import Path

# File paths that need complex preprocessing
file_paths = [
    '/home/user/Documents/Project_v1.10.txt',
    '/home/user/Documents/Project_v1.2.txt',
    '/var/log/system_log_v2.1.txt',
    '/tmp/temp_file_v1.0.txt',
]

# Chain of transformations: Path -> filename -> lowercase -> remove extension
transform_chain = chain_functions([
    lambda x: Path(x).name,         # Get filename only
    lambda x: x.lower(),            # Convert to lowercase
    lambda x: x.rsplit('.', 1)[0],  # Remove extension
])

# Sort using the chained transformation
sorted_files = natsorted(file_paths, key=transform_chain)

print("Sorted by transformed filename:")
# Iterate over the sorted list (not the input list) so the printed order
# actually reflects the natural sort announced by the heading above.
for sorted_path in sorted_files:
    transformed = transform_chain(sorted_path)
    print(f" {sorted_path} -> '{transformed}'")
from natsort import numeric_regex_chooser, ns
import re

# Explore different regex patterns for number matching
algorithms = [
    ('INT (default)', ns.INT),
    ('FLOAT', ns.FLOAT),
    ('SIGNED', ns.SIGNED),
    ('REAL (FLOAT|SIGNED)', ns.REAL),
    ('FLOAT without exponents', ns.FLOAT | ns.NOEXP),
]
test_string = "item-1.5e+3_version2.10_beta"

print("Regular expression patterns and matches:")
for name, alg in algorithms:
    # Ask natsort which pattern it would use for this flag combination,
    # then show what that pattern extracts from the sample string.
    pattern = numeric_regex_chooser(alg)
    matches = re.findall(pattern, test_string)
    print(f"{name:25}: {pattern}")
    print(f"{'':25} Matches: {matches}")
    print()
# Examples of using the natsort command-line interface
# Note: These would be run from the command line, not in Python
"""
# Basic usage - sort lines from a file
$ cat data.txt
item10
item2
item1
item20
$ natsort < data.txt
item1
item2
item10
item20
# Sort file paths
$ natsort --paths folder/file10.txt folder/file2.txt folder/file1.txt
folder/file1.txt
folder/file2.txt
folder/file10.txt
# Sort with real numbers and reverse order
$ echo -e "val-1.5\nval2.3\nval-0.8" | natsort --number-type real --reverse
val2.3
val-0.8
val-1.5
# Filter by numeric range
$ echo -e "item1\nitem25\nitem5\nitem30" | natsort --filter 1 10
item1
item5
# Exclude specific numbers
$ echo -e "test1\ntest2\ntest3\ntest10" | natsort --exclude 2
test1
test3
test10
# Locale-aware sorting (results depend on system locale)
$ echo -e "café\nnaive\nresume" | natsort --locale
"""
# Programmatic access to CLI functionality
from natsort.__main__ import main
import sys
from io import StringIO

# Capture stdout to test CLI functionality
old_stdout = sys.stdout
sys.stdout = captured_output = StringIO()
try:
    # Simulate command line arguments
    main('--number-type', 'real', '--reverse')
    # Note: This would normally read from stdin
except SystemExit:
    pass  # CLI exits normally
finally:
    # Restore stdout even if main() raises something other than SystemExit,
    # so a failure here cannot leave the interpreter printing into the buffer.
    sys.stdout = old_stdout
output = captured_output.getvalue()
from natsort import natsorted, chain_functions, as_utf8
import unicodedata
# Sample international file names: accented and unaccented spellings in
# mixed letter case, each carrying a version number.
international_files = [
    "Résumé_v1.10.pdf",
    "résumé_v1.2.pdf",
    "NAÏVE_algorithm_v2.1.txt",
    "naïve_algorithm_v1.0.txt",
]
# Create a complex processing chain
def normalize_unicode(text: str) -> str:
    """Return *text* normalized to Unicode NFD (canonical decomposition).

    In NFD an accented character is represented as its base character
    followed by separate combining marks, which lets a later step drop the
    marks on their own.

    Parameters:
    - text: str - The string to normalize

    Returns:
    str - The NFD-normalized string
    """
    return unicodedata.normalize('NFD', text)
def remove_accents(text: str) -> str:
    """Return *text* with every nonspacing mark (category ``Mn``) removed.

    Only standalone combining marks are dropped, so the input should already
    be in a decomposed form such as NFD; a precomposed accented character is
    a single letter code point and passes through unchanged.

    Parameters:
    - text: str - The (decomposed) string to strip

    Returns:
    str - The string without nonspacing combining marks
    """
    return ''.join(c for c in text if unicodedata.category(c) != 'Mn')
# Chain transformations: decode -> normalize -> remove accents -> lowercase
# Order matters: accents can only be stripped after NFD normalization has
# split them off their base characters.
text_processor = chain_functions([
    as_utf8,              # Ensure proper string type
    normalize_unicode,    # Normalize unicode representation
    remove_accents,       # Remove accent marks
    lambda x: x.lower(),  # Convert to lowercase
])

# Sort using processed text as key
sorted_international = natsorted(international_files, key=text_processor)

print("Original -> Processed key:")
for filename in international_files:
    processed = text_processor(filename)
    print(f" {filename} -> {processed}")
print(f"\nSorted order: {sorted_international}")
from natsort import natsorted, chain_functions
import json
from pathlib import Path
# Simulate a data processing pipeline
def process_log_files(directory):
    """Process log files in natural order.

    Collects every ``*.log`` file directly inside *directory*, sorts them
    naturally by lower-cased file stem, and builds one summary record per
    file.

    Parameters:
    - directory: str or Path - Directory to scan for ``*.log`` files

    Returns:
    list[dict] - One dict per file with keys 'filename', 'size' and
    'processed', in natural sort order
    """
    # Get all log files
    log_files = list(Path(directory).glob('*.log'))

    # Create key function for sorting: filename without extension, naturally
    filename_key = chain_functions([
        lambda x: x.stem,  # Get filename without extension
        str.lower,         # Case-insensitive
    ])

    # Sort files naturally
    sorted_files = natsorted(log_files, key=filename_key)

    results = []
    for log_file in sorted_files:
        # Ask for the size directly and treat a missing file as size 0; a
        # separate exists() check could race with the file being removed
        # between the check and the stat() call.
        try:
            size = log_file.stat().st_size
        except FileNotFoundError:
            size = 0

        # Process each file (simulated)
        results.append({
            'filename': log_file.name,
            'size': size,
            'processed': True,
        })
    return results
# Example usage (would work with real directory)
# results = process_log_files('/var/log/')
# print(json.dumps(results, indent=2))
Install with Tessl CLI
npx tessl i tessl/pypi-natsort