Tessl Tile for pypi/boltons@24.1.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

additional-utilities.md caching.md data-structures.md development-debugging-tools.md file-io-operations.md format-table-utilities.md index.md iteration-processing.md math-stats-operations.md network-url-handling.md string-text-processing.md time-date-utilities.md

string-text-processing.mddocs/

0
# String & Text Processing
1

2
Comprehensive text manipulation including case conversion, slugification, text formatting, HTML processing, ANSI handling, compression, and advanced string operations with internationalization support. Provides utilities for common text processing tasks with robust encoding and formatting capabilities.
3

4
## Capabilities
5

6
### Case Conversion
7

8
Convert between different string case formats.
9

10
```python { .api }
11
def camel2under(camel_string):
12
    """
13
    Convert CamelCase to under_score.
14
    
15
    Parameters:
16
    - camel_string (str): CamelCase string to convert
17
    
18
    Returns:
19
    str: Converted under_score string
20
    """
21

22
def under2camel(under_string):
23
    """
24
    Convert under_score to CamelCase.
25
    
26
    Parameters:
27
    - under_string (str): under_score string to convert
28
    
29
    Returns:
30
    str: Converted CamelCase string
31
    """
32
```
33

34
### Text Slugification
35

36
Convert text to URL-safe slugs and identifiers.
37

38
```python { .api }
39
def slugify(text, delim='_', lower=True, ascii=False):
40
    """
41
    Convert text to URL-safe slug.
42
    
43
    Parameters:
44
    - text (str): Text to slugify
45
    - delim (str): Delimiter character (default: '_')
46
    - lower (bool): Convert to lowercase (default: True)
47
    - ascii (bool): Force ASCII output (default: False)
48
    
49
    Returns:
50
    str: URL-safe slug
51
    """
52

53
def a10n(string):
54
    """
55
    Create internationalization-style abbreviation (a11y, i18n, etc.).
56
    
57
    Parameters:
58
    - string (str): String to abbreviate
59
    
60
    Returns:
61
    str: Abbreviated form (first + count + last)
62
    """
63
```
64

65
### Text Formatting and Manipulation
66

67
Advanced text processing and formatting utilities.
68

69
```python { .api }
70
def split_punct_ws(text):
71
    """
72
    Split text on punctuation and whitespace.
73
    
74
    Parameters:
75
    - text (str): Text to split
76
    
77
    Returns:
78
    list: List of text segments
79
    """
80

81
def unit_len(sized_iterable, unit_noun='item'):
82
    """
83
    Format count with unit noun.
84
    
85
    Parameters:
86
    - sized_iterable: Iterable with __len__
87
    - unit_noun (str): Singular noun for the unit
88
    
89
    Returns:
90
    str: Formatted count with proper pluralization
91
    """
92

93
def ordinalize(number, ext_only=False):
94
    """
95
    Convert number to ordinal (1st, 2nd, etc.).
96
    
97
    Parameters:
98
    - number (int): Number to convert
99
    - ext_only (bool): Return only the suffix (default: False)
100
    
101
    Returns:
102
    str: Ordinal number or suffix
103
    """
104

105
def cardinalize(unit_noun, count):
106
    """
107
    Pluralize unit noun based on count.
108
    
109
    Parameters:
110
    - unit_noun (str): Singular noun
111
    - count (int): Count to determine pluralization
112
    
113
    Returns:
114
    str: Properly pluralized noun
115
    """
116

117
def singularize(word):
118
    """
119
    Convert plural word to singular form.
120
    
121
    Parameters:
122
    - word (str): Plural word
123
    
124
    Returns:
125
    str: Singular form
126
    """
127

128
def pluralize(word):
129
    """
130
    Convert singular word to plural form.
131
    
132
    Parameters:
133
    - word (str): Singular word
134
    
135
    Returns:
136
    str: Plural form
137
    """
138
```
139

140
### Text Analysis and Extraction
141

142
Extract and analyze text content.
143

144
```python { .api }
145
def find_hashtags(string):
146
    """
147
    Extract hashtags from text.
148
    
149
    Parameters:
150
    - string (str): Text containing hashtags
151
    
152
    Returns:
153
    list: List of hashtag strings (without the leading #)
154
    """
155

156
def is_uuid(string):
157
    """
158
    Check if string is valid UUID format.
159
    
160
    Parameters:
161
    - string (str): String to check
162
    
163
    Returns:
164
    bool: True if valid UUID format
165
    """
166

167
def is_ascii(text):
168
    """
169
    Check if text contains only ASCII characters.
170
    
171
    Parameters:
172
    - text (str): Text to check
173
    
174
    Returns:
175
    bool: True if text is ASCII-only
176
    """
177
```
178

179
### Text Cleaning and Normalization
180

181
Clean and normalize text content.
182

183
```python { .api }
184
def strip_ansi(text):
185
    """
186
    Remove ANSI escape sequences from text.
187
    
188
    Parameters:
189
    - text (str): Text with ANSI sequences
190
    
191
    Returns:
192
    str: Text with ANSI sequences removed
193
    """
194

195
def asciify(text, ignore=False):
196
    """
197
    Convert text to ASCII by removing diacritics.
198
    
199
    Parameters:
200
    - text (str): Text to convert
201
    - ignore (bool): Ignore non-convertible characters
202
    
203
    Returns:
204
    bytes: ASCII-only bytestring
205
    """
206

207
def unwrap_text(text, **kwargs):
208
    """
209
    Unwrap text by removing line breaks.
210
    
211
    Parameters:
212
    - text (str): Text to unwrap
213
    
214
    Returns:
215
    str: Text with line breaks removed appropriately
216
    """
217

218
def indent(text, prefix):
219
    """
220
    Indent text lines with prefix.
221
    
222
    Parameters:
223
    - text (str): Text to indent  
224
    - prefix (str): Prefix to add to each line
225
    
226
    Returns:
227
    str: Indented text
228
    """
229
```
230

231
### HTML Processing
232

233
Extract and process HTML content.
234

235
```python { .api }
236
def html2text(html_text):
237
    """
238
    Extract plain text from HTML string.
239
    
240
    Parameters:
241
    - html_text (str): HTML content
242
    
243
    Returns:
244
    str: Plain text content
245
    """
246

247
class HTMLTextExtractor(HTMLParser):
248
    """Extract plain text from HTML."""
249
    def __init__(self): ...
250
    def handle_data(self, data): ...
251
    def get_text(self): ...
252
```
253

254
### Data Formatting
255

256
Format data for human consumption.
257

258
```python { .api }
259
def bytes2human(nbytes, ndigits=0):
260
    """
261
    Convert bytes to human readable format.
262
    
263
    Parameters:
264
    - nbytes (int): Number of bytes
265
    - ndigits (int): Number of decimal places
266
    
267
    Returns:
268
    str: Human readable size (e.g., "1.5M")
269
    """
270
```
271

272
### Compression
273

274
Text compression and decompression utilities.
275

276
```python { .api }
277
def gunzip_bytes(data):
278
    """
279
    Decompress gzip bytes.
280
    
281
    Parameters:
282
    - data (bytes): Gzipped data
283
    
284
    Returns:
285
    bytes: Decompressed data
286
    """
287

288
def gzip_bytes(data):
289
    """
290
    Compress data to gzip bytes.
291
    
292
    Parameters:
293
    - data (bytes): Data to compress
294
    
295
    Returns:
296
    bytes: Gzipped data
297
    """
298
```
299

300
### String Replacement
301

302
Efficient multiple string replacement operations.
303

304
```python { .api }
305
def multi_replace(input_string, sub_map, **kwargs):
306
    """
307
    Efficient multiple string replacement.
308
    
309
    Parameters:
310
    - input_string (str): String to process
311
    - sub_map (dict): Mapping of old -> new strings
312
    
313
    Returns:
314
    str: String with all replacements made
315
    """
316

317
class MultiReplace:
318
    """Efficient multiple string replacement."""
319
    def __init__(self, sub_map): ...
320
    def __call__(self, input_string): ...
321
```
322

323
### Shell Command Processing
324

325
Escape and format shell command arguments.
326

327
```python { .api }
328
def escape_shell_args(args, sep=' ', style=None):
329
    """
330
    Escape shell command arguments.
331
    
332
    Parameters:
333
    - args (list): List of arguments
334
    - sep (str): Separator between arguments
335
    - style (str): Shell style ('sh', 'cmd', etc.)
336
    
337
    Returns:
338
    str: Escaped shell command string
339
    """
340

341
def args2sh(args, sep=' '):
342
    """
343
    Convert args to shell-escaped string.
344
    
345
    Parameters:
346
    - args (list): List of arguments
347
    - sep (str): Separator between arguments
348
    
349
    Returns:
350
    str: Shell-escaped command string
351
    """
352

353
def args2cmd(args, sep=' '):
354
    """
355
    Convert args to cmd.exe-escaped string.
356
    
357
    Parameters:
358
    - args (list): List of arguments
359
    - sep (str): Separator between arguments
360
    
361
    Returns:
362
    str: CMD-escaped command string
363
    """
364
```
365

366
### Integer List Processing
367

368
Parse and format integer ranges and lists.
369

370
```python { .api }
371
def parse_int_list(range_string, **kwargs):
372
    """
373
    Parse integer ranges from string.
374
    
375
    Parameters:
376
    - range_string (str): String like "1-5,7,9-12"
377
    
378
    Returns:
379
    list: List of integers
380
    """
381

382
def format_int_list(int_list, **kwargs):
383
    """
384
    Format integer list as range string.
385
    
386
    Parameters:
387
    - int_list (list): List of integers
388
    
389
    Returns:
390
    str: Formatted range string
391
    """
392

393
def complement_int_list(range_string, **kwargs):
394
    """
395
    Get complement of integer ranges.
396
    
397
    Parameters:
398
    - range_string (str): Range string to complement
399
    
400
    Returns:
401
    str: Complement range string
402
    """
403

404
def int_ranges_from_int_list(int_list):
405
    """
406
    Convert integer list to ranges.
407
    
408
    Parameters:
409
    - int_list (list): List of integers
410
    
411
    Returns:
412
    list: List of (start, end) tuples
413
    """
414
```
415

416
### Memory-Efficient Text Processing
417

418
Process large text files efficiently.
419

420
```python { .api }
421
def iter_splitlines(text):
422
    """
423
    Memory-efficient line iteration.
424
    
425
    Parameters:
426
    - text (str): Text to split into lines
427
    
428
    Yields:
429
    str: Each line
430
    """
431
```
432

433
## Usage Examples
434

435
```python
436
from boltons.strutils import (
437
    slugify, camel2under, under2camel, bytes2human,
438
    strip_ansi, html2text, multi_replace, find_hashtags
439
)
440

441
# Create URL-friendly slugs
442
title = "Hello, World! This is a test."
443
slug = slugify(title)
444
print(slug)  # "hello_world_this_is_a_test" (default delim is '_')
445

446
# Case conversion
447
camel = "myVariableName"
448
under = camel2under(camel)
449
print(under)  # "my_variable_name"
450

451
back_to_camel = under2camel(under)
452
print(back_to_camel)  # "MyVariableName"
453

454
# Human-readable byte sizes
455
size = bytes2human(1536)
456
print(size)  # "2K" (ndigits defaults to 0; bytes2human(1536, ndigits=1) gives "1.5K")
457

458
# Clean ANSI escape sequences
459
ansi_text = "\033[31mRed text\033[0m"
460
clean = strip_ansi(ansi_text)
461
print(clean)  # "Red text"
462

463
# Extract text from HTML
464
html = "<p>Hello <b>world</b>!</p>"
465
text = html2text(html)  
466
print(text)  # "Hello world!"
467

468
# Multiple string replacements
469
text = "Hello world, hello universe"
470
replacements = {"hello": "hi", "world": "earth"}
471
result = multi_replace(text, replacements)
472
print(result)  # "Hello earth, hi universe" (matching is case-sensitive)
473

474
# Find hashtags in text
475
social_text = "Check out #python and #boltons!"
476
tags = find_hashtags(social_text)
477
print(tags)  # ["python", "boltons"]
478
```
479

480
### Advanced Text Processing
481

482
```python
483
from boltons.strutils import (
484
    ordinalize, cardinalize, pluralize, singularize,
485
    parse_int_list, format_int_list, asciify
486
)
487

488
# Number formatting
489
print(ordinalize(1))   # "1st"
490
print(ordinalize(22))  # "22nd"
491
print(ordinalize(103)) # "103rd"
492

493
# Pluralization
494
print(cardinalize("item", 1))  # "item"
495
print(cardinalize("item", 5))  # "items"
496
print(pluralize("child"))      # "children"  
497
print(singularize("children")) # "child"
498

499
# Integer range processing
500
ranges = "1-5,7,9-12"
501
numbers = parse_int_list(ranges)
502
print(numbers)  # [1, 2, 3, 4, 5, 7, 9, 10, 11, 12]
503

504
formatted = format_int_list([1, 2, 3, 5, 6, 8])
505
print(formatted)  # "1-3,5-6,8"
506

507
# Text normalization
508
accented = "café résumé naïve"
509
ascii_text = asciify(accented)
510
print(ascii_text)  # b"cafe resume naive" (asciify returns bytes)
511
```
512

513
## Types
514

515
```python { .api }
516
# Character mapping for removing diacritics
517
class DeaccenterDict(dict):
518
    """Dictionary for character deaccenting mappings."""
519
    pass
520

521
# Regular expressions
522
HASHTAG_RE: re.Pattern        # Pattern for matching hashtags
523
ANSI_SEQUENCES: re.Pattern    # Pattern for ANSI escape sequences
524

525
# Character mappings
526
DEACCENT_MAP: dict  # Mapping for removing diacritical marks
527
```

Version

Tile

Files

docs/string-text-processing.md

string-text-processing.mddocs/