0
# String and Text Processing
1
2
String manipulation utilities for indenting, dedenting, joining, coloring, and syntax-highlighting text, intended for formatting code and documentation.
3
4
## Capabilities
5
6
### Text Formatting
7
8
Functions for formatting and manipulating text with proper indentation and whitespace handling.
9
10
```python { .api }
11
def indent(text, prefix='    '):
12
"""
13
Indent text by adding prefix to each line.
14
15
Args:
16
text (str): Text to indent
17
prefix (str): Prefix to add to each line (default: 4 spaces)
18
19
Returns:
20
str: Indented text
21
"""
22
23
def codeblock(text):
24
"""
25
Remove common leading whitespace from text block.
26
Similar to textwrap.dedent, but also strips leading and trailing newlines.
27
28
Args:
29
text (str): Text block to dedent
30
31
Returns:
32
str: Text with common indentation removed
33
"""
34
35
def paragraph(text):
36
"""
37
Format text as a single paragraph by collapsing line breaks and runs of whitespace into single spaces and stripping the ends.
38
39
Args:
40
text (str): Text to format as paragraph
41
42
Returns:
43
str: Single-line paragraph text
44
"""
45
```
46
47
### Text Concatenation
48
49
Functions for combining and arranging text horizontally and vertically.
50
51
```python { .api }
52
def hzcat(args, sep='', **kwargs):
53
"""
54
Horizontally concatenate strings with alignment options.
55
56
Args:
57
args: List of strings or string-like objects
58
sep (str): Separator between columns
59
**kwargs: Additional formatting options
60
61
Returns:
62
str: Horizontally concatenated text
63
"""
64
```
65
66
### Color and Highlighting
67
68
Text coloring and syntax highlighting utilities.
69
70
```python { .api }
71
def color_text(text, color):
72
"""
73
Color text with ANSI color codes.
74
75
Args:
76
text (str): Text to color
77
color (str): Color name or ANSI code
78
79
Returns:
80
str: Colored text with ANSI codes
81
82
Note:
83
Respects NO_COLOR environment variable
84
"""
85
86
def highlight_code(text, lexer_name='python', **kwargs):
87
"""
88
Syntax highlight code with ANSI colors.
89
90
Args:
91
text (str): Code to highlight
92
lexer_name (str): Language lexer ('python', 'bash', 'json', etc.)
93
**kwargs: Additional highlighting options
94
95
Returns:
96
str: Syntax highlighted text
97
98
Note:
99
Requires pygments package for full functionality
100
Falls back to plain text if pygments unavailable
101
"""
102
103
# Global color control
104
NO_COLOR: bool # Global flag to disable ANSI coloring
105
```
106
107
### Deprecated String Functions
108
109
```python { .api }
110
def ensure_unicode(text):
111
"""
112
Ensure text is unicode string.
113
114
DEPRECATED: Python 3 strings are unicode by default.
115
116
Args:
117
text: Text to convert
118
119
Returns:
120
str: Unicode string
121
"""
122
```
123
124
## Usage Examples
125
126
### Text Indentation and Formatting
127
128
```python
129
import ubelt as ub
130
131
# Basic indentation
132
code = """def hello():
133
print("Hello, World!")
134
return True"""
135
136
indented = ub.indent(code)
137
print("Indented code:")
138
print(indented)
139
# Output:
140
# def hello():
141
# print("Hello, World!")
142
# return True
143
144
# Custom indentation prefix
145
double_indented = ub.indent(code, prefix='        ') # 8 spaces
146
tab_indented = ub.indent(code, prefix='\t') # Tab
147
148
# Remove common indentation
149
messy_code = """ def function():
150
print("hello")
151
if True:
152
return 42"""
153
154
clean_code = ub.codeblock(messy_code)
155
print("Cleaned code:")
156
print(clean_code)
157
# Output:
158
# def function():
159
# print("hello")
160
# if True:
161
# return 42
162
```
163
164
### Code Block Processing
165
166
```python
167
import ubelt as ub
168
169
# Process multi-line string literals
170
def format_docstring(docstring):
171
"""Format a docstring by removing common indentation"""
172
# Remove first and last empty lines
173
lines = docstring.strip().split('\n')
174
if not lines[0].strip():
175
lines = lines[1:]
176
if lines and not lines[-1].strip():
177
lines = lines[:-1]
178
179
# Remove common indentation
180
text = '\n'.join(lines)
181
return ub.codeblock(text)
182
183
example_docstring = """
184
This is a function that does something.
185
186
Args:
187
param1: First parameter
188
param2: Second parameter
189
190
Returns:
191
The result of the operation
192
"""
193
194
formatted = format_docstring(example_docstring)
195
print(formatted)
196
```
197
198
### Paragraph Formatting
199
200
```python
201
import ubelt as ub
202
203
# Convert multi-line text to paragraph
204
long_text = """This is a long piece of text
205
that spans multiple lines
206
but should be formatted
207
as a single paragraph."""
208
209
paragraph_text = ub.paragraph(long_text)
210
print(paragraph_text)
211
# Output: "This is a long piece of text that spans multiple lines but should be formatted as a single paragraph."
212
213
# Useful for documentation formatting
214
def format_description(desc):
215
"""Format multi-line description as paragraph"""
216
return ub.paragraph(ub.codeblock(desc))
217
218
description = """
219
This function performs complex operations
220
on the input data and returns
221
a processed result.
222
"""
223
224
formatted_desc = format_description(description)
225
print(formatted_desc)
226
```
227
228
### Horizontal Text Concatenation
229
230
```python
231
import ubelt as ub
232
233
# Side-by-side text display
234
left_text = """Line 1
235
Line 2
236
Line 3"""
237
238
right_text = """Column A
239
Column B
240
Column C"""
241
242
combined = ub.hzcat([left_text, right_text], sep=' | ')
243
print(combined)
244
# Output:
245
# Line 1 | Column A
246
# Line 2 | Column B
247
# Line 3 | Column C
248
249
# Multiple columns
250
col1 = "A\nB\nC"
251
col2 = "1\n2\n3"
252
col3 = "X\nY\nZ"
253
254
table = ub.hzcat([col1, col2, col3], sep=' ')
255
print(table)
256
# Output:
257
# A 1 X
258
# B 2 Y
259
# C 3 Z
260
```
261
262
### Text Coloring
263
264
```python
265
import ubelt as ub
266
267
# Basic text coloring
268
red_text = ub.color_text("Error: Something went wrong", 'red')
269
green_text = ub.color_text("Success: Operation completed", 'green')
270
blue_text = ub.color_text("Info: Processing data", 'blue')
271
272
print(red_text)
273
print(green_text)
274
print(blue_text)
275
276
# Conditional coloring based on status
277
def status_message(message, status):
278
"""Print colored status message"""
279
color_map = {
280
'error': 'red',
281
'success': 'green',
282
'warning': 'yellow',
283
'info': 'blue'
284
}
285
color = color_map.get(status, 'white')
286
return ub.color_text(f"{status.upper()}: {message}", color)
287
288
print(status_message("File not found", 'error'))
289
print(status_message("Data saved successfully", 'success'))
290
print(status_message("Memory usage high", 'warning'))
291
```
292
293
### Code Syntax Highlighting
294
295
```python
296
import ubelt as ub
297
298
# Highlight Python code
299
python_code = '''
300
def fibonacci(n):
301
if n <= 1:
302
return n
303
return fibonacci(n-1) + fibonacci(n-2)
304
305
# Calculate first 10 fibonacci numbers
306
for i in range(10):
307
print(f"fib({i}) = {fibonacci(i)}")
308
'''
309
310
highlighted = ub.highlight_code(python_code, lexer_name='python')
311
print(highlighted)
312
313
# Highlight other languages
314
json_data = '''
315
{
316
"name": "John Doe",
317
"age": 30,
318
"city": "New York",
319
"hobbies": ["reading", "swimming", "coding"]
320
}
321
'''
322
323
highlighted_json = ub.highlight_code(json_data, lexer_name='json')
324
print(highlighted_json)
325
326
# Bash highlighting
327
bash_script = '''
328
#!/bin/bash
329
for file in *.txt; do
330
echo "Processing $file"
331
wc -l "$file"
332
done
333
'''
334
335
highlighted_bash = ub.highlight_code(bash_script, lexer_name='bash')
336
print(highlighted_bash)
337
```
338
339
### Advanced Text Processing
340
341
```python
342
import ubelt as ub
343
344
# Create formatted code documentation
345
def create_code_doc(title, code, description):
346
"""Create formatted documentation with highlighted code"""
347
348
# Format title
349
title_line = ub.color_text(title, 'blue')
350
separator = '=' * len(title)
351
352
# Clean and highlight code
353
clean_code = ub.codeblock(code)
354
highlighted_code = ub.highlight_code(clean_code, lexer_name='python')
355
356
# Format description
357
desc_paragraph = ub.paragraph(ub.codeblock(description))
358
359
# Combine all parts
360
parts = [
361
title_line,
362
separator,
363
'',
364
desc_paragraph,
365
'',
366
'Code:',
367
highlighted_code,
368
''
369
]
370
371
return '\n'.join(parts)
372
373
# Example usage
374
example_code = '''
375
def quicksort(arr):
376
if len(arr) <= 1:
377
return arr
378
pivot = arr[len(arr) // 2]
379
left = [x for x in arr if x < pivot]
380
middle = [x for x in arr if x == pivot]
381
right = [x for x in arr if x > pivot]
382
return quicksort(left) + middle + quicksort(right)
383
'''
384
385
description = '''
386
This function implements the quicksort algorithm
387
using a divide-and-conquer approach. It selects
388
a pivot element and partitions the array accordingly.
389
'''
390
391
doc = create_code_doc("Quicksort Implementation", example_code, description)
392
print(doc)
393
```
394
395
### Text Layout and Alignment
396
397
```python
398
import ubelt as ub
399
400
# Create aligned columns for data display
401
def create_table(headers, rows):
402
"""Create aligned table from headers and rows"""
403
404
# Convert all data to strings
405
str_headers = [str(h) for h in headers]
406
str_rows = [[str(cell) for cell in row] for row in rows]
407
408
# Calculate column widths
409
all_rows = [str_headers] + str_rows
410
col_widths = []
411
for col_idx in range(len(str_headers)):
412
max_width = max(len(row[col_idx]) for row in all_rows)
413
col_widths.append(max_width)
414
415
# Format rows
416
formatted_rows = []
417
for row in all_rows:
418
padded_cells = []
419
for cell, width in zip(row, col_widths):
420
padded_cells.append(cell.ljust(width))
421
formatted_rows.append(' | '.join(padded_cells))
422
423
# Add separator
424
separator = ' | '.join('-' * width for width in col_widths)
425
result = [formatted_rows[0], separator] + formatted_rows[1:]
426
427
return '\n'.join(result)
428
429
# Example data
430
headers = ['Name', 'Age', 'City']
431
data = [
432
['Alice', 25, 'New York'],
433
['Bob', 30, 'San Francisco'],
434
['Charlie', 35, 'Chicago']
435
]
436
437
table = create_table(headers, data)
438
print(table)
439
```