Python package for manipulating 2-dimensional tabular data structures with emphasis on speed and big data support
—
Text processing and manipulation functions for string columns in datatable.
def str.len(x):
"""
String length function.
Parameters:
- x: String column expression
Returns:
Integer column with string lengths
"""
def str.slice(x, start, stop=None):
"""
String slicing function.
Parameters:
- x: String column expression
- start: Starting index
- stop: Ending index (optional)
Returns:
String column with sliced strings
"""
def str.split_into_nhot(x):
"""
Split strings into n-hot encoding.
Parameters:
- x: String column expression
Returns:
Frame with n-hot encoded columns
"""
def re.match(x, pattern):
"""
Regular expression matching.
Parameters:
- x: String column expression
- pattern: Regular expression pattern
Returns:
Boolean column indicating matches
"""
import datatable as dt
from datatable import f
DT = dt.Frame({
'text': ['hello', 'world', 'datatable', 'python'],
'codes': ['ABC-123', 'DEF-456', 'GHI-789', 'JKL-012']
})
# String operations (dt.update() adds the new columns to DT in place)
result = DT[:, dt.update(
text_length=dt.str.len(f.text),
first_3_chars=dt.str.slice(f.text, 0, 3),
last_2_chars=dt.str.slice(f.text, -2),
matches_pattern=dt.re.match(f.codes, r'[A-Z]{3}-\d{3}')
)]
Install with Tessl CLI
npx tessl i tessl/pypi-datatable