Tessl Tile for pypi/datatable@1.1.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

core-data-structures.md data-manipulation.md expression-system.md file-io.md index.md mathematical-functions.md reductions-aggregations.md row-operations.md set-operations.md string-operations.md time-operations.md type-system.md

type-system.md — docs/

0
# Type System
1

2
datatable provides a comprehensive type system built on storage types (`stype`) and logical types (`ltype`), giving precise control over data types and efficient memory usage.
3

4
## Capabilities
5

6
### Storage Types (stype)
7

8
```python { .api }
9
class stype(Enum):
10
    """Storage type enumeration for precise memory layout control"""
11
    
12
    void = 0       # No data
13
    bool8 = 1      # 8-bit boolean
14
    int8 = 2       # 8-bit signed integer
15
    int16 = 3      # 16-bit signed integer  
16
    int32 = 4      # 32-bit signed integer
17
    int64 = 5      # 64-bit signed integer
18
    float32 = 6    # 32-bit floating point
19
    float64 = 7    # 64-bit floating point
20
    str32 = 11     # String with 32-bit offsets
21
    str64 = 12     # String with 64-bit offsets
22
    arr32 = 13     # Array with 32-bit offsets
23
    arr64 = 14     # Array with 64-bit offsets
24
    date32 = 17    # Date (days since epoch)
25
    time64 = 18    # Timestamp (nanoseconds since epoch)
26
    obj64 = 21     # Python object references
27
    cat8 = 22      # Categorical with 8-bit codes
28
    cat16 = 23     # Categorical with 16-bit codes
29
    cat32 = 24     # Categorical with 32-bit codes
30
    
31
    @property
32
    def code(self) -> str:
33
        """Two-character string representation"""
34
    
35
    @property
36
    def ltype(self) -> 'ltype':
37
        """Corresponding logical type"""
38
    
39
    @property
40
    def ctype(self):
41
        """ctypes class for C-level type"""
42
    
43
    @property
44
    def dtype(self):
45
        """numpy.dtype equivalent"""
46
    
47
    @property
48
    def min(self):
49
        """Minimum representable value"""
50
    
51
    @property
52
    def max(self):
53
        """Maximum representable value"""
54
```
55

56
### Logical Types (ltype)
57

58
```python { .api }
59
class ltype(Enum):
60
    """Logical type enumeration for high-level data categories"""
61
    
62
    void = 0       # No data
63
    bool = 1       # Boolean values
64
    int = 2        # Integer values  
65
    real = 3       # Real/floating point values
66
    str = 4        # String/text values
67
    time = 5       # Date/time values
68
    obj = 7        # Object values
69
    invalid = 8    # Invalid/unsupported type
70
    
71
    @property
72
    def stypes(self) -> list:
73
        """List of stypes that represent this ltype"""
74
```
75

76
### Type Conversion
77

78
```python { .api }
79
def as_type(frame_or_column, new_type) -> Frame:
80
    """
81
    Convert frame or column to specified type.
82
    
83
    Parameters:
84
    - frame_or_column: Frame or column expression to convert
85
    - new_type: Target stype, ltype, or Type object
86
    
87
    Returns:
88
    Frame or expression with converted types
89
    """
90

91
class Type:
92
    """Type system helper for datatable operations"""
93
    pass
94

95
def categories(column) -> Frame:
96
    """
97
    Extract category labels from categorical column.
98
    
99
    Parameters:
100
    - column: Categorical column expression
101
    
102
    Returns:
103
    Frame with unique category labels
104
    """
105

106
def codes(column) -> FExpr:
107
    """
108
    Extract category codes from categorical column.
109
    
110
    Parameters:
111
    - column: Categorical column expression
112
    
113
    Returns:
114
    Integer codes for categorical values
115
    """
116
```
117

118
## Type Examples
119

120
### Working with Storage Types
121

122
```python
123
import datatable as dt
124

125
# Create Frame with specific types
126
DT = dt.Frame({
127
    'small_int': [1, 2, 3],
128
    'big_int': [1000000, 2000000, 3000000],
129
    'text': ['a', 'b', 'c'],
130
    'flag': [True, False, True]
131
}, stypes=[dt.int8, dt.int64, dt.str32, dt.bool8])
132

133
# Check types
134
print(DT.stypes)    # (stype.int8, stype.int64, stype.str32, stype.bool8)
135
print(DT.ltypes)    # (ltype.int, ltype.int, ltype.str, ltype.bool)
136

137
# Access type properties
138
print(dt.int8.min, dt.int8.max)        # (-127, 127)
139
print(dt.int64.min, dt.int64.max)      # Large integer bounds
140
print(dt.str32.code)                   # 's4'
141
```
142

143
### Type Conversion Examples
144

145
```python
146
# Convert specific columns
147
DT_converted = DT[:, dt.update(
148
    small_as_big=dt.as_type(f.small_int, dt.int64),
149
    big_as_float=dt.as_type(f.big_int, dt.float64),
150
    text_as_cat=dt.as_type(f.text, dt.cat8)
151
)]
152

153
# Convert entire frame
154
DT_all_float = dt.as_type(DT, dt.float64)
155

156
# Convert with expressions
157
DT_conditional = DT[:, dt.update(
158
    smart_type=dt.ifelse(f.big_int > 1500000,
159
                        dt.as_type(f.big_int, dt.float32),
160
                        dt.as_type(f.big_int, dt.int32))
161
)]
162
```
163

164
### Memory Optimization
165

166
```python
167
# Use smaller types for memory efficiency
168
large_data = dt.Frame({
169
    'id': range(1000000),           # Default int32 (values fit in 32 bits)
170
    'category': ['A'] * 500000 + ['B'] * 500000,  # Default str32 (str64 only when offsets exceed 32 bits)
171
    'flag': [True, False] * 500000,  # Default bool8
172
    'small_val': [x % 100 for x in range(1000000)]  # Default int32
173
})
174

175
# Optimize memory usage
176
optimized = large_data[:, dt.update(
177
    id=dt.as_type(f.id, dt.int32),          # Sufficient for 1M records
178
    category=dt.as_type(f.category, dt.cat8), # Categorical for repeated values
179
    small_val=dt.as_type(f.small_val, dt.int8)  # Values 0-99 fit in int8
180
)]
181

182
# Check memory savings
183
print(f"Original stypes: {large_data.stypes}")
184
print(f"Optimized stypes: {optimized.stypes}")
185
```
186

187
### Date and Time Types
188

189
```python
190
# Working with temporal data
191
dates = dt.Frame({
192
    'date_str': ['2023-01-01', '2023-06-15', '2023-12-31'],
193
    'timestamp_str': ['2023-01-01 12:30:45', '2023-06-15 09:15:20', '2023-12-31 23:59:59']
194
})
195

196
# Convert to temporal types
197
temporal = dates[:, dt.update(
198
    date_val=dt.as_type(f.date_str, dt.date32),
199
    timestamp_val=dt.as_type(f.timestamp_str, dt.time64)
200
)]
201

202
# Extract components
203
components = temporal[:, dt.update(
204
    year=dt.time.year(f.timestamp_val),
205
    month=dt.time.month(f.timestamp_val),
206
    day=dt.time.day(f.timestamp_val),
207
    hour=dt.time.hour(f.timestamp_val)
208
)]
209
```
210

211
### String Type Optimization
212

213
```python
214
# Choose appropriate string type based on data size
215
short_strings = dt.Frame({'text': ['a', 'bb', 'ccc']})
216
long_strings = dt.Frame({'text': ['very long string' * 100] * 1000})
217

218
# str32 for smaller datasets/strings
219
short_optimized = dt.as_type(short_strings, {'text': dt.str32})
220

221
# str64 for larger datasets/strings  
222
long_optimized = dt.as_type(long_strings, {'text': dt.str64})
223

224
# Check string properties
225
print(f"str32 supports up to {2**31-1} characters")
226
print(f"str64 supports up to {2**63-1} characters")
227
```
228

229
### Categorical Types
230

231
```python
232
# Convert repeated strings to categorical
233
categories = dt.Frame({
234
    'color': ['red', 'blue', 'green'] * 10000,
235
    'size': ['small', 'medium', 'large'] * 10000
236
})
237

238
# Use categorical types for memory efficiency
239
categorical = categories[:, dt.update(
240
    color_cat=dt.as_type(f.color, dt.cat8),    # Up to 255 categories
241
    size_cat=dt.as_type(f.size, dt.cat8)
242
)]
243

244
# Access categorical information
245
color_codes = categorical[:, dt.codes(f.color_cat)]
246
color_categories = categorical[:, dt.categories(f.color_cat)]
247
```
248

249
### Type Checking and Validation
250

251
```python
252
def validate_types(frame, expected_types):
253
    """Validate frame has expected types"""
254
    actual_types = frame.stypes
255
    for i, (actual, expected) in enumerate(zip(actual_types, expected_types)):
256
        if actual != expected:
257
            column_name = frame.names[i]
258
            print(f"Column {column_name}: expected {expected}, got {actual}")
259
            return False
260
    return True
261

262
# Usage
263
DT = dt.Frame({'A': [1, 2, 3], 'B': [1.1, 2.2, 3.3]})
264
is_valid = validate_types(DT, [dt.int64, dt.float64])
265
```
266

267
### Automatic Type Detection
268

269
```python
270
# datatable automatically detects appropriate types
271
mixed_data = dt.Frame({
272
    'integers': [1, 2, 3, 4],
273
    'floats': [1.1, 2.2, 3.3, 4.4],
274
    'strings': ['a', 'b', 'c', 'd'],
275
    'booleans': [True, False, True, False],
276
    'mixed_numbers': [1, 2.5, 3, 4.7]  # Will be float64
277
})
278

279
print("Auto-detected types:", mixed_data.stypes)
280

281
# Override auto-detection
282
explicit_types = dt.Frame({
283
    'integers': [1, 2, 3, 4],
284
    'floats': [1.1, 2.2, 3.3, 4.4]
285
}, stypes=[dt.int32, dt.float32])
286
```
287

288
### Type Compatibility and Coercion
289

290
```python
291
# Type promotion in operations
292
int_col = dt.Frame({'x': [1, 2, 3]}, stype=dt.int32)
293
float_col = dt.Frame({'y': [1.1, 2.2, 3.3]}, stype=dt.float32)
294

295
# Operations promote to common type
296
combined = dt.cbind(int_col, float_col)
297
result = combined[:, f.x + f.y]  # Result will be float64
298

299
# Explicit control over type promotion
300
result_controlled = combined[:, 
301
    dt.as_type(f.x, dt.float32) + f.y  # Keep as float32
302
]
303
```
304

305
## Type Constants
306

307
The following type constants are available directly from the datatable module:
308

309
```python
310
# Available as dt.typename
311
dt.void, dt.bool8
312
dt.int8, dt.int16, dt.int32, dt.int64
313
dt.float32, dt.float64
314
dt.str32, dt.str64
315
dt.obj64
316
```

Version

Tile

Files

type-system.md — docs/

type-system.md — docs/