0
# Core Data Structures
1
2
The Frame class is datatable's main data structure for representing and manipulating 2-dimensional tabular data with high-performance columnar storage.
3
4
## Capabilities
5
6
### Frame Class
7
8
The primary data structure for tabular data with column-oriented storage, supporting various data types and high-performance operations.
9
10
```python { .api }
11
class Frame:
12
def __init__(self, data=None, *, names=None, stypes=None,
13
stype=None, types=None, type=None):
14
"""
15
Create a new Frame from various data sources.
16
17
Parameters:
18
- data: Data source (dict, list, numpy array, pandas DataFrame, etc.)
19
- names: Column names (list of strings)
20
- stypes: Column storage types (list of stype objects)
21
- stype: Single stype for all columns
22
- types: Alias for stypes
23
- type: Alias for stype
24
"""
25
26
# Properties
27
@property
28
def shape(self) -> tuple:
29
"""(nrows, ncols) tuple describing Frame dimensions"""
30
31
@property
32
def names(self) -> tuple:
33
"""Column names as a tuple of strings"""
34
35
@property
36
def stypes(self) -> tuple:
37
"""Column storage types as tuple of stype objects"""
38
39
@property
40
def ltypes(self) -> tuple:
41
"""Column logical types as tuple of ltype objects"""
42
43
@property
44
def nrows(self) -> int:
45
"""Number of rows"""
46
47
@property
48
def ncols(self) -> int:
49
"""Number of columns"""
50
51
# Data access and manipulation
52
def __getitem__(self, key):
53
"""Select rows and/or columns using various indexing methods"""
54
55
def __setitem__(self, key, value):
56
"""Update or add columns and rows"""
57
58
def __len__(self) -> int:
59
"""Number of rows in the Frame"""
60
61
# Conversion methods
62
def to_pandas(self) -> 'pandas.DataFrame':
63
"""Convert to pandas DataFrame"""
64
65
def to_numpy(self) -> 'numpy.ndarray':
66
"""Convert to numpy array"""
67
68
def to_dict(self) -> dict:
69
"""Convert to dictionary"""
70
71
def to_list(self) -> list:
72
"""Convert to list of lists"""
73
74
def to_csv(self, path=None, **kwargs):
75
"""Write Frame to CSV file or string"""
76
77
# Display methods
78
def head(self, n=10) -> 'Frame':
79
"""Return first n rows"""
80
81
def tail(self, n=10) -> 'Frame':
82
"""Return last n rows"""
83
84
def view(self, start_row=None, end_row=None):
85
"""Display Frame in terminal or notebook"""
86
87
# Statistical methods
88
def describe(self) -> 'Frame':
89
"""Generate descriptive statistics"""
90
91
def nunique(self) -> 'Frame':
92
"""Count unique values in each column"""
93
94
def countna(self) -> 'Frame':
95
"""Count missing values in each column"""
96
97
# Data manipulation
98
def copy(self, deep=False) -> 'Frame':
99
"""Create a copy of the Frame"""
100
101
def delete(self, rows=None, cols=None):
102
"""Delete specified rows and/or columns"""
103
104
def sort(self, *cols) -> 'Frame':
105
"""Sort Frame by specified columns"""
106
107
def unique(self, *cols) -> 'Frame':
108
"""Return unique rows based on specified columns"""
109
110
def group_by(self, *cols):
111
"""Group Frame by specified columns"""
112
113
# Export methods
114
def export_names(self) -> tuple:
115
"""Export column names"""
116
117
def export_stypes(self) -> tuple:
118
"""Export column storage types"""
119
```
120
121
### Frame Creation Examples
122
123
```python
124
import datatable as dt
125
126
# From dictionary
127
DT = dt.Frame({
128
'A': [1, 2, 3, 4, 5],
129
'B': ['a', 'b', 'c', 'd', 'e'],
130
'C': [1.1, 2.2, 3.3, 4.4, 5.5]
131
})
132
133
# From list of lists (each inner list is one column, not one row)
134
DT = dt.Frame([[1, 2, 3], ['a', 'b', 'c'], [1.1, 2.2, 3.3]],
135
names=['A', 'B', 'C'])
136
137
# From numpy array
138
import numpy as np
139
arr = np.random.rand(1000, 5)
140
DT = dt.Frame(arr)
141
142
# From pandas DataFrame
143
import pandas as pd
144
pdf = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
145
DT = dt.Frame(pdf)
146
147
# Empty Frame with specified structure
148
DT = dt.Frame([[], [], []], names=['A', 'B', 'C'],
149
stypes=[dt.int64, dt.str64, dt.float64])
150
151
# With type specification
152
DT = dt.Frame([1, 2, 3, 4, 5], stype=dt.float32)
153
```
154
155
### Frame Indexing and Selection
156
157
```python
158
from datatable import f  # `f` refers to the columns of the Frame being indexed

# Column selection
159
DT[:, 'A'] # Select column A
160
DT[:, ['A', 'B']] # Select multiple columns
161
DT[:, f.A] # Select using f object
162
DT[:, f[:]] # Select all columns
163
164
# Row selection
165
DT[0, :] # First row
166
DT[0:5, :] # First 5 rows
167
DT[-1, :] # Last row
168
DT[f.A > 2, :] # Conditional selection
169
170
# Combined selection
171
DT[f.A > 2, ['B', 'C']] # Filter rows and select columns
172
DT[0:10, f.A:f.C] # Slice rows and columns
173
174
# Boolean indexing
175
mask = DT[:, f.A > dt.mean(f.A)]
176
DT[mask, :]
177
```
178
179
### Frame Properties and Inspection
180
181
```python
182
# Basic properties
183
print(DT.shape) # (nrows, ncols)
184
print(DT.names) # Column names
185
print(DT.stypes) # Storage types
186
print(DT.nrows) # Number of rows
187
print(DT.ncols) # Number of columns
188
189
# Data inspection
190
DT.head() # First 10 rows
191
DT.tail(5) # Last 5 rows
192
DT.describe() # Summary statistics
193
DT.nunique() # Unique value counts
194
DT.countna() # Missing value counts
195
196
# Display
197
DT.view() # Interactive view
198
print(DT) # String representation
199
```
200
201
## Types
202
203
### Type Objects
204
205
```python { .api }
206
class Type:
207
"""Describes the type of data stored in a single Frame column (e.g. dt.Type.int32, dt.Type.str32)"""
208
pass
209
210
class FExpr:
211
"""Expression object representing column operations and transformations"""
212
pass
213
214
class Namespace:
215
"""Environment giving lazy access to a Frame's columns inside DT[i, j, ...]; the `f` and `g` objects are instances"""
216
pass
217
```