or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-data-structures.md data-manipulation.md expression-system.md file-io.md index.md mathematical-functions.md reductions-aggregations.md row-operations.md set-operations.md string-operations.md time-operations.md type-system.md

docs/core-data-structures.md

0

# Core Data Structures

1

2

The Frame class is datatable's main data structure for representing and manipulating 2-dimensional tabular data with high-performance columnar storage.

3

4

## Capabilities

5

6

### Frame Class

7

8

The primary data structure for tabular data with column-oriented storage, supporting various data types and high-performance operations.

9

10

```python { .api }

11

class Frame:

12

def __init__(self, data=None, *, names=None, stypes=None,

13

stype=None, types=None, type=None):

14

"""

15

Create a new Frame from various data sources.

16

17

Parameters:

18

- data: Data source (dict, list, numpy array, pandas DataFrame, etc.)

19

- names: Column names (list of strings)

20

- stypes: Column storage types (list of stype objects)

21

- stype: Single stype for all columns

22

- types: Alias for stypes

23

- type: Alias for stype

24

"""

25

26

# Properties

27

@property

28

def shape(self) -> tuple:

29

"""(nrows, ncols) tuple describing Frame dimensions"""

30

31

@property

32

def names(self) -> tuple:

33

"""Column names as a tuple of strings"""

34

35

@property

36

def stypes(self) -> tuple:

37

"""Column storage types as tuple of stype objects"""

38

39

@property

40

def ltypes(self) -> tuple:

41

"""Column logical types as tuple of ltype objects"""

42

43

@property

44

def nrows(self) -> int:

45

"""Number of rows"""

46

47

@property

48

def ncols(self) -> int:

49

"""Number of columns"""

50

51

# Data access and manipulation

52

def __getitem__(self, key):

53

"""Select rows and/or columns using various indexing methods"""

54

55

def __setitem__(self, key, value):

56

"""Update or add columns and rows"""

57

58

def __len__(self) -> int:

59

"""Number of rows in the Frame"""

60

61

# Conversion methods

62

def to_pandas(self) -> 'pandas.DataFrame':

63

"""Convert to pandas DataFrame"""

64

65

def to_numpy(self) -> 'numpy.ndarray':

66

"""Convert to numpy array"""

67

68

def to_dict(self) -> dict:

69

"""Convert to dictionary"""

70

71

def to_list(self) -> list:

72

"""Convert to list of lists"""

73

74

def to_csv(self, file=None, **kwargs):

75

"""Write Frame to CSV file or string"""

76

77

# Display methods

78

def head(self, n=10) -> 'Frame':

79

"""Return first n rows"""

80

81

def tail(self, n=10) -> 'Frame':

82

"""Return last n rows"""

83

84

def view(self, start_row=None, end_row=None):

85

"""Display Frame in terminal or notebook"""

86

87

# Statistical methods

88

def describe(self) -> 'Frame':

89

"""Generate descriptive statistics"""

90

91

def nunique(self) -> 'Frame':

92

"""Count unique values in each column"""

93

94

def countna(self) -> 'Frame':

95

"""Count missing values in each column"""

96

97

# Data manipulation

98

def copy(self, deep=True) -> 'Frame':

99

"""Create a copy of the Frame"""

100

101

def delete(self, rows=None, cols=None):

102

"""Delete specified rows and/or columns"""

103

104

def sort(self, *cols) -> 'Frame':

105

"""Sort Frame by specified columns"""

106

107

def unique(self, *cols) -> 'Frame':

108

"""Return unique rows based on specified columns"""

109

110

def group_by(self, *cols):

111

"""Group Frame by specified columns"""

112

113

# Export methods

114

def export_names(self) -> tuple:

115

"""Export column names"""

116

117

def export_stypes(self) -> tuple:

118

"""Export column storage types"""

119

```

120

121

### Frame Creation Examples

122

123

```python

124

import datatable as dt

125

126

# From dictionary

127

DT = dt.Frame({

128

'A': [1, 2, 3, 4, 5],

129

'B': ['a', 'b', 'c', 'd', 'e'],

130

'C': [1.1, 2.2, 3.3, 4.4, 5.5]

131

})

132

133

# From list of lists (each inner list is one column)

134

DT = dt.Frame([[1, 2, 3], ['a', 'b', 'c'], [1.1, 2.2, 3.3]],

135

names=['A', 'B', 'C'])

136

137

# From numpy array

138

import numpy as np

139

arr = np.random.rand(1000, 5)

140

DT = dt.Frame(arr)

141

142

# From pandas DataFrame

143

import pandas as pd

144

pdf = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

145

DT = dt.Frame(pdf)

146

147

# Empty Frame with specified structure

148

DT = dt.Frame(names=['A', 'B', 'C'],

149

stypes=[dt.int64, dt.str64, dt.float64])

150

151

# With type specification

152

DT = dt.Frame([1, 2, 3, 4, 5], stype=dt.float32)

153

```

154

155

### Frame Indexing and Selection

156

157

```python

158

# Column selection

159

DT[:, 'A'] # Select column A

160

DT[:, ['A', 'B']] # Select multiple columns

161

DT[:, f.A] # Select using the f object (requires: from datatable import f)

162

DT[:, f[:]] # Select all columns

163

164

# Row selection

165

DT[0, :] # First row

166

DT[0:5, :] # First 5 rows

167

DT[-1, :] # Last row

168

DT[f.A > 2, :] # Conditional selection

169

170

# Combined selection

171

DT[f.A > 2, ['B', 'C']] # Filter rows and select columns

172

DT[0:10, f.A:f.C] # Slice rows and columns

173

174

# Boolean indexing

175

mask = DT[:, f.A > dt.mean(f.A)]

176

DT[mask, :]

177

```

178

179

### Frame Properties and Inspection

180

181

```python

182

# Basic properties

183

print(DT.shape) # (nrows, ncols)

184

print(DT.names) # Column names

185

print(DT.stypes) # Storage types

186

print(DT.nrows) # Number of rows

187

print(DT.ncols) # Number of columns

188

189

# Data inspection

190

DT.head() # First 10 rows

191

DT.tail(5) # Last 5 rows

192

DT.describe() # Summary statistics

193

DT.nunique() # Unique value counts

194

DT.countna() # Missing value counts

195

196

# Display

197

DT.view() # Interactive view

198

print(DT) # String representation

199

```

200

201

## Types

202

203

### Type Objects

204

205

```python { .api }

206

class Type:

207

"""Type system helper for datatable operations"""

208

pass

209

210

class FExpr:

211

"""Expression object representing column operations and transformations"""

212

pass

213

214

class Namespace:

215

"""Namespace object for organizing column references and operations"""

216

pass

217

```