or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-data-structures.md data-manipulation.md expression-system.md file-io.md index.md mathematical-functions.md reductions-aggregations.md row-operations.md set-operations.md string-operations.md time-operations.md type-system.md

docs/data-manipulation.md

0

# Data Manipulation

1

2

Comprehensive functions for combining, transforming, and reshaping data frames with high-performance operations.

3

4

## Capabilities

5

6

### Frame Binding Operations

7

8

Combine multiple frames by columns or rows with flexible options for handling mismatched structures.

9

10

```python { .api }

11

def cbind(*frames, force=False) -> Frame:

12

"""

13

Bind frames column-wise (horizontally).

14

15

Parameters:

16

- *frames: Frame objects to bind

17

- force: Force binding even with mismatched row counts

18

19

Returns:

20

Frame with columns from all input frames

21

"""

22

23

def rbind(*frames, force=False, bynames=True) -> Frame:

24

"""

25

Bind frames row-wise (vertically).

26

27

Parameters:

28

- *frames: Frame objects to bind

29

- force: Force binding even with mismatched column sets or column types (missing columns are filled with NAs)

30

- bynames: Match columns by name (True) or position (False)

31

32

Returns:

33

Frame with rows from all input frames

34

"""

35

```

36

37

### Sorting and Uniqueness

38

39

Sort frames and extract unique values with flexible column specifications.

40

41

```python { .api }

42

def sort(frame, *cols, reverse=False, na_position='first') -> Frame:

43

"""

44

Sort frame by specified columns.

45

46

Parameters:

47

- frame: Frame to sort

48

- *cols: Column expressions or names to sort by

49

- reverse: Sort in descending order

50

- na_position: Position of NA values ('first' or 'last'), or 'remove' to exclude rows with NAs in the sort columns

51

52

Returns:

53

Sorted Frame

54

"""

55

56

def unique(frame, *cols) -> Frame:

57

"""

58

Return unique rows based on specified columns.

59

60

Parameters:

61

- frame: Frame to process

62

- *cols: Columns to consider for uniqueness (all if none specified)

63

64

Returns:

65

Frame with unique rows

66

"""

67

```

68

69

### Utility Functions

70

71

Helper functions for data transformation and manipulation.

72

73

```python { .api }

74

def repeat(frame, n) -> Frame:

75

"""

76

Repeat frame rows n times.

77

78

Parameters:

79

- frame: Frame to repeat

80

- n: Number of repetitions

81

82

Returns:

83

Frame with repeated rows

84

"""

85

86

def shift(column, n=1) -> FExpr:

87

"""

88

Shift column values by n positions.

89

90

Parameters:

91

- column: Column expression to shift

92

- n: Number of positions to shift (positive=down, negative=up)

93

94

Returns:

95

Expression with shifted values

96

"""

97

98

def fillna(column, value) -> FExpr:

99

"""

100

Fill missing values in column with specified value.

101

102

Parameters:

103

- column: Column expression with missing values

104

- value: Value to use for filling NAs

105

106

Returns:

107

Expression with filled values

108

"""

109

110

def ifelse(condition, true_value, false_value) -> FExpr:

111

"""

112

Conditional expression returning different values based on condition.

113

114

Parameters:

115

- condition: Boolean expression

116

- true_value: Value when condition is True

117

- false_value: Value when condition is False

118

119

Returns:

120

Expression with conditional values

121

"""

122

```

123

124

### Type Conversion

125

126

Convert between different data types with explicit control over the conversion process.

127

128

```python { .api }

129

def as_type(frame_or_column, new_type) -> Frame:

130

"""

131

Convert frame or column to specified type.

132

133

Parameters:

134

- frame_or_column: Frame or column expression to convert

135

- new_type: Target stype or Type object

136

137

Returns:

138

Frame or expression with converted types

139

"""

140

141

def update(**kwargs) -> UpdateExpr:

142

"""

143

Create update specification for adding or modifying columns.

144

145

Parameters:

146

- **kwargs: column_name=expression pairs

147

148

Returns:

149

Update expression for use in Frame operations

150

"""

151

152

def cut(column, bins, right=True, labels=None) -> FExpr:

153

"""

154

Bin values into discrete intervals.

155

156

Parameters:

157

- column: Column expression to bin

158

- bins: Number of bins or sequence of bin edges

159

- right: Include right edge of intervals

160

- labels: Labels for bins

161

162

Returns:

163

Categorical column with binned values

164

"""

165

166

def qcut(column, q, labels=None) -> FExpr:

167

"""

168

Quantile-based binning of values.

169

170

Parameters:

171

- column: Column expression to bin

172

- q: Number of quantiles or sequence of quantile boundaries

173

- labels: Labels for bins

174

175

Returns:

176

Categorical column with quantile-based bins

177

"""

178

179

def split_into_nhot(frame, delimiter=",") -> Frame:

180

"""

181

N-hot encoding for delimited string values (each row may set several indicator columns).

182

183

Parameters:

184

- frame: Frame containing delimited strings

185

- delimiter: Character used to separate values

186

187

Returns:

188

Frame with binary columns for each unique value

189

"""

190

```

191

192

## Data Binding Examples

193

194

### Column Binding

195

196

```python

197

import datatable as dt
from datatable import f

198

199

# Create sample frames

200

DT1 = dt.Frame({'A': [1, 2, 3], 'B': [4, 5, 6]})

201

DT2 = dt.Frame({'C': [7, 8, 9], 'D': [10, 11, 12]})

202

DT3 = dt.Frame({'E': [13, 14, 15]})

203

204

# Bind columns

205

result = dt.cbind(DT1, DT2, DT3)

206

# Result: Frame with columns A, B, C, D, E

207

208

# Force binding with mismatched row counts

209

DT4 = dt.Frame({'F': [16, 17]}) # Only 2 rows

210

result = dt.cbind(DT1, DT4, force=True) # Shorter frame is padded with NAs

211

```

212

213

### Row Binding

214

215

```python

216

# Create compatible frames

217

DT1 = dt.Frame({'X': [1, 2], 'Y': ['a', 'b']})

218

DT2 = dt.Frame({'X': [3, 4], 'Y': ['c', 'd']})

219

DT3 = dt.Frame({'X': [5, 6], 'Y': ['e', 'f']})

220

221

# Bind rows

222

result = dt.rbind(DT1, DT2, DT3)

223

# Result: Frame with 6 rows and columns X, Y

224

225

# Bind with different column orders

226

DT4 = dt.Frame({'Y': ['g', 'h'], 'X': [7, 8]})

227

result = dt.rbind(DT1, DT4, bynames=True) # Matches by column names

228

229

# Force binding with type mismatches

230

DT5 = dt.Frame({'X': [1.1, 2.2], 'Y': ['i', 'j']}) # X is float

231

result = dt.rbind(DT1, DT5, force=True) # Forces compatible types

232

```

233

234

## Sorting Examples

235

236

### Basic Sorting

237

238

```python

239

DT = dt.Frame({

240

'A': [3, 1, 4, 1, 5],

241

'B': ['c', 'a', 'd', 'a', 'e'],

242

'C': [3.3, 1.1, 4.4, 1.2, 5.5]

243

})

244

245

# Sort by single column

246

sorted_DT = dt.sort(DT, f.A) # Sort by A ascending

247

sorted_DT = dt.sort(DT, -f.A) # Sort by A descending

248

sorted_DT = dt.sort(DT, f.A, reverse=True) # Alternative descending

249

250

# Sort by multiple columns

251

sorted_DT = dt.sort(DT, f.B, f.A) # Sort by B, then A

252

sorted_DT = dt.sort(DT, f.B, -f.C) # Sort by B asc, C desc

253

254

# Sort with NA handling

255

DT_na = dt.Frame({'X': [3, None, 1, None, 2]})

256

sorted_DT = dt.sort(DT_na, f.X, na_position='last')

257

```

258

259

### Sorting in Frame Operations

260

261

```python

262

# Sort as part of selection

263

result = DT[:, :, dt.sort(f.A)]

264

result = DT[f.A > 2, :, dt.sort(f.B)]

265

266

# Sort within groups

267

result = DT[:, :, dt.sort(f.C), dt.by(f.B)]

268

```

269

270

## Uniqueness Examples

271

272

### Basic Unique Operations

273

274

```python

275

DT = dt.Frame({

276

'A': [1, 2, 2, 3, 3, 3],

277

'B': ['x', 'y', 'y', 'z', 'z', 'w'],

278

'C': [1.1, 2.2, 2.2, 3.3, 3.4, 3.5]

279

})

280

281

# Unique rows (all columns)

282

unique_DT = dt.unique(DT)

283

284

# Unique based on specific columns

285

unique_DT = dt.unique(DT, f.A) # Unique values of A

286

unique_DT = dt.unique(DT, f.A, f.B) # Unique combinations of A and B

287

288

# Unique in Frame operations

289

result = DT[:, :, dt.unique(f.A)]

290

```

291

292

## Transformation Examples

293

294

### Conditional Logic

295

296

```python

297

DT = dt.Frame({

298

'score': [85, 92, 78, 95, 67],

299

'category': ['A', 'B', 'A', 'B', 'C']

300

})

301

302

# Simple conditional

303

result = DT[:, dt.update(

304

grade=dt.ifelse(f.score >= 90, "A", "B")

305

)]

306

307

# Nested conditionals

308

result = DT[:, dt.update(

309

grade=dt.ifelse(f.score >= 90, "A",

310

dt.ifelse(f.score >= 80, "B",

311

dt.ifelse(f.score >= 70, "C", "F")))

312

)]

313

314

# Conditional aggregation

315

result = DT[:, dt.sum(dt.ifelse(f.score >= 80, 1, 0)), dt.by(f.category)]

316

```

317

318

### Missing Value Handling

319

320

```python

321

DT = dt.Frame({

322

'A': [1, None, 3, None, 5],

323

'B': [1.1, 2.2, None, 4.4, None]

324

})

325

326

# Fill missing values

327

result = DT[:, dt.update(

328

A_filled=dt.fillna(f.A, 0),

329

B_filled=dt.fillna(f.B, dt.mean(f.B))

330

)]

331

332

# Forward fill by one step (each NA takes the previous row's value; runs of consecutive NAs are not fully filled)

333

result = DT[:, dt.update(

334

A_ffill=dt.fillna(f.A, dt.shift(f.A, 1))

335

)]

336

337

# Conditional filling

338

result = DT[:, dt.update(

339

A_smart=dt.ifelse(dt.isna(f.A), dt.mean(f.A), f.A)

340

)]

341

```

342

343

### Data Shifting

344

345

```python

346

DT = dt.Frame({

347

'date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04'],

348

'value': [10, 15, 12, 18]

349

})

350

351

# Lag values (shift down)

352

result = DT[:, dt.update(

353

prev_value=dt.shift(f.value, 1), # Previous value

354

prev2_value=dt.shift(f.value, 2) # Value 2 periods ago

355

)]

356

357

# Lead values (shift up)

358

result = DT[:, dt.update(

359

next_value=dt.shift(f.value, -1), # Next value

360

next2_value=dt.shift(f.value, -2) # Value 2 periods ahead

361

)]

362

363

# Calculate differences

364

result = DT[:, dt.update(

365

diff=f.value - dt.shift(f.value, 1),

366

pct_change=((f.value - dt.shift(f.value, 1)) / dt.shift(f.value, 1)) * 100

367

)]

368

```

369

370

### Repetition and Expansion

371

372

```python

373

DT = dt.Frame({'A': [1, 2], 'B': ['x', 'y']})

374

375

# Repeat entire frame

376

repeated = dt.repeat(DT, 3) # 6 rows total

377

378

# Repeat with expressions

379

result = DT[:, dt.repeat(f.A, 2)] # Each value repeated twice

380

381

# Create expanding sequences

382

base = dt.Frame({'seq': [1]})

383

expanded = dt.repeat(base, 5)[:, dt.update(seq=range(1, 6))]

384

```

385

386

## Type Conversion Examples

387

388

### Basic Type Conversion

389

390

```python

391

DT = dt.Frame({

392

'A': [1, 2, 3], # int32 by default

393

'B': [1.1, 2.2, 3.3], # float64 by default

394

'C': ['1', '2', '3'] # str32 by default

395

})

396

397

# Convert single column

398

result = DT[:, dt.update(A_float=dt.as_type(f.A, dt.float32))]

399

400

# Convert multiple columns

401

result = DT[:, dt.update(

402

A_str=dt.as_type(f.A, dt.str32),

403

C_int=dt.as_type(f.C, dt.int32)

404

)]

405

406

# Convert entire frame

407

DT_float = dt.as_type(DT, dt.float64)

408

```

409

410

### Advanced Type Operations

411

412

```python

413

# Conditional type conversion

414

result = DT[:, dt.update(

415

A_converted=dt.ifelse(f.A > 2,

416

dt.as_type(f.A, dt.float32),

417

dt.as_type(f.A, dt.int32))

418

)]

419

420

# Safe conversion with error handling

421

try:

422

result = DT[:, dt.update(C_numeric=dt.as_type(f.C, dt.float64))]

423

except dt.exceptions.TypeError as e:

424

# Handle conversion errors

425

result = DT[:, dt.update(C_numeric=dt.fillna(dt.as_type(f.C, dt.float64), 0))]

426

```