or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

code-generation.md data-structures.md filesystem.md index.md runtime-operators.md type-system.md utilities.md

data-structures.mddocs/

0

# Data Structures and Vectorization

1

2

Core data structures for efficient columnar processing, type conversion between internal and external formats, and vectorized operations for high-performance analytics workloads.

3

4

## Capabilities

5

6

### Vectorized Column Access

7

8

Interfaces for accessing data in columnar format, providing efficient memory access patterns and enabling vectorized operations for analytical workloads.

9

10

```java { .api }

11

/**

12

* Base interface for columnar data access

13

*/

14

interface ColumnVector {

15

/** Check if value at given row is null */

16

boolean isNullAt(int rowId);

17

18

/** Get number of rows in this vector */

19

int getLen();

20

}

21

22

/** Columnar access for boolean data */

23

interface BooleanColumnVector extends ColumnVector {

24

boolean getBoolean(int rowId);

25

}

26

27

/** Columnar access for integer data */

28

interface IntColumnVector extends ColumnVector {

29

int getInt(int rowId);

30

}

31

32

/** Columnar access for long data */

33

interface LongColumnVector extends ColumnVector {

34

long getLong(int rowId);

35

}

36

37

/** Columnar access for float data */

38

interface FloatColumnVector extends ColumnVector {

39

float getFloat(int rowId);

40

}

41

42

/** Columnar access for double data */

43

interface DoubleColumnVector extends ColumnVector {

44

double getDouble(int rowId);

45

}

46

47

/** Columnar access for byte array data */

48

interface BytesColumnVector extends ColumnVector {

49

/** Get bytes data at row position */

50

Bytes getBytes(int rowId);

51

52

/** Inner class for byte array handling with offset and length */

53

class Bytes {

54

/** Byte data */

55

public byte[] data;

56

57

/** Offset in data array */

58

public int offset;

59

60

/** Length of data */

61

public int len;

62

63

/** Create bytes wrapper */

64

Bytes(byte[] data, int offset, int len);

65

66

/** Set bytes data */

67

void setByteArray(byte[] data, int offset, int len);

68

}

69

}

70

71

/** Columnar access for decimal data */

72

interface DecimalColumnVector extends ColumnVector {

73

DecimalData getDecimal(int rowId, int precision, int scale);

74

}

75

76

/** Columnar access for timestamp data */

77

interface TimestampColumnVector extends ColumnVector {

78

TimestampData getTimestamp(int rowId, int precision);

79

}

80

81

/** Columnar access for array data */

82

interface ArrayColumnVector extends ColumnVector {

83

ArrayData getArray(int rowId);

84

}

85

86

/** Columnar access for row data */

87

interface RowColumnVector extends ColumnVector {

88

RowData getRow(int rowId);

89

}

90

```

91

92

### Vectorized Column Batch

93

94

Main container for batch columnar processing, organizing multiple column vectors for efficient processing of tabular data.

95

96

```java { .api }

97

/**

98

* Main class for batch columnar processing

99

* Organizes multiple column vectors for efficient tabular data processing

100

*/

101

class VectorizedColumnBatch {

102

/** Column vectors in this batch */

103

public final ColumnVector[] columns;

104

105

/** Default batch size for optimal performance */

106

public static final int DEFAULT_SIZE = 2048;

107

108

/**

109

* Create a new vectorized column batch

110

* @param vectors Array of column vectors to organize

111

*/

112

VectorizedColumnBatch(ColumnVector[] vectors);

113

114

/** Set the number of rows in this batch */

115

void setNumRows(int numRows);

116

117

/** Get the number of rows in this batch */

118

int getNumRows();

119

120

/** Get number of columns (arity) */

121

int getArity();

122

123

/** Check if value is null at given position */

124

boolean isNullAt(int rowId, int colId);

125

126

/** Get boolean value at position */

127

boolean getBoolean(int rowId, int colId);

128

129

/** Get byte value at position */

130

byte getByte(int rowId, int colId);

131

132

/** Get short value at position */

133

short getShort(int rowId, int colId);

134

135

/** Get integer value at position */

136

int getInt(int rowId, int colId);

137

138

/** Get long value at position */

139

long getLong(int rowId, int colId);

140

141

/** Get float value at position */

142

float getFloat(int rowId, int colId);

143

144

/** Get double value at position */

145

double getDouble(int rowId, int colId);

146

147

/** Get string value at position */

148

String getString(int rowId, int colId);

149

150

/** Get decimal value at position */

151

DecimalData getDecimal(int rowId, int colId, int precision, int scale);

152

153

/** Get timestamp value at position */

154

TimestampData getTimestamp(int rowId, int colId, int precision);

155

156

/** Get array value at position */

157

ArrayData getArray(int rowId, int colId);

158

159

/** Get row value at position */

160

RowData getRow(int rowId, int colId);

161

}

162

```

163

164

### Writable Column Vectors

165

166

Mutable column vector implementations that support writing data, used for building columnar data structures.

167

168

```java { .api }

169

/**

170

* Base class for writable column vectors

171

*/

172

abstract class WritableColumnVector implements ColumnVector {

173

/** Set null value at given row */

174

void setNullAt(int rowId);

175

176

/** Set not null at given row */

177

void setNotNullAt(int rowId);

178

179

/** Reset the vector for reuse */

180

abstract void reset();

181

}

182

183

/** Writable boolean vector implementation */

184

class WritableBooleanVector extends WritableColumnVector

185

implements BooleanColumnVector {

186

/** Set boolean value at given row */

187

void setBoolean(int rowId, boolean value);

188

}

189

190

/** Writable integer vector implementation */

191

class WritableIntVector extends WritableColumnVector

192

implements IntColumnVector {

193

/** Set integer value at given row */

194

void setInt(int rowId, int value);

195

}

196

197

/** Writable long vector implementation */

198

class WritableLongVector extends WritableColumnVector

199

implements LongColumnVector {

200

/** Set long value at given row */

201

void setLong(int rowId, long value);

202

}

203

204

/** Writable float vector implementation */

205

class WritableFloatVector extends WritableColumnVector

206

implements FloatColumnVector {

207

/** Set float value at given row */

208

void setFloat(int rowId, float value);

209

}

210

211

/** Writable double vector implementation */

212

class WritableDoubleVector extends WritableColumnVector

213

implements DoubleColumnVector {

214

/** Set double value at given row */

215

void setDouble(int rowId, double value);

216

}

217

218

/** Writable byte vector implementation */

219

class WritableByteVector extends WritableColumnVector

220

implements ByteColumnVector {

221

/** Set byte value at given row */

222

void setByte(int rowId, byte value);

223

}

224

225

/** Writable bytes vector implementation */

226

class WritableBytesVector extends WritableColumnVector

227

implements BytesColumnVector {

228

/** Set bytes value at given row */

229

void setBytes(int rowId, byte[] value);

230

}

231

232

/** Writable timestamp vector implementation */

233

class WritableTimestampVector extends WritableColumnVector

234

implements TimestampColumnVector {

235

/** Set timestamp value at given row */

236

void setTimestamp(int rowId, TimestampData value);

237

}

238

```

239

240

### Heap-based Vector Implementations

241

242

Concrete implementations of column vectors using heap memory storage, providing efficient storage for columnar data.

243

244

```java { .api }

245

/** Heap-based boolean vector */

246

class HeapBooleanVector extends WritableBooleanVector {

247

HeapBooleanVector(int len);

248

}

249

250

/** Heap-based byte vector */

251

class HeapByteVector extends WritableByteVector {

252

HeapByteVector(int len);

253

}

254

255

/** Heap-based integer vector */

256

class HeapIntVector extends WritableIntVector {

257

HeapIntVector(int len);

258

}

259

260

/** Heap-based long vector */

261

class HeapLongVector extends WritableLongVector {

262

HeapLongVector(int len);

263

}

264

265

/** Heap-based float vector */

266

class HeapFloatVector extends WritableFloatVector {

267

HeapFloatVector(int len);

268

}

269

270

/** Heap-based double vector */

271

class HeapDoubleVector extends WritableDoubleVector {

272

HeapDoubleVector(int len);

273

}

274

275

/** Heap-based bytes vector */

276

class HeapBytesVector extends WritableBytesVector {

277

HeapBytesVector(int len);

278

}

279

280

/** Heap-based timestamp vector */

281

class HeapTimestampVector extends WritableTimestampVector {

282

HeapTimestampVector(int len);

283

}

284

```

285

286

### Data Structure Conversion

287

288

Framework for converting between Flink's internal data representations and external formats, enabling interoperability with various data processing systems.

289

290

```java { .api }

291

/**

292

* Key interface for converting between internal and external data formats

293

* Enables interoperability between Flink's internal representations and external systems

294

*/

295

interface DataStructureConverter<I, E> {

296

/** Convert from internal format to external format */

297

E toExternal(I internal);

298

299

/** Convert from external format to internal format */

300

I toInternal(E external);

301

}

302

```

303

304

### Array Converters

305

306

Specialized converters for array data types, handling conversion between internal array representations and external array formats.

307

308

```java { .api }

309

/** Convert boolean arrays */

310

class ArrayBooleanArrayConverter implements DataStructureConverter<ArrayData, boolean[]> {

311

ArrayBooleanArrayConverter();

312

}

313

314

/** Convert integer arrays */

315

class ArrayIntArrayConverter implements DataStructureConverter<ArrayData, int[]> {

316

ArrayIntArrayConverter();

317

}

318

319

/** Convert long arrays */

320

class ArrayLongArrayConverter implements DataStructureConverter<ArrayData, long[]> {

321

ArrayLongArrayConverter();

322

}

323

324

/** Convert float arrays */

325

class ArrayFloatArrayConverter implements DataStructureConverter<ArrayData, float[]> {

326

ArrayFloatArrayConverter();

327

}

328

329

/** Convert double arrays */

330

class ArrayDoubleArrayConverter implements DataStructureConverter<ArrayData, double[]> {

331

ArrayDoubleArrayConverter();

332

}

333

334

/** Convert string arrays */

335

class ArrayStringArrayConverter implements DataStructureConverter<ArrayData, String[]> {

336

ArrayStringArrayConverter();

337

}

338

```

339

340

### Collection Converters

341

342

Converters for common Java collection types, enabling seamless integration with standard Java data structures.

343

344

```java { .api }

345

/** Convert to/from ArrayList */

346

class ArrayListConverter<E> implements DataStructureConverter<ArrayData, ArrayList<E>> {

347

ArrayListConverter(DataStructureConverter<Object, E> elementConverter);

348

}

349

350

/** Convert to/from Map structures */

351

class MapMapConverter<K, V> implements DataStructureConverter<MapData, Map<K, V>> {

352

MapMapConverter(

353

DataStructureConverter<Object, K> keyConverter,

354

DataStructureConverter<Object, V> valueConverter

355

);

356

}

357

```

358

359

### Temporal Data Converters

360

361

Specialized converters for date, time, and timestamp data, handling conversion between internal temporal representations and Java temporal types.

362

363

```java { .api }

364

/** Convert date data */

365

class DateDateConverter implements DataStructureConverter<Integer, Date> {

366

DateDateConverter();

367

}

368

369

/** Convert time data */

370

class TimeTimeConverter implements DataStructureConverter<Integer, Time> {

371

TimeTimeConverter();

372

}

373

374

/** Convert timestamp data */

375

class TimestampTimestampConverter implements DataStructureConverter<TimestampData, Timestamp> {

376

TimestampTimestampConverter(int precision);

377

}

378

```

379

380

### Special Converters

381

382

Converters for complex data types including raw objects, structured objects, and row data.

383

384

```java { .api }

385

/** Convert raw objects */

386

class RawObjectConverter<T> implements DataStructureConverter<RawValueData<T>, T> {

387

RawObjectConverter(Class<T> clazz);

388

}

389

390

/** Convert structured objects */

391

class StructuredObjectConverter<T> implements DataStructureConverter<RowData, T> {

392

StructuredObjectConverter(Class<T> clazz, LogicalType dataType);

393

}

394

395

/** Convert row data */

396

class RowRowConverter implements DataStructureConverter<RowData, Row> {

397

RowRowConverter(LogicalType[] fieldTypes);

398

}

399

```

400

401

### Binary Data Utilities

402

403

Utilities for efficient manipulation of binary data representations, optimized for high-performance data processing.

404

405

```java { .api }

406

/** Utilities for binary row data manipulation */

407

class BinaryRowDataUtil {

408

/** Check if two binary rows are equal */

409

static boolean equals(BinaryRowData row1, BinaryRowData row2);

410

411

/** Get hash code for binary row data */

412

static int hashCode(BinaryRowData row);

413

414

/** Copy binary row data */

415

static BinaryRowData copy(BinaryRowData source);

416

}

417

418

/** Binary string data utilities */

419

class BinaryStringDataUtil {

420

/** Create binary string from Java string */

421

static BinaryStringData fromString(String str);

422

423

/** Convert binary string to Java string */

424

static String toString(BinaryStringData binaryString);

425

426

/** Compare two binary strings */

427

static int compare(BinaryStringData str1, BinaryStringData str2);

428

}

429

430

/** Interface for binary data writing */

431

interface BinaryWriter {

432

/** Write binary data */

433

void writeBytes(byte[] bytes);

434

435

/** Complete the write operation */

436

void complete();

437

}

438

439

/** Wrapper for row data with boxed primitives */

440

class BoxedWrapperRowData implements RowData {

441

BoxedWrapperRowData(RowData row);

442

443

/** Get field value as boxed primitive */

444

Object get(int pos);

445

}

446

```

447

448

### Dictionary Encoding

449

450

Interface for dictionary encoding support, enabling compressed storage and efficient processing of categorical data.

451

452

```java { .api }

453

/**

454

* Interface for dictionary encoding

455

* Enables compressed storage of categorical data

456

*/

457

interface Dictionary {

458

/** Decode the dictionary-encoded value for the given id */

459

Object decodeToBinary(int id);

460

461

/** Get the size of the dictionary */

462

int size();

463

}

464

```

465

466

## Usage Examples

467

468

```java

469

// Create column vectors

470

ColumnVector[] vectors = new ColumnVector[3];

471

vectors[0] = new HeapIntVector(1024);

472

vectors[1] = new HeapDoubleVector(1024);

473

vectors[2] = new HeapBytesVector(1024);

474

475

// Create a vectorized batch for processing

476

VectorizedColumnBatch batch = new VectorizedColumnBatch(vectors);

477

478

// Write data to the batch

479

WritableIntVector intCol = (WritableIntVector) batch.columns[0];

480

WritableDoubleVector doubleCol = (WritableDoubleVector) batch.columns[1];

481

WritableBytesVector bytesCol = (WritableBytesVector) batch.columns[2];

482

483

for (int i = 0; i < 100; i++) {

484

intCol.setInt(i, i * 10);

485

doubleCol.setDouble(i, i * 3.14);

486

bytesCol.setBytes(i, ("value_" + i).getBytes());

487

}

488

batch.setNumRows(100);

489

490

// Read data from the batch

491

for (int i = 0; i < batch.getNumRows(); i++) {

492

int intValue = batch.getInt(i, 0);

493

double doubleValue = batch.getDouble(i, 1);

494

String stringValue = batch.getString(i, 2);

495

}

496

497

// Convert between data formats (internalArrayData is assumed to be an existing ArrayData instance obtained elsewhere)

498

DataStructureConverter<ArrayData, int[]> arrayConverter =

499

new ArrayIntArrayConverter();

500

501

int[] externalArray = arrayConverter.toExternal(internalArrayData);

502

ArrayData internalArray = arrayConverter.toInternal(externalArray);

503

```