or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

authentication.mdcore-database.mddata-science.mddata-types.mderror-handling.mdindex.mdmetadata.md

data-types.mddocs/

0

# Data Types and Type Conversion

1

2

Comprehensive support for PostgreSQL and Redshift data types with Python object mapping, including arrays, JSON, geometric types, and date/time handling. The redshift_connector provides seamless conversion between database types and Python objects for efficient data processing.

3

4

## Capabilities

5

6

### DB-API 2.0 Type Constructors

7

8

Standard DB-API 2.0 constructor functions for creating database-compatible objects from Python values.

9

10

```python { .api }

11

def Date(year: int, month: int, day: int) -> date:

12

"""

13

Construct an object holding a date value.

14

15

Parameters:

16

- year: Year value

17

- month: Month value (1-12)

18

- day: Day value (1-31)

19

20

Returns:

21

datetime.date object

22

23

This function is part of the DB-API 2.0 specification.

24

"""

25

26

def Time(hour: int, minute: int, second: int) -> time:

27

"""

28

Construct an object holding a time value.

29

30

Parameters:

31

- hour: Hour value (0-23)

32

- minute: Minute value (0-59)

33

- second: Second value (0-59)

34

35

Returns:

36

datetime.time object

37

38

This function is part of the DB-API 2.0 specification.

39

"""

40

41

def Timestamp(year: int, month: int, day: int, hour: int, minute: int, second: int) -> datetime:

42

"""

43

Construct an object holding a timestamp value.

44

45

Parameters:

46

- year: Year value

47

- month: Month value (1-12)

48

- day: Day value (1-31)

49

- hour: Hour value (0-23)

50

- minute: Minute value (0-59)

51

- second: Second value (0-59)

52

53

Returns:

54

datetime.datetime object

55

56

This function is part of the DB-API 2.0 specification.

57

"""

58

59

def DateFromTicks(ticks: float) -> date:

60

"""

61

Construct an object holding a date value from the given ticks value

62

(number of seconds since the epoch).

63

64

Parameters:

65

- ticks: Seconds since Unix epoch

66

67

Returns:

68

datetime.date object

69

70

This function is part of the DB-API 2.0 specification.

71

"""

72

73

def TimeFromTicks(ticks: float) -> time:

74

"""

75

Construct an object holding a time value from the given ticks value

76

(number of seconds since the epoch).

77

78

Parameters:

79

- ticks: Seconds since Unix epoch

80

81

Returns:

82

datetime.time object

83

84

This function is part of the DB-API 2.0 specification.

85

"""

86

87

def TimestampFromTicks(ticks: float) -> datetime:

88

"""

89

Construct an object holding a timestamp value from the given ticks value

90

(number of seconds since the epoch).

91

92

Parameters:

93

- ticks: Seconds since Unix epoch

94

95

Returns:

96

datetime.datetime object

97

98

This function is part of the DB-API 2.0 specification.

99

"""

100

101

def Binary(value: bytes) -> bytes:

102

"""

103

Construct an object holding binary data.

104

105

Parameters:

106

- value: Binary data as bytes

107

108

Returns:

109

bytes object

110

111

This function is part of the DB-API 2.0 specification.

112

"""

113

```

114

115

### PostgreSQL/Redshift Specific Types

116

117

Specialized type classes for PostgreSQL and Redshift specific data types that don't have direct Python equivalents.

118

119

```python { .api }

120

class PGType:

121

"""Base class for PostgreSQL-specific types."""

122

123

def __init__(self, value) -> None:

124

"""Initialize with a value."""

125

self.value = value

126

127

def encode(self, encoding: str) -> bytes:

128

"""Encode the value using specified encoding."""

129

return str(self.value).encode(encoding)

130

131

class PGEnum(PGType):

132

"""

133

PostgreSQL enum type wrapper.

134

135

Handles both string values and Python enum objects.

136

"""

137

138

def __init__(self, value) -> None:

139

"""

140

Initialize with enum value.

141

142

Parameters:

143

- value: String or enum object

144

"""

145

if isinstance(value, str):

146

self.value = value

147

else:

148

self.value = value.value

149

150

class PGJson(PGType):

151

"""

152

PostgreSQL JSON type wrapper.

153

154

Automatically serializes Python objects to JSON strings.

155

"""

156

157

def encode(self, encoding: str) -> bytes:

158

"""Encode the value as JSON using specified encoding."""

159

from json import dumps

160

return dumps(self.value).encode(encoding)

161

162

class PGJsonb(PGType):

163

"""

164

PostgreSQL JSONB type wrapper.

165

166

Automatically serializes Python objects to JSON strings for binary JSON storage.

167

"""

168

169

def encode(self, encoding: str) -> bytes:

170

"""Encode the value as JSON using specified encoding."""

171

from json import dumps

172

return dumps(self.value).encode(encoding)

173

174

class PGTsvector(PGType):

175

"""

176

PostgreSQL text search vector type wrapper.

177

178

Used for full-text search functionality.

179

"""

180

181

class PGVarchar(str):

182

"""

183

PostgreSQL VARCHAR type wrapper.

184

185

Inherits from str but provides type identification for the driver.

186

"""

187

188

class PGText(str):

189

"""

190

PostgreSQL TEXT type wrapper.

191

192

Inherits from str but provides type identification for the driver.

193

"""

194

```

195

196

### Type OID Constants

197

198

Object identifier constants for all supported PostgreSQL and Redshift data types.

199

200

```python { .api }

201

class RedshiftOID(IntEnum):

202

"""Enum containing all PostgreSQL/Redshift type OID constants."""

203

204

# Integer Types

205

BIGINT = 20

206

INTEGER = 23

207

SMALLINT = 21

208

209

# Floating Point Types

210

FLOAT = 701

211

REAL = 700

212

DOUBLE_PRECISION = 701

213

214

# Numeric Types

215

NUMERIC = 1700

216

DECIMAL = 1700

217

218

# Boolean Type

219

BOOLEAN = 16

220

221

# Character Types

222

VARCHAR = 1043

223

CHAR = 1042

224

TEXT = 25

225

BPCHAR = 1042

226

227

# Binary Types

228

BYTES = 17

229

BYTEA = 17

230

231

# Date/Time Types

232

DATE = 1082

233

TIME = 1083

234

TIMETZ = 1266

235

TIMESTAMP = 1114

236

TIMESTAMPTZ = 1184

237

INTERVAL = 1186

238

239

# JSON Types

240

JSON = 114

241

JSONB = 3802

242

243

# Array Types (examples)

244

BIGINT_ARRAY = 1016

245

INTEGER_ARRAY = 1007

246

VARCHAR_ARRAY = 1015

247

BOOLEAN_ARRAY = 1000

248

249

# Geometric Types

250

GEOMETRY = 3000

251

GEOGRAPHY = 3001

252

253

# Network Types

254

INET = 869

255

CIDR = 650

256

257

# UUID Type

258

UUID = 2950

259

260

# Other Types

261

OID = 26

262

NULL = 0

263

264

# All OID constants are also available at module level

265

BIGINT = RedshiftOID.BIGINT.value

266

INTEGER = RedshiftOID.INTEGER.value

267

VARCHAR = RedshiftOID.VARCHAR.value

268

# ... etc

269

```

270

271

### Type Conversion Configuration

272

273

Configuration options for controlling how database types are converted to Python objects.

274

275

```python { .api }

276

# Numeric conversion configuration

277

conn = redshift_connector.connect(

278

# ... connection parameters

279

numeric_to_float=False # Use Decimal objects for NUMERIC/DECIMAL (default)

280

# numeric_to_float=True # Convert NUMERIC/DECIMAL to Python float

281

)

282

283

# Protocol version affects type conversion behavior

284

conn = redshift_connector.connect(

285

# ... connection parameters

286

client_protocol_version=2 # BINARY protocol (default, most efficient)

287

# client_protocol_version=1 # EXTENDED_RESULT_METADATA protocol

288

# client_protocol_version=0 # BASE_SERVER protocol (text-based)

289

)

290

```

291

292

### Array Type Support

293

294

Comprehensive support for PostgreSQL array types with automatic conversion to/from Python lists.

295

296

```python { .api }

297

# Array type usage examples

298

299

# Insert array data

300

cursor.execute(

301

"INSERT INTO array_table (int_array, text_array) VALUES (%s, %s)",

302

([1, 2, 3, 4, 5], ['apple', 'banana', 'cherry'])

303

)

304

305

# Retrieve array data

306

cursor.execute("SELECT int_array, text_array FROM array_table")

307

row = cursor.fetchone()

308

int_array = row[0] # Returns Python list: [1, 2, 3, 4, 5]

309

text_array = row[1] # Returns Python list: ['apple', 'banana', 'cherry']

310

311

# Multi-dimensional arrays

312

cursor.execute(

313

"INSERT INTO matrix_table (matrix) VALUES (%s)",

314

([[1, 2], [3, 4]])

315

)

316

317

# Array utility functions (from utils.array_util)

318

from redshift_connector.utils import (

319

array_check_dimensions,

320

array_dim_lengths,

321

array_find_first_element,

322

array_flatten,

323

array_has_null

324

)

325

326

# Validate array structure before insertion

327

data = [[1, 2, 3], [4, 5, 6]]

328

if array_check_dimensions(data):

329

cursor.execute("INSERT INTO table (matrix_col) VALUES (%s)", (data,))

330

```

331

332

### JSON and JSONB Support

333

334

Native support for JSON data types with automatic serialization/deserialization.

335

336

```python

337

import redshift_connector

338

from redshift_connector import PGJson, PGJsonb

339

340

cursor = conn.cursor()

341

342

# Insert JSON data

343

json_data = {

344

"name": "John Doe",

345

"age": 30,

346

"addresses": [

347

{"type": "home", "city": "New York"},

348

{"type": "work", "city": "Boston"}

349

]

350

}

351

352

# Using PGJson wrapper

353

cursor.execute(

354

"INSERT INTO user_data (id, profile) VALUES (%s, %s)",

355

(1, PGJson(json_data))

356

)

357

358

# Using PGJsonb wrapper for binary JSON

359

cursor.execute(

360

"INSERT INTO user_data (id, profile) VALUES (%s, %s)",

361

(2, PGJsonb(json_data))

362

)

363

364

# Direct insertion (automatically handled)

365

cursor.execute(

366

"INSERT INTO user_data (id, profile) VALUES (%s, %s)",

367

(3, json_data) # Automatically converted to JSON

368

)

369

370

# Retrieve JSON data

371

cursor.execute("SELECT profile FROM user_data WHERE id = %s", (1,))

372

profile = cursor.fetchone()[0] # Returns parsed Python dict/list

373

print(profile['name']) # Access JSON data directly

374

```

375

376

### Date and Time Handling

377

378

Comprehensive date and time type support with timezone handling and interval types.

379

380

```python

381

from datetime import date, time, datetime, timedelta

382

import redshift_connector

383

384

cursor = conn.cursor()

385

386

# Date types

387

today = date.today()

388

cursor.execute("INSERT INTO events (event_date) VALUES (%s)", (today,))

389

390

# Time types

391

event_time = time(14, 30, 0) # 2:30 PM

392

cursor.execute("INSERT INTO events (event_time) VALUES (%s)", (event_time,))

393

394

# Timestamp types

395

event_timestamp = datetime(2023, 12, 25, 14, 30, 0)

396

cursor.execute("INSERT INTO events (event_timestamp) VALUES (%s)", (event_timestamp,))

397

398

# Using DB-API constructors

399

from redshift_connector import Date, Time, Timestamp

400

401

new_date = Date(2023, 12, 25)

402

new_time = Time(14, 30, 0)

403

new_timestamp = Timestamp(2023, 12, 25, 14, 30, 0)

404

405

cursor.execute(

406

"INSERT INTO events (event_date, event_time, event_timestamp) VALUES (%s, %s, %s)",

407

(new_date, new_time, new_timestamp)

408

)

409

410

# Interval types (Redshift specific)

411

cursor.execute("SELECT event_timestamp + INTERVAL '1 day' FROM events")

412

future_dates = cursor.fetchall()

413

```

414

415

### Binary Data Handling

416

417

Support for binary data types including BYTEA and geometry types.

418

419

```python { .api }

420

from redshift_connector import Binary

421

422

# Insert binary data

423

binary_data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR...' # PNG image data

424

cursor.execute(

425

"INSERT INTO images (filename, data) VALUES (%s, %s)",

426

('logo.png', Binary(binary_data))

427

)

428

429

# Retrieve binary data

430

cursor.execute("SELECT data FROM images WHERE filename = %s", ('logo.png',))

431

image_data = cursor.fetchone()[0] # Returns bytes object

432

433

# Geometry data (Redshift spatial types)

434

cursor.execute("SELECT ST_AsText(geom) FROM spatial_table")

435

geometry_text = cursor.fetchone()[0] # WKT format string

436

```

437

438

### Type Conversion Utilities

439

440

Low-level utilities for custom type conversion and handling edge cases.

441

442

```python { .api }

443

# Type conversion utility functions (from utils.type_utils)

444

from redshift_connector.utils import (

445

numeric_in, # Parse numeric text input

446

numeric_in_binary, # Parse numeric binary input

447

date_in, # Parse date text input

448

time_in, # Parse time text input

449

text_recv, # Receive text data

450

array_recv_text, # Receive array text data

451

array_recv_binary # Receive array binary data

452

)

453

454

# Type mapping constants

455

from redshift_connector.utils import py_types, redshift_types

456

457

# Python type to Redshift type mapping

458

python_type = int

459

redshift_oid = py_types.get(python_type) # Get corresponding Redshift OID

460

461

# Redshift type information

462

type_info = redshift_types.get(RedshiftOID.INTEGER) # Get type conversion info

463

```

464

465

### Custom Type Handling

466

467

Patterns for handling custom types and extending type conversion capabilities.

468

469

```python

470

import redshift_connector

471

from decimal import Decimal

472

from uuid import UUID

473

474

# Custom type handling examples

475

cursor = conn.cursor()

476

477

# UUID handling

478

user_id = UUID('12345678-1234-5678-1234-567812345678')

479

cursor.execute("INSERT INTO users (id, name) VALUES (%s, %s)", (str(user_id), 'John'))

480

481

# Decimal precision handling

482

price = Decimal('99.99')

483

cursor.execute("INSERT INTO products (price) VALUES (%s)", (price,))

484

485

# Custom enum handling

486

from enum import Enum

487

from redshift_connector import PGEnum

488

489

class Status(Enum):

490

ACTIVE = 'active'

491

INACTIVE = 'inactive'

492

PENDING = 'pending'

493

494

status = Status.ACTIVE

495

cursor.execute("INSERT INTO records (status) VALUES (%s)", (PGEnum(status),))

496

497

# Large object handling

498

with open('large_file.txt', 'rb') as f:

499

large_data = f.read()

500

cursor.execute("INSERT INTO documents (content) VALUES (%s)", (Binary(large_data),))

501

```